Source code for hdf5file

#
##
##  SPDX-FileCopyrightText: © 2007-2024 Benedict Verhegghe <bverheg@gmail.com>
##  SPDX-License-Identifier: GPL-3.0-or-later
##
##  This file is part of pyFormex 3.5  (Thu Feb  8 19:11:13 CET 2024)
##  pyFormex is a tool for generating, manipulating and transforming 3D
##  geometrical models by sequences of mathematical operations.
##  Home page: https://pyformex.org
##  Project page: https://savannah.nongnu.org/projects/pyformex/
##  Development: https://gitlab.com/bverheg/pyformex
##  Distributed under the GNU General Public License version 3 or later.
##
##  This program is free software: you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation, either version 3 of the License, or
##  (at your option) any later version.
##
##  This program is distributed in the hope that it will be useful,
##  but WITHOUT ANY WARRANTY; without even the implied warranty of
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU General Public License for more details.
##
##  You should have received a copy of the GNU General Public License
##  along with this program.  If not, see http://www.gnu.org/licenses/.
##
"""Store pyFormex geometry or projects in HDF5 format.

Hierarchical Data Format (HDF) is a set of file formats designed to store and
organize large amounts of data.
The HDF libraries and associated tools are available under a liberal, BSD-like
license for general use.
HDF is supported by many commercial and non-commercial software platforms and
programming languages.
See more on `<https://en.wikipedia.org/wiki/Hierarchical_Data_Format>`_.

This module is currently under development and should be considered experimental.
Do not rely on it yet as the only output format for your data.
"""
from pyformex import software
software.Module.require('h5py')

import h5py  # noqa: E402
import numpy as np  # noqa: E402
import pyformex as pf  # noqa: E402
from pyformex.filetools import File  # noqa: E402

# Set h5py's global default so that creation order is tracked.
h5py.get_config().track_order = True

if pf._pytest or pf._sphinx:
    # Write the marker through a context manager so the file handle is
    # closed deterministically instead of being leaked to the garbage
    # collector.
    with open('output', 'w') as marker:
        print("IMPORTING CORE LANG", file=marker)
    pf._import_core()

# TODO: - create a Hdf5File class, like PzfFile
#       - add transparent compression


# functions creating dataset

def dump_array(arr, parent, name):
    """Store an array in a dataset with path parent/name.

    Parameters
    ----------
    arr: :term:`array_like`
        The data to store. Anything that numpy can convert to an array
        is accepted; ndarray subclasses are passed through unchanged.
    parent:
        The object on which ``create_dataset`` is called (a h5py group
        or file).
    name: str
        The name of the dataset inside ``parent``.

    Returns
    -------
    The object returned by ``parent.create_dataset``.
    """
    # Accept plain sequences and scalars too, not only objects that
    # already expose .shape and .dtype.
    arr = np.asanyarray(arr)
    return parent.create_dataset(
        name, arr.shape, dtype=arr.dtype, data=arr, track_order=True)
def dump_coords(self, parent, name):
    """Store a Coords as dataset parent/name tagged 'pyformex.Coords'."""
    dset = dump_array(self, parent, name)
    dset.attrs['class'] = 'pyformex.Coords'


def dump_formex(self, parent, name):
    """Store a Formex as group parent/name.

    The group holds a 'coords' dataset, an optional 'prop' dataset and
    the attributes 'class' and (if set) 'eltype'.
    """
    grp = parent.create_group(name, track_order=True)
    dump_array(self.coords, grp, 'coords')
    grp.attrs['class'] = 'pyformex.Formex'
    if self.eltype is not None:
        grp.attrs['eltype'] = str(self.eltype)
    if self.prop is not None:
        dump_array(self.prop, grp, 'prop')


def dump_mesh(self, parent, name, clas='Mesh'):
    """Store a Mesh-like object as group parent/name.

    The group holds 'coords' and 'elems' datasets, an optional 'prop'
    dataset and the attributes 'class' ('pyformex.' + clas) and 'eltype'.
    """
    grp = parent.create_group(name, track_order=True)
    dump_array(self.coords, grp, 'coords')
    dump_array(self.elems, grp, 'elems')
    grp.attrs['class'] = 'pyformex.' + clas
    grp.attrs['eltype'] = self.elName()
    if self.prop is not None:
        dump_array(self.prop, grp, 'prop')


def dump_trisurface(self, parent, name):
    """Store a TriSurface like a Mesh, tagged 'pyformex.TriSurface'."""
    return dump_mesh(self, parent, name, clas='TriSurface')


def dump_bezierspline(self, parent, name, clas='BezierSpline'):
    """Store a spline as group parent/name.

    The group holds a 'control' dataset (from ``self.coords``) and the
    attributes 'class', 'degree' and 'closed'.
    """
    grp = parent.create_group(name, track_order=True)
    dump_array(self.coords, grp, 'control')
    grp.attrs['class'] = 'pyformex.' + clas
    grp.attrs['degree'] = self.degree
    grp.attrs['closed'] = self.closed


def dump_polyline(self, parent, name):
    """Store a PolyLine like a BezierSpline, tagged 'pyformex.PolyLine'."""
    return dump_bezierspline(self, parent, name, clas='PolyLine')


def dump_dict(obj, parent, name):
    """Store a dict as group parent/name with one child per item."""
    grp = parent.create_group(name, track_order=True)
    grp.attrs['class'] = 'dict'
    for key, val in obj.items():
        dump_any(val, grp, key)


def dump_list(obj, parent, name, clas='list'):
    """Store a sequence as group parent/name.

    The group gets the attributes 'class' (the value of ``clas``) and
    'len'; the items are stored as children with names generated by
    ``pf.NameSequence('obj')``.
    """
    grp = parent.create_group(name, track_order=True)
    # Record the requested class: this used to be hard-coded to 'list',
    # so tuples dumped through dump_tuple lost their type.
    grp.attrs['class'] = clas
    grp.attrs['len'] = len(obj)
    objname = pf.NameSequence('obj')
    for val in obj:
        dump_any(val, grp, next(objname))


def dump_tuple(obj, parent, name):
    """Store a tuple like a list, tagged with class 'tuple'."""
    return dump_list(obj, parent, name, clas='tuple')


# Dispatch table: exact type of the object -> function storing it.
# NOTE(review): dump_bezierspline and dump_polyline are not registered
# here, so dump_any never calls them -- confirm whether that is intended.
hdf5_dump = {
    pf.Coords: dump_coords,
    pf.Formex: dump_formex,
    pf.Mesh: dump_mesh,
    pf.TriSurface: dump_trisurface,
    dict: dump_dict,
    list: dump_list,
    tuple: dump_tuple,
}


# functions creating group

# If True, str/int/float values are stored as attributes of the parent
# group instead of as datasets.
simple_attrs = False


def dump_any(obj, parent, name, attrs=None):
    """Store any supported object under parent/name.

    Parameters
    ----------
    obj:
        The object to store. Objects whose exact type is a key of
        ``hdf5_dump`` are handled by the registered function; str, int
        and float values and numpy arrays are stored directly. Anything
        else is reported and skipped.
    parent:
        The h5py group (or file) to store the object in.
    name: str
        The name of the object inside ``parent``.
    attrs: dict, optional
        Extra attributes to set on the dataset when ``obj`` ends up
        being stored as an array.
    """
    clas = type(obj)
    dump = hdf5_dump.get(clas, None)
    if dump:
        print(f"Add {clas.__name__} object {name} to {parent.name}")
        dump(obj, parent, name)
        return

    if isinstance(obj, (str, int, float)):
        if simple_attrs:
            # Store in attrs
            parent.attrs[name] = obj
            print(f"Add attr '{name}' to group {parent.name}")
            return
        # Store as an array, remembering the Python type in 'class'.
        # Caller supplied attrs are kept instead of being discarded.
        attrs = {**(attrs or {}), 'class': type(obj).__name__}
        if isinstance(obj, str):
            obj = obj.encode('utf-8')
        obj = np.array(obj)

    if isinstance(obj, np.ndarray):
        # Create dataset for the array
        print(f"ndarray {name} {type(obj)}")
        dset = dump_array(obj, parent, name)
        if attrs:
            dset.attrs.update(attrs)
        return

    print(f"hdf5_dump: ignore object {name} of type {type(obj)}")
def write_hdf5(filename, **kargs):
    """Write objects to a HDF5 file.

    Every keyword argument is stored with ``dump_any`` as a top level
    object of the file, using the keyword as its name.

    The file is opened through :class:`File`, which according to the
    original author's note allows transparent compression.

    Parameters
    ----------
    filename: :term:`path_like`
        The path of the file to be written.
    **kargs:
        The named objects to store.
    """
    with File(filename, 'wb') as stream, h5py.File(stream, 'w') as h5:
        for key in kargs:
            dump_any(kargs[key], h5, key)
def h5py_dataset_iterator(g, prefix=''):
    """Recursively yield (path, dataset) for all datasets below group g."""
    for key, item in g.items():
        path = '{}/{}'.format(prefix, key)
        if isinstance(item, h5py.Dataset):  # test for dataset
            yield (path, item)
        elif isinstance(item, h5py.Group):  # test for group (go down)
            yield from h5py_dataset_iterator(item, path)


def h5py_iterator(grp, prefix=''):
    """Recursively yield (path, dataset) below grp, printing each group.

    For every group visited, its name and attributes are printed before
    its items are processed.
    """
    print(f"Group {grp.name} Attrs: {dict(grp.attrs)}")
    for key, item in grp.items():
        path = f"{prefix}/{key}"
        if isinstance(item, h5py.Group):  # test for group (go down)
            yield from h5py_iterator(item, path)
        elif isinstance(item, h5py.Dataset):  # test for dataset
            yield (path, item)


def list_hdf5(filename):
    """Print the groups and datasets in a HDF5 file with their attributes."""
    print(f"===== List {filename} ======")
    with h5py.File(filename, 'r') as f:
        # h5py_iterator only yields datasets, no extra type check needed
        for (path, dset) in h5py_iterator(f):
            print(f"Dataset {path} Attrs: {dict(dset.attrs)}")


def _load_optional_array(grp, key):
    """Return dataset grp[key] as a numpy array, or None if it is absent."""
    if key in grp:
        return np.asarray(grp[key])
    return None


def load_pyf_obj(obj, clas):
    """Reconstruct a pyFormex object from a HDF5 group or dataset.

    Parameters
    ----------
    obj:
        The h5py group (Mesh, TriSurface, Formex) or dataset (Coords)
        holding the data.
    clas: str
        The stored class name, e.g. 'pyformex.Mesh'.

    Returns
    -------
    The reconstructed object, or None if the class is unknown or not
    supported.
    """
    # Resolve the class as an attribute of the pyformex package instead
    # of calling eval() on a string that comes from the file: eval is
    # unsafe on external data, and the bare name 'pyformex' is not bound
    # in this module (only 'pf' is), so it could not resolve anyway.
    prefix = 'pyformex.'
    shortname = clas[len(prefix):] if clas.startswith(prefix) else clas
    resolved = getattr(pf, shortname, None)
    if resolved is None:
        print("No such class:", clas)
        return None
    clas = resolved
    print(f"{clas=}")
    if clas in (pf.Mesh, pf.TriSurface):
        print(f"Reading {clas}")
        # Read the datasets into memory while the file is still open
        coords = np.asarray(obj['coords'])
        elems = np.asarray(obj['elems'])
        eltype = obj.attrs['eltype']
        prop = _load_optional_array(obj, 'prop')
        return clas(coords, elems, eltype=eltype, prop=prop)
    elif clas == pf.Formex:
        print(f"Reading {clas}")
        coords = np.asarray(obj['coords'])
        eltype = obj.attrs.get('eltype', None)
        prop = _load_optional_array(obj, 'prop')
        return clas(coords, eltype=eltype, prop=prop)
    elif clas in (pf.Coords,):
        print(f"Reading {clas}")
        # A Coords is stored as a dataset: read its contents
        return clas(np.asarray(obj))
    print(f"Can not load {obj.name=} {clas=}")
    return None


def load_hdf5_dset(obj):
    """Load the contents of a HDF5 dataset.

    The 'class' attribute of the dataset selects the return type:
    'int', 'float' and 'str' give the corresponding Python scalar, a
    class starting with 'pyformex.' is passed to ``load_pyf_obj`` and
    anything else returns the data as a numpy array.
    """
    clas = obj.attrs.get('class', '')
    if clas.startswith('pyformex.'):
        return load_pyf_obj(obj, clas)
    val = np.asarray(obj)
    if clas == 'int':
        return int(val)
    elif clas == 'float':
        return float(val)
    elif clas == 'str':
        # item() drops the padding of fixed length strings, so that an
        # empty string is not returned as '\x00'
        raw = val.item() if val.ndim == 0 else val.tobytes()
        if isinstance(raw, bytes):
            return raw.decode('utf-8')
        return str(raw)
    else:
        return val


# If True, the attributes of a group loaded as a dict are merged into
# the resulting dict.
update_attrs = False


def load_hdf5_group(obj):
    """Load the contents of a HDF5 group.

    The 'class' attribute of the group selects the result: a class
    starting with 'pyformex.' is passed to ``load_pyf_obj``, no class or
    'dict' gives a dict of the loaded items, 'list' gives a list and
    'tuple' a tuple. Other classes are reported and give None.
    """
    clas = obj.attrs.get('class', '')
    print(f"{obj.name=} {clas=}")
    if clas.startswith('pyformex.'):
        return load_pyf_obj(obj, clas)
    elif clas in ('', 'dict'):
        d = {key: load_hdf5_any(val) for key, val in obj.items()}
        if update_attrs:
            d.update(obj.attrs)
        return d
    elif clas in ('tuple', 'list'):
        length = int(obj.attrs.get('len', '0'))
        items = []
        for i in range(length):
            # presumably matches the names generated by
            # pf.NameSequence('obj') when dumping -- TODO confirm
            key = f"obj-{i}"
            if key in obj:
                val = load_hdf5_any(obj[key])
            else:
                # simple values may have been stored as attributes
                val = obj.attrs.get(key, None)
            items.append(val)
        return tuple(items) if clas == 'tuple' else items
    else:
        print(f"Can not load {obj.name=} {clas=}")


def load_hdf5_any(obj):
    """Load a HDF5 group or dataset; other objects give None."""
    if isinstance(obj, h5py.Group):
        return load_hdf5_group(obj)
    elif isinstance(obj, h5py.Dataset):
        return load_hdf5_dset(obj)
def read_hdf5(filename):
    """Read the contents of a HDF5 file.

    The file is opened read-only and its root group is loaded with
    ``load_hdf5_group``.

    Parameters
    ----------
    filename: :term:`path_like`
        The path to the HDF5 file to be loaded

    Returns
    -------
    dict
        The contents of the HDF5 file.
    """
    print(f"===== Load {filename} ======")
    with h5py.File(filename, 'r') as h5:
        contents = load_hdf5_group(h5)
    return contents
# End