/tables/hdf5extension.pyx

https://github.com/Ringenberg/PyTables · Cython · 2110 lines in the full file (listing truncated)

# -*- coding: utf-8 -*-

########################################################################
#
# License: BSD
# Created: September 21, 2002
# Author: Francesc Alted - faltet@pytables.com
#
# $Id$
#
########################################################################

"""Cython interface between several PyTables classes and HDF5 library.

Classes (type extensions):

    File
    AttributeSet
    Node
    Leaf
    Group
    Array
    VLArray
    UnImplemented

Functions:

Misc variables:

"""
import os
import warnings

from cpython cimport PY_MAJOR_VERSION
if PY_MAJOR_VERSION < 3:
  import cPickle as pickle
else:
  import pickle

import numpy

from tables.exceptions import HDF5ExtError, DataTypeWarning
from tables.utils import (check_file_access, byteorders, correct_byteorder,
                          SizeType)
from tables.atom import Atom
from tables.description import descr_from_dtype
from tables.utilsextension import (encode_filename, set_blosc_max_threads,
  atom_to_hdf5_type, atom_from_hdf5_type, hdf5_to_np_ext_type,
  create_nested_type, pttype_to_hdf5, pt_special_kinds,
  npext_prefixes_to_ptkinds, hdf5_class_to_string, platform_byteorder)
from tables._past import previous_api

# Types, constants, functions, classes & other objects from everywhere
from libc.stdlib cimport malloc, free
from libc.string cimport strdup, strlen
from numpy cimport import_array, ndarray, npy_intp
from cpython cimport (PyBytes_AsString, PyBytes_FromStringAndSize,
                      PyBytes_Check)
from cpython.unicode cimport PyUnicode_DecodeUTF8

from definitions cimport (const_char, uintptr_t, hid_t, herr_t, hsize_t,
  hvl_t, H5S_seloper_t, H5D_FILL_VALUE_UNDEFINED,
  H5O_TYPE_UNKNOWN, H5O_TYPE_GROUP, H5O_TYPE_DATASET,
  H5O_TYPE_NAMED_DATATYPE,
  H5L_TYPE_ERROR, H5L_TYPE_HARD, H5L_TYPE_SOFT, H5L_TYPE_EXTERNAL,
  H5T_class_t, H5T_sign_t, H5T_NATIVE_INT,
  H5T_cset_t, H5T_CSET_ASCII, H5T_CSET_UTF8,
  H5F_SCOPE_GLOBAL, H5F_ACC_TRUNC, H5F_ACC_RDONLY, H5F_ACC_RDWR,
  H5P_DEFAULT, H5P_FILE_ACCESS, H5P_FILE_CREATE,
  H5S_SELECT_SET, H5S_SELECT_AND, H5S_SELECT_NOTB,
  H5Fcreate, H5Fopen, H5Fclose, H5Fflush, H5Fget_vfd_handle,
  H5Fget_filesize, H5Fget_create_plist,
  H5Gcreate, H5Gopen, H5Gclose, H5Ldelete, H5Lmove,
  H5Dopen, H5Dclose, H5Dread, H5Dwrite, H5Dget_type,
  H5Dget_space, H5Dvlen_reclaim, H5Dget_storage_size,
  H5Dvlen_get_buf_size,
  H5Tclose, H5Tis_variable_str, H5Tget_sign,
  H5Adelete, H5T_BITFIELD, H5T_INTEGER, H5T_FLOAT, H5T_STRING,
  H5Tget_order,
  H5Pcreate, H5Pset_cache, H5Pclose, H5Pget_userblock, H5Pset_userblock,
  H5Pset_fapl_sec2, H5Pset_fapl_log, H5Pset_fapl_stdio, H5Pset_fapl_core,
  H5Pset_fapl_split,
  H5Sselect_all, H5Sselect_elements, H5Sselect_hyperslab,
  H5Screate_simple, H5Sclose,
  H5ATTRset_attribute, H5ATTRset_attribute_string,
  H5ATTRget_attribute, H5ATTRget_attribute_string,
  H5ATTRget_attribute_vlen_string_array,
  H5ATTRfind_attribute, H5ATTRget_type_ndims, H5ATTRget_dims,
  H5ARRAYget_ndims, H5ARRAYget_info,
  set_cache_size, get_objinfo, get_linkinfo, Giterate, Aiterate,
  H5UIget_info, get_len_of_range, conv_float64_timeval32, truncate_dset,
  H5_HAVE_DIRECT_DRIVER, pt_H5Pset_fapl_direct,
  H5_HAVE_WINDOWS_DRIVER, pt_H5Pset_fapl_windows,
  H5_HAVE_IMAGE_FILE, pt_H5Pset_file_image, pt_H5Fget_file_image)

cdef int H5T_CSET_DEFAULT = 16

from utilsextension cimport malloc_dims, get_native_type, cstr_to_pystr

#-------------------------------------------------------------------

# Functions from HDF5 ARRAY (this is not part of HDF5 HL; it's private)
cdef extern from "H5ARRAY.h" nogil:

  herr_t H5ARRAYmake(hid_t loc_id, char *dset_name, char *obversion,
                     int rank, hsize_t *dims, int extdim,
                     hid_t type_id, hsize_t *dims_chunk, void *fill_data,
                     int complevel, char *complib, int shuffle,
                     int fletcher32, void *data)

  herr_t H5ARRAYappend_records(hid_t dataset_id, hid_t type_id,
                               int rank, hsize_t *dims_orig,
                               hsize_t *dims_new, int extdim, void *data)

  herr_t H5ARRAYwrite_records(hid_t dataset_id, hid_t type_id,
                              int rank, hsize_t *start, hsize_t *step,
                              hsize_t *count, void *data)

  herr_t H5ARRAYread(hid_t dataset_id, hid_t type_id,
                     hsize_t start, hsize_t nrows, hsize_t step,
                     int extdim, void *data)

  herr_t H5ARRAYreadSlice(hid_t dataset_id, hid_t type_id,
                          hsize_t *start, hsize_t *stop,
                          hsize_t *step, void *data)

  herr_t H5ARRAYreadIndex(hid_t dataset_id, hid_t type_id, int notequal,
                          hsize_t *start, hsize_t *stop, hsize_t *step,
                          void *data)

  herr_t H5ARRAYget_chunkshape(hid_t dataset_id, int rank,
                               hsize_t *dims_chunk)

  herr_t H5ARRAYget_fill_value(hid_t dataset_id, hid_t type_id,
                               int *status, void *value)


# Functions for dealing with VLArray objects
cdef extern from "H5VLARRAY.h" nogil:

  herr_t H5VLARRAYmake(hid_t loc_id, char *dset_name, char *obversion,
                       int rank, hsize_t *dims, hid_t type_id,
                       hsize_t chunk_size, void *fill_data, int complevel,
                       char *complib, int shuffle, int fletcher32,
                       void *data)

  herr_t H5VLARRAYappend_records(hid_t dataset_id, hid_t type_id,
                                 int nobjects, hsize_t nrecords,
                                 void *data)

  herr_t H5VLARRAYmodify_records(hid_t dataset_id, hid_t type_id,
                                 hsize_t nrow, int nobjects,
                                 void *data)

  herr_t H5VLARRAYget_info(hid_t dataset_id, hid_t type_id,
                           hsize_t *nrecords, char *base_byteorder)

#----------------------------------------------------------------------------

# Initialization code

# The numpy API requires this function to be called before
# using any numpy facilities in an extension module.
import_array()

#---------------------------------------------------------------------------

# Helper functions

cdef hsize_t *npy_malloc_dims(int rank, npy_intp *pdims):
  """Return a malloc'ed hsize_t dims array built from a npy_intp *pdims."""

  cdef int i
  cdef hsize_t *dims

  dims = NULL
  if rank > 0:
    dims = <hsize_t *>malloc(rank * sizeof(hsize_t))
    for i from 0 <= i < rank:
      dims[i] = pdims[i]
  return dims


cdef object getshape(int rank, hsize_t *dims):
  """Return a shape (tuple) from a dims C array of rank dimensions."""

  cdef int i
  cdef object shape

  shape = []
  for i from 0 <= i < rank:
    shape.append(SizeType(dims[i]))

  return tuple(shape)
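
# For instance, with ``rank = 2`` and a C array ``dims = {3, 5}``,
# ``getshape(rank, dims)`` returns the Python tuple ``(3, 5)``
# (each element wrapped with ``SizeType``).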

# Helper function to quickly fetch an attribute string
cdef object get_attribute_string_or_none(hid_t node_id, char* attr_name):
  """Return a string/unicode attribute if it exists in node_id.

  It returns ``None`` if it does not exist (or there have been problems
  reading it).

  """

  cdef char *attr_value
  cdef int cset = H5T_CSET_DEFAULT
  cdef object retvalue
  cdef hsize_t size

  attr_value = NULL
  retvalue = None   # Default value
  if H5ATTRfind_attribute(node_id, attr_name):
    size = H5ATTRget_attribute_string(node_id, attr_name, &attr_value,
                                      &cset)
    if size == 0:
      return None
    if cset == H5T_CSET_UTF8:
      retvalue = PyUnicode_DecodeUTF8(attr_value, size, NULL)
      retvalue = numpy.unicode_(retvalue)
    else:
      retvalue = PyBytes_FromStringAndSize(attr_value, size)
      # AV: oct 2012
      # Since we now use the string size obtained from HDF5, we have to
      # strip the trailing zeros used for padding.
      # The entire process is quite odd, but due to a bug (??) in the way
      # numpy arrays are pickled in Python 3 we can't assume that
      # strlen(attr_value) is the actual length of the attribute, and
      # numpy.bytes_(attr_value) can give a truncated pickle string.
      retvalue = retvalue.rstrip(b'\x00')
      retvalue = numpy.bytes_(retvalue)

    # Important to release attr_value, because it has been malloc'ed!
    if attr_value:
      free(<void *>attr_value)

  return retvalue

# Get the numpy dtype scalar attribute from an HDF5 type as fast as possible
cdef object get_dtype_scalar(hid_t type_id, H5T_class_t class_id,
                             size_t itemsize):
  cdef H5T_sign_t sign
  cdef object stype

  if class_id == H5T_BITFIELD:
    stype = "b1"
  elif class_id == H5T_INTEGER:
    # Get the sign
    sign = H5Tget_sign(type_id)
    if (sign > 0):
      stype = "i%s" % (itemsize)
    else:
      stype = "u%s" % (itemsize)
  elif class_id == H5T_FLOAT:
    stype = "f%s" % (itemsize)
  elif class_id == H5T_STRING:
    if H5Tis_variable_str(type_id):
      raise TypeError("variable length strings are not supported yet")
    stype = "S%s" % (itemsize)

  # Try to get a NumPy type.  If this can't be done, return None.
  try:
    ntype = numpy.dtype(stype)
  except TypeError:
    ntype = None
  return ntype
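
# For instance (illustrative): a signed H5T_INTEGER with itemsize 4 maps to
# numpy.dtype('i4'), an H5T_FLOAT with itemsize 8 to numpy.dtype('f8'), and
# a fixed-width H5T_STRING with itemsize 16 to numpy.dtype('S16').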

_supported_drivers = (
    "H5FD_SEC2",
    "H5FD_DIRECT",
    #"H5FD_LOG",
    "H5FD_WINDOWS",
    "H5FD_STDIO",
    "H5FD_CORE",
    #"H5FD_FAMILY",
    #"H5FD_MULTI",
    "H5FD_SPLIT",
    #"H5FD_MPIO",
    #"H5FD_MPIPOSIX",
    #"H5FD_STREAM",
)

HAVE_DIRECT_DRIVER = bool(H5_HAVE_DIRECT_DRIVER)
HAVE_WINDOWS_DRIVER = bool(H5_HAVE_WINDOWS_DRIVER)
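
# A hedged usage sketch: from the Python side the driver is normally chosen
# through ``tables.open_file`` (PyTables 3.x), whose DRIVER* keyword
# arguments end up in the ``params`` dict consumed by ``File._g_new`` below:
#
#   import tables
#   f = tables.open_file("data.h5", mode="w", driver="H5FD_CORE",
#                        driver_core_backing_store=0)  # pure in-memory file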

# Type extensions declarations (these are subclassed by PyTables
# Python classes)

cdef class File:
  cdef hid_t   file_id
  cdef hid_t   access_plist
  cdef object  name

  def _g_new(self, name, pymode, **params):
    cdef herr_t err = 0
    cdef hid_t access_plist, create_plist = H5P_DEFAULT
    cdef hid_t meta_plist_id = H5P_DEFAULT, raw_plist_id = H5P_DEFAULT
    cdef size_t img_buf_len = 0, user_block_size = 0
    cdef void *img_buf_p = NULL
    cdef bytes encname
    #cdef bytes logfile_name

    # Check if we can handle the driver
    driver = params["DRIVER"]
    if driver is not None and driver not in _supported_drivers:
      raise ValueError("Invalid or unsupported driver: '%s'" % driver)
    if driver == "H5FD_SPLIT":
      meta_ext = params.get("DRIVER_SPLIT_META_EXT", "-m.h5")
      raw_ext = params.get("DRIVER_SPLIT_RAW_EXT", "-r.h5")
      meta_name = meta_ext % name if "%s" in meta_ext else name + meta_ext
      raw_name = raw_ext % name if "%s" in raw_ext else name + raw_ext
      enc_meta_ext = encode_filename(meta_ext)
      enc_raw_ext = encode_filename(raw_ext)

    # Create a new file using default properties
    self.name = name

    # Encode the filename in case it is unicode
    encname = encode_filename(name)

    # These fields can be seen from Python.
    self._v_new = None  # this will be computed later
    # """Is this file going to be created from scratch?"""
    self._isPTFile = True  # assume a PyTables file by default
    # """Does this HDF5 file have a PyTables format?"""

    assert pymode in ('r', 'r+', 'a', 'w'), \
        ("an invalid mode string ``%s`` "
         "passed the ``check_file_access()`` test; "
         "please report this to the authors" % pymode)

    image = params.get('DRIVER_CORE_IMAGE')
    if image:
      if driver != "H5FD_CORE":
        warnings.warn("The DRIVER_CORE_IMAGE parameter will be ignored by "
                      "the '%s' driver" % driver)
      elif not PyBytes_Check(image):
        raise TypeError("The DRIVER_CORE_IMAGE must be a string of bytes")
      elif not H5_HAVE_IMAGE_FILE:
        raise RuntimeError("Support for image files is only available in "
                           "HDF5 >= 1.8.9")

    # After the following check we can be quite sure
    # that the file or directory exists and permissions are right.
    if driver == "H5FD_SPLIT":
      for n in meta_name, raw_name:
        check_file_access(n, pymode)
    else:
      backing_store = params.get("DRIVER_CORE_BACKING_STORE", 1)
      if driver != "H5FD_CORE" or backing_store:
        check_file_access(name, pymode)

    # Should a new file be created?
    if image:
      exists = True
    elif driver == "H5FD_SPLIT":
      exists = os.path.exists(meta_name) and os.path.exists(raw_name)
    else:
      exists = os.path.exists(name)
    self._v_new = not (pymode in ('r', 'r+') or (pymode == 'a' and exists))

    user_block_size = params.get("USER_BLOCK_SIZE", 0)
    if user_block_size and not self._v_new:
      warnings.warn("The HDF5 file already exists: the USER_BLOCK_SIZE "
                    "will be ignored")
    elif user_block_size:
      user_block_size = int(user_block_size)
      is_pow_of_2 = ((user_block_size & (user_block_size - 1)) == 0)
      if user_block_size < 512 or not is_pow_of_2:
        raise ValueError("The USER_BLOCK_SIZE must be a power of 2 greater "
                         "than or equal to 512, or zero")

      # File creation property list
      create_plist = H5Pcreate(H5P_FILE_CREATE)
      err = H5Pset_userblock(create_plist, user_block_size)
      if err < 0:
        H5Pclose(create_plist)
        raise HDF5ExtError("Unable to set the user block size")

    # File access property list
    access_plist = H5Pcreate(H5P_FILE_ACCESS)

    # Set parameters for chunk cache
    H5Pset_cache(access_plist, 0,
                 params["CHUNK_CACHE_NELMTS"],
                 params["CHUNK_CACHE_SIZE"],
                 params["CHUNK_CACHE_PREEMPT"])

    # Set the I/O driver
    if driver == "H5FD_SEC2":
      err = H5Pset_fapl_sec2(access_plist)
    elif driver == "H5FD_DIRECT":
      if not H5_HAVE_DIRECT_DRIVER:
        H5Pclose(create_plist)
        H5Pclose(access_plist)
        raise RuntimeError("The H5FD_DIRECT driver is not available")
      err = pt_H5Pset_fapl_direct(access_plist,
                                  params["DRIVER_DIRECT_ALIGNMENT"],
                                  params["DRIVER_DIRECT_BLOCK_SIZE"],
                                  params["DRIVER_DIRECT_CBUF_SIZE"])
    #elif driver == "H5FD_LOG":
    #  if "DRIVER_LOG_FILE" not in params:
    #    H5Pclose(access_plist)
    #    raise ValueError("The DRIVER_LOG_FILE parameter is required for "
    #                     "the H5FD_LOG driver")
    #  logfile_name = encode_filename(params["DRIVER_LOG_FILE"])
    #  err = H5Pset_fapl_log(access_plist,
    #                        <char*>logfile_name,
    #                        params["DRIVER_LOG_FLAGS"],
    #                        params["DRIVER_LOG_BUF_SIZE"])
    elif driver == "H5FD_WINDOWS":
      if not H5_HAVE_WINDOWS_DRIVER:
        H5Pclose(access_plist)
        H5Pclose(create_plist)
        raise RuntimeError("The H5FD_WINDOWS driver is not available")
      err = pt_H5Pset_fapl_windows(access_plist)
    elif driver == "H5FD_STDIO":
      err = H5Pset_fapl_stdio(access_plist)
    elif driver == "H5FD_CORE":
      err = H5Pset_fapl_core(access_plist,
                             params["DRIVER_CORE_INCREMENT"],
                             backing_store)
      if image:
        img_buf_len = len(image)
        img_buf_p = <void *>PyBytes_AsString(image)
        err = pt_H5Pset_file_image(access_plist, img_buf_p, img_buf_len)
        if err < 0:
          H5Pclose(create_plist)
          H5Pclose(access_plist)
          raise HDF5ExtError("Unable to set the file image")
    #elif driver == "H5FD_FAMILY":
    #  H5Pset_fapl_family(access_plist,
    #                     params["DRIVER_FAMILY_MEMB_SIZE"],
    #                     fapl_id)
    #elif driver == "H5FD_MULTI":
    #  err = H5Pset_fapl_multi(access_plist, memb_map, memb_fapl, memb_name,
    #                          memb_addr, relax)
    elif driver == "H5FD_SPLIT":
      err = H5Pset_fapl_split(access_plist, enc_meta_ext, meta_plist_id,
                              enc_raw_ext, raw_plist_id)
    if err < 0:
      e = HDF5ExtError("Unable to set the file access property list")
      H5Pclose(create_plist)
      H5Pclose(access_plist)
      raise e

    if pymode == 'r':
      self.file_id = H5Fopen(encname, H5F_ACC_RDONLY, access_plist)
    elif pymode == 'r+':
      self.file_id = H5Fopen(encname, H5F_ACC_RDWR, access_plist)
    elif pymode == 'a':
      if exists:
        # A test for logging.
        ## H5Pset_sieve_buf_size(access_plist, 0)
        ## H5Pset_fapl_log (access_plist, "test.log", H5FD_LOG_LOC_WRITE, 0)
        self.file_id = H5Fopen(encname, H5F_ACC_RDWR, access_plist)
      else:
        self.file_id = H5Fcreate(encname, H5F_ACC_TRUNC, create_plist,
                                 access_plist)
    elif pymode == 'w':
      self.file_id = H5Fcreate(encname, H5F_ACC_TRUNC, create_plist,
                               access_plist)
    if self.file_id < 0:
      e = HDF5ExtError("Unable to open/create file '%s'" % name)
      H5Pclose(create_plist)
      H5Pclose(access_plist)
      raise e

    H5Pclose(create_plist)
    H5Pclose(access_plist)

    # Set the cache size
    set_cache_size(self.file_id, params["METADATA_CACHE_SIZE"])

    # Set the maximum number of threads for Blosc
    set_blosc_max_threads(params["MAX_BLOSC_THREADS"])
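
  # A hedged sketch of how the pieces above fit together (parameter names
  # as exposed by ``tables.open_file`` in PyTables 3.x):
  #
  #   import tables
  #   f = tables.open_file("data", mode="w", driver="H5FD_SPLIT")
  #   # with the default extensions this creates "data-m.h5" (metadata)
  #   # and "data-r.h5" (raw data)
  #   f.close()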

  # XXX: add the possibility to pass a pre-allocated buffer
  def get_file_image(self):
    """Retrieves an in-memory image of an existing, open HDF5 file.

    .. note:: this method requires HDF5 >= 1.8.9.

    .. versionadded:: 3.0

    """

    cdef ssize_t size = 0
    cdef size_t buf_len = 0
    cdef bytes image
    cdef char* cimage

    self.flush()

    # Retrieve the size of the buffer for the file image
    size = pt_H5Fget_file_image(self.file_id, NULL, buf_len)
    if size < 0:
      raise HDF5ExtError("Unable to retrieve the size of the buffer for "
                         "the file image. Please note that not all drivers "
                         "provide support for image files.")

    # Allocate the memory buffer
    image = PyBytes_FromStringAndSize(NULL, size)
    if not image:
      raise RuntimeError("Unable to allocate memory for the file image")

    cimage = image
    buf_len = size
    size = pt_H5Fget_file_image(self.file_id, <void*>cimage, buf_len)
    if size < 0:
      raise HDF5ExtError("Unable to retrieve the file image. "
                         "Please note that not all drivers provide support "
                         "for image files.")

    return image
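
  # Illustrative usage (hedged) through the public ``tables.File`` wrapper,
  # which inherits this method:
  #
  #   import tables
  #   f = tables.open_file("data.h5", mode="r")
  #   image = f.get_file_image()   # a bytes object; needs HDF5 >= 1.8.9
  #   f.close()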

  def get_filesize(self):
    """Returns the size of an HDF5 file.

    The returned size is that of the entire file, as opposed to only
    the HDF5 portion of the file.  I.e., size includes the user block,
    if any, the HDF5 portion of the file, and any data that may have
    been appended beyond the data written through the HDF5 Library.

    .. versionadded:: 3.0

    """

    cdef herr_t err = 0
    cdef hsize_t size = 0

    err = H5Fget_filesize(self.file_id, &size)
    if err < 0:
      raise HDF5ExtError("Unable to retrieve the HDF5 file size")

    return size

  def get_userblock_size(self):
    """Retrieves the size of a user block.

    .. versionadded:: 3.0

    """

    cdef herr_t err = 0
    cdef hsize_t size = 0
    cdef hid_t create_plist

    create_plist = H5Fget_create_plist(self.file_id)
    if create_plist < 0:
      raise HDF5ExtError("Unable to get the creation property list")

    err = H5Pget_userblock(create_plist, &size)
    if err < 0:
      H5Pclose(create_plist)
      raise HDF5ExtError("Unable to retrieve the user block size")

    H5Pclose(create_plist)

    return size
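
  # A hedged sketch relating the two sizes above:
  #
  #   f = tables.open_file("data.h5", mode="r")
  #   total = f.get_filesize()          # user block + HDF5 data
  #   ublock = f.get_userblock_size()   # 0 unless USER_BLOCK_SIZE was set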

  # Accessor definitions

  def _get_file_id(self):
    return self.file_id

  def fileno(self):
    """Return the underlying OS integer file descriptor.

    This is needed for lower-level file interfaces, such as the ``fcntl``
    module.

    """

    cdef void *file_handle
    cdef uintptr_t *descriptor
    cdef herr_t err

    err = H5Fget_vfd_handle(self.file_id, H5P_DEFAULT, &file_handle)
    if err < 0:
      raise HDF5ExtError(
        "Problems getting file descriptor for file ``%s``" % self.name)

    # Convert the 'void *file_handle' into an 'int *descriptor'
    descriptor = <uintptr_t *>file_handle
    return descriptor[0]
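
  # For example (hedged sketch), the descriptor can be handed to OS-level
  # APIs for file-backed drivers:
  #
  #   import os, tables
  #   f = tables.open_file("data.h5", mode="r")
  #   st = os.fstat(f.fileno())   # same file the HDF5 library has open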

  _getFileId = previous_api(_get_file_id)

  def _flush_file(self, scope):
    # Flush the file
    H5Fflush(self.file_id, scope)

  _flushFile = previous_api(_flush_file)

  def _close_file(self):
    # Close the file
    H5Fclose(self.file_id)
    self.file_id = 0    # Means file closed

  _closeFile = previous_api(_close_file)

  # This method is moved out of scope, until we provide code to delete
  # the memory booked by these extension types
  def __dealloc__(self):
    cdef int ret
    if self.file_id > 0:
      # Close the HDF5 file because the user didn't do it!
      ret = H5Fclose(self.file_id)
      if ret < 0:
        raise HDF5ExtError("Problems closing the file '%s'" % self.name)


cdef class AttributeSet:
  cdef object name

  def _g_new(self, node):
    self.name = node._v_name

  def _g_list_attr(self, node):
    "Return a tuple with the attribute list"
    a = Aiterate(node._v_objectid)
    return a

  _g_listAttr = previous_api(_g_list_attr)

  def _g_setattr(self, node, name, object value):
    """Save Python or NumPy objects as HDF5 attributes.

    Scalar Python objects, scalar NumPy & 0-dim NumPy objects will all be
    saved as H5T_SCALAR type.  N-dim NumPy objects will be saved as
    H5T_ARRAY type.

    """

    cdef int ret
    cdef hid_t dset_id, type_id
    cdef hsize_t *dims
    cdef ndarray ndv
    cdef object byteorder, rabyteorder, baseatom
    cdef char* cname = NULL
    cdef bytes encoded_name
    cdef int cset = H5T_CSET_DEFAULT

    encoded_name = name.encode('utf-8')
    # Get the C pointer
    cname = encoded_name

    # The dataset id of the node
    dset_id = node._v_objectid

    # Convert a NumPy scalar into a NumPy 0-dim ndarray
    if isinstance(value, numpy.generic):
      value = numpy.array(value)

    # Check if value is a NumPy ndarray and of a supported type
    if (isinstance(value, numpy.ndarray) and
        value.dtype.kind in ('V', 'S', 'b', 'i', 'u', 'f', 'c')):
      # Get a contiguous array: fixes #270 and gh-176
      #value = numpy.ascontiguousarray(value)
      value = value.copy()
      if value.dtype.kind == 'V':
        description, rabyteorder = descr_from_dtype(value.dtype)
        byteorder = byteorders[rabyteorder]
        type_id = create_nested_type(description, byteorder)
      else:
        # Get the associated native HDF5 type of the scalar type
        baseatom = Atom.from_dtype(value.dtype.base)
        byteorder = byteorders[value.dtype.byteorder]
        type_id = atom_to_hdf5_type(baseatom, byteorder)
      # Get dimensionality info
      ndv = <ndarray>value
      dims = npy_malloc_dims(ndv.ndim, ndv.shape)
      # Actually write the attribute
      ret = H5ATTRset_attribute(dset_id, cname, type_id,
                                ndv.ndim, dims, ndv.data)
      if ret < 0:
        raise HDF5ExtError("Can't set attribute '%s' in node:\n %s." %
                           (name, self._v_node))
      # Release resources
      free(<void *>dims)
      H5Tclose(type_id)
    else:
      # Object cannot be natively represented in HDF5.
      if (isinstance(value, numpy.ndarray) and
          value.dtype.kind == 'U' and
          value.shape == ()):
        value = value[()].encode('utf-8')
        cset = H5T_CSET_UTF8
      else:
        # Convert this object to a null-terminated string
        # (binary pickles are not supported at this moment)
        value = pickle.dumps(value, 0)

      ret = H5ATTRset_attribute_string(dset_id, cname, value, len(value),
                                       cset)
      if ret < 0:
        raise HDF5ExtError("Can't set attribute '%s' in node:\n %s." %
                           (name, self._v_node))

  _g_setAttr = previous_api(_g_setattr)
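
  # Illustrative usage (hedged) through the public attribute API, which
  # eventually calls ``_g_setattr`` above:
  #
  #   import numpy, tables
  #   f = tables.open_file("data.h5", mode="a")
  #   f.root._v_attrs.scale = numpy.float64(2.5)   # H5T_SCALAR attribute
  #   f.root._v_attrs.matrix = numpy.eye(3)        # H5T_ARRAY attribute
  #   f.root._v_attrs.meta = {"a": 1}              # pickled string attribute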

  # Get attributes
  def _g_getattr(self, node, attrname):
    """Get HDF5 attributes and retrieve them as NumPy objects.

    H5T_SCALAR types will be retrieved as scalar NumPy.
    H5T_ARRAY types will be retrieved as ndarray NumPy objects.

    """

    cdef hsize_t *dims
    cdef H5T_class_t class_id
    cdef size_t type_size
    cdef hid_t mem_type, dset_id, type_id, native_type
    cdef int rank, ret, enumtype
    cdef void *rbuf
    cdef char *str_value
    cdef char **str_values = NULL
    cdef ndarray ndvalue
    cdef object shape, stype_atom, shape_atom, retvalue
    cdef int i, nelements
    cdef char* cattrname = NULL
    cdef bytes encoded_attrname
    cdef int cset = H5T_CSET_DEFAULT

    encoded_attrname = attrname.encode('utf-8')
    # Get the C pointer
    cattrname = encoded_attrname

    # The dataset id of the node
    dset_id = node._v_objectid
    dims = NULL

    ret = H5ATTRget_type_ndims(dset_id, cattrname, &type_id, &class_id,
                               &type_size, &rank)
    if ret < 0:
      raise HDF5ExtError("Can't get type info on attribute %s in node %s." %
                         (attrname, self.name))

    # Call a fast function for scalar values and typical class types
    if (rank == 0 and class_id == H5T_STRING):
      type_size = H5ATTRget_attribute_string(dset_id, cattrname, &str_value,
                                             &cset)
      if type_size == 0:
        raise HDF5ExtError("Can't read attribute %s in node %s." %
                           (attrname, self.name))
      if cset == H5T_CSET_UTF8:
        retvalue = PyUnicode_DecodeUTF8(str_value, strlen(str_value), NULL)
        retvalue = numpy.unicode_(retvalue)
      else:
        retvalue = PyBytes_FromStringAndSize(str_value, type_size)
        # AV: oct 2012
        # Since we now use the string size obtained from HDF5, we have to
        # strip the trailing zeros used for padding.
        # The entire process is quite odd, but due to a bug (??) in the way
        # numpy arrays are pickled in Python 3 we can't assume that
        # strlen(attr_value) is the actual length of the attribute, and
        # numpy.bytes_(attr_value) can give a truncated pickle string.
        retvalue = retvalue.rstrip(b'\x00')
        retvalue = numpy.bytes_(retvalue)   # bytes
      # Important to release str_value, because it has been malloc'ed!
      if str_value:
        free(str_value)
      H5Tclose(type_id)
      return retvalue
    elif (rank == 0 and class_id in (H5T_BITFIELD, H5T_INTEGER, H5T_FLOAT)):
      dtype_ = get_dtype_scalar(type_id, class_id, type_size)
      if dtype_ is None:
        warnings.warn("Unsupported type for attribute '%s' in node '%s'. "
                      "Offending HDF5 class: %d" % (attrname, self.name,
                                                    class_id),
                      DataTypeWarning)
        self._v_unimplemented.append(attrname)
        return None
      shape = ()
    else:
      # General case

      # Get the dimensional info
      dims = <hsize_t *>malloc(rank * sizeof(hsize_t))
      ret = H5ATTRget_dims(dset_id, cattrname, dims)
      if ret < 0:
        raise HDF5ExtError("Can't get dims info on attribute %s in node "
                           "%s." % (attrname, self.name))
      shape = getshape(rank, dims)
      # dims is not needed anymore
      free(<void *> dims)

      # Get the NumPy dtype from the type_id
      try:
        stype_, shape_ = hdf5_to_np_ext_type(type_id, pure_numpy_types=True)
        dtype_ = numpy.dtype(stype_, shape_)
      except TypeError:
        if class_id == H5T_STRING and H5Tis_variable_str(type_id):
          nelements = H5ATTRget_attribute_vlen_string_array(dset_id,
                                                            cattrname,
                                                            &str_values,
                                                            &cset)
          if nelements < 0:
            raise HDF5ExtError("Can't read attribute %s in node %s." %
                               (attrname, self.name))

          # The following generator expressions do not work with
          # Cython 0.15.1
          if cset == H5T_CSET_UTF8:
            #retvalue = numpy.fromiter(
            #  PyUnicode_DecodeUTF8(<char*>str_values[i],
            #                       strlen(<char*>str_values[i]),
            #                       NULL)
            #    for i in range(nelements), "O8")
            retvalue = numpy.array([
              PyUnicode_DecodeUTF8(<char*>str_values[i],
                                   strlen(<char*>str_values[i]),
                                   NULL)
              for i in range(nelements)], "O8")
          else:
            #retvalue = numpy.fromiter(
            #  <char*>str_values[i] for i in range(nelements), "O8")
            retvalue = numpy.array(
              [<char*>str_values[i] for i in range(nelements)], "O8")
          retvalue.shape = shape

          # Important to release str_values, because it has been malloc'ed!
          for i in range(nelements):
            free(str_values[i])
          free(str_values)

          return retvalue

        # This class is not supported.  Instead of raising a TypeError,
        # issue a warning explaining the problem.  This will allow the user
        # to continue browsing native HDF5 files, while being informed
        # about the problem.
        warnings.warn("Unsupported type for attribute '%s' in node '%s'. "
                      "Offending HDF5 class: %d" % (attrname, self.name,
                                                    class_id),
                      DataTypeWarning)
        self._v_unimplemented.append(attrname)
        return None

    # Get the native type (so that it is HDF5 who is responsible for
    # dealing with non-native byteorders on-disk)
    native_type_id = get_native_type(type_id)

    # Get the container for data
    ndvalue = numpy.empty(dtype=dtype_, shape=shape)
    # Get the pointer to the buffer data area
    rbuf = ndvalue.data
    # Actually read the attribute from disk
    ret = H5ATTRget_attribute(dset_id, cattrname, native_type_id, rbuf)
    if ret < 0:
      raise HDF5ExtError("Attribute %s exists in node %s, but can't get "
                         "it." % (attrname, self.name))
    H5Tclose(native_type_id)
    H5Tclose(type_id)

    if rank > 0:    # multidimensional case
      retvalue = ndvalue
    else:
      retvalue = ndvalue[()]   # 0-dim ndarray becomes a NumPy scalar

    return retvalue

  _g_getAttr = previous_api(_g_getattr)
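
  # Illustrative read-back (hedged), matching the write sketch above:
  #
  #   value = f.root._v_attrs.matrix   # returns a 3x3 NumPy ndarray
  #   scale = f.root._v_attrs.scale    # returns a NumPy float64 scalar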

  def _g_remove(self, node, attrname):
    cdef int ret
    cdef hid_t dset_id
    cdef char *cattrname = NULL
    cdef bytes encoded_attrname

    encoded_attrname = attrname.encode('utf-8')
    # Get the C pointer
    cattrname = encoded_attrname

    # The dataset id of the node
    dset_id = node._v_objectid

    ret = H5Adelete(dset_id, cattrname)
    if ret < 0:
      raise HDF5ExtError("Attribute '%s' exists in node '%s', but cannot "
                         "be deleted." % (attrname, self.name))


cdef class Node:
  # Instance variables declared in .pxd

  def _g_new(self, where, name, init):
    self.name = name
    # """The name of this node in its parent group."""
    self.parent_id = where._v_objectid
    # """The identifier of the parent group."""

  def _g_delete(self, parent):
    cdef int ret
    cdef bytes encoded_name

    encoded_name = self.name.encode('utf-8')

    # Delete this node
    ret = H5Ldelete(parent._v_objectid, encoded_name, H5P_DEFAULT)
    if ret < 0:
      raise HDF5ExtError("problems deleting the node ``%s``" % self.name)
    return ret

  def __dealloc__(self):
    self.parent_id = 0


cdef class Group(Node):
  cdef hid_t   group_id

  def _g_create(self):
    cdef hid_t ret
    cdef bytes encoded_name

    encoded_name = self.name.encode('utf-8')

    # @TODO: set property list --> utf-8

    # Create a new group
    ret = H5Gcreate(self.parent_id, encoded_name, H5P_DEFAULT, H5P_DEFAULT,
                    H5P_DEFAULT)
    if ret < 0:
      raise HDF5ExtError("Can't create the group %s." % self.name)
    self.group_id = ret
    return self.group_id

  def _g_open(self):
    cdef hid_t ret
    cdef bytes encoded_name

    encoded_name = self.name.encode('utf-8')

    ret = H5Gopen(self.parent_id, encoded_name, H5P_DEFAULT)
    if ret < 0:
      raise HDF5ExtError("Can't open the group: '%s'." % self.name)
    self.group_id = ret
    return self.group_id

  def _g_get_objinfo(self, object h5name):
    """Check whether 'name' is a child of 'self' and return its type."""

    cdef int ret
    cdef object node_type
    cdef bytes encoded_name
    cdef char *cname

    encoded_name = h5name.encode('utf-8')
    # Get the C pointer
    cname = encoded_name

    ret = get_linkinfo(self.group_id, cname)
    if ret == -2 or ret == H5L_TYPE_ERROR:
      node_type = "NoSuchNode"
    elif ret == H5L_TYPE_SOFT:
      node_type = "SoftLink"
    elif ret == H5L_TYPE_EXTERNAL:
      node_type = "ExternalLink"
    elif ret == H5L_TYPE_HARD:
      ret = get_objinfo(self.group_id, cname)
      if ret == -2:
        node_type = "NoSuchNode"
      elif ret == H5O_TYPE_UNKNOWN:
        node_type = "Unknown"
      elif ret == H5O_TYPE_GROUP:
        node_type = "Group"
      elif ret == H5O_TYPE_DATASET:
        node_type = "Leaf"
      elif ret == H5O_TYPE_NAMED_DATATYPE:
        node_type = "NamedType"    # Not supported yet
      else:
        node_type = "Unknown"
    return node_type

  def _g_list_group(self, parent):
    """Return a tuple with the groups and the leaves hanging from self."""

    cdef bytes encoded_name

    encoded_name = self.name.encode('utf-8')

    return Giterate(parent._v_objectid, self._v_objectid, encoded_name)

  _g_listGroup = previous_api(_g_list_group)

  def _g_get_gchild_attr(self, group_name, attr_name):
    """Return an attribute of a child `Group`.

    If the attribute does not exist, ``None`` is returned.

    """

    cdef hid_t gchild_id
    cdef object retvalue
    cdef bytes encoded_group_name
    cdef bytes encoded_attr_name

    encoded_group_name = group_name.encode('utf-8')
    encoded_attr_name = attr_name.encode('utf-8')

    # Open the group
    retvalue = None   # Default value
    gchild_id = H5Gopen(self.group_id, encoded_group_name, H5P_DEFAULT)
    if gchild_id < 0:
      raise HDF5ExtError("Non-existing node ``%s`` under ``%s``" %
                         (group_name, self._v_pathname))
    retvalue = get_attribute_string_or_none(gchild_id, encoded_attr_name)
    # Close child group
    H5Gclose(gchild_id)

    return retvalue

  _g_getGChildAttr = previous_api(_g_get_gchild_attr)

  def _g_get_lchild_attr(self, leaf_name, attr_name):
    """Return an attribute of a child `Leaf`.

    If the attribute does not exist, ``None`` is returned.

    """

    cdef hid_t leaf_id
    cdef object retvalue
    cdef bytes encoded_leaf_name
    cdef bytes encoded_attr_name

    encoded_leaf_name = leaf_name.encode('utf-8')
    encoded_attr_name = attr_name.encode('utf-8')

    # Open the dataset
    leaf_id = H5Dopen(self.group_id, encoded_leaf_name, H5P_DEFAULT)
    if leaf_id < 0:
      raise HDF5ExtError("Non-existing node ``%s`` under ``%s``" %
                         (leaf_name, self._v_pathname))
    retvalue = get_attribute_string_or_none(leaf_id, encoded_attr_name)
    # Close the dataset
    H5Dclose(leaf_id)
    return retvalue

  _g_getLChildAttr = previous_api(_g_get_lchild_attr)

  def _g_flush_group(self):
    # Flush the group
    H5Fflush(self.group_id, H5F_SCOPE_GLOBAL)

  _g_flushGroup = previous_api(_g_flush_group)

  def _g_close_group(self):
    cdef int ret

    ret = H5Gclose(self.group_id)
    if ret < 0:
      raise HDF5ExtError("Problems closing the Group %s" % self.name)
    self.group_id = 0   # indicate that this group is closed

  _g_closeGroup = previous_api(_g_close_group)

  def _g_move_node(self, hid_t oldparent, oldname, hid_t newparent, newname,
                   oldpathname, newpathname):
    cdef int ret
    cdef bytes encoded_oldname, encoded_newname

    encoded_oldname = oldname.encode('utf-8')
    encoded_newname = newname.encode('utf-8')

    ret = H5Lmove(oldparent, encoded_oldname, newparent, encoded_newname,
                  H5P_DEFAULT, H5P_DEFAULT)
    if ret < 0:
      raise HDF5ExtError("Problems moving the node %s to %s" %
                         (oldpathname, newpathname))
    return ret

  _g_moveNode = previous_api(_g_move_node)
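
  # Illustrative usage (hedged) via the public API, which routes through
  # ``_g_move_node`` above:
  #
  #   f.move_node("/raw/series1", newparent="/processed", newname="series1")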


cdef class Leaf(Node):
  # Instance variables declared in .pxd

  def _get_storage_size(self):
    return H5Dget_storage_size(self.dataset_id)

  def _g_new(self, where, name, init):
    if init:
      # Put this info to 0 just when the class is initialized
      self.dataset_id = -1
      self.type_id = -1
      self.base_type_id = -1
      self.disk_type_id = -1
    super(Leaf, self)._g_new(where, name, init)

  cdef _get_type_ids(self):
    """Get the disk and native HDF5 types associated with this leaf.

    It is guaranteed that both disk and native types are not the same
    descriptor (so that it is safe to close them separately).

    """

    cdef hid_t disk_type_id, native_type_id

    disk_type_id = H5Dget_type(self.dataset_id)
    native_type_id = get_native_type(disk_type_id)
    return (disk_type_id, native_type_id)

  cdef _convert_time64(self, ndarray nparr, int sense):
    """Convert a NumPy array of Time64 elements between NumPy and HDF5
    formats.

    NumPy to HDF5 conversion is performed when 'sense' is 0.  Otherwise,
    HDF5 to NumPy conversion is performed.  The conversion is done in
    place, i.e. 'nparr' is modified.

    """

    cdef void *t64buf
    cdef long byteoffset, bytestride, nelements
    cdef hsize_t nrecords

    byteoffset = 0   # NumPy objects don't have an offset
    if (<object>nparr).shape == ():
      # 0-dim array does contain *one* element
      nrecords = 1
      bytestride = 8
    else:
      nrecords = len(nparr)
      bytestride = nparr.strides[0]   # supports multi-dimensional recarray

    nelements = <size_t>nparr.size / nrecords
    t64buf = nparr.data

    conv_float64_timeval32(
      t64buf, byteoffset, bytestride, nrecords, nelements, sense)

  # can't do since cdef'd
  #_convertTime64 = previous_api(_convert_time64)

  def _g_truncate(self, hsize_t size):
    """Truncate a Leaf to `size` nrows."""

    cdef hsize_t ret

    ret = truncate_dset(self.dataset_id, self.maindim, size)
    if ret < 0:
      raise HDF5ExtError("Problems truncating the leaf: %s" % self)

    classname = self.__class__.__name__
    if classname in ('EArray', 'CArray'):
      # Update the new dimensionality
      self.dims[self.maindim] = size
      # Update the shape
      shape = list(self.shape)
      shape[self.maindim] = SizeType(size)
      self.shape = tuple(shape)
    elif classname in ('Table', 'VLArray'):
      self.nrows = size
    else:
      raise ValueError("Unexpected classname: %s" % classname)

  def _g_flush(self):
    # Flush the dataset (in fact, the entire buffers in file!)
    if self.dataset_id >= 0:
      H5Fflush(self.dataset_id, H5F_SCOPE_GLOBAL)

  def _g_close(self):
    # Close dataset in HDF5 space
    # Release resources
    if self.type_id >= 0:
      H5Tclose(self.type_id)
    if self.disk_type_id >= 0:
      H5Tclose(self.disk_type_id)
    if self.base_type_id >= 0:
      H5Tclose(self.base_type_id)
    if self.dataset_id >= 0:
      H5Dclose(self.dataset_id)


cdef class Array(Leaf):
  # Instance variables declared in .pxd

  def _create_array(self, ndarray nparr, object title, object atom):
    cdef int i
    cdef herr_t ret
    cdef void *rbuf
    cdef bytes complib, version, class_
    cdef object dtype_, atom_, shape
    cdef ndarray dims
    cdef bytes encoded_title, encoded_name
    cdef H5T_cset_t cset = H5T_CSET_ASCII

    encoded_title = title.encode('utf-8')
    encoded_name = self.name.encode('utf-8')

    # Get the HDF5 type associated with this numpy type
    shape = (<object>nparr).shape
    if atom is None or atom.shape == ():
      dtype_ = nparr.dtype.base
      atom_ = Atom.from_dtype(dtype_)
    else:
      atom_ = atom
      shape = shape[:-len(atom_.shape)]
    self.disk_type_id = atom_to_hdf5_type(atom_, self.byteorder)

    # Allocate space for the dimension axis info and fill it
    dims = numpy.array(shape, dtype=numpy.intp)
    self.rank = len(shape)
    self.dims = npy_malloc_dims(self.rank, <npy_intp *>(dims.data))

    # Get the pointer to the buffer data area
    strides = (<object>nparr).strides
    # When the object is not a 0-d ndarray and its strides == 0, that
    # means that the array does not contain actual data
    if strides != () and sum(strides) == 0:
      rbuf = NULL
    else:
      rbuf = nparr.data

    # Save the array
    complib = (self.filters.complib or '').encode('utf-8')
    version = self._v_version.encode('utf-8')
    class_ = self._c_classid.encode('utf-8')
    self.dataset_id = H5ARRAYmake(self.parent_id, encoded_name, version,
                                  self.rank, self.dims,
                                  self.extdim, self.disk_type_id, NULL, NULL,
                                  self.filters.complevel, complib,
                                  self.filters.shuffle,
                                  self.filters.fletcher32,
                                  rbuf)
    if self.dataset_id < 0:
      raise HDF5ExtError("Problems creating the %s." %
                         self.__class__.__name__)

    if self._v_file.params['PYTABLES_SYS_ATTRS']:
      if PY_MAJOR_VERSION > 2:
        cset = H5T_CSET_UTF8
      # Set the conforming array attributes
      H5ATTRset_attribute_string(self.dataset_id, "CLASS", class_,
                                 len(class_), cset)
      H5ATTRset_attribute_string(self.dataset_id, "VERSION", version,
                                 len(version), cset)
      H5ATTRset_attribute_string(self.dataset_id, "TITLE", encoded_title,
                                 len(encoded_title), cset)

    # Get the native type (so that it is HDF5 who is responsible for
    # dealing with non-native byteorders on-disk)
    self.type_id = get_native_type(self.disk_type_id)

    return (self.dataset_id, shape, atom_)

  _createArray = previous_api(_create_array)
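
  # Illustrative usage (hedged) via the public API, which ends up in
  # ``_create_array`` above:
  #
  #   import numpy, tables
  #   f = tables.open_file("data.h5", mode="w")
  #   f.create_array("/", "x", numpy.arange(10), title="example")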

  def _create_carray(self, object title):
    cdef int i
    cdef herr_t ret
    cdef void *rbuf
    cdef bytes complib, version, class_
    cdef ndarray dflts
    cdef void *fill_data
    cdef ndarray extdim
    cdef object atom
    cdef bytes encoded_title, encoded_name

    encoded_title = title.encode('utf-8')
    encoded_name = self.name.encode('utf-8')

    atom = self.atom
    self.disk_type_id = atom_to_hdf5_type(atom, self.byteorder)

    self.rank = len(self.shape)
    self.dims = malloc_dims(self.shape)
    if self.chunkshape:
      self.dims_chunk = malloc_dims(self.chunkshape)

    rbuf = NULL   # The data pointer.  We don't have data to save initially

    # Encode strings
    complib = (self.filters.complib or '').encode('utf-8')
    version = self._v_version.encode('utf-8')
    class_ = self._c_classid.encode('utf-8')

    # Get the fill values
    if isinstance(atom.dflt, numpy.ndarray) or atom.dflt:
      dflts = numpy.array(atom.dflt, dtype=atom.dtype)
      fill_data = dflts.data
    else:
      dflts = numpy.zeros((), dtype=atom.dtype)
      fill_data = NULL
    if atom.shape == ():
      # The default is preferred as a scalar value instead of 0-dim array
      atom.dflt = dflts[()]
    else:
      atom.dflt = dflts

    # Create the CArray/EArray
    self.dataset_id = H5ARRAYmake(
      self.parent_id, encoded_name, version, self.rank,
      self.dims, self.extdim, self.disk_type_id, self.dims_chunk,
      fill_data, self.filters.complevel, complib,
      self.filters.shuffle, self.filters.fletcher32, rbuf)
    if self.dataset_id < 0:
      raise HDF5ExtError("Problems creating the %s." %
                         self.__class__.__name__)

    if self._v_file.params['PYTABLES_SYS_ATTRS']:
      # Set the conforming array attributes
      H5ATTRset_attribute_string(self.dataset_id, "CLASS", class_,
                                 len(class_), H5T_CSET_ASCII)
      H5ATTRset_attribute_string(self.dataset_id, "VERSION", version,
                                 len(version), H5T_CSET_ASCII)
      H5ATTRset_attribute_string(self.dataset_id, "TITLE", encoded_title,
                                 len(encoded_title), H5T_CSET_ASCII)
      if self.extdim >= 0:
        extdim = <ndarray>numpy.array([self.extdim], dtype="int32")
        # Attach the EXTDIM attribute in case of enlargeable arrays
        H5ATTRset_attribute(self.dataset_id, "EXTDIM", H5T_NATIVE_INT,
                            0, NULL, extdim.data)

    # Get the native type (so that it is HDF5 who is responsible for
    # dealing with non-native byteorders on-disk)
    self.type_id = get_native_type(self.disk_type_id)

    return self.dataset_id

  _createCArray = previous_api(_create_carray)
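
  # Illustrative usage (hedged): both chunked flavors go through
  # ``_create_carray`` above.
  #
  #   atom = tables.Float64Atom()
  #   f.create_carray("/", "grid", atom=atom, shape=(100, 100))
  #   f.create_earray("/", "log", atom=atom, shape=(0, 8))  # extensible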

  def _open_array(self):
    cdef size_t type_size, type_precision
    cdef H5T_class_t class_id
    cdef char cbyteorder[11]    # "irrelevant" fits easily here
    cdef int i
    cdef int extdim
    cdef herr_t ret
    cdef object shape, chunkshapes, atom
    cdef int fill_status
    cdef ndarray dflts
    cdef void *fill_data
    cdef bytes encoded_name
    cdef str byteorder

    encoded_name = self.name.encode('utf-8')

    # Open the dataset
    self.dataset_id = H5Dopen(self.parent_id, encoded_name, H5P_DEFAULT)
    if self.dataset_id < 0:
      raise HDF5ExtError("Non-existing node ``%s`` under ``%s``" %
                         (self.name, self._v_parent._v_pathname))
    # Get the datatype handles
    self.disk_type_id, self.type_id = self._get_type_ids()
    # Get the atom for this type
    atom = atom_from_hdf5_type(self.type_id)

    # Get the rank for this array object
    if H5ARRAYget_ndims(self.dataset_id, &self.rank) < 0:
      raise HDF5ExtError("Problems getting ndims!")
    # Allocate space for the dimension axis info
    self.dims = <hsize_t *>malloc(self.rank * sizeof(hsize_t))
    self.maxdims = <hsize_t *>malloc(self.rank * sizeof(hsize_t))
    # Get info on dimensions, class and type (of base class)
    ret = H5ARRAYget_info(self.dataset_id, self.disk_type_id,
                          self.dims, self.maxdims,
                          &class_id, cbyteorder)
    if ret < 0:
      raise HDF5ExtError("Unable to get array info.")

    byteorder = cstr_to_pystr(cbyteorder)

    # Get the extendable dimension (if any)
    self.extdim = -1    # default is non-extensible Array
    for i from 0 <= i < self.rank:
      if self.maxdims[i] == -1:
        self.extdim = i
        break

    # Get the shape as a python tuple
    shape = getshape(self.rank, self.dims)

    # Allocate space for the dimension chunking info
    self.dims_chunk = <hsize_t *>malloc(self.rank * sizeof(hsize_t))
    if H5ARRAYget_chunkshape(self.dataset_id, self.rank,
                             self.dims_chunk) < 0:
      # The Array class is not chunked!
      chunkshapes = None
    else:
      # Get the chunkshape as a python tuple
      chunkshapes = getshape(self.rank, self.dims_chunk)

    # Get the fill value
    dflts = numpy.zeros((), dtype=atom.dtype)
    fill_data = dflts.data
    H5ARRAYget_fill_value(self.dataset_id, self.type_id,
                          &fill_status, fill_data)
    if fill_status == H5D_FILL_VALUE_UNDEFINED:
      # This can only happen with datasets created with other libraries
      # than PyTables.
      dflts = None
    if dflts is not None and atom.shape == ():
      # The default is preferred as a scalar value instead of 0-dim array
      atom.dflt = dflts[()]
    else:
      atom.dflt = dflts

    # Get the byteorder
    self.byteorder = correct_byteorder(atom.type, byteorder)

    return (self.dataset_id, atom, shape, chunkshapes)

  _openArray = previous_api(_open_array)

  def _append(self, ndarray nparr):
    cdef int ret, extdim
    cdef hsize_t *dims_arr
    cdef void *rbuf
    cdef object shape

    # Allocate space for the dimension axis info
    dims_arr = npy_malloc_dims(self.rank, nparr.shape)
    # Get the pointer to the buffer data area
    rbuf = nparr.data
    # Convert some NumPy types to HDF5 before storing.
    if self.atom.type == 'time64':
      self._convert_time64(nparr, 0)

    # Append the records
    extdim = self.extdim
    with nogil:
      ret = H5ARRAYappend_records(self.dataset_id, self.type_id, self.rank,
                                  self.dims, dims_arr, extdim, rbuf)
    if ret < 0:
      raise HDF5ExtError("Problems appending the elements")

    free(dims_arr)

    # Update the new dimensionality
    shape = list(self.shape)
    shape[self.extdim] = SizeType(self.dims[self.extdim])
    self.shape = tuple(shape)
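
  # Illustrative usage (hedged): appending along the extensible dimension
  # of an EArray created as in the sketch above:
  #
  #   earray = f.root.log
  #   earray.append(numpy.zeros((10, 8)))   # shape grows from (0, 8) to (10, 8)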

  def _read_array(self, hsize_t start, hsize_t stop, hsize_t step,
                  ndarray nparr):
    cdef herr_t ret
    cdef void *rbuf
    cdef hsize_t nrows
    cdef int extdim

    # Get the pointer to the buffer data area
    rbuf = nparr.data

    # Number of rows to read
    nrows = get_len_of_range(start, stop, step)
    if hasattr(self, "extdim"):
      extdim = self.extdim
    else:
      extdim = -1

    # Do the physical read
    with nogil:
      ret = H5ARRAYread(self.dataset_id, self.type_id, start, nrows, step,
                        extdim, rbuf)
    if ret < 0:
      raise HDF5ExtError("Problems reading the array data.")

    if self.atom.kind == 'time':
      # Swap the byteorder by hand (this is not currently supported by HDF5)
      if H5Tget_order(self.type_id) != platform_byteorder:
        nparr.byteswap(True)

    # Convert some HDF5 types to NumPy after reading.
    if self.atom.type == 'time64':
      self._convert_time64(nparr, 1)

    return

  _readArray = previous_api(_read_array)
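
  # Illustrative read (hedged) through the public slicing API, which calls
  # ``_read_array`` above with a preallocated NumPy buffer:
  #
  #   first = f.root.x[0:10:2]   # rows 0, 2, 4, 6, 8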

  def _g_read_slice(self, ndarray startl, ndarray stopl, ndarray stepl,
                    ndarray nparr):
    cdef herr_t ret
    cdef hsize_t *start
    cdef hsize_t *stop
    cdef hsize_t *step
    cdef void *rbuf

    # Get the pointer to the buffer data area of the startl, stopl and
    # stepl arrays
    start = <hsize_t *>startl.data
    stop = <hsize_t *>stopl.data
    step = <hsize_t *>stepl.data
    # Get the pointer to the buffer data area
    rbuf = nparr.data

    # Do the physical read