/bench/poly.py

https://github.com/tnorth/PyTables
Python | 192 lines | 112 code | 39 blank | 41 comment | 19 complexity | 128b77115f6a7741633acfb7e1745e33 MD5 | raw file
  1. #######################################################################
  2. # This script compares the speed of the computation of a polynomial
  3. # for different (numpy.memmap and tables.Expr) out-of-memory paradigms.
  4. #
  5. # Author: Francesc Alted
  6. # Date: 2010-02-24
  7. #######################################################################
  8. import os
  9. import sys
  10. from time import time
  11. import numpy as np
  12. import tables as tb
  13. import numexpr as ne
  14. expr = ".25*x**3 + .75*x**2 - 1.5*x - 2" # the polynomial to compute
  15. N = 10*1000*1000 # the number of points to compute expression (80 MB)
  16. step = 100*1000 # perform calculation in slices of `step` elements
  17. dtype = np.dtype('f8') # the datatype
  18. #CHUNKSHAPE = (2**17,)
  19. CHUNKSHAPE = None
  20. # Global variable for the x values for pure numpy & numexpr
  21. x = None
  22. # *** The next variables do not need to be changed ***
  23. # Filenames for numpy.memmap
  24. fprefix = "numpy.memmap" # the I/O file prefix
  25. mpfnames = [fprefix+"-x.bin", fprefix+"-r.bin"]
  26. # Filename for tables.Expr
  27. h5fname = "tablesExpr.h5" # the I/O file
  28. MB = 1024*1024. # a MegaByte
  29. def print_filesize(filename, clib=None, clevel=0):
  30. """Print some statistics about file sizes."""
  31. #os.system("sync") # make sure that all data has been flushed to disk
  32. if type(filename) is list:
  33. filesize_bytes = 0
  34. for fname in filename:
  35. filesize_bytes += os.stat(fname)[6]
  36. else:
  37. filesize_bytes = os.stat(filename)[6]
  38. filesize_MB = round(filesize_bytes / MB, 1)
  39. print "\t\tTotal file sizes: %d -- (%s MB)" % (filesize_bytes, filesize_MB),
  40. if clevel > 0:
  41. print "(using %s lvl%s)" % (clib, clevel)
  42. else:
  43. print
  44. def populate_x_numpy():
  45. """Populate the values in x axis for numpy."""
  46. global x
  47. # Populate x in range [-1, 1]
  48. x = np.linspace(-1, 1, N)
  49. def populate_x_memmap():
  50. """Populate the values in x axis for numpy.memmap."""
  51. # Create container for input
  52. x = np.memmap(mpfnames[0], dtype=dtype, mode="w+", shape=(N,))
  53. # Populate x in range [-1, 1]
  54. for i in xrange(0, N, step):
  55. chunk = np.linspace((2*i-N)/float(N), (2*(i+step)-N)/float(N), step)
  56. x[i:i+step] = chunk
  57. del x # close x memmap
  58. def populate_x_tables(clib, clevel):
  59. """Populate the values in x axis for pytables."""
  60. f = tb.openFile(h5fname, "w")
  61. # Create container for input
  62. atom = tb.Atom.from_dtype(dtype)
  63. filters = tb.Filters(complib=clib, complevel=clevel)
  64. x = f.createCArray(f.root, "x", atom=atom, shape=(N,),
  65. filters=filters,
  66. chunkshape=CHUNKSHAPE,
  67. )
  68. # Populate x in range [-1, 1]
  69. for i in xrange(0, N, step):
  70. chunk = np.linspace((2*i-N)/float(N), (2*(i+step)-N)/float(N), step)
  71. x[i:i+step] = chunk
  72. f.close()
  73. def compute_numpy():
  74. """Compute the polynomial with pure numpy."""
  75. y = eval(expr)
  76. def compute_numexpr():
  77. """Compute the polynomial with pure numexpr."""
  78. y = ne.evaluate(expr)
  79. def compute_memmap():
  80. """Compute the polynomial with numpy.memmap."""
  81. # Reopen inputs in read-only mode
  82. x = np.memmap(mpfnames[0], dtype=dtype, mode='r', shape=(N,))
  83. # Create the array output
  84. r = np.memmap(mpfnames[1], dtype=dtype, mode="w+", shape=(N,))
  85. # Do the computation by chunks and store in output
  86. r[:] = eval(expr) # where is stored the result?
  87. #r = eval(expr) # result is stored in-memory
  88. del x, r # close x and r memmap arrays
  89. print_filesize(mpfnames)
  90. def compute_tables(clib, clevel):
  91. """Compute the polynomial with tables.Expr."""
  92. f = tb.openFile(h5fname, "a")
  93. x = f.root.x # get the x input
  94. # Create container for output
  95. atom = tb.Atom.from_dtype(dtype)
  96. filters = tb.Filters(complib=clib, complevel=clevel)
  97. r = f.createCArray(f.root, "r", atom=atom, shape=(N,),
  98. filters=filters,
  99. chunkshape=CHUNKSHAPE,
  100. )
  101. # Do the actual computation and store in output
  102. ex = tb.Expr(expr) # parse the expression
  103. ex.setOutput(r) # where is stored the result?
  104. # when commented out, the result goes in-memory
  105. ex.eval() # evaluate!
  106. f.close()
  107. print_filesize(h5fname, clib, clevel)
  108. if __name__ == '__main__':
  109. tb.print_versions()
  110. print "Total size for datasets:", round(2*N*dtype.itemsize/MB, 1), "MB"
  111. # Get the compression libraries supported
  112. #supported_clibs = [clib for clib in ("zlib", "lzo", "bzip2", "blosc")
  113. #supported_clibs = [clib for clib in ("zlib", "lzo", "blosc")
  114. supported_clibs = [clib for clib in ("blosc",)
  115. if tb.whichLibVersion(clib)]
  116. # Initialization code
  117. #for what in ["numpy", "numpy.memmap", "numexpr"]:
  118. for what in ["numpy", "numexpr"]:
  119. #break
  120. print "Populating x using %s with %d points..." % (what, N)
  121. t0 = time()
  122. if what == "numpy":
  123. populate_x_numpy()
  124. compute = compute_numpy
  125. elif what == "numexpr":
  126. populate_x_numpy()
  127. compute = compute_numexpr
  128. elif what == "numpy.memmap":
  129. populate_x_memmap()
  130. compute = compute_memmap
  131. print "*** Time elapsed populating:", round(time() - t0, 3)
  132. print "Computing: '%s' using %s" % (expr, what)
  133. t0 = time()
  134. compute()
  135. print "**************** Time elapsed computing:", round(time() - t0, 3)
  136. for what in ["tables.Expr"]:
  137. t0 = time()
  138. first = True # Sentinel
  139. for clib in supported_clibs:
  140. #for clevel in (0, 1, 3, 6, 9):
  141. for clevel in range(10):
  142. #for clevel in (1,):
  143. if not first and clevel == 0:
  144. continue
  145. print "Populating x using %s with %d points..." % (what, N)
  146. populate_x_tables(clib, clevel)
  147. print "*** Time elapsed populating:", round(time() - t0, 3)
  148. print "Computing: '%s' using %s" % (expr, what)
  149. t0 = time()
  150. compute_tables(clib, clevel)
  151. print "**************** Time elapsed computing:", \
  152. round(time() - t0, 3)
  153. first = False