poly.py - This script compares the speed of the computation…

/bench/poly.py

https://github.com/tnorth/PyTables · Python · 192 lines · 112 code · 39 blank · 41 comment · 19 complexity · 128b77115f6a7741633acfb7e1745e33 MD5 · raw file

#######################################################################
# This script compares the speed of the computation of a polynomial
# for different (numpy.memmap and tables.Expr) out-of-memory paradigms.
#
# Author: Francesc Alted
# Date: 2010-02-24
#######################################################################

import os
import sys
from time import time
import numpy as np
import tables as tb
import numexpr as ne

expr = ".25*x**3 + .75*x**2 - 1.5*x - 2"  # the polynomial to compute
N = 10*1000*1000          # the number of points to compute expression (80 MB)
step = 100*1000           # perform calculation in slices of `step` elements
dtype = np.dtype('f8')    # the datatype
#CHUNKSHAPE = (2**17,)
CHUNKSHAPE = None

# Global variable for the x values for pure numpy & numexpr
x = None

# *** The next variables do not need to be changed ***

# Filenames for numpy.memmap
fprefix = "numpy.memmap"             # the I/O file prefix
mpfnames = [fprefix+"-x.bin", fprefix+"-r.bin"]

# Filename for tables.Expr
h5fname = "tablesExpr.h5"     # the I/O file

MB = 1024*1024.               # a MegaByte


def print_filesize(filename, clib=None, clevel=0):
    """Print some statistics about file sizes."""

    #os.system("sync")    # make sure that all data has been flushed to disk
    if type(filename) is list:
        filesize_bytes = 0
        for fname in filename:
            filesize_bytes += os.stat(fname)[6]
    else:
        filesize_bytes = os.stat(filename)[6]
    filesize_MB  = round(filesize_bytes / MB, 1)
    print "\t\tTotal file sizes: %d -- (%s MB)" % (filesize_bytes, filesize_MB),
    if clevel > 0:
        print "(using %s lvl%s)" % (clib, clevel)
    else:
        print


def populate_x_numpy():
    """Populate the values in x axis for numpy."""
    global x
    # Populate x in range [-1, 1]
    x = np.linspace(-1, 1, N)


def populate_x_memmap():
    """Populate the values in x axis for numpy.memmap."""
    # Create container for input
    x = np.memmap(mpfnames[0], dtype=dtype, mode="w+", shape=(N,))

    # Populate x in range [-1, 1]
    for i in xrange(0, N, step):
        chunk = np.linspace((2*i-N)/float(N), (2*(i+step)-N)/float(N), step)
        x[i:i+step] = chunk
    del x        # close x memmap


def populate_x_tables(clib, clevel):
    """Populate the values in x axis for pytables."""
    f = tb.openFile(h5fname, "w")

    # Create container for input
    atom = tb.Atom.from_dtype(dtype)
    filters = tb.Filters(complib=clib, complevel=clevel)
    x = f.createCArray(f.root, "x", atom=atom, shape=(N,),
                       filters=filters,
                       chunkshape=CHUNKSHAPE,
                       )

    # Populate x in range [-1, 1]
    for i in xrange(0, N, step):
        chunk = np.linspace((2*i-N)/float(N), (2*(i+step)-N)/float(N), step)
        x[i:i+step] = chunk
    f.close()


def compute_numpy():
    """Compute the polynomial with pure numpy."""
    y = eval(expr)


def compute_numexpr():
    """Compute the polynomial with pure numexpr."""
    y = ne.evaluate(expr)


def compute_memmap():
    """Compute the polynomial with numpy.memmap."""
    # Reopen inputs in read-only mode
    x = np.memmap(mpfnames[0], dtype=dtype, mode='r', shape=(N,))
    # Create the array output
    r = np.memmap(mpfnames[1], dtype=dtype, mode="w+", shape=(N,))

    # Do the computation by chunks and store in output
    r[:] = eval(expr)          # where is stored the result?
    #r = eval(expr)            # result is stored in-memory

    del x, r                   # close x and r memmap arrays
    print_filesize(mpfnames)


def compute_tables(clib, clevel):
    """Compute the polynomial with tables.Expr."""
    f = tb.openFile(h5fname, "a")
    x = f.root.x               # get the x input
    # Create container for output
    atom = tb.Atom.from_dtype(dtype)
    filters = tb.Filters(complib=clib, complevel=clevel)
    r = f.createCArray(f.root, "r", atom=atom, shape=(N,),
                       filters=filters,
                       chunkshape=CHUNKSHAPE,
                       )

    # Do the actual computation and store in output
    ex = tb.Expr(expr)         # parse the expression
    ex.setOutput(r)            # where is stored the result?
                               # when commented out, the result goes in-memory
    ex.eval()                  # evaluate!

    f.close()
    print_filesize(h5fname, clib, clevel)


if __name__ == '__main__':

    tb.print_versions()

    print "Total size for datasets:", round(2*N*dtype.itemsize/MB, 1), "MB"

    # Get the compression libraries supported
    #supported_clibs = [clib for clib in ("zlib", "lzo", "bzip2", "blosc")
    #supported_clibs = [clib for clib in ("zlib", "lzo", "blosc")
    supported_clibs = [clib for clib in ("blosc",)
                       if tb.whichLibVersion(clib)]

    # Initialization code
    #for what in ["numpy", "numpy.memmap", "numexpr"]:
    for what in ["numpy", "numexpr"]:
        #break
        print "Populating x using %s with %d points..." % (what, N)
        t0 = time()
        if what == "numpy":
            populate_x_numpy()
            compute = compute_numpy
        elif what == "numexpr":
            populate_x_numpy()
            compute = compute_numexpr
        elif what == "numpy.memmap":
            populate_x_memmap()
            compute = compute_memmap
        print "*** Time elapsed populating:", round(time() - t0, 3)
        print "Computing: '%s' using %s" % (expr, what)
        t0 = time()
        compute()
        print "**************** Time elapsed computing:", round(time() - t0, 3)

    for what in ["tables.Expr"]:
        t0 = time()
        first = True    # Sentinel
        for clib in supported_clibs:
            #for clevel in (0, 1, 3, 6, 9):
            for clevel in range(10):
            #for clevel in (1,):
                if not first and clevel == 0:
                    continue
                print "Populating x using %s with %d points..." % (what, N)
                populate_x_tables(clib, clevel)
                print "*** Time elapsed populating:", round(time() - t0, 3)
                print "Computing: '%s' using %s" % (expr, what)
                t0 = time()
                compute_tables(clib, clevel)
                print "**************** Time elapsed computing:", \
                      round(time() - t0, 3)
                first = False
Tech Fingerprint

Alerts (8)

'type(' Use isinstance() for type checking instead of type()
42
'global' Avoid global variables; use function parameters or class attributes for better scope management
58
'del' Avoid unless necessary; Python's garbage collector typically handles object deletion
72 115
'eval(' Avoid due to security risks; use ast.literal_eval for safer evaluation of literals
96 112 113 135