table bench.py :  » Database » PyTables » tables-2.1.2 » bench » Python Open Source

Home
Python Open Source
1.3.1.2 Python
2.Ajax
3.Aspect Oriented
4.Blog
5.Build
6.Business Application
7.Chart Report
8.Content Management Systems
9.Cryptographic
10.Database
11.Development
12.Editor
13.Email
14.ERP
15.Game 2D 3D
16.GIS
17.GUI
18.IDE
19.Installer
20.IRC
21.Issue Tracker
22.Language Interface
23.Log
24.Math
25.Media Sound Audio
26.Mobile
27.Network
28.Parser
29.PDF
30.Project Management
31.RSS
32.Search
33.Security
34.Template Engines
35.Test
36.UML
37.USB Serial
38.Web Frameworks
39.Web Server
40.Web Services
41.Web Unit
42.Wiki
43.Windows
44.XML
Python Open Source » Database » PyTables 
PyTables » tables 2.1.2 » bench » table-bench.py
#!/usr/bin/env python

import numpy as NP
from tables import *
import random

# This class is accessible only for the examples
class Small(IsDescription):
    var1 = StringCol(itemsize=4 ,pos=2)
    var2 = Int32Col(pos=1)
    var3 = Float64Col(pos=0)

# Define a user record to characterize some kind of particles
class Medium(IsDescription):
    name        = StringCol(itemsize=16, pos=0)  # 16-character String
    float1      = Float64Col(shape=2, dflt=NP.arange(2),pos=1)
    #float1      = Float64Col(dflt=2.3)
    #float2      = Float64Col(dflt=2.3)
    #zADCcount    = Int16Col()    # signed short integer
    ADCcount    = Int32Col(pos=6)    # signed short integer
    grid_i      = Int32Col(pos=7)    # integer
    grid_j      = Int32Col(pos=8)    # integer
    pressure    = Float32Col(pos=9)    # float  (single-precision)
    energy      = Float64Col(pos=2)    # double (double-precision)

# Define a user record to characterize some kind of particles
class Big(IsDescription):
    name        = StringCol(itemsize=16)  # 16-character String
    float1      = Float64Col(shape=32, dflt=NP.arange(32))
    float2      = Float64Col(shape=32, dflt=2.2)
    TDCcount    = Int8Col()    # signed short integer
    #ADCcount    = Int32Col()
    #ADCcount    = Int16Col()    # signed short integer
    grid_i      = Int32Col()    # integer
    grid_j      = Int32Col()    # integer
    pressure    = Float32Col()    # float  (single-precision)
    energy      = Float64Col()    # double (double-precision)

def createFile(filename, totalrows, filters, recsize):

    # Open a file in "w"rite mode
    fileh = openFile(filename, mode = "w", title="Table Benchmark",
                     filters=filters)

    # Table title
    title = "This is the table title"

    # Create a Table instance
    group = fileh.root
    rowswritten = 0
    for j in range(3):
        # Create a table
        if recsize == "big":
            table = fileh.createTable(group, 'tuple'+str(j), Big, title,
                                      None,
                                      totalrows)
            arr = NP.array(NP.arange(32), dtype=NP.float64)
            arr2 = NP.array(NP.arange(32), dtype=NP.float64)
        elif recsize == "medium":
            table = fileh.createTable(group, 'tuple'+str(j), Medium, title,
                                      None,
                                      totalrows)
            arr = NP.array(NP.arange(2), dtype=NP.float64)
        elif recsize == "small":
            table = fileh.createTable(group, 'tuple'+str(j), Small, title,
                                      None,
                                      totalrows)
        else:
            raise RuntimeError, "This should never happen"

        table.attrs.test = 2
        rowsize = table.rowsize
        # Get the row object associated with the new table
        d = table.row
        # Fill the table
        if recsize == "big":
            for i in xrange(totalrows):
                # d['name']  = 'Part: %6d' % (i)
                d['TDCcount'] = i % 256
                #d['float1'] = NP.array([i]*32, NP.float64)
                #d['float2'] = NP.array([i**2]*32, NP.float64)
                #d['float1'][0] = float(i)
                #d['float2'][0] = float(i*2)
                # Common part with medium
                d['grid_i'] = i
                d['grid_j'] = 10 - i
                d['pressure'] = float(i*i)
                # d['energy'] = float(d['pressure'] ** 4)
                d['energy'] = d['pressure']
                # d['idnumber'] = i * (2 ** 34)
                d.append()
        elif recsize == "medium":
            for i in xrange(totalrows):
                #d['name']  = 'Part: %6d' % (i)
                #d['float1'] = NP.array([i]*2, NP.float64)
                #d['float1'] = arr
                #d['float1'] = i
                #d['float2'] = float(i)
                # Common part with big:
                d['grid_i'] = i
                d['grid_j'] = 10 - i
                d['pressure'] = i*2
                # d['energy'] = float(d['pressure'] ** 4)
                d['energy'] = d['pressure']
                d.append()
        else: # Small record
            for i in xrange(totalrows):
                #d['var1'] = str(random.randrange(1000000))
                #d['var3'] = random.randrange(10000000)
                d['var1'] = str(i)
                #d['var2'] = random.randrange(totalrows)
                d['var2'] = i
                #d['var3'] = 12.1e10
                d['var3'] = totalrows-i
                d.append()  # This is a 10% faster than table.append()
        rowswritten += totalrows

        if recsize == "small":
            # Testing with indexing
            pass
#            table._createIndex("var3", Filters(1,"zlib",shuffle=1))

        #table.flush()
        group._v_attrs.test2 = "just a test"
        # Create a new group
        group2 = fileh.createGroup(group, 'group'+str(j))
        # Iterate over this new group (group2)
        group = group2
        table.flush()

    # Close the file (eventually destroy the extended type)
    fileh.close()
    return (rowswritten, rowsize)

def readFile(filename, recsize, verbose):
    # Open the HDF5 file in read-only mode

    fileh = openFile(filename, mode = "r")
    rowsread = 0
    for groupobj in fileh.walkGroups(fileh.root):
        #print "Group pathname:", groupobj._v_pathname
        row = 0
        for table in fileh.listNodes(groupobj, 'Table'):
            rowsize = table.rowsize
            print "reading", table
            if verbose:
                print "Max rows in buf:", table.nrowsinbuf
                print "Rows in", table._v_pathname, ":", table.nrows
                print "Buffersize:", table.rowsize * table.nrowsinbuf
                print "MaxTuples:", table.nrowsinbuf

            if recsize == "big" or recsize == "medium":
                #e = [ p.float1 for p in table.iterrows()
                #      if p.grid_i < 2 ]
                #e = [ str(p) for p in table.iterrows() ]
                #      if p.grid_i < 2 ]
#                 e = [ p['grid_i'] for p in table.iterrows()
#                       if p['grid_j'] == 20 and p['grid_i'] < 20 ]
#                 e = [ p['grid_i'] for p in table
#                       if p['grid_i'] <= 2 ]
#                e = [ p['grid_i'] for p in table.where("grid_i<=20")]
                e = [ p['grid_i'] for p in
                      table.where('grid_i <= 20')]
#                 e = [ p['grid_i'] for p in table.iterrows()
#                       if p.nrow() == 20 ]
#                 e = [ table.delrow(p.nrow()) for p in table.iterrows()
#                       if p.nrow() == 20 ]
                # The version with a for loop is only 1% better than
                # comprenhension list
                #e = []
                #for p in table.iterrows():
                #    if p.grid_i < 20:
                #        e.append(p.grid_j)
            else:  # small record case
#                 e = [ p['var3'] for p in table.iterrows()
#                       if p['var2'] < 20 and p['var3'] < 20 ]
#                e = [ p['var3'] for p in table.where("var3 <= 20")
#                      if p['var2'] < 20 ]
#               e = [ p['var3'] for p in table.where("var3 <= 20")]
# Cuts 1) and 2) issues the same results but 2) is about 10 times faster
# ######## Cut 1)
#                e = [ p.nrow() for p in
#                      table.where(table.cols.var2 > 5)
#                      if p["var2"] < 10]
# ######## Cut 2)
#                 e = [ p.nrow() for p in
#                       table.where(table.cols.var2 < 10)
#                       if p["var2"] > 5]
#                e = [ (p._nrow,p["var3"]) for p in
#                e = [ p["var3"] for p in
#                      table.where(table.cols.var3 < 10)]
#                      table.where(table.cols.var3 < 10)]
#                      table if p["var3"] <= 10]
#               e = [ p['var3'] for p in table.where("var3 <= 20")]
#                e = [ p['var3'] for p in
#                      table.where(table.cols.var1 == "10")]  # More
                     # than ten times faster than the next one
#                e = [ p['var3'] for p in table
#                      if p['var1'] == "10"]
                e = [ p['var3'] for p in table.where('var2 <= 20')]
                # e = [ p[0] for p in table.where('var2 <= 20')]
                #e = [ p['var3'] for p in table if p['var2'] <= 20 ]
                # e = [ p[:] for p in table if p[1] <= 20 ]
#                  e = [ p['var3'] for p in table._whereInRange(table.cols.var2 <=20)]
                #e = [ p['var3'] for p in table.iterrows(0,21) ]
#                  e = [ p['var3'] for p in table.iterrows()
#                       if p.nrow() <= 20 ]
                #e = [ p['var3'] for p in table.iterrows(1,0,1000)]
                #e = [ p['var3'] for p in table.iterrows(1,100)]
                #e = [ p['var3'] for p in table.iterrows(step=2)
                #      if p.nrow() < 20 ]
                #e = [ p['var2'] for p in table.iterrows()
                #      if p['var2'] < 20 ]
                #for p in table.iterrows():
                #      pass
            if verbose:
                #print "Last record read:", p
                print "resulting selection list ==>", e

            rowsread += table.nrows
            row += 1
            if verbose:
                print "Total selected records ==> ", len(e)

    # Close the file (eventually destroy the extended type)
    fileh.close()

    return (rowsread, rowsize)

def readField(filename, field, rng, verbose):
    fileh = openFile(filename, mode = "r")
    rowsread = 0
    if rng is None:
        rng = [0,-1,1]
    if field == "all":
        field = None
    for groupobj in fileh.walkGroups(fileh.root):
        row = 0
        for table in fileh.listNodes(groupobj, 'Table'):
            rowsize = table.rowsize
            #table.nrowsinbuf = 3 # For testing purposes
            if verbose:
                print "Max rows in buf:", table.nrowsinbuf
                print "Rows in", table._v_pathname, ":", table.nrows
                print "Buffersize:", table.rowsize * table.nrowsinbuf
                print "MaxTuples:", table.nrowsinbuf
                print "(field, start, stop, step) ==>", (field, rng[0], rng[1], rng[2])

            e = table.read(rng[0], rng[1], rng[2], field)

            rowsread += table.nrows
            if verbose:
                print "Selected rows ==> ", e
                print "Total selected rows ==> ", len(e)

    # Close the file (eventually destroy the extended type)
    fileh.close()
    return (rowsread, rowsize)

if __name__=="__main__":
    import sys
    import getopt
    import getopt, pstats
    import profile as prof

    try:
        import psyco
        psyco_imported = 1
    except:
        psyco_imported = 0

    import time

    usage = """usage: %s [-v] [-p] [-P] [-R range] [-r] [-w] [-s recsize] [-f field] [-c level] [-l complib] [-i iterations] [-S] [-F] file
            -v verbose
            -p use "psyco" if available
            -P do profile
            -R select a range in a field in the form "start,stop,step"
            -r only read test
            -w only write test
            -s use [big] record, [medium] or [small]
            -f only read stated field name in tables ("all" means all fields)
            -c sets a compression level (do not set it or 0 for no compression)
            -S activate shuffling filter
            -F activate fletcher32 filter
            -l sets the compression library to be used ("zlib", "lzo", "ucl", "bzip2")
            -i sets the number of rows in each table\n""" % sys.argv[0]

    try:
        opts, pargs = getopt.getopt(sys.argv[1:], 'vpPSFR:rwf:s:c:l:i:')
    except:
        sys.stderr.write(usage)
        sys.exit(0)

    # if we pass too much parameters, abort
    if len(pargs) <> 1:
        sys.stderr.write(usage)
        sys.exit(0)

    # default options
    verbose = 0
    profile = 0
    rng = None
    recsize = "medium"
    fieldName = None
    testread = 1
    testwrite = 1
    usepsyco = 0
    complevel = 0
    shuffle = 0
    fletcher32 = 0
    complib = "zlib"
    iterations = 100

    # Get the options
    for option in opts:
        if option[0] == '-v':
            verbose = 1
        if option[0] == '-p':
            usepsyco = 1
        if option[0] == '-P':
            profile = 1
        if option[0] == '-S':
            shuffle = 1
        if option[0] == '-F':
            fletcher32 = 1
        elif option[0] == '-R':
            rng = [int(i) for i in option[1].split(",")]
        elif option[0] == '-r':
            testwrite = 0
        elif option[0] == '-w':
            testread = 0
        elif option[0] == '-f':
            fieldName = option[1]
        elif option[0] == '-s':
            recsize = option[1]
            if recsize not in ["big", "medium", "small"]:
                sys.stderr.write(usage)
                sys.exit(0)
        elif option[0] == '-c':
            complevel = int(option[1])
        elif option[0] == '-l':
            complib = option[1]
        elif option[0] == '-i':
            iterations = int(option[1])

    # Build the Filters instance
    filters = Filters(complevel=complevel, complib=complib,
                      shuffle=shuffle, fletcher32=fletcher32)

    # Catch the hdf5 file passed as the last argument
    file = pargs[0]

    if verbose:
        print "numpy version:", NP.__version__
        if psyco_imported and usepsyco:
            print "Using psyco version:", psyco.version_info

    if testwrite:
        print "Compression level:", complevel
        if complevel > 0:
            print "Compression library:", complib
            if shuffle:
                print "Suffling..."
        t1 = time.time()
        cpu1 = time.clock()
        if psyco_imported and usepsyco:
            psyco.bind(createFile)
        if profile:
            prof.run('(rowsw, rowsz) = createFile(file, iterations, filters, recsize)', 'table-bench.prof')
            stats = pstats.Stats('table-bench.prof')
            stats.strip_dirs()
            stats.sort_stats('time', 'calls')
            stats.print_stats(20)
        else:
            (rowsw, rowsz) = createFile(file, iterations, filters, recsize)
        t2 = time.time()
        cpu2 = time.clock()
        tapprows = round(t2-t1, 3)
        cpuapprows = round(cpu2-cpu1, 3)
        tpercent = int(round(cpuapprows/tapprows, 2)*100)
        print "Rows written:", rowsw, " Row size:", rowsz
        print "Time writing rows: %s s (real) %s s (cpu)  %s%%" % \
              (tapprows, cpuapprows, tpercent)
        print "Write rows/sec: ", int(rowsw / float(tapprows))
        print "Write KB/s :", int(rowsw * rowsz / (tapprows * 1024))

    if testread:
        t1 = time.time()
        cpu1 = time.clock()
        if psyco_imported and usepsyco:
            psyco.bind(readFile)
            #psyco.bind(readField)
            pass
        if rng or fieldName:
            (rowsr, rowsz) = readField(file, fieldName, rng, verbose)
            pass
        else:
            for i in range(1):
                (rowsr, rowsz) = readFile(file, recsize, verbose)
        t2 = time.time()
        cpu2 = time.clock()
        treadrows = round(t2-t1, 3)
        cpureadrows = round(cpu2-cpu1, 3)
        tpercent = int(round(cpureadrows/treadrows, 2)*100)
        print "Rows read:", rowsr, " Row size:", rowsz
        print "Time reading rows: %s s (real) %s s (cpu)  %s%%" % \
              (treadrows, cpureadrows, tpercent)
        print "Read rows/sec: ", int(rowsr / float(treadrows))
        print "Read KB/s :", int(rowsr * rowsz / (treadrows * 1024))
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.