set_dataset_sizes.py - This is a Python script that initializes the Galaxy model and sets the total size of each dataset in the database.

/scripts/set_dataset_sizes.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 53 lines · 41 code · 11 blank · 1 comment · 5 complexity · 062bd0d359d4ec8c0e327375cf72d47a MD5 · raw file


#!/usr/bin/env python

import os, sys
from ConfigParser import ConfigParser
from optparse import OptionParser

# By default, use the universe_wsgi.ini located one directory above this script.
default_config = os.path.abspath(
    os.path.join( os.path.dirname( __file__ ), '..', 'universe_wsgi.ini' )
)

# Command line: -c/--config overrides the path of the Galaxy config file.
parser = OptionParser()
parser.add_option(
    '-c',
    '--config',
    dest='config',
    default=default_config,
    help='Path to Galaxy config file (universe_wsgi.ini)',
)
options, args = parser.parse_args()

def init():
    """
    Load the Galaxy model mapping described by the selected config file.

    Normalizes ``options.config`` to an absolute path, changes the working
    directory to the directory that contains it, and appends ``lib`` to
    ``sys.path`` before importing from the ``galaxy`` package.  The
    ``file_path`` and ``database_connection`` options are read from the
    ``app:main`` section; the defaults below are used for options the file
    does not set.

    Returns the result of ``galaxy.model.mapping.init`` called with
    ``create_tables = False``.
    """
    config_path = os.path.abspath( options.config )
    options.config = config_path
    config_dir, config_name = os.path.split( config_path )

    # Everything below uses paths relative to the config file's directory.
    os.chdir( config_dir )
    sys.path.append( 'lib' )

    # Kept in the original order (galaxy.eggs before pkg_resources).
    # NOTE(review): presumably galaxy.eggs prepares the environment that
    # pkg_resources relies on -- confirm before reordering.
    from galaxy import eggs
    import pkg_resources

    fallbacks = {
        'file_path': 'database/files',
        'database_connection': 'sqlite:///database/universe.sqlite?isolation_level=IMMEDIATE',
    }
    galaxy_config = ConfigParser( fallbacks )
    galaxy_config.read( config_name )

    from galaxy.model import mapping

    section = 'app:main'
    file_path = galaxy_config.get( section, 'file_path' )
    database_connection = galaxy_config.get( section, 'database_connection' )
    return mapping.init( file_path, database_connection, create_tables = False )

if __name__ == '__main__':
    # Script entry point: walk every Dataset row and fill in total_size where
    # it has not been set yet, reporting progress as a percentage on one line.
    print( 'Loading Galaxy model...' )
    model = init()
    sa_session = model.context.current

    # Number of datasets updated so far (previously named `set`, which
    # shadowed the builtin of the same name).
    updated = 0
    dataset_count = sa_session.query( model.Dataset ).count()
    print( 'Processing %i datasets...' % dataset_count )
    percent = 0
    # Progress is written with sys.stdout.write so that each update is exactly
    # '\r' + text, with no newline and no implicit separator in between.
    sys.stdout.write( 'Completed %i%%' % percent )
    sys.stdout.flush()
    # yield_per( 1000 ) asks SQLAlchemy to fetch rows in batches rather than
    # loading the whole table at once; eager loads are disabled for this query.
    datasets = sa_session.query( model.Dataset ).enable_eagerloads( False ).yield_per( 1000 )
    for i, dataset in enumerate( datasets ):
        if dataset.total_size is None:
            dataset.set_total_size()
            updated += 1
            # Flush pending changes after every 1000 updated datasets.
            if not updated % 1000:
                sa_session.flush()
        new_percent = int( float(i) / dataset_count * 100 )
        if new_percent != percent:
            # Only redraw the progress line when the whole-number percentage changes.
            percent = new_percent
            sys.stdout.write( '\rCompleted %i%%' % percent )
            sys.stdout.flush()
    # Flush whatever remains from the last, partial batch.
    sa_session.flush()
    # Overwrite the progress line with the final state.  The original printed
    # the unformatted literal 'Completed 100%%' (two percent signs) after the
    # leftover progress text instead of replacing it.
    sys.stdout.write( '\rCompleted 100%\n' )
    sys.stdout.flush()

Summary ✨

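This is a Python 2 script that initializes the Galaxy model and sets the total size of each dataset in the database. It uses the ConfigParser module to read the `file_path` and `database_connection` options from the `[app:main]` section of the Galaxy configuration file (universe_wsgi.ini by default, or the path given with `-c`/`--config`), using built-in defaults for options the file does not set, and it imports `galaxy.eggs` and `pkg_resources` before loading the model mapping. The script then queries the database for all datasets and iterates over them, setting the total size of each one if it is not already set and flushing the changes to the database in batches.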

Tech Fingerprint

Alerts (1)

'def' Ensure functions have docstrings for documentation
13