/scripts/set_dataset_sizes.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 53 lines · 41 code · 11 blank · 1 comment · 5 complexity · 062bd0d359d4ec8c0e327375cf72d47a MD5 · raw file

  1. #!/usr/bin/env python
  2. import os, sys
  3. from ConfigParser import ConfigParser
  4. from optparse import OptionParser
  5. default_config = os.path.abspath( os.path.join( os.path.dirname( __file__ ), '..', 'universe_wsgi.ini') )
  6. parser = OptionParser()
  7. parser.add_option( '-c', '--config', dest='config', help='Path to Galaxy config file (universe_wsgi.ini)', default=default_config )
  8. ( options, args ) = parser.parse_args()
  9. def init():
  10. options.config = os.path.abspath( options.config )
  11. os.chdir( os.path.dirname( options.config ) )
  12. sys.path.append( 'lib' )
  13. from galaxy import eggs
  14. import pkg_resources
  15. config = ConfigParser( dict( file_path = 'database/files',
  16. database_connection = 'sqlite:///database/universe.sqlite?isolation_level=IMMEDIATE' ) )
  17. config.read( os.path.basename( options.config ) )
  18. from galaxy.model import mapping
  19. return mapping.init( config.get( 'app:main', 'file_path' ), config.get( 'app:main', 'database_connection' ), create_tables = False )
  20. if __name__ == '__main__':
  21. print 'Loading Galaxy model...'
  22. model = init()
  23. sa_session = model.context.current
  24. set = 0
  25. dataset_count = sa_session.query( model.Dataset ).count()
  26. print 'Processing %i datasets...' % dataset_count
  27. percent = 0
  28. print 'Completed %i%%' % percent,
  29. sys.stdout.flush()
  30. for i, dataset in enumerate( sa_session.query( model.Dataset ).enable_eagerloads( False ).yield_per( 1000 ) ):
  31. if dataset.total_size is None:
  32. dataset.set_total_size()
  33. set += 1
  34. if not set % 1000:
  35. sa_session.flush()
  36. new_percent = int( float(i) / dataset_count * 100 )
  37. if new_percent != percent:
  38. percent = new_percent
  39. print '\rCompleted %i%%' % percent,
  40. sys.stdout.flush()
  41. sa_session.flush()
  42. print 'Completed 100%%'