PageRenderTime 23ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/scripts/others/incorrect_gops_jobs.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 115 lines | 101 code | 2 blank | 12 comment | 13 complexity | 6d580faeb123852d49381a46f91cb555 MD5 | raw file
  1. #!/usr/bin/env python
  2. """
  3. Fetch jobs using gops_intersect, gops_merge, gops_subtract, gops_complement, gops_coverage
  4. wherein the second dataset doesn't have chr, start and end in standard columns 1, 2 and 3.
  5. """
  6. from galaxy import eggs
  7. import sys, os, ConfigParser, tempfile
  8. import galaxy.app
  9. import galaxy.model.mapping
  10. import pkg_resources
  11. pkg_resources.require( "SQLAlchemy >= 0.4" )
  12. import sqlalchemy as sa
  13. assert sys.version_info[:2] >= ( 2, 4 )
  class TestApplication( object ):
      """Hold the connection settings and the mapped Galaxy model for this script.

      Constructing an instance writes the current ``sys.path`` to stderr,
      checks that both settings were supplied, and then initialises the ORM
      through ``galaxy.model.mapping.init`` without creating any tables.
      """

      def __init__( self, database_connection=None, file_path=None ):
          """Validate the settings and build ``self.model``.

          database_connection -- value handed to ``galaxy.model.mapping.init``
                                 as the database connection.
          file_path           -- value handed to ``galaxy.model.mapping.init``
                                 as its first argument.

          Raises ValueError (an ``Exception`` subclass, so existing
          ``except Exception`` handlers still apply) when either is None.
          """
          sys.stderr.write( "python path is: " + ", ".join( sys.path ) + "\n" )
          # The messages name this class; they previously named a different
          # class, which sent readers of the traceback to the wrong place.
          if database_connection is None:
              raise ValueError( "TestApplication requires a database_connection value" )
          if file_path is None:
              raise ValueError( "TestApplication requires a file_path value" )
          self.database_connection = database_connection
          self.file_path = file_path
          # Setup the database engine and ORM
          self.model = galaxy.model.mapping.init( self.file_path,
                                                  self.database_connection,
                                                  engine_options={},
                                                  create_tables=False )
  # Tool ids for which the output path is token index 4 of the recorded
  # command line; for every other tool handled here it is token index 3.
  _OUTPUT_AT_INDEX_4_TOOL_IDS = ( 'gops_intersect_1', 'gops_subtract_1', 'gops_coverage_1' )


  def _build_rerun_command( cmd_line, tool_id, new_output_name ):
      """Return ``( new_cmd_line, job_output )`` for re-running a job.

      ``job_output`` is the whitespace-separated token of ``cmd_line`` that
      holds the original output path, and ``new_cmd_line`` is ``cmd_line``
      with that token replaced by ``new_output_name``.

      Raises IndexError when ``cmd_line`` has too few tokens.
      """
      tokens = cmd_line.split()
      if tool_id in _OUTPUT_AT_INDEX_4_TOOL_IDS:
          output_index = 4
      else:
          output_index = 3
      job_output = tokens[ output_index ]
      new_cmd_line = ( " ".join( tokens[ :output_index ] )
                       + " " + new_output_name + " "
                       + " ".join( tokens[ output_index + 1: ] ) )
      return new_cmd_line, job_output


  def _rerun_output_differs( job ):
      """Re-run ``job``'s command line and report whether the output changed.

      The command is executed with its output redirected to a temporary file,
      which is then compared to the job's recorded output with ``diff``.
      Returns True when ``diff`` exits with a non-zero status.
      """
      cmd_line = str( job.command_line )
      new_output = tempfile.NamedTemporaryFile( 'w' )
      try:
          new_cmd_line, job_output = _build_rerun_command( cmd_line, job.tool_id, new_output.name )
          # NOTE(review): both commands below are handed to the shell as plain
          # strings built from database values; that is only acceptable as long
          # as the job table is trusted.
          try:
              rerun_status = os.system( new_cmd_line )
          except Exception:
              # Still best effort: report the problem and let the diff decide.
              sys.stderr.write( "# re-run of job %s could not be started: %s\n"
                                % ( str( job.id ), str( sys.exc_info()[1] ) ) )
          else:
              if rerun_status != 0:
                  sys.stderr.write( "# re-run of job %s exited with status %s\n"
                                    % ( str( job.id ), str( rerun_status ) ) )
          diff_status = os.system( 'diff %s %s >> /dev/null' % ( new_output.name, job_output ) )
          return diff_status != 0
      finally:
          # Closing releases the handle on every path.
          new_output.close()


  def _suspect_jobs_filter( app ):
      """Build the SQLAlchemy criterion selecting the jobs to re-check.

      Selects jobs created between 2008-05-23 and 2008-11-29 whose state is
      'ok' and whose tool is either intersect/subtract/coverage with a command
      line not containing '-2 1,2,3', or complement/merge with a command line
      not containing '-1 1,2,3'.
      """
      job_table = app.model.Job.table
      tool_id = job_table.c.tool_id
      command_line = job_table.c.command_line
      return sa.and_(
          job_table.c.create_time.between( '2008-05-23', '2008-11-29' ),
          job_table.c.state == 'ok',
          sa.or_(
              sa.and_( sa.or_( tool_id == 'gops_intersect_1',
                               tool_id == 'gops_subtract_1',
                               tool_id == 'gops_coverage_1' ),
                       sa.not_( command_line.like( '%-2 1,2,3%' ) ) ),
              sa.and_( sa.or_( tool_id == 'gops_complement_1',
                               tool_id == 'gops_merge_1' ),
                       sa.not_( command_line.like( '%-1 1,2,3%' ) ) )
          )
      )


  def main():
      """Report jobs whose re-run output differs from the stored output.

      Reads the ini file named by ``sys.argv[1]``, takes
      ``database_connection`` and ``file_path`` from its ``app:main``
      section, and queries the selected jobs.  For every output dataset that
      is not deleted, is in state 'ok' and belongs to a history with a user,
      the job is re-run and compared with the stored output.  Progress lines
      and a tab-separated report are printed to stdout, then the process
      exits with status 0.

      Exits with a usage message when no ini file argument is given.
      """
      if len( sys.argv ) < 2:
          sys.exit( "usage: incorrect_gops_jobs.py <galaxy_ini_file>" )
      ini_file = sys.argv[1]
      conf_parser = ConfigParser.ConfigParser( {'here': os.getcwd()} )
      conf_parser.read( ini_file )
      configuration = dict( conf_parser.items( "app:main" ) )
      app = TestApplication( database_connection=configuration['database_connection'],
                             file_path=configuration['file_path'] )
      jobs = {}
      try:
          for job in app.model.Job.filter( _suspect_jobs_filter( app ) ).all():
              print( "# processing job id %s" % str( job.id ) )
              for jtoda in job.output_datasets:
                  print( "# --> processing JobToOutputDatasetAssociation id %s" % str( jtoda.id ) )
                  hda = app.model.HistoryDatasetAssociation.get( jtoda.dataset_id )
                  print( "# ----> processing HistoryDatasetAssociation id %s" % str( hda.id ) )
                  if hda.deleted:
                      continue
                  # Probably don't need this check, since the job state should suffice, but...
                  if hda.dataset.state != 'ok':
                      continue
                  history = app.model.History.get( hda.history_id )
                  print( "# ------> processing history id %s" % str( history.id ) )
                  if not history.user_id:
                      continue
                  if not _rerun_output_differs( job ):
                      continue
                  print( "# --------> Outputs differ" )
                  user = app.model.User.get( history.user_id )
                  # Build the record in one step so that a failure while
                  # reading any field cannot leave a half-filled entry that
                  # would break the report loop below.
                  jobs[ job.id ] = {
                      'hda_id': hda.id,
                      'hda_name': hda.name,
                      'hda_info': hda.info,
                      'history_id': history.id,
                      'history_name': history.name,
                      'history_update_time': history.update_time,
                      'user_email': user.email,
                  }
      except Exception:
          # Any error stops the scan; whatever was collected so far is still reported.
          print( "# caught exception: %s" % str( sys.exc_info()[1] ) )
      print( "\n\n# Number of incorrect Jobs: %d\n\n" % ( len( jobs ) ) )
      print( "#job_id\thda_id\thda_name\thda_info\thistory_id\thistory_name\thistory_update_time\tuser_email" )
      for jid in jobs:
          record = jobs[ jid ]
          print( '%s\t%s\t"%s"\t"%s"\t%s\t"%s"\t"%s"\t%s' % (
              str( jid ),
              str( record[ 'hda_id' ] ),
              record[ 'hda_name' ],
              record[ 'hda_info' ],
              str( record[ 'history_id' ] ),
              record[ 'history_name' ],
              record[ 'history_update_time' ],
              record[ 'user_email' ] ) )
      sys.exit(0)
  # Run the report only when this file is executed as a script, so that
  # importing the module has no side effects beyond its definitions.
  if __name__ == "__main__":
      main()