/scripts/others/incorrect_gops_join_jobs.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 99 lines · 85 code · 7 blank · 7 comment · 14 complexity · 240eb5d683510e771f84f0aab9dd767d MD5 · raw file

  1. #!/usr/bin/env python
  2. """
  3. Fetch gops_join jobs wherein the user-specified minimum coverage is not 1.
  4. """
  5. from galaxy import eggs
  6. import sys, os, ConfigParser, tempfile
  7. import galaxy.app
  8. import galaxy.model.mapping
  9. import pkg_resources
  10. pkg_resources.require( "SQLAlchemy >= 0.4" )
  11. import sqlalchemy as sa
  12. assert sys.version_info[:2] >= ( 2, 4 )
  13. class TestApplication( object ):
  14. """Encapsulates the state of a Universe application"""
  15. def __init__( self, database_connection=None, file_path=None ):
  16. print >> sys.stderr, "python path is: " + ", ".join( sys.path )
  17. if database_connection is None:
  18. raise Exception( "CleanupDatasetsApplication requires a database_connection value" )
  19. if file_path is None:
  20. raise Exception( "CleanupDatasetsApplication requires a file_path value" )
  21. self.database_connection = database_connection
  22. self.file_path = file_path
  23. # Setup the database engine and ORM
  24. self.model = galaxy.model.mapping.init( self.file_path, self.database_connection, engine_options={}, create_tables=False )
  25. def main():
  26. ini_file = sys.argv[1]
  27. conf_parser = ConfigParser.ConfigParser( {'here':os.getcwd()} )
  28. conf_parser.read( ini_file )
  29. configuration = {}
  30. for key, value in conf_parser.items( "app:main" ):
  31. configuration[key] = value
  32. database_connection = configuration['database_connection']
  33. file_path = configuration['file_path']
  34. app = TestApplication( database_connection=database_connection, file_path=file_path )
  35. jobs = {}
  36. try:
  37. for job in app.model.Job.filter( sa.and_( app.model.Job.table.c.create_time < '2008-12-16',
  38. app.model.Job.table.c.state == 'ok',
  39. app.model.Job.table.c.tool_id == 'gops_join_1',
  40. sa.not_( app.model.Job.table.c.command_line.like( '%-m 1 %' ) )
  41. )
  42. ).all():
  43. print "# processing job id %s" % str( job.id )
  44. for jtoda in job.output_datasets:
  45. print "# --> processing JobToOutputDatasetAssociation id %s" % str( jtoda.id )
  46. hda = app.model.HistoryDatasetAssociation.get( jtoda.dataset_id )
  47. print "# ----> processing HistoryDatasetAssociation id %s" % str( hda.id )
  48. if not hda.deleted:
  49. # Probably don't need this check, since the job state should suffice, but...
  50. if hda.dataset.state == 'ok':
  51. history = app.model.History.get( hda.history_id )
  52. print "# ------> processing history id %s" % str( history.id )
  53. if history.user_id:
  54. cmd_line = str( job.command_line )
  55. new_output = tempfile.NamedTemporaryFile('w')
  56. new_cmd_line = " ".join(map(str,cmd_line.split()[:4])) + " " + new_output.name + " " + " ".join(map(str,cmd_line.split()[5:]))
  57. job_output = cmd_line.split()[4]
  58. try:
  59. os.system(new_cmd_line)
  60. except:
  61. pass
  62. diff_status = os.system('diff %s %s >> /dev/null' %(new_output.name, job_output))
  63. if diff_status == 0:
  64. continue
  65. print "# --------> Outputs differ"
  66. user = app.model.User.get( history.user_id )
  67. jobs[ job.id ] = {}
  68. jobs[ job.id ][ 'hda_id' ] = hda.id
  69. jobs[ job.id ][ 'hda_name' ] = hda.name
  70. jobs[ job.id ][ 'hda_info' ] = hda.info
  71. jobs[ job.id ][ 'history_id' ] = history.id
  72. jobs[ job.id ][ 'history_name' ] = history.name
  73. jobs[ job.id ][ 'history_update_time' ] = history.update_time
  74. jobs[ job.id ][ 'user_email' ] = user.email
  75. except Exception, e:
  76. print "# caught exception: %s" % str( e )
  77. print "\n\n# Number of incorrect Jobs: %d\n\n" % ( len( jobs ) )
  78. print "#job_id\thda_id\thda_name\thda_info\thistory_id\thistory_name\thistory_update_time\tuser_email"
  79. for jid in jobs:
  80. print '%s\t%s\t"%s"\t"%s"\t%s\t"%s"\t"%s"\t%s' % \
  81. ( str( jid ),
  82. str( jobs[ jid ][ 'hda_id' ] ),
  83. jobs[ jid ][ 'hda_name' ],
  84. jobs[ jid ][ 'hda_info' ],
  85. str( jobs[ jid ][ 'history_id' ] ),
  86. jobs[ jid ][ 'history_name' ],
  87. jobs[ jid ][ 'history_update_time' ],
  88. jobs[ jid ][ 'user_email' ]
  89. )
  90. sys.exit(0)
  91. if __name__ == "__main__":
  92. main()