PageRenderTime 25ms CodeModel.GetById 10ms app.highlight 12ms RepoModel.GetById 1ms app.codeStats 0ms

/scripts/others/incorrect_gops_jobs.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 115 lines | 101 code | 2 blank | 12 comment | 15 complexity | 6d580faeb123852d49381a46f91cb555 MD5 | raw file
  1#!/usr/bin/env python
  2"""
  3Fetch jobs using gops_intersect, gops_merge, gops_subtract, gops_complement, gops_coverage 
  4wherein the second dataset doesn't have chr, start and end in standard columns 1, 2 and 3.
  5"""
  6
  7from galaxy import eggs
  8import sys, os, ConfigParser, tempfile
  9import galaxy.app
 10import galaxy.model.mapping
 11import pkg_resources
 12        
 13pkg_resources.require( "SQLAlchemy >= 0.4" )
 14import sqlalchemy as sa
 15
 16assert sys.version_info[:2] >= ( 2, 4 )
 17
 18class TestApplication( object ):
 19    """Encapsulates the state of a Universe application"""
 20    def __init__( self, database_connection=None, file_path=None ):
 21        print >> sys.stderr, "python path is: " + ", ".join( sys.path )
 22        if database_connection is None:
 23            raise Exception( "CleanupDatasetsApplication requires a database_connection value" )
 24        if file_path is None:
 25            raise Exception( "CleanupDatasetsApplication requires a file_path value" )
 26        self.database_connection = database_connection
 27        self.file_path = file_path
 28        # Setup the database engine and ORM
 29        self.model = galaxy.model.mapping.init( self.file_path, self.database_connection, engine_options={}, create_tables=False )
 30
 31def main():
 32    ini_file = sys.argv[1]
 33    conf_parser = ConfigParser.ConfigParser( {'here':os.getcwd()} )
 34    conf_parser.read( ini_file )
 35    configuration = {}
 36    for key, value in conf_parser.items( "app:main" ):
 37        configuration[key] = value
 38    database_connection = configuration['database_connection']
 39    file_path = configuration['file_path']
 40    app = TestApplication( database_connection=database_connection, file_path=file_path )
 41    jobs = {}
 42    try:
 43        for job in app.model.Job.filter( sa.and_( app.model.Job.table.c.create_time.between( '2008-05-23', '2008-11-29' ),
 44                                                  app.model.Job.table.c.state == 'ok',
 45                                                  sa.or_(
 46                                                          sa.and_( sa.or_( app.model.Job.table.c.tool_id == 'gops_intersect_1',
 47                                                                           app.model.Job.table.c.tool_id == 'gops_subtract_1',
 48                                                                           app.model.Job.table.c.tool_id == 'gops_coverage_1',
 49                                                                         ),
 50                                                                   sa.not_( app.model.Job.table.c.command_line.like( '%-2 1,2,3%' ) )
 51                                                                 ),
 52                                                          sa.and_( sa.or_( app.model.Job.table.c.tool_id == 'gops_complement_1',
 53                                                                           app.model.Job.table.c.tool_id == 'gops_merge_1',
 54                                                                         ),
 55                                                                   sa.not_( app.model.Job.table.c.command_line.like( '%-1 1,2,3%' ) )
 56                                                                 )
 57                                                          )
 58                                                )
 59                                        ).all():
 60            print "# processing job id %s" % str( job.id )
 61            for jtoda in job.output_datasets:
 62                print "# --> processing JobToOutputDatasetAssociation id %s" % str( jtoda.id )
 63                hda = app.model.HistoryDatasetAssociation.get( jtoda.dataset_id )
 64                print "# ----> processing HistoryDatasetAssociation id %s" % str( hda.id )
 65                if not hda.deleted:
 66                    # Probably don't need this check, since the job state should suffice, but...
 67                    if hda.dataset.state == 'ok':
 68                        history = app.model.History.get( hda.history_id )
 69                        print "# ------> processing history id %s" % str( history.id )
 70                        if history.user_id:
 71                            cmd_line = str( job.command_line )
 72                            new_output = tempfile.NamedTemporaryFile('w')
 73                            if job.tool_id in ['gops_intersect_1','gops_subtract_1','gops_coverage_1']:
 74                                new_cmd_line = " ".join(map(str,cmd_line.split()[:4])) + " " + new_output.name + " " + " ".join(map(str,cmd_line.split()[5:]))
 75                                job_output = cmd_line.split()[4]
 76                            else:
 77                                new_cmd_line = " ".join(map(str,cmd_line.split()[:3])) + " " +  new_output.name + " " + " ".join(map(str,cmd_line.split()[4:]))
 78                                job_output = cmd_line.split()[3]
 79                            try:
 80                                os.system(new_cmd_line)
 81                            except:
 82                                pass
 83                            diff_status = os.system('diff %s %s >> /dev/null' %(new_output.name, job_output))
 84                            if diff_status == 0:
 85                                continue
 86                            print "# --------> Outputs differ"
 87                            user = app.model.User.get( history.user_id )
 88                            jobs[ job.id ] = {}
 89                            jobs[ job.id ][ 'hda_id' ] = hda.id
 90                            jobs[ job.id ][ 'hda_name' ] = hda.name
 91                            jobs[ job.id ][ 'hda_info' ] = hda.info
 92                            jobs[ job.id ][ 'history_id' ] = history.id 
 93                            jobs[ job.id ][ 'history_name' ] = history.name 
 94                            jobs[ job.id ][ 'history_update_time' ] = history.update_time
 95                            jobs[ job.id ][ 'user_email' ] = user.email
 96    except Exception, e:
 97        print "# caught exception: %s" % str( e )
 98    
 99    print "\n\n# Number of incorrect Jobs: %d\n\n" % ( len( jobs ) )
100    print "#job_id\thda_id\thda_name\thda_info\thistory_id\thistory_name\thistory_update_time\tuser_email"
101    for jid in jobs:
102        print '%s\t%s\t"%s"\t"%s"\t%s\t"%s"\t"%s"\t%s' % \
103            ( str( jid ), 
104              str( jobs[ jid ][ 'hda_id' ] ), 
105              jobs[ jid ][ 'hda_name' ], 
106              jobs[ jid ][ 'hda_info' ],
107              str( jobs[ jid ][ 'history_id' ] ),
108              jobs[ jid ][ 'history_name' ],
109              jobs[ jid ][ 'history_update_time' ],
110              jobs[ jid ][ 'user_email' ]
111            )
112    sys.exit(0)
113
114if __name__ == "__main__":
115    main()