PageRenderTime 35ms CodeModel.GetById 21ms app.highlight 12ms RepoModel.GetById 1ms app.codeStats 0ms

/scripts/others/incorrect_gops_join_jobs.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 99 lines | 85 code | 7 blank | 7 comment | 16 complexity | 240eb5d683510e771f84f0aab9dd767d MD5 | raw file
 1#!/usr/bin/env python
 2"""
 3Fetch gops_join jobs wherein the user-specified minimum coverage is not 1.
 4"""
 5
 6from galaxy import eggs
 7import sys, os, ConfigParser, tempfile
 8import galaxy.app
 9import galaxy.model.mapping
10import pkg_resources
11        
12pkg_resources.require( "SQLAlchemy >= 0.4" )
13import sqlalchemy as sa
14
15assert sys.version_info[:2] >= ( 2, 4 )
16
class TestApplication( object ):
    """Minimal stand-in for a Universe application: holds the database
    connection string and file path, and initialises the Galaxy model mapping.

    Attributes set by the constructor:
        database_connection -- the connection string passed in
        file_path           -- the file path passed in
        model               -- whatever galaxy.model.mapping.init() returns
                               (tables are not created)
    """
    def __init__( self, database_connection=None, file_path=None ):
        """Validate the arguments and set up the database engine and ORM.

        Raises Exception when either database_connection or file_path is None.
        """
        # Diagnostic aid: show which paths modules are being resolved from.
        sys.stderr.write( "python path is: " + ", ".join( sys.path ) + "\n" )
        if database_connection is None:
            raise Exception( "TestApplication requires a database_connection value" )
        if file_path is None:
            raise Exception( "TestApplication requires a file_path value" )
        self.database_connection = database_connection
        self.file_path = file_path
        # Setup the database engine and ORM
        self.model = galaxy.model.mapping.init( self.file_path, self.database_connection, engine_options={}, create_tables=False )
29
def _rerun_output_differs( cmd_line ):
    """Re-run a recorded job command line into a temp file and diff the result.

    The fifth whitespace-separated token of cmd_line is taken to be the
    original output path; it is swapped for a temporary file, the command is
    re-executed through the shell, and the two outputs are compared with diff.

    Returns True when diff reports a non-zero status, False when the outputs
    are identical, and None when cmd_line has too few tokens to locate the
    output path.
    """
    import subprocess
    tokens = cmd_line.split()
    if len( tokens ) < 5:
        print( "# --------> Command line too short to locate the output file; skipping" )
        return None
    job_output = tokens[4]
    new_output = tempfile.NamedTemporaryFile( 'w' )
    try:
        new_cmd_line = " ".join( tokens[:4] + [ new_output.name ] + tokens[5:] )
        # os.system() does not raise when the command fails; it reports the
        # failure through its return status, so surface that explicitly.
        rerun_status = os.system( new_cmd_line )
        if rerun_status != 0:
            print( "# --------> Warning: re-run exited with status %s" % str( rerun_status ) )
        # Pass the paths as an argument list so the shell never interprets them.
        devnull = open( os.devnull, 'w' )
        try:
            diff_status = subprocess.call( [ 'diff', new_output.name, job_output ], stdout=devnull )
        finally:
            devnull.close()
        return diff_status != 0
    finally:
        # Closing the NamedTemporaryFile also removes it from disk.
        new_output.close()

def main():
    """Report gops_join jobs whose output changes when they are re-run.

    Reads the Galaxy ini file named by sys.argv[1], selects 'ok' gops_join_1
    jobs created before 2008-12-16 whose command line does not contain
    '-m 1 ', re-runs each one whose output dataset is undeleted, in state
    'ok' and in a history that has a user, and prints a tab-separated report
    of the jobs whose fresh output differs from the stored one.

    Exits with status 0 after printing the report, or with a usage message
    when no ini file argument is given.
    """
    if len( sys.argv ) < 2:
        sys.exit( "usage: %s <galaxy_ini_file>" % sys.argv[0] )
    ini_file = sys.argv[1]
    conf_parser = ConfigParser.ConfigParser( {'here':os.getcwd()} )
    conf_parser.read( ini_file )
    configuration = dict( conf_parser.items( "app:main" ) )
    database_connection = configuration['database_connection']
    file_path = configuration['file_path']
    app = TestApplication( database_connection=database_connection, file_path=file_path )
    jobs = {}
    try:
        job_columns = app.model.Job.table.c
        criteria = sa.and_( job_columns.create_time < '2008-12-16',
                            job_columns.state == 'ok',
                            job_columns.tool_id == 'gops_join_1',
                            sa.not_( job_columns.command_line.like( '%-m 1 %' ) )
                          )
        for job in app.model.Job.filter( criteria ).all():
            print( "# processing job id %s" % str( job.id ) )
            for jtoda in job.output_datasets:
                print( "# --> processing JobToOutputDatasetAssociation id %s" % str( jtoda.id ) )
                hda = app.model.HistoryDatasetAssociation.get( jtoda.dataset_id )
                print( "# ----> processing HistoryDatasetAssociation id %s" % str( hda.id ) )
                if hda.deleted:
                    continue
                # Probably don't need this check, since the job state should suffice, but...
                if hda.dataset.state != 'ok':
                    continue
                history = app.model.History.get( hda.history_id )
                print( "# ------> processing history id %s" % str( history.id ) )
                if not history.user_id:
                    continue
                if not _rerun_output_differs( str( job.command_line ) ):
                    continue
                print( "# --------> Outputs differ" )
                user = app.model.User.get( history.user_id )
                jobs[ job.id ] = { 'hda_id': hda.id,
                                   'hda_name': hda.name,
                                   'hda_info': hda.info,
                                   'history_id': history.id,
                                   'history_name': history.name,
                                   'history_update_time': history.update_time,
                                   'user_email': user.email }
    except Exception:
        # sys.exc_info() keeps this handler valid on both old and new Pythons.
        e = sys.exc_info()[1]
        print( "# caught exception: %s" % str( e ) )

    print( "\n\n# Number of incorrect Jobs: %d\n\n" % ( len( jobs ) ) )
    print( "#job_id\thda_id\thda_name\thda_info\thistory_id\thistory_name\thistory_update_time\tuser_email" )
    for jid in jobs:
        record = jobs[ jid ]
        print( '%s\t%s\t"%s"\t"%s"\t%s\t"%s"\t"%s"\t%s' % \
            ( str( jid ),
              str( record[ 'hda_id' ] ),
              record[ 'hda_name' ],
              record[ 'hda_info' ],
              str( record[ 'history_id' ] ),
              record[ 'history_name' ],
              record[ 'history_update_time' ],
              record[ 'user_email' ]
            ) )
    sys.exit(0)
97
# Script entry point: run the report only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()