PageRenderTime 10ms CodeModel.GetById 1ms app.highlight 6ms RepoModel.GetById 1ms app.codeStats 0ms

/scripts/tools/maf/check_loc_file.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 55 lines | 47 code | 3 blank | 5 comment | 38 complexity | a3eda9e135c97e063acf6160b55f858b MD5 | raw file
 1#Dan Blankenberg
 2#This script checks maf_index.loc file for inconsistencies between what is listed as available and what is really available.
 3#Make sure that required dependencies (e.g. galaxy_root/lib and galaxy_root/eggs) are included in your PYTHONPATH
 4from galaxy import eggs
 5import pkg_resources; pkg_resources.require( "bx-python" )
 6import bx.align.maf
 7from galaxy.tools.util import maf_utilities
 8import sys
 9
10assert sys.version_info[:2] >= ( 2, 4 )
11
def _scan_maf_file( maf_file, species_indexed_in_maf, species_found_in_maf ):
    """Record the species seen in one MAF file's index and in its blocks.

    Both lists are extended in place, keeping first-seen order and skipping
    species already present, so they can be shared across several MAF files.

    maf_file               -- path handed to bx.align.maf.MAFIndexedAccess
    species_indexed_in_maf -- list receiving species named by the index keys
    species_found_in_maf   -- list receiving species named by block components

    Any exception raised while opening or reading the file propagates to the
    caller; the data file handle is closed either way.
    """
    indexed_maf = bx.align.maf.MAFIndexedAccess( maf_file, keep_open = True, parse_e_rows = False )
    try:
        #species according to the index
        for key in indexed_maf.indexes.indexes.keys():
            spec = maf_utilities.src_split( key )[0]
            if spec not in species_indexed_in_maf:
                species_indexed_in_maf.append( spec )
        #species according to the alignment blocks themselves
        while True: #reading entire maf set will take some time
            block = indexed_maf.read_at_current_offset( indexed_maf.f )
            if block is None:
                break
            for comp in block.components:
                spec = maf_utilities.src_split( comp.src )[0]
                if spec not in species_found_in_maf:
                    species_found_in_maf.append( spec )
    finally:
        #keep_open = True leaves the data file handle on the accessor; release
        #it here instead of holding one handle per MAF file until exit
        if indexed_maf.f is not None:
            indexed_maf.f.close()

def __main__():
    """Check a maf_index.loc file against the MAF files it lists.

    The path of the loc file is taken from sys.argv[ 1 ]. Lines starting with
    '#' and blank lines are skipped. Every other line must hold exactly five
    tab-separated fields:

        display_name, uid, indexed_for_species, species_exist, maf_files

    where the last three are comma-separated lists. For each line, the species
    named by the MAF index keys and the species named by the components of
    every block are collected from all listed MAF files and compared with the
    two species lists in the loc file; each disagreement is printed.

    Line numbers in the messages are 1-based. Any exception raised while
    handling a line (wrong field count, unreadable MAF file, ...) is reported
    as "Line N is invalid: ..." and checking continues with the next line.

    Only constructs valid from Python 2.4 through 3.x are used (single-argument
    print, sys.exc_info(), nested try/finally), in keeping with the version
    assert at the top of the file.
    """
    index_location_file = sys.argv[ 1 ]
    loc_file = open( index_location_file )
    try:
        for i, line in enumerate( loc_file ):
            line_number = i + 1 #enumerate counts from 0, editors count from 1
            try:
                if line.startswith( '#' ) or not line.strip():
                    continue
                #display_name is not checked, but unpacking it enforces the five-field layout
                display_name, uid, indexed_for_species, species_exist, maf_files = line.rstrip().split('\t')
                indexed_for_species = indexed_for_species.split( ',' )
                species_exist = species_exist.split( ',' )
                maf_files = maf_files.split( ',' )
                species_indexed_in_maf = []
                species_found_in_maf = []
                for maf_file in maf_files:
                    _scan_maf_file( maf_file, species_indexed_in_maf, species_found_in_maf )
                #indexed species
                for spec in indexed_for_species:
                    if spec not in species_indexed_in_maf:
                        print( "Line %i, %s claims to be indexed for %s, but indexes do not exist." % ( line_number, uid, spec ) )
                for spec in species_indexed_in_maf:
                    if spec not in indexed_for_species:
                        print( "Line %i, %s is indexed for %s, but is not listed in loc file." % ( line_number, uid, spec ) )
                #existing species
                for spec in species_exist:
                    if spec not in species_found_in_maf:
                        print( "Line %i, %s claims to have blocks for %s, but was not found in MAF files." % ( line_number, uid, spec ) )
                for spec in species_found_in_maf:
                    if spec not in species_exist:
                        print( "Line %i, %s contains %s, but is not listed in loc file." % ( line_number, uid, spec ) )
            except Exception:
                #sys.exc_info() instead of "except Exception, e" / "as e": the
                #only spelling that parses on every Python from 2.4 to 3.x
                e = sys.exc_info()[1]
                print( "Line %i is invalid: %s" % ( line_number, e ) )
    finally:
        loc_file.close()
54
#Run the check only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    __main__()