PageRenderTime 21ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/scripts/tools/maf/check_loc_file.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 55 lines | 47 code | 3 blank | 5 comment | 20 complexity | a3eda9e135c97e063acf6160b55f858b MD5 | raw file
  1. #Dan Blankenberg
  2. #This script checks maf_index.loc file for inconsistencies between what is listed as available and what is really available.
  3. #Make sure that required dependencies (e.g. galaxy_root/lib and galaxy_root/eggs) are included in your PYTHONPATH
  4. from galaxy import eggs
  5. import pkg_resources; pkg_resources.require( "bx-python" )
  6. import bx.align.maf
  7. from galaxy.tools.util import maf_utilities
  8. import sys
  9. assert sys.version_info[:2] >= ( 2, 4 )
  10. def __main__():
  11. index_location_file = sys.argv[ 1 ]
  12. for i, line in enumerate( open( index_location_file ) ):
  13. try:
  14. if line.startswith( '#' ):
  15. continue
  16. display_name, uid, indexed_for_species, species_exist, maf_files = line.rstrip().split('\t')
  17. indexed_for_species = indexed_for_species.split( ',' )
  18. species_exist = species_exist.split( ',' )
  19. maf_files = maf_files.split( ',' )
  20. species_indexed_in_maf = []
  21. species_found_in_maf = []
  22. for maf_file in maf_files:
  23. indexed_maf = bx.align.maf.MAFIndexedAccess( maf_file, keep_open = True, parse_e_rows = False )
  24. for key in indexed_maf.indexes.indexes.keys():
  25. spec = maf_utilities.src_split( key )[0]
  26. if spec not in species_indexed_in_maf:
  27. species_indexed_in_maf.append( spec )
  28. while True: #reading entire maf set will take some time
  29. block = indexed_maf.read_at_current_offset( indexed_maf.f )
  30. if block is None:
  31. break
  32. for comp in block.components:
  33. spec = maf_utilities.src_split( comp.src )[0]
  34. if spec not in species_found_in_maf:
  35. species_found_in_maf.append( spec )
  36. #indexed species
  37. for spec in indexed_for_species:
  38. if spec not in species_indexed_in_maf:
  39. print "Line %i, %s claims to be indexed for %s, but indexes do not exist." % ( i, uid, spec )
  40. for spec in species_indexed_in_maf:
  41. if spec not in indexed_for_species:
  42. print "Line %i, %s is indexed for %s, but is not listed in loc file." % ( i, uid, spec )
  43. #existing species
  44. for spec in species_exist:
  45. if spec not in species_found_in_maf:
  46. print "Line %i, %s claims to have blocks for %s, but was not found in MAF files." % ( i, uid, spec )
  47. for spec in species_found_in_maf:
  48. if spec not in species_exist:
  49. print "Line %i, %s contains %s, but is not listed in loc file." % ( i, uid, spec )
  50. except Exception, e:
  51. print "Line %i is invalid: %s" % ( i, e )
  52. if __name__ == "__main__": __main__()