PageRenderTime 5ms CodeModel.GetById 1ms RepoModel.GetById 0ms app.codeStats 0ms

/tools/maf/maf_to_fasta_multiple_sets.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 58 lines | 47 code | 6 blank | 5 comment | 20 complexity | a1854887c47cbf17d0030a6cc46daed5 MD5 | raw file
  1. #!/usr/bin/env python
  2. """
  3. Read a maf and output a multiple block fasta file.
  4. """
  5. #Dan Blankenberg
  6. import sys
  7. from galaxy import eggs
  8. import pkg_resources; pkg_resources.require( "bx-python" )
  9. from bx.align import maf
  10. from galaxy.tools.util import maf_utilities
  11. assert sys.version_info[:2] >= ( 2, 4 )
  12. def __main__():
  13. try:
  14. maf_reader = maf.Reader( open( sys.argv[1] ) )
  15. except Exception, e:
  16. maf_utilities.tool_fail( "Error opening input MAF: %s" % e )
  17. try:
  18. file_out = open( sys.argv[2], 'w' )
  19. except Exception, e:
  20. maf_utilities.tool_fail( "Error opening file for output: %s" % e )
  21. try:
  22. species = maf_utilities.parse_species_option( sys.argv[3] )
  23. if species:
  24. num_species = len( species )
  25. else:
  26. num_species = 0
  27. except Exception, e:
  28. maf_utilities.tool_fail( "Error determining species value: %s" % e )
  29. try:
  30. partial = sys.argv[4]
  31. except Exception, e:
  32. maf_utilities.tool_fail( "Error determining keep partial value: %s" % e )
  33. if species:
  34. print "Restricted to species: %s" % ', '.join( species )
  35. else:
  36. print "Not restricted to species."
  37. for block_num, block in enumerate( maf_reader ):
  38. if species:
  39. block = block.limit_to_species( species )
  40. if len( maf_utilities.get_species_in_block( block ) ) < num_species and partial == "partial_disallowed": continue
  41. spec_counts = {}
  42. for component in block.components:
  43. spec, chrom = maf_utilities.src_split( component.src )
  44. if spec not in spec_counts:
  45. spec_counts[ spec ] = 0
  46. else:
  47. spec_counts[ spec ] += 1
  48. file_out.write( "%s\n" % maf_utilities.get_fasta_header( component, { 'block_index' : block_num, 'species' : spec, 'sequence_index' : spec_counts[ spec ] }, suffix = "%s_%i_%i" % ( spec, block_num, spec_counts[ spec ] ) ) )
  49. file_out.write( "%s\n" % component.text )
  50. file_out.write( "\n" )
  51. file_out.close()
  52. if __name__ == "__main__": __main__()