/tools/maf/interval2maf.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 139 lines · 84 code · 21 blank · 34 comment · 41 complexity · 26af573448fb0af34682da35441bb440 MD5 · raw file

  1. #!/usr/bin/env python
  2. """
  3. Reads a list of intervals and a maf. Produces a new maf containing the
  4. blocks or parts of blocks in the original that overlapped the intervals.
  5. If a MAF file, not UID, is provided the MAF file is indexed before being processed.
  6. NOTE: If two intervals overlap the same block it will be written twice.
  7. usage: %prog maf_file [options]
  8. -d, --dbkey=d: Database key, ie hg17
  9. -c, --chromCol=c: Column of Chr
  10. -s, --startCol=s: Column of Start
  11. -e, --endCol=e: Column of End
  12. -S, --strandCol=S: Column of Strand
  13. -t, --mafType=t: Type of MAF source to use
  14. -m, --mafFile=m: Path of source MAF file, if not using cached version
  15. -I, --mafIndex=I: Path of precomputed source MAF file index, if not using cached version
  16. -i, --interval_file=i: Input interval file
  17. -o, --output_file=o: Output MAF file
  18. -p, --species=p: Species to include in output
  19. -P, --split_blocks_by_species=P: Split blocks by species
  20. -r, --remove_all_gap_columns=r: Remove all Gap columns
  21. -l, --indexLocation=l: Override default maf_index.loc file
  22. -z, --mafIndexFile=z: Directory of local maf index file ( maf_index.loc or maf_pairwise.loc )
  23. """
  24. #Dan Blankenberg
  25. from galaxy import eggs
  26. import pkg_resources; pkg_resources.require( "bx-python" )
  27. from bx.cookbook import doc_optparse
  28. import bx.align.maf
  29. import bx.intervals.io
  30. from galaxy.tools.util import maf_utilities
  31. import sys
  32. assert sys.version_info[:2] >= ( 2, 4 )
  33. def __main__():
  34. index = index_filename = None
  35. mincols = 0
  36. #Parse Command Line
  37. options, args = doc_optparse.parse( __doc__ )
  38. if options.dbkey: dbkey = options.dbkey
  39. else: dbkey = None
  40. if dbkey in [None, "?"]:
  41. maf_utilities.tool_fail( "You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file." )
  42. species = maf_utilities.parse_species_option( options.species )
  43. if options.chromCol: chromCol = int( options.chromCol ) - 1
  44. else:
  45. maf_utilities.tool_fail( "Chromosome column not set, click the pencil icon in the history item to set the metadata attributes." )
  46. if options.startCol: startCol = int( options.startCol ) - 1
  47. else:
  48. maf_utilities.tool_fail( "Start column not set, click the pencil icon in the history item to set the metadata attributes." )
  49. if options.endCol: endCol = int( options.endCol ) - 1
  50. else:
  51. maf_utilities.tool_fail( "End column not set, click the pencil icon in the history item to set the metadata attributes." )
  52. if options.strandCol: strandCol = int( options.strandCol ) - 1
  53. else:
  54. strandCol = -1
  55. if options.interval_file: interval_file = options.interval_file
  56. else:
  57. maf_utilities.tool_fail( "Input interval file has not been specified." )
  58. if options.output_file: output_file = options.output_file
  59. else:
  60. maf_utilities.tool_fail( "Output file has not been specified." )
  61. split_blocks_by_species = remove_all_gap_columns = False
  62. if options.split_blocks_by_species and options.split_blocks_by_species == 'split_blocks_by_species':
  63. split_blocks_by_species = True
  64. if options.remove_all_gap_columns and options.remove_all_gap_columns == 'remove_all_gap_columns':
  65. remove_all_gap_columns = True
  66. else:
  67. remove_all_gap_columns = True
  68. #Finish parsing command line
  69. #Open indexed access to MAFs
  70. if options.mafType:
  71. if options.indexLocation:
  72. index = maf_utilities.maf_index_by_uid( options.mafType, options.indexLocation )
  73. else:
  74. index = maf_utilities.maf_index_by_uid( options.mafType, options.mafIndexFile )
  75. if index is None:
  76. maf_utilities.tool_fail( "The MAF source specified (%s) appears to be invalid." % ( options.mafType ) )
  77. elif options.mafFile:
  78. index, index_filename = maf_utilities.open_or_build_maf_index( options.mafFile, options.mafIndex, species = [dbkey] )
  79. if index is None:
  80. maf_utilities.tool_fail( "Your MAF file appears to be malformed." )
  81. else:
  82. maf_utilities.tool_fail( "Desired source MAF type has not been specified." )
  83. #Create MAF writter
  84. out = bx.align.maf.Writer( open(output_file, "w") )
  85. #Iterate over input regions
  86. num_blocks = 0
  87. num_regions = None
  88. for num_regions, region in enumerate( bx.intervals.io.NiceReaderWrapper( open( interval_file, 'r' ), chrom_col = chromCol, start_col = startCol, end_col = endCol, strand_col = strandCol, fix_strand = True, return_header = False, return_comments = False ) ):
  89. src = maf_utilities.src_merge( dbkey, region.chrom )
  90. for block in index.get_as_iterator( src, region.start, region.end ):
  91. if split_blocks_by_species:
  92. blocks = [ new_block for new_block in maf_utilities.iter_blocks_split_by_species( block ) if maf_utilities.component_overlaps_region( new_block.get_component_by_src_start( dbkey ), region ) ]
  93. else:
  94. blocks = [ block ]
  95. for block in blocks:
  96. block = maf_utilities.chop_block_by_region( block, src, region )
  97. if block is not None:
  98. if species is not None:
  99. block = block.limit_to_species( species )
  100. block = maf_utilities.orient_block_by_region( block, src, region )
  101. if remove_all_gap_columns:
  102. block.remove_all_gap_columns()
  103. out.write( block )
  104. num_blocks += 1
  105. #Close output MAF
  106. out.close()
  107. #remove index file if created during run
  108. maf_utilities.remove_temp_index_file( index_filename )
  109. if num_blocks:
  110. print "%i MAF blocks extracted for %i regions." % ( num_blocks, ( num_regions + 1 ) )
  111. elif num_regions is not None:
  112. print "No MAF blocks could be extracted for %i regions." % ( num_regions + 1 )
  113. else:
  114. print "No valid regions have been provided."
  115. if __name__ == "__main__": __main__()