PageRenderTime 46ms CodeModel.GetById 9ms app.highlight 30ms RepoModel.GetById 1ms app.codeStats 1ms

/tools/maf/interval2maf.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 139 lines | 84 code | 21 blank | 34 comment | 35 complexity | 26af573448fb0af34682da35441bb440 MD5 | raw file
  1#!/usr/bin/env python
  2
  3"""
  4Reads a list of intervals and a maf. Produces a new maf containing the
  5blocks or parts of blocks in the original that overlapped the intervals.
  6
  7If a MAF file, not UID, is provided the MAF file is indexed before being processed.
  8
  9NOTE: If two intervals overlap the same block it will be written twice.
 10
 11usage: %prog maf_file [options]
 12   -d, --dbkey=d: Database key, ie hg17
 13   -c, --chromCol=c: Column of Chr
 14   -s, --startCol=s: Column of Start
 15   -e, --endCol=e: Column of End
 16   -S, --strandCol=S: Column of Strand
 17   -t, --mafType=t: Type of MAF source to use
 18   -m, --mafFile=m: Path of source MAF file, if not using cached version
 19   -I, --mafIndex=I: Path of precomputed source MAF file index, if not using cached version
 20   -i, --interval_file=i:       Input interval file
 21   -o, --output_file=o:      Output MAF file
 22   -p, --species=p: Species to include in output
 23   -P, --split_blocks_by_species=P: Split blocks by species
 24   -r, --remove_all_gap_columns=r: Remove all Gap columns
 25   -l, --indexLocation=l: Override default maf_index.loc file
 26   -z, --mafIndexFile=z: Directory of local maf index file ( maf_index.loc or maf_pairwise.loc )
 27"""
 28
 29#Dan Blankenberg
 30from galaxy import eggs
 31import pkg_resources; pkg_resources.require( "bx-python" )
 32from bx.cookbook import doc_optparse
 33import bx.align.maf
 34import bx.intervals.io
 35from galaxy.tools.util import maf_utilities
 36import sys
 37
 38assert sys.version_info[:2] >= ( 2, 4 )
 39
 40def __main__():
 41    index = index_filename = None
 42    mincols = 0
 43    
 44    #Parse Command Line
 45    options, args = doc_optparse.parse( __doc__ )
 46    
 47    if options.dbkey: dbkey = options.dbkey
 48    else: dbkey = None
 49    if dbkey in [None, "?"]:
 50        maf_utilities.tool_fail( "You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file." )
 51    
 52    species = maf_utilities.parse_species_option( options.species )
 53    
 54    if options.chromCol: chromCol = int( options.chromCol ) - 1
 55    else: 
 56        maf_utilities.tool_fail( "Chromosome column not set, click the pencil icon in the history item to set the metadata attributes." )
 57    
 58    if options.startCol: startCol = int( options.startCol ) - 1
 59    else: 
 60        maf_utilities.tool_fail( "Start column not set, click the pencil icon in the history item to set the metadata attributes." )
 61    
 62    if options.endCol: endCol = int( options.endCol ) - 1
 63    else: 
 64        maf_utilities.tool_fail( "End column not set, click the pencil icon in the history item to set the metadata attributes." )
 65    
 66    if options.strandCol: strandCol = int( options.strandCol ) - 1
 67    else: 
 68        strandCol = -1
 69    
 70    if options.interval_file: interval_file = options.interval_file
 71    else: 
 72        maf_utilities.tool_fail( "Input interval file has not been specified." )
 73    
 74    if options.output_file: output_file = options.output_file
 75    else: 
 76        maf_utilities.tool_fail( "Output file has not been specified." )
 77    
 78    split_blocks_by_species = remove_all_gap_columns = False
 79    if options.split_blocks_by_species and options.split_blocks_by_species == 'split_blocks_by_species':
 80        split_blocks_by_species = True
 81        if options.remove_all_gap_columns and options.remove_all_gap_columns == 'remove_all_gap_columns':
 82            remove_all_gap_columns = True
 83    else:
 84        remove_all_gap_columns = True
 85    #Finish parsing command line
 86    
 87    #Open indexed access to MAFs
 88    if options.mafType:
 89        if options.indexLocation:
 90            index = maf_utilities.maf_index_by_uid( options.mafType, options.indexLocation )
 91        else:
 92            index = maf_utilities.maf_index_by_uid( options.mafType, options.mafIndexFile )
 93        if index is None:
 94            maf_utilities.tool_fail( "The MAF source specified (%s) appears to be invalid." % ( options.mafType ) )
 95    elif options.mafFile:
 96        index, index_filename = maf_utilities.open_or_build_maf_index( options.mafFile, options.mafIndex, species = [dbkey] )
 97        if index is None:
 98            maf_utilities.tool_fail( "Your MAF file appears to be malformed." )
 99    else:
100        maf_utilities.tool_fail( "Desired source MAF type has not been specified." )
101    
102    #Create MAF writter
103    out = bx.align.maf.Writer( open(output_file, "w") )
104    
105    #Iterate over input regions 
106    num_blocks = 0
107    num_regions = None
108    for num_regions, region in enumerate( bx.intervals.io.NiceReaderWrapper( open( interval_file, 'r' ), chrom_col = chromCol, start_col = startCol, end_col = endCol, strand_col = strandCol, fix_strand = True, return_header = False, return_comments = False ) ):
109        src = maf_utilities.src_merge( dbkey, region.chrom )
110        for block in index.get_as_iterator( src, region.start, region.end ):
111            if split_blocks_by_species:
112                blocks = [ new_block for new_block in maf_utilities.iter_blocks_split_by_species( block ) if maf_utilities.component_overlaps_region( new_block.get_component_by_src_start( dbkey ), region ) ]
113            else:
114                blocks = [ block ]
115            for block in blocks:
116                block = maf_utilities.chop_block_by_region( block, src, region )
117                if block is not None:
118                    if species is not None:
119                        block = block.limit_to_species( species )
120                    block = maf_utilities.orient_block_by_region( block, src, region )
121                    if remove_all_gap_columns:
122                        block.remove_all_gap_columns()
123                    out.write( block )
124                    num_blocks += 1
125    
126    #Close output MAF
127    out.close()
128    
129    #remove index file if created during run
130    maf_utilities.remove_temp_index_file( index_filename )
131    
132    if num_blocks:
133        print "%i MAF blocks extracted for %i regions." % ( num_blocks, ( num_regions + 1 ) )
134    elif num_regions is not None:
135        print "No MAF blocks could be extracted for %i regions." % ( num_regions + 1 )
136    else:
137        print "No valid regions have been provided."
138    
139if __name__ == "__main__": __main__()