PageRenderTime 11ms CodeModel.GetById 1ms app.highlight 8ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/samtools/sam_bitwise_flag_filter.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 149 lines | 144 code | 3 blank | 2 comment | 0 complexity | 4571f32d802305066135d90538e3161e MD5 | raw file
  1#!/usr/bin/env python
  2# Refactored on 11/13/2010 by Kanwei Li
  3
  4import sys
  5import optparse
  6
  7def stop_err( msg ):
  8    sys.stderr.write( msg )
  9    sys.exit()
 10
 11def main():
 12    usage = """%prog [options]
 13    
 14options (listed below) default to 'None' if omitted
 15    """
 16    parser = optparse.OptionParser(usage=usage)
 17    
 18    parser.add_option(
 19        '--0x0001','--is_paired',
 20        choices = ( '0','1' ),
 21        dest='is_paired',
 22        metavar="<0|1>",
 23        help='The read is paired in sequencing')
 24
 25    parser.add_option(
 26        '--0x0002','--is_proper_pair',
 27        choices = ( '0','1' ),
 28        metavar="<0|1>",
 29        dest='is_proper_pair',
 30        help='The read is mapped in a proper pair')
 31
 32    parser.add_option(
 33        '--0x0004','--is_unmapped',
 34        choices = ( '0','1' ),
 35        metavar="<0|1>",
 36        dest='is_unmapped',
 37        help='The query sequence itself is unmapped')
 38
 39    parser.add_option(
 40        '--0x0008','--mate_is_unmapped',
 41        choices = ( '0','1' ),
 42        metavar="<0|1>",
 43        dest='mate_is_unmapped',
 44        help='The mate is unmapped')
 45
 46    parser.add_option(
 47        '--0x0010','--query_strand',
 48        dest='query_strand',
 49        metavar="<0|1>",
 50        choices = ( '0','1' ),
 51        help='Strand of the query: 0 = forward, 1 = reverse.')
 52
 53    parser.add_option(
 54        '--0x0020','--mate_strand',
 55        dest='mate_strand',
 56        metavar="<0|1>",
 57        choices = ('0','1'),
 58        help='Strand of the mate: 0 = forward, 1 = reverse.')
 59
 60    parser.add_option(
 61        '--0x0040','--is_first',
 62        choices = ( '0','1' ),
 63        metavar="<0|1>",
 64        dest='is_first',
 65        help='The read is the first read in a pair')
 66
 67    parser.add_option(
 68        '--0x0080','--is_second',
 69        choices = ( '0','1' ),
 70        metavar="<0|1>",
 71        dest='is_second',
 72        help='The read is the second read in a pair')
 73
 74    parser.add_option(
 75        '--0x0100','--is_not_primary',
 76        choices = ( '0','1' ),
 77        metavar="<0|1>",
 78        dest='is_not_primary',
 79        help='The alignment for the given read is not primary')
 80
 81    parser.add_option(
 82        '--0x0200','--is_bad_quality',
 83        choices = ( '0','1' ),
 84        metavar="<0|1>",
 85        dest='is_bad_quality',
 86        help='The read fails platform/vendor quality checks')
 87
 88    parser.add_option(
 89        '--0x0400','--is_duplicate',
 90        choices = ( '0','1' ),
 91        metavar="<0|1>",
 92        dest='is_duplicate',
 93        help='The read is either a PCR or an optical duplicate')
 94        
 95    parser.add_option(
 96        '-f','--input_sam_file',
 97        metavar="INPUT_SAM_FILE",
 98        dest='input_sam',
 99        default = False,
100        help='Name of the SAM file to be filtered. STDIN is default')
101            
102    parser.add_option(
103        '-c','--flag_column',
104        dest='flag_col',
105        default = '2',
106        help='Column containing SAM bitwise flag. 1-based')
107
108    options, args = parser.parse_args()
109
110    if options.input_sam:
111		infile = open ( options.input_sam, 'r')
112    else:
113    	infile = sys.stdin
114        
115    opt_ary = [
116        options.is_paired,
117        options.is_proper_pair,
118        options.is_unmapped,
119        options.mate_is_unmapped,
120        options.query_strand,
121        options.mate_strand,
122        options.is_first,
123        options.is_second,
124        options.is_not_primary,
125        options.is_bad_quality,
126        options.is_duplicate
127    ]
128    
129    opt_map = { '0': False, '1': True }
130    used_indices = [(index, opt_map[opt]) for index, opt in enumerate(opt_ary) if opt is not None]
131    flag_col = int( options.flag_col ) - 1
132    
133    for line in infile:
134        line = line.rstrip( '\r\n' )
135        if line and not line.startswith( '#' ) and not line.startswith( '@' ) :
136            fields = line.split( '\t' )
137            flags = int( fields[flag_col] )
138            
139            valid_line = True
140            for index, opt_bool in used_indices:
141                if bool(flags & 0x0001 << index) != opt_bool:
142                    valid_line = False
143                    break
144                    
145            if valid_line:
146                print line
147
148if __name__ == "__main__": main()
149