/tools/samtools/sam_bitwise_flag_filter.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 149 lines · 122 code · 25 blank · 2 comment · 12 complexity · 4571f32d802305066135d90538e3161e MD5 · raw file

  1. #!/usr/bin/env python
  2. # Refactored on 11/13/2010 by Kanwei Li
  3. import sys
  4. import optparse
  5. def stop_err( msg ):
  6. sys.stderr.write( msg )
  7. sys.exit()
  8. def main():
  9. usage = """%prog [options]
  10. options (listed below) default to 'None' if omitted
  11. """
  12. parser = optparse.OptionParser(usage=usage)
  13. parser.add_option(
  14. '--0x0001','--is_paired',
  15. choices = ( '0','1' ),
  16. dest='is_paired',
  17. metavar="<0|1>",
  18. help='The read is paired in sequencing')
  19. parser.add_option(
  20. '--0x0002','--is_proper_pair',
  21. choices = ( '0','1' ),
  22. metavar="<0|1>",
  23. dest='is_proper_pair',
  24. help='The read is mapped in a proper pair')
  25. parser.add_option(
  26. '--0x0004','--is_unmapped',
  27. choices = ( '0','1' ),
  28. metavar="<0|1>",
  29. dest='is_unmapped',
  30. help='The query sequence itself is unmapped')
  31. parser.add_option(
  32. '--0x0008','--mate_is_unmapped',
  33. choices = ( '0','1' ),
  34. metavar="<0|1>",
  35. dest='mate_is_unmapped',
  36. help='The mate is unmapped')
  37. parser.add_option(
  38. '--0x0010','--query_strand',
  39. dest='query_strand',
  40. metavar="<0|1>",
  41. choices = ( '0','1' ),
  42. help='Strand of the query: 0 = forward, 1 = reverse.')
  43. parser.add_option(
  44. '--0x0020','--mate_strand',
  45. dest='mate_strand',
  46. metavar="<0|1>",
  47. choices = ('0','1'),
  48. help='Strand of the mate: 0 = forward, 1 = reverse.')
  49. parser.add_option(
  50. '--0x0040','--is_first',
  51. choices = ( '0','1' ),
  52. metavar="<0|1>",
  53. dest='is_first',
  54. help='The read is the first read in a pair')
  55. parser.add_option(
  56. '--0x0080','--is_second',
  57. choices = ( '0','1' ),
  58. metavar="<0|1>",
  59. dest='is_second',
  60. help='The read is the second read in a pair')
  61. parser.add_option(
  62. '--0x0100','--is_not_primary',
  63. choices = ( '0','1' ),
  64. metavar="<0|1>",
  65. dest='is_not_primary',
  66. help='The alignment for the given read is not primary')
  67. parser.add_option(
  68. '--0x0200','--is_bad_quality',
  69. choices = ( '0','1' ),
  70. metavar="<0|1>",
  71. dest='is_bad_quality',
  72. help='The read fails platform/vendor quality checks')
  73. parser.add_option(
  74. '--0x0400','--is_duplicate',
  75. choices = ( '0','1' ),
  76. metavar="<0|1>",
  77. dest='is_duplicate',
  78. help='The read is either a PCR or an optical duplicate')
  79. parser.add_option(
  80. '-f','--input_sam_file',
  81. metavar="INPUT_SAM_FILE",
  82. dest='input_sam',
  83. default = False,
  84. help='Name of the SAM file to be filtered. STDIN is default')
  85. parser.add_option(
  86. '-c','--flag_column',
  87. dest='flag_col',
  88. default = '2',
  89. help='Column containing SAM bitwise flag. 1-based')
  90. options, args = parser.parse_args()
  91. if options.input_sam:
  92. infile = open ( options.input_sam, 'r')
  93. else:
  94. infile = sys.stdin
  95. opt_ary = [
  96. options.is_paired,
  97. options.is_proper_pair,
  98. options.is_unmapped,
  99. options.mate_is_unmapped,
  100. options.query_strand,
  101. options.mate_strand,
  102. options.is_first,
  103. options.is_second,
  104. options.is_not_primary,
  105. options.is_bad_quality,
  106. options.is_duplicate
  107. ]
  108. opt_map = { '0': False, '1': True }
  109. used_indices = [(index, opt_map[opt]) for index, opt in enumerate(opt_ary) if opt is not None]
  110. flag_col = int( options.flag_col ) - 1
  111. for line in infile:
  112. line = line.rstrip( '\r\n' )
  113. if line and not line.startswith( '#' ) and not line.startswith( '@' ) :
  114. fields = line.split( '\t' )
  115. flags = int( fields[flag_col] )
  116. valid_line = True
  117. for index, opt_bool in used_indices:
  118. if bool(flags & 0x0001 << index) != opt_bool:
  119. valid_line = False
  120. break
  121. if valid_line:
  122. print line
  123. if __name__ == "__main__": main()