/tools/genome_diversity/select_restriction_enzymes.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 103 lines · 88 code · 14 blank · 1 comment · 20 complexity · 8e4f01529f050bca33f23c9f2d776df7 MD5 · raw file

  1. #!/usr/bin/env python2.5
  2. import os
  3. import sys
  4. from optparse import OptionParser
  5. import genome_diversity as gd
  6. def main_function( parse_arguments=None ):
  7. if parse_arguments is None:
  8. parse_arguments = lambda arguments: ( None, arguments )
  9. def main_decorator( to_decorate ):
  10. def decorated_main( arguments=None ):
  11. if arguments is None:
  12. arguments = sys.argv
  13. options, arguments = parse_arguments( arguments )
  14. rc = 1
  15. try:
  16. rc = to_decorate( options, arguments )
  17. except Exception, err:
  18. sys.stderr.write( 'ERROR: %s\n' % str( err ) )
  19. traceback.print_exc()
  20. finally:
  21. sys.exit( rc )
  22. return decorated_main
  23. return main_decorator
  24. def parse_arguments( arguments ):
  25. parser = OptionParser()
  26. parser.add_option('--input',
  27. type='string', dest='input',
  28. help='file of selected SNPs')
  29. parser.add_option('--output',
  30. type='string', dest='output',
  31. help='output file')
  32. parser.add_option('--primers_loc',
  33. type='string', dest='primers_loc',
  34. help='primers .loc file')
  35. parser.add_option('--scaffold_col',
  36. type="int", dest='scaffold_col',
  37. help='scaffold column in the input file')
  38. parser.add_option('--pos_col',
  39. type="int", dest='pos_col',
  40. help='position column in the input file')
  41. parser.add_option('--enzyme_list',
  42. type="string", dest='enzyme_list_string',
  43. help='comma separated list of enzymes')
  44. parser.add_option('--species',
  45. type="string", dest='species',
  46. help='species')
  47. return parser.parse_args( arguments[1:] )
  48. @main_function( parse_arguments )
  49. def main( options, arguments ):
  50. if not options.input:
  51. raise RuntimeError( 'missing --input option' )
  52. if not options.output:
  53. raise RuntimeError( 'missing --output option' )
  54. if not options.primers_loc:
  55. raise RuntimeError( 'missing --primers_loc option' )
  56. if not options.scaffold_col:
  57. raise RuntimeError( 'missing --scaffold_col option' )
  58. if not options.pos_col:
  59. raise RuntimeError( 'missing --pos_col option' )
  60. if not options.enzyme_list_string:
  61. raise RuntimeError( 'missing --enzyme_list option' )
  62. if not options.species:
  63. raise RuntimeError( 'missing --species option' )
  64. snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) )
  65. out_fh = gd._openfile( options.output, 'w' )
  66. enzyme_dict = {}
  67. for enzyme in options.enzyme_list_string.split( ',' ):
  68. enzyme = enzyme.strip()
  69. if enzyme:
  70. enzyme_dict[enzyme] = 1
  71. primer_data_file = gd.get_filename_from_loc( options.species, options.primers_loc )
  72. file_root, file_ext = os.path.splitext( primer_data_file )
  73. primer_index_file = file_root + ".cdb"
  74. primers = gd.PrimersFile( data_file=primer_data_file, index_file=primer_index_file )
  75. comments_printed = False
  76. while snps.next():
  77. seq, pos = snps.get_seq_pos()
  78. enzyme_list = primers.get_enzymes( seq, pos )
  79. for enzyme in enzyme_list:
  80. if enzyme in enzyme_dict:
  81. if not comments_printed:
  82. for comment in snps.comments:
  83. out_fh.write( "%s\n" % comment )
  84. comments_printed = True
  85. out_fh.write( "%s\n" % snps.line )
  86. break
  87. out_fh.close()
  88. if __name__ == "__main__":
  89. main()