PageRenderTime 30ms CodeModel.GetById 18ms app.highlight 9ms RepoModel.GetById 2ms app.codeStats 0ms

/tools/genome_diversity/select_restriction_enzymes.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 103 lines | 88 code | 14 blank | 1 comment | 29 complexity | 8e4f01529f050bca33f23c9f2d776df7 MD5 | raw file
  1#!/usr/bin/env python2.5
  2
  3import os
  4import sys
  5from optparse import OptionParser
  6import genome_diversity as gd
  7
  8def main_function( parse_arguments=None ):
  9    if parse_arguments is None:
 10        parse_arguments = lambda arguments: ( None, arguments )
 11    def main_decorator( to_decorate ):
 12        def decorated_main( arguments=None ):
 13            if arguments is None:
 14                arguments = sys.argv
 15            options, arguments = parse_arguments( arguments )
 16            rc = 1
 17            try:
 18                rc = to_decorate( options, arguments )
 19            except Exception, err:
 20                sys.stderr.write( 'ERROR: %s\n' % str( err ) )
 21                traceback.print_exc()
 22            finally:
 23                sys.exit( rc )
 24        return decorated_main
 25    return main_decorator
 26
 27def parse_arguments( arguments ):
 28    parser = OptionParser()
 29    parser.add_option('--input',
 30                        type='string', dest='input',
 31                        help='file of selected SNPs')
 32    parser.add_option('--output',
 33                        type='string', dest='output',
 34                        help='output file')
 35    parser.add_option('--primers_loc',
 36                        type='string', dest='primers_loc',
 37                        help='primers .loc file')
 38    parser.add_option('--scaffold_col',
 39                        type="int", dest='scaffold_col',
 40                        help='scaffold column in the input file')
 41    parser.add_option('--pos_col',
 42                        type="int", dest='pos_col',
 43                        help='position column in the input file')
 44    parser.add_option('--enzyme_list',
 45                        type="string", dest='enzyme_list_string',
 46                        help='comma separated list of enzymes')
 47    parser.add_option('--species',
 48                        type="string", dest='species',
 49                        help='species')
 50    return parser.parse_args( arguments[1:] )
 51
 52
 53@main_function( parse_arguments )
 54def main( options, arguments ):
 55    if not options.input:
 56        raise RuntimeError( 'missing --input option' )
 57    if not options.output:
 58        raise RuntimeError( 'missing --output option' )
 59    if not options.primers_loc:
 60        raise RuntimeError( 'missing --primers_loc option' )
 61    if not options.scaffold_col:
 62        raise RuntimeError( 'missing --scaffold_col option' )
 63    if not options.pos_col:
 64        raise RuntimeError( 'missing --pos_col option' )
 65    if not options.enzyme_list_string:
 66        raise RuntimeError( 'missing --enzyme_list option' )
 67    if not options.species:
 68        raise RuntimeError( 'missing --species option' )
 69    
 70    snps = gd.SnpFile( filename=options.input, seq_col=int( options.scaffold_col ), pos_col=int( options.pos_col ) )
 71
 72    out_fh = gd._openfile( options.output, 'w' )
 73
 74    enzyme_dict = {}
 75    for enzyme in options.enzyme_list_string.split( ',' ):
 76        enzyme = enzyme.strip()
 77        if enzyme:
 78            enzyme_dict[enzyme] = 1
 79
 80    primer_data_file = gd.get_filename_from_loc( options.species, options.primers_loc )
 81    file_root, file_ext = os.path.splitext( primer_data_file )
 82    primer_index_file = file_root + ".cdb"
 83    primers = gd.PrimersFile( data_file=primer_data_file, index_file=primer_index_file )
 84
 85    comments_printed = False
 86
 87    while snps.next():
 88        seq, pos = snps.get_seq_pos()
 89        enzyme_list = primers.get_enzymes( seq, pos )
 90        for enzyme in enzyme_list:
 91            if enzyme in enzyme_dict:
 92                if not comments_printed:
 93                    for comment in snps.comments:
 94                        out_fh.write( "%s\n" % comment )
 95                    comments_printed = True
 96                out_fh.write( "%s\n" % snps.line )
 97                break
 98
 99    out_fh.close()
100
101if __name__ == "__main__":
102    main()
103