PageRenderTime 28ms CodeModel.GetById 20ms app.highlight 6ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/data_source/genbank.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 42 lines | 38 code | 2 blank | 2 comment | 1 complexity | 4d3b68b8f6d8a60b362e644be58a4120 MD5 | raw file
 1#!/usr/bin/env python
 2from Bio import GenBank
 3import sys, os, textwrap
 4
 5assert sys.version_info[:2] >= ( 2, 4 )
 6
 7def make_fasta(rec):
 8    '''Creates fasta format from a record'''
 9    gi   = rec.annotations.get('gi','')
10    org  = rec.annotations.get('organism','')
11    date = rec.annotations.get('date','')
12    head = '>gi:%s, id:%s, org:%s, date:%s\n' % (gi, rec.id, org, date)
13    body = '\n'.join(textwrap.wrap(rec.seq.data, width=80))
14    return head, body
15    
if __name__ == '__main__':
    # Command line: <mode> <text> <output_file>
    #   mode        -- first argument given to GenBank.NCBIDictionary
    #                  (presumably the NCBI database name; confirm with caller)
    #   text        -- whitespace-separated numeric ids, or a free-text query
    #   output_file -- path that receives the FASTA records
    if len(sys.argv) < 4:
        sys.exit('Usage: %s <mode> <text> <output_file>' % sys.argv[0])

    mode = sys.argv[1]
    text = sys.argv[2]
    output_file = sys.argv[3]

    # Single-argument parenthesized print emits the same text whether print
    # is a statement or a function.
    print('Searching for %s <br>' % text)

    # If every token parses as an integer, use the tokens directly as ids;
    # otherwise treat the whole text as a search query limited to 10 ids.
    gi_list = text.split()
    try:
        for token in gi_list:
            int(token)
    except ValueError:
        gi_list = GenBank.search_for(text, max_ids=10)

    fp = open(output_file, 'wt')
    # try/finally guarantees the output file is closed even when fetching or
    # formatting a record raises part-way through the list.
    try:
        record_parser = GenBank.FeatureParser()
        ncbi_dict = GenBank.NCBIDictionary(mode, 'genbank', parser=record_parser)
        for gid in gi_list:
            res = ncbi_dict[gid]
            head, body = make_fasta(res)
            fp.write(head + body + '\n')
            # Echo each header to stdout as the record is written.
            print(head)
    finally:
        fp.close()
40
41   
42