/tools/data_source/genbank.py
Python | 42 lines | 38 code | 2 blank | 2 comment | 1 complexity | 4d3b68b8f6d8a60b362e644be58a4120 MD5 | raw file
#!/usr/bin/env python
"""Fetch GenBank records and write them to a file in FASTA-like format.

Usage: genbank.py <mode> <text> <output_file>

``text`` is either a whitespace-separated list of integer GI identifiers or
a free-text query.  A query is resolved to identifiers with
``GenBank.search_for`` (at most ``MAX_SEARCH_IDS`` of them).  Progress
messages are printed to stdout.
"""
import os
import sys
import textwrap

from Bio import GenBank

assert sys.version_info[:2] >= ( 2, 4 )

# Upper bound on the number of ids requested for a free-text query.
MAX_SEARCH_IDS = 10


def make_fasta(rec):
    '''Creates fasta format from a record.

    ``rec`` must expose ``annotations`` (a mapping), ``id`` and ``seq.data``.

    Returns a ``(head, body)`` tuple.  ``head`` is the newline-terminated
    ``>`` description line built from the ``gi``, ``organism`` and ``date``
    annotations (each defaulting to an empty string) plus ``rec.id``.
    ``body`` is the sequence text wrapped at 80 columns, with no trailing
    newline.
    '''
    gi = rec.annotations.get('gi', '')
    org = rec.annotations.get('organism', '')
    date = rec.annotations.get('date', '')
    head = '>gi:%s, id:%s, org:%s, date:%s\n' % (gi, rec.id, org, date)
    body = '\n'.join(textwrap.wrap(rec.seq.data, width=80))
    return head, body


def main():
    '''Run the tool using the arguments found in ``sys.argv``.'''
    if len(sys.argv) < 4:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit('Usage: %s <mode> <text> <output_file>' % sys.argv[0])

    # mode is handed to NCBIDictionary unchanged
    # (presumably the NCBI database name -- TODO confirm against the caller).
    mode = sys.argv[1]
    text = sys.argv[2]
    output_file = sys.argv[3]

    # Single-argument parenthesised form prints the same text on Python 2.
    print('Searching for %s <br>' % text)

    # check if inputs are all numbers
    gi_list = text.split()
    try:
        # Explicit loop rather than map(): every token is really converted,
        # even on interpreters where map() is lazy.
        for token in gi_list:
            int(token)
    except ValueError:
        # Not a pure id list: treat the whole text as a search query.
        gi_list = GenBank.search_for(text, max_ids=MAX_SEARCH_IDS)

    fp = open(output_file, 'wt')
    # try/finally (not "with") keeps the Python 2.4 floor asserted above and
    # guarantees the file is closed even if a lookup or parse fails.
    try:
        record_parser = GenBank.FeatureParser()
        ncbi_dict = GenBank.NCBIDictionary(mode, 'genbank', parser = record_parser)
        for gid in gi_list:
            res = ncbi_dict[gid]
            head, body = make_fasta(res)
            fp.write(head + body + '\n')
            print(head)
    finally:
        fp.close()


if __name__ == '__main__':
    main()