/tools/data_source/genbank.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 42 lines · 29 code · 10 blank · 3 comment · 4 complexity · 4d3b68b8f6d8a60b362e644be58a4120 MD5 · raw file

  1. #!/usr/bin/env python
  2. from Bio import GenBank
  3. import sys, os, textwrap
  4. assert sys.version_info[:2] >= ( 2, 4 )
  5. def make_fasta(rec):
  6. '''Creates fasta format from a record'''
  7. gi = rec.annotations.get('gi','')
  8. org = rec.annotations.get('organism','')
  9. date = rec.annotations.get('date','')
  10. head = '>gi:%s, id:%s, org:%s, date:%s\n' % (gi, rec.id, org, date)
  11. body = '\n'.join(textwrap.wrap(rec.seq.data, width=80))
  12. return head, body
  13. if __name__ == '__main__':
  14. mode = sys.argv[1]
  15. text = sys.argv[2]
  16. output_file = sys.argv[3]
  17. print 'Searching for %s <br>' % text
  18. # check if inputs are all numbers
  19. try:
  20. gi_list = text.split()
  21. tmp = map(int, gi_list)
  22. except ValueError:
  23. gi_list = GenBank.search_for(text, max_ids=10)
  24. fp = open(output_file, 'wt')
  25. record_parser = GenBank.FeatureParser()
  26. ncbi_dict = GenBank.NCBIDictionary(mode, 'genbank', parser = record_parser)
  27. for gid in gi_list:
  28. res = ncbi_dict[gid]
  29. head, body = make_fasta(res)
  30. fp.write(head+body+'\n')
  31. print head
  32. fp.close()