PageRenderTime 44ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/interproAppender.py

https://github.com/jcpickett1/WegrzynLab
Python | 86 lines | 69 code | 9 blank | 8 comment | 8 complexity | 972b6ade4b67d6732f2302da3af9c4ad MD5 | raw file
  1. ## Script Name: interproAppender.py
  2. ## Purpose: Append info from interpro output into annotation file
  3. ## Created by James Pickett
  4. ## University of Connecticut
  5. ##
  6. ## Version: 1.2.0
  7. ## Last Edit 9/19/2014
  8. ## Usage: Run from terminal, filename can be included in line or a query will appear on run (Wildcard and Relative OK)
  9. import glob
  10. target = glob.glob(raw_input('Enter the name of the annotation file (Relative and wildcards OK) \n'))[0]
  11. print "Annotation file being used is %s\n"%target
  12. jobInput = glob.glob(raw_input('Enter the name of the InterPro output file (Relative and wildcards OK) \n'))[0]
  13. print "InterPro file being used is %s\n"%jobInput
  14. with open(target,'r') as inputFile:
  15. annoArray = inputFile.read().split('\n')
  16. with open(jobInput,'r') as inputFile:
  17. interArray = inputFile.read().split('\n')
  18. modAnnoArray = []
  19. modInterArray = []
  20. sequencePFams = []
  21. targets = []
  22. misses = []
  23. lastContents = []
  24. addContents = []
  25. for fields in annoArray:
  26. fields = fields.split('\t')
  27. modAnnoArray.append(fields)
  28. targets.append(fields[0][:fields[0].find('|')])
  29. for fields in interArray:
  30. modInterArray.append(fields.split('\t'))
  31. interList = []
  32. for fields in modInterArray:
  33. fields[0] = fields[0][fields[0].find('.') + 1:]
  34. interList.append(fields[0])
  35. if len(fields) < 13:
  36. for i in range(13 - len(fields)):
  37. fields.append('')
  38. for i in range(len(modInterArray)):
  39. try:
  40. pos = targets.index(modInterArray[i][0])
  41. except ValueError:
  42. misses.append(modInterArray[i][0])
  43. for j in range(len(modAnnoArray[0]) - 1):
  44. misses[len(misses) - 1] += '\t'
  45. misses[len(misses) - 1] += '\t' + (modInterArray[i][len(modInterArray[i]) - 3])
  46. misses[len(misses) - 1] += '\t' + (modInterArray[i][len(modInterArray[i]) - 9])
  47. misses[len(misses) - 1] += '\t' + (modInterArray[i][len(modInterArray[i]) - 2])
  48. misses[len(misses) - 1] += '\t' + (modInterArray[i][len(modInterArray[i]) - 1])
  49. continue
  50. addContents.append(modInterArray[i][len(modInterArray[i]) - 3])
  51. addContents.append(modInterArray[i][len(modInterArray[i]) - 9])
  52. addContents.append(modInterArray[i][len(modInterArray[i]) - 2])
  53. addContents.append(modInterArray[i][len(modInterArray[i]) - 1])
  54. for fields in addContents:
  55. if fields in modAnnoArray[pos]:
  56. pass
  57. else:
  58. modAnnoArray[pos].append(fields.strip('\n'))
  59. lastContents = addContents
  60. addContents = []
  61. output = raw_input('Enter filename you would like output stored in (Relative and wildcards OK, include filetype) \n')
  62. with open(output,'w') as outputFile:
  63. for fields in modAnnoArray:
  64. outputFile.write('%s\n'%'\t'.join(fields))
  65. outputFile.write('END OF ANNOTATION FILE. INTERPRO MISMATCHES LISTED BELOW.\n')
  66. for fields in misses:
  67. outputFile.write('%s\n'%fields)