/scripts/store_orthomcl_clusters.py

https://github.com/tolotos/tfactors
Python | 80 lines | 62 code | 5 blank | 13 comment | 3 complexity | 06507b147f4ebbfeb3fe140d3cba212e MD5 | raw file
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. #
  4. # store_orthomcl_clusters.py
  5. #==============================================================================
  6. from optparse import OptionParser
  7. from Tfsuite.Parser.orthomcl import Orthomcl
  8. from Tfsuite.Parser.hmmout import Hmmout
  9. from Tfsuite.Parser.fasta import Fasta
  10. from Tfsuite.Parser.species import SpeciesMapping
  11. from Tfsuite.Parser.cafe import Cafe
  12. from Tfsuite.Parser.family import Family
  13. import os
  14. import glob
  15. import copy
  16. import cPickle as pickle
  17. #==============================================================================
  18. #Command line options==========================================================
  19. #==============================================================================
  20. usage = 'usage: %prog [options]'
  21. desc='''%prog takes orthomcl output (clusters) and creates a pickable object
  22. containing all loaded clusters.'''
  23. cloptions = OptionParser(usage = usage, description=desc)
  24. cloptions.add_option('-o', '--orthomcl', dest = 'clusters',
  25. help = 'Orthomcl clusters', metavar='FILE',
  26. default = '')
  27. cloptions.add_option('-f', '--fasta', dest = 'fasta',
  28. help = 'Fasta file containing sequences of Proteins', metavar='FILE',
  29. default = '')
  30. cloptions.add_option('-s', '--species', dest = 'species',
  31. help = 'Protein to species mapping', metavar='FILE',
  32. default = '')
  33. cloptions.add_option('-d', '--hmmout', dest = 'hmmout',
  34. help = 'Hmmout, containing domain annotated proteins', metavar='FILE',
  35. default = '')
  36. cloptions.add_option('-m', '--familymapping', dest = 'family',
  37. help = 'Mapping from domain arrangement to family.', metavar='FILE',
  38. default = '')
  39. cloptions.add_option('-p', '--pickle', dest = 'pickle',
  40. help = 'Filename for the pickled clusters', metavar='FILE',
  41. default = 'pickled_orthomcl_clusters.p')
  42. (options, args) = cloptions.parse_args()
  43. #==============================================================================
  44. def create_clusters(f_orthomcl,f_hmmout,f_fasta,f_species,f_family):
  45. ''' Loads an orthomcl output file, to create clusters. In addition proteins
  46. are added from the corresponding hmmout file, species information is
  47. added from speciesMapping(Andreas) and fasta sequences for each protein
  48. are loaded. Function returns an interable with cluster objects'''
  49. orthomcl, hmmout, fasta, = Orthomcl(), Hmmout(), Fasta()
  50. species, family = SpeciesMapping(), Family()
  51. fasta.load(f_fasta)
  52. hmmout.load(f_hmmout)
  53. orthomcl.load(f_orthomcl)
  54. species.load(f_species)
  55. family.load(f_family)
  56. for protein in hmmout:
  57. protein.add_sequence(fasta)
  58. protein.add_species(species)
  59. protein.add_family(family)
  60. for cluster in orthomcl:
  61. cluster.add_proteins(hmmout)
  62. cluster.counts = copy.deepcopy(species.all())
  63. cluster.add_counts()
  64. cluster.add_cluster_to_members()
  65. cluster.add_family()
  66. return orthomcl
  67. def main():
  68. clusters = create_clusters(options.clusters,
  69. options.hmmout,
  70. options.fasta,
  71. options.species,
  72. options.family)
  73. pickle.dump(clusters, open(options.pickle, "wb"))
  74. if __name__ == '__main__':
  75. main()