/tools/data_source/microbial_import_code.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 154 lines · 137 code · 7 blank · 10 comment · 24 complexity · b23f4367caf4c357fbd2bd548c7e11ba MD5 · raw file

  def load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' ):
      """Parse ``microbial_data.loc`` into a nested dict grouped by kingdom.

      The file ``<GALAXY_DATA_INDEX_DIR>/microbial_data.loc`` is read line by
      line. Blank lines and lines starting with ``#`` are ignored. The first
      field of every other line (compared case-insensitively) selects the
      record type:

      * ``ORG``  - org_num, name, kingdom, group, chromosomes, info_url, link_site
      * ``CHR``  - org_num, chr_acc, name, length, gi, gb, info_url
      * ``DATA`` - uid, org_num, chr_acc, feature, filetype, path

      Lines of any other type, lines with too few fields, and DATA lines that
      refer to a chromosome not yet defined by a CHR line are skipped.

      :param GALAXY_DATA_INDEX_DIR: directory containing ``microbial_data.loc``.
      :param sep: field separator used in the file (a tab by default).
      :returns: ``{kingdom: {org_num: org}}`` where ``org`` holds the ORG
          fields plus ``'chrs'``, a dict ``{chr_acc: chromosome}``; each
          chromosome holds the CHR fields and, once a DATA line was seen for
          it, ``'data'``: ``{uid: {'filetype', 'path', 'feature'}}``.
          Organisms that only appear in CHR/DATA lines (no ORG line, hence no
          kingdom) are left out instead of raising ``KeyError``.
      :raises IOError: (``OSError`` on Python 3) if the file cannot be opened.
      """
      # FIXME: this function is duplicated in the DynamicOptions class. It is used here only to
      # set data.name in exec_after_process().
      orgs = {}
      filename = "%s/microbial_data.loc" % GALAXY_DATA_INDEX_DIR
      # The context manager guarantees the handle is closed, even on error.
      with open( filename ) as handle:
          for line in handle:
              line = line.rstrip( '\r\n' )
              if not line or line.startswith( '#' ):
                  continue
              fields = line.split( sep )
              record_type = fields[0].upper()
              # Read each line; if there are not enough fields (ValueError on
              # unpacking) or a DATA line names an unknown chromosome
              # (KeyError), go to the next line. Unpacking happens before any
              # mutation, so a short line never leaves partial state behind.
              try:
                  if record_type == "ORG":
                      #ORG 12521 Clostridium perfringens SM101 bacteria Firmicutes CP000312,CP000313,CP000314,CP000315 http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=genomeprj&cmd=Retrieve&dopt=Overview&list_uids=12521
                      org_num, name, kingdom, group, chromosomes, info_url, link_site = fields[1:8]
                      org = orgs.setdefault( org_num, { 'chrs': {} } )
                      org[ 'name' ] = name
                      org[ 'kingdom' ] = kingdom
                      org[ 'group' ] = group
                      org[ 'chromosomes' ] = chromosomes
                      org[ 'info_url' ] = info_url
                      org[ 'link_site' ] = link_site
                  elif record_type == "CHR":
                      #CHR 12521 CP000315 Clostridium perfringens phage phiSM101, complete genome 38092 110684521 CP000315.1
                      org_num, chr_acc, name, length, gi, gb, info_url = fields[1:8]
                      org = orgs.setdefault( org_num, { 'chrs': {} } )
                      org[ 'chrs' ][ chr_acc ] = {
                          'name': name,
                          'length': length,
                          'gi': gi,
                          'gb': gb,
                          'info_url': info_url,
                      }
                  elif record_type == "DATA":
                      #DATA 12521_12521_CDS 12521 CP000315 CDS bed /home/djb396/alignments/playground/bacteria/12521/CP000315.CDS.bed
                      uid, org_num, chr_acc, feature, filetype, path = fields[1:7]
                      org = orgs.setdefault( org_num, { 'chrs': {} } )
                      # Raises KeyError when the chromosome has no CHR record yet.
                      chromosome = org[ 'chrs' ][ chr_acc ]
                      chromosome.setdefault( 'data', {} )[ uid ] = {
                          'filetype': filetype,
                          'path': path,
                          'feature': feature,
                      }
              except ( ValueError, KeyError ):
                  continue
      microbe_info = {}
      for org_num, org in orgs.items():
          kingdom = org.get( 'kingdom' )
          if kingdom is None:
              # Stub created by a CHR/DATA line without a matching ORG line;
              # it cannot be grouped, so skip it rather than crash.
              continue
          microbe_info.setdefault( kingdom, {} )[ org_num ] = org
      return microbe_info
  78. #post processing, set build for data and add additional data to history
  79. from galaxy import datatypes, config, tools
  80. from shutil import copyfile
  def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
      """Post-processing hook: name the imported dataset and add extra ones.

      The tool's ``stdout`` is scanned line by line; each line is split on
      tabs and dispatched on its first field:

      * ``#File1``   - fields: description, chromosome, dbkey, file type.
        The first output dataset gets a descriptive name, the dbkey and the
        datatype, and is flushed to the database.
      * ``#NewFile`` - fields: description, chromosome, dbkey, file path,
        file type. A new HistoryDatasetAssociation is created, given the
        permissions of the first output dataset, added to the same history,
        and the file at ``file path`` is copied into it. If the copy fails
        the new dataset is put in the ERROR state.

      Other lines are ignored. Nothing is done (apart from printing a
      message) when the first output dataset has no history.

      :param app: Galaxy application object (config, model, registries).
      :param inp_data: unused here.
      :param out_data: mapping of output names to datasets; only the first
          entry is used.
      :param param_dict: tool parameters; ``'kingdom'`` and ``'org'`` are read.
      :param tool: unused here.
      :param stdout: captured standard output of the tool.
      :param stderr: unused here.
      :raises KeyError: if kingdom/org/chromosome/description from a tagged
          line are not present in the loaded microbial data.
      :raises IndexError: if a tagged line has fewer fields than listed above.
      """
      # list() keeps this working where items() returns a view, not a list.
      base_dataset = list( out_data.items() )[0][1]
      history = base_dataset.history
      if history is None:
          print("unknown history!")
          return
      kingdom = param_dict.get( 'kingdom', None )
      #group = param_dict.get( 'group', None )
      org = param_dict.get( 'org', None )
      #if not (kingdom or group or org):
      if not (kingdom or org):
          # NOTE(review): this only reports the problem; processing continues
          # exactly as before and will fail on the first tagged line.
          print("Parameters are not available.")
      #workflow passes galaxy.tools.parameters.basic.UnvalidatedValue instead of values
      if isinstance( kingdom, tools.parameters.basic.UnvalidatedValue ):
          kingdom = kingdom.value
      if isinstance( org, tools.parameters.basic.UnvalidatedValue ):
          org = org.value
      GALAXY_DATA_INDEX_DIR = app.config.tool_data_path
      microbe_info = load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' )

      def name_suffix( description, chr_acc ):
          # Builds " (<feature> for <organism name>:<chromosome>)".
          org_info = microbe_info[kingdom][org]
          return " (" + org_info['chrs'][chr_acc]['data'][description]['feature'] + " for " + org_info['name'] + ":" + chr_acc + ")"

      # Name of the first dataset before decoration; reused for "#NewFile" datasets.
      basic_name = ""
      for line in stdout.split("\n"):
          fields = line.split("\t")
          if fields[0] == "#File1":
              description = fields[1]
              chr_acc = fields[2]
              dbkey = fields[3]
              file_type = fields[4]
              data = list( out_data.items() )[0][1]
              data.set_size()
              basic_name = data.name
              data.name = data.name + name_suffix( description, chr_acc )
              data.dbkey = dbkey
              data.info = data.name
              data = app.datatypes_registry.change_datatype( data, file_type )
              data.init_meta()
              data.set_peek()
              app.model.context.add( data )
              app.model.context.flush()
          elif fields[0] == "#NewFile":
              description = fields[1]
              chr_acc = fields[2]
              dbkey = fields[3]
              filepath = fields[4]
              file_type = fields[5]
              newdata = app.model.HistoryDatasetAssociation( create_dataset = True, sa_session = app.model.context ) #This import should become a library
              newdata.set_size()
              newdata.extension = file_type
              newdata.name = basic_name + name_suffix( description, chr_acc )
              app.model.context.add( newdata )
              app.model.context.flush()
              app.security_agent.copy_dataset_permissions( base_dataset.dataset, newdata.dataset )
              history.add_dataset( newdata )
              app.model.context.add( history )
              app.model.context.flush()
              try:
                  copyfile( filepath, newdata.file_name )
                  newdata.info = newdata.name
                  newdata.state = newdata.states.OK
              except Exception:
                  # Best effort: any copy failure marks the dataset as errored,
                  # but KeyboardInterrupt/SystemExit are no longer swallowed.
                  newdata.info = "The requested file is missing from the system."
                  newdata.state = newdata.states.ERROR
              newdata.dbkey = dbkey
              newdata.init_meta()
              newdata.set_peek()
              app.model.context.flush()