/tools/data_source/ucsc_filter.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 68 lines · 66 code · 1 blank · 1 comment · 0 complexity · 7b6a192f88566f421b41d5c7fb21401f MD5 · raw file

  1. # runs after the job (and after the default post-filter)
  2. from galaxy import datatypes, jobs
  3. def validate(incoming):
  4. """Validator"""
  5. #raise Exception, 'not quite right'
  6. pass
  7. def exec_before_job( app, inp_data, out_data, param_dict, tool=None):
  8. """Sets the name of the data"""
  9. outputType = param_dict.get( 'hgta_outputType', None )
  10. if isinstance(outputType, list) and len(outputType)>0: outputType = outputType[-1]
  11. items = out_data.items()
  12. for name, data in items:
  13. data.name = param_dict.get('display', data.name)
  14. data.dbkey = param_dict.get('dbkey', '???')
  15. if outputType == 'wigData':
  16. ext = "wig"
  17. elif outputType == 'maf':
  18. ext = "maf"
  19. elif outputType == 'gff':
  20. ext = "gff"
  21. elif outputType == 'gff3':
  22. ext = "gff3"
  23. else:
  24. if 'hgta_doPrintSelectedFields' in param_dict:
  25. ext = "interval"
  26. elif 'hgta_doGetBed' in param_dict:
  27. ext = "bed"
  28. elif 'hgta_doGenomicDna' in param_dict:
  29. ext = "fasta"
  30. elif 'hgta_doGenePredSequence' in param_dict:
  31. ext = "fasta"
  32. else:
  33. ext = "interval"
  34. data = app.datatypes_registry.change_datatype(data, ext)
  35. out_data[name] = data
  36. def exec_after_process( app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
  37. """Verifies the data after the run"""
  38. items = out_data.items()
  39. for name, data in items:
  40. data.set_size()
  41. try:
  42. err_msg, err_flag = 'Errors:', False
  43. line_count = 0
  44. num_lines = len(file(data.file_name).readlines())
  45. for line in file(data.file_name):
  46. line_count += 1
  47. if line and line[0] == '-':
  48. if line_count + 3 == num_lines and not err_flag:
  49. err_flag = True
  50. err_msg = "Warning: It appears that your results have been truncated by UCSC. View the bottom of your result file for details."
  51. break
  52. err_flag = True
  53. err_msg = err_msg +" (line "+str(line_count)+")"+line
  54. data.set_peek()
  55. if isinstance(data.datatype, datatypes.interval.Interval) and data.missing_meta():
  56. data = app.datatypes_registry.change_datatype(data, 'tabular')
  57. out_data[name] = data
  58. if err_flag:
  59. raise Exception(err_msg)
  60. except Exception, exc:
  61. data.info = data.info + "\n" + str(exc)
  62. data.blurb = "error"