/ruffus/test/test_files_post_merge.py

https://code.google.com/p/ruffus/

#!/usr/bin/env python
"""
    test_files_post_merge.py

        bug where @files follows merge and extra parenthesis inserted

        use:
            --debug               to test automatically
            --start_again         the first time you run the file
            --jobs_per_task N     to simulate tasks with N numbers of files per task

            -j N / --jobs N       to specify multitasking
            -v                    to see the jobs in action
            -n / --just_print     to see what jobs would run
"""
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
#   options
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

from optparse import OptionParser
import sys, os
import os.path
import StringIO
import re, time

# add self to search path for testing
exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
sys.path.insert(0, os.path.abspath(os.path.join(exe_path, "..", "..")))

if __name__ == '__main__':
    module_name = os.path.split(sys.argv[0])[1]
    module_name = os.path.splitext(module_name)[0]
else:
    module_name = __name__

import ruffus
parser = OptionParser(version="%%prog v1.0, ruffus v%s" % ruffus.ruffus_version.__version)
parser.add_option("-D", "--debug", dest="debug",
                  action="store_true", default=False,
                  help="Make sure output is correct and clean up.")
parser.add_option("-s", "--start_again", dest="start_again",
                  action="store_true", default=False,
                  help="Make a new 'original.fa' file to simulate having to restart "
                       "pipeline from scratch.")
parser.add_option("--jobs_per_task", dest="jobs_per_task",
                  default=3,
                  metavar="N",
                  type="int",
                  help="Simulates tasks with N numbers of files per task.")
parser.add_option("-t", "--target_tasks", dest="target_tasks",
                  action="append",
                  default=list(),
                  metavar="JOBNAME",
                  type="string",
                  help="Target task(s) of pipeline.")
parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
                  action="append",
                  default=list(),
                  metavar="JOBNAME",
                  type="string",
                  help="Pipeline task(s) which will be included even if they are up to date.")
parser.add_option("-j", "--jobs", dest="jobs",
                  default=1,
                  metavar="jobs",
                  type="int",
                  help="Specifies the number of jobs (commands) to run simultaneously.")
parser.add_option("-v", "--verbose", dest="verbose",
                  action="count", default=0,
                  help="Print more verbose messages for each additional verbose level.")
parser.add_option("-d", "--dependency", dest="dependency_file",
                  #default="simple.svg",
                  metavar="FILE",
                  type="string",
                  help="Print a dependency graph of the pipeline that would be executed "
                       "to FILE, but do not execute it.")
parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
                  metavar="FORMAT",
                  type="string",
                  default='svg',
                  help="format of dependency graph file. Can be 'ps' (PostScript), " +
                       "'svg' 'svgz' (Structured Vector Graphics), " +
                       "'png' 'gif' (bitmap graphics) etc ")
parser.add_option("-n", "--just_print", dest="just_print",
                  action="store_true", default=False,
                  help="Print a description of the jobs that would be executed, "
                       "but do not execute them.")
parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
                  action="store_true", default=False,
                  help="Rebuild a minimum of tasks necessary for the target. "
                       "Ignore upstream out of date tasks if intervening tasks are fine.")
parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
                  action="store_true", default=False,
                  help="Do not print out legend and key for dependency graph.")
parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
                  action="store_true", default=False,
                  help="Draw horizontal dependency graph.")

parameters = [
]
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
#   imports
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

import StringIO
import re
import operator
import sys, os
from collections import defaultdict
import random

sys.path.append(os.path.abspath(os.path.join(exe_path, "..", "..")))
from ruffus import *

# use simplejson in place of json for python < 2.6
try:
    import json
except ImportError:
    import simplejson
    json = simplejson
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
#   Main logic
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

# get help string
f = StringIO.StringIO()
parser.print_help(f)
helpstr = f.getvalue()
(options, remaining_args) = parser.parse_args()

tempdir = "temp_filesre_split_and_combine/"
if options.verbose:
    verbose_output = sys.stderr
else:
    verbose_output = open("/dev/null", "w")

#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
#   Tasks
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
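#
#   Pipeline overview (as wired up by the decorators below):
#       original.fa -> split_fasta_file -> align_sequences -> percentage_identity
#                   -> combine_results -> post_merge_check -> post_post_merge_check
#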
#
#    split_fasta_file
#
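# @split runs as a single job producing a variable number of output files
# (matched by the "files.split.*.fa" wildcard); the "files.split.success"
# flag file is written last to mark that the split completed.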
@posttask(lambda: verbose_output.write("Split into %d files\n" % options.jobs_per_task))
@split(tempdir + "original.fa", [tempdir + "files.split.success", tempdir + "files.split.*.fa"])
def split_fasta_file (input_file, outputs):

    #
    # remove previous fasta files
    #
    success_flag = outputs[0]
    output_file_names = outputs[1:]
    for f in output_file_names:
        os.unlink(f)

    #
    # create as many files as we are simulating in jobs_per_task
    #
    for i in range(options.jobs_per_task):
        open(tempdir + "files.split.%03d.fa" % i, "w")

    open(success_flag, "w")
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
#
#    align_sequences
#
@posttask(lambda: verbose_output.write("Sequences aligned\n"))
@transform(split_fasta_file, suffix(".fa"), ".aln")         # fa -> aln
def align_sequences (input_file, output_filename):
    open(output_filename, "w").write("%s\n" % output_filename)
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
#
#    percentage_identity
#
@posttask(lambda: verbose_output.write("%Identity calculated\n"))
@transform(align_sequences,             # find all results from align_sequences
           suffix(".aln"),              # replace suffix with:
           [r".pcid",                   # .pcid suffix for the result
            r".pcid_success"])          # .pcid_success to indicate job completed
def percentage_identity (input_file, output_files):
    (output_filename, success_flag_filename) = output_files
    open(output_filename, "w").write("%s\n" % output_filename)
    open(success_flag_filename, "w")
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
#
#    combine_results
#
@posttask(lambda: verbose_output.write("Results recombined\n"))
@merge(percentage_identity, tempdir + "all.combine_results")
def combine_results (input_files, output_files):
    """
    Combine all
    """
    (output_filename) = output_files
    out = open(output_filename, "w")
    for inp, flag in input_files:
        out.write(open(inp).read())
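#
#    post_merge_check
#
# This is the regression described in the module docstring: when @files follows
# a @merge task, the single output file name of the upstream task should be
# passed through as-is, not wrapped in an extra layer of parentheses.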
@files(combine_results, "check_all_is.well")
def post_merge_check (input_filename, output_filename):
    """
    check that merge sends just one file, not a list to me
    """
    open(output_filename, "w").write(open(input_filename).read())
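#
#    post_post_merge_check
#
# Same check one level further down: @files should forward a single file name
# unchanged when it is handed a single file name.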
@files(post_merge_check, "check_all_is.weller")
def post_post_merge_check (input_filename, output_filename):
    """
    check that @files forwards a single file on when given a single file
    """
    open(output_filename, "w").write(open(input_filename).read())
def start_pipeline_afresh ():
    """
    Recreate directory and starting file
    """
    print >>verbose_output, "Start again"
    os.system("rm -rf %s" % tempdir)
    os.makedirs(tempdir)
    open(tempdir + "original.fa", "w").close()
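#
#   Run the pipeline: dispatch on the command line options (job print-out,
#   dependency graph, self-cleaning --debug run, or a normal run).
#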
if __name__ == '__main__':
    if options.start_again:
        start_pipeline_afresh()

    if options.just_print:
        pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
                          verbose=options.verbose,
                          gnu_make_maximal_rebuild_mode=not options.minimal_rebuild_mode)

    elif options.dependency_file:
        pipeline_printout_graph(open(options.dependency_file, "w"),
                                options.dependency_graph_format,
                                options.target_tasks,
                                options.forced_tasks,
                                draw_vertically=not options.draw_horizontally,
                                gnu_make_maximal_rebuild_mode=not options.minimal_rebuild_mode,
                                no_key_legend=options.no_key_legend_in_graph)

    elif options.debug:
        start_pipeline_afresh()
        pipeline_run(options.target_tasks, options.forced_tasks, multiprocess=options.jobs,
                     logger=stderr_logger if options.verbose else black_hole_logger,
                     gnu_make_maximal_rebuild_mode=not options.minimal_rebuild_mode,
                     verbose=options.verbose)
        os.system("rm -rf %s" % tempdir)
        print "OK"

    else:
        pipeline_run(options.target_tasks, options.forced_tasks, multiprocess=options.jobs,
                     logger=stderr_logger if options.verbose else black_hole_logger,
                     gnu_make_maximal_rebuild_mode=not options.minimal_rebuild_mode,
                     verbose=options.verbose)