
/ruffus/test/test_files_post_merge.py

https://code.google.com/p/ruffus/
#!/usr/bin/env python
"""

    test_files_post_merge.py

        bug where @files follows @merge and an extra pair of parentheses is inserted

        use :
            --debug               to test automatically
            --start_again         the first time you run the file
            --jobs_per_task N     to simulate tasks with N files per task

            -j N / --jobs N       to specify multitasking
            -v                    to see the jobs in action
            -n / --just_print     to see what jobs would run

"""


#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

#   options


#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

from optparse import OptionParser
import sys, os
import os.path
import StringIO
import re, time

# add self to search path for testing
exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
sys.path.insert(0, os.path.abspath(os.path.join(exe_path, "..", "..")))
if __name__ == '__main__':
    module_name = os.path.split(sys.argv[0])[1]
    module_name = os.path.splitext(module_name)[0]
else:
    module_name = __name__


import ruffus
parser = OptionParser(version="%%prog v1.0, ruffus v%s" % ruffus.ruffus_version.__version)
parser.add_option("-D", "--debug", dest="debug",
                    action="store_true", default=False,
                    help="Make sure output is correct and clean up.")
parser.add_option("-s", "--start_again", dest="start_again",
                    action="store_true", default=False,
                    help="Make a new 'original.fa' file to simulate having to restart "
                            "the pipeline from scratch.")
parser.add_option("--jobs_per_task", dest="jobs_per_task",
                      default=3,
                      metavar="N",
                      type="int",
                      help="Simulates tasks with N files per task.")


parser.add_option("-t", "--target_tasks", dest="target_tasks",
                  action="append",
                  default = list(),
                  metavar="JOBNAME",
                  type="string",
                  help="Target task(s) of pipeline.")
parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
                  action="append",
                  default = list(),
                  metavar="JOBNAME",
                  type="string",
                  help="Pipeline task(s) which will be included even if they are up to date.")
parser.add_option("-j", "--jobs", dest="jobs",
                  default=1,
                  metavar="jobs",
                  type="int",
                  help="Specifies the number of jobs (commands) to run simultaneously.")
parser.add_option("-v", "--verbose", dest = "verbose",
                  action="count", default=0,
                  help="Print more verbose messages for each additional verbose level.")
parser.add_option("-d", "--dependency", dest="dependency_file",
                  #default="simple.svg",
                  metavar="FILE",
                  type="string",
                  help="Print a dependency graph of the pipeline that would be executed "
                        "to FILE, but do not execute it.")
parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
                  metavar="FORMAT",
                  type="string",
                  default = 'svg',
                  help="Format of the dependency graph file. Can be 'ps' (PostScript), "
                       "'svg' or 'svgz' (Scalable Vector Graphics), "
                       "'png' or 'gif' (bitmap graphics), etc.")
parser.add_option("-n", "--just_print", dest="just_print",
                    action="store_true", default=False,
                    help="Print a description of the jobs that would be executed, "
                        "but do not execute them.")
parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
                    action="store_true", default=False,
                    help="Rebuild a minimum of tasks necessary for the target. "
                    "Ignore upstream out-of-date tasks if intervening tasks are fine.")
parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
                    action="store_true", default=False,
                    help="Do not print out legend and key for dependency graph.")
parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
                    action="store_true", default=False,
                    help="Draw horizontal dependency graph.")

parameters = [
             ]


#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

#   imports


#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

import operator
from collections import defaultdict
import random

sys.path.append(os.path.abspath(os.path.join(exe_path, "..", "..")))
from ruffus import *

# use simplejson in place of json for python < 2.6
try:
    import json
except ImportError:
    import simplejson
    json = simplejson

#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

#   Main logic


#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888


# get help string
f = StringIO.StringIO()
parser.print_help(f)
helpstr = f.getvalue()
(options, remaining_args) = parser.parse_args()


tempdir = "temp_filesre_split_and_combine/"


if options.verbose:
    verbose_output = sys.stderr
else:
    # os.devnull rather than a hard-coded "/dev/null" keeps this portable
    verbose_output = open(os.devnull, "w")


#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888

#   Tasks


#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
#
#    split_fasta_file
#
@posttask(lambda: verbose_output.write("Split into %d files\n" % options.jobs_per_task))
@split(tempdir + "original.fa", [tempdir + "files.split.success", tempdir + "files.split.*.fa"])
def split_fasta_file (input_file, outputs):

    #
    # remove fasta files from any previous run
    #
    success_flag = outputs[0]
    output_file_names = outputs[1:]
    for f in output_file_names:
        os.unlink(f)

    #
    # create as many files as we are simulating in jobs_per_task
    #
    for i in range(options.jobs_per_task):
        open(tempdir + "files.split.%03d.fa" % i, "w")

    open(success_flag, "w")

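# The one-to-many pattern above: @split regenerates an unknown number of output
# files (matched here by the "files.split.*.fa" glob) whenever the input changes.
# A minimal sketch of the same pattern, with hypothetical file names:
#
#   @split("all_sequences.fa", "chunk.*.fa")
#   def make_chunks(input_file, output_files):
#       # write out one chunk.NNN.fa per piece of the input
#       ...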

#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
#
#    align_sequences
#
@posttask(lambda: verbose_output.write("Sequences aligned\n"))
@transform(split_fasta_file, suffix(".fa"), ".aln")                     # fa -> aln
def align_sequences (input_file, output_filename):
    open(output_filename, "w").write("%s\n" % output_filename)

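# @transform runs one job per matching output file of split_fasta_file;
# suffix(".fa") anchors the match at the end of each input name and ".aln"
# replaces it, so files.split.000.fa produces files.split.000.aln, and so on.
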

#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
#
#    percentage_identity
#
@posttask(lambda: verbose_output.write("%Identity calculated\n"))
@transform(align_sequences,             # find all results from align_sequences
            suffix(".aln"),             # replace suffix with:
            [r".pcid",                  #   .pcid suffix for the result
             r".pcid_success"])         #   .pcid_success to indicate job completed
def percentage_identity (input_file, output_files):
    (output_filename, success_flag_filename) = output_files
    open(output_filename, "w").write("%s\n" % output_filename)
    open(success_flag_filename, "w")

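# Each percentage_identity job declares a list of two outputs (the result plus
# a success flag), so every downstream input arrives as a pair of file names
# (".pcid", ".pcid_success") rather than as a single string.
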

#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
#
#    combine_results
#
@posttask(lambda: verbose_output.write("Results recombined\n"))
@merge(percentage_identity, tempdir + "all.combine_results")
def combine_results (input_files, output_files):
    """
    Combine all the percentage identity results into a single file
    """
    # @merge supplies a single output file name, not a list
    output_filename = output_files
    out = open(output_filename, "w")
    for inp, flag in input_files:
        out.write(open(inp).read())

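# @merge is the many-to-one counterpart of @split: every (.pcid, .pcid_success)
# pair produced upstream feeds a single job with a single output file.
# A minimal sketch of the pattern, with hypothetical names:
#
#   @merge(upstream_task, "summary.txt")
#   def summarise(input_files, output_file):
#       open(output_file, "w").writelines(open(f).read() for f in input_files)
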
@files(combine_results, "check_all_is.well")
def post_merge_check (input_filename, output_filename):
    """
    check that @merge sends on just one file name, not a list
    """
    open(output_filename, "w").write(open(input_filename).read())

@files(post_merge_check, "check_all_is.weller")
def post_post_merge_check (input_filename, output_filename):
    """
    check that @files forwards a single file name when given a single file
    """
    open(output_filename, "w").write(open(input_filename).read())

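# The two checks above exercise the regression named at the top of the file:
# when @files follows @merge, the single upstream file name must be passed
# through as a plain string; the original bug inserted an extra pair of
# parentheses around it, breaking tasks such as these.
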
def start_pipeline_afresh ():
    """
    Recreate directory and starting file
    """
    print >>verbose_output, "Start again"
    os.system("rm -rf %s" % tempdir)
    os.makedirs(tempdir)
    open(tempdir + "original.fa", "w").close()

if __name__ == '__main__':
    if options.start_again:
        start_pipeline_afresh()
    if options.just_print:
        pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
                          verbose = options.verbose,
                          gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)

    elif options.dependency_file:
        pipeline_printout_graph (open(options.dependency_file, "w"),
                                 options.dependency_graph_format,
                                 options.target_tasks,
                                 options.forced_tasks,
                                 draw_vertically = not options.draw_horizontally,
                                 gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
                                 no_key_legend = options.no_key_legend_in_graph)
    elif options.debug:
        start_pipeline_afresh()
        pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
                     logger = stderr_logger if options.verbose else black_hole_logger,
                     gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
                     verbose = options.verbose)
        os.system("rm -rf %s" % tempdir)
        print "OK"
    else:
        pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
                     logger = stderr_logger if options.verbose else black_hole_logger,
                     gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
                     verbose = options.verbose)