PageRenderTime 29ms CodeModel.GetById 13ms app.highlight 12ms RepoModel.GetById 2ms app.codeStats 0ms

/tools/ilmn_pacbio/quake_wrapper.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 132 lines | 108 code | 3 blank | 21 comment | 5 complexity | 9b39e7b42d3b468f9cf5fd5ef79e2834 MD5 | raw file
  1#!/usr/bin/python
  2#
  3# Copyright (c) 2011, Pacific Biosciences of California, Inc.
  4#
  5# All rights reserved.
  6#
  7#Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
  8#    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
  9#    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 10#    * Neither the name of Pacific Biosciences nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
 11#
 12#THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 13#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 14#DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 15#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 16#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 17#
 18import sys
 19import os
 20import subprocess
 21
 22QUAKE_EXE = os.path.join( os.path.dirname(os.path.abspath(sys.argv[0])), 'quake.py' )
 23cmdLine = sys.argv
 24cmdLine.pop(0)
 25
 26#
 27# horribly not robust, but it was a pain to rewrite everything with
 28# optparse
 29#
 30j = -1
 31cut = 0
 32for i,arg in enumerate(cmdLine):
 33    if '--default_cutoff' in arg:
 34        j = i
 35        cut = int(arg.split('=')[1])
 36if j>=0:
 37    cmdLine = cmdLine[:j] + cmdLine[j+1:]
 38
 39j = -1
 40output=''
 41for i,arg in enumerate(cmdLine):
 42    if '--output' in arg:
 43        j = i
 44        output = arg.split('=')[1]
 45if j>=0:
 46    cmdLine = cmdLine[:j] + cmdLine[j+1:]
 47
 48def backticks( cmd, merge_stderr=True ):
 49    """
 50    Simulates the perl backticks (``) command with error-handling support
 51    Returns ( command output as sequence of strings, error code, error message )
 52    """
 53    if merge_stderr:
 54        _stderr = subprocess.STDOUT
 55    else:
 56        _stderr = subprocess.PIPE
 57
 58    p = subprocess.Popen( cmd, shell=True, stdin=subprocess.PIPE,
 59                          stdout=subprocess.PIPE, stderr=_stderr,
 60                          close_fds=True )
 61
 62    out = [ l[:-1] for l in p.stdout.readlines() ]
 63
 64    p.stdout.close()
 65    if not merge_stderr:
 66        p.stderr.close()
 67
 68    # need to allow process to terminate
 69    p.wait()
 70
 71    errCode = p.returncode and p.returncode or 0
 72    if p.returncode>0:
 73        errorMessage = os.linesep.join(out)
 74        output = []
 75    else:
 76        errorMessage = ''
 77        output = out
 78
 79    return output, errCode, errorMessage
 80
 81def to_stdout():
 82    def toCorFastq(f):
 83        stem, ext = os.path.splitext( os.path.basename(f) )
 84        dir = os.path.dirname(f)
 85        corFastq = os.path.join(dir,'%s.cor%s' % (stem,ext) )
 86        if not os.path.exists(corFastq):
 87            print >>sys.stderr, "Can't find path %s" % corFastq
 88            sys.exit(1)
 89        return corFastq
 90    if '-r' in cmdLine:
 91        fastqFile = cmdLine[ cmdLine.index('-r')+1 ]
 92        corFastq = toCorFastq(fastqFile)
 93        infile = open( corFastq, 'r' )
 94        for line in infile:
 95            sys.stdout.write( line )
 96        infile.close()
 97    else:
 98        fofnFile = cmdLine[ cmdLine.index('-f')+1 ]
 99        infile = open(fofnFile,'r')
100        for line in infile:
101            line = line.strip()
102            if len(line)>0:
103                fastqFiles = line.split()
104                break
105        infile.close()
106        outs = output.split(',')
107        for o,f in zip(outs,fastqFiles):
108            cf = toCorFastq(f)
109            os.system( 'cp %s %s' % ( cf, o ) )
110
111def run():
112    cmd = '%s %s' % ( QUAKE_EXE, " ".join(cmdLine) )
113    output, errCode, errMsg = backticks( cmd )
114
115    if errCode==0:
116        to_stdout()
117    else:
118        # if Quake exits with an error in cutoff determination we  
119        # can force correction if requested
120        if 'cutoff.txt' in errMsg and cut>0:
121            outfile = open( 'cutoff.txt', 'w' )
122            print >>outfile, str(cut)
123            outfile.close()
124            cmd = '%s --no_count --no_cut %s' % ( QUAKE_EXE, " ".join(cmdLine) )
125            output, errCode, errMsg = backticks( cmd )
126        if errCode==0:
127            to_stdout()
128        else:
129            print >>sys.stderr, errMsg
130            sys.exit(1)
131
132if __name__=='__main__': run()