PageRenderTime 34ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/tools/ilmn_pacbio/quake_wrapper.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 132 lines | 108 code | 3 blank | 21 comment | 3 complexity | 9b39e7b42d3b468f9cf5fd5ef79e2834 MD5 | raw file
  1. #!/usr/bin/python
  2. #
  3. # Copyright (c) 2011, Pacific Biosciences of California, Inc.
  4. #
  5. # All rights reserved.
  6. #
  7. #Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
  8. # * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
  9. # * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
  10. # * Neither the name of Pacific Biosciences nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
  11. #
  12. #THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  13. #WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS CONTRIBUTORS BE LIABLE FOR ANY
  14. #DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  15. #LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  16. #(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  17. #
  18. import sys
  19. import os
  20. import subprocess
  21. QUAKE_EXE = os.path.join( os.path.dirname(os.path.abspath(sys.argv[0])), 'quake.py' )
  22. cmdLine = sys.argv
  23. cmdLine.pop(0)
  24. #
  25. # horribly not robust, but it was a pain to rewrite everything with
  26. # optparse
  27. #
  28. j = -1
  29. cut = 0
  30. for i,arg in enumerate(cmdLine):
  31. if '--default_cutoff' in arg:
  32. j = i
  33. cut = int(arg.split('=')[1])
  34. if j>=0:
  35. cmdLine = cmdLine[:j] + cmdLine[j+1:]
  36. j = -1
  37. output=''
  38. for i,arg in enumerate(cmdLine):
  39. if '--output' in arg:
  40. j = i
  41. output = arg.split('=')[1]
  42. if j>=0:
  43. cmdLine = cmdLine[:j] + cmdLine[j+1:]
  44. def backticks( cmd, merge_stderr=True ):
  45. """
  46. Simulates the perl backticks (``) command with error-handling support
  47. Returns ( command output as sequence of strings, error code, error message )
  48. """
  49. if merge_stderr:
  50. _stderr = subprocess.STDOUT
  51. else:
  52. _stderr = subprocess.PIPE
  53. p = subprocess.Popen( cmd, shell=True, stdin=subprocess.PIPE,
  54. stdout=subprocess.PIPE, stderr=_stderr,
  55. close_fds=True )
  56. out = [ l[:-1] for l in p.stdout.readlines() ]
  57. p.stdout.close()
  58. if not merge_stderr:
  59. p.stderr.close()
  60. # need to allow process to terminate
  61. p.wait()
  62. errCode = p.returncode and p.returncode or 0
  63. if p.returncode>0:
  64. errorMessage = os.linesep.join(out)
  65. output = []
  66. else:
  67. errorMessage = ''
  68. output = out
  69. return output, errCode, errorMessage
  70. def to_stdout():
  71. def toCorFastq(f):
  72. stem, ext = os.path.splitext( os.path.basename(f) )
  73. dir = os.path.dirname(f)
  74. corFastq = os.path.join(dir,'%s.cor%s' % (stem,ext) )
  75. if not os.path.exists(corFastq):
  76. print >>sys.stderr, "Can't find path %s" % corFastq
  77. sys.exit(1)
  78. return corFastq
  79. if '-r' in cmdLine:
  80. fastqFile = cmdLine[ cmdLine.index('-r')+1 ]
  81. corFastq = toCorFastq(fastqFile)
  82. infile = open( corFastq, 'r' )
  83. for line in infile:
  84. sys.stdout.write( line )
  85. infile.close()
  86. else:
  87. fofnFile = cmdLine[ cmdLine.index('-f')+1 ]
  88. infile = open(fofnFile,'r')
  89. for line in infile:
  90. line = line.strip()
  91. if len(line)>0:
  92. fastqFiles = line.split()
  93. break
  94. infile.close()
  95. outs = output.split(',')
  96. for o,f in zip(outs,fastqFiles):
  97. cf = toCorFastq(f)
  98. os.system( 'cp %s %s' % ( cf, o ) )
  99. def run():
  100. cmd = '%s %s' % ( QUAKE_EXE, " ".join(cmdLine) )
  101. output, errCode, errMsg = backticks( cmd )
  102. if errCode==0:
  103. to_stdout()
  104. else:
  105. # if Quake exits with an error in cutoff determination we
  106. # can force correction if requested
  107. if 'cutoff.txt' in errMsg and cut>0:
  108. outfile = open( 'cutoff.txt', 'w' )
  109. print >>outfile, str(cut)
  110. outfile.close()
  111. cmd = '%s --no_count --no_cut %s' % ( QUAKE_EXE, " ".join(cmdLine) )
  112. output, errCode, errMsg = backticks( cmd )
  113. if errCode==0:
  114. to_stdout()
  115. else:
  116. print >>sys.stderr, errMsg
  117. sys.exit(1)
  118. if __name__=='__main__': run()