/tools/meme/fimo_wrapper.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 73 lines · 54 code · 9 blank · 10 comment · 13 complexity · 53354ffd0f8dcd474aa4455b7fefd8e2 MD5 · raw file

  1. #!/usr/bin/env python
  2. #Dan Blankenberg
  3. """
  4. Read text output from FIMO and create an interval file.
  5. """
  6. import sys, tempfile, subprocess, shutil, os
  7. from galaxy_utils.sequence.transform import DNA_reverse_complement
  8. buffsize = 1048576
  9. def stop_err( msg ):
  10. sys.stderr.write( msg )
  11. sys.exit()
  12. def main():
  13. assert len( sys.argv ) == 8, "Wrong number of arguments"
  14. sys.argv.pop(0)
  15. fimo_cmd = sys.argv.pop(0)
  16. html_path = sys.argv.pop(0)
  17. html_out = sys.argv.pop(0)
  18. interval_out = sys.argv.pop(0)
  19. txt_out = sys.argv.pop(0)
  20. xml_out = sys.argv.pop(0)
  21. gff_out = sys.argv.pop(0)
  22. #run fimo
  23. try:
  24. tmp_stderr = tempfile.NamedTemporaryFile()
  25. #tmp_stderr = open( tmp_filename, 'wb' )
  26. proc = subprocess.Popen( args=fimo_cmd, shell=True, stderr=tmp_stderr )
  27. returncode = proc.wait()
  28. #tmp_stderr.close()
  29. # get stderr, allowing for case where it's very large
  30. #tmp_stderr = open( tmp, 'rb' )
  31. tmp_stderr.seek(0)
  32. stderr = ''
  33. try:
  34. while True:
  35. stderr += tmp_stderr.read( buffsize )
  36. if not stderr or len( stderr ) % buffsize != 0:
  37. break
  38. except OverflowError:
  39. pass
  40. if returncode != 0:
  41. raise Exception, stderr
  42. except Exception, e:
  43. raise Exception, 'Error running FIMO:\n' + str( e )
  44. shutil.move( os.path.join( html_path, 'fimo.txt' ), txt_out )
  45. shutil.move( os.path.join( html_path, 'fimo.gff' ), gff_out )
  46. shutil.move( os.path.join( html_path, 'fimo.xml' ), xml_out )
  47. shutil.move( os.path.join( html_path, 'fimo.html' ), html_out )
  48. out_file = open( interval_out, 'wb' )
  49. out_file.write( "#%s\n" % "\t".join( ( "chr", "start", "end", "pattern name", "score", "strand", "matched sequence", "p-value", "q-value" ) ) )
  50. for line in open( txt_out ):
  51. if line.startswith( '#' ): continue
  52. fields = line.rstrip( "\n\r" ).split( "\t" )
  53. start, end = int( fields[2] ), int( fields[3] )
  54. sequence = fields[7]
  55. if start > end:
  56. start, end = end, start #flip start and end, and set strand
  57. strand = "-"
  58. sequence = DNA_reverse_complement( sequence ) #we want sequences relative to strand; FIMO always provides + stranded sequence
  59. else:
  60. strand = "+"
  61. start -= 1 #make 0-based start position
  62. out_file.write( "%s\n" % "\t".join( [ fields[1], str( start ), str( end ), fields[0], fields[4], strand, sequence, fields[5], fields[6] ] ) )
  63. out_file.close()
  64. if __name__ == "__main__": main()