PageRenderTime 19ms CodeModel.GetById 10ms app.highlight 7ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/fastq/fastq_paired_end_splitter.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 33 lines | 27 code | 4 blank | 2 comment | 7 complexity | 64c7e8b5d571e91565b8701af9e9e281 MD5 | raw file
 1#Dan Blankenberg
 2import sys, os, shutil
 3from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqSplitter
 4
 5def main():
 6    #Read command line arguments
 7    input_filename = sys.argv[1]
 8    input_type = sys.argv[2] or 'sanger'
 9    output1_filename = sys.argv[3]
10    output2_filename = sys.argv[4]
11    
12    splitter = fastqSplitter()
13    out1 = fastqWriter( open( output1_filename, 'wb' ), format = input_type )
14    out2 = fastqWriter( open( output2_filename, 'wb' ), format = input_type )
15    
16    i = None
17    skip_count = 0
18    for i, fastq_read in enumerate( fastqReader( open( input_filename, 'rb' ), format = input_type ) ):
19        read1, read2 = splitter.split( fastq_read )
20        if read1 and read2:
21            out1.write( read1 )
22            out2.write( read2 )
23        else:
24            skip_count += 1
25    out1.close()
26    out2.close()
27    if i is None:
28        print "Your file contains no valid FASTQ reads."
29    else:
30        print 'Split %s of %s reads (%.2f%%).' % ( i - skip_count + 1, i + 1, float( i - skip_count + 1 ) / float( i + 1 ) * 100.0 )
31
# Script entry point: run the splitter only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()