PageRenderTime 18ms CodeModel.GetById 1ms app.highlight 12ms RepoModel.GetById 2ms app.codeStats 0ms

/tools/fastq/fastq_trimmer.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 41 lines | 37 code | 3 blank | 1 comment | 10 complexity | a13c4e0f582e56c1766d1aaf0d8a7990 MD5 | raw file
 1#Dan Blankenberg
 2import sys
 3from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
 4
 5def main():
 6    input_filename = sys.argv[1]
 7    output_filename = sys.argv[2]
 8    left_offset = sys.argv[3]
 9    right_offset = sys.argv[4]
10    percent_offsets = sys.argv[5] == 'offsets_percent'
11    input_type = sys.argv[6] or 'sanger'
12    keep_zero_length = sys.argv[7] == 'keep_zero_length'
13    
14    out = fastqWriter( open( output_filename, 'wb' ), format = input_type )
15    num_reads_excluded = 0
16    num_reads = None
17    for num_reads, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ):
18        if percent_offsets:
19            left_column_offset = int( round( float( left_offset ) / 100.0 * float( len( fastq_read ) ) ) )
20            right_column_offset = int( round( float( right_offset ) / 100.0 * float( len( fastq_read ) ) ) )
21        else:
22            left_column_offset = int( left_offset )
23            right_column_offset = int( right_offset )
24        if right_column_offset > 0:
25            right_column_offset = -right_column_offset
26        else:
27            right_column_offset = None
28        fastq_read = fastq_read.slice( left_column_offset, right_column_offset )
29        if keep_zero_length or len( fastq_read ):
30            out.write( fastq_read )
31        else:
32            num_reads_excluded += 1
33    out.close()
34    if num_reads is None:
35        print "No valid fastq reads could be processed."
36    else:
37        print "%i fastq reads were processed." % ( num_reads + 1 )
38    if num_reads_excluded:
39        print "%i reads of zero length were excluded from the output." % num_reads_excluded
40
# Run the trimmer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()