PageRenderTime 43ms CodeModel.GetById 32ms app.highlight 7ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/fastq/fastq_combiner.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 49 lines | 42 code | 5 blank | 2 comment | 11 complexity | c68562f80a7c79f12fc109b36529dd5b MD5 | raw file
 1#Dan Blankenberg
 2import sys, os, shutil
 3from galaxy_utils.sequence.fastq import fastqWriter, fastqSequencingRead, fastqCombiner, fastqFakeFastaScoreReader
 4from galaxy_utils.sequence.fasta import fastaReader, fastaNamedReader
 5
 6def main():
 7    #Read command line arguments
 8    fasta_filename = sys.argv[1]
 9    fasta_type = sys.argv[2] or 'fasta' #should always be fasta or csfasta? what if txt?
10    qual_filename = sys.argv[3]
11    qual_type = sys.argv[4] or 'qualsanger' #qual454 qualsolid
12    output_filename = sys.argv[5]
13    force_quality_encoding = sys.argv[6]
14    if force_quality_encoding == 'None':
15        force_quality_encoding = None
16    
17    format = 'sanger'
18    if fasta_type == 'csfasta' or qual_type == 'qualsolid':
19        format = 'cssanger'
20    elif qual_type == 'qualsolexa':
21        format = 'solexa'
22    elif qual_type == 'qualillumina':
23        format = 'illumina'
24    
25    out = fastqWriter( open( output_filename, 'wb' ), format = format, force_quality_encoding = force_quality_encoding )
26    if qual_filename == 'None':
27        qual_input = fastqFakeFastaScoreReader( format, quality_encoding = force_quality_encoding )
28    else:
29        qual_input = fastaNamedReader( open( qual_filename, 'rb' )  )
30    
31    fastq_combiner = fastqCombiner( format )
32    i = None
33    skip_count = 0
34    for i, sequence in enumerate( fastaReader( open( fasta_filename, 'rb' ) ) ):
35        quality = qual_input.get( sequence )
36        if quality:
37            fastq_read = fastq_combiner.combine( sequence, quality )
38            out.write( fastq_read )
39        else:
40            skip_count += 1
41    out.close()
42    if i is None:
43        print "Your file contains no valid FASTA sequences."
44    else:
45        print qual_input.has_data()
46        print 'Combined %s of %s sequences with quality scores (%.2f%%).' % ( i - skip_count + 1, i + 1, float( i - skip_count + 1 ) / float( i + 1 ) * 100.0 )
47
#Script entry point: run the combiner only when this file is executed directly
if __name__ == '__main__':
    main()