PageRenderTime 38ms CodeModel.GetById 32ms app.highlight 4ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/galaxy/datatypes/converters/fastqsolexa_to_fasta_converter.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 55 lines | 32 code | 6 blank | 17 comment | 5 complexity | 100c3ff13a4d750bb8d91cf40544c158 MD5 | raw file
 1#!/usr/bin/env python
 2"""
Convert a fastqsolexa file to a FASTA sequence file (quality scores are discarded).
 4
 5assume each sequence and quality score are contained in one line
 6the order should be:
 71st line: @title_of_seq
 82nd line: nucleotides
 93rd line: +title_of_qualityscore (might be skipped)
104th line: quality scores
11(in three forms: a. digits, b. ASCII codes, the first char as the coding base, c. ASCII codes without the first char.)
12
13Usage:
%python fastqsolexa_to_fasta_converter.py <your_fastqsolexa_filename> <output_seq_filename>
15"""
16
17import sys, os
18from math import *
19
20assert sys.version_info[:2] >= ( 2, 4 )
21
def stop_err( msg ):
    """
    Report a fatal error and terminate the script.

    msg is written to standard error exactly as given (no newline is
    appended), then the interpreter exits with status 1 so the failure is
    visible through the exit code as well as through stderr.

    Raises SystemExit (via sys.exit).
    """
    sys.stderr.write( "%s" % msg )
    # A bare sys.exit() would report success (status 0) despite the error.
    sys.exit( 1 )
25
def __main__():
    """
    Convert the fastqsolexa file named by sys.argv[1] into a FASTA file
    written to sys.argv[2].

    Blank lines and lines starting with '#' are skipped. The remaining lines
    are consumed in groups of four: the first line of each group is the
    sequence title (its leading marker character is replaced by '>'), the
    second is the nucleotide sequence, and the last two are ignored.

    The marker character of the first title line is remembered; if a later
    title line starts with a different character, stop_err() is called with
    the offending line number and the script terminates.
    """
    if len( sys.argv ) < 3:
        stop_err( 'Usage: python fastqsolexa_to_fasta_converter.py <your_fastqsolexa_filename> <output_seq_filename>\n' )
    infile_name = sys.argv[1]
    outfile = open( sys.argv[2], 'w' )
    fastq_block_lines = 0
    seq_title_startswith = ''

    # try/finally (rather than "with") keeps the script usable on the
    # Python 2.4 minimum the module asserts, while still closing both files
    # when stop_err() raises SystemExit.
    try:
        infile = open( infile_name )
        try:
            for i, line in enumerate( infile ):
                line = line.rstrip() # eliminate trailing space and new line characters
                if not line or line.startswith( '#' ):
                    continue
                # Position within the current 4-line record: 1, 2, 3, then 0.
                fastq_block_lines = ( fastq_block_lines + 1 ) % 4
                if fastq_block_lines == 1:
                    # line 1 is sequence title
                    line_startswith = line[0:1]
                    if not seq_title_startswith:
                        seq_title_startswith = line_startswith
                    if seq_title_startswith != line_startswith:
                        stop_err( 'Invalid fastqsolexa format at line %d: %s.' %( i + 1, line ) )
                    outfile.write( '>%s\n' % line[1:] )
                elif fastq_block_lines == 2:
                    # line 2 is nucleotides
                    outfile.write( '%s\n' % line )
                # lines 3 and 4 (quality title and scores) are not part of
                # the FASTA output and are dropped
        finally:
            infile.close()
    finally:
        outfile.close()


if __name__ == "__main__":
    __main__()