PageRenderTime 25ms CodeModel.GetById 15ms app.highlight 8ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/fasta_tools/fasta_to_tabular.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 57 lines | 44 code | 3 blank | 10 comment | 4 complexity | 1895f935c76c44d433cc9f53e142ae80 MD5 | raw file
 1#!/usr/bin/env python
 2# This code exists in 2 places: ~/datatypes/converters and ~/tools/fasta_tools
 3"""
 4Input: fasta (input file), tabular (output file), int (truncation of id), int (columns from description)
 5Output: tabular
 6format convert: fasta to tabular
 7"""
 8
 9import sys, os
10
def stop_err( msg ):
    """Report a fatal error on standard error and terminate the script.

    msg -- text written to standard error exactly as given (no newline
           is appended)

    Raises SystemExit with exit status 1, so the failure is visible to
    callers that inspect the exit code and not only the stderr stream.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() reports status 0 (success); an error must not.
    sys.exit( 1 )
14
def __main__():
    """Convert a FASTA file into a tab-separated table, one record per row.

    Arguments are read from sys.argv:
      1. path of the FASTA input file
      2. path of the tabular output file
      3. truncation length for the identifier (0 disables truncation)
      4. number of columns the title line is split into (1 or more)

    Every output row consists of the title column(s), a tab, and the
    record's sequence lines concatenated.  Blank lines and lines starting
    with '#' are skipped.  With a single title column the truncation
    length applies to the whole title line; with several columns it
    applies to the first word only, and missing columns are left empty.

    A wrong argument count or a column count below 1 is reported through
    stop_err().  Non-integer values for arguments 3 and 4 raise
    ValueError.
    """
    if len(sys.argv) != 5:
        stop_err("Wrong number of argument. Expect four (fasta, tabular, truncation, columns)")
    infile = sys.argv[1]
    outfile = sys.argv[2]
    keep_first = int( sys.argv[3] )
    descr_split = int( sys.argv[4] )
    # Validate before the value is used for anything else.
    if descr_split < 1:
        stop_err("Bad description split value (should be 1 or more)")
    if keep_first == 0:
        # None leaves the slice end open, i.e. no truncation.
        keep_first = None
    elif descr_split == 1:
        # The single-column branch slices the raw line, which still
        # carries its leading ">", so the end index moves by one.
        keep_first += 1
    # True once something has been written to the current output row.
    # Tracking this (instead of the input line index) avoids a leading
    # empty row when the file starts with comments or blank lines, and
    # keeps the final newline correct for one-line and empty inputs.
    row_open = False
    with open( outfile, 'w' ) as out:
        with open( infile ) as fasta:
            for line in fasta:
                line = line.rstrip( '\r\n' )
                if not line or line.startswith( '#' ):
                    continue
                if not line.startswith( '>' ):
                    # Sequence data is appended to the current row.
                    out.write( line )
                    row_open = True
                    continue
                # A new title terminates the previous row, if any.
                if row_open:
                    out.write( '\n' )
                row_open = True
                # Don't want any existing tabs to trigger extra columns:
                line = line.replace( '\t', ' ' )
                if descr_split == 1:
                    out.write( line[1:keep_first] )
                else:
                    words = line[1:].split( None, descr_split - 1 )
                    # Apply any truncation to the first word (the id);
                    # a bare ">" title yields no words at all.
                    if words:
                        words[0] = words[0][0:keep_first]
                    # Pad with empty columns if required.
                    words += [""] * ( descr_split - len( words ) )
                    out.write( "\t".join( words ) )
                out.write( '\t' )
        if row_open:
            out.write( '\n' )
56
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()