PageRenderTime 23ms CodeModel.GetById 16ms app.highlight 4ms RepoModel.GetById 2ms app.codeStats 0ms

/tools/metag_tools/rmap_wrapper.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 88 lines | 77 code | 8 blank | 3 comment | 5 complexity | 9c95495dbcc7f51a652bb344f89dd9c3 MD5 | raw file
 1#!/usr/bin/env python
 2
 3import os, sys, tempfile
 4
 5assert sys.version_info[:2] >= (2.4)
 6
 7def stop_err( msg ):
 8    
 9    sys.stderr.write( "%s\n" % msg )
10    sys.exit()
11    
12
13def __main__():
14    
15    # I/O
16    target_path = sys.argv[1]
17    infile = sys.argv[2]
18    read_len = sys.argv[3]              # -w
19    align_len = sys.argv[4]             # -h
20    mismatch = sys.argv[5]              # -m
21    output_file = sys.argv[6]
22    
23    # first guess the read length
24    guess_read_len = 0
25    seq = ''
26    for i, line in enumerate(open(infile)):
27        line = line.rstrip('\r\n')
28        if line.startswith('>'):
29            if seq:
30                guess_read_len = len(seq)
31                break
32        else:
33            seq += line
34            
35    try: 
36        test = int(read_len)
37        if test == 0:
38            read_len = str(guess_read_len)
39        else:
40            assert test >= 20 and test <= 64
41    except:
42        stop_err('Invalid value for read length. Must be between 20 and 64.')
43    
44    try:
45        int(align_len)    
46    except:
47        stop_err('Invalid value for minimal length of a hit.')
48    
49    try:
50        int(mismatch)
51        #assert test >= 0 and test <= int(0.1*int(read_len))
52    except:
53        stop_err('Invalid value for mismatch numbers in an alignment.')
54    
55    all_files = []
56    if os.path.isdir(target_path):
57        
58        # check target genome
59        fa_files = os.listdir(target_path)
60            
61        for file in fa_files:
62            file = "%s/%s" % ( target_path, file )
63            file = os.path.normpath(file)
64            all_files.append(file)
65    else:
66        stop_err("No sequences for %s are available for search, please report this error." %(target_path))
67   
68    for detail_file_path in all_files:
69        output_tempfile = tempfile.NamedTemporaryFile().name
70        command = "rmap -h %s -w %s -m %s -c %s %s -o %s 2>&1" % ( align_len, read_len, mismatch, detail_file_path, infile, output_tempfile )
71        #print command
72        try:
73            os.system( command )
74        except Exception, e:
75            stop_err( str( e ) )
76
77        try:
78            os.system( 'cat %s >> %s' % ( output_tempfile, output_file ) )
79        except Exception, e:
80            stop_err( str( e ) )
81        
82        try:
83            os.remove( output_tempfile )
84        except:
85            pass
86        
87        
88if __name__ == '__main__': __main__()