PageRenderTime 35ms CodeModel.GetById 25ms app.highlight 9ms RepoModel.GetById 0ms app.codeStats 0ms

/tools/regVariation/maf_cpg_filter.py

https://bitbucket.org/ialbert/galaxy-genetrack
Python | 60 lines | 49 code | 3 blank | 8 comment | 5 complexity | afd02507e622ceaf2f4e5fd476453f39 MD5 | raw file
 1#!/usr/bin/python
 2#Guruprasad Ananda
 3#Adapted from bx/scripts/maf_mask_cpg.py
 4"""
 5Mask out potential CpG sites (or non-CpG sites) from a maf. Restricted or inclusive
 6definition of CpG sites can be used. The percentage of bases masked is printed to stdout.
 7
 8usage: %prog input_maf output_maf sitetype definition
 9    -m, --mask=N: Index of the mask character, 0='#' (default), 1='$', 2='^', 3='*', 4='?'
10"""
11
12from galaxy import eggs
13import pkg_resources 
14pkg_resources.require( "bx-python" )
15try:
16    pkg_resources.require( "numpy" )
17except:
18    pass
19import bx.align
20import bx.align.maf
21from bx.cookbook import doc_optparse
22import sys
23import bx.align.sitemask.cpg
24
25assert sys.version_info[:2] >= ( 2, 4 )
26
def main():
    """Mask CpG or non-CpG sites in a MAF file and report the masked percentage.

    Positional command-line arguments, in order:
        inp_file    path of the MAF file to read
        out_file    path of the masked MAF file to write
        sitetype    "CpG" selects a CpG masker; any other value selects nonCpG
        definition  used only when sitetype is "CpG": 1 selects Restricted,
                    any other integer selects Inclusive

    The optional -m/--mask value is an integer index into the mask-character
    table below; it defaults to 0 ('#').

    On malformed arguments (wrong argument count, non-integer or out-of-range
    mask index, non-integer CpG definition) "Tool initialization error." is
    written to stderr and the process exits with status 1.
    """
    mask_chr_dict = {0: '#', 1: '$', 2: '^', 3: '*', 4: '?'}

    # __doc__ here is the module docstring, which doc_optparse uses as the
    # option specification.
    options, args = doc_optparse.parse( __doc__ )

    # Validate everything up front so bad input produces one clean message
    # instead of a traceback after the output file has been truncated.
    try:
        inp_file, out_file, sitetype, definition = args
        if options.mask:
            mask = mask_chr_dict[int(options.mask)]
        else:
            mask = mask_chr_dict[0]
        # The definition is only meaningful (and only parsed) for CpG masking.
        restricted = False
        if sitetype == "CpG":
            restricted = ( int(definition) == 1 )
    except (ValueError, TypeError, KeyError):
        sys.stderr.write( "Tool initialization error.\n" )
        # Non-zero status so callers can detect the failure.
        sys.exit( 1 )

    if sitetype == "CpG":
        if restricted:
            cpgfilter = bx.align.sitemask.cpg.Restricted( mask=mask )
            defn = "CpG-Restricted"
        else:
            cpgfilter = bx.align.sitemask.cpg.Inclusive( mask=mask )
            defn = "CpG-Inclusive"
    else:
        cpgfilter = bx.align.sitemask.cpg.nonCpG( mask=mask )
        defn = "non-CpG"

    # Nested try/finally (rather than "with") keeps the Python 2.4 floor the
    # file asserts while still guaranteeing both handles are closed.
    inp = open( inp_file, 'r' )
    try:
        out = open( out_file, 'w' )
        try:
            reader = bx.align.maf.Reader( inp )
            writer = bx.align.maf.Writer( out )
            cpgfilter.run( reader, writer.write )
        finally:
            out.close()
    finally:
        inp.close()

    # An empty alignment leaves total at 0; report 0% instead of dividing by zero.
    total = cpgfilter.total
    if total:
        percent = float( cpgfilter.masked ) / float( total ) * 100
    else:
        percent = 0.0

    sys.stdout.write( "%2.2f percent bases masked; Mask character = %s, Definition = %s\n" % ( percent, mask, defn ) )
58
59if __name__ == "__main__":
60    main()