/tools/regVariation/maf_cpg_filter.py
Python | 60 lines | 49 code | 3 blank | 8 comment | 3 complexity | 982bea7b187d1bd52b97d46ac94aa221 MD5 | raw file
#!/usr/bin/env python
#Guruprasad Ananda
#Adapted from bx/scripts/maf_mask_cpg.py
"""
Mask out potential CpG sites from a maf. Restricted or inclusive definition
of CpG sites can be used. The total fraction masked is printed to stderr.

usage: %prog < input > output restricted
    -m, --mask=N: Character to use as mask ('?' is default)
"""
# NOTE(review): the docstring above is passed to doc_optparse.parse() at
# runtime, so its text is kept verbatim.  It disagrees with the code below in
# three places, which should be confirmed against the tool wrapper:
#   * main() unpacks four positional arguments
#     (inp_file, out_file, sitetype, definition), not "< input > output restricted";
#   * --mask is converted with int() and used as an index into MASK_CHARS,
#     and the fallback index 0 selects '#', not '?';
#   * the summary line is written to stdout, not stderr.

from galaxy import eggs
import pkg_resources
pkg_resources.require( "bx-python" )
try:
    pkg_resources.require( "numpy" )
except Exception:
    # Best effort: a failure to resolve numpy is deliberately ignored.
    pass
import bx.align
import bx.align.maf
from bx.cookbook import doc_optparse
import sys
import bx.align.sitemask.cpg

assert sys.version_info[:2] >= ( 2, 4 )

# Mask characters selectable through the integer value of -m/--mask.
MASK_CHARS = { 0: '#', 1: '$', 2: '^', 3: '*', 4: '?', 5: 'N' }


def _percent_masked( masked, total ):
    """Return masked/total as a percentage; 0.0 when total is zero or empty."""
    if not total:
        # Nothing was counted (e.g. a MAF without alignment blocks): report
        # 0% instead of dying with ZeroDivisionError.
        return 0.0
    return float( masked ) / float( total ) * 100


def _build_filter( sitetype, definition, mask ):
    """Pick the bx.align.sitemask.cpg filter and its label for the summary.

    sitetype   -- "CpG" selects one of the CpG filters; any other value
                  selects the nonCpG filter.
    definition -- only read when sitetype is "CpG"; int(definition) == 1
                  selects Restricted, anything else Inclusive.
    mask       -- mask character handed to the filter constructor.

    Returns a ( filter, label ) tuple.  Raises ValueError if definition is
    needed but is not an integer string.
    """
    if sitetype == "CpG":
        if int( definition ) == 1:
            return bx.align.sitemask.cpg.Restricted( mask=mask ), "CpG-Restricted"
        return bx.align.sitemask.cpg.Inclusive( mask=mask ), "CpG-Inclusive"
    return bx.align.sitemask.cpg.nonCpG( mask=mask ), "non-CpG"


def main():
    """Filter a MAF file through a CpG site mask and print a summary line.

    Positional arguments (after option parsing): input MAF path, output MAF
    path, site type and definition.  On a wrong argument count or an invalid
    --mask value, "Tool initialization error." is written to stderr and the
    process exits with status 1.
    """
    options, args = doc_optparse.parse( __doc__ )
    try:
        inp_file, out_file, sitetype, definition = args
        if options.mask:
            mask_index = int( options.mask )
        else:
            mask_index = 0
        # Looked up inside the guarded section so that an out-of-range index
        # is reported as an initialization error rather than a traceback.
        mask = MASK_CHARS[mask_index]
    except ( ValueError, TypeError, KeyError ):
        sys.stderr.write( "Tool initialization error.\n" )
        # Non-zero status so callers can tell the run failed.
        sys.exit( 1 )

    # try/finally (rather than "with") keeps the script usable on the
    # Python 2.4 floor asserted above.
    in_handle = open( inp_file, 'r' )
    try:
        reader = bx.align.maf.Reader( in_handle )
        out_handle = open( out_file, 'w' )
        try:
            writer = bx.align.maf.Writer( out_handle )
            cpgfilter, defn = _build_filter( sitetype, definition, mask )
            cpgfilter.run( reader, writer.write )
        finally:
            # Closing flushes everything the writer buffered.
            out_handle.close()
    finally:
        in_handle.close()

    sys.stdout.write( "%2.2f percent bases masked; Mask character = %s, Definition = %s\n" % ( _percent_masked( cpgfilter.masked, cpgfilter.total ), mask, defn ) )


if __name__ == "__main__":
    main()