/tools/regVariation/maf_cpg_filter.py
https://bitbucket.org/ialbert/galaxy-genetrack · Python · 60 lines · 49 code · 3 blank · 8 comment · 5 complexity · afd02507e622ceaf2f4e5fd476453f39 MD5 · raw file
- #!/usr/bin/python
- #Guruprasad Ananda
- #Adapted from bx/scripts/maf_mask_cpg.py
- """
- Mask out potential CpG sites from a maf. Restricted or inclusive definition
- of CpG sites can be used. The total fraction masked is printed to stderr.
- usage: %prog < input > output restricted
- -m, --mask=N: Character to use as mask ('?' is default)
- """
- from galaxy import eggs
- import pkg_resources
- pkg_resources.require( "bx-python" )
- try:
- pkg_resources.require( "numpy" )
- except:
- pass
- import bx.align
- import bx.align.maf
- from bx.cookbook import doc_optparse
- import sys
- import bx.align.sitemask.cpg
- assert sys.version_info[:2] >= ( 2, 4 )
def main():
    """Mask CpG or non-CpG sites in a MAF file and report the masked fraction.

    Positional command-line arguments (exactly four are required):
        inp_file    path of the MAF file to read
        out_file    path of the masked MAF file to write
        sitetype    "CpG" selects one of the CpG maskers; any other value
                    selects the non-CpG masker
        definition  only read when sitetype is "CpG": the integer 1 selects
                    the restricted CpG definition, any other integer the
                    inclusive one

    Option:
        -m/--mask   integer index (0-4) into the mask characters
                    '#', '$', '^', '*', '?'; index 0 ('#') is used when the
                    option is absent.  Note that the value is an index, not
                    the mask character itself.

    A one-line summary (percentage of bases masked, mask character and
    definition name) is written to stdout.  Any problem with the arguments
    writes "Tool initialization error." to stderr and exits with status 1.
    """
    options, args = doc_optparse.parse( __doc__ )

    # A dict (rather than a tuple) so that negative indices are rejected
    # instead of silently wrapping around.
    mask_chars = {0: '#', 1: '$', 2: '^', 3: '*', 4: '?'}

    # Validate *all* user input up front so that every malformed argument
    # produces the same clean error instead of a traceback further down.
    try:
        # Raises ValueError unless exactly four positional arguments exist.
        inp_file, out_file, sitetype, definition = args
        if options.mask:
            # ValueError for a non-integer, KeyError for an unknown index.
            mask = mask_chars[int(options.mask)]
        else:
            mask = mask_chars[0]
        # 'definition' is only interpreted for CpG filtering; the short
        # circuit keeps non-CpG runs tolerant of an arbitrary placeholder.
        restricted = sitetype == "CpG" and int(definition) == 1
    except (ValueError, KeyError):
        sys.stderr.write("Tool initialization error.\n")
        # Non-zero status so that callers can detect the failure.
        sys.exit(1)

    if sitetype == "CpG":
        if restricted:
            cpgfilter = bx.align.sitemask.cpg.Restricted( mask=mask )
            defn = "CpG-Restricted"
        else:
            cpgfilter = bx.align.sitemask.cpg.Inclusive( mask=mask )
            defn = "CpG-Inclusive"
    else:
        cpgfilter = bx.align.sitemask.cpg.nonCpG( mask=mask )
        defn = "non-CpG"

    # Keep our own references to the file objects and close them explicitly
    # so the output is flushed and both handles are released even if
    # masking fails.  Nested try/finally (instead of 'with') keeps the
    # script usable on the Python 2.4 baseline asserted at module level.
    inp = open(inp_file, 'r')
    try:
        out = open(out_file, 'w')
        try:
            reader = bx.align.maf.Reader( inp )
            writer = bx.align.maf.Writer( out )
            cpgfilter.run( reader, writer.write )
        finally:
            out.close()
    finally:
        inp.close()

    # An input without any bases leaves total at 0; report 0% rather than
    # dying with ZeroDivisionError after the output has been written.
    total = float(cpgfilter.total)
    if total:
        percent = float(cpgfilter.masked) / total * 100
    else:
        percent = 0.0
    sys.stdout.write(
        "%2.2f percent bases masked; Mask character = %s, Definition = %s\n"
        % (percent, mask, defn)
    )


# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()