/tools/regVariation/maf_cpg_filter.py

https://bitbucket.org/ialbert/galaxy-genetrack · Python · 60 lines · 49 code · 3 blank · 8 comment · 5 complexity · afd02507e622ceaf2f4e5fd476453f39 MD5 · raw file

  1. #!/usr/bin/python
  2. #Guruprasad Ananda
  3. #Adapted from bx/scripts/maf_mask_cpg.py
  4. """
  5. Mask out potential CpG sites from a maf. Restricted or inclusive definition
  6. of CpG sites can be used. The total fraction masked is printed to stderr.
  7. usage: %prog < input > output restricted
  8. -m, --mask=N: Character to use as mask ('?' is default)
  9. """
  10. from galaxy import eggs
  11. import pkg_resources
  12. pkg_resources.require( "bx-python" )
  13. try:
  14. pkg_resources.require( "numpy" )
  15. except:
  16. pass
  17. import bx.align
  18. import bx.align.maf
  19. from bx.cookbook import doc_optparse
  20. import sys
  21. import bx.align.sitemask.cpg
  22. assert sys.version_info[:2] >= ( 2, 4 )
  23. def main():
  24. options, args = doc_optparse.parse( __doc__ )
  25. try:
  26. inp_file, out_file, sitetype, definition = args
  27. if options.mask:
  28. mask = int(options.mask)
  29. else:
  30. mask = 0
  31. except:
  32. print >> sys.stderr, "Tool initialization error."
  33. sys.exit()
  34. reader = bx.align.maf.Reader( open(inp_file, 'r') )
  35. writer = bx.align.maf.Writer( open(out_file,'w') )
  36. mask_chr_dict = {0:'#', 1:'$', 2:'^', 3:'*', 4:'?'}
  37. mask = mask_chr_dict[mask]
  38. if sitetype == "CpG":
  39. if int(definition) == 1:
  40. cpgfilter = bx.align.sitemask.cpg.Restricted( mask=mask )
  41. defn = "CpG-Restricted"
  42. else:
  43. cpgfilter = bx.align.sitemask.cpg.Inclusive( mask=mask )
  44. defn = "CpG-Inclusive"
  45. else:
  46. cpgfilter = bx.align.sitemask.cpg.nonCpG( mask=mask )
  47. defn = "non-CpG"
  48. cpgfilter.run( reader, writer.write )
  49. print "%2.2f percent bases masked; Mask character = %s, Definition = %s" %(float(cpgfilter.masked)/float(cpgfilter.total) * 100, mask, defn)
  50. if __name__ == "__main__":
  51. main()