gops_coverage.py - This Python script calculates the coverage of one query on another.

/tools/new_operations/gops_coverage.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 68 lines · 50 code · 9 blank · 9 comment · 9 complexity · b599448b791491cf8f279659003959eb MD5 · raw file


#!/usr/bin/env python
"""
Calculate coverage of one query on another, and append the coverage to
the last two columns as bases covered and percent coverage.

usage: %prog bed_file_1 bed_file_2 out_file
    -1, --cols1=N,N,N,N: Columns for start, end, strand in first file
    -2, --cols2=N,N,N,N: Columns for start, end, strand in second file
"""
from galaxy import eggs
import pkg_resources
pkg_resources.require( "bx-python" )
import sys, traceback, fileinput
from warnings import warn
from bx.intervals import *
from bx.intervals.io import *
from bx.intervals.operations.coverage import *
from bx.cookbook import doc_optparse
from galaxy.tools.util.galaxyops import *

# Refuse to run on interpreters older than Python 2.4.
assert sys.version_info[:2] >= ( 2, 4 )

def main():
    """
    Run the coverage operation on two interval files and write the result.

    Options and positional arguments are parsed from the module docstring by
    doc_optparse: two input file names, one output file name, and the column
    specifications for each input (options.cols1 / options.cols2).  Each input
    is wrapped in a NiceReaderWrapper and both readers are handed to
    coverage().  Every item it yields becomes one output line: GenomicInterval
    records are written as their tab-joined fields, anything else is written
    as its string form.

    A ParseError raised while iterating is reported through fail() as an
    invalid file format.  After a successful run, the skipped() report is
    printed for each reader whose `skipped` count is non-zero.
    """
    options, args = doc_optparse.parse( __doc__ )
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )
        in_fname, in2_fname, out_fname = args
    except Exception:
        # NOTE(review): doc_optparse.exception() presumably reports the error
        # and exits; if it returned, the names above would be unbound.
        doc_optparse.exception()

    g1 = NiceReaderWrapper( fileinput.FileInput( in_fname ),
                            chrom_col=chr_col_1,
                            start_col=start_col_1,
                            end_col=end_col_1,
                            strand_col=strand_col_1,
                            fix_strand=True )
    g2 = NiceReaderWrapper( fileinput.FileInput( in2_fname ),
                            chrom_col=chr_col_2,
                            start_col=start_col_2,
                            end_col=end_col_2,
                            strand_col=strand_col_2,
                            fix_strand=True )

    out_file = open( out_fname, "w" )
    # Nested try blocks (rather than a combined try/except/finally) keep this
    # valid on the oldest interpreter the version check at file level accepts,
    # while guaranteeing the output file is closed on every exit path.
    try:
        try:
            for line in coverage( [g1, g2] ):
                if isinstance( line, GenomicInterval ):
                    out_file.write( "%s\n" % "\t".join( line.fields ) )
                else:
                    out_file.write( "%s\n" % line )
        except ParseError:
            # sys.exc_info() avoids the version-specific `except X, e` /
            # `except X as e` spellings.
            exc = sys.exc_info()[1]
            fail( "Invalid file format: %s" % str( exc ) )
    finally:
        out_file.close()

    # A single parenthesized argument prints identically whether `print` is a
    # statement or a function.
    if g1.skipped > 0:
        print( skipped( g1, filedesc=" of 1st dataset" ) )
    if g2.skipped > 0:
        print( skipped( g2, filedesc=" of 2nd dataset" ) )

# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Summary ✨

This Python script calculates the coverage of one query on another and appends the coverage to the last two columns as bases covered and percent coverage.

Tech Fingerprint

Alerts (11)

'import *' Avoid wildcard imports to prevent namespace pollution; import specific names or use aliases
15 16 17 19
'def' Ensure functions have docstrings for documentation
23
'except:' Avoid catching all exceptions; specify exception types to catch only expected errors
32
'open(' Use 'with open()' to ensure files are properly closed
48
Complexity hotspot; lines 50 to 52 (total complexity: 3)
50 51 52
'type(' Use isinstance() for type checking instead of type()
52