PageRenderTime 29ms CodeModel.GetById 19ms app.highlight 7ms RepoModel.GetById 2ms app.codeStats 0ms

/lib/galaxy/datatypes/converters/bgzip.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 47 lines | 32 code | 7 blank | 8 comment | 4 complexity | 86bcce9063f17ffa9ba824298b917b64 MD5 | raw file
 1#!/usr/bin/env python
 2
 3"""
 4Uses pysam to bgzip a file
 5
 6usage: %prog in_file out_file
 7"""
 8
 9from galaxy import eggs
10import pkg_resources; pkg_resources.require( "pysam" )
11import ctabix, subprocess, tempfile, sys, optparse
12
def _exit_on_failure(tool, status, stderr_data):
    """Report a failed external command on stderr and exit with status 1."""
    sys.stderr.write("%s failed with exit status %d\n" % (tool, status))
    if stderr_data:
        # The captured output is bytes on Python 3 and str on Python 2;
        # write it through the binary stream when one is available.
        getattr(sys.stderr, "buffer", sys.stderr).write(stderr_data)
    sys.exit(1)


def main():
    """Sort a tabular file by position and bgzip-compress the result.

    Command line: ``%prog [options] in_file out_file``

    The sort key is taken from, in order of precedence:

    * ``-c/--chr-col``, ``-s/--start-col`` and ``-e/--end-col`` (column
      numbers as understood by ``sort -k``; all three must be given), or
    * ``-P/--preset`` set to ``bed``, ``vcf`` or ``gff``.

    Lines starting with ``#`` or ``track`` are dropped, the remaining lines
    are sorted with the external ``sort`` program into a temporary file, and
    that file is compressed to ``out_file`` with ``ctabix.tabix_compress``.

    Exits with status 2 (through ``optparse``) when the arguments are
    unusable, and with status 1 when ``grep`` or ``sort`` fails.
    """
    # Read options, args.
    parser = optparse.OptionParser()
    parser.add_option('-c', '--chr-col', type='int', dest='chrom_col')
    parser.add_option('-s', '--start-col', type='int', dest='start_col')
    parser.add_option('-e', '--end-col', type='int', dest='end_col')
    parser.add_option('-P', '--preset', dest='preset')
    (options, args) = parser.parse_args()
    if len(args) != 2:
        parser.error("expected exactly two arguments: in_file out_file")
    input_fname, output_fname = args

    # Sort commands for the known formats.
    preset_sorts = {
        "bed": ["sort", "-k1,1", "-k2,2n", "-k3,3n"],
        "vcf": ["sort", "-k1,1", "-k2,2n"],
        # Stable sort on the start column.
        "gff": ["sort", "-s", "-k1,1", "-k4,4n"],
    }

    if options.chrom_col and options.start_col and options.end_col:
        # Explicit columns take precedence over any preset.
        sort_params = [
            "sort",
            "-k%d,%d" % (options.chrom_col, options.chrom_col),
            "-k%d,%dn" % (options.start_col, options.start_col),
            "-k%d,%dn" % (options.end_col, options.end_col),
        ]
    else:
        sort_params = preset_sorts.get(options.preset)
    if sort_params is None:
        # Without a sort command there is nothing sensible to run; fail with
        # a usage message instead of crashing inside subprocess.
        parser.error("give --chr-col, --start-col and --end-col together, "
                     "or --preset bed|vcf|gff")

    with tempfile.NamedTemporaryFile() as tmpfile:
        # Skip any lines starting with "#" and "track". No shell is involved,
        # so the pattern must not carry shell-style quotes. "--" keeps an
        # input name that begins with "-" from being read as a grep option.
        grepped = subprocess.Popen(
            ["grep", "-v", "-e", "^#", "-e", "^track", "--", input_fname],
            stderr=subprocess.PIPE, stdout=subprocess.PIPE)
        after_sort = subprocess.Popen(
            sort_params, stdin=grepped.stdout,
            stderr=subprocess.PIPE, stdout=tmpfile)
        # Drop our copy of the pipe so grep sees SIGPIPE if sort exits early.
        grepped.stdout.close()
        sort_err = after_sort.communicate()[1]
        grep_err = grepped.stderr.read()
        grepped.stderr.close()
        grep_status = grepped.wait()

        # grep exits 1 when it selects no lines, which is not an error here.
        if grep_status > 1:
            _exit_on_failure("grep", grep_status, grep_err)
        if after_sort.returncode != 0:
            _exit_on_failure("sort", after_sort.returncode, sort_err)

        # Compress while the temporary file still exists on disk.
        ctabix.tabix_compress(tmpfile.name, output_fname, force=True)
45
# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()