PageRenderTime 30ms CodeModel.GetById 19ms app.highlight 7ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/galaxy/datatypes/converters/interval_to_tabix_converter.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 38 lines | 22 code | 5 blank | 11 comment | 3 complexity | 92c1952421d6d93c88d319f4d9838542 MD5 | raw file
 1#!/usr/bin/env python
 2
 3"""
 4Uses pysam to index a bgzipped interval file with tabix
 5Supported presets: bed, gff, vcf
 6
 7usage: %prog [options] in_file bgzipped_file out_file
 8"""
 9
10from galaxy import eggs
11import pkg_resources; pkg_resources.require( "pysam" )
12import ctabix, subprocess, tempfile, sys, os, optparse
13
def main():
    """Build a tabix index for an already-compressed interval file.

    Command line: ``[options] input_fname index_fname out_fname``.

    Exactly three positional arguments are required. ``index_fname`` is the
    compressed file handed to ``ctabix.tabix_index`` and ``out_fname`` is the
    path passed as ``index_filename``; ``input_fname`` is accepted for
    interface compatibility but is not read here.

    Either ``--preset`` or all three of ``--chr-col``, ``--start-col`` and
    ``--end-col`` must be given. The column options are converted to 0-based
    indices by subtracting 1 before being passed to tabix.

    Exits through ``parser.error`` (usage message on stderr, status 2) when
    the positional arguments or required column options are missing, instead
    of failing later with an unpacking ``ValueError`` or ``None - 1``
    ``TypeError``.
    """
    # Read options, args.
    parser = optparse.OptionParser()
    parser.add_option('-c', '--chr-col', type='int', dest='chrom_col')
    parser.add_option('-s', '--start-col', type='int', dest='start_col')
    parser.add_option('-e', '--end-col', type='int', dest='end_col')
    parser.add_option('-P', '--preset', dest='preset')
    (options, args) = parser.parse_args()

    if len(args) != 3:
        parser.error("expected 3 positional arguments "
                     "(input file, compressed file, output index file), got %d"
                     % len(args))
    # input_fname is part of the command-line contract but unused below.
    input_fname, index_fname, out_fname = args

    # Keyword arguments shared by both indexing modes.
    index_kwargs = dict(filename=index_fname, keep_original=True,
                        already_compressed=True, index_filename=out_fname)

    if options.preset:
        # Preset type.
        index_kwargs['preset'] = options.preset
    else:
        # For interval files every column option is mandatory.
        column_options = (('--chr-col', options.chrom_col),
                          ('--start-col', options.start_col),
                          ('--end-col', options.end_col))
        missing = [flag for flag, value in column_options if value is None]
        if missing:
            parser.error("without --preset the following options are required: %s"
                         % ", ".join(missing))
        # Column indices passed to tabix are 0-based.
        index_kwargs.update(seq_col=options.chrom_col - 1,
                            start_col=options.start_col - 1,
                            end_col=options.end_col - 1)

    # Create index.
    ctabix.tabix_index(**index_kwargs)

    # NOTE(review): this inspects the size of the compressed file given as
    # index_fname, while the message talks about the index written to
    # out_fname -- confirm which file is meant to be checked.
    if os.path.getsize(index_fname) == 0:
        sys.stderr.write("The converted tabix index file is empty, meaning the input data is invalid.")


if __name__ == "__main__":
    main()