PageRenderTime 59ms CodeModel.GetById 47ms app.highlight 9ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/samtools/bam_to_sam.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 129 lines | 117 code | 3 blank | 9 comment | 1 complexity | 8bef67ec54e69d1a42ac689464c63890 MD5 | raw file
  1#!/usr/bin/env python
  2"""
  3Converts BAM data to sorted SAM data.
  4usage: bam_to_sam.py [options]
  5   --input1: SAM file to be converted
  6   --output1: output dataset in bam format
  7"""
  8
  9import optparse, os, sys, subprocess, tempfile, shutil
 10from galaxy import eggs
 11import pkg_resources; pkg_resources.require( "bx-python" )
 12from bx.cookbook import doc_optparse
 13#from galaxy import util
 14
 15def stop_err( msg ):
 16    sys.stderr.write( '%s\n' % msg )
 17    sys.exit()
 18
 19def __main__():
 20    #Parse Command Line
 21    parser = optparse.OptionParser()
 22    parser.add_option( '', '--input1', dest='input1', help='The input SAM dataset' )
 23    parser.add_option( '', '--output1', dest='output1', help='The output BAM dataset' )
 24    parser.add_option( '', '--header', dest='header', action='store_true', default=False, help='Write SAM Header' )
 25    ( options, args ) = parser.parse_args()
 26
 27    # output version # of tool
 28    try:
 29        tmp = tempfile.NamedTemporaryFile().name
 30        tmp_stdout = open( tmp, 'wb' )
 31        proc = subprocess.Popen( args='samtools 2>&1', shell=True, stdout=tmp_stdout )
 32        tmp_stdout.close()
 33        returncode = proc.wait()
 34        stdout = None
 35        for line in open( tmp_stdout.name, 'rb' ):
 36            if line.lower().find( 'version' ) >= 0:
 37                stdout = line.strip()
 38                break
 39        if stdout:
 40            sys.stdout.write( 'Samtools %s\n' % stdout )
 41        else:
 42            raise Exception
 43    except:
 44        sys.stdout.write( 'Could not determine Samtools version\n' )
 45
 46    tmp_dir = tempfile.mkdtemp()
 47
 48    try:
 49        # exit if input file empty
 50        if os.path.getsize( options.input1 ) == 0:
 51            raise Exception, 'Initial BAM file empty'
 52        # Sort alignments by leftmost coordinates. File <out.prefix>.bam will be created. This command
 53        # may also create temporary files <out.prefix>.%d.bam when the whole alignment cannot be fitted
 54        # into memory ( controlled by option -m ).
 55        tmp_sorted_aligns_file = tempfile.NamedTemporaryFile( dir=tmp_dir )
 56        tmp_sorted_aligns_file_base = tmp_sorted_aligns_file.name
 57        tmp_sorted_aligns_file_name = '%s.bam' % tmp_sorted_aligns_file.name
 58        tmp_sorted_aligns_file.close()
 59        command = 'samtools sort %s %s' % ( options.input1, tmp_sorted_aligns_file_base )
 60        tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
 61        tmp_stderr = open( tmp, 'wb' )
 62        proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
 63        returncode = proc.wait()
 64        tmp_stderr.close()
 65        # get stderr, allowing for case where it's very large
 66        tmp_stderr = open( tmp, 'rb' )
 67        stderr = ''
 68        buffsize = 1048576
 69        try:
 70            while True:
 71                stderr += tmp_stderr.read( buffsize )
 72                if not stderr or len( stderr ) % buffsize != 0:
 73                    break
 74        except OverflowError:
 75            pass
 76        tmp_stderr.close()
 77        if returncode != 0:
 78            raise Exception, stderr
 79        # exit if sorted BAM file empty
 80        if os.path.getsize( tmp_sorted_aligns_file_name) == 0:
 81            raise Exception, 'Intermediate sorted BAM file empty'
 82    except Exception, e:
 83        #clean up temp files
 84        if os.path.exists( tmp_dir ):
 85            shutil.rmtree( tmp_dir )
 86        stop_err( 'Error sorting alignments from (%s), %s' % ( options.input1, str( e ) ) )
 87
 88
 89    try:
 90        # Extract all alignments from the input BAM file to SAM format ( since no region is specified, all the alignments will be extracted ).
 91        if options.header:
 92            view_options = "-h"
 93        else:
 94            view_options = ""
 95        command = 'samtools view %s -o %s %s' % ( view_options, options.output1, tmp_sorted_aligns_file_name )
 96        tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
 97        tmp_stderr = open( tmp, 'wb' )
 98        proc = subprocess.Popen( args=command, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
 99        returncode = proc.wait()
100        tmp_stderr.close()
101        # get stderr, allowing for case where it's very large
102        tmp_stderr = open( tmp, 'rb' )
103        stderr = ''
104        buffsize = 1048576
105        try:
106            while True:
107                stderr += tmp_stderr.read( buffsize )
108                if not stderr or len( stderr ) % buffsize != 0:
109                    break
110        except OverflowError:
111            pass
112        tmp_stderr.close()
113        if returncode != 0:
114            raise Exception, stderr
115    except Exception, e:
116        #clean up temp files
117        if os.path.exists( tmp_dir ):
118            shutil.rmtree( tmp_dir )
119        stop_err( 'Error extracting alignments from (%s), %s' % ( options.input1, str( e ) ) )
120    #clean up temp files
121    if os.path.exists( tmp_dir ):
122        shutil.rmtree( tmp_dir )
123    # check that there are results in the output file
124    if os.path.getsize( options.output1 ) > 0:
125        sys.stdout.write( 'BAM file converted to SAM' )
126    else:
127        stop_err( 'The output file is empty, there may be an error with your input file.' )
128
129if __name__=="__main__": __main__()