PageRenderTime 59ms CodeModel.GetById 18ms app.highlight 35ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/rgenetics/rgWebLogo3.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 157 lines | 133 code | 10 blank | 14 comment | 17 complexity | aa4e3a2c86b07640948b1e3422acaa77 MD5 | raw file
  1"""
  2# modified june 2 ross lazarus to add units option at Assaf Gordon's suggestion
  3# rgWebLogo3.py
  4# wrapper to check that all fasta sequences are the same length
  5
  6"""
  7import optparse, os, sys, subprocess, tempfile
  8
  9WEBLOGO = 'weblogo' # executable name for weblogo3 - confusing isn't it?
 10
 11class WL3:
 12    """
 13    simple wrapper class to check fasta sequence lengths are all identical
 14    """
 15    FASTASTARTSYM = '>'
 16    badseq = '## error - sequences in file %s are not all the same length - cannot proceed. Please read the tool documentation carefully'
 17
 18    def __init__(self,opts=None):
 19        assert opts<>None,'WL3 class needs opts passed in - got None'
 20        self.opts = opts
 21        self.fastaf = file(self.opts.input,'r')
 22        self.clparams = {}
 23
 24    def whereis(self,program):
 25        for path in os.environ.get('PATH', '').split(':'):
 26            if os.path.exists(os.path.join(path, program)) and not os.path.isdir(os.path.join(path, program)):
 27                return os.path.join(path, program)
 28        return None
 29
 30    def runCL(self):
 31        """ construct and run a command line
 32        """
 33        wl = self.whereis(WEBLOGO)
 34        if not wl:
 35             print >> sys.stderr, '## rgWebLogo3.py error - cannot locate the weblogo binary %s on the current path' % WEBLOGO
 36             print >> sys.stderr, '## Please ensure it is installed and working from http://code.google.com/p/weblogo'
 37             sys.exit(1)
 38        cll = [WEBLOGO,]
 39        cll += [' '.join(it) for it in list(self.clparams.items())]
 40        cl = ' '.join(cll)
 41        assert cl > '', 'runCL needs a command line as clparms'
 42        fd,templog = tempfile.mkstemp(suffix='rgtempRun.txt')
 43        tlf = open(templog,'w')
 44        process = subprocess.Popen(cl, shell=True, stderr=tlf, stdout=tlf)
 45        rval = process.wait()
 46        tlf.close()
 47        tlogs = ''.join(open(templog,'r').readlines())
 48        if len(tlogs) > 1:
 49            s = '## executing %s returned status %d and log (stdout/stderr) records: \n%s\n' % (cl,rval,tlogs)
 50        else:
 51            s = '## executing %s returned status %d. Nothing appeared on stderr/stdout\n' % (cl,rval)
 52        os.unlink(templog) # always
 53        if rval <> 0:
 54             print >> sys.stderr, '## rgWebLogo3.py error - executing %s returned error code %d' % (cl,rval)
 55             print >> sys.stderr, '## This may be a data problem or a tool dependency (%s) installation problem' % WEBLOGO
 56             print >> sys.stderr, '## Please ensure %s is correctly installed and working on the command line -see http://code.google.com/p/weblogo' % WEBLOGO
 57             sys.exit(1)
 58        return s
 59
 60        
 61    def iter_fasta(self):
 62        """
 63        generator for fasta sequences from a file
 64        """
 65        aseq = []
 66        seqname = None
 67        for i,row in enumerate(self.fastaf):
 68            if row.startswith(self.FASTASTARTSYM):
 69                if seqname <> None: # already in a sequence
 70                    s = ''.join(aseq)
 71                    l = len(s)
 72                    yield (seqname,l)
 73                    seqname = row[1:].strip()
 74                    aseq = []
 75                else:
 76                    if i > 0:
 77                        print >> sys.stderr,'Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input,self.FASTASTARTSYM)
 78                        sys.exit(1)
 79                    else:
 80                        seqname = row[1:].strip() 
 81            else: # sequence row
 82                if seqname == None:
 83                    print >> sys.stderr,'Invalid fasta file %s - does not start with %s - please read the tool documentation carefully' % (self.opts.input,self.FASTASTARTSYM)
 84                    sys.exit(1) 
 85                else:
 86                    aseq.append(row.strip())
 87                
 88        if seqname <> None: # last one
 89            l = len(''.join(aseq))
 90            yield (seqname,l)
 91                
 92        
 93    def fcheck(self):
 94        """ are all fasta sequence same length?
 95        might be mongo big
 96        """
 97        flen = None
 98        lasti = None
 99        f = self.iter_fasta()
100        for i,(seqname,seqlen) in enumerate(f):
101            lasti = i
102            if i == 0:
103                flen = seqlen
104            else:
105                if seqlen <> flen:
106                    print >> sys.stderr,self.badseq % self.opts.input
107                    sys.exit(1)
108        return '# weblogo input %s has %d sequences all of length %d' % (self.opts.input,lasti,flen)
109
110
111    def run(self):
112        check = self.fcheck()
113        self.clparams['-f'] = self.opts.input
114        self.clparams['-o'] = self.opts.output
115        self.clparams['-t'] = '"%s"' % self.opts.logoname # must be wrapped as a string       
116        self.clparams['-F'] = self.opts.outformat       
117        if self.opts.size <> None:
118            self.clparams['-s'] = self.opts.size
119        if self.opts.lower <> None:
120            self.clparams['-l'] = self.opts.lower
121        if self.opts.upper <> None:
122            self.clparams['-u'] = self.opts.upper        
123        if self.opts.colours <> None:
124            self.clparams['-c'] = self.opts.colours
125        if self.opts.units <> None:
126            self.clparams['-U'] = self.opts.units
127        s = self.runCL()
128        return check,s
129
130
if __name__ == '__main__':
    # Script entry point. Called as:
    # <command interpreter="python">
    #     rgWebLogo3.py --outformat $outformat -s $size -i $input -o $output -t "$logoname" -c "$colours"
    # #if $range.mode == 'part'
    # -l "$range.seqstart" -u "$range.seqend"
    # #end if
    # </command>
    op = optparse.OptionParser()
    op.add_option('-i', '--input', default=None)
    op.add_option('-F', '--outformat', default='png')
    op.add_option('-s', '--size', default=None)
    op.add_option('-o', '--output', default='rgWebLogo3')
    op.add_option('-t', '--logoname', default='rgWebLogo3')
    op.add_option('-c', '--colours', default=None)
    op.add_option('-l', '--lower', default=None)
    op.add_option('-u', '--upper', default=None)
    op.add_option('-U', '--units', default=None)
    opts, args = op.parse_args()
    # Validate explicitly rather than with assert, which is skipped under -O;
    # op.error() prints the message to stderr and exits with status 2.
    if opts.input is None:
        op.error('weblogo3 needs a -i parameter with a fasta input file - cannot open')
    if not os.path.isfile(opts.input):
        op.error('weblogo3 needs a valid fasta input file - cannot open %s' % opts.input)
    w = WL3(opts)
    checks,s = w.run()
    sys.stdout.write('%s\n' % checks) # for info