PageRenderTime 140ms CodeModel.GetById 29ms app.highlight 95ms RepoModel.GetById 1ms app.codeStats 1ms

/Lib/pipes.py

http://unladen-swallow.googlecode.com/
Python | 282 lines | 226 code | 14 blank | 42 comment | 8 complexity | bddc74f34d96b3999939bfc866dee126 MD5 | raw file
  1"""Conversion pipeline templates.
  2
  3The problem:
  4------------
  5
  6Suppose you have some data that you want to convert to another format,
  7such as from GIF image format to PPM image format.  Maybe the
  8conversion involves several steps (e.g. piping it through compress or
  9uuencode).  Some of the conversion steps may require that their input
 10is a disk file, others may be able to read standard input; similar for
 11their output.  The input to the entire conversion may also be read
 12from a disk file or from an open file, and similar for its output.
 13
 14The module lets you construct a pipeline template by sticking one or
 15more conversion steps together.  It will take care of creating and
 16removing temporary files if they are necessary to hold intermediate
 17data.  You can then use the template to do conversions from many
 18different sources to many different destinations.  The temporary
 19file names used are different each time the template is used.
 20
 21The templates are objects so you can create templates for many
 22different conversion steps and store them in a dictionary, for
 23instance.
 24
 25
 26Directions:
 27-----------
 28
 29To create a template:
 30    t = Template()
 31
 32To add a conversion step to a template:
 33   t.append(command, kind)
 34where kind is a string of two characters: the first is '-' if the
 35command reads its standard input or 'f' if it requires a file; the
 36second likewise for the output. The command must be valid /bin/sh
 37syntax.  If input or output files are required, they are passed as
  38$IN and $OUT; otherwise, it must be possible to use the command in
 39a pipeline.
 40
 41To add a conversion step at the beginning:
 42   t.prepend(command, kind)
 43
 44To convert a file to another file using a template:
 45  sts = t.copy(infile, outfile)
 46If infile or outfile are the empty string, standard input is read or
 47standard output is written, respectively.  The return value is the
 48exit status of the conversion pipeline.
 49
 50To open a file for reading or writing through a conversion pipeline:
 51   fp = t.open(file, mode)
 52where mode is 'r' to read the file, or 'w' to write it -- just like
 53for the built-in function open() or for os.popen().
 54
 55To create a new template object initialized to a given one:
 56   t2 = t.clone()
 57
 58For an example, see the function test() at the end of the file.
 59"""                                     # '
 60
 61
 62import re
 63import os
 64import tempfile
 65import string
 66
 67__all__ = ["Template"]
 68
 69# Conversion step kinds
 70
 71FILEIN_FILEOUT = 'ff'                   # Must read & write real files
 72STDIN_FILEOUT  = '-f'                   # Must write a real file
 73FILEIN_STDOUT  = 'f-'                   # Must read a real file
 74STDIN_STDOUT   = '--'                   # Normal pipeline element
 75SOURCE         = '.-'                   # Must be first, writes stdout
 76SINK           = '-.'                   # Must be last, reads stdin
 77
 78stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT, \
 79             SOURCE, SINK]
 80
 81
 82class Template:
 83    """Class representing a pipeline template."""
 84
 85    def __init__(self):
 86        """Template() returns a fresh pipeline template."""
 87        self.debugging = 0
 88        self.reset()
 89
 90    def __repr__(self):
 91        """t.__repr__() implements repr(t)."""
 92        return '<Template instance, steps=%r>' % (self.steps,)
 93
 94    def reset(self):
 95        """t.reset() restores a pipeline template to its initial state."""
 96        self.steps = []
 97
 98    def clone(self):
 99        """t.clone() returns a new pipeline template with identical
100        initial state as the current one."""
101        t = Template()
102        t.steps = self.steps[:]
103        t.debugging = self.debugging
104        return t
105
106    def debug(self, flag):
107        """t.debug(flag) turns debugging on or off."""
108        self.debugging = flag
109
110    def append(self, cmd, kind):
111        """t.append(cmd, kind) adds a new step at the end."""
112        if type(cmd) is not type(''):
113            raise TypeError, \
114                  'Template.append: cmd must be a string'
115        if kind not in stepkinds:
116            raise ValueError, \
117                  'Template.append: bad kind %r' % (kind,)
118        if kind == SOURCE:
119            raise ValueError, \
120                  'Template.append: SOURCE can only be prepended'
121        if self.steps and self.steps[-1][1] == SINK:
122            raise ValueError, \
123                  'Template.append: already ends with SINK'
124        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
125            raise ValueError, \
126                  'Template.append: missing $IN in cmd'
127        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
128            raise ValueError, \
129                  'Template.append: missing $OUT in cmd'
130        self.steps.append((cmd, kind))
131
132    def prepend(self, cmd, kind):
133        """t.prepend(cmd, kind) adds a new step at the front."""
134        if type(cmd) is not type(''):
135            raise TypeError, \
136                  'Template.prepend: cmd must be a string'
137        if kind not in stepkinds:
138            raise ValueError, \
139                  'Template.prepend: bad kind %r' % (kind,)
140        if kind == SINK:
141            raise ValueError, \
142                  'Template.prepend: SINK can only be appended'
143        if self.steps and self.steps[0][1] == SOURCE:
144            raise ValueError, \
145                  'Template.prepend: already begins with SOURCE'
146        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
147            raise ValueError, \
148                  'Template.prepend: missing $IN in cmd'
149        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
150            raise ValueError, \
151                  'Template.prepend: missing $OUT in cmd'
152        self.steps.insert(0, (cmd, kind))
153
154    def open(self, file, rw):
155        """t.open(file, rw) returns a pipe or file object open for
156        reading or writing; the file is the other end of the pipeline."""
157        if rw == 'r':
158            return self.open_r(file)
159        if rw == 'w':
160            return self.open_w(file)
161        raise ValueError, \
162              'Template.open: rw must be \'r\' or \'w\', not %r' % (rw,)
163
164    def open_r(self, file):
165        """t.open_r(file) and t.open_w(file) implement
166        t.open(file, 'r') and t.open(file, 'w') respectively."""
167        if not self.steps:
168            return open(file, 'r')
169        if self.steps[-1][1] == SINK:
170            raise ValueError, \
171                  'Template.open_r: pipeline ends width SINK'
172        cmd = self.makepipeline(file, '')
173        return os.popen(cmd, 'r')
174
175    def open_w(self, file):
176        if not self.steps:
177            return open(file, 'w')
178        if self.steps[0][1] == SOURCE:
179            raise ValueError, \
180                  'Template.open_w: pipeline begins with SOURCE'
181        cmd = self.makepipeline('', file)
182        return os.popen(cmd, 'w')
183
184    def copy(self, infile, outfile):
185        return os.system(self.makepipeline(infile, outfile))
186
187    def makepipeline(self, infile, outfile):
188        cmd = makepipeline(infile, self.steps, outfile)
189        if self.debugging:
190            print cmd
191            cmd = 'set -x; ' + cmd
192        return cmd
193
194
def makepipeline(infile, steps, outfile):
    """Return the /bin/sh command text that runs steps as one pipeline.

    infile and outfile name the files at the two ends; an empty string
    means the corresponding standard stream.  steps is a sequence of
    (cmd, kind) pairs.  Temporary files are created here, with
    tempfile.mkstemp(), for every junction where one side needs a real
    file; the returned text removes them at the end and on the trapped
    signals.
    """
    # One mutable record per stage:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', command, kind, ''] for command, kind in steps]

    # An empty template still needs something to run.
    if not stages:
        stages.append(['', 'cat', '--', ''])

    # A first stage that wants a file cannot read stdin directly, so put
    # a plain 'cat' in front of it; likewise at the output end.
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile

    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile

    # Connect neighbouring stages through a temporary file whenever
    # either side of the junction requires a real file.
    temp_names = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            handle, path = tempfile.mkstemp()
            os.close(handle)
            temp_names.append(path)
            left[-1] = right[0] = path

    # Decorate each command with its $IN/$OUT assignments and with
    # redirections for any named file on a standard-stream end.
    for stage in stages:
        source, command, kind, target = stage
        if kind[1] == 'f':
            command = 'OUT=' + quote(target) + '; ' + command
        if kind[0] == 'f':
            command = 'IN=' + quote(source) + '; ' + command
        if kind[0] == '-' and source:
            command = command + ' <' + quote(source)
        if kind[1] == '-' and target:
            command = command + ' >' + quote(target)
        stage[1] = command

    # Stages with no input file are piped from their predecessor;
    # stages fed from a file start a new command line.
    script = stages[0][1]
    for stage in stages[1:]:
        command, kind = stage[1:3]
        if stage[0] != '':
            script = script + '\n' + command
            continue
        if 'f' in kind:
            # Group the variable assignment with the command so the
            # pipe applies to both.
            command = '{ ' + command + '; }'
        script = script + ' |\n' + command

    # Remove the temporary files when done, and also on signals.
    if temp_names:
        remover = ' '.join(['rm -f'] + [quote(name) for name in temp_names])
        trap = 'trap ' + quote(remover + '; exit') + ' 1 2 3 13 14 15'
        script = trap + '\n' + script + '\n' + remover

    return script
262
263
# Reliably quote a string as a single argument for /bin/sh

_safechars = string.ascii_letters + string.digits + '!@%_-+=:,./' # Safe unquoted
_funnychars = '"`$\\'                           # Unsafe inside "double quotes"

def quote(file):
    """Return file quoted so that /bin/sh reads it as a single argument.

    An empty string becomes '' (two single quotes) so that it is still
    passed as an argument instead of disappearing.  A string made only
    of characters in _safechars is returned unchanged.  A string
    without a single quote is wrapped in single quotes.  Anything else
    is wrapped in double quotes with each character in _funnychars
    preceded by a backslash.
    """
    if not file:
        # Returned bare, an empty string would vanish from the command.
        return "''"
    if all(c in _safechars for c in file):
        return file
    if "'" not in file:
        return "'" + file + "'"
    escaped = ''.join(
        ('\\' + c) if c in _funnychars else c
        for c in file
    )
    return '"' + escaped + '"'