PageRenderTime 69ms CodeModel.GetById 21ms app.highlight 41ms RepoModel.GetById 2ms app.codeStats 0ms

/Lib/distutils/text_file.py

http://unladen-swallow.googlecode.com/
Python | 381 lines | 304 code | 43 blank | 34 comment | 35 complexity | ce486d337be73ac3f23701231821ea0d MD5 | raw file
  1"""text_file
  2
  3provides the TextFile class, which gives an interface to text files
  4that (optionally) takes care of stripping comments, ignoring blank
  5lines, and joining lines with backslashes."""
  6
  7__revision__ = "$Id: text_file.py 60923 2008-02-21 18:18:37Z guido.van.rossum $"
  8
  9import sys, os
 10
 11
 12class TextFile:
 13
 14    """Provides a file-like object that takes care of all the things you
 15       commonly want to do when processing a text file that has some
 16       line-by-line syntax: strip comments (as long as "#" is your
 17       comment character), skip blank lines, join adjacent lines by
 18       escaping the newline (ie. backslash at end of line), strip
 19       leading and/or trailing whitespace.  All of these are optional
 20       and independently controllable.
 21
 22       Provides a 'warn()' method so you can generate warning messages that
 23       report physical line number, even if the logical line in question
 24       spans multiple physical lines.  Also provides 'unreadline()' for
 25       implementing line-at-a-time lookahead.
 26
 27       Constructor is called as:
 28
 29           TextFile (filename=None, file=None, **options)
 30
 31       It bombs (RuntimeError) if both 'filename' and 'file' are None;
 32       'filename' should be a string, and 'file' a file object (or
 33       something that provides 'readline()' and 'close()' methods).  It is
 34       recommended that you supply at least 'filename', so that TextFile
 35       can include it in warning messages.  If 'file' is not supplied,
 36       TextFile creates its own using the 'open()' builtin.
 37
 38       The options are all boolean, and affect the value returned by
 39       'readline()':
 40         strip_comments [default: true]
 41           strip from "#" to end-of-line, as well as any whitespace
 42           leading up to the "#" -- unless it is escaped by a backslash
 43         lstrip_ws [default: false]
 44           strip leading whitespace from each line before returning it
 45         rstrip_ws [default: true]
 46           strip trailing whitespace (including line terminator!) from
 47           each line before returning it
 48         skip_blanks [default: true}
 49           skip lines that are empty *after* stripping comments and
 50           whitespace.  (If both lstrip_ws and rstrip_ws are false,
 51           then some lines may consist of solely whitespace: these will
 52           *not* be skipped, even if 'skip_blanks' is true.)
 53         join_lines [default: false]
 54           if a backslash is the last non-newline character on a line
 55           after stripping comments and whitespace, join the following line
 56           to it to form one "logical line"; if N consecutive lines end
 57           with a backslash, then N+1 physical lines will be joined to
 58           form one logical line.
 59         collapse_join [default: false]
 60           strip leading whitespace from lines that are joined to their
 61           predecessor; only matters if (join_lines and not lstrip_ws)
 62
 63       Note that since 'rstrip_ws' can strip the trailing newline, the
 64       semantics of 'readline()' must differ from those of the builtin file
 65       object's 'readline()' method!  In particular, 'readline()' returns
 66       None for end-of-file: an empty string might just be a blank line (or
 67       an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is
 68       not."""
 69
 70    default_options = { 'strip_comments': 1,
 71                        'skip_blanks':    1,
 72                        'lstrip_ws':      0,
 73                        'rstrip_ws':      1,
 74                        'join_lines':     0,
 75                        'collapse_join':  0,
 76                      }
 77
 78    def __init__ (self, filename=None, file=None, **options):
 79        """Construct a new TextFile object.  At least one of 'filename'
 80           (a string) and 'file' (a file-like object) must be supplied.
 81           They keyword argument options are described above and affect
 82           the values returned by 'readline()'."""
 83
 84        if filename is None and file is None:
 85            raise RuntimeError, \
 86                  "you must supply either or both of 'filename' and 'file'"
 87
 88        # set values for all options -- either from client option hash
 89        # or fallback to default_options
 90        for opt in self.default_options.keys():
 91            if opt in options:
 92                setattr (self, opt, options[opt])
 93
 94            else:
 95                setattr (self, opt, self.default_options[opt])
 96
 97        # sanity check client option hash
 98        for opt in options.keys():
 99            if opt not in self.default_options:
100                raise KeyError, "invalid TextFile option '%s'" % opt
101
102        if file is None:
103            self.open (filename)
104        else:
105            self.filename = filename
106            self.file = file
107            self.current_line = 0       # assuming that file is at BOF!
108
109        # 'linebuf' is a stack of lines that will be emptied before we
110        # actually read from the file; it's only populated by an
111        # 'unreadline()' operation
112        self.linebuf = []
113
114
115    def open (self, filename):
116        """Open a new file named 'filename'.  This overrides both the
117           'filename' and 'file' arguments to the constructor."""
118
119        self.filename = filename
120        self.file = open (self.filename, 'r')
121        self.current_line = 0
122
123
124    def close (self):
125        """Close the current file and forget everything we know about it
126           (filename, current line number)."""
127
128        self.file.close ()
129        self.file = None
130        self.filename = None
131        self.current_line = None
132
133
134    def gen_error (self, msg, line=None):
135        outmsg = []
136        if line is None:
137            line = self.current_line
138        outmsg.append(self.filename + ", ")
139        if type (line) in (list, tuple):
140            outmsg.append("lines %d-%d: " % tuple (line))
141        else:
142            outmsg.append("line %d: " % line)
143        outmsg.append(str(msg))
144        return "".join(outmsg)
145
146
147    def error (self, msg, line=None):
148        raise ValueError, "error: " + self.gen_error(msg, line)
149
150    def warn (self, msg, line=None):
151        """Print (to stderr) a warning message tied to the current logical
152           line in the current file.  If the current logical line in the
153           file spans multiple physical lines, the warning refers to the
154           whole range, eg. "lines 3-5".  If 'line' supplied, it overrides
155           the current line number; it may be a list or tuple to indicate a
156           range of physical lines, or an integer for a single physical
157           line."""
158        sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n")
159
160
161    def readline (self):
162        """Read and return a single logical line from the current file (or
163           from an internal buffer if lines have previously been "unread"
164           with 'unreadline()').  If the 'join_lines' option is true, this
165           may involve reading multiple physical lines concatenated into a
166           single string.  Updates the current line number, so calling
167           'warn()' after 'readline()' emits a warning about the physical
168           line(s) just read.  Returns None on end-of-file, since the empty
169           string can occur if 'rstrip_ws' is true but 'strip_blanks' is
170           not."""
171
172        # If any "unread" lines waiting in 'linebuf', return the top
173        # one.  (We don't actually buffer read-ahead data -- lines only
174        # get put in 'linebuf' if the client explicitly does an
175        # 'unreadline()'.
176        if self.linebuf:
177            line = self.linebuf[-1]
178            del self.linebuf[-1]
179            return line
180
181        buildup_line = ''
182
183        while 1:
184            # read the line, make it None if EOF
185            line = self.file.readline()
186            if line == '': line = None
187
188            if self.strip_comments and line:
189
190                # Look for the first "#" in the line.  If none, never
191                # mind.  If we find one and it's the first character, or
192                # is not preceded by "\", then it starts a comment --
193                # strip the comment, strip whitespace before it, and
194                # carry on.  Otherwise, it's just an escaped "#", so
195                # unescape it (and any other escaped "#"'s that might be
196                # lurking in there) and otherwise leave the line alone.
197
198                pos = line.find("#")
199                if pos == -1:           # no "#" -- no comments
200                    pass
201
202                # It's definitely a comment -- either "#" is the first
203                # character, or it's elsewhere and unescaped.
204                elif pos == 0 or line[pos-1] != "\\":
205                    # Have to preserve the trailing newline, because it's
206                    # the job of a later step (rstrip_ws) to remove it --
207                    # and if rstrip_ws is false, we'd better preserve it!
208                    # (NB. this means that if the final line is all comment
209                    # and has no trailing newline, we will think that it's
210                    # EOF; I think that's OK.)
211                    eol = (line[-1] == '\n') and '\n' or ''
212                    line = line[0:pos] + eol
213
214                    # If all that's left is whitespace, then skip line
215                    # *now*, before we try to join it to 'buildup_line' --
216                    # that way constructs like
217                    #   hello \\
218                    #   # comment that should be ignored
219                    #   there
220                    # result in "hello there".
221                    if line.strip() == "":
222                        continue
223
224                else:                   # it's an escaped "#"
225                    line = line.replace("\\#", "#")
226
227
228            # did previous line end with a backslash? then accumulate
229            if self.join_lines and buildup_line:
230                # oops: end of file
231                if line is None:
232                    self.warn ("continuation line immediately precedes "
233                               "end-of-file")
234                    return buildup_line
235
236                if self.collapse_join:
237                    line = line.lstrip()
238                line = buildup_line + line
239
240                # careful: pay attention to line number when incrementing it
241                if type (self.current_line) is list:
242                    self.current_line[1] = self.current_line[1] + 1
243                else:
244                    self.current_line = [self.current_line,
245                                         self.current_line+1]
246            # just an ordinary line, read it as usual
247            else:
248                if line is None:        # eof
249                    return None
250
251                # still have to be careful about incrementing the line number!
252                if type (self.current_line) is list:
253                    self.current_line = self.current_line[1] + 1
254                else:
255                    self.current_line = self.current_line + 1
256
257
258            # strip whitespace however the client wants (leading and
259            # trailing, or one or the other, or neither)
260            if self.lstrip_ws and self.rstrip_ws:
261                line = line.strip()
262            elif self.lstrip_ws:
263                line = line.lstrip()
264            elif self.rstrip_ws:
265                line = line.rstrip()
266
267            # blank line (whether we rstrip'ed or not)? skip to next line
268            # if appropriate
269            if (line == '' or line == '\n') and self.skip_blanks:
270                continue
271
272            if self.join_lines:
273                if line[-1] == '\\':
274                    buildup_line = line[:-1]
275                    continue
276
277                if line[-2:] == '\\\n':
278                    buildup_line = line[0:-2] + '\n'
279                    continue
280
281            # well, I guess there's some actual content there: return it
282            return line
283
284    # readline ()
285
286
287    def readlines (self):
288        """Read and return the list of all logical lines remaining in the
289           current file."""
290
291        lines = []
292        while 1:
293            line = self.readline()
294            if line is None:
295                return lines
296            lines.append (line)
297
298
299    def unreadline (self, line):
300        """Push 'line' (a string) onto an internal buffer that will be
301           checked by future 'readline()' calls.  Handy for implementing
302           a parser with line-at-a-time lookahead."""
303
304        self.linebuf.append (line)
305
306
if __name__ == "__main__":
    # Self-test: write a small fixture file, read it back through TextFile
    # with several option combinations, and print "ok"/"not ok" per case.
    import tempfile

    test_data = """# test file

line 3 \\
# intervening comment
  continues on next line
"""
    # result 1: no fancy options
    result1 = [x + "\n" for x in test_data.split("\n")[0:-1]]

    # result 2: just strip comments
    result2 = ["\n",
               "line 3 \\\n",
               "  continues on next line\n"]

    # result 3: just strip blank lines
    result3 = ["# test file\n",
               "line 3 \\\n",
               "# intervening comment\n",
               "  continues on next line\n"]

    # result 4: default, strip comments, blank lines, and trailing whitespace
    result4 = ["line 3 \\",
               "  continues on next line"]

    # result 5: strip comments and blanks, plus join lines (but don't
    # "collapse" joined lines)
    result5 = ["line 3   continues on next line"]

    # result 6: strip comments and blanks, plus join lines (and
    # "collapse" joined lines)
    result6 = ["line 3 continues on next line"]

    def test_input(count, description, file, expected_result):
        """Read every logical line from 'file' (a TextFile), close it, and
           report whether the lines equal 'expected_result'."""
        try:
            result = file.readlines()
        finally:
            file.close()
        if result == expected_result:
            print("ok %d (%s)" % (count, description))
        else:
            print("not ok %d (%s):" % (count, description))
            print("** expected:")
            print(expected_result)
            print("** received:")
            print(result)

    # Use a private temporary file so an existing "test.txt" in the
    # current directory is never overwritten or deleted.
    fd, filename = tempfile.mkstemp(suffix=".txt")
    try:
        out_file = os.fdopen(fd, "w")
        try:
            out_file.write(test_data)
        finally:
            out_file.close()

        in_file = TextFile(filename, strip_comments=0, skip_blanks=0,
                           lstrip_ws=0, rstrip_ws=0)
        test_input(1, "no processing", in_file, result1)

        in_file = TextFile(filename, strip_comments=1, skip_blanks=0,
                           lstrip_ws=0, rstrip_ws=0)
        test_input(2, "strip comments", in_file, result2)

        in_file = TextFile(filename, strip_comments=0, skip_blanks=1,
                           lstrip_ws=0, rstrip_ws=0)
        test_input(3, "strip blanks", in_file, result3)

        in_file = TextFile(filename)
        test_input(4, "default processing", in_file, result4)

        in_file = TextFile(filename, strip_comments=1, skip_blanks=1,
                           join_lines=1, rstrip_ws=1)
        test_input(5, "join lines without collapsing", in_file, result5)

        in_file = TextFile(filename, strip_comments=1, skip_blanks=1,
                           join_lines=1, rstrip_ws=1, collapse_join=1)
        test_input(6, "join lines with collapsing", in_file, result6)
    finally:
        # always remove the fixture, even if a case blew up
        os.remove(filename)