PageRenderTime 124ms CodeModel.GetById 59ms app.highlight 51ms RepoModel.GetById 1ms app.codeStats 1ms

/Lib/io.py

http://unladen-swallow.googlecode.com/
Python | 1867 lines | 1784 code | 21 blank | 62 comment | 57 complexity | fc255fde32d0e42796666ddb78d71643 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1"""
   2The io module provides the Python interfaces to stream handling. The
   3builtin open function is defined in this module.
   4
   5At the top of the I/O hierarchy is the abstract base class IOBase. It
   6defines the basic interface to a stream. Note, however, that there is no
   7separation between reading and writing to streams; implementations are
   8allowed to throw an IOError if they do not support a given operation.
   9
  10Extending IOBase is RawIOBase which deals simply with the reading and
  11writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide
  12an interface to OS files.
  13
  14BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its
  15subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer
  16streams that are readable, writable, and both respectively.
  17BufferedRandom provides a buffered interface to random access
  18streams. BytesIO is a simple stream of in-memory bytes.
  19
  20Another IOBase subclass, TextIOBase, deals with the encoding and decoding
  21of streams into text. TextIOWrapper, which extends it, is a buffered text
interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO
is an in-memory stream for text.
  24
  25Argument names are not part of the specification, and only the arguments
  26of open() are intended to be used as keyword arguments.
  27
  28data:
  29
  30DEFAULT_BUFFER_SIZE
  31
  32   An int containing the default buffer size used by the module's buffered
  33   I/O classes. open() uses the file's blksize (as obtained by os.stat) if
  34   possible.
  35"""
  36# New I/O library conforming to PEP 3116.
  37
  38# This is a prototype; hopefully eventually some of this will be
  39# reimplemented in C.
  40
  41# XXX edge cases when switching between reading/writing
  42# XXX need to support 1 meaning line-buffered
  43# XXX whenever an argument is None, use the default value
  44# XXX read/write ops should check readable/writable
  45# XXX buffered readinto should work with arbitrary buffer objects
  46# XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
  47# XXX check writable, readable and seekable in appropriate places
  48from __future__ import print_function
  49from __future__ import unicode_literals
  50
# Credits for the module, joined into a single comma-separated string.
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# Names exported by "from io import *".
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]
  59
  60import os
  61import abc
  62import codecs
  63import _fileio
  64import threading
  65
# Fallback buffer size.  open() uses the file's st_blksize (obtained from
# os.fstat) instead whenever that value is available and greater than 1.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# py3k has only new style classes; setting the module-level metaclass makes
# classes declared here without explicit bases new-style as well.
__metaclass__ = type
  71
  72class BlockingIOError(IOError):
  73
  74    """Exception raised when I/O would block on a non-blocking I/O stream."""
  75
  76    def __init__(self, errno, strerror, characters_written=0):
  77        IOError.__init__(self, errno, strerror)
  78        self.characters_written = characters_written
  79
  80
  81def open(file, mode="r", buffering=None, encoding=None, errors=None,
  82         newline=None, closefd=True):
  83    r"""Open file and return a stream. If the file cannot be opened, an IOError is
  84    raised.
  85
  86    file is either a string giving the name (and the path if the file
  87    isn't in the current working directory) of the file to be opened or an
  88    integer file descriptor of the file to be wrapped. (If a file
  89    descriptor is given, it is closed when the returned I/O object is
  90    closed, unless closefd is set to False.)
  91
  92    mode is an optional string that specifies the mode in which the file
  93    is opened. It defaults to 'r' which means open for reading in text
  94    mode.  Other common values are 'w' for writing (truncating the file if
  95    it already exists), and 'a' for appending (which on some Unix systems,
  96    means that all writes append to the end of the file regardless of the
  97    current seek position). In text mode, if encoding is not specified the
  98    encoding used is platform dependent. (For reading and writing raw
  99    bytes use binary mode and leave encoding unspecified.) The available
 100    modes are:
 101
 102    ========= ===============================================================
 103    Character Meaning
 104    --------- ---------------------------------------------------------------
 105    'r'       open for reading (default)
 106    'w'       open for writing, truncating the file first
 107    'a'       open for writing, appending to the end of the file if it exists
 108    'b'       binary mode
 109    't'       text mode (default)
 110    '+'       open a disk file for updating (reading and writing)
 111    'U'       universal newline mode (for backwards compatibility; unneeded
 112              for new code)
 113    ========= ===============================================================
 114
 115    The default mode is 'rt' (open for reading text). For binary random
 116    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
 117    'r+b' opens the file without truncation.
 118
 119    Python distinguishes between files opened in binary and text modes,
 120    even when the underlying operating system doesn't. Files opened in
 121    binary mode (appending 'b' to the mode argument) return contents as
 122    bytes objects without any decoding. In text mode (the default, or when
 123    't' is appended to the mode argument), the contents of the file are
 124    returned as strings, the bytes having been first decoded using a
 125    platform-dependent encoding or using the specified encoding if given.
 126
 127    buffering is an optional integer used to set the buffering policy. By
 128    default full buffering is on. Pass 0 to switch buffering off (only
 129    allowed in binary mode), 1 to set line buffering, and an integer > 1
 130    for full buffering.
 131
 132    encoding is the name of the encoding used to decode or encode the
 133    file. This should only be used in text mode. The default encoding is
 134    platform dependent, but any encoding supported by Python can be
 135    passed.  See the codecs module for the list of supported encodings.
 136
 137    errors is an optional string that specifies how encoding errors are to
 138    be handled---this argument should not be used in binary mode. Pass
 139    'strict' to raise a ValueError exception if there is an encoding error
 140    (the default of None has the same effect), or pass 'ignore' to ignore
 141    errors. (Note that ignoring encoding errors can lead to data loss.)
 142    See the documentation for codecs.register for a list of the permitted
 143    encoding error strings.
 144
 145    newline controls how universal newlines works (it only applies to text
 146    mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
 147    follows:
 148
 149    * On input, if newline is None, universal newlines mode is
 150      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
 151      these are translated into '\n' before being returned to the
 152      caller. If it is '', universal newline mode is enabled, but line
 153      endings are returned to the caller untranslated. If it has any of
 154      the other legal values, input lines are only terminated by the given
 155      string, and the line ending is returned to the caller untranslated.
 156
 157    * On output, if newline is None, any '\n' characters written are
 158      translated to the system default line separator, os.linesep. If
 159      newline is '', no translation takes place. If newline is any of the
 160      other legal values, any '\n' characters written are translated to
 161      the given string.
 162
 163    If closefd is False, the underlying file descriptor will be kept open
 164    when the file is closed. This does not work when a file name is given
 165    and must be True in that case.
 166
 167    open() returns a file object whose type depends on the mode, and
 168    through which the standard file operations such as reading and writing
 169    are performed. When open() is used to open a file in a text mode ('w',
 170    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
 171    a file in a binary mode, the returned class varies: in read binary
 172    mode, it returns a BufferedReader; in write binary and append binary
 173    modes, it returns a BufferedWriter, and in read/write mode, it returns
 174    a BufferedRandom.
 175
 176    It is also possible to use a string or bytearray as a file for both
 177    reading and writing. For strings StringIO can be used like a file
 178    opened in a text mode, and for bytes a BytesIO can be used like a file
 179    opened in a binary mode.
 180    """
 181    if not isinstance(file, (basestring, int)):
 182        raise TypeError("invalid file: %r" % file)
 183    if not isinstance(mode, basestring):
 184        raise TypeError("invalid mode: %r" % mode)
 185    if buffering is not None and not isinstance(buffering, int):
 186        raise TypeError("invalid buffering: %r" % buffering)
 187    if encoding is not None and not isinstance(encoding, basestring):
 188        raise TypeError("invalid encoding: %r" % encoding)
 189    if errors is not None and not isinstance(errors, basestring):
 190        raise TypeError("invalid errors: %r" % errors)
 191    modes = set(mode)
 192    if modes - set("arwb+tU") or len(mode) > len(modes):
 193        raise ValueError("invalid mode: %r" % mode)
 194    reading = "r" in modes
 195    writing = "w" in modes
 196    appending = "a" in modes
 197    updating = "+" in modes
 198    text = "t" in modes
 199    binary = "b" in modes
 200    if "U" in modes:
 201        if writing or appending:
 202            raise ValueError("can't use U and writing mode at once")
 203        reading = True
 204    if text and binary:
 205        raise ValueError("can't have text and binary mode at once")
 206    if reading + writing + appending > 1:
 207        raise ValueError("can't have read/write/append mode at once")
 208    if not (reading or writing or appending):
 209        raise ValueError("must have exactly one of read/write/append mode")
 210    if binary and encoding is not None:
 211        raise ValueError("binary mode doesn't take an encoding argument")
 212    if binary and errors is not None:
 213        raise ValueError("binary mode doesn't take an errors argument")
 214    if binary and newline is not None:
 215        raise ValueError("binary mode doesn't take a newline argument")
 216    raw = FileIO(file,
 217                 (reading and "r" or "") +
 218                 (writing and "w" or "") +
 219                 (appending and "a" or "") +
 220                 (updating and "+" or ""),
 221                 closefd)
 222    if buffering is None:
 223        buffering = -1
 224    line_buffering = False
 225    if buffering == 1 or buffering < 0 and raw.isatty():
 226        buffering = -1
 227        line_buffering = True
 228    if buffering < 0:
 229        buffering = DEFAULT_BUFFER_SIZE
 230        try:
 231            bs = os.fstat(raw.fileno()).st_blksize
 232        except (os.error, AttributeError):
 233            pass
 234        else:
 235            if bs > 1:
 236                buffering = bs
 237    if buffering < 0:
 238        raise ValueError("invalid buffering size")
 239    if buffering == 0:
 240        if binary:
 241            return raw
 242        raise ValueError("can't have unbuffered text I/O")
 243    if updating:
 244        buffer = BufferedRandom(raw, buffering)
 245    elif writing or appending:
 246        buffer = BufferedWriter(raw, buffering)
 247    elif reading:
 248        buffer = BufferedReader(raw, buffering)
 249    else:
 250        raise ValueError("unknown mode: %r" % mode)
 251    if binary:
 252        return buffer
 253    text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
 254    text.mode = mode
 255    return text
 256
 257class _DocDescriptor:
 258    """Helper for builtins.open.__doc__
 259    """
 260    def __get__(self, obj, typ):
 261        return (
 262            "open(file, mode='r', buffering=None, encoding=None, "
 263                 "errors=None, newline=None, closefd=True)\n\n" +
 264            open.__doc__)
 265
 266class OpenWrapper:
 267    """Wrapper for builtins.open
 268
 269    Trick so that open won't become a bound method when stored
 270    as a class variable (as dumbdbm does).
 271
 272    See initstdio() in Python/pythonrun.c.
 273    """
 274    __doc__ = _DocDescriptor()
 275
 276    def __new__(cls, *args, **kwargs):
 277        return open(*args, **kwargs)
 278
 279
 280class UnsupportedOperation(ValueError, IOError):
 281    pass
 282
 283
 284class IOBase(object):
 285
 286    """The abstract base class for all I/O classes, acting on streams of
 287    bytes. There is no public constructor.
 288
 289    This class provides dummy implementations for many methods that
 290    derived classes can override selectively; the default implementations
 291    represent a file that cannot be read, written or seeked.
 292
 293    Even though IOBase does not declare read, readinto, or write because
 294    their signatures will vary, implementations and clients should
 295    consider those methods part of the interface. Also, implementations
 296    may raise a IOError when operations they do not support are called.
 297
 298    The basic type used for binary data read from or written to a file is
 299    bytes. bytearrays are accepted too, and in some cases (such as
 300    readinto) needed. Text I/O classes work with str data.
 301
 302    Note that calling any method (even inquiries) on a closed stream is
 303    undefined. Implementations may raise IOError in this case.
 304
 305    IOBase (and its subclasses) support the iterator protocol, meaning
 306    that an IOBase object can be iterated over yielding the lines in a
 307    stream.
 308
 309    IOBase also supports the :keyword:`with` statement. In this example,
 310    fp is closed after the suite of the with statment is complete:
 311
 312    with open('spam.txt', 'r') as fp:
 313        fp.write('Spam and eggs!')
 314    """
 315
 316    __metaclass__ = abc.ABCMeta
 317
 318    ### Internal ###
 319
 320    def _unsupported(self, name):
 321        """Internal: raise an exception for unsupported operations."""
 322        raise UnsupportedOperation("%s.%s() not supported" %
 323                                   (self.__class__.__name__, name))
 324
 325    ### Positioning ###
 326
 327    def seek(self, pos, whence = 0):
 328        """Change stream position.
 329
 330        Change the stream position to byte offset offset. offset is
 331        interpreted relative to the position indicated by whence.  Values
 332        for whence are:
 333
 334        * 0 -- start of stream (the default); offset should be zero or positive
 335        * 1 -- current stream position; offset may be negative
 336        * 2 -- end of stream; offset is usually negative
 337
 338        Return the new absolute position.
 339        """
 340        self._unsupported("seek")
 341
 342    def tell(self):
 343        """Return current stream position."""
 344        return self.seek(0, 1)
 345
 346    def truncate(self, pos = None):
 347        """Truncate file to size bytes.
 348
 349        Size defaults to the current IO position as reported by tell().  Return
 350        the new size.
 351        """
 352        self._unsupported("truncate")
 353
 354    ### Flush and close ###
 355
 356    def flush(self):
 357        """Flush write buffers, if applicable.
 358
 359        This is not implemented for read-only and non-blocking streams.
 360        """
 361        # XXX Should this return the number of bytes written???
 362
 363    __closed = False
 364
 365    def close(self):
 366        """Flush and close the IO object.
 367
 368        This method has no effect if the file is already closed.
 369        """
 370        if not self.__closed:
 371            try:
 372                self.flush()
 373            except IOError:
 374                pass  # If flush() fails, just give up
 375            self.__closed = True
 376
 377    def __del__(self):
 378        """Destructor.  Calls close()."""
 379        # The try/except block is in case this is called at program
 380        # exit time, when it's possible that globals have already been
 381        # deleted, and then the close() call might fail.  Since
 382        # there's nothing we can do about such failures and they annoy
 383        # the end users, we suppress the traceback.
 384        try:
 385            self.close()
 386        except:
 387            pass
 388
 389    ### Inquiries ###
 390
 391    def seekable(self):
 392        """Return whether object supports random access.
 393
 394        If False, seek(), tell() and truncate() will raise IOError.
 395        This method may need to do a test seek().
 396        """
 397        return False
 398
 399    def _checkSeekable(self, msg=None):
 400        """Internal: raise an IOError if file is not seekable
 401        """
 402        if not self.seekable():
 403            raise IOError("File or stream is not seekable."
 404                          if msg is None else msg)
 405
 406
 407    def readable(self):
 408        """Return whether object was opened for reading.
 409
 410        If False, read() will raise IOError.
 411        """
 412        return False
 413
 414    def _checkReadable(self, msg=None):
 415        """Internal: raise an IOError if file is not readable
 416        """
 417        if not self.readable():
 418            raise IOError("File or stream is not readable."
 419                          if msg is None else msg)
 420
 421    def writable(self):
 422        """Return whether object was opened for writing.
 423
 424        If False, write() and truncate() will raise IOError.
 425        """
 426        return False
 427
 428    def _checkWritable(self, msg=None):
 429        """Internal: raise an IOError if file is not writable
 430        """
 431        if not self.writable():
 432            raise IOError("File or stream is not writable."
 433                          if msg is None else msg)
 434
 435    @property
 436    def closed(self):
 437        """closed: bool.  True iff the file has been closed.
 438
 439        For backwards compatibility, this is a property, not a predicate.
 440        """
 441        return self.__closed
 442
 443    def _checkClosed(self, msg=None):
 444        """Internal: raise an ValueError if file is closed
 445        """
 446        if self.closed:
 447            raise ValueError("I/O operation on closed file."
 448                             if msg is None else msg)
 449
 450    ### Context manager ###
 451
 452    def __enter__(self):
 453        """Context management protocol.  Returns self."""
 454        self._checkClosed()
 455        return self
 456
 457    def __exit__(self, *args):
 458        """Context management protocol.  Calls close()"""
 459        self.close()
 460
 461    ### Lower-level APIs ###
 462
 463    # XXX Should these be present even if unimplemented?
 464
 465    def fileno(self):
 466        """Returns underlying file descriptor if one exists.
 467
 468        An IOError is raised if the IO object does not use a file descriptor.
 469        """
 470        self._unsupported("fileno")
 471
 472    def isatty(self):
 473        """Return whether this is an 'interactive' stream.
 474
 475        Return False if it can't be determined.
 476        """
 477        self._checkClosed()
 478        return False
 479
 480    ### Readline[s] and writelines ###
 481
 482    def readline(self, limit = -1):
 483        r"""Read and return a line from the stream.
 484
 485        If limit is specified, at most limit bytes will be read.
 486
 487        The line terminator is always b'\n' for binary files; for text
 488        files, the newlines argument to open can be used to select the line
 489        terminator(s) recognized.
 490        """
 491        self._checkClosed()
 492        if hasattr(self, "peek"):
 493            def nreadahead():
 494                readahead = self.peek(1)
 495                if not readahead:
 496                    return 1
 497                n = (readahead.find(b"\n") + 1) or len(readahead)
 498                if limit >= 0:
 499                    n = min(n, limit)
 500                return n
 501        else:
 502            def nreadahead():
 503                return 1
 504        if limit is None:
 505            limit = -1
 506        if not isinstance(limit, (int, long)):
 507            raise TypeError("limit must be an integer")
 508        res = bytearray()
 509        while limit < 0 or len(res) < limit:
 510            b = self.read(nreadahead())
 511            if not b:
 512                break
 513            res += b
 514            if res.endswith(b"\n"):
 515                break
 516        return bytes(res)
 517
 518    def __iter__(self):
 519        self._checkClosed()
 520        return self
 521
 522    def next(self):
 523        line = self.readline()
 524        if not line:
 525            raise StopIteration
 526        return line
 527
 528    def readlines(self, hint=None):
 529        """Return a list of lines from the stream.
 530
 531        hint can be specified to control the number of lines read: no more
 532        lines will be read if the total size (in bytes/characters) of all
 533        lines so far exceeds hint.
 534        """
 535        if hint is None:
 536            hint = -1
 537        if not isinstance(hint, (int, long)):
 538            raise TypeError("hint must be an integer")
 539        if hint <= 0:
 540            return list(self)
 541        n = 0
 542        lines = []
 543        for line in self:
 544            lines.append(line)
 545            n += len(line)
 546            if n >= hint:
 547                break
 548        return lines
 549
 550    def writelines(self, lines):
 551        self._checkClosed()
 552        for line in lines:
 553            self.write(line)
 554
 555
 556class RawIOBase(IOBase):
 557
 558    """Base class for raw binary I/O."""
 559
 560    # The read() method is implemented by calling readinto(); derived
 561    # classes that want to support read() only need to implement
 562    # readinto() as a primitive operation.  In general, readinto() can be
 563    # more efficient than read().
 564
 565    # (It would be tempting to also provide an implementation of
 566    # readinto() in terms of read(), in case the latter is a more suitable
 567    # primitive operation, but that would lead to nasty recursion in case
 568    # a subclass doesn't implement either.)
 569
 570    def read(self, n = -1):
 571        """Read and return up to n bytes.
 572
 573        Returns an empty bytes array on EOF, or None if the object is
 574        set not to block and has no data to read.
 575        """
 576        if n is None:
 577            n = -1
 578        if n < 0:
 579            return self.readall()
 580        b = bytearray(n.__index__())
 581        n = self.readinto(b)
 582        del b[n:]
 583        return bytes(b)
 584
 585    def readall(self):
 586        """Read until EOF, using multiple read() call."""
 587        res = bytearray()
 588        while True:
 589            data = self.read(DEFAULT_BUFFER_SIZE)
 590            if not data:
 591                break
 592            res += data
 593        return bytes(res)
 594
 595    def readinto(self, b):
 596        """Read up to len(b) bytes into b.
 597
 598        Returns number of bytes read (0 for EOF), or None if the object
 599        is set not to block as has no data to read.
 600        """
 601        self._unsupported("readinto")
 602
 603    def write(self, b):
 604        """Write the given buffer to the IO stream.
 605
 606        Returns the number of bytes written, which may be less than len(b).
 607        """
 608        self._unsupported("write")
 609
 610
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files."""

    # This multiply inherits from _FileIO and RawIOBase to make
    # isinstance(io.FileIO(), io.RawIOBase) return True without requiring
    # that _fileio._FileIO inherits from io.RawIOBase (which would be hard
    # to do since _fileio.c is written in C).

    def __init__(self, name, mode="r", closefd=True):
        """Initialize the _FileIO base and remember the name passed in."""
        _fileio._FileIO.__init__(self, name, mode, closefd)
        # Stored only after the base initializer has succeeded.
        self._name = name

    def close(self):
        """Close via _FileIO first, then run RawIOBase's close()."""
        _fileio._FileIO.close(self)
        RawIOBase.close(self)

    @property
    def name(self):
        """The name argument this object was created with."""
        return self._name
 631
 632
 633class BufferedIOBase(IOBase):
 634
 635    """Base class for buffered IO objects.
 636
 637    The main difference with RawIOBase is that the read() method
 638    supports omitting the size argument, and does not have a default
 639    implementation that defers to readinto().
 640
 641    In addition, read(), readinto() and write() may raise
 642    BlockingIOError if the underlying raw stream is in non-blocking
 643    mode and not ready; unlike their raw counterparts, they will never
 644    return None.
 645
 646    A typical implementation should not inherit from a RawIOBase
 647    implementation, but wrap one.
 648    """
 649
 650    def read(self, n = None):
 651        """Read and return up to n bytes.
 652
 653        If the argument is omitted, None, or negative, reads and
 654        returns all data until EOF.
 655
 656        If the argument is positive, and the underlying raw stream is
 657        not 'interactive', multiple raw reads may be issued to satisfy
 658        the byte count (unless EOF is reached first).  But for
 659        interactive raw streams (XXX and for pipes?), at most one raw
 660        read will be issued, and a short result does not imply that
 661        EOF is imminent.
 662
 663        Returns an empty bytes array on EOF.
 664
 665        Raises BlockingIOError if the underlying raw stream has no
 666        data at the moment.
 667        """
 668        self._unsupported("read")
 669
 670    def readinto(self, b):
 671        """Read up to len(b) bytes into b.
 672
 673        Like read(), this may issue multiple reads to the underlying raw
 674        stream, unless the latter is 'interactive'.
 675
 676        Returns the number of bytes read (0 for EOF).
 677
 678        Raises BlockingIOError if the underlying raw stream has no
 679        data at the moment.
 680        """
 681        # XXX This ought to work with anything that supports the buffer API
 682        data = self.read(len(b))
 683        n = len(data)
 684        try:
 685            b[:n] = data
 686        except TypeError as err:
 687            import array
 688            if not isinstance(b, array.array):
 689                raise err
 690            b[:n] = array.array(b'b', data)
 691        return n
 692
 693    def write(self, b):
 694        """Write the given buffer to the IO stream.
 695
 696        Return the number of bytes written, which is never less than
 697        len(b).
 698
 699        Raises BlockingIOError if the buffer is full and the
 700        underlying raw stream cannot accept more data at the moment.
 701        """
 702        self._unsupported("write")
 703
 704
 705class _BufferedIOMixin(BufferedIOBase):
 706
 707    """A mixin implementation of BufferedIOBase with an underlying raw stream.
 708
 709    This passes most requests on to the underlying raw stream.  It
 710    does *not* provide implementations of read(), readinto() or
 711    write().
 712    """
 713
 714    def __init__(self, raw):
 715        self.raw = raw
 716
 717    ### Positioning ###
 718
 719    def seek(self, pos, whence=0):
 720        return self.raw.seek(pos, whence)
 721
 722    def tell(self):
 723        return self.raw.tell()
 724
 725    def truncate(self, pos=None):
 726        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
 727        # and a flush may be necessary to synch both views of the current
 728        # file state.
 729        self.flush()
 730
 731        if pos is None:
 732            pos = self.tell()
 733        # XXX: Should seek() be used, instead of passing the position
 734        # XXX  directly to truncate?
 735        return self.raw.truncate(pos)
 736
 737    ### Flush and close ###
 738
 739    def flush(self):
 740        self.raw.flush()
 741
 742    def close(self):
 743        if not self.closed:
 744            try:
 745                self.flush()
 746            except IOError:
 747                pass  # If flush() fails, just give up
 748            self.raw.close()
 749
 750    ### Inquiries ###
 751
 752    def seekable(self):
 753        return self.raw.seekable()
 754
 755    def readable(self):
 756        return self.raw.readable()
 757
 758    def writable(self):
 759        return self.raw.writable()
 760
 761    @property
 762    def closed(self):
 763        return self.raw.closed
 764
 765    @property
 766    def name(self):
 767        return self.raw.name
 768
 769    @property
 770    def mode(self):
 771        return self.raw.mode
 772
 773    ### Lower-level APIs ###
 774
 775    def fileno(self):
 776        return self.raw.fileno()
 777
 778    def isatty(self):
 779        return self.raw.isatty()
 780
 781
 782class _BytesIO(BufferedIOBase):
 783
 784    """Buffered I/O implementation using an in-memory bytes buffer."""
 785
 786    # XXX More docs
 787
 788    def __init__(self, initial_bytes=None):
 789        buf = bytearray()
 790        if initial_bytes is not None:
 791            buf += bytearray(initial_bytes)
 792        self._buffer = buf
 793        self._pos = 0
 794
 795    def getvalue(self):
 796        """Return the bytes value (contents) of the buffer
 797        """
 798        if self.closed:
 799            raise ValueError("getvalue on closed file")
 800        return bytes(self._buffer)
 801
 802    def read(self, n=None):
 803        if self.closed:
 804            raise ValueError("read from closed file")
 805        if n is None:
 806            n = -1
 807        if not isinstance(n, (int, long)):
 808            raise TypeError("argument must be an integer")
 809        if n < 0:
 810            n = len(self._buffer)
 811        if len(self._buffer) <= self._pos:
 812            return b""
 813        newpos = min(len(self._buffer), self._pos + n)
 814        b = self._buffer[self._pos : newpos]
 815        self._pos = newpos
 816        return bytes(b)
 817
 818    def read1(self, n):
 819        """this is the same as read.
 820        """
 821        return self.read(n)
 822
 823    def write(self, b):
 824        if self.closed:
 825            raise ValueError("write to closed file")
 826        if isinstance(b, unicode):
 827            raise TypeError("can't write unicode to binary stream")
 828        n = len(b)
 829        if n == 0:
 830            return 0
 831        pos = self._pos
 832        if pos > len(self._buffer):
 833            # Inserts null bytes between the current end of the file
 834            # and the new write position.
 835            padding = b'\x00' * (pos - len(self._buffer))
 836            self._buffer += padding
 837        self._buffer[pos:pos + n] = b
 838        self._pos += n
 839        return n
 840
 841    def seek(self, pos, whence=0):
 842        if self.closed:
 843            raise ValueError("seek on closed file")
 844        try:
 845            pos = pos.__index__()
 846        except AttributeError as err:
 847            raise TypeError("an integer is required") # from err
 848        if whence == 0:
 849            if pos < 0:
 850                raise ValueError("negative seek position %r" % (pos,))
 851            self._pos = pos
 852        elif whence == 1:
 853            self._pos = max(0, self._pos + pos)
 854        elif whence == 2:
 855            self._pos = max(0, len(self._buffer) + pos)
 856        else:
 857            raise ValueError("invalid whence value")
 858        return self._pos
 859
 860    def tell(self):
 861        if self.closed:
 862            raise ValueError("tell on closed file")
 863        return self._pos
 864
 865    def truncate(self, pos=None):
 866        if self.closed:
 867            raise ValueError("truncate on closed file")
 868        if pos is None:
 869            pos = self._pos
 870        elif pos < 0:
 871            raise ValueError("negative truncate position %r" % (pos,))
 872        del self._buffer[pos:]
 873        return self.seek(pos)
 874
 875    def readable(self):
 876        return True
 877
 878    def writable(self):
 879        return True
 880
 881    def seekable(self):
 882        return True
 883
 884# Use the faster implementation of BytesIO if available
 885try:
 886    import _bytesio
 887
 888    class BytesIO(_bytesio._BytesIO, BufferedIOBase):
 889        __doc__ = _bytesio._BytesIO.__doc__
 890
 891except ImportError:
 892    BytesIO = _BytesIO
 893
 894
 895class BufferedReader(_BufferedIOMixin):
 896
 897    """BufferedReader(raw[, buffer_size])
 898
 899    A buffer for a readable, sequential BaseRawIO object.
 900
 901    The constructor creates a BufferedReader for the given readable raw
 902    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
 903    is used.
 904    """
 905
 906    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
 907        """Create a new buffered reader using the given readable raw IO object.
 908        """
 909        raw._checkReadable()
 910        _BufferedIOMixin.__init__(self, raw)
 911        self.buffer_size = buffer_size
 912        self._reset_read_buf()
 913        self._read_lock = threading.Lock()
 914
 915    def _reset_read_buf(self):
 916        self._read_buf = b""
 917        self._read_pos = 0
 918
 919    def read(self, n=None):
 920        """Read n bytes.
 921
 922        Returns exactly n bytes of data unless the underlying raw IO
 923        stream reaches EOF or if the call would block in non-blocking
 924        mode. If n is negative, read until EOF or until read() would
 925        block.
 926        """
 927        with self._read_lock:
 928            return self._read_unlocked(n)
 929
 930    def _read_unlocked(self, n=None):
 931        nodata_val = b""
 932        empty_values = (b"", None)
 933        buf = self._read_buf
 934        pos = self._read_pos
 935
 936        # Special case for when the number of bytes to read is unspecified.
 937        if n is None or n == -1:
 938            self._reset_read_buf()
 939            chunks = [buf[pos:]]  # Strip the consumed bytes.
 940            current_size = 0
 941            while True:
 942                # Read until EOF or until read() would block.
 943                chunk = self.raw.read()
 944                if chunk in empty_values:
 945                    nodata_val = chunk
 946                    break
 947                current_size += len(chunk)
 948                chunks.append(chunk)
 949            return b"".join(chunks) or nodata_val
 950
 951        # The number of bytes to read is specified, return at most n bytes.
 952        avail = len(buf) - pos  # Length of the available buffered data.
 953        if n <= avail:
 954            # Fast path: the data to read is fully buffered.
 955            self._read_pos += n
 956            return buf[pos:pos+n]
 957        # Slow path: read from the stream until enough bytes are read,
 958        # or until an EOF occurs or until read() would block.
 959        chunks = [buf[pos:]]
 960        wanted = max(self.buffer_size, n)
 961        while avail < n:
 962            chunk = self.raw.read(wanted)
 963            if chunk in empty_values:
 964                nodata_val = chunk
 965                break
 966            avail += len(chunk)
 967            chunks.append(chunk)
 968        # n is more then avail only when an EOF occurred or when
 969        # read() would have blocked.
 970        n = min(n, avail)
 971        out = b"".join(chunks)
 972        self._read_buf = out[n:]  # Save the extra data in the buffer.
 973        self._read_pos = 0
 974        return out[:n] if out else nodata_val
 975
 976    def peek(self, n=0):
 977        """Returns buffered bytes without advancing the position.
 978
 979        The argument indicates a desired minimal number of bytes; we
 980        do at most one raw read to satisfy it.  We never return more
 981        than self.buffer_size.
 982        """
 983        with self._read_lock:
 984            return self._peek_unlocked(n)
 985
 986    def _peek_unlocked(self, n=0):
 987        want = min(n, self.buffer_size)
 988        have = len(self._read_buf) - self._read_pos
 989        if have < want:
 990            to_read = self.buffer_size - have
 991            current = self.raw.read(to_read)
 992            if current:
 993                self._read_buf = self._read_buf[self._read_pos:] + current
 994                self._read_pos = 0
 995        return self._read_buf[self._read_pos:]
 996
 997    def read1(self, n):
 998        """Reads up to n bytes, with at most one read() system call."""
 999        # Returns up to n bytes.  If at least one byte is buffered, we
1000        # only return buffered bytes.  Otherwise, we do one raw read.
1001        if n <= 0:
1002            return b""
1003        with self._read_lock:
1004            self._peek_unlocked(1)
1005            return self._read_unlocked(
1006                min(n, len(self._read_buf) - self._read_pos))
1007
1008    def tell(self):
1009        return self.raw.tell() - len(self._read_buf) + self._read_pos
1010
1011    def seek(self, pos, whence=0):
1012        with self._read_lock:
1013            if whence == 1:
1014                pos -= len(self._read_buf) - self._read_pos
1015            pos = self.raw.seek(pos, whence)
1016            self._reset_read_buf()
1017            return pos
1018
1019
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE. If max_buffer_size is omitted, it defaults to
    twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Create a buffered writer on top of the writable raw stream."""
        raw._checkWritable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = bytearray()
        self._write_lock = threading.Lock()

    def write(self, b):
        """Append b to the write buffer, flushing when it exceeds buffer_size.

        Returns the number of bytes accepted.  Raises ValueError on a
        closed stream and TypeError for unicode input.  Raises
        BlockingIOError when the raw stream would block and either nothing
        could be accepted (characters_written == 0) or the buffer had to
        be cut back to max_buffer_size (characters_written is then the
        number of bytes of b that were kept).
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.max_buffer_size:
                        # We've hit max_buffer_size. We have to accept a
                        # partial write and cut back our buffer.
                        overage = len(self._write_buf) - self.max_buffer_size
                        # Report the bytes actually kept, not the bytes
                        # that were dropped.
                        written -= overage
                        self._write_buf = self._write_buf[:self.max_buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at pos (default: raw.tell())."""
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold _write_lock.

        Raises ValueError on a closed stream.  A BlockingIOError from the
        raw stream is re-raised with characters_written set to the total
        number of bytes written during this call.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the raw position plus the bytes still waiting in the buffer."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush, then seek the raw stream and return its result."""
        with self._write_lock:
            self._flush_unlocked()
            return self.raw.seek(pos, whence)
1105
1106
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered writer)
    defaults to twice the buffer size.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        reader._checkReadable()
        writer._checkWritable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader half; None is passed on as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader half."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer half."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader half."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader half."""
        return self.reader.read1(n)

    def readable(self):
        """Delegate to the reader half."""
        return self.reader.readable()

    def writable(self):
        """Delegate to the writer half."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer half."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even when closing the writer raises, so a
        failure on the write side does not leak the read side.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """True if either half reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Closed state of the writer half."""
        return self.writer.closed
1171
1172
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered
    writer) defaults to twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Set up both the read and the write buffer on the seekable raw."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop the read buffer.

        For whence == 1 the offset is corrected by the amount of unread
        read-ahead data, because the raw stream is that far ahead of the
        position the caller sees.  Returns the value of raw.seek().
        """
        self.flush()
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            # First do the raw seek, then empty the read buffer, so that
            # if the raw seek fails, we don't lose buffered data forever.
            pos = self.raw.seek(pos, whence)
            self._reset_read_buf()
        return pos

    def tell(self):
        """Return the position, accounting for whichever buffer is in use."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos (default: the current position)."""
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        self.seek(pos)
        return BufferedWriter.truncate(self)

    def read(self, n=None):
        """Flush pending writes, then read; None is passed on as -1."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then delegate to BufferedReader.readinto."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then delegate to BufferedReader.peek."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then delegate to BufferedReader.read1."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Rewind over any read-ahead data, then delegate to BufferedWriter.write."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1236
1237
class TextIOBase(IOBase):

    """Abstract base for text streams.

    Offers a character- and line-oriented view of stream I/O.  No
    readinto method is provided because Python's character strings are
    immutable, and there is no public constructor.
    """

    def read(self, n=-1):
        """Read up to n characters from the stream.

        Keeps reading from the underlying buffer until n characters are
        available or EOF is reached; a negative or omitted n means read
        until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string s to the stream."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the stream's size to pos."""
        self._unsupported("truncate")

    def readline(self):
        """Read up to and including the next newline, or until EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Encoding of the stream; None here, subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated while reading are taken into
        account.  None here; subclasses should override.
        """
        return None
1284
1285
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    """Incremental decoder for universal newlines mode.

    Wraps another incremental decoder and, when translate is true,
    rewrites \\r\\n and \\r to \\n.  The kinds of newline met so far are
    recorded in seennl.  With translate=False the text is left alone, but
    a trailing \\r is still held back so that a \\r\\n pair is always
    returned in one piece.
    """

    # Bit flags OR-ed into self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.decoder = decoder
        self.translate = translate
        self.pendingcr = False
        self.seennl = 0

    def decode(self, input, final=False):
        """Decode input and return the text, newline-translated if enabled."""
        text = self.decoder.decode(input, final=final)

        # Re-attach a \r held back by the previous call.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r (even when not translating) so that
        # readline() is sure to see a \r\n pair in a single pass.
        if not final and text.endswith("\r"):
            text = text[:-1]
            self.pendingcr = True

        # Note which kinds of newline occur in this piece of text.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); bit 0 of flags is the pending-\\r marker."""
        pending, flags = self.decoder.getstate()
        flags <<= 1
        if self.pendingcr:
            flags |= 1
        return pending, flags

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        pending, flags = state
        self.pendingcr = bool(flags & 1)
        self.decoder.setstate((pending, flags >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        self.decoder.reset()

    @property
    def newlines(self):
        """None, a string or a tuple describing the newline kinds seen."""
        # Indexed by the seennl bit mask (_LF | _CR | _CRLF).
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1360
1361
1362class TextIOWrapper(TextIOBase):
1363
1364    r"""Character and line based layer over a BufferedIOBase object, buffer.
1365
1366    encoding gives the name of the encoding that the stream will be
1367    decoded or encoded with. It defaults to locale.getpreferredencoding.
1368
1369    errors determines the strictness of encoding and decoding (see the
1370    codecs.register) and defaults to "strict".
1371
1372    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
1373    handling of line endings. If it is None, universal newlines is
1374    enabled.  With this enabled, on input, the lines endings '\n', '\r',
1375    or '\r\n' are translated to '\n' before being returned to the
1376    caller. Conversely, on output, '\n' is translated to the system
1377    default line separator, os.linesep. If newline is any other of its
1378    legal values, that newline becomes the newline when the file is read
1379    and it is returned untranslated. On output, '\n' is converted to the
1380    newline.
1381
1382    If line_buffering is True, a call to flush is implied when a call to
1383    write contains a newline character.
1384    """
1385
1386    _CHUNK_SIZE = 128
1387
1388    def __init__(self, buffer, encoding=None, errors=None, newline=None,
1389                 line_buffering=False):
1390        if newline not in (None, "", "\n", "\r", "\r\n"):
1391            raise ValueError("illegal newline value: %r" % (newline,))
1392        if encoding is None:
1393            try:
1394                encoding = os.device_encoding(buffer.fileno())
1395            except (AttributeError, UnsupportedOperation):
1396                pass
1397            if encoding is None:
1398                try:
1399                    import locale
1400                except ImportError:
1401                    # Importing locale may fail if Python is being built
1402                    encoding = "ascii"
1403                else:
1404                    encoding = locale.getpreferredencoding()
1405
1406        if not isinstance(encoding, basestring):
1407            raise ValueError("invalid encoding: %r" % encoding)
1408
1409        if errors is None:
1410            errors = "strict"
1411        else:
1412            if not isinstance(errors, basestring):
1413                raise ValueError("invalid errors: %r" % errors)
1414
1415        self.buffer = buffer
1416        self._line_buffering = line_buffering
1417        self._encoding = encoding
1418        self._errors = errors
1419        self._readuniversal = not newline
1420        self._readtranslate = newline is None
1421        self._readnl = newline
1422        self._writetranslate = newline != ''
1423        self._writenl = newline or os.linesep
1424        self._encoder = None
1425        self._decoder = None
1426        self._decoded_chars = ''  # buffer for text returned from decoder
1427        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
1428        self._snapshot = None  # info for reconstructing decoder state
1429        self._seekable = self._telling = self.buffer.seekable()
1430
1431    # self._snapshot is either None, or a tuple (dec_flags, next_input)
1432    # where dec_flags is the second (integer) item of the decoder state
1433    # and next_input is the chunk of input bytes that comes next after the
1434    # snapshot point.  We use this to reconstruct decoder states in tell().
1435
1436    # Naming convention:
1437    #   - "bytes_..." for integer variables that count input bytes
1438    #   - "chars_..." for integer variables that count decoded characters
1439
    @property
    def encoding(self):
        """Return the encoding name stored in ``self._encoding``."""
        return self._encoding
1443
    @property
    def errors(self):
        """Return the error-handling setting stored in ``self._errors``."""
        return self._errors
1447
    @property
    def line_buffering(self):
        """Return the line_buffering flag stored in ``self._line_buffering``."""
        return self._line_buffering
1451
    def seekable(self):
        """Return the cached ``self._seekable`` flag."""
        return self._seekable
1454
    def readable(self):
        """Return whatever ``readable()`` of the underlying buffer returns."""
        return self.buffer.readable()
1457
    def writable(self):
        """Return whatever ``writable()`` of the underlying buffer returns."""
        return self.buffer.writable()
1460
    def flush(self):
        """Flush the underlying buffer and reset the ``_telling`` flag."""
        self.buffer.flush()
        # _telling is set back to the value of _seekable.
        self._telling = self._seekable
1464
1465    def close(self):
1466        try:
1467            self.flush()
1468        except:
1469            pass  # If flush() fails, just give up
1470        self.buffer.close()
1471
    @property
    def closed(self):
        """Return the ``closed`` attribute of the underlying buffer."""
        return self.buffer.closed
1475
    @property
    def name(self):
        """Return the ``name`` attribute of the underlying buffer."""
        return self.buffer.name
1479
    def fileno(self):
        """Return whatever ``fileno()`` of the underlying buffer returns."""
        return self.buffer.fileno()
1482
    def isatty(self):
        """Return whatever ``isatty()`` of the underlying buffer returns."""
        return self.buffer.isatty()
1485
1486    def write(self, s):
1487        if self.closed:
1488            raise ValueError("write to closed file")
1489        if not isinstance(s, unicode):
1490            raise TypeError("can't write %s to text stream" %
1491                            s.__class__.__name__)
1492        length = len(s)
1493        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1494        if haslf and self._writetranslate and self._writenl != "\n":
1495            s = s.replace("\n", self._writenl)
1496        encoder = self._encoder or se

Large files are truncated, but you can click here to view the full file