PageRenderTime 69ms CodeModel.GetById 31ms RepoModel.GetById 1ms app.codeStats 0ms

/pypy/rlib/streamio.py

http://github.com/pypy/pypy
Python | 1250 lines | 1208 code | 16 blank | 26 comment | 15 complexity | 1db4a882b8df7ebbb282bf9077b48ed2 MD5 | raw file
  1. """New standard I/O library.
  2. Based on sio.py from Guido van Rossum.
  3. - This module contains various stream classes which provide a subset of the
  4. classic Python I/O API: read(n), write(s), tell(), seek(offset, whence=0),
  5. readall(), readline(), truncate(size), flush(), close(), peek(),
  6. flushable(), try_to_find_file_descriptor().
  7. - This is not for general usage:
  8. * read(n) may return less than n bytes, just like os.read().
  9. * some other methods also have no default parameters.
  10. * close() should be called exactly once and no further operations performed;
  11. there is no __del__() closing the stream for you.
  12. * some methods may raise MyNotImplementedError.
  13. * peek() returns some (or no) characters that have already been read ahead.
  14. * flushable() returns True/False if flushing that stream is useful/pointless.
  15. - A 'basis stream' provides I/O using a low-level API, like the os, mmap or
  16. socket modules.
  17. - A 'filtering stream' builds on top of another stream. There are filtering
  18. streams for universal newline translation, for unicode translation, and
  19. for buffering.
  20. You typically take a basis stream, place zero or more filtering
  21. streams on top of it, and then top it off with an input-buffering and/or
  22. an output-buffering stream.
  23. """
  24. #
  25. # File offsets are all 'r_longlong', but a single read or write cannot
  26. # transfer more data than fits in an RPython 'int' (because that would not
  27. # fit in a single string anyway). This module needs to be careful about
  28. # where r_longlong values end up: as argument to seek() and truncate() and
  29. # return value of tell(), but not as argument to read().
  30. #
  31. import os, sys, errno
  32. from pypy.rlib.objectmodel import specialize, we_are_translated
  33. from pypy.rlib.rarithmetic import r_longlong, intmask
  34. from pypy.rlib import rposix
  35. from os import O_RDONLY, O_WRONLY, O_RDWR, O_CREAT, O_TRUNC
  36. O_BINARY = getattr(os, "O_BINARY", 0)
  37. # (basemode, plus)
  38. OS_MODE = {('r', False): O_RDONLY,
  39. ('r', True): O_RDWR,
  40. ('w', False): O_WRONLY | O_CREAT | O_TRUNC,
  41. ('w', True): O_RDWR | O_CREAT | O_TRUNC,
  42. ('a', False): O_WRONLY | O_CREAT,
  43. ('a', True): O_RDWR | O_CREAT,
  44. }
  45. class MyNotImplementedError(Exception):
  46. """
  47. Catching NotImplementedError is not RPython, so we use this custom class
  48. instead of it
  49. """
  50. # ____________________________________________________________
  51. def replace_crlf_with_lf(s):
  52. substrings = s.split("\r")
  53. result = [substrings[0]]
  54. for substring in substrings[1:]:
  55. if not substring:
  56. result.append("")
  57. elif substring[0] == "\n":
  58. result.append(substring[1:])
  59. else:
  60. result.append(substring)
  61. return "\n".join(result)
  62. def replace_char_with_str(string, c, s):
  63. return s.join(string.split(c))
  64. @specialize.argtype(0)
  65. def open_file_as_stream(path, mode="r", buffering=-1):
  66. os_flags, universal, reading, writing, basemode, binary = decode_mode(mode)
  67. stream = open_path_helper(path, os_flags, basemode == "a")
  68. return construct_stream_tower(stream, buffering, universal, reading,
  69. writing, binary)
  70. def _setfd_binary(fd):
  71. pass
  72. def fdopen_as_stream(fd, mode, buffering=-1):
  73. # XXX XXX XXX you want do check whether the modes are compatible
  74. # otherwise you get funny results
  75. os_flags, universal, reading, writing, basemode, binary = decode_mode(mode)
  76. _setfd_binary(fd)
  77. stream = DiskFile(fd)
  78. return construct_stream_tower(stream, buffering, universal, reading,
  79. writing, binary)
  80. @specialize.argtype(0)
  81. def open_path_helper(path, os_flags, append):
  82. # XXX for now always return DiskFile
  83. fd = rposix.open(path, os_flags, 0666)
  84. if append:
  85. try:
  86. os.lseek(fd, 0, 2)
  87. except OSError:
  88. # XXX does this pass make sense?
  89. pass
  90. return DiskFile(fd)
  91. def decode_mode(mode):
  92. if mode[0] == 'U':
  93. mode = 'r' + mode
  94. basemode = mode[0] # 'r', 'w' or 'a'
  95. plus = False
  96. universal = False
  97. binary = False
  98. for c in mode[1:]:
  99. if c == '+':
  100. plus = True
  101. elif c == 'U':
  102. universal = True
  103. elif c == 'b':
  104. binary = True
  105. else:
  106. break
  107. flag = OS_MODE[basemode, plus]
  108. flag |= O_BINARY
  109. reading = basemode == 'r' or plus
  110. writing = basemode != 'r' or plus
  111. return flag, universal, reading, writing, basemode, binary
  112. def construct_stream_tower(stream, buffering, universal, reading, writing,
  113. binary):
  114. if buffering == 0: # no buffering
  115. if reading: # force some minimal buffering for readline()
  116. stream = ReadlineInputStream(stream)
  117. elif buffering == 1: # line-buffering
  118. if writing:
  119. stream = LineBufferingOutputStream(stream)
  120. if reading:
  121. stream = BufferingInputStream(stream)
  122. else: # default or explicit buffer sizes
  123. if buffering is not None and buffering < 0:
  124. buffering = -1
  125. if writing:
  126. stream = BufferingOutputStream(stream, buffering)
  127. if reading:
  128. stream = BufferingInputStream(stream, buffering)
  129. if universal: # Wants universal newlines
  130. if writing and os.linesep != '\n':
  131. stream = TextOutputFilter(stream)
  132. if reading:
  133. stream = TextInputFilter(stream)
  134. elif not binary and os.linesep == '\r\n':
  135. stream = TextCRLFFilter(stream)
  136. return stream
  137. class StreamError(Exception):
  138. def __init__(self, message):
  139. self.message = message
  140. StreamErrors = (OSError, StreamError) # errors that can generally be raised
  141. if sys.platform == "win32":
  142. from pypy.rlib.rwin32 import BOOL, HANDLE, get_osfhandle, GetLastError
  143. from pypy.translator.tool.cbuild import ExternalCompilationInfo
  144. from pypy.rpython.lltypesystem import rffi
  145. _eci = ExternalCompilationInfo()
  146. _setmode = rffi.llexternal('_setmode', [rffi.INT, rffi.INT], rffi.INT,
  147. compilation_info=_eci)
  148. SetEndOfFile = rffi.llexternal('SetEndOfFile', [HANDLE], BOOL,
  149. compilation_info=_eci)
  150. # HACK: These implementations are specific to MSVCRT and the C backend.
  151. # When generating on CLI or JVM, these are patched out.
  152. # See PyPyTarget.target() in targetpypystandalone.py
  153. def _setfd_binary(fd):
  154. #Allow this to succeed on invalid fd's
  155. if rposix.is_valid_fd(fd):
  156. _setmode(fd, os.O_BINARY)
  157. def ftruncate_win32(fd, size):
  158. curpos = os.lseek(fd, 0, 1)
  159. try:
  160. # move to the position to be truncated
  161. os.lseek(fd, size, 0)
  162. # Truncate. Note that this may grow the file!
  163. handle = get_osfhandle(fd)
  164. if not SetEndOfFile(handle):
  165. raise WindowsError(GetLastError(),
  166. "Could not truncate file")
  167. finally:
  168. # we restore the file pointer position in any case
  169. os.lseek(fd, curpos, 0)
  170. class Stream(object):
  171. """Base class for streams. Provides a default implementation of
  172. some methods."""
  173. def read(self, n):
  174. raise MyNotImplementedError
  175. def write(self, data):
  176. raise MyNotImplementedError
  177. def tell(self):
  178. raise MyNotImplementedError
  179. def seek(self, offset, whence):
  180. raise MyNotImplementedError
  181. def readall(self):
  182. bufsize = 8192
  183. result = []
  184. while True:
  185. data = self.read(bufsize)
  186. if not data:
  187. break
  188. result.append(data)
  189. if bufsize < 4194304: # 4 Megs
  190. bufsize <<= 1
  191. return ''.join(result)
  192. def readline(self):
  193. # very inefficient unless there is a peek()
  194. result = []
  195. while True:
  196. # "peeks" on the underlying stream to see how many characters
  197. # we can safely read without reading past an end-of-line
  198. peeked = self.peek()
  199. pn = peeked.find("\n")
  200. if pn < 0:
  201. pn = len(peeked)
  202. c = self.read(pn + 1)
  203. if not c:
  204. break
  205. result.append(c)
  206. if c.endswith('\n'):
  207. break
  208. return ''.join(result)
  209. def truncate(self, size):
  210. raise MyNotImplementedError
  211. def flush_buffers(self):
  212. pass
  213. def flush(self):
  214. pass
  215. def flushable(self):
  216. return False
  217. def close(self):
  218. pass
  219. def peek(self):
  220. return ''
  221. def try_to_find_file_descriptor(self):
  222. return -1
  223. def getnewlines(self):
  224. return 0
  225. class DiskFile(Stream):
  226. """Standard I/O basis stream using os.open/close/read/write/lseek"""
  227. def __init__(self, fd):
  228. self.fd = fd
  229. def seek(self, offset, whence):
  230. os.lseek(self.fd, offset, whence)
  231. def tell(self):
  232. return os.lseek(self.fd, 0, 1)
  233. def read(self, n):
  234. assert isinstance(n, int)
  235. return os.read(self.fd, n)
  236. def write(self, data):
  237. while data:
  238. n = os.write(self.fd, data)
  239. data = data[n:]
  240. def close(self):
  241. os.close(self.fd)
  242. if sys.platform == "win32":
  243. def truncate(self, size):
  244. ftruncate_win32(self.fd, size)
  245. else:
  246. def truncate(self, size):
  247. # Note: for consistency, in translated programs a failing
  248. # os.ftruncate() raises OSError. However, on top of
  249. # CPython, we get an IOError. As it is (as far as I know)
  250. # the only place that have this behavior, we just convert it
  251. # to an OSError instead of adding IOError to StreamErrors.
  252. if we_are_translated():
  253. os.ftruncate(self.fd, size)
  254. else:
  255. try:
  256. os.ftruncate(self.fd, size)
  257. except IOError, e:
  258. raise OSError(*e.args)
  259. def try_to_find_file_descriptor(self):
  260. return self.fd
  261. # next class is not RPython
  262. class MMapFile(Stream):
  263. """Standard I/O basis stream using mmap."""
  264. def __init__(self, fd, mmapaccess):
  265. """NOT_RPYTHON"""
  266. self.fd = fd
  267. self.access = mmapaccess
  268. self.pos = 0
  269. self.remapfile()
  270. def remapfile(self):
  271. import mmap
  272. size = os.fstat(self.fd).st_size
  273. self.mm = mmap.mmap(self.fd, size, access=self.access)
  274. def close(self):
  275. self.mm.close()
  276. os.close(self.fd)
  277. def tell(self):
  278. return self.pos
  279. def seek(self, offset, whence):
  280. if whence == 0:
  281. self.pos = max(0, offset)
  282. elif whence == 1:
  283. self.pos = max(0, self.pos + offset)
  284. elif whence == 2:
  285. self.pos = max(0, self.mm.size() + offset)
  286. else:
  287. raise StreamError("seek(): whence must be 0, 1 or 2")
  288. def readall(self):
  289. filesize = self.mm.size() # Actual file size, may be more than mapped
  290. n = filesize - self.pos
  291. data = self.mm[self.pos:]
  292. if len(data) < n:
  293. del data
  294. # File grew since opened; remap to get the new data
  295. self.remapfile()
  296. data = self.mm[self.pos:]
  297. self.pos += len(data)
  298. return data
  299. def read(self, n):
  300. assert isinstance(n, int)
  301. end = self.pos + n
  302. data = self.mm[self.pos:end]
  303. if not data:
  304. # is there more data to read?
  305. filesize = self.mm.size() #Actual file size, may be more than mapped
  306. if filesize > self.pos:
  307. # File grew since opened; remap to get the new data
  308. self.remapfile()
  309. data = self.mm[self.pos:end]
  310. self.pos += len(data)
  311. return data
  312. def readline(self):
  313. hit = self.mm.find("\n", self.pos) + 1
  314. if not hit:
  315. # is there more data to read?
  316. filesize = self.mm.size() #Actual file size, may be more than mapped
  317. if filesize > len(self.mm):
  318. # File grew since opened; remap to get the new data
  319. self.remapfile()
  320. hit = self.mm.find("\n", self.pos) + 1
  321. if hit:
  322. # Got a whole line
  323. data = self.mm[self.pos:hit]
  324. self.pos = hit
  325. else:
  326. # Read whatever we've got -- may be empty
  327. data = self.mm[self.pos:]
  328. self.pos += len(data)
  329. return data
  330. def write(self, data):
  331. end = self.pos + len(data)
  332. try:
  333. self.mm[self.pos:end] = data
  334. # This can raise IndexError on Windows, ValueError on Unix
  335. except (IndexError, ValueError):
  336. # XXX On Unix, this resize() call doesn't work
  337. self.mm.resize(end)
  338. self.mm[self.pos:end] = data
  339. self.pos = end
  340. def flush(self):
  341. self.mm.flush()
  342. def flushable(self):
  343. import mmap
  344. return self.access == mmap.ACCESS_WRITE
  345. def try_to_find_file_descriptor(self):
  346. return self.fd
  347. # ____________________________________________________________
  348. STREAM_METHODS = dict([
  349. ("read", [int]),
  350. ("write", [str]),
  351. ("tell", []),
  352. ("seek", [r_longlong, int]),
  353. ("readall", []),
  354. ("readline", []),
  355. ("truncate", [r_longlong]),
  356. ("flush", []),
  357. ("flushable", []),
  358. ("close", []),
  359. ("peek", []),
  360. ("try_to_find_file_descriptor", []),
  361. ("getnewlines", []),
  362. ])
  363. def PassThrough(meth_name, flush_buffers):
  364. if meth_name in STREAM_METHODS:
  365. signature = STREAM_METHODS[meth_name]
  366. args = ", ".join(["v%s" % (i, ) for i in range(len(signature))])
  367. else:
  368. assert 0, "not a good idea"
  369. args = "*args"
  370. if flush_buffers:
  371. code = """def %s(self, %s):
  372. self.flush_buffers()
  373. return self.base.%s(%s)
  374. """
  375. else:
  376. code = """def %s(self, %s):
  377. return self.base.%s(%s)
  378. """
  379. d = {}
  380. exec code % (meth_name, args, meth_name, args) in d
  381. return d[meth_name]
  382. def offset2int(offset):
  383. intoffset = intmask(offset)
  384. if intoffset != offset:
  385. raise StreamError("seek() from a non-seekable source:"
  386. " this would read and discard more"
  387. " than sys.maxint bytes")
  388. return intoffset
  389. class BufferingInputStream(Stream):
  390. """Standard buffering input stream.
  391. This, and BufferingOutputStream if needed, are typically at the top of
  392. the stack of streams.
  393. """
  394. bigsize = 2**19 # Half a Meg
  395. bufsize = 2**13 # 8 K
  396. def __init__(self, base, bufsize=-1):
  397. self.base = base
  398. self.do_read = base.read # function to fill buffer some more
  399. self.do_tell = base.tell # return a byte offset
  400. self.do_seek = base.seek # seek to a byte offset
  401. if bufsize == -1: # Get default from the class
  402. bufsize = self.bufsize
  403. self.bufsize = bufsize # buffer size (hint only)
  404. self.buf = "" # raw data
  405. self.pos = 0
  406. def flush_buffers(self):
  407. if self.buf:
  408. try:
  409. self.do_seek(self.tell(), 0)
  410. except MyNotImplementedError:
  411. pass
  412. else:
  413. self.buf = ""
  414. self.pos = 0
  415. def tell(self):
  416. tellpos = self.do_tell() # This may fail
  417. offset = len(self.buf) - self.pos
  418. assert tellpos >= offset #, (locals(), self.__dict__)
  419. return tellpos - offset
  420. def seek(self, offset, whence):
  421. # This may fail on the do_seek() or do_tell() call.
  422. # But it won't call either on a relative forward seek.
  423. # Nor on a seek to the very end.
  424. if whence == 0:
  425. self.do_seek(offset, 0)
  426. self.buf = ""
  427. self.pos = 0
  428. return
  429. if whence == 1:
  430. currentsize = len(self.buf) - self.pos
  431. if offset < 0:
  432. if self.pos + offset >= 0:
  433. self.pos += offset
  434. else:
  435. self.do_seek(self.tell() + offset, 0)
  436. self.pos = 0
  437. self.buf = ""
  438. return
  439. elif offset <= currentsize:
  440. self.pos += offset
  441. return
  442. self.buf = ""
  443. self.pos = 0
  444. offset -= currentsize
  445. try:
  446. self.do_seek(offset, 1)
  447. except MyNotImplementedError:
  448. intoffset = offset2int(offset)
  449. self.read(intoffset)
  450. return
  451. if whence == 2:
  452. try:
  453. self.do_seek(offset, 2)
  454. except MyNotImplementedError:
  455. pass
  456. else:
  457. self.pos = 0
  458. self.buf = ""
  459. return
  460. # Skip relative to EOF by reading and saving only just as
  461. # much as needed
  462. intoffset = offset2int(offset)
  463. pos = self.pos
  464. assert pos >= 0
  465. buffers = [self.buf[pos:]]
  466. total = len(buffers[0])
  467. self.buf = ""
  468. self.pos = 0
  469. while 1:
  470. data = self.do_read(self.bufsize)
  471. if not data:
  472. break
  473. buffers.append(data)
  474. total += len(data)
  475. while buffers and total >= len(buffers[0]) - intoffset:
  476. total -= len(buffers[0])
  477. del buffers[0]
  478. cutoff = total + intoffset
  479. if cutoff < 0:
  480. raise StreamError("cannot seek back")
  481. if buffers:
  482. assert cutoff >= 0
  483. buffers[0] = buffers[0][cutoff:]
  484. self.buf = "".join(buffers)
  485. return
  486. raise StreamError("whence should be 0, 1 or 2")
  487. def readall(self):
  488. pos = self.pos
  489. assert pos >= 0
  490. if self.buf:
  491. chunks = [self.buf[pos:]]
  492. else:
  493. chunks = []
  494. self.buf = ""
  495. self.pos = 0
  496. bufsize = self.bufsize
  497. while 1:
  498. try:
  499. data = self.do_read(bufsize)
  500. except OSError, o:
  501. if o.errno != errno.EAGAIN:
  502. raise
  503. if not chunks:
  504. raise
  505. break
  506. if not data:
  507. break
  508. chunks.append(data)
  509. bufsize = min(bufsize*2, self.bigsize)
  510. return "".join(chunks)
  511. def read(self, n=-1):
  512. assert isinstance(n, int)
  513. if n < 0:
  514. return self.readall()
  515. currentsize = len(self.buf) - self.pos
  516. start = self.pos
  517. assert start >= 0
  518. if n <= currentsize:
  519. stop = start + n
  520. assert stop >= 0
  521. result = self.buf[start:stop]
  522. self.pos += n
  523. return result
  524. else:
  525. chunks = [self.buf[start:]]
  526. while 1:
  527. self.buf = self.do_read(self.bufsize)
  528. if not self.buf:
  529. self.pos = 0
  530. break
  531. currentsize += len(self.buf)
  532. if currentsize >= n:
  533. self.pos = len(self.buf) - (currentsize - n)
  534. stop = self.pos
  535. assert stop >= 0
  536. chunks.append(self.buf[:stop])
  537. break
  538. chunks.append(self.buf)
  539. return ''.join(chunks)
  540. def readline(self):
  541. pos = self.pos
  542. assert pos >= 0
  543. i = self.buf.find("\n", pos)
  544. start = self.pos
  545. assert start >= 0
  546. if i >= 0: # new line found
  547. i += 1
  548. result = self.buf[start:i]
  549. self.pos = i
  550. return result
  551. temp = self.buf[start:]
  552. # read one buffer and most of the time a new line will be found
  553. self.buf = self.do_read(self.bufsize)
  554. i = self.buf.find("\n")
  555. if i >= 0: # new line found
  556. i += 1
  557. result = temp + self.buf[:i]
  558. self.pos = i
  559. return result
  560. if not self.buf:
  561. self.pos = 0
  562. return temp
  563. # need to keep getting data until we find a new line
  564. chunks = [temp, self.buf]
  565. while 1:
  566. self.buf = self.do_read(self.bufsize)
  567. if not self.buf:
  568. self.pos = 0
  569. break
  570. i = self.buf.find("\n")
  571. if i >= 0:
  572. i += 1
  573. chunks.append(self.buf[:i])
  574. self.pos = i
  575. break
  576. chunks.append(self.buf)
  577. return "".join(chunks)
  578. def peek(self):
  579. pos = self.pos
  580. assert pos >= 0
  581. return self.buf[pos:]
  582. write = PassThrough("write", flush_buffers=True)
  583. truncate = PassThrough("truncate", flush_buffers=True)
  584. flush = PassThrough("flush", flush_buffers=True)
  585. flushable = PassThrough("flushable", flush_buffers=False)
  586. close = PassThrough("close", flush_buffers=False)
  587. try_to_find_file_descriptor = PassThrough("try_to_find_file_descriptor",
  588. flush_buffers=False)
  589. class ReadlineInputStream(Stream):
  590. """Minimal buffering input stream.
  591. Only does buffering for readline(). The other kinds of reads, and
  592. all writes, are not buffered at all.
  593. """
  594. bufsize = 2**13 # 8 K
  595. def __init__(self, base, bufsize=-1):
  596. self.base = base
  597. self.do_read = base.read # function to fill buffer some more
  598. self.do_seek = base.seek # seek to a byte offset
  599. if bufsize == -1: # Get default from the class
  600. bufsize = self.bufsize
  601. self.bufsize = bufsize # buffer size (hint only)
  602. self.buf = None # raw data (may contain "\n")
  603. self.bufstart = 0
  604. def flush_buffers(self):
  605. if self.buf is not None:
  606. try:
  607. self.do_seek(self.bufstart-len(self.buf), 1)
  608. except MyNotImplementedError:
  609. pass
  610. else:
  611. self.buf = None
  612. self.bufstart = 0
  613. def readline(self):
  614. if self.buf is not None:
  615. i = self.buf.find('\n', self.bufstart)
  616. else:
  617. self.buf = ''
  618. i = -1
  619. #
  620. if i < 0:
  621. self.buf = self.buf[self.bufstart:]
  622. self.bufstart = 0
  623. while True:
  624. bufsize = max(self.bufsize, len(self.buf) >> 2)
  625. data = self.do_read(bufsize)
  626. if not data:
  627. result = self.buf # end-of-file reached
  628. self.buf = None
  629. return result
  630. startsearch = len(self.buf) # there is no '\n' in buf so far
  631. self.buf += data
  632. i = self.buf.find('\n', startsearch)
  633. if i >= 0:
  634. break
  635. #
  636. i += 1
  637. result = self.buf[self.bufstart:i]
  638. self.bufstart = i
  639. return result
  640. def peek(self):
  641. if self.buf is None:
  642. return ''
  643. if self.bufstart > 0:
  644. self.buf = self.buf[self.bufstart:]
  645. self.bufstart = 0
  646. return self.buf
  647. def tell(self):
  648. pos = self.base.tell()
  649. if self.buf is not None:
  650. pos -= (len(self.buf) - self.bufstart)
  651. return pos
  652. def readall(self):
  653. result = self.base.readall()
  654. if self.buf is not None:
  655. result = self.buf[self.bufstart:] + result
  656. self.buf = None
  657. self.bufstart = 0
  658. return result
  659. def read(self, n):
  660. if self.buf is None:
  661. return self.do_read(n)
  662. else:
  663. m = n - (len(self.buf) - self.bufstart)
  664. start = self.bufstart
  665. if m > 0:
  666. result = self.buf[start:] + self.do_read(m)
  667. self.buf = None
  668. self.bufstart = 0
  669. return result
  670. elif n >= 0:
  671. self.bufstart = start + n
  672. return self.buf[start : self.bufstart]
  673. else:
  674. return ''
  675. seek = PassThrough("seek", flush_buffers=True)
  676. write = PassThrough("write", flush_buffers=True)
  677. truncate = PassThrough("truncate", flush_buffers=True)
  678. flush = PassThrough("flush", flush_buffers=True)
  679. flushable = PassThrough("flushable", flush_buffers=False)
  680. close = PassThrough("close", flush_buffers=False)
  681. try_to_find_file_descriptor = PassThrough("try_to_find_file_descriptor",
  682. flush_buffers=False)
  683. class BufferingOutputStream(Stream):
  684. """Standard buffering output stream.
  685. This, and BufferingInputStream if needed, are typically at the top of
  686. the stack of streams.
  687. """
  688. bigsize = 2**19 # Half a Meg
  689. bufsize = 2**13 # 8 K
  690. def __init__(self, base, bufsize=-1):
  691. self.base = base
  692. self.do_write = base.write # write more data
  693. self.do_tell = base.tell # return a byte offset
  694. if bufsize == -1: # Get default from the class
  695. bufsize = self.bufsize
  696. self.bufsize = bufsize # buffer size (hint only)
  697. self.buf = []
  698. self.buflen = 0
  699. def flush_buffers(self):
  700. if self.buf:
  701. self.do_write(''.join(self.buf))
  702. self.buf = []
  703. self.buflen = 0
  704. def tell(self):
  705. return self.do_tell() + self.buflen
  706. def write(self, data):
  707. buflen = self.buflen
  708. datalen = len(data)
  709. if datalen + buflen < self.bufsize:
  710. self.buf.append(data)
  711. self.buflen += datalen
  712. elif buflen:
  713. self.buf.append(data)
  714. self.do_write(''.join(self.buf))
  715. self.buf = []
  716. self.buflen = 0
  717. else:
  718. self.do_write(data)
  719. read = PassThrough("read", flush_buffers=True)
  720. readall = PassThrough("readall", flush_buffers=True)
  721. readline = PassThrough("readline", flush_buffers=True)
  722. seek = PassThrough("seek", flush_buffers=True)
  723. truncate = PassThrough("truncate", flush_buffers=True)
  724. flush = PassThrough("flush", flush_buffers=True)
  725. close = PassThrough("close", flush_buffers=True)
  726. try_to_find_file_descriptor = PassThrough("try_to_find_file_descriptor",
  727. flush_buffers=False)
  728. def flushable(self):
  729. return True
  730. class LineBufferingOutputStream(BufferingOutputStream):
  731. """Line buffering output stream.
  732. This is typically the top of the stack.
  733. """
  734. def write(self, data):
  735. p = data.rfind('\n') + 1
  736. assert p >= 0
  737. if self.buflen + len(data) < self.bufsize:
  738. if p == 0:
  739. self.buf.append(data)
  740. self.buflen += len(data)
  741. else:
  742. if self.buflen:
  743. self.do_write(''.join(self.buf))
  744. self.do_write(data[:p])
  745. self.buf = [data[p:]]
  746. self.buflen = len(self.buf[0])
  747. else:
  748. if self.buflen + p < self.bufsize:
  749. p = self.bufsize - self.buflen
  750. if self.buflen:
  751. self.do_write(''.join(self.buf))
  752. assert p >= 0
  753. self.do_write(data[:p])
  754. self.buf = [data[p:]]
  755. self.buflen = len(self.buf[0])
  756. # ____________________________________________________________
  757. class CRLFFilter(Stream):
  758. """Filtering stream for universal newlines.
  759. TextInputFilter is more general, but this is faster when you don't
  760. need tell/seek.
  761. """
  762. def __init__(self, base):
  763. self.base = base
  764. self.do_read = base.read
  765. self.atcr = False
  766. def read(self, n):
  767. data = self.do_read(n)
  768. if self.atcr:
  769. if data.startswith("\n"):
  770. data = data[1:] # Very rare case: in the middle of "\r\n"
  771. self.atcr = False
  772. if "\r" in data:
  773. self.atcr = data.endswith("\r") # Test this before removing \r
  774. data = replace_crlf_with_lf(data)
  775. return data
  776. flush = PassThrough("flush", flush_buffers=False)
  777. flushable= PassThrough("flushable", flush_buffers=False)
  778. close = PassThrough("close", flush_buffers=False)
  779. try_to_find_file_descriptor = PassThrough("try_to_find_file_descriptor",
  780. flush_buffers=False)
  781. class TextCRLFFilter(Stream):
  782. """Filtering stream for universal newlines.
  783. TextInputFilter is more general, but this is faster when you don't
  784. need tell/seek.
  785. """
  786. def __init__(self, base):
  787. self.base = base
  788. self.do_read = base.read
  789. self.do_write = base.write
  790. self.do_flush = base.flush_buffers
  791. self.lfbuffer = ""
  792. def read(self, n):
  793. data = self.lfbuffer + self.do_read(n)
  794. self.lfbuffer = ""
  795. if data.endswith("\r"):
  796. c = self.do_read(1)
  797. if c and c[0] == '\n':
  798. data = data + '\n'
  799. self.lfbuffer = c[1:]
  800. else:
  801. self.lfbuffer = c
  802. result = []
  803. offset = 0
  804. while True:
  805. newoffset = data.find('\r\n', offset)
  806. if newoffset < 0:
  807. result.append(data[offset:])
  808. break
  809. result.append(data[offset:newoffset])
  810. offset = newoffset + 2
  811. return '\n'.join(result)
  812. def tell(self):
  813. pos = self.base.tell()
  814. return pos - len(self.lfbuffer)
  815. def seek(self, offset, whence):
  816. if whence == 1:
  817. offset -= len(self.lfbuffer) # correct for already-read-ahead character
  818. self.base.seek(offset, whence)
  819. self.lfbuffer = ""
  820. def flush_buffers(self):
  821. if self.lfbuffer:
  822. self.base.seek(-len(self.lfbuffer), 1)
  823. self.lfbuffer = ""
  824. self.do_flush()
  825. def write(self, data):
  826. data = replace_char_with_str(data, '\n', '\r\n')
  827. self.flush_buffers()
  828. self.do_write(data)
  829. truncate = PassThrough("truncate", flush_buffers=True)
  830. flush = PassThrough("flush", flush_buffers=False)
  831. flushable= PassThrough("flushable", flush_buffers=False)
  832. close = PassThrough("close", flush_buffers=False)
  833. try_to_find_file_descriptor = PassThrough("try_to_find_file_descriptor",
  834. flush_buffers=False)
  835. class TextInputFilter(Stream):
  836. """Filtering input stream for universal newline translation."""
  837. def __init__(self, base):
  838. self.base = base # must implement read, may implement tell, seek
  839. self.do_read = base.read
  840. self.atcr = False # Set when last char read was \r
  841. self.buf = "" # Optional one-character read-ahead buffer
  842. self.CR = False
  843. self.NL = False
  844. self.CRLF = False
  845. def getnewlines(self):
  846. return self.CR * 1 + self.NL * 2 + self.CRLF * 4
  847. def read(self, n):
  848. """Read up to n bytes."""
  849. if self.buf:
  850. assert not self.atcr
  851. data = self.buf
  852. self.buf = ""
  853. else:
  854. data = self.do_read(n)
  855. # The following whole ugly mess is because we need to keep track of
  856. # exactly which line separators we have seen for self.newlines,
  857. # grumble, grumble. This has an interesting corner-case.
  858. #
  859. # Consider a file consisting of exactly one line ending with '\r'.
  860. # The first time you read(), you will not know whether it is a
  861. # CR separator or half of a CRLF separator. Neither will be marked
  862. # as seen, since you are waiting for your next read to determine
  863. # what you have seen. But there's no more to read ...
  864. if self.atcr:
  865. if data.startswith("\n"):
  866. data = data[1:]
  867. self.CRLF = True
  868. if not data:
  869. data = self.do_read(n)
  870. else:
  871. self.CR = True
  872. self.atcr = False
  873. for i in range(len(data)):
  874. if data[i] == '\n':
  875. if i > 0 and data[i-1] == '\r':
  876. self.CRLF = True
  877. else:
  878. self.NL = True
  879. elif data[i] == '\r':
  880. if i < len(data)-1 and data[i+1] != '\n':
  881. self.CR = True
  882. if "\r" in data:
  883. self.atcr = data.endswith("\r")
  884. data = replace_crlf_with_lf(data)
  885. return data
  886. def readline(self):
  887. result = []
  888. while True:
  889. # "peeks" on the underlying stream to see how many characters
  890. # we can safely read without reading past an end-of-line
  891. peeked = self.base.peek()
  892. pn = peeked.find("\n")
  893. pr = peeked.find("\r")
  894. if pn < 0: pn = len(peeked)
  895. if pr < 0: pr = len(peeked)
  896. c = self.read(min(pn, pr) + 1)
  897. if not c:
  898. break
  899. result.append(c)
  900. if c.endswith('\n'):
  901. break
  902. return ''.join(result)
  903. def seek(self, offset, whence):
  904. """Seeks based on knowledge that does not come from a tell()
  905. may go to the wrong place, since the number of
  906. characters seen may not match the number of characters
  907. that are actually in the file (where \r\n is the
  908. line separator). Arithmetics on the result
  909. of a tell() that moves beyond a newline character may in the
  910. same way give the wrong result.
  911. """
  912. if whence == 1:
  913. offset -= len(self.buf) # correct for already-read-ahead character
  914. self.base.seek(offset, whence)
  915. self.atcr = False
  916. self.buf = ""
  917. def tell(self):
  918. pos = self.base.tell()
  919. if self.atcr:
  920. # Must read the next byte to see if it's \n,
  921. # because then we must report the next position.
  922. assert not self.buf
  923. self.buf = self.do_read(1)
  924. pos += 1
  925. self.atcr = False
  926. if self.buf == "\n":
  927. self.CRLF = True
  928. self.buf = ""
  929. return pos - len(self.buf)
  930. def flush_buffers(self):
  931. if self.atcr:
  932. assert not self.buf
  933. self.buf = self.do_read(1)
  934. self.atcr = False
  935. if self.buf == "\n":
  936. self.buf = ""
  937. if self.buf:
  938. try:
  939. self.base.seek(-len(self.buf), 1)
  940. except MyNotImplementedError:
  941. pass
  942. else:
  943. self.buf = ""
  944. def peek(self):
  945. return self.buf
    # The remaining operations are forwarded to the base stream via
    # PassThrough (defined elsewhere in this module).
    # NOTE(review): flush_buffers=True presumably makes the wrapper call
    # self.flush_buffers() before delegating, so that read-ahead data is
    # given back before the base stream is written to, truncated or
    # flushed -- confirm against the PassThrough definition.
    write = PassThrough("write", flush_buffers=True)
    truncate = PassThrough("truncate", flush_buffers=True)
    flush = PassThrough("flush", flush_buffers=True)
    # These do not depend on the read position and are declared with
    # flush_buffers=False.
    flushable = PassThrough("flushable", flush_buffers=False)
    close = PassThrough("close", flush_buffers=False)
    try_to_find_file_descriptor = PassThrough("try_to_find_file_descriptor",
                                              flush_buffers=False)
  953. class TextOutputFilter(Stream):
  954. """Filtering output stream for universal newline translation."""
  955. def __init__(self, base, linesep=os.linesep):
  956. assert linesep in ["\n", "\r\n", "\r"]
  957. self.base = base # must implement write, may implement seek, tell
  958. self.linesep = linesep
  959. def write(self, data):
  960. data = replace_char_with_str(data, "\n", self.linesep)
  961. self.base.write(data)
  962. tell = PassThrough("tell", flush_buffers=False)
  963. seek = PassThrough("seek", flush_buffers=False)
  964. read = PassThrough("read", flush_buffers=False)
  965. readall = PassThrough("readall", flush_buffers=False)
  966. readline = PassThrough("readline", flush_buffers=False)
  967. truncate = PassThrough("truncate", flush_buffers=False)
  968. flush = PassThrough("flush", flush_buffers=False)
  969. flushable = PassThrough("flushable", flush_buffers=False)
  970. close = PassThrough("close", flush_buffers=False)
  971. try_to_find_file_descriptor = PassThrough("try_to_find_file_descriptor",
  972. flush_buffers=False)
  973. class CallbackReadFilter(Stream):
  974. """Pseudo read filter that invokes a callback before blocking on a read.
  975. """
  976. def __init__(self, base, callback):
  977. self.base = base
  978. self.callback = callback
  979. def flush_buffers(self):
  980. self.callback()
  981. tell = PassThrough("tell", flush_buffers=False)
  982. seek = PassThrough("seek", flush_buffers=False)
  983. read = PassThrough("read", flush_buffers=True)
  984. readall = PassThrough("readall", flush_buffers=True)
  985. readline = PassThrough("readline", flush_buffers=True)
  986. peek = PassThrough("peek", flush_buffers=False)
  987. flush = PassThrough("flush", flush_buffers=False)
  988. flushable = PassThrough("flushable", flush_buffers=False)
  989. close = PassThrough("close", flush_buffers=False)
  990. write = PassThrough("write", flush_buffers=False)
  991. truncate = PassThrough("truncate", flush_buffers=False)
  992. getnewlines= PassThrough("getnewlines",flush_buffers=False)
  993. try_to_find_file_descriptor = PassThrough("try_to_find_file_descriptor",
  994. flush_buffers=False)
  995. # _________________________________________________
# The following classes are _not_ RPython!
  997. class DecodingInputFilter(Stream):
  998. """Filtering input stream that decodes an encoded file."""
  999. def __init__(self, base, encoding="utf8", errors="strict"):
  1000. """NOT_RPYTHON"""
  1001. self.base = base
  1002. self.do_read = base.read
  1003. self.encoding = encoding
  1004. self.errors = errors
  1005. def read(self, n):
  1006. """Read *approximately* n bytes, then decode them.
  1007. Under extreme circumstances,
  1008. the return length could be longer than n!
  1009. Always return a unicode string.
  1010. This does *not* translate newlines;
  1011. you can stack TextInputFilter.
  1012. """
  1013. data = self.do_read(n)
  1014. try:
  1015. return data.decode(self.encoding, self.errors)
  1016. except ValueError:
  1017. # XXX Sigh. decode() doesn't handle incomplete strings well.
  1018. # Use the retry strategy from codecs.StreamReader.
  1019. for i in range(9):
  1020. more = self.do_read(1)
  1021. if not more:
  1022. raise
  1023. data += more
  1024. try:
  1025. return data.decode(self.encoding, self.errors)
  1026. except ValueError:
  1027. pass
  1028. raise
  1029. write = PassThrough("write", flush_buffers=False)
  1030. truncate = PassThrough("truncate", flush_buffers=False)
  1031. flush = PassThrough("flush", flush_buffers=False)
  1032. flushable = PassThrough("flushable", flush_buffers=False)
  1033. close = PassThrough("close", flush_buffers=False)
  1034. try_to_find_file_descriptor = PassThrough("try_to_find_file_descriptor",
  1035. flush_buffers=False)
  1036. class EncodingOutputFilter(Stream):
  1037. """Filtering output stream that writes to an encoded file."""
  1038. def __init__(self, base, encoding="utf8", errors="strict"):
  1039. """NOT_RPYTHON"""
  1040. self.base = base
  1041. self.do_write = base.write
  1042. self.encoding = encoding
  1043. self.errors = errors
  1044. def write(self, chars):
  1045. if isinstance(chars, str):
  1046. chars = unicode(chars) # Fail if it's not ASCII
  1047. self.do_write(chars.encode(self.encoding, self.errors))
  1048. tell = PassThrough("tell", flush_buffers=False)
  1049. seek = PassThrough("seek", flush_buffers=False)
  1050. read = PassThrough("read", flush_buffers=False)
  1051. readall = PassThrough("readall", flush_buffers=False)
  1052. readline = PassThrough("readline", flush_buffers=False)
  1053. truncate = PassThrough("truncate", flush_buffers=False)
  1054. flush = PassThrough("flush", flush_buffers=False)
  1055. flushable = PassThrough("flushable", flush_buffers=False)
  1056. close = PassThrough("close", flush_buffers=False)
  1057. try_to_find_file_descriptor = PassThrough("try_to_find_file_descriptor",
  1058. flush_buffers=False)