PageRenderTime 115ms CodeModel.GetById 72ms RepoModel.GetById 0ms app.codeStats 1ms

/pypy/module/_file/interp_file.py

https://bitbucket.org/pypy/pypy/
Python | 650 lines | 628 code | 11 blank | 11 comment | 16 complexity | 8fb3308dd0b083183ae948b5362dd2d2 MD5 | raw file
Possible License(s): AGPL-3.0, BSD-3-Clause, Apache-2.0
  1. import py
  2. import os
  3. import stat
  4. import errno
  5. from rpython.rlib import streamio
  6. from rpython.rlib.objectmodel import specialize
  7. from rpython.rlib.rarithmetic import r_longlong
  8. from rpython.rlib.rstring import StringBuilder
  9. from pypy.module._file.interp_stream import W_AbstractStream, StreamErrors
  10. from pypy.module.posix.interp_posix import dispatch_filename
  11. from pypy.interpreter.error import OperationError, oefmt, wrap_oserror
  12. from pypy.interpreter.typedef import (TypeDef, GetSetProperty,
  13. interp_attrproperty, make_weakref_descr, interp_attrproperty_w)
  14. from pypy.interpreter.gateway import interp2app, unwrap_spec
  15. from pypy.interpreter.baseobjspace import BufferInterfaceNotFound
  16. from pypy.interpreter.streamutil import wrap_streamerror, wrap_oserror_as_ioerror
class W_File(W_AbstractStream):
    """An interp-level file object.  This implements the same interface as
    the app-level files, with the following differences:

      * method names are prefixed with 'file_'
      * the 'normal' app-level constructor is implemented by file___init__().
      * the methods with the 'direct_' prefix should be used if the caller
        locks and unlocks the file itself, and takes care of StreamErrors.
    """

    # Default values until the file is successfully opened
    stream = None        # underlying stream object; None means "closed"
    w_name = None        # wrapped file name, or None if unknown
    mode = "<uninitialized file>"
    binary = False       # True when 'b' is part of the mode string
    readable = False
    writable = False
    softspace = 0        # Required according to file object docs
    encoding = None      # set via set_file_encoding(); used by direct_write()
    errors = None        # set via set_file_encoding(); used by direct_write()
    fd = -1              # file descriptor, -1 while closed
    cffi_fileobj = None  # pypy/module/_cffi_backend
    w_tb = None          # String representation of the traceback at creation time
    newlines = 0         # Updated when the stream is closed; bit flags that
                         # descr_file_newlines() decodes (1='\r', 2='\n',
                         # 4='\r\n')
    def __init__(self, space):
        """Attach the file to `space` and register it for finalization.

        When ``space.sys.track_resources`` is set, the traceback at creation
        time is stored in ``w_tb`` so that _finalize_() can report it for a
        file that was never closed.
        """
        self.space = space
        self.register_finalizer(space)
        if self.space.sys.track_resources:
            self.w_tb = self.space.format_traceback()
    def _finalize_(self):
        """Finalizer: close the stream if the file is still open.

        Does nothing for an already-closed file.  When
        ``space.sys.track_resources`` is set, a resource warning carrying the
        creation traceback is emitted first.  A StreamErrors raised by the
        close is converted with wrap_streamerror() and re-raised.
        """
        # assume that the file and stream objects are only visible in the
        # thread that runs _finalize_, so no race condition should be
        # possible and no locking is done here.
        if self.stream is None:
            return
        if self.space.sys.track_resources:
            w_repr = self.space.repr(self)
            str_repr = self.space.str_w(w_repr)
            w_msg = self.space.wrap("WARNING: unclosed file: " + str_repr)
            self.space.resource_warning(w_msg, self.w_tb)
        #
        try:
            self.direct_close()
        except StreamErrors as e:
            operr = wrap_streamerror(self.space, e, self.w_name)
            raise operr
  61. def fdopenstream(self, stream, fd, mode, w_name=None):
  62. self.fd = fd
  63. self.mode = mode
  64. self.binary = "b" in mode
  65. if 'r' in mode or 'U' in mode:
  66. self.readable = True
  67. if 'w' in mode or 'a' in mode:
  68. self.writable = True
  69. if '+' in mode:
  70. self.readable = self.writable = True
  71. if w_name is not None:
  72. self.w_name = w_name
  73. self.stream = stream
  74. if stream.flushable():
  75. getopenstreams(self.space)[stream] = None
  76. def check_not_dir(self, fd):
  77. try:
  78. st = os.fstat(fd)
  79. except OSError:
  80. pass
  81. else:
  82. if (stat.S_ISDIR(st[0])):
  83. ose = OSError(errno.EISDIR, '')
  84. raise wrap_oserror_as_ioerror(self.space, ose, self.w_name)
  85. def check_mode_ok(self, mode):
  86. if (not mode or mode[0] not in ['r', 'w', 'a', 'U'] or
  87. ('U' in mode and ('w' in mode or 'a' in mode))):
  88. space = self.space
  89. raise oefmt(space.w_ValueError, "invalid mode: '%s'", mode)
    def check_closed(self):
        """Raise an app-level ValueError if the file has no stream (closed)."""
        if self.stream is None:
            raise oefmt(self.space.w_ValueError,
                        "I/O operation on closed file")
    def check_readable(self):
        """Raise an app-level IOError if the file is not flagged readable."""
        if not self.readable:
            raise oefmt(self.space.w_IOError, "File not open for reading")
    def check_writable(self):
        """Raise an app-level IOError if the file is not flagged writable."""
        if not self.writable:
            raise oefmt(self.space.w_IOError, "File not open for writing")
    def getstream(self):
        """Return self.stream or raise an app-level ValueError if missing
        (i.e. if the file is closed)."""
        # check_closed() does the raising; past it the stream is not None
        self.check_closed()
        return self.stream
    def _when_reading_first_flush(self, otherfile):
        """Flush otherfile before reading from self."""
        # Wrap our stream in a CallbackReadFilter that is given
        # otherfile._try_to_flush as its callback.
        # NOTE(review): presumably the filter invokes the callback before
        # each read -- confirm against rpython.rlib.streamio.
        self.stream = streamio.CallbackReadFilter(self.stream,
                                                  otherfile._try_to_flush)
    def _try_to_flush(self):
        """Flush the stream if the file is open; do nothing if it is closed."""
        stream = self.stream
        if stream is not None:
            stream.flush()
    # ____________________________________________________________
    #
    # The 'direct_' methods assume that the caller already acquired the
    # file lock.  They don't convert StreamErrors to OperationErrors either.
  117. @unwrap_spec(mode=str, buffering=int)
  118. def direct___init__(self, w_name, mode='r', buffering=-1):
  119. self.direct_close()
  120. self.w_name = w_name
  121. self.check_mode_ok(mode)
  122. stream = dispatch_filename(streamio.open_file_as_stream)(
  123. self.space, w_name, mode, buffering, signal_checker(self.space))
  124. fd = stream.try_to_find_file_descriptor()
  125. self.check_not_dir(fd)
  126. self.fdopenstream(stream, fd, mode)
    def direct___enter__(self):
        """Return self; raises an app-level ValueError on a closed file."""
        self.check_closed()
        return self
    def file__exit__(self, __args__):
        """__exit__(*excinfo) -> None. Closes the file."""
        # Goes through an app-level method call on "close" rather than
        # calling direct_close() directly; the exception info in __args__
        # is not inspected.
        self.space.call_method(self, "close")
        # can't return close() value
        return None
    def direct_fdopen(self, fd, mode='r', buffering=-1):
        """(Re)initialize the file around the existing descriptor `fd`.

        Any stream currently attached is closed first; the name becomes the
        wrapped string '<fdopen>'.  Raises through check_mode_ok() for a bad
        mode and through check_not_dir() if `fd` is a directory.
        """
        self.direct_close()
        self.w_name = self.space.wrap('<fdopen>')
        self.check_mode_ok(mode)
        stream = streamio.fdopen_as_stream(fd, mode, buffering,
                                           signal_checker(self.space))
        self.check_not_dir(fd)
        self.fdopenstream(stream, fd, mode)
  143. def direct_close(self):
  144. stream = self.stream
  145. if stream is not None:
  146. self.newlines = self.stream.getnewlines()
  147. self.stream = None
  148. self.fd = -1
  149. openstreams = getopenstreams(self.space)
  150. try:
  151. del openstreams[stream]
  152. except KeyError:
  153. pass
  154. # close the stream. If cffi_fileobj is None, we close the
  155. # underlying fileno too. Otherwise, we leave that to
  156. # cffi_fileobj.close().
  157. cffifo = self.cffi_fileobj
  158. self.cffi_fileobj = None
  159. stream.close1(cffifo is None)
  160. if cffifo is not None:
  161. cffifo.close()
    def direct_fileno(self):
        """Return the stored file descriptor; ValueError if closed."""
        self.getstream()     # check if the file is still open
        return self.fd
    def direct_flush(self):
        """Flush the underlying stream; ValueError if the file is closed."""
        self.getstream().flush()
    def direct_next(self):
        """Return the next line, raising app-level StopIteration at EOF.

        An empty string from readline() is taken as end of file.
        """
        line = self.getstream().readline()
        if line == '':
            raise OperationError(self.space.w_StopIteration, self.space.w_None)
        return line
    @unwrap_spec(n=int)
    def direct_read(self, n=-1):
        """Read and return up to `n` bytes; everything if `n` is negative.

        For n >= 0 the stream is read repeatedly until `n` bytes were
        collected or a read returns no data.  If a would-block error occurs
        after some data was already obtained, that partial data is returned
        instead of raising; otherwise the OSError propagates.
        """
        stream = self.getstream()
        self.check_readable()
        if n < 0:
            return stream.readall()
        else:
            result = StringBuilder(n)
            while n > 0:
                try:
                    data = stream.read(n)
                except OSError as e:
                    # a special-case only for read() (similar to CPython, which
                    # also loses partial data with other methods): if we get
                    # EAGAIN after already some data was received, return it.
                    # Note that we can get EAGAIN while there is buffered data
                    # waiting; read that too.
                    if is_wouldblock_error(e.errno):
                        m = stream.count_buffered_bytes()
                        if m > 0:
                            result.append(stream.read(min(n, m)))
                        got = result.build()
                        if len(got) > 0:
                            return got
                    # nothing collected, or a different error: re-raise
                    raise
                if not data:
                    # empty read: stop
                    break
                n -= len(data)
                result.append(data)
            return result.build()
    @unwrap_spec(size=int)
    def direct_readline(self, size=-1):
        """Read one line, limited to `size` bytes when `size` is >= 0.

        With a negative size this is simply stream.readline().  Otherwise
        the line is assembled piecewise, using stream.peek() so that no
        read goes past a newline or past the remaining size budget.
        """
        stream = self.getstream()
        self.check_readable()
        if size < 0:
            return stream.readline()
        else:
            # very inefficient unless there is a peek()
            result = StringBuilder()
            while size > 0:
                # "peeks" on the underlying stream to see how many chars
                # we can safely read without reading past an end-of-line
                startindex, peeked = stream.peek()
                assert 0 <= startindex <= len(peeked)
                endindex = startindex + size
                pn = peeked.find("\n", startindex, endindex)
                if pn < 0:
                    # no newline within reach: read up to the size limit or
                    # one char past the peeked data, whichever is smaller
                    pn = min(endindex - 1, len(peeked))
                c = stream.read(pn - startindex + 1)
                if not c:
                    # empty read: stop
                    break
                result.append(c)
                if c.endswith('\n'):
                    break
                size -= len(c)
            return result.build()
    @unwrap_spec(size=int)
    def direct_readlines(self, size=0):
        """Return a list of lines, each keeping its trailing newline.

        With size <= 0 the whole stream is read.  Otherwise `size` bytes are
        requested and, if that ends in the middle of a line, the rest of
        that line is read as well.
        """
        stream = self.getstream()
        self.check_readable()
        # this is implemented as: .read().split('\n')
        # except that it keeps the \n in the resulting strings
        if size <= 0:
            data = stream.readall()
        else:
            data = stream.read(size)
        result = []
        splitfrom = 0    # start index of the line currently being scanned
        for i in range(len(data)):
            if data[i] == '\n':
                result.append(data[splitfrom : i + 1])
                splitfrom = i + 1
        #
        if splitfrom < len(data):
            # there is a partial line at the end.  If size > 0, it is likely
            # to be because the 'read(size)' returned data up to the middle
            # of a line.  In that case, use 'readline()' to read until the
            # end of the current line.
            data = data[splitfrom:]
            if size > 0:
                data += stream.readline()
            result.append(data)
        return result
    @unwrap_spec(offset=r_longlong, whence=int)
    def direct_seek(self, offset, whence=0):
        """Forward seek(offset, whence) to the stream; ValueError if closed."""
        self.getstream().seek(offset, whence)
    def direct_tell(self):
        """Return the stream's tell() value; ValueError if closed."""
        return self.getstream().tell()
    def direct_truncate(self, w_size=None):  # note: a wrapped size!
        """Truncate the stream to `w_size`, or to tell() when it is None.

        Requires an open, writable file.  `w_size` is a wrapped object that
        is unwrapped here with space.r_longlong_w().
        """
        stream = self.getstream()
        self.check_writable()
        space = self.space
        if space.is_none(w_size):
            size = stream.tell()
        else:
            size = space.r_longlong_w(w_size)
        stream.truncate(size)
    def direct_write(self, w_data):
        """Write the wrapped object `w_data` to the file.

        In binary mode the data is obtained through the 's*' argument
        format.  In text mode a unicode object is first encoded by calling
        its encode() method with self.encoding / self.errors (None is passed
        for whichever is unset), and the result is read with charbuf_w().
        """
        space = self.space
        self.check_writable()
        if self.binary:
            data = space.getarg_w('s*', w_data).as_str()
        else:
            if space.isinstance_w(w_data, space.w_unicode):
                w_errors = w_encoding = None
                if self.encoding:
                    w_encoding = space.wrap(self.encoding)
                if self.errors:
                    w_errors = space.wrap(self.errors)
                w_data = space.call_method(w_data, "encode",
                                           w_encoding, w_errors)
            data = space.charbuf_w(w_data)
        self.do_direct_write(data)
    def do_direct_write(self, data):
        """Write the interp-level string `data`, resetting softspace to 0.

        Raises an app-level ValueError (via getstream()) if the file is
        closed; note that softspace is reset before that check.
        """
        self.softspace = 0
        self.getstream().write(data)
    def direct___iter__(self):
        """Return self as its own iterator; ValueError if closed."""
        self.getstream()     # only called for its closed-file check
        return self
    # xreadlines() is the same operation as __iter__()
    direct_xreadlines = direct___iter__
    def direct_isatty(self):
        """Return os.isatty() of the stored descriptor; ValueError if closed."""
        self.getstream()     # check if the file is still open
        return os.isatty(self.fd)
    def direct_readinto(self, w_rwbuffer):
        """Delegate to pypy.module._file.readinto.direct_readinto()."""
        # imported inside the method rather than at module level
        from pypy.module._file.readinto import direct_readinto
        return direct_readinto(self, w_rwbuffer)
    # ____________________________________________________________
    #
    # The 'file_' methods are the ones exposed to app-level.
    def file_fdopen(self, fd, mode="r", buffering=-1):
        """Call direct_fdopen(), converting an OSError with wrap_oserror()."""
        try:
            self.direct_fdopen(fd, mode, buffering)
        except OSError as e:
            raise wrap_oserror(self.space, e)
    # Names (without the 'file_' prefix) of the wrappers generated by _decl();
    # read back when W_File.typedef is built.
    _exposed_method_names = []

    def _decl(class_scope, name, docstring,
              wrapresult="space.wrap(result)"):
        # hack hack to build a wrapper around the direct_xxx methods.
        # The wrapper adds lock/unlock calls and a space.wrap() on
        # the result, conversion of stream errors to OperationErrors,
        # and has the specified docstring and unwrap_spec.
        #
        # class_scope -- the namespace holding 'direct_<name>'; the generated
        #                'file_<name>' is stored into it (callers pass the
        #                class body's locals())
        # name        -- method name without the 'direct_' / 'file_' prefix
        # docstring   -- docstring given to the generated wrapper
        # wrapresult  -- source text of the expression the wrapper returns;
        #                it may refer to 'space' and 'result'
        direct_fn = class_scope['direct_' + name]
        co = direct_fn.func_code
        argnames = co.co_varnames[:co.co_argcount]
        defaults = direct_fn.func_defaults or ()
        unwrap_spec = getattr(direct_fn, 'unwrap_spec', None)

        # Rebuild the signature text of direct_fn, defaults included.
        args = []
        for i, argname in enumerate(argnames):
            try:
                # defaults line up with the *last* arguments, hence the
                # negative index; arguments without a default fall off the
                # front of the tuple and raise IndexError
                default = defaults[-len(argnames) + i]
            except IndexError:
                args.append(argname)
            else:
                args.append('%s=%r' % (argname, default))
        sig = ', '.join(args)
        assert argnames[0] == 'self'
        callsig = ', '.join(argnames[1:])

        # The template is filled in from this function's local variables.
        src = py.code.Source("""
            def file_%(name)s(%(sig)s):
                %(docstring)r
                space = self.space
                self.lock()
                try:
                    try:
                        result = self.direct_%(name)s(%(callsig)s)
                    except StreamErrors, e:
                        raise wrap_streamerror(space, e, self.w_name)
                finally:
                    self.unlock()
                return %(wrapresult)s
        """ % locals())
        # Define the wrapper with module globals, storing it in class_scope.
        exec str(src) in globals(), class_scope
        if unwrap_spec is not None:
            class_scope['file_' + name].unwrap_spec = unwrap_spec
        class_scope['_exposed_method_names'].append(name)
    # Generate the app-level 'file_<name>' wrapper for each 'direct_<name>'
    # method defined above; the string is the wrapper's docstring.
    _decl(locals(), "__init__", """Opens a file.""")

    _decl(locals(), "__enter__", """__enter__() -> self.""")

    _decl(locals(), "close",
        """close() -> None or (perhaps) an integer. Close the file.
Sets data attribute .closed to True. A closed file cannot be used for
further I/O operations. close() may be called more than once without
error. Some kinds of file objects (for example, opened by popen())
may return an exit status upon closing.""")
        # NB. close() needs to use the stream lock to avoid double-closes or
        # close-while-another-thread-uses-it.

    _decl(locals(), "fileno",
        '''fileno() -> integer "file descriptor".
This is needed for lower-level file interfaces, such os.read().''')

    _decl(locals(), "flush",
        """flush() -> None. Flush the internal I/O buffer.""")

    _decl(locals(), "isatty",
        """isatty() -> true or false. True if the file is connected to a tty device.""")

    _decl(locals(), "next",
        """next() -> the next line in the file, or raise StopIteration""")

    _decl(locals(), "read",
        """read([size]) -> read at most size bytes, returned as a string.
If the size argument is negative or omitted, read until EOF is reached.
Notice that when in non-blocking mode, less data than what was requested
may be returned, even if no size parameter was given.""")

    _decl(locals(), "readinto",
        """readinto(buf) -> length. Read into the given read-write buffer.""")

    _decl(locals(), "readline",
        """readline([size]) -> next line from the file, as a string.
Retain newline. A non-negative size argument limits the maximum
number of bytes to return (an incomplete line may be returned then).
Return an empty string at EOF.""")

    # readlines() returns an interp-level list of strings, so it needs a
    # dedicated wrapping expression instead of space.wrap(result).
    _decl(locals(), "readlines",
        """readlines([size]) -> list of strings, each a line from the file.
Call readline() repeatedly and return a list of the lines so read.
The optional size argument, if given, is an approximate bound on the
total number of bytes in the lines returned.""",
        wrapresult = "wrap_list_of_str(space, result)")

    _decl(locals(), "seek",
        """seek(offset[, whence]) -> None. Move to new file position.
Argument offset is a byte count. Optional argument whence defaults to
0 (offset from start of file, offset should be >= 0); other values are 1
(move relative to current position, positive or negative), and 2 (move
relative to end of file, usually negative, although many platforms allow
seeking beyond the end of a file). If the file is opened in text mode,
only offsets returned by tell() are legal. Use of other offsets causes
undefined behavior.
Note that not all file objects are seekable.""")

    _decl(locals(), "tell",
        "tell() -> current file position, an integer (may be a long integer).")

    _decl(locals(), "truncate",
        """truncate([size]) -> None. Truncate the file to at most size bytes.
Size defaults to the current file position, as returned by tell().""")

    _decl(locals(), "write",
        """write(str) -> None. Write string str to file.
Note that due to buffering, flush() or close() may be needed before
the file on disk reflects the data written.""")

    _decl(locals(), "__iter__",
        """Iterating over files, as in 'for line in f:', returns each line of
the file one by one.""")

    _decl(locals(), "xreadlines",
        """xreadlines() -> returns self.
For backward compatibility. File objects now include the performance
optimizations previously implemented in the xreadlines module.""")
  410. def file__repr__(self):
  411. if self.stream is None:
  412. head = "closed"
  413. else:
  414. head = "open"
  415. info = "%s file %s, mode '%s'" % (
  416. head,
  417. self.getdisplayname(),
  418. self.mode)
  419. return self.getrepr(self.space, info)
  420. def getdisplayname(self):
  421. space = self.space
  422. w_name = self.w_name
  423. if w_name is None:
  424. return '?'
  425. else:
  426. return space.str_w(space.repr(w_name))
    def file_writelines(self, w_lines):
        """writelines(sequence_of_strings) -> None. Write the strings to the file.
Note that newlines are not added. The sequence can be any iterable object
producing strings. This is equivalent to calling write() for each string."""

        space = self.space
        self.check_closed()
        self.check_writable()
        lines = space.fixedview(w_lines)
        # First pass: convert every item that is not a str to a wrapped
        # string, so that a bad item raises TypeError before anything has
        # been written.
        for i, w_line in enumerate(lines):
            if not space.isinstance_w(w_line, space.w_str):
                try:
                    if self.binary:
                        line = w_line.readbuf_w(space).as_str()
                    else:
                        line = w_line.charbuf_w(space)
                except BufferInterfaceNotFound:
                    raise oefmt(space.w_TypeError,
                                "writelines() argument must be a sequence of "
                                "strings")
                else:
                    lines[i] = space.wrap(line)
        # Second pass: write each item through file_write(), the wrapper
        # generated by _decl() for direct_write().
        for w_line in lines:
            self.file_write(w_line)
  450. # ____________________________________________________________
# App-level file.__new__: allocate a W_File (or app-level subclass) instance
# and run the interp-level constructor only.  The arguments in __args__ are
# not used here; no file is opened.
def descr_file__new__(space, w_subtype, __args__):
    file = space.allocate_instance(W_File, w_subtype)
    W_File.__init__(file, space)
    return space.wrap(file)
# App-level classmethod file.fdopen(fd, mode, buffering): allocate a new
# instance and attach it to the existing descriptor via file_fdopen().
@unwrap_spec(fd=int, mode=str, buffering=int)
def descr_file_fdopen(space, w_subtype, fd, mode='r', buffering=-1):
    file = space.allocate_instance(W_File, w_subtype)
    W_File.__init__(file, space)
    file.file_fdopen(fd, mode, buffering)
    return space.wrap(file)
# Getter for the 'closed' attribute: a file is closed when it has no stream.
def descr_file_closed(space, file):
    return space.wrap(file.stream is None)
  463. def descr_file_newlines(space, file):
  464. if file.stream:
  465. newlines = file.stream.getnewlines()
  466. else:
  467. newlines = file.newlines
  468. if newlines == 0:
  469. return space.w_None
  470. elif newlines == 1:
  471. return space.wrap("\r")
  472. elif newlines == 2:
  473. return space.wrap("\n")
  474. elif newlines == 4:
  475. return space.wrap("\r\n")
  476. result = []
  477. if newlines & 1:
  478. result.append(space.wrap('\r'))
  479. if newlines & 2:
  480. result.append(space.wrap('\n'))
  481. if newlines & 4:
  482. result.append(space.wrap('\r\n'))
  483. return space.newtuple(result[:])
# Getter for the 'softspace' attribute: wraps the interp-level integer.
def descr_file_softspace(space, file):
    return space.wrap(file.softspace)
# Setter for the 'softspace' attribute: the new value is unwrapped as an int.
def descr_file_setsoftspace(space, file, w_newvalue):
    file.softspace = space.int_w(w_newvalue)
  488. # ____________________________________________________________
# App-level type definition of 'file'.  Hand-written entries come first; the
# trailing **dict(...) adds one interp2app entry per wrapper generated by
# W_File._decl(), using the names collected in W_File._exposed_method_names.
W_File.typedef = TypeDef(
    "file",
    __doc__ = """file(name[, mode[, buffering]]) -> file object
Open a file. The mode can be 'r', 'w' or 'a' for reading (default),
writing or appending. The file will be created if it doesn't exist
when opened for writing or appending; it will be truncated when
opened for writing. Add a 'b' to the mode for binary files.
Add a '+' to the mode to allow simultaneous reading and writing.
If the buffering argument is given, 0 means unbuffered, 1 means line
buffered, and larger numbers specify the buffer size.
Add a 'U' to mode to open the file for input with universal newline
support. Any line ending in the input file will be seen as a '\n'
in Python. Also, a file so opened gains the attribute 'newlines';
the value for this attribute is one of None (no newline read yet),
'\r', '\n', '\r\n' or a tuple containing all the newline types seen.
Note: open() is an alias for file().
""",
    __new__ = interp2app(descr_file__new__),
    fdopen = interp2app(descr_file_fdopen, as_classmethod=True),
    name = interp_attrproperty_w('w_name', cls=W_File, doc="file name"),
    mode = interp_attrproperty('mode', cls=W_File,
                               doc = "file mode ('r', 'U', 'w', 'a', "
                                     "possibly with 'b' or '+' added)"),
    encoding = interp_attrproperty('encoding', cls=W_File),
    errors = interp_attrproperty('errors', cls=W_File),
    # no setter is passed for 'closed' and 'newlines'
    closed = GetSetProperty(descr_file_closed, cls=W_File,
                            doc="True if the file is closed"),
    newlines = GetSetProperty(descr_file_newlines, cls=W_File,
                              doc="end-of-line convention used in this file"),
    softspace= GetSetProperty(descr_file_softspace,
                              descr_file_setsoftspace,
                              cls=W_File,
                              doc="Support for 'print'."),
    __repr__ = interp2app(W_File.file__repr__),
    writelines = interp2app(W_File.file_writelines),
    __exit__ = interp2app(W_File.file__exit__),
    __weakref__ = make_weakref_descr(W_File),
    **dict([(name, interp2app(getattr(W_File, 'file_' + name)))
            for name in W_File._exposed_method_names])
    )
  529. # ____________________________________________________________
def wrap_list_of_str(space, lst):
    """Wrap the interp-level list of strings `lst` via space.newlist_bytes().

    Referenced by name in the 'wrapresult' expression of the generated
    file_readlines() wrapper.
    """
    return space.newlist_bytes(lst)
class FileState:
    """Holder of the table of open streams; obtained with
    space.fromcache(FileState) in getopenstreams()."""
    def __init__(self, space):
        # Streams are used as keys (the values stored are None); entries are
        # added by W_File.fdopenstream() and removed by W_File.direct_close().
        self.openstreams = {}
def getopenstreams(space):
    """Return the 'openstreams' dict of the FileState cached on `space`."""
    return space.fromcache(FileState).openstreams
@specialize.memo()
def signal_checker(space):
    """Return a zero-argument callable that runs the signal check of the
    current execution context of `space`.

    The callable is handed to streamio when streams are opened.
    NOTE(review): specialize.memo() presumably makes RPython compute this
    once per space at translation time -- confirm.
    """
    def checksignals():
        space.getexecutioncontext().checksignals()
    return checksignals
  542. MAYBE_EAGAIN = getattr(errno, 'EAGAIN', None)
  543. MAYBE_EWOULDBLOCK = getattr(errno, 'EWOULDBLOCK', None)
  544. def is_wouldblock_error(errno):
  545. if MAYBE_EAGAIN is not None and errno == MAYBE_EAGAIN:
  546. return True
  547. if MAYBE_EWOULDBLOCK is not None and errno == MAYBE_EWOULDBLOCK:
  548. return True
  549. return False
# Store the encoding and error-handler names on a file object; direct_write()
# uses them when encoding unicode data in text mode.  Either may be None.
@unwrap_spec(w_file=W_File, encoding="str_or_None", errors="str_or_None")
def set_file_encoding(space, w_file, encoding=None, errors=None):
    w_file.encoding = encoding
    w_file.errors = errors