PageRenderTime 88ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/translator/sandbox/sandlib.py

https://bitbucket.org/dac_io/pypy
Python | 580 lines | 553 code | 8 blank | 19 comment | 12 complexity | bb8aeaef025a666070361823b8ae4748 MD5 | raw file
  1. """
  2. A Python library to execute and communicate with a subprocess that
  3. was translated from RPython code with --sandbox. This library is
  4. for the outer process, which can run CPython or PyPy.
  5. """
  6. import sys, os, posixpath, errno, stat, time
  7. import subprocess
  8. from pypy.tool.killsubprocess import killsubprocess
  9. from pypy.translator.sandbox.vfs import UID, GID
  10. import py
  11. def create_log():
  12. """Make and return a log for the sandbox to use, if needed."""
  13. # These imports are local to avoid importing pypy if we don't need to.
  14. from pypy.tool.ansi_print import AnsiLog
  15. class MyAnsiLog(AnsiLog):
  16. KW_TO_COLOR = {
  17. 'call': ((34,), False),
  18. 'result': ((34,), False),
  19. 'exception': ((34,), False),
  20. 'vpath': ((35,), False),
  21. 'timeout': ((1, 31), True),
  22. }
  23. log = py.log.Producer("sandlib")
  24. py.log.setconsumer("sandlib", MyAnsiLog())
  25. return log
  26. # Note: we use lib_pypy/marshal.py instead of the built-in marshal
  27. # for two reasons. The built-in module could be made to segfault
  28. # or be attackable in other ways by sending malicious input to
  29. # load(). Also, marshal.load(f) blocks with the GIL held when
  30. # f is a pipe with no data immediately available, preventing the
  31. # _waiting_thread from running.
  32. import pypy
  33. marshal = py.path.local(pypy.__file__).join('..', '..', 'lib_pypy',
  34. 'marshal.py').pyimport()
# Non-marshal result types: sentinel objects that a do_xxx() handler can
# carry in its 'resulttype' attribute.  write_message() compares against
# them by identity and hand-encodes the answer instead of calling
# marshal.dump().
RESULTTYPE_STATRESULT = object()
RESULTTYPE_LONGLONG = object()
  38. def read_message(f, timeout=None):
  39. # warning: 'timeout' is not really reliable and should only be used
  40. # for testing. Also, it doesn't work if the file f does any buffering.
  41. if timeout is not None:
  42. import select
  43. iwtd, owtd, ewtd = select.select([f], [], [], timeout)
  44. if not iwtd:
  45. raise EOFError("timed out waiting for data")
  46. return marshal.load(f)
  47. def write_message(g, msg, resulttype=None):
  48. if resulttype is None:
  49. if sys.version_info < (2, 4):
  50. marshal.dump(msg, g)
  51. else:
  52. marshal.dump(msg, g, 0)
  53. elif resulttype is RESULTTYPE_STATRESULT:
  54. # Hand-coded marshal for stat results that mimics what rmarshal expects.
  55. # marshal.dump(tuple(msg)) would have been too easy. rmarshal insists
  56. # on 64-bit ints at places, even when the value fits in 32 bits.
  57. import struct
  58. st = tuple(msg)
  59. fmt = "iIIiiiIfff"
  60. buf = []
  61. buf.append(struct.pack("<ci", '(', len(st)))
  62. for c, v in zip(fmt, st):
  63. if c == 'i':
  64. buf.append(struct.pack("<ci", c, v))
  65. elif c == 'I':
  66. buf.append(struct.pack("<cq", c, v))
  67. elif c == 'f':
  68. fstr = "%g" % v
  69. buf.append(struct.pack("<cB", c, len(fstr)))
  70. buf.append(fstr)
  71. g.write(''.join(buf))
  72. elif resulttype is RESULTTYPE_LONGLONG:
  73. import struct
  74. g.write(struct.pack("<cq", 'I', msg))
  75. else:
  76. raise Exception("Can't marshal: %r (%r)" % (msg, resulttype))
  77. # keep the table in sync with rsandbox.reraise_error()
  78. EXCEPTION_TABLE = [
  79. (1, OSError),
  80. (2, IOError),
  81. (3, OverflowError),
  82. (4, ValueError),
  83. (5, ZeroDivisionError),
  84. (6, MemoryError),
  85. (7, KeyError),
  86. (8, IndexError),
  87. (9, RuntimeError),
  88. ]
  89. def write_exception(g, exception, tb=None):
  90. for i, excclass in EXCEPTION_TABLE:
  91. if isinstance(exception, excclass):
  92. write_message(g, i)
  93. if excclass is OSError:
  94. error = exception.errno
  95. if error is None:
  96. error = errno.EPERM
  97. write_message(g, error)
  98. g.flush()
  99. break
  100. else:
  101. # just re-raise the exception
  102. raise exception.__class__, exception, tb
  103. def shortrepr(x):
  104. r = repr(x)
  105. if len(r) >= 80:
  106. r = r[:20] + '...' + r[-8:]
  107. return r
  108. def signal_name(n):
  109. import signal
  110. for key, value in signal.__dict__.items():
  111. if key.startswith('SIG') and not key.startswith('SIG_') and value == n:
  112. return key
  113. return 'signal %d' % (n,)
  114. class SandboxedProc(object):
  115. """Base class to control a sandboxed subprocess.
  116. Inherit from this class and implement all the do_xxx() methods
  117. for the external functions xxx that you want to support.
  118. """
  119. debug = False
  120. log = None
  121. os_level_sandboxing = False # Linux only: /proc/PID/seccomp
  122. def __init__(self, args, executable=None):
  123. """'args' should a sequence of argument for the subprocess,
  124. starting with the full path of the executable.
  125. """
  126. self.popen = subprocess.Popen(args, executable=executable,
  127. bufsize=-1,
  128. stdin=subprocess.PIPE,
  129. stdout=subprocess.PIPE,
  130. close_fds=True,
  131. env={})
  132. self.popenlock = None
  133. self.currenttimeout = None
  134. self.currentlyidlefrom = None
  135. if self.debug:
  136. self.log = create_log()
  137. def withlock(self, function, *args, **kwds):
  138. lock = self.popenlock
  139. if lock is not None:
  140. lock.acquire()
  141. try:
  142. return function(*args, **kwds)
  143. finally:
  144. if lock is not None:
  145. lock.release()
  146. def settimeout(self, timeout, interrupt_main=False):
  147. """Start a timeout that will kill the subprocess after the given
  148. amount of time. Only one timeout can be active at a time.
  149. """
  150. import thread
  151. def _waiting_thread():
  152. while True:
  153. while self.currentlyidlefrom is not None:
  154. time.sleep(1) # can't timeout while idle
  155. t = self.currenttimeout
  156. if t is None:
  157. return # cancelled
  158. delay = t - time.time()
  159. if delay <= 0.0:
  160. break # expired!
  161. time.sleep(min(delay*1.001, 1))
  162. if self.log:
  163. self.log.timeout("timeout!")
  164. self.kill()
  165. #if interrupt_main:
  166. # if hasattr(os, 'kill'):
  167. # import signal
  168. # os.kill(os.getpid(), signal.SIGINT)
  169. # else:
  170. # thread.interrupt_main()
  171. def _settimeout():
  172. need_new_thread = self.currenttimeout is None
  173. self.currenttimeout = time.time() + timeout
  174. if need_new_thread:
  175. thread.start_new_thread(_waiting_thread, ())
  176. if self.popenlock is None:
  177. self.popenlock = thread.allocate_lock()
  178. self.withlock(_settimeout)
  179. def canceltimeout(self):
  180. """Cancel the current timeout."""
  181. self.currenttimeout = None
  182. self.currentlyidlefrom = None
  183. def enter_idle(self):
  184. self.currentlyidlefrom = time.time()
  185. def leave_idle(self):
  186. def _postpone_timeout():
  187. t = self.currentlyidlefrom
  188. if t is not None and self.currenttimeout is not None:
  189. self.currenttimeout += time.time() - t
  190. try:
  191. self.withlock(_postpone_timeout)
  192. finally:
  193. self.currentlyidlefrom = None
  194. def poll(self):
  195. returncode = self.withlock(self.popen.poll)
  196. if returncode is not None:
  197. self.canceltimeout()
  198. return returncode
  199. def wait(self):
  200. returncode = self.withlock(self.popen.wait)
  201. if returncode is not None:
  202. self.canceltimeout()
  203. return returncode
  204. def kill(self):
  205. self.withlock(killsubprocess, self.popen)
  206. def handle_forever(self):
  207. returncode = self.handle_until_return()
  208. if returncode != 0:
  209. raise OSError("the sandboxed subprocess exited with code %d" % (
  210. returncode,))
  211. def handle_until_return(self):
  212. child_stdin = self.popen.stdin
  213. child_stdout = self.popen.stdout
  214. if self.os_level_sandboxing and sys.platform.startswith('linux'):
  215. # rationale: we wait until the child process started completely,
  216. # letting the C library do any system calls it wants for
  217. # initialization. When the RPython code starts up, it quickly
  218. # does its first system call. At this point we turn seccomp on.
  219. import select
  220. select.select([child_stdout], [], [])
  221. f = open('/proc/%d/seccomp' % self.popen.pid, 'w')
  222. print >> f, 1
  223. f.close()
  224. while True:
  225. try:
  226. fnname = read_message(child_stdout)
  227. args = read_message(child_stdout)
  228. except EOFError, e:
  229. break
  230. if self.log and not self.is_spam(fnname, *args):
  231. self.log.call('%s(%s)' % (fnname,
  232. ', '.join([shortrepr(x) for x in args])))
  233. try:
  234. answer, resulttype = self.handle_message(fnname, *args)
  235. except Exception, e:
  236. tb = sys.exc_info()[2]
  237. write_exception(child_stdin, e, tb)
  238. if self.log:
  239. if str(e):
  240. self.log.exception('%s: %s' % (e.__class__.__name__, e))
  241. else:
  242. self.log.exception('%s' % (e.__class__.__name__,))
  243. else:
  244. if self.log and not self.is_spam(fnname, *args):
  245. self.log.result(shortrepr(answer))
  246. try:
  247. write_message(child_stdin, 0) # error code - 0 for ok
  248. write_message(child_stdin, answer, resulttype)
  249. child_stdin.flush()
  250. except (IOError, OSError):
  251. # likely cause: subprocess is dead, child_stdin closed
  252. if self.poll() is not None:
  253. break
  254. else:
  255. raise
  256. returncode = self.wait()
  257. return returncode
  258. def is_spam(self, fnname, *args):
  259. # To hide the spamming amounts of reads and writes to stdin and stdout
  260. # in interactive sessions
  261. return (fnname in ('ll_os.ll_os_read', 'll_os.ll_os_write') and
  262. args[0] in (0, 1, 2))
  263. def handle_message(self, fnname, *args):
  264. if '__' in fnname:
  265. raise ValueError("unsafe fnname")
  266. try:
  267. handler = getattr(self, 'do_' + fnname.replace('.', '__'))
  268. except AttributeError:
  269. raise RuntimeError("no handler for this function")
  270. resulttype = getattr(handler, 'resulttype', None)
  271. return handler(*args), resulttype
  272. class SimpleIOSandboxedProc(SandboxedProc):
  273. """Control a sandboxed subprocess which is only allowed to read from
  274. its stdin and write to its stdout and stderr.
  275. """
  276. _input = None
  277. _output = None
  278. _error = None
  279. inputlogfile = None
  280. def communicate(self, input=None):
  281. """Send data to stdin. Read data from stdout and stderr,
  282. until end-of-file is reached. Wait for process to terminate.
  283. """
  284. import cStringIO
  285. if input:
  286. if isinstance(input, str):
  287. input = cStringIO.StringIO(input)
  288. self._input = input
  289. self._output = cStringIO.StringIO()
  290. self._error = cStringIO.StringIO()
  291. self.handle_forever()
  292. output = self._output.getvalue()
  293. self._output = None
  294. error = self._error.getvalue()
  295. self._error = None
  296. return (output, error)
  297. def interact(self, stdin=None, stdout=None, stderr=None):
  298. """Interact with the subprocess. By default, stdin, stdout and
  299. stderr are set to the ones from 'sys'."""
  300. import sys
  301. self._input = stdin or sys.stdin
  302. self._output = stdout or sys.stdout
  303. self._error = stderr or sys.stderr
  304. returncode = self.handle_until_return()
  305. if returncode != 0:
  306. if os.name == 'posix' and returncode < 0:
  307. print >> self._error, "[Subprocess killed by %s]" % (
  308. signal_name(-returncode),)
  309. else:
  310. print >> self._error, "[Subprocess exit code: %d]" % (
  311. returncode,)
  312. self._input = None
  313. self._output = None
  314. self._error = None
  315. return returncode
  316. def setlogfile(self, filename):
  317. self.inputlogfile = open(filename, 'a')
  318. def do_ll_os__ll_os_read(self, fd, size):
  319. if fd == 0:
  320. if self._input is None:
  321. return ""
  322. elif (getattr(self, 'virtual_console_isatty', False) or
  323. self._input.isatty()):
  324. # don't wait for all 'size' chars if reading from a tty,
  325. # to avoid blocking. Instead, stop after reading a line.
  326. # For now, waiting at the interactive console is the
  327. # only time that counts as idle.
  328. self.enter_idle()
  329. try:
  330. inputdata = self._input.readline(size)
  331. finally:
  332. self.leave_idle()
  333. else:
  334. inputdata = self._input.read(size)
  335. if self.inputlogfile is not None:
  336. self.inputlogfile.write(inputdata)
  337. return inputdata
  338. raise OSError("trying to read from fd %d" % (fd,))
  339. def do_ll_os__ll_os_write(self, fd, data):
  340. if fd == 1:
  341. self._output.write(data)
  342. return len(data)
  343. if fd == 2:
  344. self._error.write(data)
  345. return len(data)
  346. raise OSError("trying to write to fd %d" % (fd,))
  347. # let's allow access to the real time
  348. def do_ll_time__ll_time_sleep(self, seconds):
  349. # regularly check for timeouts that could have killed the
  350. # subprocess
  351. while seconds > 5.0:
  352. time.sleep(5.0)
  353. seconds -= 5.0
  354. if self.poll() is not None: # subprocess finished?
  355. return
  356. time.sleep(seconds)
  357. def do_ll_time__ll_time_time(self):
  358. return time.time()
  359. def do_ll_time__ll_time_clock(self):
  360. # measuring the CPU time of the controller process has
  361. # not much meaning, so let's emulate this and return
  362. # the real time elapsed since the first call to clock()
  363. # (this is one of the behaviors allowed by the docs)
  364. try:
  365. starttime = self.starttime
  366. except AttributeError:
  367. starttime = self.starttime = time.time()
  368. return time.time() - starttime
  369. class VirtualizedSandboxedProc(SandboxedProc):
  370. """Control a virtualized sandboxed process, which is given a custom
  371. view on the filesystem and a custom environment.
  372. """
  373. virtual_env = {}
  374. virtual_cwd = '/tmp'
  375. virtual_console_isatty = False
  376. virtual_fd_range = range(3, 50)
  377. def __init__(self, *args, **kwds):
  378. super(VirtualizedSandboxedProc, self).__init__(*args, **kwds)
  379. self.virtual_root = self.build_virtual_root()
  380. self.open_fds = {} # {virtual_fd: (real_file_object, node)}
  381. def build_virtual_root(self):
  382. raise NotImplementedError("must be overridden")
  383. def do_ll_os__ll_os_envitems(self):
  384. return self.virtual_env.items()
  385. def do_ll_os__ll_os_getenv(self, name):
  386. return self.virtual_env.get(name)
  387. def translate_path(self, vpath):
  388. # XXX this assumes posix vpaths for now, but os-specific real paths
  389. vpath = posixpath.normpath(posixpath.join(self.virtual_cwd, vpath))
  390. dirnode = self.virtual_root
  391. components = [component for component in vpath.split('/')]
  392. for component in components[:-1]:
  393. if component:
  394. dirnode = dirnode.join(component)
  395. if dirnode.kind != stat.S_IFDIR:
  396. raise OSError(errno.ENOTDIR, component)
  397. return dirnode, components[-1]
  398. def get_node(self, vpath):
  399. dirnode, name = self.translate_path(vpath)
  400. if name:
  401. node = dirnode.join(name)
  402. else:
  403. node = dirnode
  404. if self.log:
  405. self.log.vpath('%r => %r' % (vpath, node))
  406. return node
  407. def do_ll_os__ll_os_stat(self, vpathname):
  408. node = self.get_node(vpathname)
  409. return node.stat()
  410. do_ll_os__ll_os_stat.resulttype = RESULTTYPE_STATRESULT
  411. do_ll_os__ll_os_lstat = do_ll_os__ll_os_stat
  412. def do_ll_os__ll_os_isatty(self, fd):
  413. return self.virtual_console_isatty and fd in (0, 1, 2)
  414. def allocate_fd(self, f, node=None):
  415. for fd in self.virtual_fd_range:
  416. if fd not in self.open_fds:
  417. self.open_fds[fd] = (f, node)
  418. return fd
  419. else:
  420. raise OSError(errno.EMFILE, "trying to open too many files")
  421. def get_fd(self, fd, throw=True):
  422. """Get the objects implementing file descriptor `fd`.
  423. Returns a pair, (open file, vfs node)
  424. `throw`: if true, raise OSError for bad fd, else return (None, None).
  425. """
  426. try:
  427. f, node = self.open_fds[fd]
  428. except KeyError:
  429. if throw:
  430. raise OSError(errno.EBADF, "bad file descriptor")
  431. return None, None
  432. return f, node
  433. def get_file(self, fd, throw=True):
  434. """Return the open file for file descriptor `fd`."""
  435. return self.get_fd(fd, throw)[0]
  436. def do_ll_os__ll_os_open(self, vpathname, flags, mode):
  437. node = self.get_node(vpathname)
  438. if flags & (os.O_RDONLY|os.O_WRONLY|os.O_RDWR) != os.O_RDONLY:
  439. raise OSError(errno.EPERM, "write access denied")
  440. # all other flags are ignored
  441. f = node.open()
  442. return self.allocate_fd(f, node)
  443. def do_ll_os__ll_os_close(self, fd):
  444. f = self.get_file(fd)
  445. del self.open_fds[fd]
  446. f.close()
  447. def do_ll_os__ll_os_read(self, fd, size):
  448. f = self.get_file(fd, throw=False)
  449. if f is None:
  450. return super(VirtualizedSandboxedProc, self).do_ll_os__ll_os_read(
  451. fd, size)
  452. else:
  453. if not (0 <= size <= sys.maxint):
  454. raise OSError(errno.EINVAL, "invalid read size")
  455. # don't try to read more than 256KB at once here
  456. return f.read(min(size, 256*1024))
  457. def do_ll_os__ll_os_fstat(self, fd):
  458. f, node = self.get_fd(fd)
  459. return node.stat()
  460. do_ll_os__ll_os_fstat.resulttype = RESULTTYPE_STATRESULT
  461. def do_ll_os__ll_os_lseek(self, fd, pos, how):
  462. f = self.get_file(fd)
  463. f.seek(pos, how)
  464. return f.tell()
  465. do_ll_os__ll_os_lseek.resulttype = RESULTTYPE_LONGLONG
  466. def do_ll_os__ll_os_getcwd(self):
  467. return self.virtual_cwd
  468. def do_ll_os__ll_os_strerror(self, errnum):
  469. # unsure if this shouldn't be considered safeboxsafe
  470. return os.strerror(errnum) or ('Unknown error %d' % (errnum,))
  471. def do_ll_os__ll_os_listdir(self, vpathname):
  472. node = self.get_node(vpathname)
  473. return node.keys()
  474. def do_ll_os__ll_os_getuid(self):
  475. return UID
  476. do_ll_os__ll_os_geteuid = do_ll_os__ll_os_getuid
  477. def do_ll_os__ll_os_getgid(self):
  478. return GID
  479. do_ll_os__ll_os_getegid = do_ll_os__ll_os_getgid
  480. class VirtualizedSocketProc(VirtualizedSandboxedProc):
  481. """ Extends VirtualizedSandboxProc with socket
  482. options, ie tcp://host:port as args to os.open
  483. """
  484. def __init__(self, *args, **kwds):
  485. super(VirtualizedSocketProc, self).__init__(*args, **kwds)
  486. self.sockets = {}
  487. def do_ll_os__ll_os_open(self, name, flags, mode):
  488. if not name.startswith("tcp://"):
  489. return super(VirtualizedSocketProc, self).do_ll_os__ll_os_open(
  490. name, flags, mode)
  491. import socket
  492. host, port = name[6:].split(":")
  493. sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  494. sock.connect((host, int(port)))
  495. fd = self.allocate_fd(sock)
  496. self.sockets[fd] = True
  497. return fd
  498. def do_ll_os__ll_os_read(self, fd, size):
  499. if fd in self.sockets:
  500. return self.get_file(fd).recv(size)
  501. return super(VirtualizedSocketProc, self).do_ll_os__ll_os_read(
  502. fd, size)
  503. def do_ll_os__ll_os_write(self, fd, data):
  504. if fd in self.sockets:
  505. return self.get_file(fd).send(data)
  506. return super(VirtualizedSocketProc, self).do_ll_os__ll_os_write(
  507. fd, data)