PageRenderTime 52ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/pypy/module/micronumpy/compile.py

https://bitbucket.org/Raemi/pypy-stm-logging
Python | 731 lines | 619 code | 102 blank | 10 comment | 80 complexity | d8ea5737814aa4d9b382bad7dd46d661 MD5 | raw file
Possible License(s): Apache-2.0
  1. """ This is a set of tools for standalone compiling of numpy expressions.
  2. It should not be imported by the module itself
  3. """
  4. import re
  5. from pypy.interpreter.baseobjspace import InternalSpaceCache, W_Root
  6. from pypy.interpreter.error import OperationError
  7. from pypy.module.micronumpy import interp_boxes
  8. from pypy.module.micronumpy.interp_dtype import get_dtype_cache
  9. from pypy.module.micronumpy.base import W_NDimArray
  10. from pypy.module.micronumpy.interp_numarray import array
  11. from pypy.module.micronumpy.interp_arrayops import where
  12. from pypy.module.micronumpy import interp_ufuncs
  13. from rpython.rlib.objectmodel import specialize, instantiate
  14. class BogusBytecode(Exception):
  15. pass
  16. class ArgumentMismatch(Exception):
  17. pass
  18. class ArgumentNotAnArray(Exception):
  19. pass
  20. class WrongFunctionName(Exception):
  21. pass
  22. class TokenizerError(Exception):
  23. pass
  24. class BadToken(Exception):
  25. pass
  26. SINGLE_ARG_FUNCTIONS = ["sum", "prod", "max", "min", "all", "any",
  27. "unegative", "flat", "tostring","count_nonzero",
  28. "argsort"]
  29. TWO_ARG_FUNCTIONS = ["dot", 'take']
  30. THREE_ARG_FUNCTIONS = ['where']
  31. class FakeSpace(object):
  32. w_ValueError = "ValueError"
  33. w_TypeError = "TypeError"
  34. w_IndexError = "IndexError"
  35. w_OverflowError = "OverflowError"
  36. w_NotImplementedError = "NotImplementedError"
  37. w_None = None
  38. w_bool = "bool"
  39. w_int = "int"
  40. w_float = "float"
  41. w_list = "list"
  42. w_long = "long"
  43. w_tuple = 'tuple'
  44. w_slice = "slice"
  45. w_str = "str"
  46. w_unicode = "unicode"
  47. w_complex = "complex"
  48. def __init__(self):
  49. """NOT_RPYTHON"""
  50. self.fromcache = InternalSpaceCache(self).getorbuild
  51. def _freeze_(self):
  52. return True
  53. def is_none(self, w_obj):
  54. return w_obj is None or w_obj is self.w_None
  55. def issequence_w(self, w_obj):
  56. return isinstance(w_obj, ListObject) or isinstance(w_obj, W_NDimArray)
  57. def isinstance_w(self, w_obj, w_tp):
  58. return w_obj.tp == w_tp
  59. def decode_index4(self, w_idx, size):
  60. if isinstance(w_idx, IntObject):
  61. return (self.int_w(w_idx), 0, 0, 1)
  62. else:
  63. assert isinstance(w_idx, SliceObject)
  64. start, stop, step = w_idx.start, w_idx.stop, w_idx.step
  65. if step == 0:
  66. return (0, size, 1, size)
  67. if start < 0:
  68. start += size
  69. if stop < 0:
  70. stop += size + 1
  71. if step < 0:
  72. lgt = (stop - start + 1) / step + 1
  73. else:
  74. lgt = (stop - start - 1) / step + 1
  75. return (start, stop, step, lgt)
  76. @specialize.argtype(1)
  77. def wrap(self, obj):
  78. if isinstance(obj, float):
  79. return FloatObject(obj)
  80. elif isinstance(obj, bool):
  81. return BoolObject(obj)
  82. elif isinstance(obj, int):
  83. return IntObject(obj)
  84. elif isinstance(obj, long):
  85. return LongObject(obj)
  86. elif isinstance(obj, W_Root):
  87. return obj
  88. elif isinstance(obj, str):
  89. return StringObject(obj)
  90. raise NotImplementedError
  91. def newlist(self, items):
  92. return ListObject(items)
  93. def newcomplex(self, r, i):
  94. return ComplexObject(r, i)
  95. def listview(self, obj):
  96. assert isinstance(obj, ListObject)
  97. return obj.items
  98. fixedview = listview
  99. def float(self, w_obj):
  100. if isinstance(w_obj, FloatObject):
  101. return w_obj
  102. assert isinstance(w_obj, interp_boxes.W_GenericBox)
  103. return self.float(w_obj.descr_float(self))
  104. def float_w(self, w_obj):
  105. assert isinstance(w_obj, FloatObject)
  106. return w_obj.floatval
  107. def int_w(self, w_obj):
  108. if isinstance(w_obj, IntObject):
  109. return w_obj.intval
  110. elif isinstance(w_obj, FloatObject):
  111. return int(w_obj.floatval)
  112. elif isinstance(w_obj, SliceObject):
  113. raise OperationError(self.w_TypeError, self.wrap("slice."))
  114. raise NotImplementedError
  115. def unpackcomplex(self, w_obj):
  116. if isinstance(w_obj, ComplexObject):
  117. return w_obj.r, w_obj.i
  118. raise NotImplementedError
  119. def index(self, w_obj):
  120. return self.wrap(self.int_w(w_obj))
  121. def str_w(self, w_obj):
  122. if isinstance(w_obj, StringObject):
  123. return w_obj.v
  124. raise NotImplementedError
  125. def int(self, w_obj):
  126. if isinstance(w_obj, IntObject):
  127. return w_obj
  128. assert isinstance(w_obj, interp_boxes.W_GenericBox)
  129. return self.int(w_obj.descr_int(self))
  130. def str(self, w_obj):
  131. if isinstance(w_obj, StringObject):
  132. return w_obj
  133. assert isinstance(w_obj, interp_boxes.W_GenericBox)
  134. return self.str(w_obj.descr_str(self))
  135. def is_true(self, w_obj):
  136. assert isinstance(w_obj, BoolObject)
  137. return False
  138. #return w_obj.boolval
  139. def is_w(self, w_obj, w_what):
  140. return w_obj is w_what
  141. def type(self, w_obj):
  142. return w_obj.tp
  143. def gettypefor(self, w_obj):
  144. return None
  145. def call_function(self, tp, w_dtype):
  146. return w_dtype
  147. @specialize.arg(1)
  148. def interp_w(self, tp, what):
  149. assert isinstance(what, tp)
  150. return what
  151. def allocate_instance(self, klass, w_subtype):
  152. return instantiate(klass)
  153. def newtuple(self, list_w):
  154. return ListObject(list_w)
  155. def newdict(self):
  156. return {}
  157. def setitem(self, dict, item, value):
  158. dict[item] = value
  159. def len_w(self, w_obj):
  160. if isinstance(w_obj, ListObject):
  161. return len(w_obj.items)
  162. # XXX array probably
  163. assert False
  164. def exception_match(self, w_exc_type, w_check_class):
  165. # Good enough for now
  166. raise NotImplementedError
  167. class FloatObject(W_Root):
  168. tp = FakeSpace.w_float
  169. def __init__(self, floatval):
  170. self.floatval = floatval
  171. class BoolObject(W_Root):
  172. tp = FakeSpace.w_bool
  173. def __init__(self, boolval):
  174. self.boolval = boolval
  175. class IntObject(W_Root):
  176. tp = FakeSpace.w_int
  177. def __init__(self, intval):
  178. self.intval = intval
  179. class LongObject(W_Root):
  180. tp = FakeSpace.w_long
  181. def __init__(self, intval):
  182. self.intval = intval
  183. class ListObject(W_Root):
  184. tp = FakeSpace.w_list
  185. def __init__(self, items):
  186. self.items = items
  187. class SliceObject(W_Root):
  188. tp = FakeSpace.w_slice
  189. def __init__(self, start, stop, step):
  190. self.start = start
  191. self.stop = stop
  192. self.step = step
  193. class StringObject(W_Root):
  194. tp = FakeSpace.w_str
  195. def __init__(self, v):
  196. self.v = v
  197. class ComplexObject(W_Root):
  198. tp = FakeSpace.w_complex
  199. def __init__(self, r, i):
  200. self.r = r
  201. self.i = i
  202. class InterpreterState(object):
  203. def __init__(self, code):
  204. self.code = code
  205. self.variables = {}
  206. self.results = []
  207. def run(self, space):
  208. self.space = space
  209. for stmt in self.code.statements:
  210. stmt.execute(self)
  211. class Node(object):
  212. def __eq__(self, other):
  213. return (self.__class__ == other.__class__ and
  214. self.__dict__ == other.__dict__)
  215. def __ne__(self, other):
  216. return not self == other
  217. def wrap(self, space):
  218. raise NotImplementedError
  219. def execute(self, interp):
  220. raise NotImplementedError
  221. class Assignment(Node):
  222. def __init__(self, name, expr):
  223. self.name = name
  224. self.expr = expr
  225. def execute(self, interp):
  226. interp.variables[self.name] = self.expr.execute(interp)
  227. def __repr__(self):
  228. return "%r = %r" % (self.name, self.expr)
  229. class ArrayAssignment(Node):
  230. def __init__(self, name, index, expr):
  231. self.name = name
  232. self.index = index
  233. self.expr = expr
  234. def execute(self, interp):
  235. arr = interp.variables[self.name]
  236. w_index = self.index.execute(interp)
  237. # cast to int
  238. if isinstance(w_index, FloatObject):
  239. w_index = IntObject(int(w_index.floatval))
  240. w_val = self.expr.execute(interp)
  241. assert isinstance(arr, W_NDimArray)
  242. arr.descr_setitem(interp.space, w_index, w_val)
  243. def __repr__(self):
  244. return "%s[%r] = %r" % (self.name, self.index, self.expr)
  245. class Variable(Node):
  246. def __init__(self, name):
  247. self.name = name.strip(" ")
  248. def execute(self, interp):
  249. return interp.variables[self.name]
  250. def __repr__(self):
  251. return 'v(%s)' % self.name
  252. class Operator(Node):
  253. def __init__(self, lhs, name, rhs):
  254. self.name = name
  255. self.lhs = lhs
  256. self.rhs = rhs
  257. def execute(self, interp):
  258. w_lhs = self.lhs.execute(interp)
  259. if isinstance(self.rhs, SliceConstant):
  260. w_rhs = self.rhs.wrap(interp.space)
  261. else:
  262. w_rhs = self.rhs.execute(interp)
  263. if not isinstance(w_lhs, W_NDimArray):
  264. # scalar
  265. dtype = get_dtype_cache(interp.space).w_float64dtype
  266. w_lhs = W_NDimArray.new_scalar(interp.space, dtype, w_lhs)
  267. assert isinstance(w_lhs, W_NDimArray)
  268. if self.name == '+':
  269. w_res = w_lhs.descr_add(interp.space, w_rhs)
  270. elif self.name == '*':
  271. w_res = w_lhs.descr_mul(interp.space, w_rhs)
  272. elif self.name == '-':
  273. w_res = w_lhs.descr_sub(interp.space, w_rhs)
  274. elif self.name == '->':
  275. if isinstance(w_rhs, FloatObject):
  276. w_rhs = IntObject(int(w_rhs.floatval))
  277. assert isinstance(w_lhs, W_NDimArray)
  278. w_res = w_lhs.descr_getitem(interp.space, w_rhs)
  279. else:
  280. raise NotImplementedError
  281. if (not isinstance(w_res, W_NDimArray) and
  282. not isinstance(w_res, interp_boxes.W_GenericBox)):
  283. dtype = get_dtype_cache(interp.space).w_float64dtype
  284. w_res = W_NDimArray.new_scalar(interp.space, dtype, w_res)
  285. return w_res
  286. def __repr__(self):
  287. return '(%r %s %r)' % (self.lhs, self.name, self.rhs)
  288. class FloatConstant(Node):
  289. def __init__(self, v):
  290. self.v = float(v)
  291. def __repr__(self):
  292. return "Const(%s)" % self.v
  293. def wrap(self, space):
  294. return space.wrap(self.v)
  295. def execute(self, interp):
  296. return interp.space.wrap(self.v)
  297. class ComplexConstant(Node):
  298. def __init__(self, r, i):
  299. self.r = float(r)
  300. self.i = float(i)
  301. def __repr__(self):
  302. return 'ComplexConst(%s, %s)' % (self.r, self.i)
  303. def wrap(self, space):
  304. return space.newcomplex(self.r, self.i)
  305. def execute(self, interp):
  306. return self.wrap(interp.space)
  307. class RangeConstant(Node):
  308. def __init__(self, v):
  309. self.v = int(v)
  310. def execute(self, interp):
  311. w_list = interp.space.newlist(
  312. [interp.space.wrap(float(i)) for i in range(self.v)]
  313. )
  314. dtype = get_dtype_cache(interp.space).w_float64dtype
  315. return array(interp.space, w_list, w_dtype=dtype, w_order=None)
  316. def __repr__(self):
  317. return 'Range(%s)' % self.v
  318. class Code(Node):
  319. def __init__(self, statements):
  320. self.statements = statements
  321. def __repr__(self):
  322. return "\n".join([repr(i) for i in self.statements])
  323. class ArrayConstant(Node):
  324. def __init__(self, items):
  325. self.items = items
  326. def wrap(self, space):
  327. return space.newlist([item.wrap(space) for item in self.items])
  328. def execute(self, interp):
  329. w_list = self.wrap(interp.space)
  330. return array(interp.space, w_list)
  331. def __repr__(self):
  332. return "[" + ", ".join([repr(item) for item in self.items]) + "]"
  333. class SliceConstant(Node):
  334. def __init__(self, start, stop, step):
  335. # no negative support for now
  336. self.start = start
  337. self.stop = stop
  338. self.step = step
  339. def wrap(self, space):
  340. return SliceObject(self.start, self.stop, self.step)
  341. def execute(self, interp):
  342. return SliceObject(self.start, self.stop, self.step)
  343. def __repr__(self):
  344. return 'slice(%s,%s,%s)' % (self.start, self.stop, self.step)
  345. class Execute(Node):
  346. def __init__(self, expr):
  347. self.expr = expr
  348. def __repr__(self):
  349. return repr(self.expr)
  350. def execute(self, interp):
  351. interp.results.append(self.expr.execute(interp))
  352. class FunctionCall(Node):
  353. def __init__(self, name, args):
  354. self.name = name.strip(" ")
  355. self.args = args
  356. def __repr__(self):
  357. return "%s(%s)" % (self.name, ", ".join([repr(arg)
  358. for arg in self.args]))
  359. def execute(self, interp):
  360. arr = self.args[0].execute(interp)
  361. if not isinstance(arr, W_NDimArray):
  362. raise ArgumentNotAnArray
  363. if self.name in SINGLE_ARG_FUNCTIONS:
  364. if len(self.args) != 1 and self.name != 'sum':
  365. raise ArgumentMismatch
  366. if self.name == "sum":
  367. if len(self.args)>1:
  368. w_res = arr.descr_sum(interp.space,
  369. self.args[1].execute(interp))
  370. else:
  371. w_res = arr.descr_sum(interp.space)
  372. elif self.name == "prod":
  373. w_res = arr.descr_prod(interp.space)
  374. elif self.name == "max":
  375. w_res = arr.descr_max(interp.space)
  376. elif self.name == "min":
  377. w_res = arr.descr_min(interp.space)
  378. elif self.name == "any":
  379. w_res = arr.descr_any(interp.space)
  380. elif self.name == "all":
  381. w_res = arr.descr_all(interp.space)
  382. elif self.name == "unegative":
  383. neg = interp_ufuncs.get(interp.space).negative
  384. w_res = neg.call(interp.space, [arr])
  385. elif self.name == "cos":
  386. cos = interp_ufuncs.get(interp.space).cos
  387. w_res = cos.call(interp.space, [arr])
  388. elif self.name == "flat":
  389. w_res = arr.descr_get_flatiter(interp.space)
  390. elif self.name == "argsort":
  391. w_res = arr.descr_argsort(interp.space)
  392. elif self.name == "tostring":
  393. arr.descr_tostring(interp.space)
  394. w_res = None
  395. else:
  396. assert False # unreachable code
  397. elif self.name in TWO_ARG_FUNCTIONS:
  398. if len(self.args) != 2:
  399. raise ArgumentMismatch
  400. arg = self.args[1].execute(interp)
  401. if not isinstance(arg, W_NDimArray):
  402. raise ArgumentNotAnArray
  403. if self.name == "dot":
  404. w_res = arr.descr_dot(interp.space, arg)
  405. elif self.name == 'take':
  406. w_res = arr.descr_take(interp.space, arg)
  407. else:
  408. assert False # unreachable code
  409. elif self.name in THREE_ARG_FUNCTIONS:
  410. if len(self.args) != 3:
  411. raise ArgumentMismatch
  412. arg1 = self.args[1].execute(interp)
  413. arg2 = self.args[2].execute(interp)
  414. if not isinstance(arg1, W_NDimArray):
  415. raise ArgumentNotAnArray
  416. if not isinstance(arg2, W_NDimArray):
  417. raise ArgumentNotAnArray
  418. if self.name == "where":
  419. w_res = where(interp.space, arr, arg1, arg2)
  420. else:
  421. assert False
  422. else:
  423. raise WrongFunctionName
  424. if isinstance(w_res, W_NDimArray):
  425. return w_res
  426. if isinstance(w_res, FloatObject):
  427. dtype = get_dtype_cache(interp.space).w_float64dtype
  428. elif isinstance(w_res, IntObject):
  429. dtype = get_dtype_cache(interp.space).w_int64dtype
  430. elif isinstance(w_res, BoolObject):
  431. dtype = get_dtype_cache(interp.space).w_booldtype
  432. elif isinstance(w_res, interp_boxes.W_GenericBox):
  433. dtype = w_res.get_dtype(interp.space)
  434. else:
  435. dtype = None
  436. return W_NDimArray.new_scalar(interp.space, dtype, w_res)
  437. _REGEXES = [
  438. ('-?[\d\.]+', 'number'),
  439. ('\[', 'array_left'),
  440. (':', 'colon'),
  441. ('\w+', 'identifier'),
  442. ('\]', 'array_right'),
  443. ('(->)|[\+\-\*\/]', 'operator'),
  444. ('=', 'assign'),
  445. (',', 'comma'),
  446. ('\|', 'pipe'),
  447. ('\(', 'paren_left'),
  448. ('\)', 'paren_right'),
  449. ]
  450. REGEXES = []
  451. for r, name in _REGEXES:
  452. REGEXES.append((re.compile(r' *(' + r + ')'), name))
  453. del _REGEXES
  454. class Token(object):
  455. def __init__(self, name, v):
  456. self.name = name
  457. self.v = v
  458. def __repr__(self):
  459. return '(%s, %s)' % (self.name, self.v)
  460. empty = Token('', '')
  461. class TokenStack(object):
  462. def __init__(self, tokens):
  463. self.tokens = tokens
  464. self.c = 0
  465. def pop(self):
  466. token = self.tokens[self.c]
  467. self.c += 1
  468. return token
  469. def get(self, i):
  470. if self.c + i >= len(self.tokens):
  471. return empty
  472. return self.tokens[self.c + i]
  473. def remaining(self):
  474. return len(self.tokens) - self.c
  475. def push(self):
  476. self.c -= 1
  477. def __repr__(self):
  478. return repr(self.tokens[self.c:])
  479. class Parser(object):
  480. def tokenize(self, line):
  481. tokens = []
  482. while True:
  483. for r, name in REGEXES:
  484. m = r.match(line)
  485. if m is not None:
  486. g = m.group(0)
  487. tokens.append(Token(name, g))
  488. line = line[len(g):]
  489. if not line:
  490. return TokenStack(tokens)
  491. break
  492. else:
  493. raise TokenizerError(line)
  494. def parse_number_or_slice(self, tokens):
  495. start_tok = tokens.pop()
  496. if start_tok.name == 'colon':
  497. start = 0
  498. else:
  499. if tokens.get(0).name != 'colon':
  500. return FloatConstant(start_tok.v)
  501. start = int(start_tok.v)
  502. tokens.pop()
  503. if not tokens.get(0).name in ['colon', 'number']:
  504. stop = -1
  505. step = 1
  506. else:
  507. next = tokens.pop()
  508. if next.name == 'colon':
  509. stop = -1
  510. step = int(tokens.pop().v)
  511. else:
  512. stop = int(next.v)
  513. if tokens.get(0).name == 'colon':
  514. tokens.pop()
  515. step = int(tokens.pop().v)
  516. else:
  517. step = 1
  518. return SliceConstant(start, stop, step)
  519. def parse_expression(self, tokens, accept_comma=False):
  520. stack = []
  521. while tokens.remaining():
  522. token = tokens.pop()
  523. if token.name == 'identifier':
  524. if tokens.remaining() and tokens.get(0).name == 'paren_left':
  525. stack.append(self.parse_function_call(token.v, tokens))
  526. else:
  527. stack.append(Variable(token.v))
  528. elif token.name == 'array_left':
  529. stack.append(ArrayConstant(self.parse_array_const(tokens)))
  530. elif token.name == 'operator':
  531. stack.append(Variable(token.v))
  532. elif token.name == 'number' or token.name == 'colon':
  533. tokens.push()
  534. stack.append(self.parse_number_or_slice(tokens))
  535. elif token.name == 'pipe':
  536. stack.append(RangeConstant(tokens.pop().v))
  537. end = tokens.pop()
  538. assert end.name == 'pipe'
  539. elif token.name == 'paren_left':
  540. stack.append(self.parse_complex_constant(tokens))
  541. elif accept_comma and token.name == 'comma':
  542. continue
  543. else:
  544. tokens.push()
  545. break
  546. if accept_comma:
  547. return stack
  548. stack.reverse()
  549. lhs = stack.pop()
  550. while stack:
  551. op = stack.pop()
  552. assert isinstance(op, Variable)
  553. rhs = stack.pop()
  554. lhs = Operator(lhs, op.name, rhs)
  555. return lhs
  556. def parse_function_call(self, name, tokens):
  557. args = []
  558. tokens.pop() # lparen
  559. while tokens.get(0).name != 'paren_right':
  560. args += self.parse_expression(tokens, accept_comma=True)
  561. return FunctionCall(name, args)
  562. def parse_complex_constant(self, tokens):
  563. r = tokens.pop()
  564. assert r.name == 'number'
  565. assert tokens.pop().name == 'comma'
  566. i = tokens.pop()
  567. assert i.name == 'number'
  568. assert tokens.pop().name == 'paren_right'
  569. return ComplexConstant(r.v, i.v)
  570. def parse_array_const(self, tokens):
  571. elems = []
  572. while True:
  573. token = tokens.pop()
  574. if token.name == 'number':
  575. elems.append(FloatConstant(token.v))
  576. elif token.name == 'array_left':
  577. elems.append(ArrayConstant(self.parse_array_const(tokens)))
  578. elif token.name == 'paren_left':
  579. elems.append(self.parse_complex_constant(tokens))
  580. else:
  581. raise BadToken()
  582. token = tokens.pop()
  583. if token.name == 'array_right':
  584. return elems
  585. assert token.name == 'comma'
  586. def parse_statement(self, tokens):
  587. if (tokens.get(0).name == 'identifier' and
  588. tokens.get(1).name == 'assign'):
  589. lhs = tokens.pop().v
  590. tokens.pop()
  591. rhs = self.parse_expression(tokens)
  592. return Assignment(lhs, rhs)
  593. elif (tokens.get(0).name == 'identifier' and
  594. tokens.get(1).name == 'array_left'):
  595. name = tokens.pop().v
  596. tokens.pop()
  597. index = self.parse_expression(tokens)
  598. tokens.pop()
  599. tokens.pop()
  600. return ArrayAssignment(name, index, self.parse_expression(tokens))
  601. return Execute(self.parse_expression(tokens))
  602. def parse(self, code):
  603. statements = []
  604. for line in code.split("\n"):
  605. if '#' in line:
  606. line = line.split('#', 1)[0]
  607. line = line.strip(" ")
  608. if line:
  609. tokens = self.tokenize(line)
  610. statements.append(self.parse_statement(tokens))
  611. return Code(statements)
  612. def numpy_compile(code):
  613. parser = Parser()
  614. return InterpreterState(parser.parse(code))