PageRenderTime 47ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/pypy/module/micronumpy/compile.py

https://bitbucket.org/uiappstore/pypy
Python | 654 lines | 553 code | 92 blank | 9 comment | 68 complexity | d4778683e2b747dccc2b513160be15de MD5 | raw file
  1. """ This is a set of tools for standalone compiling of numpy expressions.
  2. It should not be imported by the module itself
  3. """
  4. import re
  5. from pypy.interpreter.baseobjspace import InternalSpaceCache, W_Root
  6. from pypy.module.micronumpy import interp_boxes
  7. from pypy.module.micronumpy.interp_dtype import get_dtype_cache
  8. from pypy.module.micronumpy.interp_numarray import (Scalar, BaseArray,
  9. scalar_w, W_NDimArray, array)
  10. from pypy.module.micronumpy import interp_ufuncs
  11. from pypy.rlib.objectmodel import specialize, instantiate
  12. class BogusBytecode(Exception):
  13. pass
  14. class ArgumentMismatch(Exception):
  15. pass
  16. class ArgumentNotAnArray(Exception):
  17. pass
  18. class WrongFunctionName(Exception):
  19. pass
  20. class TokenizerError(Exception):
  21. pass
  22. class BadToken(Exception):
  23. pass
  24. SINGLE_ARG_FUNCTIONS = ["sum", "prod", "max", "min", "all", "any",
  25. "unegative", "flat", "tostring"]
  26. TWO_ARG_FUNCTIONS = ["dot", 'take']
  27. class FakeSpace(object):
  28. w_ValueError = None
  29. w_TypeError = None
  30. w_IndexError = None
  31. w_OverflowError = None
  32. w_NotImplementedError = None
  33. w_None = None
  34. w_bool = "bool"
  35. w_int = "int"
  36. w_float = "float"
  37. w_list = "list"
  38. w_long = "long"
  39. w_tuple = 'tuple'
  40. w_slice = "slice"
  41. w_str = "str"
  42. w_unicode = "unicode"
  43. def __init__(self):
  44. """NOT_RPYTHON"""
  45. self.fromcache = InternalSpaceCache(self).getorbuild
  46. def _freeze_(self):
  47. return True
  48. def issequence_w(self, w_obj):
  49. return isinstance(w_obj, ListObject) or isinstance(w_obj, W_NDimArray)
  50. def isinstance_w(self, w_obj, w_tp):
  51. return w_obj.tp == w_tp
  52. def decode_index4(self, w_idx, size):
  53. if isinstance(w_idx, IntObject):
  54. return (self.int_w(w_idx), 0, 0, 1)
  55. else:
  56. assert isinstance(w_idx, SliceObject)
  57. start, stop, step = w_idx.start, w_idx.stop, w_idx.step
  58. if step == 0:
  59. return (0, size, 1, size)
  60. if start < 0:
  61. start += size
  62. if stop < 0:
  63. stop += size + 1
  64. if step < 0:
  65. lgt = (stop - start + 1) / step + 1
  66. else:
  67. lgt = (stop - start - 1) / step + 1
  68. return (start, stop, step, lgt)
  69. @specialize.argtype(1)
  70. def wrap(self, obj):
  71. if isinstance(obj, float):
  72. return FloatObject(obj)
  73. elif isinstance(obj, bool):
  74. return BoolObject(obj)
  75. elif isinstance(obj, int):
  76. return IntObject(obj)
  77. elif isinstance(obj, long):
  78. return LongObject(obj)
  79. elif isinstance(obj, W_Root):
  80. return obj
  81. elif isinstance(obj, str):
  82. return StringObject(obj)
  83. raise NotImplementedError
  84. def newlist(self, items):
  85. return ListObject(items)
  86. def listview(self, obj):
  87. assert isinstance(obj, ListObject)
  88. return obj.items
  89. fixedview = listview
  90. def float(self, w_obj):
  91. if isinstance(w_obj, FloatObject):
  92. return w_obj
  93. assert isinstance(w_obj, interp_boxes.W_GenericBox)
  94. return self.float(w_obj.descr_float(self))
  95. def float_w(self, w_obj):
  96. assert isinstance(w_obj, FloatObject)
  97. return w_obj.floatval
  98. def int_w(self, w_obj):
  99. if isinstance(w_obj, IntObject):
  100. return w_obj.intval
  101. elif isinstance(w_obj, FloatObject):
  102. return int(w_obj.floatval)
  103. raise NotImplementedError
  104. def str_w(self, w_obj):
  105. if isinstance(w_obj, StringObject):
  106. return w_obj.v
  107. raise NotImplementedError
  108. def int(self, w_obj):
  109. if isinstance(w_obj, IntObject):
  110. return w_obj
  111. assert isinstance(w_obj, interp_boxes.W_GenericBox)
  112. return self.int(w_obj.descr_int(self))
  113. def is_true(self, w_obj):
  114. assert isinstance(w_obj, BoolObject)
  115. return w_obj.boolval
  116. def is_w(self, w_obj, w_what):
  117. return w_obj is w_what
  118. def type(self, w_obj):
  119. return w_obj.tp
  120. def gettypefor(self, w_obj):
  121. return None
  122. def call_function(self, tp, w_dtype):
  123. return w_dtype
  124. @specialize.arg(1)
  125. def interp_w(self, tp, what):
  126. assert isinstance(what, tp)
  127. return what
  128. def allocate_instance(self, klass, w_subtype):
  129. return instantiate(klass)
  130. def newtuple(self, list_w):
  131. return ListObject(list_w)
  132. def newdict(self):
  133. return {}
  134. def setitem(self, dict, item, value):
  135. dict[item] = value
  136. def len_w(self, w_obj):
  137. if isinstance(w_obj, ListObject):
  138. return len(w_obj.items)
  139. # XXX array probably
  140. assert False
  141. def exception_match(self, w_exc_type, w_check_class):
  142. # Good enough for now
  143. raise NotImplementedError
  144. class FloatObject(W_Root):
  145. tp = FakeSpace.w_float
  146. def __init__(self, floatval):
  147. self.floatval = floatval
  148. class BoolObject(W_Root):
  149. tp = FakeSpace.w_bool
  150. def __init__(self, boolval):
  151. self.boolval = boolval
  152. class IntObject(W_Root):
  153. tp = FakeSpace.w_int
  154. def __init__(self, intval):
  155. self.intval = intval
  156. class LongObject(W_Root):
  157. tp = FakeSpace.w_long
  158. def __init__(self, intval):
  159. self.intval = intval
  160. class ListObject(W_Root):
  161. tp = FakeSpace.w_list
  162. def __init__(self, items):
  163. self.items = items
  164. class SliceObject(W_Root):
  165. tp = FakeSpace.w_slice
  166. def __init__(self, start, stop, step):
  167. self.start = start
  168. self.stop = stop
  169. self.step = step
  170. class StringObject(W_Root):
  171. tp = FakeSpace.w_str
  172. def __init__(self, v):
  173. self.v = v
  174. class InterpreterState(object):
  175. def __init__(self, code):
  176. self.code = code
  177. self.variables = {}
  178. self.results = []
  179. def run(self, space):
  180. self.space = space
  181. for stmt in self.code.statements:
  182. stmt.execute(self)
  183. class Node(object):
  184. def __eq__(self, other):
  185. return (self.__class__ == other.__class__ and
  186. self.__dict__ == other.__dict__)
  187. def __ne__(self, other):
  188. return not self == other
  189. def wrap(self, space):
  190. raise NotImplementedError
  191. def execute(self, interp):
  192. raise NotImplementedError
  193. class Assignment(Node):
  194. def __init__(self, name, expr):
  195. self.name = name
  196. self.expr = expr
  197. def execute(self, interp):
  198. interp.variables[self.name] = self.expr.execute(interp)
  199. def __repr__(self):
  200. return "%r = %r" % (self.name, self.expr)
  201. class ArrayAssignment(Node):
  202. def __init__(self, name, index, expr):
  203. self.name = name
  204. self.index = index
  205. self.expr = expr
  206. def execute(self, interp):
  207. arr = interp.variables[self.name]
  208. w_index = self.index.execute(interp)
  209. # cast to int
  210. if isinstance(w_index, FloatObject):
  211. w_index = IntObject(int(w_index.floatval))
  212. w_val = self.expr.execute(interp)
  213. assert isinstance(arr, BaseArray)
  214. arr.descr_setitem(interp.space, w_index, w_val)
  215. def __repr__(self):
  216. return "%s[%r] = %r" % (self.name, self.index, self.expr)
  217. class Variable(Node):
  218. def __init__(self, name):
  219. self.name = name.strip(" ")
  220. def execute(self, interp):
  221. return interp.variables[self.name]
  222. def __repr__(self):
  223. return 'v(%s)' % self.name
  224. class Operator(Node):
  225. def __init__(self, lhs, name, rhs):
  226. self.name = name
  227. self.lhs = lhs
  228. self.rhs = rhs
  229. def execute(self, interp):
  230. w_lhs = self.lhs.execute(interp)
  231. if isinstance(self.rhs, SliceConstant):
  232. w_rhs = self.rhs.wrap(interp.space)
  233. else:
  234. w_rhs = self.rhs.execute(interp)
  235. if not isinstance(w_lhs, BaseArray):
  236. # scalar
  237. dtype = get_dtype_cache(interp.space).w_float64dtype
  238. w_lhs = scalar_w(interp.space, dtype, w_lhs)
  239. assert isinstance(w_lhs, BaseArray)
  240. if self.name == '+':
  241. w_res = w_lhs.descr_add(interp.space, w_rhs)
  242. elif self.name == '*':
  243. w_res = w_lhs.descr_mul(interp.space, w_rhs)
  244. elif self.name == '-':
  245. w_res = w_lhs.descr_sub(interp.space, w_rhs)
  246. elif self.name == '->':
  247. assert not isinstance(w_rhs, Scalar)
  248. if isinstance(w_rhs, FloatObject):
  249. w_rhs = IntObject(int(w_rhs.floatval))
  250. assert isinstance(w_lhs, BaseArray)
  251. w_res = w_lhs.descr_getitem(interp.space, w_rhs)
  252. else:
  253. raise NotImplementedError
  254. if (not isinstance(w_res, BaseArray) and
  255. not isinstance(w_res, interp_boxes.W_GenericBox)):
  256. dtype = get_dtype_cache(interp.space).w_float64dtype
  257. w_res = scalar_w(interp.space, dtype, w_res)
  258. return w_res
  259. def __repr__(self):
  260. return '(%r %s %r)' % (self.lhs, self.name, self.rhs)
  261. class FloatConstant(Node):
  262. def __init__(self, v):
  263. self.v = float(v)
  264. def __repr__(self):
  265. return "Const(%s)" % self.v
  266. def wrap(self, space):
  267. return space.wrap(self.v)
  268. def execute(self, interp):
  269. return interp.space.wrap(self.v)
  270. class RangeConstant(Node):
  271. def __init__(self, v):
  272. self.v = int(v)
  273. def execute(self, interp):
  274. w_list = interp.space.newlist(
  275. [interp.space.wrap(float(i)) for i in range(self.v)]
  276. )
  277. dtype = get_dtype_cache(interp.space).w_float64dtype
  278. return array(interp.space, w_list, w_dtype=dtype, w_order=None)
  279. def __repr__(self):
  280. return 'Range(%s)' % self.v
  281. class Code(Node):
  282. def __init__(self, statements):
  283. self.statements = statements
  284. def __repr__(self):
  285. return "\n".join([repr(i) for i in self.statements])
  286. class ArrayConstant(Node):
  287. def __init__(self, items):
  288. self.items = items
  289. def wrap(self, space):
  290. return space.newlist([item.wrap(space) for item in self.items])
  291. def execute(self, interp):
  292. w_list = self.wrap(interp.space)
  293. dtype = get_dtype_cache(interp.space).w_float64dtype
  294. return array(interp.space, w_list, w_dtype=dtype, w_order=None)
  295. def __repr__(self):
  296. return "[" + ", ".join([repr(item) for item in self.items]) + "]"
  297. class SliceConstant(Node):
  298. def __init__(self, start, stop, step):
  299. # no negative support for now
  300. self.start = start
  301. self.stop = stop
  302. self.step = step
  303. def wrap(self, space):
  304. return SliceObject(self.start, self.stop, self.step)
  305. def execute(self, interp):
  306. return SliceObject(self.start, self.stop, self.step)
  307. def __repr__(self):
  308. return 'slice(%s,%s,%s)' % (self.start, self.stop, self.step)
  309. class Execute(Node):
  310. def __init__(self, expr):
  311. self.expr = expr
  312. def __repr__(self):
  313. return repr(self.expr)
  314. def execute(self, interp):
  315. interp.results.append(self.expr.execute(interp))
  316. class FunctionCall(Node):
  317. def __init__(self, name, args):
  318. self.name = name.strip(" ")
  319. self.args = args
  320. def __repr__(self):
  321. return "%s(%s)" % (self.name, ", ".join([repr(arg)
  322. for arg in self.args]))
  323. def execute(self, interp):
  324. arr = self.args[0].execute(interp)
  325. if not isinstance(arr, BaseArray):
  326. raise ArgumentNotAnArray
  327. if self.name in SINGLE_ARG_FUNCTIONS:
  328. if len(self.args) != 1 and self.name != 'sum':
  329. raise ArgumentMismatch
  330. if self.name == "sum":
  331. if len(self.args)>1:
  332. w_res = arr.descr_sum(interp.space,
  333. self.args[1].execute(interp))
  334. else:
  335. w_res = arr.descr_sum(interp.space)
  336. elif self.name == "prod":
  337. w_res = arr.descr_prod(interp.space)
  338. elif self.name == "max":
  339. w_res = arr.descr_max(interp.space)
  340. elif self.name == "min":
  341. w_res = arr.descr_min(interp.space)
  342. elif self.name == "any":
  343. w_res = arr.descr_any(interp.space)
  344. elif self.name == "all":
  345. w_res = arr.descr_all(interp.space)
  346. elif self.name == "unegative":
  347. neg = interp_ufuncs.get(interp.space).negative
  348. w_res = neg.call(interp.space, [arr])
  349. elif self.name == "flat":
  350. w_res = arr.descr_get_flatiter(interp.space)
  351. elif self.name == "tostring":
  352. arr.descr_tostring(interp.space)
  353. w_res = None
  354. else:
  355. assert False # unreachable code
  356. elif self.name in TWO_ARG_FUNCTIONS:
  357. if len(self.args) != 2:
  358. raise ArgumentMismatch
  359. arg = self.args[1].execute(interp)
  360. if not isinstance(arg, BaseArray):
  361. raise ArgumentNotAnArray
  362. if not isinstance(arg, BaseArray):
  363. raise ArgumentNotAnArray
  364. if self.name == "dot":
  365. w_res = arr.descr_dot(interp.space, arg)
  366. elif self.name == 'take':
  367. w_res = arr.descr_take(interp.space, arg)
  368. else:
  369. assert False # unreachable code
  370. else:
  371. raise WrongFunctionName
  372. if isinstance(w_res, BaseArray):
  373. return w_res
  374. if isinstance(w_res, FloatObject):
  375. dtype = get_dtype_cache(interp.space).w_float64dtype
  376. elif isinstance(w_res, BoolObject):
  377. dtype = get_dtype_cache(interp.space).w_booldtype
  378. elif isinstance(w_res, interp_boxes.W_GenericBox):
  379. dtype = w_res.get_dtype(interp.space)
  380. else:
  381. dtype = None
  382. return scalar_w(interp.space, dtype, w_res)
  383. _REGEXES = [
  384. ('-?[\d\.]+', 'number'),
  385. ('\[', 'array_left'),
  386. (':', 'colon'),
  387. ('\w+', 'identifier'),
  388. ('\]', 'array_right'),
  389. ('(->)|[\+\-\*\/]', 'operator'),
  390. ('=', 'assign'),
  391. (',', 'comma'),
  392. ('\|', 'pipe'),
  393. ('\(', 'paren_left'),
  394. ('\)', 'paren_right'),
  395. ]
  396. REGEXES = []
  397. for r, name in _REGEXES:
  398. REGEXES.append((re.compile(r' *(' + r + ')'), name))
  399. del _REGEXES
  400. class Token(object):
  401. def __init__(self, name, v):
  402. self.name = name
  403. self.v = v
  404. def __repr__(self):
  405. return '(%s, %s)' % (self.name, self.v)
  406. empty = Token('', '')
  407. class TokenStack(object):
  408. def __init__(self, tokens):
  409. self.tokens = tokens
  410. self.c = 0
  411. def pop(self):
  412. token = self.tokens[self.c]
  413. self.c += 1
  414. return token
  415. def get(self, i):
  416. if self.c + i >= len(self.tokens):
  417. return empty
  418. return self.tokens[self.c + i]
  419. def remaining(self):
  420. return len(self.tokens) - self.c
  421. def push(self):
  422. self.c -= 1
  423. def __repr__(self):
  424. return repr(self.tokens[self.c:])
  425. class Parser(object):
  426. def tokenize(self, line):
  427. tokens = []
  428. while True:
  429. for r, name in REGEXES:
  430. m = r.match(line)
  431. if m is not None:
  432. g = m.group(0)
  433. tokens.append(Token(name, g))
  434. line = line[len(g):]
  435. if not line:
  436. return TokenStack(tokens)
  437. break
  438. else:
  439. raise TokenizerError(line)
  440. def parse_number_or_slice(self, tokens):
  441. start_tok = tokens.pop()
  442. if start_tok.name == 'colon':
  443. start = 0
  444. else:
  445. if tokens.get(0).name != 'colon':
  446. return FloatConstant(start_tok.v)
  447. start = int(start_tok.v)
  448. tokens.pop()
  449. if not tokens.get(0).name in ['colon', 'number']:
  450. stop = -1
  451. step = 1
  452. else:
  453. next = tokens.pop()
  454. if next.name == 'colon':
  455. stop = -1
  456. step = int(tokens.pop().v)
  457. else:
  458. stop = int(next.v)
  459. if tokens.get(0).name == 'colon':
  460. tokens.pop()
  461. step = int(tokens.pop().v)
  462. else:
  463. step = 1
  464. return SliceConstant(start, stop, step)
  465. def parse_expression(self, tokens, accept_comma=False):
  466. stack = []
  467. while tokens.remaining():
  468. token = tokens.pop()
  469. if token.name == 'identifier':
  470. if tokens.remaining() and tokens.get(0).name == 'paren_left':
  471. stack.append(self.parse_function_call(token.v, tokens))
  472. else:
  473. stack.append(Variable(token.v))
  474. elif token.name == 'array_left':
  475. stack.append(ArrayConstant(self.parse_array_const(tokens)))
  476. elif token.name == 'operator':
  477. stack.append(Variable(token.v))
  478. elif token.name == 'number' or token.name == 'colon':
  479. tokens.push()
  480. stack.append(self.parse_number_or_slice(tokens))
  481. elif token.name == 'pipe':
  482. stack.append(RangeConstant(tokens.pop().v))
  483. end = tokens.pop()
  484. assert end.name == 'pipe'
  485. elif accept_comma and token.name == 'comma':
  486. continue
  487. else:
  488. tokens.push()
  489. break
  490. if accept_comma:
  491. return stack
  492. stack.reverse()
  493. lhs = stack.pop()
  494. while stack:
  495. op = stack.pop()
  496. assert isinstance(op, Variable)
  497. rhs = stack.pop()
  498. lhs = Operator(lhs, op.name, rhs)
  499. return lhs
  500. def parse_function_call(self, name, tokens):
  501. args = []
  502. tokens.pop() # lparen
  503. while tokens.get(0).name != 'paren_right':
  504. args += self.parse_expression(tokens, accept_comma=True)
  505. return FunctionCall(name, args)
  506. def parse_array_const(self, tokens):
  507. elems = []
  508. while True:
  509. token = tokens.pop()
  510. if token.name == 'number':
  511. elems.append(FloatConstant(token.v))
  512. elif token.name == 'array_left':
  513. elems.append(ArrayConstant(self.parse_array_const(tokens)))
  514. else:
  515. raise BadToken()
  516. token = tokens.pop()
  517. if token.name == 'array_right':
  518. return elems
  519. assert token.name == 'comma'
  520. def parse_statement(self, tokens):
  521. if (tokens.get(0).name == 'identifier' and
  522. tokens.get(1).name == 'assign'):
  523. lhs = tokens.pop().v
  524. tokens.pop()
  525. rhs = self.parse_expression(tokens)
  526. return Assignment(lhs, rhs)
  527. elif (tokens.get(0).name == 'identifier' and
  528. tokens.get(1).name == 'array_left'):
  529. name = tokens.pop().v
  530. tokens.pop()
  531. index = self.parse_expression(tokens)
  532. tokens.pop()
  533. tokens.pop()
  534. return ArrayAssignment(name, index, self.parse_expression(tokens))
  535. return Execute(self.parse_expression(tokens))
  536. def parse(self, code):
  537. statements = []
  538. for line in code.split("\n"):
  539. if '#' in line:
  540. line = line.split('#', 1)[0]
  541. line = line.strip(" ")
  542. if line:
  543. tokens = self.tokenize(line)
  544. statements.append(self.parse_statement(tokens))
  545. return Code(statements)
  546. def numpy_compile(code):
  547. parser = Parser()
  548. return InterpreterState(parser.parse(code))