PageRenderTime 62ms CodeModel.GetById 32ms RepoModel.GetById 0ms app.codeStats 0ms

/rpython/rlib/parsing/test/test_pypackrat.py

https://bitbucket.org/jonathanslenders/pypy
Python | 551 lines | 514 code | 8 blank | 29 comment | 0 complexity | 7c5f6af5daace1fa7a114d2012a10f03 MD5 | raw file
  1. import py
  2. from rpython.rlib.parsing import regex
  3. from rpython.rlib.parsing.pypackrat import *
  4. import operator
  5. class TestPackrat(object):
  def test_simple(self):
      """Check the `*`, `+` and alternation operators on single characters."""
      class parser(PackratParser):
          """
          a: 'a'*;
          b: 'a'+;
          c: ('a' | 'b')+;
          """
      print(parser._code)
      # Rule `c` accepts a mix of 'a' and 'b' and yields the matched characters.
      mixed = "ababababa"
      assert parser(mixed).c() == list(mixed)
      # Rule `a` collects every 'a' ...
      run = "aaaaaaaa"
      assert parser(run).a() == list(run)
      # ... and yields an empty list on empty input,
      assert parser("").a() == []
      # whereas rule `b` fails on empty input.
      empty = parser("")
      py.test.raises(BacktrackException, empty.b)
  def test_questionmark(self):
      """Check that an element marked with `?` may be present or absent."""
      class parser(PackratParser):
          """
          a: 'a'? 'b';
          """
      print(parser._code)
      # The rule yields 'b' whether or not an 'a' precedes it.
      for text in ("ab", "b"):
          assert parser(text).a() == 'b'
  def test_call(self):
      """Check that one rule can refer to another rule by name."""
      class parser(PackratParser):
          """
          a: 'a'? 'b';
          b: a 'c';
          """
      print(parser._code)
      # Rule `b` goes through rule `a` and then matches 'c', its result.
      for text in ("abc", "bc"):
          outcome = parser(text).b()
          assert outcome == 'c'
  def test_memoize(self):
      """Parse input on which both alternatives of rule `a` start with `b`."""
      class parser(PackratParser):
          """
          x: a 'end';
          a: b c | b;
          b: 'b';
          c: 'c';
          """
      print(parser._code)
      # "bend" contains no 'c', so only the second alternative of `a`
      # (a lone `b`) can lead to a successful parse of `x`.
      outcome = parser("bend").x()
      assert outcome == 'end'
  def test_enclose(self):
      """Check that `<...>` selects which element a sequence yields.

      Rule `a` is expected to yield the element wrapped in angle brackets
      ('b') rather than the result of the trailing `'c'+` repetition.
      """
      class parser(PackratParser):
          """
          a: 'a' <'b'> 'c'+;
          """
      print(parser._code)
      p = parser("abcccccc")
      # This must be an assert: a bare comparison is evaluated and then
      # discarded, so the test could never fail.
      assert p.a() == 'b'
  def test_not(self):
      """Check negative lookahead (`!`): 'bh' must not be followed by 'a'."""
      class parser(PackratParser):
          """
          a: 'bh' !'a';
          """
      print(parser._code)
      p = parser('bhc')
      assert p.a() == 'bh'
      # The lookahead must not consume input: the 'c' is still available.
      # (Asserted explicitly; a bare comparison would be silently discarded.)
      assert p.__chars__('c') == 'c'
      p = parser('bh')
      # At the end of the input there is no 'a' either, so the rule matches.
      assert p.a() == 'bh'
      # Nothing is left to read after 'bh'.
      py.test.raises(BacktrackException, p.__any__)
  def test_lookahead(self):
      """Check positive lookahead (`!!`): 'b' must be followed by 'a'."""
      class parser(PackratParser):
          """
          a: 'b' !!'a';
          """
      print(parser._code)
      stream = parser('ba')
      assert stream.a() == 'b'
      # The looked-ahead 'a' was not consumed and can still be read.
      assert stream.__any__() == 'a'
  def test_regex1(self):
      """Check a backquoted regular expression with two alternatives."""
      class parser(PackratParser):
          """
          a: 'b' `a|b`;
          """
      print(parser._code)
      for text, matched in (('ba', 'a'), ('bb', 'b')):
          stream = parser(text)
          outcome = stream.a()
          assert outcome == matched
          # The whole input was consumed, so no further character is available.
          py.test.raises(BacktrackException, stream.__any__)
  def test_regex2(self):
      """Check a regular expression built from a negated character class."""
      class parser(PackratParser):
          """
          a: 'b' `[^\n]*`;
          """
      print(parser._code)
      stream = parser('ba#$@@$%\nbc')
      # The regex takes everything up to, but not including, the newline ...
      assert stream.a() == 'a#$@@$%'
      # ... which is therefore the next character to be read.
      assert stream.__any__() == '\n'
  def test_name(self):
      """Check named sub-results combined by a `return {...}` expression."""
      class parser(PackratParser):
          """
          a: c = 'b'
             r = `[^\n]*`
             return {c + r};
          """
      print(parser._code)
      stream = parser('ba#$@@$%\nbc')
      # `c` holds the literal 'b', `r` the regex match; the rule joins them.
      assert stream.a() == 'ba#$@@$%'
      assert stream.__any__() == '\n'
  def test_name2(self):
      """Check that a name can be bound to the result of a repetition."""
      class parser(PackratParser):
          """
          a: c = 'b'*
             r = `[^\n]*`
             return {(len(c), r)};
          """
      print(parser._code)
      stream = parser('bbbbbba#$@@$%\nbc')
      # Six leading 'b' characters, then the rest of the first line.
      assert stream.a() == (6, "a#$@@$%")
      assert stream.__any__() == '\n'
  def test_name3(self):
      """Check named sub-results when the rule has two alternatives."""
      class parser(PackratParser):
          """
          a: c = 'd'+
             r = 'f'+
             return {"".join(c) + "".join(r)}
           | c = 'b'*
             r = `[^\n]*`
             return {(len(c), r)};
          """
      print(parser._code)
      # Each entry: input text, expected result, next unread character.
      scenarios = (
          ('bbbbbba#$@@$%\nbc', (6, "a#$@@$%"), '\n'),
          ('dddffffx', "dddffff", 'x'),
      )
      for text, wanted, following in scenarios:
          stream = parser(text)
          outcome = stream.a()
          assert outcome == wanted
          assert stream.__any__() == following
  def test_nested_repetition(self):
      """Check a `*` repetition nested inside a `+` repetition."""
      class parser(PackratParser):
          """
          a: ('a' 'b'*)+;
          """
      print(parser._code)
      # One inner list of 'b's per 'a' in the input.
      per_a = [[], [], ['b', 'b', 'b'], ['b']]
      assert parser('aaabbbab').a() == per_a
  def test_ignore(self):
      """Check that an element in square brackets is left out of the result."""
      class parser(PackratParser):
          """
          a: ('a' ['b'])+;
          """
      print(parser._code)
      # Six "ab" pairs in the input; only the 'a' of each pair is reported.
      outcome = parser('abababababab').a()
      assert outcome == ['a'] * 6
  def test_regex(self):
      """Check a regular expression consisting of an escaped double quote."""
      class parser(PackratParser):
          r"""
          a: `\"`;
          """
      print(parser._code)
      quote = '"'
      assert parser(quote).a() == quote
  def test_memoize_exceptions(self):
      """Check that a failing rule keeps failing when called repeatedly.

      The same rule is invoked three times on the same parser instance and
      every call must raise BacktrackException.
      """
      class parser(PackratParser):
          """
          b: 'a';
          """
      print(parser._code)
      p = parser("c")
      # Only the raising matters here, so the exception info is not kept.
      for _ in range(3):
          py.test.raises(BacktrackException, p.b)
  def test_error_character(self):
      """Check the error reported when a single literal does not match."""
      class parser(PackratParser):
          """
          b: 'a';
          """
      print(parser._code)
      stream = parser("c")
      failure = py.test.raises(BacktrackException, stream.b)
      error = failure.value.error
      # The mismatch is at the very first character, where 'a' was required.
      assert error.pos == 0
      assert error.expected == ['a']
  def test_error_or(self):
      """Check the error reported when every alternative fails."""
      class parser(PackratParser):
          """
          b: 'a' | 'b';
          """
      print(parser._code)
      stream = parser("c")
      failure = py.test.raises(BacktrackException, stream.b)
      error = failure.value.error
      assert error.pos == 0
      # The expectations of both alternatives are reported.
      assert error.expected == ['a', 'b']
  def test_error_not(self):
      """Check the error reported when a negative lookahead (`!`) fails."""
      class parser(PackratParser):
          """
          b:
              'b' !'a';
          """
      # Show the generated code before parsing, so that it is available in
      # the captured output when one of the assertions below fails.
      print(parser._code)
      p = parser("ba")
      excinfo = py.test.raises(BacktrackException, p.b)
      error = excinfo.value.error
      # The failure is reported at the position of the forbidden 'a'.
      assert error.pos == 1
      assert error.expected == ['NOT a']
  def test_error_lookahead(self):
      """Check the error reported when a positive lookahead (`!!`) fails."""
      class parser(PackratParser):
          """
          b:
              'b' !!'a';
          """
      stream = parser("bc")
      print(parser._code)
      failure = py.test.raises(BacktrackException, stream.b)
      error = failure.value.error
      # An 'a' was required at position 1, where the input has 'c'.
      assert error.pos == 1
      assert error.expected == ['a']
  def test_error_star(self):
      """Check the error reported after a `*` repetition stops matching.

      Rule `b` requires the input to consist solely of 'b' characters
      (`!__any__` demands that nothing is left afterwards).
      """
      class parser(PackratParser):
          """
          b:
              'b'* !__any__;
          """
      # Printed once; a second identical print adds nothing to the output.
      print(parser._code)
      p = parser("bbc")
      excinfo = py.test.raises(BacktrackException, p.b)
      error = excinfo.value.error
      # The error points at the 'c' and names what the repetition wanted.
      assert error.pos == 2
      assert error.expected == ['b']
  def test_error_success(self):
      """Check the error reported when the repetition lives in its own rule.

      Rule `bstar` succeeds on the leading 'b' characters; the failure of
      the enclosing rule `b` is still expected to report 'b' at position 2.
      """
      class parser(PackratParser):
          """
          b:
              bstar !__any__;
          bstar:
              'b'*;
          """
      # Printed once; a second identical print adds nothing to the output.
      print(parser._code)
      p = parser("bbc")
      excinfo = py.test.raises(BacktrackException, p.b)
      error = excinfo.value.error
      assert error.pos == 2
      assert error.expected == ['b']
  def test_leftrecursion(self):
      """Check a directly left-recursive rule."""
      class parser(PackratParser):
          """
          b: b 'a' | 'b';
          """
      print(parser._code)
      # Base case: a lone 'b'.
      assert parser("b").b() == "b"
      # Recursive case: the trailing 'c' is left unconsumed, so the final
      # position is that of the 'c' and the result is the last 'a'.
      for text, stop in (("bac", 2), ("baaaaaaaaaaaaaac", 15)):
          stream = parser(text)
          outcome = stream.b()
          assert stream._pos == stop
          assert outcome == "a"
  def test_leftrecursion_arithmetic(self):
      """Check left-recursive rules that evaluate integer arithmetic."""
      class parser(PackratParser):
          """
          additive:
              a = additive
              '-'
              b = multitive
              return {a - b}
            | multitive;
          multitive:
              a = multitive
              '*'
              b = simple
              return {a * b}
            | simple;
          simple:
              x = `0|([1-9][0-9]*)`
              return {int(x)};
          """
      print(parser._code)
      single = parser("5")
      assert single.multitive() == 5
      # Rewind and parse again from the start; the result must be unchanged.
      single._pos = 0
      assert single.multitive() == 5
      # Subtraction groups to the left: (5 - 5) - 5.
      chain = parser("5-5-5")
      total = chain.additive()
      assert total == -5
      assert chain._pos == 5
  def test_leftrecursion_more_choices(self):
      """Check a left-recursive rule with two recursive alternatives."""
      class parser(PackratParser):
          """
          b:
              b 'a'
            | b 'c'
            | 'b';
          """
      print(parser._code)
      assert parser("b").b() == "b"
      # Only the second alternative fits "bc"; the 'x' stays unconsumed.
      stream = parser("bcx")
      outcome = stream.b()
      assert stream._pos == 2
      assert outcome == "c"
  def test_leftrecursion_argument(self):
      """Check a left-recursive rule that takes an argument."""
      class parser(PackratParser):
          """
          additive(op):
              a = additive({op})
              __chars__({op})
              b = multitive
              return {eval('a %s b' % (op, ))}
            | multitive;
          multitive:
              a = multitive
              '*'
              b = simple
              return {a * b}
            | simple;
          simple:
              x = `0|([1-9][0-9]*)`
              return {int(x)};
          """
      # The operator character is supplied by the caller; with '-' the
      # expression evaluates as ((4 - 4) - 4) - 4.
      stream = parser('4-4-4-4')
      total = stream.additive('-')
      assert total == -8
  def test_doif(self):
      """Check the `do ... if {...}` construct guarding a parsed value."""
      class parser(PackratParser):
          """
          INT:
              c = `[1-9][0-9]*`
              return {int(c)};
          b:
              do
                  c = INT
              if {c > 42};
          """
      print(parser._code)
      # 54 satisfies the condition and is the result of the rule.
      assert parser("54").b() == 54
      # 12 parses as an INT but fails the condition.
      rejected = parser("12")
      failure = py.test.raises(BacktrackException, rejected.b)
      error = failure.value.error
      assert error.pos == 0
      assert error.expected == ['condition not met']
  def test_if(self):
      """Check a leading `if {...}` that guards one alternative of a rule."""
      class parser(PackratParser):
          """
          b(c):
              if {c > 42}
              c = __chars__({str(c)})
              return {int(c)}
            | 'xyz';
          """
      print(parser._code)
      # Each entry: input text, argument passed to the rule, expected result.
      scenarios = (
          ("54", 54, 54),       # condition holds: the number itself is parsed
          ("xyz", 21, 'xyz'),   # condition fails: the second alternative is used
      )
      for text, argument, wanted in scenarios:
          outcome = parser(text).b(argument)
          assert outcome == wanted
  def test_parse_arguments(self):
      """Check rules with parameters, called from Python and from the grammar."""
      class parser(PackratParser):
          """
          between(a, b):
              do
                  c = __any__
              if {ord(a) <= ord(c) <= ord(b)}
              return {c};
          small_big_small:
              x = between({'a'}, {'z'})+
              y = between({'A'}, {'Z'})+
              z = between({'a'}, {'z'})+
              return {"".join(x) + "".join(y) + "".join(z)};
          """
      stream = parser('abc')
      first = stream.between('a', 'z')
      assert first == 'a'
      # Rewind and ask again with the same arguments.
      stream._pos = 0
      again = stream.between('a', 'z')
      assert again == 'a'
      # With an upper-case range the next character ('b') is rejected.
      failure = py.test.raises(BacktrackException, stream.between, 'A', 'Z')
      error = failure.value.error
      assert error.pos == 1
      assert error.expected == ['condition not met']
      # Lower-case run, upper-case run, lower-case run: the input is
      # reassembled unchanged.
      for text in ('aBc', 'aaaaarstawfpacawBAAAFPAcccfafp'):
          outcome = parser(text).small_big_small()
          assert outcome == text
  def test_choose(self):
      """Check `choose ... in {...}`, which tries each value of a sequence."""
      # getting more and more like Prolog, not sure that's good
      class parser(PackratParser):
          """
          choice:
              choose a in {self.possibilities}
              __chars__({a})+
              return {a};
          """
          possibilities = ['a', 'x', 'y']
      # Every possibility is matched when the input is made of that character.
      for text, letter in (('aaaaaaa', 'a'), ('xxxxxxxxxxxx', 'x'), ('y', 'y')):
          assert parser(text).choice() == letter
      # No possibility fits: the error lists all of them at position 0.
      p = parser('vvvvvv')
      excinfo = py.test.raises(BacktrackException, p.choice)
      error = excinfo.value.error
      assert error.pos == 0
      # Compare order-independently without mutating the error's own list.
      assert sorted(error.expected) == ['a', 'x', 'y']
  def test_python_future(self):
      """Parse the header of a Python module, collecting `__future__` imports.

      The grammar recognises comments, string literals, whitespace-only
      lines and `from __future__ import ...` statements.  Rule `header`
      flattens the names found on all lines into one list; lines that are
      not imports contribute nothing.
      """
      class parser(PackratParser):
          r"""
          comment:
              `#[^\r\n]*` lineend;
          lineend:
              `(\r|\n)+`;
          docstring:
              `(\"\"\"[^\\]*(\\[^\\]+)*\"\"\")|(\'\'\'[^\\]*(\\[^\\]+)*\'\'\')`
              ignore*
            | `(\"[^\\]*(\\[^\\]+)*\")|(\'[^\\]*(\\[^\\]+)*\')`
              ignore*;
          ignore:
              `[ \t]+`;
          ignoreline:
              `[ \t]*[\r\n]+`;
          fromimport:
              'from' ignore+
              '__future__' ignore+
              'import' ignore+
              what;
          identifier:
              `[a-zA-Z0-9_]+`;
          what:
              '(' ignoreline*
              g = group
              ignoreline*
              rest = ([',' ignoreline*] group)*
              ')'
              return {[g] + rest}
            | g = group
              rest = ([',' ignore*] group)*
              return {[g] + rest};
          group:
              name = identifier ignore+ 'as' ignore+ identifier ignore*
              return {name}
            | name = identifier ignore*
              return {name};
          line:
              comment
              return {None}
            | docstring lineend
              return {None}
            | ignore lineend
              return {None}
            | t = fromimport
              ignore*
              lineend
              return {t};
          header:
              l = line*
              return {[elt for sublist in l if sublist is not None for elt in sublist]};
          """
      # Whole-header parses: a comment or a string literal yields no names,
      # a __future__ import yields the imported name.
      header_cases = (
          ("#\n", []),
          ('"abc"\n', []),
          ("'abc'\n", []),
          ('from __future__ import division\n', ['division']),
      )
      for text, names in header_cases:
          p = parser(text)
          lines = p.header()
          assert lines == names
      # The import statement on its own: plain, parenthesised across two
      # lines, and with an `as` alias (only the original name is reported).
      import_cases = (
          'from __future__ import division, generators\n',
          'from __future__ import (division, \ngenerators)\n',
          'from __future__ import (division as d, \ngenerators)\n',
      )
      for text in import_cases:
          p = parser(text)
          lines = p.fromimport()
          assert lines == ['division', 'generators']