PageRenderTime 50ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/tests/reader-tests.py

http://github.com/halgari/clojure-py
Python | 629 lines | 534 code | 15 blank | 80 comment | 19 complexity | 52cb0c4f9e0582877ec84066c0758c29 MD5 | raw file
  1. #!/usr/bin/python -t
  2. # -*- coding: utf-8 -*-
  3. """reader-tests.py
  4. Friday, March 16 2012
  5. """
  6. import re
  7. import string
  8. import unittest
  9. from random import choice
  10. from fractions import Fraction
  11. from clojure.lang.lispreader import read, readDelimitedList
  12. from clojure.lang.symbol import Symbol
  13. from clojure.lang.ipersistentlist import IPersistentList
  14. from clojure.lang.persistentlist import PersistentList
  15. from clojure.lang.persistentlist import EmptyList
  16. from clojure.lang.persistentvector import PersistentVector
  17. from clojure.lang.persistenthashmap import PersistentHashMap
  18. from clojure.lang.persistenthashset import PersistentHashSet
  19. from clojure.lang.fileseq import StringReader
  20. from clojure.lang.cljexceptions import ReaderException
  21. from clojure.lang.pytypes import *
  22. # reader returns this unique *value* if it's out of characters
  23. EOF = object()
  24. # A unique *type* to return at the EOF.
  25. # For use in testReturnedType_PASS().
  26. class Sentinal(object): pass
  27. sentinal = Sentinal()
  28. sentinalType = type(sentinal)
  29. class TestReader(unittest.TestCase):
  30. # literal integers
  31. def testIntegerReader_PASS(self):
  32. # base 8
  33. for k, v in base8IntegerMap_PASS.items():
  34. r = StringReader(k)
  35. self.assertEqual(read(r, False, EOF, False), v)
  36. # base 10
  37. for k, v in base10IntegerMap_PASS.items():
  38. r = StringReader(k)
  39. self.assertEqual(read(r, False, EOF, False), v)
  40. # base 16
  41. for k, v in base16IntegerMap_PASS.items():
  42. r = StringReader(k)
  43. self.assertEqual(read(r, False, EOF, False), v)
  44. # base N
  45. for k, v in baseNIntegerMap_PASS.items():
  46. r = StringReader(k)
  47. self.assertEqual(read(r, False, EOF, False), v)
  48. def testIntegerReader_FAIL(self):
  49. for t in integer_FAIL:
  50. r = StringReader(t)
  51. self.assertRaises(ReaderException, read, r, False, EOF, False)
  52. # literal floating point
  53. def testFloatingPointReader_PASS(self):
  54. for k, v in floatingPointMap_PASS.items():
  55. r = StringReader(k)
  56. self.assertEqual(read(r, False, EOF, False), v)
  57. def testFloatingPointReader_FAIL(self):
  58. for t in floatingPoint_FAIL:
  59. r = StringReader(t)
  60. self.assertRaises(ReaderException, read, r, False, EOF, False)
  61. # literal ratios
  62. def testRationalReader_PASS(self):
  63. for k, v in rationalMap_PASS.items():
  64. r = StringReader(k)
  65. self.assertEqual(read(r, False, EOF, False), v)
  66. def testRationalReader_FAIL(self):
  67. for t in rational_FAIL:
  68. r = StringReader(t)
  69. self.assertRaises(ReaderException, read, r, False, EOF, False)
  70. # literal characters
  71. def testCharacterReader_PASS(self):
  72. for k, v in literalCharacterMap_PASS.items():
  73. r = StringReader(k)
  74. self.assertEqual(read(r, False, EOF, False), v)
  75. def testCharacterReader_FAIL(self):
  76. for s in literalCharacter_FAIL:
  77. r = StringReader(s)
  78. self.assertRaises(ReaderException, read, r, False, EOF, False)
  79. # literal strings
  80. def testStringReader_PASS(self):
  81. for k, v in literalStringMap_PASS.items():
  82. r = StringReader('"' + k + '"')
  83. self.assertEqual(read(r, False, EOF, False), v)
  84. def testStringReader_FAIL(self):
  85. # special case, missing trailing "
  86. r = StringReader('"foo')
  87. self.assertRaises(ReaderException, read, r, False, EOF, False)
  88. for s in literalString_FAIL:
  89. r = StringReader('"' + s + '"')
  90. self.assertRaises(ReaderException, read, r, False, EOF, False)
  91. # literal regex pattern strings
  92. def testRegexPattern_PASS(self):
  93. for k, v in regexPatternMap_PASS.items():
  94. r = StringReader(k)
  95. self.assertEqual(read(r, False, EOF, False).pattern, v.pattern)
  96. def testRegexPattern_FAIL(self):
  97. for s in regexPattern_FAIL:
  98. r = StringReader(s)
  99. self.assertRaises(ReaderException, read, r, False, EOF, False)
  100. # literal raw regex pattern strings
  101. def testRawRegexPattern_PASS(self):
  102. for k, v in rawRegexPatternMap_PASS.items():
  103. r = StringReader(k)
  104. self.assertEqual(read(r, False, EOF, False).pattern, v.pattern)
  105. def testRawRegexPattern_FAIL(self):
  106. for s in rawRegexPattern_FAIL:
  107. r = StringReader(s)
  108. self.assertRaises(ReaderException, read, r, False, EOF, False)
  109. # delimited lists
  110. def testDelimitedLists_PASS(self):
  111. # length test
  112. for k, v in delimitedListLength_PASS.items():
  113. r = StringReader(k)
  114. delim = k[-1]
  115. self.assertEqual(readDelimitedList(delim, r, False), v)
  116. # returned type tests
  117. def testReturnedType_PASS(self):
  118. for k, v in returnedType_PASS.items():
  119. r = StringReader(k)
  120. self.assertEqual(type(read(r, False, sentinal, False)), v)
  121. # raise on EOF
  122. def testEOFRaisesReaderException(self):
  123. r = StringReader("")
  124. self.assertRaises(ReaderException, read, r, True, # <- True
  125. EOF, False)
  126. # miscellaneous failures
  127. def testMiscellaneous_FAIL(self):
  128. for s in miscellaneous_FAIL:
  129. r = StringReader(s)
  130. self.assertRaises(ReaderException, read, r, False, EOF, False)
  131. # ======================================================================
  132. # Literal Integer Cases
  133. # ======================================================================
  134. base8IntegerMap_PASS = {
  135. "00": 0, "-00": 0, "+00": 0,
  136. "012345670": 2739128, "-012345670": -2739128, "+012345670": 2739128,
  137. "06235436235462365452777171623500712635712365712236" :
  138. 140667142011619517350321483099394425046406302L,
  139. "-06235436235462365452777171623500712635712365712236" :
  140. -140667142011619517350321483099394425046406302L,
  141. "+06235436235462365452777171623500712635712365712236" :
  142. 140667142011619517350321483099394425046406302L,
  143. }
  144. base10IntegerMap_PASS = {
  145. "0" : 0, "-0" : 0, "+0" : 0,
  146. "1" : 1, "-1" : -1, "+1" : 1,
  147. "1234567890" : 1234567890,
  148. "-1234567890" : -1234567890,
  149. "+1234567890" : 1234567890,
  150. "20399572305720357120320399572305720357203" :
  151. 20399572305720357120320399572305720357203L,
  152. "-20399572305720357120320399572305720357203" :
  153. -20399572305720357120320399572305720357203L,
  154. "+20399572305720357120320399572305720357203" :
  155. 20399572305720357120320399572305720357203L,
  156. }
  157. base16IntegerMap_PASS = {
  158. "0x0" : 0, "-0x0" : 0, "+0x0" : 0,
  159. "0X0" : 0, "-0X0" : 0, "+0X0" : 0,
  160. "0x1234567890abcdefABCDEF" :
  161. 22007822917795467892608495L,
  162. "-0X1234567890abcdefABCDEF" :
  163. -22007822917795467892608495L,
  164. "+0x1234567890abcdefABCDEF" :
  165. +22007822917795467892608495L,
  166. }
  167. def gen_baseNIntegerMap_PASS():
  168. """Return a dict as a string to test the base-N syntax (2r101010)
  169. This map is eval'd below.
  170. Each entry is of the form:
  171. "2r10" : 2
  172. To see wtf is going on...
  173. >>> pprint(eval(gen_baseNIntegerMap_PASS()))"""
  174. # don't change the order of these
  175. digits = "1023456789aBcDeFgHiJkLmNoPqRsTuVwXyZ"
  176. entries = []
  177. for radix in range(2, 37):
  178. strDigits = digits[:radix]
  179. res1 = int(strDigits, radix)
  180. res2 = int('-' + strDigits, radix)
  181. entry = '"%s":%d, "%s":%d, "%s":%d' \
  182. % ("%d%s%s" % (radix, choice('rR'), strDigits), res1,
  183. "-%d%s%s" % (radix, choice('rR'), strDigits), res2,
  184. "+%d%s%s" % (radix, choice('rR'), strDigits), res1)
  185. entries.append(entry)
  186. return "{%s}" % ",".join(entries)
  187. baseNIntegerMap_PASS = eval(gen_baseNIntegerMap_PASS())
  188. integer_FAIL = [
  189. # no f suffix
  190. "3333f", "-3333f", "+3333f",
  191. # Clojure M not a suffix (yet)
  192. "3333M", "-3333M", "+3333M",
  193. # 8 not an octal digit
  194. "08", "-08", "+08",
  195. # g not a hex digit
  196. "0xfgaa00", "-0xfgaa00", "+0xfgaa00",
  197. # z not a base 32 number
  198. "32rzzz", "-32rzzz", "+32rzzz",
  199. # radix out of range [2, 36]
  200. "1r0", "-1r0", "+1r0", "37r0", "-37r0", "+37r0",
  201. ]
  202. # ======================================================================
  203. # Literal Floating Point Cases
  204. # ======================================================================
  205. floatingPointMap_PASS = {
  206. # no decimal, exponent
  207. "0e0" : 0.0, "-0e0" : 0.0, "+0e0" : 0.0,
  208. "0e-0" : 0.0, "-0e-0" : 0.0, "+0e-0" : 0.0,
  209. "0E-0" : 0.0, "-0E-0" : 0.0, "+0E-0" : 0.0,
  210. "0e+0" : 0.0, "-0e+0" : 0.0, "+0e+0" : 0.0,
  211. "0E+0" : 0.0, "-0E+0" : 0.0, "+0E+0" : 0.0,
  212. # with decimal, no digit after decimal, exponent
  213. "0." : 0.0, "-0." : 0.0, "+0." : 0.0,
  214. "0.e0" : 0.0, "-0.e0" : 0.0, "+0.e0" : 0.0,
  215. "0.E0" : 0.0, "-0.E0" : 0.0, "+0.E0" : 0.0,
  216. "0.e-0" : 0.0, "-0.e-0" : 0.0, "+0.e-0" : 0.0,
  217. "0.E-0" : 0.0, "-0.E-0" : 0.0, "+0.E-0" : 0.0,
  218. "0.e+0" : 0.0, "-0.e+0" : 0.0, "+0.e+0" : 0.0,
  219. "0.E+0" : 0.0, "-0.E+0" : 0.0, "+0.E+0" : 0.0,
  220. # with decimal, digit after decimal, exponent
  221. "0.0" : 0.0, "-0.0" : 0.0, "+0.0" : 0.0,
  222. "0.0e0" : 0.0, "-0.0e0" : 0.0, "+0.0e0" : 0.0,
  223. "0.0E0" : 0.0, "-0.0E0" : 0.0, "+0.0E0" : 0.0,
  224. "0.0e-0" : 0.0, "-0.0e-0" : 0.0, "+0.0e-0" : 0.0,
  225. "0.0E-0" : 0.0, "-0.0E-0" : 0.0, "+0.0E-0" : 0.0,
  226. "0.0e+0" : 0.0, "-0.0e+0" : 0.0, "+0.0e+0" : 0.0,
  227. "0.0E+0" : 0.0, "-0.0E+0" : 0.0, "+0.0E+0" : 0.0,
  228. }
  229. floatingPoint_FAIL = [
  230. # no suffix
  231. "3.3f", "-3.3f", "+3.3f",
  232. # s, f, d, l, etc. not an exponent specifier
  233. "23.0s-4", "-23.0f-4", "+23.0d-4",
  234. # double decimal
  235. "3..", "-3..", "+3..",
  236. ]
  237. # ======================================================================
  238. # Literal Rational Cases
  239. # ======================================================================
  240. rationalMap_PASS = {
  241. "22/7" : Fraction(22, 7),
  242. "-22/7" : Fraction(-22, 7),
  243. "+22/7" : Fraction(22, 7),
  244. "0/1" : Fraction(0, 1),
  245. "-0/1" : Fraction(0, 1),
  246. "+0/1" : Fraction(0, 1),
  247. # regex was fubar, didn't allow zeros after the first digit
  248. "100/203" : Fraction(100, 203),
  249. "-100/203" : Fraction(-100, 203),
  250. "+100/203" : Fraction(100, 203),
  251. }
  252. rational_FAIL = [
  253. # These actually pass in Clojure, but are interpreted as base 10 integers,
  254. # not base 8.
  255. "033/029", "-033/029", "+033/029",
  256. ]
  257. # ======================================================================
  258. # Literal Character Cases
  259. # ======================================================================
# Character-literal source text (the key, passed to read() unchanged by
# testCharacterReader_PASS) -> the value read() is expected to return.
# Every key is written with "\\", so the reader receives a single backslash
# followed by the character or name.
literalCharacterMap_PASS = {
    # basic
    "\\x" : "x",
    "\\ " : " ",
    "\\X" : "X",
    # newline after the \  (the key is a backslash followed by a real
    # newline, so the closing quotes must start at column 0)
    """\\
""" : "\n",
    # named characters
    "\\space" : " ",
    "\\newline" : "\n",
    "\\return" : "\r",
    "\\backspace" : "\b",
    "\\formfeed" : "\f",
    "\\tab" : "\t",
    # octal
    "\\o0" : "\x00",
    "\\o41" : "!",
    "\\o377" : u"\u00ff",
    # hex
    "\\u03bb" : u"\u03bb",
    # BZZZZT!
    # Because this file is encoded as UTF-8, and the reader is expecting ASCII,
    # it will crap out every time.
    # "\\?" : character(u"\u03bb"),
}
  286. literalCharacter_FAIL = [
  287. # According to a random web page:
  288. # The only reason the range D800:DFFF is invalid is because of UTF-16's
  289. # inability to encode it.
  290. "\ud800", "\udfff",
  291. # missing char at eof
  292. "\\",
  293. # not enough digits after \u (\u is the character u)
  294. "\u1", "\u22", "\u333",
  295. # too many digits after \u
  296. "\u03bbb",
  297. # too many digits after \o
  298. "\o0333",
  299. # octal value > 0377
  300. "\o400"
  301. ]
  302. # ======================================================================
  303. # Literal String Cases
  304. # These are tests that conform to Clojure. Some Python string syntax is
  305. # not permitted:
  306. # \U, \N{foo}, \x, \v, \a
  307. # ======================================================================
# String *bodies* (the key; testStringReader_PASS wraps it in double quotes
# before handing it to read()) -> the value read() is expected to return.
literalStringMap_PASS = {
    # basic
    "": "",
    "x": "x",
    "foo": "foo",
    "0123456789": "0123456789",
    "~!@#$%^&*()_+-=[]{}';:/?>.<,": "~!@#$%^&*()_+-=[]{}';:/?>.<,",
    "qwertyuiopasdfghjklzxcvbnm": "qwertyuiopasdfghjklzxcvbnm",
    "QWERTYUIOPASDFGHJKLZXCVBNM": "QWERTYUIOPASDFGHJKLZXCVBNM",
    # escapes; the key ends with an escaped backslash (the trailing \\\\)
    '\\"\\n\\t\\f\\b\\r\\\\': '"\n\t\f\b\r\\',
    # 4 hex digit
    # NOTE(review): these keys are plain (non-unicode) literals, so on
    # Python 2 "\u03bb" is a backslash, "u" and four hex digits -- presumably
    # so the reader does the decoding; confirm before porting to Python 3.
    "\u03bb": u"\u03bb",
    "\u03bb@": u"\u03bb@",
    "@\u03bb": u"@\u03bb",
    # octal
    "\\0": "\x00",
    "\\0@": "\x00@",
    "@\\0": "@\x00",
    "\\41": "!",
    "\\41@": "!@",
    "@\\41": "@!",
    "\\176": "~",
    "\\176@": "~@",
    "@\\176": "@~",
}
  334. literalString_FAIL = [
  335. # invalid escape characters
  336. "\\x", "\\a", "\\v", "@\\x", "@\\a", "@\\v", "\\x@", "\\a@", "\\v@",
  337. "\\o041"
  338. # not enough digits after \u
  339. "\\u", "\\u3", "\\u33", "\\u333",
  340. "@\\u", "@\\u3", "@\\u33", "@\\u333",
  341. "\\u@", "\\u3@", "\\u33@", "\\u333@",
  342. # octal value > 0377
  343. "\\400", "@\\400", "\\400@",
  344. ]
  345. # ======================================================================
  346. # Regular Expression Pattern
  347. #
  348. # Each key is the string sent to lispreader. The escapes have to be
  349. # handled in such a way as to allow the reader to do escape
  350. # interpretation. If Python would treat the escape special, it needs
  351. # an additional \ before sending it to the reader.
  352. # ======================================================================
# Key: the #"..." source text handed to read() by testRegexPattern_PASS.
# Value: a compiled regex; the test compares only its .pattern text with
# the .pattern of whatever read() returns.
regexPatternMap_PASS = {
    # all using #"", not raw #r""
    '#""' : re.compile(""),
    '#"."' : re.compile("."),
    '#"^."' : re.compile("^."),
    '#".$"' : re.compile(".$"),
    '#".*"' : re.compile(".*"),
    '#".+"' : re.compile(".+"),
    '#".?"' : re.compile(".?"),
    '#".*?"' : re.compile(".*?"),
    '#".+?"' : re.compile(".+?"),
    '#".??"' : re.compile(".??"),
    '#".{3}"' : re.compile(".{3}"),
    '#".{3,}"' : re.compile(".{3,}"),
    '#".{,3}"' : re.compile(".{,3}"),
    # NOTE(review): the next two entries are identical; the dict keeps one.
    '#".{3,3}"' : re.compile(".{3,3}"),
    '#".{3,3}"' : re.compile(".{3,3}"),
    '#".{3,3}?"' : re.compile(".{3,3}?"),
    # None of these \ are special. Python will send them to the reader as is.
    # \ . \ ^ \ $, etc.
    '#"\.\^\$\*\+\?\{\}\[\]"' : re.compile("\.\^\$\*\+\?\{\}\[\]"),
    '#"[a-z]"' : re.compile("[a-z]"),
    '#"[]]"' : re.compile("[]]"),
    '#"[-]"' : re.compile("[-]"),
    # Nor are these
    '#"[\-\]\[]"' : re.compile(r"[\-\]\[]"),
    # or these
    '#"[\w\S]"' : re.compile("[\w\S]"),
    '#"[^5]"' : re.compile("[^5]"),
    # or the |
    '#"A|B[|]\|"' : re.compile("A|B[|]\|"),
    # or ( )
    '#"([()]\(\))"' : re.compile("([()]\(\))"),
    # NOTE(review): the next two entries are identical; the dict keeps one.
    '#"(?iLmsux)"' : re.compile("(?iLmsux)"),
    '#"(?iLmsux)"' : re.compile("(?iLmsux)"),
    '#"(:?)"' : re.compile("(:?)"),
    '#"(?P<foo>)"' : re.compile("(?P<foo>)"),
    '#"(?P<foo>)(?P=foo)"' : re.compile("(?P<foo>)(?P=foo)"),
    '#"(?# comment )"' : re.compile("(?# comment )"),
    '#"(?=foo)"' : re.compile("(?=foo)"),
    '#"(?!foo)"' : re.compile("(?!foo)"),
    '#"(?<=foo)bar"' : re.compile("(?<=foo)bar"),
    '#"(?<!foo)bar"' : re.compile("(?<!foo)bar"),
    '#"(?P<foo>)(?(foo)yes|no)"' : re.compile("(?P<foo>)(?(foo)yes|no)"),
    # The \\\\ in the key: Python will send two \'s to the lisp reader,
    # not four.
    # NOTE(review): the next two entries share one key, so only the second
    # survives in the dict; both values spell the same pattern text.
    '#"(.+) \\\\1"' : re.compile("(.+) \\1"),
    '#"(.+) \\\\1"' : re.compile(r"(.+) \1"),
    # send one \ each, so the octal sequences are interpreted in lispreader
    # >>> u"\377" == "\377" # funky warning on the Python repl
    '#"\\377\\021"' : re.compile(u"\377\021"),
    # Again, send one \ each. Python would interpret \1 as the char 0x01
    # *before* sending it to lispreader.
    '#"[\\1\\2\\3\\4\\5\\6\\7\\10]"' : re.compile("[\1\2\3\4\5\6\7\10]"),
    # Python does not interpret \A, but it does \b
    # The dict value here is a raw string so the char sequence will be:
    # \ A \ \ b \ B, etc.
    '#"\A\\\\b\B\d\D\s\S\w\W\Z"' : re.compile(r"\A\b\B\d\D\s\S\w\W\Z"),
    # dict val is a raw string, and Python interprets all these chars
    '#"\\\\a\\\\b\\\\f\\\\n\\\\r\\\\t\\\\v"' : re.compile(r"\a\b\f\n\r\t\v"),
    # I want Python to interpret here. lispreader will simply return
    # 0x07, 0x08 etc. (no escape interpretation)
    '#"\a\b\f\n\r\t\v"' : re.compile("\a\b\f\n\r\t\v"),
    # Send \ and letter separately. lispreader will see \ n and
    # return 0x0a (reader interpretation)
    '#"\\a\\b\\f\\n\\r\\t\\v"' : re.compile("\a\b\f\n\r\t\v"),
    # \N, \u, and \U are only special in a unicode string (in Python)
    '#"\N{DIGIT ZERO}{5, 10}"' : re.compile(u"\N{DIGIT ZERO}{5, 10}"),
    '#"\u03bb{1,3}"' : re.compile(u"\u03bb{1,3}"),
    '#"\U000003bb{1,3}"' : re.compile(u"\U000003bb{1,3}"),
    # but \x is always special, hence the \\
    '#"\\xff\\x7f"' : re.compile(u"\xff\x7f"),
    # Verbose-mode pattern spanning several lines.  The text between the
    # triple quotes is part of the key/value, so those lines must stay at
    # column 0.
    '''#"(?x)
# foo
[a-z]
# bar
[0-9a-zA-Z_]+
"''' : re.compile("""(?x)
# foo
[a-z]
# bar
[0-9a-zA-Z_]+
"""),
}
# #"..." source texts for which testRegexPattern_FAIL expects read() to
# raise ReaderException.
regexPattern_FAIL = [
    # unmatched paren, bracket, (can't make it catch a missing } O_o)
    '#"([()]\(\)"', '#"["',
    # foo not defined
    '#"(?(foo)yes|no)"',
    # bogus escape
    '#"[\\8]"',
    # need 4 hex digits
    '#"\u"', '#"\u1"', '#"\u12"', '#"\u123"',
    # need 8 hex digits
    '#"\U"', '#"\U1"', '#"\U12"', '#"\U123"', '#"\U1234"', '#"\U12345"',
    '#"\U123456"', '#"\U1234567"',
    # need 2 hex digits
    '#"\\x"', '#"\\x1"',
    # missing }, missing ", can't escape }
    '#"\N{foo"', '#"\N{foo', '#"\N{foo\\}}"',
    # unknown name
    '#"\N{KLINGON LETTER NG}"',
    # empty {}
    '#"\N{}"', '#"\N{ }"',
]
# Key: the #r"..." source text handed to read() by testRawRegexPattern_PASS.
# Value: a compiled regex; the test compares only its .pattern text with
# the .pattern of whatever read() returns.
rawRegexPatternMap_PASS = {
    '#r""' : re.compile(r""),
    # NOTE(review): the next two entries are identical; the dict keeps one.
    '#r"\\."' : re.compile(r"\."),
    '#r"\\."' : re.compile(r"\."),
    '#r"\\n"' : re.compile(r"\n"),
    '#r"\.\^\$\*\+\?\{\}\[\]"' : re.compile(r"\.\^\$\*\+\?\{\}\[\]"),
    '#r"[\-\]\[]"' : re.compile(r"[\-\]\[]"),
    '#r"[\w\S]"' : re.compile(r"[\w\S]"),
    '#r"A|B[|]\|"' : re.compile(r"A|B[|]\|"),
    '#r"([()]\(\))"' : re.compile(r"([()]\(\))"),
    '#r"(.+) \\1"' : re.compile(r"(.+) \1"),
    '#r"\\377\\021"' : re.compile(ur"\377\021"),
    '#r"[\\1\\2\\3\\4\\5\\6\\7\\10]"' : re.compile(r"[\1\2\3\4\5\6\7\10]"),
    '#r"\A\\b\B\d\D\s\S\w\W\Z"' : re.compile(r"\A\b\B\d\D\s\S\w\W\Z"),
    '#r"\\a\\b\\f\\n\\r\\t\\v"' : re.compile(r"\a\b\f\n\r\t\v"),
    '#r"\a\b\f\n\r\t\v"' : re.compile("\a\b\f\n\r\t\v"),
    '#r"\N{DIGIT ZERO}{5, 10}"' : re.compile(ur"\N{DIGIT ZERO}{5, 10}"),
    '#r"\u03bb{1,3}"' : re.compile(ur"\u03bb{1,3}"),
    '#r"\\\u03bb{1,3}"' : re.compile(ur"\\u03bb{1,3}"),
    '#r"\\\\\u03bb{1,3}"' : re.compile(ur"\\\u03bb{1,3}"),
    '#r"\\\\\\\u03bb{1,3}"' : re.compile(ur"\\\\u03bb{1,3}"),
    '#r"\U000003bb{1,3}"' : re.compile(ur"\U000003bb{1,3}"),
    '#r"\\xff\\x7f"' : re.compile(ur"\xff\x7f"),
    '#r"\\0"' : re.compile(ur"\0"),
    '#r"\\01"' : re.compile(ur"\01"),
    '#r"\\012"' : re.compile(ur"\012"),
    # Backslash followed by a real newline.  The newline is part of both
    # literals, so the continuation lines must stay at column 0.
    '''#r"\\
"''' : re.compile(r"""\
"""),
}
# #r"..." source texts for which testRawRegexPattern_FAIL expects read() to
# raise ReaderException.
rawRegexPattern_FAIL = [
    # craps out the regex compiler
    '#r"\\x"',
    # can't end with an odd number of \
    '#r"\\"', # #r"\" ; in clojure-py
    '#r"\\\\\\"', # #r"\\\" ; in clojure-py
    # missing trailing "
    '#r"foo',
    # need 4 hex digits
    '#r"\u"', '#r"\u1"', '#r"\u12"', '#r"\u123"',
    # need 8 hex digits
    '#r"\U"', '#r"\U1"', '#r"\U12"', '#r"\U123"', '#r"\U1234"', '#r"\U12345"',
    '#r"\U123456"', '#r"\U1234567"',
]
  501. # ======================================================================
  502. # Literal Delimited Lists
  503. # ======================================================================
  504. # The keys define the clojure syntax of any object that would result in a call
  505. # to lispreader.readDelimitedList() (minus the leading macro character(s)).
  506. # Some objects like map and set have the same terminating character `}'. So
  507. # there is only one entry for both.
  508. #
  509. # The value is a the expected contents of the Python list returned from
  510. # readDelimitedList(). Integers are used because I don't care what type the
  511. # items are. There are separate tests for that.
  512. delimitedListLength_PASS = {
  513. "]" : [],
  514. "}" : [],
  515. ")" : [],
  516. "0]" : [0],
  517. "0)" : [0],
  518. "0}" : [0],
  519. "0 0]" : [0, 0],
  520. "0 0)" : [0, 0],
  521. "0 0}" : [0, 0],
  522. }
  523. # ======================================================================
  524. # Returned Type
  525. # ======================================================================
  526. returnedType_PASS = {
  527. "" : sentinalType,
  528. "," : sentinalType,
  529. " " : sentinalType,
  530. """
  531. """ : sentinalType,
  532. "\r" : sentinalType,
  533. "\n" : sentinalType,
  534. "\r\n" : sentinalType,
  535. "\n\r" : sentinalType,
  536. "\t" : sentinalType,
  537. "\b" : sentinalType,
  538. "\f" : sentinalType,
  539. ", \n\r\n\t\n\b\r\f" : sentinalType,
  540. "\v" : Symbol, # O_o
  541. # "\?" : pyUnicodeType,
  542. "\\x" : pyStrType, # TODO: always return unicode, never str
  543. "%foo" : Symbol, # not in an anonymous function #()
  544. "[]" : PersistentVector,
  545. "()" : EmptyList,
  546. "{}" : PersistentHashMap,
  547. '"foo"' : pyStrType, # TODO: always return unicode, never str
  548. # "???" : Symbol,
  549. '#"foo"' : pyRegexType,
  550. '#r"foo"' : pyRegexType,
  551. "#()" : PersistentList,
  552. "#{}" : PersistentHashSet,
  553. "'foo" : PersistentList,
  554. "~foo" : PersistentList,
  555. "~@(foo)" : PersistentList,
  556. "#^:foo()" : EmptyList,
  557. "^:foo()" : EmptyList,
  558. "; comment" : sentinalType,
  559. "#_ foo" : sentinalType,
  560. "0" : pyIntType,
  561. "0x0" : pyIntType,
  562. "041" : pyIntType,
  563. "2r10" : pyIntType,
  564. "2.2" : pyFloatType,
  565. "2e-3" : pyFloatType,
  566. "1/2" : Fraction,
  567. "foo" : Symbol,
  568. ".3" : Symbol,
  569. "+.3" : Symbol,
  570. "-.3" : Symbol,
  571. "true" : pyBoolType,
  572. "True" : Symbol,
  573. "false" : pyBoolType,
  574. "False" : Symbol,
  575. "nil" : pyNoneType,
  576. "None" : Symbol,
  577. }
  578. # ======================================================================
  579. # Miscellaneous Failures
  580. # Any type of random failures should go here
  581. # ======================================================================
  582. miscellaneous_FAIL = [
  583. # always raises
  584. "#<unreadable object>",
  585. # deref not implemented (yet)
  586. # reader eval not implemented (yet)
  587. "#=foo",
  588. ]