PageRenderTime 45ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/rpython/rlib/unicodedata/test/test_unicodedata.py

https://bitbucket.org/bwesterb/pypy
Python | 77 lines | 64 code | 13 blank | 0 comment | 12 complexity | db50becb4ba01f539686eca1fa4755d3 MD5 | raw file
  1. import random
  2. import unicodedata
  3. import py
  4. from rpython.rlib.unicodedata import unicodedb_3_2_0, unicodedb_5_2_0
  5. class TestUnicodeData(object):
  6. def setup_class(cls):
  7. if unicodedata.unidata_version != '5.2.0':
  8. py.test.skip('Needs python with unicode 5.2.0 database.')
  9. seed = random.getrandbits(32)
  10. print "random seed: ", seed
  11. random.seed(seed)
  12. cls.charlist = charlist = []
  13. cls.nocharlist = nocharlist = []
  14. while len(charlist) < 1000 or len(nocharlist) < 1000:
  15. chr = unichr(random.randrange(65536))
  16. try:
  17. charlist.append((chr, unicodedata.name(chr)))
  18. except ValueError:
  19. nocharlist.append(chr)
  20. def test_random_charnames(self):
  21. for chr, name in self.charlist:
  22. assert unicodedb_5_2_0.name(ord(chr)) == name
  23. assert unicodedb_5_2_0.lookup(name) == ord(chr)
  24. def test_random_missing_chars(self):
  25. for chr in self.nocharlist:
  26. py.test.raises(KeyError, unicodedb_5_2_0.name, ord(chr))
  27. def test_compare_functions(self):
  28. def getX(fun, code):
  29. try:
  30. return getattr(unicodedb_5_2_0, fun)(code)
  31. except KeyError:
  32. return -1
  33. for code in range(0x10000):
  34. char = unichr(code)
  35. assert unicodedata.digit(char, -1) == getX('digit', code)
  36. assert unicodedata.numeric(char, -1) == getX('numeric', code)
  37. assert unicodedata.decimal(char, -1) == getX('decimal', code)
  38. assert unicodedata.category(char) == unicodedb_5_2_0.category(code)
  39. assert unicodedata.bidirectional(char) == unicodedb_5_2_0.bidirectional(code)
  40. assert unicodedata.decomposition(char) == unicodedb_5_2_0.decomposition(code)
  41. assert unicodedata.mirrored(char) == unicodedb_5_2_0.mirrored(code)
  42. assert unicodedata.combining(char) == unicodedb_5_2_0.combining(code)
  43. def test_compare_methods(self):
  44. for code in range(0x10000):
  45. char = unichr(code)
  46. assert char.isalnum() == unicodedb_5_2_0.isalnum(code)
  47. assert char.isalpha() == unicodedb_5_2_0.isalpha(code)
  48. assert char.isdecimal() == unicodedb_5_2_0.isdecimal(code)
  49. assert char.isdigit() == unicodedb_5_2_0.isdigit(code)
  50. assert char.islower() == unicodedb_5_2_0.islower(code)
  51. assert char.isnumeric() == unicodedb_5_2_0.isnumeric(code)
  52. assert char.isspace() == unicodedb_5_2_0.isspace(code), hex(code)
  53. assert char.istitle() == (unicodedb_5_2_0.isupper(code) or unicodedb_5_2_0.istitle(code)), code
  54. assert char.isupper() == unicodedb_5_2_0.isupper(code)
  55. assert char.lower() == unichr(unicodedb_5_2_0.tolower(code))
  56. assert char.upper() == unichr(unicodedb_5_2_0.toupper(code))
  57. assert char.title() == unichr(unicodedb_5_2_0.totitle(code)), hex(code)
  58. def test_hangul_difference_520(self):
  59. assert unicodedb_5_2_0.name(40874) == 'CJK UNIFIED IDEOGRAPH-9FAA'
  60. def test_differences(self):
  61. assert unicodedb_5_2_0.name(9187) == 'BENZENE RING WITH CIRCLE'
  62. assert unicodedb_5_2_0.lookup('BENZENE RING WITH CIRCLE') == 9187
  63. py.test.raises(KeyError, unicodedb_3_2_0.lookup, 'BENZENE RING WITH CIRCLE')
  64. py.test.raises(KeyError, unicodedb_3_2_0.name, 9187)