PageRenderTime 63ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/src/whoosh/support/charset.py

https://bitbucket.org/rayleyva/whoosh
Python | 1379 lines | 1353 code | 9 blank | 17 comment | 2 complexity | 61e8287f1ddeb0640c926f21d2c13261 MD5 | raw file
Possible License(s): Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. # coding=utf-8
  2. """This module contains tools for working with Sphinx charset table files. These files
  3. are useful for doing case and accent folding.
  4. See :class:`whoosh.analysis.CharsetTokenizer` and :class:`whoosh.analysis.CharsetFilter`.
  5. """
  6. from collections import defaultdict
  7. import re
  8. from whoosh.compat import izip, u, iteritems, unichr, xrange
  9. # This is a straightforward accent-folding charset taken from Carlos Bueno's
  10. # article "Accent Folding for Auto-Complete", for use with CharsetFilter.
  11. #
  12. # http://www.alistapart.com/articles/accent-folding-for-auto-complete/
  13. #
  14. # See the article for information and caveats. The code is lifted directly
  15. # from here:
  16. #
  17. # http://github.com/aristus/accent-folding/blob/master/accent_fold.py
  18. accent_map = {
  19. u('H'): u('h'), # H -> h
  20. u('I'): u('i'), # I -> i
  21. u('J'): u('j'), # J -> j
  22. u('N'): u('n'), # N -> n
  23. u('P'): u('p'), # P -> p
  24. u('S'): u('s'), # S -> s
  25. u('T'): u('t'), # T -> t
  26. u('W'): u('w'), # W -> w
  27. u('Y'): u('y'), # Y -> y
  28. u('i'): u('i'), # i -> i
  29. u('n'): u('n'), # n -> n
  30. u('p'): u('p'), # p -> p
  31. u('s'): u('s'), # s -> s
  32. u('\xc0'): u('a'), # À -> a
  33. u('\xc1'): u('a'), # Á -> a
  34. u('\xc2'): u('a'), # Â -> a
  35. u('\xc3'): u('a'), # Ã -> a
  36. u('\xc4'): u('a'), # Ä -> a
  37. u('\xc5'): u('a'), # Å -> a
  38. u('\xc7'): u('c'), # Ç -> c
  39. u('\xc8'): u('e'), # È -> e
  40. u('\xc9'): u('e'), # É -> e
  41. u('\xca'): u('e'), # Ê -> e
  42. u('\xcb'): u('e'), # Ë -> e
  43. u('\xcc'): u('i'), # Ì -> i
  44. u('\xcd'): u('i'), # Í -> i
  45. u('\xce'): u('i'), # Î -> i
  46. u('\xcf'): u('i'), # Ï -> i
  47. u('\xd1'): u('n'), # Ñ -> n
  48. u('\xd2'): u('o'), # Ò -> o
  49. u('\xd3'): u('o'), # Ó -> o
  50. u('\xd4'): u('o'), # Ô -> o
  51. u('\xd5'): u('o'), # Õ -> o
  52. u('\xd6'): u('o'), # Ö -> o
  53. u('\xd8'): u('o'), # Ø -> o
  54. u('\xd9'): u('u'), # Ù -> u
  55. u('\xda'): u('u'), # Ú -> u
  56. u('\xdb'): u('u'), # Û -> u
  57. u('\xdc'): u('u'), # Ü -> u
  58. u('\xdd'): u('y'), # Ý -> y
  59. u('\xde'): u('t'), # Þ -> t
  60. u('\xdf'): u('s'), # ß -> s
  61. u('\xe0'): u('a'), # à -> a
  62. u('\xe1'): u('a'), # á -> a
  63. u('\xe2'): u('a'), # â -> a
  64. u('\xe3'): u('a'), # ã -> a
  65. u('\xe4'): u('a'), # ä -> a
  66. u('\xe5'): u('a'), # å -> a
  67. u('\xe7'): u('c'), # ç -> c
  68. u('\xe8'): u('e'), # è -> e
  69. u('\xe9'): u('e'), # é -> e
  70. u('\xea'): u('e'), # ê -> e
  71. u('\xeb'): u('e'), # ë -> e
  72. u('\xec'): u('i'), # ì -> i
  73. u('\xed'): u('i'), # í -> i
  74. u('\xee'): u('i'), # î -> i
  75. u('\xef'): u('i'), # ï -> i
  76. u('\xf0'): u('d'), # ð -> d
  77. u('\xf1'): u('n'), # ñ -> n
  78. u('\xf2'): u('o'), # ò -> o
  79. u('\xf3'): u('o'), # ó -> o
  80. u('\xf4'): u('o'), # ô -> o
  81. u('\xf5'): u('o'), # õ -> o
  82. u('\xf6'): u('o'), # ö -> o
  83. u('\xf8'): u('o'), # ø -> o
  84. u('\xf9'): u('u'), # ù -> u
  85. u('\xfa'): u('u'), # ú -> u
  86. u('\xfb'): u('u'), # û -> u
  87. u('\xfc'): u('u'), # ü -> u
  88. u('\xfd'): u('y'), # ý -> y
  89. u('\xfe'): u('t'), # þ -> t
  90. u('\xff'): u('y'), # ÿ -> y
  91. u('\u0100'): u('a'), # Ā -> a
  92. u('\u0101'): u('a'), # ā -> a
  93. u('\u0102'): u('a'), # Ă -> a
  94. u('\u0103'): u('a'), # ă -> a
  95. u('\u0104'): u('a'), # Ą -> a
  96. u('\u0105'): u('a'), # ą -> a
  97. u('\u0106'): u('c'), # Ć -> c
  98. u('\u0107'): u('c'), # ć -> c
  99. u('\u0108'): u('c'), # Ĉ -> c
  100. u('\u0109'): u('c'), # ĉ -> c
  101. u('\u010a'): u('c'), # Ċ -> c
  102. u('\u010b'): u('c'), # ċ -> c
  103. u('\u010c'): u('c'), # Č -> c
  104. u('\u010d'): u('c'), # č -> c
  105. u('\u010e'): u('d'), # Ď -> d
  106. u('\u010f'): u('d'), # ď -> d
  107. u('\u0110'): u('d'), # Đ -> d
  108. u('\u0111'): u('d'), # đ -> d
  109. u('\u0112'): u('e'), # Ē -> e
  110. u('\u0113'): u('e'), # ē -> e
  111. u('\u0114'): u('e'), # Ĕ -> e
  112. u('\u0115'): u('e'), # ĕ -> e
  113. u('\u0116'): u('e'), # Ė -> e
  114. u('\u0117'): u('e'), # ė -> e
  115. u('\u0118'): u('e'), # Ę -> e
  116. u('\u0119'): u('e'), # ę -> e
  117. u('\u011a'): u('e'), # Ě -> e
  118. u('\u011b'): u('e'), # ě -> e
  119. u('\u011c'): u('g'), # Ĝ -> g
  120. u('\u011d'): u('g'), # ĝ -> g
  121. u('\u011e'): u('g'), # Ğ -> g
  122. u('\u011f'): u('g'), # ğ -> g
  123. u('\u0120'): u('g'), # Ġ -> g
  124. u('\u0121'): u('g'), # ġ -> g
  125. u('\u0122'): u('g'), # Ģ -> g
  126. u('\u0123'): u('g'), # ģ -> g
  127. u('\u0124'): u('h'), # Ĥ -> h
  128. u('\u0125'): u('h'), # ĥ -> h
  129. u('\u0126'): u('h'), # Ħ -> h
  130. u('\u0127'): u('h'), # ħ -> h
  131. u('\u0128'): u('i'), # Ĩ -> i
  132. u('\u0129'): u('i'), # ĩ -> i
  133. u('\u012a'): u('i'), # Ī -> i
  134. u('\u012b'): u('i'), # ī -> i
  135. u('\u012c'): u('i'), # Ĭ -> i
  136. u('\u012d'): u('i'), # ĭ -> i
  137. u('\u012e'): u('i'), # Į -> i
  138. u('\u012f'): u('i'), # į -> i
  139. u('\u0130'): u('i'), # İ -> i
  140. u('\u0131'): u('i'), # ı -> i
  141. u('\u0134'): u('j'), # Ĵ -> j
  142. u('\u0135'): u('j'), # ĵ -> j
  143. u('\u0136'): u('k'), # Ķ -> k
  144. u('\u0137'): u('k'), # ķ -> k
  145. u('\u0139'): u('a'), # Ĺ -> a
  146. u('\u013a'): u('l'), # ĺ -> l
  147. u('\u013b'): u('l'), # Ļ -> l
  148. u('\u013c'): u('l'), # ļ -> l
  149. u('\u013d'): u('l'), # Ľ -> l
  150. u('\u013e'): u('l'), # ľ -> l
  151. u('\u013f'): u('l'), # Ŀ -> l
  152. u('\u0140'): u('l'), # ŀ -> l
  153. u('\u0141'): u('l'), # Ł -> l
  154. u('\u0142'): u('l'), # ł -> l
  155. u('\u0143'): u('n'), # Ń -> n
  156. u('\u0144'): u('n'), # ń -> n
  157. u('\u0145'): u('n'), # Ņ -> n
  158. u('\u0146'): u('n'), # ņ -> n
  159. u('\u0147'): u('n'), # Ň -> n
  160. u('\u0148'): u('n'), # ň -> n
  161. u('\u014c'): u('o'), # Ō -> o
  162. u('\u014d'): u('o'), # ō -> o
  163. u('\u014e'): u('o'), # Ŏ -> o
  164. u('\u014f'): u('o'), # ŏ -> o
  165. u('\u0150'): u('o'), # Ő -> o
  166. u('\u0151'): u('o'), # ő -> o
  167. u('\u0154'): u('r'), # Ŕ -> r
  168. u('\u0155'): u('r'), # ŕ -> r
  169. u('\u0156'): u('r'), # Ŗ -> r
  170. u('\u0157'): u('r'), # ŗ -> r
  171. u('\u0158'): u('r'), # Ř -> r
  172. u('\u0159'): u('r'), # ř -> r
  173. u('\u015a'): u('s'), # Ś -> s
  174. u('\u015b'): u('s'), # ś -> s
  175. u('\u015c'): u('s'), # Ŝ -> s
  176. u('\u015d'): u('s'), # ŝ -> s
  177. u('\u015e'): u('s'), # Ş -> s
  178. u('\u015f'): u('s'), # ş -> s
  179. u('\u0160'): u('s'), # Š -> s
  180. u('\u0161'): u('s'), # š -> s
  181. u('\u0162'): u('t'), # Ţ -> t
  182. u('\u0163'): u('t'), # ţ -> t
  183. u('\u0164'): u('t'), # Ť -> t
  184. u('\u0165'): u('t'), # ť -> t
  185. u('\u0166'): u('t'), # Ŧ -> t
  186. u('\u0167'): u('t'), # ŧ -> t
  187. u('\u0168'): u('u'), # Ũ -> u
  188. u('\u0169'): u('u'), # ũ -> u
  189. u('\u016a'): u('u'), # Ū -> u
  190. u('\u016b'): u('u'), # ū -> u
  191. u('\u016c'): u('u'), # Ŭ -> u
  192. u('\u016d'): u('u'), # ŭ -> u
  193. u('\u016e'): u('u'), # Ů -> u
  194. u('\u016f'): u('u'), # ů -> u
  195. u('\u0170'): u('u'), # Ű -> u
  196. u('\u0171'): u('u'), # ű -> u
  197. u('\u0172'): u('u'), # Ų -> u
  198. u('\u0173'): u('u'), # ų -> u
  199. u('\u0174'): u('w'), # Ŵ -> w
  200. u('\u0175'): u('w'), # ŵ -> w
  201. u('\u0176'): u('y'), # Ŷ -> y
  202. u('\u0177'): u('y'), # ŷ -> y
  203. u('\u0178'): u('y'), # Ÿ -> y
  204. u('\u0179'): u('z'), # Ź -> z
  205. u('\u017a'): u('z'), # ź -> z
  206. u('\u017b'): u('z'), # Ż -> z
  207. u('\u017c'): u('z'), # ż -> z
  208. u('\u017d'): u('z'), # Ž -> z
  209. u('\u017e'): u('z'), # ž -> z
  210. u('\u0180'): u('b'), # ƀ -> b
  211. u('\u0181'): u('b'), # Ɓ -> b
  212. u('\u0182'): u('b'), # Ƃ -> b
  213. u('\u0183'): u('b'), # ƃ -> b
  214. u('\u0187'): u('c'), # Ƈ -> c
  215. u('\u0188'): u('c'), # ƈ -> c
  216. u('\u0189'): u('d'), # Ɖ -> d
  217. u('\u018a'): u('d'), # Ɗ -> d
  218. u('\u018b'): u('d'), # Ƌ -> d
  219. u('\u018c'): u('d'), # ƌ -> d
  220. u('\u018e'): u('e'), # Ǝ -> e
  221. u('\u018f'): u('e'), # Ə -> e
  222. u('\u0191'): u('f'), # Ƒ -> f
  223. u('\u0192'): u('f'), # ƒ -> f
  224. u('\u0193'): u('g'), # Ɠ -> g
  225. u('\u0197'): u('i'), # Ɨ -> i
  226. u('\u0198'): u('k'), # Ƙ -> k
  227. u('\u0199'): u('k'), # ƙ -> k
  228. u('\u019a'): u('l'), # ƚ -> l
  229. u('\u019d'): u('n'), # Ɲ -> n
  230. u('\u019e'): u('n'), # ƞ -> n
  231. u('\u019f'): u('o'), # Ɵ -> o
  232. u('\u01a0'): u('o'), # Ơ -> o
  233. u('\u01a1'): u('o'), # ơ -> o
  234. u('\u01a4'): u('p'), # Ƥ -> p
  235. u('\u01a5'): u('p'), # ƥ -> p
  236. u('\u01ab'): u('t'), # ƫ -> t
  237. u('\u01ac'): u('t'), # Ƭ -> t
  238. u('\u01ad'): u('t'), # ƭ -> t
  239. u('\u01ae'): u('t'), # Ʈ -> t
  240. u('\u01af'): u('u'), # Ư -> u
  241. u('\u01b0'): u('u'), # ư -> u
  242. u('\u01b2'): u('v'), # Ʋ -> v
  243. u('\u01b3'): u('y'), # Ƴ -> y
  244. u('\u01b4'): u('y'), # ƴ -> y
  245. u('\u01b5'): u('z'), # Ƶ -> z
  246. u('\u01b6'): u('z'), # ƶ -> z
  247. u('\u01ba'): u('z'), # ƺ -> z
  248. u('\u01cd'): u('a'), # Ǎ -> a
  249. u('\u01ce'): u('a'), # ǎ -> a
  250. u('\u01cf'): u('i'), # Ǐ -> i
  251. u('\u01d0'): u('i'), # ǐ -> i
  252. u('\u01d1'): u('o'), # Ǒ -> o
  253. u('\u01d2'): u('o'), # ǒ -> o
  254. u('\u01d3'): u('u'), # Ǔ -> u
  255. u('\u01d4'): u('u'), # ǔ -> u
  256. u('\u01d5'): u('u'), # Ǖ -> u
  257. u('\u01d6'): u('u'), # ǖ -> u
  258. u('\u01d7'): u('u'), # Ǘ -> u
  259. u('\u01d8'): u('u'), # ǘ -> u
  260. u('\u01d9'): u('u'), # Ǚ -> u
  261. u('\u01da'): u('u'), # ǚ -> u
  262. u('\u01db'): u('u'), # Ǜ -> u
  263. u('\u01dc'): u('u'), # ǜ -> u
  264. u('\u01dd'): u('e'), # ǝ -> e
  265. u('\u01de'): u('a'), # Ǟ -> a
  266. u('\u01df'): u('a'), # ǟ -> a
  267. u('\u01e0'): u('a'), # Ǡ -> a
  268. u('\u01e1'): u('a'), # ǡ -> a
  269. u('\u01e2'): u('a'), # Ǣ -> a
  270. u('\u01e3'): u('a'), # ǣ -> a
  271. u('\u01e4'): u('g'), # Ǥ -> g
  272. u('\u01e5'): u('g'), # ǥ -> g
  273. u('\u01e6'): u('g'), # Ǧ -> g
  274. u('\u01e7'): u('g'), # ǧ -> g
  275. u('\u01e8'): u('k'), # Ǩ -> k
  276. u('\u01e9'): u('k'), # ǩ -> k
  277. u('\u01ea'): u('o'), # Ǫ -> o
  278. u('\u01eb'): u('o'), # ǫ -> o
  279. u('\u01ec'): u('o'), # Ǭ -> o
  280. u('\u01ed'): u('o'), # ǭ -> o
  281. u('\u01ee'): u('z'), # Ǯ -> z
  282. u('\u01ef'): u('z'), # ǯ -> z
  283. u('\u01f0'): u('j'), # ǰ -> j
  284. u('\u01f4'): u('g'), # Ǵ -> g
  285. u('\u01f5'): u('g'), # ǵ -> g
  286. u('\u01f8'): u('n'), # Ǹ -> n
  287. u('\u01f9'): u('n'), # ǹ -> n
  288. u('\u01fa'): u('a'), # Ǻ -> a
  289. u('\u01fb'): u('a'), # ǻ -> a
  290. u('\u01fc'): u('a'), # Ǽ -> a
  291. u('\u01fd'): u('a'), # ǽ -> a
  292. u('\u01fe'): u('o'), # Ǿ -> o
  293. u('\u01ff'): u('o'), # ǿ -> o
  294. u('\u0200'): u('a'), # Ȁ -> a
  295. u('\u0201'): u('a'), # ȁ -> a
  296. u('\u0202'): u('a'), # Ȃ -> a
  297. u('\u0203'): u('a'), # ȃ -> a
  298. u('\u0204'): u('e'), # Ȅ -> e
  299. u('\u0205'): u('e'), # ȅ -> e
  300. u('\u0206'): u('e'), # Ȇ -> e
  301. u('\u0207'): u('e'), # ȇ -> e
  302. u('\u0208'): u('i'), # Ȉ -> i
  303. u('\u0209'): u('i'), # ȉ -> i
  304. u('\u020a'): u('i'), # Ȋ -> i
  305. u('\u020b'): u('i'), # ȋ -> i
  306. u('\u020c'): u('o'), # Ȍ -> o
  307. u('\u020d'): u('o'), # ȍ -> o
  308. u('\u020e'): u('o'), # Ȏ -> o
  309. u('\u020f'): u('o'), # ȏ -> o
  310. u('\u0210'): u('r'), # Ȑ -> r
  311. u('\u0211'): u('r'), # ȑ -> r
  312. u('\u0212'): u('r'), # Ȓ -> r
  313. u('\u0213'): u('r'), # ȓ -> r
  314. u('\u0214'): u('u'), # Ȕ -> u
  315. u('\u0215'): u('u'), # ȕ -> u
  316. u('\u0216'): u('u'), # Ȗ -> u
  317. u('\u0217'): u('u'), # ȗ -> u
  318. u('\u0218'): u('s'), # Ș -> s
  319. u('\u0219'): u('s'), # ș -> s
  320. u('\u021a'): u('t'), # Ț -> t
  321. u('\u021b'): u('t'), # ț -> t
  322. u('\u021e'): u('h'), # Ȟ -> h
  323. u('\u021f'): u('h'), # ȟ -> h
  324. u('\u0220'): u('n'), # Ƞ -> n
  325. u('\u0221'): u('d'), # ȡ -> d
  326. u('\u0224'): u('z'), # Ȥ -> z
  327. u('\u0225'): u('z'), # ȥ -> z
  328. u('\u0226'): u('a'), # Ȧ -> a
  329. u('\u0227'): u('a'), # ȧ -> a
  330. u('\u0228'): u('e'), # Ȩ -> e
  331. u('\u0229'): u('e'), # ȩ -> e
  332. u('\u022a'): u('o'), # Ȫ -> o
  333. u('\u022b'): u('o'), # ȫ -> o
  334. u('\u022c'): u('o'), # Ȭ -> o
  335. u('\u022d'): u('o'), # ȭ -> o
  336. u('\u022e'): u('o'), # Ȯ -> o
  337. u('\u022f'): u('o'), # ȯ -> o
  338. u('\u0230'): u('o'), # Ȱ -> o
  339. u('\u0231'): u('o'), # ȱ -> o
  340. u('\u0232'): u('y'), # Ȳ -> y
  341. u('\u0233'): u('y'), # ȳ -> y
  342. u('\u0234'): u('l'), # ȴ -> l
  343. u('\u0235'): u('n'), # ȵ -> n
  344. u('\u0236'): u('t'), # ȶ -> t
  345. u('\u0237'): u('j'), # ȷ -> j
  346. u('\u023a'): u('a'), # Ⱥ -> a
  347. u('\u023b'): u('c'), # Ȼ -> c
  348. u('\u023c'): u('c'), # ȼ -> c
  349. u('\u023d'): u('l'), # Ƚ -> l
  350. u('\u023e'): u('t'), # Ⱦ -> t
  351. u('\u0243'): u('b'), # Ƀ -> b
  352. u('\u0244'): u('u'), # Ʉ -> u
  353. u('\u0246'): u('e'), # Ɇ -> e
  354. u('\u0247'): u('e'), # ɇ -> e
  355. u('\u0248'): u('j'), # Ɉ -> j
  356. u('\u0249'): u('j'), # ɉ -> j
  357. u('\u024a'): u('q'), # Ɋ -> q
  358. u('\u024b'): u('q'), # ɋ -> q
  359. u('\u024c'): u('r'), # Ɍ -> r
  360. u('\u024d'): u('r'), # ɍ -> r
  361. u('\u024e'): u('y'), # Ɏ -> y
  362. u('\u024f'): u('y'), # ɏ -> y
  363. u('\u0253'): u('b'), # ɓ -> b
  364. u('\u0255'): u('c'), # ɕ -> c
  365. u('\u0256'): u('d'), # ɖ -> d
  366. u('\u0257'): u('d'), # ɗ -> d
  367. u('\u025a'): u('e'), # ɚ -> e
  368. u('\u025d'): u('e'), # ɝ -> e
  369. u('\u025f'): u('j'), # ɟ -> j
  370. u('\u0260'): u('g'), # ɠ -> g
  371. u('\u0268'): u('i'), # ɨ -> i
  372. u('\u026b'): u('l'), # ɫ -> l
  373. u('\u026c'): u('l'), # ɬ -> l
  374. u('\u026d'): u('l'), # ɭ -> l
  375. u('\u0271'): u('m'), # ɱ -> m
  376. u('\u0272'): u('n'), # ɲ -> n
  377. u('\u0273'): u('n'), # ɳ -> n
  378. u('\u0275'): u('o'), # ɵ -> o
  379. u('\u027c'): u('r'), # ɼ -> r
  380. u('\u027d'): u('r'), # ɽ -> r
  381. u('\u027e'): u('r'), # ɾ -> r
  382. u('\u0282'): u('s'), # ʂ -> s
  383. u('\u0284'): u('j'), # ʄ -> j
  384. u('\u0288'): u('t'), # ʈ -> t
  385. u('\u0289'): u('u'), # ʉ -> u
  386. u('\u028b'): u('v'), # ʋ -> v
  387. u('\u028f'): u('y'), # ʏ -> y
  388. u('\u0290'): u('z'), # ʐ -> z
  389. u('\u0291'): u('z'), # ʑ -> z
  390. u('\u029d'): u('j'), # ʝ -> j
  391. u('\u02a0'): u('q'), # ʠ -> q
  392. u('\u0303'): u('p'), # ̃ -> p
  393. u('\u0308'): u('t'), # ̈ -> t
  394. u('\u030a'): u('y'), # ̊ -> y
  395. u('\u030c'): u('j'), # ̌ -> j
  396. u('\u0323'): u('l'), # ̣ -> l
  397. u('\u0329'): u('s'), # ̩ -> s
  398. u('\u0331'): u('h'), # ̱ -> h
  399. u('\u1d6c'): u('b'), # -> b
  400. u('\u1d6d'): u('d'), # -> d
  401. u('\u1d6e'): u('f'), # -> f
  402. u('\u1d72'): u('r'), # -> r
  403. u('\u1d73'): u('r'), # -> r
  404. u('\u1d75'): u('t'), # -> t
  405. u('\u1e00'): u('a'), # -> a
  406. u('\u1e01'): u('a'), # -> a
  407. u('\u1e02'): u('b'), # -> b
  408. u('\u1e03'): u('b'), # -> b
  409. u('\u1e04'): u('b'), # -> b
  410. u('\u1e05'): u('b'), # -> b
  411. u('\u1e06'): u('b'), # -> b
  412. u('\u1e07'): u('b'), # -> b
  413. u('\u1e08'): u('c'), # -> c
  414. u('\u1e09'): u('c'), # -> c
  415. u('\u1e0a'): u('d'), # -> d
  416. u('\u1e0b'): u('d'), # -> d
  417. u('\u1e0c'): u('d'), # -> d
  418. u('\u1e0d'): u('d'), # -> d
  419. u('\u1e0e'): u('d'), # -> d
  420. u('\u1e0f'): u('d'), # -> d
  421. u('\u1e10'): u('d'), # -> d
  422. u('\u1e11'): u('d'), # -> d
  423. u('\u1e12'): u('d'), # -> d
  424. u('\u1e13'): u('d'), # -> d
  425. u('\u1e14'): u('e'), # -> e
  426. u('\u1e15'): u('e'), # -> e
  427. u('\u1e16'): u('e'), # -> e
  428. u('\u1e17'): u('e'), # -> e
  429. u('\u1e18'): u('e'), # -> e
  430. u('\u1e19'): u('e'), # -> e
  431. u('\u1e1a'): u('e'), # -> e
  432. u('\u1e1b'): u('e'), # -> e
  433. u('\u1e1c'): u('e'), # -> e
  434. u('\u1e1d'): u('e'), # -> e
  435. u('\u1e1e'): u('f'), # -> f
  436. u('\u1e1f'): u('f'), # -> f
  437. u('\u1e20'): u('g'), # -> g
  438. u('\u1e21'): u('g'), # -> g
  439. u('\u1e22'): u('h'), # -> h
  440. u('\u1e23'): u('h'), # -> h
  441. u('\u1e24'): u('h'), # -> h
  442. u('\u1e25'): u('h'), # -> h
  443. u('\u1e26'): u('h'), # -> h
  444. u('\u1e27'): u('h'), # -> h
  445. u('\u1e28'): u('h'), # -> h
  446. u('\u1e29'): u('h'), # -> h
  447. u('\u1e2a'): u('h'), # -> h
  448. u('\u1e2b'): u('h'), # -> h
  449. u('\u1e2c'): u('i'), # -> i
  450. u('\u1e2d'): u('i'), # -> i
  451. u('\u1e2e'): u('i'), # -> i
  452. u('\u1e2f'): u('i'), # -> i
  453. u('\u1e30'): u('k'), # -> k
  454. u('\u1e31'): u('k'), # -> k
  455. u('\u1e32'): u('k'), # -> k
  456. u('\u1e33'): u('k'), # -> k
  457. u('\u1e34'): u('k'), # -> k
  458. u('\u1e35'): u('k'), # -> k
  459. u('\u1e36'): u('l'), # -> l
  460. u('\u1e37'): u('l'), # -> l
  461. u('\u1e38'): u('l'), # -> l
  462. u('\u1e39'): u('l'), # -> l
  463. u('\u1e3a'): u('l'), # -> l
  464. u('\u1e3b'): u('l'), # -> l
  465. u('\u1e3c'): u('l'), # -> l
  466. u('\u1e3d'): u('l'), # -> l
  467. u('\u1e3e'): u('m'), # -> m
  468. u('\u1e3f'): u('m'), # ḿ -> m
  469. u('\u1e40'): u('m'), # -> m
  470. u('\u1e41'): u('m'), # -> m
  471. u('\u1e42'): u('m'), # -> m
  472. u('\u1e43'): u('m'), # -> m
  473. u('\u1e44'): u('n'), # -> n
  474. u('\u1e45'): u('n'), # -> n
  475. u('\u1e46'): u('n'), # -> n
  476. u('\u1e47'): u('n'), # -> n
  477. u('\u1e48'): u('n'), # -> n
  478. u('\u1e49'): u('n'), # -> n
  479. u('\u1e4a'): u('n'), # -> n
  480. u('\u1e4b'): u('n'), # -> n
  481. u('\u1e4c'): u('o'), # -> o
  482. u('\u1e4d'): u('o'), # -> o
  483. u('\u1e4e'): u('o'), # -> o
  484. u('\u1e4f'): u('o'), # -> o
  485. u('\u1e50'): u('o'), # -> o
  486. u('\u1e51'): u('o'), # -> o
  487. u('\u1e52'): u('o'), # -> o
  488. u('\u1e53'): u('o'), # -> o
  489. u('\u1e54'): u('p'), # -> p
  490. u('\u1e55'): u('p'), # -> p
  491. u('\u1e56'): u('p'), # -> p
  492. u('\u1e57'): u('p'), # -> p
  493. u('\u1e58'): u('r'), # -> r
  494. u('\u1e59'): u('r'), # -> r
  495. u('\u1e5a'): u('r'), # -> r
  496. u('\u1e5b'): u('r'), # -> r
  497. u('\u1e5c'): u('r'), # -> r
  498. u('\u1e5d'): u('r'), # -> r
  499. u('\u1e5e'): u('r'), # -> r
  500. u('\u1e5f'): u('r'), # -> r
  501. u('\u1e60'): u('s'), # -> s
  502. u('\u1e61'): u('s'), # -> s
  503. u('\u1e62'): u('s'), # -> s
  504. u('\u1e63'): u('s'), # -> s
  505. u('\u1e64'): u('s'), # -> s
  506. u('\u1e65'): u('s'), # -> s
  507. u('\u1e66'): u('s'), # -> s
  508. u('\u1e67'): u('s'), # -> s
  509. u('\u1e68'): u('s'), # -> s
  510. u('\u1e69'): u('s'), # -> s
  511. u('\u1e6a'): u('t'), # -> t
  512. u('\u1e6b'): u('t'), # -> t
  513. u('\u1e6c'): u('t'), # -> t
  514. u('\u1e6d'): u('t'), # -> t
  515. u('\u1e6e'): u('t'), # -> t
  516. u('\u1e6f'): u('t'), # -> t
  517. u('\u1e70'): u('t'), # -> t
  518. u('\u1e71'): u('t'), # -> t
  519. u('\u1e72'): u('u'), # -> u
  520. u('\u1e73'): u('u'), # -> u
  521. u('\u1e74'): u('u'), # -> u
  522. u('\u1e75'): u('u'), # -> u
  523. u('\u1e76'): u('u'), # -> u
  524. u('\u1e77'): u('u'), # -> u
  525. u('\u1e78'): u('u'), # -> u
  526. u('\u1e79'): u('u'), # -> u
  527. u('\u1e7a'): u('u'), # -> u
  528. u('\u1e7b'): u('u'), # -> u
  529. u('\u1e7c'): u('v'), # -> v
  530. u('\u1e7d'): u('v'), # -> v
  531. u('\u1e7e'): u('v'), # -> v
  532. u('\u1e7f'): u('v'), # ṿ -> v
  533. u('\u1e80'): u('w'), # -> w
  534. u('\u1e81'): u('w'), # -> w
  535. u('\u1e82'): u('w'), # -> w
  536. u('\u1e83'): u('w'), # -> w
  537. u('\u1e84'): u('w'), # -> w
  538. u('\u1e85'): u('w'), # -> w
  539. u('\u1e86'): u('w'), # -> w
  540. u('\u1e87'): u('w'), # -> w
  541. u('\u1e88'): u('w'), # -> w
  542. u('\u1e89'): u('w'), # -> w
  543. u('\u1e8a'): u('x'), # -> x
  544. u('\u1e8b'): u('x'), # -> x
  545. u('\u1e8c'): u('x'), # -> x
  546. u('\u1e8d'): u('x'), # -> x
  547. u('\u1e8e'): u('y'), # -> y
  548. u('\u1e8f'): u('y'), # -> y
  549. u('\u1e90'): u('z'), # -> z
  550. u('\u1e91'): u('z'), # -> z
  551. u('\u1e92'): u('z'), # -> z
  552. u('\u1e93'): u('z'), # -> z
  553. u('\u1e94'): u('z'), # -> z
  554. u('\u1e95'): u('z'), # -> z
  555. u('\u1e96'): u('h'), # -> h
  556. u('\u1e97'): u('t'), # -> t
  557. u('\u1e98'): u('w'), # -> w
  558. u('\u1e99'): u('y'), # -> y
  559. u('\u1e9a'): u('a'), # -> a
  560. u('\u1e9b'): u('s'), # -> s
  561. u('\u1ea0'): u('a'), # -> a
  562. u('\u1ea1'): u('a'), # -> a
  563. u('\u1ea2'): u('a'), # -> a
  564. u('\u1ea3'): u('a'), # -> a
  565. u('\u1ea4'): u('a'), # -> a
  566. u('\u1ea5'): u('a'), # -> a
  567. u('\u1ea6'): u('a'), # -> a
  568. u('\u1ea7'): u('a'), # -> a
  569. u('\u1ea8'): u('a'), # -> a
  570. u('\u1ea9'): u('a'), # -> a
  571. u('\u1eaa'): u('a'), # -> a
  572. u('\u1eab'): u('a'), # -> a
  573. u('\u1eac'): u('a'), # -> a
  574. u('\u1ead'): u('a'), # -> a
  575. u('\u1eae'): u('a'), # -> a
  576. u('\u1eaf'): u('a'), # -> a
  577. u('\u1eb0'): u('a'), # -> a
  578. u('\u1eb1'): u('a'), # -> a
  579. u('\u1eb2'): u('a'), # -> a
  580. u('\u1eb3'): u('a'), # -> a
  581. u('\u1eb4'): u('a'), # -> a
  582. u('\u1eb5'): u('a'), # -> a
  583. u('\u1eb6'): u('a'), # -> a
  584. u('\u1eb7'): u('a'), # -> a
  585. u('\u1eb8'): u('e'), # -> e
  586. u('\u1eb9'): u('e'), # -> e
  587. u('\u1eba'): u('e'), # -> e
  588. u('\u1ebb'): u('e'), # -> e
  589. u('\u1ebc'): u('e'), # -> e
  590. u('\u1ebd'): u('e'), # -> e
  591. u('\u1ebe'): u('e'), # -> e
  592. u('\u1ebf'): u('e'), # ế -> e
  593. u('\u1ec0'): u('e'), # -> e
  594. u('\u1ec1'): u('e'), # -> e
  595. u('\u1ec2'): u('e'), # -> e
  596. u('\u1ec3'): u('e'), # -> e
  597. u('\u1ec4'): u('e'), # -> e
  598. u('\u1ec5'): u('e'), # -> e
  599. u('\u1ec6'): u('e'), # -> e
  600. u('\u1ec7'): u('e'), # -> e
  601. u('\u1ec8'): u('i'), # -> i
  602. u('\u1ec9'): u('i'), # -> i
  603. u('\u1eca'): u('i'), # -> i
  604. u('\u1ecb'): u('i'), # -> i
  605. u('\u1ecc'): u('o'), # -> o
  606. u('\u1ecd'): u('o'), # -> o
  607. u('\u1ece'): u('o'), # -> o
  608. u('\u1ecf'): u('o'), # -> o
  609. u('\u1ed0'): u('o'), # -> o
  610. u('\u1ed1'): u('o'), # -> o
  611. u('\u1ed2'): u('o'), # -> o
  612. u('\u1ed3'): u('o'), # -> o
  613. u('\u1ed4'): u('o'), # -> o
  614. u('\u1ed5'): u('o'), # -> o
  615. u('\u1ed6'): u('o'), # -> o
  616. u('\u1ed7'): u('o'), # -> o
  617. u('\u1ed8'): u('o'), # -> o
  618. u('\u1ed9'): u('o'), # -> o
  619. u('\u1eda'): u('o'), # -> o
  620. u('\u1edb'): u('o'), # -> o
  621. u('\u1edc'): u('o'), # -> o
  622. u('\u1edd'): u('o'), # -> o
  623. u('\u1ede'): u('o'), # -> o
  624. u('\u1edf'): u('o'), # -> o
  625. u('\u1ee0'): u('o'), # -> o
  626. u('\u1ee1'): u('o'), # -> o
  627. u('\u1ee2'): u('o'), # -> o
  628. u('\u1ee3'): u('o'), # -> o
  629. u('\u1ee4'): u('u'), # -> u
  630. u('\u1ee5'): u('u'), # -> u
  631. u('\u1ee6'): u('u'), # -> u
  632. u('\u1ee7'): u('u'), # -> u
  633. u('\u1ee8'): u('u'), # -> u
  634. u('\u1ee9'): u('u'), # -> u
  635. u('\u1eea'): u('u'), # -> u
  636. u('\u1eeb'): u('u'), # -> u
  637. u('\u1eec'): u('u'), # -> u
  638. u('\u1eed'): u('u'), # -> u
  639. u('\u1eee'): u('u'), # -> u
  640. u('\u1eef'): u('u'), # -> u
  641. u('\u1ef0'): u('u'), # -> u
  642. u('\u1ef1'): u('u'), # -> u
  643. u('\u1ef2'): u('y'), # -> y
  644. u('\u1ef3'): u('y'), # -> y
  645. u('\u1ef4'): u('y'), # -> y
  646. u('\u1ef5'): u('y'), # -> y
  647. u('\u1ef6'): u('y'), # -> y
  648. u('\u1ef7'): u('y'), # -> y
  649. u('\u1ef8'): u('y'), # -> y
  650. u('\u1ef9'): u('y'), # -> y
  651. u('\u2c60'): u('l'), # -> l
  652. u('\u2c61'): u('l'), # -> l
  653. u('\u2c62'): u('l'), # -> l
  654. u('\u2c63'): u('p'), # -> p
  655. u('\u2c64'): u('r'), # -> r
  656. u('\u2c65'): u('a'), # -> a
  657. u('\u2c66'): u('t'), # -> t
  658. u('\u2c67'): u('h'), # -> h
  659. u('\u2c68'): u('h'), # -> h
  660. u('\u2c69'): u('k'), # -> k
  661. u('\u2c6a'): u('k'), # -> k
  662. u('\u2c6b'): u('z'), # -> z
  663. u('\u2c6c'): u('z'), # -> z
  664. u('\uff10'): u('0'), # -> 0
  665. u('\uff11'): u('1'), # -> 1
  666. u('\uff12'): u('2'), # -> 2
  667. u('\uff13'): u('3'), # -> 3
  668. u('\uff14'): u('4'), # -> 4
  669. u('\uff15'): u('5'), # -> 5
  670. u('\uff16'): u('6'), # -> 6
  671. u('\uff17'): u('7'), # -> 7
  672. u('\uff18'): u('8'), # -> 8
  673. u('\uff19'): u('9'), # -> 9
  674. u('\uff21'): u('A'), # -> A
  675. u('\uff22'): u('B'), # -> B
  676. u('\uff23'): u('C'), # -> C
  677. u('\uff24'): u('D'), # -> D
  678. u('\uff25'): u('E'), # -> E
  679. u('\uff26'): u('F'), # -> F
  680. u('\uff27'): u('G'), # -> G
  681. u('\uff28'): u('H'), # -> H
  682. u('\uff29'): u('I'), # -> I
  683. u('\uff2a'): u('J'), # -> J
  684. u('\uff2b'): u('K'), # -> K
  685. u('\uff2c'): u('L'), # -> L
  686. u('\uff2d'): u('M'), # -> M
  687. u('\uff2e'): u('N'), # -> N
  688. u('\uff2f'): u('O'), # -> O
  689. u('\uff30'): u('P'), # -> P
  690. u('\uff31'): u('Q'), # -> Q
  691. u('\uff32'): u('R'), # -> R
  692. u('\uff33'): u('S'), # -> S
  693. u('\uff34'): u('T'), # -> T
  694. u('\uff35'): u('U'), # -> U
  695. u('\uff36'): u('V'), # -> V
  696. u('\uff37'): u('W'), # -> W
  697. u('\uff38'): u('X'), # -> X
  698. u('\uff39'): u('Y'), # -> Y
  699. u('\uff3a'): u('Z'), # -> Z
  700. u('\uff41'): u('a'), # -> a
  701. u('\uff42'): u('b'), # -> b
  702. u('\uff43'): u('c'), # -> c
  703. u('\uff44'): u('d'), # -> d
  704. u('\uff45'): u('e'), # -> e
  705. u('\uff46'): u('f'), # -> f
  706. u('\uff47'): u('g'), # -> g
  707. u('\uff48'): u('h'), # -> h
  708. u('\uff49'): u('i'), # -> i
  709. u('\uff4a'): u('j'), # -> j
  710. u('\uff4b'): u('k'), # -> k
  711. u('\uff4c'): u('l'), # -> l
  712. u('\uff4d'): u('m'), # -> m
  713. u('\uff4e'): u('n'), # -> n
  714. u('\uff4f'): u('o'), # -> o
  715. u('\uff50'): u('p'), # -> p
  716. u('\uff51'): u('q'), # -> q
  717. u('\uff52'): u('r'), # -> r
  718. u('\uff53'): u('s'), # -> s
  719. u('\uff54'): u('t'), # -> t
  720. u('\uff55'): u('u'), # -> u
  721. u('\uff56'): u('v'), # -> v
  722. u('\uff57'): u('w'), # -> w
  723. u('\uff58'): u('x'), # -> x
  724. u('\uff59'): u('y'), # -> y
  725. u('\uff5a'): u('z'), # -> z
  726. }
  727. # The unicode.translate() method actually requires a dictionary mapping
  728. # character *numbers* to characters, for some reason.
  729. accent_map = dict((ord(k), v) for k, v in iteritems(accent_map))
  730. # This Sphinx charset table taken from http://speeple.com/unicode-maps.txt
  731. default_charset = """
  732. ##################################################
  733. # Latin
  734. # A
  735. U+00C0->a, U+00C1->a, U+00C2->a, U+00C3->a, U+00C4->a, U+00C5->a, U+00E0->a, U+00E1->a, U+00E2->a, U+00E3->a, U+00E4->a, U+00E5->a,
  736. U+0100->a, U+0101->a, U+0102->a, U+0103->a, U+010300->a, U+0104->a, U+0105->a, U+01CD->a, U+01CE->a, U+01DE->a, U+01DF->a, U+01E0->a,
  737. U+01E1->a, U+01FA->a, U+01FB->a, U+0200->a, U+0201->a, U+0202->a, U+0203->a, U+0226->a, U+0227->a, U+023A->a, U+0250->a, U+04D0->a,
  738. U+04D1->a, U+1D2C->a, U+1D43->a, U+1D44->a, U+1D8F->a, U+1E00->a, U+1E01->a, U+1E9A->a, U+1EA0->a, U+1EA1->a, U+1EA2->a, U+1EA3->a,
  739. U+1EA4->a, U+1EA5->a, U+1EA6->a, U+1EA7->a, U+1EA8->a, U+1EA9->a, U+1EAA->a, U+1EAB->a, U+1EAC->a, U+1EAD->a, U+1EAE->a, U+1EAF->a,
  740. U+1EB0->a, U+1EB1->a, U+1EB2->a, U+1EB3->a, U+1EB4->a, U+1EB5->a, U+1EB6->a, U+1EB7->a, U+2090->a, U+2C65->a
  741. # B
  742. U+0180->b, U+0181->b, U+0182->b, U+0183->b, U+0243->b, U+0253->b, U+0299->b, U+16D2->b, U+1D03->b, U+1D2E->b, U+1D2F->b, U+1D47->b,
  743. U+1D6C->b, U+1D80->b, U+1E02->b, U+1E03->b, U+1E04->b, U+1E05->b, U+1E06->b, U+1E07->b
  744. # C
  745. U+00C7->c, U+00E7->c, U+0106->c, U+0107->c, U+0108->c, U+0109->c, U+010A->c, U+010B->c, U+010C->c, U+010D->c, U+0187->c, U+0188->c,
  746. U+023B->c, U+023C->c, U+0255->c, U+0297->c, U+1D9C->c, U+1D9D->c, U+1E08->c, U+1E09->c, U+212D->c, U+2184->c
  747. # D
  748. U+010E->d, U+010F->d, U+0110->d, U+0111->d, U+0189->d, U+018A->d, U+018B->d, U+018C->d, U+01C5->d, U+01F2->d, U+0221->d, U+0256->d,
  749. U+0257->d, U+1D05->d, U+1D30->d, U+1D48->d, U+1D6D->d, U+1D81->d, U+1D91->d, U+1E0A->d, U+1E0B->d, U+1E0C->d, U+1E0D->d, U+1E0E->d,
  750. U+1E0F->d, U+1E10->d, U+1E11->d, U+1E12->d, U+1E13->d
  751. # E
  752. U+00C8->e, U+00C9->e, U+00CA->e, U+00CB->e, U+00E8->e, U+00E9->e, U+00EA->e, U+00EB->e, U+0112->e, U+0113->e, U+0114->e, U+0115->e,
  753. U+0116->e, U+0117->e, U+0118->e, U+0119->e, U+011A->e, U+011B->e, U+018E->e, U+0190->e, U+01DD->e, U+0204->e, U+0205->e, U+0206->e,
  754. U+0207->e, U+0228->e, U+0229->e, U+0246->e, U+0247->e, U+0258->e, U+025B->e, U+025C->e, U+025D->e, U+025E->e, U+029A->e, U+1D07->e,
  755. U+1D08->e, U+1D31->e, U+1D32->e, U+1D49->e, U+1D4B->e, U+1D4C->e, U+1D92->e, U+1D93->e, U+1D94->e, U+1D9F->e, U+1E14->e, U+1E15->e,
  756. U+1E16->e, U+1E17->e, U+1E18->e, U+1E19->e, U+1E1A->e, U+1E1B->e, U+1E1C->e, U+1E1D->e, U+1EB8->e, U+1EB9->e, U+1EBA->e, U+1EBB->e,
  757. U+1EBC->e, U+1EBD->e, U+1EBE->e, U+1EBF->e, U+1EC0->e, U+1EC1->e, U+1EC2->e, U+1EC3->e, U+1EC4->e, U+1EC5->e, U+1EC6->e, U+1EC7->e,
  758. U+2091->e
  759. # F
  760. U+0191->f, U+0192->f, U+1D6E->f, U+1D82->f, U+1DA0->f, U+1E1E->f, U+1E1F->f
  761. # G
  762. U+011C->g, U+011D->g, U+011E->g, U+011F->g, U+0120->g, U+0121->g, U+0122->g, U+0123->g, U+0193->g, U+01E4->g, U+01E5->g, U+01E6->g,
  763. U+01E7->g, U+01F4->g, U+01F5->g, U+0260->g, U+0261->g, U+0262->g, U+029B->g, U+1D33->g, U+1D4D->g, U+1D77->g, U+1D79->g, U+1D83->g,
  764. U+1DA2->g, U+1E20->g, U+1E21->g
  765. # H
  766. U+0124->h, U+0125->h, U+0126->h, U+0127->h, U+021E->h, U+021F->h, U+0265->h, U+0266->h, U+029C->h, U+02AE->h, U+02AF->h, U+02B0->h,
  767. U+02B1->h, U+1D34->h, U+1DA3->h, U+1E22->h, U+1E23->h, U+1E24->h, U+1E25->h, U+1E26->h, U+1E27->h, U+1E28->h, U+1E29->h, U+1E2A->h,
  768. U+1E2B->h, U+1E96->h, U+210C->h, U+2C67->h, U+2C68->h, U+2C75->h, U+2C76->h
  769. # I
  770. U+00CC->i, U+00CD->i, U+00CE->i, U+00CF->i, U+00EC->i, U+00ED->i, U+00EE->i, U+00EF->i, U+010309->i, U+0128->i, U+0129->i, U+012A->i,
  771. U+012B->i, U+012C->i, U+012D->i, U+012E->i, U+012F->i, U+0130->i, U+0131->i, U+0197->i, U+01CF->i, U+01D0->i, U+0208->i, U+0209->i,
  772. U+020A->i, U+020B->i, U+0268->i, U+026A->i, U+040D->i, U+0418->i, U+0419->i, U+0438->i, U+0439->i, U+0456->i, U+1D09->i, U+1D35->i,
  773. U+1D4E->i, U+1D62->i, U+1D7B->i, U+1D96->i, U+1DA4->i, U+1DA6->i, U+1DA7->i, U+1E2C->i, U+1E2D->i, U+1E2E->i, U+1E2F->i, U+1EC8->i,
  774. U+1EC9->i, U+1ECA->i, U+1ECB->i, U+2071->i, U+2111->i
  775. # J
  776. U+0134->j, U+0135->j, U+01C8->j, U+01CB->j, U+01F0->j, U+0237->j, U+0248->j, U+0249->j, U+025F->j, U+0284->j, U+029D->j, U+02B2->j,
  777. U+1D0A->j, U+1D36->j, U+1DA1->j, U+1DA8->j
  778. # K
  779. U+0136->k, U+0137->k, U+0198->k, U+0199->k, U+01E8->k, U+01E9->k, U+029E->k, U+1D0B->k, U+1D37->k, U+1D4F->k, U+1D84->k, U+1E30->k,
  780. U+1E31->k, U+1E32->k, U+1E33->k, U+1E34->k, U+1E35->k, U+2C69->k, U+2C6A->k
  781. # L
  782. U+0139->l, U+013A->l, U+013B->l, U+013C->l, U+013D->l, U+013E->l, U+013F->l, U+0140->l, U+0141->l, U+0142->l, U+019A->l, U+01C8->l,
  783. U+0234->l, U+023D->l, U+026B->l, U+026C->l, U+026D->l, U+029F->l, U+02E1->l, U+1D0C->l, U+1D38->l, U+1D85->l, U+1DA9->l, U+1DAA->l,
  784. U+1DAB->l, U+1E36->l, U+1E37->l, U+1E38->l, U+1E39->l, U+1E3A->l, U+1E3B->l, U+1E3C->l, U+1E3D->l, U+2C60->l, U+2C61->l, U+2C62->l
  785. # M
  786. U+019C->m, U+026F->m, U+0270->m, U+0271->m, U+1D0D->m, U+1D1F->m, U+1D39->m, U+1D50->m, U+1D5A->m, U+1D6F->m, U+1D86->m, U+1DAC->m,
  787. U+1DAD->m, U+1E3E->m, U+1E3F->m, U+1E40->m, U+1E41->m, U+1E42->m, U+1E43->m
  788. # N
  789. U+00D1->n, U+00F1->n, U+0143->n, U+0144->n, U+0145->n, U+0146->n, U+0147->n, U+0148->n, U+0149->n, U+019D->n, U+019E->n, U+01CB->n,
  790. U+01F8->n, U+01F9->n, U+0220->n, U+0235->n, U+0272->n, U+0273->n, U+0274->n, U+1D0E->n, U+1D3A->n, U+1D3B->n, U+1D70->n, U+1D87->n,
  791. U+1DAE->n, U+1DAF->n, U+1DB0->n, U+1E44->n, U+1E45->n, U+1E46->n, U+1E47->n, U+1E48->n, U+1E49->n, U+1E4A->n, U+1E4B->n, U+207F->n
  792. # O
  793. U+00D2->o, U+00D3->o, U+00D4->o, U+00D5->o, U+00D6->o, U+00D8->o, U+00F2->o, U+00F3->o, U+00F4->o, U+00F5->o, U+00F6->o, U+00F8->o,
  794. U+01030F->o, U+014C->o, U+014D->o, U+014E->o, U+014F->o, U+0150->o, U+0151->o, U+0186->o, U+019F->o, U+01A0->o, U+01A1->o, U+01D1->o,
  795. U+01D2->o, U+01EA->o, U+01EB->o, U+01EC->o, U+01ED->o, U+01FE->o, U+01FF->o, U+020C->o, U+020D->o, U+020E->o, U+020F->o, U+022A->o,
  796. U+022B->o, U+022C->o, U+022D->o, U+022E->o, U+022F->o, U+0230->o, U+0231->o, U+0254->o, U+0275->o, U+043E->o, U+04E6->o, U+04E7->o,
  797. U+04E8->o, U+04E9->o, U+04EA->o, U+04EB->o, U+1D0F->o, U+1D10->o, U+1D11->o, U+1D12->o, U+1D13->o, U+1D16->o, U+1D17->o, U+1D3C->o,
  798. U+1D52->o, U+1D53->o, U+1D54->o, U+1D55->o, U+1D97->o, U+1DB1->o, U+1E4C->o, U+1E4D->o, U+1E4E->o, U+1E4F->o, U+1E50->o, U+1E51->o,
  799. U+1E52->o, U+1E53->o, U+1ECC->o, U+1ECD->o, U+1ECE->o, U+1ECF->o, U+1ED0->o, U+1ED1->o, U+1ED2->o, U+1ED3->o, U+1ED4->o, U+1ED5->o,
  800. U+1ED6->o, U+1ED7->o, U+1ED8->o, U+1ED9->o, U+1EDA->o, U+1EDB->o, U+1EDC->o, U+1EDD->o, U+1EDE->o, U+1EDF->o, U+1EE0->o, U+1EE1->o,
  801. U+1EE2->o, U+1EE3->o, U+2092->o, U+2C9E->o, U+2C9F->o
  802. # P
  803. U+01A4->p, U+01A5->p, U+1D18->p, U+1D3E->p, U+1D56->p, U+1D71->p, U+1D7D->p, U+1D88->p, U+1E54->p, U+1E55->p, U+1E56->p, U+1E57->p,
  804. U+2C63->p
  805. # Q
  806. U+024A->q, U+024B->q, U+02A0->q
  807. # R
  808. U+0154->r, U+0155->r, U+0156->r, U+0157->r, U+0158->r, U+0159->r, U+0210->r, U+0211->r, U+0212->r, U+0213->r, U+024C->r, U+024D->r,
  809. U+0279->r, U+027A->r, U+027B->r, U+027C->r, U+027D->r, U+027E->r, U+027F->r, U+0280->r, U+0281->r, U+02B3->r, U+02B4->r, U+02B5->r,
  810. U+02B6->r, U+1D19->r, U+1D1A->r, U+1D3F->r, U+1D63->r, U+1D72->r, U+1D73->r, U+1D89->r, U+1DCA->r, U+1E58->r, U+1E59->r, U+1E5A->r,
  811. U+1E5B->r, U+1E5C->r, U+1E5D->r, U+1E5E->r, U+1E5F->r, U+211C->r, U+2C64->r
  812. # S
  813. U+00DF->s, U+015A->s, U+015B->s, U+015C->s, U+015D->s, U+015E->s, U+015F->s, U+0160->s, U+0161->s, U+017F->s, U+0218->s, U+0219->s,
  814. U+023F->s, U+0282->s, U+02E2->s, U+1D74->s, U+1D8A->s, U+1DB3->s, U+1E60->s, U+1E61->s, U+1E62->s, U+1E63->s, U+1E64->s, U+1E65->s,
  815. U+1E66->s, U+1E67->s, U+1E68->s, U+1E69->s, U+1E9B->s
  816. # T
  817. U+0162->t, U+0163->t, U+0164->t, U+0165->t, U+0166->t, U+0167->t, U+01AB->t, U+01AC->t, U+01AD->t, U+01AE->t, U+021A->t, U+021B->t,
  818. U+0236->t, U+023E->t, U+0287->t, U+0288->t, U+1D1B->t, U+1D40->t, U+1D57->t, U+1D75->t, U+1DB5->t, U+1E6A->t, U+1E6B->t, U+1E6C->t,
  819. U+1E6D->t, U+1E6E->t, U+1E6F->t, U+1E70->t, U+1E71->t, U+1E97->t, U+2C66->t
  820. # U
  821. U+00D9->u, U+00DA->u, U+00DB->u, U+00DC->u, U+00F9->u, U+00FA->u, U+00FB->u, U+00FC->u, U+010316->u, U+0168->u, U+0169->u, U+016A->u,
  822. U+016B->u, U+016C->u, U+016D->u, U+016E->u, U+016F->u, U+0170->u, U+0171->u, U+0172->u, U+0173->u, U+01AF->u, U+01B0->u, U+01D3->u,
  823. U+01D4->u, U+01D5->u, U+01D6->u, U+01D7->u, U+01D8->u, U+01D9->u, U+01DA->u, U+01DB->u, U+01DC->u, U+0214->u, U+0215->u, U+0216->u,
  824. U+0217->u, U+0244->u, U+0289->u, U+1D1C->u, U+1D1D->u, U+1D1E->u, U+1D41->u, U+1D58->u, U+1D59->u, U+1D64->u, U+1D7E->u, U+1D99->u,
  825. U+1DB6->u, U+1DB8->u, U+1E72->u, U+1E73->u, U+1E74->u, U+1E75->u, U+1E76->u, U+1E77->u, U+1E78->u, U+1E79->u, U+1E7A->u, U+1E7B->u,
  826. U+1EE4->u, U+1EE5->u, U+1EE6->u, U+1EE7->u, U+1EE8->u, U+1EE9->u, U+1EEA->u, U+1EEB->u, U+1EEC->u, U+1EED->u, U+1EEE->u, U+1EEF->u,
  827. U+1EF0->u, U+1EF1->u
  828. # V
  829. U+01B2->v, U+0245->v, U+028B->v, U+028C->v, U+1D20->v, U+1D5B->v, U+1D65->v, U+1D8C->v, U+1DB9->v, U+1DBA->v, U+1E7C->v, U+1E7D->v,
  830. U+1E7E->v, U+1E7F->v, U+2C74->v
  831. # W
  832. U+0174->w, U+0175->w, U+028D->w, U+02B7->w, U+1D21->w, U+1D42->w, U+1E80->w, U+1E81->w, U+1E82->w, U+1E83->w, U+1E84->w, U+1E85->w,
  833. U+1E86->w, U+1E87->w, U+1E88->w, U+1E89->w, U+1E98->w
  834. # X
  835. U+02E3->x, U+1D8D->x, U+1E8A->x, U+1E8B->x, U+1E8C->x, U+1E8D->x, U+2093->x
  836. # Y
  837. U+00DD->y, U+00FD->y, U+00FF->y, U+0176->y, U+0177->y, U+0178->y, U+01B3->y, U+01B4->y, U+0232->y, U+0233->y, U+024E->y, U+024F->y,
  838. U+028E->y, U+028F->y, U+02B8->y, U+1E8E->y, U+1E8F->y, U+1E99->y, U+1EF2->y, U+1EF3->y, U+1EF4->y, U+1EF5->y, U+1EF6->y, U+1EF7->y,
  839. U+1EF8->y, U+1EF9->y
  840. # Z
  841. U+0179->z, U+017A->z, U+017B->z, U+017C->z, U+017D->z, U+017E->z, U+01B5->z, U+01B6->z, U+0224->z, U+0225->z, U+0240->z, U+0290->z,
  842. U+0291->z, U+1D22->z, U+1D76->z, U+1D8E->z, U+1DBB->z, U+1DBC->z, U+1DBD->z, U+1E90->z, U+1E91->z, U+1E92->z, U+1E93->z, U+1E94->z,
  843. U+1E95->z, U+2128->z, U+2C6B->z, U+2C6C->z
  844. # Latin Extras:
  845. U+00C6->U+00E6, U+01E2->U+00E6, U+01E3->U+00E6, U+01FC->U+00E6, U+01FD->U+00E6, U+1D01->U+00E6, U+1D02->U+00E6, U+1D2D->U+00E6,
  846. U+1D46->U+00E6, U+00E6
  847. ##################################################
  848. # Arabic
  849. U+0622->U+0627, U+0623->U+0627, U+0624->U+0648, U+0625->U+0627, U+0626->U+064A, U+06C0->U+06D5, U+06C2->U+06C1, U+06D3->U+06D2,
  850. U+FB50->U+0671, U+FB51->U+0671, U+FB52->U+067B, U+FB53->U+067B, U+FB54->U+067B, U+FB56->U+067E, U+FB57->U+067E, U+FB58->U+067E,
  851. U+FB5A->U+0680, U+FB5B->U+0680, U+FB5C->U+0680, U+FB5E->U+067A, U+FB5F->U+067A, U+FB60->U+067A, U+FB62->U+067F, U+FB63->U+067F,
  852. U+FB64->U+067F, U+FB66->U+0679, U+FB67->U+0679, U+FB68->U+0679, U+FB6A->U+06A4, U+FB6B->U+06A4, U+FB6C->U+06A4, U+FB6E->U+06A6,
  853. U+FB6F->U+06A6, U+FB70->U+06A6, U+FB72->U+0684, U+FB73->U+0684, U+FB74->U+0684, U+FB76->U+0683, U+FB77->U+0683, U+FB78->U+0683,
  854. U+FB7A->U+0686, U+FB7B->U+0686, U+FB7C->U+0686, U+FB7E->U+0687, U+FB7F->U+0687, U+FB80->U+0687, U+FB82->U+068D, U+FB83->U+068D,
  855. U+FB84->U+068C, U+FB85->U+068C, U+FB86->U+068E, U+FB87->U+068E, U+FB88->U+0688, U+FB89->U+0688, U+FB8A->U+0698, U+FB8B->U+0698,
  856. U+FB8C->U+0691, U+FB8D->U+0691, U+FB8E->U+06A9, U+FB8F->U+06A9, U+FB90->U+06A9, U+FB92->U+06AF, U+FB93->U+06AF, U+FB94->U+06AF,
  857. U+FB96->U+06B3, U+FB97->U+06B3, U+FB98->U+06B3, U+FB9A->U+06B1, U+FB9B->U+06B1, U+FB9C->U+06B1, U+FB9E->U+06BA, U+FB9F->U+06BA,
  858. U+FBA0->U+06BB, U+FBA1->U+06BB, U+FBA2->U+06BB, U+FBA4->U+06C0, U+FBA5->U+06C0, U+FBA6->U+06C1, U+FBA7->U+06C1, U+FBA8->U+06C1,
  859. U+FBAA->U+06BE, U+FBAB->U+06BE, U+FBAC->U+06BE, U+FBAE->U+06D2, U+FBAF->U+06D2, U+FBB0->U+06D3, U+FBB1->U+06D3, U+FBD3->U+06AD,
  860. U+FBD4->U+06AD, U+FBD5->U+06AD, U+FBD7->U+06C7, U+FBD8->U+06C7, U+FBD9->U+06C6, U+FBDA->U+06C6, U+FBDB->U+06C8, U+FBDC->U+06C8,
  861. U+FBDD->U+0677, U+FBDE->U+06CB, U+FBDF->U+06CB, U+FBE0->U+06C5, U+FBE1->U+06C5, U+FBE2->U+06C9, U+FBE3->U+06C9, U+FBE4->U+06D0,
  862. U+FBE5->U+06D0, U+FBE6->U+06D0, U+FBE8->U+0649, U+FBFC->U+06CC, U+FBFD->U+06CC, U+FBFE->U+06CC, U+0621, U+0627..U+063A, U+0641..U+064A,
  863. U+0660..U+0669, U+066E, U+066F, U+0671..U+06BF, U+06C1, U+06C3..U+06D2, U+06D5, U+06EE..U+06FC, U+06FF, U+0750..U+076D, U+FB55, U+FB59,
  864. U+FB5D, U+FB61, U+FB65, U+FB69, U+FB6D, U+FB71, U+FB75, U+FB79, U+FB7D, U+FB81, U+FB91, U+FB95, U+FB99, U+FB9D, U+FBA3, U+FBA9, U+FBAD,
  865. U+FBD6, U+FBE7, U+FBE9, U+FBFF
  866. ##################################################
  867. # Armenian
  868. U+0531..U+0556->U+0561..U+0586, U+0561..U+0586, U+0587
  869. #################################################
  870. # Bengali
  871. U+09DC->U+09A1, U+09DD->U+09A2, U+09DF->U+09AF, U+09F0->U+09AC, U+09F1->U+09AC, U+0985..U+0990, U+0993..U+09B0, U+09B2, U+09B6..U+09B9,
  872. U+09CE, U+09E0, U+09E1, U+09E6..U+09EF
  873. #################################################
  874. # CJK*
  875. U+F900->U+8C48, U+F901->U+66F4, U+F902->U+8ECA, U+F903->U+8CC8, U+F904->U+6ED1, U+F905->U+4E32, U+F906->U+53E5, U+F907->U+9F9C,
  876. U+F908->U+9F9C, U+F909->U+5951, U+F90A->U+91D1, U+F90B->U+5587, U+F90C->U+5948, U+F90D->U+61F6, U+F90E->U+7669, U+F90F->U+7F85,
  877. U+F910->U+863F, U+F911->U+87BA, U+F912->U+88F8, U+F913->U+908F, U+F914->U+6A02, U+F915->U+6D1B, U+F916->U+70D9, U+F917->U+73DE,
  878. U+F918->U+843D, U+F919->U+916A, U+F91A->U+99F1, U+F91B->U+4E82, U+F91C->U+5375, U+F91D->U+6B04, U+F91E->U+721B, U+F91F->U+862D,
  879. U+F920->U+9E1E, U+F921->U+5D50, U+F922->U+6FEB, U+F923->U+85CD, U+F924->U+8964, U+F925->U+62C9, U+F926->U+81D8, U+F927->U+881F,
  880. U+F928->U+5ECA, U+F929->U+6717, U+F92A->U+6D6A, U+F92B->U+72FC, U+F92C->U+90CE, U+F92D->U+4F86, U+F92E->U+51B7, U+F92F->U+52DE,
  881. U+F930->U+64C4, U+F931->U+6AD3, U+F932->U+7210, U+F933->U+76E7, U+F934->U+8001, U+F935->U+8606, U+F936->U+865C, U+F937->U+8DEF,
  882. U+F938->U+9732, U+F939->U+9B6F, U+F93A->U+9DFA, U+F93B->U+788C, U+F93C->U+797F, U+F93D->U+7DA0, U+F93E->U+83C9, U+F93F->U+9304,
  883. U+F940->U+9E7F, U+F941->U+8AD6, U+F942->U+58DF, U+F943->U+5F04, U+F944->U+7C60, U+F945->U+807E, U+F946->U+7262, U+F947->U+78CA,
  884. U+F948->U+8CC2, U+F949->U+96F7, U+F94A->U+58D8, U+F94B->U+5C62, U+F94C->U+6A13, U+F94D->U+6DDA, U+F94E->U+6F0F, U+F94F->U+7D2F,
  885. U+F950->U+7E37, U+F951->U+964B, U+F952->U+52D2, U+F953->U+808B, U+F954->U+51DC, U+F955->U+51CC, U+F956->U+7A1C, U+F957->U+7DBE,
  886. U+F958->U+83F1, U+F959->U+9675, U+F95A->U+8B80, U+F95B->U+62CF, U+F95C->U+6A02, U+F95D->U+8AFE, U+F95E->U+4E39, U+F95F->U+5BE7,
  887. U+F960->U+6012, U+F961->U+7387, U+F962->U+7570, U+F963->U+5317, U+F964->U+78FB, U+F965->U+4FBF, U+F966->U+5FA9, U+F967->U+4E0D,
  888. U+F968->U+6CCC, U+F969->U+6578, U+F96A->U+7D22, U+F96B->U+53C3, U+F96C->U+585E, U+F96D->U+7701, U+F96E->U+8449, U+F96F->U+8AAA,
  889. U+F970->U+6BBA, U+F971->U+8FB0, U+F972->U+6C88, U+F973->U+62FE, U+F974->U+82E5, U+F975->U+63A0, U+F976->U+7565, U+F977->U+4EAE,
  890. U+F978->U+5169, U+F979->U+51C9, U+F97A->U+6881, U+F97B->U+7CE7, U+F97C->U+826F, U+F97D->U+8AD2, U+F97E->U+91CF, U+F97F->U+52F5,
  891. U+F980->U+5442, U+F981->U+5973, U+F982->U+5EEC, U+F983->U+65C5, U+F984->U+6FFE, U+F985->U+792A, U+F986->U+95AD, U+F987->U+9A6A,
  892. U+F988->U+9E97, U+F989->U+9ECE, U+F98A->U+529B, U+F98B->U+66C6, U+F98C->U+6B77, U+F98D->U+8F62, U+F98E->U+5E74, U+F98F->U+6190,
  893. U+F990->U+6200, U+F991->U+649A, U+F992->U+6F23, U+F993->U+7149, U+F994->U+7489, U+F995->U+79CA, U+F996->U+7DF4, U+F997->U+806F,
  894. U+F998->U+8F26, U+F999->U+84EE, U+F99A->U+9023, U+F99B->U+934A, U+F99C->U+5217, U+F99D->U+52A3, U+F99E->U+54BD, U+F99F->U+70C8,
  895. U+F9A0->U+88C2, U+F9A1->U+8AAA, U+F9A2->U+5EC9, U+F9A3->U+5FF5, U+F9A4->U+637B, U+F9A5->U+6BAE, U+F9A6->U+7C3E, U+F9A7->U+7375,
  896. U+F9A8->U+4EE4, U+F9A9->U+56F9, U+F9AA->U+5BE7, U+F9AB->U+5DBA, U+F9AC->U+601C, U+F9AD->U+73B2, U+F9AE->U+7469, U+F9AF->U+7F9A,
  897. U+F9B0->U+8046, U+F9B1->U+9234, U+F9B2->U+96F6, U+F9B3->U+9748, U+F9B4->U+9818, U+F9B5->U+4F8B, U+F9B6->U+79AE, U+F9B7->U+91B4,
  898. U+F9B8->U+96B8, U+F9B9->U+60E1, U+F9BA->U+4E86, U+F9BB->U+50DA, U+F9BC->U+5BEE, U+F9BD->U+5C3F, U+F9BE->U+6599, U+F9BF->U+6A02,
  899. U+F9C0->U+71CE, U+F9C1->U+7642, U+F9C2->U+84FC, U+F9C3->U+907C, U+F9C4->U+9F8D, U+F9C5->U+6688, U+F9C6->U+962E, U+F9C7->U+5289,
  900. U+F9C8->U+677B, U+F9C9->U+67F3, U+F9CA->U+6D41, U+F9CB->U+6E9C, U+F9CC->U+7409, U+F9CD->U+7559, U+F9CE->U+786B, U+F9CF->U+7D10,
  901. U+F9D0->U+985E, U+F9D1->U+516D, U+F9D2->U+622E, U+F9D3->U+9678, U+F9D4->U+502B, U+F9D5->U+5D19, U+F9D6->U+6DEA, U+F9D7->U+8F2A,
  902. U+F9D8->U+5F8B, U+F9D9->U+6144, U+F9DA->U+6817, U+F9DB->U+7387, U+F9DC->U+9686, U+F9DD->U+5229, U+F9DE->U+540F, U+F9DF->U+5C65,
  903. U+F9E0->U+6613, U+F9E1->U+674E, U+F9E2->U+68A8, U+F9E3->U+6CE5, U+F9E4->U+7406, U+F9E5->U+75E2, U+F9E6->U+7F79, U+F9E7->U+88CF,
  904. U+F9E8->U+88E1, U+F9E9->U+91CC, U+F9EA->U+96E2, U+F9EB->U+533F, U+F9EC->U+6EBA, U+F9ED->U+541D, U+F9EE->U+71D0, U+F9EF->U+7498,
  905. U+F9F0->U+85FA, U+F9F1->U+96A3, U+F9F2->U+9C57, U+F9F3->U+9E9F, U+F9F4->U+6797, U+F9F5->U+6DCB, U+F9F6->U+81E8, U+F9F7->U+7ACB,
  906. U+F9F8->U+7B20, U+F9F9->U+7C92, U+F9FA->U+72C0, U+F9FB->U+7099, U+F9FC->U+8B58, U+F9FD->U+4EC0, U+F9FE->U+8336, U+F9FF->U+523A,
  907. U+FA00->U+5207, U+FA01->U+5EA6, U+FA02->U+62D3, U+FA03->U+7CD6, U+FA04->U+5B85, U+FA05->U+6D1E, U+FA06->U+66B4, U+FA07->U+8F3B,
  908. U+FA08->U+884C, U+FA09->U+964D, U+FA0A->U+898B, U+FA0B->U+5ED3, U+FA0C->U+5140, U+FA0D->U+55C0, U+FA10->U+585A, U+FA12->U+6674,
  909. U+FA15->U+51DE, U+FA16->U+732A, U+FA17->U+76CA, U+FA18->U+793C, U+FA19->U+795E, U+FA1A->U+7965, U+FA1B->U+798F, U+FA1C->U+9756,
  910. U+FA1D->U+7CBE, U+FA1E->U+7FBD, U+FA20->U+8612, U+FA22->U+8AF8, U+FA25->U+9038, U+FA26->U+90FD, U+FA2A->U+98EF, U+FA2B->U+98FC,
  911. U+FA2C->U+9928, U+FA2D->U+9DB4, U+FA30->U+4FAE, U+FA31->U+50E7, U+FA32->U+514D, U+FA33->U+52C9, U+FA34->U+52E4, U+FA35->U+5351,
  912. U+FA36->U+559D, U+FA37->U+5606, U+FA38->U+5668, U+FA39->U+5840, U+FA3A->U+58A8, U+FA3B->U+5C64, U+FA3C->U+5C6E, U+FA3D->U+6094,
  913. U+FA3E->U+6168, U+FA3F->U+618E, U+FA40->U+61F2, U+FA41->U+654F, U+FA42->U+65E2, U+FA43->U+6691, U+FA44->U+6885, U+FA45->U+6D77,
  914. U+FA46->U+6E1A, U+FA47->U+6F22, U+FA48->U+716E, U+FA49->U+722B, U+FA4A->U+7422, U+FA4B->U+7891, U+FA4C->U+793E, U+FA4D->U+7949,
  915. U+FA4E->U+7948, U+FA4F->U+7950, U+FA50->U+7956, U+FA51->U+795D, U+FA52->U+798D, U+FA53->U+798E, U+FA54->U+7A40, U+FA55->U+7A81,
  916. U+FA56->U+7BC0, U+FA57->U+7DF4, U+FA58->U+7E09, U+FA59->U+7E41, U+FA5A->U+7F72, U+FA5B->U+8005, U+FA5C->U+81ED, U+FA5D->U+8279,
  917. U+FA5E->U+8279, U+FA5F->U+8457, U+FA60->U+8910, U+FA61->U+8996, U+FA62->U+8B01, U+FA63->U+8B39, U+FA64->U+8CD3, U+FA65->U+8D08,
  918. U+FA66->U+8FB6, U+FA67->U+9038, U+FA68->U+96E3, U+FA69->U+97FF, U+FA6A->U+983B, U+FA70->U+4E26, U+FA71->U+51B5, U+FA72->U+5168,
  919. U+FA73->U+4F80, U+FA74->U+5145, U+FA75->U+5180, U+FA76->U+52C7, U+FA77->U+52FA, U+FA78->U+559D, U+FA79->U+5555, U+FA7A->U+5599,
  920. U+FA7B->U+55E2, U+FA7C->U+585A, U+FA7D->U+58B3, U+FA7E->U+5944, U+FA7F->U+5954, U+FA80->U+5A62, U+FA81->U+5B28, U+FA82->U+5ED2,
  921. U+FA83->U+5ED9, U+FA84->U+5F69, U+FA85->U+5FAD, U+FA86->U+60D8, U+FA87->U+614E, U+FA88->U+6108, U+FA89->U+618E, U+FA8A->U+6160,
  922. U+FA8B->U+61F2, U+FA8C->U+6234, U+FA8D->U+63C4, U+FA8E->U+641C, U+FA8F->U+6452, U+FA90->U+6556, U+FA91->U+6674, U+FA92->U+6717,
  923. U+FA93->U+671B, U+FA94->U+6756, U+FA95->U+6B79, U+FA96->U+6BBA, U+FA97->U+6D41, U+FA98->U+6EDB, U+FA99->U+6ECB, U+FA9A->U+6F22,
  924. U+FA9B->U+701E, U+FA9C->U+716E, U+FA9D->U+77A7, U+FA9E->U+7235, U+FA9F->U+72AF, U+FAA0->U+732A, U+FAA1->U+7471, U+FAA2->U+7506,
  925. U+FAA3->U+753B, U+FAA4->U+761D, U+FAA5->U+761F, U+FAA6->U+76CA, U+FAA7->U+76DB, U+FAA8->U+76F4, U+FAA9->U+774A, U+FAAA->U+7740,
  926. U+FAAB->U+78CC, U+FAAC->U+7AB1, U+FAAD->U+7BC0, U+FAAE->U+7C7B, U+FAAF->U+7D5B, U+FAB0->U+7DF4, U+FAB1->U+7F3E, U+FAB2->U+8005,
  927. U+FAB3->U+8352, U+FAB4->U+83EF, U+FAB5->U+8779, U+FAB6->U+8941, U+FAB7->U+8986, U+FAB8->U+8996, U+FAB9->U+8ABF, U+FABA->U+8AF8,
  928. U+FABB->U+8ACB, U+FABC->U+8B01, U+FABD->U+8AFE, U+FABE->U+8AED, U+FABF->U+8B39, U+FAC0->U+8B8A, U+FAC1->U+8D08, U+FAC2->U+8F38,
  929. U+FAC3->U+9072, U+FAC4->U+9199, U+FAC5->U+9276, U+FAC6->U+967C, U+FAC7->U+96E3, U+FAC8->U+9756, U+FAC9->U+97DB, U+FACA->U+97FF,
  930. U+FACB->U+980B, U+FACC->U+983B, U+FACD->U+9B12, U+FACE->U+9F9C, U+FACF->U+2284A, U+FAD0->U+22844, U+FAD1->U+233D5, U+FAD2->U+3B9D,
  931. U+FAD3->U+4018, U+FAD4->U+4039, U+FAD5->U+25249, U+FAD6->U+25CD0, U+FAD7->U+27ED3, U+FAD8->U+9F43, U+FAD9->U+9F8E, U+2F800->U+4E3D,
  932. U+2F801->U+4E38, U+2F802->U+4E41, U+2F803->U+20122, U+2F804->U+4F60, U+2F805->U+4FAE, U+2F806->U+4FBB, U+2F807->U+5002, U+2F808->U+507A,
  933. U+2F809->U+5099, U+2F80A->U+50E7, U+2F80B->U+50CF, U+2F80C->U+349E, U+2F80D->U+2063A, U+2F80E->U+514D, U+2F80F->U+5154, U+2F810->U+5164,
  934. U+2F811->U+5177, U+2F812->U+2051C, U+2F813->U+34B9, U+2F814->U+5167, U+2F815->U+518D, U+2F816->U+2054B, U+2F817->U+5197,
  935. U+2F818->U+51A4, U+2F819->U+4ECC, U+2F81A->U+51AC, U+2F81B->U+51B5, U+2F81C->U+291DF, U+2F81D->U+51F5, U+2F81E->U+5203,
  936. U+2F81F->U+34DF, U+2F820->U+523B, U+2F821->U+5246, U+2F822->U+5272, U+2F823->U+5277, U+2F824->U+3515, U+2F825->U+52C7,
  937. U+2F826->U+52C9, U+2F827->U+52E4, U+2F828->U+52FA, U+2F829->U+5305, U+2F82A->U+5306, U+2F82B->U+5317, U+2F82C->U+5349,
  938. U+2F82D->U+5351, U+2F82E->U+535A, U+2F82F->U+5373, U+2F830->U+537D, U+2F831->U+537F, U+2F832->U+537F, U+2F833->U+537F,
  939. U+2F834->U+20A2C, U+2F835->U+7070, U+2F836->U+53CA, U+2F837->U+53DF, U+2F838->U+20B63, U+2F839->U+53EB, U+2F83A->U+53F1,
  940. U+2F83B->U+5406, U+2F83C->U+549E, U+2F83D->U+5438, U+2F83E->U+5448, U+2F83F->U+5468, U+2F840->U+54A2, U+2F841->U+54F6,
  941. U+2F842->U+5510, U+2F843->U+5553, U+2F844->U+5563, U+2F845->U+5584, U+2F846->U+5584, U+2F847->U+5599, U+2F848->U+55AB,
  942. U+2F849->U+55B3, U+2F84A->U+55C2, U+2F84B->U+5716, U+2F84C->U+5606, U+2F84D->U+5717, U+2F84E->U+5651, U+2F84F->U+5674,
  943. U+2F850->U+5207, U+2F851->U+58EE, U+2F852->U+57CE, U+2F853->U+57F4, U+2F854->U+580D, U+2F855->U+578B, U+2F856->U+5832,
  944. U+2F857->U+5831, U+2F858->U+58AC, U+2F859->U+214E4, U+2F85A->U+58F2, U+2F85B->U+58F7, U+2F85C->U+5906, U+2F85D->U+591A,
  945. U+2F85E->U+5922, U+2F85F->U+5962, U+2F860->U+216A8, U+2F861->U+216EA, U+2F862->U+59EC, U+2F863->U+5A1B, U+2F864->U+5A27,
  946. U+2F865->U+59D8, U+2F866->U+5A66, U+2F867->U+36EE, U+2F868->U+36FC, U+2F869->U+5B08, U+2F86A->U+5B3E, U+2F86B->U+5B3E,
  947. U+2F86C->U+219C8, U+2F86D->U+5BC3, U+2F86E->U+5BD8, U+2F86F->U+5BE7, U+2F870->U+5BF3, U+2F871->U+21B18, U+2F872->U+5BFF,
  948. U+2F873->U+5C06, U+2F874->U+5F53, U+2F875->U+5C22, U+2F876->U+3781, U+2F877->U+5C60, U+2F878->U+5C6E, U+2F879->U+5CC0,
  949. U+2F87A->U+5C8D, U+2F87B->U+21DE4, U+2F87C->U+5D43, U+2F87D->U+21DE6, U+2F87E->U+5D6E, U+2F87F->U+5D6B, U+2F880->U+5D7C,
  950. U+2F881->U+5DE1, U+2F882->U+5DE2, U+2F883->U+382F, U+2F884->U+5DFD, U+2F885->U+5E28, U+2F886->U+5E3D, U+2F887->U+5E69,
  951. U+2F888->U+3862, U+2F889->U+22183, U+2F88A->U+387C, U+2F88B->U+5EB0, U+2F88C->U+5EB3, U+2F88D->U+5EB6, U+2F88E->U+5ECA,
  952. U+2F88F->U+2A392, U+2F890->U+5EFE, U+2F891->U+22331, U+2F892->U+22331, U+2F893->U+8201, U+2F894->U+5F22, U+2F895->U+5F22,
  953. U+2F896->U+38C7, U+2F897->U+232B8, U+2F898->U+261DA, U+2F899->U+5F62, U+2F89A->U+5F6B, U+2F89B->U+38E3, U+2F89C->U+5F9A,
  954. U+2F89D->U+5FCD, U+2F89E->U+5FD7, U+2F89F->U+5FF9, U+2F8A0->U+6081, U+2F8A1->U+393A, U+2F8A2->U+391C, U+2F8A3->U+6094,
  955. U+2F8A4->U+226D4, U+2F8A5->U+60C7, U+2F8A6->U+6148, U+2F8A7->U+614C, U+2F8A8->U+614E, U+2F8A9->U+614C, U+2F8AA->U+617A,
  956. U+2F8AB->U+618E, U+2F8AC->U+61B2, U+2F8AD->U+61A4, U+2F8AE->U+61AF, U+2F8AF->U+61DE, U+2F8B0->U+61F2, U+2F8B1->U+61F6,
  957. U+2F8B2->U+6210, U+2F8B3->U+621B, U+2F8B4->U+625D, U+2F8B5->U+62B1, U+2F8B6->U+62D4, U+2F8B7->U+6350, U+2F8B8->U+22B0C,
  958. U+2F8B9->U+633D, U+2F8BA->U+62FC, U+2F8BB->U+6368, U+2F8BC->U+6383, U+2F8BD->U+63E4, U+2F8BE->U+22BF1, U+2F8BF->U+6422,
  959. U+2F8C0->U+63C5, U+2F8C1->U+63A9, U+2F8C2->U+3A2E, U+2F8C3->U+6469, U+2F8C4->U+647E, U+2F8C5->U+649D, U+2F8C6->U+6477,
  960. U+2F8C7->U+3A6C, U+2F8C8->U+654F, U+2F8C9->U+656C, U+2F8CA->U+2300A, U+2F8CB->U+65E3, U+2F8CC->U+66F8, U+2F8CD->U+6649,
  961. U+2F8CE->U+3B19, U+2F8CF->U+6691, U+2F8D0->U+3B08, U+2F8D1->U+3AE4, U+2F8D2->U+5192, U+2F8D3->U+5195, U+2F8D4->U+6700,
  962. U+2F8D5->U+669C, U+2F8D6->U+80AD, U+2F8D7->U+43D9, U+2F8D8->U+6717, U+2F8D9->U+671B, U+2F8DA->U+6721, U+2F8DB->U+675E,
  963. U+2F8DC->U+6753, U+2F8DD->U+233C3, U+2F8DE->U+3B49, U+2F8DF->U+67FA, U+2F8E0->U+6785, U+2F8E1->U+6852, U+2F8E2->U+6885,
  964. U+2F8E3->U+2346D, U+2F8E4->U+688E, U+2F8E5->U+681F, U+2F8E6->U+6914, U+2F8E7->U+3B9D, U+2F8E8->U+6942, U+2F8E9->U+69A3,
  965. U+2F8EA->U+69EA, U+2F8EB->U+6AA8, U+2F8EC->U+236A3, U+2F8ED->U+6ADB, U+2F8EE->U+3C18, U+2F8EF->U+6B21, U+2F8F0->U+238A7,
  966. U+2F8F1->U+6B54, U+2F8F2->U+3C4E, U+2F8F3->U+6B72, U+2F8F4->U+6B9F, U+2F8F5->U+6BBA, U+2F8F6->U+6BBB, U+2F8F7->U+23A8D,
  967. U+2F8F8->U+21D0B, U+2F8F9->U+23AFA, U+2F8FA->U+6C4E, U+2F8FB->U+23CBC, U+2F8FC->U+6CBF, U+2F8FD->U+6CCD, U+2F8FE->U+6C67,
  968. U+2F8FF->U+6D16, U+2F900->U+6D3E, U+2F901->U+6D77, U+2F902->U+6D41, U+2F903->U+6D69, U+2F904->U+6D78, U+2F905->U+6D85,
  969. U+2F906->U+23D1E, U+2F907->U+6D34, U+2F908->U+6E2F, U+2F909->U+6E6E, U+2F90A->U+3D33, U+2F90B-

Large files files are truncated, but you can click here to view the full file