/share/i18n/csmapper/APPLE/ARABIC%UCS.src

https://bitbucket.org/freebsd/freebsd-head/ · Unknown · 451 lines · 449 code · 2 blank · 0 comment · 0 complexity · bfdd202553b66768bf4298aba48bb1f3 MD5 · raw file

  1. # $FreeBSD$
  2. TYPE ROWCOL
  3. NAME ARABIC/UCS
  4. SRC_ZONE 0x00-0xFF
  5. OOB_MODE ILSEQ
  6. DST_ILSEQ 0xFFFE
  7. DST_UNIT_BITS 16
  8. BEGIN_MAP
  9. #=======================================================================
  10. # File name: ARABIC.TXT
  11. #
  12. # Contents: Map (external version) from Mac OS Arabic
  13. # character set to Unicode 2.1 and later.
  14. #
  15. # Copyright: (c) 1994-2002, 2005 by Apple Computer, Inc., all rights
  16. # reserved.
  17. #
  18. # Contact: charsets@apple.com
  19. #
  20. # Changes:
  21. #
  22. # c02 2005-Apr-04 Update header comments. Matches internal xml
  23. # <c1.2> and Text Encoding Converter 2.0.
  24. # b3,c1 2002-Dec-19 Add comments about character display and
  25. # direction overrides. Update URLs, notes.
  26. # Matches internal utom<b4>.
  27. # b02 1999-Sep-22 Update contact e-mail address. Matches
  28. # internal utom<b1>, ufrm<b1>, and Text
  29. # Encoding Converter version 1.5.
  30. # n10 1998-Feb-05 Show required Unicode character
  31. # directionality in a different way. Matches
  32. # internal utom<n4>, ufrm<n21>, and Text
  33. # Encoding Converter version 1.3. Update
  34. # header comments; include information on
  35. # loose mapping of digits.
  36. # n07 1997-Jul-17 Update to match internal utom<n2>, ufrm<n17>:
  37. # Change standard mapping for 0xC0 from U+066D
  38. # to U+274A. Add direction overrides to
  39. # mappings for 0x25, 0x2C, 0x3B, 0x3F. Add
  40. # information on variants.
  41. # n03 1995-Apr-18 First version (after fixing some typos).
  42. # Matches internal ufrm<n11>.
  43. #
  44. # Standard header:
  45. # ----------------
  46. #
  47. # Apple, the Apple logo, and Macintosh are trademarks of Apple
  48. # Computer, Inc., registered in the United States and other countries.
  49. # Unicode is a trademark of Unicode Inc. For the sake of brevity,
  50. # throughout this document, "Macintosh" can be used to refer to
  51. # Macintosh computers and "Unicode" can be used to refer to the
  52. # Unicode standard.
  53. #
  54. # Apple Computer, Inc. ("Apple") makes no warranty or representation,
  55. # either express or implied, with respect to this document and the
  56. # included data, its quality, accuracy, or fitness for a particular
  57. # purpose. In no event will Apple be liable for direct, indirect,
  58. # special, incidental, or consequential damages resulting from any
  59. # defect or inaccuracy in this document or the included data.
  60. #
  61. # These mapping tables and character lists are subject to change.
  62. # The latest tables should be available from the following:
  63. #
  64. # <http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/>
  65. #
  66. # For general information about Mac OS encodings and these mapping
  67. # tables, see the file "README.TXT".
  68. #
  69. # Format:
  70. # -------
  71. #
  72. # Three tab-separated columns;
  73. # '#' begins a comment which continues to the end of the line.
  74. # Column #1 is the Mac OS Arabic code (in hex as 0xNN).
  75. # Column #2 is the corresponding Unicode (in hex as 0xNNNN),
  76. # possibly preceded by a tag indicating required directionality
  77. # (i.e. <LR>+0xNNNN or <RL>+0xNNNN).
  78. # Column #3 is a comment containing the Unicode name.
  79. #
  80. # The entries are in Mac OS Arabic code order.
  81. #
  82. # Control character mappings are not shown in this table, following
  83. # the conventions of the standard UTC mapping tables. However, the
  84. # Mac OS Arabic character set uses the standard control characters at
  85. # 0x00-0x1F and 0x7F.
  86. #
  87. # Notes on Mac OS Arabic:
  88. # -----------------------
  89. #
  90. # This is a legacy Mac OS encoding; in the Mac OS X Carbon and Cocoa
  91. # environments, it is only supported via transcoding to and from
  92. # Unicode.
  93. #
  94. # 1. General
  95. #
  96. # The Mac OS Arabic character set is intended to cover Arabic as
  97. # used in North Africa, the Arabian peninsula, and the Levant. It
  98. # also contains several characters needed for Urdu and/or Farsi.
  99. #
  100. # The Mac OS Arabic character set is essentially a superset of ISO
  101. # 8859-6. The 8859-6 code points that are interpreted differently
  102. # in the Mac OS Arabic set are as follows:
  103. # 0xA0 is NO-BREAK SPACE in 8859-6 and right-left SPACE in Mac OS
  104. # Arabic; NO-BREAK SPACE is 0x81 in Mac OS Arabic.
  105. # 0xA4 is CURRENCY SIGN in 8859-6 and right-left DOLLAR SIGN in
  106. # Mac OS Arabic.
  107. # 0xAD is SOFT HYPHEN in 8859-6 and right-left HYPHEN-MINUS in
  108. # Mac OS Arabic.
  109. # ISO 8859-6 specifies that codes 0x30-0x39 can be rendered either
  110. # with European digit shapes or Arabic digit shapes. This is also
  111. # true in Mac OS Arabic, which determines from context which digit
  112. # shapes to use (see below).
  113. #
  114. # The Mac OS Arabic character set uses the C1 controls area and other
  115. # code points which are undefined in ISO 8859-6 for additional
  116. # graphic characters: additional Arabic letters for Farsi and Urdu,
  117. # some accented Roman letters for European languages (such as French),
  118. # and duplicates of some of the punctuation, symbols, and digits in
  119. # the ASCII block. The duplicate punctuation, symbol, and digit
  120. # characters have right-left directionality, while the ASCII versions
  121. # have left-right directionality. See the next section for more
  122. # information on this.
  123. #
  124. # Mac OS Arabic characters 0xEB-0xF2 are non-spacing/combining marks.
  125. #
  126. # 2. Directional characters and roundtrip fidelity
  127. #
  128. # The Mac OS Arabic character set was developed in 1986-1987. At that
  129. # time the bidirectional line layout algorithm used in the Mac OS
  130. # Arabic system was fairly simple; it used only a few direction
  131. # classes (instead of the 19 now used in the Unicode bidirectional
  132. # algorithm). In order to permit users to handle some tricky layout
  133. # problems, certain punctuation and symbol characters were encoded
  134. # twice, once with a left-right direction attribute and once with
  135. # a right-left direction attribute.
  136. #
  137. # For example, plus sign is encoded at 0x2B with a left-right
  138. # attribute, and at 0xAB with a right-left attribute. However, there
  139. # is only one PLUS SIGN character in Unicode. This leads to some
  140. # interesting problems when mapping between Mac OS Arabic and Unicode;
  141. # see below.
  142. #
  143. # A related problem is that even when a particular character is
  144. # encoded only once in Mac OS Arabic, it may have a different
  145. # direction attribute than the corresponding Unicode character.
  146. #
  147. # For example, the Mac OS Arabic character at 0x93 is HORIZONTAL
  148. # ELLIPSIS with strong right-left direction. However, the Unicode
  149. # character HORIZONTAL ELLIPSIS has direction class neutral.
  150. #
  151. # 3. Behavior of ASCII-range numbers in WorldScript
  152. #
  153. # Mac OS Arabic also has two sets of digit codes.
  154. #
  155. # The digits at 0x30-0x39 may be displayed using either European
  156. # digit forms or Arabic digit forms, depending on context. If there
  157. # is a "strong European" character such as a Latin letter on either
  158. # side of a sequence consisting of digits 0x30-0x39 and possibly comma
  159. # 0x2C or period 0x2E, then the characters will be displayed using
  160. # European forms (this will happen even if there are neutral characters
  161. # between the digits and the strong European character). Otherwise, the
  162. # digits will be displayed using Arabic forms, the comma will be
  163. # displayed as Arabic thousands separator, and the period as Arabic
  164. # decimal separator. In any case, 0x2C, 0x2E, and 0x30-0x39 are always
  165. # left-right.
  166. #
  167. # The digits at 0xB0-0xB9 are always displayed using Arabic digit
  168. # shapes, and moreover, these digits always have strong right-left
  169. # directionality. These are mainly intended for special layout
  170. # purposes such as part numbers, etc.
  171. #
  172. # 4. Font variants
  173. #
  174. # The table in this file gives the Unicode mappings for the standard
  175. # Mac OS Arabic encoding. This encoding is supported by the Cairo font
  176. # (the system font for Arabic), and is the encoding supported by the
  177. # text processing utilities. However, the other Arabic fonts actually
  178. # implement slightly different encodings; this mainly affects the code
  179. # points 0xAA and 0xC0. For these code points the standard Mac OS
  180. # Arabic encoding has the following mappings:
  181. # 0xAA -> <RL>+0x002A ASTERISK, right-left
  182. # 0xC0 -> <RL>+0x274A EIGHT TEARDROP-SPOKED PROPELLER ASTERISK,
  183. # right-left
  184. # This mapping of 0xAA is consistent with the normal convention for
  185. # Mac OS Arabic and Hebrew that the right-left duplicates have codes
  186. # that are equal to the ASCII code of the left-right character plus
  187. # 0x80. However, in all of the other fonts, 0xAA is MULTIPLICATION SIGN, and
  188. # right-left ASTERISK may be at a different code point. The other
  189. # variants are described below.
  190. #
  191. # The TrueType variant is used for most of the Arabic TrueType fonts:
  192. # Baghdad, Geeza, Kufi, Nadeem. It differs from the standard variant
  193. # in the following way:
  194. # 0xAA -> <RL>+0x00D7 MULTIPLICATION SIGN, right-left
  195. # 0xC0 -> <RL>+0x002A ASTERISK, right-left
  196. #
  197. # The Thuluth variant is used for the Arabic Postscript-only fonts:
  198. # Thuluth and Thuluth bold. It differs from the standard variant in
  199. # the following way:
  200. # 0xAA -> <RL>+0x00D7 MULTIPLICATION SIGN, right-left
  201. # 0xC0 -> 0x066D ARABIC FIVE POINTED STAR
  202. #
  203. # The AlBayan variant is used for the Arabic TrueType font Al Bayan.
  204. # It differs from the standard variant in the following way:
  205. # 0x81 -> no mapping (glyph just has authorship information, etc.)
  206. # 0xA3 -> 0xFDFA ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM
  207. # 0xA4 -> 0xFDF2 ARABIC LIGATURE ALLAH ISOLATED FORM
  208. # 0xAA -> <RL>+0x00D7 MULTIPLICATION SIGN, right-left
  209. # 0xDC -> <RL>+0x25CF BLACK CIRCLE, right-left
  210. # 0xFC -> <RL>+0x25A0 BLACK SQUARE, right-left
  211. #
  212. # Unicode mapping issues and notes:
  213. # ---------------------------------
  214. #
  215. # 1. Matching the direction of Mac OS Arabic characters
  216. #
  217. # When Mac OS Arabic encodes a character twice but with different
  218. # direction attributes for the two code points - as in the case of
  219. # plus sign mentioned above - we need a way to map both Mac OS Arabic
  220. # code points to Unicode and back again without loss of information.
  221. # With the plus sign, for example, mapping one of the Mac OS Arabic
  222. # characters to a code in the Unicode corporate use zone is
  223. # undesirable, since both of the plus sign characters are likely to
  224. # be used in text that is interchanged.
  225. #
  226. # The problem is solved with the use of direction override characters
  227. # and direction-dependent mappings. When mapping from Mac OS Arabic
  228. # to Unicode, we use direction overrides as necessary to force the
  229. # direction of the resulting Unicode characters.
  230. #
  231. # The required direction is indicated by a direction tag in the
  232. # mappings. A tag of <LR> means the corresponding Unicode character
  233. # must have a strong left-right context, and a tag of <RL> indicates
  234. # a right-left context.
  235. #
  236. # For example, the mapping of 0x2B is given as <LR>+0x002B; the
  237. # mapping of 0xAB is given as <RL>+0x002B. If we map an isolated
  238. # instance of 0x2B to Unicode, it should be mapped as follows (LRO
  239. # indicates LEFT-RIGHT OVERRIDE, PDF indicates POP DIRECTION
  240. # FORMATTING):
  241. #
  242. # 0x2B -> 0x202D (LRO) + 0x002B (PLUS SIGN) + 0x202C (PDF)
  243. #
  244. # When mapping several characters in a row that require direction
  245. # forcing, the overrides need only be used at the beginning and end.
  246. # For example:
  247. #
  248. # 0x24 0x20 0x28 0x29 -> 0x202D 0x0024 0x0020 0x0028 0x0029 0x202C
  249. #
  250. # If neutral characters that require direction forcing are already
  251. # between strong-direction characters with matching directionality,
  252. # then direction overrides need not be used. Direction overrides are
  253. # always needed to map the right-left digits at 0xB0-0xB9.
  254. #
  255. # When mapping from Unicode to Mac OS Arabic, the Unicode
  256. # bidirectional algorithm should be used to determine resolved
  257. # direction of the Unicode characters. The mapping from Unicode to
  258. # Mac OS Arabic can then be disambiguated by the use of the resolved
  259. # direction:
  260. #
  261. # Unicode 0x002B -> Mac OS Arabic 0x2B (if L) or 0xAB (if R)
  262. #
  263. # However, this also means the direction override characters should
  264. # be discarded when mapping from Unicode to Mac OS Arabic (after
  265. # they have been used to determine resolved direction), since the
  266. # direction override information is carried by the code point itself.
  267. #
  268. # Even when direction overrides are not needed for roundtrip
  269. # fidelity, they are sometimes used when mapping Mac OS Arabic
  270. # characters to Unicode in order to achieve similar text layout with
  271. # the resulting Unicode text. For example, the single Mac OS Arabic
  272. # ellipsis character has direction class right-left, and there is no
  273. # left-right version. However, the Unicode HORIZONTAL ELLIPSIS
  274. # character has direction class neutral (which means it may end up
  275. # with a resolved direction of left-right if surrounded by left-right
  276. # characters). When mapping the Mac OS Arabic ellipsis to Unicode, it
  277. # is surrounded with a direction override to help preserve proper
  278. # text layout. The resolved direction is not needed or used when
  279. # mapping the Unicode HORIZONTAL ELLIPSIS back to Mac OS Arabic.
  280. #
  281. # 2. Mapping the Mac OS Arabic digits
  282. #
  283. # The main table below contains mappings that should be used when
  284. # strict round-trip fidelity is required. However, for numeric
  285. # values, the mappings in that table will produce Unicode characters
  286. # that may appear different than the Mac OS Arabic text displayed on
  287. # a Mac OS system using WorldScript. This is because WorldScript
  288. # uses context-dependent display for the 0x30-0x39 digits.
  289. #
  290. # If roundtrip fidelity is not required, then the following
  291. # alternate mappings should be used when a sequence of 0x30-0x39
  292. # digits - possibly including 0x2C and 0x2E - occurs in an Arabic
  293. # context (that is, when the first "strong" character on either side
  294. # of the digit sequence is Arabic, or there is no strong character):
  295. #
  296. # 0x2C 0x066C # ARABIC THOUSANDS SEPARATOR
  297. # 0x2E 0x066B # ARABIC DECIMAL SEPARATOR
  298. # 0x30 0x0660 # ARABIC-INDIC DIGIT ZERO
  299. # 0x31 0x0661 # ARABIC-INDIC DIGIT ONE
  300. # 0x32 0x0662 # ARABIC-INDIC DIGIT TWO
  301. # 0x33 0x0663 # ARABIC-INDIC DIGIT THREE
  302. # 0x34 0x0664 # ARABIC-INDIC DIGIT FOUR
  303. # 0x35 0x0665 # ARABIC-INDIC DIGIT FIVE
  304. # 0x36 0x0666 # ARABIC-INDIC DIGIT SIX
  305. # 0x37 0x0667 # ARABIC-INDIC DIGIT SEVEN
  306. # 0x38 0x0668 # ARABIC-INDIC DIGIT EIGHT
  307. # 0x39 0x0669 # ARABIC-INDIC DIGIT NINE
  308. #
  309. # Details of mapping changes in each version:
  310. # -------------------------------------------
  311. #
  312. # Changes from version n03 to version n07:
  313. #
  314. # - Change mapping for 0xC0 from U+066D to U+274A.
  315. #
  316. # - Add direction overrides (required directionality) to mappings
  317. # for 0x25, 0x2C, 0x3B, 0x3F.
  318. #
  319. ##################
  320. 0x00 - 0x7F = 0x0000 -
  321. 0x80 = 0x00C4
  322. 0x81 = 0x00A0
  323. 0x82 = 0x00C7
  324. 0x83 = 0x00C9
  325. 0x84 = 0x00D1
  326. 0x85 = 0x00D6
  327. 0x86 = 0x00DC
  328. 0x87 = 0x00E1
  329. 0x88 = 0x00E0
  330. 0x89 = 0x00E2
  331. 0x8A = 0x00E4
  332. 0x8B = 0x06BA
  333. 0x8C = 0x00AB
  334. 0x8D = 0x00E7
  335. 0x8E = 0x00E9
  336. 0x8F = 0x00E8
  337. 0x90 = 0x00EA
  338. 0x91 = 0x00EB
  339. 0x92 = 0x00ED
  340. 0x93 = 0x2026
  341. 0x94 = 0x00EE
  342. 0x95 = 0x00EF
  343. 0x96 = 0x00F1
  344. 0x97 = 0x00F3
  345. 0x98 = 0x00BB
  346. 0x99 = 0x00F4
  347. 0x9A = 0x00F6
  348. 0x9B = 0x00F7
  349. 0x9C = 0x00FA
  350. 0x9D = 0x00F9
  351. 0x9E = 0x00FB
  352. 0x9F = 0x00FC
  353. 0xA0 = 0x0020
  354. 0xA1 = 0x0021
  355. 0xA2 = 0x0022
  356. 0xA3 = 0x0023
  357. 0xA4 = 0x0024
  358. 0xA5 = 0x066A
  359. 0xA6 = 0x0026
  360. 0xA7 = 0x0027
  361. 0xA8 = 0x0028
  362. 0xA9 = 0x0029
  363. 0xAA = 0x002A
  364. 0xAB = 0x002B
  365. 0xAC = 0x060C
  366. 0xAD = 0x002D
  367. 0xAE = 0x002E
  368. 0xAF = 0x002F
  369. 0xB0 = 0x0660
  370. 0xB1 = 0x0661
  371. 0xB2 = 0x0662
  372. 0xB3 = 0x0663
  373. 0xB4 = 0x0664
  374. 0xB5 = 0x0665
  375. 0xB6 = 0x0666
  376. 0xB7 = 0x0667
  377. 0xB8 = 0x0668
  378. 0xB9 = 0x0669
  379. 0xBA = 0x003A
  380. 0xBB = 0x061B
  381. 0xBC = 0x003C
  382. 0xBD = 0x003D
  383. 0xBE = 0x003E
  384. 0xBF = 0x061F
  385. 0xC0 = 0x274A
  386. 0xC1 = 0x0621
  387. 0xC2 = 0x0622
  388. 0xC3 = 0x0623
  389. 0xC4 = 0x0624
  390. 0xC5 = 0x0625
  391. 0xC6 = 0x0626
  392. 0xC7 = 0x0627
  393. 0xC8 = 0x0628
  394. 0xC9 = 0x0629
  395. 0xCA = 0x062A
  396. 0xCB = 0x062B
  397. 0xCC = 0x062C
  398. 0xCD = 0x062D
  399. 0xCE = 0x062E
  400. 0xCF = 0x062F
  401. 0xD0 = 0x0630
  402. 0xD1 = 0x0631
  403. 0xD2 = 0x0632
  404. 0xD3 = 0x0633
  405. 0xD4 = 0x0634
  406. 0xD5 = 0x0635
  407. 0xD6 = 0x0636
  408. 0xD7 = 0x0637
  409. 0xD8 = 0x0638
  410. 0xD9 = 0x0639
  411. 0xDA = 0x063A
  412. 0xDB = 0x005B
  413. 0xDC = 0x005C
  414. 0xDD = 0x005D
  415. 0xDE = 0x005E
  416. 0xDF = 0x005F
  417. 0xE0 = 0x0640
  418. 0xE1 = 0x0641
  419. 0xE2 = 0x0642
  420. 0xE3 = 0x0643
  421. 0xE4 = 0x0644
  422. 0xE5 = 0x0645
  423. 0xE6 = 0x0646
  424. 0xE7 = 0x0647
  425. 0xE8 = 0x0648
  426. 0xE9 = 0x0649
  427. 0xEA = 0x064A
  428. 0xEB = 0x064B
  429. 0xEC = 0x064C
  430. 0xED = 0x064D
  431. 0xEE = 0x064E
  432. 0xEF = 0x064F
  433. 0xF0 = 0x0650
  434. 0xF1 = 0x0651
  435. 0xF2 = 0x0652
  436. 0xF3 = 0x067E
  437. 0xF4 = 0x0679
  438. 0xF5 = 0x0686
  439. 0xF6 = 0x06D5
  440. 0xF7 = 0x06A4
  441. 0xF8 = 0x06AF
  442. 0xF9 = 0x0688
  443. 0xFA = 0x0691
  444. 0xFB = 0x007B
  445. 0xFC = 0x007C
  446. 0xFD = 0x007D
  447. 0xFE = 0x0698
  448. 0xFF = 0x06D2
  449. END_MAP