PageRenderTime 1022ms CodeModel.GetById 0ms RepoModel.GetById 0ms app.codeStats 0ms

/mined-2012.22/src/mkkmuhan

#
Shell | 197 lines | 185 code | 8 blank | 4 comment | 5 complexity | c8db3d0a96f949f47f262fee6b9d13ba MD5 | raw file
Possible License(s): GPL-3.0
  1. #! /bin/sh
  # --- Acquire the Unihan data files -------------------------------------
  # The script reads three Unihan text files from the current directory.
  # If any of them is missing, build Unihan.zip via make and extract the
  # three files from it.  Both steps must succeed; otherwise give up with
  # a diagnostic on stderr and exit status 1.
  if [ -f Unihan_DictionaryLikeData.txt ] &&
     [ -f Unihan_OtherMappings.txt ] &&
     [ -f Unihan_Readings.txt ]
  then
    :
  elif make Unihan.zip &&
       unzip Unihan.zip Unihan_DictionaryLikeData.txt Unihan_OtherMappings.txt Unihan_Readings.txt
  then
    :
  else
    echo Could not acquire Unicode database >&2
    exit 1
  fi

  # --- Command line --------------------------------------------------------
  # Optional leading flag: selects the punctuation table
  # (keymaps0/punctuation.<punct>) that is prepended to the generated map.
  punct=
  case "$1" in
    -c) punct=Chinese
        shift;;
    -j) punct=Japanese
        shift;;
    -k) punct=Korean
        shift;;
  esac

  # Positional arguments:
  #   name      Unihan field suffix (entries "k<name>" are extracted) and
  #             base name of the generated keymaps/<name>.h
  #   shortcut  handed on to ./mkkentry unchanged
  #   mergein   optional file with additional mappings to merge in
  name=$1
  shortcut=$2
  mergein=$3

  # Without a name every later path degenerates to "keymaps/.c" etc.
  if [ -z "$name" ]
  then
    echo "usage: $0 [-c|-j|-k] name [shortcut [mergein]]" >&2
    exit 2
  fi
  # ranges: reference table of sort priorities per Unicode block.
  # Nothing in the visible part of this script calls this function; its body
  # lists the priority numbers (01, 02, ... 99) that the sed rules in priouni
  # insert in front of code points from the corresponding ranges.
  # NOTE(review): presumably kept only as in-file documentation -- confirm
  # that no external caller invokes it, since the body lines would then be
  # run as commands.
  22. ranges () {
  23. # insert priority marks before Unicode values:
  24. prio Unicode block
  25. 01 4E00..9FFF; CJK Unified Ideographs
  26. 02 3400..4DBF; CJK Unified Ideographs Extension A
  27. 03 20000..2A6DF; CJK Unified Ideographs Extension B
  28. 04? 2A700..2B73F; CJK Unified Ideographs Extension C
  29. 04? 2B740..2B81F; CJK Unified Ideographs Extension D
  30. 11? 2E80..2EFF; CJK Radicals Supplement
  31. 21 F900..FAFF; CJK Compatibility Ideographs
  32. 22 2F800..2FA1F; CJK Compatibility Ideographs Supplement
  33. 23? 3300..33FF; CJK Compatibility
  34. 31? FE30..FE4F; CJK Compatibility Forms
  35. 41? 3000..303F; CJK Symbols and Punctuation
  36. 42? 3200..32FF; Enclosed CJK Letters and Months
  37. 51? 31C0..31EF; CJK Strokes
  38. #(?) do these occur?
  39. 99 others - if this occurs:
  40. - add range using 'grep CJK Blocks.txt'
  41. - assign / arrange prios
  42. }
  # priouni - stdin/stdout filter that tags each line with a sort priority.
  #
  # Every input line is expected to contain one code point written as "U+"
  # followed by hex digits (optionally zero-padded).  The filter inserts a
  # two-digit priority and a space directly in front of the "U+", e.g.
  #     "yi U+04e00"  ->  "yi 01 U+04e00"
  #
  # Rules are tried top to bottom.  The bare `t` after each rule jumps to the
  # end of the sed script as soon as a substitution was made, so the first
  # matching rule wins; anything left unmatched is tagged 99.
  #
  #   01  4E00..9FFF     02  3400..4DFF     03  20000..2A6FF
  #   04  2A700..2BBFF   11  2E80..2EFF     21  F900..FAFF
  #   22  2F800..2FAFF   23  3300..33FF     31  FE30..FE4F
  #   41  3000..303F     42  3200..32FF     51  31C0..31EF
  #
  # Because the first match wins, the narrower Extension C rule (2A7..2AF,
  # priority 04) has to precede the broad 2[0-9A] rule (priority 03);
  # in the opposite order it could never fire.
  priouni () {
    sed -e "s,\(U+0*4[EeFf]\),01 \1," -e t \
        -e "s,\(U+0*[5-9]\),01 \1," -e t \
        -e "s,\(U+0*3[4-9A-Fa-f]\),02 \1," -e t \
        -e "s,\(U+0*4[0-9A-Da-d]\),02 \1," -e t \
        -e "s,\(U+2[Aa][789A-Fa-f]\),04 \1," -e t \
        -e "s,\(U+2[Bb][0-9A-Ba-b]\),04 \1," -e t \
        -e "s,\(U+2[0-9Aa]\),03 \1," -e t \
        -e "s,\(U+0*2[Ee][89A-Fa-f]\),11 \1," -e t \
        -e "s,\(U+0*[Ff][9Aa]\),21 \1," -e t \
        -e "s,\(U+2[Ff][89Aa]\),22 \1," -e t \
        -e "s,\(U+0*33\),23 \1," -e t \
        -e "s,\(U+0*[Ff][Ee][34]\),31 \1," -e t \
        -e "s,\(U+0*30[0-3]\),41 \1," -e t \
        -e "s,\(U+0*32\),42 \1," -e t \
        -e "s,\(U+0*31[C-Ec-e]\),51 \1," -e t \
        -e "s,\(U+\),99 \1,"
  }
  # Generate keymaps/$name.c, a throw-away C program whose output is the body
  # of the keymap table.  The program consists of
  #   1. a fixed prologue (helpers printutf8/addmap and the start of main),
  #   2. one addmap() call per mapping extracted from the Unihan files,
  #   3. a fixed epilogue that flushes the last table entry.
  # Side effects: writes .kmuhan (removed again) and .kmuhan.99 (the lines
  # whose code point fell into no known range, i.e. priority 99).
  (
    # Prologue.  Quoted here-document delimiter: the text is copied verbatim.
    # <string.h> is required for strcmp, and the functions use prototype
    # definitions so that current C compilers accept the program.
    cat <<\/eoc
#include <stdio.h>
#include <string.h>

/* key sequence of the table entry currently being written ("" = none) */
static const char * keys = "";

/* Write one code point as UTF-8 bytes for use inside a C string literal;
   backslash and double quote are preceded by a backslash. */
static void
printutf8 (unsigned long unichar)
{
  if (unichar == '\\' || unichar == '"') {
    putchar ('\\');
  }
  if (unichar < 0x80) {
    putchar ((int) unichar);
  } else if (unichar < 0x800) {
    putchar ((int) (0xC0 | (unichar >> 6)));
    putchar ((int) (0x80 | (unichar & 0x3F)));
  } else if (unichar < 0x10000) {
    putchar ((int) (0xE0 | (unichar >> 12)));
    putchar ((int) (0x80 | ((unichar >> 6) & 0x3F)));
    putchar ((int) (0x80 | (unichar & 0x3F)));
  } else if (unichar < 0x200000) {
    putchar ((int) (0xF0 | (unichar >> 18)));
    putchar ((int) (0x80 | ((unichar >> 12) & 0x3F)));
    putchar ((int) (0x80 | ((unichar >> 6) & 0x3F)));
    putchar ((int) (0x80 | (unichar & 0x3F)));
  } else if (unichar < 0x4000000) {
    putchar ((int) (0xF8 | (unichar >> 24)));
    putchar ((int) (0x80 | ((unichar >> 18) & 0x3F)));
    putchar ((int) (0x80 | ((unichar >> 12) & 0x3F)));
    putchar ((int) (0x80 | ((unichar >> 6) & 0x3F)));
    putchar ((int) (0x80 | (unichar & 0x3F)));
  } else if (unichar < 0x80000000) {
    putchar ((int) (0xFC | (unichar >> 30)));
    putchar ((int) (0x80 | ((unichar >> 24) & 0x3F)));
    putchar ((int) (0x80 | ((unichar >> 18) & 0x3F)));
    putchar ((int) (0x80 | ((unichar >> 12) & 0x3F)));
    putchar ((int) (0x80 | ((unichar >> 6) & 0x3F)));
    putchar ((int) (0x80 | (unichar & 0x3F)));
  }
}

/* Append character ch to the entry for key k.  A key different from the
   previous one closes the open entry and starts a new one; the same key
   adds a space-separated alternative.  An empty k only closes the entry. */
static void
addmap (const char * k, unsigned long ch)
{
  if (strcmp (k, keys) != 0) {
    if (* keys != '\0') {
      printf ("\"},\n");
    }
    if (* k != '\0') {
      printf (" {\"%s\", \"", k);
    }
  } else {
    printf (" ");
  }
  if (* k != '\0') {
    printutf8 (ch);
  }
  keys = k;
}

int
main (void)
{
/eoc

    # Run the text tools below in the C locale (confined to this subshell).
    LC_ALL=C
    export LC_ALL

    # extract mappings from Unihan database, merge with additional data
    # Pipeline: "U+code k<name> value" lines become "value code"; the
    # companion sed script $0.sed is applied; everything is lower-cased;
    # 4-character codes are zero-padded and "U+" is put in front of the
    # code; priouni finally inserts the sort priority.
    # NOTE(review): the separators in these patterns are single spaces here;
    # Unihan data files are normally tab-separated -- verify against a
    # pristine copy of this script.
    # NOTE(review): the first expression of the third sed ("?&#x153;")
    # looks like text damaged by a character-set conversion; it is kept
    # byte-for-byte -- verify against a pristine copy.
    (
      sed -e "s/^U+\([^ ]*\) k$name \([^ ]*\)$/\2 \1/" \
          -e t -e d Unihan_DictionaryLikeData.txt Unihan_OtherMappings.txt Unihan_Readings.txt
      cat "${mergein:-/dev/null}"
    ) |
    sed -f "$0.sed" |
    tr '[A-Z]' '[a-z]' |
    sed -e "s,?&#x153;,??,g" -e "s, \(....\)$, 0\1," -e "s, , U+," |
    priouni > .kmuhan

    # Sorting orders the lines by key, then priority, then code point; the
    # priority field is dropped again when the addmap() calls are formed.
    LC_ALL=C sort .kmuhan | uniq |
    sed -e 's/\(.*\) .. U+\(.*\)/ addmap ("\1", 0x\2);/'

    # Keep the unclassified lines for the final report, drop the work file.
    grep " 99 " .kmuhan > .kmuhan.99
    rm -f .kmuhan

    # Epilogue: the empty key closes the last open table entry.
    cat <</eoc
addmap ("", 0);
return 0;
}
/eoc
  ) > "keymaps/$name.c"
  # Compile the generated program and run it to produce keymaps/$name.h.
  #
  # On success: an existing header is first moved to keymaps/$name.h.sav,
  # the new header is written, ./mkkentry is run, and the temporary .c/.exe
  # files are removed.  If .kmuhan.99 is non-empty (code points that matched
  # no known range) a notice is printed and the resulting status is non-zero.
  # On compile failure: report on stderr and exit 1 instead of falling
  # through silently with status 0.
  #
  # ${CC-cc} is deliberately left unquoted so that CC may carry options.
  if ${CC-cc} -o "keymaps/$name.exe" "keymaps/$name.c"
  then
    if [ -f "keymaps/$name.h" ]
    then
      echo saving previous keyboard mapping file to keymaps/$name.h.sav
      mv -i "keymaps/$name.h" "keymaps/$name.h.sav"
    fi

    (
      # header comment of the generated file
      echo "/***************************************************"
      echo " mined keyboard mapping table"
      echo " * generated with mkkbmap (mkkmuhan)"
      echo " from Unihan database, k$name entries"
      # optional extra comment text that accompanies the merged-in file
      if [ -n "$mergein" ] && [ -f "$mergein.gen" ]
      then
        cat "$mergein.gen"
      fi
      if [ -n "$punct" ]
      then
        echo " * supplemented with punctuation mappings"
      fi
      echo "*/"

      # the table itself: punctuation entries (if requested), then the
      # entries printed by the generated program, then the terminator
      echo "struct keymap keymap_$name [] = {"
      if [ -n "$punct" ]
      then
        cat "keymaps0/punctuation.$punct"
        echo
      fi
      "keymaps/$name.exe"
      echo " {NIL_PTR}"
      echo "};"
    ) > "keymaps/$name.h"

    # $shortcut stays unquoted: an empty value must pass no argument at all.
    ./mkkentry -H "$name" $shortcut
    rm -f "keymaps/$name.c" "keymaps/$name.exe"

    if [ -s .kmuhan.99 ]
    then
      echo unidentified character ranges, see .kmuhan.99
      # leave a failure status behind as the result of this block
      false
    else
      rm -f .kmuhan.99
    fi
  else
    echo "compiling keymaps/$name.c failed; keymaps/$name.h was not generated" >&2
    exit 1
  fi