/clip/tools/gen_tbl/main.c

https://github.com/amery/clip-angelo · C · 388 lines · 329 code · 57 blank · 2 comment · 38 complexity · 0b4e3be71a262523b7a17bcda8bc406d MD5 · raw file

  1. int
  2. main(int argc, char **argv)
  3. {
  4. FILE *file;
  5. cons_CharsetEntry *cp = 0;
  6. int len = 0, i, mb = 0;
  7. char buf[4096];
  8. UniRecord *rp;
  9. /* unicode-order */
  10. Coll uni;
  11. /* charset-order */
  12. Coll cs;
  13. Coll out;
  14. Coll cmp;
  15. char *fname;
  16. if (argc < 2)
  17. {
  18. fprintf(stderr, "usage: %s charset_file [cs_file1...] < unicode_data\n", argv[0]);
  19. exit(1);
  20. }
  21. for (i = 1; i < argc; i++)
  22. {
  23. fname = argv[i];
  24. file = fopen(fname, "r");
  25. if (!file)
  26. {
  27. fprintf(stderr, "cannot open %s: %s\n", fname, strerror(errno));
  28. exit(2);
  29. }
  30. fprintf(stderr, "load charset '%s'\n", fname);
  31. if (load_charset(file, &cp, &len))
  32. {
  33. fprintf(stderr, "cannot read %s: %s\n", fname, strerror(errno));
  34. exit(3);
  35. }
  36. fprintf(stderr, "loaded %d entries\n", len);
  37. fclose(file);
  38. #ifdef DBG
  39. fprintf(stderr, "readed %d charset entries\n", len);
  40. #endif
  41. }
  42. init_Coll(&uni, 0, cmp_UniRecord);
  43. init_Coll(&cs, 0, cmp_CSRecord);
  44. init_Coll(&out, 0, 0);
  45. init_Coll(&cmp, 0, cmp_UniRecord);
  46. while (fgets(buf, sizeof(buf), stdin))
  47. {
  48. int l;
  49. char *s;
  50. rp = (UniRecord *) calloc(1, sizeof(*rp));
  51. l = strlen(buf);
  52. if (l > 0 && buf[--l] == '\n')
  53. buf[l] = 0;
  54. if (!l)
  55. break;
  56. s = strdup(buf);
  57. rp->mem_of_UniRecord = s;
  58. rp->hex_of_UniRecord = s;
  59. rp->no_of_UniRecord = strtoul(s, 0, 16);
  60. l = strcspn(s, ";");
  61. s += l;
  62. *s++ = 0;
  63. rp->name_of_UniRecord = s;
  64. l = strcspn(s, ";");
  65. s += l;
  66. *s++ = 0;
  67. rp->cat_of_UniRecord = s;
  68. l = strcspn(s, ";");
  69. s += l;
  70. *s++ = 0;
  71. rp->comb_of_UniRecord = s;
  72. l = strcspn(s, ";");
  73. s += l;
  74. *s++ = 0;
  75. rp->bidir_of_UniRecord = s;
  76. l = strcspn(s, ";");
  77. s += l;
  78. *s++ = 0;
  79. rp->decomp_of_UniRecord = s;
  80. l = strcspn(s, ";");
  81. s += l;
  82. *s++ = 0;
  83. rp->dec_of_UniRecord = s;
  84. l = strcspn(s, ";");
  85. s += l;
  86. *s++ = 0;
  87. rp->dig_of_UniRecord = s;
  88. l = strcspn(s, ";");
  89. s += l;
  90. *s++ = 0;
  91. rp->num_of_UniRecord = s;
  92. l = strcspn(s, ";");
  93. s += l;
  94. *s++ = 0;
  95. rp->mirror_of_UniRecord = s;
  96. l = strcspn(s, ";");
  97. s += l;
  98. *s++ = 0;
  99. rp->name1_of_UniRecord = s;
  100. l = strcspn(s, ";");
  101. s += l;
  102. *s++ = 0;
  103. rp->comment_of_UniRecord = s;
  104. l = strcspn(s, ";");
  105. s += l;
  106. *s++ = 0;
  107. rp->upper_of_UniRecord = s;
  108. l = strcspn(s, ";");
  109. s += l;
  110. *s++ = 0;
  111. rp->lower_of_UniRecord = s;
  112. l = strcspn(s, ";");
  113. s += l;
  114. *s++ = 0;
  115. rp->title_of_UniRecord = s;
  116. l = strcspn(s, ";");
  117. s += l;
  118. *s++ = 0;
  119. rp->cp_of_UniRecord = in_map(cp, len, rp->no_of_UniRecord);
  120. if (rp->cp_of_UniRecord)
  121. {
  122. remove_char(&uni, rp->cp_of_UniRecord->ch_of_cons_CharsetEntry);
  123. remove_char(&cs, rp->cp_of_UniRecord->ch_of_cons_CharsetEntry);
  124. insert_Coll(&uni, rp);
  125. insert_Coll(&cs, rp);
  126. }
  127. else
  128. {
  129. free(rp->mem_of_UniRecord);
  130. free(rp);
  131. }
  132. }
  133. fprintf(stderr, "appended %d uni %d chars\n", uni.count_of_Coll, cs.count_of_Coll);
  134. #ifdef DBG
  135. fprintf(stderr, "uni: %d\n", uni.count_of_Coll);
  136. for (i = 0; i < uni.count_of_Coll; i++)
  137. {
  138. rp = (UniRecord *) uni.items[i];
  139. fprintf(stderr, "hex; '%s'\n", rp->hex_of_UniRecord);
  140. fprintf(stderr, "name; '%s'\n", rp->name_of_UniRecord);
  141. fprintf(stderr, "cat; '%s'\n", rp->cat_of_UniRecord);
  142. fprintf(stderr, "comb; '%s'\n", rp->comb_of_UniRecord);
  143. fprintf(stderr, "bidir; '%s'\n", rp->bidir_of_UniRecord);
  144. fprintf(stderr, "decomp; '%s'\n", rp->decomp_of_UniRecord);
  145. fprintf(stderr, "dec; '%s'\n", rp->dec_of_UniRecord);
  146. fprintf(stderr, "dig; '%s'\n", rp->dig_of_UniRecord);
  147. fprintf(stderr, "num; '%s'\n", rp->num_of_UniRecord);
  148. fprintf(stderr, "mirror; '%s'\n", rp->mirror_of_UniRecord);
  149. fprintf(stderr, "name1; '%s'\n", rp->name1_of_UniRecord);
  150. fprintf(stderr, "comment;'%s'\n", rp->comment_of_UniRecord);
  151. fprintf(stderr, "upper; '%s'\n", rp->upper_of_UniRecord);
  152. fprintf(stderr, "lower; '%s'\n", rp->lower_of_UniRecord);
  153. fprintf(stderr, "title; '%s'\n", rp->title_of_UniRecord);
  154. fprintf(stderr, "\n");
  155. }
  156. #endif
  157. #ifdef DBG1
  158. fprintf(stderr, "cs: %d\n", cs.count_of_Coll);
  159. for (i = 0; i < cs.count_of_Coll; i++)
  160. {
  161. rp = (UniRecord *) cs.items[i];
  162. fprintf(stderr, "ch=%d\n", rp->cp_of_UniRecord->ch_of_cons_CharsetEntry);
  163. fprintf(stderr, "hex; '%s'\n", rp->hex_of_UniRecord);
  164. fprintf(stderr, "name; '%s'\n", rp->name_of_UniRecord);
  165. fprintf(stderr, "cat; '%s'\n", rp->cat_of_UniRecord);
  166. fprintf(stderr, "comb; '%s'\n", rp->comb_of_UniRecord);
  167. fprintf(stderr, "bidir; '%s'\n", rp->bidir_of_UniRecord);
  168. fprintf(stderr, "decomp; '%s'\n", rp->decomp_of_UniRecord);
  169. fprintf(stderr, "dec; '%s'\n", rp->dec_of_UniRecord);
  170. fprintf(stderr, "dig; '%s'\n", rp->dig_of_UniRecord);
  171. fprintf(stderr, "num; '%s'\n", rp->num_of_UniRecord);
  172. fprintf(stderr, "mirror; '%s'\n", rp->mirror_of_UniRecord);
  173. fprintf(stderr, "name1; '%s'\n", rp->name1_of_UniRecord);
  174. fprintf(stderr, "comment;'%s'\n", rp->comment_of_UniRecord);
  175. fprintf(stderr, "upper; '%s'\n", rp->upper_of_UniRecord);
  176. fprintf(stderr, "lower; '%s'\n", rp->lower_of_UniRecord);
  177. fprintf(stderr, "title; '%s'\n", rp->title_of_UniRecord);
  178. fprintf(stderr, "\n");
  179. }
  180. #endif
  181. for (i = 0; i < 256; i++)
  182. {
  183. UniRecord *rp = find_ch(&cs, i);
  184. if (!rp)
  185. {
  186. cons_CharsetEntry *cp = (cons_CharsetEntry *) calloc(sizeof(cons_CharsetEntry *), 1);
  187. rp = (UniRecord *) calloc(sizeof(UniRecord), 1);
  188. rp->no_of_UniRecord = i;
  189. rp->ch_of_UniRecord = i;
  190. cp->ch_of_cons_CharsetEntry = i;
  191. cp->unich_of_cons_CharsetEntry = i;
  192. rp->cp_of_UniRecord = cp;
  193. rp->hex_of_UniRecord = "";
  194. rp->name_of_UniRecord = "";
  195. rp->cat_of_UniRecord = "";
  196. rp->comb_of_UniRecord = "";
  197. rp->bidir_of_UniRecord = "";
  198. rp->decomp_of_UniRecord = "";
  199. rp->dec_of_UniRecord = "";
  200. rp->dig_of_UniRecord = "";
  201. rp->num_of_UniRecord = "";
  202. rp->mirror_of_UniRecord = "";
  203. rp->name1_of_UniRecord = "";
  204. rp->comment_of_UniRecord = "";
  205. rp->upper_of_UniRecord = "";
  206. rp->lower_of_UniRecord = "";
  207. rp->title_of_UniRecord = "";
  208. }
  209. if (rp->cp_of_UniRecord->ch_of_cons_CharsetEntry < 256)
  210. {
  211. append_Coll(&out, rp);
  212. insert_Coll(&cmp, rp);
  213. }
  214. }
  215. #ifdef DBG3
  216. fprintf(stderr, "cmp: %d\n", cmp.count_of_Coll);
  217. for (i = 0; i < cmp.count_of_Coll; i++)
  218. {
  219. rp = (UniRecord *) cmp.items_of_Coll[i];
  220. fprintf(stderr, "ch=%d\n", rp->cp_of_UniRecord->ch_of_cons_CharsetEntry);
  221. fprintf(stderr, "hex; '%s'\n", rp->hex_of_UniRecord);
  222. fprintf(stderr, "name; '%s'\n", rp->name_of_UniRecord);
  223. fprintf(stderr, "cat; '%s'\n", rp->cat_of_UniRecord);
  224. fprintf(stderr, "comb; '%s'\n", rp->comb_of_UniRecord);
  225. fprintf(stderr, "bidir; '%s'\n", rp->bidir_of_UniRecord);
  226. fprintf(stderr, "decomp; '%s'\n", rp->decomp_of_UniRecord);
  227. fprintf(stderr, "dec; '%s'\n", rp->dec_of_UniRecord);
  228. fprintf(stderr, "dig; '%s'\n", rp->dig_of_UniRecord);
  229. fprintf(stderr, "num; '%s'\n", rp->num_of_UniRecord);
  230. fprintf(stderr, "mirror; '%s'\n", rp->mirror_of_UniRecord);
  231. fprintf(stderr, "name1; '%s'\n", rp->name1_of_UniRecord);
  232. fprintf(stderr, "comment;'%s'\n", rp->comment_of_UniRecord);
  233. fprintf(stderr, "upper; '%s'\n", rp->upper_of_UniRecord);
  234. fprintf(stderr, "lower; '%s'\n", rp->lower_of_UniRecord);
  235. fprintf(stderr, "title; '%s'\n", rp->title_of_UniRecord);
  236. fprintf(stderr, "\n");
  237. }
  238. #endif
  239. printf("# generated by gen_tbl from sources");
  240. for (i = 1; i < argc; i++)
  241. printf(" %s", argv[i]);
  242. printf("\n# cmptbl\n");
  243. for (i = 0; i < 256; i++)
  244. {
  245. int ch;
  246. ch = find_cmp(&cmp, i);
  247. printf("%d\n", ch);
  248. }
  249. printf("# uptbl\n");
  250. for (i = 0; i < out.count_of_Coll; i++)
  251. {
  252. UniRecord *rp1;
  253. int ch;
  254. rp = (UniRecord *) out.items_of_Coll[i];
  255. if (!rp)
  256. {
  257. printf("%d\n", i);
  258. continue;
  259. }
  260. if (!strcasecmp(rp->cat_of_UniRecord, "Ll") && rp->upper_of_UniRecord)
  261. {
  262. unsigned long unich = strtoul(rp->upper_of_UniRecord, 0, 16);
  263. rp1 = find_uni(&uni, unich);
  264. if (rp1)
  265. ch = rp1->cp_of_UniRecord->ch_of_cons_CharsetEntry;
  266. else
  267. ch = rp->cp_of_UniRecord->ch_of_cons_CharsetEntry;
  268. }
  269. else
  270. ch = rp->cp_of_UniRecord->ch_of_cons_CharsetEntry;
  271. printf("%d\n", ch);
  272. }
  273. printf("# lowtbl\n");
  274. for (i = 0; i < out.count_of_Coll; i++)
  275. {
  276. UniRecord *rp1;
  277. int ch;
  278. rp = (UniRecord *) out.items_of_Coll[i];
  279. if (!rp)
  280. {
  281. printf("%d\n", i);
  282. continue;
  283. }
  284. if (!strcasecmp(rp->cat_of_UniRecord, "Lu") && rp->lower_of_UniRecord)
  285. {
  286. unsigned long unich = strtoul(rp->lower_of_UniRecord, 0, 16);
  287. rp1 = find_uni(&uni, unich);
  288. if (rp1)
  289. ch = rp1->cp_of_UniRecord->ch_of_cons_CharsetEntry;
  290. else
  291. ch = rp->cp_of_UniRecord->ch_of_cons_CharsetEntry;
  292. }
  293. else
  294. ch = rp->cp_of_UniRecord->ch_of_cons_CharsetEntry;
  295. printf("%d\n", ch);
  296. }
  297. printf("# isalpha\n");
  298. for (i = 0; i < out.count_of_Coll; i++)
  299. {
  300. int r = 0;
  301. char *s;
  302. rp = (UniRecord *) out.items_of_Coll[i];
  303. if (!rp)
  304. {
  305. printf("0\n");
  306. continue;
  307. }
  308. s = rp->cat_of_UniRecord;
  309. if (s && s[0] == 'L')
  310. r = 1;
  311. printf("%d\n", r);
  312. }
  313. printf("# pgtbl\n");
  314. for (i = 0; i < out.count_of_Coll; i++)
  315. {
  316. int r = 0;
  317. char *s;
  318. rp = (UniRecord *) out.items_of_Coll[i];
  319. if (!rp)
  320. {
  321. printf("0\n");
  322. continue;
  323. }
  324. s = rp->cat_of_UniRecord;
  325. if (!strcmp(s, "So") && rp->no_of_UniRecord >= 0x2500 && rp->no_of_UniRecord < 0x25ff)
  326. r = pg_chars[rp->no_of_UniRecord - 0x2500];
  327. printf("%d\n", r);
  328. }
  329. printf("# multibyte\n%d\n", mb ? 1 : 0);
  330. return 0;
  331. }