PageRenderTime 61ms CodeModel.GetById 35ms RepoModel.GetById 0ms app.codeStats 0ms

/racket-5.0.2-bin-x86_64-linux-f7/collects/plot/src/all/gdkanji.c

http://github.com/smorin/f4f.arc
C | 660 lines | 589 code | 64 blank | 7 comment | 213 complexity | daeef9c5262653c20e72da2281708df1 MD5 | raw file
Possible License(s): LGPL-2.0
  1. /* gdkanji.c (Kanji code converter) */
  2. /* written by Masahito Yamaga (ma@yama-ga.com) */
  3. #ifdef HAVE_CONFIG_H
  4. #include "config.h"
  5. #endif
  6. #include <stdio.h>
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include "gd.h"
  10. #include "gdhelpers.h"
  11. #ifdef HAVE_ERRNO_H
  12. #include <errno.h>
  13. #endif
  14. #include <stdarg.h>
  15. #if defined(HAVE_ICONV_H)
  16. #include <iconv.h>
  17. #endif
  18. #ifndef HAVE_ICONV_T_DEF
  19. typedef void *iconv_t;
  20. #endif
  21. #ifndef HAVE_ICONV
  22. #define ICONV_CONST /**/
  23. iconv_t iconv_open (const char *, const char *);
  24. size_t iconv (iconv_t, ICONV_CONST char **, size_t *, char **, size_t *);
  25. int iconv_close (iconv_t);
  26. iconv_t
  27. iconv_open (const char *tocode, const char *fromcode)
  28. {
  29. return (iconv_t) (-1);
  30. }
  31. size_t
  32. iconv (iconv_t cd, ICONV_CONST char **inbuf, size_t * inbytesleft,
  33. char **outbuf, size_t * outbytesleft)
  34. {
  35. return 0;
  36. }
  37. int
  38. iconv_close (iconv_t cd)
  39. {
  40. return 0;
  41. }
  42. #endif /* !HAVE_ICONV */
  43. #define LIBNAME "any2eucjp()"
  44. #if defined(__MSC__) || defined(__BORLANDC__) || defined(__TURBOC__) || defined(_Windows) || defined(MSDOS)
  45. #ifndef SJISPRE
  46. #define SJISPRE 1
  47. #endif
  48. #endif
  49. #ifdef TRUE
  50. #undef TRUE
  51. #endif
  52. #ifdef FALSE
  53. #undef FALSE
  54. #endif
  55. #define TRUE 1
  56. #define FALSE 0
  57. #define NEW 1
  58. #define OLD 2
  59. #define ESCI 3
  60. #define NEC 4
  61. #define EUC 5
  62. #define SJIS 6
  63. #define EUCORSJIS 7
  64. #define ASCII 8
  65. #define NEWJISSTR "JIS7"
  66. #define OLDJISSTR "jis"
  67. #define EUCSTR "eucJP"
  68. #define SJISSTR "SJIS"
  69. #define ESC 27
  70. #define SS2 142
  71. static void
  72. debug (const char *format, ...)
  73. {
  74. #ifdef DEBUG
  75. va_list args;
  76. va_start (args, format);
  77. fprintf (stdout, "%s: ", LIBNAME);
  78. vfprintf (stdout, format, args);
  79. fprintf (stdout, "\n");
  80. va_end (args);
  81. #endif
  82. }
  83. static void
  84. error (const char *format, ...)
  85. {
  86. va_list args;
  87. va_start (args, format);
  88. fprintf (stderr, "%s: ", LIBNAME);
  89. vfprintf (stderr, format, args);
  90. fprintf (stderr, "\n");
  91. va_end (args);
  92. }
  93. /* DetectKanjiCode() derived from DetectCodeType() by Ken Lunde. */
  94. static int
  95. DetectKanjiCode (unsigned char *str)
  96. {
  97. static int whatcode = ASCII;
  98. int oldcode = ASCII;
  99. int c, i;
  100. char *lang = NULL;
  101. c = '\1';
  102. i = 0;
  103. if (whatcode != EUCORSJIS && whatcode != ASCII)
  104. {
  105. oldcode = whatcode;
  106. whatcode = ASCII;
  107. }
  108. while ((whatcode == EUCORSJIS || whatcode == ASCII) && c != '\0')
  109. {
  110. if ((c = str[i++]) != '\0')
  111. {
  112. if (c == ESC)
  113. {
  114. c = str[i++];
  115. if (c == '$')
  116. {
  117. c = str[i++];
  118. if (c == 'B')
  119. whatcode = NEW;
  120. else if (c == '@')
  121. whatcode = OLD;
  122. }
  123. else if (c == '(')
  124. {
  125. c = str[i++];
  126. if (c == 'I')
  127. whatcode = ESCI;
  128. }
  129. else if (c == 'K')
  130. whatcode = NEC;
  131. }
  132. else if ((c >= 129 && c <= 141) || (c >= 143 && c <= 159))
  133. whatcode = SJIS;
  134. else if (c == SS2)
  135. {
  136. c = str[i++];
  137. if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160)
  138. || (c >= 224 && c <= 252))
  139. whatcode = SJIS;
  140. else if (c >= 161 && c <= 223)
  141. whatcode = EUCORSJIS;
  142. }
  143. else if (c >= 161 && c <= 223)
  144. {
  145. c = str[i++];
  146. if (c >= 240 && c <= 254)
  147. whatcode = EUC;
  148. else if (c >= 161 && c <= 223)
  149. whatcode = EUCORSJIS;
  150. else if (c >= 224 && c <= 239)
  151. {
  152. whatcode = EUCORSJIS;
  153. while (c >= 64 && c != '\0' && whatcode == EUCORSJIS)
  154. {
  155. if (c >= 129)
  156. {
  157. if (c <= 141 || (c >= 143 && c <= 159))
  158. whatcode = SJIS;
  159. else if (c >= 253 && c <= 254)
  160. whatcode = EUC;
  161. }
  162. c = str[i++];
  163. }
  164. }
  165. else if (c <= 159)
  166. whatcode = SJIS;
  167. }
  168. else if (c >= 240 && c <= 254)
  169. whatcode = EUC;
  170. else if (c >= 224 && c <= 239)
  171. {
  172. c = str[i++];
  173. if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160))
  174. whatcode = SJIS;
  175. else if (c >= 253 && c <= 254)
  176. whatcode = EUC;
  177. else if (c >= 161 && c <= 252)
  178. whatcode = EUCORSJIS;
  179. }
  180. }
  181. }
  182. #ifdef DEBUG
  183. if (whatcode == ASCII)
  184. debug ("Kanji code not included.");
  185. else if (whatcode == EUCORSJIS)
  186. debug ("Kanji code not detected.");
  187. else
  188. debug ("Kanji code detected at %d byte.", i);
  189. #endif
  190. if (whatcode == EUCORSJIS && oldcode != ASCII)
  191. whatcode = oldcode;
  192. if (whatcode == EUCORSJIS)
  193. {
  194. if (getenv ("LC_ALL"))
  195. lang = getenv ("LC_ALL");
  196. else if (getenv ("LC_CTYPE"))
  197. lang = getenv ("LC_CTYPE");
  198. else if (getenv ("LANG"))
  199. lang = getenv ("LANG");
  200. if (lang)
  201. {
  202. if (strcmp (lang, "ja_JP.SJIS") == 0 ||
  203. #ifdef hpux
  204. strcmp (lang, "japanese") == 0 ||
  205. #endif
  206. strcmp (lang, "ja_JP.mscode") == 0 ||
  207. strcmp (lang, "ja_JP.PCK") == 0)
  208. whatcode = SJIS;
  209. else if (strncmp (lang, "ja", 2) == 0)
  210. #ifdef SJISPRE
  211. whatcode = SJIS;
  212. #else
  213. whatcode = EUC;
  214. #endif
  215. }
  216. }
  217. if (whatcode == EUCORSJIS)
  218. #ifdef SJISPRE
  219. whatcode = SJIS;
  220. #else
  221. whatcode = EUC;
  222. #endif
  223. return whatcode;
  224. }
  225. /* SJIStoJIS() is sjis2jis() by Ken Lunde. */
  226. static void
  227. SJIStoJIS (int *p1, int *p2)
  228. {
  229. register unsigned char c1 = *p1;
  230. register unsigned char c2 = *p2;
  231. register int adjust = c2 < 159;
  232. register int rowOffset = c1 < 160 ? 112 : 176;
  233. register int cellOffset = adjust ? (31 + (c2 > 127)) : 126;
  234. *p1 = ((c1 - rowOffset) << 1) - adjust;
  235. *p2 -= cellOffset;
  236. }
  237. /* han2zen() was derived from han2zen() written by Ken Lunde. */
  238. #define IS_DAKU(c) ((c >= 182 && c <= 196) || (c >= 202 && c <= 206) || (c == 179))
  239. #define IS_HANDAKU(c) (c >= 202 && c <= 206)
  240. static void
  241. han2zen (int *p1, int *p2)
  242. {
  243. int c = *p1;
  244. int daku = FALSE;
  245. int handaku = FALSE;
  246. int mtable[][2] = {
  247. {129, 66},
  248. {129, 117},
  249. {129, 118},
  250. {129, 65},
  251. {129, 69},
  252. {131, 146},
  253. {131, 64},
  254. {131, 66},
  255. {131, 68},
  256. {131, 70},
  257. {131, 72},
  258. {131, 131},
  259. {131, 133},
  260. {131, 135},
  261. {131, 98},
  262. {129, 91},
  263. {131, 65},
  264. {131, 67},
  265. {131, 69},
  266. {131, 71},
  267. {131, 73},
  268. {131, 74},
  269. {131, 76},
  270. {131, 78},
  271. {131, 80},
  272. {131, 82},
  273. {131, 84},
  274. {131, 86},
  275. {131, 88},
  276. {131, 90},
  277. {131, 92},
  278. {131, 94},
  279. {131, 96},
  280. {131, 99},
  281. {131, 101},
  282. {131, 103},
  283. {131, 105},
  284. {131, 106},
  285. {131, 107},
  286. {131, 108},
  287. {131, 109},
  288. {131, 110},
  289. {131, 113},
  290. {131, 116},
  291. {131, 119},
  292. {131, 122},
  293. {131, 125},
  294. {131, 126},
  295. {131, 128},
  296. {131, 129},
  297. {131, 130},
  298. {131, 132},
  299. {131, 134},
  300. {131, 136},
  301. {131, 137},
  302. {131, 138},
  303. {131, 139},
  304. {131, 140},
  305. {131, 141},
  306. {131, 143},
  307. {131, 147},
  308. {129, 74},
  309. {129, 75}
  310. };
  311. if (*p2 == 222 && IS_DAKU (*p1))
  312. daku = TRUE; /* Daku-ten */
  313. else if (*p2 == 223 && IS_HANDAKU (*p1))
  314. handaku = TRUE; /* Han-daku-ten */
  315. *p1 = mtable[c - 161][0];
  316. *p2 = mtable[c - 161][1];
  317. if (daku)
  318. {
  319. if ((*p2 >= 74 && *p2 <= 103) || (*p2 >= 110 && *p2 <= 122))
  320. (*p2)++;
  321. else if (*p2 == 131 && *p2 == 69)
  322. *p2 = 148;
  323. }
  324. else if (handaku && *p2 >= 110 && *p2 <= 122)
  325. (*p2) += 2;
  326. }
  327. /* Recast strcpy to handle unsigned chars used below. */
  328. #define ustrcpy(A,B) (strcpy((char*)(A),(const char*)(B)))
  329. static void
  330. do_convert (unsigned char *to, unsigned char *from, const char *code)
  331. {
  332. #ifdef HAVE_ICONV
  333. iconv_t cd;
  334. size_t from_len, to_len;
  335. if ((cd = iconv_open (EUCSTR, code)) == (iconv_t) - 1)
  336. {
  337. error ("iconv_open() error");
  338. #ifdef HAVE_ERRNO_H
  339. if (errno == EINVAL)
  340. error ("invalid code specification: \"%s\" or \"%s\"", EUCSTR, code);
  341. #endif
  342. strcpy ((char *) to, (const char *) from);
  343. return;
  344. }
  345. from_len = strlen ((const char *) from) + 1;
  346. to_len = BUFSIZ;
  347. if ((int) (iconv (cd, (char **) &from, &from_len, (char **) &to, &to_len))
  348. == -1)
  349. {
  350. #ifdef HAVE_ERRNO_H
  351. if (errno == EINVAL)
  352. error ("invalid end of input string");
  353. else if (errno == EILSEQ)
  354. error ("invalid code in input string");
  355. else if (errno == E2BIG)
  356. error ("output buffer overflow at do_convert()");
  357. else
  358. #endif
  359. error ("something happen");
  360. strcpy ((char *) to, (const char *) from);
  361. return;
  362. }
  363. if (iconv_close (cd) != 0)
  364. {
  365. error ("iconv_close() error");
  366. }
  367. #else
  368. int p1, p2, i, j;
  369. int jisx0208 = FALSE;
  370. int hankaku = FALSE;
  371. j = 0;
  372. if (strcmp (code, NEWJISSTR) == 0 || strcmp (code, OLDJISSTR) == 0)
  373. {
  374. for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
  375. {
  376. if (from[i] == ESC)
  377. {
  378. i++;
  379. if (from[i] == '$')
  380. {
  381. jisx0208 = TRUE;
  382. hankaku = FALSE;
  383. i++;
  384. }
  385. else if (from[i] == '(')
  386. {
  387. jisx0208 = FALSE;
  388. i++;
  389. if (from[i] == 'I') /* Hankaku Kana */
  390. hankaku = TRUE;
  391. else
  392. hankaku = FALSE;
  393. }
  394. }
  395. else
  396. {
  397. if (jisx0208)
  398. to[j++] = from[i] + 128;
  399. else if (hankaku)
  400. {
  401. to[j++] = SS2;
  402. to[j++] = from[i] + 128;
  403. }
  404. else
  405. to[j++] = from[i];
  406. }
  407. }
  408. }
  409. else if (strcmp (code, SJISSTR) == 0)
  410. {
  411. for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
  412. {
  413. p1 = from[i];
  414. if (p1 < 127)
  415. to[j++] = p1;
  416. else if ((p1 >= 161) && (p1 <= 223))
  417. { /* Hankaku Kana */
  418. to[j++] = SS2;
  419. to[j++] = p1;
  420. }
  421. else
  422. {
  423. p2 = from[++i];
  424. SJIStoJIS (&p1, &p2);
  425. to[j++] = p1 + 128;
  426. to[j++] = p2 + 128;
  427. }
  428. }
  429. }
  430. else
  431. {
  432. error ("invalid code specification: \"%s\"", code);
  433. return;
  434. }
  435. if (j >= BUFSIZ)
  436. {
  437. error ("output buffer overflow at do_convert()");
  438. ustrcpy (to, from);
  439. }
  440. else
  441. to[j] = '\0';
  442. #endif /* HAVE_ICONV */
  443. }
  444. static int
  445. do_check_and_conv (unsigned char *to, unsigned char *from)
  446. {
  447. static unsigned char tmp[BUFSIZ];
  448. int p1, p2, i, j;
  449. int kanji = TRUE;
  450. switch (DetectKanjiCode (from))
  451. {
  452. case NEW:
  453. debug ("Kanji code is New JIS.");
  454. do_convert (tmp, from, NEWJISSTR);
  455. break;
  456. case OLD:
  457. debug ("Kanji code is Old JIS.");
  458. do_convert (tmp, from, OLDJISSTR);
  459. break;
  460. case ESCI:
  461. debug
  462. ("This string includes Hankaku-Kana (jisx0201) escape sequence [ESC] + ( + I.");
  463. do_convert (tmp, from, NEWJISSTR);
  464. break;
  465. case NEC:
  466. debug ("Kanji code is NEC Kanji.");
  467. error ("cannot convert NEC Kanji.");
  468. ustrcpy (tmp, from);
  469. kanji = FALSE;
  470. break;
  471. case EUC:
  472. debug ("Kanji code is EUC.");
  473. ustrcpy (tmp, from);
  474. break;
  475. case SJIS:
  476. debug ("Kanji code is SJIS.");
  477. do_convert (tmp, from, SJISSTR);
  478. break;
  479. case EUCORSJIS:
  480. debug ("Kanji code is EUC or SJIS.");
  481. ustrcpy (tmp, from);
  482. kanji = FALSE;
  483. break;
  484. case ASCII:
  485. debug ("This is ASCII string.");
  486. ustrcpy (tmp, from);
  487. kanji = FALSE;
  488. break;
  489. default:
  490. debug ("This string includes unknown code.");
  491. ustrcpy (tmp, from);
  492. kanji = FALSE;
  493. break;
  494. }
  495. /* Hankaku Kana ---> Zenkaku Kana */
  496. if (kanji)
  497. {
  498. j = 0;
  499. for (i = 0; tmp[i] != '\0' && j < BUFSIZ; i++)
  500. {
  501. if (tmp[i] == SS2)
  502. {
  503. p1 = tmp[++i];
  504. if (tmp[i + 1] == SS2)
  505. {
  506. p2 = tmp[i + 2];
  507. if (p2 == 222 || p2 == 223)
  508. i += 2;
  509. else
  510. p2 = 0;
  511. }
  512. else
  513. p2 = 0;
  514. han2zen (&p1, &p2);
  515. SJIStoJIS (&p1, &p2);
  516. to[j++] = p1 + 128;
  517. to[j++] = p2 + 128;
  518. }
  519. else
  520. to[j++] = tmp[i];
  521. }
  522. if (j >= BUFSIZ)
  523. {
  524. error ("output buffer overflow at Hankaku --> Zenkaku");
  525. ustrcpy (to, tmp);
  526. }
  527. else
  528. to[j] = '\0';
  529. }
  530. else
  531. ustrcpy (to, tmp);
  532. return kanji;
  533. }
  534. int
  535. any2eucjp (unsigned char *dest, unsigned char *src, unsigned int dest_max)
  536. {
  537. static unsigned char tmp_dest[BUFSIZ];
  538. int ret;
  539. if (strlen ((const char *) src) >= BUFSIZ)
  540. {
  541. error ("input string too large");
  542. return -1;
  543. }
  544. if (dest_max > BUFSIZ)
  545. {
  546. error
  547. ("invalid maximum size of destination\nit should be less than %d.",
  548. BUFSIZ);
  549. return -1;
  550. }
  551. ret = do_check_and_conv (tmp_dest, src);
  552. if (strlen ((const char *) tmp_dest) >= dest_max)
  553. {
  554. error ("output buffer overflow");
  555. ustrcpy (dest, src);
  556. return -1;
  557. }
  558. ustrcpy (dest, tmp_dest);
  559. return ret;
  560. }
  561. #if 0
  562. unsigned int
  563. strwidth (unsigned char *s)
  564. {
  565. unsigned char *t;
  566. unsigned int i;
  567. t = (unsigned char *) gdMalloc (BUFSIZ);
  568. any2eucjp (t, s, BUFSIZ);
  569. i = strlen (t);
  570. gdFree (t);
  571. return i;
  572. }
  573. #ifdef DEBUG
  574. int
  575. main ()
  576. {
  577. unsigned char input[BUFSIZ];
  578. unsigned char *output;
  579. unsigned char *str;
  580. int c, i = 0;
  581. while ((c = fgetc (stdin)) != '\n' && i < BUFSIZ)
  582. input[i++] = c;
  583. input[i] = '\0';
  584. printf ("input : %d bytes\n", strlen ((const char *) input));
  585. printf ("output: %d bytes\n", strwidth (input));
  586. output = (unsigned char *) gdMalloc (BUFSIZ);
  587. any2eucjp (output, input, BUFSIZ);
  588. str = output;
  589. while (*str != '\0')
  590. putchar (*(str++));
  591. putchar ('\n');
  592. gdFree (output);
  593. return 0;
  594. }
  595. #endif
  596. #endif