PageRenderTime 27ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/release/src/router/php/ext/gd/libgd/gdkanji.c

https://gitlab.com/envieidoc/advancedtomato2
C | 627 lines | 563 code | 57 blank | 7 comment | 215 complexity | f882b963cdad3a5ce9532821dbf480c0 MD5 | raw file
  1. /* gdkanji.c (Kanji code converter) */
  2. /* written by Masahito Yamaga (ma@yama-ga.com) */
  3. #include <stdio.h>
  4. #include <stdlib.h>
  5. #include <string.h>
  6. #include "gd.h"
  7. #include "gdhelpers.h"
  8. #include <stdarg.h>
  9. #if defined(HAVE_ICONV_H) || defined(HAVE_ICONV)
  10. #include <iconv.h>
  11. #ifdef HAVE_ERRNO_H
  12. #include <errno.h>
  13. #endif
  14. #endif
  15. #if defined(HAVE_ICONV_H) && !defined(HAVE_ICONV)
  16. #define HAVE_ICONV 1
  17. #endif
  /* Prefix used in every diagnostic emitted by this file. */
  #define LIBNAME "any2eucjp()"

  /* DOS/Windows toolchains get SJISPRE unless the build already set it. */
  #if defined(__MSC__) || defined(__BORLANDC__) || defined(__TURBOC__) || defined(_Windows) || defined(MSDOS)
  #ifndef SJISPRE
  #define SJISPRE 1
  #endif
  #endif

  /* Local boolean constants; #undef is harmless when the name is not defined. */
  #undef TRUE
  #undef FALSE
  #define TRUE  1
  #define FALSE 0

  /* Encoding identifiers returned by DetectKanjiCode(). */
  #define NEW       1
  #define OLD       2
  #define ESCI      3
  #define NEC       4
  #define EUC       5
  #define SJIS      6
  #define EUCORSJIS 7
  #define ASCII     8

  /* Encoding names handed to do_convert() (and to iconv_open() when available). */
  #define NEWJISSTR "JIS7"
  #define OLDJISSTR "jis"
  #define EUCSTR    "eucJP"
  #define SJISSTR   "SJIS"

  /* Control bytes: escape (27) and single-shift-2 (142). */
  #define ESC 0x1B
  #define SS2 0x8E
  /*
   * Trace helper: when DEBUG is defined, print "<LIBNAME>: <message>\n" on
   * stdout, where <message> is `format` expanded printf-style with the
   * variadic arguments.  Without DEBUG the function does nothing.
   */
  static void
  debug (const char *format, ...)
  {
  #ifndef DEBUG
    (void) format;		/* tracing compiled out */
  #else
    va_list ap;

    va_start (ap, format);
    fprintf (stdout, "%s: ", LIBNAME);
    vfprintf (stdout, format, ap);
    fprintf (stdout, "\n");
    va_end (ap);
  #endif
  }
  58. static void
  59. error (const char *format,...)
  60. {
  61. va_list args;
  62. char *tmp;
  63. TSRMLS_FETCH();
  64. va_start(args, format);
  65. vspprintf(&tmp, 0, format, args);
  66. va_end(args);
  67. php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", LIBNAME, tmp);
  68. efree(tmp);
  69. }
  70. /* DetectKanjiCode() derived from DetectCodeType() by Ken Lunde. */
  71. static int
  72. DetectKanjiCode (unsigned char *str)
  73. {
  74. static int whatcode = ASCII;
  75. int oldcode = ASCII;
  76. int c, i;
  77. char *lang = NULL;
  78. c = '\1';
  79. i = 0;
  80. if (whatcode != EUCORSJIS && whatcode != ASCII)
  81. {
  82. oldcode = whatcode;
  83. whatcode = ASCII;
  84. }
  85. while ((whatcode == EUCORSJIS || whatcode == ASCII) && c != '\0')
  86. {
  87. if ((c = str[i++]) != '\0')
  88. {
  89. if (c == ESC)
  90. {
  91. c = str[i++];
  92. if (c == '$')
  93. {
  94. c = str[i++];
  95. if (c == 'B')
  96. whatcode = NEW;
  97. else if (c == '@')
  98. whatcode = OLD;
  99. }
  100. else if (c == '(')
  101. {
  102. c = str[i++];
  103. if (c == 'I')
  104. whatcode = ESCI;
  105. }
  106. else if (c == 'K')
  107. whatcode = NEC;
  108. }
  109. else if ((c >= 129 && c <= 141) || (c >= 143 && c <= 159))
  110. whatcode = SJIS;
  111. else if (c == SS2)
  112. {
  113. c = str[i++];
  114. if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160) || (c >= 224 && c <= 252))
  115. whatcode = SJIS;
  116. else if (c >= 161 && c <= 223)
  117. whatcode = EUCORSJIS;
  118. }
  119. else if (c >= 161 && c <= 223)
  120. {
  121. c = str[i++];
  122. if (c >= 240 && c <= 254)
  123. whatcode = EUC;
  124. else if (c >= 161 && c <= 223)
  125. whatcode = EUCORSJIS;
  126. else if (c >= 224 && c <= 239)
  127. {
  128. whatcode = EUCORSJIS;
  129. while (c >= 64 && c != '\0' && whatcode == EUCORSJIS)
  130. {
  131. if (c >= 129)
  132. {
  133. if (c <= 141 || (c >= 143 && c <= 159))
  134. whatcode = SJIS;
  135. else if (c >= 253 && c <= 254)
  136. whatcode = EUC;
  137. }
  138. c = str[i++];
  139. }
  140. }
  141. else if (c <= 159)
  142. whatcode = SJIS;
  143. }
  144. else if (c >= 240 && c <= 254)
  145. whatcode = EUC;
  146. else if (c >= 224 && c <= 239)
  147. {
  148. c = str[i++];
  149. if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160))
  150. whatcode = SJIS;
  151. else if (c >= 253 && c <= 254)
  152. whatcode = EUC;
  153. else if (c >= 161 && c <= 252)
  154. whatcode = EUCORSJIS;
  155. }
  156. }
  157. }
  158. #ifdef DEBUG
  159. if (whatcode == ASCII)
  160. debug ("Kanji code not included.");
  161. else if (whatcode == EUCORSJIS)
  162. debug ("Kanji code not detected.");
  163. else
  164. debug ("Kanji code detected at %d byte.", i);
  165. #endif
  166. if (whatcode == EUCORSJIS && oldcode != ASCII)
  167. whatcode = oldcode;
  168. if (whatcode == EUCORSJIS)
  169. {
  170. if (getenv ("LC_ALL"))
  171. lang = getenv ("LC_ALL");
  172. else if (getenv ("LC_CTYPE"))
  173. lang = getenv ("LC_CTYPE");
  174. else if (getenv ("LANG"))
  175. lang = getenv ("LANG");
  176. if (lang)
  177. {
  178. if (strcmp (lang, "ja_JP.SJIS") == 0 ||
  179. #ifdef hpux
  180. strcmp (lang, "japanese") == 0 ||
  181. #endif
  182. strcmp (lang, "ja_JP.mscode") == 0 ||
  183. strcmp (lang, "ja_JP.PCK") == 0)
  184. whatcode = SJIS;
  185. else if (strncmp (lang, "ja", 2) == 0)
  186. #ifdef SJISPRE
  187. whatcode = SJIS;
  188. #else
  189. whatcode = EUC;
  190. #endif
  191. }
  192. }
  193. if (whatcode == EUCORSJIS)
  194. #ifdef SJISPRE
  195. whatcode = SJIS;
  196. #else
  197. whatcode = EUC;
  198. #endif
  199. return whatcode;
  200. }
  /* SJIStoJIS() is sjis2jis() by Ken Lunde. */
  /*
   * Convert one two-byte Shift_JIS code to its JIS row/cell pair, in place.
   * On entry *p1 / *p2 hold the first / second byte; on return they hold the
   * converted bytes.  Only the low eight bits of the inputs select the
   * offsets; the cell offset is subtracted from the full value in *p2.
   */
  static void
  SJIStoJIS (int *p1, int *p2)
  {
    const unsigned char lead = (unsigned char) *p1;
    const unsigned char trail = (unsigned char) *p2;
    int low_half;			/* 1 when the trail byte is below 159 */
    int row_base;
    int cell_shift;

    low_half = (trail < 159);
    row_base = (lead < 160) ? 112 : 176;

    if (low_half)
      cell_shift = (trail > 127) ? 32 : 31;
    else
      cell_shift = 126;

    *p1 = ((lead - row_base) << 1) - low_half;
    *p2 -= cell_shift;
  }
  /* han2zen() was derived from han2zen() written by Ken Lunde. */
  /* Half-width kana bytes that can take a dakuten / handakuten mark. */
  #define IS_DAKU(c) (((c) >= 182 && (c) <= 196) || ((c) >= 202 && (c) <= 206) || ((c) == 179))
  #define IS_HANDAKU(c) ((c) >= 202 && (c) <= 206)

  /*
   * Replace a half-width kana byte with the two-byte code of its full-width
   * form, in place.
   *
   *   *p1  in:  half-width kana byte (161..223)
   *        out: first byte of the full-width code
   *   *p2  in:  222 (dakuten) or 223 (handakuten) when such a mark follows
   *             the kana, anything else (the caller passes 0) when not
   *        out: second byte of the full-width code
   *
   * A mark is folded into the result only when the kana can carry it
   * (IS_DAKU / IS_HANDAKU).  If *p1 is outside 161..223 nothing is modified;
   * this avoids indexing the table out of range.
   */
  static void
  han2zen (int *p1, int *p2)
  {
    /* Indexed by (byte - 161): one {first, second} pair per half-width byte. */
    static const unsigned char mtable[][2] = {
      {129, 66}, {129, 117}, {129, 118}, {129, 65},
      {129, 69}, {131, 146}, {131, 64}, {131, 66},
      {131, 68}, {131, 70}, {131, 72}, {131, 131},
      {131, 133}, {131, 135}, {131, 98}, {129, 91},
      {131, 65}, {131, 67}, {131, 69}, {131, 71},
      {131, 73}, {131, 74}, {131, 76}, {131, 78},
      {131, 80}, {131, 82}, {131, 84}, {131, 86},
      {131, 88}, {131, 90}, {131, 92}, {131, 94},
      {131, 96}, {131, 99}, {131, 101}, {131, 103},
      {131, 105}, {131, 106}, {131, 107}, {131, 108},
      {131, 109}, {131, 110}, {131, 113}, {131, 116},
      {131, 119}, {131, 122}, {131, 125}, {131, 126},
      {131, 128}, {131, 129}, {131, 130}, {131, 132},
      {131, 134}, {131, 136}, {131, 137}, {131, 138},
      {131, 139}, {131, 140}, {131, 141}, {131, 143},
      {131, 147}, {129, 74}, {129, 75}
    };
    const int c = *p1;
    int daku;
    int handaku;

    if (c < 161 || c > 223)
      return;			/* not a half-width kana byte */

    daku = (*p2 == 222 && IS_DAKU (c));			/* Daku-ten */
    handaku = (!daku && *p2 == 223 && IS_HANDAKU (c));	/* Han-daku-ten */

    *p1 = mtable[c - 161][0];
    *p2 = mtable[c - 161][1];

    if (daku)
      {
        if ((*p2 >= 74 && *p2 <= 103) || (*p2 >= 110 && *p2 <= 122))
          (*p2)++;		/* voiced form is the next code point */
        else if (*p1 == 131 && *p2 == 69)
          *p2 = 148;		/* U + dakuten has a dedicated code (VU) */
      }
    else if (handaku && *p2 >= 110 && *p2 <= 122)
      (*p2) += 2;			/* semi-voiced form is two code points on */
  }
  304. /* Recast strcpy to handle unsigned chars used below. */
  305. #define ustrcpy(A,B) (strcpy((char*)(A),(const char*)(B)))
  306. static void
  307. do_convert (unsigned char *to, unsigned char *from, const char *code)
  308. {
  309. #ifdef HAVE_ICONV
  310. iconv_t cd;
  311. size_t from_len, to_len;
  312. if ((cd = iconv_open (EUCSTR, code)) == (iconv_t) - 1)
  313. {
  314. error ("iconv_open() error");
  315. #ifdef HAVE_ERRNO_H
  316. if (errno == EINVAL)
  317. error ("invalid code specification: \"%s\" or \"%s\"",
  318. EUCSTR, code);
  319. #endif
  320. strcpy ((char *) to, (const char *) from);
  321. return;
  322. }
  323. from_len = strlen ((const char *) from) + 1;
  324. to_len = BUFSIZ;
  325. if ((int) iconv(cd, (char **) &from, &from_len, (char **) &to, &to_len) == -1)
  326. {
  327. #ifdef HAVE_ERRNO_H
  328. if (errno == EINVAL)
  329. error ("invalid end of input string");
  330. else if (errno == EILSEQ)
  331. error ("invalid code in input string");
  332. else if (errno == E2BIG)
  333. error ("output buffer overflow at do_convert()");
  334. else
  335. #endif
  336. error ("something happen");
  337. strcpy ((char *) to, (const char *) from);
  338. return;
  339. }
  340. if (iconv_close (cd) != 0)
  341. {
  342. error ("iconv_close() error");
  343. }
  344. #else
  345. int p1, p2, i, j;
  346. int jisx0208 = FALSE;
  347. int hankaku = FALSE;
  348. j = 0;
  349. if (strcmp (code, NEWJISSTR) == 0 || strcmp (code, OLDJISSTR) == 0)
  350. {
  351. for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
  352. {
  353. if (from[i] == ESC)
  354. {
  355. i++;
  356. if (from[i] == '$')
  357. {
  358. jisx0208 = TRUE;
  359. hankaku = FALSE;
  360. i++;
  361. }
  362. else if (from[i] == '(')
  363. {
  364. jisx0208 = FALSE;
  365. i++;
  366. if (from[i] == 'I') /* Hankaku Kana */
  367. hankaku = TRUE;
  368. else
  369. hankaku = FALSE;
  370. }
  371. }
  372. else
  373. {
  374. if (jisx0208)
  375. to[j++] = from[i] + 128;
  376. else if (hankaku)
  377. {
  378. to[j++] = SS2;
  379. to[j++] = from[i] + 128;
  380. }
  381. else
  382. to[j++] = from[i];
  383. }
  384. }
  385. }
  386. else if (strcmp (code, SJISSTR) == 0)
  387. {
  388. for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
  389. {
  390. p1 = from[i];
  391. if (p1 < 127)
  392. to[j++] = p1;
  393. else if ((p1 >= 161) && (p1 <= 223))
  394. { /* Hankaku Kana */
  395. to[j++] = SS2;
  396. to[j++] = p1;
  397. }
  398. else
  399. {
  400. p2 = from[++i];
  401. SJIStoJIS (&p1, &p2);
  402. to[j++] = p1 + 128;
  403. to[j++] = p2 + 128;
  404. }
  405. }
  406. }
  407. else
  408. {
  409. error ("invalid code specification: \"%s\"", code);
  410. return;
  411. }
  412. if (j >= BUFSIZ)
  413. {
  414. error ("output buffer overflow at do_convert()");
  415. ustrcpy (to, from);
  416. }
  417. else
  418. to[j] = '\0';
  419. #endif /* HAVE_ICONV */
  420. }
  421. static int
  422. do_check_and_conv (unsigned char *to, unsigned char *from)
  423. {
  424. static unsigned char tmp[BUFSIZ];
  425. int p1, p2, i, j;
  426. int kanji = TRUE;
  427. switch (DetectKanjiCode (from))
  428. {
  429. case NEW:
  430. debug ("Kanji code is New JIS.");
  431. do_convert (tmp, from, NEWJISSTR);
  432. break;
  433. case OLD:
  434. debug ("Kanji code is Old JIS.");
  435. do_convert (tmp, from, OLDJISSTR);
  436. break;
  437. case ESCI:
  438. debug ("This string includes Hankaku-Kana (jisx0201) escape sequence [ESC] + ( + I.");
  439. do_convert (tmp, from, NEWJISSTR);
  440. break;
  441. case NEC:
  442. debug ("Kanji code is NEC Kanji.");
  443. error ("cannot convert NEC Kanji.");
  444. ustrcpy (tmp, from);
  445. kanji = FALSE;
  446. break;
  447. case EUC:
  448. debug ("Kanji code is EUC.");
  449. ustrcpy (tmp, from);
  450. break;
  451. case SJIS:
  452. debug ("Kanji code is SJIS.");
  453. do_convert (tmp, from, SJISSTR);
  454. break;
  455. case EUCORSJIS:
  456. debug ("Kanji code is EUC or SJIS.");
  457. ustrcpy (tmp, from);
  458. kanji = FALSE;
  459. break;
  460. case ASCII:
  461. debug ("This is ASCII string.");
  462. ustrcpy (tmp, from);
  463. kanji = FALSE;
  464. break;
  465. default:
  466. debug ("This string includes unknown code.");
  467. ustrcpy (tmp, from);
  468. kanji = FALSE;
  469. break;
  470. }
  471. /* Hankaku Kana ---> Zenkaku Kana */
  472. if (kanji)
  473. {
  474. j = 0;
  475. for (i = 0; tmp[i] != '\0' && j < BUFSIZ; i++)
  476. {
  477. if (tmp[i] == SS2)
  478. {
  479. p1 = tmp[++i];
  480. if (tmp[i + 1] == SS2)
  481. {
  482. p2 = tmp[i + 2];
  483. if (p2 == 222 || p2 == 223)
  484. i += 2;
  485. else
  486. p2 = 0;
  487. }
  488. else
  489. p2 = 0;
  490. han2zen (&p1, &p2);
  491. SJIStoJIS (&p1, &p2);
  492. to[j++] = p1 + 128;
  493. to[j++] = p2 + 128;
  494. }
  495. else
  496. to[j++] = tmp[i];
  497. }
  498. if (j >= BUFSIZ)
  499. {
  500. error ("output buffer overflow at Hankaku --> Zenkaku");
  501. ustrcpy (to, tmp);
  502. }
  503. else
  504. to[j] = '\0';
  505. }
  506. else
  507. ustrcpy (to, tmp);
  508. return kanji;
  509. }
  /*
   * Convert the NUL-terminated string `src` (JIS, Shift_JIS, EUC-JP or
   * ASCII, auto-detected) to EUC-JP and store it in `dest`.
   *
   *   dest      destination buffer of at least `dest_max` bytes
   *   src       input string; must be shorter than BUFSIZ
   *   dest_max  capacity of `dest` in bytes; must not exceed BUFSIZ
   *
   * Returns the result of do_check_and_conv() (non-zero when the input was
   * recognised as Japanese text, zero otherwise), or -1 on error.  When the
   * converted text does not fit, -1 is returned and `dest` receives as much
   * of `src` as fits in `dest_max` bytes including the terminator (the cut
   * may fall inside a multi-byte character); nothing is written when
   * `dest_max` is 0.  For the two argument errors `dest` is left untouched.
   *
   * Uses a static scratch buffer, so the function is not reentrant.
   */
  int
  any2eucjp (unsigned char *dest, unsigned char *src, unsigned int dest_max)
  {
    static unsigned char tmp_dest[BUFSIZ];
    size_t keep;
    int ret;

    if (strlen ((const char *) src) >= BUFSIZ)
      {
        error ("input string too large");
        return -1;
      }
    if (dest_max > BUFSIZ)
      {
        error ("invalid maximum size of destination\nit should be less than %d.", BUFSIZ);
        return -1;
      }

    ret = do_check_and_conv (tmp_dest, src);
    if (strlen ((const char *) tmp_dest) >= dest_max)
      {
        error ("output buffer overflow");
        /* Fall back to the untouched input, but never write more than the
           caller said the buffer can hold. */
        if (dest_max > 0)
          {
            keep = strlen ((const char *) src);
            if (keep >= dest_max)
              keep = dest_max - 1;
            memmove (dest, src, keep);
            dest[keep] = '\0';
          }
        return -1;
      }

    ustrcpy (dest, tmp_dest);
    return ret;
  }
  #if 0
  /*
   * Disabled sample code: strwidth() returns the byte length of `s` after
   * conversion to EUC-JP (0 if no buffer could be allocated), and main() is
   * a small DEBUG-only driver that converts one line read from stdin.
   */
  unsigned int
  strwidth (unsigned char *s)
  {
    unsigned char *t;
    unsigned int i;

    t = (unsigned char *) gdMalloc (BUFSIZ);
    if (t == NULL)
      return 0;
    t[0] = '\0';			/* any2eucjp() may fail before writing */
    any2eucjp (t, s, BUFSIZ);
    i = (unsigned int) strlen ((const char *) t);
    gdFree (t);
    return i;
  }

  #ifdef DEBUG
  int
  main (void)
  {
    unsigned char input[BUFSIZ];
    unsigned char *output;
    unsigned char *str;
    int c, i = 0;

    /* Leave room for the terminator and stop on end-of-file as well. */
    while ((c = fgetc (stdin)) != EOF && c != '\n' && i < BUFSIZ - 1)
      input[i++] = c;
    input[i] = '\0';

    printf ("input : %lu bytes\n", (unsigned long) strlen ((const char *) input));
    printf ("output: %u bytes\n", strwidth (input));

    output = (unsigned char *) gdMalloc (BUFSIZ);
    if (output == NULL)
      return 1;
    output[0] = '\0';
    any2eucjp (output, input, BUFSIZ);
    str = output;
    while (*str != '\0')
      putchar (*(str++));
    putchar ('\n');
    gdFree (output);
    return 0;
  }
  #endif
  #endif