/racket-5.0.2-bin-x86_64-linux-f7/collects/plot/src/all/gdkanji.c
C | 660 lines | 589 code | 64 blank | 7 comment | 213 complexity | daeef9c5262653c20e72da2281708df1 MD5 | raw file
- /* gdkanji.c (Kanji code converter) */
- /* written by Masahito Yamaga (ma@yama-ga.com) */
- #ifdef HAVE_CONFIG_H
- #include "config.h"
- #endif
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include "gd.h"
- #include "gdhelpers.h"
- #ifdef HAVE_ERRNO_H
- #include <errno.h>
- #endif
- #include <stdarg.h>
- #if defined(HAVE_ICONV_H)
- #include <iconv.h>
- #endif
- #ifndef HAVE_ICONV_T_DEF
- typedef void *iconv_t;
- #endif
- #ifndef HAVE_ICONV
- #define ICONV_CONST /**/
- iconv_t iconv_open (const char *, const char *);
- size_t iconv (iconv_t, ICONV_CONST char **, size_t *, char **, size_t *);
- int iconv_close (iconv_t);
- iconv_t
- iconv_open (const char *tocode, const char *fromcode)
- {
- return (iconv_t) (-1);
- }
- size_t
- iconv (iconv_t cd, ICONV_CONST char **inbuf, size_t * inbytesleft,
- char **outbuf, size_t * outbytesleft)
- {
- return 0;
- }
- int
- iconv_close (iconv_t cd)
- {
- return 0;
- }
- #endif /* !HAVE_ICONV */
- #define LIBNAME "any2eucjp()"
- #if defined(__MSC__) || defined(__BORLANDC__) || defined(__TURBOC__) || defined(_Windows) || defined(MSDOS)
- #ifndef SJISPRE
- #define SJISPRE 1
- #endif
- #endif
- #ifdef TRUE
- #undef TRUE
- #endif
- #ifdef FALSE
- #undef FALSE
- #endif
- #define TRUE 1
- #define FALSE 0
- #define NEW 1
- #define OLD 2
- #define ESCI 3
- #define NEC 4
- #define EUC 5
- #define SJIS 6
- #define EUCORSJIS 7
- #define ASCII 8
- #define NEWJISSTR "JIS7"
- #define OLDJISSTR "jis"
- #define EUCSTR "eucJP"
- #define SJISSTR "SJIS"
- #define ESC 27
- #define SS2 142
- static void
- debug (const char *format, ...)
- {
- #ifdef DEBUG
- va_list args;
- va_start (args, format);
- fprintf (stdout, "%s: ", LIBNAME);
- vfprintf (stdout, format, args);
- fprintf (stdout, "\n");
- va_end (args);
- #endif
- }
- static void
- error (const char *format, ...)
- {
- va_list args;
- va_start (args, format);
- fprintf (stderr, "%s: ", LIBNAME);
- vfprintf (stderr, format, args);
- fprintf (stderr, "\n");
- va_end (args);
- }
- /* DetectKanjiCode() derived from DetectCodeType() by Ken Lunde. */
- static int
- DetectKanjiCode (unsigned char *str)
- {
- static int whatcode = ASCII;
- int oldcode = ASCII;
- int c, i;
- char *lang = NULL;
- c = '\1';
- i = 0;
- if (whatcode != EUCORSJIS && whatcode != ASCII)
- {
- oldcode = whatcode;
- whatcode = ASCII;
- }
- while ((whatcode == EUCORSJIS || whatcode == ASCII) && c != '\0')
- {
- if ((c = str[i++]) != '\0')
- {
- if (c == ESC)
- {
- c = str[i++];
- if (c == '$')
- {
- c = str[i++];
- if (c == 'B')
- whatcode = NEW;
- else if (c == '@')
- whatcode = OLD;
- }
- else if (c == '(')
- {
- c = str[i++];
- if (c == 'I')
- whatcode = ESCI;
- }
- else if (c == 'K')
- whatcode = NEC;
- }
- else if ((c >= 129 && c <= 141) || (c >= 143 && c <= 159))
- whatcode = SJIS;
- else if (c == SS2)
- {
- c = str[i++];
- if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160)
- || (c >= 224 && c <= 252))
- whatcode = SJIS;
- else if (c >= 161 && c <= 223)
- whatcode = EUCORSJIS;
- }
- else if (c >= 161 && c <= 223)
- {
- c = str[i++];
- if (c >= 240 && c <= 254)
- whatcode = EUC;
- else if (c >= 161 && c <= 223)
- whatcode = EUCORSJIS;
- else if (c >= 224 && c <= 239)
- {
- whatcode = EUCORSJIS;
- while (c >= 64 && c != '\0' && whatcode == EUCORSJIS)
- {
- if (c >= 129)
- {
- if (c <= 141 || (c >= 143 && c <= 159))
- whatcode = SJIS;
- else if (c >= 253 && c <= 254)
- whatcode = EUC;
- }
- c = str[i++];
- }
- }
- else if (c <= 159)
- whatcode = SJIS;
- }
- else if (c >= 240 && c <= 254)
- whatcode = EUC;
- else if (c >= 224 && c <= 239)
- {
- c = str[i++];
- if ((c >= 64 && c <= 126) || (c >= 128 && c <= 160))
- whatcode = SJIS;
- else if (c >= 253 && c <= 254)
- whatcode = EUC;
- else if (c >= 161 && c <= 252)
- whatcode = EUCORSJIS;
- }
- }
- }
- #ifdef DEBUG
- if (whatcode == ASCII)
- debug ("Kanji code not included.");
- else if (whatcode == EUCORSJIS)
- debug ("Kanji code not detected.");
- else
- debug ("Kanji code detected at %d byte.", i);
- #endif
- if (whatcode == EUCORSJIS && oldcode != ASCII)
- whatcode = oldcode;
- if (whatcode == EUCORSJIS)
- {
- if (getenv ("LC_ALL"))
- lang = getenv ("LC_ALL");
- else if (getenv ("LC_CTYPE"))
- lang = getenv ("LC_CTYPE");
- else if (getenv ("LANG"))
- lang = getenv ("LANG");
- if (lang)
- {
- if (strcmp (lang, "ja_JP.SJIS") == 0 ||
- #ifdef hpux
- strcmp (lang, "japanese") == 0 ||
- #endif
- strcmp (lang, "ja_JP.mscode") == 0 ||
- strcmp (lang, "ja_JP.PCK") == 0)
- whatcode = SJIS;
- else if (strncmp (lang, "ja", 2) == 0)
- #ifdef SJISPRE
- whatcode = SJIS;
- #else
- whatcode = EUC;
- #endif
- }
- }
- if (whatcode == EUCORSJIS)
- #ifdef SJISPRE
- whatcode = SJIS;
- #else
- whatcode = EUC;
- #endif
- return whatcode;
- }
- /* SJIStoJIS() is sjis2jis() by Ken Lunde. */
- static void
- SJIStoJIS (int *p1, int *p2)
- {
- register unsigned char c1 = *p1;
- register unsigned char c2 = *p2;
- register int adjust = c2 < 159;
- register int rowOffset = c1 < 160 ? 112 : 176;
- register int cellOffset = adjust ? (31 + (c2 > 127)) : 126;
- *p1 = ((c1 - rowOffset) << 1) - adjust;
- *p2 -= cellOffset;
- }
- /* han2zen() was derived from han2zen() written by Ken Lunde. */
- #define IS_DAKU(c) ((c >= 182 && c <= 196) || (c >= 202 && c <= 206) || (c == 179))
- #define IS_HANDAKU(c) (c >= 202 && c <= 206)
- static void
- han2zen (int *p1, int *p2)
- {
- int c = *p1;
- int daku = FALSE;
- int handaku = FALSE;
- int mtable[][2] = {
- {129, 66},
- {129, 117},
- {129, 118},
- {129, 65},
- {129, 69},
- {131, 146},
- {131, 64},
- {131, 66},
- {131, 68},
- {131, 70},
- {131, 72},
- {131, 131},
- {131, 133},
- {131, 135},
- {131, 98},
- {129, 91},
- {131, 65},
- {131, 67},
- {131, 69},
- {131, 71},
- {131, 73},
- {131, 74},
- {131, 76},
- {131, 78},
- {131, 80},
- {131, 82},
- {131, 84},
- {131, 86},
- {131, 88},
- {131, 90},
- {131, 92},
- {131, 94},
- {131, 96},
- {131, 99},
- {131, 101},
- {131, 103},
- {131, 105},
- {131, 106},
- {131, 107},
- {131, 108},
- {131, 109},
- {131, 110},
- {131, 113},
- {131, 116},
- {131, 119},
- {131, 122},
- {131, 125},
- {131, 126},
- {131, 128},
- {131, 129},
- {131, 130},
- {131, 132},
- {131, 134},
- {131, 136},
- {131, 137},
- {131, 138},
- {131, 139},
- {131, 140},
- {131, 141},
- {131, 143},
- {131, 147},
- {129, 74},
- {129, 75}
- };
- if (*p2 == 222 && IS_DAKU (*p1))
- daku = TRUE; /* Daku-ten */
- else if (*p2 == 223 && IS_HANDAKU (*p1))
- handaku = TRUE; /* Han-daku-ten */
- *p1 = mtable[c - 161][0];
- *p2 = mtable[c - 161][1];
- if (daku)
- {
- if ((*p2 >= 74 && *p2 <= 103) || (*p2 >= 110 && *p2 <= 122))
- (*p2)++;
- else if (*p2 == 131 && *p2 == 69)
- *p2 = 148;
- }
- else if (handaku && *p2 >= 110 && *p2 <= 122)
- (*p2) += 2;
- }
- /* Recast strcpy to handle unsigned chars used below. */
- #define ustrcpy(A,B) (strcpy((char*)(A),(const char*)(B)))
- static void
- do_convert (unsigned char *to, unsigned char *from, const char *code)
- {
- #ifdef HAVE_ICONV
- iconv_t cd;
- size_t from_len, to_len;
- if ((cd = iconv_open (EUCSTR, code)) == (iconv_t) - 1)
- {
- error ("iconv_open() error");
- #ifdef HAVE_ERRNO_H
- if (errno == EINVAL)
- error ("invalid code specification: \"%s\" or \"%s\"", EUCSTR, code);
- #endif
- strcpy ((char *) to, (const char *) from);
- return;
- }
- from_len = strlen ((const char *) from) + 1;
- to_len = BUFSIZ;
- if ((int) (iconv (cd, (char **) &from, &from_len, (char **) &to, &to_len))
- == -1)
- {
- #ifdef HAVE_ERRNO_H
- if (errno == EINVAL)
- error ("invalid end of input string");
- else if (errno == EILSEQ)
- error ("invalid code in input string");
- else if (errno == E2BIG)
- error ("output buffer overflow at do_convert()");
- else
- #endif
- error ("something happen");
- strcpy ((char *) to, (const char *) from);
- return;
- }
- if (iconv_close (cd) != 0)
- {
- error ("iconv_close() error");
- }
- #else
- int p1, p2, i, j;
- int jisx0208 = FALSE;
- int hankaku = FALSE;
- j = 0;
- if (strcmp (code, NEWJISSTR) == 0 || strcmp (code, OLDJISSTR) == 0)
- {
- for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
- {
- if (from[i] == ESC)
- {
- i++;
- if (from[i] == '$')
- {
- jisx0208 = TRUE;
- hankaku = FALSE;
- i++;
- }
- else if (from[i] == '(')
- {
- jisx0208 = FALSE;
- i++;
- if (from[i] == 'I') /* Hankaku Kana */
- hankaku = TRUE;
- else
- hankaku = FALSE;
- }
- }
- else
- {
- if (jisx0208)
- to[j++] = from[i] + 128;
- else if (hankaku)
- {
- to[j++] = SS2;
- to[j++] = from[i] + 128;
- }
- else
- to[j++] = from[i];
- }
- }
- }
- else if (strcmp (code, SJISSTR) == 0)
- {
- for (i = 0; from[i] != '\0' && j < BUFSIZ; i++)
- {
- p1 = from[i];
- if (p1 < 127)
- to[j++] = p1;
- else if ((p1 >= 161) && (p1 <= 223))
- { /* Hankaku Kana */
- to[j++] = SS2;
- to[j++] = p1;
- }
- else
- {
- p2 = from[++i];
- SJIStoJIS (&p1, &p2);
- to[j++] = p1 + 128;
- to[j++] = p2 + 128;
- }
- }
- }
- else
- {
- error ("invalid code specification: \"%s\"", code);
- return;
- }
- if (j >= BUFSIZ)
- {
- error ("output buffer overflow at do_convert()");
- ustrcpy (to, from);
- }
- else
- to[j] = '\0';
- #endif /* HAVE_ICONV */
- }
- static int
- do_check_and_conv (unsigned char *to, unsigned char *from)
- {
- static unsigned char tmp[BUFSIZ];
- int p1, p2, i, j;
- int kanji = TRUE;
- switch (DetectKanjiCode (from))
- {
- case NEW:
- debug ("Kanji code is New JIS.");
- do_convert (tmp, from, NEWJISSTR);
- break;
- case OLD:
- debug ("Kanji code is Old JIS.");
- do_convert (tmp, from, OLDJISSTR);
- break;
- case ESCI:
- debug
- ("This string includes Hankaku-Kana (jisx0201) escape sequence [ESC] + ( + I.");
- do_convert (tmp, from, NEWJISSTR);
- break;
- case NEC:
- debug ("Kanji code is NEC Kanji.");
- error ("cannot convert NEC Kanji.");
- ustrcpy (tmp, from);
- kanji = FALSE;
- break;
- case EUC:
- debug ("Kanji code is EUC.");
- ustrcpy (tmp, from);
- break;
- case SJIS:
- debug ("Kanji code is SJIS.");
- do_convert (tmp, from, SJISSTR);
- break;
- case EUCORSJIS:
- debug ("Kanji code is EUC or SJIS.");
- ustrcpy (tmp, from);
- kanji = FALSE;
- break;
- case ASCII:
- debug ("This is ASCII string.");
- ustrcpy (tmp, from);
- kanji = FALSE;
- break;
- default:
- debug ("This string includes unknown code.");
- ustrcpy (tmp, from);
- kanji = FALSE;
- break;
- }
- /* Hankaku Kana ---> Zenkaku Kana */
- if (kanji)
- {
- j = 0;
- for (i = 0; tmp[i] != '\0' && j < BUFSIZ; i++)
- {
- if (tmp[i] == SS2)
- {
- p1 = tmp[++i];
- if (tmp[i + 1] == SS2)
- {
- p2 = tmp[i + 2];
- if (p2 == 222 || p2 == 223)
- i += 2;
- else
- p2 = 0;
- }
- else
- p2 = 0;
- han2zen (&p1, &p2);
- SJIStoJIS (&p1, &p2);
- to[j++] = p1 + 128;
- to[j++] = p2 + 128;
- }
- else
- to[j++] = tmp[i];
- }
- if (j >= BUFSIZ)
- {
- error ("output buffer overflow at Hankaku --> Zenkaku");
- ustrcpy (to, tmp);
- }
- else
- to[j] = '\0';
- }
- else
- ustrcpy (to, tmp);
- return kanji;
- }
- int
- any2eucjp (unsigned char *dest, unsigned char *src, unsigned int dest_max)
- {
- static unsigned char tmp_dest[BUFSIZ];
- int ret;
- if (strlen ((const char *) src) >= BUFSIZ)
- {
- error ("input string too large");
- return -1;
- }
- if (dest_max > BUFSIZ)
- {
- error
- ("invalid maximum size of destination\nit should be less than %d.",
- BUFSIZ);
- return -1;
- }
- ret = do_check_and_conv (tmp_dest, src);
- if (strlen ((const char *) tmp_dest) >= dest_max)
- {
- error ("output buffer overflow");
- ustrcpy (dest, src);
- return -1;
- }
- ustrcpy (dest, tmp_dest);
- return ret;
- }
- #if 0
- unsigned int
- strwidth (unsigned char *s)
- {
- unsigned char *t;
- unsigned int i;
- t = (unsigned char *) gdMalloc (BUFSIZ);
- any2eucjp (t, s, BUFSIZ);
- i = strlen (t);
- gdFree (t);
- return i;
- }
- #ifdef DEBUG
- int
- main ()
- {
- unsigned char input[BUFSIZ];
- unsigned char *output;
- unsigned char *str;
- int c, i = 0;
- while ((c = fgetc (stdin)) != '\n' && i < BUFSIZ)
- input[i++] = c;
- input[i] = '\0';
- printf ("input : %d bytes\n", strlen ((const char *) input));
- printf ("output: %d bytes\n", strwidth (input));
- output = (unsigned char *) gdMalloc (BUFSIZ);
- any2eucjp (output, input, BUFSIZ);
- str = output;
- while (*str != '\0')
- putchar (*(str++));
- putchar ('\n');
- gdFree (output);
- return 0;
- }
- #endif
- #endif