/pdf/pdf_cmap_parse.c
https://github.com/Lafriks/mupdf · C · 490 lines · 401 code · 83 blank · 6 comment · 137 complexity · 931d9c4ff9729362d0900ea1cd339af6 MD5 · raw file
- #include "fitz.h"
- #include "mupdf.h"
- /*
- * CMap parser
- */
- enum
- {
- TOK_USECMAP = PDF_NUM_TOKENS,
- TOK_BEGIN_CODESPACE_RANGE,
- TOK_END_CODESPACE_RANGE,
- TOK_BEGIN_BF_CHAR,
- TOK_END_BF_CHAR,
- TOK_BEGIN_BF_RANGE,
- TOK_END_BF_RANGE,
- TOK_BEGIN_CID_CHAR,
- TOK_END_CID_CHAR,
- TOK_BEGIN_CID_RANGE,
- TOK_END_CID_RANGE,
- TOK_END_CMAP
- };
- static int
- pdf_cmap_token_from_keyword(char *key)
- {
- if (!strcmp(key, "usecmap")) return TOK_USECMAP;
- if (!strcmp(key, "begincodespacerange")) return TOK_BEGIN_CODESPACE_RANGE;
- if (!strcmp(key, "endcodespacerange")) return TOK_END_CODESPACE_RANGE;
- if (!strcmp(key, "beginbfchar")) return TOK_BEGIN_BF_CHAR;
- if (!strcmp(key, "endbfchar")) return TOK_END_BF_CHAR;
- if (!strcmp(key, "beginbfrange")) return TOK_BEGIN_BF_RANGE;
- if (!strcmp(key, "endbfrange")) return TOK_END_BF_RANGE;
- if (!strcmp(key, "begincidchar")) return TOK_BEGIN_CID_CHAR;
- if (!strcmp(key, "endcidchar")) return TOK_END_CID_CHAR;
- if (!strcmp(key, "begincidrange")) return TOK_BEGIN_CID_RANGE;
- if (!strcmp(key, "endcidrange")) return TOK_END_CID_RANGE;
- if (!strcmp(key, "endcmap")) return TOK_END_CMAP;
- return PDF_TOK_KEYWORD;
- }
/*
 * Decode a byte string as a big-endian integer code.
 *
 * buf: bytes to decode; each byte is treated as unsigned. The data does
 *      not need to be NUL terminated.
 * len: number of bytes to read from buf. A value <= 0 reads nothing and
 *      yields 0.
 *
 * Returns the decoded value. The value is accumulated in an unsigned int
 * so that a string longer than an int simply loses its leading bytes
 * instead of overflowing a signed left shift (undefined behaviour).
 */
static int
pdf_code_from_string(char *buf, int len)
{
	const unsigned char *p = (const unsigned char *)buf;
	unsigned int code = 0;

	/* "> 0" rather than "!= 0": a negative length must not walk memory. */
	while (len-- > 0)
		code = (code << 8) | *p++;

	return (int)code;
}
- static fz_error
- pdf_lex_cmap(int *tok, fz_stream *file, char *buf, int n, int *sl)
- {
- fz_error error;
- error = pdf_lex(tok, file, buf, n, sl);
- if (error)
- return fz_error_note(file->ctx, error, "cannot parse cmap token");
- if (*tok == PDF_TOK_KEYWORD)
- *tok = pdf_cmap_token_from_keyword(buf);
- return fz_okay;
- }
- static fz_error
- pdf_parse_cmap_name(pdf_cmap *cmap, fz_stream *file)
- {
- fz_error error;
- char buf[256];
- int tok;
- int len;
- error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_error_note(file->ctx, error, "syntaxerror in cmap");
- if (tok == PDF_TOK_NAME)
- fz_strlcpy(cmap->cmap_name, buf, sizeof(cmap->cmap_name));
- else
- fz_warn(file->ctx, "expected name after CMapName in cmap");
- return fz_okay;
- }
- static fz_error
- pdf_parse_wmode(pdf_cmap *cmap, fz_stream *file)
- {
- fz_error error;
- char buf[256];
- int tok;
- int len;
- error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_error_note(file->ctx, error, "syntaxerror in cmap");
- if (tok == PDF_TOK_INT)
- pdf_set_wmode(cmap, atoi(buf));
- else
- fz_warn(file->ctx, "expected integer after WMode in cmap");
- return fz_okay;
- }
- static fz_error
- pdf_parse_codespace_range(pdf_cmap *cmap, fz_stream *file)
- {
- fz_error error;
- char buf[256];
- int tok;
- int len;
- int lo, hi;
- while (1)
- {
- error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_error_note(file->ctx, error, "syntaxerror in cmap");
- if (tok == TOK_END_CODESPACE_RANGE)
- return fz_okay;
- else if (tok == PDF_TOK_STRING)
- {
- lo = pdf_code_from_string(buf, len);
- error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_error_note(file->ctx, error, "syntaxerror in cmap");
- if (tok == PDF_TOK_STRING)
- {
- hi = pdf_code_from_string(buf, len);
- pdf_add_codespace(file->ctx, cmap, lo, hi, len);
- }
- else break;
- }
- else break;
- }
- return fz_error_make(file->ctx, "expected string or endcodespacerange");
- }
- static fz_error
- pdf_parse_cid_range(pdf_cmap *cmap, fz_stream *file)
- {
- fz_error error;
- char buf[256];
- int tok;
- int len;
- int lo, hi, dst;
- while (1)
- {
- error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_error_note(file->ctx, error, "syntaxerror in cmap");
- if (tok == TOK_END_CID_RANGE)
- return fz_okay;
- else if (tok != PDF_TOK_STRING)
- return fz_error_make(file->ctx, "expected string or endcidrange");
- lo = pdf_code_from_string(buf, len);
- error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_error_note(file->ctx, error, "syntaxerror in cmap");
- if (tok != PDF_TOK_STRING)
- return fz_error_make(file->ctx, "expected string");
- hi = pdf_code_from_string(buf, len);
- error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_error_note(file->ctx, error, "syntaxerror in cmap");
- if (tok != PDF_TOK_INT)
- return fz_error_make(file->ctx, "expected integer");
- dst = atoi(buf);
- pdf_map_range_to_range(file->ctx, cmap, lo, hi, dst);
- }
- }
- static fz_error
- pdf_parse_cid_char(pdf_cmap *cmap, fz_stream *file)
- {
- fz_error error;
- char buf[256];
- int tok;
- int len;
- int src, dst;
- while (1)
- {
- error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_error_note(file->ctx, error, "syntaxerror in cmap");
- if (tok == TOK_END_CID_CHAR)
- return fz_okay;
- else if (tok != PDF_TOK_STRING)
- return fz_error_make(file->ctx, "expected string or endcidchar");
- src = pdf_code_from_string(buf, len);
- error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_error_note(file->ctx, error, "syntaxerror in cmap");
- if (tok != PDF_TOK_INT)
- return fz_error_make(file->ctx, "expected integer");
- dst = atoi(buf);
- pdf_map_range_to_range(file->ctx, cmap, src, src, dst);
- }
- }
- static fz_error
- pdf_parse_bf_range_array(pdf_cmap *cmap, fz_stream *file, int lo, int hi)
- {
- fz_error error;
- char buf[256];
- int tok;
- int len;
- int dst[256];
- int i;
- while (1)
- {
- error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_error_note(file->ctx, error, "syntaxerror in cmap");
- if (tok == PDF_TOK_CLOSE_ARRAY)
- return fz_okay;
- /* Note: does not handle [ /Name /Name ... ] */
- else if (tok != PDF_TOK_STRING)
- return fz_error_make(file->ctx, "expected string or ]");
- if (len / 2)
- {
- for (i = 0; i < len / 2; i++)
- dst[i] = pdf_code_from_string(buf + i * 2, 2);
- pdf_map_one_to_many(file->ctx, cmap, lo, dst, len / 2);
- }
- lo ++;
- }
- }
- static fz_error
- pdf_parse_bf_range(pdf_cmap *cmap, fz_stream *file)
- {
- fz_error error;
- char buf[256];
- int tok;
- int len;
- int lo, hi, dst;
- while (1)
- {
- error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_error_note(file->ctx, error, "syntaxerror in cmap");
- if (tok == TOK_END_BF_RANGE)
- return fz_okay;
- else if (tok != PDF_TOK_STRING)
- return fz_error_make(file->ctx, "expected string or endbfrange");
- lo = pdf_code_from_string(buf, len);
- error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_error_note(file->ctx, error, "syntaxerror in cmap");
- if (tok != PDF_TOK_STRING)
- return fz_error_make(file->ctx, "expected string");
- hi = pdf_code_from_string(buf, len);
- error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_error_note(file->ctx, error, "syntaxerror in cmap");
- if (tok == PDF_TOK_STRING)
- {
- if (len == 2)
- {
- dst = pdf_code_from_string(buf, len);
- pdf_map_range_to_range(file->ctx, cmap, lo, hi, dst);
- }
- else
- {
- int dststr[256];
- int i;
- if (len / 2)
- {
- for (i = 0; i < len / 2; i++)
- dststr[i] = pdf_code_from_string(buf + i * 2, 2);
- while (lo <= hi)
- {
- dststr[i-1] ++;
- pdf_map_one_to_many(file->ctx, cmap, lo, dststr, i);
- lo ++;
- }
- }
- }
- }
- else if (tok == PDF_TOK_OPEN_ARRAY)
- {
- error = pdf_parse_bf_range_array(cmap, file, lo, hi);
- if (error)
- return fz_error_note(file->ctx, error, "cannot map bfrange");
- }
- else
- {
- return fz_error_make(file->ctx, "expected string or array or endbfrange");
- }
- }
- }
- static fz_error
- pdf_parse_bf_char(pdf_cmap *cmap, fz_stream *file)
- {
- fz_error error;
- char buf[256];
- int tok;
- int len;
- int dst[256];
- int src;
- int i;
- while (1)
- {
- error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_error_note(file->ctx, error, "syntaxerror in cmap");
- if (tok == TOK_END_BF_CHAR)
- return fz_okay;
- else if (tok != PDF_TOK_STRING)
- return fz_error_make(file->ctx, "expected string or endbfchar");
- src = pdf_code_from_string(buf, len);
- error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_error_note(file->ctx, error, "syntaxerror in cmap");
- /* Note: does not handle /dstName */
- if (tok != PDF_TOK_STRING)
- return fz_error_make(file->ctx, "expected string");
- if (len / 2)
- {
- for (i = 0; i < len / 2; i++)
- dst[i] = pdf_code_from_string(buf + i * 2, 2);
- pdf_map_one_to_many(file->ctx, cmap, src, dst, i);
- }
- }
- }
- fz_error
- pdf_parse_cmap(pdf_cmap **cmapp, fz_stream *file)
- {
- fz_error error;
- pdf_cmap *cmap;
- char key[64];
- char buf[256];
- int tok;
- int len;
- cmap = pdf_new_cmap(file->ctx);
- strcpy(key, ".notdef");
- while (1)
- {
- error = pdf_lex_cmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- {
- error = fz_error_note(file->ctx, error, "syntaxerror in cmap");
- goto cleanup;
- }
- if (tok == PDF_TOK_EOF || tok == TOK_END_CMAP)
- break;
- else if (tok == PDF_TOK_NAME)
- {
- if (!strcmp(buf, "CMapName"))
- {
- error = pdf_parse_cmap_name(cmap, file);
- if (error)
- {
- error = fz_error_note(file->ctx, error, "syntaxerror in cmap after CMapName");
- goto cleanup;
- }
- }
- else if (!strcmp(buf, "WMode"))
- {
- error = pdf_parse_wmode(cmap, file);
- if (error)
- {
- error = fz_error_note(file->ctx, error, "syntaxerror in cmap after WMode");
- goto cleanup;
- }
- }
- else
- fz_strlcpy(key, buf, sizeof key);
- }
- else if (tok == TOK_USECMAP)
- {
- fz_strlcpy(cmap->usecmap_name, key, sizeof(cmap->usecmap_name));
- }
- else if (tok == TOK_BEGIN_CODESPACE_RANGE)
- {
- error = pdf_parse_codespace_range(cmap, file);
- if (error)
- {
- error = fz_error_note(file->ctx, error, "syntaxerror in cmap codespacerange");
- goto cleanup;
- }
- }
- else if (tok == TOK_BEGIN_BF_CHAR)
- {
- error = pdf_parse_bf_char(cmap, file);
- if (error)
- {
- error = fz_error_note(file->ctx, error, "syntaxerror in cmap bfchar");
- goto cleanup;
- }
- }
- else if (tok == TOK_BEGIN_CID_CHAR)
- {
- error = pdf_parse_cid_char(cmap, file);
- if (error)
- {
- error = fz_error_note(file->ctx, error, "syntaxerror in cmap cidchar");
- goto cleanup;
- }
- }
- else if (tok == TOK_BEGIN_BF_RANGE)
- {
- error = pdf_parse_bf_range(cmap, file);
- if (error)
- {
- error = fz_error_note(file->ctx, error, "syntaxerror in cmap bfrange");
- goto cleanup;
- }
- }
- else if (tok == TOK_BEGIN_CID_RANGE)
- {
- error = pdf_parse_cid_range(cmap, file);
- if (error)
- {
- error = fz_error_note(file->ctx, error, "syntaxerror in cmap cidrange");
- goto cleanup;
- }
- }
- /* ignore everything else */
- }
- pdf_sort_cmap(file->ctx, cmap);
- *cmapp = cmap;
- return fz_okay;
- cleanup:
- pdf_drop_cmap(file->ctx, cmap);
- return error; /* already rethrown */
- }