PageRenderTime 78ms CodeModel.GetById 15ms app.highlight 57ms RepoModel.GetById 1ms app.codeStats 1ms

/Modules/expat/xmlrole.c

http://unladen-swallow.googlecode.com/
C | 1330 lines | 1250 code | 70 blank | 10 comment | 91 complexity | 141292a7dc327998f41b4787af9d2b72 MD5 | raw file
   1/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
   2   See the file COPYING for copying permission.
   3*/
   4
   5#ifdef COMPILED_FROM_DSP
   6#include "winconfig.h"
   7#elif defined(MACOS_CLASSIC)
   8#include "macconfig.h"
   9#elif defined(__amigaos4__)
  10#include "amigaconfig.h"
  11#else
  12#ifdef HAVE_EXPAT_CONFIG_H
  13#include <expat_config.h>
  14#endif
  15#endif /* ndef COMPILED_FROM_DSP */
  16
  17#include <stddef.h>
  18
  19#include "expat_external.h"
  20#include "internal.h"
  21#include "xmlrole.h"
  22#include "ascii.h"
  23
  24/* Doesn't check:
  25
  26 that ,| are not mixed in a model group
  27 content of literals
  28
  29*/
  30
  31static const char KW_ANY[] = {
  32    ASCII_A, ASCII_N, ASCII_Y, '\0' };
  33static const char KW_ATTLIST[] = {
  34    ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
  35static const char KW_CDATA[] = {
  36    ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
  37static const char KW_DOCTYPE[] = {
  38    ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
  39static const char KW_ELEMENT[] = {
  40    ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
  41static const char KW_EMPTY[] = {
  42    ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
  43static const char KW_ENTITIES[] = {
  44    ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S,
  45    '\0' };
  46static const char KW_ENTITY[] = {
  47    ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
  48static const char KW_FIXED[] = {
  49    ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
  50static const char KW_ID[] = {
  51    ASCII_I, ASCII_D, '\0' };
  52static const char KW_IDREF[] = {
  53    ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
  54static const char KW_IDREFS[] = {
  55    ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
  56static const char KW_IGNORE[] = {
  57    ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
  58static const char KW_IMPLIED[] = {
  59    ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
  60static const char KW_INCLUDE[] = {
  61    ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
  62static const char KW_NDATA[] = {
  63    ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
  64static const char KW_NMTOKEN[] = {
  65    ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
  66static const char KW_NMTOKENS[] = {
  67    ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S,
  68    '\0' };
  69static const char KW_NOTATION[] =
  70    { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N,
  71      '\0' };
  72static const char KW_PCDATA[] = {
  73    ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
  74static const char KW_PUBLIC[] = {
  75    ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
  76static const char KW_REQUIRED[] = {
  77    ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D,
  78    '\0' };
  79static const char KW_SYSTEM[] = {
  80    ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
  81
  82#ifndef MIN_BYTES_PER_CHAR
  83#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
  84#endif
  85
  86#ifdef XML_DTD
  87#define setTopLevel(state) \
  88  ((state)->handler = ((state)->documentEntity \
  89                       ? internalSubset \
  90                       : externalSubset1))
  91#else /* not XML_DTD */
  92#define setTopLevel(state) ((state)->handler = internalSubset)
  93#endif /* not XML_DTD */
  94
  95typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state,
  96                                   int tok,
  97                                   const char *ptr,
  98                                   const char *end,
  99                                   const ENCODING *enc);
 100
 101static PROLOG_HANDLER
 102  prolog0, prolog1, prolog2,
 103  doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
 104  internalSubset,
 105  entity0, entity1, entity2, entity3, entity4, entity5, entity6,
 106  entity7, entity8, entity9, entity10,
 107  notation0, notation1, notation2, notation3, notation4,
 108  attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
 109  attlist7, attlist8, attlist9,
 110  element0, element1, element2, element3, element4, element5, element6,
 111  element7,
 112#ifdef XML_DTD
 113  externalSubset0, externalSubset1,
 114  condSect0, condSect1, condSect2,
 115#endif /* XML_DTD */
 116  declClose,
 117  error;
 118
 119static int FASTCALL common(PROLOG_STATE *state, int tok);
 120
 121static int PTRCALL
 122prolog0(PROLOG_STATE *state,
 123        int tok,
 124        const char *ptr,
 125        const char *end,
 126        const ENCODING *enc)
 127{
 128  switch (tok) {
 129  case XML_TOK_PROLOG_S:
 130    state->handler = prolog1;
 131    return XML_ROLE_NONE;
 132  case XML_TOK_XML_DECL:
 133    state->handler = prolog1;
 134    return XML_ROLE_XML_DECL;
 135  case XML_TOK_PI:
 136    state->handler = prolog1;
 137    return XML_ROLE_PI;
 138  case XML_TOK_COMMENT:
 139    state->handler = prolog1;
 140    return XML_ROLE_COMMENT;
 141  case XML_TOK_BOM:
 142    return XML_ROLE_NONE;
 143  case XML_TOK_DECL_OPEN:
 144    if (!XmlNameMatchesAscii(enc,
 145                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
 146                             end,
 147                             KW_DOCTYPE))
 148      break;
 149    state->handler = doctype0;
 150    return XML_ROLE_DOCTYPE_NONE;
 151  case XML_TOK_INSTANCE_START:
 152    state->handler = error;
 153    return XML_ROLE_INSTANCE_START;
 154  }
 155  return common(state, tok);
 156}
 157
 158static int PTRCALL
 159prolog1(PROLOG_STATE *state,
 160        int tok,
 161        const char *ptr,
 162        const char *end,
 163        const ENCODING *enc)
 164{
 165  switch (tok) {
 166  case XML_TOK_PROLOG_S:
 167    return XML_ROLE_NONE;
 168  case XML_TOK_PI:
 169    return XML_ROLE_PI;
 170  case XML_TOK_COMMENT:
 171    return XML_ROLE_COMMENT;
 172  case XML_TOK_BOM:
 173    return XML_ROLE_NONE;
 174  case XML_TOK_DECL_OPEN:
 175    if (!XmlNameMatchesAscii(enc,
 176                             ptr + 2 * MIN_BYTES_PER_CHAR(enc),
 177                             end,
 178                             KW_DOCTYPE))
 179      break;
 180    state->handler = doctype0;
 181    return XML_ROLE_DOCTYPE_NONE;
 182  case XML_TOK_INSTANCE_START:
 183    state->handler = error;
 184    return XML_ROLE_INSTANCE_START;
 185  }
 186  return common(state, tok);
 187}
 188
 189static int PTRCALL
 190prolog2(PROLOG_STATE *state,
 191        int tok,
 192        const char *ptr,
 193        const char *end,
 194        const ENCODING *enc)
 195{
 196  switch (tok) {
 197  case XML_TOK_PROLOG_S:
 198    return XML_ROLE_NONE;
 199  case XML_TOK_PI:
 200    return XML_ROLE_PI;
 201  case XML_TOK_COMMENT:
 202    return XML_ROLE_COMMENT;
 203  case XML_TOK_INSTANCE_START:
 204    state->handler = error;
 205    return XML_ROLE_INSTANCE_START;
 206  }
 207  return common(state, tok);
 208}
 209
 210static int PTRCALL
 211doctype0(PROLOG_STATE *state,
 212         int tok,
 213         const char *ptr,
 214         const char *end,
 215         const ENCODING *enc)
 216{
 217  switch (tok) {
 218  case XML_TOK_PROLOG_S:
 219    return XML_ROLE_DOCTYPE_NONE;
 220  case XML_TOK_NAME:
 221  case XML_TOK_PREFIXED_NAME:
 222    state->handler = doctype1;
 223    return XML_ROLE_DOCTYPE_NAME;
 224  }
 225  return common(state, tok);
 226}
 227
 228static int PTRCALL
 229doctype1(PROLOG_STATE *state,
 230         int tok,
 231         const char *ptr,
 232         const char *end,
 233         const ENCODING *enc)
 234{
 235  switch (tok) {
 236  case XML_TOK_PROLOG_S:
 237    return XML_ROLE_DOCTYPE_NONE;
 238  case XML_TOK_OPEN_BRACKET:
 239    state->handler = internalSubset;
 240    return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
 241  case XML_TOK_DECL_CLOSE:
 242    state->handler = prolog2;
 243    return XML_ROLE_DOCTYPE_CLOSE;
 244  case XML_TOK_NAME:
 245    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
 246      state->handler = doctype3;
 247      return XML_ROLE_DOCTYPE_NONE;
 248    }
 249    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
 250      state->handler = doctype2;
 251      return XML_ROLE_DOCTYPE_NONE;
 252    }
 253    break;
 254  }
 255  return common(state, tok);
 256}
 257
 258static int PTRCALL
 259doctype2(PROLOG_STATE *state,
 260         int tok,
 261         const char *ptr,
 262         const char *end,
 263         const ENCODING *enc)
 264{
 265  switch (tok) {
 266  case XML_TOK_PROLOG_S:
 267    return XML_ROLE_DOCTYPE_NONE;
 268  case XML_TOK_LITERAL:
 269    state->handler = doctype3;
 270    return XML_ROLE_DOCTYPE_PUBLIC_ID;
 271  }
 272  return common(state, tok);
 273}
 274
 275static int PTRCALL
 276doctype3(PROLOG_STATE *state,
 277         int tok,
 278         const char *ptr,
 279         const char *end,
 280         const ENCODING *enc)
 281{
 282  switch (tok) {
 283  case XML_TOK_PROLOG_S:
 284    return XML_ROLE_DOCTYPE_NONE;
 285  case XML_TOK_LITERAL:
 286    state->handler = doctype4;
 287    return XML_ROLE_DOCTYPE_SYSTEM_ID;
 288  }
 289  return common(state, tok);
 290}
 291
 292static int PTRCALL
 293doctype4(PROLOG_STATE *state,
 294         int tok,
 295         const char *ptr,
 296         const char *end,
 297         const ENCODING *enc)
 298{
 299  switch (tok) {
 300  case XML_TOK_PROLOG_S:
 301    return XML_ROLE_DOCTYPE_NONE;
 302  case XML_TOK_OPEN_BRACKET:
 303    state->handler = internalSubset;
 304    return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
 305  case XML_TOK_DECL_CLOSE:
 306    state->handler = prolog2;
 307    return XML_ROLE_DOCTYPE_CLOSE;
 308  }
 309  return common(state, tok);
 310}
 311
 312static int PTRCALL
 313doctype5(PROLOG_STATE *state,
 314         int tok,
 315         const char *ptr,
 316         const char *end,
 317         const ENCODING *enc)
 318{
 319  switch (tok) {
 320  case XML_TOK_PROLOG_S:
 321    return XML_ROLE_DOCTYPE_NONE;
 322  case XML_TOK_DECL_CLOSE:
 323    state->handler = prolog2;
 324    return XML_ROLE_DOCTYPE_CLOSE;
 325  }
 326  return common(state, tok);
 327}
 328
 329static int PTRCALL
 330internalSubset(PROLOG_STATE *state,
 331               int tok,
 332               const char *ptr,
 333               const char *end,
 334               const ENCODING *enc)
 335{
 336  switch (tok) {
 337  case XML_TOK_PROLOG_S:
 338    return XML_ROLE_NONE;
 339  case XML_TOK_DECL_OPEN:
 340    if (XmlNameMatchesAscii(enc,
 341                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
 342                            end,
 343                            KW_ENTITY)) {
 344      state->handler = entity0;
 345      return XML_ROLE_ENTITY_NONE;
 346    }
 347    if (XmlNameMatchesAscii(enc,
 348                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
 349                            end,
 350                            KW_ATTLIST)) {
 351      state->handler = attlist0;
 352      return XML_ROLE_ATTLIST_NONE;
 353    }
 354    if (XmlNameMatchesAscii(enc,
 355                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
 356                            end,
 357                            KW_ELEMENT)) {
 358      state->handler = element0;
 359      return XML_ROLE_ELEMENT_NONE;
 360    }
 361    if (XmlNameMatchesAscii(enc,
 362                            ptr + 2 * MIN_BYTES_PER_CHAR(enc),
 363                            end,
 364                            KW_NOTATION)) {
 365      state->handler = notation0;
 366      return XML_ROLE_NOTATION_NONE;
 367    }
 368    break;
 369  case XML_TOK_PI:
 370    return XML_ROLE_PI;
 371  case XML_TOK_COMMENT:
 372    return XML_ROLE_COMMENT;
 373  case XML_TOK_PARAM_ENTITY_REF:
 374    return XML_ROLE_PARAM_ENTITY_REF;
 375  case XML_TOK_CLOSE_BRACKET:
 376    state->handler = doctype5;
 377    return XML_ROLE_DOCTYPE_NONE;
 378  case XML_TOK_NONE:
 379    return XML_ROLE_NONE;
 380  }
 381  return common(state, tok);
 382}
 383
 384#ifdef XML_DTD
 385
 386static int PTRCALL
 387externalSubset0(PROLOG_STATE *state,
 388                int tok,
 389                const char *ptr,
 390                const char *end,
 391                const ENCODING *enc)
 392{
 393  state->handler = externalSubset1;
 394  if (tok == XML_TOK_XML_DECL)
 395    return XML_ROLE_TEXT_DECL;
 396  return externalSubset1(state, tok, ptr, end, enc);
 397}
 398
 399static int PTRCALL
 400externalSubset1(PROLOG_STATE *state,
 401                int tok,
 402                const char *ptr,
 403                const char *end,
 404                const ENCODING *enc)
 405{
 406  switch (tok) {
 407  case XML_TOK_COND_SECT_OPEN:
 408    state->handler = condSect0;
 409    return XML_ROLE_NONE;
 410  case XML_TOK_COND_SECT_CLOSE:
 411    if (state->includeLevel == 0)
 412      break;
 413    state->includeLevel -= 1;
 414    return XML_ROLE_NONE;
 415  case XML_TOK_PROLOG_S:
 416    return XML_ROLE_NONE;
 417  case XML_TOK_CLOSE_BRACKET:
 418    break;
 419  case XML_TOK_NONE:
 420    if (state->includeLevel)
 421      break;
 422    return XML_ROLE_NONE;
 423  default:
 424    return internalSubset(state, tok, ptr, end, enc);
 425  }
 426  return common(state, tok);
 427}
 428
 429#endif /* XML_DTD */
 430
 431static int PTRCALL
 432entity0(PROLOG_STATE *state,
 433        int tok,
 434        const char *ptr,
 435        const char *end,
 436        const ENCODING *enc)
 437{
 438  switch (tok) {
 439  case XML_TOK_PROLOG_S:
 440    return XML_ROLE_ENTITY_NONE;
 441  case XML_TOK_PERCENT:
 442    state->handler = entity1;
 443    return XML_ROLE_ENTITY_NONE;
 444  case XML_TOK_NAME:
 445    state->handler = entity2;
 446    return XML_ROLE_GENERAL_ENTITY_NAME;
 447  }
 448  return common(state, tok);
 449}
 450
 451static int PTRCALL
 452entity1(PROLOG_STATE *state,
 453        int tok,
 454        const char *ptr,
 455        const char *end,
 456        const ENCODING *enc)
 457{
 458  switch (tok) {
 459  case XML_TOK_PROLOG_S:
 460    return XML_ROLE_ENTITY_NONE;
 461  case XML_TOK_NAME:
 462    state->handler = entity7;
 463    return XML_ROLE_PARAM_ENTITY_NAME;
 464  }
 465  return common(state, tok);
 466}
 467
 468static int PTRCALL
 469entity2(PROLOG_STATE *state,
 470        int tok,
 471        const char *ptr,
 472        const char *end,
 473        const ENCODING *enc)
 474{
 475  switch (tok) {
 476  case XML_TOK_PROLOG_S:
 477    return XML_ROLE_ENTITY_NONE;
 478  case XML_TOK_NAME:
 479    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
 480      state->handler = entity4;
 481      return XML_ROLE_ENTITY_NONE;
 482    }
 483    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
 484      state->handler = entity3;
 485      return XML_ROLE_ENTITY_NONE;
 486    }
 487    break;
 488  case XML_TOK_LITERAL:
 489    state->handler = declClose;
 490    state->role_none = XML_ROLE_ENTITY_NONE;
 491    return XML_ROLE_ENTITY_VALUE;
 492  }
 493  return common(state, tok);
 494}
 495
 496static int PTRCALL
 497entity3(PROLOG_STATE *state,
 498        int tok,
 499        const char *ptr,
 500        const char *end,
 501        const ENCODING *enc)
 502{
 503  switch (tok) {
 504  case XML_TOK_PROLOG_S:
 505    return XML_ROLE_ENTITY_NONE;
 506  case XML_TOK_LITERAL:
 507    state->handler = entity4;
 508    return XML_ROLE_ENTITY_PUBLIC_ID;
 509  }
 510  return common(state, tok);
 511}
 512
 513static int PTRCALL
 514entity4(PROLOG_STATE *state,
 515        int tok,
 516        const char *ptr,
 517        const char *end,
 518        const ENCODING *enc)
 519{
 520  switch (tok) {
 521  case XML_TOK_PROLOG_S:
 522    return XML_ROLE_ENTITY_NONE;
 523  case XML_TOK_LITERAL:
 524    state->handler = entity5;
 525    return XML_ROLE_ENTITY_SYSTEM_ID;
 526  }
 527  return common(state, tok);
 528}
 529
 530static int PTRCALL
 531entity5(PROLOG_STATE *state,
 532        int tok,
 533        const char *ptr,
 534        const char *end,
 535        const ENCODING *enc)
 536{
 537  switch (tok) {
 538  case XML_TOK_PROLOG_S:
 539    return XML_ROLE_ENTITY_NONE;
 540  case XML_TOK_DECL_CLOSE:
 541    setTopLevel(state);
 542    return XML_ROLE_ENTITY_COMPLETE;
 543  case XML_TOK_NAME:
 544    if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
 545      state->handler = entity6;
 546      return XML_ROLE_ENTITY_NONE;
 547    }
 548    break;
 549  }
 550  return common(state, tok);
 551}
 552
 553static int PTRCALL
 554entity6(PROLOG_STATE *state,
 555        int tok,
 556        const char *ptr,
 557        const char *end,
 558        const ENCODING *enc)
 559{
 560  switch (tok) {
 561  case XML_TOK_PROLOG_S:
 562    return XML_ROLE_ENTITY_NONE;
 563  case XML_TOK_NAME:
 564    state->handler = declClose;
 565    state->role_none = XML_ROLE_ENTITY_NONE;
 566    return XML_ROLE_ENTITY_NOTATION_NAME;
 567  }
 568  return common(state, tok);
 569}
 570
 571static int PTRCALL
 572entity7(PROLOG_STATE *state,
 573        int tok,
 574        const char *ptr,
 575        const char *end,
 576        const ENCODING *enc)
 577{
 578  switch (tok) {
 579  case XML_TOK_PROLOG_S:
 580    return XML_ROLE_ENTITY_NONE;
 581  case XML_TOK_NAME:
 582    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
 583      state->handler = entity9;
 584      return XML_ROLE_ENTITY_NONE;
 585    }
 586    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
 587      state->handler = entity8;
 588      return XML_ROLE_ENTITY_NONE;
 589    }
 590    break;
 591  case XML_TOK_LITERAL:
 592    state->handler = declClose;
 593    state->role_none = XML_ROLE_ENTITY_NONE;
 594    return XML_ROLE_ENTITY_VALUE;
 595  }
 596  return common(state, tok);
 597}
 598
 599static int PTRCALL
 600entity8(PROLOG_STATE *state,
 601        int tok,
 602        const char *ptr,
 603        const char *end,
 604        const ENCODING *enc)
 605{
 606  switch (tok) {
 607  case XML_TOK_PROLOG_S:
 608    return XML_ROLE_ENTITY_NONE;
 609  case XML_TOK_LITERAL:
 610    state->handler = entity9;
 611    return XML_ROLE_ENTITY_PUBLIC_ID;
 612  }
 613  return common(state, tok);
 614}
 615
 616static int PTRCALL
 617entity9(PROLOG_STATE *state,
 618        int tok,
 619        const char *ptr,
 620        const char *end,
 621        const ENCODING *enc)
 622{
 623  switch (tok) {
 624  case XML_TOK_PROLOG_S:
 625    return XML_ROLE_ENTITY_NONE;
 626  case XML_TOK_LITERAL:
 627    state->handler = entity10;
 628    return XML_ROLE_ENTITY_SYSTEM_ID;
 629  }
 630  return common(state, tok);
 631}
 632
 633static int PTRCALL
 634entity10(PROLOG_STATE *state,
 635         int tok,
 636         const char *ptr,
 637         const char *end,
 638         const ENCODING *enc)
 639{
 640  switch (tok) {
 641  case XML_TOK_PROLOG_S:
 642    return XML_ROLE_ENTITY_NONE;
 643  case XML_TOK_DECL_CLOSE:
 644    setTopLevel(state);
 645    return XML_ROLE_ENTITY_COMPLETE;
 646  }
 647  return common(state, tok);
 648}
 649
 650static int PTRCALL
 651notation0(PROLOG_STATE *state,
 652          int tok,
 653          const char *ptr,
 654          const char *end,
 655          const ENCODING *enc)
 656{
 657  switch (tok) {
 658  case XML_TOK_PROLOG_S:
 659    return XML_ROLE_NOTATION_NONE;
 660  case XML_TOK_NAME:
 661    state->handler = notation1;
 662    return XML_ROLE_NOTATION_NAME;
 663  }
 664  return common(state, tok);
 665}
 666
 667static int PTRCALL
 668notation1(PROLOG_STATE *state,
 669          int tok,
 670          const char *ptr,
 671          const char *end,
 672          const ENCODING *enc)
 673{
 674  switch (tok) {
 675  case XML_TOK_PROLOG_S:
 676    return XML_ROLE_NOTATION_NONE;
 677  case XML_TOK_NAME:
 678    if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
 679      state->handler = notation3;
 680      return XML_ROLE_NOTATION_NONE;
 681    }
 682    if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
 683      state->handler = notation2;
 684      return XML_ROLE_NOTATION_NONE;
 685    }
 686    break;
 687  }
 688  return common(state, tok);
 689}
 690
 691static int PTRCALL
 692notation2(PROLOG_STATE *state,
 693          int tok,
 694          const char *ptr,
 695          const char *end,
 696          const ENCODING *enc)
 697{
 698  switch (tok) {
 699  case XML_TOK_PROLOG_S:
 700    return XML_ROLE_NOTATION_NONE;
 701  case XML_TOK_LITERAL:
 702    state->handler = notation4;
 703    return XML_ROLE_NOTATION_PUBLIC_ID;
 704  }
 705  return common(state, tok);
 706}
 707
 708static int PTRCALL
 709notation3(PROLOG_STATE *state,
 710          int tok,
 711          const char *ptr,
 712          const char *end,
 713          const ENCODING *enc)
 714{
 715  switch (tok) {
 716  case XML_TOK_PROLOG_S:
 717    return XML_ROLE_NOTATION_NONE;
 718  case XML_TOK_LITERAL:
 719    state->handler = declClose;
 720    state->role_none = XML_ROLE_NOTATION_NONE;
 721    return XML_ROLE_NOTATION_SYSTEM_ID;
 722  }
 723  return common(state, tok);
 724}
 725
 726static int PTRCALL
 727notation4(PROLOG_STATE *state,
 728          int tok,
 729          const char *ptr,
 730          const char *end,
 731          const ENCODING *enc)
 732{
 733  switch (tok) {
 734  case XML_TOK_PROLOG_S:
 735    return XML_ROLE_NOTATION_NONE;
 736  case XML_TOK_LITERAL:
 737    state->handler = declClose;
 738    state->role_none = XML_ROLE_NOTATION_NONE;
 739    return XML_ROLE_NOTATION_SYSTEM_ID;
 740  case XML_TOK_DECL_CLOSE:
 741    setTopLevel(state);
 742    return XML_ROLE_NOTATION_NO_SYSTEM_ID;
 743  }
 744  return common(state, tok);
 745}
 746
 747static int PTRCALL
 748attlist0(PROLOG_STATE *state,
 749         int tok,
 750         const char *ptr,
 751         const char *end,
 752         const ENCODING *enc)
 753{
 754  switch (tok) {
 755  case XML_TOK_PROLOG_S:
 756    return XML_ROLE_ATTLIST_NONE;
 757  case XML_TOK_NAME:
 758  case XML_TOK_PREFIXED_NAME:
 759    state->handler = attlist1;
 760    return XML_ROLE_ATTLIST_ELEMENT_NAME;
 761  }
 762  return common(state, tok);
 763}
 764
 765static int PTRCALL
 766attlist1(PROLOG_STATE *state,
 767         int tok,
 768         const char *ptr,
 769         const char *end,
 770         const ENCODING *enc)
 771{
 772  switch (tok) {
 773  case XML_TOK_PROLOG_S:
 774    return XML_ROLE_ATTLIST_NONE;
 775  case XML_TOK_DECL_CLOSE:
 776    setTopLevel(state);
 777    return XML_ROLE_ATTLIST_NONE;
 778  case XML_TOK_NAME:
 779  case XML_TOK_PREFIXED_NAME:
 780    state->handler = attlist2;
 781    return XML_ROLE_ATTRIBUTE_NAME;
 782  }
 783  return common(state, tok);
 784}
 785
 786static int PTRCALL
 787attlist2(PROLOG_STATE *state,
 788         int tok,
 789         const char *ptr,
 790         const char *end,
 791         const ENCODING *enc)
 792{
 793  switch (tok) {
 794  case XML_TOK_PROLOG_S:
 795    return XML_ROLE_ATTLIST_NONE;
 796  case XML_TOK_NAME:
 797    {
 798      static const char * const types[] = {
 799        KW_CDATA,
 800        KW_ID,
 801        KW_IDREF,
 802        KW_IDREFS,
 803        KW_ENTITY,
 804        KW_ENTITIES,
 805        KW_NMTOKEN,
 806        KW_NMTOKENS,
 807      };
 808      int i;
 809      for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
 810        if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
 811          state->handler = attlist8;
 812          return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
 813        }
 814    }
 815    if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
 816      state->handler = attlist5;
 817      return XML_ROLE_ATTLIST_NONE;
 818    }
 819    break;
 820  case XML_TOK_OPEN_PAREN:
 821    state->handler = attlist3;
 822    return XML_ROLE_ATTLIST_NONE;
 823  }
 824  return common(state, tok);
 825}
 826
 827static int PTRCALL
 828attlist3(PROLOG_STATE *state,
 829         int tok,
 830         const char *ptr,
 831         const char *end,
 832         const ENCODING *enc)
 833{
 834  switch (tok) {
 835  case XML_TOK_PROLOG_S:
 836    return XML_ROLE_ATTLIST_NONE;
 837  case XML_TOK_NMTOKEN:
 838  case XML_TOK_NAME:
 839  case XML_TOK_PREFIXED_NAME:
 840    state->handler = attlist4;
 841    return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
 842  }
 843  return common(state, tok);
 844}
 845
 846static int PTRCALL
 847attlist4(PROLOG_STATE *state,
 848         int tok,
 849         const char *ptr,
 850         const char *end,
 851         const ENCODING *enc)
 852{
 853  switch (tok) {
 854  case XML_TOK_PROLOG_S:
 855    return XML_ROLE_ATTLIST_NONE;
 856  case XML_TOK_CLOSE_PAREN:
 857    state->handler = attlist8;
 858    return XML_ROLE_ATTLIST_NONE;
 859  case XML_TOK_OR:
 860    state->handler = attlist3;
 861    return XML_ROLE_ATTLIST_NONE;
 862  }
 863  return common(state, tok);
 864}
 865
 866static int PTRCALL
 867attlist5(PROLOG_STATE *state,
 868         int tok,
 869         const char *ptr,
 870         const char *end,
 871         const ENCODING *enc)
 872{
 873  switch (tok) {
 874  case XML_TOK_PROLOG_S:
 875    return XML_ROLE_ATTLIST_NONE;
 876  case XML_TOK_OPEN_PAREN:
 877    state->handler = attlist6;
 878    return XML_ROLE_ATTLIST_NONE;
 879  }
 880  return common(state, tok);
 881}
 882
 883static int PTRCALL
 884attlist6(PROLOG_STATE *state,
 885         int tok,
 886         const char *ptr,
 887         const char *end,
 888         const ENCODING *enc)
 889{
 890  switch (tok) {
 891  case XML_TOK_PROLOG_S:
 892    return XML_ROLE_ATTLIST_NONE;
 893  case XML_TOK_NAME:
 894    state->handler = attlist7;
 895    return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
 896  }
 897  return common(state, tok);
 898}
 899
 900static int PTRCALL
 901attlist7(PROLOG_STATE *state,
 902         int tok,
 903         const char *ptr,
 904         const char *end,
 905         const ENCODING *enc)
 906{
 907  switch (tok) {
 908  case XML_TOK_PROLOG_S:
 909    return XML_ROLE_ATTLIST_NONE;
 910  case XML_TOK_CLOSE_PAREN:
 911    state->handler = attlist8;
 912    return XML_ROLE_ATTLIST_NONE;
 913  case XML_TOK_OR:
 914    state->handler = attlist6;
 915    return XML_ROLE_ATTLIST_NONE;
 916  }
 917  return common(state, tok);
 918}
 919
 920/* default value */
 921static int PTRCALL
 922attlist8(PROLOG_STATE *state,
 923         int tok,
 924         const char *ptr,
 925         const char *end,
 926         const ENCODING *enc)
 927{
 928  switch (tok) {
 929  case XML_TOK_PROLOG_S:
 930    return XML_ROLE_ATTLIST_NONE;
 931  case XML_TOK_POUND_NAME:
 932    if (XmlNameMatchesAscii(enc,
 933                            ptr + MIN_BYTES_PER_CHAR(enc),
 934                            end,
 935                            KW_IMPLIED)) {
 936      state->handler = attlist1;
 937      return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
 938    }
 939    if (XmlNameMatchesAscii(enc,
 940                            ptr + MIN_BYTES_PER_CHAR(enc),
 941                            end,
 942                            KW_REQUIRED)) {
 943      state->handler = attlist1;
 944      return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
 945    }
 946    if (XmlNameMatchesAscii(enc,
 947                            ptr + MIN_BYTES_PER_CHAR(enc),
 948                            end,
 949                            KW_FIXED)) {
 950      state->handler = attlist9;
 951      return XML_ROLE_ATTLIST_NONE;
 952    }
 953    break;
 954  case XML_TOK_LITERAL:
 955    state->handler = attlist1;
 956    return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
 957  }
 958  return common(state, tok);
 959}
 960
 961static int PTRCALL
 962attlist9(PROLOG_STATE *state,
 963         int tok,
 964         const char *ptr,
 965         const char *end,
 966         const ENCODING *enc)
 967{
 968  switch (tok) {
 969  case XML_TOK_PROLOG_S:
 970    return XML_ROLE_ATTLIST_NONE;
 971  case XML_TOK_LITERAL:
 972    state->handler = attlist1;
 973    return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
 974  }
 975  return common(state, tok);
 976}
 977
 978static int PTRCALL
 979element0(PROLOG_STATE *state,
 980         int tok,
 981         const char *ptr,
 982         const char *end,
 983         const ENCODING *enc)
 984{
 985  switch (tok) {
 986  case XML_TOK_PROLOG_S:
 987    return XML_ROLE_ELEMENT_NONE;
 988  case XML_TOK_NAME:
 989  case XML_TOK_PREFIXED_NAME:
 990    state->handler = element1;
 991    return XML_ROLE_ELEMENT_NAME;
 992  }
 993  return common(state, tok);
 994}
 995
 996static int PTRCALL
 997element1(PROLOG_STATE *state,
 998         int tok,
 999         const char *ptr,
1000         const char *end,
1001         const ENCODING *enc)
1002{
1003  switch (tok) {
1004  case XML_TOK_PROLOG_S:
1005    return XML_ROLE_ELEMENT_NONE;
1006  case XML_TOK_NAME:
1007    if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
1008      state->handler = declClose;
1009      state->role_none = XML_ROLE_ELEMENT_NONE;
1010      return XML_ROLE_CONTENT_EMPTY;
1011    }
1012    if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
1013      state->handler = declClose;
1014      state->role_none = XML_ROLE_ELEMENT_NONE;
1015      return XML_ROLE_CONTENT_ANY;
1016    }
1017    break;
1018  case XML_TOK_OPEN_PAREN:
1019    state->handler = element2;
1020    state->level = 1;
1021    return XML_ROLE_GROUP_OPEN;
1022  }
1023  return common(state, tok);
1024}
1025
1026static int PTRCALL
1027element2(PROLOG_STATE *state,
1028         int tok,
1029         const char *ptr,
1030         const char *end,
1031         const ENCODING *enc)
1032{
1033  switch (tok) {
1034  case XML_TOK_PROLOG_S:
1035    return XML_ROLE_ELEMENT_NONE;
1036  case XML_TOK_POUND_NAME:
1037    if (XmlNameMatchesAscii(enc,
1038                            ptr + MIN_BYTES_PER_CHAR(enc),
1039                            end,
1040                            KW_PCDATA)) {
1041      state->handler = element3;
1042      return XML_ROLE_CONTENT_PCDATA;
1043    }
1044    break;
1045  case XML_TOK_OPEN_PAREN:
1046    state->level = 2;
1047    state->handler = element6;
1048    return XML_ROLE_GROUP_OPEN;
1049  case XML_TOK_NAME:
1050  case XML_TOK_PREFIXED_NAME:
1051    state->handler = element7;
1052    return XML_ROLE_CONTENT_ELEMENT;
1053  case XML_TOK_NAME_QUESTION:
1054    state->handler = element7;
1055    return XML_ROLE_CONTENT_ELEMENT_OPT;
1056  case XML_TOK_NAME_ASTERISK:
1057    state->handler = element7;
1058    return XML_ROLE_CONTENT_ELEMENT_REP;
1059  case XML_TOK_NAME_PLUS:
1060    state->handler = element7;
1061    return XML_ROLE_CONTENT_ELEMENT_PLUS;
1062  }
1063  return common(state, tok);
1064}
1065
1066static int PTRCALL
1067element3(PROLOG_STATE *state,
1068         int tok,
1069         const char *ptr,
1070         const char *end,
1071         const ENCODING *enc)
1072{
1073  switch (tok) {
1074  case XML_TOK_PROLOG_S:
1075    return XML_ROLE_ELEMENT_NONE;
1076  case XML_TOK_CLOSE_PAREN:
1077    state->handler = declClose;
1078    state->role_none = XML_ROLE_ELEMENT_NONE;
1079    return XML_ROLE_GROUP_CLOSE;
1080  case XML_TOK_CLOSE_PAREN_ASTERISK:
1081    state->handler = declClose;
1082    state->role_none = XML_ROLE_ELEMENT_NONE;
1083    return XML_ROLE_GROUP_CLOSE_REP;
1084  case XML_TOK_OR:
1085    state->handler = element4;
1086    return XML_ROLE_ELEMENT_NONE;
1087  }
1088  return common(state, tok);
1089}
1090
1091static int PTRCALL
1092element4(PROLOG_STATE *state,
1093         int tok,
1094         const char *ptr,
1095         const char *end,
1096         const ENCODING *enc)
1097{
1098  switch (tok) {
1099  case XML_TOK_PROLOG_S:
1100    return XML_ROLE_ELEMENT_NONE;
1101  case XML_TOK_NAME:
1102  case XML_TOK_PREFIXED_NAME:
1103    state->handler = element5;
1104    return XML_ROLE_CONTENT_ELEMENT;
1105  }
1106  return common(state, tok);
1107}
1108
1109static int PTRCALL
1110element5(PROLOG_STATE *state,
1111         int tok,
1112         const char *ptr,
1113         const char *end,
1114         const ENCODING *enc)
1115{
1116  switch (tok) {
1117  case XML_TOK_PROLOG_S:
1118    return XML_ROLE_ELEMENT_NONE;
1119  case XML_TOK_CLOSE_PAREN_ASTERISK:
1120    state->handler = declClose;
1121    state->role_none = XML_ROLE_ELEMENT_NONE;
1122    return XML_ROLE_GROUP_CLOSE_REP;
1123  case XML_TOK_OR:
1124    state->handler = element4;
1125    return XML_ROLE_ELEMENT_NONE;
1126  }
1127  return common(state, tok);
1128}
1129
1130static int PTRCALL
1131element6(PROLOG_STATE *state,
1132         int tok,
1133         const char *ptr,
1134         const char *end,
1135         const ENCODING *enc)
1136{
1137  switch (tok) {
1138  case XML_TOK_PROLOG_S:
1139    return XML_ROLE_ELEMENT_NONE;
1140  case XML_TOK_OPEN_PAREN:
1141    state->level += 1;
1142    return XML_ROLE_GROUP_OPEN;
1143  case XML_TOK_NAME:
1144  case XML_TOK_PREFIXED_NAME:
1145    state->handler = element7;
1146    return XML_ROLE_CONTENT_ELEMENT;
1147  case XML_TOK_NAME_QUESTION:
1148    state->handler = element7;
1149    return XML_ROLE_CONTENT_ELEMENT_OPT;
1150  case XML_TOK_NAME_ASTERISK:
1151    state->handler = element7;
1152    return XML_ROLE_CONTENT_ELEMENT_REP;
1153  case XML_TOK_NAME_PLUS:
1154    state->handler = element7;
1155    return XML_ROLE_CONTENT_ELEMENT_PLUS;
1156  }
1157  return common(state, tok);
1158}
1159
1160static int PTRCALL
1161element7(PROLOG_STATE *state,
1162         int tok,
1163         const char *ptr,
1164         const char *end,
1165         const ENCODING *enc)
1166{
1167  switch (tok) {
1168  case XML_TOK_PROLOG_S:
1169    return XML_ROLE_ELEMENT_NONE;
1170  case XML_TOK_CLOSE_PAREN:
1171    state->level -= 1;
1172    if (state->level == 0) {
1173      state->handler = declClose;
1174      state->role_none = XML_ROLE_ELEMENT_NONE;
1175    }
1176    return XML_ROLE_GROUP_CLOSE;
1177  case XML_TOK_CLOSE_PAREN_ASTERISK:
1178    state->level -= 1;
1179    if (state->level == 0) {
1180      state->handler = declClose;
1181      state->role_none = XML_ROLE_ELEMENT_NONE;
1182    }
1183    return XML_ROLE_GROUP_CLOSE_REP;
1184  case XML_TOK_CLOSE_PAREN_QUESTION:
1185    state->level -= 1;
1186    if (state->level == 0) {
1187      state->handler = declClose;
1188      state->role_none = XML_ROLE_ELEMENT_NONE;
1189    }
1190    return XML_ROLE_GROUP_CLOSE_OPT;
1191  case XML_TOK_CLOSE_PAREN_PLUS:
1192    state->level -= 1;
1193    if (state->level == 0) {
1194      state->handler = declClose;
1195      state->role_none = XML_ROLE_ELEMENT_NONE;
1196    }
1197    return XML_ROLE_GROUP_CLOSE_PLUS;
1198  case XML_TOK_COMMA:
1199    state->handler = element6;
1200    return XML_ROLE_GROUP_SEQUENCE;
1201  case XML_TOK_OR:
1202    state->handler = element6;
1203    return XML_ROLE_GROUP_CHOICE;
1204  }
1205  return common(state, tok);
1206}
1207
1208#ifdef XML_DTD
1209
1210static int PTRCALL
1211condSect0(PROLOG_STATE *state,
1212          int tok,
1213          const char *ptr,
1214          const char *end,
1215          const ENCODING *enc)
1216{
1217  switch (tok) {
1218  case XML_TOK_PROLOG_S:
1219    return XML_ROLE_NONE;
1220  case XML_TOK_NAME:
1221    if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1222      state->handler = condSect1;
1223      return XML_ROLE_NONE;
1224    }
1225    if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1226      state->handler = condSect2;
1227      return XML_ROLE_NONE;
1228    }
1229    break;
1230  }
1231  return common(state, tok);
1232}
1233
1234static int PTRCALL
1235condSect1(PROLOG_STATE *state,
1236          int tok,
1237          const char *ptr,
1238          const char *end,
1239          const ENCODING *enc)
1240{
1241  switch (tok) {
1242  case XML_TOK_PROLOG_S:
1243    return XML_ROLE_NONE;
1244  case XML_TOK_OPEN_BRACKET:
1245    state->handler = externalSubset1;
1246    state->includeLevel += 1;
1247    return XML_ROLE_NONE;
1248  }
1249  return common(state, tok);
1250}
1251
1252static int PTRCALL
1253condSect2(PROLOG_STATE *state,
1254          int tok,
1255          const char *ptr,
1256          const char *end,
1257          const ENCODING *enc)
1258{
1259  switch (tok) {
1260  case XML_TOK_PROLOG_S:
1261    return XML_ROLE_NONE;
1262  case XML_TOK_OPEN_BRACKET:
1263    state->handler = externalSubset1;
1264    return XML_ROLE_IGNORE_SECT;
1265  }
1266  return common(state, tok);
1267}
1268
1269#endif /* XML_DTD */
1270
1271static int PTRCALL
1272declClose(PROLOG_STATE *state,
1273          int tok,
1274          const char *ptr,
1275          const char *end,
1276          const ENCODING *enc)
1277{
1278  switch (tok) {
1279  case XML_TOK_PROLOG_S:
1280    return state->role_none;
1281  case XML_TOK_DECL_CLOSE:
1282    setTopLevel(state);
1283    return state->role_none;
1284  }
1285  return common(state, tok);
1286}
1287
1288static int PTRCALL
1289error(PROLOG_STATE *state,
1290      int tok,
1291      const char *ptr,
1292      const char *end,
1293      const ENCODING *enc)
1294{
1295  return XML_ROLE_NONE;
1296}
1297
1298static int FASTCALL
1299common(PROLOG_STATE *state, int tok)
1300{
1301#ifdef XML_DTD
1302  if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1303    return XML_ROLE_INNER_PARAM_ENTITY_REF;
1304#endif
1305  state->handler = error;
1306  return XML_ROLE_ERROR;
1307}
1308
1309void
1310XmlPrologStateInit(PROLOG_STATE *state)
1311{
1312  state->handler = prolog0;
1313#ifdef XML_DTD
1314  state->documentEntity = 1;
1315  state->includeLevel = 0;
1316  state->inEntityValue = 0;
1317#endif /* XML_DTD */
1318}
1319
1320#ifdef XML_DTD
1321
1322void
1323XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
1324{
1325  state->handler = externalSubset0;
1326  state->documentEntity = 0;
1327  state->includeLevel = 0;
1328}
1329
1330#endif /* XML_DTD */