00001
00002
00003
00004
00005
00006
00007 #ifdef COMPILED_FROM_DSP
00008 # include "winconfig.h"
00009 #else
00010 # include <config.h>
00011 #endif
00012
00013 #include "xmlrole.h"
00014 #include "ascii.h"
00015
00016
00017
00018
00019
00020
00021
00022
00023 static const char KW_ANY[] = { ASCII_A, ASCII_N, ASCII_Y, '\0' };
00024 static const char KW_ATTLIST[] = { ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
00025 static const char KW_CDATA[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
00026 static const char KW_DOCTYPE[] = { ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
00027 static const char KW_ELEMENT[] = { ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
00028 static const char KW_EMPTY[] = { ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
00029 static const char KW_ENTITIES[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
00030 static const char KW_ENTITY[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
00031 static const char KW_FIXED[] = { ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
00032 static const char KW_ID[] = { ASCII_I, ASCII_D, '\0' };
00033 static const char KW_IDREF[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
00034 static const char KW_IDREFS[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
00035 static const char KW_IGNORE[] = { ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
00036 static const char KW_IMPLIED[] = { ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
00037 static const char KW_INCLUDE[] = { ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
00038 static const char KW_NDATA[] = { ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
00039 static const char KW_NMTOKEN[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
00040 static const char KW_NMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
00041 static const char KW_NOTATION[] = { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, '\0' };
00042 static const char KW_PCDATA[] = { ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
00043 static const char KW_PUBLIC[] = { ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
00044 static const char KW_REQUIRED[] = { ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, '\0' };
00045 static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
00046
/*
 * Default definition of MIN_BYTES_PER_CHAR: read the minBytesPerChar field
 * of the encoding.  An earlier definition of the macro takes precedence.
 */
#if !defined(MIN_BYTES_PER_CHAR)
#  define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
00050
/*
 * setTopLevel(state): install the handler used between declarations.
 * Without XML_DTD this is always internalSubset.  With XML_DTD it is
 * internalSubset when state->documentEntity is non-zero and
 * externalSubset1 otherwise.  The macro is an assignment expression.
 */
#ifndef XML_DTD
#  define setTopLevel(state) ((state)->handler = internalSubset)
#else
#  define setTopLevel(state)                              \
     ((state)->handler = ((state)->documentEntity         \
                          ? internalSubset                \
                          : externalSubset1))
#endif
00059
00060 typedef int PROLOG_HANDLER(PROLOG_STATE *state,
00061 int tok,
00062 const char *ptr,
00063 const char *end,
00064 const ENCODING *enc);
00065
00066 static PROLOG_HANDLER
00067 prolog0, prolog1, prolog2,
00068 doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
00069 internalSubset,
00070 entity0, entity1, entity2, entity3, entity4, entity5, entity6,
00071 entity7, entity8, entity9,
00072 notation0, notation1, notation2, notation3, notation4,
00073 attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
00074 attlist7, attlist8, attlist9,
00075 element0, element1, element2, element3, element4, element5, element6,
00076 element7,
00077 #ifdef XML_DTD
00078 externalSubset0, externalSubset1,
00079 condSect0, condSect1, condSect2,
00080 #endif
00081 declClose,
00082 error;
00083
00084 static
00085 int common(PROLOG_STATE *state, int tok);
00086
00087 static
00088 int prolog0(PROLOG_STATE *state,
00089 int tok,
00090 const char *ptr,
00091 const char *end,
00092 const ENCODING *enc)
00093 {
00094 switch (tok) {
00095 case XML_TOK_PROLOG_S:
00096 state->handler = prolog1;
00097 return XML_ROLE_NONE;
00098 case XML_TOK_XML_DECL:
00099 state->handler = prolog1;
00100 return XML_ROLE_XML_DECL;
00101 case XML_TOK_PI:
00102 state->handler = prolog1;
00103 return XML_ROLE_NONE;
00104 case XML_TOK_COMMENT:
00105 state->handler = prolog1;
00106 case XML_TOK_BOM:
00107 return XML_ROLE_NONE;
00108 case XML_TOK_DECL_OPEN:
00109 if (!XmlNameMatchesAscii(enc,
00110 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
00111 end,
00112 KW_DOCTYPE))
00113 break;
00114 state->handler = doctype0;
00115 return XML_ROLE_NONE;
00116 case XML_TOK_INSTANCE_START:
00117 state->handler = error;
00118 return XML_ROLE_INSTANCE_START;
00119 }
00120 return common(state, tok);
00121 }
00122
00123 static
00124 int prolog1(PROLOG_STATE *state,
00125 int tok,
00126 const char *ptr,
00127 const char *end,
00128 const ENCODING *enc)
00129 {
00130 switch (tok) {
00131 case XML_TOK_PROLOG_S:
00132 return XML_ROLE_NONE;
00133 case XML_TOK_PI:
00134 case XML_TOK_COMMENT:
00135 case XML_TOK_BOM:
00136 return XML_ROLE_NONE;
00137 case XML_TOK_DECL_OPEN:
00138 if (!XmlNameMatchesAscii(enc,
00139 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
00140 end,
00141 KW_DOCTYPE))
00142 break;
00143 state->handler = doctype0;
00144 return XML_ROLE_NONE;
00145 case XML_TOK_INSTANCE_START:
00146 state->handler = error;
00147 return XML_ROLE_INSTANCE_START;
00148 }
00149 return common(state, tok);
00150 }
00151
00152 static
00153 int prolog2(PROLOG_STATE *state,
00154 int tok,
00155 const char *ptr,
00156 const char *end,
00157 const ENCODING *enc)
00158 {
00159 switch (tok) {
00160 case XML_TOK_PROLOG_S:
00161 return XML_ROLE_NONE;
00162 case XML_TOK_PI:
00163 case XML_TOK_COMMENT:
00164 return XML_ROLE_NONE;
00165 case XML_TOK_INSTANCE_START:
00166 state->handler = error;
00167 return XML_ROLE_INSTANCE_START;
00168 }
00169 return common(state, tok);
00170 }
00171
00172 static
00173 int doctype0(PROLOG_STATE *state,
00174 int tok,
00175 const char *ptr,
00176 const char *end,
00177 const ENCODING *enc)
00178 {
00179 switch (tok) {
00180 case XML_TOK_PROLOG_S:
00181 return XML_ROLE_NONE;
00182 case XML_TOK_NAME:
00183 case XML_TOK_PREFIXED_NAME:
00184 state->handler = doctype1;
00185 return XML_ROLE_DOCTYPE_NAME;
00186 }
00187 return common(state, tok);
00188 }
00189
00190 static
00191 int doctype1(PROLOG_STATE *state,
00192 int tok,
00193 const char *ptr,
00194 const char *end,
00195 const ENCODING *enc)
00196 {
00197 switch (tok) {
00198 case XML_TOK_PROLOG_S:
00199 return XML_ROLE_NONE;
00200 case XML_TOK_OPEN_BRACKET:
00201 state->handler = internalSubset;
00202 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
00203 case XML_TOK_DECL_CLOSE:
00204 state->handler = prolog2;
00205 return XML_ROLE_DOCTYPE_CLOSE;
00206 case XML_TOK_NAME:
00207 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
00208 state->handler = doctype3;
00209 return XML_ROLE_NONE;
00210 }
00211 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
00212 state->handler = doctype2;
00213 return XML_ROLE_NONE;
00214 }
00215 break;
00216 }
00217 return common(state, tok);
00218 }
00219
00220 static
00221 int doctype2(PROLOG_STATE *state,
00222 int tok,
00223 const char *ptr,
00224 const char *end,
00225 const ENCODING *enc)
00226 {
00227 switch (tok) {
00228 case XML_TOK_PROLOG_S:
00229 return XML_ROLE_NONE;
00230 case XML_TOK_LITERAL:
00231 state->handler = doctype3;
00232 return XML_ROLE_DOCTYPE_PUBLIC_ID;
00233 }
00234 return common(state, tok);
00235 }
00236
00237 static
00238 int doctype3(PROLOG_STATE *state,
00239 int tok,
00240 const char *ptr,
00241 const char *end,
00242 const ENCODING *enc)
00243 {
00244 switch (tok) {
00245 case XML_TOK_PROLOG_S:
00246 return XML_ROLE_NONE;
00247 case XML_TOK_LITERAL:
00248 state->handler = doctype4;
00249 return XML_ROLE_DOCTYPE_SYSTEM_ID;
00250 }
00251 return common(state, tok);
00252 }
00253
00254 static
00255 int doctype4(PROLOG_STATE *state,
00256 int tok,
00257 const char *ptr,
00258 const char *end,
00259 const ENCODING *enc)
00260 {
00261 switch (tok) {
00262 case XML_TOK_PROLOG_S:
00263 return XML_ROLE_NONE;
00264 case XML_TOK_OPEN_BRACKET:
00265 state->handler = internalSubset;
00266 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
00267 case XML_TOK_DECL_CLOSE:
00268 state->handler = prolog2;
00269 return XML_ROLE_DOCTYPE_CLOSE;
00270 }
00271 return common(state, tok);
00272 }
00273
00274 static
00275 int doctype5(PROLOG_STATE *state,
00276 int tok,
00277 const char *ptr,
00278 const char *end,
00279 const ENCODING *enc)
00280 {
00281 switch (tok) {
00282 case XML_TOK_PROLOG_S:
00283 return XML_ROLE_NONE;
00284 case XML_TOK_DECL_CLOSE:
00285 state->handler = prolog2;
00286 return XML_ROLE_DOCTYPE_CLOSE;
00287 }
00288 return common(state, tok);
00289 }
00290
00291 static
00292 int internalSubset(PROLOG_STATE *state,
00293 int tok,
00294 const char *ptr,
00295 const char *end,
00296 const ENCODING *enc)
00297 {
00298 switch (tok) {
00299 case XML_TOK_PROLOG_S:
00300 return XML_ROLE_NONE;
00301 case XML_TOK_DECL_OPEN:
00302 if (XmlNameMatchesAscii(enc,
00303 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
00304 end,
00305 KW_ENTITY)) {
00306 state->handler = entity0;
00307 return XML_ROLE_NONE;
00308 }
00309 if (XmlNameMatchesAscii(enc,
00310 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
00311 end,
00312 KW_ATTLIST)) {
00313 state->handler = attlist0;
00314 return XML_ROLE_NONE;
00315 }
00316 if (XmlNameMatchesAscii(enc,
00317 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
00318 end,
00319 KW_ELEMENT)) {
00320 state->handler = element0;
00321 return XML_ROLE_NONE;
00322 }
00323 if (XmlNameMatchesAscii(enc,
00324 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
00325 end,
00326 KW_NOTATION)) {
00327 state->handler = notation0;
00328 return XML_ROLE_NONE;
00329 }
00330 break;
00331 case XML_TOK_PI:
00332 case XML_TOK_COMMENT:
00333 return XML_ROLE_NONE;
00334 case XML_TOK_PARAM_ENTITY_REF:
00335 return XML_ROLE_PARAM_ENTITY_REF;
00336 case XML_TOK_CLOSE_BRACKET:
00337 state->handler = doctype5;
00338 return XML_ROLE_NONE;
00339 }
00340 return common(state, tok);
00341 }
00342
00343 #ifdef XML_DTD
00344
00345 static
00346 int externalSubset0(PROLOG_STATE *state,
00347 int tok,
00348 const char *ptr,
00349 const char *end,
00350 const ENCODING *enc)
00351 {
00352 state->handler = externalSubset1;
00353 if (tok == XML_TOK_XML_DECL)
00354 return XML_ROLE_TEXT_DECL;
00355 return externalSubset1(state, tok, ptr, end, enc);
00356 }
00357
00358 static
00359 int externalSubset1(PROLOG_STATE *state,
00360 int tok,
00361 const char *ptr,
00362 const char *end,
00363 const ENCODING *enc)
00364 {
00365 switch (tok) {
00366 case XML_TOK_COND_SECT_OPEN:
00367 state->handler = condSect0;
00368 return XML_ROLE_NONE;
00369 case XML_TOK_COND_SECT_CLOSE:
00370 if (state->includeLevel == 0)
00371 break;
00372 state->includeLevel -= 1;
00373 return XML_ROLE_NONE;
00374 case XML_TOK_PROLOG_S:
00375 return XML_ROLE_NONE;
00376 case XML_TOK_CLOSE_BRACKET:
00377 break;
00378 case XML_TOK_NONE:
00379 if (state->includeLevel)
00380 break;
00381 return XML_ROLE_NONE;
00382 default:
00383 return internalSubset(state, tok, ptr, end, enc);
00384 }
00385 return common(state, tok);
00386 }
00387
00388 #endif
00389
00390 static
00391 int entity0(PROLOG_STATE *state,
00392 int tok,
00393 const char *ptr,
00394 const char *end,
00395 const ENCODING *enc)
00396 {
00397 switch (tok) {
00398 case XML_TOK_PROLOG_S:
00399 return XML_ROLE_NONE;
00400 case XML_TOK_PERCENT:
00401 state->handler = entity1;
00402 return XML_ROLE_NONE;
00403 case XML_TOK_NAME:
00404 state->handler = entity2;
00405 return XML_ROLE_GENERAL_ENTITY_NAME;
00406 }
00407 return common(state, tok);
00408 }
00409
00410 static
00411 int entity1(PROLOG_STATE *state,
00412 int tok,
00413 const char *ptr,
00414 const char *end,
00415 const ENCODING *enc)
00416 {
00417 switch (tok) {
00418 case XML_TOK_PROLOG_S:
00419 return XML_ROLE_NONE;
00420 case XML_TOK_NAME:
00421 state->handler = entity7;
00422 return XML_ROLE_PARAM_ENTITY_NAME;
00423 }
00424 return common(state, tok);
00425 }
00426
00427 static
00428 int entity2(PROLOG_STATE *state,
00429 int tok,
00430 const char *ptr,
00431 const char *end,
00432 const ENCODING *enc)
00433 {
00434 switch (tok) {
00435 case XML_TOK_PROLOG_S:
00436 return XML_ROLE_NONE;
00437 case XML_TOK_NAME:
00438 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
00439 state->handler = entity4;
00440 return XML_ROLE_NONE;
00441 }
00442 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
00443 state->handler = entity3;
00444 return XML_ROLE_NONE;
00445 }
00446 break;
00447 case XML_TOK_LITERAL:
00448 state->handler = declClose;
00449 return XML_ROLE_ENTITY_VALUE;
00450 }
00451 return common(state, tok);
00452 }
00453
00454 static
00455 int entity3(PROLOG_STATE *state,
00456 int tok,
00457 const char *ptr,
00458 const char *end,
00459 const ENCODING *enc)
00460 {
00461 switch (tok) {
00462 case XML_TOK_PROLOG_S:
00463 return XML_ROLE_NONE;
00464 case XML_TOK_LITERAL:
00465 state->handler = entity4;
00466 return XML_ROLE_ENTITY_PUBLIC_ID;
00467 }
00468 return common(state, tok);
00469 }
00470
00471
00472 static
00473 int entity4(PROLOG_STATE *state,
00474 int tok,
00475 const char *ptr,
00476 const char *end,
00477 const ENCODING *enc)
00478 {
00479 switch (tok) {
00480 case XML_TOK_PROLOG_S:
00481 return XML_ROLE_NONE;
00482 case XML_TOK_LITERAL:
00483 state->handler = entity5;
00484 return XML_ROLE_ENTITY_SYSTEM_ID;
00485 }
00486 return common(state, tok);
00487 }
00488
00489 static
00490 int entity5(PROLOG_STATE *state,
00491 int tok,
00492 const char *ptr,
00493 const char *end,
00494 const ENCODING *enc)
00495 {
00496 switch (tok) {
00497 case XML_TOK_PROLOG_S:
00498 return XML_ROLE_NONE;
00499 case XML_TOK_DECL_CLOSE:
00500 setTopLevel(state);
00501 return XML_ROLE_ENTITY_COMPLETE;
00502 case XML_TOK_NAME:
00503 if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
00504 state->handler = entity6;
00505 return XML_ROLE_NONE;
00506 }
00507 break;
00508 }
00509 return common(state, tok);
00510 }
00511
00512 static
00513 int entity6(PROLOG_STATE *state,
00514 int tok,
00515 const char *ptr,
00516 const char *end,
00517 const ENCODING *enc)
00518 {
00519 switch (tok) {
00520 case XML_TOK_PROLOG_S:
00521 return XML_ROLE_NONE;
00522 case XML_TOK_NAME:
00523 state->handler = declClose;
00524 return XML_ROLE_ENTITY_NOTATION_NAME;
00525 }
00526 return common(state, tok);
00527 }
00528
00529 static
00530 int entity7(PROLOG_STATE *state,
00531 int tok,
00532 const char *ptr,
00533 const char *end,
00534 const ENCODING *enc)
00535 {
00536 switch (tok) {
00537 case XML_TOK_PROLOG_S:
00538 return XML_ROLE_NONE;
00539 case XML_TOK_NAME:
00540 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
00541 state->handler = entity9;
00542 return XML_ROLE_NONE;
00543 }
00544 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
00545 state->handler = entity8;
00546 return XML_ROLE_NONE;
00547 }
00548 break;
00549 case XML_TOK_LITERAL:
00550 state->handler = declClose;
00551 return XML_ROLE_ENTITY_VALUE;
00552 }
00553 return common(state, tok);
00554 }
00555
00556 static
00557 int entity8(PROLOG_STATE *state,
00558 int tok,
00559 const char *ptr,
00560 const char *end,
00561 const ENCODING *enc)
00562 {
00563 switch (tok) {
00564 case XML_TOK_PROLOG_S:
00565 return XML_ROLE_NONE;
00566 case XML_TOK_LITERAL:
00567 state->handler = entity9;
00568 return XML_ROLE_ENTITY_PUBLIC_ID;
00569 }
00570 return common(state, tok);
00571 }
00572
00573 static
00574 int entity9(PROLOG_STATE *state,
00575 int tok,
00576 const char *ptr,
00577 const char *end,
00578 const ENCODING *enc)
00579 {
00580 switch (tok) {
00581 case XML_TOK_PROLOG_S:
00582 return XML_ROLE_NONE;
00583 case XML_TOK_LITERAL:
00584 state->handler = declClose;
00585 return XML_ROLE_ENTITY_SYSTEM_ID;
00586 }
00587 return common(state, tok);
00588 }
00589
00590 static
00591 int notation0(PROLOG_STATE *state,
00592 int tok,
00593 const char *ptr,
00594 const char *end,
00595 const ENCODING *enc)
00596 {
00597 switch (tok) {
00598 case XML_TOK_PROLOG_S:
00599 return XML_ROLE_NONE;
00600 case XML_TOK_NAME:
00601 state->handler = notation1;
00602 return XML_ROLE_NOTATION_NAME;
00603 }
00604 return common(state, tok);
00605 }
00606
00607 static
00608 int notation1(PROLOG_STATE *state,
00609 int tok,
00610 const char *ptr,
00611 const char *end,
00612 const ENCODING *enc)
00613 {
00614 switch (tok) {
00615 case XML_TOK_PROLOG_S:
00616 return XML_ROLE_NONE;
00617 case XML_TOK_NAME:
00618 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
00619 state->handler = notation3;
00620 return XML_ROLE_NONE;
00621 }
00622 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
00623 state->handler = notation2;
00624 return XML_ROLE_NONE;
00625 }
00626 break;
00627 }
00628 return common(state, tok);
00629 }
00630
00631 static
00632 int notation2(PROLOG_STATE *state,
00633 int tok,
00634 const char *ptr,
00635 const char *end,
00636 const ENCODING *enc)
00637 {
00638 switch (tok) {
00639 case XML_TOK_PROLOG_S:
00640 return XML_ROLE_NONE;
00641 case XML_TOK_LITERAL:
00642 state->handler = notation4;
00643 return XML_ROLE_NOTATION_PUBLIC_ID;
00644 }
00645 return common(state, tok);
00646 }
00647
00648 static
00649 int notation3(PROLOG_STATE *state,
00650 int tok,
00651 const char *ptr,
00652 const char *end,
00653 const ENCODING *enc)
00654 {
00655 switch (tok) {
00656 case XML_TOK_PROLOG_S:
00657 return XML_ROLE_NONE;
00658 case XML_TOK_LITERAL:
00659 state->handler = declClose;
00660 return XML_ROLE_NOTATION_SYSTEM_ID;
00661 }
00662 return common(state, tok);
00663 }
00664
00665 static
00666 int notation4(PROLOG_STATE *state,
00667 int tok,
00668 const char *ptr,
00669 const char *end,
00670 const ENCODING *enc)
00671 {
00672 switch (tok) {
00673 case XML_TOK_PROLOG_S:
00674 return XML_ROLE_NONE;
00675 case XML_TOK_LITERAL:
00676 state->handler = declClose;
00677 return XML_ROLE_NOTATION_SYSTEM_ID;
00678 case XML_TOK_DECL_CLOSE:
00679 setTopLevel(state);
00680 return XML_ROLE_NOTATION_NO_SYSTEM_ID;
00681 }
00682 return common(state, tok);
00683 }
00684
00685 static
00686 int attlist0(PROLOG_STATE *state,
00687 int tok,
00688 const char *ptr,
00689 const char *end,
00690 const ENCODING *enc)
00691 {
00692 switch (tok) {
00693 case XML_TOK_PROLOG_S:
00694 return XML_ROLE_NONE;
00695 case XML_TOK_NAME:
00696 case XML_TOK_PREFIXED_NAME:
00697 state->handler = attlist1;
00698 return XML_ROLE_ATTLIST_ELEMENT_NAME;
00699 }
00700 return common(state, tok);
00701 }
00702
00703 static
00704 int attlist1(PROLOG_STATE *state,
00705 int tok,
00706 const char *ptr,
00707 const char *end,
00708 const ENCODING *enc)
00709 {
00710 switch (tok) {
00711 case XML_TOK_PROLOG_S:
00712 return XML_ROLE_NONE;
00713 case XML_TOK_DECL_CLOSE:
00714 setTopLevel(state);
00715 return XML_ROLE_NONE;
00716 case XML_TOK_NAME:
00717 case XML_TOK_PREFIXED_NAME:
00718 state->handler = attlist2;
00719 return XML_ROLE_ATTRIBUTE_NAME;
00720 }
00721 return common(state, tok);
00722 }
00723
00724 static
00725 int attlist2(PROLOG_STATE *state,
00726 int tok,
00727 const char *ptr,
00728 const char *end,
00729 const ENCODING *enc)
00730 {
00731 switch (tok) {
00732 case XML_TOK_PROLOG_S:
00733 return XML_ROLE_NONE;
00734 case XML_TOK_NAME:
00735 {
00736 static const char *types[] = {
00737 KW_CDATA,
00738 KW_ID,
00739 KW_IDREF,
00740 KW_IDREFS,
00741 KW_ENTITY,
00742 KW_ENTITIES,
00743 KW_NMTOKEN,
00744 KW_NMTOKENS,
00745 };
00746 int i;
00747 for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
00748 if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
00749 state->handler = attlist8;
00750 return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
00751 }
00752 }
00753 if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
00754 state->handler = attlist5;
00755 return XML_ROLE_NONE;
00756 }
00757 break;
00758 case XML_TOK_OPEN_PAREN:
00759 state->handler = attlist3;
00760 return XML_ROLE_NONE;
00761 }
00762 return common(state, tok);
00763 }
00764
00765 static
00766 int attlist3(PROLOG_STATE *state,
00767 int tok,
00768 const char *ptr,
00769 const char *end,
00770 const ENCODING *enc)
00771 {
00772 switch (tok) {
00773 case XML_TOK_PROLOG_S:
00774 return XML_ROLE_NONE;
00775 case XML_TOK_NMTOKEN:
00776 case XML_TOK_NAME:
00777 case XML_TOK_PREFIXED_NAME:
00778 state->handler = attlist4;
00779 return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
00780 }
00781 return common(state, tok);
00782 }
00783
00784 static
00785 int attlist4(PROLOG_STATE *state,
00786 int tok,
00787 const char *ptr,
00788 const char *end,
00789 const ENCODING *enc)
00790 {
00791 switch (tok) {
00792 case XML_TOK_PROLOG_S:
00793 return XML_ROLE_NONE;
00794 case XML_TOK_CLOSE_PAREN:
00795 state->handler = attlist8;
00796 return XML_ROLE_NONE;
00797 case XML_TOK_OR:
00798 state->handler = attlist3;
00799 return XML_ROLE_NONE;
00800 }
00801 return common(state, tok);
00802 }
00803
00804 static
00805 int attlist5(PROLOG_STATE *state,
00806 int tok,
00807 const char *ptr,
00808 const char *end,
00809 const ENCODING *enc)
00810 {
00811 switch (tok) {
00812 case XML_TOK_PROLOG_S:
00813 return XML_ROLE_NONE;
00814 case XML_TOK_OPEN_PAREN:
00815 state->handler = attlist6;
00816 return XML_ROLE_NONE;
00817 }
00818 return common(state, tok);
00819 }
00820
00821
00822 static
00823 int attlist6(PROLOG_STATE *state,
00824 int tok,
00825 const char *ptr,
00826 const char *end,
00827 const ENCODING *enc)
00828 {
00829 switch (tok) {
00830 case XML_TOK_PROLOG_S:
00831 return XML_ROLE_NONE;
00832 case XML_TOK_NAME:
00833 state->handler = attlist7;
00834 return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
00835 }
00836 return common(state, tok);
00837 }
00838
00839 static
00840 int attlist7(PROLOG_STATE *state,
00841 int tok,
00842 const char *ptr,
00843 const char *end,
00844 const ENCODING *enc)
00845 {
00846 switch (tok) {
00847 case XML_TOK_PROLOG_S:
00848 return XML_ROLE_NONE;
00849 case XML_TOK_CLOSE_PAREN:
00850 state->handler = attlist8;
00851 return XML_ROLE_NONE;
00852 case XML_TOK_OR:
00853 state->handler = attlist6;
00854 return XML_ROLE_NONE;
00855 }
00856 return common(state, tok);
00857 }
00858
00859
00860 static
00861 int attlist8(PROLOG_STATE *state,
00862 int tok,
00863 const char *ptr,
00864 const char *end,
00865 const ENCODING *enc)
00866 {
00867 switch (tok) {
00868 case XML_TOK_PROLOG_S:
00869 return XML_ROLE_NONE;
00870 case XML_TOK_POUND_NAME:
00871 if (XmlNameMatchesAscii(enc,
00872 ptr + MIN_BYTES_PER_CHAR(enc),
00873 end,
00874 KW_IMPLIED)) {
00875 state->handler = attlist1;
00876 return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
00877 }
00878 if (XmlNameMatchesAscii(enc,
00879 ptr + MIN_BYTES_PER_CHAR(enc),
00880 end,
00881 KW_REQUIRED)) {
00882 state->handler = attlist1;
00883 return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
00884 }
00885 if (XmlNameMatchesAscii(enc,
00886 ptr + MIN_BYTES_PER_CHAR(enc),
00887 end,
00888 KW_FIXED)) {
00889 state->handler = attlist9;
00890 return XML_ROLE_NONE;
00891 }
00892 break;
00893 case XML_TOK_LITERAL:
00894 state->handler = attlist1;
00895 return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
00896 }
00897 return common(state, tok);
00898 }
00899
00900 static
00901 int attlist9(PROLOG_STATE *state,
00902 int tok,
00903 const char *ptr,
00904 const char *end,
00905 const ENCODING *enc)
00906 {
00907 switch (tok) {
00908 case XML_TOK_PROLOG_S:
00909 return XML_ROLE_NONE;
00910 case XML_TOK_LITERAL:
00911 state->handler = attlist1;
00912 return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
00913 }
00914 return common(state, tok);
00915 }
00916
00917 static
00918 int element0(PROLOG_STATE *state,
00919 int tok,
00920 const char *ptr,
00921 const char *end,
00922 const ENCODING *enc)
00923 {
00924 switch (tok) {
00925 case XML_TOK_PROLOG_S:
00926 return XML_ROLE_NONE;
00927 case XML_TOK_NAME:
00928 case XML_TOK_PREFIXED_NAME:
00929 state->handler = element1;
00930 return XML_ROLE_ELEMENT_NAME;
00931 }
00932 return common(state, tok);
00933 }
00934
00935 static
00936 int element1(PROLOG_STATE *state,
00937 int tok,
00938 const char *ptr,
00939 const char *end,
00940 const ENCODING *enc)
00941 {
00942 switch (tok) {
00943 case XML_TOK_PROLOG_S:
00944 return XML_ROLE_NONE;
00945 case XML_TOK_NAME:
00946 if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
00947 state->handler = declClose;
00948 return XML_ROLE_CONTENT_EMPTY;
00949 }
00950 if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
00951 state->handler = declClose;
00952 return XML_ROLE_CONTENT_ANY;
00953 }
00954 break;
00955 case XML_TOK_OPEN_PAREN:
00956 state->handler = element2;
00957 state->level = 1;
00958 return XML_ROLE_GROUP_OPEN;
00959 }
00960 return common(state, tok);
00961 }
00962
00963 static
00964 int element2(PROLOG_STATE *state,
00965 int tok,
00966 const char *ptr,
00967 const char *end,
00968 const ENCODING *enc)
00969 {
00970 switch (tok) {
00971 case XML_TOK_PROLOG_S:
00972 return XML_ROLE_NONE;
00973 case XML_TOK_POUND_NAME:
00974 if (XmlNameMatchesAscii(enc,
00975 ptr + MIN_BYTES_PER_CHAR(enc),
00976 end,
00977 KW_PCDATA)) {
00978 state->handler = element3;
00979 return XML_ROLE_CONTENT_PCDATA;
00980 }
00981 break;
00982 case XML_TOK_OPEN_PAREN:
00983 state->level = 2;
00984 state->handler = element6;
00985 return XML_ROLE_GROUP_OPEN;
00986 case XML_TOK_NAME:
00987 case XML_TOK_PREFIXED_NAME:
00988 state->handler = element7;
00989 return XML_ROLE_CONTENT_ELEMENT;
00990 case XML_TOK_NAME_QUESTION:
00991 state->handler = element7;
00992 return XML_ROLE_CONTENT_ELEMENT_OPT;
00993 case XML_TOK_NAME_ASTERISK:
00994 state->handler = element7;
00995 return XML_ROLE_CONTENT_ELEMENT_REP;
00996 case XML_TOK_NAME_PLUS:
00997 state->handler = element7;
00998 return XML_ROLE_CONTENT_ELEMENT_PLUS;
00999 }
01000 return common(state, tok);
01001 }
01002
01003 static
01004 int element3(PROLOG_STATE *state,
01005 int tok,
01006 const char *ptr,
01007 const char *end,
01008 const ENCODING *enc)
01009 {
01010 switch (tok) {
01011 case XML_TOK_PROLOG_S:
01012 return XML_ROLE_NONE;
01013 case XML_TOK_CLOSE_PAREN:
01014 state->handler = declClose;
01015 return XML_ROLE_GROUP_CLOSE;
01016 case XML_TOK_CLOSE_PAREN_ASTERISK:
01017 state->handler = declClose;
01018 return XML_ROLE_GROUP_CLOSE_REP;
01019 case XML_TOK_OR:
01020 state->handler = element4;
01021 return XML_ROLE_NONE;
01022 }
01023 return common(state, tok);
01024 }
01025
01026 static
01027 int element4(PROLOG_STATE *state,
01028 int tok,
01029 const char *ptr,
01030 const char *end,
01031 const ENCODING *enc)
01032 {
01033 switch (tok) {
01034 case XML_TOK_PROLOG_S:
01035 return XML_ROLE_NONE;
01036 case XML_TOK_NAME:
01037 case XML_TOK_PREFIXED_NAME:
01038 state->handler = element5;
01039 return XML_ROLE_CONTENT_ELEMENT;
01040 }
01041 return common(state, tok);
01042 }
01043
01044 static
01045 int element5(PROLOG_STATE *state,
01046 int tok,
01047 const char *ptr,
01048 const char *end,
01049 const ENCODING *enc)
01050 {
01051 switch (tok) {
01052 case XML_TOK_PROLOG_S:
01053 return XML_ROLE_NONE;
01054 case XML_TOK_CLOSE_PAREN_ASTERISK:
01055 state->handler = declClose;
01056 return XML_ROLE_GROUP_CLOSE_REP;
01057 case XML_TOK_OR:
01058 state->handler = element4;
01059 return XML_ROLE_NONE;
01060 }
01061 return common(state, tok);
01062 }
01063
01064 static
01065 int element6(PROLOG_STATE *state,
01066 int tok,
01067 const char *ptr,
01068 const char *end,
01069 const ENCODING *enc)
01070 {
01071 switch (tok) {
01072 case XML_TOK_PROLOG_S:
01073 return XML_ROLE_NONE;
01074 case XML_TOK_OPEN_PAREN:
01075 state->level += 1;
01076 return XML_ROLE_GROUP_OPEN;
01077 case XML_TOK_NAME:
01078 case XML_TOK_PREFIXED_NAME:
01079 state->handler = element7;
01080 return XML_ROLE_CONTENT_ELEMENT;
01081 case XML_TOK_NAME_QUESTION:
01082 state->handler = element7;
01083 return XML_ROLE_CONTENT_ELEMENT_OPT;
01084 case XML_TOK_NAME_ASTERISK:
01085 state->handler = element7;
01086 return XML_ROLE_CONTENT_ELEMENT_REP;
01087 case XML_TOK_NAME_PLUS:
01088 state->handler = element7;
01089 return XML_ROLE_CONTENT_ELEMENT_PLUS;
01090 }
01091 return common(state, tok);
01092 }
01093
01094 static
01095 int element7(PROLOG_STATE *state,
01096 int tok,
01097 const char *ptr,
01098 const char *end,
01099 const ENCODING *enc)
01100 {
01101 switch (tok) {
01102 case XML_TOK_PROLOG_S:
01103 return XML_ROLE_NONE;
01104 case XML_TOK_CLOSE_PAREN:
01105 state->level -= 1;
01106 if (state->level == 0)
01107 state->handler = declClose;
01108 return XML_ROLE_GROUP_CLOSE;
01109 case XML_TOK_CLOSE_PAREN_ASTERISK:
01110 state->level -= 1;
01111 if (state->level == 0)
01112 state->handler = declClose;
01113 return XML_ROLE_GROUP_CLOSE_REP;
01114 case XML_TOK_CLOSE_PAREN_QUESTION:
01115 state->level -= 1;
01116 if (state->level == 0)
01117 state->handler = declClose;
01118 return XML_ROLE_GROUP_CLOSE_OPT;
01119 case XML_TOK_CLOSE_PAREN_PLUS:
01120 state->level -= 1;
01121 if (state->level == 0)
01122 state->handler = declClose;
01123 return XML_ROLE_GROUP_CLOSE_PLUS;
01124 case XML_TOK_COMMA:
01125 state->handler = element6;
01126 return XML_ROLE_GROUP_SEQUENCE;
01127 case XML_TOK_OR:
01128 state->handler = element6;
01129 return XML_ROLE_GROUP_CHOICE;
01130 }
01131 return common(state, tok);
01132 }
01133
01134 #ifdef XML_DTD
01135
01136 static
01137 int condSect0(PROLOG_STATE *state,
01138 int tok,
01139 const char *ptr,
01140 const char *end,
01141 const ENCODING *enc)
01142 {
01143 switch (tok) {
01144 case XML_TOK_PROLOG_S:
01145 return XML_ROLE_NONE;
01146 case XML_TOK_NAME:
01147 if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
01148 state->handler = condSect1;
01149 return XML_ROLE_NONE;
01150 }
01151 if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
01152 state->handler = condSect2;
01153 return XML_ROLE_NONE;
01154 }
01155 break;
01156 }
01157 return common(state, tok);
01158 }
01159
01160 static
01161 int condSect1(PROLOG_STATE *state,
01162 int tok,
01163 const char *ptr,
01164 const char *end,
01165 const ENCODING *enc)
01166 {
01167 switch (tok) {
01168 case XML_TOK_PROLOG_S:
01169 return XML_ROLE_NONE;
01170 case XML_TOK_OPEN_BRACKET:
01171 state->handler = externalSubset1;
01172 state->includeLevel += 1;
01173 return XML_ROLE_NONE;
01174 }
01175 return common(state, tok);
01176 }
01177
01178 static
01179 int condSect2(PROLOG_STATE *state,
01180 int tok,
01181 const char *ptr,
01182 const char *end,
01183 const ENCODING *enc)
01184 {
01185 switch (tok) {
01186 case XML_TOK_PROLOG_S:
01187 return XML_ROLE_NONE;
01188 case XML_TOK_OPEN_BRACKET:
01189 state->handler = externalSubset1;
01190 return XML_ROLE_IGNORE_SECT;
01191 }
01192 return common(state, tok);
01193 }
01194
01195 #endif
01196
01197 static
01198 int declClose(PROLOG_STATE *state,
01199 int tok,
01200 const char *ptr,
01201 const char *end,
01202 const ENCODING *enc)
01203 {
01204 switch (tok) {
01205 case XML_TOK_PROLOG_S:
01206 return XML_ROLE_NONE;
01207 case XML_TOK_DECL_CLOSE:
01208 setTopLevel(state);
01209 return XML_ROLE_NONE;
01210 }
01211 return common(state, tok);
01212 }
01213
#if 0

/*
 * Disabled handler (compiled out by the surrounding #if 0).  On
 * XML_TOK_DECL_CLOSE it switches to internalSubset and returns 0; every
 * other token returns XML_ROLE_NONE without changing state.
 */
static int ignore(PROLOG_STATE *state, int tok,
                  const char *ptr, const char *end,
                  const ENCODING *enc)
{
  if (tok == XML_TOK_DECL_CLOSE) {
    state->handler = internalSubset;
    return 0;
  }
  return XML_ROLE_NONE;
}
#endif
01233
01234 static
01235 int error(PROLOG_STATE *state,
01236 int tok,
01237 const char *ptr,
01238 const char *end,
01239 const ENCODING *enc)
01240 {
01241 return XML_ROLE_NONE;
01242 }
01243
01244 static
01245 int common(PROLOG_STATE *state, int tok)
01246 {
01247 #ifdef XML_DTD
01248 if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
01249 return XML_ROLE_INNER_PARAM_ENTITY_REF;
01250 #endif
01251 state->handler = error;
01252 return XML_ROLE_ERROR;
01253 }
01254
01255 void XmlPrologStateInit(PROLOG_STATE *state)
01256 {
01257 state->handler = prolog0;
01258 #ifdef XML_DTD
01259 state->documentEntity = 1;
01260 state->includeLevel = 0;
01261 #endif
01262 }
01263
01264 #ifdef XML_DTD
01265
01266 void XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
01267 {
01268 state->handler = externalSubset0;
01269 state->documentEntity = 0;
01270 state->includeLevel = 0;
01271 }
01272
01273 #endif