00001
00002
00003
00004
00005
/* Default validity predicate: when the including encoding does not supply
   IS_INVALID_CHAR, no multi-byte sequence is ever reported as invalid. */
#if !defined(IS_INVALID_CHAR)
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif
00009
/* Expands to the `case BT_LEAD<n>:` arm of a byte-type switch.
   Relies on `enc` and `end` being in scope at the expansion site.
   - fewer than n bytes left      -> return XML_TOK_PARTIAL_CHAR
   - IS_INVALID_CHAR says invalid -> *nextTokPtr = ptr, return XML_TOK_INVALID
   - otherwise                    -> skip the n bytes and leave the switch */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
    case BT_LEAD ## n: \
      if (end - ptr >= n) { \
        if (!IS_INVALID_CHAR(enc, ptr, n)) { \
          ptr += n; \
          break; \
        } \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      return XML_TOK_PARTIAL_CHAR;
00020
/* Switch arms shared by scanners that must reject bad input: the three
   multi-byte lead arms (2, 3 and 4 bytes) followed by the byte types that
   are always an error, which store ptr in *nextTokPtr and return
   XML_TOK_INVALID. */
#define INVALID_CASES(ptr, nextTokPtr) \
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
  case BT_TRAIL: \
  case BT_MALFORM: \
  case BT_NONXML: \
    *(nextTokPtr) = (ptr); \
    return XML_TOK_INVALID;
00030
/* Expands to the `case BT_LEAD<n>:` arm used while scanning the interior of
   a name.  Returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain,
   returns XML_TOK_INVALID (with *nextTokPtr = ptr) when the n-byte character
   is not a name character, and otherwise advances ptr by n and leaves the
   switch.  Macro arguments are parenthesised, matching INVALID_LEAD_CASE,
   so that non-trivial argument expressions expand safely. */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
    case BT_LEAD ## n: \
      if ((end) - (ptr) < (n)) \
        return XML_TOK_PARTIAL_CHAR; \
      if (!IS_NAME_CHAR(enc, ptr, n)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      (ptr) += (n); \
      break;
00041
/* Switch arms that accept one name character and advance ptr past it.
   BT_NONASCII is first validated with IS_NAME_CHAR_MINBPC and then shares
   the advance with the single-unit name byte types; multi-byte leads are
   delegated to CHECK_NAME_CASE.  A character that is not a name character
   stores ptr in *nextTokPtr and returns XML_TOK_INVALID.  Macro arguments
   are parenthesised, matching INVALID_LEAD_CASE. */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
  case BT_DIGIT: \
  case BT_NAME: \
  case BT_MINUS: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
00058
/* Expands to the `case BT_LEAD<n>:` arm used for the first character of a
   name.  Returns XML_TOK_PARTIAL_CHAR when fewer than n bytes remain,
   returns XML_TOK_INVALID (with *nextTokPtr = ptr) when the n-byte character
   is not a name-start character, and otherwise advances ptr by n and leaves
   the switch.  Macro arguments are parenthesised, matching
   INVALID_LEAD_CASE. */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
    case BT_LEAD ## n: \
      if ((end) - (ptr) < (n)) \
        return XML_TOK_PARTIAL_CHAR; \
      if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      (ptr) += (n); \
      break;
00069
/* Switch arms that accept one name-start character and advance ptr past it.
   BT_NONASCII is first validated with IS_NMSTRT_CHAR_MINBPC and then shares
   the advance with BT_NMSTRT/BT_HEX; multi-byte leads are delegated to
   CHECK_NMSTRT_CASE.  A character that is not a name-start character stores
   ptr in *nextTokPtr and returns XML_TOK_INVALID.  Macro arguments are
   parenthesised, matching INVALID_LEAD_CASE. */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
00083
/* Name-decoration hook: unless the including file supplies its own PREFIX,
   identifiers are used undecorated. */
#if !defined(PREFIX)
#define PREFIX(ident) ident
#endif
00087
00088
00089
00090 static
00091 int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
00092 const char **nextTokPtr)
00093 {
00094 if (ptr != end) {
00095 if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
00096 *nextTokPtr = ptr;
00097 return XML_TOK_INVALID;
00098 }
00099 ptr += MINBPC(enc);
00100 while (ptr != end) {
00101 switch (BYTE_TYPE(enc, ptr)) {
00102 INVALID_CASES(ptr, nextTokPtr)
00103 case BT_MINUS:
00104 if ((ptr += MINBPC(enc)) == end)
00105 return XML_TOK_PARTIAL;
00106 if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
00107 if ((ptr += MINBPC(enc)) == end)
00108 return XML_TOK_PARTIAL;
00109 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00110 *nextTokPtr = ptr;
00111 return XML_TOK_INVALID;
00112 }
00113 *nextTokPtr = ptr + MINBPC(enc);
00114 return XML_TOK_COMMENT;
00115 }
00116 break;
00117 default:
00118 ptr += MINBPC(enc);
00119 break;
00120 }
00121 }
00122 }
00123 return XML_TOK_PARTIAL;
00124 }
00125
00126
00127
00128 static
00129 int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
00130 const char **nextTokPtr)
00131 {
00132 if (ptr == end)
00133 return XML_TOK_PARTIAL;
00134 switch (BYTE_TYPE(enc, ptr)) {
00135 case BT_MINUS:
00136 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00137 case BT_LSQB:
00138 *nextTokPtr = ptr + MINBPC(enc);
00139 return XML_TOK_COND_SECT_OPEN;
00140 case BT_NMSTRT:
00141 case BT_HEX:
00142 ptr += MINBPC(enc);
00143 break;
00144 default:
00145 *nextTokPtr = ptr;
00146 return XML_TOK_INVALID;
00147 }
00148 while (ptr != end) {
00149 switch (BYTE_TYPE(enc, ptr)) {
00150 case BT_PERCNT:
00151 if (ptr + MINBPC(enc) == end)
00152 return XML_TOK_PARTIAL;
00153
00154 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
00155 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
00156 *nextTokPtr = ptr;
00157 return XML_TOK_INVALID;
00158 }
00159
00160 case BT_S: case BT_CR: case BT_LF:
00161 *nextTokPtr = ptr;
00162 return XML_TOK_DECL_OPEN;
00163 case BT_NMSTRT:
00164 case BT_HEX:
00165 ptr += MINBPC(enc);
00166 break;
00167 default:
00168 *nextTokPtr = ptr;
00169 return XML_TOK_INVALID;
00170 }
00171 }
00172 return XML_TOK_PARTIAL;
00173 }
00174
00175 static
00176 int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr)
00177 {
00178 int upper = 0;
00179 *tokPtr = XML_TOK_PI;
00180 if (end - ptr != MINBPC(enc)*3)
00181 return 1;
00182 switch (BYTE_TO_ASCII(enc, ptr)) {
00183 case ASCII_x:
00184 break;
00185 case ASCII_X:
00186 upper = 1;
00187 break;
00188 default:
00189 return 1;
00190 }
00191 ptr += MINBPC(enc);
00192 switch (BYTE_TO_ASCII(enc, ptr)) {
00193 case ASCII_m:
00194 break;
00195 case ASCII_M:
00196 upper = 1;
00197 break;
00198 default:
00199 return 1;
00200 }
00201 ptr += MINBPC(enc);
00202 switch (BYTE_TO_ASCII(enc, ptr)) {
00203 case ASCII_l:
00204 break;
00205 case ASCII_L:
00206 upper = 1;
00207 break;
00208 default:
00209 return 1;
00210 }
00211 if (upper)
00212 return 0;
00213 *tokPtr = XML_TOK_XML_DECL;
00214 return 1;
00215 }
00216
00217
00218
00219 static
00220 int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
00221 const char **nextTokPtr)
00222 {
00223 int tok;
00224 const char *target = ptr;
00225 if (ptr == end)
00226 return XML_TOK_PARTIAL;
00227 switch (BYTE_TYPE(enc, ptr)) {
00228 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00229 default:
00230 *nextTokPtr = ptr;
00231 return XML_TOK_INVALID;
00232 }
00233 while (ptr != end) {
00234 switch (BYTE_TYPE(enc, ptr)) {
00235 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00236 case BT_S: case BT_CR: case BT_LF:
00237 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
00238 *nextTokPtr = ptr;
00239 return XML_TOK_INVALID;
00240 }
00241 ptr += MINBPC(enc);
00242 while (ptr != end) {
00243 switch (BYTE_TYPE(enc, ptr)) {
00244 INVALID_CASES(ptr, nextTokPtr)
00245 case BT_QUEST:
00246 ptr += MINBPC(enc);
00247 if (ptr == end)
00248 return XML_TOK_PARTIAL;
00249 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00250 *nextTokPtr = ptr + MINBPC(enc);
00251 return tok;
00252 }
00253 break;
00254 default:
00255 ptr += MINBPC(enc);
00256 break;
00257 }
00258 }
00259 return XML_TOK_PARTIAL;
00260 case BT_QUEST:
00261 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
00262 *nextTokPtr = ptr;
00263 return XML_TOK_INVALID;
00264 }
00265 ptr += MINBPC(enc);
00266 if (ptr == end)
00267 return XML_TOK_PARTIAL;
00268 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00269 *nextTokPtr = ptr + MINBPC(enc);
00270 return tok;
00271 }
00272
00273 default:
00274 *nextTokPtr = ptr;
00275 return XML_TOK_INVALID;
00276 }
00277 }
00278 return XML_TOK_PARTIAL;
00279 }
00280
00281
00282 static
00283 int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
00284 const char **nextTokPtr)
00285 {
00286 static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB };
00287 int i;
00288
00289 if (end - ptr < 6 * MINBPC(enc))
00290 return XML_TOK_PARTIAL;
00291 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
00292 if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
00293 *nextTokPtr = ptr;
00294 return XML_TOK_INVALID;
00295 }
00296 }
00297 *nextTokPtr = ptr;
00298 return XML_TOK_CDATA_SECT_OPEN;
00299 }
00300
00301 static
00302 int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
00303 const char **nextTokPtr)
00304 {
00305 if (ptr == end)
00306 return XML_TOK_NONE;
00307 if (MINBPC(enc) > 1) {
00308 size_t n = end - ptr;
00309 if (n & (MINBPC(enc) - 1)) {
00310 n &= ~(MINBPC(enc) - 1);
00311 if (n == 0)
00312 return XML_TOK_PARTIAL;
00313 end = ptr + n;
00314 }
00315 }
00316 switch (BYTE_TYPE(enc, ptr)) {
00317 case BT_RSQB:
00318 ptr += MINBPC(enc);
00319 if (ptr == end)
00320 return XML_TOK_PARTIAL;
00321 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
00322 break;
00323 ptr += MINBPC(enc);
00324 if (ptr == end)
00325 return XML_TOK_PARTIAL;
00326 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00327 ptr -= MINBPC(enc);
00328 break;
00329 }
00330 *nextTokPtr = ptr + MINBPC(enc);
00331 return XML_TOK_CDATA_SECT_CLOSE;
00332 case BT_CR:
00333 ptr += MINBPC(enc);
00334 if (ptr == end)
00335 return XML_TOK_PARTIAL;
00336 if (BYTE_TYPE(enc, ptr) == BT_LF)
00337 ptr += MINBPC(enc);
00338 *nextTokPtr = ptr;
00339 return XML_TOK_DATA_NEWLINE;
00340 case BT_LF:
00341 *nextTokPtr = ptr + MINBPC(enc);
00342 return XML_TOK_DATA_NEWLINE;
00343 INVALID_CASES(ptr, nextTokPtr)
00344 default:
00345 ptr += MINBPC(enc);
00346 break;
00347 }
00348 while (ptr != end) {
00349 switch (BYTE_TYPE(enc, ptr)) {
00350 #define LEAD_CASE(n) \
00351 case BT_LEAD ## n: \
00352 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
00353 *nextTokPtr = ptr; \
00354 return XML_TOK_DATA_CHARS; \
00355 } \
00356 ptr += n; \
00357 break;
00358 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
00359 #undef LEAD_CASE
00360 case BT_NONXML:
00361 case BT_MALFORM:
00362 case BT_TRAIL:
00363 case BT_CR:
00364 case BT_LF:
00365 case BT_RSQB:
00366 *nextTokPtr = ptr;
00367 return XML_TOK_DATA_CHARS;
00368 default:
00369 ptr += MINBPC(enc);
00370 break;
00371 }
00372 }
00373 *nextTokPtr = ptr;
00374 return XML_TOK_DATA_CHARS;
00375 }
00376
00377
00378
00379 static
00380 int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
00381 const char **nextTokPtr)
00382 {
00383 if (ptr == end)
00384 return XML_TOK_PARTIAL;
00385 switch (BYTE_TYPE(enc, ptr)) {
00386 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00387 default:
00388 *nextTokPtr = ptr;
00389 return XML_TOK_INVALID;
00390 }
00391 while (ptr != end) {
00392 switch (BYTE_TYPE(enc, ptr)) {
00393 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00394 case BT_S: case BT_CR: case BT_LF:
00395 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
00396 switch (BYTE_TYPE(enc, ptr)) {
00397 case BT_S: case BT_CR: case BT_LF:
00398 break;
00399 case BT_GT:
00400 *nextTokPtr = ptr + MINBPC(enc);
00401 return XML_TOK_END_TAG;
00402 default:
00403 *nextTokPtr = ptr;
00404 return XML_TOK_INVALID;
00405 }
00406 }
00407 return XML_TOK_PARTIAL;
00408 #ifdef XML_NS
00409 case BT_COLON:
00410
00411 ptr += MINBPC(enc);
00412 break;
00413 #endif
00414 case BT_GT:
00415 *nextTokPtr = ptr + MINBPC(enc);
00416 return XML_TOK_END_TAG;
00417 default:
00418 *nextTokPtr = ptr;
00419 return XML_TOK_INVALID;
00420 }
00421 }
00422 return XML_TOK_PARTIAL;
00423 }
00424
00425
00426
00427 static
00428 int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
00429 const char **nextTokPtr)
00430 {
00431 if (ptr != end) {
00432 switch (BYTE_TYPE(enc, ptr)) {
00433 case BT_DIGIT:
00434 case BT_HEX:
00435 break;
00436 default:
00437 *nextTokPtr = ptr;
00438 return XML_TOK_INVALID;
00439 }
00440 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
00441 switch (BYTE_TYPE(enc, ptr)) {
00442 case BT_DIGIT:
00443 case BT_HEX:
00444 break;
00445 case BT_SEMI:
00446 *nextTokPtr = ptr + MINBPC(enc);
00447 return XML_TOK_CHAR_REF;
00448 default:
00449 *nextTokPtr = ptr;
00450 return XML_TOK_INVALID;
00451 }
00452 }
00453 }
00454 return XML_TOK_PARTIAL;
00455 }
00456
00457
00458
00459 static
00460 int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
00461 const char **nextTokPtr)
00462 {
00463 if (ptr != end) {
00464 if (CHAR_MATCHES(enc, ptr, ASCII_x))
00465 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00466 switch (BYTE_TYPE(enc, ptr)) {
00467 case BT_DIGIT:
00468 break;
00469 default:
00470 *nextTokPtr = ptr;
00471 return XML_TOK_INVALID;
00472 }
00473 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
00474 switch (BYTE_TYPE(enc, ptr)) {
00475 case BT_DIGIT:
00476 break;
00477 case BT_SEMI:
00478 *nextTokPtr = ptr + MINBPC(enc);
00479 return XML_TOK_CHAR_REF;
00480 default:
00481 *nextTokPtr = ptr;
00482 return XML_TOK_INVALID;
00483 }
00484 }
00485 }
00486 return XML_TOK_PARTIAL;
00487 }
00488
00489
00490
00491 static
00492 int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
00493 const char **nextTokPtr)
00494 {
00495 if (ptr == end)
00496 return XML_TOK_PARTIAL;
00497 switch (BYTE_TYPE(enc, ptr)) {
00498 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00499 case BT_NUM:
00500 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00501 default:
00502 *nextTokPtr = ptr;
00503 return XML_TOK_INVALID;
00504 }
00505 while (ptr != end) {
00506 switch (BYTE_TYPE(enc, ptr)) {
00507 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00508 case BT_SEMI:
00509 *nextTokPtr = ptr + MINBPC(enc);
00510 return XML_TOK_ENTITY_REF;
00511 default:
00512 *nextTokPtr = ptr;
00513 return XML_TOK_INVALID;
00514 }
00515 }
00516 return XML_TOK_PARTIAL;
00517 }
00518
00519
00520
00521 static
00522 int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
00523 const char **nextTokPtr)
00524 {
00525 #ifdef XML_NS
00526 int hadColon = 0;
00527 #endif
00528 while (ptr != end) {
00529 switch (BYTE_TYPE(enc, ptr)) {
00530 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00531 #ifdef XML_NS
00532 case BT_COLON:
00533 if (hadColon) {
00534 *nextTokPtr = ptr;
00535 return XML_TOK_INVALID;
00536 }
00537 hadColon = 1;
00538 ptr += MINBPC(enc);
00539 if (ptr == end)
00540 return XML_TOK_PARTIAL;
00541 switch (BYTE_TYPE(enc, ptr)) {
00542 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00543 default:
00544 *nextTokPtr = ptr;
00545 return XML_TOK_INVALID;
00546 }
00547 break;
00548 #endif
00549 case BT_S: case BT_CR: case BT_LF:
00550 for (;;) {
00551 int t;
00552
00553 ptr += MINBPC(enc);
00554 if (ptr == end)
00555 return XML_TOK_PARTIAL;
00556 t = BYTE_TYPE(enc, ptr);
00557 if (t == BT_EQUALS)
00558 break;
00559 switch (t) {
00560 case BT_S:
00561 case BT_LF:
00562 case BT_CR:
00563 break;
00564 default:
00565 *nextTokPtr = ptr;
00566 return XML_TOK_INVALID;
00567 }
00568 }
00569
00570 case BT_EQUALS:
00571 {
00572 int open;
00573 #ifdef XML_NS
00574 hadColon = 0;
00575 #endif
00576 for (;;) {
00577
00578 ptr += MINBPC(enc);
00579 if (ptr == end)
00580 return XML_TOK_PARTIAL;
00581 open = BYTE_TYPE(enc, ptr);
00582 if (open == BT_QUOT || open == BT_APOS)
00583 break;
00584 switch (open) {
00585 case BT_S:
00586 case BT_LF:
00587 case BT_CR:
00588 break;
00589 default:
00590 *nextTokPtr = ptr;
00591 return XML_TOK_INVALID;
00592 }
00593 }
00594 ptr += MINBPC(enc);
00595
00596 for (;;) {
00597 int t;
00598 if (ptr == end)
00599 return XML_TOK_PARTIAL;
00600 t = BYTE_TYPE(enc, ptr);
00601 if (t == open)
00602 break;
00603 switch (t) {
00604 INVALID_CASES(ptr, nextTokPtr)
00605 case BT_AMP:
00606 {
00607 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
00608 if (tok <= 0) {
00609 if (tok == XML_TOK_INVALID)
00610 *nextTokPtr = ptr;
00611 return tok;
00612 }
00613 break;
00614 }
00615 case BT_LT:
00616 *nextTokPtr = ptr;
00617 return XML_TOK_INVALID;
00618 default:
00619 ptr += MINBPC(enc);
00620 break;
00621 }
00622 }
00623 ptr += MINBPC(enc);
00624 if (ptr == end)
00625 return XML_TOK_PARTIAL;
00626 switch (BYTE_TYPE(enc, ptr)) {
00627 case BT_S:
00628 case BT_CR:
00629 case BT_LF:
00630 break;
00631 case BT_SOL:
00632 goto sol;
00633 case BT_GT:
00634 goto gt;
00635 default:
00636 *nextTokPtr = ptr;
00637 return XML_TOK_INVALID;
00638 }
00639
00640 for (;;) {
00641 ptr += MINBPC(enc);
00642 if (ptr == end)
00643 return XML_TOK_PARTIAL;
00644 switch (BYTE_TYPE(enc, ptr)) {
00645 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00646 case BT_S: case BT_CR: case BT_LF:
00647 continue;
00648 case BT_GT:
00649 gt:
00650 *nextTokPtr = ptr + MINBPC(enc);
00651 return XML_TOK_START_TAG_WITH_ATTS;
00652 case BT_SOL:
00653 sol:
00654 ptr += MINBPC(enc);
00655 if (ptr == end)
00656 return XML_TOK_PARTIAL;
00657 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00658 *nextTokPtr = ptr;
00659 return XML_TOK_INVALID;
00660 }
00661 *nextTokPtr = ptr + MINBPC(enc);
00662 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
00663 default:
00664 *nextTokPtr = ptr;
00665 return XML_TOK_INVALID;
00666 }
00667 break;
00668 }
00669 break;
00670 }
00671 default:
00672 *nextTokPtr = ptr;
00673 return XML_TOK_INVALID;
00674 }
00675 }
00676 return XML_TOK_PARTIAL;
00677 }
00678
00679
00680
00681 static
00682 int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
00683 const char **nextTokPtr)
00684 {
00685 #ifdef XML_NS
00686 int hadColon;
00687 #endif
00688 if (ptr == end)
00689 return XML_TOK_PARTIAL;
00690 switch (BYTE_TYPE(enc, ptr)) {
00691 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00692 case BT_EXCL:
00693 if ((ptr += MINBPC(enc)) == end)
00694 return XML_TOK_PARTIAL;
00695 switch (BYTE_TYPE(enc, ptr)) {
00696 case BT_MINUS:
00697 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00698 case BT_LSQB:
00699 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00700 }
00701 *nextTokPtr = ptr;
00702 return XML_TOK_INVALID;
00703 case BT_QUEST:
00704 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00705 case BT_SOL:
00706 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00707 default:
00708 *nextTokPtr = ptr;
00709 return XML_TOK_INVALID;
00710 }
00711 #ifdef XML_NS
00712 hadColon = 0;
00713 #endif
00714
00715 while (ptr != end) {
00716 switch (BYTE_TYPE(enc, ptr)) {
00717 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00718 #ifdef XML_NS
00719 case BT_COLON:
00720 if (hadColon) {
00721 *nextTokPtr = ptr;
00722 return XML_TOK_INVALID;
00723 }
00724 hadColon = 1;
00725 ptr += MINBPC(enc);
00726 if (ptr == end)
00727 return XML_TOK_PARTIAL;
00728 switch (BYTE_TYPE(enc, ptr)) {
00729 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00730 default:
00731 *nextTokPtr = ptr;
00732 return XML_TOK_INVALID;
00733 }
00734 break;
00735 #endif
00736 case BT_S: case BT_CR: case BT_LF:
00737 {
00738 ptr += MINBPC(enc);
00739 while (ptr != end) {
00740 switch (BYTE_TYPE(enc, ptr)) {
00741 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00742 case BT_GT:
00743 goto gt;
00744 case BT_SOL:
00745 goto sol;
00746 case BT_S: case BT_CR: case BT_LF:
00747 ptr += MINBPC(enc);
00748 continue;
00749 default:
00750 *nextTokPtr = ptr;
00751 return XML_TOK_INVALID;
00752 }
00753 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
00754 }
00755 return XML_TOK_PARTIAL;
00756 }
00757 case BT_GT:
00758 gt:
00759 *nextTokPtr = ptr + MINBPC(enc);
00760 return XML_TOK_START_TAG_NO_ATTS;
00761 case BT_SOL:
00762 sol:
00763 ptr += MINBPC(enc);
00764 if (ptr == end)
00765 return XML_TOK_PARTIAL;
00766 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00767 *nextTokPtr = ptr;
00768 return XML_TOK_INVALID;
00769 }
00770 *nextTokPtr = ptr + MINBPC(enc);
00771 return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
00772 default:
00773 *nextTokPtr = ptr;
00774 return XML_TOK_INVALID;
00775 }
00776 }
00777 return XML_TOK_PARTIAL;
00778 }
00779
00780 static
00781 int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
00782 const char **nextTokPtr)
00783 {
00784 if (ptr == end)
00785 return XML_TOK_NONE;
00786 if (MINBPC(enc) > 1) {
00787 size_t n = end - ptr;
00788 if (n & (MINBPC(enc) - 1)) {
00789 n &= ~(MINBPC(enc) - 1);
00790 if (n == 0)
00791 return XML_TOK_PARTIAL;
00792 end = ptr + n;
00793 }
00794 }
00795 switch (BYTE_TYPE(enc, ptr)) {
00796 case BT_LT:
00797 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00798 case BT_AMP:
00799 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00800 case BT_CR:
00801 ptr += MINBPC(enc);
00802 if (ptr == end)
00803 return XML_TOK_TRAILING_CR;
00804 if (BYTE_TYPE(enc, ptr) == BT_LF)
00805 ptr += MINBPC(enc);
00806 *nextTokPtr = ptr;
00807 return XML_TOK_DATA_NEWLINE;
00808 case BT_LF:
00809 *nextTokPtr = ptr + MINBPC(enc);
00810 return XML_TOK_DATA_NEWLINE;
00811 case BT_RSQB:
00812 ptr += MINBPC(enc);
00813 if (ptr == end)
00814 return XML_TOK_TRAILING_RSQB;
00815 if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
00816 break;
00817 ptr += MINBPC(enc);
00818 if (ptr == end)
00819 return XML_TOK_TRAILING_RSQB;
00820 if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
00821 ptr -= MINBPC(enc);
00822 break;
00823 }
00824 *nextTokPtr = ptr;
00825 return XML_TOK_INVALID;
00826 INVALID_CASES(ptr, nextTokPtr)
00827 default:
00828 ptr += MINBPC(enc);
00829 break;
00830 }
00831 while (ptr != end) {
00832 switch (BYTE_TYPE(enc, ptr)) {
00833 #define LEAD_CASE(n) \
00834 case BT_LEAD ## n: \
00835 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
00836 *nextTokPtr = ptr; \
00837 return XML_TOK_DATA_CHARS; \
00838 } \
00839 ptr += n; \
00840 break;
00841 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
00842 #undef LEAD_CASE
00843 case BT_RSQB:
00844 if (ptr + MINBPC(enc) != end) {
00845 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
00846 ptr += MINBPC(enc);
00847 break;
00848 }
00849 if (ptr + 2*MINBPC(enc) != end) {
00850 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
00851 ptr += MINBPC(enc);
00852 break;
00853 }
00854 *nextTokPtr = ptr + 2*MINBPC(enc);
00855 return XML_TOK_INVALID;
00856 }
00857 }
00858
00859 case BT_AMP:
00860 case BT_LT:
00861 case BT_NONXML:
00862 case BT_MALFORM:
00863 case BT_TRAIL:
00864 case BT_CR:
00865 case BT_LF:
00866 *nextTokPtr = ptr;
00867 return XML_TOK_DATA_CHARS;
00868 default:
00869 ptr += MINBPC(enc);
00870 break;
00871 }
00872 }
00873 *nextTokPtr = ptr;
00874 return XML_TOK_DATA_CHARS;
00875 }
00876
00877
00878
00879 static
00880 int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
00881 const char **nextTokPtr)
00882 {
00883 if (ptr == end)
00884 return XML_TOK_PARTIAL;
00885 switch (BYTE_TYPE(enc, ptr)) {
00886 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00887 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
00888 *nextTokPtr = ptr;
00889 return XML_TOK_PERCENT;
00890 default:
00891 *nextTokPtr = ptr;
00892 return XML_TOK_INVALID;
00893 }
00894 while (ptr != end) {
00895 switch (BYTE_TYPE(enc, ptr)) {
00896 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00897 case BT_SEMI:
00898 *nextTokPtr = ptr + MINBPC(enc);
00899 return XML_TOK_PARAM_ENTITY_REF;
00900 default:
00901 *nextTokPtr = ptr;
00902 return XML_TOK_INVALID;
00903 }
00904 }
00905 return XML_TOK_PARTIAL;
00906 }
00907
00908 static
00909 int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
00910 const char **nextTokPtr)
00911 {
00912 if (ptr == end)
00913 return XML_TOK_PARTIAL;
00914 switch (BYTE_TYPE(enc, ptr)) {
00915 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
00916 default:
00917 *nextTokPtr = ptr;
00918 return XML_TOK_INVALID;
00919 }
00920 while (ptr != end) {
00921 switch (BYTE_TYPE(enc, ptr)) {
00922 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
00923 case BT_CR: case BT_LF: case BT_S:
00924 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
00925 *nextTokPtr = ptr;
00926 return XML_TOK_POUND_NAME;
00927 default:
00928 *nextTokPtr = ptr;
00929 return XML_TOK_INVALID;
00930 }
00931 }
00932 return -XML_TOK_POUND_NAME;
00933 }
00934
00935 static
00936 int PREFIX(scanLit)(int open, const ENCODING *enc,
00937 const char *ptr, const char *end,
00938 const char **nextTokPtr)
00939 {
00940 while (ptr != end) {
00941 int t = BYTE_TYPE(enc, ptr);
00942 switch (t) {
00943 INVALID_CASES(ptr, nextTokPtr)
00944 case BT_QUOT:
00945 case BT_APOS:
00946 ptr += MINBPC(enc);
00947 if (t != open)
00948 break;
00949 if (ptr == end)
00950 return -XML_TOK_LITERAL;
00951 *nextTokPtr = ptr;
00952 switch (BYTE_TYPE(enc, ptr)) {
00953 case BT_S: case BT_CR: case BT_LF:
00954 case BT_GT: case BT_PERCNT: case BT_LSQB:
00955 return XML_TOK_LITERAL;
00956 default:
00957 return XML_TOK_INVALID;
00958 }
00959 default:
00960 ptr += MINBPC(enc);
00961 break;
00962 }
00963 }
00964 return XML_TOK_PARTIAL;
00965 }
00966
00967 static
00968 int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
00969 const char **nextTokPtr)
00970 {
00971 int tok;
00972 if (ptr == end)
00973 return XML_TOK_NONE;
00974 if (MINBPC(enc) > 1) {
00975 size_t n = end - ptr;
00976 if (n & (MINBPC(enc) - 1)) {
00977 n &= ~(MINBPC(enc) - 1);
00978 if (n == 0)
00979 return XML_TOK_PARTIAL;
00980 end = ptr + n;
00981 }
00982 }
00983 switch (BYTE_TYPE(enc, ptr)) {
00984 case BT_QUOT:
00985 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
00986 case BT_APOS:
00987 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
00988 case BT_LT:
00989 {
00990 ptr += MINBPC(enc);
00991 if (ptr == end)
00992 return XML_TOK_PARTIAL;
00993 switch (BYTE_TYPE(enc, ptr)) {
00994 case BT_EXCL:
00995 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00996 case BT_QUEST:
00997 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
00998 case BT_NMSTRT:
00999 case BT_HEX:
01000 case BT_NONASCII:
01001 case BT_LEAD2:
01002 case BT_LEAD3:
01003 case BT_LEAD4:
01004 *nextTokPtr = ptr - MINBPC(enc);
01005 return XML_TOK_INSTANCE_START;
01006 }
01007 *nextTokPtr = ptr;
01008 return XML_TOK_INVALID;
01009 }
01010 case BT_CR:
01011 if (ptr + MINBPC(enc) == end)
01012 return -XML_TOK_PROLOG_S;
01013
01014 case BT_S: case BT_LF:
01015 for (;;) {
01016 ptr += MINBPC(enc);
01017 if (ptr == end)
01018 break;
01019 switch (BYTE_TYPE(enc, ptr)) {
01020 case BT_S: case BT_LF:
01021 break;
01022 case BT_CR:
01023
01024 if (ptr + MINBPC(enc) != end)
01025 break;
01026
01027 default:
01028 *nextTokPtr = ptr;
01029 return XML_TOK_PROLOG_S;
01030 }
01031 }
01032 *nextTokPtr = ptr;
01033 return XML_TOK_PROLOG_S;
01034 case BT_PERCNT:
01035 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
01036 case BT_COMMA:
01037 *nextTokPtr = ptr + MINBPC(enc);
01038 return XML_TOK_COMMA;
01039 case BT_LSQB:
01040 *nextTokPtr = ptr + MINBPC(enc);
01041 return XML_TOK_OPEN_BRACKET;
01042 case BT_RSQB:
01043 ptr += MINBPC(enc);
01044 if (ptr == end)
01045 return -XML_TOK_CLOSE_BRACKET;
01046 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
01047 if (ptr + MINBPC(enc) == end)
01048 return XML_TOK_PARTIAL;
01049 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
01050 *nextTokPtr = ptr + 2*MINBPC(enc);
01051 return XML_TOK_COND_SECT_CLOSE;
01052 }
01053 }
01054 *nextTokPtr = ptr;
01055 return XML_TOK_CLOSE_BRACKET;
01056 case BT_LPAR:
01057 *nextTokPtr = ptr + MINBPC(enc);
01058 return XML_TOK_OPEN_PAREN;
01059 case BT_RPAR:
01060 ptr += MINBPC(enc);
01061 if (ptr == end)
01062 return -XML_TOK_CLOSE_PAREN;
01063 switch (BYTE_TYPE(enc, ptr)) {
01064 case BT_AST:
01065 *nextTokPtr = ptr + MINBPC(enc);
01066 return XML_TOK_CLOSE_PAREN_ASTERISK;
01067 case BT_QUEST:
01068 *nextTokPtr = ptr + MINBPC(enc);
01069 return XML_TOK_CLOSE_PAREN_QUESTION;
01070 case BT_PLUS:
01071 *nextTokPtr = ptr + MINBPC(enc);
01072 return XML_TOK_CLOSE_PAREN_PLUS;
01073 case BT_CR: case BT_LF: case BT_S:
01074 case BT_GT: case BT_COMMA: case BT_VERBAR:
01075 case BT_RPAR:
01076 *nextTokPtr = ptr;
01077 return XML_TOK_CLOSE_PAREN;
01078 }
01079 *nextTokPtr = ptr;
01080 return XML_TOK_INVALID;
01081 case BT_VERBAR:
01082 *nextTokPtr = ptr + MINBPC(enc);
01083 return XML_TOK_OR;
01084 case BT_GT:
01085 *nextTokPtr = ptr + MINBPC(enc);
01086 return XML_TOK_DECL_CLOSE;
01087 case BT_NUM:
01088 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
01089 #define LEAD_CASE(n) \
01090 case BT_LEAD ## n: \
01091 if (end - ptr < n) \
01092 return XML_TOK_PARTIAL_CHAR; \
01093 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
01094 ptr += n; \
01095 tok = XML_TOK_NAME; \
01096 break; \
01097 } \
01098 if (IS_NAME_CHAR(enc, ptr, n)) { \
01099 ptr += n; \
01100 tok = XML_TOK_NMTOKEN; \
01101 break; \
01102 } \
01103 *nextTokPtr = ptr; \
01104 return XML_TOK_INVALID;
01105 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
01106 #undef LEAD_CASE
01107 case BT_NMSTRT:
01108 case BT_HEX:
01109 tok = XML_TOK_NAME;
01110 ptr += MINBPC(enc);
01111 break;
01112 case BT_DIGIT:
01113 case BT_NAME:
01114 case BT_MINUS:
01115 #ifdef XML_NS
01116 case BT_COLON:
01117 #endif
01118 tok = XML_TOK_NMTOKEN;
01119 ptr += MINBPC(enc);
01120 break;
01121 case BT_NONASCII:
01122 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
01123 ptr += MINBPC(enc);
01124 tok = XML_TOK_NAME;
01125 break;
01126 }
01127 if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
01128 ptr += MINBPC(enc);
01129 tok = XML_TOK_NMTOKEN;
01130 break;
01131 }
01132
01133 default:
01134 *nextTokPtr = ptr;
01135 return XML_TOK_INVALID;
01136 }
01137 while (ptr != end) {
01138 switch (BYTE_TYPE(enc, ptr)) {
01139 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
01140 case BT_GT: case BT_RPAR: case BT_COMMA:
01141 case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
01142 case BT_S: case BT_CR: case BT_LF:
01143 *nextTokPtr = ptr;
01144 return tok;
01145 #ifdef XML_NS
01146 case BT_COLON:
01147 ptr += MINBPC(enc);
01148 switch (tok) {
01149 case XML_TOK_NAME:
01150 if (ptr == end)
01151 return XML_TOK_PARTIAL;
01152 tok = XML_TOK_PREFIXED_NAME;
01153 switch (BYTE_TYPE(enc, ptr)) {
01154 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
01155 default:
01156 tok = XML_TOK_NMTOKEN;
01157 break;
01158 }
01159 break;
01160 case XML_TOK_PREFIXED_NAME:
01161 tok = XML_TOK_NMTOKEN;
01162 break;
01163 }
01164 break;
01165 #endif
01166 case BT_PLUS:
01167 if (tok == XML_TOK_NMTOKEN) {
01168 *nextTokPtr = ptr;
01169 return XML_TOK_INVALID;
01170 }
01171 *nextTokPtr = ptr + MINBPC(enc);
01172 return XML_TOK_NAME_PLUS;
01173 case BT_AST:
01174 if (tok == XML_TOK_NMTOKEN) {
01175 *nextTokPtr = ptr;
01176 return XML_TOK_INVALID;
01177 }
01178 *nextTokPtr = ptr + MINBPC(enc);
01179 return XML_TOK_NAME_ASTERISK;
01180 case BT_QUEST:
01181 if (tok == XML_TOK_NMTOKEN) {
01182 *nextTokPtr = ptr;
01183 return XML_TOK_INVALID;
01184 }
01185 *nextTokPtr = ptr + MINBPC(enc);
01186 return XML_TOK_NAME_QUESTION;
01187 default:
01188 *nextTokPtr = ptr;
01189 return XML_TOK_INVALID;
01190 }
01191 }
01192 return -tok;
01193 }
01194
01195 static
01196 int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
01197 const char **nextTokPtr)
01198 {
01199 const char *start;
01200 if (ptr == end)
01201 return XML_TOK_NONE;
01202 start = ptr;
01203 while (ptr != end) {
01204 switch (BYTE_TYPE(enc, ptr)) {
01205 #define LEAD_CASE(n) \
01206 case BT_LEAD ## n: ptr += n; break;
01207 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
01208 #undef LEAD_CASE
01209 case BT_AMP:
01210 if (ptr == start)
01211 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
01212 *nextTokPtr = ptr;
01213 return XML_TOK_DATA_CHARS;
01214 case BT_LT:
01215
01216 *nextTokPtr = ptr;
01217 return XML_TOK_INVALID;
01218 case BT_LF:
01219 if (ptr == start) {
01220 *nextTokPtr = ptr + MINBPC(enc);
01221 return XML_TOK_DATA_NEWLINE;
01222 }
01223 *nextTokPtr = ptr;
01224 return XML_TOK_DATA_CHARS;
01225 case BT_CR:
01226 if (ptr == start) {
01227 ptr += MINBPC(enc);
01228 if (ptr == end)
01229 return XML_TOK_TRAILING_CR;
01230 if (BYTE_TYPE(enc, ptr) == BT_LF)
01231 ptr += MINBPC(enc);
01232 *nextTokPtr = ptr;
01233 return XML_TOK_DATA_NEWLINE;
01234 }
01235 *nextTokPtr = ptr;
01236 return XML_TOK_DATA_CHARS;
01237 case BT_S:
01238 if (ptr == start) {
01239 *nextTokPtr = ptr + MINBPC(enc);
01240 return XML_TOK_ATTRIBUTE_VALUE_S;
01241 }
01242 *nextTokPtr = ptr;
01243 return XML_TOK_DATA_CHARS;
01244 default:
01245 ptr += MINBPC(enc);
01246 break;
01247 }
01248 }
01249 *nextTokPtr = ptr;
01250 return XML_TOK_DATA_CHARS;
01251 }
01252
01253 static
01254 int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
01255 const char **nextTokPtr)
01256 {
01257 const char *start;
01258 if (ptr == end)
01259 return XML_TOK_NONE;
01260 start = ptr;
01261 while (ptr != end) {
01262 switch (BYTE_TYPE(enc, ptr)) {
01263 #define LEAD_CASE(n) \
01264 case BT_LEAD ## n: ptr += n; break;
01265 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
01266 #undef LEAD_CASE
01267 case BT_AMP:
01268 if (ptr == start)
01269 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
01270 *nextTokPtr = ptr;
01271 return XML_TOK_DATA_CHARS;
01272 case BT_PERCNT:
01273 if (ptr == start) {
01274 int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
01275 end, nextTokPtr);
01276 return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
01277 }
01278 *nextTokPtr = ptr;
01279 return XML_TOK_DATA_CHARS;
01280 case BT_LF:
01281 if (ptr == start) {
01282 *nextTokPtr = ptr + MINBPC(enc);
01283 return XML_TOK_DATA_NEWLINE;
01284 }
01285 *nextTokPtr = ptr;
01286 return XML_TOK_DATA_CHARS;
01287 case BT_CR:
01288 if (ptr == start) {
01289 ptr += MINBPC(enc);
01290 if (ptr == end)
01291 return XML_TOK_TRAILING_CR;
01292 if (BYTE_TYPE(enc, ptr) == BT_LF)
01293 ptr += MINBPC(enc);
01294 *nextTokPtr = ptr;
01295 return XML_TOK_DATA_NEWLINE;
01296 }
01297 *nextTokPtr = ptr;
01298 return XML_TOK_DATA_CHARS;
01299 default:
01300 ptr += MINBPC(enc);
01301 break;
01302 }
01303 }
01304 *nextTokPtr = ptr;
01305 return XML_TOK_DATA_CHARS;
01306 }
01307
01308 #ifdef XML_DTD
01309
01310 static
01311 int PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
01312 const char **nextTokPtr)
01313 {
01314 int level = 0;
01315 if (MINBPC(enc) > 1) {
01316 size_t n = end - ptr;
01317 if (n & (MINBPC(enc) - 1)) {
01318 n &= ~(MINBPC(enc) - 1);
01319 end = ptr + n;
01320 }
01321 }
01322 while (ptr != end) {
01323 switch (BYTE_TYPE(enc, ptr)) {
01324 INVALID_CASES(ptr, nextTokPtr)
01325 case BT_LT:
01326 if ((ptr += MINBPC(enc)) == end)
01327 return XML_TOK_PARTIAL;
01328 if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
01329 if ((ptr += MINBPC(enc)) == end)
01330 return XML_TOK_PARTIAL;
01331 if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
01332 ++level;
01333 ptr += MINBPC(enc);
01334 }
01335 }
01336 break;
01337 case BT_RSQB:
01338 if ((ptr += MINBPC(enc)) == end)
01339 return XML_TOK_PARTIAL;
01340 if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
01341 if ((ptr += MINBPC(enc)) == end)
01342 return XML_TOK_PARTIAL;
01343 if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
01344 ptr += MINBPC(enc);
01345 if (level == 0) {
01346 *nextTokPtr = ptr;
01347 return XML_TOK_IGNORE_SECT;
01348 }
01349 --level;
01350 }
01351 }
01352 break;
01353 default:
01354 ptr += MINBPC(enc);
01355 break;
01356 }
01357 }
01358 return XML_TOK_PARTIAL;
01359 }
01360
01361 #endif
01362
01363 static
01364 int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
01365 const char **badPtr)
01366 {
01367 ptr += MINBPC(enc);
01368 end -= MINBPC(enc);
01369 for (; ptr != end; ptr += MINBPC(enc)) {
01370 switch (BYTE_TYPE(enc, ptr)) {
01371 case BT_DIGIT:
01372 case BT_HEX:
01373 case BT_MINUS:
01374 case BT_APOS:
01375 case BT_LPAR:
01376 case BT_RPAR:
01377 case BT_PLUS:
01378 case BT_COMMA:
01379 case BT_SOL:
01380 case BT_EQUALS:
01381 case BT_QUEST:
01382 case BT_CR:
01383 case BT_LF:
01384 case BT_SEMI:
01385 case BT_EXCL:
01386 case BT_AST:
01387 case BT_PERCNT:
01388 case BT_NUM:
01389 #ifdef XML_NS
01390 case BT_COLON:
01391 #endif
01392 break;
01393 case BT_S:
01394 if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
01395 *badPtr = ptr;
01396 return 0;
01397 }
01398 break;
01399 case BT_NAME:
01400 case BT_NMSTRT:
01401 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
01402 break;
01403 default:
01404 switch (BYTE_TO_ASCII(enc, ptr)) {
01405 case 0x24:
01406 case 0x40:
01407 break;
01408 default:
01409 *badPtr = ptr;
01410 return 0;
01411 }
01412 break;
01413 }
01414 }
01415 return 1;
01416 }
01417
01418
01419
01420
01421
01422 static
01423 int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
01424 int attsMax, ATTRIBUTE *atts)
01425 {
01426 enum { other, inName, inValue } state = inName;
01427 int nAtts = 0;
01428 int open = 0;
01429
01430
01431 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
01432 switch (BYTE_TYPE(enc, ptr)) {
01433 #define START_NAME \
01434 if (state == other) { \
01435 if (nAtts < attsMax) { \
01436 atts[nAtts].name = ptr; \
01437 atts[nAtts].normalized = 1; \
01438 } \
01439 state = inName; \
01440 }
01441 #define LEAD_CASE(n) \
01442 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
01443 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
01444 #undef LEAD_CASE
01445 case BT_NONASCII:
01446 case BT_NMSTRT:
01447 case BT_HEX:
01448 START_NAME
01449 break;
01450 #undef START_NAME
01451 case BT_QUOT:
01452 if (state != inValue) {
01453 if (nAtts < attsMax)
01454 atts[nAtts].valuePtr = ptr + MINBPC(enc);
01455 state = inValue;
01456 open = BT_QUOT;
01457 }
01458 else if (open == BT_QUOT) {
01459 state = other;
01460 if (nAtts < attsMax)
01461 atts[nAtts].valueEnd = ptr;
01462 nAtts++;
01463 }
01464 break;
01465 case BT_APOS:
01466 if (state != inValue) {
01467 if (nAtts < attsMax)
01468 atts[nAtts].valuePtr = ptr + MINBPC(enc);
01469 state = inValue;
01470 open = BT_APOS;
01471 }
01472 else if (open == BT_APOS) {
01473 state = other;
01474 if (nAtts < attsMax)
01475 atts[nAtts].valueEnd = ptr;
01476 nAtts++;
01477 }
01478 break;
01479 case BT_AMP:
01480 if (nAtts < attsMax)
01481 atts[nAtts].normalized = 0;
01482 break;
01483 case BT_S:
01484 if (state == inName)
01485 state = other;
01486 else if (state == inValue
01487 && nAtts < attsMax
01488 && atts[nAtts].normalized
01489 && (ptr == atts[nAtts].valuePtr
01490 || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
01491 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
01492 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
01493 atts[nAtts].normalized = 0;
01494 break;
01495 case BT_CR: case BT_LF:
01496
01497
01498 if (state == inName)
01499 state = other;
01500 else if (state == inValue && nAtts < attsMax)
01501 atts[nAtts].normalized = 0;
01502 break;
01503 case BT_GT:
01504 case BT_SOL:
01505 if (state != inValue)
01506 return nAtts;
01507 break;
01508 default:
01509 break;
01510 }
01511 }
01512
01513 }
01514
01515 static
01516 int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
01517 {
01518 int result = 0;
01519
01520 ptr += 2*MINBPC(enc);
01521 if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
01522 for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
01523 int c = BYTE_TO_ASCII(enc, ptr);
01524 switch (c) {
01525 case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
01526 case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
01527 result <<= 4;
01528 result |= (c - ASCII_0);
01529 break;
01530 case ASCII_A: case ASCII_B: case ASCII_C: case ASCII_D: case ASCII_E: case ASCII_F:
01531 result <<= 4;
01532 result += 10 + (c - ASCII_A);
01533 break;
01534 case ASCII_a: case ASCII_b: case ASCII_c: case ASCII_d: case ASCII_e: case ASCII_f:
01535 result <<= 4;
01536 result += 10 + (c - ASCII_a);
01537 break;
01538 }
01539 if (result >= 0x110000)
01540 return -1;
01541 }
01542 }
01543 else {
01544 for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
01545 int c = BYTE_TO_ASCII(enc, ptr);
01546 result *= 10;
01547 result += (c - ASCII_0);
01548 if (result >= 0x110000)
01549 return -1;
01550 }
01551 }
01552 return checkCharRefNumber(result);
01553 }
01554
01555 static
01556 int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end)
01557 {
01558 switch ((end - ptr)/MINBPC(enc)) {
01559 case 2:
01560 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
01561 switch (BYTE_TO_ASCII(enc, ptr)) {
01562 case ASCII_l:
01563 return ASCII_LT;
01564 case ASCII_g:
01565 return ASCII_GT;
01566 }
01567 }
01568 break;
01569 case 3:
01570 if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
01571 ptr += MINBPC(enc);
01572 if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
01573 ptr += MINBPC(enc);
01574 if (CHAR_MATCHES(enc, ptr, ASCII_p))
01575 return ASCII_AMP;
01576 }
01577 }
01578 break;
01579 case 4:
01580 switch (BYTE_TO_ASCII(enc, ptr)) {
01581 case ASCII_q:
01582 ptr += MINBPC(enc);
01583 if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
01584 ptr += MINBPC(enc);
01585 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
01586 ptr += MINBPC(enc);
01587 if (CHAR_MATCHES(enc, ptr, ASCII_t))
01588 return ASCII_QUOT;
01589 }
01590 }
01591 break;
01592 case ASCII_a:
01593 ptr += MINBPC(enc);
01594 if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
01595 ptr += MINBPC(enc);
01596 if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
01597 ptr += MINBPC(enc);
01598 if (CHAR_MATCHES(enc, ptr, ASCII_s))
01599 return ASCII_APOS;
01600 }
01601 }
01602 break;
01603 }
01604 }
01605 return 0;
01606 }
01607
01608 static
01609 int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
01610 {
01611 for (;;) {
01612 switch (BYTE_TYPE(enc, ptr1)) {
01613 #define LEAD_CASE(n) \
01614 case BT_LEAD ## n: \
01615 if (*ptr1++ != *ptr2++) \
01616 return 0;
01617 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
01618 #undef LEAD_CASE
01619
01620 if (*ptr1++ != *ptr2++)
01621 return 0;
01622 break;
01623 case BT_NONASCII:
01624 case BT_NMSTRT:
01625 #ifdef XML_NS
01626 case BT_COLON:
01627 #endif
01628 case BT_HEX:
01629 case BT_DIGIT:
01630 case BT_NAME:
01631 case BT_MINUS:
01632 if (*ptr2++ != *ptr1++)
01633 return 0;
01634 if (MINBPC(enc) > 1) {
01635 if (*ptr2++ != *ptr1++)
01636 return 0;
01637 if (MINBPC(enc) > 2) {
01638 if (*ptr2++ != *ptr1++)
01639 return 0;
01640 if (MINBPC(enc) > 3) {
01641 if (*ptr2++ != *ptr1++)
01642 return 0;
01643 }
01644 }
01645 }
01646 break;
01647 default:
01648 if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
01649 return 1;
01650 switch (BYTE_TYPE(enc, ptr2)) {
01651 case BT_LEAD2:
01652 case BT_LEAD3:
01653 case BT_LEAD4:
01654 case BT_NONASCII:
01655 case BT_NMSTRT:
01656 #ifdef XML_NS
01657 case BT_COLON:
01658 #endif
01659 case BT_HEX:
01660 case BT_DIGIT:
01661 case BT_NAME:
01662 case BT_MINUS:
01663 return 0;
01664 default:
01665 return 1;
01666 }
01667 }
01668 }
01669
01670 }
01671
01672 static
01673 int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
01674 const char *end1, const char *ptr2)
01675 {
01676 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
01677 if (ptr1 == end1)
01678 return 0;
01679 if (!CHAR_MATCHES(enc, ptr1, *ptr2))
01680 return 0;
01681 }
01682 return ptr1 == end1;
01683 }
01684
01685 static
01686 int PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
01687 {
01688 const char *start = ptr;
01689 for (;;) {
01690 switch (BYTE_TYPE(enc, ptr)) {
01691 #define LEAD_CASE(n) \
01692 case BT_LEAD ## n: ptr += n; break;
01693 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
01694 #undef LEAD_CASE
01695 case BT_NONASCII:
01696 case BT_NMSTRT:
01697 #ifdef XML_NS
01698 case BT_COLON:
01699 #endif
01700 case BT_HEX:
01701 case BT_DIGIT:
01702 case BT_NAME:
01703 case BT_MINUS:
01704 ptr += MINBPC(enc);
01705 break;
01706 default:
01707 return ptr - start;
01708 }
01709 }
01710 }
01711
01712 static
01713 const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr)
01714 {
01715 for (;;) {
01716 switch (BYTE_TYPE(enc, ptr)) {
01717 case BT_LF:
01718 case BT_CR:
01719 case BT_S:
01720 ptr += MINBPC(enc);
01721 break;
01722 default:
01723 return ptr;
01724 }
01725 }
01726 }
01727
01728 static
01729 void PREFIX(updatePosition)(const ENCODING *enc,
01730 const char *ptr,
01731 const char *end,
01732 POSITION *pos)
01733 {
01734 while (ptr != end) {
01735 switch (BYTE_TYPE(enc, ptr)) {
01736 #define LEAD_CASE(n) \
01737 case BT_LEAD ## n: \
01738 ptr += n; \
01739 break;
01740 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
01741 #undef LEAD_CASE
01742 case BT_LF:
01743 pos->columnNumber = (unsigned)-1;
01744 pos->lineNumber++;
01745 ptr += MINBPC(enc);
01746 break;
01747 case BT_CR:
01748 pos->lineNumber++;
01749 ptr += MINBPC(enc);
01750 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
01751 ptr += MINBPC(enc);
01752 pos->columnNumber = (unsigned)-1;
01753 break;
01754 default:
01755 ptr += MINBPC(enc);
01756 break;
01757 }
01758 pos->columnNumber++;
01759 }
01760 }
01761
01762 #undef DO_LEAD_CASE
01763 #undef MULTIBYTE_CASES
01764 #undef INVALID_CASES
01765 #undef CHECK_NAME_CASE
01766 #undef CHECK_NAME_CASES
01767 #undef CHECK_NMSTRT_CASE
01768 #undef CHECK_NMSTRT_CASES