Annotation of embedaddon/expat/lib/xmltok_impl.c, revision 1.1
1.1 ! misho 1: /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
! 2: See the file COPYING for copying permission.
! 3: */
! 4:
! 5: /* This file is included! */
! 6: #ifdef XML_TOK_IMPL_C
! 7:
/* Default validity test for an n-byte sequence starting at ptr.  A file
   that includes this one may define IS_INVALID_CHAR beforehand; when it
   does not, no sequence is ever reported as invalid. */
#if !defined(IS_INVALID_CHAR)
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif
! 11:
/* Expands to the `case BT_LEAD<n>:` arm of a byte-type switch for an
   n-byte character that starts at ptr.  `enc` and `end` are taken from the
   scope in which the macro is expanded.
     - fewer than n bytes before end: returns XML_TOK_PARTIAL_CHAR
     - IS_INVALID_CHAR rejects the sequence: stores ptr through nextTokPtr
       and returns XML_TOK_INVALID
     - otherwise: advances ptr by n and leaves the switch
   n must be a literal digit: it is pasted onto BT_LEAD and is forwarded
   unparenthesized to IS_INVALID_CHAR, which may paste it as well. */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
    case BT_LEAD ## n: \
      if (end - (ptr) < n) \
        return XML_TOK_PARTIAL_CHAR; \
      if (IS_INVALID_CHAR(enc, ptr, n)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      (ptr) += n; \
      break;
! 22:
/* Expands to the switch arms shared by the scanners that only need to
   reject bad input: the three multi-byte lead cases (see INVALID_LEAD_CASE)
   followed by BT_NONXML, BT_MALFORM and BT_TRAIL, all of which store ptr
   through nextTokPtr and return XML_TOK_INVALID. */
#define INVALID_CASES(ptr, nextTokPtr) \
    INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
    INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
    INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
    case BT_NONXML: \
    case BT_MALFORM: \
    case BT_TRAIL: \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID;
! 32:
/* Expands to the `case BT_LEAD<n>:` arm used while scanning the inside of
   a name, for an n-byte character that starts at ptr.
     - fewer than n bytes before end: returns XML_TOK_PARTIAL_CHAR
     - IS_NAME_CHAR rejects the character: stores ptr through nextTokPtr and
       returns XML_TOK_INVALID
     - otherwise: advances ptr by n and leaves the switch
   n must be a literal digit: it is pasted onto BT_LEAD and is forwarded
   unparenthesized to IS_NAME_CHAR, which may paste it as well. */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
    case BT_LEAD ## n: \
      if ((end) - (ptr) < n) \
        return XML_TOK_PARTIAL_CHAR; \
      if (!IS_NAME_CHAR(enc, ptr, n)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      (ptr) += n; \
      break;
! 43:
/* Expands to every switch arm that accepts one name character at ptr:
     - BT_NONASCII: must satisfy IS_NAME_CHAR_MINBPC, otherwise ptr is
       stored through nextTokPtr and XML_TOK_INVALID is returned; on success
       it deliberately falls through to the single-unit arm below
     - BT_NMSTRT, BT_HEX, BT_DIGIT, BT_NAME, BT_MINUS: advance ptr by
       MINBPC(enc) and leave the switch
     - BT_LEAD2/3/4: handled by CHECK_NAME_CASE */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
    case BT_NONASCII: \
      if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      /* fall through */ \
    case BT_NMSTRT: \
    case BT_HEX: \
    case BT_DIGIT: \
    case BT_NAME: \
    case BT_MINUS: \
      (ptr) += MINBPC(enc); \
      break; \
    CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
    CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
    CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
! 60:
/* Expands to the `case BT_LEAD<n>:` arm used for the first character of a
   name, for an n-byte character that starts at ptr.
     - fewer than n bytes before end: returns XML_TOK_PARTIAL_CHAR
     - IS_NMSTRT_CHAR rejects the character: stores ptr through nextTokPtr
       and returns XML_TOK_INVALID
     - otherwise: advances ptr by n and leaves the switch
   n must be a literal digit: it is pasted onto BT_LEAD and is forwarded
   unparenthesized to IS_NMSTRT_CHAR, which may paste it as well. */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
    case BT_LEAD ## n: \
      if ((end) - (ptr) < n) \
        return XML_TOK_PARTIAL_CHAR; \
      if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      (ptr) += n; \
      break;
! 71:
/* Expands to every switch arm that accepts a name-start character at ptr:
     - BT_NONASCII: must satisfy IS_NMSTRT_CHAR_MINBPC, otherwise ptr is
       stored through nextTokPtr and XML_TOK_INVALID is returned; on success
       it deliberately falls through to the single-unit arm below
     - BT_NMSTRT, BT_HEX: advance ptr by MINBPC(enc) and leave the switch
     - BT_LEAD2/3/4: handled by CHECK_NMSTRT_CASE */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
    case BT_NONASCII: \
      if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
        *(nextTokPtr) = (ptr); \
        return XML_TOK_INVALID; \
      } \
      /* fall through */ \
    case BT_NMSTRT: \
    case BT_HEX: \
      (ptr) += MINBPC(enc); \
      break; \
    CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
    CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
    CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
! 85:
/* Name decoration applied to every function defined in this file.  The
   including file may define PREFIX beforehand; the default leaves the
   identifier unchanged. */
#if !defined(PREFIX)
#define PREFIX(ident) ident
#endif
! 89:
! 90: /* ptr points to character following "<!-" */
! 91:
! 92: static int PTRCALL
! 93: PREFIX(scanComment)(const ENCODING *enc, const char *ptr,
! 94: const char *end, const char **nextTokPtr)
! 95: {
! 96: if (ptr != end) {
! 97: if (!CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
! 98: *nextTokPtr = ptr;
! 99: return XML_TOK_INVALID;
! 100: }
! 101: ptr += MINBPC(enc);
! 102: while (ptr != end) {
! 103: switch (BYTE_TYPE(enc, ptr)) {
! 104: INVALID_CASES(ptr, nextTokPtr)
! 105: case BT_MINUS:
! 106: if ((ptr += MINBPC(enc)) == end)
! 107: return XML_TOK_PARTIAL;
! 108: if (CHAR_MATCHES(enc, ptr, ASCII_MINUS)) {
! 109: if ((ptr += MINBPC(enc)) == end)
! 110: return XML_TOK_PARTIAL;
! 111: if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
! 112: *nextTokPtr = ptr;
! 113: return XML_TOK_INVALID;
! 114: }
! 115: *nextTokPtr = ptr + MINBPC(enc);
! 116: return XML_TOK_COMMENT;
! 117: }
! 118: break;
! 119: default:
! 120: ptr += MINBPC(enc);
! 121: break;
! 122: }
! 123: }
! 124: }
! 125: return XML_TOK_PARTIAL;
! 126: }
! 127:
! 128: /* ptr points to character following "<!" */
! 129:
! 130: static int PTRCALL
! 131: PREFIX(scanDecl)(const ENCODING *enc, const char *ptr,
! 132: const char *end, const char **nextTokPtr)
! 133: {
! 134: if (ptr == end)
! 135: return XML_TOK_PARTIAL;
! 136: switch (BYTE_TYPE(enc, ptr)) {
! 137: case BT_MINUS:
! 138: return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
! 139: case BT_LSQB:
! 140: *nextTokPtr = ptr + MINBPC(enc);
! 141: return XML_TOK_COND_SECT_OPEN;
! 142: case BT_NMSTRT:
! 143: case BT_HEX:
! 144: ptr += MINBPC(enc);
! 145: break;
! 146: default:
! 147: *nextTokPtr = ptr;
! 148: return XML_TOK_INVALID;
! 149: }
! 150: while (ptr != end) {
! 151: switch (BYTE_TYPE(enc, ptr)) {
! 152: case BT_PERCNT:
! 153: if (ptr + MINBPC(enc) == end)
! 154: return XML_TOK_PARTIAL;
! 155: /* don't allow <!ENTITY% foo "whatever"> */
! 156: switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
! 157: case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
! 158: *nextTokPtr = ptr;
! 159: return XML_TOK_INVALID;
! 160: }
! 161: /* fall through */
! 162: case BT_S: case BT_CR: case BT_LF:
! 163: *nextTokPtr = ptr;
! 164: return XML_TOK_DECL_OPEN;
! 165: case BT_NMSTRT:
! 166: case BT_HEX:
! 167: ptr += MINBPC(enc);
! 168: break;
! 169: default:
! 170: *nextTokPtr = ptr;
! 171: return XML_TOK_INVALID;
! 172: }
! 173: }
! 174: return XML_TOK_PARTIAL;
! 175: }
! 176:
! 177: static int PTRCALL
! 178: PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr,
! 179: const char *end, int *tokPtr)
! 180: {
! 181: int upper = 0;
! 182: *tokPtr = XML_TOK_PI;
! 183: if (end - ptr != MINBPC(enc)*3)
! 184: return 1;
! 185: switch (BYTE_TO_ASCII(enc, ptr)) {
! 186: case ASCII_x:
! 187: break;
! 188: case ASCII_X:
! 189: upper = 1;
! 190: break;
! 191: default:
! 192: return 1;
! 193: }
! 194: ptr += MINBPC(enc);
! 195: switch (BYTE_TO_ASCII(enc, ptr)) {
! 196: case ASCII_m:
! 197: break;
! 198: case ASCII_M:
! 199: upper = 1;
! 200: break;
! 201: default:
! 202: return 1;
! 203: }
! 204: ptr += MINBPC(enc);
! 205: switch (BYTE_TO_ASCII(enc, ptr)) {
! 206: case ASCII_l:
! 207: break;
! 208: case ASCII_L:
! 209: upper = 1;
! 210: break;
! 211: default:
! 212: return 1;
! 213: }
! 214: if (upper)
! 215: return 0;
! 216: *tokPtr = XML_TOK_XML_DECL;
! 217: return 1;
! 218: }
! 219:
! 220: /* ptr points to character following "<?" */
! 221:
! 222: static int PTRCALL
! 223: PREFIX(scanPi)(const ENCODING *enc, const char *ptr,
! 224: const char *end, const char **nextTokPtr)
! 225: {
! 226: int tok;
! 227: const char *target = ptr;
! 228: if (ptr == end)
! 229: return XML_TOK_PARTIAL;
! 230: switch (BYTE_TYPE(enc, ptr)) {
! 231: CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
! 232: default:
! 233: *nextTokPtr = ptr;
! 234: return XML_TOK_INVALID;
! 235: }
! 236: while (ptr != end) {
! 237: switch (BYTE_TYPE(enc, ptr)) {
! 238: CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
! 239: case BT_S: case BT_CR: case BT_LF:
! 240: if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
! 241: *nextTokPtr = ptr;
! 242: return XML_TOK_INVALID;
! 243: }
! 244: ptr += MINBPC(enc);
! 245: while (ptr != end) {
! 246: switch (BYTE_TYPE(enc, ptr)) {
! 247: INVALID_CASES(ptr, nextTokPtr)
! 248: case BT_QUEST:
! 249: ptr += MINBPC(enc);
! 250: if (ptr == end)
! 251: return XML_TOK_PARTIAL;
! 252: if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
! 253: *nextTokPtr = ptr + MINBPC(enc);
! 254: return tok;
! 255: }
! 256: break;
! 257: default:
! 258: ptr += MINBPC(enc);
! 259: break;
! 260: }
! 261: }
! 262: return XML_TOK_PARTIAL;
! 263: case BT_QUEST:
! 264: if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
! 265: *nextTokPtr = ptr;
! 266: return XML_TOK_INVALID;
! 267: }
! 268: ptr += MINBPC(enc);
! 269: if (ptr == end)
! 270: return XML_TOK_PARTIAL;
! 271: if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
! 272: *nextTokPtr = ptr + MINBPC(enc);
! 273: return tok;
! 274: }
! 275: /* fall through */
! 276: default:
! 277: *nextTokPtr = ptr;
! 278: return XML_TOK_INVALID;
! 279: }
! 280: }
! 281: return XML_TOK_PARTIAL;
! 282: }
! 283:
! 284: static int PTRCALL
! 285: PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr,
! 286: const char *end, const char **nextTokPtr)
! 287: {
! 288: static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A,
! 289: ASCII_T, ASCII_A, ASCII_LSQB };
! 290: int i;
! 291: /* CDATA[ */
! 292: if (end - ptr < 6 * MINBPC(enc))
! 293: return XML_TOK_PARTIAL;
! 294: for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
! 295: if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) {
! 296: *nextTokPtr = ptr;
! 297: return XML_TOK_INVALID;
! 298: }
! 299: }
! 300: *nextTokPtr = ptr;
! 301: return XML_TOK_CDATA_SECT_OPEN;
! 302: }
! 303:
! 304: static int PTRCALL
! 305: PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr,
! 306: const char *end, const char **nextTokPtr)
! 307: {
! 308: if (ptr == end)
! 309: return XML_TOK_NONE;
! 310: if (MINBPC(enc) > 1) {
! 311: size_t n = end - ptr;
! 312: if (n & (MINBPC(enc) - 1)) {
! 313: n &= ~(MINBPC(enc) - 1);
! 314: if (n == 0)
! 315: return XML_TOK_PARTIAL;
! 316: end = ptr + n;
! 317: }
! 318: }
! 319: switch (BYTE_TYPE(enc, ptr)) {
! 320: case BT_RSQB:
! 321: ptr += MINBPC(enc);
! 322: if (ptr == end)
! 323: return XML_TOK_PARTIAL;
! 324: if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
! 325: break;
! 326: ptr += MINBPC(enc);
! 327: if (ptr == end)
! 328: return XML_TOK_PARTIAL;
! 329: if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
! 330: ptr -= MINBPC(enc);
! 331: break;
! 332: }
! 333: *nextTokPtr = ptr + MINBPC(enc);
! 334: return XML_TOK_CDATA_SECT_CLOSE;
! 335: case BT_CR:
! 336: ptr += MINBPC(enc);
! 337: if (ptr == end)
! 338: return XML_TOK_PARTIAL;
! 339: if (BYTE_TYPE(enc, ptr) == BT_LF)
! 340: ptr += MINBPC(enc);
! 341: *nextTokPtr = ptr;
! 342: return XML_TOK_DATA_NEWLINE;
! 343: case BT_LF:
! 344: *nextTokPtr = ptr + MINBPC(enc);
! 345: return XML_TOK_DATA_NEWLINE;
! 346: INVALID_CASES(ptr, nextTokPtr)
! 347: default:
! 348: ptr += MINBPC(enc);
! 349: break;
! 350: }
! 351: while (ptr != end) {
! 352: switch (BYTE_TYPE(enc, ptr)) {
! 353: #define LEAD_CASE(n) \
! 354: case BT_LEAD ## n: \
! 355: if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
! 356: *nextTokPtr = ptr; \
! 357: return XML_TOK_DATA_CHARS; \
! 358: } \
! 359: ptr += n; \
! 360: break;
! 361: LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
! 362: #undef LEAD_CASE
! 363: case BT_NONXML:
! 364: case BT_MALFORM:
! 365: case BT_TRAIL:
! 366: case BT_CR:
! 367: case BT_LF:
! 368: case BT_RSQB:
! 369: *nextTokPtr = ptr;
! 370: return XML_TOK_DATA_CHARS;
! 371: default:
! 372: ptr += MINBPC(enc);
! 373: break;
! 374: }
! 375: }
! 376: *nextTokPtr = ptr;
! 377: return XML_TOK_DATA_CHARS;
! 378: }
! 379:
! 380: /* ptr points to character following "</" */
! 381:
! 382: static int PTRCALL
! 383: PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr,
! 384: const char *end, const char **nextTokPtr)
! 385: {
! 386: if (ptr == end)
! 387: return XML_TOK_PARTIAL;
! 388: switch (BYTE_TYPE(enc, ptr)) {
! 389: CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
! 390: default:
! 391: *nextTokPtr = ptr;
! 392: return XML_TOK_INVALID;
! 393: }
! 394: while (ptr != end) {
! 395: switch (BYTE_TYPE(enc, ptr)) {
! 396: CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
! 397: case BT_S: case BT_CR: case BT_LF:
! 398: for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
! 399: switch (BYTE_TYPE(enc, ptr)) {
! 400: case BT_S: case BT_CR: case BT_LF:
! 401: break;
! 402: case BT_GT:
! 403: *nextTokPtr = ptr + MINBPC(enc);
! 404: return XML_TOK_END_TAG;
! 405: default:
! 406: *nextTokPtr = ptr;
! 407: return XML_TOK_INVALID;
! 408: }
! 409: }
! 410: return XML_TOK_PARTIAL;
! 411: #ifdef XML_NS
! 412: case BT_COLON:
! 413: /* no need to check qname syntax here,
! 414: since end-tag must match exactly */
! 415: ptr += MINBPC(enc);
! 416: break;
! 417: #endif
! 418: case BT_GT:
! 419: *nextTokPtr = ptr + MINBPC(enc);
! 420: return XML_TOK_END_TAG;
! 421: default:
! 422: *nextTokPtr = ptr;
! 423: return XML_TOK_INVALID;
! 424: }
! 425: }
! 426: return XML_TOK_PARTIAL;
! 427: }
! 428:
! 429: /* ptr points to character following "&#X" */
! 430:
! 431: static int PTRCALL
! 432: PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr,
! 433: const char *end, const char **nextTokPtr)
! 434: {
! 435: if (ptr != end) {
! 436: switch (BYTE_TYPE(enc, ptr)) {
! 437: case BT_DIGIT:
! 438: case BT_HEX:
! 439: break;
! 440: default:
! 441: *nextTokPtr = ptr;
! 442: return XML_TOK_INVALID;
! 443: }
! 444: for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
! 445: switch (BYTE_TYPE(enc, ptr)) {
! 446: case BT_DIGIT:
! 447: case BT_HEX:
! 448: break;
! 449: case BT_SEMI:
! 450: *nextTokPtr = ptr + MINBPC(enc);
! 451: return XML_TOK_CHAR_REF;
! 452: default:
! 453: *nextTokPtr = ptr;
! 454: return XML_TOK_INVALID;
! 455: }
! 456: }
! 457: }
! 458: return XML_TOK_PARTIAL;
! 459: }
! 460:
! 461: /* ptr points to character following "&#" */
! 462:
! 463: static int PTRCALL
! 464: PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr,
! 465: const char *end, const char **nextTokPtr)
! 466: {
! 467: if (ptr != end) {
! 468: if (CHAR_MATCHES(enc, ptr, ASCII_x))
! 469: return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
! 470: switch (BYTE_TYPE(enc, ptr)) {
! 471: case BT_DIGIT:
! 472: break;
! 473: default:
! 474: *nextTokPtr = ptr;
! 475: return XML_TOK_INVALID;
! 476: }
! 477: for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
! 478: switch (BYTE_TYPE(enc, ptr)) {
! 479: case BT_DIGIT:
! 480: break;
! 481: case BT_SEMI:
! 482: *nextTokPtr = ptr + MINBPC(enc);
! 483: return XML_TOK_CHAR_REF;
! 484: default:
! 485: *nextTokPtr = ptr;
! 486: return XML_TOK_INVALID;
! 487: }
! 488: }
! 489: }
! 490: return XML_TOK_PARTIAL;
! 491: }
! 492:
! 493: /* ptr points to character following "&" */
! 494:
! 495: static int PTRCALL
! 496: PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
! 497: const char **nextTokPtr)
! 498: {
! 499: if (ptr == end)
! 500: return XML_TOK_PARTIAL;
! 501: switch (BYTE_TYPE(enc, ptr)) {
! 502: CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
! 503: case BT_NUM:
! 504: return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
! 505: default:
! 506: *nextTokPtr = ptr;
! 507: return XML_TOK_INVALID;
! 508: }
! 509: while (ptr != end) {
! 510: switch (BYTE_TYPE(enc, ptr)) {
! 511: CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
! 512: case BT_SEMI:
! 513: *nextTokPtr = ptr + MINBPC(enc);
! 514: return XML_TOK_ENTITY_REF;
! 515: default:
! 516: *nextTokPtr = ptr;
! 517: return XML_TOK_INVALID;
! 518: }
! 519: }
! 520: return XML_TOK_PARTIAL;
! 521: }
! 522:
! 523: /* ptr points to character following first character of attribute name */
! 524:
/* Scans the attribute list of a start tag, beginning inside the first
   attribute name, through the end of the tag.
   Returns XML_TOK_START_TAG_WITH_ATTS (tag closed by '>') or
   XML_TOK_EMPTY_ELEMENT_WITH_ATTS (tag closed by "/>") with *nextTokPtr
   just past the '>'; XML_TOK_INVALID with *nextTokPtr at the offending
   character; XML_TOK_PARTIAL / XML_TOK_PARTIAL_CHAR when the input ends
   first.  A non-positive result from scanRef inside an attribute value is
   returned unchanged. */
! 525: static int PTRCALL
! 526: PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
! 527: const char **nextTokPtr)
! 528: {
! 529: #ifdef XML_NS
! 530: int hadColon = 0;
! 531: #endif
/* each pass of this loop consumes one attribute-name character, or a
   complete `= "value"` part together with what follows it */
! 532: while (ptr != end) {
! 533: switch (BYTE_TYPE(enc, ptr)) {
! 534: CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
! 535: #ifdef XML_NS
/* at most one ':' per attribute name, and it must be followed by a
   name-start character */
! 536: case BT_COLON:
! 537: if (hadColon) {
! 538: *nextTokPtr = ptr;
! 539: return XML_TOK_INVALID;
! 540: }
! 541: hadColon = 1;
! 542: ptr += MINBPC(enc);
! 543: if (ptr == end)
! 544: return XML_TOK_PARTIAL;
! 545: switch (BYTE_TYPE(enc, ptr)) {
! 546: CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
! 547: default:
! 548: *nextTokPtr = ptr;
! 549: return XML_TOK_INVALID;
! 550: }
! 551: break;
! 552: #endif
/* white space after the name: only more white space or '=' may follow */
! 553: case BT_S: case BT_CR: case BT_LF:
! 554: for (;;) {
! 555: int t;
! 556:
! 557: ptr += MINBPC(enc);
! 558: if (ptr == end)
! 559: return XML_TOK_PARTIAL;
! 560: t = BYTE_TYPE(enc, ptr);
! 561: if (t == BT_EQUALS)
! 562: break;
! 563: switch (t) {
! 564: case BT_S:
! 565: case BT_LF:
! 566: case BT_CR:
! 567: break;
! 568: default:
! 569: *nextTokPtr = ptr;
! 570: return XML_TOK_INVALID;
! 571: }
! 572: }
! 573: /* fall through */
! 574: case BT_EQUALS:
! 575: {
/* byte type of the opening quote (BT_QUOT or BT_APOS); the value ends at
   the next character of the same byte type */
! 576: int open;
! 577: #ifdef XML_NS
! 578: hadColon = 0;
! 579: #endif
/* skip white space between '=' and the opening quote */
! 580: for (;;) {
! 581: ptr += MINBPC(enc);
! 582: if (ptr == end)
! 583: return XML_TOK_PARTIAL;
! 584: open = BYTE_TYPE(enc, ptr);
! 585: if (open == BT_QUOT || open == BT_APOS)
! 586: break;
! 587: switch (open) {
! 588: case BT_S:
! 589: case BT_LF:
! 590: case BT_CR:
! 591: break;
! 592: default:
! 593: *nextTokPtr = ptr;
! 594: return XML_TOK_INVALID;
! 595: }
! 596: }
! 597: ptr += MINBPC(enc);
! 598: /* in attribute value */
! 599: for (;;) {
! 600: int t;
! 601: if (ptr == end)
! 602: return XML_TOK_PARTIAL;
! 603: t = BYTE_TYPE(enc, ptr);
! 604: if (t == open)
! 605: break;
! 606: switch (t) {
! 607: INVALID_CASES(ptr, nextTokPtr)
! 608: case BT_AMP:
! 609: {
/* scanRef advances ptr itself through its last argument; any
   non-positive token is returned to the caller as is */
! 610: int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
! 611: if (tok <= 0) {
! 612: if (tok == XML_TOK_INVALID)
! 613: *nextTokPtr = ptr;
! 614: return tok;
! 615: }
! 616: break;
! 617: }
! 618: case BT_LT:
! 619: *nextTokPtr = ptr;
! 620: return XML_TOK_INVALID;
! 621: default:
! 622: ptr += MINBPC(enc);
! 623: break;
! 624: }
! 625: }
/* step over the closing quote; it must be followed by white space, '/'
   or '>' */
! 626: ptr += MINBPC(enc);
! 627: if (ptr == end)
! 628: return XML_TOK_PARTIAL;
! 629: switch (BYTE_TYPE(enc, ptr)) {
! 630: case BT_S:
! 631: case BT_CR:
! 632: case BT_LF:
! 633: break;
! 634: case BT_SOL:
! 635: goto sol;
! 636: case BT_GT:
! 637: goto gt;
! 638: default:
! 639: *nextTokPtr = ptr;
! 640: return XML_TOK_INVALID;
! 641: }
! 642: /* ptr points to the white space character that follows the closing quote */
! 643: for (;;) {
! 644: ptr += MINBPC(enc);
! 645: if (ptr == end)
! 646: return XML_TOK_PARTIAL;
! 647: switch (BYTE_TYPE(enc, ptr)) {
/* a name-start character begins the next attribute: it is consumed here,
   and the two breaks below hand control back to the outer name loop */
! 648: CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
! 649: case BT_S: case BT_CR: case BT_LF:
! 650: continue;
! 651: case BT_GT:
! 652: gt:
! 653: *nextTokPtr = ptr + MINBPC(enc);
! 654: return XML_TOK_START_TAG_WITH_ATTS;
! 655: case BT_SOL:
! 656: sol:
! 657: ptr += MINBPC(enc);
! 658: if (ptr == end)
! 659: return XML_TOK_PARTIAL;
! 660: if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
! 661: *nextTokPtr = ptr;
! 662: return XML_TOK_INVALID;
! 663: }
! 664: *nextTokPtr = ptr + MINBPC(enc);
! 665: return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
! 666: default:
! 667: *nextTokPtr = ptr;
! 668: return XML_TOK_INVALID;
! 669: }
! 670: break;
! 671: }
! 672: break;
! 673: }
! 674: default:
! 675: *nextTokPtr = ptr;
! 676: return XML_TOK_INVALID;
! 677: }
! 678: }
! 679: return XML_TOK_PARTIAL;
! 680: }
! 681:
! 682: /* ptr points to character following "<" */
! 683:
/* Dispatches on the character after '<' in content:
     '!'  followed by '-' -> scanComment, by '[' -> scanCdataSection,
          anything else is XML_TOK_INVALID
     '?'  -> scanPi
     '/'  -> scanEndTag
     name-start character -> a start tag, scanned here
   For a start tag without attributes returns XML_TOK_START_TAG_NO_ATTS
   ('>') or XML_TOK_EMPTY_ELEMENT_NO_ATTS ("/>") with *nextTokPtr just past
   the '>'.  When an attribute name follows the element name, the result of
   scanAtts is returned.  XML_TOK_INVALID leaves *nextTokPtr at the
   offending character; XML_TOK_PARTIAL / XML_TOK_PARTIAL_CHAR mean the
   input ended first. */
! 684: static int PTRCALL
! 685: PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
! 686: const char **nextTokPtr)
! 687: {
! 688: #ifdef XML_NS
! 689: int hadColon;
! 690: #endif
! 691: if (ptr == end)
! 692: return XML_TOK_PARTIAL;
! 693: switch (BYTE_TYPE(enc, ptr)) {
! 694: CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
! 695: case BT_EXCL:
! 696: if ((ptr += MINBPC(enc)) == end)
! 697: return XML_TOK_PARTIAL;
! 698: switch (BYTE_TYPE(enc, ptr)) {
! 699: case BT_MINUS:
! 700: return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
! 701: case BT_LSQB:
! 702: return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc),
! 703: end, nextTokPtr);
! 704: }
! 705: *nextTokPtr = ptr;
! 706: return XML_TOK_INVALID;
! 707: case BT_QUEST:
! 708: return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
! 709: case BT_SOL:
! 710: return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
! 711: default:
! 712: *nextTokPtr = ptr;
! 713: return XML_TOK_INVALID;
! 714: }
! 715: #ifdef XML_NS
! 716: hadColon = 0;
! 717: #endif
! 718: /* we have a start-tag */
! 719: while (ptr != end) {
! 720: switch (BYTE_TYPE(enc, ptr)) {
! 721: CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
! 722: #ifdef XML_NS
/* at most one ':' in the element name, and it must be followed by a
   name-start character */
! 723: case BT_COLON:
! 724: if (hadColon) {
! 725: *nextTokPtr = ptr;
! 726: return XML_TOK_INVALID;
! 727: }
! 728: hadColon = 1;
! 729: ptr += MINBPC(enc);
! 730: if (ptr == end)
! 731: return XML_TOK_PARTIAL;
! 732: switch (BYTE_TYPE(enc, ptr)) {
! 733: CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
! 734: default:
! 735: *nextTokPtr = ptr;
! 736: return XML_TOK_INVALID;
! 737: }
! 738: break;
! 739: #endif
/* white space after the element name: skip it, then expect '>', '/' or
   the first character of an attribute name */
! 740: case BT_S: case BT_CR: case BT_LF:
! 741: {
! 742: ptr += MINBPC(enc);
! 743: while (ptr != end) {
! 744: switch (BYTE_TYPE(enc, ptr)) {
/* a name-start character is consumed by the macro, which then breaks out
   of this switch so that the scanAtts call below is reached */
! 745: CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
! 746: case BT_GT:
! 747: goto gt;
! 748: case BT_SOL:
! 749: goto sol;
! 750: case BT_S: case BT_CR: case BT_LF:
! 751: ptr += MINBPC(enc);
! 752: continue;
! 753: default:
! 754: *nextTokPtr = ptr;
! 755: return XML_TOK_INVALID;
! 756: }
! 757: return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
! 758: }
! 759: return XML_TOK_PARTIAL;
! 760: }
! 761: case BT_GT:
! 762: gt:
! 763: *nextTokPtr = ptr + MINBPC(enc);
! 764: return XML_TOK_START_TAG_NO_ATTS;
! 765: case BT_SOL:
! 766: sol:
! 767: ptr += MINBPC(enc);
! 768: if (ptr == end)
! 769: return XML_TOK_PARTIAL;
! 770: if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
! 771: *nextTokPtr = ptr;
! 772: return XML_TOK_INVALID;
! 773: }
! 774: *nextTokPtr = ptr + MINBPC(enc);
! 775: return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
! 776: default:
! 777: *nextTokPtr = ptr;
! 778: return XML_TOK_INVALID;
! 779: }
! 780: }
! 781: return XML_TOK_PARTIAL;
! 782: }
! 783:
! 784: static int PTRCALL
! 785: PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
! 786: const char **nextTokPtr)
! 787: {
! 788: if (ptr == end)
! 789: return XML_TOK_NONE;
! 790: if (MINBPC(enc) > 1) {
! 791: size_t n = end - ptr;
! 792: if (n & (MINBPC(enc) - 1)) {
! 793: n &= ~(MINBPC(enc) - 1);
! 794: if (n == 0)
! 795: return XML_TOK_PARTIAL;
! 796: end = ptr + n;
! 797: }
! 798: }
! 799: switch (BYTE_TYPE(enc, ptr)) {
! 800: case BT_LT:
! 801: return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
! 802: case BT_AMP:
! 803: return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
! 804: case BT_CR:
! 805: ptr += MINBPC(enc);
! 806: if (ptr == end)
! 807: return XML_TOK_TRAILING_CR;
! 808: if (BYTE_TYPE(enc, ptr) == BT_LF)
! 809: ptr += MINBPC(enc);
! 810: *nextTokPtr = ptr;
! 811: return XML_TOK_DATA_NEWLINE;
! 812: case BT_LF:
! 813: *nextTokPtr = ptr + MINBPC(enc);
! 814: return XML_TOK_DATA_NEWLINE;
! 815: case BT_RSQB:
! 816: ptr += MINBPC(enc);
! 817: if (ptr == end)
! 818: return XML_TOK_TRAILING_RSQB;
! 819: if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB))
! 820: break;
! 821: ptr += MINBPC(enc);
! 822: if (ptr == end)
! 823: return XML_TOK_TRAILING_RSQB;
! 824: if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) {
! 825: ptr -= MINBPC(enc);
! 826: break;
! 827: }
! 828: *nextTokPtr = ptr;
! 829: return XML_TOK_INVALID;
! 830: INVALID_CASES(ptr, nextTokPtr)
! 831: default:
! 832: ptr += MINBPC(enc);
! 833: break;
! 834: }
! 835: while (ptr != end) {
! 836: switch (BYTE_TYPE(enc, ptr)) {
! 837: #define LEAD_CASE(n) \
! 838: case BT_LEAD ## n: \
! 839: if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
! 840: *nextTokPtr = ptr; \
! 841: return XML_TOK_DATA_CHARS; \
! 842: } \
! 843: ptr += n; \
! 844: break;
! 845: LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
! 846: #undef LEAD_CASE
! 847: case BT_RSQB:
! 848: if (ptr + MINBPC(enc) != end) {
! 849: if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) {
! 850: ptr += MINBPC(enc);
! 851: break;
! 852: }
! 853: if (ptr + 2*MINBPC(enc) != end) {
! 854: if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) {
! 855: ptr += MINBPC(enc);
! 856: break;
! 857: }
! 858: *nextTokPtr = ptr + 2*MINBPC(enc);
! 859: return XML_TOK_INVALID;
! 860: }
! 861: }
! 862: /* fall through */
! 863: case BT_AMP:
! 864: case BT_LT:
! 865: case BT_NONXML:
! 866: case BT_MALFORM:
! 867: case BT_TRAIL:
! 868: case BT_CR:
! 869: case BT_LF:
! 870: *nextTokPtr = ptr;
! 871: return XML_TOK_DATA_CHARS;
! 872: default:
! 873: ptr += MINBPC(enc);
! 874: break;
! 875: }
! 876: }
! 877: *nextTokPtr = ptr;
! 878: return XML_TOK_DATA_CHARS;
! 879: }
! 880:
! 881: /* ptr points to character following "%" */
! 882:
! 883: static int PTRCALL
! 884: PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
! 885: const char **nextTokPtr)
! 886: {
! 887: if (ptr == end)
! 888: return -XML_TOK_PERCENT;
! 889: switch (BYTE_TYPE(enc, ptr)) {
! 890: CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
! 891: case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
! 892: *nextTokPtr = ptr;
! 893: return XML_TOK_PERCENT;
! 894: default:
! 895: *nextTokPtr = ptr;
! 896: return XML_TOK_INVALID;
! 897: }
! 898: while (ptr != end) {
! 899: switch (BYTE_TYPE(enc, ptr)) {
! 900: CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
! 901: case BT_SEMI:
! 902: *nextTokPtr = ptr + MINBPC(enc);
! 903: return XML_TOK_PARAM_ENTITY_REF;
! 904: default:
! 905: *nextTokPtr = ptr;
! 906: return XML_TOK_INVALID;
! 907: }
! 908: }
! 909: return XML_TOK_PARTIAL;
! 910: }
! 911:
! 912: static int PTRCALL
! 913: PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
! 914: const char **nextTokPtr)
! 915: {
! 916: if (ptr == end)
! 917: return XML_TOK_PARTIAL;
! 918: switch (BYTE_TYPE(enc, ptr)) {
! 919: CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
! 920: default:
! 921: *nextTokPtr = ptr;
! 922: return XML_TOK_INVALID;
! 923: }
! 924: while (ptr != end) {
! 925: switch (BYTE_TYPE(enc, ptr)) {
! 926: CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
! 927: case BT_CR: case BT_LF: case BT_S:
! 928: case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
! 929: *nextTokPtr = ptr;
! 930: return XML_TOK_POUND_NAME;
! 931: default:
! 932: *nextTokPtr = ptr;
! 933: return XML_TOK_INVALID;
! 934: }
! 935: }
! 936: return -XML_TOK_POUND_NAME;
! 937: }
! 938:
! 939: static int PTRCALL
! 940: PREFIX(scanLit)(int open, const ENCODING *enc,
! 941: const char *ptr, const char *end,
! 942: const char **nextTokPtr)
! 943: {
! 944: while (ptr != end) {
! 945: int t = BYTE_TYPE(enc, ptr);
! 946: switch (t) {
! 947: INVALID_CASES(ptr, nextTokPtr)
! 948: case BT_QUOT:
! 949: case BT_APOS:
! 950: ptr += MINBPC(enc);
! 951: if (t != open)
! 952: break;
! 953: if (ptr == end)
! 954: return -XML_TOK_LITERAL;
! 955: *nextTokPtr = ptr;
! 956: switch (BYTE_TYPE(enc, ptr)) {
! 957: case BT_S: case BT_CR: case BT_LF:
! 958: case BT_GT: case BT_PERCNT: case BT_LSQB:
! 959: return XML_TOK_LITERAL;
! 960: default:
! 961: return XML_TOK_INVALID;
! 962: }
! 963: default:
! 964: ptr += MINBPC(enc);
! 965: break;
! 966: }
! 967: }
! 968: return XML_TOK_PARTIAL;
! 969: }
! 970:
/* Returns the next prolog token starting at ptr and stores the position
   where scanning stopped through nextTokPtr (not on every return path:
   the XML_TOK_NONE, XML_TOK_PARTIAL and several negated returns leave it
   untouched).
   The first character selects the token: quotes go to scanLit, '<' to
   scanDecl / scanPi or XML_TOK_INSTANCE_START, '%' to scanPercent, '#' to
   scanPoundName, punctuation to its own token, white space to
   XML_TOK_PROLOG_S, and name characters to XML_TOK_NAME / XML_TOK_NMTOKEN
   (and their suffixed or prefixed variants).
   Several paths return a negated token value when the input ends right
   after a character that could still be extended; NOTE(review): callers
   presumably treat a negative result as "token may be incomplete" -
   confirm against the callers. */
! 971: static int PTRCALL
! 972: PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
! 973: const char **nextTokPtr)
! 974: {
! 975: int tok;
! 976: if (ptr == end)
! 977: return XML_TOK_NONE;
/* ignore trailing bytes that do not make up a whole MINBPC unit */
! 978: if (MINBPC(enc) > 1) {
! 979: size_t n = end - ptr;
! 980: if (n & (MINBPC(enc) - 1)) {
! 981: n &= ~(MINBPC(enc) - 1);
! 982: if (n == 0)
! 983: return XML_TOK_PARTIAL;
! 984: end = ptr + n;
! 985: }
! 986: }
! 987: switch (BYTE_TYPE(enc, ptr)) {
! 988: case BT_QUOT:
! 989: return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
! 990: case BT_APOS:
! 991: return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
! 992: case BT_LT:
! 993: {
! 994: ptr += MINBPC(enc);
! 995: if (ptr == end)
! 996: return XML_TOK_PARTIAL;
! 997: switch (BYTE_TYPE(enc, ptr)) {
! 998: case BT_EXCL:
! 999: return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
! 1000: case BT_QUEST:
! 1001: return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
/* '<' followed by a possible name-start character: report the start of
   the document instance, with *nextTokPtr back on the '<' */
! 1002: case BT_NMSTRT:
! 1003: case BT_HEX:
! 1004: case BT_NONASCII:
! 1005: case BT_LEAD2:
! 1006: case BT_LEAD3:
! 1007: case BT_LEAD4:
! 1008: *nextTokPtr = ptr - MINBPC(enc);
! 1009: return XML_TOK_INSTANCE_START;
! 1010: }
! 1011: *nextTokPtr = ptr;
! 1012: return XML_TOK_INVALID;
! 1013: }
! 1014: case BT_CR:
! 1015: if (ptr + MINBPC(enc) == end) {
! 1016: *nextTokPtr = end;
! 1017: /* indicate that this might be part of a CR/LF pair */
! 1018: return -XML_TOK_PROLOG_S;
! 1019: }
! 1020: /* fall through */
! 1021: case BT_S: case BT_LF:
/* collect the whole run of white space; a CR that is the last character
   available is left out of the run */
! 1022: for (;;) {
! 1023: ptr += MINBPC(enc);
! 1024: if (ptr == end)
! 1025: break;
! 1026: switch (BYTE_TYPE(enc, ptr)) {
! 1027: case BT_S: case BT_LF:
! 1028: break;
! 1029: case BT_CR:
! 1030: /* don't split CR/LF pair */
! 1031: if (ptr + MINBPC(enc) != end)
! 1032: break;
! 1033: /* fall through */
! 1034: default:
! 1035: *nextTokPtr = ptr;
! 1036: return XML_TOK_PROLOG_S;
! 1037: }
! 1038: }
! 1039: *nextTokPtr = ptr;
! 1040: return XML_TOK_PROLOG_S;
! 1041: case BT_PERCNT:
! 1042: return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
! 1043: case BT_COMMA:
! 1044: *nextTokPtr = ptr + MINBPC(enc);
! 1045: return XML_TOK_COMMA;
! 1046: case BT_LSQB:
! 1047: *nextTokPtr = ptr + MINBPC(enc);
! 1048: return XML_TOK_OPEN_BRACKET;
/* ']' alone is XML_TOK_CLOSE_BRACKET; "]]>" is XML_TOK_COND_SECT_CLOSE */
! 1049: case BT_RSQB:
! 1050: ptr += MINBPC(enc);
! 1051: if (ptr == end)
! 1052: return -XML_TOK_CLOSE_BRACKET;
! 1053: if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
! 1054: if (ptr + MINBPC(enc) == end)
! 1055: return XML_TOK_PARTIAL;
! 1056: if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) {
! 1057: *nextTokPtr = ptr + 2*MINBPC(enc);
! 1058: return XML_TOK_COND_SECT_CLOSE;
! 1059: }
! 1060: }
! 1061: *nextTokPtr = ptr;
! 1062: return XML_TOK_CLOSE_BRACKET;
! 1063: case BT_LPAR:
! 1064: *nextTokPtr = ptr + MINBPC(enc);
! 1065: return XML_TOK_OPEN_PAREN;
/* ')' may carry a '*', '?' or '+' suffix; otherwise it must be followed
   by one of the delimiters listed below */
! 1066: case BT_RPAR:
! 1067: ptr += MINBPC(enc);
! 1068: if (ptr == end)
! 1069: return -XML_TOK_CLOSE_PAREN;
! 1070: switch (BYTE_TYPE(enc, ptr)) {
! 1071: case BT_AST:
! 1072: *nextTokPtr = ptr + MINBPC(enc);
! 1073: return XML_TOK_CLOSE_PAREN_ASTERISK;
! 1074: case BT_QUEST:
! 1075: *nextTokPtr = ptr + MINBPC(enc);
! 1076: return XML_TOK_CLOSE_PAREN_QUESTION;
! 1077: case BT_PLUS:
! 1078: *nextTokPtr = ptr + MINBPC(enc);
! 1079: return XML_TOK_CLOSE_PAREN_PLUS;
! 1080: case BT_CR: case BT_LF: case BT_S:
! 1081: case BT_GT: case BT_COMMA: case BT_VERBAR:
! 1082: case BT_RPAR:
! 1083: *nextTokPtr = ptr;
! 1084: return XML_TOK_CLOSE_PAREN;
! 1085: }
! 1086: *nextTokPtr = ptr;
! 1087: return XML_TOK_INVALID;
! 1088: case BT_VERBAR:
! 1089: *nextTokPtr = ptr + MINBPC(enc);
! 1090: return XML_TOK_OR;
! 1091: case BT_GT:
! 1092: *nextTokPtr = ptr + MINBPC(enc);
! 1093: return XML_TOK_DECL_CLOSE;
! 1094: case BT_NUM:
! 1095: return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
/* multi-byte first character: a name-start character begins a NAME, any
   other name character begins an NMTOKEN */
! 1096: #define LEAD_CASE(n) \
! 1097: case BT_LEAD ## n: \
! 1098: if (end - ptr < n) \
! 1099: return XML_TOK_PARTIAL_CHAR; \
! 1100: if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
! 1101: ptr += n; \
! 1102: tok = XML_TOK_NAME; \
! 1103: break; \
! 1104: } \
! 1105: if (IS_NAME_CHAR(enc, ptr, n)) { \
! 1106: ptr += n; \
! 1107: tok = XML_TOK_NMTOKEN; \
! 1108: break; \
! 1109: } \
! 1110: *nextTokPtr = ptr; \
! 1111: return XML_TOK_INVALID;
! 1112: LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
! 1113: #undef LEAD_CASE
! 1114: case BT_NMSTRT:
! 1115: case BT_HEX:
! 1116: tok = XML_TOK_NAME;
! 1117: ptr += MINBPC(enc);
! 1118: break;
! 1119: case BT_DIGIT:
! 1120: case BT_NAME:
! 1121: case BT_MINUS:
! 1122: #ifdef XML_NS
! 1123: case BT_COLON:
! 1124: #endif
! 1125: tok = XML_TOK_NMTOKEN;
! 1126: ptr += MINBPC(enc);
! 1127: break;
! 1128: case BT_NONASCII:
! 1129: if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
! 1130: ptr += MINBPC(enc);
! 1131: tok = XML_TOK_NAME;
! 1132: break;
! 1133: }
! 1134: if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
! 1135: ptr += MINBPC(enc);
! 1136: tok = XML_TOK_NMTOKEN;
! 1137: break;
! 1138: }
! 1139: /* fall through */
! 1140: default:
! 1141: *nextTokPtr = ptr;
! 1142: return XML_TOK_INVALID;
! 1143: }
/* only the name / nmtoken cases reach this point; tok holds the kind
   chosen from the first character */
! 1144: while (ptr != end) {
! 1145: switch (BYTE_TYPE(enc, ptr)) {
! 1146: CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
! 1147: case BT_GT: case BT_RPAR: case BT_COMMA:
! 1148: case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
! 1149: case BT_S: case BT_CR: case BT_LF:
! 1150: *nextTokPtr = ptr;
! 1151: return tok;
! 1152: #ifdef XML_NS
/* first ':' in a NAME makes it a PREFIXED_NAME when a name character
   follows; in every other situation the token becomes an NMTOKEN */
! 1153: case BT_COLON:
! 1154: ptr += MINBPC(enc);
! 1155: switch (tok) {
! 1156: case XML_TOK_NAME:
! 1157: if (ptr == end)
! 1158: return XML_TOK_PARTIAL;
! 1159: tok = XML_TOK_PREFIXED_NAME;
! 1160: switch (BYTE_TYPE(enc, ptr)) {
! 1161: CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
! 1162: default:
! 1163: tok = XML_TOK_NMTOKEN;
! 1164: break;
! 1165: }
! 1166: break;
! 1167: case XML_TOK_PREFIXED_NAME:
! 1168: tok = XML_TOK_NMTOKEN;
! 1169: break;
! 1170: }
! 1171: break;
! 1172: #endif
/* the '+', '*' and '?' suffixes are rejected after an NMTOKEN */
! 1173: case BT_PLUS:
! 1174: if (tok == XML_TOK_NMTOKEN) {
! 1175: *nextTokPtr = ptr;
! 1176: return XML_TOK_INVALID;
! 1177: }
! 1178: *nextTokPtr = ptr + MINBPC(enc);
! 1179: return XML_TOK_NAME_PLUS;
! 1180: case BT_AST:
! 1181: if (tok == XML_TOK_NMTOKEN) {
! 1182: *nextTokPtr = ptr;
! 1183: return XML_TOK_INVALID;
! 1184: }
! 1185: *nextTokPtr = ptr + MINBPC(enc);
! 1186: return XML_TOK_NAME_ASTERISK;
! 1187: case BT_QUEST:
! 1188: if (tok == XML_TOK_NMTOKEN) {
! 1189: *nextTokPtr = ptr;
! 1190: return XML_TOK_INVALID;
! 1191: }
! 1192: *nextTokPtr = ptr + MINBPC(enc);
! 1193: return XML_TOK_NAME_QUESTION;
! 1194: default:
! 1195: *nextTokPtr = ptr;
! 1196: return XML_TOK_INVALID;
! 1197: }
! 1198: }
/* input ended while still inside the name: return the negated token */
! 1199: return -tok;
! 1200: }
! 1201:
! 1202: static int PTRCALL
! 1203: PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr,
! 1204: const char *end, const char **nextTokPtr)
! 1205: {
! 1206: const char *start;
! 1207: if (ptr == end)
! 1208: return XML_TOK_NONE;
! 1209: start = ptr;
! 1210: while (ptr != end) {
! 1211: switch (BYTE_TYPE(enc, ptr)) {
! 1212: #define LEAD_CASE(n) \
! 1213: case BT_LEAD ## n: ptr += n; break;
! 1214: LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
! 1215: #undef LEAD_CASE
! 1216: case BT_AMP:
! 1217: if (ptr == start)
! 1218: return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
! 1219: *nextTokPtr = ptr;
! 1220: return XML_TOK_DATA_CHARS;
! 1221: case BT_LT:
! 1222: /* this is for inside entity references */
! 1223: *nextTokPtr = ptr;
! 1224: return XML_TOK_INVALID;
! 1225: case BT_LF:
! 1226: if (ptr == start) {
! 1227: *nextTokPtr = ptr + MINBPC(enc);
! 1228: return XML_TOK_DATA_NEWLINE;
! 1229: }
! 1230: *nextTokPtr = ptr;
! 1231: return XML_TOK_DATA_CHARS;
! 1232: case BT_CR:
! 1233: if (ptr == start) {
! 1234: ptr += MINBPC(enc);
! 1235: if (ptr == end)
! 1236: return XML_TOK_TRAILING_CR;
! 1237: if (BYTE_TYPE(enc, ptr) == BT_LF)
! 1238: ptr += MINBPC(enc);
! 1239: *nextTokPtr = ptr;
! 1240: return XML_TOK_DATA_NEWLINE;
! 1241: }
! 1242: *nextTokPtr = ptr;
! 1243: return XML_TOK_DATA_CHARS;
! 1244: case BT_S:
! 1245: if (ptr == start) {
! 1246: *nextTokPtr = ptr + MINBPC(enc);
! 1247: return XML_TOK_ATTRIBUTE_VALUE_S;
! 1248: }
! 1249: *nextTokPtr = ptr;
! 1250: return XML_TOK_DATA_CHARS;
! 1251: default:
! 1252: ptr += MINBPC(enc);
! 1253: break;
! 1254: }
! 1255: }
! 1256: *nextTokPtr = ptr;
! 1257: return XML_TOK_DATA_CHARS;
! 1258: }
! 1259:
! 1260: static int PTRCALL
! 1261: PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr,
! 1262: const char *end, const char **nextTokPtr)
! 1263: {
! 1264: const char *start;
! 1265: if (ptr == end)
! 1266: return XML_TOK_NONE;
! 1267: start = ptr;
! 1268: while (ptr != end) {
! 1269: switch (BYTE_TYPE(enc, ptr)) {
! 1270: #define LEAD_CASE(n) \
! 1271: case BT_LEAD ## n: ptr += n; break;
! 1272: LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
! 1273: #undef LEAD_CASE
! 1274: case BT_AMP:
! 1275: if (ptr == start)
! 1276: return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
! 1277: *nextTokPtr = ptr;
! 1278: return XML_TOK_DATA_CHARS;
! 1279: case BT_PERCNT:
! 1280: if (ptr == start) {
! 1281: int tok = PREFIX(scanPercent)(enc, ptr + MINBPC(enc),
! 1282: end, nextTokPtr);
! 1283: return (tok == XML_TOK_PERCENT) ? XML_TOK_INVALID : tok;
! 1284: }
! 1285: *nextTokPtr = ptr;
! 1286: return XML_TOK_DATA_CHARS;
! 1287: case BT_LF:
! 1288: if (ptr == start) {
! 1289: *nextTokPtr = ptr + MINBPC(enc);
! 1290: return XML_TOK_DATA_NEWLINE;
! 1291: }
! 1292: *nextTokPtr = ptr;
! 1293: return XML_TOK_DATA_CHARS;
! 1294: case BT_CR:
! 1295: if (ptr == start) {
! 1296: ptr += MINBPC(enc);
! 1297: if (ptr == end)
! 1298: return XML_TOK_TRAILING_CR;
! 1299: if (BYTE_TYPE(enc, ptr) == BT_LF)
! 1300: ptr += MINBPC(enc);
! 1301: *nextTokPtr = ptr;
! 1302: return XML_TOK_DATA_NEWLINE;
! 1303: }
! 1304: *nextTokPtr = ptr;
! 1305: return XML_TOK_DATA_CHARS;
! 1306: default:
! 1307: ptr += MINBPC(enc);
! 1308: break;
! 1309: }
! 1310: }
! 1311: *nextTokPtr = ptr;
! 1312: return XML_TOK_DATA_CHARS;
! 1313: }
! 1314:
! 1315: #ifdef XML_DTD
! 1316:
! 1317: static int PTRCALL
! 1318: PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr,
! 1319: const char *end, const char **nextTokPtr)
! 1320: {
! 1321: int level = 0;
! 1322: if (MINBPC(enc) > 1) {
! 1323: size_t n = end - ptr;
! 1324: if (n & (MINBPC(enc) - 1)) {
! 1325: n &= ~(MINBPC(enc) - 1);
! 1326: end = ptr + n;
! 1327: }
! 1328: }
! 1329: while (ptr != end) {
! 1330: switch (BYTE_TYPE(enc, ptr)) {
! 1331: INVALID_CASES(ptr, nextTokPtr)
! 1332: case BT_LT:
! 1333: if ((ptr += MINBPC(enc)) == end)
! 1334: return XML_TOK_PARTIAL;
! 1335: if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) {
! 1336: if ((ptr += MINBPC(enc)) == end)
! 1337: return XML_TOK_PARTIAL;
! 1338: if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) {
! 1339: ++level;
! 1340: ptr += MINBPC(enc);
! 1341: }
! 1342: }
! 1343: break;
! 1344: case BT_RSQB:
! 1345: if ((ptr += MINBPC(enc)) == end)
! 1346: return XML_TOK_PARTIAL;
! 1347: if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) {
! 1348: if ((ptr += MINBPC(enc)) == end)
! 1349: return XML_TOK_PARTIAL;
! 1350: if (CHAR_MATCHES(enc, ptr, ASCII_GT)) {
! 1351: ptr += MINBPC(enc);
! 1352: if (level == 0) {
! 1353: *nextTokPtr = ptr;
! 1354: return XML_TOK_IGNORE_SECT;
! 1355: }
! 1356: --level;
! 1357: }
! 1358: }
! 1359: break;
! 1360: default:
! 1361: ptr += MINBPC(enc);
! 1362: break;
! 1363: }
! 1364: }
! 1365: return XML_TOK_PARTIAL;
! 1366: }
! 1367:
! 1368: #endif /* XML_DTD */
! 1369:
! 1370: static int PTRCALL
! 1371: PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
! 1372: const char **badPtr)
! 1373: {
! 1374: ptr += MINBPC(enc);
! 1375: end -= MINBPC(enc);
! 1376: for (; ptr != end; ptr += MINBPC(enc)) {
! 1377: switch (BYTE_TYPE(enc, ptr)) {
! 1378: case BT_DIGIT:
! 1379: case BT_HEX:
! 1380: case BT_MINUS:
! 1381: case BT_APOS:
! 1382: case BT_LPAR:
! 1383: case BT_RPAR:
! 1384: case BT_PLUS:
! 1385: case BT_COMMA:
! 1386: case BT_SOL:
! 1387: case BT_EQUALS:
! 1388: case BT_QUEST:
! 1389: case BT_CR:
! 1390: case BT_LF:
! 1391: case BT_SEMI:
! 1392: case BT_EXCL:
! 1393: case BT_AST:
! 1394: case BT_PERCNT:
! 1395: case BT_NUM:
! 1396: #ifdef XML_NS
! 1397: case BT_COLON:
! 1398: #endif
! 1399: break;
! 1400: case BT_S:
! 1401: if (CHAR_MATCHES(enc, ptr, ASCII_TAB)) {
! 1402: *badPtr = ptr;
! 1403: return 0;
! 1404: }
! 1405: break;
! 1406: case BT_NAME:
! 1407: case BT_NMSTRT:
! 1408: if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
! 1409: break;
! 1410: default:
! 1411: switch (BYTE_TO_ASCII(enc, ptr)) {
! 1412: case 0x24: /* $ */
! 1413: case 0x40: /* @ */
! 1414: break;
! 1415: default:
! 1416: *badPtr = ptr;
! 1417: return 0;
! 1418: }
! 1419: break;
! 1420: }
! 1421: }
! 1422: return 1;
! 1423: }
! 1424:
! 1425: /* This must only be called for a well-formed start-tag or empty
! 1426: element tag. Returns the number of attributes. Pointers to the
! 1427: first attsMax attributes are stored in atts.
! 1428: */
! 1429:
! 1430: static int PTRCALL
! 1431: PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
! 1432: int attsMax, ATTRIBUTE *atts)
! 1433: {
! 1434: enum { other, inName, inValue } state = inName;
! 1435: int nAtts = 0;
! 1436: int open = 0; /* defined when state == inValue;
! 1437: initialization just to shut up compilers */
! 1438:
! 1439: for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
! 1440: switch (BYTE_TYPE(enc, ptr)) {
! 1441: #define START_NAME \
! 1442: if (state == other) { \
! 1443: if (nAtts < attsMax) { \
! 1444: atts[nAtts].name = ptr; \
! 1445: atts[nAtts].normalized = 1; \
! 1446: } \
! 1447: state = inName; \
! 1448: }
! 1449: #define LEAD_CASE(n) \
! 1450: case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
! 1451: LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
! 1452: #undef LEAD_CASE
! 1453: case BT_NONASCII:
! 1454: case BT_NMSTRT:
! 1455: case BT_HEX:
! 1456: START_NAME
! 1457: break;
! 1458: #undef START_NAME
! 1459: case BT_QUOT:
! 1460: if (state != inValue) {
! 1461: if (nAtts < attsMax)
! 1462: atts[nAtts].valuePtr = ptr + MINBPC(enc);
! 1463: state = inValue;
! 1464: open = BT_QUOT;
! 1465: }
! 1466: else if (open == BT_QUOT) {
! 1467: state = other;
! 1468: if (nAtts < attsMax)
! 1469: atts[nAtts].valueEnd = ptr;
! 1470: nAtts++;
! 1471: }
! 1472: break;
! 1473: case BT_APOS:
! 1474: if (state != inValue) {
! 1475: if (nAtts < attsMax)
! 1476: atts[nAtts].valuePtr = ptr + MINBPC(enc);
! 1477: state = inValue;
! 1478: open = BT_APOS;
! 1479: }
! 1480: else if (open == BT_APOS) {
! 1481: state = other;
! 1482: if (nAtts < attsMax)
! 1483: atts[nAtts].valueEnd = ptr;
! 1484: nAtts++;
! 1485: }
! 1486: break;
! 1487: case BT_AMP:
! 1488: if (nAtts < attsMax)
! 1489: atts[nAtts].normalized = 0;
! 1490: break;
! 1491: case BT_S:
! 1492: if (state == inName)
! 1493: state = other;
! 1494: else if (state == inValue
! 1495: && nAtts < attsMax
! 1496: && atts[nAtts].normalized
! 1497: && (ptr == atts[nAtts].valuePtr
! 1498: || BYTE_TO_ASCII(enc, ptr) != ASCII_SPACE
! 1499: || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ASCII_SPACE
! 1500: || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
! 1501: atts[nAtts].normalized = 0;
! 1502: break;
! 1503: case BT_CR: case BT_LF:
! 1504: /* This case ensures that the first attribute name is counted
! 1505: Apart from that we could just change state on the quote. */
! 1506: if (state == inName)
! 1507: state = other;
! 1508: else if (state == inValue && nAtts < attsMax)
! 1509: atts[nAtts].normalized = 0;
! 1510: break;
! 1511: case BT_GT:
! 1512: case BT_SOL:
! 1513: if (state != inValue)
! 1514: return nAtts;
! 1515: break;
! 1516: default:
! 1517: break;
! 1518: }
! 1519: }
! 1520: /* not reached */
! 1521: }
! 1522:
! 1523: static int PTRFASTCALL
! 1524: PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
! 1525: {
! 1526: int result = 0;
! 1527: /* skip &# */
! 1528: ptr += 2*MINBPC(enc);
! 1529: if (CHAR_MATCHES(enc, ptr, ASCII_x)) {
! 1530: for (ptr += MINBPC(enc);
! 1531: !CHAR_MATCHES(enc, ptr, ASCII_SEMI);
! 1532: ptr += MINBPC(enc)) {
! 1533: int c = BYTE_TO_ASCII(enc, ptr);
! 1534: switch (c) {
! 1535: case ASCII_0: case ASCII_1: case ASCII_2: case ASCII_3: case ASCII_4:
! 1536: case ASCII_5: case ASCII_6: case ASCII_7: case ASCII_8: case ASCII_9:
! 1537: result <<= 4;
! 1538: result |= (c - ASCII_0);
! 1539: break;
! 1540: case ASCII_A: case ASCII_B: case ASCII_C:
! 1541: case ASCII_D: case ASCII_E: case ASCII_F:
! 1542: result <<= 4;
! 1543: result += 10 + (c - ASCII_A);
! 1544: break;
! 1545: case ASCII_a: case ASCII_b: case ASCII_c:
! 1546: case ASCII_d: case ASCII_e: case ASCII_f:
! 1547: result <<= 4;
! 1548: result += 10 + (c - ASCII_a);
! 1549: break;
! 1550: }
! 1551: if (result >= 0x110000)
! 1552: return -1;
! 1553: }
! 1554: }
! 1555: else {
! 1556: for (; !CHAR_MATCHES(enc, ptr, ASCII_SEMI); ptr += MINBPC(enc)) {
! 1557: int c = BYTE_TO_ASCII(enc, ptr);
! 1558: result *= 10;
! 1559: result += (c - ASCII_0);
! 1560: if (result >= 0x110000)
! 1561: return -1;
! 1562: }
! 1563: }
! 1564: return checkCharRefNumber(result);
! 1565: }
! 1566:
! 1567: static int PTRCALL
! 1568: PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr,
! 1569: const char *end)
! 1570: {
! 1571: switch ((end - ptr)/MINBPC(enc)) {
! 1572: case 2:
! 1573: if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_t)) {
! 1574: switch (BYTE_TO_ASCII(enc, ptr)) {
! 1575: case ASCII_l:
! 1576: return ASCII_LT;
! 1577: case ASCII_g:
! 1578: return ASCII_GT;
! 1579: }
! 1580: }
! 1581: break;
! 1582: case 3:
! 1583: if (CHAR_MATCHES(enc, ptr, ASCII_a)) {
! 1584: ptr += MINBPC(enc);
! 1585: if (CHAR_MATCHES(enc, ptr, ASCII_m)) {
! 1586: ptr += MINBPC(enc);
! 1587: if (CHAR_MATCHES(enc, ptr, ASCII_p))
! 1588: return ASCII_AMP;
! 1589: }
! 1590: }
! 1591: break;
! 1592: case 4:
! 1593: switch (BYTE_TO_ASCII(enc, ptr)) {
! 1594: case ASCII_q:
! 1595: ptr += MINBPC(enc);
! 1596: if (CHAR_MATCHES(enc, ptr, ASCII_u)) {
! 1597: ptr += MINBPC(enc);
! 1598: if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
! 1599: ptr += MINBPC(enc);
! 1600: if (CHAR_MATCHES(enc, ptr, ASCII_t))
! 1601: return ASCII_QUOT;
! 1602: }
! 1603: }
! 1604: break;
! 1605: case ASCII_a:
! 1606: ptr += MINBPC(enc);
! 1607: if (CHAR_MATCHES(enc, ptr, ASCII_p)) {
! 1608: ptr += MINBPC(enc);
! 1609: if (CHAR_MATCHES(enc, ptr, ASCII_o)) {
! 1610: ptr += MINBPC(enc);
! 1611: if (CHAR_MATCHES(enc, ptr, ASCII_s))
! 1612: return ASCII_APOS;
! 1613: }
! 1614: }
! 1615: break;
! 1616: }
! 1617: }
! 1618: return 0;
! 1619: }
! 1620:
! 1621: static int PTRCALL
! 1622: PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
! 1623: {
! 1624: for (;;) {
! 1625: switch (BYTE_TYPE(enc, ptr1)) {
! 1626: #define LEAD_CASE(n) \
! 1627: case BT_LEAD ## n: \
! 1628: if (*ptr1++ != *ptr2++) \
! 1629: return 0;
! 1630: LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
! 1631: #undef LEAD_CASE
! 1632: /* fall through */
! 1633: if (*ptr1++ != *ptr2++)
! 1634: return 0;
! 1635: break;
! 1636: case BT_NONASCII:
! 1637: case BT_NMSTRT:
! 1638: #ifdef XML_NS
! 1639: case BT_COLON:
! 1640: #endif
! 1641: case BT_HEX:
! 1642: case BT_DIGIT:
! 1643: case BT_NAME:
! 1644: case BT_MINUS:
! 1645: if (*ptr2++ != *ptr1++)
! 1646: return 0;
! 1647: if (MINBPC(enc) > 1) {
! 1648: if (*ptr2++ != *ptr1++)
! 1649: return 0;
! 1650: if (MINBPC(enc) > 2) {
! 1651: if (*ptr2++ != *ptr1++)
! 1652: return 0;
! 1653: if (MINBPC(enc) > 3) {
! 1654: if (*ptr2++ != *ptr1++)
! 1655: return 0;
! 1656: }
! 1657: }
! 1658: }
! 1659: break;
! 1660: default:
! 1661: if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
! 1662: return 1;
! 1663: switch (BYTE_TYPE(enc, ptr2)) {
! 1664: case BT_LEAD2:
! 1665: case BT_LEAD3:
! 1666: case BT_LEAD4:
! 1667: case BT_NONASCII:
! 1668: case BT_NMSTRT:
! 1669: #ifdef XML_NS
! 1670: case BT_COLON:
! 1671: #endif
! 1672: case BT_HEX:
! 1673: case BT_DIGIT:
! 1674: case BT_NAME:
! 1675: case BT_MINUS:
! 1676: return 0;
! 1677: default:
! 1678: return 1;
! 1679: }
! 1680: }
! 1681: }
! 1682: /* not reached */
! 1683: }
! 1684:
! 1685: static int PTRCALL
! 1686: PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1,
! 1687: const char *end1, const char *ptr2)
! 1688: {
! 1689: for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
! 1690: if (ptr1 == end1)
! 1691: return 0;
! 1692: if (!CHAR_MATCHES(enc, ptr1, *ptr2))
! 1693: return 0;
! 1694: }
! 1695: return ptr1 == end1;
! 1696: }
! 1697:
! 1698: static int PTRFASTCALL
! 1699: PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
! 1700: {
! 1701: const char *start = ptr;
! 1702: for (;;) {
! 1703: switch (BYTE_TYPE(enc, ptr)) {
! 1704: #define LEAD_CASE(n) \
! 1705: case BT_LEAD ## n: ptr += n; break;
! 1706: LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
! 1707: #undef LEAD_CASE
! 1708: case BT_NONASCII:
! 1709: case BT_NMSTRT:
! 1710: #ifdef XML_NS
! 1711: case BT_COLON:
! 1712: #endif
! 1713: case BT_HEX:
! 1714: case BT_DIGIT:
! 1715: case BT_NAME:
! 1716: case BT_MINUS:
! 1717: ptr += MINBPC(enc);
! 1718: break;
! 1719: default:
! 1720: return (int)(ptr - start);
! 1721: }
! 1722: }
! 1723: }
! 1724:
! 1725: static const char * PTRFASTCALL
! 1726: PREFIX(skipS)(const ENCODING *enc, const char *ptr)
! 1727: {
! 1728: for (;;) {
! 1729: switch (BYTE_TYPE(enc, ptr)) {
! 1730: case BT_LF:
! 1731: case BT_CR:
! 1732: case BT_S:
! 1733: ptr += MINBPC(enc);
! 1734: break;
! 1735: default:
! 1736: return ptr;
! 1737: }
! 1738: }
! 1739: }
! 1740:
! 1741: static void PTRCALL
! 1742: PREFIX(updatePosition)(const ENCODING *enc,
! 1743: const char *ptr,
! 1744: const char *end,
! 1745: POSITION *pos)
! 1746: {
! 1747: while (ptr != end) {
! 1748: switch (BYTE_TYPE(enc, ptr)) {
! 1749: #define LEAD_CASE(n) \
! 1750: case BT_LEAD ## n: \
! 1751: ptr += n; \
! 1752: break;
! 1753: LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
! 1754: #undef LEAD_CASE
! 1755: case BT_LF:
! 1756: pos->columnNumber = (XML_Size)-1;
! 1757: pos->lineNumber++;
! 1758: ptr += MINBPC(enc);
! 1759: break;
! 1760: case BT_CR:
! 1761: pos->lineNumber++;
! 1762: ptr += MINBPC(enc);
! 1763: if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
! 1764: ptr += MINBPC(enc);
! 1765: pos->columnNumber = (XML_Size)-1;
! 1766: break;
! 1767: default:
! 1768: ptr += MINBPC(enc);
! 1769: break;
! 1770: }
! 1771: pos->columnNumber++;
! 1772: }
! 1773: }
! 1774:
/* Remove the helper macros so that they do not leak into the including
   file (this file is meant to be #included, see the note at its top).
   INVALID_LEAD_CASE is defined at the top of this file alongside
   INVALID_CASES and is released here as well. */
#undef DO_LEAD_CASE
#undef MULTIBYTE_CASES
#undef INVALID_LEAD_CASE
#undef INVALID_CASES
#undef CHECK_NAME_CASE
#undef CHECK_NAME_CASES
#undef CHECK_NMSTRT_CASE
#undef CHECK_NMSTRT_CASES
! 1782:
! 1783: #endif /* XML_TOK_IMPL_C */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>