Annotation of embedaddon/libxml2/testchar.c, revision 1.1
1.1 ! misho 1: /**
! 2: * Test the UTF-8 decoding routines
! 3: *
! 4: * author: Daniel Veillard
! 5: * copy: see Copyright for the status of this software.
! 6: */
! 7:
! 8: #include <stdio.h>
! 9: #include <string.h>
! 10: #include <libxml/parser.h>
! 11: #include <libxml/parserInternals.h>
! 12:
/*
 * Code of the first libxml2 error recorded by errorHandler() since the
 * value was last reset to 0. Every test clears it before exercising the
 * parser. It is private to this test program, hence static.
 */
static int lastError;
! 14:
! 15: static void errorHandler(void *unused, xmlErrorPtr err) {
! 16: if ((unused == NULL) && (err != NULL) && (lastError == 0)) {
! 17: lastError = err->code;
! 18: }
! 19: }
! 20:
/*
 * Template documents for the in-context injection tests. The "XXXX"
 * placeholder (offset 5 in document1, offset 10 in document2) is
 * overwritten in place by testDocumentRanges(), so the arrays must stay
 * writable. They are private to this test program, hence static.
 */
static char document1[100] = "<doc>XXXX</doc>";
static char document2[100] = "<doc foo='XXXX'/>";
! 23:
! 24: static void testDocumentRangeByte1(xmlParserCtxtPtr ctxt, char *document,
! 25: int len, char *data, int forbid1, int forbid2) {
! 26: int i;
! 27: xmlDocPtr res;
! 28:
! 29: for (i = 0;i <= 0xFF;i++) {
! 30: lastError = 0;
! 31: xmlCtxtReset(ctxt);
! 32:
! 33: data[0] = i;
! 34:
! 35: res = xmlReadMemory(document, len, "test", NULL, 0);
! 36:
! 37: if ((i == forbid1) || (i == forbid2)) {
! 38: if ((lastError == 0) || (res != NULL))
! 39: fprintf(stderr,
! 40: "Failed to detect invalid char for Byte 0x%02X: %c\n",
! 41: i, i);
! 42: }
! 43:
! 44: else if ((i == '<') || (i == '&')) {
! 45: if ((lastError == 0) || (res != NULL))
! 46: fprintf(stderr,
! 47: "Failed to detect illegal char %c for Byte 0x%02X\n", i, i);
! 48: }
! 49: else if (((i < 0x20) || (i >= 0x80)) &&
! 50: (i != 0x9) && (i != 0xA) && (i != 0xD)) {
! 51: if ((lastError != XML_ERR_INVALID_CHAR) && (res != NULL))
! 52: fprintf(stderr,
! 53: "Failed to detect invalid char for Byte 0x%02X\n", i);
! 54: }
! 55: else if (res == NULL) {
! 56: fprintf(stderr,
! 57: "Failed to parse valid char for Byte 0x%02X : %c\n", i, i);
! 58: }
! 59: if (res != NULL)
! 60: xmlFreeDoc(res);
! 61: }
! 62: }
! 63:
! 64: static void testDocumentRangeByte2(xmlParserCtxtPtr ctxt, char *document,
! 65: int len, char *data) {
! 66: int i, j;
! 67: xmlDocPtr res;
! 68:
! 69: for (i = 0x80;i <= 0xFF;i++) {
! 70: for (j = 0;j <= 0xFF;j++) {
! 71: lastError = 0;
! 72: xmlCtxtReset(ctxt);
! 73:
! 74: data[0] = i;
! 75: data[1] = j;
! 76:
! 77: res = xmlReadMemory(document, len, "test", NULL, 0);
! 78:
! 79: /* if first bit of first char is set, then second bit must too */
! 80: if ((i & 0x80) && ((i & 0x40) == 0)) {
! 81: if ((lastError == 0) || (res != NULL))
! 82: fprintf(stderr,
! 83: "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
! 84: i, j);
! 85: }
! 86:
! 87: /*
! 88: * if first bit of first char is set, then second char first
! 89: * bits must be 10
! 90: */
! 91: else if ((i & 0x80) && ((j & 0xC0) != 0x80)) {
! 92: if ((lastError == 0) || (res != NULL))
! 93: fprintf(stderr,
! 94: "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
! 95: i, j);
! 96: }
! 97:
! 98: /*
! 99: * if using a 2 byte encoding then the value must be greater
! 100: * than 0x80, i.e. one of bits 5 to 1 of i must be set
! 101: */
! 102: else if ((i & 0x80) && ((i & 0x1E) == 0)) {
! 103: if ((lastError == 0) || (res != NULL))
! 104: fprintf(stderr,
! 105: "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
! 106: i, j);
! 107: }
! 108:
! 109: /*
! 110: * if third bit of first char is set, then the sequence would need
! 111: * at least 3 bytes, but we give only 2 !
! 112: */
! 113: else if ((i & 0xE0) == 0xE0) {
! 114: if ((lastError == 0) || (res != NULL))
! 115: fprintf(stderr,
! 116: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
! 117: i, j);
! 118: }
! 119:
! 120: /*
! 121: * We should see no error in remaning cases
! 122: */
! 123: else if ((lastError != 0) || (res == NULL)) {
! 124: fprintf(stderr,
! 125: "Failed to parse document for Bytes 0x%02X 0x%02X\n", i, j);
! 126: }
! 127: if (res != NULL)
! 128: xmlFreeDoc(res);
! 129: }
! 130: }
! 131: }
! 132:
! 133: /**
! 134: * testDocumentRanges:
! 135: *
! 136: * Test the correct UTF8 character parsing in context of XML documents
! 137: * Those are in-context injection tests checking the parser behaviour on
! 138: * edge case values at different point in content, beginning and end of
! 139: * CDATA in text or in attribute values.
! 140: */
! 141:
! 142: static void testDocumentRanges(void) {
! 143: xmlParserCtxtPtr ctxt;
! 144: char *data;
! 145:
! 146: /*
! 147: * Set up a parsing context using the first document as
! 148: * the current input source.
! 149: */
! 150: ctxt = xmlNewParserCtxt();
! 151: if (ctxt == NULL) {
! 152: fprintf(stderr, "Failed to allocate parser context\n");
! 153: return;
! 154: }
! 155:
! 156: printf("testing 1 byte char in document: 1");
! 157: fflush(stdout);
! 158: data = &document1[5];
! 159: data[0] = ' ';
! 160: data[1] = ' ';
! 161: data[2] = ' ';
! 162: data[3] = ' ';
! 163: /* test 1 byte injection at beginning of area */
! 164: testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
! 165: data, -1, -1);
! 166: printf(" 2");
! 167: fflush(stdout);
! 168: data[0] = ' ';
! 169: data[1] = ' ';
! 170: data[2] = ' ';
! 171: data[3] = ' ';
! 172: /* test 1 byte injection at end of area */
! 173: testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
! 174: data + 3, -1, -1);
! 175:
! 176: printf(" 3");
! 177: fflush(stdout);
! 178: data = &document2[10];
! 179: data[0] = ' ';
! 180: data[1] = ' ';
! 181: data[2] = ' ';
! 182: data[3] = ' ';
! 183: /* test 1 byte injection at beginning of area */
! 184: testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
! 185: data, '\'', -1);
! 186: printf(" 4");
! 187: fflush(stdout);
! 188: data[0] = ' ';
! 189: data[1] = ' ';
! 190: data[2] = ' ';
! 191: data[3] = ' ';
! 192: /* test 1 byte injection at end of area */
! 193: testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
! 194: data + 3, '\'', -1);
! 195: printf(" done\n");
! 196:
! 197: printf("testing 2 byte char in document: 1");
! 198: fflush(stdout);
! 199: data = &document1[5];
! 200: data[0] = ' ';
! 201: data[1] = ' ';
! 202: data[2] = ' ';
! 203: data[3] = ' ';
! 204: /* test 2 byte injection at beginning of area */
! 205: testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
! 206: data);
! 207: printf(" 2");
! 208: fflush(stdout);
! 209: data[0] = ' ';
! 210: data[1] = ' ';
! 211: data[2] = ' ';
! 212: data[3] = ' ';
! 213: /* test 2 byte injection at end of area */
! 214: testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
! 215: data + 2);
! 216:
! 217: printf(" 3");
! 218: fflush(stdout);
! 219: data = &document2[10];
! 220: data[0] = ' ';
! 221: data[1] = ' ';
! 222: data[2] = ' ';
! 223: data[3] = ' ';
! 224: /* test 2 byte injection at beginning of area */
! 225: testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
! 226: data);
! 227: printf(" 4");
! 228: fflush(stdout);
! 229: data[0] = ' ';
! 230: data[1] = ' ';
! 231: data[2] = ' ';
! 232: data[3] = ' ';
! 233: /* test 2 byte injection at end of area */
! 234: testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
! 235: data + 2);
! 236: printf(" done\n");
! 237:
! 238: xmlFreeParserCtxt(ctxt);
! 239: }
! 240:
! 241: static void testCharRangeByte1(xmlParserCtxtPtr ctxt, char *data) {
! 242: int i = 0;
! 243: int len, c;
! 244:
! 245: data[1] = 0;
! 246: data[2] = 0;
! 247: data[3] = 0;
! 248: for (i = 0;i <= 0xFF;i++) {
! 249: data[0] = i;
! 250: ctxt->charset = XML_CHAR_ENCODING_UTF8;
! 251:
! 252: lastError = 0;
! 253: c = xmlCurrentChar(ctxt, &len);
! 254: if ((i == 0) || (i >= 0x80)) {
! 255: /* we must see an error there */
! 256: if (lastError != XML_ERR_INVALID_CHAR)
! 257: fprintf(stderr,
! 258: "Failed to detect invalid char for Byte 0x%02X\n", i);
! 259: } else if (i == 0xD) {
! 260: if ((c != 0xA) || (len != 1))
! 261: fprintf(stderr, "Failed to convert char for Byte 0x%02X\n", i);
! 262: } else if ((c != i) || (len != 1)) {
! 263: fprintf(stderr, "Failed to parse char for Byte 0x%02X\n", i);
! 264: }
! 265: }
! 266: }
! 267:
! 268: static void testCharRangeByte2(xmlParserCtxtPtr ctxt, char *data) {
! 269: int i, j;
! 270: int len, c;
! 271:
! 272: data[2] = 0;
! 273: data[3] = 0;
! 274: for (i = 0x80;i <= 0xFF;i++) {
! 275: for (j = 0;j <= 0xFF;j++) {
! 276: data[0] = i;
! 277: data[1] = j;
! 278: ctxt->charset = XML_CHAR_ENCODING_UTF8;
! 279:
! 280: lastError = 0;
! 281: c = xmlCurrentChar(ctxt, &len);
! 282:
! 283: /* if first bit of first char is set, then second bit must too */
! 284: if ((i & 0x80) && ((i & 0x40) == 0)) {
! 285: if (lastError != XML_ERR_INVALID_CHAR)
! 286: fprintf(stderr,
! 287: "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
! 288: i, j);
! 289: }
! 290:
! 291: /*
! 292: * if first bit of first char is set, then second char first
! 293: * bits must be 10
! 294: */
! 295: else if ((i & 0x80) && ((j & 0xC0) != 0x80)) {
! 296: if (lastError != XML_ERR_INVALID_CHAR)
! 297: fprintf(stderr,
! 298: "Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
! 299: i, j, c);
! 300: }
! 301:
! 302: /*
! 303: * if using a 2 byte encoding then the value must be greater
! 304: * than 0x80, i.e. one of bits 5 to 1 of i must be set
! 305: */
! 306: else if ((i & 0x80) && ((i & 0x1E) == 0)) {
! 307: if (lastError != XML_ERR_INVALID_CHAR)
! 308: fprintf(stderr,
! 309: "Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
! 310: i, j, c);
! 311: }
! 312:
! 313: /*
! 314: * if third bit of first char is set, then the sequence would need
! 315: * at least 3 bytes, but we give only 2 !
! 316: */
! 317: else if ((i & 0xE0) == 0xE0) {
! 318: if (lastError != XML_ERR_INVALID_CHAR)
! 319: fprintf(stderr,
! 320: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
! 321: i, j);
! 322: }
! 323:
! 324: /*
! 325: * We should see no error in remaning cases
! 326: */
! 327: else if ((lastError != 0) || (len != 2)) {
! 328: fprintf(stderr,
! 329: "Failed to parse char for Bytes 0x%02X 0x%02X\n", i, j);
! 330: }
! 331:
! 332: /*
! 333: * Finally check the value is right
! 334: */
! 335: else if (c != (j & 0x3F) + ((i & 0x1F) << 6)) {
! 336: fprintf(stderr,
! 337: "Failed to parse char for Bytes 0x%02X 0x%02X: expect %d got %d\n",
! 338: i, j, ((j & 0x3F) + ((i & 0x1F) << 6)), c);
! 339: }
! 340: }
! 341: }
! 342: }
! 343:
! 344: static void testCharRangeByte3(xmlParserCtxtPtr ctxt, char *data) {
! 345: int i, j, k, K;
! 346: int len, c;
! 347: unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
! 348: int value;
! 349:
! 350: data[3] = 0;
! 351: for (i = 0xE0;i <= 0xFF;i++) {
! 352: for (j = 0;j <= 0xFF;j++) {
! 353: for (k = 0;k < 6;k++) {
! 354: data[0] = i;
! 355: data[1] = j;
! 356: K = lows[k];
! 357: data[2] = (char) K;
! 358: value = (K & 0x3F) + ((j & 0x3F) << 6) + ((i & 0xF) << 12);
! 359: ctxt->charset = XML_CHAR_ENCODING_UTF8;
! 360:
! 361: lastError = 0;
! 362: c = xmlCurrentChar(ctxt, &len);
! 363:
! 364: /*
! 365: * if fourth bit of first char is set, then the sequence would need
! 366: * at least 4 bytes, but we give only 3 !
! 367: */
! 368: if ((i & 0xF0) == 0xF0) {
! 369: if (lastError != XML_ERR_INVALID_CHAR)
! 370: fprintf(stderr,
! 371: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
! 372: i, j, K, data[3]);
! 373: }
! 374:
! 375: /*
! 376: * The second and the third bytes must start with 10
! 377: */
! 378: else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80)) {
! 379: if (lastError != XML_ERR_INVALID_CHAR)
! 380: fprintf(stderr,
! 381: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
! 382: i, j, K);
! 383: }
! 384:
! 385: /*
! 386: * if using a 3 byte encoding then the value must be greater
! 387: * than 0x800, i.e. one of bits 4 to 0 of i must be set or
! 388: * the 6th byte of data[1] must be set
! 389: */
! 390: else if (((i & 0xF) == 0) && ((j & 0x20) == 0)) {
! 391: if (lastError != XML_ERR_INVALID_CHAR)
! 392: fprintf(stderr,
! 393: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
! 394: i, j, K);
! 395: }
! 396:
! 397: /*
! 398: * There are values in that range that are not allowed in XML-1.0
! 399: */
! 400: else if (((value > 0xD7FF) && (value <0xE000)) ||
! 401: ((value > 0xFFFD) && (value <0x10000))) {
! 402: if (lastError != XML_ERR_INVALID_CHAR)
! 403: fprintf(stderr,
! 404: "Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X\n",
! 405: value, i, j, K);
! 406: }
! 407:
! 408: /*
! 409: * We should see no error in remaining cases
! 410: */
! 411: else if ((lastError != 0) || (len != 3)) {
! 412: fprintf(stderr,
! 413: "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
! 414: i, j, K);
! 415: }
! 416:
! 417: /*
! 418: * Finally check the value is right
! 419: */
! 420: else if (c != value) {
! 421: fprintf(stderr,
! 422: "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
! 423: i, j, data[2], value, c);
! 424: }
! 425: }
! 426: }
! 427: }
! 428: }
! 429:
! 430: static void testCharRangeByte4(xmlParserCtxtPtr ctxt, char *data) {
! 431: int i, j, k, K, l, L;
! 432: int len, c;
! 433: unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
! 434: int value;
! 435:
! 436: data[4] = 0;
! 437: for (i = 0xF0;i <= 0xFF;i++) {
! 438: for (j = 0;j <= 0xFF;j++) {
! 439: for (k = 0;k < 6;k++) {
! 440: for (l = 0;l < 6;l++) {
! 441: data[0] = i;
! 442: data[1] = j;
! 443: K = lows[k];
! 444: data[2] = (char) K;
! 445: L = lows[l];
! 446: data[3] = (char) L;
! 447: value = (L & 0x3F) + ((K & 0x3F) << 6) + ((j & 0x3F) << 12) +
! 448: ((i & 0x7) << 18);
! 449: ctxt->charset = XML_CHAR_ENCODING_UTF8;
! 450:
! 451: lastError = 0;
! 452: c = xmlCurrentChar(ctxt, &len);
! 453:
! 454: /*
! 455: * if fifth bit of first char is set, then the sequence would need
! 456: * at least 5 bytes, but we give only 4 !
! 457: */
! 458: if ((i & 0xF8) == 0xF8) {
! 459: if (lastError != XML_ERR_INVALID_CHAR)
! 460: fprintf(stderr,
! 461: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
! 462: i, j, K, data[3]);
! 463: }
! 464:
! 465: /*
! 466: * The second, third and fourth bytes must start with 10
! 467: */
! 468: else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80) ||
! 469: ((L & 0xC0) != 0x80)) {
! 470: if (lastError != XML_ERR_INVALID_CHAR)
! 471: fprintf(stderr,
! 472: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
! 473: i, j, K, L);
! 474: }
! 475:
! 476: /*
! 477: * if using a 3 byte encoding then the value must be greater
! 478: * than 0x10000, i.e. one of bits 3 to 0 of i must be set or
! 479: * the 6 or 5th byte of j must be set
! 480: */
! 481: else if (((i & 0x7) == 0) && ((j & 0x30) == 0)) {
! 482: if (lastError != XML_ERR_INVALID_CHAR)
! 483: fprintf(stderr,
! 484: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
! 485: i, j, K, L);
! 486: }
! 487:
! 488: /*
! 489: * There are values in that range that are not allowed in XML-1.0
! 490: */
! 491: else if (((value > 0xD7FF) && (value <0xE000)) ||
! 492: ((value > 0xFFFD) && (value <0x10000)) ||
! 493: (value > 0x10FFFF)) {
! 494: if (lastError != XML_ERR_INVALID_CHAR)
! 495: fprintf(stderr,
! 496: "Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
! 497: value, i, j, K, L);
! 498: }
! 499:
! 500: /*
! 501: * We should see no error in remaining cases
! 502: */
! 503: else if ((lastError != 0) || (len != 4)) {
! 504: fprintf(stderr,
! 505: "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
! 506: i, j, K);
! 507: }
! 508:
! 509: /*
! 510: * Finally check the value is right
! 511: */
! 512: else if (c != value) {
! 513: fprintf(stderr,
! 514: "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
! 515: i, j, data[2], value, c);
! 516: }
! 517: }
! 518: }
! 519: }
! 520: }
! 521: }
! 522:
! 523: /**
! 524: * testCharRanges:
! 525: *
! 526: * Test the correct UTF8 character parsing in isolation i.e.
! 527: * not when parsing a full document, this is less expensive and we can
! 528: * cover the full range of UTF-8 chars accepted by XML-1.0
! 529: */
! 530:
! 531: static void testCharRanges(void) {
! 532: char data[5];
! 533: xmlParserCtxtPtr ctxt;
! 534: xmlParserInputBufferPtr buf;
! 535: xmlParserInputPtr input;
! 536:
! 537: memset(data, 0, 5);
! 538:
! 539: /*
! 540: * Set up a parsing context using the above data buffer as
! 541: * the current input source.
! 542: */
! 543: ctxt = xmlNewParserCtxt();
! 544: if (ctxt == NULL) {
! 545: fprintf(stderr, "Failed to allocate parser context\n");
! 546: return;
! 547: }
! 548: buf = xmlParserInputBufferCreateStatic(data, sizeof(data),
! 549: XML_CHAR_ENCODING_NONE);
! 550: if (buf == NULL) {
! 551: fprintf(stderr, "Failed to allocate input buffer\n");
! 552: goto error;
! 553: }
! 554: input = xmlNewInputStream(ctxt);
! 555: if (input == NULL) {
! 556: xmlFreeParserInputBuffer(buf);
! 557: goto error;
! 558: }
! 559: input->filename = NULL;
! 560: input->buf = buf;
! 561: input->base = input->buf->buffer->content;
! 562: input->cur = input->buf->buffer->content;
! 563: input->end = &input->buf->buffer->content[4];
! 564: inputPush(ctxt, input);
! 565:
! 566: printf("testing char range: 1");
! 567: fflush(stdout);
! 568: testCharRangeByte1(ctxt, data);
! 569: printf(" 2");
! 570: fflush(stdout);
! 571: testCharRangeByte2(ctxt, data);
! 572: printf(" 3");
! 573: fflush(stdout);
! 574: testCharRangeByte3(ctxt, data);
! 575: printf(" 4");
! 576: fflush(stdout);
! 577: testCharRangeByte4(ctxt, data);
! 578: printf(" done\n");
! 579: fflush(stdout);
! 580:
! 581: error:
! 582: xmlFreeParserCtxt(ctxt);
! 583: }
! 584:
! 585: int main(void) {
! 586:
! 587: /*
! 588: * this initialize the library and check potential ABI mismatches
! 589: * between the version it was compiled for and the actual shared
! 590: * library used.
! 591: */
! 592: LIBXML_TEST_VERSION
! 593:
! 594: /*
! 595: * Catch errors separately
! 596: */
! 597:
! 598: xmlSetStructuredErrorFunc(NULL, errorHandler);
! 599:
! 600: /*
! 601: * Run the tests
! 602: */
! 603: testCharRanges();
! 604: testDocumentRanges();
! 605:
! 606: /*
! 607: * Cleanup function for the XML library.
! 608: */
! 609: xmlCleanupParser();
! 610: /*
! 611: * this is to debug memory for regression tests
! 612: */
! 613: xmlMemoryDump();
! 614: return(0);
! 615: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>