Annotation of embedaddon/libxml2/testchar.c, revision 1.1.1.2
/**
 * Test the UTF-8 decoding routines
 *
 * author: Daniel Veillard
 * copy: see Copyright for the status of this software.
 */
7:
8: #include <stdio.h>
9: #include <string.h>
10: #include <libxml/parser.h>
11: #include <libxml/parserInternals.h>
12:
1.1.1.2 ! misho 13: #include "buf.h"
! 14:
1.1 misho 15: int lastError;
16:
17: static void errorHandler(void *unused, xmlErrorPtr err) {
18: if ((unused == NULL) && (err != NULL) && (lastError == 0)) {
19: lastError = err->code;
20: }
21: }
22:
/*
 * Template documents used by the in-context injection tests.  The four
 * 'X' placeholders are overwritten in place with the bytes under test:
 * they start at offset 5 in document1 (element content) and at offset 10
 * in document2 (attribute value).  Both arrays are file-private, hence
 * static.
 */
static char document1[100] = "<doc>XXXX</doc>";
static char document2[100] = "<doc foo='XXXX'/>";
25:
26: static void testDocumentRangeByte1(xmlParserCtxtPtr ctxt, char *document,
27: int len, char *data, int forbid1, int forbid2) {
28: int i;
29: xmlDocPtr res;
30:
31: for (i = 0;i <= 0xFF;i++) {
32: lastError = 0;
33: xmlCtxtReset(ctxt);
34:
35: data[0] = i;
36:
37: res = xmlReadMemory(document, len, "test", NULL, 0);
38:
39: if ((i == forbid1) || (i == forbid2)) {
40: if ((lastError == 0) || (res != NULL))
41: fprintf(stderr,
42: "Failed to detect invalid char for Byte 0x%02X: %c\n",
43: i, i);
44: }
45:
46: else if ((i == '<') || (i == '&')) {
47: if ((lastError == 0) || (res != NULL))
48: fprintf(stderr,
49: "Failed to detect illegal char %c for Byte 0x%02X\n", i, i);
50: }
51: else if (((i < 0x20) || (i >= 0x80)) &&
52: (i != 0x9) && (i != 0xA) && (i != 0xD)) {
53: if ((lastError != XML_ERR_INVALID_CHAR) && (res != NULL))
54: fprintf(stderr,
55: "Failed to detect invalid char for Byte 0x%02X\n", i);
56: }
57: else if (res == NULL) {
58: fprintf(stderr,
59: "Failed to parse valid char for Byte 0x%02X : %c\n", i, i);
60: }
61: if (res != NULL)
62: xmlFreeDoc(res);
63: }
64: }
65:
66: static void testDocumentRangeByte2(xmlParserCtxtPtr ctxt, char *document,
67: int len, char *data) {
68: int i, j;
69: xmlDocPtr res;
70:
71: for (i = 0x80;i <= 0xFF;i++) {
72: for (j = 0;j <= 0xFF;j++) {
73: lastError = 0;
74: xmlCtxtReset(ctxt);
75:
76: data[0] = i;
77: data[1] = j;
78:
79: res = xmlReadMemory(document, len, "test", NULL, 0);
80:
81: /* if first bit of first char is set, then second bit must too */
82: if ((i & 0x80) && ((i & 0x40) == 0)) {
83: if ((lastError == 0) || (res != NULL))
84: fprintf(stderr,
85: "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
86: i, j);
87: }
88:
89: /*
90: * if first bit of first char is set, then second char first
91: * bits must be 10
92: */
93: else if ((i & 0x80) && ((j & 0xC0) != 0x80)) {
94: if ((lastError == 0) || (res != NULL))
95: fprintf(stderr,
96: "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
97: i, j);
98: }
99:
100: /*
101: * if using a 2 byte encoding then the value must be greater
102: * than 0x80, i.e. one of bits 5 to 1 of i must be set
103: */
104: else if ((i & 0x80) && ((i & 0x1E) == 0)) {
105: if ((lastError == 0) || (res != NULL))
106: fprintf(stderr,
107: "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
108: i, j);
109: }
110:
111: /*
112: * if third bit of first char is set, then the sequence would need
113: * at least 3 bytes, but we give only 2 !
114: */
115: else if ((i & 0xE0) == 0xE0) {
116: if ((lastError == 0) || (res != NULL))
117: fprintf(stderr,
118: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
119: i, j);
120: }
121:
122: /*
123: * We should see no error in remaning cases
124: */
125: else if ((lastError != 0) || (res == NULL)) {
1.1.1.2 ! misho 126: fprintf(stderr,
1.1 misho 127: "Failed to parse document for Bytes 0x%02X 0x%02X\n", i, j);
128: }
129: if (res != NULL)
130: xmlFreeDoc(res);
131: }
132: }
133: }
134:
135: /**
136: * testDocumentRanges:
137: *
138: * Test the correct UTF8 character parsing in context of XML documents
139: * Those are in-context injection tests checking the parser behaviour on
140: * edge case values at different point in content, beginning and end of
141: * CDATA in text or in attribute values.
142: */
143:
144: static void testDocumentRanges(void) {
145: xmlParserCtxtPtr ctxt;
146: char *data;
147:
148: /*
149: * Set up a parsing context using the first document as
150: * the current input source.
151: */
152: ctxt = xmlNewParserCtxt();
153: if (ctxt == NULL) {
154: fprintf(stderr, "Failed to allocate parser context\n");
155: return;
156: }
157:
158: printf("testing 1 byte char in document: 1");
159: fflush(stdout);
160: data = &document1[5];
161: data[0] = ' ';
162: data[1] = ' ';
163: data[2] = ' ';
164: data[3] = ' ';
165: /* test 1 byte injection at beginning of area */
166: testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
167: data, -1, -1);
168: printf(" 2");
169: fflush(stdout);
170: data[0] = ' ';
171: data[1] = ' ';
172: data[2] = ' ';
173: data[3] = ' ';
174: /* test 1 byte injection at end of area */
175: testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
176: data + 3, -1, -1);
177:
178: printf(" 3");
179: fflush(stdout);
180: data = &document2[10];
181: data[0] = ' ';
182: data[1] = ' ';
183: data[2] = ' ';
184: data[3] = ' ';
185: /* test 1 byte injection at beginning of area */
186: testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
187: data, '\'', -1);
188: printf(" 4");
189: fflush(stdout);
190: data[0] = ' ';
191: data[1] = ' ';
192: data[2] = ' ';
193: data[3] = ' ';
194: /* test 1 byte injection at end of area */
195: testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
196: data + 3, '\'', -1);
197: printf(" done\n");
198:
199: printf("testing 2 byte char in document: 1");
200: fflush(stdout);
201: data = &document1[5];
202: data[0] = ' ';
203: data[1] = ' ';
204: data[2] = ' ';
205: data[3] = ' ';
206: /* test 2 byte injection at beginning of area */
207: testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
208: data);
209: printf(" 2");
210: fflush(stdout);
211: data[0] = ' ';
212: data[1] = ' ';
213: data[2] = ' ';
214: data[3] = ' ';
215: /* test 2 byte injection at end of area */
216: testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
217: data + 2);
218:
219: printf(" 3");
220: fflush(stdout);
221: data = &document2[10];
222: data[0] = ' ';
223: data[1] = ' ';
224: data[2] = ' ';
225: data[3] = ' ';
226: /* test 2 byte injection at beginning of area */
227: testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
228: data);
229: printf(" 4");
230: fflush(stdout);
231: data[0] = ' ';
232: data[1] = ' ';
233: data[2] = ' ';
234: data[3] = ' ';
235: /* test 2 byte injection at end of area */
236: testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
237: data + 2);
238: printf(" done\n");
239:
240: xmlFreeParserCtxt(ctxt);
241: }
242:
243: static void testCharRangeByte1(xmlParserCtxtPtr ctxt, char *data) {
244: int i = 0;
245: int len, c;
246:
247: data[1] = 0;
248: data[2] = 0;
249: data[3] = 0;
250: for (i = 0;i <= 0xFF;i++) {
251: data[0] = i;
252: ctxt->charset = XML_CHAR_ENCODING_UTF8;
253:
254: lastError = 0;
255: c = xmlCurrentChar(ctxt, &len);
256: if ((i == 0) || (i >= 0x80)) {
257: /* we must see an error there */
258: if (lastError != XML_ERR_INVALID_CHAR)
259: fprintf(stderr,
260: "Failed to detect invalid char for Byte 0x%02X\n", i);
261: } else if (i == 0xD) {
262: if ((c != 0xA) || (len != 1))
263: fprintf(stderr, "Failed to convert char for Byte 0x%02X\n", i);
264: } else if ((c != i) || (len != 1)) {
265: fprintf(stderr, "Failed to parse char for Byte 0x%02X\n", i);
266: }
267: }
268: }
269:
270: static void testCharRangeByte2(xmlParserCtxtPtr ctxt, char *data) {
271: int i, j;
272: int len, c;
273:
274: data[2] = 0;
275: data[3] = 0;
276: for (i = 0x80;i <= 0xFF;i++) {
277: for (j = 0;j <= 0xFF;j++) {
278: data[0] = i;
279: data[1] = j;
280: ctxt->charset = XML_CHAR_ENCODING_UTF8;
281:
282: lastError = 0;
283: c = xmlCurrentChar(ctxt, &len);
284:
285: /* if first bit of first char is set, then second bit must too */
286: if ((i & 0x80) && ((i & 0x40) == 0)) {
287: if (lastError != XML_ERR_INVALID_CHAR)
288: fprintf(stderr,
289: "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
290: i, j);
291: }
292:
293: /*
294: * if first bit of first char is set, then second char first
295: * bits must be 10
296: */
297: else if ((i & 0x80) && ((j & 0xC0) != 0x80)) {
298: if (lastError != XML_ERR_INVALID_CHAR)
299: fprintf(stderr,
300: "Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
301: i, j, c);
302: }
303:
304: /*
305: * if using a 2 byte encoding then the value must be greater
306: * than 0x80, i.e. one of bits 5 to 1 of i must be set
307: */
308: else if ((i & 0x80) && ((i & 0x1E) == 0)) {
309: if (lastError != XML_ERR_INVALID_CHAR)
310: fprintf(stderr,
311: "Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
312: i, j, c);
313: }
314:
315: /*
316: * if third bit of first char is set, then the sequence would need
317: * at least 3 bytes, but we give only 2 !
318: */
319: else if ((i & 0xE0) == 0xE0) {
320: if (lastError != XML_ERR_INVALID_CHAR)
321: fprintf(stderr,
322: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
323: i, j);
324: }
325:
326: /*
327: * We should see no error in remaning cases
328: */
329: else if ((lastError != 0) || (len != 2)) {
330: fprintf(stderr,
331: "Failed to parse char for Bytes 0x%02X 0x%02X\n", i, j);
332: }
333:
334: /*
335: * Finally check the value is right
336: */
337: else if (c != (j & 0x3F) + ((i & 0x1F) << 6)) {
338: fprintf(stderr,
339: "Failed to parse char for Bytes 0x%02X 0x%02X: expect %d got %d\n",
340: i, j, ((j & 0x3F) + ((i & 0x1F) << 6)), c);
341: }
342: }
343: }
344: }
345:
346: static void testCharRangeByte3(xmlParserCtxtPtr ctxt, char *data) {
347: int i, j, k, K;
348: int len, c;
349: unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
350: int value;
351:
352: data[3] = 0;
353: for (i = 0xE0;i <= 0xFF;i++) {
354: for (j = 0;j <= 0xFF;j++) {
355: for (k = 0;k < 6;k++) {
356: data[0] = i;
357: data[1] = j;
358: K = lows[k];
359: data[2] = (char) K;
360: value = (K & 0x3F) + ((j & 0x3F) << 6) + ((i & 0xF) << 12);
361: ctxt->charset = XML_CHAR_ENCODING_UTF8;
362:
363: lastError = 0;
364: c = xmlCurrentChar(ctxt, &len);
365:
366: /*
367: * if fourth bit of first char is set, then the sequence would need
368: * at least 4 bytes, but we give only 3 !
369: */
370: if ((i & 0xF0) == 0xF0) {
371: if (lastError != XML_ERR_INVALID_CHAR)
372: fprintf(stderr,
373: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
374: i, j, K, data[3]);
375: }
376:
377: /*
378: * The second and the third bytes must start with 10
379: */
380: else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80)) {
381: if (lastError != XML_ERR_INVALID_CHAR)
382: fprintf(stderr,
383: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
384: i, j, K);
385: }
386:
387: /*
388: * if using a 3 byte encoding then the value must be greater
389: * than 0x800, i.e. one of bits 4 to 0 of i must be set or
390: * the 6th byte of data[1] must be set
391: */
392: else if (((i & 0xF) == 0) && ((j & 0x20) == 0)) {
393: if (lastError != XML_ERR_INVALID_CHAR)
394: fprintf(stderr,
395: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
396: i, j, K);
397: }
398:
399: /*
400: * There are values in that range that are not allowed in XML-1.0
401: */
402: else if (((value > 0xD7FF) && (value <0xE000)) ||
403: ((value > 0xFFFD) && (value <0x10000))) {
404: if (lastError != XML_ERR_INVALID_CHAR)
405: fprintf(stderr,
406: "Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X\n",
407: value, i, j, K);
408: }
409:
410: /*
411: * We should see no error in remaining cases
412: */
413: else if ((lastError != 0) || (len != 3)) {
1.1.1.2 ! misho 414: fprintf(stderr,
1.1 misho 415: "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
416: i, j, K);
417: }
418:
419: /*
420: * Finally check the value is right
421: */
422: else if (c != value) {
1.1.1.2 ! misho 423: fprintf(stderr,
1.1 misho 424: "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
425: i, j, data[2], value, c);
426: }
427: }
428: }
429: }
430: }
431:
432: static void testCharRangeByte4(xmlParserCtxtPtr ctxt, char *data) {
433: int i, j, k, K, l, L;
434: int len, c;
435: unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
436: int value;
437:
438: data[4] = 0;
439: for (i = 0xF0;i <= 0xFF;i++) {
440: for (j = 0;j <= 0xFF;j++) {
441: for (k = 0;k < 6;k++) {
442: for (l = 0;l < 6;l++) {
443: data[0] = i;
444: data[1] = j;
445: K = lows[k];
446: data[2] = (char) K;
447: L = lows[l];
448: data[3] = (char) L;
449: value = (L & 0x3F) + ((K & 0x3F) << 6) + ((j & 0x3F) << 12) +
450: ((i & 0x7) << 18);
451: ctxt->charset = XML_CHAR_ENCODING_UTF8;
452:
453: lastError = 0;
454: c = xmlCurrentChar(ctxt, &len);
455:
456: /*
457: * if fifth bit of first char is set, then the sequence would need
458: * at least 5 bytes, but we give only 4 !
459: */
460: if ((i & 0xF8) == 0xF8) {
461: if (lastError != XML_ERR_INVALID_CHAR)
462: fprintf(stderr,
463: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
464: i, j, K, data[3]);
465: }
466:
467: /*
468: * The second, third and fourth bytes must start with 10
469: */
470: else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80) ||
471: ((L & 0xC0) != 0x80)) {
472: if (lastError != XML_ERR_INVALID_CHAR)
473: fprintf(stderr,
474: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
475: i, j, K, L);
476: }
477:
478: /*
479: * if using a 3 byte encoding then the value must be greater
480: * than 0x10000, i.e. one of bits 3 to 0 of i must be set or
481: * the 6 or 5th byte of j must be set
482: */
483: else if (((i & 0x7) == 0) && ((j & 0x30) == 0)) {
484: if (lastError != XML_ERR_INVALID_CHAR)
485: fprintf(stderr,
486: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
487: i, j, K, L);
488: }
489:
490: /*
491: * There are values in that range that are not allowed in XML-1.0
492: */
493: else if (((value > 0xD7FF) && (value <0xE000)) ||
1.1.1.2 ! misho 494: ((value > 0xFFFD) && (value <0x10000)) ||
1.1 misho 495: (value > 0x10FFFF)) {
496: if (lastError != XML_ERR_INVALID_CHAR)
497: fprintf(stderr,
498: "Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
499: value, i, j, K, L);
500: }
501:
502: /*
503: * We should see no error in remaining cases
504: */
505: else if ((lastError != 0) || (len != 4)) {
1.1.1.2 ! misho 506: fprintf(stderr,
1.1 misho 507: "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
508: i, j, K);
509: }
510:
511: /*
512: * Finally check the value is right
513: */
514: else if (c != value) {
1.1.1.2 ! misho 515: fprintf(stderr,
1.1 misho 516: "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
517: i, j, data[2], value, c);
518: }
519: }
520: }
521: }
522: }
523: }
524:
525: /**
526: * testCharRanges:
527: *
528: * Test the correct UTF8 character parsing in isolation i.e.
529: * not when parsing a full document, this is less expensive and we can
530: * cover the full range of UTF-8 chars accepted by XML-1.0
531: */
532:
533: static void testCharRanges(void) {
534: char data[5];
535: xmlParserCtxtPtr ctxt;
536: xmlParserInputBufferPtr buf;
537: xmlParserInputPtr input;
538:
539: memset(data, 0, 5);
540:
541: /*
542: * Set up a parsing context using the above data buffer as
543: * the current input source.
544: */
545: ctxt = xmlNewParserCtxt();
546: if (ctxt == NULL) {
547: fprintf(stderr, "Failed to allocate parser context\n");
548: return;
549: }
550: buf = xmlParserInputBufferCreateStatic(data, sizeof(data),
551: XML_CHAR_ENCODING_NONE);
552: if (buf == NULL) {
553: fprintf(stderr, "Failed to allocate input buffer\n");
554: goto error;
555: }
556: input = xmlNewInputStream(ctxt);
557: if (input == NULL) {
558: xmlFreeParserInputBuffer(buf);
559: goto error;
560: }
561: input->filename = NULL;
562: input->buf = buf;
1.1.1.2 ! misho 563: input->cur =
! 564: input->base = xmlBufContent(input->buf->buffer);
! 565: input->end = input->base + 4;
1.1 misho 566: inputPush(ctxt, input);
567:
568: printf("testing char range: 1");
569: fflush(stdout);
570: testCharRangeByte1(ctxt, data);
571: printf(" 2");
572: fflush(stdout);
573: testCharRangeByte2(ctxt, data);
574: printf(" 3");
575: fflush(stdout);
576: testCharRangeByte3(ctxt, data);
577: printf(" 4");
578: fflush(stdout);
579: testCharRangeByte4(ctxt, data);
580: printf(" done\n");
581: fflush(stdout);
582:
583: error:
584: xmlFreeParserCtxt(ctxt);
585: }
586:
587: int main(void) {
588:
589: /*
590: * this initialize the library and check potential ABI mismatches
591: * between the version it was compiled for and the actual shared
592: * library used.
593: */
594: LIBXML_TEST_VERSION
595:
596: /*
597: * Catch errors separately
598: */
599:
600: xmlSetStructuredErrorFunc(NULL, errorHandler);
601:
602: /*
603: * Run the tests
604: */
605: testCharRanges();
606: testDocumentRanges();
607:
608: /*
609: * Cleanup function for the XML library.
610: */
611: xmlCleanupParser();
612: /*
613: * this is to debug memory for regression tests
614: */
615: xmlMemoryDump();
616: return(0);
617: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>