Annotation of embedaddon/libxml2/testchar.c, revision 1.1.1.1
1.1 misho 1: /**
2: * Test the UTF-8 decoding routines
3: *
4: * author: Daniel Veillard
5: * copy: see Copyright for the status of this software.
6: */
7:
8: #include <stdio.h>
9: #include <string.h>
10: #include <libxml/parser.h>
11: #include <libxml/parserInternals.h>
12:
13: int lastError;
14:
15: static void errorHandler(void *unused, xmlErrorPtr err) {
16: if ((unused == NULL) && (err != NULL) && (lastError == 0)) {
17: lastError = err->code;
18: }
19: }
20:
/*
 * Template documents used by the in-context injection tests. The "XXXX"
 * run is a 4 byte placeholder that the tests overwrite: it starts at
 * offset 5 in document1 (element content) and at offset 10 in document2
 * (attribute value). Both buffers are private to this file.
 */
static char document1[100] = "<doc>XXXX</doc>";
static char document2[100] = "<doc foo='XXXX'/>";
23:
24: static void testDocumentRangeByte1(xmlParserCtxtPtr ctxt, char *document,
25: int len, char *data, int forbid1, int forbid2) {
26: int i;
27: xmlDocPtr res;
28:
29: for (i = 0;i <= 0xFF;i++) {
30: lastError = 0;
31: xmlCtxtReset(ctxt);
32:
33: data[0] = i;
34:
35: res = xmlReadMemory(document, len, "test", NULL, 0);
36:
37: if ((i == forbid1) || (i == forbid2)) {
38: if ((lastError == 0) || (res != NULL))
39: fprintf(stderr,
40: "Failed to detect invalid char for Byte 0x%02X: %c\n",
41: i, i);
42: }
43:
44: else if ((i == '<') || (i == '&')) {
45: if ((lastError == 0) || (res != NULL))
46: fprintf(stderr,
47: "Failed to detect illegal char %c for Byte 0x%02X\n", i, i);
48: }
49: else if (((i < 0x20) || (i >= 0x80)) &&
50: (i != 0x9) && (i != 0xA) && (i != 0xD)) {
51: if ((lastError != XML_ERR_INVALID_CHAR) && (res != NULL))
52: fprintf(stderr,
53: "Failed to detect invalid char for Byte 0x%02X\n", i);
54: }
55: else if (res == NULL) {
56: fprintf(stderr,
57: "Failed to parse valid char for Byte 0x%02X : %c\n", i, i);
58: }
59: if (res != NULL)
60: xmlFreeDoc(res);
61: }
62: }
63:
64: static void testDocumentRangeByte2(xmlParserCtxtPtr ctxt, char *document,
65: int len, char *data) {
66: int i, j;
67: xmlDocPtr res;
68:
69: for (i = 0x80;i <= 0xFF;i++) {
70: for (j = 0;j <= 0xFF;j++) {
71: lastError = 0;
72: xmlCtxtReset(ctxt);
73:
74: data[0] = i;
75: data[1] = j;
76:
77: res = xmlReadMemory(document, len, "test", NULL, 0);
78:
79: /* if first bit of first char is set, then second bit must too */
80: if ((i & 0x80) && ((i & 0x40) == 0)) {
81: if ((lastError == 0) || (res != NULL))
82: fprintf(stderr,
83: "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
84: i, j);
85: }
86:
87: /*
88: * if first bit of first char is set, then second char first
89: * bits must be 10
90: */
91: else if ((i & 0x80) && ((j & 0xC0) != 0x80)) {
92: if ((lastError == 0) || (res != NULL))
93: fprintf(stderr,
94: "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
95: i, j);
96: }
97:
98: /*
99: * if using a 2 byte encoding then the value must be greater
100: * than 0x80, i.e. one of bits 5 to 1 of i must be set
101: */
102: else if ((i & 0x80) && ((i & 0x1E) == 0)) {
103: if ((lastError == 0) || (res != NULL))
104: fprintf(stderr,
105: "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
106: i, j);
107: }
108:
109: /*
110: * if third bit of first char is set, then the sequence would need
111: * at least 3 bytes, but we give only 2 !
112: */
113: else if ((i & 0xE0) == 0xE0) {
114: if ((lastError == 0) || (res != NULL))
115: fprintf(stderr,
116: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
117: i, j);
118: }
119:
120: /*
121: * We should see no error in remaning cases
122: */
123: else if ((lastError != 0) || (res == NULL)) {
124: fprintf(stderr,
125: "Failed to parse document for Bytes 0x%02X 0x%02X\n", i, j);
126: }
127: if (res != NULL)
128: xmlFreeDoc(res);
129: }
130: }
131: }
132:
133: /**
134: * testDocumentRanges:
135: *
136: * Test the correct UTF8 character parsing in context of XML documents
137: * Those are in-context injection tests checking the parser behaviour on
138: * edge case values at different point in content, beginning and end of
139: * CDATA in text or in attribute values.
140: */
141:
142: static void testDocumentRanges(void) {
143: xmlParserCtxtPtr ctxt;
144: char *data;
145:
146: /*
147: * Set up a parsing context using the first document as
148: * the current input source.
149: */
150: ctxt = xmlNewParserCtxt();
151: if (ctxt == NULL) {
152: fprintf(stderr, "Failed to allocate parser context\n");
153: return;
154: }
155:
156: printf("testing 1 byte char in document: 1");
157: fflush(stdout);
158: data = &document1[5];
159: data[0] = ' ';
160: data[1] = ' ';
161: data[2] = ' ';
162: data[3] = ' ';
163: /* test 1 byte injection at beginning of area */
164: testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
165: data, -1, -1);
166: printf(" 2");
167: fflush(stdout);
168: data[0] = ' ';
169: data[1] = ' ';
170: data[2] = ' ';
171: data[3] = ' ';
172: /* test 1 byte injection at end of area */
173: testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
174: data + 3, -1, -1);
175:
176: printf(" 3");
177: fflush(stdout);
178: data = &document2[10];
179: data[0] = ' ';
180: data[1] = ' ';
181: data[2] = ' ';
182: data[3] = ' ';
183: /* test 1 byte injection at beginning of area */
184: testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
185: data, '\'', -1);
186: printf(" 4");
187: fflush(stdout);
188: data[0] = ' ';
189: data[1] = ' ';
190: data[2] = ' ';
191: data[3] = ' ';
192: /* test 1 byte injection at end of area */
193: testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
194: data + 3, '\'', -1);
195: printf(" done\n");
196:
197: printf("testing 2 byte char in document: 1");
198: fflush(stdout);
199: data = &document1[5];
200: data[0] = ' ';
201: data[1] = ' ';
202: data[2] = ' ';
203: data[3] = ' ';
204: /* test 2 byte injection at beginning of area */
205: testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
206: data);
207: printf(" 2");
208: fflush(stdout);
209: data[0] = ' ';
210: data[1] = ' ';
211: data[2] = ' ';
212: data[3] = ' ';
213: /* test 2 byte injection at end of area */
214: testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
215: data + 2);
216:
217: printf(" 3");
218: fflush(stdout);
219: data = &document2[10];
220: data[0] = ' ';
221: data[1] = ' ';
222: data[2] = ' ';
223: data[3] = ' ';
224: /* test 2 byte injection at beginning of area */
225: testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
226: data);
227: printf(" 4");
228: fflush(stdout);
229: data[0] = ' ';
230: data[1] = ' ';
231: data[2] = ' ';
232: data[3] = ' ';
233: /* test 2 byte injection at end of area */
234: testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
235: data + 2);
236: printf(" done\n");
237:
238: xmlFreeParserCtxt(ctxt);
239: }
240:
241: static void testCharRangeByte1(xmlParserCtxtPtr ctxt, char *data) {
242: int i = 0;
243: int len, c;
244:
245: data[1] = 0;
246: data[2] = 0;
247: data[3] = 0;
248: for (i = 0;i <= 0xFF;i++) {
249: data[0] = i;
250: ctxt->charset = XML_CHAR_ENCODING_UTF8;
251:
252: lastError = 0;
253: c = xmlCurrentChar(ctxt, &len);
254: if ((i == 0) || (i >= 0x80)) {
255: /* we must see an error there */
256: if (lastError != XML_ERR_INVALID_CHAR)
257: fprintf(stderr,
258: "Failed to detect invalid char for Byte 0x%02X\n", i);
259: } else if (i == 0xD) {
260: if ((c != 0xA) || (len != 1))
261: fprintf(stderr, "Failed to convert char for Byte 0x%02X\n", i);
262: } else if ((c != i) || (len != 1)) {
263: fprintf(stderr, "Failed to parse char for Byte 0x%02X\n", i);
264: }
265: }
266: }
267:
268: static void testCharRangeByte2(xmlParserCtxtPtr ctxt, char *data) {
269: int i, j;
270: int len, c;
271:
272: data[2] = 0;
273: data[3] = 0;
274: for (i = 0x80;i <= 0xFF;i++) {
275: for (j = 0;j <= 0xFF;j++) {
276: data[0] = i;
277: data[1] = j;
278: ctxt->charset = XML_CHAR_ENCODING_UTF8;
279:
280: lastError = 0;
281: c = xmlCurrentChar(ctxt, &len);
282:
283: /* if first bit of first char is set, then second bit must too */
284: if ((i & 0x80) && ((i & 0x40) == 0)) {
285: if (lastError != XML_ERR_INVALID_CHAR)
286: fprintf(stderr,
287: "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
288: i, j);
289: }
290:
291: /*
292: * if first bit of first char is set, then second char first
293: * bits must be 10
294: */
295: else if ((i & 0x80) && ((j & 0xC0) != 0x80)) {
296: if (lastError != XML_ERR_INVALID_CHAR)
297: fprintf(stderr,
298: "Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
299: i, j, c);
300: }
301:
302: /*
303: * if using a 2 byte encoding then the value must be greater
304: * than 0x80, i.e. one of bits 5 to 1 of i must be set
305: */
306: else if ((i & 0x80) && ((i & 0x1E) == 0)) {
307: if (lastError != XML_ERR_INVALID_CHAR)
308: fprintf(stderr,
309: "Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
310: i, j, c);
311: }
312:
313: /*
314: * if third bit of first char is set, then the sequence would need
315: * at least 3 bytes, but we give only 2 !
316: */
317: else if ((i & 0xE0) == 0xE0) {
318: if (lastError != XML_ERR_INVALID_CHAR)
319: fprintf(stderr,
320: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
321: i, j);
322: }
323:
324: /*
325: * We should see no error in remaning cases
326: */
327: else if ((lastError != 0) || (len != 2)) {
328: fprintf(stderr,
329: "Failed to parse char for Bytes 0x%02X 0x%02X\n", i, j);
330: }
331:
332: /*
333: * Finally check the value is right
334: */
335: else if (c != (j & 0x3F) + ((i & 0x1F) << 6)) {
336: fprintf(stderr,
337: "Failed to parse char for Bytes 0x%02X 0x%02X: expect %d got %d\n",
338: i, j, ((j & 0x3F) + ((i & 0x1F) << 6)), c);
339: }
340: }
341: }
342: }
343:
344: static void testCharRangeByte3(xmlParserCtxtPtr ctxt, char *data) {
345: int i, j, k, K;
346: int len, c;
347: unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
348: int value;
349:
350: data[3] = 0;
351: for (i = 0xE0;i <= 0xFF;i++) {
352: for (j = 0;j <= 0xFF;j++) {
353: for (k = 0;k < 6;k++) {
354: data[0] = i;
355: data[1] = j;
356: K = lows[k];
357: data[2] = (char) K;
358: value = (K & 0x3F) + ((j & 0x3F) << 6) + ((i & 0xF) << 12);
359: ctxt->charset = XML_CHAR_ENCODING_UTF8;
360:
361: lastError = 0;
362: c = xmlCurrentChar(ctxt, &len);
363:
364: /*
365: * if fourth bit of first char is set, then the sequence would need
366: * at least 4 bytes, but we give only 3 !
367: */
368: if ((i & 0xF0) == 0xF0) {
369: if (lastError != XML_ERR_INVALID_CHAR)
370: fprintf(stderr,
371: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
372: i, j, K, data[3]);
373: }
374:
375: /*
376: * The second and the third bytes must start with 10
377: */
378: else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80)) {
379: if (lastError != XML_ERR_INVALID_CHAR)
380: fprintf(stderr,
381: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
382: i, j, K);
383: }
384:
385: /*
386: * if using a 3 byte encoding then the value must be greater
387: * than 0x800, i.e. one of bits 4 to 0 of i must be set or
388: * the 6th byte of data[1] must be set
389: */
390: else if (((i & 0xF) == 0) && ((j & 0x20) == 0)) {
391: if (lastError != XML_ERR_INVALID_CHAR)
392: fprintf(stderr,
393: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
394: i, j, K);
395: }
396:
397: /*
398: * There are values in that range that are not allowed in XML-1.0
399: */
400: else if (((value > 0xD7FF) && (value <0xE000)) ||
401: ((value > 0xFFFD) && (value <0x10000))) {
402: if (lastError != XML_ERR_INVALID_CHAR)
403: fprintf(stderr,
404: "Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X\n",
405: value, i, j, K);
406: }
407:
408: /*
409: * We should see no error in remaining cases
410: */
411: else if ((lastError != 0) || (len != 3)) {
412: fprintf(stderr,
413: "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
414: i, j, K);
415: }
416:
417: /*
418: * Finally check the value is right
419: */
420: else if (c != value) {
421: fprintf(stderr,
422: "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
423: i, j, data[2], value, c);
424: }
425: }
426: }
427: }
428: }
429:
430: static void testCharRangeByte4(xmlParserCtxtPtr ctxt, char *data) {
431: int i, j, k, K, l, L;
432: int len, c;
433: unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
434: int value;
435:
436: data[4] = 0;
437: for (i = 0xF0;i <= 0xFF;i++) {
438: for (j = 0;j <= 0xFF;j++) {
439: for (k = 0;k < 6;k++) {
440: for (l = 0;l < 6;l++) {
441: data[0] = i;
442: data[1] = j;
443: K = lows[k];
444: data[2] = (char) K;
445: L = lows[l];
446: data[3] = (char) L;
447: value = (L & 0x3F) + ((K & 0x3F) << 6) + ((j & 0x3F) << 12) +
448: ((i & 0x7) << 18);
449: ctxt->charset = XML_CHAR_ENCODING_UTF8;
450:
451: lastError = 0;
452: c = xmlCurrentChar(ctxt, &len);
453:
454: /*
455: * if fifth bit of first char is set, then the sequence would need
456: * at least 5 bytes, but we give only 4 !
457: */
458: if ((i & 0xF8) == 0xF8) {
459: if (lastError != XML_ERR_INVALID_CHAR)
460: fprintf(stderr,
461: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
462: i, j, K, data[3]);
463: }
464:
465: /*
466: * The second, third and fourth bytes must start with 10
467: */
468: else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80) ||
469: ((L & 0xC0) != 0x80)) {
470: if (lastError != XML_ERR_INVALID_CHAR)
471: fprintf(stderr,
472: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
473: i, j, K, L);
474: }
475:
476: /*
477: * if using a 3 byte encoding then the value must be greater
478: * than 0x10000, i.e. one of bits 3 to 0 of i must be set or
479: * the 6 or 5th byte of j must be set
480: */
481: else if (((i & 0x7) == 0) && ((j & 0x30) == 0)) {
482: if (lastError != XML_ERR_INVALID_CHAR)
483: fprintf(stderr,
484: "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
485: i, j, K, L);
486: }
487:
488: /*
489: * There are values in that range that are not allowed in XML-1.0
490: */
491: else if (((value > 0xD7FF) && (value <0xE000)) ||
492: ((value > 0xFFFD) && (value <0x10000)) ||
493: (value > 0x10FFFF)) {
494: if (lastError != XML_ERR_INVALID_CHAR)
495: fprintf(stderr,
496: "Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
497: value, i, j, K, L);
498: }
499:
500: /*
501: * We should see no error in remaining cases
502: */
503: else if ((lastError != 0) || (len != 4)) {
504: fprintf(stderr,
505: "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
506: i, j, K);
507: }
508:
509: /*
510: * Finally check the value is right
511: */
512: else if (c != value) {
513: fprintf(stderr,
514: "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
515: i, j, data[2], value, c);
516: }
517: }
518: }
519: }
520: }
521: }
522:
523: /**
524: * testCharRanges:
525: *
526: * Test the correct UTF8 character parsing in isolation i.e.
527: * not when parsing a full document, this is less expensive and we can
528: * cover the full range of UTF-8 chars accepted by XML-1.0
529: */
530:
531: static void testCharRanges(void) {
532: char data[5];
533: xmlParserCtxtPtr ctxt;
534: xmlParserInputBufferPtr buf;
535: xmlParserInputPtr input;
536:
537: memset(data, 0, 5);
538:
539: /*
540: * Set up a parsing context using the above data buffer as
541: * the current input source.
542: */
543: ctxt = xmlNewParserCtxt();
544: if (ctxt == NULL) {
545: fprintf(stderr, "Failed to allocate parser context\n");
546: return;
547: }
548: buf = xmlParserInputBufferCreateStatic(data, sizeof(data),
549: XML_CHAR_ENCODING_NONE);
550: if (buf == NULL) {
551: fprintf(stderr, "Failed to allocate input buffer\n");
552: goto error;
553: }
554: input = xmlNewInputStream(ctxt);
555: if (input == NULL) {
556: xmlFreeParserInputBuffer(buf);
557: goto error;
558: }
559: input->filename = NULL;
560: input->buf = buf;
561: input->base = input->buf->buffer->content;
562: input->cur = input->buf->buffer->content;
563: input->end = &input->buf->buffer->content[4];
564: inputPush(ctxt, input);
565:
566: printf("testing char range: 1");
567: fflush(stdout);
568: testCharRangeByte1(ctxt, data);
569: printf(" 2");
570: fflush(stdout);
571: testCharRangeByte2(ctxt, data);
572: printf(" 3");
573: fflush(stdout);
574: testCharRangeByte3(ctxt, data);
575: printf(" 4");
576: fflush(stdout);
577: testCharRangeByte4(ctxt, data);
578: printf(" done\n");
579: fflush(stdout);
580:
581: error:
582: xmlFreeParserCtxt(ctxt);
583: }
584:
585: int main(void) {
586:
587: /*
588: * this initialize the library and check potential ABI mismatches
589: * between the version it was compiled for and the actual shared
590: * library used.
591: */
592: LIBXML_TEST_VERSION
593:
594: /*
595: * Catch errors separately
596: */
597:
598: xmlSetStructuredErrorFunc(NULL, errorHandler);
599:
600: /*
601: * Run the tests
602: */
603: testCharRanges();
604: testDocumentRanges();
605:
606: /*
607: * Cleanup function for the XML library.
608: */
609: xmlCleanupParser();
610: /*
611: * this is to debug memory for regression tests
612: */
613: xmlMemoryDump();
614: return(0);
615: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>