Annotation of embedaddon/libxml2/parserInternals.c, revision 1.1.1.3
1.1 misho 1: /*
2: * parserInternals.c : Internal routines (and obsolete ones) needed for the
3: * XML and HTML parsers.
4: *
5: * See Copyright for the status of this software.
6: *
7: * daniel@veillard.com
8: */
9:
10: #define IN_LIBXML
11: #include "libxml.h"
12:
13: #if defined(WIN32) && !defined (__CYGWIN__)
14: #define XML_DIR_SEP '\\'
15: #else
16: #define XML_DIR_SEP '/'
17: #endif
18:
19: #include <string.h>
20: #ifdef HAVE_CTYPE_H
21: #include <ctype.h>
22: #endif
23: #ifdef HAVE_STDLIB_H
24: #include <stdlib.h>
25: #endif
26: #ifdef HAVE_SYS_STAT_H
27: #include <sys/stat.h>
28: #endif
29: #ifdef HAVE_FCNTL_H
30: #include <fcntl.h>
31: #endif
32: #ifdef HAVE_UNISTD_H
33: #include <unistd.h>
34: #endif
35: #ifdef HAVE_ZLIB_H
36: #include <zlib.h>
37: #endif
38:
39: #include <libxml/xmlmemory.h>
40: #include <libxml/tree.h>
41: #include <libxml/parser.h>
42: #include <libxml/parserInternals.h>
43: #include <libxml/valid.h>
44: #include <libxml/entities.h>
45: #include <libxml/xmlerror.h>
46: #include <libxml/encoding.h>
47: #include <libxml/valid.h>
48: #include <libxml/xmlIO.h>
49: #include <libxml/uri.h>
50: #include <libxml/dict.h>
51: #include <libxml/SAX.h>
52: #ifdef LIBXML_CATALOG_ENABLED
53: #include <libxml/catalog.h>
54: #endif
55: #include <libxml/globals.h>
56: #include <libxml/chvalid.h>
57:
1.1.1.3 ! misho 58: #include "buf.h"
! 59: #include "enc.h"
! 60:
1.1 misho 61: /*
62: * Various global defaults for parsing
63: */
64:
65: /**
66: * xmlCheckVersion:
67: * @version: the include version number
68: *
69: * check the compiled lib version against the include one.
70: * This can warn or immediately kill the application
71: */
72: void
73: xmlCheckVersion(int version) {
74: int myversion = (int) LIBXML_VERSION;
75:
76: xmlInitParser();
77:
78: if ((myversion / 10000) != (version / 10000)) {
1.1.1.3 ! misho 79: xmlGenericError(xmlGenericErrorContext,
1.1 misho 80: "Fatal: program compiled against libxml %d using libxml %d\n",
81: (version / 10000), (myversion / 10000));
1.1.1.3 ! misho 82: fprintf(stderr,
1.1 misho 83: "Fatal: program compiled against libxml %d using libxml %d\n",
84: (version / 10000), (myversion / 10000));
85: }
86: if ((myversion / 100) < (version / 100)) {
1.1.1.3 ! misho 87: xmlGenericError(xmlGenericErrorContext,
1.1 misho 88: "Warning: program compiled against libxml %d using older %d\n",
89: (version / 100), (myversion / 100));
90: }
91: }
92:
93:
94: /************************************************************************
95: * *
1.1.1.3 ! misho 96: * Some factorized error routines *
1.1 misho 97: * *
98: ************************************************************************/
99:
100:
101: /**
102: * xmlErrMemory:
103: * @ctxt: an XML parser context
104: * @extra: extra informations
105: *
106: * Handle a redefinition of attribute error
107: */
108: void
109: xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
110: {
111: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
112: (ctxt->instate == XML_PARSER_EOF))
113: return;
114: if (ctxt != NULL) {
115: ctxt->errNo = XML_ERR_NO_MEMORY;
116: ctxt->instate = XML_PARSER_EOF;
117: ctxt->disableSAX = 1;
118: }
119: if (extra)
120: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
121: XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
122: NULL, NULL, 0, 0,
123: "Memory allocation failed : %s\n", extra);
124: else
125: __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
126: XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
127: NULL, NULL, 0, 0, "Memory allocation failed\n");
128: }
129:
130: /**
131: * __xmlErrEncoding:
132: * @ctxt: an XML parser context
133: * @xmlerr: the error number
134: * @msg: the error message
135: * @str1: an string info
136: * @str2: an string info
137: *
138: * Handle an encoding error
139: */
140: void
141: __xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
142: const char *msg, const xmlChar * str1, const xmlChar * str2)
143: {
144: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
145: (ctxt->instate == XML_PARSER_EOF))
146: return;
147: if (ctxt != NULL)
148: ctxt->errNo = xmlerr;
149: __xmlRaiseError(NULL, NULL, NULL,
150: ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
151: NULL, 0, (const char *) str1, (const char *) str2,
152: NULL, 0, 0, msg, str1, str2);
153: if (ctxt != NULL) {
154: ctxt->wellFormed = 0;
155: if (ctxt->recovery == 0)
156: ctxt->disableSAX = 1;
157: }
158: }
159:
160: /**
161: * xmlErrInternal:
162: * @ctxt: an XML parser context
163: * @msg: the error message
164: * @str: error informations
165: *
166: * Handle an internal error
167: */
168: static void
169: xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
170: {
171: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
172: (ctxt->instate == XML_PARSER_EOF))
173: return;
174: if (ctxt != NULL)
175: ctxt->errNo = XML_ERR_INTERNAL_ERROR;
176: __xmlRaiseError(NULL, NULL, NULL,
177: ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
178: XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
179: 0, 0, msg, str);
180: if (ctxt != NULL) {
181: ctxt->wellFormed = 0;
182: if (ctxt->recovery == 0)
183: ctxt->disableSAX = 1;
184: }
185: }
186:
187: /**
188: * xmlErrEncodingInt:
189: * @ctxt: an XML parser context
190: * @error: the error number
191: * @msg: the error message
192: * @val: an integer value
193: *
194: * n encoding error
195: */
196: static void
197: xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
198: const char *msg, int val)
199: {
200: if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
201: (ctxt->instate == XML_PARSER_EOF))
202: return;
203: if (ctxt != NULL)
204: ctxt->errNo = error;
205: __xmlRaiseError(NULL, NULL, NULL,
206: ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
207: NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
208: if (ctxt != NULL) {
209: ctxt->wellFormed = 0;
210: if (ctxt->recovery == 0)
211: ctxt->disableSAX = 1;
212: }
213: }
214:
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a BaseChar or an Ideographic, 0 otherwise
 */
int
xmlIsLetter(int c)
{
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
228:
229: /************************************************************************
230: * *
1.1.1.3 ! misho 231: * Input handling functions for progressive parsing *
1.1 misho 232: * *
233: ************************************************************************/
234:
235: /* #define DEBUG_INPUT */
236: /* #define DEBUG_STACK */
237: /* #define DEBUG_PUSH */
238:
239:
240: /* we need to keep enough input to show errors in context */
241: #define LINE_LEN 80
242:
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debug-only consistency check (compiled with DEBUG_INPUT): reports through
 * the generic error handler when the input's base/cur pointers disagree
 * with the underlying buffer, then dumps the current pointers and sizes.
 */
static void
check_buffer(xmlParserInputPtr in)
{
    if (in->base != xmlBufContent(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + xmlBufUse(in->buf->buffer)) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p and the offset/size with explicit
     * long casts: casting a pointer to int truncates it on LP64 targets
     * and passing ptrdiff_t/size_t to %d mismatches the format.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %ld, use %lu\n",
            (void *) in, (void *) xmlBufContent(in->buf->buffer),
            (long) (in->cur - in->base),
            (unsigned long) xmlBufUse(in->buf->buffer));
}

#else
#define CHECK_BUFFER(in)
#endif
268:
269:
270: /**
271: * xmlParserInputRead:
272: * @in: an XML parser input
273: * @len: an indicative size for the lookahead
274: *
1.1.1.3 ! misho 275: * This function was internal and is deprecated.
1.1 misho 276: *
1.1.1.3 ! misho 277: * Returns -1 as this is an error to use it.
1.1 misho 278: */
279: int
1.1.1.3 ! misho 280: xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
! 281: return(-1);
1.1 misho 282: }
283:
284: /**
285: * xmlParserInputGrow:
286: * @in: an XML parser input
287: * @len: an indicative size for the lookahead
288: *
289: * This function increase the input for the parser. It tries to
290: * preserve pointers to the input buffer, and keep already read data
291: *
1.1.1.3 ! misho 292: * Returns the amount of char read, or -1 in case of error, 0 indicate the
1.1 misho 293: * end of this entity
294: */
295: int
296: xmlParserInputGrow(xmlParserInputPtr in, int len) {
1.1.1.3 ! misho 297: size_t ret;
! 298: size_t indx;
! 299: const xmlChar *content;
1.1 misho 300:
1.1.1.3 ! misho 301: if ((in == NULL) || (len < 0)) return(-1);
1.1 misho 302: #ifdef DEBUG_INPUT
303: xmlGenericError(xmlGenericErrorContext, "Grow\n");
304: #endif
305: if (in->buf == NULL) return(-1);
306: if (in->base == NULL) return(-1);
307: if (in->cur == NULL) return(-1);
308: if (in->buf->buffer == NULL) return(-1);
309:
310: CHECK_BUFFER(in);
311:
312: indx = in->cur - in->base;
1.1.1.3 ! misho 313: if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
1.1 misho 314:
315: CHECK_BUFFER(in);
316:
317: return(0);
318: }
1.1.1.3 ! misho 319: if (in->buf->readcallback != NULL) {
1.1 misho 320: ret = xmlParserInputBufferGrow(in->buf, len);
1.1.1.3 ! misho 321: } else
1.1 misho 322: return(0);
323:
324: /*
325: * NOTE : in->base may be a "dangling" i.e. freed pointer in this
326: * block, but we use it really as an integer to do some
327: * pointer arithmetic. Insure will raise it as a bug but in
328: * that specific case, that's not !
329: */
1.1.1.3 ! misho 330:
! 331: content = xmlBufContent(in->buf->buffer);
! 332: if (in->base != content) {
1.1 misho 333: /*
334: * the buffer has been reallocated
335: */
336: indx = in->cur - in->base;
1.1.1.3 ! misho 337: in->base = content;
! 338: in->cur = &content[indx];
1.1 misho 339: }
1.1.1.3 ! misho 340: in->end = xmlBufEnd(in->buf->buffer);
1.1 misho 341:
342: CHECK_BUFFER(in);
343:
344: return(ret);
345: }
346:
347: /**
348: * xmlParserInputShrink:
349: * @in: an XML parser input
350: *
351: * This function removes used input for the parser.
352: */
353: void
354: xmlParserInputShrink(xmlParserInputPtr in) {
1.1.1.3 ! misho 355: size_t used;
! 356: size_t ret;
! 357: size_t indx;
! 358: const xmlChar *content;
1.1 misho 359:
360: #ifdef DEBUG_INPUT
361: xmlGenericError(xmlGenericErrorContext, "Shrink\n");
362: #endif
363: if (in == NULL) return;
364: if (in->buf == NULL) return;
365: if (in->base == NULL) return;
366: if (in->cur == NULL) return;
367: if (in->buf->buffer == NULL) return;
368:
369: CHECK_BUFFER(in);
370:
1.1.1.3 ! misho 371: used = in->cur - xmlBufContent(in->buf->buffer);
1.1 misho 372: /*
373: * Do not shrink on large buffers whose only a tiny fraction
374: * was consumed
375: */
376: if (used > INPUT_CHUNK) {
1.1.1.3 ! misho 377: ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
1.1 misho 378: if (ret > 0) {
379: in->cur -= ret;
380: in->consumed += ret;
381: }
1.1.1.3 ! misho 382: in->end = xmlBufEnd(in->buf->buffer);
1.1 misho 383: }
384:
385: CHECK_BUFFER(in);
386:
1.1.1.3 ! misho 387: if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) {
1.1 misho 388: return;
389: }
390: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
1.1.1.3 ! misho 391: content = xmlBufContent(in->buf->buffer);
! 392: if (in->base != content) {
1.1 misho 393: /*
394: * the buffer has been reallocated
395: */
396: indx = in->cur - in->base;
1.1.1.3 ! misho 397: in->base = content;
! 398: in->cur = &content[indx];
1.1 misho 399: }
1.1.1.3 ! misho 400: in->end = xmlBufEnd(in->buf->buffer);
1.1 misho 401:
402: CHECK_BUFFER(in);
403: }
404:
405: /************************************************************************
406: * *
1.1.1.3 ! misho 407: * UTF8 character input and related functions *
1.1 misho 408: * *
409: ************************************************************************/
410:
411: /**
412: * xmlNextChar:
413: * @ctxt: the XML parser context
414: *
415: * Skip to the next char input char.
416: */
417:
418: void
419: xmlNextChar(xmlParserCtxtPtr ctxt)
420: {
421: if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
422: (ctxt->input == NULL))
423: return;
424:
425: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
426: if ((*ctxt->input->cur == 0) &&
427: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
428: (ctxt->instate != XML_PARSER_COMMENT)) {
429: /*
430: * If we are at the end of the current entity and
431: * the context allows it, we pop consumed entities
432: * automatically.
433: * the auto closing should be blocked in other cases
434: */
435: xmlPopInput(ctxt);
436: } else {
437: const unsigned char *cur;
438: unsigned char c;
439:
440: /*
441: * 2.11 End-of-Line Handling
442: * the literal two-character sequence "#xD#xA" or a standalone
443: * literal #xD, an XML processor must pass to the application
444: * the single character #xA.
445: */
446: if (*(ctxt->input->cur) == '\n') {
447: ctxt->input->line++; ctxt->input->col = 1;
448: } else
449: ctxt->input->col++;
450:
451: /*
452: * We are supposed to handle UTF8, check it's valid
453: * From rfc2044: encoding of the Unicode values on UTF-8:
454: *
455: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
456: * 0000 0000-0000 007F 0xxxxxxx
457: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1.1.1.3 ! misho 458: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1.1 misho 459: *
460: * Check for the 0x110000 limit too
461: */
462: cur = ctxt->input->cur;
463:
464: c = *cur;
465: if (c & 0x80) {
466: if (c == 0xC0)
467: goto encoding_error;
468: if (cur[1] == 0) {
469: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
470: cur = ctxt->input->cur;
471: }
472: if ((cur[1] & 0xc0) != 0x80)
473: goto encoding_error;
474: if ((c & 0xe0) == 0xe0) {
475: unsigned int val;
476:
477: if (cur[2] == 0) {
478: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
479: cur = ctxt->input->cur;
480: }
481: if ((cur[2] & 0xc0) != 0x80)
482: goto encoding_error;
483: if ((c & 0xf0) == 0xf0) {
484: if (cur[3] == 0) {
485: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
486: cur = ctxt->input->cur;
487: }
488: if (((c & 0xf8) != 0xf0) ||
489: ((cur[3] & 0xc0) != 0x80))
490: goto encoding_error;
491: /* 4-byte code */
492: ctxt->input->cur += 4;
493: val = (cur[0] & 0x7) << 18;
494: val |= (cur[1] & 0x3f) << 12;
495: val |= (cur[2] & 0x3f) << 6;
496: val |= cur[3] & 0x3f;
497: } else {
498: /* 3-byte code */
499: ctxt->input->cur += 3;
500: val = (cur[0] & 0xf) << 12;
501: val |= (cur[1] & 0x3f) << 6;
502: val |= cur[2] & 0x3f;
503: }
504: if (((val > 0xd7ff) && (val < 0xe000)) ||
505: ((val > 0xfffd) && (val < 0x10000)) ||
506: (val >= 0x110000)) {
507: xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
508: "Char 0x%X out of allowed range\n",
509: val);
510: }
511: } else
512: /* 2-byte code */
513: ctxt->input->cur += 2;
514: } else
515: /* 1-byte code */
516: ctxt->input->cur++;
517:
518: ctxt->nbChars++;
519: if (*ctxt->input->cur == 0)
520: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
521: }
522: } else {
523: /*
524: * Assume it's a fixed length encoding (1) with
525: * a compatible encoding for the ASCII set, since
526: * XML constructs only use < 128 chars
527: */
528:
529: if (*(ctxt->input->cur) == '\n') {
530: ctxt->input->line++; ctxt->input->col = 1;
531: } else
532: ctxt->input->col++;
533: ctxt->input->cur++;
534: ctxt->nbChars++;
535: if (*ctxt->input->cur == 0)
536: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
537: }
538: if ((*ctxt->input->cur == '%') && (!ctxt->html))
539: xmlParserHandlePEReference(ctxt);
540: if ((*ctxt->input->cur == 0) &&
541: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
542: xmlPopInput(ctxt);
543: return;
544: encoding_error:
545: /*
546: * If we detect an UTF8 error that probably mean that the
547: * input encoding didn't get properly advertised in the
548: * declaration header. Report the error and switch the encoding
549: * to ISO-Latin-1 (if you don't like this policy, just declare the
550: * encoding !)
551: */
552: if ((ctxt == NULL) || (ctxt->input == NULL) ||
553: (ctxt->input->end - ctxt->input->cur < 4)) {
554: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
555: "Input is not proper UTF-8, indicate encoding !\n",
556: NULL, NULL);
557: } else {
558: char buffer[150];
559:
560: snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
561: ctxt->input->cur[0], ctxt->input->cur[1],
562: ctxt->input->cur[2], ctxt->input->cur[3]);
563: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
564: "Input is not proper UTF-8, indicate encoding !\n%s",
565: BAD_CAST buffer, NULL);
566: }
567: ctxt->charset = XML_CHAR_ENCODING_8859_1;
568: ctxt->input->cur++;
569: return;
570: }
571:
572: /**
573: * xmlCurrentChar:
574: * @ctxt: the XML parser context
575: * @len: pointer to the length of the char read
576: *
577: * The current char value, if using UTF-8 this may actually span multiple
578: * bytes in the input buffer. Implement the end of line normalization:
579: * 2.11 End-of-Line Handling
580: * Wherever an external parsed entity or the literal entity value
581: * of an internal parsed entity contains either the literal two-character
582: * sequence "#xD#xA" or a standalone literal #xD, an XML processor
583: * must pass to the application the single character #xA.
584: * This behavior can conveniently be produced by normalizing all
585: * line breaks to #xA on input, before parsing.)
586: *
587: * Returns the current char value and its length
588: */
589:
590: int
591: xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
592: if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
593: if (ctxt->instate == XML_PARSER_EOF)
594: return(0);
595:
596: if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
597: *len = 1;
598: return((int) *ctxt->input->cur);
599: }
600: if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
601: /*
602: * We are supposed to handle UTF8, check it's valid
603: * From rfc2044: encoding of the Unicode values on UTF-8:
604: *
605: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
606: * 0000 0000-0000 007F 0xxxxxxx
607: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1.1.1.3 ! misho 608: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1.1 misho 609: *
610: * Check for the 0x110000 limit too
611: */
612: const unsigned char *cur = ctxt->input->cur;
613: unsigned char c;
614: unsigned int val;
615:
616: c = *cur;
617: if (c & 0x80) {
618: if (((c & 0x40) == 0) || (c == 0xC0))
619: goto encoding_error;
620: if (cur[1] == 0) {
621: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
622: cur = ctxt->input->cur;
623: }
624: if ((cur[1] & 0xc0) != 0x80)
625: goto encoding_error;
626: if ((c & 0xe0) == 0xe0) {
627: if (cur[2] == 0) {
628: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
629: cur = ctxt->input->cur;
630: }
631: if ((cur[2] & 0xc0) != 0x80)
632: goto encoding_error;
633: if ((c & 0xf0) == 0xf0) {
634: if (cur[3] == 0) {
635: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
636: cur = ctxt->input->cur;
637: }
638: if (((c & 0xf8) != 0xf0) ||
639: ((cur[3] & 0xc0) != 0x80))
640: goto encoding_error;
641: /* 4-byte code */
642: *len = 4;
643: val = (cur[0] & 0x7) << 18;
644: val |= (cur[1] & 0x3f) << 12;
645: val |= (cur[2] & 0x3f) << 6;
646: val |= cur[3] & 0x3f;
647: if (val < 0x10000)
648: goto encoding_error;
649: } else {
650: /* 3-byte code */
651: *len = 3;
652: val = (cur[0] & 0xf) << 12;
653: val |= (cur[1] & 0x3f) << 6;
654: val |= cur[2] & 0x3f;
655: if (val < 0x800)
656: goto encoding_error;
657: }
658: } else {
659: /* 2-byte code */
660: *len = 2;
661: val = (cur[0] & 0x1f) << 6;
662: val |= cur[1] & 0x3f;
663: if (val < 0x80)
664: goto encoding_error;
665: }
666: if (!IS_CHAR(val)) {
667: xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
668: "Char 0x%X out of allowed range\n", val);
1.1.1.3 ! misho 669: }
1.1 misho 670: return(val);
671: } else {
672: /* 1-byte code */
673: *len = 1;
674: if (*ctxt->input->cur == 0)
675: xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
676: if ((*ctxt->input->cur == 0) &&
677: (ctxt->input->end > ctxt->input->cur)) {
678: xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
679: "Char 0x0 out of allowed range\n", 0);
680: }
681: if (*ctxt->input->cur == 0xD) {
682: if (ctxt->input->cur[1] == 0xA) {
683: ctxt->nbChars++;
684: ctxt->input->cur++;
685: }
686: return(0xA);
687: }
688: return((int) *ctxt->input->cur);
689: }
690: }
691: /*
692: * Assume it's a fixed length encoding (1) with
693: * a compatible encoding for the ASCII set, since
694: * XML constructs only use < 128 chars
695: */
696: *len = 1;
697: if (*ctxt->input->cur == 0xD) {
698: if (ctxt->input->cur[1] == 0xA) {
699: ctxt->nbChars++;
700: ctxt->input->cur++;
701: }
702: return(0xA);
703: }
704: return((int) *ctxt->input->cur);
705: encoding_error:
706: /*
707: * An encoding problem may arise from a truncated input buffer
708: * splitting a character in the middle. In that case do not raise
709: * an error but return 0 to endicate an end of stream problem
710: */
711: if (ctxt->input->end - ctxt->input->cur < 4) {
712: *len = 0;
713: return(0);
714: }
715:
716: /*
717: * If we detect an UTF8 error that probably mean that the
718: * input encoding didn't get properly advertised in the
719: * declaration header. Report the error and switch the encoding
720: * to ISO-Latin-1 (if you don't like this policy, just declare the
721: * encoding !)
722: */
723: {
724: char buffer[150];
725:
726: snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
727: ctxt->input->cur[0], ctxt->input->cur[1],
728: ctxt->input->cur[2], ctxt->input->cur[3]);
729: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
730: "Input is not proper UTF-8, indicate encoding !\n%s",
731: BAD_CAST buffer, NULL);
732: }
1.1.1.3 ! misho 733: ctxt->charset = XML_CHAR_ENCODING_8859_1;
1.1 misho 734: *len = 1;
735: return((int) *ctxt->input->cur);
736: }
737:
738: /**
739: * xmlStringCurrentChar:
740: * @ctxt: the XML parser context
741: * @cur: pointer to the beginning of the char
742: * @len: pointer to the length of the char read
743: *
744: * The current char value, if using UTF-8 this may actually span multiple
745: * bytes in the input buffer.
746: *
747: * Returns the current char value and its length
748: */
749:
750: int
751: xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
752: {
753: if ((len == NULL) || (cur == NULL)) return(0);
754: if ((ctxt == NULL) || (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
755: /*
756: * We are supposed to handle UTF8, check it's valid
757: * From rfc2044: encoding of the Unicode values on UTF-8:
758: *
759: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
760: * 0000 0000-0000 007F 0xxxxxxx
761: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1.1.1.3 ! misho 762: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1.1 misho 763: *
764: * Check for the 0x110000 limit too
765: */
766: unsigned char c;
767: unsigned int val;
768:
769: c = *cur;
770: if (c & 0x80) {
771: if ((cur[1] & 0xc0) != 0x80)
772: goto encoding_error;
773: if ((c & 0xe0) == 0xe0) {
774:
775: if ((cur[2] & 0xc0) != 0x80)
776: goto encoding_error;
777: if ((c & 0xf0) == 0xf0) {
778: if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
779: goto encoding_error;
780: /* 4-byte code */
781: *len = 4;
782: val = (cur[0] & 0x7) << 18;
783: val |= (cur[1] & 0x3f) << 12;
784: val |= (cur[2] & 0x3f) << 6;
785: val |= cur[3] & 0x3f;
786: } else {
787: /* 3-byte code */
788: *len = 3;
789: val = (cur[0] & 0xf) << 12;
790: val |= (cur[1] & 0x3f) << 6;
791: val |= cur[2] & 0x3f;
792: }
793: } else {
794: /* 2-byte code */
795: *len = 2;
796: val = (cur[0] & 0x1f) << 6;
797: val |= cur[1] & 0x3f;
798: }
799: if (!IS_CHAR(val)) {
800: xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
801: "Char 0x%X out of allowed range\n", val);
802: }
803: return (val);
804: } else {
805: /* 1-byte code */
806: *len = 1;
807: return ((int) *cur);
808: }
809: }
810: /*
811: * Assume it's a fixed length encoding (1) with
812: * a compatible encoding for the ASCII set, since
813: * XML constructs only use < 128 chars
814: */
815: *len = 1;
816: return ((int) *cur);
817: encoding_error:
818:
819: /*
820: * An encoding problem may arise from a truncated input buffer
821: * splitting a character in the middle. In that case do not raise
822: * an error but return 0 to endicate an end of stream problem
823: */
824: if ((ctxt == NULL) || (ctxt->input == NULL) ||
825: (ctxt->input->end - ctxt->input->cur < 4)) {
826: *len = 0;
827: return(0);
828: }
829: /*
830: * If we detect an UTF8 error that probably mean that the
831: * input encoding didn't get properly advertised in the
832: * declaration header. Report the error and switch the encoding
833: * to ISO-Latin-1 (if you don't like this policy, just declare the
834: * encoding !)
835: */
836: {
837: char buffer[150];
838:
839: snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
840: ctxt->input->cur[0], ctxt->input->cur[1],
841: ctxt->input->cur[2], ctxt->input->cur[3]);
842: __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
843: "Input is not proper UTF-8, indicate encoding !\n%s",
844: BAD_CAST buffer, NULL);
845: }
846: *len = 1;
847: return ((int) *cur);
848: }
849:
850: /**
851: * xmlCopyCharMultiByte:
852: * @out: pointer to an array of xmlChar
853: * @val: the char value
854: *
1.1.1.3 ! misho 855: * append the char value in the array
1.1 misho 856: *
857: * Returns the number of xmlChar written
858: */
859: int
860: xmlCopyCharMultiByte(xmlChar *out, int val) {
861: if (out == NULL) return(0);
862: /*
863: * We are supposed to handle UTF8, check it's valid
864: * From rfc2044: encoding of the Unicode values on UTF-8:
865: *
866: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
867: * 0000 0000-0000 007F 0xxxxxxx
868: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1.1.1.3 ! misho 869: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1.1 misho 870: */
871: if (val >= 0x80) {
872: xmlChar *savedout = out;
873: int bits;
874: if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
875: else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
876: else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
877: else {
878: xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
879: "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
880: val);
881: return(0);
882: }
883: for ( ; bits >= 0; bits-= 6)
884: *out++= ((val >> bits) & 0x3F) | 0x80 ;
885: return (out - savedout);
886: }
887: *out = (xmlChar) val;
888: return 1;
889: }
890:
891: /**
892: * xmlCopyChar:
893: * @len: Ignored, compatibility
894: * @out: pointer to an array of xmlChar
895: * @val: the char value
896: *
1.1.1.3 ! misho 897: * append the char value in the array
1.1 misho 898: *
899: * Returns the number of xmlChar written
900: */
901:
902: int
903: xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
904: if (out == NULL) return(0);
905: /* the len parameter is ignored */
906: if (val >= 0x80) {
907: return(xmlCopyCharMultiByte (out, val));
908: }
909: *out = (xmlChar) val;
910: return 1;
911: }
912:
913: /************************************************************************
914: * *
915: * Commodity functions to switch encodings *
916: * *
917: ************************************************************************/
918:
919: static int
920: xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
921: xmlCharEncodingHandlerPtr handler, int len);
922: static int
923: xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
924: xmlCharEncodingHandlerPtr handler, int len);
925: /**
926: * xmlSwitchEncoding:
927: * @ctxt: the parser context
928: * @enc: the encoding value (number)
929: *
930: * change the input functions when discovering the character encoding
931: * of a given entity.
932: *
933: * Returns 0 in case of success, -1 otherwise
934: */
935: int
936: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
937: {
938: xmlCharEncodingHandlerPtr handler;
939: int len = -1;
940:
941: if (ctxt == NULL) return(-1);
942: switch (enc) {
943: case XML_CHAR_ENCODING_ERROR:
944: __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
945: "encoding unknown\n", NULL, NULL);
946: return(-1);
947: case XML_CHAR_ENCODING_NONE:
948: /* let's assume it's UTF-8 without the XML decl */
949: ctxt->charset = XML_CHAR_ENCODING_UTF8;
950: return(0);
951: case XML_CHAR_ENCODING_UTF8:
952: /* default encoding, no conversion should be needed */
953: ctxt->charset = XML_CHAR_ENCODING_UTF8;
954:
955: /*
956: * Errata on XML-1.0 June 20 2001
957: * Specific handling of the Byte Order Mark for
958: * UTF-8
959: */
960: if ((ctxt->input != NULL) &&
961: (ctxt->input->cur[0] == 0xEF) &&
962: (ctxt->input->cur[1] == 0xBB) &&
963: (ctxt->input->cur[2] == 0xBF)) {
964: ctxt->input->cur += 3;
965: }
966: return(0);
967: case XML_CHAR_ENCODING_UTF16LE:
968: case XML_CHAR_ENCODING_UTF16BE:
969: /*The raw input characters are encoded
970: *in UTF-16. As we expect this function
971: *to be called after xmlCharEncInFunc, we expect
972: *ctxt->input->cur to contain UTF-8 encoded characters.
973: *So the raw UTF16 Byte Order Mark
974: *has also been converted into
975: *an UTF-8 BOM. Let's skip that BOM.
976: */
977: if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
978: (ctxt->input->cur[0] == 0xEF) &&
979: (ctxt->input->cur[1] == 0xBB) &&
980: (ctxt->input->cur[2] == 0xBF)) {
981: ctxt->input->cur += 3;
982: }
983: len = 90;
984: break;
985: case XML_CHAR_ENCODING_UCS2:
986: len = 90;
987: break;
988: case XML_CHAR_ENCODING_UCS4BE:
989: case XML_CHAR_ENCODING_UCS4LE:
990: case XML_CHAR_ENCODING_UCS4_2143:
991: case XML_CHAR_ENCODING_UCS4_3412:
992: len = 180;
993: break;
994: case XML_CHAR_ENCODING_EBCDIC:
995: case XML_CHAR_ENCODING_8859_1:
996: case XML_CHAR_ENCODING_8859_2:
997: case XML_CHAR_ENCODING_8859_3:
998: case XML_CHAR_ENCODING_8859_4:
999: case XML_CHAR_ENCODING_8859_5:
1000: case XML_CHAR_ENCODING_8859_6:
1001: case XML_CHAR_ENCODING_8859_7:
1002: case XML_CHAR_ENCODING_8859_8:
1003: case XML_CHAR_ENCODING_8859_9:
1004: case XML_CHAR_ENCODING_ASCII:
1005: case XML_CHAR_ENCODING_2022_JP:
1006: case XML_CHAR_ENCODING_SHIFT_JIS:
1007: case XML_CHAR_ENCODING_EUC_JP:
1008: len = 45;
1009: break;
1010: }
1011: handler = xmlGetCharEncodingHandler(enc);
1012: if (handler == NULL) {
1013: /*
1014: * Default handlers.
1015: */
1016: switch (enc) {
1017: case XML_CHAR_ENCODING_ASCII:
1018: /* default encoding, no conversion should be needed */
1019: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1020: return(0);
1021: case XML_CHAR_ENCODING_UTF16LE:
1022: break;
1023: case XML_CHAR_ENCODING_UTF16BE:
1024: break;
1025: case XML_CHAR_ENCODING_UCS4LE:
1026: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1027: "encoding not supported %s\n",
1028: BAD_CAST "USC4 little endian", NULL);
1029: break;
1030: case XML_CHAR_ENCODING_UCS4BE:
1031: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1032: "encoding not supported %s\n",
1033: BAD_CAST "USC4 big endian", NULL);
1034: break;
1035: case XML_CHAR_ENCODING_EBCDIC:
1036: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1037: "encoding not supported %s\n",
1038: BAD_CAST "EBCDIC", NULL);
1039: break;
1040: case XML_CHAR_ENCODING_UCS4_2143:
1041: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1042: "encoding not supported %s\n",
1043: BAD_CAST "UCS4 2143", NULL);
1044: break;
1045: case XML_CHAR_ENCODING_UCS4_3412:
1046: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1047: "encoding not supported %s\n",
1048: BAD_CAST "UCS4 3412", NULL);
1049: break;
1050: case XML_CHAR_ENCODING_UCS2:
1051: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1052: "encoding not supported %s\n",
1053: BAD_CAST "UCS2", NULL);
1054: break;
1055: case XML_CHAR_ENCODING_8859_1:
1056: case XML_CHAR_ENCODING_8859_2:
1057: case XML_CHAR_ENCODING_8859_3:
1058: case XML_CHAR_ENCODING_8859_4:
1059: case XML_CHAR_ENCODING_8859_5:
1060: case XML_CHAR_ENCODING_8859_6:
1061: case XML_CHAR_ENCODING_8859_7:
1062: case XML_CHAR_ENCODING_8859_8:
1063: case XML_CHAR_ENCODING_8859_9:
1064: /*
1065: * We used to keep the internal content in the
1066: * document encoding however this turns being unmaintainable
1067: * So xmlGetCharEncodingHandler() will return non-null
1068: * values for this now.
1069: */
1070: if ((ctxt->inputNr == 1) &&
1071: (ctxt->encoding == NULL) &&
1072: (ctxt->input != NULL) &&
1073: (ctxt->input->encoding != NULL)) {
1074: ctxt->encoding = xmlStrdup(ctxt->input->encoding);
1075: }
1076: ctxt->charset = enc;
1077: return(0);
1078: case XML_CHAR_ENCODING_2022_JP:
1079: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1080: "encoding not supported %s\n",
1081: BAD_CAST "ISO-2022-JP", NULL);
1082: break;
1083: case XML_CHAR_ENCODING_SHIFT_JIS:
1084: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1085: "encoding not supported %s\n",
1086: BAD_CAST "Shift_JIS", NULL);
1087: break;
1088: case XML_CHAR_ENCODING_EUC_JP:
1089: __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1090: "encoding not supported %s\n",
1091: BAD_CAST "EUC-JP", NULL);
1092: break;
1093: default:
1094: break;
1095: }
1096: }
1097: if (handler == NULL)
1098: return(-1);
1099: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1100: return(xmlSwitchToEncodingInt(ctxt, handler, len));
1101: }
1102:
1103: /**
1104: * xmlSwitchInputEncoding:
1105: * @ctxt: the parser context
1106: * @input: the input stream
1107: * @handler: the encoding handler
1108: * @len: the number of bytes to convert for the first line or -1
1109: *
1110: * change the input functions when discovering the character encoding
1111: * of a given entity.
1112: *
1113: * Returns 0 in case of success, -1 otherwise
1114: */
1115: static int
1116: xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1117: xmlCharEncodingHandlerPtr handler, int len)
1118: {
1119: int nbchars;
1120:
1121: if (handler == NULL)
1122: return (-1);
1123: if (input == NULL)
1124: return (-1);
1125: if (input->buf != NULL) {
1126: if (input->buf->encoder != NULL) {
1127: /*
1128: * Check in case the auto encoding detetection triggered
1129: * in already.
1130: */
1131: if (input->buf->encoder == handler)
1132: return (0);
1133:
1134: /*
1135: * "UTF-16" can be used for both LE and BE
1136: if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
1137: BAD_CAST "UTF-16", 6)) &&
1138: (!xmlStrncmp(BAD_CAST handler->name,
1139: BAD_CAST "UTF-16", 6))) {
1140: return(0);
1141: }
1142: */
1143:
1144: /*
1145: * Note: this is a bit dangerous, but that's what it
1146: * takes to use nearly compatible signature for different
1147: * encodings.
1148: */
1149: xmlCharEncCloseFunc(input->buf->encoder);
1150: input->buf->encoder = handler;
1151: return (0);
1152: }
1153: input->buf->encoder = handler;
1154:
1155: /*
1156: * Is there already some content down the pipe to convert ?
1157: */
1.1.1.3 ! misho 1158: if (xmlBufIsEmpty(input->buf->buffer) == 0) {
1.1 misho 1159: int processed;
1160: unsigned int use;
1161:
1162: /*
1.1.1.3 ! misho 1163: * Specific handling of the Byte Order Mark for
1.1 misho 1164: * UTF-16
1165: */
1166: if ((handler->name != NULL) &&
1167: (!strcmp(handler->name, "UTF-16LE") ||
1168: !strcmp(handler->name, "UTF-16")) &&
1169: (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
1170: input->cur += 2;
1171: }
1172: if ((handler->name != NULL) &&
1173: (!strcmp(handler->name, "UTF-16BE")) &&
1174: (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
1175: input->cur += 2;
1176: }
1177: /*
1178: * Errata on XML-1.0 June 20 2001
1179: * Specific handling of the Byte Order Mark for
1180: * UTF-8
1181: */
1182: if ((handler->name != NULL) &&
1183: (!strcmp(handler->name, "UTF-8")) &&
1184: (input->cur[0] == 0xEF) &&
1185: (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
1186: input->cur += 3;
1187: }
1188:
1189: /*
1190: * Shrink the current input buffer.
1191: * Move it as the raw buffer and create a new input buffer
1192: */
1193: processed = input->cur - input->base;
1.1.1.3 ! misho 1194: xmlBufShrink(input->buf->buffer, processed);
1.1 misho 1195: input->buf->raw = input->buf->buffer;
1.1.1.3 ! misho 1196: input->buf->buffer = xmlBufCreate();
1.1 misho 1197: input->buf->rawconsumed = processed;
1.1.1.3 ! misho 1198: use = xmlBufUse(input->buf->raw);
1.1 misho 1199:
1200: if (ctxt->html) {
1201: /*
1202: * convert as much as possible of the buffer
1203: */
1.1.1.3 ! misho 1204: nbchars = xmlCharEncInput(input->buf, 1);
1.1 misho 1205: } else {
1206: /*
1207: * convert just enough to get
1208: * '<?xml version="1.0" encoding="xxx"?>'
1209: * parsed with the autodetected encoding
1210: * into the parser reading buffer.
1211: */
1.1.1.3 ! misho 1212: nbchars = xmlCharEncFirstLineInput(input->buf, len);
1.1 misho 1213: }
1214: if (nbchars < 0) {
1215: xmlErrInternal(ctxt,
1216: "switching encoding: encoder error\n",
1217: NULL);
1218: return (-1);
1219: }
1.1.1.3 ! misho 1220: input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
! 1221: xmlBufResetInput(input->buf->buffer, input);
1.1 misho 1222: }
1223: return (0);
1224: } else if (input->length == 0) {
1225: /*
1226: * When parsing a static memory array one must know the
1227: * size to be able to convert the buffer.
1228: */
1229: xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
1230: return (-1);
1231: }
1232: return (0);
1233: }
1234:
1235: /**
1236: * xmlSwitchInputEncoding:
1237: * @ctxt: the parser context
1238: * @input: the input stream
1239: * @handler: the encoding handler
1240: *
1241: * change the input functions when discovering the character encoding
1242: * of a given entity.
1243: *
1244: * Returns 0 in case of success, -1 otherwise
1245: */
1246: int
1247: xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1248: xmlCharEncodingHandlerPtr handler) {
1249: return(xmlSwitchInputEncodingInt(ctxt, input, handler, -1));
1250: }
1251:
1252: /**
1253: * xmlSwitchToEncodingInt:
1254: * @ctxt: the parser context
1255: * @handler: the encoding handler
1.1.1.3 ! misho 1256: * @len: the length to convert or -1
1.1 misho 1257: *
1258: * change the input functions when discovering the character encoding
1259: * of a given entity, and convert only @len bytes of the output, this
1260: * is needed on auto detect to allows any declared encoding later to
1261: * convert the actual content after the xmlDecl
1262: *
1263: * Returns 0 in case of success, -1 otherwise
1264: */
1265: static int
1266: xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
1267: xmlCharEncodingHandlerPtr handler, int len) {
1268: int ret = 0;
1269:
1270: if (handler != NULL) {
1271: if (ctxt->input != NULL) {
1272: ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);
1273: } else {
1274: xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
1275: NULL);
1276: return(-1);
1277: }
1278: /*
1279: * The parsing is now done in UTF8 natively
1280: */
1281: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1282: } else
1283: return(-1);
1284: return(ret);
1285: }
1286:
1287: /**
1288: * xmlSwitchToEncoding:
1289: * @ctxt: the parser context
1290: * @handler: the encoding handler
1291: *
1292: * change the input functions when discovering the character encoding
1293: * of a given entity.
1294: *
1295: * Returns 0 in case of success, -1 otherwise
1296: */
1297: int
1.1.1.3 ! misho 1298: xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1.1 misho 1299: {
1300: return (xmlSwitchToEncodingInt(ctxt, handler, -1));
1301: }
1302:
1303: /************************************************************************
1304: * *
1305: * Commodity functions to handle entities processing *
1306: * *
1307: ************************************************************************/
1308:
1309: /**
1310: * xmlFreeInputStream:
1311: * @input: an xmlParserInputPtr
1312: *
1313: * Free up an input stream.
1314: */
1315: void
1316: xmlFreeInputStream(xmlParserInputPtr input) {
1317: if (input == NULL) return;
1318:
1319: if (input->filename != NULL) xmlFree((char *) input->filename);
1320: if (input->directory != NULL) xmlFree((char *) input->directory);
1321: if (input->encoding != NULL) xmlFree((char *) input->encoding);
1322: if (input->version != NULL) xmlFree((char *) input->version);
1323: if ((input->free != NULL) && (input->base != NULL))
1324: input->free((xmlChar *) input->base);
1.1.1.3 ! misho 1325: if (input->buf != NULL)
1.1 misho 1326: xmlFreeParserInputBuffer(input->buf);
1327: xmlFree(input);
1328: }
1329:
1330: /**
1331: * xmlNewInputStream:
1332: * @ctxt: an XML parser context
1333: *
1.1.1.2 misho 1334: * Create a new input stream structure.
1335: *
1.1 misho 1336: * Returns the new input stream or NULL
1337: */
1338: xmlParserInputPtr
1339: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1340: xmlParserInputPtr input;
1341:
1342: input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1343: if (input == NULL) {
1344: xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1345: return(NULL);
1346: }
1347: memset(input, 0, sizeof(xmlParserInput));
1348: input->line = 1;
1349: input->col = 1;
1350: input->standalone = -1;
1.1.1.2 misho 1351:
1.1 misho 1352: /*
1.1.1.2 misho 1353: * If the context is NULL the id cannot be initialized, but that
1354: * should not happen while parsing which is the situation where
1355: * the id is actually needed.
1.1 misho 1356: */
1.1.1.2 misho 1357: if (ctxt != NULL)
1358: input->id = ctxt->input_id++;
1359:
1.1 misho 1360: return(input);
1361: }
1362:
1363: /**
1364: * xmlNewIOInputStream:
1365: * @ctxt: an XML parser context
1366: * @input: an I/O Input
1367: * @enc: the charset encoding if known
1368: *
1369: * Create a new input stream structure encapsulating the @input into
1370: * a stream suitable for the parser.
1371: *
1372: * Returns the new input stream or NULL
1373: */
1374: xmlParserInputPtr
1375: xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1376: xmlCharEncoding enc) {
1377: xmlParserInputPtr inputStream;
1378:
1379: if (input == NULL) return(NULL);
1380: if (xmlParserDebugEntities)
1381: xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1382: inputStream = xmlNewInputStream(ctxt);
1383: if (inputStream == NULL) {
1384: return(NULL);
1385: }
1386: inputStream->filename = NULL;
1387: inputStream->buf = input;
1.1.1.3 ! misho 1388: xmlBufResetInput(inputStream->buf->buffer, inputStream);
! 1389:
1.1 misho 1390: if (enc != XML_CHAR_ENCODING_NONE) {
1391: xmlSwitchEncoding(ctxt, enc);
1392: }
1393:
1394: return(inputStream);
1395: }
1396:
1397: /**
1398: * xmlNewEntityInputStream:
1399: * @ctxt: an XML parser context
1400: * @entity: an Entity pointer
1401: *
1402: * Create a new input stream based on an xmlEntityPtr
1403: *
1404: * Returns the new input stream or NULL
1405: */
1406: xmlParserInputPtr
1407: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1408: xmlParserInputPtr input;
1409:
1410: if (entity == NULL) {
1411: xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1412: NULL);
1413: return(NULL);
1414: }
1415: if (xmlParserDebugEntities)
1416: xmlGenericError(xmlGenericErrorContext,
1417: "new input from entity: %s\n", entity->name);
1418: if (entity->content == NULL) {
1419: switch (entity->etype) {
1420: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1421: xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1422: entity->name);
1423: break;
1424: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1425: case XML_EXTERNAL_PARAMETER_ENTITY:
1426: return(xmlLoadExternalEntity((char *) entity->URI,
1427: (char *) entity->ExternalID, ctxt));
1428: case XML_INTERNAL_GENERAL_ENTITY:
1429: xmlErrInternal(ctxt,
1430: "Internal entity %s without content !\n",
1431: entity->name);
1432: break;
1433: case XML_INTERNAL_PARAMETER_ENTITY:
1434: xmlErrInternal(ctxt,
1435: "Internal parameter entity %s without content !\n",
1436: entity->name);
1437: break;
1438: case XML_INTERNAL_PREDEFINED_ENTITY:
1439: xmlErrInternal(ctxt,
1440: "Predefined entity %s without content !\n",
1441: entity->name);
1442: break;
1443: }
1444: return(NULL);
1445: }
1446: input = xmlNewInputStream(ctxt);
1447: if (input == NULL) {
1448: return(NULL);
1449: }
1450: if (entity->URI != NULL)
1451: input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1452: input->base = entity->content;
1453: input->cur = entity->content;
1454: input->length = entity->length;
1455: input->end = &entity->content[input->length];
1456: return(input);
1457: }
1458:
1459: /**
1460: * xmlNewStringInputStream:
1461: * @ctxt: an XML parser context
1462: * @buffer: an memory buffer
1463: *
1464: * Create a new input stream based on a memory buffer.
1465: * Returns the new input stream
1466: */
1467: xmlParserInputPtr
1468: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1469: xmlParserInputPtr input;
1470:
1471: if (buffer == NULL) {
1472: xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1473: NULL);
1474: return(NULL);
1475: }
1476: if (xmlParserDebugEntities)
1477: xmlGenericError(xmlGenericErrorContext,
1478: "new fixed input: %.30s\n", buffer);
1479: input = xmlNewInputStream(ctxt);
1480: if (input == NULL) {
1481: xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1482: return(NULL);
1483: }
1484: input->base = buffer;
1485: input->cur = buffer;
1486: input->length = xmlStrlen(buffer);
1487: input->end = &buffer[input->length];
1488: return(input);
1489: }
1490:
1491: /**
1492: * xmlNewInputFromFile:
1493: * @ctxt: an XML parser context
1494: * @filename: the filename to use as entity
1495: *
1496: * Create a new input stream based on a file or an URL.
1497: *
1498: * Returns the new input stream or NULL in case of error
1499: */
1500: xmlParserInputPtr
1501: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1502: xmlParserInputBufferPtr buf;
1503: xmlParserInputPtr inputStream;
1504: char *directory = NULL;
1505: xmlChar *URI = NULL;
1506:
1507: if (xmlParserDebugEntities)
1508: xmlGenericError(xmlGenericErrorContext,
1509: "new input from file: %s\n", filename);
1510: if (ctxt == NULL) return(NULL);
1511: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1512: if (buf == NULL) {
1513: if (filename == NULL)
1514: __xmlLoaderErr(ctxt,
1515: "failed to load external entity: NULL filename \n",
1516: NULL);
1517: else
1518: __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1519: (const char *) filename);
1520: return(NULL);
1521: }
1522:
1523: inputStream = xmlNewInputStream(ctxt);
1524: if (inputStream == NULL)
1525: return(NULL);
1526:
1527: inputStream->buf = buf;
1528: inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1529: if (inputStream == NULL)
1530: return(NULL);
1.1.1.3 ! misho 1531:
1.1 misho 1532: if (inputStream->filename == NULL)
1533: URI = xmlStrdup((xmlChar *) filename);
1534: else
1535: URI = xmlStrdup((xmlChar *) inputStream->filename);
1536: directory = xmlParserGetDirectory((const char *) URI);
1537: if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1538: inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1539: if (URI != NULL) xmlFree((char *) URI);
1540: inputStream->directory = directory;
1541:
1.1.1.3 ! misho 1542: xmlBufResetInput(inputStream->buf->buffer, inputStream);
1.1 misho 1543: if ((ctxt->directory == NULL) && (directory != NULL))
1544: ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1545: return(inputStream);
1546: }
1547:
1548: /************************************************************************
1549: * *
1550: * Commodity functions to handle parser contexts *
1551: * *
1552: ************************************************************************/
1553:
1554: /**
1555: * xmlInitParserCtxt:
1556: * @ctxt: an XML parser context
1557: *
1558: * Initialize a parser context
1559: *
1560: * Returns 0 in case of success and -1 in case of error
1561: */
1562:
1563: int
1564: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1565: {
1566: xmlParserInputPtr input;
1567:
1568: if(ctxt==NULL) {
1569: xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1570: return(-1);
1571: }
1572:
1573: xmlDefaultSAXHandlerInit();
1574:
1575: if (ctxt->dict == NULL)
1576: ctxt->dict = xmlDictCreate();
1577: if (ctxt->dict == NULL) {
1578: xmlErrMemory(NULL, "cannot initialize parser context\n");
1579: return(-1);
1580: }
1.1.1.3 ! misho 1581: xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
! 1582:
1.1 misho 1583: if (ctxt->sax == NULL)
1584: ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1585: if (ctxt->sax == NULL) {
1586: xmlErrMemory(NULL, "cannot initialize parser context\n");
1587: return(-1);
1588: }
1589: else
1590: xmlSAXVersion(ctxt->sax, 2);
1591:
1592: ctxt->maxatts = 0;
1593: ctxt->atts = NULL;
1594: /* Allocate the Input stack */
1595: if (ctxt->inputTab == NULL) {
1596: ctxt->inputTab = (xmlParserInputPtr *)
1597: xmlMalloc(5 * sizeof(xmlParserInputPtr));
1598: ctxt->inputMax = 5;
1599: }
1600: if (ctxt->inputTab == NULL) {
1601: xmlErrMemory(NULL, "cannot initialize parser context\n");
1602: ctxt->inputNr = 0;
1603: ctxt->inputMax = 0;
1604: ctxt->input = NULL;
1605: return(-1);
1606: }
1607: while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1608: xmlFreeInputStream(input);
1609: }
1610: ctxt->inputNr = 0;
1611: ctxt->input = NULL;
1612:
1613: ctxt->version = NULL;
1614: ctxt->encoding = NULL;
1615: ctxt->standalone = -1;
1616: ctxt->hasExternalSubset = 0;
1617: ctxt->hasPErefs = 0;
1618: ctxt->html = 0;
1619: ctxt->external = 0;
1620: ctxt->instate = XML_PARSER_START;
1621: ctxt->token = 0;
1622: ctxt->directory = NULL;
1623:
1624: /* Allocate the Node stack */
1625: if (ctxt->nodeTab == NULL) {
1626: ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1627: ctxt->nodeMax = 10;
1628: }
1629: if (ctxt->nodeTab == NULL) {
1630: xmlErrMemory(NULL, "cannot initialize parser context\n");
1631: ctxt->nodeNr = 0;
1632: ctxt->nodeMax = 0;
1633: ctxt->node = NULL;
1634: ctxt->inputNr = 0;
1635: ctxt->inputMax = 0;
1636: ctxt->input = NULL;
1637: return(-1);
1638: }
1639: ctxt->nodeNr = 0;
1640: ctxt->node = NULL;
1641:
1642: /* Allocate the Name stack */
1643: if (ctxt->nameTab == NULL) {
1644: ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1645: ctxt->nameMax = 10;
1646: }
1647: if (ctxt->nameTab == NULL) {
1648: xmlErrMemory(NULL, "cannot initialize parser context\n");
1649: ctxt->nodeNr = 0;
1650: ctxt->nodeMax = 0;
1651: ctxt->node = NULL;
1652: ctxt->inputNr = 0;
1653: ctxt->inputMax = 0;
1654: ctxt->input = NULL;
1655: ctxt->nameNr = 0;
1656: ctxt->nameMax = 0;
1657: ctxt->name = NULL;
1658: return(-1);
1659: }
1660: ctxt->nameNr = 0;
1661: ctxt->name = NULL;
1662:
1663: /* Allocate the space stack */
1664: if (ctxt->spaceTab == NULL) {
1665: ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1666: ctxt->spaceMax = 10;
1667: }
1668: if (ctxt->spaceTab == NULL) {
1669: xmlErrMemory(NULL, "cannot initialize parser context\n");
1670: ctxt->nodeNr = 0;
1671: ctxt->nodeMax = 0;
1672: ctxt->node = NULL;
1673: ctxt->inputNr = 0;
1674: ctxt->inputMax = 0;
1675: ctxt->input = NULL;
1676: ctxt->nameNr = 0;
1677: ctxt->nameMax = 0;
1678: ctxt->name = NULL;
1679: ctxt->spaceNr = 0;
1680: ctxt->spaceMax = 0;
1681: ctxt->space = NULL;
1682: return(-1);
1683: }
1684: ctxt->spaceNr = 1;
1685: ctxt->spaceMax = 10;
1686: ctxt->spaceTab[0] = -1;
1687: ctxt->space = &ctxt->spaceTab[0];
1688: ctxt->userData = ctxt;
1689: ctxt->myDoc = NULL;
1690: ctxt->wellFormed = 1;
1691: ctxt->nsWellFormed = 1;
1692: ctxt->valid = 1;
1693: ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1694: ctxt->validate = xmlDoValidityCheckingDefaultValue;
1695: ctxt->pedantic = xmlPedanticParserDefaultValue;
1696: ctxt->linenumbers = xmlLineNumbersDefaultValue;
1697: ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1698: if (ctxt->keepBlanks == 0)
1699: ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1700:
1701: ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
1702: ctxt->vctxt.userData = ctxt;
1703: ctxt->vctxt.error = xmlParserValidityError;
1704: ctxt->vctxt.warning = xmlParserValidityWarning;
1705: if (ctxt->validate) {
1706: if (xmlGetWarningsDefaultValue == 0)
1707: ctxt->vctxt.warning = NULL;
1708: else
1709: ctxt->vctxt.warning = xmlParserValidityWarning;
1710: ctxt->vctxt.nodeMax = 0;
1711: }
1712: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1713: ctxt->record_info = 0;
1714: ctxt->nbChars = 0;
1715: ctxt->checkIndex = 0;
1716: ctxt->inSubset = 0;
1717: ctxt->errNo = XML_ERR_OK;
1718: ctxt->depth = 0;
1719: ctxt->charset = XML_CHAR_ENCODING_UTF8;
1720: ctxt->catalogs = NULL;
1721: ctxt->nbentities = 0;
1.1.1.3 ! misho 1722: ctxt->sizeentities = 0;
! 1723: ctxt->sizeentcopy = 0;
1.1.1.2 misho 1724: ctxt->input_id = 1;
1.1 misho 1725: xmlInitNodeInfoSeq(&ctxt->node_seq);
1726: return(0);
1727: }
1728:
1729: /**
1730: * xmlFreeParserCtxt:
1731: * @ctxt: an XML parser context
1732: *
1733: * Free all the memory used by a parser context. However the parsed
1734: * document in ctxt->myDoc is not freed.
1735: */
1736:
1737: void
1738: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1739: {
1740: xmlParserInputPtr input;
1741:
1742: if (ctxt == NULL) return;
1743:
1744: while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1745: xmlFreeInputStream(input);
1746: }
1747: if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
1748: if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
1749: if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1750: if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
1751: if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1752: if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1753: if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
1754: if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1755: if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1756: #ifdef LIBXML_SAX1_ENABLED
1757: if ((ctxt->sax != NULL) &&
1758: (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
1759: #else
1760: if (ctxt->sax != NULL)
1761: #endif /* LIBXML_SAX1_ENABLED */
1762: xmlFree(ctxt->sax);
1763: if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1764: if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
1765: if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
1766: if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
1767: if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
1768: if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
1769: if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
1.1.1.3 ! misho 1770: if (ctxt->attsDefault != NULL)
1.1 misho 1771: xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
1772: if (ctxt->attsSpecial != NULL)
1773: xmlHashFree(ctxt->attsSpecial, NULL);
1774: if (ctxt->freeElems != NULL) {
1775: xmlNodePtr cur, next;
1776:
1777: cur = ctxt->freeElems;
1778: while (cur != NULL) {
1779: next = cur->next;
1780: xmlFree(cur);
1781: cur = next;
1782: }
1783: }
1784: if (ctxt->freeAttrs != NULL) {
1785: xmlAttrPtr cur, next;
1786:
1787: cur = ctxt->freeAttrs;
1788: while (cur != NULL) {
1789: next = cur->next;
1790: xmlFree(cur);
1791: cur = next;
1792: }
1793: }
1794: /*
1795: * cleanup the error strings
1796: */
1797: if (ctxt->lastError.message != NULL)
1798: xmlFree(ctxt->lastError.message);
1799: if (ctxt->lastError.file != NULL)
1800: xmlFree(ctxt->lastError.file);
1801: if (ctxt->lastError.str1 != NULL)
1802: xmlFree(ctxt->lastError.str1);
1803: if (ctxt->lastError.str2 != NULL)
1804: xmlFree(ctxt->lastError.str2);
1805: if (ctxt->lastError.str3 != NULL)
1806: xmlFree(ctxt->lastError.str3);
1807:
1808: #ifdef LIBXML_CATALOG_ENABLED
1809: if (ctxt->catalogs != NULL)
1810: xmlCatalogFreeLocal(ctxt->catalogs);
1811: #endif
1812: xmlFree(ctxt);
1813: }
1814:
1815: /**
1816: * xmlNewParserCtxt:
1817: *
1818: * Allocate and initialize a new parser context.
1819: *
1820: * Returns the xmlParserCtxtPtr or NULL
1821: */
1822:
1823: xmlParserCtxtPtr
1824: xmlNewParserCtxt(void)
1825: {
1826: xmlParserCtxtPtr ctxt;
1827:
1828: ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
1829: if (ctxt == NULL) {
1830: xmlErrMemory(NULL, "cannot allocate parser context\n");
1831: return(NULL);
1832: }
1833: memset(ctxt, 0, sizeof(xmlParserCtxt));
1834: if (xmlInitParserCtxt(ctxt) < 0) {
1835: xmlFreeParserCtxt(ctxt);
1836: return(NULL);
1837: }
1838: return(ctxt);
1839: }
1840:
1841: /************************************************************************
1842: * *
1843: * Handling of node informations *
1844: * *
1845: ************************************************************************/
1846:
1847: /**
1848: * xmlClearParserCtxt:
1849: * @ctxt: an XML parser context
1850: *
1851: * Clear (release owned resources) and reinitialize a parser context
1852: */
1853:
1854: void
1855: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1856: {
1857: if (ctxt==NULL)
1858: return;
1859: xmlClearNodeInfoSeq(&ctxt->node_seq);
1860: xmlCtxtReset(ctxt);
1861: }
1862:
1863:
1864: /**
1865: * xmlParserFindNodeInfo:
1866: * @ctx: an XML parser context
1867: * @node: an XML node within the tree
1868: *
1869: * Find the parser node info struct for a given node
1.1.1.3 ! misho 1870: *
1.1 misho 1871: * Returns an xmlParserNodeInfo block pointer or NULL
1872: */
1873: const xmlParserNodeInfo *
1874: xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
1875: {
1876: unsigned long pos;
1877:
1878: if ((ctx == NULL) || (node == NULL))
1879: return (NULL);
1880: /* Find position where node should be at */
1881: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
1882: if (pos < ctx->node_seq.length
1883: && ctx->node_seq.buffer[pos].node == node)
1884: return &ctx->node_seq.buffer[pos];
1885: else
1886: return NULL;
1887: }
1888:
1889:
1890: /**
1891: * xmlInitNodeInfoSeq:
1892: * @seq: a node info sequence pointer
1893: *
1894: * -- Initialize (set to initial state) node info sequence
1895: */
1896: void
1897: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1898: {
1899: if (seq == NULL)
1900: return;
1901: seq->length = 0;
1902: seq->maximum = 0;
1903: seq->buffer = NULL;
1904: }
1905:
1906: /**
1907: * xmlClearNodeInfoSeq:
1908: * @seq: a node info sequence pointer
1909: *
1910: * -- Clear (release memory and reinitialize) node
1911: * info sequence
1912: */
1913: void
1914: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1915: {
1916: if (seq == NULL)
1917: return;
1918: if (seq->buffer != NULL)
1919: xmlFree(seq->buffer);
1920: xmlInitNodeInfoSeq(seq);
1921: }
1922:
1923: /**
1924: * xmlParserFindNodeInfoIndex:
1925: * @seq: a node info sequence pointer
1926: * @node: an XML node pointer
1927: *
1.1.1.3 ! misho 1928: *
1.1 misho 1929: * xmlParserFindNodeInfoIndex : Find the index that the info record for
1930: * the given node is or should be at in a sorted sequence
1931: *
1932: * Returns a long indicating the position of the record
1933: */
1934: unsigned long
1935: xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1936: const xmlNodePtr node)
1937: {
1938: unsigned long upper, lower, middle;
1939: int found = 0;
1940:
1941: if ((seq == NULL) || (node == NULL))
1942: return ((unsigned long) -1);
1943:
1944: /* Do a binary search for the key */
1945: lower = 1;
1946: upper = seq->length;
1947: middle = 0;
1948: while (lower <= upper && !found) {
1949: middle = lower + (upper - lower) / 2;
1950: if (node == seq->buffer[middle - 1].node)
1951: found = 1;
1952: else if (node < seq->buffer[middle - 1].node)
1953: upper = middle - 1;
1954: else
1955: lower = middle + 1;
1956: }
1957:
1958: /* Return position */
1959: if (middle == 0 || seq->buffer[middle - 1].node < node)
1960: return middle;
1961: else
1962: return middle - 1;
1963: }
1964:
1965:
1966: /**
1967: * xmlParserAddNodeInfo:
1968: * @ctxt: an XML parser context
1969: * @info: a node info sequence pointer
1970: *
1971: * Insert node info record into the sorted sequence
1972: */
1973: void
1974: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1975: const xmlParserNodeInfoPtr info)
1976: {
1977: unsigned long pos;
1978:
1979: if ((ctxt == NULL) || (info == NULL)) return;
1980:
1981: /* Find pos and check to see if node is already in the sequence */
1982: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
1983: info->node);
1984:
1.1.1.3 ! misho 1985: if ((pos < ctxt->node_seq.length) &&
1.1 misho 1986: (ctxt->node_seq.buffer != NULL) &&
1987: (ctxt->node_seq.buffer[pos].node == info->node)) {
1988: ctxt->node_seq.buffer[pos] = *info;
1989: }
1990:
1991: /* Otherwise, we need to add new node to buffer */
1992: else {
1993: if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
1994: xmlParserNodeInfo *tmp_buffer;
1995: unsigned int byte_size;
1996:
1997: if (ctxt->node_seq.maximum == 0)
1998: ctxt->node_seq.maximum = 2;
1999: byte_size = (sizeof(*ctxt->node_seq.buffer) *
2000: (2 * ctxt->node_seq.maximum));
2001:
2002: if (ctxt->node_seq.buffer == NULL)
2003: tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2004: else
2005: tmp_buffer =
2006: (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2007: byte_size);
2008:
2009: if (tmp_buffer == NULL) {
2010: xmlErrMemory(ctxt, "failed to allocate buffer\n");
2011: return;
2012: }
2013: ctxt->node_seq.buffer = tmp_buffer;
2014: ctxt->node_seq.maximum *= 2;
2015: }
2016:
2017: /* If position is not at end, move elements out of the way */
2018: if (pos != ctxt->node_seq.length) {
2019: unsigned long i;
2020:
2021: for (i = ctxt->node_seq.length; i > pos; i--)
2022: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2023: }
2024:
2025: /* Copy element and increase length */
2026: ctxt->node_seq.buffer[pos] = *info;
2027: ctxt->node_seq.length++;
2028: }
2029: }
2030:
2031: /************************************************************************
2032: * *
2033: * Defaults settings *
2034: * *
2035: ************************************************************************/
2036: /**
2037: * xmlPedanticParserDefault:
1.1.1.3 ! misho 2038: * @val: int 0 or 1
1.1 misho 2039: *
2040: * Set and return the previous value for enabling pedantic warnings.
2041: *
2042: * Returns the last value for 0 for no substitution, 1 for substitution.
2043: */
2044:
2045: int
2046: xmlPedanticParserDefault(int val) {
2047: int old = xmlPedanticParserDefaultValue;
2048:
2049: xmlPedanticParserDefaultValue = val;
2050: return(old);
2051: }
2052:
2053: /**
2054: * xmlLineNumbersDefault:
1.1.1.3 ! misho 2055: * @val: int 0 or 1
1.1 misho 2056: *
2057: * Set and return the previous value for enabling line numbers in elements
2058: * contents. This may break on old application and is turned off by default.
2059: *
2060: * Returns the last value for 0 for no substitution, 1 for substitution.
2061: */
2062:
2063: int
2064: xmlLineNumbersDefault(int val) {
2065: int old = xmlLineNumbersDefaultValue;
2066:
2067: xmlLineNumbersDefaultValue = val;
2068: return(old);
2069: }
2070:
2071: /**
2072: * xmlSubstituteEntitiesDefault:
1.1.1.3 ! misho 2073: * @val: int 0 or 1
1.1 misho 2074: *
2075: * Set and return the previous value for default entity support.
2076: * Initially the parser always keep entity references instead of substituting
2077: * entity values in the output. This function has to be used to change the
2078: * default parser behavior
2079: * SAX::substituteEntities() has to be used for changing that on a file by
2080: * file basis.
2081: *
2082: * Returns the last value for 0 for no substitution, 1 for substitution.
2083: */
2084:
2085: int
2086: xmlSubstituteEntitiesDefault(int val) {
2087: int old = xmlSubstituteEntitiesDefaultValue;
2088:
2089: xmlSubstituteEntitiesDefaultValue = val;
2090: return(old);
2091: }
2092:
2093: /**
2094: * xmlKeepBlanksDefault:
1.1.1.3 ! misho 2095: * @val: int 0 or 1
1.1 misho 2096: *
2097: * Set and return the previous value for default blanks text nodes support.
2098: * The 1.x version of the parser used an heuristic to try to detect
2099: * ignorable white spaces. As a result the SAX callback was generating
2100: * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2101: * using the DOM output text nodes containing those blanks were not generated.
2102: * The 2.x and later version will switch to the XML standard way and
2103: * ignorableWhitespace() are only generated when running the parser in
2104: * validating mode and when the current element doesn't allow CDATA or
2105: * mixed content.
1.1.1.3 ! misho 2106: * This function is provided as a way to force the standard behavior
1.1 misho 2107: * on 1.X libs and to switch back to the old mode for compatibility when
2108: * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2109: * by using xmlIsBlankNode() commodity function to detect the "empty"
2110: * nodes generated.
2111: * This value also affect autogeneration of indentation when saving code
2112: * if blanks sections are kept, indentation is not generated.
2113: *
2114: * Returns the last value for 0 for no substitution, 1 for substitution.
2115: */
2116:
2117: int
2118: xmlKeepBlanksDefault(int val) {
2119: int old = xmlKeepBlanksDefaultValue;
2120:
2121: xmlKeepBlanksDefaultValue = val;
2122: if (!val) xmlIndentTreeOutput = 1;
2123: return(old);
2124: }
2125:
2126: #define bottom_parserInternals
2127: #include "elfgcchack.h"
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>