Annotation of embedaddon/libxml2/uri.c, revision 1.1.1.3
1.1 misho 1: /**
1.1.1.3 ! misho 2: * uri.c: set of generic URI related routines
1.1 misho 3: *
4: * Reference: RFCs 3986, 2732 and 2373
5: *
6: * See Copyright for the status of this software.
7: *
8: * daniel@veillard.com
9: */
10:
11: #define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
20:
/**
 * MAX_URI_LENGTH:
 *
 * The URI grammar defined in the RFCs referenced above has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually the maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value in any expression (e.g. as the right operand of '/' or '%').
 */
#define MAX_URI_LENGTH (1024 * 1024)
! 34:
! 35: static void
! 36: xmlURIErrMemory(const char *extra)
! 37: {
! 38: if (extra)
! 39: __xmlRaiseError(NULL, NULL, NULL,
! 40: NULL, NULL, XML_FROM_URI,
! 41: XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
! 42: extra, NULL, NULL, 0, 0,
! 43: "Memory allocation failed : %s\n", extra);
! 44: else
! 45: __xmlRaiseError(NULL, NULL, NULL,
! 46: NULL, NULL, XML_FROM_URI,
! 47: XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
! 48: NULL, NULL, NULL, 0, 0,
! 49: "Memory allocation failed\n");
! 50: }
! 51:
1.1 misho 52: static void xmlCleanURI(xmlURIPtr uri);
53:
/*
 * Character classes from RFC 2396, used by the legacy handling code.
 * All of them take a character value, except IS_UNWISE which takes a
 * pointer to the character to examine. Arguments may be evaluated
 * several times and must therefore be free of side effects.
 */

/*
 * lowalpha = "a" | "b" | ... | "z"
 */
#define IS_LOWALPHA(c) (((c) >= 'a') && ((c) <= 'z'))

/*
 * upalpha = "A" | "B" | ... | "Z"
 */
#define IS_UPALPHA(c) (((c) >= 'A') && ((c) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(c) (IS_LOWALPHA(c) || IS_UPALPHA(c))

/*
 * digit = "0" | "1" | ... | "9"
 *
 * Any IS_DIGIT already defined at this point is replaced.
 */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(c) (((c) >= '0') && ((c) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_DIGIT(c))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(c)                                              \
    (((c) == '-') || ((c) == '_') || ((c) == '.') ||            \
     ((c) == '!') || ((c) == '~') || ((c) == '*') ||            \
     ((c) == '\'') || ((c) == '(') || ((c) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Takes a pointer to the character, unlike the other macros here.
 */
#define IS_UNWISE(ptr)                                          \
    (((*(ptr) == '{')) || ((*(ptr) == '}')) ||                  \
     ((*(ptr) == '|')) || ((*(ptr) == '\\')) ||                 \
     ((*(ptr) == '^')) || ((*(ptr) == '[')) ||                  \
     ((*(ptr) == ']')) || ((*(ptr) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(c)                                          \
    (((c) == ';') || ((c) == '/') || ((c) == '?') ||            \
     ((c) == ':') || ((c) == '@') || ((c) == '&') ||            \
     ((c) == '=') || ((c) == '+') || ((c) == '$') ||            \
     ((c) == ',') || ((c) == '[') || ((c) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(c) (IS_ALPHANUM(c) || IS_MARK(c))
121:
/*
 * Advance the pointer @p to the next character, treating a "%XX" escape
 * sequence as a single unit.
 *
 * The macro skips three bytes after a '%' without looking at them, so the
 * caller must already know that two more characters follow. @p must be a
 * modifiable pointer lvalue; it is evaluated more than once and is fully
 * parenthesized so that expressions such as NEXT(*pp) advance the right
 * object.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
127:
128: /*
129: * Productions from the spec.
130: *
131: * authority = server | reg_name
132: * reg_name = 1*( unreserved | escaped | "$" | "," |
133: * ";" | ":" | "@" | "&" | "=" | "+" )
134: *
135: * path = [ abs_path | opaque_part ]
136: */
137:
/*
 * Duplicate the first @n bytes of @s with xmlStrndup() and hand the result
 * back as a plain char pointer. The whole expansion is parenthesized so the
 * cast applies to the call result even when the macro is followed by a
 * postfix operator.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
139:
140: /************************************************************************
141: * *
142: * RFC 3986 parser *
143: * *
144: ************************************************************************/
145:
146: #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
147: #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \
148: ((*(p) >= 'A') && (*(p) <= 'Z')))
149: #define ISA_HEXDIG(p) \
150: (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \
151: ((*(p) >= 'A') && (*(p) <= 'F')))
152:
153: /*
154: * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
155: * / "*" / "+" / "," / ";" / "="
156: */
157: #define ISA_SUB_DELIM(p) \
158: (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \
159: ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \
160: ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \
161: ((*(p) == '=')) || ((*(p) == '\'')))
162:
163: /*
164: * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
165: */
166: #define ISA_GEN_DELIM(p) \
167: (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \
168: ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \
169: ((*(p) == '@')))
170:
171: /*
172: * reserved = gen-delims / sub-delims
173: */
174: #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
175:
176: /*
177: * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
178: */
179: #define ISA_UNRESERVED(p) \
180: ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \
181: ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
182:
183: /*
184: * pct-encoded = "%" HEXDIG HEXDIG
185: */
186: #define ISA_PCT_ENCODED(p) \
187: ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
188:
189: /*
190: * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
191: */
192: #define ISA_PCHAR(p) \
193: (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \
194: ((*(p) == ':')) || ((*(p) == '@')))
195:
196: /**
197: * xmlParse3986Scheme:
198: * @uri: pointer to an URI structure
199: * @str: pointer to the string to analyze
200: *
201: * Parse an URI scheme
202: *
203: * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
204: *
205: * Returns 0 or the error code
206: */
207: static int
208: xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
209: const char *cur;
210:
211: if (str == NULL)
212: return(-1);
213:
214: cur = *str;
215: if (!ISA_ALPHA(cur))
216: return(2);
217: cur++;
218: while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
219: (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
220: if (uri != NULL) {
221: if (uri->scheme != NULL) xmlFree(uri->scheme);
222: uri->scheme = STRNDUP(*str, cur - *str);
223: }
224: *str = cur;
225: return(0);
226: }
227:
228: /**
229: * xmlParse3986Fragment:
230: * @uri: pointer to an URI structure
231: * @str: pointer to the string to analyze
232: *
233: * Parse the query part of an URI
234: *
235: * fragment = *( pchar / "/" / "?" )
236: * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237: * in the fragment identifier but this is used very broadly for
238: * xpointer scheme selection, so we are allowing it here to not break
239: * for example all the DocBook processing chains.
240: *
241: * Returns 0 or the error code
242: */
243: static int
244: xmlParse3986Fragment(xmlURIPtr uri, const char **str)
245: {
246: const char *cur;
247:
248: if (str == NULL)
249: return (-1);
250:
251: cur = *str;
252:
253: while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
254: (*cur == '[') || (*cur == ']') ||
255: ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
256: NEXT(cur);
257: if (uri != NULL) {
258: if (uri->fragment != NULL)
259: xmlFree(uri->fragment);
260: if (uri->cleanup & 2)
261: uri->fragment = STRNDUP(*str, cur - *str);
262: else
263: uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
264: }
265: *str = cur;
266: return (0);
267: }
268:
269: /**
270: * xmlParse3986Query:
271: * @uri: pointer to an URI structure
272: * @str: pointer to the string to analyze
273: *
274: * Parse the query part of an URI
275: *
276: * query = *uric
277: *
278: * Returns 0 or the error code
279: */
280: static int
281: xmlParse3986Query(xmlURIPtr uri, const char **str)
282: {
283: const char *cur;
284:
285: if (str == NULL)
286: return (-1);
287:
288: cur = *str;
289:
290: while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
291: ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
292: NEXT(cur);
293: if (uri != NULL) {
294: if (uri->query != NULL)
295: xmlFree(uri->query);
296: if (uri->cleanup & 2)
297: uri->query = STRNDUP(*str, cur - *str);
298: else
299: uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
300:
301: /* Save the raw bytes of the query as well.
302: * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
303: */
304: if (uri->query_raw != NULL)
305: xmlFree (uri->query_raw);
306: uri->query_raw = STRNDUP (*str, cur - *str);
307: }
308: *str = cur;
309: return (0);
310: }
311:
312: /**
313: * xmlParse3986Port:
314: * @uri: pointer to an URI structure
315: * @str: the string to analyze
316: *
317: * Parse a port part and fills in the appropriate fields
318: * of the @uri structure
319: *
320: * port = *DIGIT
321: *
322: * Returns 0 or the error code
323: */
324: static int
325: xmlParse3986Port(xmlURIPtr uri, const char **str)
326: {
327: const char *cur = *str;
328:
329: if (ISA_DIGIT(cur)) {
330: if (uri != NULL)
331: uri->port = 0;
332: while (ISA_DIGIT(cur)) {
333: if (uri != NULL)
334: uri->port = uri->port * 10 + (*cur - '0');
335: cur++;
336: }
337: *str = cur;
338: return(0);
339: }
340: return(1);
341: }
342:
343: /**
344: * xmlParse3986Userinfo:
345: * @uri: pointer to an URI structure
346: * @str: the string to analyze
347: *
348: * Parse an user informations part and fills in the appropriate fields
349: * of the @uri structure
350: *
351: * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
352: *
353: * Returns 0 or the error code
354: */
355: static int
356: xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
357: {
358: const char *cur;
359:
360: cur = *str;
361: while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
362: ISA_SUB_DELIM(cur) || (*cur == ':'))
363: NEXT(cur);
364: if (*cur == '@') {
365: if (uri != NULL) {
366: if (uri->user != NULL) xmlFree(uri->user);
367: if (uri->cleanup & 2)
368: uri->user = STRNDUP(*str, cur - *str);
369: else
370: uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
371: }
372: *str = cur;
373: return(0);
374: }
375: return(1);
376: }
377:
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. A run of one or two digits is taken as a whole (a
 * two digit value must not start with '0'); with three or more digits
 * the first three must form a value in the 100-255 range.
 * *@str is only modified on success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    /*
     * cur[2] is only read once cur[1] is known to be a digit, so the
     * terminating 0 of the string is never passed.
     */
    if ((cur[1] < '0') || (cur[1] > '9')) {
        len = 1;                                /* 0-9 */
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        if (cur[0] == '0')
            return(1);                          /* no leading zero */
        len = 2;                                /* 10-99 */
    } else {
        if (cur[0] == '1') {
            /* 100-199 */
        } else if ((cur[0] == '2') && (cur[1] <= '4')) {
            /* 200-249 */
        } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
            /* 250-255 */
        } else {
            return(1);
        }
        len = 3;
    }

    *str = cur + len;
    return(0);
}
415: /**
416: * xmlParse3986Host:
417: * @uri: pointer to an URI structure
418: * @str: the string to analyze
419: *
420: * Parse an host part and fills in the appropriate fields
421: * of the @uri structure
422: *
423: * host = IP-literal / IPv4address / reg-name
424: * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
425: * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
426: * reg-name = *( unreserved / pct-encoded / sub-delims )
427: *
428: * Returns 0 or the error code
429: */
430: static int
431: xmlParse3986Host(xmlURIPtr uri, const char **str)
432: {
433: const char *cur = *str;
434: const char *host;
435:
436: host = cur;
437: /*
438: * IPv6 and future adressing scheme are enclosed between brackets
439: */
440: if (*cur == '[') {
441: cur++;
442: while ((*cur != ']') && (*cur != 0))
443: cur++;
444: if (*cur != ']')
445: return(1);
446: cur++;
447: goto found;
448: }
449: /*
450: * try to parse an IPv4
451: */
452: if (ISA_DIGIT(cur)) {
453: if (xmlParse3986DecOctet(&cur) != 0)
454: goto not_ipv4;
455: if (*cur != '.')
456: goto not_ipv4;
457: cur++;
458: if (xmlParse3986DecOctet(&cur) != 0)
459: goto not_ipv4;
460: if (*cur != '.')
461: goto not_ipv4;
462: if (xmlParse3986DecOctet(&cur) != 0)
463: goto not_ipv4;
464: if (*cur != '.')
465: goto not_ipv4;
466: if (xmlParse3986DecOctet(&cur) != 0)
467: goto not_ipv4;
468: goto found;
469: not_ipv4:
470: cur = *str;
471: }
472: /*
473: * then this should be a hostname which can be empty
474: */
475: while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
476: NEXT(cur);
477: found:
478: if (uri != NULL) {
479: if (uri->authority != NULL) xmlFree(uri->authority);
480: uri->authority = NULL;
481: if (uri->server != NULL) xmlFree(uri->server);
482: if (cur != host) {
483: if (uri->cleanup & 2)
484: uri->server = STRNDUP(host, cur - host);
485: else
486: uri->server = xmlURIUnescapeString(host, cur - host, NULL);
487: } else
488: uri->server = NULL;
489: }
490: *str = cur;
491: return(0);
492: }
493:
494: /**
495: * xmlParse3986Authority:
496: * @uri: pointer to an URI structure
497: * @str: the string to analyze
498: *
499: * Parse an authority part and fills in the appropriate fields
500: * of the @uri structure
501: *
502: * authority = [ userinfo "@" ] host [ ":" port ]
503: *
504: * Returns 0 or the error code
505: */
506: static int
507: xmlParse3986Authority(xmlURIPtr uri, const char **str)
508: {
509: const char *cur;
510: int ret;
511:
512: cur = *str;
513: /*
514: * try to parse an userinfo and check for the trailing @
515: */
516: ret = xmlParse3986Userinfo(uri, &cur);
517: if ((ret != 0) || (*cur != '@'))
518: cur = *str;
519: else
520: cur++;
521: ret = xmlParse3986Host(uri, &cur);
522: if (ret != 0) return(ret);
523: if (*cur == ':') {
524: cur++;
525: ret = xmlParse3986Port(uri, &cur);
526: if (ret != 0) return(ret);
527: }
528: *str = cur;
529: return(0);
530: }
531:
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 if there is none
 * @empty: allow an empty segment
 *
 * Skip a path segment, moving *@str past it
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * The segment stops at the first character which is not a pchar or
 * which is the @forbid character. A segment stopping on its very first
 * character is empty, which is an error unless @empty is set; *@str is
 * left untouched in that case.
 *
 * Returns 0 on success, 1 if the segment is empty while @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur;

    cur = *str;
    /* a pchar is never 0, so a @forbid of 0 can not match here */
    if (!ISA_PCHAR(cur) || (*cur == forbid)) {
        if (empty)
            return(0);
        return(1);
    }
    while (ISA_PCHAR(cur) && (*cur != forbid))
        NEXT(cur);
    *str = cur;
    return (0);
}
564:
565: /**
566: * xmlParse3986PathAbEmpty:
567: * @uri: pointer to an URI structure
568: * @str: the string to analyze
569: *
570: * Parse an path absolute or empty and fills in the appropriate fields
571: * of the @uri structure
572: *
573: * path-abempty = *( "/" segment )
574: *
575: * Returns 0 or the error code
576: */
577: static int
578: xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
579: {
580: const char *cur;
581: int ret;
582:
583: cur = *str;
584:
585: while (*cur == '/') {
586: cur++;
587: ret = xmlParse3986Segment(&cur, 0, 1);
588: if (ret != 0) return(ret);
589: }
590: if (uri != NULL) {
591: if (uri->path != NULL) xmlFree(uri->path);
592: if (*str != cur) {
593: if (uri->cleanup & 2)
594: uri->path = STRNDUP(*str, cur - *str);
595: else
596: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
597: } else {
598: uri->path = NULL;
599: }
600: }
601: *str = cur;
602: return (0);
603: }
604:
605: /**
606: * xmlParse3986PathAbsolute:
607: * @uri: pointer to an URI structure
608: * @str: the string to analyze
609: *
610: * Parse an path absolute and fills in the appropriate fields
611: * of the @uri structure
612: *
613: * path-absolute = "/" [ segment-nz *( "/" segment ) ]
614: *
615: * Returns 0 or the error code
616: */
617: static int
618: xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
619: {
620: const char *cur;
621: int ret;
622:
623: cur = *str;
624:
625: if (*cur != '/')
626: return(1);
627: cur++;
628: ret = xmlParse3986Segment(&cur, 0, 0);
629: if (ret == 0) {
630: while (*cur == '/') {
631: cur++;
632: ret = xmlParse3986Segment(&cur, 0, 1);
633: if (ret != 0) return(ret);
634: }
635: }
636: if (uri != NULL) {
637: if (uri->path != NULL) xmlFree(uri->path);
638: if (cur != *str) {
639: if (uri->cleanup & 2)
640: uri->path = STRNDUP(*str, cur - *str);
641: else
642: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
643: } else {
644: uri->path = NULL;
645: }
646: }
647: *str = cur;
648: return (0);
649: }
650:
651: /**
652: * xmlParse3986PathRootless:
653: * @uri: pointer to an URI structure
654: * @str: the string to analyze
655: *
656: * Parse an path without root and fills in the appropriate fields
657: * of the @uri structure
658: *
659: * path-rootless = segment-nz *( "/" segment )
660: *
661: * Returns 0 or the error code
662: */
663: static int
664: xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
665: {
666: const char *cur;
667: int ret;
668:
669: cur = *str;
670:
671: ret = xmlParse3986Segment(&cur, 0, 0);
672: if (ret != 0) return(ret);
673: while (*cur == '/') {
674: cur++;
675: ret = xmlParse3986Segment(&cur, 0, 1);
676: if (ret != 0) return(ret);
677: }
678: if (uri != NULL) {
679: if (uri->path != NULL) xmlFree(uri->path);
680: if (cur != *str) {
681: if (uri->cleanup & 2)
682: uri->path = STRNDUP(*str, cur - *str);
683: else
684: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
685: } else {
686: uri->path = NULL;
687: }
688: }
689: *str = cur;
690: return (0);
691: }
692:
693: /**
694: * xmlParse3986PathNoScheme:
695: * @uri: pointer to an URI structure
696: * @str: the string to analyze
697: *
698: * Parse an path which is not a scheme and fills in the appropriate fields
699: * of the @uri structure
700: *
701: * path-noscheme = segment-nz-nc *( "/" segment )
702: *
703: * Returns 0 or the error code
704: */
705: static int
706: xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
707: {
708: const char *cur;
709: int ret;
710:
711: cur = *str;
712:
713: ret = xmlParse3986Segment(&cur, ':', 0);
714: if (ret != 0) return(ret);
715: while (*cur == '/') {
716: cur++;
717: ret = xmlParse3986Segment(&cur, 0, 1);
718: if (ret != 0) return(ret);
719: }
720: if (uri != NULL) {
721: if (uri->path != NULL) xmlFree(uri->path);
722: if (cur != *str) {
723: if (uri->cleanup & 2)
724: uri->path = STRNDUP(*str, cur - *str);
725: else
726: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
727: } else {
728: uri->path = NULL;
729: }
730: }
731: *str = cur;
732: return (0);
733: }
734:
735: /**
736: * xmlParse3986HierPart:
737: * @uri: pointer to an URI structure
738: * @str: the string to analyze
739: *
740: * Parse an hierarchical part and fills in the appropriate fields
741: * of the @uri structure
742: *
743: * hier-part = "//" authority path-abempty
744: * / path-absolute
745: * / path-rootless
746: * / path-empty
747: *
748: * Returns 0 or the error code
749: */
750: static int
751: xmlParse3986HierPart(xmlURIPtr uri, const char **str)
752: {
753: const char *cur;
754: int ret;
755:
756: cur = *str;
757:
758: if ((*cur == '/') && (*(cur + 1) == '/')) {
759: cur += 2;
760: ret = xmlParse3986Authority(uri, &cur);
761: if (ret != 0) return(ret);
762: ret = xmlParse3986PathAbEmpty(uri, &cur);
763: if (ret != 0) return(ret);
764: *str = cur;
765: return(0);
766: } else if (*cur == '/') {
767: ret = xmlParse3986PathAbsolute(uri, &cur);
768: if (ret != 0) return(ret);
769: } else if (ISA_PCHAR(cur)) {
770: ret = xmlParse3986PathRootless(uri, &cur);
771: if (ret != 0) return(ret);
772: } else {
773: /* path-empty is effectively empty */
774: if (uri != NULL) {
775: if (uri->path != NULL) xmlFree(uri->path);
776: uri->path = NULL;
777: }
778: }
779: *str = cur;
780: return (0);
781: }
782:
783: /**
784: * xmlParse3986RelativeRef:
785: * @uri: pointer to an URI structure
786: * @str: the string to analyze
787: *
788: * Parse an URI string and fills in the appropriate fields
789: * of the @uri structure
790: *
791: * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
792: * relative-part = "//" authority path-abempty
793: * / path-absolute
794: * / path-noscheme
795: * / path-empty
796: *
797: * Returns 0 or the error code
798: */
799: static int
800: xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
801: int ret;
802:
803: if ((*str == '/') && (*(str + 1) == '/')) {
804: str += 2;
805: ret = xmlParse3986Authority(uri, &str);
806: if (ret != 0) return(ret);
807: ret = xmlParse3986PathAbEmpty(uri, &str);
808: if (ret != 0) return(ret);
809: } else if (*str == '/') {
810: ret = xmlParse3986PathAbsolute(uri, &str);
811: if (ret != 0) return(ret);
812: } else if (ISA_PCHAR(str)) {
813: ret = xmlParse3986PathNoScheme(uri, &str);
814: if (ret != 0) return(ret);
815: } else {
816: /* path-empty is effectively empty */
817: if (uri != NULL) {
818: if (uri->path != NULL) xmlFree(uri->path);
819: uri->path = NULL;
820: }
821: }
822:
823: if (*str == '?') {
824: str++;
825: ret = xmlParse3986Query(uri, &str);
826: if (ret != 0) return(ret);
827: }
828: if (*str == '#') {
829: str++;
830: ret = xmlParse3986Fragment(uri, &str);
831: if (ret != 0) return(ret);
832: }
833: if (*str != 0) {
834: xmlCleanURI(uri);
835: return(1);
836: }
837: return(0);
838: }
839:
840:
841: /**
842: * xmlParse3986URI:
843: * @uri: pointer to an URI structure
844: * @str: the string to analyze
845: *
846: * Parse an URI string and fills in the appropriate fields
847: * of the @uri structure
848: *
849: * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
850: *
851: * Returns 0 or the error code
852: */
853: static int
854: xmlParse3986URI(xmlURIPtr uri, const char *str) {
855: int ret;
856:
857: ret = xmlParse3986Scheme(uri, &str);
858: if (ret != 0) return(ret);
859: if (*str != ':') {
860: return(1);
861: }
862: str++;
863: ret = xmlParse3986HierPart(uri, &str);
864: if (ret != 0) return(ret);
865: if (*str == '?') {
866: str++;
867: ret = xmlParse3986Query(uri, &str);
868: if (ret != 0) return(ret);
869: }
870: if (*str == '#') {
871: str++;
872: ret = xmlParse3986Fragment(uri, &str);
873: if (ret != 0) return(ret);
874: }
875: if (*str != 0) {
876: xmlCleanURI(uri);
877: return(1);
878: }
879: return(0);
880: }
881:
882: /**
883: * xmlParse3986URIReference:
884: * @uri: pointer to an URI structure
885: * @str: the string to analyze
886: *
887: * Parse an URI reference string and fills in the appropriate fields
888: * of the @uri structure
889: *
890: * URI-reference = URI / relative-ref
891: *
892: * Returns 0 or the error code
893: */
894: static int
895: xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
896: int ret;
897:
898: if (str == NULL)
899: return(-1);
900: xmlCleanURI(uri);
901:
902: /*
903: * Try first to parse absolute refs, then fallback to relative if
904: * it fails.
905: */
906: ret = xmlParse3986URI(uri, str);
907: if (ret != 0) {
908: xmlCleanURI(uri);
909: ret = xmlParse3986RelativeRef(uri, str);
910: if (ret != 0) {
911: xmlCleanURI(uri);
912: return(ret);
913: }
914: }
915: return(0);
916: }
917:
918: /**
919: * xmlParseURI:
920: * @str: the URI string to analyze
921: *
922: * Parse an URI based on RFC 3986
923: *
924: * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
925: *
926: * Returns a newly built xmlURIPtr or NULL in case of error
927: */
928: xmlURIPtr
929: xmlParseURI(const char *str) {
930: xmlURIPtr uri;
931: int ret;
932:
933: if (str == NULL)
934: return(NULL);
935: uri = xmlCreateURI();
936: if (uri != NULL) {
937: ret = xmlParse3986URIReference(uri, str);
938: if (ret) {
939: xmlFreeURI(uri);
940: return(NULL);
941: }
942: }
943: return(uri);
944: }
945:
946: /**
947: * xmlParseURIReference:
948: * @uri: pointer to an URI structure
949: * @str: the string to analyze
950: *
951: * Parse an URI reference string based on RFC 3986 and fills in the
952: * appropriate fields of the @uri structure
953: *
954: * URI-reference = URI / relative-ref
955: *
956: * Returns 0 or the error code
957: */
958: int
959: xmlParseURIReference(xmlURIPtr uri, const char *str) {
960: return(xmlParse3986URIReference(uri, str));
961: }
962:
963: /**
964: * xmlParseURIRaw:
965: * @str: the URI string to analyze
966: * @raw: if 1 unescaping of URI pieces are disabled
967: *
968: * Parse an URI but allows to keep intact the original fragments.
969: *
970: * URI-reference = URI / relative-ref
971: *
972: * Returns a newly built xmlURIPtr or NULL in case of error
973: */
974: xmlURIPtr
975: xmlParseURIRaw(const char *str, int raw) {
976: xmlURIPtr uri;
977: int ret;
978:
979: if (str == NULL)
980: return(NULL);
981: uri = xmlCreateURI();
982: if (uri != NULL) {
983: if (raw) {
984: uri->cleanup |= 2;
985: }
986: ret = xmlParseURIReference(uri, str);
987: if (ret) {
988: xmlFreeURI(uri);
989: return(NULL);
990: }
991: }
992: return(uri);
993: }
994:
995: /************************************************************************
996: * *
997: * Generic URI structure functions *
998: * *
999: ************************************************************************/
1000:
1001: /**
1002: * xmlCreateURI:
1003: *
1004: * Simply creates an empty xmlURI
1005: *
1006: * Returns the new structure or NULL in case of error
1007: */
1008: xmlURIPtr
1009: xmlCreateURI(void) {
1010: xmlURIPtr ret;
1011:
1012: ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1013: if (ret == NULL) {
1.1.1.3 ! misho 1014: xmlURIErrMemory("creating URI structure\n");
1.1 misho 1015: return(NULL);
1016: }
1017: memset(ret, 0, sizeof(xmlURI));
1018: return(ret);
1019: }
1020:
1021: /**
1.1.1.3 ! misho 1022: * xmlSaveUriRealloc:
! 1023: *
! 1024: * Function to handle properly a reallocation when saving an URI
! 1025: * Also imposes some limit on the length of an URI string output
! 1026: */
! 1027: static xmlChar *
! 1028: xmlSaveUriRealloc(xmlChar *ret, int *max) {
! 1029: xmlChar *temp;
! 1030: int tmp;
! 1031:
! 1032: if (*max > MAX_URI_LENGTH) {
! 1033: xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
! 1034: return(NULL);
! 1035: }
! 1036: tmp = *max * 2;
! 1037: temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
! 1038: if (temp == NULL) {
! 1039: xmlURIErrMemory("saving URI\n");
! 1040: return(NULL);
! 1041: }
! 1042: *max = tmp;
! 1043: return(temp);
! 1044: }
! 1045:
! 1046: /**
1.1 misho 1047: * xmlSaveUri:
1048: * @uri: pointer to an xmlURI
1049: *
1050: * Save the URI as an escaped string
1051: *
1052: * Returns a new string (to be deallocated by caller)
1053: */
1054: xmlChar *
1055: xmlSaveUri(xmlURIPtr uri) {
1056: xmlChar *ret = NULL;
1057: xmlChar *temp;
1058: const char *p;
1059: int len;
1060: int max;
1061:
1062: if (uri == NULL) return(NULL);
1063:
1064:
1065: max = 80;
1066: ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1067: if (ret == NULL) {
1.1.1.3 ! misho 1068: xmlURIErrMemory("saving URI\n");
1.1 misho 1069: return(NULL);
1070: }
1071: len = 0;
1072:
1073: if (uri->scheme != NULL) {
1074: p = uri->scheme;
1075: while (*p != 0) {
1076: if (len >= max) {
1.1.1.3 ! misho 1077: temp = xmlSaveUriRealloc(ret, &max);
! 1078: if (temp == NULL) goto mem_error;
1.1 misho 1079: ret = temp;
1080: }
1081: ret[len++] = *p++;
1082: }
1083: if (len >= max) {
1.1.1.3 ! misho 1084: temp = xmlSaveUriRealloc(ret, &max);
! 1085: if (temp == NULL) goto mem_error;
! 1086: ret = temp;
1.1 misho 1087: }
1088: ret[len++] = ':';
1089: }
1090: if (uri->opaque != NULL) {
1091: p = uri->opaque;
1092: while (*p != 0) {
1093: if (len + 3 >= max) {
1.1.1.3 ! misho 1094: temp = xmlSaveUriRealloc(ret, &max);
! 1095: if (temp == NULL) goto mem_error;
! 1096: ret = temp;
1.1 misho 1097: }
1098: if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1099: ret[len++] = *p++;
1100: else {
1101: int val = *(unsigned char *)p++;
1102: int hi = val / 0x10, lo = val % 0x10;
1103: ret[len++] = '%';
1104: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1105: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1106: }
1107: }
1108: } else {
1109: if (uri->server != NULL) {
1110: if (len + 3 >= max) {
1.1.1.3 ! misho 1111: temp = xmlSaveUriRealloc(ret, &max);
! 1112: if (temp == NULL) goto mem_error;
! 1113: ret = temp;
1.1 misho 1114: }
1115: ret[len++] = '/';
1116: ret[len++] = '/';
1117: if (uri->user != NULL) {
1118: p = uri->user;
1119: while (*p != 0) {
1120: if (len + 3 >= max) {
1.1.1.3 ! misho 1121: temp = xmlSaveUriRealloc(ret, &max);
! 1122: if (temp == NULL) goto mem_error;
! 1123: ret = temp;
1.1 misho 1124: }
1125: if ((IS_UNRESERVED(*(p))) ||
1126: ((*(p) == ';')) || ((*(p) == ':')) ||
1127: ((*(p) == '&')) || ((*(p) == '=')) ||
1128: ((*(p) == '+')) || ((*(p) == '$')) ||
1129: ((*(p) == ',')))
1130: ret[len++] = *p++;
1131: else {
1132: int val = *(unsigned char *)p++;
1133: int hi = val / 0x10, lo = val % 0x10;
1134: ret[len++] = '%';
1135: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1136: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1137: }
1138: }
1139: if (len + 3 >= max) {
1.1.1.3 ! misho 1140: temp = xmlSaveUriRealloc(ret, &max);
! 1141: if (temp == NULL) goto mem_error;
! 1142: ret = temp;
1.1 misho 1143: }
1144: ret[len++] = '@';
1145: }
1146: p = uri->server;
1147: while (*p != 0) {
1148: if (len >= max) {
1.1.1.3 ! misho 1149: temp = xmlSaveUriRealloc(ret, &max);
! 1150: if (temp == NULL) goto mem_error;
! 1151: ret = temp;
1.1 misho 1152: }
1153: ret[len++] = *p++;
1154: }
1155: if (uri->port > 0) {
1156: if (len + 10 >= max) {
1.1.1.3 ! misho 1157: temp = xmlSaveUriRealloc(ret, &max);
! 1158: if (temp == NULL) goto mem_error;
! 1159: ret = temp;
1.1 misho 1160: }
1161: len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1162: }
1163: } else if (uri->authority != NULL) {
1164: if (len + 3 >= max) {
1.1.1.3 ! misho 1165: temp = xmlSaveUriRealloc(ret, &max);
! 1166: if (temp == NULL) goto mem_error;
! 1167: ret = temp;
1.1 misho 1168: }
1169: ret[len++] = '/';
1170: ret[len++] = '/';
1171: p = uri->authority;
1172: while (*p != 0) {
1173: if (len + 3 >= max) {
1.1.1.3 ! misho 1174: temp = xmlSaveUriRealloc(ret, &max);
! 1175: if (temp == NULL) goto mem_error;
! 1176: ret = temp;
1.1 misho 1177: }
1178: if ((IS_UNRESERVED(*(p))) ||
1179: ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1180: ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1181: ((*(p) == '=')) || ((*(p) == '+')))
1182: ret[len++] = *p++;
1183: else {
1184: int val = *(unsigned char *)p++;
1185: int hi = val / 0x10, lo = val % 0x10;
1186: ret[len++] = '%';
1187: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1188: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1189: }
1190: }
1191: } else if (uri->scheme != NULL) {
1192: if (len + 3 >= max) {
1.1.1.3 ! misho 1193: temp = xmlSaveUriRealloc(ret, &max);
! 1194: if (temp == NULL) goto mem_error;
! 1195: ret = temp;
1.1 misho 1196: }
1197: ret[len++] = '/';
1198: ret[len++] = '/';
1199: }
1200: if (uri->path != NULL) {
1201: p = uri->path;
1202: /*
1203: * the colon in file:///d: should not be escaped or
1204: * Windows accesses fail later.
1205: */
1206: if ((uri->scheme != NULL) &&
1207: (p[0] == '/') &&
1208: (((p[1] >= 'a') && (p[1] <= 'z')) ||
1209: ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1210: (p[2] == ':') &&
1211: (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1212: if (len + 3 >= max) {
1.1.1.3 ! misho 1213: temp = xmlSaveUriRealloc(ret, &max);
! 1214: if (temp == NULL) goto mem_error;
! 1215: ret = temp;
1.1 misho 1216: }
1217: ret[len++] = *p++;
1218: ret[len++] = *p++;
1219: ret[len++] = *p++;
1220: }
1221: while (*p != 0) {
1222: if (len + 3 >= max) {
1.1.1.3 ! misho 1223: temp = xmlSaveUriRealloc(ret, &max);
! 1224: if (temp == NULL) goto mem_error;
! 1225: ret = temp;
1.1 misho 1226: }
1227: if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1228: ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1229: ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1230: ((*(p) == ',')))
1231: ret[len++] = *p++;
1232: else {
1233: int val = *(unsigned char *)p++;
1234: int hi = val / 0x10, lo = val % 0x10;
1235: ret[len++] = '%';
1236: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1237: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1238: }
1239: }
1240: }
1241: if (uri->query_raw != NULL) {
1242: if (len + 1 >= max) {
1.1.1.3 ! misho 1243: temp = xmlSaveUriRealloc(ret, &max);
! 1244: if (temp == NULL) goto mem_error;
! 1245: ret = temp;
1.1 misho 1246: }
1247: ret[len++] = '?';
1248: p = uri->query_raw;
1249: while (*p != 0) {
1250: if (len + 1 >= max) {
1.1.1.3 ! misho 1251: temp = xmlSaveUriRealloc(ret, &max);
! 1252: if (temp == NULL) goto mem_error;
! 1253: ret = temp;
1.1 misho 1254: }
1255: ret[len++] = *p++;
1256: }
1257: } else if (uri->query != NULL) {
1258: if (len + 3 >= max) {
1.1.1.3 ! misho 1259: temp = xmlSaveUriRealloc(ret, &max);
! 1260: if (temp == NULL) goto mem_error;
! 1261: ret = temp;
1.1 misho 1262: }
1263: ret[len++] = '?';
1264: p = uri->query;
1265: while (*p != 0) {
1266: if (len + 3 >= max) {
1.1.1.3 ! misho 1267: temp = xmlSaveUriRealloc(ret, &max);
! 1268: if (temp == NULL) goto mem_error;
! 1269: ret = temp;
1.1 misho 1270: }
1.1.1.3 ! misho 1271: if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1.1 misho 1272: ret[len++] = *p++;
1273: else {
1274: int val = *(unsigned char *)p++;
1275: int hi = val / 0x10, lo = val % 0x10;
1276: ret[len++] = '%';
1277: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1278: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1279: }
1280: }
1281: }
1282: }
1283: if (uri->fragment != NULL) {
1284: if (len + 3 >= max) {
1.1.1.3 ! misho 1285: temp = xmlSaveUriRealloc(ret, &max);
! 1286: if (temp == NULL) goto mem_error;
! 1287: ret = temp;
1.1 misho 1288: }
1289: ret[len++] = '#';
1290: p = uri->fragment;
1291: while (*p != 0) {
1292: if (len + 3 >= max) {
1.1.1.3 ! misho 1293: temp = xmlSaveUriRealloc(ret, &max);
! 1294: if (temp == NULL) goto mem_error;
! 1295: ret = temp;
1.1 misho 1296: }
1.1.1.3 ! misho 1297: if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1.1 misho 1298: ret[len++] = *p++;
1299: else {
1300: int val = *(unsigned char *)p++;
1301: int hi = val / 0x10, lo = val % 0x10;
1302: ret[len++] = '%';
1303: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1304: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1305: }
1306: }
1307: }
1308: if (len >= max) {
1.1.1.3 ! misho 1309: temp = xmlSaveUriRealloc(ret, &max);
! 1310: if (temp == NULL) goto mem_error;
! 1311: ret = temp;
1.1 misho 1312: }
1313: ret[len] = 0;
1314: return(ret);
1.1.1.3 ! misho 1315:
! 1316: mem_error:
! 1317: xmlFree(ret);
! 1318: return(NULL);
1.1 misho 1319: }
1320:
1321: /**
1322: * xmlPrintURI:
1323: * @stream: a FILE* for the output
1324: * @uri: pointer to an xmlURI
1325: *
1326: * Prints the URI in the stream @stream.
1327: */
1328: void
1329: xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1330: xmlChar *out;
1331:
1332: out = xmlSaveUri(uri);
1333: if (out != NULL) {
1334: fprintf(stream, "%s", (char *) out);
1335: xmlFree(out);
1336: }
1337: }
1338:
1339: /**
1340: * xmlCleanURI:
1341: * @uri: pointer to an xmlURI
1342: *
1343: * Make sure the xmlURI struct is free of content
1344: */
1345: static void
1346: xmlCleanURI(xmlURIPtr uri) {
1347: if (uri == NULL) return;
1348:
1349: if (uri->scheme != NULL) xmlFree(uri->scheme);
1350: uri->scheme = NULL;
1351: if (uri->server != NULL) xmlFree(uri->server);
1352: uri->server = NULL;
1353: if (uri->user != NULL) xmlFree(uri->user);
1354: uri->user = NULL;
1355: if (uri->path != NULL) xmlFree(uri->path);
1356: uri->path = NULL;
1357: if (uri->fragment != NULL) xmlFree(uri->fragment);
1358: uri->fragment = NULL;
1359: if (uri->opaque != NULL) xmlFree(uri->opaque);
1360: uri->opaque = NULL;
1361: if (uri->authority != NULL) xmlFree(uri->authority);
1362: uri->authority = NULL;
1363: if (uri->query != NULL) xmlFree(uri->query);
1364: uri->query = NULL;
1365: if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1366: uri->query_raw = NULL;
1367: }
1368:
1369: /**
1370: * xmlFreeURI:
1371: * @uri: pointer to an xmlURI
1372: *
1373: * Free up the xmlURI struct
1374: */
1375: void
1376: xmlFreeURI(xmlURIPtr uri) {
1377: if (uri == NULL) return;
1378:
1379: if (uri->scheme != NULL) xmlFree(uri->scheme);
1380: if (uri->server != NULL) xmlFree(uri->server);
1381: if (uri->user != NULL) xmlFree(uri->user);
1382: if (uri->path != NULL) xmlFree(uri->path);
1383: if (uri->fragment != NULL) xmlFree(uri->fragment);
1384: if (uri->opaque != NULL) xmlFree(uri->opaque);
1385: if (uri->authority != NULL) xmlFree(uri->authority);
1386: if (uri->query != NULL) xmlFree(uri->query);
1387: if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1388: xmlFree(uri);
1389: }
1390:
1391: /************************************************************************
1392: * *
1393: * Helper functions *
1394: * *
1395: ************************************************************************/
1396:
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : keep only the last slash of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so strcpy() must not be
         * used; copy by hand, terminator included.
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* If there are no previous segments, then keep going from here.
         * Nothing can precede "cur" when it is already at the start of
         * the buffer.
         */
        if (cur == path)
            continue;

        /* Step back over the slash(es) preceding "cur". */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        /* Still on a slash: only slashes precede "cur", so there is no
         * previous segment.  Testing the character rather than comparing
         * "segp" with "path" matters: a one-character first segment of a
         * relative path (as in "a/b/../..") also ends exactly at "path"
         * and must be revisited.
         */
        if (segp[0] == '/')
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1585:
/* Returns 1 when @c is one of 0-9, a-f or A-F, and 0 otherwise. */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    return(((c >= 'A') && (c <= 'F')) ? 1 : 0);
}
1593:
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Replaces every "%XX" sequence (two hexadecimal digits) found in the
 * first @len bytes of @str by the byte it encodes; all other bytes are
 * copied unchanged. No check is made that the string is a valid URI and
 * no character encoding is applied to the decoded bytes.
 *
 * The output is never longer than the input. It is written, followed by
 * a terminating 0, into @target when one is given, otherwise into a newly
 * allocated buffer of @len + 1 bytes.
 *
 * Returns @target or the newly allocated string, NULL in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0)
        len = strlen(str);
    if (len < 0)
        return(NULL);

    ret = target;
    if (ret == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    in = str;
    out = ret;
    while (len > 0) {
        int is_escape = (len > 2) && (*in == '%') &&
                        (is_hex(in[1])) && (is_hex(in[2]));

        if (!is_escape) {
            /* Plain byte, or a '%' not followed by two hex digits. */
            *out++ = *in++;
            len--;
        } else {
            int value = 0;
            int i;

            /* Both characters were validated by is_hex() above. */
            for (i = 1; i <= 2; i++) {
                char c = in[i];
                int digit;

                if ((c >= '0') && (c <= '9'))
                    digit = c - '0';
                else if ((c >= 'a') && (c <= 'f'))
                    digit = (c - 'a') + 10;
                else
                    digit = (c - 'A') + 10;
                value = value * 16 + digit;
            }
            *out++ = value;
            in += 3;
            len -= 3;
        }
    }
    *out = 0;
    return(ret);
}
1655:
1656: /**
1657: * xmlURIEscapeStr:
1658: * @str: string to escape
1659: * @list: exception list string of chars not to escape
1660: *
1661: * This routine escapes a string to hex, ignoring reserved characters (a-z)
1662: * and the characters in the exception list.
1663: *
1664: * Returns a new escaped string or NULL in case of error.
1665: */
1666: xmlChar *
1667: xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1668: xmlChar *ret, ch;
1669: xmlChar *temp;
1670: const xmlChar *in;
1.1.1.3 ! misho 1671: int len, out;
1.1 misho 1672:
1673: if (str == NULL)
1674: return(NULL);
1675: if (str[0] == 0)
1676: return(xmlStrdup(str));
1677: len = xmlStrlen(str);
1678: if (!(len > 0)) return(NULL);
1679:
1680: len += 20;
1681: ret = (xmlChar *) xmlMallocAtomic(len);
1682: if (ret == NULL) {
1.1.1.3 ! misho 1683: xmlURIErrMemory("escaping URI value\n");
1.1 misho 1684: return(NULL);
1685: }
1686: in = (const xmlChar *) str;
1687: out = 0;
1688: while(*in != 0) {
1689: if (len - out <= 3) {
1.1.1.3 ! misho 1690: temp = xmlSaveUriRealloc(ret, &len);
1.1 misho 1691: if (temp == NULL) {
1.1.1.3 ! misho 1692: xmlURIErrMemory("escaping URI value\n");
1.1 misho 1693: xmlFree(ret);
1694: return(NULL);
1695: }
1696: ret = temp;
1697: }
1698:
1699: ch = *in;
1700:
1701: if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1702: unsigned char val;
1703: ret[out++] = '%';
1704: val = ch >> 4;
1705: if (val <= 9)
1706: ret[out++] = '0' + val;
1707: else
1708: ret[out++] = 'A' + val - 0xA;
1709: val = ch & 0xF;
1710: if (val <= 9)
1711: ret[out++] = '0' + val;
1712: else
1713: ret[out++] = 'A' + val - 0xA;
1714: in++;
1715: } else {
1716: ret[out++] = *in++;
1717: }
1718:
1719: }
1720: ret[out] = 0;
1721: return(ret);
1722: }
1723:
1724: /**
1725: * xmlURIEscape:
1726: * @str: the string of the URI to escape
1727: *
1728: * Escaping routine, does not do validity checks !
1729: * It will try to escape the chars needing this, but this is heuristic
1730: * based it's impossible to be sure.
1731: *
1732: * Returns an copy of the string, but escaped
1733: *
1734: * 25 May 2001
1735: * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1736: * according to RFC2396.
1737: * - Carl Douglas
1738: */
1739: xmlChar *
1740: xmlURIEscape(const xmlChar * str)
1741: {
1742: xmlChar *ret, *segment = NULL;
1743: xmlURIPtr uri;
1744: int ret2;
1745:
1746: #define NULLCHK(p) if(!p) { \
1.1.1.3 ! misho 1747: xmlURIErrMemory("escaping URI value\n"); \
! 1748: xmlFreeURI(uri); \
! 1749: return NULL; } \
1.1 misho 1750:
1751: if (str == NULL)
1752: return (NULL);
1753:
1754: uri = xmlCreateURI();
1755: if (uri != NULL) {
1756: /*
1757: * Allow escaping errors in the unescaped form
1758: */
1759: uri->cleanup = 1;
1760: ret2 = xmlParseURIReference(uri, (const char *)str);
1761: if (ret2) {
1762: xmlFreeURI(uri);
1763: return (NULL);
1764: }
1765: }
1766:
1767: if (!uri)
1768: return NULL;
1769:
1770: ret = NULL;
1771:
1772: if (uri->scheme) {
1773: segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1774: NULLCHK(segment)
1775: ret = xmlStrcat(ret, segment);
1776: ret = xmlStrcat(ret, BAD_CAST ":");
1777: xmlFree(segment);
1778: }
1779:
1780: if (uri->authority) {
1781: segment =
1782: xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1783: NULLCHK(segment)
1784: ret = xmlStrcat(ret, BAD_CAST "//");
1785: ret = xmlStrcat(ret, segment);
1786: xmlFree(segment);
1787: }
1788:
1789: if (uri->user) {
1790: segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1791: NULLCHK(segment)
1.1.1.3 ! misho 1792: ret = xmlStrcat(ret,BAD_CAST "//");
1.1 misho 1793: ret = xmlStrcat(ret, segment);
1794: ret = xmlStrcat(ret, BAD_CAST "@");
1795: xmlFree(segment);
1796: }
1797:
1798: if (uri->server) {
1799: segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1800: NULLCHK(segment)
1801: if (uri->user == NULL)
1802: ret = xmlStrcat(ret, BAD_CAST "//");
1803: ret = xmlStrcat(ret, segment);
1804: xmlFree(segment);
1805: }
1806:
1807: if (uri->port) {
1808: xmlChar port[10];
1809:
1810: snprintf((char *) port, 10, "%d", uri->port);
1811: ret = xmlStrcat(ret, BAD_CAST ":");
1812: ret = xmlStrcat(ret, port);
1813: }
1814:
1815: if (uri->path) {
1816: segment =
1817: xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1818: NULLCHK(segment)
1819: ret = xmlStrcat(ret, segment);
1820: xmlFree(segment);
1821: }
1822:
1823: if (uri->query_raw) {
1824: ret = xmlStrcat(ret, BAD_CAST "?");
1825: ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1826: }
1827: else if (uri->query) {
1828: segment =
1829: xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1830: NULLCHK(segment)
1831: ret = xmlStrcat(ret, BAD_CAST "?");
1832: ret = xmlStrcat(ret, segment);
1833: xmlFree(segment);
1834: }
1835:
1836: if (uri->opaque) {
1837: segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1838: NULLCHK(segment)
1839: ret = xmlStrcat(ret, segment);
1840: xmlFree(segment);
1841: }
1842:
1843: if (uri->fragment) {
1844: segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1845: NULLCHK(segment)
1846: ret = xmlStrcat(ret, BAD_CAST "#");
1847: ret = xmlStrcat(ret, segment);
1848: xmlFree(segment);
1849: }
1850:
1851: xmlFreeURI(uri);
1852: #undef NULLCHK
1853:
1854: return (ret);
1855: }
1856:
1857: /************************************************************************
1858: * *
1859: * Public functions *
1860: * *
1861: ************************************************************************/
1862:
1863: /**
1864: * xmlBuildURI:
1865: * @URI: the URI instance found in the document
1866: * @base: the base value
1867: *
1868: * Computes he final URI of the reference done by checking that
1869: * the given URI is valid, and building the final URI using the
1.1.1.3 ! misho 1870: * base URI. This is processed according to section 5.2 of the
1.1 misho 1871: * RFC 2396
1872: *
1873: * 5.2. Resolving Relative References to Absolute Form
1874: *
1875: * Returns a new URI string (to be freed by the caller) or NULL in case
1876: * of error.
1877: */
1878: xmlChar *
1879: xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1880: xmlChar *val = NULL;
1881: int ret, len, indx, cur, out;
1882: xmlURIPtr ref = NULL;
1883: xmlURIPtr bas = NULL;
1884: xmlURIPtr res = NULL;
1885:
1886: /*
1887: * 1) The URI reference is parsed into the potential four components and
1888: * fragment identifier, as described in Section 4.3.
1889: *
1890: * NOTE that a completely empty URI is treated by modern browsers
1891: * as a reference to "." rather than as a synonym for the current
1892: * URI. Should we do that here?
1893: */
1.1.1.3 ! misho 1894: if (URI == NULL)
1.1 misho 1895: ret = -1;
1896: else {
1897: if (*URI) {
1898: ref = xmlCreateURI();
1899: if (ref == NULL)
1900: goto done;
1901: ret = xmlParseURIReference(ref, (const char *) URI);
1902: }
1903: else
1904: ret = 0;
1905: }
1906: if (ret != 0)
1907: goto done;
1908: if ((ref != NULL) && (ref->scheme != NULL)) {
1909: /*
1910: * The URI is absolute don't modify.
1911: */
1912: val = xmlStrdup(URI);
1913: goto done;
1914: }
1915: if (base == NULL)
1916: ret = -1;
1917: else {
1918: bas = xmlCreateURI();
1919: if (bas == NULL)
1920: goto done;
1921: ret = xmlParseURIReference(bas, (const char *) base);
1922: }
1923: if (ret != 0) {
1924: if (ref)
1925: val = xmlSaveUri(ref);
1926: goto done;
1927: }
1928: if (ref == NULL) {
1929: /*
1930: * the base fragment must be ignored
1931: */
1932: if (bas->fragment != NULL) {
1933: xmlFree(bas->fragment);
1934: bas->fragment = NULL;
1935: }
1936: val = xmlSaveUri(bas);
1937: goto done;
1938: }
1939:
1940: /*
1941: * 2) If the path component is empty and the scheme, authority, and
1942: * query components are undefined, then it is a reference to the
1943: * current document and we are done. Otherwise, the reference URI's
1944: * query and fragment components are defined as found (or not found)
1945: * within the URI reference and not inherited from the base URI.
1946: *
1947: * NOTE that in modern browsers, the parsing differs from the above
1948: * in the following aspect: the query component is allowed to be
1949: * defined while still treating this as a reference to the current
1950: * document.
1951: */
1952: res = xmlCreateURI();
1953: if (res == NULL)
1954: goto done;
1955: if ((ref->scheme == NULL) && (ref->path == NULL) &&
1956: ((ref->authority == NULL) && (ref->server == NULL))) {
1957: if (bas->scheme != NULL)
1958: res->scheme = xmlMemStrdup(bas->scheme);
1959: if (bas->authority != NULL)
1960: res->authority = xmlMemStrdup(bas->authority);
1961: else if (bas->server != NULL) {
1962: res->server = xmlMemStrdup(bas->server);
1963: if (bas->user != NULL)
1964: res->user = xmlMemStrdup(bas->user);
1.1.1.3 ! misho 1965: res->port = bas->port;
1.1 misho 1966: }
1967: if (bas->path != NULL)
1968: res->path = xmlMemStrdup(bas->path);
1969: if (ref->query_raw != NULL)
1970: res->query_raw = xmlMemStrdup (ref->query_raw);
1971: else if (ref->query != NULL)
1972: res->query = xmlMemStrdup(ref->query);
1973: else if (bas->query_raw != NULL)
1974: res->query_raw = xmlMemStrdup(bas->query_raw);
1975: else if (bas->query != NULL)
1976: res->query = xmlMemStrdup(bas->query);
1977: if (ref->fragment != NULL)
1978: res->fragment = xmlMemStrdup(ref->fragment);
1979: goto step_7;
1980: }
1981:
1982: /*
1983: * 3) If the scheme component is defined, indicating that the reference
1984: * starts with a scheme name, then the reference is interpreted as an
1985: * absolute URI and we are done. Otherwise, the reference URI's
1986: * scheme is inherited from the base URI's scheme component.
1987: */
1988: if (ref->scheme != NULL) {
1989: val = xmlSaveUri(ref);
1990: goto done;
1991: }
1992: if (bas->scheme != NULL)
1993: res->scheme = xmlMemStrdup(bas->scheme);
1.1.1.3 ! misho 1994:
1.1 misho 1995: if (ref->query_raw != NULL)
1996: res->query_raw = xmlMemStrdup(ref->query_raw);
1997: else if (ref->query != NULL)
1998: res->query = xmlMemStrdup(ref->query);
1999: if (ref->fragment != NULL)
2000: res->fragment = xmlMemStrdup(ref->fragment);
2001:
2002: /*
2003: * 4) If the authority component is defined, then the reference is a
2004: * network-path and we skip to step 7. Otherwise, the reference
2005: * URI's authority is inherited from the base URI's authority
2006: * component, which will also be undefined if the URI scheme does not
2007: * use an authority component.
2008: */
2009: if ((ref->authority != NULL) || (ref->server != NULL)) {
2010: if (ref->authority != NULL)
2011: res->authority = xmlMemStrdup(ref->authority);
2012: else {
2013: res->server = xmlMemStrdup(ref->server);
2014: if (ref->user != NULL)
2015: res->user = xmlMemStrdup(ref->user);
1.1.1.3 ! misho 2016: res->port = ref->port;
1.1 misho 2017: }
2018: if (ref->path != NULL)
2019: res->path = xmlMemStrdup(ref->path);
2020: goto step_7;
2021: }
2022: if (bas->authority != NULL)
2023: res->authority = xmlMemStrdup(bas->authority);
2024: else if (bas->server != NULL) {
2025: res->server = xmlMemStrdup(bas->server);
2026: if (bas->user != NULL)
2027: res->user = xmlMemStrdup(bas->user);
1.1.1.3 ! misho 2028: res->port = bas->port;
1.1 misho 2029: }
2030:
2031: /*
2032: * 5) If the path component begins with a slash character ("/"), then
2033: * the reference is an absolute-path and we skip to step 7.
2034: */
2035: if ((ref->path != NULL) && (ref->path[0] == '/')) {
2036: res->path = xmlMemStrdup(ref->path);
2037: goto step_7;
2038: }
2039:
2040:
2041: /*
2042: * 6) If this step is reached, then we are resolving a relative-path
2043: * reference. The relative path needs to be merged with the base
2044: * URI's path. Although there are many ways to do this, we will
2045: * describe a simple method using a separate string buffer.
2046: *
2047: * Allocate a buffer large enough for the result string.
2048: */
2049: len = 2; /* extra / and 0 */
2050: if (ref->path != NULL)
2051: len += strlen(ref->path);
2052: if (bas->path != NULL)
2053: len += strlen(bas->path);
2054: res->path = (char *) xmlMallocAtomic(len);
2055: if (res->path == NULL) {
1.1.1.3 ! misho 2056: xmlURIErrMemory("resolving URI against base\n");
1.1 misho 2057: goto done;
2058: }
2059: res->path[0] = 0;
2060:
2061: /*
2062: * a) All but the last segment of the base URI's path component is
2063: * copied to the buffer. In other words, any characters after the
2064: * last (right-most) slash character, if any, are excluded.
2065: */
2066: cur = 0;
2067: out = 0;
2068: if (bas->path != NULL) {
2069: while (bas->path[cur] != 0) {
2070: while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2071: cur++;
2072: if (bas->path[cur] == 0)
2073: break;
2074:
2075: cur++;
2076: while (out < cur) {
2077: res->path[out] = bas->path[out];
2078: out++;
2079: }
2080: }
2081: }
2082: res->path[out] = 0;
2083:
2084: /*
2085: * b) The reference's path component is appended to the buffer
2086: * string.
2087: */
2088: if (ref->path != NULL && ref->path[0] != 0) {
2089: indx = 0;
2090: /*
2091: * Ensure the path includes a '/'
2092: */
2093: if ((out == 0) && (bas->server != NULL))
2094: res->path[out++] = '/';
2095: while (ref->path[indx] != 0) {
2096: res->path[out++] = ref->path[indx++];
2097: }
2098: }
2099: res->path[out] = 0;
2100:
2101: /*
2102: * Steps c) to h) are really path normalization steps
2103: */
2104: xmlNormalizeURIPath(res->path);
2105:
2106: step_7:
2107:
2108: /*
2109: * 7) The resulting URI components, including any inherited from the
2110: * base URI, are recombined to give the absolute form of the URI
2111: * reference.
2112: */
2113: val = xmlSaveUri(res);
2114:
2115: done:
2116: if (ref != NULL)
2117: xmlFreeURI(ref);
2118: if (bas != NULL)
2119: xmlFreeURI(bas);
2120: if (res != NULL)
2121: xmlFreeURI(res);
2122: return(val);
2123: }
2124:
2125: /**
2126: * xmlBuildRelativeURI:
2127: * @URI: the URI reference under consideration
2128: * @base: the base value
2129: *
2130: * Expresses the URI of the reference in terms relative to the
2131: * base. Some examples of this operation include:
2132: * base = "http://site1.com/docs/book1.html"
2133: * URI input URI returned
2134: * docs/pic1.gif pic1.gif
2135: * docs/img/pic1.gif img/pic1.gif
2136: * img/pic1.gif ../img/pic1.gif
2137: * http://site1.com/docs/pic1.gif pic1.gif
2138: * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2139: *
2140: * base = "docs/book1.html"
2141: * URI input URI returned
2142: * docs/pic1.gif pic1.gif
2143: * docs/img/pic1.gif img/pic1.gif
2144: * img/pic1.gif ../img/pic1.gif
2145: * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2146: *
2147: *
2148: * Note: if the URI reference is really weird or complicated, it may be
2149: * worthwhile to first convert it into a "nice" one by calling
2150: * xmlBuildURI (using 'base') before calling this routine,
2151: * since this routine (for reasonable efficiency) assumes URI has
2152: * already been through some validation.
2153: *
2154: * Returns a new URI string (to be freed by the caller) or NULL in case
2155: * of error.
2156: */
2157: xmlChar *
2158: xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2159: {
2160: xmlChar *val = NULL;
2161: int ret;
2162: int ix;
2163: int pos = 0;
2164: int nbslash = 0;
2165: int len;
2166: xmlURIPtr ref = NULL;
2167: xmlURIPtr bas = NULL;
2168: xmlChar *bptr, *uptr, *vptr;
2169: int remove_path = 0;
2170:
2171: if ((URI == NULL) || (*URI == 0))
2172: return NULL;
2173:
2174: /*
2175: * First parse URI into a standard form
2176: */
2177: ref = xmlCreateURI ();
2178: if (ref == NULL)
2179: return NULL;
2180: /* If URI not already in "relative" form */
2181: if (URI[0] != '.') {
2182: ret = xmlParseURIReference (ref, (const char *) URI);
2183: if (ret != 0)
2184: goto done; /* Error in URI, return NULL */
2185: } else
2186: ref->path = (char *)xmlStrdup(URI);
2187:
2188: /*
2189: * Next parse base into the same standard form
2190: */
2191: if ((base == NULL) || (*base == 0)) {
2192: val = xmlStrdup (URI);
2193: goto done;
2194: }
2195: bas = xmlCreateURI ();
2196: if (bas == NULL)
2197: goto done;
2198: if (base[0] != '.') {
2199: ret = xmlParseURIReference (bas, (const char *) base);
2200: if (ret != 0)
2201: goto done; /* Error in base, return NULL */
2202: } else
2203: bas->path = (char *)xmlStrdup(base);
2204:
2205: /*
2206: * If the scheme / server on the URI differs from the base,
2207: * just return the URI
2208: */
2209: if ((ref->scheme != NULL) &&
2210: ((bas->scheme == NULL) ||
2211: (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2212: (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2213: val = xmlStrdup (URI);
2214: goto done;
2215: }
2216: if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2217: val = xmlStrdup(BAD_CAST "");
2218: goto done;
2219: }
2220: if (bas->path == NULL) {
2221: val = xmlStrdup((xmlChar *)ref->path);
2222: goto done;
2223: }
2224: if (ref->path == NULL) {
2225: ref->path = (char *) "/";
2226: remove_path = 1;
2227: }
2228:
2229: /*
2230: * At this point (at last!) we can compare the two paths
2231: *
2232: * First we take care of the special case where either of the
2233: * two path components may be missing (bug 316224)
2234: */
2235: if (bas->path == NULL) {
2236: if (ref->path != NULL) {
2237: uptr = (xmlChar *) ref->path;
2238: if (*uptr == '/')
2239: uptr++;
2240: /* exception characters from xmlSaveUri */
2241: val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2242: }
2243: goto done;
2244: }
2245: bptr = (xmlChar *)bas->path;
2246: if (ref->path == NULL) {
2247: for (ix = 0; bptr[ix] != 0; ix++) {
2248: if (bptr[ix] == '/')
2249: nbslash++;
2250: }
2251: uptr = NULL;
2252: len = 1; /* this is for a string terminator only */
2253: } else {
2254: /*
2255: * Next we compare the two strings and find where they first differ
2256: */
2257: if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2258: pos += 2;
2259: if ((*bptr == '.') && (bptr[1] == '/'))
2260: bptr += 2;
2261: else if ((*bptr == '/') && (ref->path[pos] != '/'))
2262: bptr++;
2263: while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2264: pos++;
2265:
2266: if (bptr[pos] == ref->path[pos]) {
2267: val = xmlStrdup(BAD_CAST "");
2268: goto done; /* (I can't imagine why anyone would do this) */
2269: }
2270:
2271: /*
2272: * In URI, "back up" to the last '/' encountered. This will be the
2273: * beginning of the "unique" suffix of URI
2274: */
2275: ix = pos;
2276: if ((ref->path[ix] == '/') && (ix > 0))
2277: ix--;
2278: else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2279: ix -= 2;
2280: for (; ix > 0; ix--) {
2281: if (ref->path[ix] == '/')
2282: break;
2283: }
2284: if (ix == 0) {
2285: uptr = (xmlChar *)ref->path;
2286: } else {
2287: ix++;
2288: uptr = (xmlChar *)&ref->path[ix];
2289: }
2290:
2291: /*
2292: * In base, count the number of '/' from the differing point
2293: */
2294: if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2295: for (; bptr[ix] != 0; ix++) {
2296: if (bptr[ix] == '/')
2297: nbslash++;
2298: }
2299: }
2300: len = xmlStrlen (uptr) + 1;
2301: }
1.1.1.3 ! misho 2302:
1.1 misho 2303: if (nbslash == 0) {
2304: if (uptr != NULL)
2305: /* exception characters from xmlSaveUri */
2306: val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2307: goto done;
2308: }
2309:
2310: /*
2311: * Allocate just enough space for the returned string -
2312: * length of the remainder of the URI, plus enough space
2313: * for the "../" groups, plus one for the terminator
2314: */
2315: val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2316: if (val == NULL) {
1.1.1.3 ! misho 2317: xmlURIErrMemory("building relative URI\n");
1.1 misho 2318: goto done;
2319: }
2320: vptr = val;
2321: /*
2322: * Put in as many "../" as needed
2323: */
2324: for (; nbslash>0; nbslash--) {
2325: *vptr++ = '.';
2326: *vptr++ = '.';
2327: *vptr++ = '/';
2328: }
2329: /*
2330: * Finish up with the end of the URI
2331: */
2332: if (uptr != NULL) {
2333: if ((vptr > val) && (len > 0) &&
2334: (uptr[0] == '/') && (vptr[-1] == '/')) {
2335: memcpy (vptr, uptr + 1, len - 1);
2336: vptr[len - 2] = 0;
2337: } else {
2338: memcpy (vptr, uptr, len);
2339: vptr[len - 1] = 0;
2340: }
2341: } else {
2342: vptr[len - 1] = 0;
2343: }
2344:
2345: /* escape the freshly-built path */
2346: vptr = val;
2347: /* exception characters from xmlSaveUri */
2348: val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2349: xmlFree(vptr);
2350:
2351: done:
2352: /*
2353: * Free the working variables
2354: */
2355: if (remove_path != 0)
2356: ref->path = NULL;
2357: if (ref != NULL)
2358: xmlFreeURI (ref);
2359: if (bas != NULL)
2360: xmlFreeURI (bas);
2361:
2362: return val;
2363: }
2364:
2365: /**
2366: * xmlCanonicPath:
2367: * @path: the resource locator in a filesystem notation
2368: *
1.1.1.3 ! misho 2369: * Constructs a canonic path from the specified path.
1.1 misho 2370: *
1.1.1.3 ! misho 2371: * Returns a new canonic path, or a duplicate of the path parameter if the
1.1 misho 2372: * construction fails. The caller is responsible for freeing the memory occupied
1.1.1.3 ! misho 2373: * by the returned string. If there is insufficient memory available, or the
1.1 misho 2374: * argument is NULL, the function returns NULL.
2375: */
1.1.1.3 ! misho 2376: #define IS_WINDOWS_PATH(p) \
1.1 misho 2377: ((p != NULL) && \
2378: (((p[0] >= 'a') && (p[0] <= 'z')) || \
2379: ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2380: (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2381: xmlChar *
2382: xmlCanonicPath(const xmlChar *path)
2383: {
2384: /*
2385: * For Windows implementations, additional work needs to be done to
2386: * replace backslashes in pathnames with "forward slashes"
2387: */
1.1.1.3 ! misho 2388: #if defined(_WIN32) && !defined(__CYGWIN__)
1.1 misho 2389: int len = 0;
2390: int i = 0;
2391: xmlChar *p = NULL;
2392: #endif
2393: xmlURIPtr uri;
2394: xmlChar *ret;
2395: const xmlChar *absuri;
2396:
2397: if (path == NULL)
2398: return(NULL);
2399:
1.1.1.3 ! misho 2400: #if defined(_WIN32)
! 2401: /*
! 2402: * We must not change the backslashes to slashes if the the path
! 2403: * starts with \\?\
! 2404: * Those paths can be up to 32k characters long.
! 2405: * Was added specifically for OpenOffice, those paths can't be converted
! 2406: * to URIs anyway.
! 2407: */
! 2408: if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
! 2409: (path[3] == '\\') )
! 2410: return xmlStrdup((const xmlChar *) path);
! 2411: #endif
! 2412:
! 2413: /* sanitize filename starting with // so it can be used as URI */
1.1 misho 2414: if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2415: path++;
2416:
2417: if ((uri = xmlParseURI((const char *) path)) != NULL) {
2418: xmlFreeURI(uri);
2419: return xmlStrdup(path);
2420: }
2421:
2422: /* Check if this is an "absolute uri" */
2423: absuri = xmlStrstr(path, BAD_CAST "://");
2424: if (absuri != NULL) {
2425: int l, j;
2426: unsigned char c;
2427: xmlChar *escURI;
2428:
2429: /*
2430: * this looks like an URI where some parts have not been
2431: * escaped leading to a parsing problem. Check that the first
2432: * part matches a protocol.
2433: */
2434: l = absuri - path;
2435: /* Bypass if first part (part before the '://') is > 20 chars */
2436: if ((l <= 0) || (l > 20))
2437: goto path_processing;
2438: /* Bypass if any non-alpha characters are present in first part */
2439: for (j = 0;j < l;j++) {
2440: c = path[j];
2441: if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2442: goto path_processing;
2443: }
2444:
2445: /* Escape all except the characters specified in the supplied path */
2446: escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2447: if (escURI != NULL) {
2448: /* Try parsing the escaped path */
2449: uri = xmlParseURI((const char *) escURI);
2450: /* If successful, return the escaped string */
2451: if (uri != NULL) {
2452: xmlFreeURI(uri);
2453: return escURI;
2454: }
2455: }
2456: }
2457:
2458: path_processing:
2459: /* For Windows implementations, replace backslashes with 'forward slashes' */
1.1.1.3 ! misho 2460: #if defined(_WIN32) && !defined(__CYGWIN__)
1.1 misho 2461: /*
2462: * Create a URI structure
2463: */
2464: uri = xmlCreateURI();
2465: if (uri == NULL) { /* Guard against 'out of memory' */
2466: return(NULL);
2467: }
2468:
2469: len = xmlStrlen(path);
2470: if ((len > 2) && IS_WINDOWS_PATH(path)) {
2471: /* make the scheme 'file' */
2472: uri->scheme = xmlStrdup(BAD_CAST "file");
2473: /* allocate space for leading '/' + path + string terminator */
2474: uri->path = xmlMallocAtomic(len + 2);
2475: if (uri->path == NULL) {
2476: xmlFreeURI(uri); /* Guard agains 'out of memory' */
2477: return(NULL);
2478: }
2479: /* Put in leading '/' plus path */
2480: uri->path[0] = '/';
2481: p = uri->path + 1;
2482: strncpy(p, path, len + 1);
2483: } else {
2484: uri->path = xmlStrdup(path);
2485: if (uri->path == NULL) {
2486: xmlFreeURI(uri);
2487: return(NULL);
2488: }
2489: p = uri->path;
2490: }
2491: /* Now change all occurences of '\' to '/' */
2492: while (*p != '\0') {
2493: if (*p == '\\')
2494: *p = '/';
2495: p++;
2496: }
2497:
2498: if (uri->scheme == NULL) {
2499: ret = xmlStrdup((const xmlChar *) uri->path);
2500: } else {
2501: ret = xmlSaveUri(uri);
2502: }
2503:
2504: xmlFreeURI(uri);
2505: #else
2506: ret = xmlStrdup((const xmlChar *) path);
2507: #endif
2508: return(ret);
2509: }
2510:
2511: /**
2512: * xmlPathToURI:
2513: * @path: the resource locator in a filesystem notation
2514: *
2515: * Constructs an URI expressing the existing path
2516: *
1.1.1.3 ! misho 2517: * Returns a new URI, or a duplicate of the path parameter if the
1.1 misho 2518: * construction fails. The caller is responsible for freeing the memory
2519: * occupied by the returned string. If there is insufficient memory available,
2520: * or the argument is NULL, the function returns NULL.
2521: */
2522: xmlChar *
2523: xmlPathToURI(const xmlChar *path)
2524: {
2525: xmlURIPtr uri;
2526: xmlURI temp;
2527: xmlChar *ret, *cal;
2528:
2529: if (path == NULL)
2530: return(NULL);
2531:
2532: if ((uri = xmlParseURI((const char *) path)) != NULL) {
2533: xmlFreeURI(uri);
2534: return xmlStrdup(path);
2535: }
2536: cal = xmlCanonicPath(path);
2537: if (cal == NULL)
2538: return(NULL);
2539: #if defined(_WIN32) && !defined(__CYGWIN__)
1.1.1.3 ! misho 2540: /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
1.1 misho 2541: If 'cal' is a valid URI allready then we are done here, as continuing would make
2542: it invalid. */
2543: if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2544: xmlFreeURI(uri);
2545: return cal;
2546: }
2547: /* 'cal' can contain a relative path with backslashes. If that is processed
2548: by xmlSaveURI, they will be escaped and the external entity loader machinery
2549: will fail. So convert them to slashes. Misuse 'ret' for walking. */
2550: ret = cal;
2551: while (*ret != '\0') {
2552: if (*ret == '\\')
2553: *ret = '/';
2554: ret++;
2555: }
2556: #endif
2557: memset(&temp, 0, sizeof(temp));
2558: temp.path = (char *) cal;
2559: ret = xmlSaveUri(&temp);
2560: xmlFree(cal);
2561: return(ret);
2562: }
2563: #define bottom_uri
2564: #include "elfgcchack.h"
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>