Annotation of embedaddon/libxml2/uri.c, revision 1.1.1.2
1.1 misho 1: /**
2: * uri.c: set of generic URI related routines
3: *
4: * Reference: RFCs 3986, 2732 and 2373
5: *
6: * See Copyright for the status of this software.
7: *
1.1.1.2 ! misho 8: * TODO: that module behaves really badly on OOM situation
! 9: *
1.1 misho 10: * daniel@veillard.com
11: */
12:
13: #define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
22:
23: static void xmlCleanURI(xmlURIPtr uri);
24:
/*
 * Character classes following the old RFC 2396 rules, kept for the
 * legacy handling code. All of these macros may evaluate their argument
 * more than once, so the argument must not have side effects.
 */

/*
 * lowalpha = "a" .. "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" .. "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" .. "9"
 *
 * Any IS_DIGIT definition made earlier is dropped first so that the
 * definition below is the one in effect.
 */
#undef IS_DIGIT
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||                    \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the other macros of this group, IS_UNWISE takes a pointer to
 * the character to test, not the character itself.
 */
#define IS_UNWISE(p)                                                    \
    ((*(p) == '{') || (*(p) == '}') || (*(p) == '|') ||                 \
     (*(p) == '\\') || (*(p) == '^') || (*(p) == '[') ||                \
     (*(p) == ']') || (*(p) == '`'))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
92:
/*
 * Move the pointer @p to the next character, stepping over a complete
 * "%XX" escape (three bytes) when @p currently points at a '%'.
 *
 * Nothing here verifies that two more bytes follow the '%': callers are
 * expected to have validated the escape before advancing. @p is
 * evaluated more than once and must be a modifiable lvalue without side
 * effects; it is fully parenthesized so that expressions such as *pp
 * advance the intended pointer.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
98:
/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first @n bytes of @s through xmlStrndup() and hand the
 * result back as a plain char pointer. The expansion is wrapped in
 * parentheses so that it behaves as a single operand wherever it is used.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
110:
111: /************************************************************************
112: * *
113: * RFC 3986 parser *
114: * *
115: ************************************************************************/
116:
/*
 * Character-class tests for the RFC 3986 grammar. Every macro takes a
 * pointer to the character to examine and evaluates it several times,
 * so the argument must not have side effects.
 */

/* DIGIT = "0" .. "9" */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))

/* ALPHA = "a" .. "z" / "A" .. "Z" */
#define ISA_ALPHA(p)                                                    \
    (((*(p) >= 'a') && (*(p) <= 'z')) ||                                \
     ((*(p) >= 'A') && (*(p) <= 'Z')))

/* HEXDIG = DIGIT / "a" .. "f" / "A" .. "F" */
#define ISA_HEXDIG(p)                                                   \
    (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||                \
     ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                  / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
    (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||           \
     ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||           \
     ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||           \
     ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
    (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||           \
     ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||           \
     ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
    ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||             \
     ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two hex digits are only looked at when the preceding character
 * matched, so the test never reads past a terminating 0. The argument
 * is parenthesized before the offsets are added so that any pointer
 * expression can be passed.
 */
#define ISA_PCT_ENCODED(p)                                              \
    ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
    (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||     \
     ((*(p) == ':')) || ((*(p) == '@')))
166:
167: /**
168: * xmlParse3986Scheme:
169: * @uri: pointer to an URI structure
170: * @str: pointer to the string to analyze
171: *
172: * Parse an URI scheme
173: *
174: * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
175: *
176: * Returns 0 or the error code
177: */
178: static int
179: xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
180: const char *cur;
181:
182: if (str == NULL)
183: return(-1);
184:
185: cur = *str;
186: if (!ISA_ALPHA(cur))
187: return(2);
188: cur++;
189: while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
190: (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
191: if (uri != NULL) {
192: if (uri->scheme != NULL) xmlFree(uri->scheme);
193: uri->scheme = STRNDUP(*str, cur - *str);
194: }
195: *str = cur;
196: return(0);
197: }
198:
199: /**
200: * xmlParse3986Fragment:
201: * @uri: pointer to an URI structure
202: * @str: pointer to the string to analyze
203: *
204: * Parse the query part of an URI
205: *
206: * fragment = *( pchar / "/" / "?" )
207: * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
208: * in the fragment identifier but this is used very broadly for
209: * xpointer scheme selection, so we are allowing it here to not break
210: * for example all the DocBook processing chains.
211: *
212: * Returns 0 or the error code
213: */
214: static int
215: xmlParse3986Fragment(xmlURIPtr uri, const char **str)
216: {
217: const char *cur;
218:
219: if (str == NULL)
220: return (-1);
221:
222: cur = *str;
223:
224: while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
225: (*cur == '[') || (*cur == ']') ||
226: ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
227: NEXT(cur);
228: if (uri != NULL) {
229: if (uri->fragment != NULL)
230: xmlFree(uri->fragment);
231: if (uri->cleanup & 2)
232: uri->fragment = STRNDUP(*str, cur - *str);
233: else
234: uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
235: }
236: *str = cur;
237: return (0);
238: }
239:
240: /**
241: * xmlParse3986Query:
242: * @uri: pointer to an URI structure
243: * @str: pointer to the string to analyze
244: *
245: * Parse the query part of an URI
246: *
247: * query = *uric
248: *
249: * Returns 0 or the error code
250: */
251: static int
252: xmlParse3986Query(xmlURIPtr uri, const char **str)
253: {
254: const char *cur;
255:
256: if (str == NULL)
257: return (-1);
258:
259: cur = *str;
260:
261: while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
262: ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
263: NEXT(cur);
264: if (uri != NULL) {
265: if (uri->query != NULL)
266: xmlFree(uri->query);
267: if (uri->cleanup & 2)
268: uri->query = STRNDUP(*str, cur - *str);
269: else
270: uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
271:
272: /* Save the raw bytes of the query as well.
273: * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
274: */
275: if (uri->query_raw != NULL)
276: xmlFree (uri->query_raw);
277: uri->query_raw = STRNDUP (*str, cur - *str);
278: }
279: *str = cur;
280: return (0);
281: }
282:
283: /**
284: * xmlParse3986Port:
285: * @uri: pointer to an URI structure
286: * @str: the string to analyze
287: *
288: * Parse a port part and fills in the appropriate fields
289: * of the @uri structure
290: *
291: * port = *DIGIT
292: *
293: * Returns 0 or the error code
294: */
295: static int
296: xmlParse3986Port(xmlURIPtr uri, const char **str)
297: {
298: const char *cur = *str;
299:
300: if (ISA_DIGIT(cur)) {
301: if (uri != NULL)
302: uri->port = 0;
303: while (ISA_DIGIT(cur)) {
304: if (uri != NULL)
305: uri->port = uri->port * 10 + (*cur - '0');
306: cur++;
307: }
308: *str = cur;
309: return(0);
310: }
311: return(1);
312: }
313:
314: /**
315: * xmlParse3986Userinfo:
316: * @uri: pointer to an URI structure
317: * @str: the string to analyze
318: *
319: * Parse an user informations part and fills in the appropriate fields
320: * of the @uri structure
321: *
322: * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
323: *
324: * Returns 0 or the error code
325: */
326: static int
327: xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
328: {
329: const char *cur;
330:
331: cur = *str;
332: while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
333: ISA_SUB_DELIM(cur) || (*cur == ':'))
334: NEXT(cur);
335: if (*cur == '@') {
336: if (uri != NULL) {
337: if (uri->user != NULL) xmlFree(uri->user);
338: if (uri->cleanup & 2)
339: uri->user = STRNDUP(*str, cur - *str);
340: else
341: uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
342: }
343: *str = cur;
344: return(0);
345: }
346: return(1);
347: }
348:
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. One- and two-digit forms must not be followed by a
 * further digit; three-digit forms consume exactly three characters.
 * *@str is only updated on success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* single digit: 0-9 */
        len = 1;
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* two digits: 10-99, a leading zero is not allowed */
        if (cur[0] == '0')
            return(1);
        len = 2;
    } else if (cur[0] == '1') {
        /* 100-199 */
        len = 3;
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        /* 200-249 */
        len = 3;
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* 250-255: the upper bound applies to the third digit */
        len = 3;
    } else {
        return(1);
    }

    *str = cur + len;
    return(0);
}
386: /**
387: * xmlParse3986Host:
388: * @uri: pointer to an URI structure
389: * @str: the string to analyze
390: *
391: * Parse an host part and fills in the appropriate fields
392: * of the @uri structure
393: *
394: * host = IP-literal / IPv4address / reg-name
395: * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
396: * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
397: * reg-name = *( unreserved / pct-encoded / sub-delims )
398: *
399: * Returns 0 or the error code
400: */
401: static int
402: xmlParse3986Host(xmlURIPtr uri, const char **str)
403: {
404: const char *cur = *str;
405: const char *host;
406:
407: host = cur;
408: /*
409: * IPv6 and future adressing scheme are enclosed between brackets
410: */
411: if (*cur == '[') {
412: cur++;
413: while ((*cur != ']') && (*cur != 0))
414: cur++;
415: if (*cur != ']')
416: return(1);
417: cur++;
418: goto found;
419: }
420: /*
421: * try to parse an IPv4
422: */
423: if (ISA_DIGIT(cur)) {
424: if (xmlParse3986DecOctet(&cur) != 0)
425: goto not_ipv4;
426: if (*cur != '.')
427: goto not_ipv4;
428: cur++;
429: if (xmlParse3986DecOctet(&cur) != 0)
430: goto not_ipv4;
431: if (*cur != '.')
432: goto not_ipv4;
433: if (xmlParse3986DecOctet(&cur) != 0)
434: goto not_ipv4;
435: if (*cur != '.')
436: goto not_ipv4;
437: if (xmlParse3986DecOctet(&cur) != 0)
438: goto not_ipv4;
439: goto found;
440: not_ipv4:
441: cur = *str;
442: }
443: /*
444: * then this should be a hostname which can be empty
445: */
446: while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
447: NEXT(cur);
448: found:
449: if (uri != NULL) {
450: if (uri->authority != NULL) xmlFree(uri->authority);
451: uri->authority = NULL;
452: if (uri->server != NULL) xmlFree(uri->server);
453: if (cur != host) {
454: if (uri->cleanup & 2)
455: uri->server = STRNDUP(host, cur - host);
456: else
457: uri->server = xmlURIUnescapeString(host, cur - host, NULL);
458: } else
459: uri->server = NULL;
460: }
461: *str = cur;
462: return(0);
463: }
464:
465: /**
466: * xmlParse3986Authority:
467: * @uri: pointer to an URI structure
468: * @str: the string to analyze
469: *
470: * Parse an authority part and fills in the appropriate fields
471: * of the @uri structure
472: *
473: * authority = [ userinfo "@" ] host [ ":" port ]
474: *
475: * Returns 0 or the error code
476: */
477: static int
478: xmlParse3986Authority(xmlURIPtr uri, const char **str)
479: {
480: const char *cur;
481: int ret;
482:
483: cur = *str;
484: /*
485: * try to parse an userinfo and check for the trailing @
486: */
487: ret = xmlParse3986Userinfo(uri, &cur);
488: if ((ret != 0) || (*cur != '@'))
489: cur = *str;
490: else
491: cur++;
492: ret = xmlParse3986Host(uri, &cur);
493: if (ret != 0) return(ret);
494: if (*cur == ':') {
495: cur++;
496: ret = xmlParse3986Port(uri, &cur);
497: if (ret != 0) return(ret);
498: }
499: *str = cur;
500: return(0);
501: }
502:
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip a path segment and advance *@str past it
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Scanning stops at the first character which is not a pchar or which
 * is equal to @forbid.
 *
 * Returns 0 on success, 1 if the string does not start with a pchar
 *         and @empty is zero
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *pos = *str;

    if (!ISA_PCHAR(pos))
        return(empty ? 0 : 1);

    for (;;) {
        if (!ISA_PCHAR(pos))
            break;
        if (*pos == forbid)
            break;
        NEXT(pos);
    }
    *str = pos;
    return (0);
}
535:
536: /**
537: * xmlParse3986PathAbEmpty:
538: * @uri: pointer to an URI structure
539: * @str: the string to analyze
540: *
541: * Parse an path absolute or empty and fills in the appropriate fields
542: * of the @uri structure
543: *
544: * path-abempty = *( "/" segment )
545: *
546: * Returns 0 or the error code
547: */
548: static int
549: xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
550: {
551: const char *cur;
552: int ret;
553:
554: cur = *str;
555:
556: while (*cur == '/') {
557: cur++;
558: ret = xmlParse3986Segment(&cur, 0, 1);
559: if (ret != 0) return(ret);
560: }
561: if (uri != NULL) {
562: if (uri->path != NULL) xmlFree(uri->path);
563: if (*str != cur) {
564: if (uri->cleanup & 2)
565: uri->path = STRNDUP(*str, cur - *str);
566: else
567: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
568: } else {
569: uri->path = NULL;
570: }
571: }
572: *str = cur;
573: return (0);
574: }
575:
576: /**
577: * xmlParse3986PathAbsolute:
578: * @uri: pointer to an URI structure
579: * @str: the string to analyze
580: *
581: * Parse an path absolute and fills in the appropriate fields
582: * of the @uri structure
583: *
584: * path-absolute = "/" [ segment-nz *( "/" segment ) ]
585: *
586: * Returns 0 or the error code
587: */
588: static int
589: xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
590: {
591: const char *cur;
592: int ret;
593:
594: cur = *str;
595:
596: if (*cur != '/')
597: return(1);
598: cur++;
599: ret = xmlParse3986Segment(&cur, 0, 0);
600: if (ret == 0) {
601: while (*cur == '/') {
602: cur++;
603: ret = xmlParse3986Segment(&cur, 0, 1);
604: if (ret != 0) return(ret);
605: }
606: }
607: if (uri != NULL) {
608: if (uri->path != NULL) xmlFree(uri->path);
609: if (cur != *str) {
610: if (uri->cleanup & 2)
611: uri->path = STRNDUP(*str, cur - *str);
612: else
613: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
614: } else {
615: uri->path = NULL;
616: }
617: }
618: *str = cur;
619: return (0);
620: }
621:
622: /**
623: * xmlParse3986PathRootless:
624: * @uri: pointer to an URI structure
625: * @str: the string to analyze
626: *
627: * Parse an path without root and fills in the appropriate fields
628: * of the @uri structure
629: *
630: * path-rootless = segment-nz *( "/" segment )
631: *
632: * Returns 0 or the error code
633: */
634: static int
635: xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
636: {
637: const char *cur;
638: int ret;
639:
640: cur = *str;
641:
642: ret = xmlParse3986Segment(&cur, 0, 0);
643: if (ret != 0) return(ret);
644: while (*cur == '/') {
645: cur++;
646: ret = xmlParse3986Segment(&cur, 0, 1);
647: if (ret != 0) return(ret);
648: }
649: if (uri != NULL) {
650: if (uri->path != NULL) xmlFree(uri->path);
651: if (cur != *str) {
652: if (uri->cleanup & 2)
653: uri->path = STRNDUP(*str, cur - *str);
654: else
655: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
656: } else {
657: uri->path = NULL;
658: }
659: }
660: *str = cur;
661: return (0);
662: }
663:
664: /**
665: * xmlParse3986PathNoScheme:
666: * @uri: pointer to an URI structure
667: * @str: the string to analyze
668: *
669: * Parse an path which is not a scheme and fills in the appropriate fields
670: * of the @uri structure
671: *
672: * path-noscheme = segment-nz-nc *( "/" segment )
673: *
674: * Returns 0 or the error code
675: */
676: static int
677: xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
678: {
679: const char *cur;
680: int ret;
681:
682: cur = *str;
683:
684: ret = xmlParse3986Segment(&cur, ':', 0);
685: if (ret != 0) return(ret);
686: while (*cur == '/') {
687: cur++;
688: ret = xmlParse3986Segment(&cur, 0, 1);
689: if (ret != 0) return(ret);
690: }
691: if (uri != NULL) {
692: if (uri->path != NULL) xmlFree(uri->path);
693: if (cur != *str) {
694: if (uri->cleanup & 2)
695: uri->path = STRNDUP(*str, cur - *str);
696: else
697: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
698: } else {
699: uri->path = NULL;
700: }
701: }
702: *str = cur;
703: return (0);
704: }
705:
706: /**
707: * xmlParse3986HierPart:
708: * @uri: pointer to an URI structure
709: * @str: the string to analyze
710: *
711: * Parse an hierarchical part and fills in the appropriate fields
712: * of the @uri structure
713: *
714: * hier-part = "//" authority path-abempty
715: * / path-absolute
716: * / path-rootless
717: * / path-empty
718: *
719: * Returns 0 or the error code
720: */
721: static int
722: xmlParse3986HierPart(xmlURIPtr uri, const char **str)
723: {
724: const char *cur;
725: int ret;
726:
727: cur = *str;
728:
729: if ((*cur == '/') && (*(cur + 1) == '/')) {
730: cur += 2;
731: ret = xmlParse3986Authority(uri, &cur);
732: if (ret != 0) return(ret);
733: ret = xmlParse3986PathAbEmpty(uri, &cur);
734: if (ret != 0) return(ret);
735: *str = cur;
736: return(0);
737: } else if (*cur == '/') {
738: ret = xmlParse3986PathAbsolute(uri, &cur);
739: if (ret != 0) return(ret);
740: } else if (ISA_PCHAR(cur)) {
741: ret = xmlParse3986PathRootless(uri, &cur);
742: if (ret != 0) return(ret);
743: } else {
744: /* path-empty is effectively empty */
745: if (uri != NULL) {
746: if (uri->path != NULL) xmlFree(uri->path);
747: uri->path = NULL;
748: }
749: }
750: *str = cur;
751: return (0);
752: }
753:
754: /**
755: * xmlParse3986RelativeRef:
756: * @uri: pointer to an URI structure
757: * @str: the string to analyze
758: *
759: * Parse an URI string and fills in the appropriate fields
760: * of the @uri structure
761: *
762: * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
763: * relative-part = "//" authority path-abempty
764: * / path-absolute
765: * / path-noscheme
766: * / path-empty
767: *
768: * Returns 0 or the error code
769: */
770: static int
771: xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
772: int ret;
773:
774: if ((*str == '/') && (*(str + 1) == '/')) {
775: str += 2;
776: ret = xmlParse3986Authority(uri, &str);
777: if (ret != 0) return(ret);
778: ret = xmlParse3986PathAbEmpty(uri, &str);
779: if (ret != 0) return(ret);
780: } else if (*str == '/') {
781: ret = xmlParse3986PathAbsolute(uri, &str);
782: if (ret != 0) return(ret);
783: } else if (ISA_PCHAR(str)) {
784: ret = xmlParse3986PathNoScheme(uri, &str);
785: if (ret != 0) return(ret);
786: } else {
787: /* path-empty is effectively empty */
788: if (uri != NULL) {
789: if (uri->path != NULL) xmlFree(uri->path);
790: uri->path = NULL;
791: }
792: }
793:
794: if (*str == '?') {
795: str++;
796: ret = xmlParse3986Query(uri, &str);
797: if (ret != 0) return(ret);
798: }
799: if (*str == '#') {
800: str++;
801: ret = xmlParse3986Fragment(uri, &str);
802: if (ret != 0) return(ret);
803: }
804: if (*str != 0) {
805: xmlCleanURI(uri);
806: return(1);
807: }
808: return(0);
809: }
810:
811:
812: /**
813: * xmlParse3986URI:
814: * @uri: pointer to an URI structure
815: * @str: the string to analyze
816: *
817: * Parse an URI string and fills in the appropriate fields
818: * of the @uri structure
819: *
820: * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
821: *
822: * Returns 0 or the error code
823: */
824: static int
825: xmlParse3986URI(xmlURIPtr uri, const char *str) {
826: int ret;
827:
828: ret = xmlParse3986Scheme(uri, &str);
829: if (ret != 0) return(ret);
830: if (*str != ':') {
831: return(1);
832: }
833: str++;
834: ret = xmlParse3986HierPart(uri, &str);
835: if (ret != 0) return(ret);
836: if (*str == '?') {
837: str++;
838: ret = xmlParse3986Query(uri, &str);
839: if (ret != 0) return(ret);
840: }
841: if (*str == '#') {
842: str++;
843: ret = xmlParse3986Fragment(uri, &str);
844: if (ret != 0) return(ret);
845: }
846: if (*str != 0) {
847: xmlCleanURI(uri);
848: return(1);
849: }
850: return(0);
851: }
852:
853: /**
854: * xmlParse3986URIReference:
855: * @uri: pointer to an URI structure
856: * @str: the string to analyze
857: *
858: * Parse an URI reference string and fills in the appropriate fields
859: * of the @uri structure
860: *
861: * URI-reference = URI / relative-ref
862: *
863: * Returns 0 or the error code
864: */
865: static int
866: xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
867: int ret;
868:
869: if (str == NULL)
870: return(-1);
871: xmlCleanURI(uri);
872:
873: /*
874: * Try first to parse absolute refs, then fallback to relative if
875: * it fails.
876: */
877: ret = xmlParse3986URI(uri, str);
878: if (ret != 0) {
879: xmlCleanURI(uri);
880: ret = xmlParse3986RelativeRef(uri, str);
881: if (ret != 0) {
882: xmlCleanURI(uri);
883: return(ret);
884: }
885: }
886: return(0);
887: }
888:
889: /**
890: * xmlParseURI:
891: * @str: the URI string to analyze
892: *
893: * Parse an URI based on RFC 3986
894: *
895: * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
896: *
897: * Returns a newly built xmlURIPtr or NULL in case of error
898: */
899: xmlURIPtr
900: xmlParseURI(const char *str) {
901: xmlURIPtr uri;
902: int ret;
903:
904: if (str == NULL)
905: return(NULL);
906: uri = xmlCreateURI();
907: if (uri != NULL) {
908: ret = xmlParse3986URIReference(uri, str);
909: if (ret) {
910: xmlFreeURI(uri);
911: return(NULL);
912: }
913: }
914: return(uri);
915: }
916:
917: /**
918: * xmlParseURIReference:
919: * @uri: pointer to an URI structure
920: * @str: the string to analyze
921: *
922: * Parse an URI reference string based on RFC 3986 and fills in the
923: * appropriate fields of the @uri structure
924: *
925: * URI-reference = URI / relative-ref
926: *
927: * Returns 0 or the error code
928: */
929: int
930: xmlParseURIReference(xmlURIPtr uri, const char *str) {
931: return(xmlParse3986URIReference(uri, str));
932: }
933:
934: /**
935: * xmlParseURIRaw:
936: * @str: the URI string to analyze
937: * @raw: if 1 unescaping of URI pieces are disabled
938: *
939: * Parse an URI but allows to keep intact the original fragments.
940: *
941: * URI-reference = URI / relative-ref
942: *
943: * Returns a newly built xmlURIPtr or NULL in case of error
944: */
945: xmlURIPtr
946: xmlParseURIRaw(const char *str, int raw) {
947: xmlURIPtr uri;
948: int ret;
949:
950: if (str == NULL)
951: return(NULL);
952: uri = xmlCreateURI();
953: if (uri != NULL) {
954: if (raw) {
955: uri->cleanup |= 2;
956: }
957: ret = xmlParseURIReference(uri, str);
958: if (ret) {
959: xmlFreeURI(uri);
960: return(NULL);
961: }
962: }
963: return(uri);
964: }
965:
966: /************************************************************************
967: * *
968: * Generic URI structure functions *
969: * *
970: ************************************************************************/
971:
972: /**
973: * xmlCreateURI:
974: *
975: * Simply creates an empty xmlURI
976: *
977: * Returns the new structure or NULL in case of error
978: */
979: xmlURIPtr
980: xmlCreateURI(void) {
981: xmlURIPtr ret;
982:
983: ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
984: if (ret == NULL) {
985: xmlGenericError(xmlGenericErrorContext,
986: "xmlCreateURI: out of memory\n");
987: return(NULL);
988: }
989: memset(ret, 0, sizeof(xmlURI));
990: return(ret);
991: }
992:
993: /**
994: * xmlSaveUri:
995: * @uri: pointer to an xmlURI
996: *
997: * Save the URI as an escaped string
998: *
999: * Returns a new string (to be deallocated by caller)
1000: */
1001: xmlChar *
1002: xmlSaveUri(xmlURIPtr uri) {
1003: xmlChar *ret = NULL;
1004: xmlChar *temp;
1005: const char *p;
1006: int len;
1007: int max;
1008:
1009: if (uri == NULL) return(NULL);
1010:
1011:
1012: max = 80;
1013: ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1014: if (ret == NULL) {
1015: xmlGenericError(xmlGenericErrorContext,
1016: "xmlSaveUri: out of memory\n");
1017: return(NULL);
1018: }
1019: len = 0;
1020:
1021: if (uri->scheme != NULL) {
1022: p = uri->scheme;
1023: while (*p != 0) {
1024: if (len >= max) {
1025: max *= 2;
1026: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1027: if (temp == NULL) {
1028: xmlGenericError(xmlGenericErrorContext,
1029: "xmlSaveUri: out of memory\n");
1030: xmlFree(ret);
1031: return(NULL);
1032: }
1033: ret = temp;
1034: }
1035: ret[len++] = *p++;
1036: }
1037: if (len >= max) {
1038: max *= 2;
1039: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1040: if (temp == NULL) {
1041: xmlGenericError(xmlGenericErrorContext,
1042: "xmlSaveUri: out of memory\n");
1043: xmlFree(ret);
1044: return(NULL);
1045: }
1046: ret = temp;
1047: }
1048: ret[len++] = ':';
1049: }
1050: if (uri->opaque != NULL) {
1051: p = uri->opaque;
1052: while (*p != 0) {
1053: if (len + 3 >= max) {
1054: max *= 2;
1055: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1056: if (temp == NULL) {
1057: xmlGenericError(xmlGenericErrorContext,
1058: "xmlSaveUri: out of memory\n");
1059: xmlFree(ret);
1060: return(NULL);
1061: }
1062: ret = temp;
1063: }
1064: if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1065: ret[len++] = *p++;
1066: else {
1067: int val = *(unsigned char *)p++;
1068: int hi = val / 0x10, lo = val % 0x10;
1069: ret[len++] = '%';
1070: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1071: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1072: }
1073: }
1074: } else {
1075: if (uri->server != NULL) {
1076: if (len + 3 >= max) {
1077: max *= 2;
1078: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1079: if (temp == NULL) {
1080: xmlGenericError(xmlGenericErrorContext,
1081: "xmlSaveUri: out of memory\n");
1082: xmlFree(ret);
1083: return(NULL);
1084: }
1085: ret = temp;
1086: }
1087: ret[len++] = '/';
1088: ret[len++] = '/';
1089: if (uri->user != NULL) {
1090: p = uri->user;
1091: while (*p != 0) {
1092: if (len + 3 >= max) {
1093: max *= 2;
1094: temp = (xmlChar *) xmlRealloc(ret,
1095: (max + 1) * sizeof(xmlChar));
1096: if (temp == NULL) {
1097: xmlGenericError(xmlGenericErrorContext,
1098: "xmlSaveUri: out of memory\n");
1099: xmlFree(ret);
1100: return(NULL);
1101: }
1102: ret = temp;
1103: }
1104: if ((IS_UNRESERVED(*(p))) ||
1105: ((*(p) == ';')) || ((*(p) == ':')) ||
1106: ((*(p) == '&')) || ((*(p) == '=')) ||
1107: ((*(p) == '+')) || ((*(p) == '$')) ||
1108: ((*(p) == ',')))
1109: ret[len++] = *p++;
1110: else {
1111: int val = *(unsigned char *)p++;
1112: int hi = val / 0x10, lo = val % 0x10;
1113: ret[len++] = '%';
1114: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1115: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1116: }
1117: }
1118: if (len + 3 >= max) {
1119: max *= 2;
1120: temp = (xmlChar *) xmlRealloc(ret,
1121: (max + 1) * sizeof(xmlChar));
1122: if (temp == NULL) {
1123: xmlGenericError(xmlGenericErrorContext,
1124: "xmlSaveUri: out of memory\n");
1125: xmlFree(ret);
1126: return(NULL);
1127: }
1128: ret = temp;
1129: }
1130: ret[len++] = '@';
1131: }
1132: p = uri->server;
1133: while (*p != 0) {
1134: if (len >= max) {
1135: max *= 2;
1136: temp = (xmlChar *) xmlRealloc(ret,
1137: (max + 1) * sizeof(xmlChar));
1138: if (temp == NULL) {
1139: xmlGenericError(xmlGenericErrorContext,
1140: "xmlSaveUri: out of memory\n");
1141: xmlFree(ret);
1142: return(NULL);
1143: }
1144: ret = temp;
1145: }
1146: ret[len++] = *p++;
1147: }
1148: if (uri->port > 0) {
1149: if (len + 10 >= max) {
1150: max *= 2;
1151: temp = (xmlChar *) xmlRealloc(ret,
1152: (max + 1) * sizeof(xmlChar));
1153: if (temp == NULL) {
1154: xmlGenericError(xmlGenericErrorContext,
1155: "xmlSaveUri: out of memory\n");
1156: xmlFree(ret);
1157: return(NULL);
1158: }
1159: ret = temp;
1160: }
1161: len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1162: }
1163: } else if (uri->authority != NULL) {
1164: if (len + 3 >= max) {
1165: max *= 2;
1166: temp = (xmlChar *) xmlRealloc(ret,
1167: (max + 1) * sizeof(xmlChar));
1168: if (temp == NULL) {
1169: xmlGenericError(xmlGenericErrorContext,
1170: "xmlSaveUri: out of memory\n");
1171: xmlFree(ret);
1172: return(NULL);
1173: }
1174: ret = temp;
1175: }
1176: ret[len++] = '/';
1177: ret[len++] = '/';
1178: p = uri->authority;
1179: while (*p != 0) {
1180: if (len + 3 >= max) {
1181: max *= 2;
1182: temp = (xmlChar *) xmlRealloc(ret,
1183: (max + 1) * sizeof(xmlChar));
1184: if (temp == NULL) {
1185: xmlGenericError(xmlGenericErrorContext,
1186: "xmlSaveUri: out of memory\n");
1187: xmlFree(ret);
1188: return(NULL);
1189: }
1190: ret = temp;
1191: }
1192: if ((IS_UNRESERVED(*(p))) ||
1193: ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1194: ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1195: ((*(p) == '=')) || ((*(p) == '+')))
1196: ret[len++] = *p++;
1197: else {
1198: int val = *(unsigned char *)p++;
1199: int hi = val / 0x10, lo = val % 0x10;
1200: ret[len++] = '%';
1201: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1202: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1203: }
1204: }
1205: } else if (uri->scheme != NULL) {
1206: if (len + 3 >= max) {
1207: max *= 2;
1208: temp = (xmlChar *) xmlRealloc(ret,
1209: (max + 1) * sizeof(xmlChar));
1210: if (temp == NULL) {
1211: xmlGenericError(xmlGenericErrorContext,
1212: "xmlSaveUri: out of memory\n");
1213: xmlFree(ret);
1214: return(NULL);
1215: }
1216: ret = temp;
1217: }
1218: ret[len++] = '/';
1219: ret[len++] = '/';
1220: }
1221: if (uri->path != NULL) {
1222: p = uri->path;
1223: /*
1224: * the colon in file:///d: should not be escaped or
1225: * Windows accesses fail later.
1226: */
1227: if ((uri->scheme != NULL) &&
1228: (p[0] == '/') &&
1229: (((p[1] >= 'a') && (p[1] <= 'z')) ||
1230: ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1231: (p[2] == ':') &&
1232: (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1233: if (len + 3 >= max) {
1234: max *= 2;
1235: ret = (xmlChar *) xmlRealloc(ret,
1236: (max + 1) * sizeof(xmlChar));
1237: if (ret == NULL) {
1238: xmlGenericError(xmlGenericErrorContext,
1239: "xmlSaveUri: out of memory\n");
1240: return(NULL);
1241: }
1242: }
1243: ret[len++] = *p++;
1244: ret[len++] = *p++;
1245: ret[len++] = *p++;
1246: }
1247: while (*p != 0) {
1248: if (len + 3 >= max) {
1249: max *= 2;
1250: temp = (xmlChar *) xmlRealloc(ret,
1251: (max + 1) * sizeof(xmlChar));
1252: if (temp == NULL) {
1253: xmlGenericError(xmlGenericErrorContext,
1254: "xmlSaveUri: out of memory\n");
1255: xmlFree(ret);
1256: return(NULL);
1257: }
1258: ret = temp;
1259: }
1260: if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1261: ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1262: ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1263: ((*(p) == ',')))
1264: ret[len++] = *p++;
1265: else {
1266: int val = *(unsigned char *)p++;
1267: int hi = val / 0x10, lo = val % 0x10;
1268: ret[len++] = '%';
1269: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1270: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1271: }
1272: }
1273: }
1274: if (uri->query_raw != NULL) {
1275: if (len + 1 >= max) {
1276: max *= 2;
1277: temp = (xmlChar *) xmlRealloc(ret,
1278: (max + 1) * sizeof(xmlChar));
1279: if (temp == NULL) {
1280: xmlGenericError(xmlGenericErrorContext,
1281: "xmlSaveUri: out of memory\n");
1282: xmlFree(ret);
1283: return(NULL);
1284: }
1285: ret = temp;
1286: }
1287: ret[len++] = '?';
1288: p = uri->query_raw;
1289: while (*p != 0) {
1290: if (len + 1 >= max) {
1291: max *= 2;
1292: temp = (xmlChar *) xmlRealloc(ret,
1293: (max + 1) * sizeof(xmlChar));
1294: if (temp == NULL) {
1295: xmlGenericError(xmlGenericErrorContext,
1296: "xmlSaveUri: out of memory\n");
1297: xmlFree(ret);
1298: return(NULL);
1299: }
1300: ret = temp;
1301: }
1302: ret[len++] = *p++;
1303: }
1304: } else if (uri->query != NULL) {
1305: if (len + 3 >= max) {
1306: max *= 2;
1307: temp = (xmlChar *) xmlRealloc(ret,
1308: (max + 1) * sizeof(xmlChar));
1309: if (temp == NULL) {
1310: xmlGenericError(xmlGenericErrorContext,
1311: "xmlSaveUri: out of memory\n");
1312: xmlFree(ret);
1313: return(NULL);
1314: }
1315: ret = temp;
1316: }
1317: ret[len++] = '?';
1318: p = uri->query;
1319: while (*p != 0) {
1320: if (len + 3 >= max) {
1321: max *= 2;
1322: temp = (xmlChar *) xmlRealloc(ret,
1323: (max + 1) * sizeof(xmlChar));
1324: if (temp == NULL) {
1325: xmlGenericError(xmlGenericErrorContext,
1326: "xmlSaveUri: out of memory\n");
1327: xmlFree(ret);
1328: return(NULL);
1329: }
1330: ret = temp;
1331: }
1332: if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1333: ret[len++] = *p++;
1334: else {
1335: int val = *(unsigned char *)p++;
1336: int hi = val / 0x10, lo = val % 0x10;
1337: ret[len++] = '%';
1338: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1339: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1340: }
1341: }
1342: }
1343: }
1344: if (uri->fragment != NULL) {
1345: if (len + 3 >= max) {
1346: max *= 2;
1347: temp = (xmlChar *) xmlRealloc(ret,
1348: (max + 1) * sizeof(xmlChar));
1349: if (temp == NULL) {
1350: xmlGenericError(xmlGenericErrorContext,
1351: "xmlSaveUri: out of memory\n");
1352: xmlFree(ret);
1353: return(NULL);
1354: }
1355: ret = temp;
1356: }
1357: ret[len++] = '#';
1358: p = uri->fragment;
1359: while (*p != 0) {
1360: if (len + 3 >= max) {
1361: max *= 2;
1362: temp = (xmlChar *) xmlRealloc(ret,
1363: (max + 1) * sizeof(xmlChar));
1364: if (temp == NULL) {
1365: xmlGenericError(xmlGenericErrorContext,
1366: "xmlSaveUri: out of memory\n");
1367: xmlFree(ret);
1368: return(NULL);
1369: }
1370: ret = temp;
1371: }
1372: if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1373: ret[len++] = *p++;
1374: else {
1375: int val = *(unsigned char *)p++;
1376: int hi = val / 0x10, lo = val % 0x10;
1377: ret[len++] = '%';
1378: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1379: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1380: }
1381: }
1382: }
1383: if (len >= max) {
1384: max *= 2;
1385: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1386: if (temp == NULL) {
1387: xmlGenericError(xmlGenericErrorContext,
1388: "xmlSaveUri: out of memory\n");
1389: xmlFree(ret);
1390: return(NULL);
1391: }
1392: ret = temp;
1393: }
1394: ret[len] = 0;
1395: return(ret);
1396: }
1397:
1398: /**
1399: * xmlPrintURI:
1400: * @stream: a FILE* for the output
1401: * @uri: pointer to an xmlURI
1402: *
1403: * Prints the URI in the stream @stream.
1404: */
1405: void
1406: xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1407: xmlChar *out;
1408:
1409: out = xmlSaveUri(uri);
1410: if (out != NULL) {
1411: fprintf(stream, "%s", (char *) out);
1412: xmlFree(out);
1413: }
1414: }
1415:
1416: /**
1417: * xmlCleanURI:
1418: * @uri: pointer to an xmlURI
1419: *
1420: * Make sure the xmlURI struct is free of content
1421: */
1422: static void
1423: xmlCleanURI(xmlURIPtr uri) {
1424: if (uri == NULL) return;
1425:
1426: if (uri->scheme != NULL) xmlFree(uri->scheme);
1427: uri->scheme = NULL;
1428: if (uri->server != NULL) xmlFree(uri->server);
1429: uri->server = NULL;
1430: if (uri->user != NULL) xmlFree(uri->user);
1431: uri->user = NULL;
1432: if (uri->path != NULL) xmlFree(uri->path);
1433: uri->path = NULL;
1434: if (uri->fragment != NULL) xmlFree(uri->fragment);
1435: uri->fragment = NULL;
1436: if (uri->opaque != NULL) xmlFree(uri->opaque);
1437: uri->opaque = NULL;
1438: if (uri->authority != NULL) xmlFree(uri->authority);
1439: uri->authority = NULL;
1440: if (uri->query != NULL) xmlFree(uri->query);
1441: uri->query = NULL;
1442: if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1443: uri->query_raw = NULL;
1444: }
1445:
1446: /**
1447: * xmlFreeURI:
1448: * @uri: pointer to an xmlURI
1449: *
1450: * Free up the xmlURI struct
1451: */
1452: void
1453: xmlFreeURI(xmlURIPtr uri) {
1454: if (uri == NULL) return;
1455:
1456: if (uri->scheme != NULL) xmlFree(uri->scheme);
1457: if (uri->server != NULL) xmlFree(uri->server);
1458: if (uri->user != NULL) xmlFree(uri->user);
1459: if (uri->path != NULL) xmlFree(uri->path);
1460: if (uri->fragment != NULL) xmlFree(uri->fragment);
1461: if (uri->opaque != NULL) xmlFree(uri->opaque);
1462: if (uri->authority != NULL) xmlFree(uri->authority);
1463: if (uri->query != NULL) xmlFree(uri->query);
1464: if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1465: xmlFree(uri);
1466: }
1467:
1468: /************************************************************************
1469: * *
1470: * Helper functions *
1471: * *
1472: ************************************************************************/
1473:
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input. Leading '/' characters are
 * kept as they are; runs of '/' after the first segment are collapsed.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : keep only the last slash of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy by hand rather than
         * with strcpy().
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Look for the last character of a previous segment.  There is none
         * when "cur" is the very start of the buffer (the loop below does
         * not move) or when only '/' characters precede "cur" (the scan
         * stops on a '/' at the start of the buffer).  Testing the character
         * rather than just "segp == path" matters: a one character first
         * segment such as the "a" of "a/b/../.." ends exactly at path[0] and
         * must still be revisited, otherwise "a/.." would be left behind.
         */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if ((segp == cur) || (segp[0] == '/'))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }
    /* The compaction above always copies or writes the terminating 0, so
     * the buffer is already a valid string at this point.
     */

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (only done for paths that start
     * with a '/').
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1662:
/*
 * is_hex:
 * @c:  the character to classify
 *
 * Returns 1 if @c is an ASCII hexadecimal digit ('0'-'9', 'a'-'f' or
 * 'A'-'F'), 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    return(((c >= 'A') && (c <= 'F')) ? 1 : 0);
}
1670:
1671: /**
1672: * xmlURIUnescapeString:
1673: * @str: the string to unescape
1674: * @len: the length in bytes to unescape (or <= 0 to indicate full string)
1675: * @target: optional destination buffer
1676: *
1677: * Unescaping routine, but does not check that the string is an URI. The
1678: * output is a direct unsigned char translation of %XX values (no encoding)
1679: * Note that the length of the result can only be smaller or same size as
1680: * the input string.
1681: *
1682: * Returns a copy of the string, but unescaped, will return NULL only in case
1683: * of error
1684: */
1685: char *
1686: xmlURIUnescapeString(const char *str, int len, char *target) {
1687: char *ret, *out;
1688: const char *in;
1689:
1690: if (str == NULL)
1691: return(NULL);
1692: if (len <= 0) len = strlen(str);
1693: if (len < 0) return(NULL);
1694:
1695: if (target == NULL) {
1696: ret = (char *) xmlMallocAtomic(len + 1);
1697: if (ret == NULL) {
1698: xmlGenericError(xmlGenericErrorContext,
1699: "xmlURIUnescapeString: out of memory\n");
1700: return(NULL);
1701: }
1702: } else
1703: ret = target;
1704: in = str;
1705: out = ret;
1706: while(len > 0) {
1707: if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
1708: in++;
1709: if ((*in >= '0') && (*in <= '9'))
1710: *out = (*in - '0');
1711: else if ((*in >= 'a') && (*in <= 'f'))
1712: *out = (*in - 'a') + 10;
1713: else if ((*in >= 'A') && (*in <= 'F'))
1714: *out = (*in - 'A') + 10;
1715: in++;
1716: if ((*in >= '0') && (*in <= '9'))
1717: *out = *out * 16 + (*in - '0');
1718: else if ((*in >= 'a') && (*in <= 'f'))
1719: *out = *out * 16 + (*in - 'a') + 10;
1720: else if ((*in >= 'A') && (*in <= 'F'))
1721: *out = *out * 16 + (*in - 'A') + 10;
1722: in++;
1723: len -= 3;
1724: out++;
1725: } else {
1726: *out++ = *in++;
1727: len--;
1728: }
1729: }
1730: *out = 0;
1731: return(ret);
1732: }
1733:
1734: /**
1735: * xmlURIEscapeStr:
1736: * @str: string to escape
1737: * @list: exception list string of chars not to escape
1738: *
1739: * This routine escapes a string to hex, ignoring reserved characters (a-z)
1740: * and the characters in the exception list.
1741: *
1742: * Returns a new escaped string or NULL in case of error.
1743: */
1744: xmlChar *
1745: xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1746: xmlChar *ret, ch;
1747: xmlChar *temp;
1748: const xmlChar *in;
1749:
1750: unsigned int len, out;
1751:
1752: if (str == NULL)
1753: return(NULL);
1754: if (str[0] == 0)
1755: return(xmlStrdup(str));
1756: len = xmlStrlen(str);
1757: if (!(len > 0)) return(NULL);
1758:
1759: len += 20;
1760: ret = (xmlChar *) xmlMallocAtomic(len);
1761: if (ret == NULL) {
1762: xmlGenericError(xmlGenericErrorContext,
1763: "xmlURIEscapeStr: out of memory\n");
1764: return(NULL);
1765: }
1766: in = (const xmlChar *) str;
1767: out = 0;
1768: while(*in != 0) {
1769: if (len - out <= 3) {
1770: len += 20;
1771: temp = (xmlChar *) xmlRealloc(ret, len);
1772: if (temp == NULL) {
1773: xmlGenericError(xmlGenericErrorContext,
1774: "xmlURIEscapeStr: out of memory\n");
1775: xmlFree(ret);
1776: return(NULL);
1777: }
1778: ret = temp;
1779: }
1780:
1781: ch = *in;
1782:
1783: if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1784: unsigned char val;
1785: ret[out++] = '%';
1786: val = ch >> 4;
1787: if (val <= 9)
1788: ret[out++] = '0' + val;
1789: else
1790: ret[out++] = 'A' + val - 0xA;
1791: val = ch & 0xF;
1792: if (val <= 9)
1793: ret[out++] = '0' + val;
1794: else
1795: ret[out++] = 'A' + val - 0xA;
1796: in++;
1797: } else {
1798: ret[out++] = *in++;
1799: }
1800:
1801: }
1802: ret[out] = 0;
1803: return(ret);
1804: }
1805:
1806: /**
1807: * xmlURIEscape:
1808: * @str: the string of the URI to escape
1809: *
1810: * Escaping routine, does not do validity checks !
1811: * It will try to escape the chars needing this, but this is heuristic
1812: * based it's impossible to be sure.
1813: *
1814: * Returns an copy of the string, but escaped
1815: *
1816: * 25 May 2001
1817: * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1818: * according to RFC2396.
1819: * - Carl Douglas
1820: */
1821: xmlChar *
1822: xmlURIEscape(const xmlChar * str)
1823: {
1824: xmlChar *ret, *segment = NULL;
1825: xmlURIPtr uri;
1826: int ret2;
1827:
1828: #define NULLCHK(p) if(!p) { \
1829: xmlGenericError(xmlGenericErrorContext, \
1830: "xmlURIEscape: out of memory\n"); \
1831: xmlFreeURI(uri); \
1832: return NULL; } \
1833:
1834: if (str == NULL)
1835: return (NULL);
1836:
1837: uri = xmlCreateURI();
1838: if (uri != NULL) {
1839: /*
1840: * Allow escaping errors in the unescaped form
1841: */
1842: uri->cleanup = 1;
1843: ret2 = xmlParseURIReference(uri, (const char *)str);
1844: if (ret2) {
1845: xmlFreeURI(uri);
1846: return (NULL);
1847: }
1848: }
1849:
1850: if (!uri)
1851: return NULL;
1852:
1853: ret = NULL;
1854:
1855: if (uri->scheme) {
1856: segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1857: NULLCHK(segment)
1858: ret = xmlStrcat(ret, segment);
1859: ret = xmlStrcat(ret, BAD_CAST ":");
1860: xmlFree(segment);
1861: }
1862:
1863: if (uri->authority) {
1864: segment =
1865: xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1866: NULLCHK(segment)
1867: ret = xmlStrcat(ret, BAD_CAST "//");
1868: ret = xmlStrcat(ret, segment);
1869: xmlFree(segment);
1870: }
1871:
1872: if (uri->user) {
1873: segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1874: NULLCHK(segment)
1875: ret = xmlStrcat(ret,BAD_CAST "//");
1876: ret = xmlStrcat(ret, segment);
1877: ret = xmlStrcat(ret, BAD_CAST "@");
1878: xmlFree(segment);
1879: }
1880:
1881: if (uri->server) {
1882: segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1883: NULLCHK(segment)
1884: if (uri->user == NULL)
1885: ret = xmlStrcat(ret, BAD_CAST "//");
1886: ret = xmlStrcat(ret, segment);
1887: xmlFree(segment);
1888: }
1889:
1890: if (uri->port) {
1891: xmlChar port[10];
1892:
1893: snprintf((char *) port, 10, "%d", uri->port);
1894: ret = xmlStrcat(ret, BAD_CAST ":");
1895: ret = xmlStrcat(ret, port);
1896: }
1897:
1898: if (uri->path) {
1899: segment =
1900: xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1901: NULLCHK(segment)
1902: ret = xmlStrcat(ret, segment);
1903: xmlFree(segment);
1904: }
1905:
1906: if (uri->query_raw) {
1907: ret = xmlStrcat(ret, BAD_CAST "?");
1908: ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1909: }
1910: else if (uri->query) {
1911: segment =
1912: xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1913: NULLCHK(segment)
1914: ret = xmlStrcat(ret, BAD_CAST "?");
1915: ret = xmlStrcat(ret, segment);
1916: xmlFree(segment);
1917: }
1918:
1919: if (uri->opaque) {
1920: segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1921: NULLCHK(segment)
1922: ret = xmlStrcat(ret, segment);
1923: xmlFree(segment);
1924: }
1925:
1926: if (uri->fragment) {
1927: segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1928: NULLCHK(segment)
1929: ret = xmlStrcat(ret, BAD_CAST "#");
1930: ret = xmlStrcat(ret, segment);
1931: xmlFree(segment);
1932: }
1933:
1934: xmlFreeURI(uri);
1935: #undef NULLCHK
1936:
1937: return (ret);
1938: }
1939:
1940: /************************************************************************
1941: * *
1942: * Public functions *
1943: * *
1944: ************************************************************************/
1945:
1946: /**
1947: * xmlBuildURI:
1948: * @URI: the URI instance found in the document
1949: * @base: the base value
1950: *
1951: * Computes he final URI of the reference done by checking that
1952: * the given URI is valid, and building the final URI using the
1953: * base URI. This is processed according to section 5.2 of the
1954: * RFC 2396
1955: *
1956: * 5.2. Resolving Relative References to Absolute Form
1957: *
1958: * Returns a new URI string (to be freed by the caller) or NULL in case
1959: * of error.
1960: */
1961: xmlChar *
1962: xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1963: xmlChar *val = NULL;
1964: int ret, len, indx, cur, out;
1965: xmlURIPtr ref = NULL;
1966: xmlURIPtr bas = NULL;
1967: xmlURIPtr res = NULL;
1968:
1969: /*
1970: * 1) The URI reference is parsed into the potential four components and
1971: * fragment identifier, as described in Section 4.3.
1972: *
1973: * NOTE that a completely empty URI is treated by modern browsers
1974: * as a reference to "." rather than as a synonym for the current
1975: * URI. Should we do that here?
1976: */
1977: if (URI == NULL)
1978: ret = -1;
1979: else {
1980: if (*URI) {
1981: ref = xmlCreateURI();
1982: if (ref == NULL)
1983: goto done;
1984: ret = xmlParseURIReference(ref, (const char *) URI);
1985: }
1986: else
1987: ret = 0;
1988: }
1989: if (ret != 0)
1990: goto done;
1991: if ((ref != NULL) && (ref->scheme != NULL)) {
1992: /*
1993: * The URI is absolute don't modify.
1994: */
1995: val = xmlStrdup(URI);
1996: goto done;
1997: }
1998: if (base == NULL)
1999: ret = -1;
2000: else {
2001: bas = xmlCreateURI();
2002: if (bas == NULL)
2003: goto done;
2004: ret = xmlParseURIReference(bas, (const char *) base);
2005: }
2006: if (ret != 0) {
2007: if (ref)
2008: val = xmlSaveUri(ref);
2009: goto done;
2010: }
2011: if (ref == NULL) {
2012: /*
2013: * the base fragment must be ignored
2014: */
2015: if (bas->fragment != NULL) {
2016: xmlFree(bas->fragment);
2017: bas->fragment = NULL;
2018: }
2019: val = xmlSaveUri(bas);
2020: goto done;
2021: }
2022:
2023: /*
2024: * 2) If the path component is empty and the scheme, authority, and
2025: * query components are undefined, then it is a reference to the
2026: * current document and we are done. Otherwise, the reference URI's
2027: * query and fragment components are defined as found (or not found)
2028: * within the URI reference and not inherited from the base URI.
2029: *
2030: * NOTE that in modern browsers, the parsing differs from the above
2031: * in the following aspect: the query component is allowed to be
2032: * defined while still treating this as a reference to the current
2033: * document.
2034: */
2035: res = xmlCreateURI();
2036: if (res == NULL)
2037: goto done;
2038: if ((ref->scheme == NULL) && (ref->path == NULL) &&
2039: ((ref->authority == NULL) && (ref->server == NULL))) {
2040: if (bas->scheme != NULL)
2041: res->scheme = xmlMemStrdup(bas->scheme);
2042: if (bas->authority != NULL)
2043: res->authority = xmlMemStrdup(bas->authority);
2044: else if (bas->server != NULL) {
2045: res->server = xmlMemStrdup(bas->server);
2046: if (bas->user != NULL)
2047: res->user = xmlMemStrdup(bas->user);
2048: res->port = bas->port;
2049: }
2050: if (bas->path != NULL)
2051: res->path = xmlMemStrdup(bas->path);
2052: if (ref->query_raw != NULL)
2053: res->query_raw = xmlMemStrdup (ref->query_raw);
2054: else if (ref->query != NULL)
2055: res->query = xmlMemStrdup(ref->query);
2056: else if (bas->query_raw != NULL)
2057: res->query_raw = xmlMemStrdup(bas->query_raw);
2058: else if (bas->query != NULL)
2059: res->query = xmlMemStrdup(bas->query);
2060: if (ref->fragment != NULL)
2061: res->fragment = xmlMemStrdup(ref->fragment);
2062: goto step_7;
2063: }
2064:
2065: /*
2066: * 3) If the scheme component is defined, indicating that the reference
2067: * starts with a scheme name, then the reference is interpreted as an
2068: * absolute URI and we are done. Otherwise, the reference URI's
2069: * scheme is inherited from the base URI's scheme component.
2070: */
2071: if (ref->scheme != NULL) {
2072: val = xmlSaveUri(ref);
2073: goto done;
2074: }
2075: if (bas->scheme != NULL)
2076: res->scheme = xmlMemStrdup(bas->scheme);
2077:
2078: if (ref->query_raw != NULL)
2079: res->query_raw = xmlMemStrdup(ref->query_raw);
2080: else if (ref->query != NULL)
2081: res->query = xmlMemStrdup(ref->query);
2082: if (ref->fragment != NULL)
2083: res->fragment = xmlMemStrdup(ref->fragment);
2084:
2085: /*
2086: * 4) If the authority component is defined, then the reference is a
2087: * network-path and we skip to step 7. Otherwise, the reference
2088: * URI's authority is inherited from the base URI's authority
2089: * component, which will also be undefined if the URI scheme does not
2090: * use an authority component.
2091: */
2092: if ((ref->authority != NULL) || (ref->server != NULL)) {
2093: if (ref->authority != NULL)
2094: res->authority = xmlMemStrdup(ref->authority);
2095: else {
2096: res->server = xmlMemStrdup(ref->server);
2097: if (ref->user != NULL)
2098: res->user = xmlMemStrdup(ref->user);
2099: res->port = ref->port;
2100: }
2101: if (ref->path != NULL)
2102: res->path = xmlMemStrdup(ref->path);
2103: goto step_7;
2104: }
2105: if (bas->authority != NULL)
2106: res->authority = xmlMemStrdup(bas->authority);
2107: else if (bas->server != NULL) {
2108: res->server = xmlMemStrdup(bas->server);
2109: if (bas->user != NULL)
2110: res->user = xmlMemStrdup(bas->user);
2111: res->port = bas->port;
2112: }
2113:
2114: /*
2115: * 5) If the path component begins with a slash character ("/"), then
2116: * the reference is an absolute-path and we skip to step 7.
2117: */
2118: if ((ref->path != NULL) && (ref->path[0] == '/')) {
2119: res->path = xmlMemStrdup(ref->path);
2120: goto step_7;
2121: }
2122:
2123:
2124: /*
2125: * 6) If this step is reached, then we are resolving a relative-path
2126: * reference. The relative path needs to be merged with the base
2127: * URI's path. Although there are many ways to do this, we will
2128: * describe a simple method using a separate string buffer.
2129: *
2130: * Allocate a buffer large enough for the result string.
2131: */
2132: len = 2; /* extra / and 0 */
2133: if (ref->path != NULL)
2134: len += strlen(ref->path);
2135: if (bas->path != NULL)
2136: len += strlen(bas->path);
2137: res->path = (char *) xmlMallocAtomic(len);
2138: if (res->path == NULL) {
2139: xmlGenericError(xmlGenericErrorContext,
2140: "xmlBuildURI: out of memory\n");
2141: goto done;
2142: }
2143: res->path[0] = 0;
2144:
2145: /*
2146: * a) All but the last segment of the base URI's path component is
2147: * copied to the buffer. In other words, any characters after the
2148: * last (right-most) slash character, if any, are excluded.
2149: */
2150: cur = 0;
2151: out = 0;
2152: if (bas->path != NULL) {
2153: while (bas->path[cur] != 0) {
2154: while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2155: cur++;
2156: if (bas->path[cur] == 0)
2157: break;
2158:
2159: cur++;
2160: while (out < cur) {
2161: res->path[out] = bas->path[out];
2162: out++;
2163: }
2164: }
2165: }
2166: res->path[out] = 0;
2167:
2168: /*
2169: * b) The reference's path component is appended to the buffer
2170: * string.
2171: */
2172: if (ref->path != NULL && ref->path[0] != 0) {
2173: indx = 0;
2174: /*
2175: * Ensure the path includes a '/'
2176: */
2177: if ((out == 0) && (bas->server != NULL))
2178: res->path[out++] = '/';
2179: while (ref->path[indx] != 0) {
2180: res->path[out++] = ref->path[indx++];
2181: }
2182: }
2183: res->path[out] = 0;
2184:
2185: /*
2186: * Steps c) to h) are really path normalization steps
2187: */
2188: xmlNormalizeURIPath(res->path);
2189:
2190: step_7:
2191:
2192: /*
2193: * 7) The resulting URI components, including any inherited from the
2194: * base URI, are recombined to give the absolute form of the URI
2195: * reference.
2196: */
2197: val = xmlSaveUri(res);
2198:
2199: done:
2200: if (ref != NULL)
2201: xmlFreeURI(ref);
2202: if (bas != NULL)
2203: xmlFreeURI(bas);
2204: if (res != NULL)
2205: xmlFreeURI(res);
2206: return(val);
2207: }
2208:
2209: /**
2210: * xmlBuildRelativeURI:
2211: * @URI: the URI reference under consideration
2212: * @base: the base value
2213: *
2214: * Expresses the URI of the reference in terms relative to the
2215: * base. Some examples of this operation include:
2216: * base = "http://site1.com/docs/book1.html"
2217: * URI input URI returned
2218: * docs/pic1.gif pic1.gif
2219: * docs/img/pic1.gif img/pic1.gif
2220: * img/pic1.gif ../img/pic1.gif
2221: * http://site1.com/docs/pic1.gif pic1.gif
2222: * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2223: *
2224: * base = "docs/book1.html"
2225: * URI input URI returned
2226: * docs/pic1.gif pic1.gif
2227: * docs/img/pic1.gif img/pic1.gif
2228: * img/pic1.gif ../img/pic1.gif
2229: * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2230: *
2231: *
2232: * Note: if the URI reference is really weird or complicated, it may be
2233: * worthwhile to first convert it into a "nice" one by calling
2234: * xmlBuildURI (using 'base') before calling this routine,
2235: * since this routine (for reasonable efficiency) assumes URI has
2236: * already been through some validation.
2237: *
2238: * Returns a new URI string (to be freed by the caller) or NULL in case
2239: * error.
2240: */
2241: xmlChar *
2242: xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2243: {
2244: xmlChar *val = NULL;
2245: int ret;
2246: int ix;
2247: int pos = 0;
2248: int nbslash = 0;
2249: int len;
2250: xmlURIPtr ref = NULL;
2251: xmlURIPtr bas = NULL;
2252: xmlChar *bptr, *uptr, *vptr;
2253: int remove_path = 0;
2254:
2255: if ((URI == NULL) || (*URI == 0))
2256: return NULL;
2257:
2258: /*
2259: * First parse URI into a standard form
2260: */
2261: ref = xmlCreateURI ();
2262: if (ref == NULL)
2263: return NULL;
2264: /* If URI not already in "relative" form */
2265: if (URI[0] != '.') {
2266: ret = xmlParseURIReference (ref, (const char *) URI);
2267: if (ret != 0)
2268: goto done; /* Error in URI, return NULL */
2269: } else
2270: ref->path = (char *)xmlStrdup(URI);
2271:
2272: /*
2273: * Next parse base into the same standard form
2274: */
2275: if ((base == NULL) || (*base == 0)) {
2276: val = xmlStrdup (URI);
2277: goto done;
2278: }
2279: bas = xmlCreateURI ();
2280: if (bas == NULL)
2281: goto done;
2282: if (base[0] != '.') {
2283: ret = xmlParseURIReference (bas, (const char *) base);
2284: if (ret != 0)
2285: goto done; /* Error in base, return NULL */
2286: } else
2287: bas->path = (char *)xmlStrdup(base);
2288:
2289: /*
2290: * If the scheme / server on the URI differs from the base,
2291: * just return the URI
2292: */
2293: if ((ref->scheme != NULL) &&
2294: ((bas->scheme == NULL) ||
2295: (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2296: (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2297: val = xmlStrdup (URI);
2298: goto done;
2299: }
2300: if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2301: val = xmlStrdup(BAD_CAST "");
2302: goto done;
2303: }
2304: if (bas->path == NULL) {
2305: val = xmlStrdup((xmlChar *)ref->path);
2306: goto done;
2307: }
2308: if (ref->path == NULL) {
2309: ref->path = (char *) "/";
2310: remove_path = 1;
2311: }
2312:
2313: /*
2314: * At this point (at last!) we can compare the two paths
2315: *
2316: * First we take care of the special case where either of the
2317: * two path components may be missing (bug 316224)
2318: */
2319: if (bas->path == NULL) {
2320: if (ref->path != NULL) {
2321: uptr = (xmlChar *) ref->path;
2322: if (*uptr == '/')
2323: uptr++;
2324: /* exception characters from xmlSaveUri */
2325: val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2326: }
2327: goto done;
2328: }
2329: bptr = (xmlChar *)bas->path;
2330: if (ref->path == NULL) {
2331: for (ix = 0; bptr[ix] != 0; ix++) {
2332: if (bptr[ix] == '/')
2333: nbslash++;
2334: }
2335: uptr = NULL;
2336: len = 1; /* this is for a string terminator only */
2337: } else {
2338: /*
2339: * Next we compare the two strings and find where they first differ
2340: */
2341: if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2342: pos += 2;
2343: if ((*bptr == '.') && (bptr[1] == '/'))
2344: bptr += 2;
2345: else if ((*bptr == '/') && (ref->path[pos] != '/'))
2346: bptr++;
2347: while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2348: pos++;
2349:
2350: if (bptr[pos] == ref->path[pos]) {
2351: val = xmlStrdup(BAD_CAST "");
2352: goto done; /* (I can't imagine why anyone would do this) */
2353: }
2354:
2355: /*
2356: * In URI, "back up" to the last '/' encountered. This will be the
2357: * beginning of the "unique" suffix of URI
2358: */
2359: ix = pos;
2360: if ((ref->path[ix] == '/') && (ix > 0))
2361: ix--;
2362: else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2363: ix -= 2;
2364: for (; ix > 0; ix--) {
2365: if (ref->path[ix] == '/')
2366: break;
2367: }
2368: if (ix == 0) {
2369: uptr = (xmlChar *)ref->path;
2370: } else {
2371: ix++;
2372: uptr = (xmlChar *)&ref->path[ix];
2373: }
2374:
2375: /*
2376: * In base, count the number of '/' from the differing point
2377: */
2378: if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2379: for (; bptr[ix] != 0; ix++) {
2380: if (bptr[ix] == '/')
2381: nbslash++;
2382: }
2383: }
2384: len = xmlStrlen (uptr) + 1;
2385: }
2386:
2387: if (nbslash == 0) {
2388: if (uptr != NULL)
2389: /* exception characters from xmlSaveUri */
2390: val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2391: goto done;
2392: }
2393:
2394: /*
2395: * Allocate just enough space for the returned string -
2396: * length of the remainder of the URI, plus enough space
2397: * for the "../" groups, plus one for the terminator
2398: */
2399: val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2400: if (val == NULL) {
2401: xmlGenericError(xmlGenericErrorContext,
2402: "xmlBuildRelativeURI: out of memory\n");
2403: goto done;
2404: }
2405: vptr = val;
2406: /*
2407: * Put in as many "../" as needed
2408: */
2409: for (; nbslash>0; nbslash--) {
2410: *vptr++ = '.';
2411: *vptr++ = '.';
2412: *vptr++ = '/';
2413: }
2414: /*
2415: * Finish up with the end of the URI
2416: */
2417: if (uptr != NULL) {
2418: if ((vptr > val) && (len > 0) &&
2419: (uptr[0] == '/') && (vptr[-1] == '/')) {
2420: memcpy (vptr, uptr + 1, len - 1);
2421: vptr[len - 2] = 0;
2422: } else {
2423: memcpy (vptr, uptr, len);
2424: vptr[len - 1] = 0;
2425: }
2426: } else {
2427: vptr[len - 1] = 0;
2428: }
2429:
2430: /* escape the freshly-built path */
2431: vptr = val;
2432: /* exception characters from xmlSaveUri */
2433: val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2434: xmlFree(vptr);
2435:
2436: done:
2437: /*
2438: * Free the working variables
2439: */
2440: if (remove_path != 0)
2441: ref->path = NULL;
2442: if (ref != NULL)
2443: xmlFreeURI (ref);
2444: if (bas != NULL)
2445: xmlFreeURI (bas);
2446:
2447: return val;
2448: }
2449:
2450: /**
2451: * xmlCanonicPath:
2452: * @path: the resource locator in a filesystem notation
2453: *
2454: * Constructs a canonic path from the specified path.
2455: *
2456: * Returns a new canonic path, or a duplicate of the path parameter if the
2457: * construction fails. The caller is responsible for freeing the memory occupied
2458: * by the returned string. If there is insufficient memory available, or the
2459: * argument is NULL, the function returns NULL.
2460: */
2461: #define IS_WINDOWS_PATH(p) \
2462: ((p != NULL) && \
2463: (((p[0] >= 'a') && (p[0] <= 'z')) || \
2464: ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2465: (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2466: xmlChar *
2467: xmlCanonicPath(const xmlChar *path)
2468: {
2469: /*
2470: * For Windows implementations, additional work needs to be done to
2471: * replace backslashes in pathnames with "forward slashes"
2472: */
2473: #if defined(_WIN32) && !defined(__CYGWIN__)
2474: int len = 0;
2475: int i = 0;
2476: xmlChar *p = NULL;
2477: #endif
2478: xmlURIPtr uri;
2479: xmlChar *ret;
2480: const xmlChar *absuri;
2481:
2482: if (path == NULL)
2483: return(NULL);
2484:
2485: /* sanitize filename starting with // so it can be used as URI */
2486: if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2487: path++;
2488:
2489: if ((uri = xmlParseURI((const char *) path)) != NULL) {
2490: xmlFreeURI(uri);
2491: return xmlStrdup(path);
2492: }
2493:
2494: /* Check if this is an "absolute uri" */
2495: absuri = xmlStrstr(path, BAD_CAST "://");
2496: if (absuri != NULL) {
2497: int l, j;
2498: unsigned char c;
2499: xmlChar *escURI;
2500:
2501: /*
2502: * this looks like an URI where some parts have not been
2503: * escaped leading to a parsing problem. Check that the first
2504: * part matches a protocol.
2505: */
2506: l = absuri - path;
2507: /* Bypass if first part (part before the '://') is > 20 chars */
2508: if ((l <= 0) || (l > 20))
2509: goto path_processing;
2510: /* Bypass if any non-alpha characters are present in first part */
2511: for (j = 0;j < l;j++) {
2512: c = path[j];
2513: if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2514: goto path_processing;
2515: }
2516:
2517: /* Escape all except the characters specified in the supplied path */
2518: escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2519: if (escURI != NULL) {
2520: /* Try parsing the escaped path */
2521: uri = xmlParseURI((const char *) escURI);
2522: /* If successful, return the escaped string */
2523: if (uri != NULL) {
2524: xmlFreeURI(uri);
2525: return escURI;
2526: }
2527: }
2528: }
2529:
2530: path_processing:
2531: /* For Windows implementations, replace backslashes with 'forward slashes' */
2532: #if defined(_WIN32) && !defined(__CYGWIN__)
2533: /*
2534: * Create a URI structure
2535: */
2536: uri = xmlCreateURI();
2537: if (uri == NULL) { /* Guard against 'out of memory' */
2538: return(NULL);
2539: }
2540:
2541: len = xmlStrlen(path);
2542: if ((len > 2) && IS_WINDOWS_PATH(path)) {
2543: /* make the scheme 'file' */
2544: uri->scheme = xmlStrdup(BAD_CAST "file");
2545: /* allocate space for leading '/' + path + string terminator */
2546: uri->path = xmlMallocAtomic(len + 2);
2547: if (uri->path == NULL) {
2548: xmlFreeURI(uri); /* Guard agains 'out of memory' */
2549: return(NULL);
2550: }
2551: /* Put in leading '/' plus path */
2552: uri->path[0] = '/';
2553: p = uri->path + 1;
2554: strncpy(p, path, len + 1);
2555: } else {
2556: uri->path = xmlStrdup(path);
2557: if (uri->path == NULL) {
2558: xmlFreeURI(uri);
2559: return(NULL);
2560: }
2561: p = uri->path;
2562: }
2563: /* Now change all occurences of '\' to '/' */
2564: while (*p != '\0') {
2565: if (*p == '\\')
2566: *p = '/';
2567: p++;
2568: }
2569:
2570: if (uri->scheme == NULL) {
2571: ret = xmlStrdup((const xmlChar *) uri->path);
2572: } else {
2573: ret = xmlSaveUri(uri);
2574: }
2575:
2576: xmlFreeURI(uri);
2577: #else
2578: ret = xmlStrdup((const xmlChar *) path);
2579: #endif
2580: return(ret);
2581: }
2582:
2583: /**
2584: * xmlPathToURI:
2585: * @path: the resource locator in a filesystem notation
2586: *
2587: * Constructs an URI expressing the existing path
2588: *
2589: * Returns a new URI, or a duplicate of the path parameter if the
2590: * construction fails. The caller is responsible for freeing the memory
2591: * occupied by the returned string. If there is insufficient memory available,
2592: * or the argument is NULL, the function returns NULL.
2593: */
2594: xmlChar *
2595: xmlPathToURI(const xmlChar *path)
2596: {
2597: xmlURIPtr uri;
2598: xmlURI temp;
2599: xmlChar *ret, *cal;
2600:
2601: if (path == NULL)
2602: return(NULL);
2603:
2604: if ((uri = xmlParseURI((const char *) path)) != NULL) {
2605: xmlFreeURI(uri);
2606: return xmlStrdup(path);
2607: }
2608: cal = xmlCanonicPath(path);
2609: if (cal == NULL)
2610: return(NULL);
2611: #if defined(_WIN32) && !defined(__CYGWIN__)
2612: /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2613: If 'cal' is a valid URI allready then we are done here, as continuing would make
2614: it invalid. */
2615: if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2616: xmlFreeURI(uri);
2617: return cal;
2618: }
2619: /* 'cal' can contain a relative path with backslashes. If that is processed
2620: by xmlSaveURI, they will be escaped and the external entity loader machinery
2621: will fail. So convert them to slashes. Misuse 'ret' for walking. */
2622: ret = cal;
2623: while (*ret != '\0') {
2624: if (*ret == '\\')
2625: *ret = '/';
2626: ret++;
2627: }
2628: #endif
2629: memset(&temp, 0, sizeof(temp));
2630: temp.path = (char *) cal;
2631: ret = xmlSaveUri(&temp);
2632: xmlFree(cal);
2633: return(ret);
2634: }
2635: #define bottom_uri
2636: #include "elfgcchack.h"
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>