Annotation of embedaddon/libxml2/uri.c, revision 1.1.1.1
1.1 misho 1: /**
2: * uri.c: set of generic URI related routines
3: *
4: * Reference: RFCs 3986, 2732 and 2373
5: *
6: * See Copyright for the status of this software.
7: *
8: * daniel@veillard.com
9: */
10:
11: #define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
20:
21: static void xmlCleanURI(xmlURIPtr uri);
22:
/*
 * Character classes from the old RFC 2396 grammar, used by the legacy
 * handling code. Every macro takes a character value, except IS_UNWISE
 * which takes a pointer to the character. Arguments may be evaluated
 * several times, so they must not have side effects.
 */

/*
 * lowalpha = "a" | "b" | ... | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | ... | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" | "1" | ... | "9"
 *
 * Any IS_DIGIT already defined is dropped so that the definition
 * below is the one in effect.
 */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||   \
     ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Takes a pointer to the character to test, not the character itself.
 */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') ||                    \
     ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||    \
     ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') ||    \
     ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
90:
/*
 * Advance the pointer lvalue @p to the next character: by three bytes
 * when it points at '%' (an escape sequence), by one byte otherwise.
 * Callers must make sure a '%' is followed by two more characters
 * before using this. @p is evaluated more than once, so it must not
 * have side effects; it is fully parenthesized so that expressions
 * such as *pp can be passed safely.
 */

#define NEXT(p) (((*(p)) == '%') ? ((p) += 3) : ((p)++))

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first @n bytes of @s with xmlStrndup() and hand the
 * result back as a char *. The expansion is wrapped in parentheses so
 * the cast binds to the call whatever the surrounding expression is.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
108:
/************************************************************************
 *									*
 *                         RFC 3986 parser				*
 *									*
 ************************************************************************/

/*
 * Character classes of the RFC 3986 grammar. Every macro takes a
 * pointer to the character to test. The argument may be evaluated
 * several times and must therefore be free of side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following characters are only inspected after the '%' (and
 * then the first hex digit) matched, so the string terminator is never
 * read past. @p is parenthesized before the offset is added so that
 * any pointer expression can be passed.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
164:
165: /**
166: * xmlParse3986Scheme:
167: * @uri: pointer to an URI structure
168: * @str: pointer to the string to analyze
169: *
170: * Parse an URI scheme
171: *
172: * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
173: *
174: * Returns 0 or the error code
175: */
176: static int
177: xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
178: const char *cur;
179:
180: if (str == NULL)
181: return(-1);
182:
183: cur = *str;
184: if (!ISA_ALPHA(cur))
185: return(2);
186: cur++;
187: while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
188: (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
189: if (uri != NULL) {
190: if (uri->scheme != NULL) xmlFree(uri->scheme);
191: uri->scheme = STRNDUP(*str, cur - *str);
192: }
193: *str = cur;
194: return(0);
195: }
196:
197: /**
198: * xmlParse3986Fragment:
199: * @uri: pointer to an URI structure
200: * @str: pointer to the string to analyze
201: *
202: * Parse the query part of an URI
203: *
204: * fragment = *( pchar / "/" / "?" )
205: * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
206: * in the fragment identifier but this is used very broadly for
207: * xpointer scheme selection, so we are allowing it here to not break
208: * for example all the DocBook processing chains.
209: *
210: * Returns 0 or the error code
211: */
212: static int
213: xmlParse3986Fragment(xmlURIPtr uri, const char **str)
214: {
215: const char *cur;
216:
217: if (str == NULL)
218: return (-1);
219:
220: cur = *str;
221:
222: while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
223: (*cur == '[') || (*cur == ']') ||
224: ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
225: NEXT(cur);
226: if (uri != NULL) {
227: if (uri->fragment != NULL)
228: xmlFree(uri->fragment);
229: if (uri->cleanup & 2)
230: uri->fragment = STRNDUP(*str, cur - *str);
231: else
232: uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
233: }
234: *str = cur;
235: return (0);
236: }
237:
238: /**
239: * xmlParse3986Query:
240: * @uri: pointer to an URI structure
241: * @str: pointer to the string to analyze
242: *
243: * Parse the query part of an URI
244: *
245: * query = *uric
246: *
247: * Returns 0 or the error code
248: */
249: static int
250: xmlParse3986Query(xmlURIPtr uri, const char **str)
251: {
252: const char *cur;
253:
254: if (str == NULL)
255: return (-1);
256:
257: cur = *str;
258:
259: while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
260: ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
261: NEXT(cur);
262: if (uri != NULL) {
263: if (uri->query != NULL)
264: xmlFree(uri->query);
265: if (uri->cleanup & 2)
266: uri->query = STRNDUP(*str, cur - *str);
267: else
268: uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
269:
270: /* Save the raw bytes of the query as well.
271: * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
272: */
273: if (uri->query_raw != NULL)
274: xmlFree (uri->query_raw);
275: uri->query_raw = STRNDUP (*str, cur - *str);
276: }
277: *str = cur;
278: return (0);
279: }
280:
281: /**
282: * xmlParse3986Port:
283: * @uri: pointer to an URI structure
284: * @str: the string to analyze
285: *
286: * Parse a port part and fills in the appropriate fields
287: * of the @uri structure
288: *
289: * port = *DIGIT
290: *
291: * Returns 0 or the error code
292: */
293: static int
294: xmlParse3986Port(xmlURIPtr uri, const char **str)
295: {
296: const char *cur = *str;
297:
298: if (ISA_DIGIT(cur)) {
299: if (uri != NULL)
300: uri->port = 0;
301: while (ISA_DIGIT(cur)) {
302: if (uri != NULL)
303: uri->port = uri->port * 10 + (*cur - '0');
304: cur++;
305: }
306: *str = cur;
307: return(0);
308: }
309: return(1);
310: }
311:
312: /**
313: * xmlParse3986Userinfo:
314: * @uri: pointer to an URI structure
315: * @str: the string to analyze
316: *
317: * Parse an user informations part and fills in the appropriate fields
318: * of the @uri structure
319: *
320: * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
321: *
322: * Returns 0 or the error code
323: */
324: static int
325: xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
326: {
327: const char *cur;
328:
329: cur = *str;
330: while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
331: ISA_SUB_DELIM(cur) || (*cur == ':'))
332: NEXT(cur);
333: if (*cur == '@') {
334: if (uri != NULL) {
335: if (uri->user != NULL) xmlFree(uri->user);
336: if (uri->cleanup & 2)
337: uri->user = STRNDUP(*str, cur - *str);
338: else
339: uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
340: }
341: *str = cur;
342: return(0);
343: }
344: return(1);
345: }
346:
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are consumed; *@str is only
 * updated on success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    char d0, d1, d2;

    d0 = cur[0];
    if ((d0 < '0') || (d0 > '9'))
        return(1);

    /* d0 is a digit, so cur[1] is at worst the terminator */
    d1 = cur[1];
    if ((d1 < '0') || (d1 > '9')) {
        /* DIGIT: 0-9 */
        *str = cur + 1;
        return(0);
    }

    /* d1 is a digit, so cur[2] is at worst the terminator */
    d2 = cur[2];
    if ((d2 < '0') || (d2 > '9')) {
        /* %x31-39 DIGIT: 10-99, a leading zero is not allowed */
        if (d0 == '0')
            return(1);
        *str = cur + 2;
        return(0);
    }

    /* three digits: only 100-255 is acceptable */
    if (d0 == '1') {
        /* 100-199 */
    } else if ((d0 == '2') && (d1 <= '4')) {
        /* 200-249 */
    } else if ((d0 == '2') && (d1 == '5') && (d2 <= '5')) {
        /* 250-255: the bound applies to the third digit */
    } else {
        return(1);
    }
    *str = cur + 3;
    return(0);
}
384: /**
385: * xmlParse3986Host:
386: * @uri: pointer to an URI structure
387: * @str: the string to analyze
388: *
389: * Parse an host part and fills in the appropriate fields
390: * of the @uri structure
391: *
392: * host = IP-literal / IPv4address / reg-name
393: * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
394: * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
395: * reg-name = *( unreserved / pct-encoded / sub-delims )
396: *
397: * Returns 0 or the error code
398: */
399: static int
400: xmlParse3986Host(xmlURIPtr uri, const char **str)
401: {
402: const char *cur = *str;
403: const char *host;
404:
405: host = cur;
406: /*
407: * IPv6 and future adressing scheme are enclosed between brackets
408: */
409: if (*cur == '[') {
410: cur++;
411: while ((*cur != ']') && (*cur != 0))
412: cur++;
413: if (*cur != ']')
414: return(1);
415: cur++;
416: goto found;
417: }
418: /*
419: * try to parse an IPv4
420: */
421: if (ISA_DIGIT(cur)) {
422: if (xmlParse3986DecOctet(&cur) != 0)
423: goto not_ipv4;
424: if (*cur != '.')
425: goto not_ipv4;
426: cur++;
427: if (xmlParse3986DecOctet(&cur) != 0)
428: goto not_ipv4;
429: if (*cur != '.')
430: goto not_ipv4;
431: if (xmlParse3986DecOctet(&cur) != 0)
432: goto not_ipv4;
433: if (*cur != '.')
434: goto not_ipv4;
435: if (xmlParse3986DecOctet(&cur) != 0)
436: goto not_ipv4;
437: goto found;
438: not_ipv4:
439: cur = *str;
440: }
441: /*
442: * then this should be a hostname which can be empty
443: */
444: while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
445: NEXT(cur);
446: found:
447: if (uri != NULL) {
448: if (uri->authority != NULL) xmlFree(uri->authority);
449: uri->authority = NULL;
450: if (uri->server != NULL) xmlFree(uri->server);
451: if (cur != host) {
452: if (uri->cleanup & 2)
453: uri->server = STRNDUP(host, cur - host);
454: else
455: uri->server = xmlURIUnescapeString(host, cur - host, NULL);
456: } else
457: uri->server = NULL;
458: }
459: *str = cur;
460: return(0);
461: }
462:
463: /**
464: * xmlParse3986Authority:
465: * @uri: pointer to an URI structure
466: * @str: the string to analyze
467: *
468: * Parse an authority part and fills in the appropriate fields
469: * of the @uri structure
470: *
471: * authority = [ userinfo "@" ] host [ ":" port ]
472: *
473: * Returns 0 or the error code
474: */
475: static int
476: xmlParse3986Authority(xmlURIPtr uri, const char **str)
477: {
478: const char *cur;
479: int ret;
480:
481: cur = *str;
482: /*
483: * try to parse an userinfo and check for the trailing @
484: */
485: ret = xmlParse3986Userinfo(uri, &cur);
486: if ((ret != 0) || (*cur != '@'))
487: cur = *str;
488: else
489: cur++;
490: ret = xmlParse3986Host(uri, &cur);
491: if (ret != 0) return(ret);
492: if (*cur == ':') {
493: cur++;
494: ret = xmlParse3986Port(uri, &cur);
495: if (ret != 0) return(ret);
496: }
497: *str = cur;
498: return(0);
499: }
500:
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character (0 for none)
 * @empty: allow an empty segment
 *
 * Skip over one path segment; nothing is stored, only *@str is moved.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Scanning stops at the first character which is not a pchar or which
 * equals @forbid.
 *
 * Returns 0 on success, 1 if the input does not start with a pchar
 *         while @empty is zero
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *end = *str;

    if (!ISA_PCHAR(end))
        return(empty ? 0 : 1);

    for (;;) {
        if (!ISA_PCHAR(end))
            break;
        if (*end == forbid)
            break;
        NEXT(end);
    }
    *str = end;
    return (0);
}
533:
534: /**
535: * xmlParse3986PathAbEmpty:
536: * @uri: pointer to an URI structure
537: * @str: the string to analyze
538: *
539: * Parse an path absolute or empty and fills in the appropriate fields
540: * of the @uri structure
541: *
542: * path-abempty = *( "/" segment )
543: *
544: * Returns 0 or the error code
545: */
546: static int
547: xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
548: {
549: const char *cur;
550: int ret;
551:
552: cur = *str;
553:
554: while (*cur == '/') {
555: cur++;
556: ret = xmlParse3986Segment(&cur, 0, 1);
557: if (ret != 0) return(ret);
558: }
559: if (uri != NULL) {
560: if (uri->path != NULL) xmlFree(uri->path);
561: if (*str != cur) {
562: if (uri->cleanup & 2)
563: uri->path = STRNDUP(*str, cur - *str);
564: else
565: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
566: } else {
567: uri->path = NULL;
568: }
569: }
570: *str = cur;
571: return (0);
572: }
573:
574: /**
575: * xmlParse3986PathAbsolute:
576: * @uri: pointer to an URI structure
577: * @str: the string to analyze
578: *
579: * Parse an path absolute and fills in the appropriate fields
580: * of the @uri structure
581: *
582: * path-absolute = "/" [ segment-nz *( "/" segment ) ]
583: *
584: * Returns 0 or the error code
585: */
586: static int
587: xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
588: {
589: const char *cur;
590: int ret;
591:
592: cur = *str;
593:
594: if (*cur != '/')
595: return(1);
596: cur++;
597: ret = xmlParse3986Segment(&cur, 0, 0);
598: if (ret == 0) {
599: while (*cur == '/') {
600: cur++;
601: ret = xmlParse3986Segment(&cur, 0, 1);
602: if (ret != 0) return(ret);
603: }
604: }
605: if (uri != NULL) {
606: if (uri->path != NULL) xmlFree(uri->path);
607: if (cur != *str) {
608: if (uri->cleanup & 2)
609: uri->path = STRNDUP(*str, cur - *str);
610: else
611: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
612: } else {
613: uri->path = NULL;
614: }
615: }
616: *str = cur;
617: return (0);
618: }
619:
620: /**
621: * xmlParse3986PathRootless:
622: * @uri: pointer to an URI structure
623: * @str: the string to analyze
624: *
625: * Parse an path without root and fills in the appropriate fields
626: * of the @uri structure
627: *
628: * path-rootless = segment-nz *( "/" segment )
629: *
630: * Returns 0 or the error code
631: */
632: static int
633: xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
634: {
635: const char *cur;
636: int ret;
637:
638: cur = *str;
639:
640: ret = xmlParse3986Segment(&cur, 0, 0);
641: if (ret != 0) return(ret);
642: while (*cur == '/') {
643: cur++;
644: ret = xmlParse3986Segment(&cur, 0, 1);
645: if (ret != 0) return(ret);
646: }
647: if (uri != NULL) {
648: if (uri->path != NULL) xmlFree(uri->path);
649: if (cur != *str) {
650: if (uri->cleanup & 2)
651: uri->path = STRNDUP(*str, cur - *str);
652: else
653: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
654: } else {
655: uri->path = NULL;
656: }
657: }
658: *str = cur;
659: return (0);
660: }
661:
662: /**
663: * xmlParse3986PathNoScheme:
664: * @uri: pointer to an URI structure
665: * @str: the string to analyze
666: *
667: * Parse an path which is not a scheme and fills in the appropriate fields
668: * of the @uri structure
669: *
670: * path-noscheme = segment-nz-nc *( "/" segment )
671: *
672: * Returns 0 or the error code
673: */
674: static int
675: xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
676: {
677: const char *cur;
678: int ret;
679:
680: cur = *str;
681:
682: ret = xmlParse3986Segment(&cur, ':', 0);
683: if (ret != 0) return(ret);
684: while (*cur == '/') {
685: cur++;
686: ret = xmlParse3986Segment(&cur, 0, 1);
687: if (ret != 0) return(ret);
688: }
689: if (uri != NULL) {
690: if (uri->path != NULL) xmlFree(uri->path);
691: if (cur != *str) {
692: if (uri->cleanup & 2)
693: uri->path = STRNDUP(*str, cur - *str);
694: else
695: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
696: } else {
697: uri->path = NULL;
698: }
699: }
700: *str = cur;
701: return (0);
702: }
703:
704: /**
705: * xmlParse3986HierPart:
706: * @uri: pointer to an URI structure
707: * @str: the string to analyze
708: *
709: * Parse an hierarchical part and fills in the appropriate fields
710: * of the @uri structure
711: *
712: * hier-part = "//" authority path-abempty
713: * / path-absolute
714: * / path-rootless
715: * / path-empty
716: *
717: * Returns 0 or the error code
718: */
719: static int
720: xmlParse3986HierPart(xmlURIPtr uri, const char **str)
721: {
722: const char *cur;
723: int ret;
724:
725: cur = *str;
726:
727: if ((*cur == '/') && (*(cur + 1) == '/')) {
728: cur += 2;
729: ret = xmlParse3986Authority(uri, &cur);
730: if (ret != 0) return(ret);
731: ret = xmlParse3986PathAbEmpty(uri, &cur);
732: if (ret != 0) return(ret);
733: *str = cur;
734: return(0);
735: } else if (*cur == '/') {
736: ret = xmlParse3986PathAbsolute(uri, &cur);
737: if (ret != 0) return(ret);
738: } else if (ISA_PCHAR(cur)) {
739: ret = xmlParse3986PathRootless(uri, &cur);
740: if (ret != 0) return(ret);
741: } else {
742: /* path-empty is effectively empty */
743: if (uri != NULL) {
744: if (uri->path != NULL) xmlFree(uri->path);
745: uri->path = NULL;
746: }
747: }
748: *str = cur;
749: return (0);
750: }
751:
752: /**
753: * xmlParse3986RelativeRef:
754: * @uri: pointer to an URI structure
755: * @str: the string to analyze
756: *
757: * Parse an URI string and fills in the appropriate fields
758: * of the @uri structure
759: *
760: * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
761: * relative-part = "//" authority path-abempty
762: * / path-absolute
763: * / path-noscheme
764: * / path-empty
765: *
766: * Returns 0 or the error code
767: */
768: static int
769: xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
770: int ret;
771:
772: if ((*str == '/') && (*(str + 1) == '/')) {
773: str += 2;
774: ret = xmlParse3986Authority(uri, &str);
775: if (ret != 0) return(ret);
776: ret = xmlParse3986PathAbEmpty(uri, &str);
777: if (ret != 0) return(ret);
778: } else if (*str == '/') {
779: ret = xmlParse3986PathAbsolute(uri, &str);
780: if (ret != 0) return(ret);
781: } else if (ISA_PCHAR(str)) {
782: ret = xmlParse3986PathNoScheme(uri, &str);
783: if (ret != 0) return(ret);
784: } else {
785: /* path-empty is effectively empty */
786: if (uri != NULL) {
787: if (uri->path != NULL) xmlFree(uri->path);
788: uri->path = NULL;
789: }
790: }
791:
792: if (*str == '?') {
793: str++;
794: ret = xmlParse3986Query(uri, &str);
795: if (ret != 0) return(ret);
796: }
797: if (*str == '#') {
798: str++;
799: ret = xmlParse3986Fragment(uri, &str);
800: if (ret != 0) return(ret);
801: }
802: if (*str != 0) {
803: xmlCleanURI(uri);
804: return(1);
805: }
806: return(0);
807: }
808:
809:
810: /**
811: * xmlParse3986URI:
812: * @uri: pointer to an URI structure
813: * @str: the string to analyze
814: *
815: * Parse an URI string and fills in the appropriate fields
816: * of the @uri structure
817: *
818: * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
819: *
820: * Returns 0 or the error code
821: */
822: static int
823: xmlParse3986URI(xmlURIPtr uri, const char *str) {
824: int ret;
825:
826: ret = xmlParse3986Scheme(uri, &str);
827: if (ret != 0) return(ret);
828: if (*str != ':') {
829: return(1);
830: }
831: str++;
832: ret = xmlParse3986HierPart(uri, &str);
833: if (ret != 0) return(ret);
834: if (*str == '?') {
835: str++;
836: ret = xmlParse3986Query(uri, &str);
837: if (ret != 0) return(ret);
838: }
839: if (*str == '#') {
840: str++;
841: ret = xmlParse3986Fragment(uri, &str);
842: if (ret != 0) return(ret);
843: }
844: if (*str != 0) {
845: xmlCleanURI(uri);
846: return(1);
847: }
848: return(0);
849: }
850:
851: /**
852: * xmlParse3986URIReference:
853: * @uri: pointer to an URI structure
854: * @str: the string to analyze
855: *
856: * Parse an URI reference string and fills in the appropriate fields
857: * of the @uri structure
858: *
859: * URI-reference = URI / relative-ref
860: *
861: * Returns 0 or the error code
862: */
863: static int
864: xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
865: int ret;
866:
867: if (str == NULL)
868: return(-1);
869: xmlCleanURI(uri);
870:
871: /*
872: * Try first to parse absolute refs, then fallback to relative if
873: * it fails.
874: */
875: ret = xmlParse3986URI(uri, str);
876: if (ret != 0) {
877: xmlCleanURI(uri);
878: ret = xmlParse3986RelativeRef(uri, str);
879: if (ret != 0) {
880: xmlCleanURI(uri);
881: return(ret);
882: }
883: }
884: return(0);
885: }
886:
887: /**
888: * xmlParseURI:
889: * @str: the URI string to analyze
890: *
891: * Parse an URI based on RFC 3986
892: *
893: * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
894: *
895: * Returns a newly built xmlURIPtr or NULL in case of error
896: */
897: xmlURIPtr
898: xmlParseURI(const char *str) {
899: xmlURIPtr uri;
900: int ret;
901:
902: if (str == NULL)
903: return(NULL);
904: uri = xmlCreateURI();
905: if (uri != NULL) {
906: ret = xmlParse3986URIReference(uri, str);
907: if (ret) {
908: xmlFreeURI(uri);
909: return(NULL);
910: }
911: }
912: return(uri);
913: }
914:
915: /**
916: * xmlParseURIReference:
917: * @uri: pointer to an URI structure
918: * @str: the string to analyze
919: *
920: * Parse an URI reference string based on RFC 3986 and fills in the
921: * appropriate fields of the @uri structure
922: *
923: * URI-reference = URI / relative-ref
924: *
925: * Returns 0 or the error code
926: */
927: int
928: xmlParseURIReference(xmlURIPtr uri, const char *str) {
929: return(xmlParse3986URIReference(uri, str));
930: }
931:
932: /**
933: * xmlParseURIRaw:
934: * @str: the URI string to analyze
935: * @raw: if 1 unescaping of URI pieces are disabled
936: *
937: * Parse an URI but allows to keep intact the original fragments.
938: *
939: * URI-reference = URI / relative-ref
940: *
941: * Returns a newly built xmlURIPtr or NULL in case of error
942: */
943: xmlURIPtr
944: xmlParseURIRaw(const char *str, int raw) {
945: xmlURIPtr uri;
946: int ret;
947:
948: if (str == NULL)
949: return(NULL);
950: uri = xmlCreateURI();
951: if (uri != NULL) {
952: if (raw) {
953: uri->cleanup |= 2;
954: }
955: ret = xmlParseURIReference(uri, str);
956: if (ret) {
957: xmlFreeURI(uri);
958: return(NULL);
959: }
960: }
961: return(uri);
962: }
963:
964: /************************************************************************
965: * *
966: * Generic URI structure functions *
967: * *
968: ************************************************************************/
969:
970: /**
971: * xmlCreateURI:
972: *
973: * Simply creates an empty xmlURI
974: *
975: * Returns the new structure or NULL in case of error
976: */
977: xmlURIPtr
978: xmlCreateURI(void) {
979: xmlURIPtr ret;
980:
981: ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
982: if (ret == NULL) {
983: xmlGenericError(xmlGenericErrorContext,
984: "xmlCreateURI: out of memory\n");
985: return(NULL);
986: }
987: memset(ret, 0, sizeof(xmlURI));
988: return(ret);
989: }
990:
991: /**
992: * xmlSaveUri:
993: * @uri: pointer to an xmlURI
994: *
995: * Save the URI as an escaped string
996: *
997: * Returns a new string (to be deallocated by caller)
998: */
999: xmlChar *
1000: xmlSaveUri(xmlURIPtr uri) {
1001: xmlChar *ret = NULL;
1002: xmlChar *temp;
1003: const char *p;
1004: int len;
1005: int max;
1006:
1007: if (uri == NULL) return(NULL);
1008:
1009:
1010: max = 80;
1011: ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1012: if (ret == NULL) {
1013: xmlGenericError(xmlGenericErrorContext,
1014: "xmlSaveUri: out of memory\n");
1015: return(NULL);
1016: }
1017: len = 0;
1018:
1019: if (uri->scheme != NULL) {
1020: p = uri->scheme;
1021: while (*p != 0) {
1022: if (len >= max) {
1023: max *= 2;
1024: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1025: if (temp == NULL) {
1026: xmlGenericError(xmlGenericErrorContext,
1027: "xmlSaveUri: out of memory\n");
1028: xmlFree(ret);
1029: return(NULL);
1030: }
1031: ret = temp;
1032: }
1033: ret[len++] = *p++;
1034: }
1035: if (len >= max) {
1036: max *= 2;
1037: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1038: if (temp == NULL) {
1039: xmlGenericError(xmlGenericErrorContext,
1040: "xmlSaveUri: out of memory\n");
1041: xmlFree(ret);
1042: return(NULL);
1043: }
1044: ret = temp;
1045: }
1046: ret[len++] = ':';
1047: }
1048: if (uri->opaque != NULL) {
1049: p = uri->opaque;
1050: while (*p != 0) {
1051: if (len + 3 >= max) {
1052: max *= 2;
1053: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1054: if (temp == NULL) {
1055: xmlGenericError(xmlGenericErrorContext,
1056: "xmlSaveUri: out of memory\n");
1057: xmlFree(ret);
1058: return(NULL);
1059: }
1060: ret = temp;
1061: }
1062: if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1063: ret[len++] = *p++;
1064: else {
1065: int val = *(unsigned char *)p++;
1066: int hi = val / 0x10, lo = val % 0x10;
1067: ret[len++] = '%';
1068: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1069: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1070: }
1071: }
1072: } else {
1073: if (uri->server != NULL) {
1074: if (len + 3 >= max) {
1075: max *= 2;
1076: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1077: if (temp == NULL) {
1078: xmlGenericError(xmlGenericErrorContext,
1079: "xmlSaveUri: out of memory\n");
1080: xmlFree(ret);
1081: return(NULL);
1082: }
1083: ret = temp;
1084: }
1085: ret[len++] = '/';
1086: ret[len++] = '/';
1087: if (uri->user != NULL) {
1088: p = uri->user;
1089: while (*p != 0) {
1090: if (len + 3 >= max) {
1091: max *= 2;
1092: temp = (xmlChar *) xmlRealloc(ret,
1093: (max + 1) * sizeof(xmlChar));
1094: if (temp == NULL) {
1095: xmlGenericError(xmlGenericErrorContext,
1096: "xmlSaveUri: out of memory\n");
1097: xmlFree(ret);
1098: return(NULL);
1099: }
1100: ret = temp;
1101: }
1102: if ((IS_UNRESERVED(*(p))) ||
1103: ((*(p) == ';')) || ((*(p) == ':')) ||
1104: ((*(p) == '&')) || ((*(p) == '=')) ||
1105: ((*(p) == '+')) || ((*(p) == '$')) ||
1106: ((*(p) == ',')))
1107: ret[len++] = *p++;
1108: else {
1109: int val = *(unsigned char *)p++;
1110: int hi = val / 0x10, lo = val % 0x10;
1111: ret[len++] = '%';
1112: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1113: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1114: }
1115: }
1116: if (len + 3 >= max) {
1117: max *= 2;
1118: temp = (xmlChar *) xmlRealloc(ret,
1119: (max + 1) * sizeof(xmlChar));
1120: if (temp == NULL) {
1121: xmlGenericError(xmlGenericErrorContext,
1122: "xmlSaveUri: out of memory\n");
1123: xmlFree(ret);
1124: return(NULL);
1125: }
1126: ret = temp;
1127: }
1128: ret[len++] = '@';
1129: }
1130: p = uri->server;
1131: while (*p != 0) {
1132: if (len >= max) {
1133: max *= 2;
1134: temp = (xmlChar *) xmlRealloc(ret,
1135: (max + 1) * sizeof(xmlChar));
1136: if (temp == NULL) {
1137: xmlGenericError(xmlGenericErrorContext,
1138: "xmlSaveUri: out of memory\n");
1139: xmlFree(ret);
1140: return(NULL);
1141: }
1142: ret = temp;
1143: }
1144: ret[len++] = *p++;
1145: }
1146: if (uri->port > 0) {
1147: if (len + 10 >= max) {
1148: max *= 2;
1149: temp = (xmlChar *) xmlRealloc(ret,
1150: (max + 1) * sizeof(xmlChar));
1151: if (temp == NULL) {
1152: xmlGenericError(xmlGenericErrorContext,
1153: "xmlSaveUri: out of memory\n");
1154: xmlFree(ret);
1155: return(NULL);
1156: }
1157: ret = temp;
1158: }
1159: len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1160: }
1161: } else if (uri->authority != NULL) {
1162: if (len + 3 >= max) {
1163: max *= 2;
1164: temp = (xmlChar *) xmlRealloc(ret,
1165: (max + 1) * sizeof(xmlChar));
1166: if (temp == NULL) {
1167: xmlGenericError(xmlGenericErrorContext,
1168: "xmlSaveUri: out of memory\n");
1169: xmlFree(ret);
1170: return(NULL);
1171: }
1172: ret = temp;
1173: }
1174: ret[len++] = '/';
1175: ret[len++] = '/';
1176: p = uri->authority;
1177: while (*p != 0) {
1178: if (len + 3 >= max) {
1179: max *= 2;
1180: temp = (xmlChar *) xmlRealloc(ret,
1181: (max + 1) * sizeof(xmlChar));
1182: if (temp == NULL) {
1183: xmlGenericError(xmlGenericErrorContext,
1184: "xmlSaveUri: out of memory\n");
1185: xmlFree(ret);
1186: return(NULL);
1187: }
1188: ret = temp;
1189: }
1190: if ((IS_UNRESERVED(*(p))) ||
1191: ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1192: ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1193: ((*(p) == '=')) || ((*(p) == '+')))
1194: ret[len++] = *p++;
1195: else {
1196: int val = *(unsigned char *)p++;
1197: int hi = val / 0x10, lo = val % 0x10;
1198: ret[len++] = '%';
1199: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1200: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1201: }
1202: }
1203: } else if (uri->scheme != NULL) {
1204: if (len + 3 >= max) {
1205: max *= 2;
1206: temp = (xmlChar *) xmlRealloc(ret,
1207: (max + 1) * sizeof(xmlChar));
1208: if (temp == NULL) {
1209: xmlGenericError(xmlGenericErrorContext,
1210: "xmlSaveUri: out of memory\n");
1211: xmlFree(ret);
1212: return(NULL);
1213: }
1214: ret = temp;
1215: }
1216: ret[len++] = '/';
1217: ret[len++] = '/';
1218: }
1219: if (uri->path != NULL) {
1220: p = uri->path;
1221: /*
1222: * the colon in file:///d: should not be escaped or
1223: * Windows accesses fail later.
1224: */
1225: if ((uri->scheme != NULL) &&
1226: (p[0] == '/') &&
1227: (((p[1] >= 'a') && (p[1] <= 'z')) ||
1228: ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1229: (p[2] == ':') &&
1230: (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1231: if (len + 3 >= max) {
1232: max *= 2;
1233: ret = (xmlChar *) xmlRealloc(ret,
1234: (max + 1) * sizeof(xmlChar));
1235: if (ret == NULL) {
1236: xmlGenericError(xmlGenericErrorContext,
1237: "xmlSaveUri: out of memory\n");
1238: return(NULL);
1239: }
1240: }
1241: ret[len++] = *p++;
1242: ret[len++] = *p++;
1243: ret[len++] = *p++;
1244: }
1245: while (*p != 0) {
1246: if (len + 3 >= max) {
1247: max *= 2;
1248: temp = (xmlChar *) xmlRealloc(ret,
1249: (max + 1) * sizeof(xmlChar));
1250: if (temp == NULL) {
1251: xmlGenericError(xmlGenericErrorContext,
1252: "xmlSaveUri: out of memory\n");
1253: xmlFree(ret);
1254: return(NULL);
1255: }
1256: ret = temp;
1257: }
1258: if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1259: ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1260: ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1261: ((*(p) == ',')))
1262: ret[len++] = *p++;
1263: else {
1264: int val = *(unsigned char *)p++;
1265: int hi = val / 0x10, lo = val % 0x10;
1266: ret[len++] = '%';
1267: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1268: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1269: }
1270: }
1271: }
1272: if (uri->query_raw != NULL) {
1273: if (len + 1 >= max) {
1274: max *= 2;
1275: temp = (xmlChar *) xmlRealloc(ret,
1276: (max + 1) * sizeof(xmlChar));
1277: if (temp == NULL) {
1278: xmlGenericError(xmlGenericErrorContext,
1279: "xmlSaveUri: out of memory\n");
1280: xmlFree(ret);
1281: return(NULL);
1282: }
1283: ret = temp;
1284: }
1285: ret[len++] = '?';
1286: p = uri->query_raw;
1287: while (*p != 0) {
1288: if (len + 1 >= max) {
1289: max *= 2;
1290: temp = (xmlChar *) xmlRealloc(ret,
1291: (max + 1) * sizeof(xmlChar));
1292: if (temp == NULL) {
1293: xmlGenericError(xmlGenericErrorContext,
1294: "xmlSaveUri: out of memory\n");
1295: xmlFree(ret);
1296: return(NULL);
1297: }
1298: ret = temp;
1299: }
1300: ret[len++] = *p++;
1301: }
1302: } else if (uri->query != NULL) {
1303: if (len + 3 >= max) {
1304: max *= 2;
1305: temp = (xmlChar *) xmlRealloc(ret,
1306: (max + 1) * sizeof(xmlChar));
1307: if (temp == NULL) {
1308: xmlGenericError(xmlGenericErrorContext,
1309: "xmlSaveUri: out of memory\n");
1310: xmlFree(ret);
1311: return(NULL);
1312: }
1313: ret = temp;
1314: }
1315: ret[len++] = '?';
1316: p = uri->query;
1317: while (*p != 0) {
1318: if (len + 3 >= max) {
1319: max *= 2;
1320: temp = (xmlChar *) xmlRealloc(ret,
1321: (max + 1) * sizeof(xmlChar));
1322: if (temp == NULL) {
1323: xmlGenericError(xmlGenericErrorContext,
1324: "xmlSaveUri: out of memory\n");
1325: xmlFree(ret);
1326: return(NULL);
1327: }
1328: ret = temp;
1329: }
1330: if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1331: ret[len++] = *p++;
1332: else {
1333: int val = *(unsigned char *)p++;
1334: int hi = val / 0x10, lo = val % 0x10;
1335: ret[len++] = '%';
1336: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1337: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1338: }
1339: }
1340: }
1341: }
1342: if (uri->fragment != NULL) {
1343: if (len + 3 >= max) {
1344: max *= 2;
1345: temp = (xmlChar *) xmlRealloc(ret,
1346: (max + 1) * sizeof(xmlChar));
1347: if (temp == NULL) {
1348: xmlGenericError(xmlGenericErrorContext,
1349: "xmlSaveUri: out of memory\n");
1350: xmlFree(ret);
1351: return(NULL);
1352: }
1353: ret = temp;
1354: }
1355: ret[len++] = '#';
1356: p = uri->fragment;
1357: while (*p != 0) {
1358: if (len + 3 >= max) {
1359: max *= 2;
1360: temp = (xmlChar *) xmlRealloc(ret,
1361: (max + 1) * sizeof(xmlChar));
1362: if (temp == NULL) {
1363: xmlGenericError(xmlGenericErrorContext,
1364: "xmlSaveUri: out of memory\n");
1365: xmlFree(ret);
1366: return(NULL);
1367: }
1368: ret = temp;
1369: }
1370: if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1371: ret[len++] = *p++;
1372: else {
1373: int val = *(unsigned char *)p++;
1374: int hi = val / 0x10, lo = val % 0x10;
1375: ret[len++] = '%';
1376: ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1377: ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1378: }
1379: }
1380: }
1381: if (len >= max) {
1382: max *= 2;
1383: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1384: if (temp == NULL) {
1385: xmlGenericError(xmlGenericErrorContext,
1386: "xmlSaveUri: out of memory\n");
1387: xmlFree(ret);
1388: return(NULL);
1389: }
1390: ret = temp;
1391: }
1392: ret[len] = 0;
1393: return(ret);
1394: }
1395:
1396: /**
1397: * xmlPrintURI:
1398: * @stream: a FILE* for the output
1399: * @uri: pointer to an xmlURI
1400: *
1401: * Prints the URI in the stream @stream.
1402: */
1403: void
1404: xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1405: xmlChar *out;
1406:
1407: out = xmlSaveUri(uri);
1408: if (out != NULL) {
1409: fprintf(stream, "%s", (char *) out);
1410: xmlFree(out);
1411: }
1412: }
1413:
1414: /**
1415: * xmlCleanURI:
1416: * @uri: pointer to an xmlURI
1417: *
1418: * Make sure the xmlURI struct is free of content
1419: */
1420: static void
1421: xmlCleanURI(xmlURIPtr uri) {
1422: if (uri == NULL) return;
1423:
1424: if (uri->scheme != NULL) xmlFree(uri->scheme);
1425: uri->scheme = NULL;
1426: if (uri->server != NULL) xmlFree(uri->server);
1427: uri->server = NULL;
1428: if (uri->user != NULL) xmlFree(uri->user);
1429: uri->user = NULL;
1430: if (uri->path != NULL) xmlFree(uri->path);
1431: uri->path = NULL;
1432: if (uri->fragment != NULL) xmlFree(uri->fragment);
1433: uri->fragment = NULL;
1434: if (uri->opaque != NULL) xmlFree(uri->opaque);
1435: uri->opaque = NULL;
1436: if (uri->authority != NULL) xmlFree(uri->authority);
1437: uri->authority = NULL;
1438: if (uri->query != NULL) xmlFree(uri->query);
1439: uri->query = NULL;
1440: if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1441: uri->query_raw = NULL;
1442: }
1443:
1444: /**
1445: * xmlFreeURI:
1446: * @uri: pointer to an xmlURI
1447: *
1448: * Free up the xmlURI struct
1449: */
1450: void
1451: xmlFreeURI(xmlURIPtr uri) {
1452: if (uri == NULL) return;
1453:
1454: if (uri->scheme != NULL) xmlFree(uri->scheme);
1455: if (uri->server != NULL) xmlFree(uri->server);
1456: if (uri->user != NULL) xmlFree(uri->user);
1457: if (uri->path != NULL) xmlFree(uri->path);
1458: if (uri->fragment != NULL) xmlFree(uri->fragment);
1459: if (uri->opaque != NULL) xmlFree(uri->opaque);
1460: if (uri->authority != NULL) xmlFree(uri->authority);
1461: if (uri->query != NULL) xmlFree(uri->query);
1462: if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1463: xmlFree(uri);
1464: }
1465:
1466: /************************************************************************
1467: * *
1468: * Helper functions *
1469: * *
1470: ************************************************************************/
1471:
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.  Runs of "/" inside the path are
 * collapsed to a single "/" as well; leading "/" characters are kept.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize "//": keep only the last slash of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy by hand rather than
         * with strcpy().
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* If there are no previous segments, then keep going from here.
         * Nothing precedes "cur" when it is the very start of the buffer,
         * or when the backwards scan only finds '/' characters.  Testing
         * the character the scan stopped on (instead of comparing the
         * pointer with "path") keeps a one-character leading segment such
         * as the "a" of "a/b/../../c" eligible for removal.
         */
        if (cur == path)
            continue;
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if (segp[0] == '/')
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1659:
/*
 * is_hex:
 * Returns 1 when @c is an ASCII hexadecimal digit (0-9, a-f or A-F),
 * 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1667:
1668: /**
1669: * xmlURIUnescapeString:
1670: * @str: the string to unescape
1671: * @len: the length in bytes to unescape (or <= 0 to indicate full string)
1672: * @target: optional destination buffer
1673: *
1674: * Unescaping routine, but does not check that the string is an URI. The
1675: * output is a direct unsigned char translation of %XX values (no encoding)
1676: * Note that the length of the result can only be smaller or same size as
1677: * the input string.
1678: *
1679: * Returns a copy of the string, but unescaped, will return NULL only in case
1680: * of error
1681: */
1682: char *
1683: xmlURIUnescapeString(const char *str, int len, char *target) {
1684: char *ret, *out;
1685: const char *in;
1686:
1687: if (str == NULL)
1688: return(NULL);
1689: if (len <= 0) len = strlen(str);
1690: if (len < 0) return(NULL);
1691:
1692: if (target == NULL) {
1693: ret = (char *) xmlMallocAtomic(len + 1);
1694: if (ret == NULL) {
1695: xmlGenericError(xmlGenericErrorContext,
1696: "xmlURIUnescapeString: out of memory\n");
1697: return(NULL);
1698: }
1699: } else
1700: ret = target;
1701: in = str;
1702: out = ret;
1703: while(len > 0) {
1704: if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
1705: in++;
1706: if ((*in >= '0') && (*in <= '9'))
1707: *out = (*in - '0');
1708: else if ((*in >= 'a') && (*in <= 'f'))
1709: *out = (*in - 'a') + 10;
1710: else if ((*in >= 'A') && (*in <= 'F'))
1711: *out = (*in - 'A') + 10;
1712: in++;
1713: if ((*in >= '0') && (*in <= '9'))
1714: *out = *out * 16 + (*in - '0');
1715: else if ((*in >= 'a') && (*in <= 'f'))
1716: *out = *out * 16 + (*in - 'a') + 10;
1717: else if ((*in >= 'A') && (*in <= 'F'))
1718: *out = *out * 16 + (*in - 'A') + 10;
1719: in++;
1720: len -= 3;
1721: out++;
1722: } else {
1723: *out++ = *in++;
1724: len--;
1725: }
1726: }
1727: *out = 0;
1728: return(ret);
1729: }
1730:
1731: /**
1732: * xmlURIEscapeStr:
1733: * @str: string to escape
1734: * @list: exception list string of chars not to escape
1735: *
1736: * This routine escapes a string to hex, ignoring reserved characters (a-z)
1737: * and the characters in the exception list.
1738: *
1739: * Returns a new escaped string or NULL in case of error.
1740: */
1741: xmlChar *
1742: xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1743: xmlChar *ret, ch;
1744: xmlChar *temp;
1745: const xmlChar *in;
1746:
1747: unsigned int len, out;
1748:
1749: if (str == NULL)
1750: return(NULL);
1751: if (str[0] == 0)
1752: return(xmlStrdup(str));
1753: len = xmlStrlen(str);
1754: if (!(len > 0)) return(NULL);
1755:
1756: len += 20;
1757: ret = (xmlChar *) xmlMallocAtomic(len);
1758: if (ret == NULL) {
1759: xmlGenericError(xmlGenericErrorContext,
1760: "xmlURIEscapeStr: out of memory\n");
1761: return(NULL);
1762: }
1763: in = (const xmlChar *) str;
1764: out = 0;
1765: while(*in != 0) {
1766: if (len - out <= 3) {
1767: len += 20;
1768: temp = (xmlChar *) xmlRealloc(ret, len);
1769: if (temp == NULL) {
1770: xmlGenericError(xmlGenericErrorContext,
1771: "xmlURIEscapeStr: out of memory\n");
1772: xmlFree(ret);
1773: return(NULL);
1774: }
1775: ret = temp;
1776: }
1777:
1778: ch = *in;
1779:
1780: if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1781: unsigned char val;
1782: ret[out++] = '%';
1783: val = ch >> 4;
1784: if (val <= 9)
1785: ret[out++] = '0' + val;
1786: else
1787: ret[out++] = 'A' + val - 0xA;
1788: val = ch & 0xF;
1789: if (val <= 9)
1790: ret[out++] = '0' + val;
1791: else
1792: ret[out++] = 'A' + val - 0xA;
1793: in++;
1794: } else {
1795: ret[out++] = *in++;
1796: }
1797:
1798: }
1799: ret[out] = 0;
1800: return(ret);
1801: }
1802:
1803: /**
1804: * xmlURIEscape:
1805: * @str: the string of the URI to escape
1806: *
1807: * Escaping routine, does not do validity checks !
1808: * It will try to escape the chars needing this, but this is heuristic
1809: * based it's impossible to be sure.
1810: *
1811: * Returns an copy of the string, but escaped
1812: *
1813: * 25 May 2001
1814: * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1815: * according to RFC2396.
1816: * - Carl Douglas
1817: */
1818: xmlChar *
1819: xmlURIEscape(const xmlChar * str)
1820: {
1821: xmlChar *ret, *segment = NULL;
1822: xmlURIPtr uri;
1823: int ret2;
1824:
1825: #define NULLCHK(p) if(!p) { \
1826: xmlGenericError(xmlGenericErrorContext, \
1827: "xmlURIEscape: out of memory\n"); \
1828: xmlFreeURI(uri); \
1829: return NULL; } \
1830:
1831: if (str == NULL)
1832: return (NULL);
1833:
1834: uri = xmlCreateURI();
1835: if (uri != NULL) {
1836: /*
1837: * Allow escaping errors in the unescaped form
1838: */
1839: uri->cleanup = 1;
1840: ret2 = xmlParseURIReference(uri, (const char *)str);
1841: if (ret2) {
1842: xmlFreeURI(uri);
1843: return (NULL);
1844: }
1845: }
1846:
1847: if (!uri)
1848: return NULL;
1849:
1850: ret = NULL;
1851:
1852: if (uri->scheme) {
1853: segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1854: NULLCHK(segment)
1855: ret = xmlStrcat(ret, segment);
1856: ret = xmlStrcat(ret, BAD_CAST ":");
1857: xmlFree(segment);
1858: }
1859:
1860: if (uri->authority) {
1861: segment =
1862: xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1863: NULLCHK(segment)
1864: ret = xmlStrcat(ret, BAD_CAST "//");
1865: ret = xmlStrcat(ret, segment);
1866: xmlFree(segment);
1867: }
1868:
1869: if (uri->user) {
1870: segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1871: NULLCHK(segment)
1872: ret = xmlStrcat(ret,BAD_CAST "//");
1873: ret = xmlStrcat(ret, segment);
1874: ret = xmlStrcat(ret, BAD_CAST "@");
1875: xmlFree(segment);
1876: }
1877:
1878: if (uri->server) {
1879: segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1880: NULLCHK(segment)
1881: if (uri->user == NULL)
1882: ret = xmlStrcat(ret, BAD_CAST "//");
1883: ret = xmlStrcat(ret, segment);
1884: xmlFree(segment);
1885: }
1886:
1887: if (uri->port) {
1888: xmlChar port[10];
1889:
1890: snprintf((char *) port, 10, "%d", uri->port);
1891: ret = xmlStrcat(ret, BAD_CAST ":");
1892: ret = xmlStrcat(ret, port);
1893: }
1894:
1895: if (uri->path) {
1896: segment =
1897: xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1898: NULLCHK(segment)
1899: ret = xmlStrcat(ret, segment);
1900: xmlFree(segment);
1901: }
1902:
1903: if (uri->query_raw) {
1904: ret = xmlStrcat(ret, BAD_CAST "?");
1905: ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1906: }
1907: else if (uri->query) {
1908: segment =
1909: xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1910: NULLCHK(segment)
1911: ret = xmlStrcat(ret, BAD_CAST "?");
1912: ret = xmlStrcat(ret, segment);
1913: xmlFree(segment);
1914: }
1915:
1916: if (uri->opaque) {
1917: segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1918: NULLCHK(segment)
1919: ret = xmlStrcat(ret, segment);
1920: xmlFree(segment);
1921: }
1922:
1923: if (uri->fragment) {
1924: segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1925: NULLCHK(segment)
1926: ret = xmlStrcat(ret, BAD_CAST "#");
1927: ret = xmlStrcat(ret, segment);
1928: xmlFree(segment);
1929: }
1930:
1931: xmlFreeURI(uri);
1932: #undef NULLCHK
1933:
1934: return (ret);
1935: }
1936:
1937: /************************************************************************
1938: * *
1939: * Public functions *
1940: * *
1941: ************************************************************************/
1942:
1943: /**
1944: * xmlBuildURI:
1945: * @URI: the URI instance found in the document
1946: * @base: the base value
1947: *
1948: * Computes he final URI of the reference done by checking that
1949: * the given URI is valid, and building the final URI using the
1950: * base URI. This is processed according to section 5.2 of the
1951: * RFC 2396
1952: *
1953: * 5.2. Resolving Relative References to Absolute Form
1954: *
1955: * Returns a new URI string (to be freed by the caller) or NULL in case
1956: * of error.
1957: */
1958: xmlChar *
1959: xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1960: xmlChar *val = NULL;
1961: int ret, len, indx, cur, out;
1962: xmlURIPtr ref = NULL;
1963: xmlURIPtr bas = NULL;
1964: xmlURIPtr res = NULL;
1965:
1966: /*
1967: * 1) The URI reference is parsed into the potential four components and
1968: * fragment identifier, as described in Section 4.3.
1969: *
1970: * NOTE that a completely empty URI is treated by modern browsers
1971: * as a reference to "." rather than as a synonym for the current
1972: * URI. Should we do that here?
1973: */
1974: if (URI == NULL)
1975: ret = -1;
1976: else {
1977: if (*URI) {
1978: ref = xmlCreateURI();
1979: if (ref == NULL)
1980: goto done;
1981: ret = xmlParseURIReference(ref, (const char *) URI);
1982: }
1983: else
1984: ret = 0;
1985: }
1986: if (ret != 0)
1987: goto done;
1988: if ((ref != NULL) && (ref->scheme != NULL)) {
1989: /*
1990: * The URI is absolute don't modify.
1991: */
1992: val = xmlStrdup(URI);
1993: goto done;
1994: }
1995: if (base == NULL)
1996: ret = -1;
1997: else {
1998: bas = xmlCreateURI();
1999: if (bas == NULL)
2000: goto done;
2001: ret = xmlParseURIReference(bas, (const char *) base);
2002: }
2003: if (ret != 0) {
2004: if (ref)
2005: val = xmlSaveUri(ref);
2006: goto done;
2007: }
2008: if (ref == NULL) {
2009: /*
2010: * the base fragment must be ignored
2011: */
2012: if (bas->fragment != NULL) {
2013: xmlFree(bas->fragment);
2014: bas->fragment = NULL;
2015: }
2016: val = xmlSaveUri(bas);
2017: goto done;
2018: }
2019:
2020: /*
2021: * 2) If the path component is empty and the scheme, authority, and
2022: * query components are undefined, then it is a reference to the
2023: * current document and we are done. Otherwise, the reference URI's
2024: * query and fragment components are defined as found (or not found)
2025: * within the URI reference and not inherited from the base URI.
2026: *
2027: * NOTE that in modern browsers, the parsing differs from the above
2028: * in the following aspect: the query component is allowed to be
2029: * defined while still treating this as a reference to the current
2030: * document.
2031: */
2032: res = xmlCreateURI();
2033: if (res == NULL)
2034: goto done;
2035: if ((ref->scheme == NULL) && (ref->path == NULL) &&
2036: ((ref->authority == NULL) && (ref->server == NULL))) {
2037: if (bas->scheme != NULL)
2038: res->scheme = xmlMemStrdup(bas->scheme);
2039: if (bas->authority != NULL)
2040: res->authority = xmlMemStrdup(bas->authority);
2041: else if (bas->server != NULL) {
2042: res->server = xmlMemStrdup(bas->server);
2043: if (bas->user != NULL)
2044: res->user = xmlMemStrdup(bas->user);
2045: res->port = bas->port;
2046: }
2047: if (bas->path != NULL)
2048: res->path = xmlMemStrdup(bas->path);
2049: if (ref->query_raw != NULL)
2050: res->query_raw = xmlMemStrdup (ref->query_raw);
2051: else if (ref->query != NULL)
2052: res->query = xmlMemStrdup(ref->query);
2053: else if (bas->query_raw != NULL)
2054: res->query_raw = xmlMemStrdup(bas->query_raw);
2055: else if (bas->query != NULL)
2056: res->query = xmlMemStrdup(bas->query);
2057: if (ref->fragment != NULL)
2058: res->fragment = xmlMemStrdup(ref->fragment);
2059: goto step_7;
2060: }
2061:
2062: /*
2063: * 3) If the scheme component is defined, indicating that the reference
2064: * starts with a scheme name, then the reference is interpreted as an
2065: * absolute URI and we are done. Otherwise, the reference URI's
2066: * scheme is inherited from the base URI's scheme component.
2067: */
2068: if (ref->scheme != NULL) {
2069: val = xmlSaveUri(ref);
2070: goto done;
2071: }
2072: if (bas->scheme != NULL)
2073: res->scheme = xmlMemStrdup(bas->scheme);
2074:
2075: if (ref->query_raw != NULL)
2076: res->query_raw = xmlMemStrdup(ref->query_raw);
2077: else if (ref->query != NULL)
2078: res->query = xmlMemStrdup(ref->query);
2079: if (ref->fragment != NULL)
2080: res->fragment = xmlMemStrdup(ref->fragment);
2081:
2082: /*
2083: * 4) If the authority component is defined, then the reference is a
2084: * network-path and we skip to step 7. Otherwise, the reference
2085: * URI's authority is inherited from the base URI's authority
2086: * component, which will also be undefined if the URI scheme does not
2087: * use an authority component.
2088: */
2089: if ((ref->authority != NULL) || (ref->server != NULL)) {
2090: if (ref->authority != NULL)
2091: res->authority = xmlMemStrdup(ref->authority);
2092: else {
2093: res->server = xmlMemStrdup(ref->server);
2094: if (ref->user != NULL)
2095: res->user = xmlMemStrdup(ref->user);
2096: res->port = ref->port;
2097: }
2098: if (ref->path != NULL)
2099: res->path = xmlMemStrdup(ref->path);
2100: goto step_7;
2101: }
2102: if (bas->authority != NULL)
2103: res->authority = xmlMemStrdup(bas->authority);
2104: else if (bas->server != NULL) {
2105: res->server = xmlMemStrdup(bas->server);
2106: if (bas->user != NULL)
2107: res->user = xmlMemStrdup(bas->user);
2108: res->port = bas->port;
2109: }
2110:
2111: /*
2112: * 5) If the path component begins with a slash character ("/"), then
2113: * the reference is an absolute-path and we skip to step 7.
2114: */
2115: if ((ref->path != NULL) && (ref->path[0] == '/')) {
2116: res->path = xmlMemStrdup(ref->path);
2117: goto step_7;
2118: }
2119:
2120:
2121: /*
2122: * 6) If this step is reached, then we are resolving a relative-path
2123: * reference. The relative path needs to be merged with the base
2124: * URI's path. Although there are many ways to do this, we will
2125: * describe a simple method using a separate string buffer.
2126: *
2127: * Allocate a buffer large enough for the result string.
2128: */
2129: len = 2; /* extra / and 0 */
2130: if (ref->path != NULL)
2131: len += strlen(ref->path);
2132: if (bas->path != NULL)
2133: len += strlen(bas->path);
2134: res->path = (char *) xmlMallocAtomic(len);
2135: if (res->path == NULL) {
2136: xmlGenericError(xmlGenericErrorContext,
2137: "xmlBuildURI: out of memory\n");
2138: goto done;
2139: }
2140: res->path[0] = 0;
2141:
2142: /*
2143: * a) All but the last segment of the base URI's path component is
2144: * copied to the buffer. In other words, any characters after the
2145: * last (right-most) slash character, if any, are excluded.
2146: */
2147: cur = 0;
2148: out = 0;
2149: if (bas->path != NULL) {
2150: while (bas->path[cur] != 0) {
2151: while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2152: cur++;
2153: if (bas->path[cur] == 0)
2154: break;
2155:
2156: cur++;
2157: while (out < cur) {
2158: res->path[out] = bas->path[out];
2159: out++;
2160: }
2161: }
2162: }
2163: res->path[out] = 0;
2164:
2165: /*
2166: * b) The reference's path component is appended to the buffer
2167: * string.
2168: */
2169: if (ref->path != NULL && ref->path[0] != 0) {
2170: indx = 0;
2171: /*
2172: * Ensure the path includes a '/'
2173: */
2174: if ((out == 0) && (bas->server != NULL))
2175: res->path[out++] = '/';
2176: while (ref->path[indx] != 0) {
2177: res->path[out++] = ref->path[indx++];
2178: }
2179: }
2180: res->path[out] = 0;
2181:
2182: /*
2183: * Steps c) to h) are really path normalization steps
2184: */
2185: xmlNormalizeURIPath(res->path);
2186:
2187: step_7:
2188:
2189: /*
2190: * 7) The resulting URI components, including any inherited from the
2191: * base URI, are recombined to give the absolute form of the URI
2192: * reference.
2193: */
2194: val = xmlSaveUri(res);
2195:
2196: done:
2197: if (ref != NULL)
2198: xmlFreeURI(ref);
2199: if (bas != NULL)
2200: xmlFreeURI(bas);
2201: if (res != NULL)
2202: xmlFreeURI(res);
2203: return(val);
2204: }
2205:
/**
 * xmlBuildRelativeURI:
 * @URI:  the URI reference under consideration
 * @base:  the base value
 *
 * Expresses the URI of the reference in terms relative to the
 * base.  Some examples of this operation include:
 *     base = "http://site1.com/docs/book1.html"
 *        URI input                        URI returned
 *     docs/pic1.gif                    pic1.gif
 *     docs/img/pic1.gif                img/pic1.gif
 *     img/pic1.gif                     ../img/pic1.gif
 *     http://site1.com/docs/pic1.gif   pic1.gif
 *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
 *
 *     base = "docs/book1.html"
 *        URI input                        URI returned
 *     docs/pic1.gif                    pic1.gif
 *     docs/img/pic1.gif                img/pic1.gif
 *     img/pic1.gif                     ../img/pic1.gif
 *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
 *
 *
 * Note: if the URI reference is really weird or complicated, it may be
 *       worthwhile to first convert it into a "nice" one by calling
 *       xmlBuildURI (using 'base') before calling this routine,
 *       since this routine (for reasonable efficiency) assumes URI has
 *       already been through some validation.
 *
 * Returns a new URI string (to be freed by the caller) or NULL in case
 * of error.
 */
2238: xmlChar *
2239: xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2240: {
2241: xmlChar *val = NULL;
2242: int ret;
2243: int ix;
2244: int pos = 0;
2245: int nbslash = 0;
2246: int len;
2247: xmlURIPtr ref = NULL;
2248: xmlURIPtr bas = NULL;
2249: xmlChar *bptr, *uptr, *vptr;
2250: int remove_path = 0;
2251:
2252: if ((URI == NULL) || (*URI == 0))
2253: return NULL;
2254:
2255: /*
2256: * First parse URI into a standard form
2257: */
2258: ref = xmlCreateURI ();
2259: if (ref == NULL)
2260: return NULL;
2261: /* If URI not already in "relative" form */
2262: if (URI[0] != '.') {
2263: ret = xmlParseURIReference (ref, (const char *) URI);
2264: if (ret != 0)
2265: goto done; /* Error in URI, return NULL */
2266: } else
2267: ref->path = (char *)xmlStrdup(URI);
2268:
2269: /*
2270: * Next parse base into the same standard form
2271: */
2272: if ((base == NULL) || (*base == 0)) {
2273: val = xmlStrdup (URI);
2274: goto done;
2275: }
2276: bas = xmlCreateURI ();
2277: if (bas == NULL)
2278: goto done;
2279: if (base[0] != '.') {
2280: ret = xmlParseURIReference (bas, (const char *) base);
2281: if (ret != 0)
2282: goto done; /* Error in base, return NULL */
2283: } else
2284: bas->path = (char *)xmlStrdup(base);
2285:
2286: /*
2287: * If the scheme / server on the URI differs from the base,
2288: * just return the URI
2289: */
2290: if ((ref->scheme != NULL) &&
2291: ((bas->scheme == NULL) ||
2292: (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2293: (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2294: val = xmlStrdup (URI);
2295: goto done;
2296: }
2297: if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2298: val = xmlStrdup(BAD_CAST "");
2299: goto done;
2300: }
2301: if (bas->path == NULL) {
2302: val = xmlStrdup((xmlChar *)ref->path);
2303: goto done;
2304: }
2305: if (ref->path == NULL) {
2306: ref->path = (char *) "/";
2307: remove_path = 1;
2308: }
2309:
2310: /*
2311: * At this point (at last!) we can compare the two paths
2312: *
2313: * First we take care of the special case where either of the
2314: * two path components may be missing (bug 316224)
2315: */
2316: if (bas->path == NULL) {
2317: if (ref->path != NULL) {
2318: uptr = (xmlChar *) ref->path;
2319: if (*uptr == '/')
2320: uptr++;
2321: /* exception characters from xmlSaveUri */
2322: val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2323: }
2324: goto done;
2325: }
2326: bptr = (xmlChar *)bas->path;
2327: if (ref->path == NULL) {
2328: for (ix = 0; bptr[ix] != 0; ix++) {
2329: if (bptr[ix] == '/')
2330: nbslash++;
2331: }
2332: uptr = NULL;
2333: len = 1; /* this is for a string terminator only */
2334: } else {
2335: /*
2336: * Next we compare the two strings and find where they first differ
2337: */
2338: if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2339: pos += 2;
2340: if ((*bptr == '.') && (bptr[1] == '/'))
2341: bptr += 2;
2342: else if ((*bptr == '/') && (ref->path[pos] != '/'))
2343: bptr++;
2344: while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2345: pos++;
2346:
2347: if (bptr[pos] == ref->path[pos]) {
2348: val = xmlStrdup(BAD_CAST "");
2349: goto done; /* (I can't imagine why anyone would do this) */
2350: }
2351:
2352: /*
2353: * In URI, "back up" to the last '/' encountered. This will be the
2354: * beginning of the "unique" suffix of URI
2355: */
2356: ix = pos;
2357: if ((ref->path[ix] == '/') && (ix > 0))
2358: ix--;
2359: else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2360: ix -= 2;
2361: for (; ix > 0; ix--) {
2362: if (ref->path[ix] == '/')
2363: break;
2364: }
2365: if (ix == 0) {
2366: uptr = (xmlChar *)ref->path;
2367: } else {
2368: ix++;
2369: uptr = (xmlChar *)&ref->path[ix];
2370: }
2371:
2372: /*
2373: * In base, count the number of '/' from the differing point
2374: */
2375: if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2376: for (; bptr[ix] != 0; ix++) {
2377: if (bptr[ix] == '/')
2378: nbslash++;
2379: }
2380: }
2381: len = xmlStrlen (uptr) + 1;
2382: }
2383:
2384: if (nbslash == 0) {
2385: if (uptr != NULL)
2386: /* exception characters from xmlSaveUri */
2387: val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2388: goto done;
2389: }
2390:
2391: /*
2392: * Allocate just enough space for the returned string -
2393: * length of the remainder of the URI, plus enough space
2394: * for the "../" groups, plus one for the terminator
2395: */
2396: val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2397: if (val == NULL) {
2398: xmlGenericError(xmlGenericErrorContext,
2399: "xmlBuildRelativeURI: out of memory\n");
2400: goto done;
2401: }
2402: vptr = val;
2403: /*
2404: * Put in as many "../" as needed
2405: */
2406: for (; nbslash>0; nbslash--) {
2407: *vptr++ = '.';
2408: *vptr++ = '.';
2409: *vptr++ = '/';
2410: }
2411: /*
2412: * Finish up with the end of the URI
2413: */
2414: if (uptr != NULL) {
2415: if ((vptr > val) && (len > 0) &&
2416: (uptr[0] == '/') && (vptr[-1] == '/')) {
2417: memcpy (vptr, uptr + 1, len - 1);
2418: vptr[len - 2] = 0;
2419: } else {
2420: memcpy (vptr, uptr, len);
2421: vptr[len - 1] = 0;
2422: }
2423: } else {
2424: vptr[len - 1] = 0;
2425: }
2426:
2427: /* escape the freshly-built path */
2428: vptr = val;
2429: /* exception characters from xmlSaveUri */
2430: val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2431: xmlFree(vptr);
2432:
2433: done:
2434: /*
2435: * Free the working variables
2436: */
2437: if (remove_path != 0)
2438: ref->path = NULL;
2439: if (ref != NULL)
2440: xmlFreeURI (ref);
2441: if (bas != NULL)
2442: xmlFreeURI (bas);
2443:
2444: return val;
2445: }
2446:
2447: /**
2448: * xmlCanonicPath:
2449: * @path: the resource locator in a filesystem notation
2450: *
2451: * Constructs a canonic path from the specified path.
2452: *
2453: * Returns a new canonic path, or a duplicate of the path parameter if the
2454: * construction fails. The caller is responsible for freeing the memory occupied
2455: * by the returned string. If there is insufficient memory available, or the
2456: * argument is NULL, the function returns NULL.
2457: */
2458: #define IS_WINDOWS_PATH(p) \
2459: ((p != NULL) && \
2460: (((p[0] >= 'a') && (p[0] <= 'z')) || \
2461: ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2462: (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2463: xmlChar *
2464: xmlCanonicPath(const xmlChar *path)
2465: {
2466: /*
2467: * For Windows implementations, additional work needs to be done to
2468: * replace backslashes in pathnames with "forward slashes"
2469: */
2470: #if defined(_WIN32) && !defined(__CYGWIN__)
2471: int len = 0;
2472: int i = 0;
2473: xmlChar *p = NULL;
2474: #endif
2475: xmlURIPtr uri;
2476: xmlChar *ret;
2477: const xmlChar *absuri;
2478:
2479: if (path == NULL)
2480: return(NULL);
2481:
2482: /* sanitize filename starting with // so it can be used as URI */
2483: if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2484: path++;
2485:
2486: if ((uri = xmlParseURI((const char *) path)) != NULL) {
2487: xmlFreeURI(uri);
2488: return xmlStrdup(path);
2489: }
2490:
2491: /* Check if this is an "absolute uri" */
2492: absuri = xmlStrstr(path, BAD_CAST "://");
2493: if (absuri != NULL) {
2494: int l, j;
2495: unsigned char c;
2496: xmlChar *escURI;
2497:
2498: /*
2499: * this looks like an URI where some parts have not been
2500: * escaped leading to a parsing problem. Check that the first
2501: * part matches a protocol.
2502: */
2503: l = absuri - path;
2504: /* Bypass if first part (part before the '://') is > 20 chars */
2505: if ((l <= 0) || (l > 20))
2506: goto path_processing;
2507: /* Bypass if any non-alpha characters are present in first part */
2508: for (j = 0;j < l;j++) {
2509: c = path[j];
2510: if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2511: goto path_processing;
2512: }
2513:
2514: /* Escape all except the characters specified in the supplied path */
2515: escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2516: if (escURI != NULL) {
2517: /* Try parsing the escaped path */
2518: uri = xmlParseURI((const char *) escURI);
2519: /* If successful, return the escaped string */
2520: if (uri != NULL) {
2521: xmlFreeURI(uri);
2522: return escURI;
2523: }
2524: }
2525: }
2526:
2527: path_processing:
2528: /* For Windows implementations, replace backslashes with 'forward slashes' */
2529: #if defined(_WIN32) && !defined(__CYGWIN__)
2530: /*
2531: * Create a URI structure
2532: */
2533: uri = xmlCreateURI();
2534: if (uri == NULL) { /* Guard against 'out of memory' */
2535: return(NULL);
2536: }
2537:
2538: len = xmlStrlen(path);
2539: if ((len > 2) && IS_WINDOWS_PATH(path)) {
2540: /* make the scheme 'file' */
2541: uri->scheme = xmlStrdup(BAD_CAST "file");
2542: /* allocate space for leading '/' + path + string terminator */
2543: uri->path = xmlMallocAtomic(len + 2);
2544: if (uri->path == NULL) {
2545: xmlFreeURI(uri); /* Guard agains 'out of memory' */
2546: return(NULL);
2547: }
2548: /* Put in leading '/' plus path */
2549: uri->path[0] = '/';
2550: p = uri->path + 1;
2551: strncpy(p, path, len + 1);
2552: } else {
2553: uri->path = xmlStrdup(path);
2554: if (uri->path == NULL) {
2555: xmlFreeURI(uri);
2556: return(NULL);
2557: }
2558: p = uri->path;
2559: }
2560: /* Now change all occurences of '\' to '/' */
2561: while (*p != '\0') {
2562: if (*p == '\\')
2563: *p = '/';
2564: p++;
2565: }
2566:
2567: if (uri->scheme == NULL) {
2568: ret = xmlStrdup((const xmlChar *) uri->path);
2569: } else {
2570: ret = xmlSaveUri(uri);
2571: }
2572:
2573: xmlFreeURI(uri);
2574: #else
2575: ret = xmlStrdup((const xmlChar *) path);
2576: #endif
2577: return(ret);
2578: }
2579:
2580: /**
2581: * xmlPathToURI:
2582: * @path: the resource locator in a filesystem notation
2583: *
2584: * Constructs an URI expressing the existing path
2585: *
2586: * Returns a new URI, or a duplicate of the path parameter if the
2587: * construction fails. The caller is responsible for freeing the memory
2588: * occupied by the returned string. If there is insufficient memory available,
2589: * or the argument is NULL, the function returns NULL.
2590: */
2591: xmlChar *
2592: xmlPathToURI(const xmlChar *path)
2593: {
2594: xmlURIPtr uri;
2595: xmlURI temp;
2596: xmlChar *ret, *cal;
2597:
2598: if (path == NULL)
2599: return(NULL);
2600:
2601: if ((uri = xmlParseURI((const char *) path)) != NULL) {
2602: xmlFreeURI(uri);
2603: return xmlStrdup(path);
2604: }
2605: cal = xmlCanonicPath(path);
2606: if (cal == NULL)
2607: return(NULL);
2608: #if defined(_WIN32) && !defined(__CYGWIN__)
2609: /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2610: If 'cal' is a valid URI allready then we are done here, as continuing would make
2611: it invalid. */
2612: if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2613: xmlFreeURI(uri);
2614: return cal;
2615: }
2616: /* 'cal' can contain a relative path with backslashes. If that is processed
2617: by xmlSaveURI, they will be escaped and the external entity loader machinery
2618: will fail. So convert them to slashes. Misuse 'ret' for walking. */
2619: ret = cal;
2620: while (*ret != '\0') {
2621: if (*ret == '\\')
2622: *ret = '/';
2623: ret++;
2624: }
2625: #endif
2626: memset(&temp, 0, sizeof(temp));
2627: temp.path = (char *) cal;
2628: ret = xmlSaveUri(&temp);
2629: xmlFree(cal);
2630: return(ret);
2631: }
2632: #define bottom_uri
2633: #include "elfgcchack.h"
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>