embedaddon/libxml2/uri.c - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / uri.c
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Jul 22 01:22:20 2013 UTC (10 years, 11 months ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_8_0p0, v2_8_0, HEAD

2.8.0

1: /** 2: * uri.c: set of generic URI related routines 3: * 4: * Reference: RFCs 3986, 2732 and 2373 5: * 6: * See Copyright for the status of this software. 7: * 8: * TODO: that module behaves really badly on OOM situation 9: * 10: * daniel@veillard.com 11: */ 12: 13: #define IN_LIBXML 14: #include "libxml.h" 15: 16: #include <string.h> 17: 18: #include <libxml/xmlmemory.h> 19: #include <libxml/uri.h> 20: #include <libxml/globals.h> 21: #include <libxml/xmlerror.h> 22: 23: static void xmlCleanURI(xmlURIPtr uri); 24: 25: /* 26: * Old rule from 2396 used in legacy handling code 27: * alpha = lowalpha | upalpha 28: */ 29: #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x)) 30: 31: 32: /* 33: * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | 34: * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | 35: * "u" | "v" | "w" | "x" | "y" | "z" 36: */ 37: 38: #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z')) 39: 40: /* 41: * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | 42: * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | 43: * "U" | "V" | "W" | "X" | "Y" | "Z" 44: */ 45: #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z')) 46: 47: #ifdef IS_DIGIT 48: #undef IS_DIGIT 49: #endif 50: /* 51: * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 52: */ 53: #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9')) 54: 55: /* 56: * alphanum = alpha | digit 57: */ 58: 59: #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x)) 60: 61: /* 62: * mark = "-" | "_" | "." | "!" 
| "~" | "*" | "'" | "(" | ")" 63: */ 64: 65: #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \ 66: ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \ 67: ((x) == '(') || ((x) == ')')) 68: 69: /* 70: * unwise = "{" | "}" | "|" | "\" | "^" | "`" 71: */ 72: 73: #define IS_UNWISE(p) \ 74: (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \ 75: ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \ 76: ((*(p) == ']')) || ((*(p) == '`'))) 77: /* 78: * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," | 79: * "[" | "]" 80: */ 81: 82: #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \ 83: ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \ 84: ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \ 85: ((x) == ']')) 86: 87: /* 88: * unreserved = alphanum | mark 89: */ 90: 91: #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x)) 92: 93: /* 94: * Skip to next pointer char, handle escaped sequences 95: */ 96: 97: #define NEXT(p) ((*p == '%')? p += 3 : p++) 98: 99: /* 100: * Productions from the spec. 101: * 102: * authority = server | reg_name 103: * reg_name = 1*( unreserved | escaped | "$" | "," | 104: * ";" | ":" | "@" | "&" | "=" | "+" ) 105: * 106: * path = [ abs_path | opaque_part ] 107: */ 108: 109: #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n)) 110: 111: /************************************************************************ 112: * * 113: * RFC 3986 parser * 114: * * 115: ************************************************************************/ 116: 117: #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9')) 118: #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \ 119: ((*(p) >= 'A') && (*(p) <= 'Z'))) 120: #define ISA_HEXDIG(p) \ 121: (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \ 122: ((*(p) >= 'A') && (*(p) <= 'F'))) 123: 124: /* 125: * sub-delims = "!" 
/ "$" / "&" / "'" / "(" / ")" 126: * / "*" / "+" / "," / ";" / "=" 127: */ 128: #define ISA_SUB_DELIM(p) \ 129: (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \ 130: ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \ 131: ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \ 132: ((*(p) == '=')) || ((*(p) == '\''))) 133: 134: /* 135: * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" 136: */ 137: #define ISA_GEN_DELIM(p) \ 138: (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \ 139: ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \ 140: ((*(p) == '@'))) 141: 142: /* 143: * reserved = gen-delims / sub-delims 144: */ 145: #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p))) 146: 147: /* 148: * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 149: */ 150: #define ISA_UNRESERVED(p) \ 151: ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \ 152: ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~'))) 153: 154: /* 155: * pct-encoded = "%" HEXDIG HEXDIG 156: */ 157: #define ISA_PCT_ENCODED(p) \ 158: ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2))) 159: 160: /* 161: * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 162: */ 163: #define ISA_PCHAR(p) \ 164: (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \ 165: ((*(p) == ':')) || ((*(p) == '@'))) 166: 167: /** 168: * xmlParse3986Scheme: 169: * @uri: pointer to an URI structure 170: * @str: pointer to the string to analyze 171: * 172: * Parse an URI scheme 173: * 174: * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." 
) 175: * 176: * Returns 0 or the error code 177: */ 178: static int 179: xmlParse3986Scheme(xmlURIPtr uri, const char **str) { 180: const char *cur; 181: 182: if (str == NULL) 183: return(-1); 184: 185: cur = *str; 186: if (!ISA_ALPHA(cur)) 187: return(2); 188: cur++; 189: while (ISA_ALPHA(cur) || ISA_DIGIT(cur) || 190: (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++; 191: if (uri != NULL) { 192: if (uri->scheme != NULL) xmlFree(uri->scheme); 193: uri->scheme = STRNDUP(*str, cur - *str); 194: } 195: *str = cur; 196: return(0); 197: } 198: 199: /** 200: * xmlParse3986Fragment: 201: * @uri: pointer to an URI structure 202: * @str: pointer to the string to analyze 203: * 204: * Parse the query part of an URI 205: * 206: * fragment = *( pchar / "/" / "?" ) 207: * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']' 208: * in the fragment identifier but this is used very broadly for 209: * xpointer scheme selection, so we are allowing it here to not break 210: * for example all the DocBook processing chains. 
211: * 212: * Returns 0 or the error code 213: */ 214: static int 215: xmlParse3986Fragment(xmlURIPtr uri, const char **str) 216: { 217: const char *cur; 218: 219: if (str == NULL) 220: return (-1); 221: 222: cur = *str; 223: 224: while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || 225: (*cur == '[') || (*cur == ']') || 226: ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 227: NEXT(cur); 228: if (uri != NULL) { 229: if (uri->fragment != NULL) 230: xmlFree(uri->fragment); 231: if (uri->cleanup & 2) 232: uri->fragment = STRNDUP(*str, cur - *str); 233: else 234: uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL); 235: } 236: *str = cur; 237: return (0); 238: } 239: 240: /** 241: * xmlParse3986Query: 242: * @uri: pointer to an URI structure 243: * @str: pointer to the string to analyze 244: * 245: * Parse the query part of an URI 246: * 247: * query = *uric 248: * 249: * Returns 0 or the error code 250: */ 251: static int 252: xmlParse3986Query(xmlURIPtr uri, const char **str) 253: { 254: const char *cur; 255: 256: if (str == NULL) 257: return (-1); 258: 259: cur = *str; 260: 261: while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || 262: ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 263: NEXT(cur); 264: if (uri != NULL) { 265: if (uri->query != NULL) 266: xmlFree(uri->query); 267: if (uri->cleanup & 2) 268: uri->query = STRNDUP(*str, cur - *str); 269: else 270: uri->query = xmlURIUnescapeString(*str, cur - *str, NULL); 271: 272: /* Save the raw bytes of the query as well. 
273: * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114 274: */ 275: if (uri->query_raw != NULL) 276: xmlFree (uri->query_raw); 277: uri->query_raw = STRNDUP (*str, cur - *str); 278: } 279: *str = cur; 280: return (0); 281: } 282: 283: /** 284: * xmlParse3986Port: 285: * @uri: pointer to an URI structure 286: * @str: the string to analyze 287: * 288: * Parse a port part and fills in the appropriate fields 289: * of the @uri structure 290: * 291: * port = *DIGIT 292: * 293: * Returns 0 or the error code 294: */ 295: static int 296: xmlParse3986Port(xmlURIPtr uri, const char **str) 297: { 298: const char *cur = *str; 299: 300: if (ISA_DIGIT(cur)) { 301: if (uri != NULL) 302: uri->port = 0; 303: while (ISA_DIGIT(cur)) { 304: if (uri != NULL) 305: uri->port = uri->port * 10 + (*cur - '0'); 306: cur++; 307: } 308: *str = cur; 309: return(0); 310: } 311: return(1); 312: } 313: 314: /** 315: * xmlParse3986Userinfo: 316: * @uri: pointer to an URI structure 317: * @str: the string to analyze 318: * 319: * Parse an user informations part and fills in the appropriate fields 320: * of the @uri structure 321: * 322: * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) 323: * 324: * Returns 0 or the error code 325: */ 326: static int 327: xmlParse3986Userinfo(xmlURIPtr uri, const char **str) 328: { 329: const char *cur; 330: 331: cur = *str; 332: while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || 333: ISA_SUB_DELIM(cur) || (*cur == ':')) 334: NEXT(cur); 335: if (*cur == '@') { 336: if (uri != NULL) { 337: if (uri->user != NULL) xmlFree(uri->user); 338: if (uri->cleanup & 2) 339: uri->user = STRNDUP(*str, cur - *str); 340: else 341: uri->user = xmlURIUnescapeString(*str, cur - *str, NULL); 342: } 343: *str = cur; 344: return(0); 345: } 346: return(1); 347: } 348: 349: /** 350: * xmlParse3986DecOctet: 351: * @str: the string to analyze 352: * 353: * dec-octet = DIGIT ; 0-9 354: * / %x31-39 DIGIT ; 10-99 355: * / "1" 2DIGIT ; 100-199 356: * / "2" 
%x30-34 DIGIT ; 200-249 357: * / "25" %x30-35 ; 250-255 358: * 359: * Skip a dec-octet. 360: * 361: * Returns 0 if found and skipped, 1 otherwise 362: */ 363: static int 364: xmlParse3986DecOctet(const char **str) { 365: const char *cur = *str; 366: 367: if (!(ISA_DIGIT(cur))) 368: return(1); 369: if (!ISA_DIGIT(cur+1)) 370: cur++; 371: else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2))) 372: cur += 2; 373: else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2))) 374: cur += 3; 375: else if ((*cur == '2') && (*(cur + 1) >= '0') && 376: (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2))) 377: cur += 3; 378: else if ((*cur == '2') && (*(cur + 1) == '5') && 379: (*(cur + 2) >= '0') && (*(cur + 1) <= '5')) 380: cur += 3; 381: else 382: return(1); 383: *str = cur; 384: return(0); 385: } 386: /** 387: * xmlParse3986Host: 388: * @uri: pointer to an URI structure 389: * @str: the string to analyze 390: * 391: * Parse an host part and fills in the appropriate fields 392: * of the @uri structure 393: * 394: * host = IP-literal / IPv4address / reg-name 395: * IP-literal = "[" ( IPv6address / IPvFuture ) "]" 396: * IPv4address = dec-octet "." dec-octet "." dec-octet "." 
dec-octet 397: * reg-name = *( unreserved / pct-encoded / sub-delims ) 398: * 399: * Returns 0 or the error code 400: */ 401: static int 402: xmlParse3986Host(xmlURIPtr uri, const char **str) 403: { 404: const char *cur = *str; 405: const char *host; 406: 407: host = cur; 408: /* 409: * IPv6 and future adressing scheme are enclosed between brackets 410: */ 411: if (*cur == '[') { 412: cur++; 413: while ((*cur != ']') && (*cur != 0)) 414: cur++; 415: if (*cur != ']') 416: return(1); 417: cur++; 418: goto found; 419: } 420: /* 421: * try to parse an IPv4 422: */ 423: if (ISA_DIGIT(cur)) { 424: if (xmlParse3986DecOctet(&cur) != 0) 425: goto not_ipv4; 426: if (*cur != '.') 427: goto not_ipv4; 428: cur++; 429: if (xmlParse3986DecOctet(&cur) != 0) 430: goto not_ipv4; 431: if (*cur != '.') 432: goto not_ipv4; 433: if (xmlParse3986DecOctet(&cur) != 0) 434: goto not_ipv4; 435: if (*cur != '.') 436: goto not_ipv4; 437: if (xmlParse3986DecOctet(&cur) != 0) 438: goto not_ipv4; 439: goto found; 440: not_ipv4: 441: cur = *str; 442: } 443: /* 444: * then this should be a hostname which can be empty 445: */ 446: while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur)) 447: NEXT(cur); 448: found: 449: if (uri != NULL) { 450: if (uri->authority != NULL) xmlFree(uri->authority); 451: uri->authority = NULL; 452: if (uri->server != NULL) xmlFree(uri->server); 453: if (cur != host) { 454: if (uri->cleanup & 2) 455: uri->server = STRNDUP(host, cur - host); 456: else 457: uri->server = xmlURIUnescapeString(host, cur - host, NULL); 458: } else 459: uri->server = NULL; 460: } 461: *str = cur; 462: return(0); 463: } 464: 465: /** 466: * xmlParse3986Authority: 467: * @uri: pointer to an URI structure 468: * @str: the string to analyze 469: * 470: * Parse an authority part and fills in the appropriate fields 471: * of the @uri structure 472: * 473: * authority = [ userinfo "@" ] host [ ":" port ] 474: * 475: * Returns 0 or the error code 476: */ 477: static int 478: 
xmlParse3986Authority(xmlURIPtr uri, const char **str) 479: { 480: const char *cur; 481: int ret; 482: 483: cur = *str; 484: /* 485: * try to parse an userinfo and check for the trailing @ 486: */ 487: ret = xmlParse3986Userinfo(uri, &cur); 488: if ((ret != 0) || (*cur != '@')) 489: cur = *str; 490: else 491: cur++; 492: ret = xmlParse3986Host(uri, &cur); 493: if (ret != 0) return(ret); 494: if (*cur == ':') { 495: cur++; 496: ret = xmlParse3986Port(uri, &cur); 497: if (ret != 0) return(ret); 498: } 499: *str = cur; 500: return(0); 501: } 502: 503: /** 504: * xmlParse3986Segment: 505: * @str: the string to analyze 506: * @forbid: an optional forbidden character 507: * @empty: allow an empty segment 508: * 509: * Parse a segment and fills in the appropriate fields 510: * of the @uri structure 511: * 512: * segment = *pchar 513: * segment-nz = 1*pchar 514: * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) 515: * ; non-zero-length segment without any colon ":" 516: * 517: * Returns 0 or the error code 518: */ 519: static int 520: xmlParse3986Segment(const char **str, char forbid, int empty) 521: { 522: const char *cur; 523: 524: cur = *str; 525: if (!ISA_PCHAR(cur)) { 526: if (empty) 527: return(0); 528: return(1); 529: } 530: while (ISA_PCHAR(cur) && (*cur != forbid)) 531: NEXT(cur); 532: *str = cur; 533: return (0); 534: } 535: 536: /** 537: * xmlParse3986PathAbEmpty: 538: * @uri: pointer to an URI structure 539: * @str: the string to analyze 540: * 541: * Parse an path absolute or empty and fills in the appropriate fields 542: * of the @uri structure 543: * 544: * path-abempty = *( "/" segment ) 545: * 546: * Returns 0 or the error code 547: */ 548: static int 549: xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str) 550: { 551: const char *cur; 552: int ret; 553: 554: cur = *str; 555: 556: while (*cur == '/') { 557: cur++; 558: ret = xmlParse3986Segment(&cur, 0, 1); 559: if (ret != 0) return(ret); 560: } 561: if (uri != NULL) { 562: if 
(uri->path != NULL) xmlFree(uri->path); 563: if (*str != cur) { 564: if (uri->cleanup & 2) 565: uri->path = STRNDUP(*str, cur - *str); 566: else 567: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 568: } else { 569: uri->path = NULL; 570: } 571: } 572: *str = cur; 573: return (0); 574: } 575: 576: /** 577: * xmlParse3986PathAbsolute: 578: * @uri: pointer to an URI structure 579: * @str: the string to analyze 580: * 581: * Parse an path absolute and fills in the appropriate fields 582: * of the @uri structure 583: * 584: * path-absolute = "/" [ segment-nz *( "/" segment ) ] 585: * 586: * Returns 0 or the error code 587: */ 588: static int 589: xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str) 590: { 591: const char *cur; 592: int ret; 593: 594: cur = *str; 595: 596: if (*cur != '/') 597: return(1); 598: cur++; 599: ret = xmlParse3986Segment(&cur, 0, 0); 600: if (ret == 0) { 601: while (*cur == '/') { 602: cur++; 603: ret = xmlParse3986Segment(&cur, 0, 1); 604: if (ret != 0) return(ret); 605: } 606: } 607: if (uri != NULL) { 608: if (uri->path != NULL) xmlFree(uri->path); 609: if (cur != *str) { 610: if (uri->cleanup & 2) 611: uri->path = STRNDUP(*str, cur - *str); 612: else 613: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 614: } else { 615: uri->path = NULL; 616: } 617: } 618: *str = cur; 619: return (0); 620: } 621: 622: /** 623: * xmlParse3986PathRootless: 624: * @uri: pointer to an URI structure 625: * @str: the string to analyze 626: * 627: * Parse an path without root and fills in the appropriate fields 628: * of the @uri structure 629: * 630: * path-rootless = segment-nz *( "/" segment ) 631: * 632: * Returns 0 or the error code 633: */ 634: static int 635: xmlParse3986PathRootless(xmlURIPtr uri, const char **str) 636: { 637: const char *cur; 638: int ret; 639: 640: cur = *str; 641: 642: ret = xmlParse3986Segment(&cur, 0, 0); 643: if (ret != 0) return(ret); 644: while (*cur == '/') { 645: cur++; 646: ret = 
xmlParse3986Segment(&cur, 0, 1); 647: if (ret != 0) return(ret); 648: } 649: if (uri != NULL) { 650: if (uri->path != NULL) xmlFree(uri->path); 651: if (cur != *str) { 652: if (uri->cleanup & 2) 653: uri->path = STRNDUP(*str, cur - *str); 654: else 655: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 656: } else { 657: uri->path = NULL; 658: } 659: } 660: *str = cur; 661: return (0); 662: } 663: 664: /** 665: * xmlParse3986PathNoScheme: 666: * @uri: pointer to an URI structure 667: * @str: the string to analyze 668: * 669: * Parse an path which is not a scheme and fills in the appropriate fields 670: * of the @uri structure 671: * 672: * path-noscheme = segment-nz-nc *( "/" segment ) 673: * 674: * Returns 0 or the error code 675: */ 676: static int 677: xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str) 678: { 679: const char *cur; 680: int ret; 681: 682: cur = *str; 683: 684: ret = xmlParse3986Segment(&cur, ':', 0); 685: if (ret != 0) return(ret); 686: while (*cur == '/') { 687: cur++; 688: ret = xmlParse3986Segment(&cur, 0, 1); 689: if (ret != 0) return(ret); 690: } 691: if (uri != NULL) { 692: if (uri->path != NULL) xmlFree(uri->path); 693: if (cur != *str) { 694: if (uri->cleanup & 2) 695: uri->path = STRNDUP(*str, cur - *str); 696: else 697: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 698: } else { 699: uri->path = NULL; 700: } 701: } 702: *str = cur; 703: return (0); 704: } 705: 706: /** 707: * xmlParse3986HierPart: 708: * @uri: pointer to an URI structure 709: * @str: the string to analyze 710: * 711: * Parse an hierarchical part and fills in the appropriate fields 712: * of the @uri structure 713: * 714: * hier-part = "//" authority path-abempty 715: * / path-absolute 716: * / path-rootless 717: * / path-empty 718: * 719: * Returns 0 or the error code 720: */ 721: static int 722: xmlParse3986HierPart(xmlURIPtr uri, const char **str) 723: { 724: const char *cur; 725: int ret; 726: 727: cur = *str; 728: 729: if ((*cur == '/') && 
(*(cur + 1) == '/')) { 730: cur += 2; 731: ret = xmlParse3986Authority(uri, &cur); 732: if (ret != 0) return(ret); 733: ret = xmlParse3986PathAbEmpty(uri, &cur); 734: if (ret != 0) return(ret); 735: *str = cur; 736: return(0); 737: } else if (*cur == '/') { 738: ret = xmlParse3986PathAbsolute(uri, &cur); 739: if (ret != 0) return(ret); 740: } else if (ISA_PCHAR(cur)) { 741: ret = xmlParse3986PathRootless(uri, &cur); 742: if (ret != 0) return(ret); 743: } else { 744: /* path-empty is effectively empty */ 745: if (uri != NULL) { 746: if (uri->path != NULL) xmlFree(uri->path); 747: uri->path = NULL; 748: } 749: } 750: *str = cur; 751: return (0); 752: } 753: 754: /** 755: * xmlParse3986RelativeRef: 756: * @uri: pointer to an URI structure 757: * @str: the string to analyze 758: * 759: * Parse an URI string and fills in the appropriate fields 760: * of the @uri structure 761: * 762: * relative-ref = relative-part [ "?" query ] [ "#" fragment ] 763: * relative-part = "//" authority path-abempty 764: * / path-absolute 765: * / path-noscheme 766: * / path-empty 767: * 768: * Returns 0 or the error code 769: */ 770: static int 771: xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) { 772: int ret; 773: 774: if ((*str == '/') && (*(str + 1) == '/')) { 775: str += 2; 776: ret = xmlParse3986Authority(uri, &str); 777: if (ret != 0) return(ret); 778: ret = xmlParse3986PathAbEmpty(uri, &str); 779: if (ret != 0) return(ret); 780: } else if (*str == '/') { 781: ret = xmlParse3986PathAbsolute(uri, &str); 782: if (ret != 0) return(ret); 783: } else if (ISA_PCHAR(str)) { 784: ret = xmlParse3986PathNoScheme(uri, &str); 785: if (ret != 0) return(ret); 786: } else { 787: /* path-empty is effectively empty */ 788: if (uri != NULL) { 789: if (uri->path != NULL) xmlFree(uri->path); 790: uri->path = NULL; 791: } 792: } 793: 794: if (*str == '?') { 795: str++; 796: ret = xmlParse3986Query(uri, &str); 797: if (ret != 0) return(ret); 798: } 799: if (*str == '#') { 800: str++; 801: ret = 
xmlParse3986Fragment(uri, &str); 802: if (ret != 0) return(ret); 803: } 804: if (*str != 0) { 805: xmlCleanURI(uri); 806: return(1); 807: } 808: return(0); 809: } 810: 811: 812: /** 813: * xmlParse3986URI: 814: * @uri: pointer to an URI structure 815: * @str: the string to analyze 816: * 817: * Parse an URI string and fills in the appropriate fields 818: * of the @uri structure 819: * 820: * scheme ":" hier-part [ "?" query ] [ "#" fragment ] 821: * 822: * Returns 0 or the error code 823: */ 824: static int 825: xmlParse3986URI(xmlURIPtr uri, const char *str) { 826: int ret; 827: 828: ret = xmlParse3986Scheme(uri, &str); 829: if (ret != 0) return(ret); 830: if (*str != ':') { 831: return(1); 832: } 833: str++; 834: ret = xmlParse3986HierPart(uri, &str); 835: if (ret != 0) return(ret); 836: if (*str == '?') { 837: str++; 838: ret = xmlParse3986Query(uri, &str); 839: if (ret != 0) return(ret); 840: } 841: if (*str == '#') { 842: str++; 843: ret = xmlParse3986Fragment(uri, &str); 844: if (ret != 0) return(ret); 845: } 846: if (*str != 0) { 847: xmlCleanURI(uri); 848: return(1); 849: } 850: return(0); 851: } 852: 853: /** 854: * xmlParse3986URIReference: 855: * @uri: pointer to an URI structure 856: * @str: the string to analyze 857: * 858: * Parse an URI reference string and fills in the appropriate fields 859: * of the @uri structure 860: * 861: * URI-reference = URI / relative-ref 862: * 863: * Returns 0 or the error code 864: */ 865: static int 866: xmlParse3986URIReference(xmlURIPtr uri, const char *str) { 867: int ret; 868: 869: if (str == NULL) 870: return(-1); 871: xmlCleanURI(uri); 872: 873: /* 874: * Try first to parse absolute refs, then fallback to relative if 875: * it fails. 
876: */ 877: ret = xmlParse3986URI(uri, str); 878: if (ret != 0) { 879: xmlCleanURI(uri); 880: ret = xmlParse3986RelativeRef(uri, str); 881: if (ret != 0) { 882: xmlCleanURI(uri); 883: return(ret); 884: } 885: } 886: return(0); 887: } 888: 889: /** 890: * xmlParseURI: 891: * @str: the URI string to analyze 892: * 893: * Parse an URI based on RFC 3986 894: * 895: * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 896: * 897: * Returns a newly built xmlURIPtr or NULL in case of error 898: */ 899: xmlURIPtr 900: xmlParseURI(const char *str) { 901: xmlURIPtr uri; 902: int ret; 903: 904: if (str == NULL) 905: return(NULL); 906: uri = xmlCreateURI(); 907: if (uri != NULL) { 908: ret = xmlParse3986URIReference(uri, str); 909: if (ret) { 910: xmlFreeURI(uri); 911: return(NULL); 912: } 913: } 914: return(uri); 915: } 916: 917: /** 918: * xmlParseURIReference: 919: * @uri: pointer to an URI structure 920: * @str: the string to analyze 921: * 922: * Parse an URI reference string based on RFC 3986 and fills in the 923: * appropriate fields of the @uri structure 924: * 925: * URI-reference = URI / relative-ref 926: * 927: * Returns 0 or the error code 928: */ 929: int 930: xmlParseURIReference(xmlURIPtr uri, const char *str) { 931: return(xmlParse3986URIReference(uri, str)); 932: } 933: 934: /** 935: * xmlParseURIRaw: 936: * @str: the URI string to analyze 937: * @raw: if 1 unescaping of URI pieces are disabled 938: * 939: * Parse an URI but allows to keep intact the original fragments. 
940: * 941: * URI-reference = URI / relative-ref 942: * 943: * Returns a newly built xmlURIPtr or NULL in case of error 944: */ 945: xmlURIPtr 946: xmlParseURIRaw(const char *str, int raw) { 947: xmlURIPtr uri; 948: int ret; 949: 950: if (str == NULL) 951: return(NULL); 952: uri = xmlCreateURI(); 953: if (uri != NULL) { 954: if (raw) { 955: uri->cleanup |= 2; 956: } 957: ret = xmlParseURIReference(uri, str); 958: if (ret) { 959: xmlFreeURI(uri); 960: return(NULL); 961: } 962: } 963: return(uri); 964: } 965: 966: /************************************************************************ 967: * * 968: * Generic URI structure functions * 969: * * 970: ************************************************************************/ 971: 972: /** 973: * xmlCreateURI: 974: * 975: * Simply creates an empty xmlURI 976: * 977: * Returns the new structure or NULL in case of error 978: */ 979: xmlURIPtr 980: xmlCreateURI(void) { 981: xmlURIPtr ret; 982: 983: ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI)); 984: if (ret == NULL) { 985: xmlGenericError(xmlGenericErrorContext, 986: "xmlCreateURI: out of memory\n"); 987: return(NULL); 988: } 989: memset(ret, 0, sizeof(xmlURI)); 990: return(ret); 991: } 992: 993: /** 994: * xmlSaveUri: 995: * @uri: pointer to an xmlURI 996: * 997: * Save the URI as an escaped string 998: * 999: * Returns a new string (to be deallocated by caller) 1000: */ 1001: xmlChar * 1002: xmlSaveUri(xmlURIPtr uri) { 1003: xmlChar *ret = NULL; 1004: xmlChar *temp; 1005: const char *p; 1006: int len; 1007: int max; 1008: 1009: if (uri == NULL) return(NULL); 1010: 1011: 1012: max = 80; 1013: ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar)); 1014: if (ret == NULL) { 1015: xmlGenericError(xmlGenericErrorContext, 1016: "xmlSaveUri: out of memory\n"); 1017: return(NULL); 1018: } 1019: len = 0; 1020: 1021: if (uri->scheme != NULL) { 1022: p = uri->scheme; 1023: while (*p != 0) { 1024: if (len >= max) { 1025: max *= 2; 1026: temp = (xmlChar *) xmlRealloc(ret, (max + 
1) * sizeof(xmlChar)); 1027: if (temp == NULL) { 1028: xmlGenericError(xmlGenericErrorContext, 1029: "xmlSaveUri: out of memory\n"); 1030: xmlFree(ret); 1031: return(NULL); 1032: } 1033: ret = temp; 1034: } 1035: ret[len++] = *p++; 1036: } 1037: if (len >= max) { 1038: max *= 2; 1039: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 1040: if (temp == NULL) { 1041: xmlGenericError(xmlGenericErrorContext, 1042: "xmlSaveUri: out of memory\n"); 1043: xmlFree(ret); 1044: return(NULL); 1045: } 1046: ret = temp; 1047: } 1048: ret[len++] = ':'; 1049: } 1050: if (uri->opaque != NULL) { 1051: p = uri->opaque; 1052: while (*p != 0) { 1053: if (len + 3 >= max) { 1054: max *= 2; 1055: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 1056: if (temp == NULL) { 1057: xmlGenericError(xmlGenericErrorContext, 1058: "xmlSaveUri: out of memory\n"); 1059: xmlFree(ret); 1060: return(NULL); 1061: } 1062: ret = temp; 1063: } 1064: if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) 1065: ret[len++] = *p++; 1066: else { 1067: int val = *(unsigned char *)p++; 1068: int hi = val / 0x10, lo = val % 0x10; 1069: ret[len++] = '%'; 1070: ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1071: ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 1072: } 1073: } 1074: } else { 1075: if (uri->server != NULL) { 1076: if (len + 3 >= max) { 1077: max *= 2; 1078: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 1079: if (temp == NULL) { 1080: xmlGenericError(xmlGenericErrorContext, 1081: "xmlSaveUri: out of memory\n"); 1082: xmlFree(ret); 1083: return(NULL); 1084: } 1085: ret = temp; 1086: } 1087: ret[len++] = '/'; 1088: ret[len++] = '/'; 1089: if (uri->user != NULL) { 1090: p = uri->user; 1091: while (*p != 0) { 1092: if (len + 3 >= max) { 1093: max *= 2; 1094: temp = (xmlChar *) xmlRealloc(ret, 1095: (max + 1) * sizeof(xmlChar)); 1096: if (temp == NULL) { 1097: xmlGenericError(xmlGenericErrorContext, 1098: "xmlSaveUri: out of memory\n"); 1099: xmlFree(ret); 1100: return(NULL); 1101: } 1102: ret = temp; 1103: } 1104: if ((IS_UNRESERVED(*(p))) || 1105: ((*(p) == ';')) || ((*(p) == ':')) || 1106: ((*(p) == '&')) || ((*(p) == '=')) || 1107: ((*(p) == '+')) || ((*(p) == '$')) || 1108: ((*(p) == ','))) 1109: ret[len++] = *p++; 1110: else { 1111: int val = *(unsigned char *)p++; 1112: int hi = val / 0x10, lo = val % 0x10; 1113: ret[len++] = '%'; 1114: ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1115: ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 1116: } 1117: } 1118: if (len + 3 >= max) { 1119: max *= 2; 1120: temp = (xmlChar *) xmlRealloc(ret, 1121: (max + 1) * sizeof(xmlChar)); 1122: if (temp == NULL) { 1123: xmlGenericError(xmlGenericErrorContext, 1124: "xmlSaveUri: out of memory\n"); 1125: xmlFree(ret); 1126: return(NULL); 1127: } 1128: ret = temp; 1129: } 1130: ret[len++] = '@'; 1131: } 1132: p = uri->server; 1133: while (*p != 0) { 1134: if (len >= max) { 1135: max *= 2; 1136: temp = (xmlChar *) xmlRealloc(ret, 1137: (max + 1) * sizeof(xmlChar)); 1138: if (temp == NULL) { 1139: xmlGenericError(xmlGenericErrorContext, 1140: "xmlSaveUri: out of memory\n"); 1141: xmlFree(ret); 1142: return(NULL); 1143: } 1144: ret = temp; 1145: } 1146: ret[len++] = *p++; 1147: } 1148: if (uri->port > 0) { 1149: if (len + 10 >= max) { 1150: max *= 2; 1151: temp = (xmlChar *) xmlRealloc(ret, 1152: (max + 1) * sizeof(xmlChar)); 1153: if (temp == NULL) { 1154: xmlGenericError(xmlGenericErrorContext, 1155: "xmlSaveUri: out of memory\n"); 1156: xmlFree(ret); 1157: return(NULL); 1158: } 1159: ret = temp; 1160: } 1161: len += snprintf((char *) &ret[len], max - len, ":%d", uri->port); 1162: } 1163: } else if (uri->authority != NULL) { 1164: if (len + 3 >= max) { 1165: max *= 2; 1166: temp = (xmlChar *) xmlRealloc(ret, 1167: (max + 1) * sizeof(xmlChar)); 1168: if (temp == NULL) { 1169: xmlGenericError(xmlGenericErrorContext, 1170: "xmlSaveUri: out of memory\n"); 1171: xmlFree(ret); 1172: return(NULL); 1173: } 1174: ret = temp; 1175: } 1176: ret[len++] = '/'; 1177: ret[len++] = '/'; 1178: p = uri->authority; 1179: while (*p != 0) { 1180: if (len + 3 >= max) { 1181: max *= 2; 1182: temp = (xmlChar *) xmlRealloc(ret, 1183: (max + 1) * sizeof(xmlChar)); 1184: if (temp == NULL) { 1185: xmlGenericError(xmlGenericErrorContext, 1186: "xmlSaveUri: out of memory\n"); 1187: xmlFree(ret); 1188: return(NULL); 1189: } 1190: ret = temp; 1191: } 1192: if ((IS_UNRESERVED(*(p))) || 1193: ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) 
== ';')) || 1194: ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || 1195: ((*(p) == '=')) || ((*(p) == '+'))) 1196: ret[len++] = *p++; 1197: else { 1198: int val = *(unsigned char *)p++; 1199: int hi = val / 0x10, lo = val % 0x10; 1200: ret[len++] = '%'; 1201: ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1202: ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1203: } 1204: } 1205: } else if (uri->scheme != NULL) { 1206: if (len + 3 >= max) { 1207: max *= 2; 1208: temp = (xmlChar *) xmlRealloc(ret, 1209: (max + 1) * sizeof(xmlChar)); 1210: if (temp == NULL) { 1211: xmlGenericError(xmlGenericErrorContext, 1212: "xmlSaveUri: out of memory\n"); 1213: xmlFree(ret); 1214: return(NULL); 1215: } 1216: ret = temp; 1217: } 1218: ret[len++] = '/'; 1219: ret[len++] = '/'; 1220: } 1221: if (uri->path != NULL) { 1222: p = uri->path; 1223: /* 1224: * the colon in file:///d: should not be escaped or 1225: * Windows accesses fail later. 1226: */ 1227: if ((uri->scheme != NULL) && 1228: (p[0] == '/') && 1229: (((p[1] >= 'a') && (p[1] <= 'z')) || 1230: ((p[1] >= 'A') && (p[1] <= 'Z'))) && 1231: (p[2] == ':') && 1232: (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) { 1233: if (len + 3 >= max) { 1234: max *= 2; 1235: ret = (xmlChar *) xmlRealloc(ret, 1236: (max + 1) * sizeof(xmlChar)); 1237: if (ret == NULL) { 1238: xmlGenericError(xmlGenericErrorContext, 1239: "xmlSaveUri: out of memory\n"); 1240: return(NULL); 1241: } 1242: } 1243: ret[len++] = *p++; 1244: ret[len++] = *p++; 1245: ret[len++] = *p++; 1246: } 1247: while (*p != 0) { 1248: if (len + 3 >= max) { 1249: max *= 2; 1250: temp = (xmlChar *) xmlRealloc(ret, 1251: (max + 1) * sizeof(xmlChar)); 1252: if (temp == NULL) { 1253: xmlGenericError(xmlGenericErrorContext, 1254: "xmlSaveUri: out of memory\n"); 1255: xmlFree(ret); 1256: return(NULL); 1257: } 1258: ret = temp; 1259: } 1260: if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) || 1261: ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || 1262: ((*(p) == '=')) || ((*(p) == 
'+')) || ((*(p) == '$')) || 1263: ((*(p) == ','))) 1264: ret[len++] = *p++; 1265: else { 1266: int val = *(unsigned char *)p++; 1267: int hi = val / 0x10, lo = val % 0x10; 1268: ret[len++] = '%'; 1269: ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1270: ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1271: } 1272: } 1273: } 1274: if (uri->query_raw != NULL) { 1275: if (len + 1 >= max) { 1276: max *= 2; 1277: temp = (xmlChar *) xmlRealloc(ret, 1278: (max + 1) * sizeof(xmlChar)); 1279: if (temp == NULL) { 1280: xmlGenericError(xmlGenericErrorContext, 1281: "xmlSaveUri: out of memory\n"); 1282: xmlFree(ret); 1283: return(NULL); 1284: } 1285: ret = temp; 1286: } 1287: ret[len++] = '?'; 1288: p = uri->query_raw; 1289: while (*p != 0) { 1290: if (len + 1 >= max) { 1291: max *= 2; 1292: temp = (xmlChar *) xmlRealloc(ret, 1293: (max + 1) * sizeof(xmlChar)); 1294: if (temp == NULL) { 1295: xmlGenericError(xmlGenericErrorContext, 1296: "xmlSaveUri: out of memory\n"); 1297: xmlFree(ret); 1298: return(NULL); 1299: } 1300: ret = temp; 1301: } 1302: ret[len++] = *p++; 1303: } 1304: } else if (uri->query != NULL) { 1305: if (len + 3 >= max) { 1306: max *= 2; 1307: temp = (xmlChar *) xmlRealloc(ret, 1308: (max + 1) * sizeof(xmlChar)); 1309: if (temp == NULL) { 1310: xmlGenericError(xmlGenericErrorContext, 1311: "xmlSaveUri: out of memory\n"); 1312: xmlFree(ret); 1313: return(NULL); 1314: } 1315: ret = temp; 1316: } 1317: ret[len++] = '?'; 1318: p = uri->query; 1319: while (*p != 0) { 1320: if (len + 3 >= max) { 1321: max *= 2; 1322: temp = (xmlChar *) xmlRealloc(ret, 1323: (max + 1) * sizeof(xmlChar)); 1324: if (temp == NULL) { 1325: xmlGenericError(xmlGenericErrorContext, 1326: "xmlSaveUri: out of memory\n"); 1327: xmlFree(ret); 1328: return(NULL); 1329: } 1330: ret = temp; 1331: } 1332: if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 1333: ret[len++] = *p++; 1334: else { 1335: int val = *(unsigned char *)p++; 1336: int hi = val / 0x10, lo = val % 0x10; 1337: ret[len++] = '%'; 1338: 
ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1339: ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1340: } 1341: } 1342: } 1343: } 1344: if (uri->fragment != NULL) { 1345: if (len + 3 >= max) { 1346: max *= 2; 1347: temp = (xmlChar *) xmlRealloc(ret, 1348: (max + 1) * sizeof(xmlChar)); 1349: if (temp == NULL) { 1350: xmlGenericError(xmlGenericErrorContext, 1351: "xmlSaveUri: out of memory\n"); 1352: xmlFree(ret); 1353: return(NULL); 1354: } 1355: ret = temp; 1356: } 1357: ret[len++] = '#'; 1358: p = uri->fragment; 1359: while (*p != 0) { 1360: if (len + 3 >= max) { 1361: max *= 2; 1362: temp = (xmlChar *) xmlRealloc(ret, 1363: (max + 1) * sizeof(xmlChar)); 1364: if (temp == NULL) { 1365: xmlGenericError(xmlGenericErrorContext, 1366: "xmlSaveUri: out of memory\n"); 1367: xmlFree(ret); 1368: return(NULL); 1369: } 1370: ret = temp; 1371: } 1372: if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 1373: ret[len++] = *p++; 1374: else { 1375: int val = *(unsigned char *)p++; 1376: int hi = val / 0x10, lo = val % 0x10; 1377: ret[len++] = '%'; 1378: ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1379: ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1380: } 1381: } 1382: } 1383: if (len >= max) { 1384: max *= 2; 1385: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 1386: if (temp == NULL) { 1387: xmlGenericError(xmlGenericErrorContext, 1388: "xmlSaveUri: out of memory\n"); 1389: xmlFree(ret); 1390: return(NULL); 1391: } 1392: ret = temp; 1393: } 1394: ret[len] = 0; 1395: return(ret); 1396: } 1397: 1398: /** 1399: * xmlPrintURI: 1400: * @stream: a FILE* for the output 1401: * @uri: pointer to an xmlURI 1402: * 1403: * Prints the URI in the stream @stream. 
1404: */ 1405: void 1406: xmlPrintURI(FILE *stream, xmlURIPtr uri) { 1407: xmlChar *out; 1408: 1409: out = xmlSaveUri(uri); 1410: if (out != NULL) { 1411: fprintf(stream, "%s", (char *) out); 1412: xmlFree(out); 1413: } 1414: } 1415: 1416: /** 1417: * xmlCleanURI: 1418: * @uri: pointer to an xmlURI 1419: * 1420: * Make sure the xmlURI struct is free of content 1421: */ 1422: static void 1423: xmlCleanURI(xmlURIPtr uri) { 1424: if (uri == NULL) return; 1425: 1426: if (uri->scheme != NULL) xmlFree(uri->scheme); 1427: uri->scheme = NULL; 1428: if (uri->server != NULL) xmlFree(uri->server); 1429: uri->server = NULL; 1430: if (uri->user != NULL) xmlFree(uri->user); 1431: uri->user = NULL; 1432: if (uri->path != NULL) xmlFree(uri->path); 1433: uri->path = NULL; 1434: if (uri->fragment != NULL) xmlFree(uri->fragment); 1435: uri->fragment = NULL; 1436: if (uri->opaque != NULL) xmlFree(uri->opaque); 1437: uri->opaque = NULL; 1438: if (uri->authority != NULL) xmlFree(uri->authority); 1439: uri->authority = NULL; 1440: if (uri->query != NULL) xmlFree(uri->query); 1441: uri->query = NULL; 1442: if (uri->query_raw != NULL) xmlFree(uri->query_raw); 1443: uri->query_raw = NULL; 1444: } 1445: 1446: /** 1447: * xmlFreeURI: 1448: * @uri: pointer to an xmlURI 1449: * 1450: * Free up the xmlURI struct 1451: */ 1452: void 1453: xmlFreeURI(xmlURIPtr uri) { 1454: if (uri == NULL) return; 1455: 1456: if (uri->scheme != NULL) xmlFree(uri->scheme); 1457: if (uri->server != NULL) xmlFree(uri->server); 1458: if (uri->user != NULL) xmlFree(uri->user); 1459: if (uri->path != NULL) xmlFree(uri->path); 1460: if (uri->fragment != NULL) xmlFree(uri->fragment); 1461: if (uri->opaque != NULL) xmlFree(uri->opaque); 1462: if (uri->authority != NULL) xmlFree(uri->authority); 1463: if (uri->query != NULL) xmlFree(uri->query); 1464: if (uri->query_raw != NULL) xmlFree(uri->query_raw); 1465: xmlFree(uri); 1466: } 1467: 1468: /************************************************************************ 1469: * 
/************************************************************************
 *                                                                      *
 *                      Helper functions                                *
 *                                                                      *
 ************************************************************************/

/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far. */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment. */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : collapse a run of slashes into a single one */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence. */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done. */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy by hand instead of
         * using strcpy(cur, segp + 3).
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Walk back over the slashes that precede "cur". */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;

        /* If there are no previous segments, then keep going from here.
         * That is the case when "cur" is the very start of the buffer or
         * when only slashes precede it.  Testing "segp == path" alone is
         * not enough: it is also true when a one-character first segment
         * ends at path[0] (e.g. "a/.." left over from "a/b/../.."), and
         * that segment must still be revisited.
         */
        if ((cur == path) || (segp[0] == '/'))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}

/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f, A-F), 0 otherwise
 */
static int is_hex(char c) {
    return((((c >= '0') && (c <= '9')) ||
            ((c >= 'a') && (c <= 'f')) ||
            ((c >= 'A') && (c <= 'F'))) ? 1 : 0);
}

/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. The
 * output is a direct unsigned char translation of %XX values (no encoding)
 * Note that the length of the result can only be smaller or same size as
 * the input string.
 */
1681: * 1682: * Returns a copy of the string, but unescaped, will return NULL only in case 1683: * of error 1684: */ 1685: char * 1686: xmlURIUnescapeString(const char *str, int len, char *target) { 1687: char *ret, *out; 1688: const char *in; 1689: 1690: if (str == NULL) 1691: return(NULL); 1692: if (len <= 0) len = strlen(str); 1693: if (len < 0) return(NULL); 1694: 1695: if (target == NULL) { 1696: ret = (char *) xmlMallocAtomic(len + 1); 1697: if (ret == NULL) { 1698: xmlGenericError(xmlGenericErrorContext, 1699: "xmlURIUnescapeString: out of memory\n"); 1700: return(NULL); 1701: } 1702: } else 1703: ret = target; 1704: in = str; 1705: out = ret; 1706: while(len > 0) { 1707: if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) { 1708: in++; 1709: if ((*in >= '0') && (*in <= '9')) 1710: *out = (*in - '0'); 1711: else if ((*in >= 'a') && (*in <= 'f')) 1712: *out = (*in - 'a') + 10; 1713: else if ((*in >= 'A') && (*in <= 'F')) 1714: *out = (*in - 'A') + 10; 1715: in++; 1716: if ((*in >= '0') && (*in <= '9')) 1717: *out = *out * 16 + (*in - '0'); 1718: else if ((*in >= 'a') && (*in <= 'f')) 1719: *out = *out * 16 + (*in - 'a') + 10; 1720: else if ((*in >= 'A') && (*in <= 'F')) 1721: *out = *out * 16 + (*in - 'A') + 10; 1722: in++; 1723: len -= 3; 1724: out++; 1725: } else { 1726: *out++ = *in++; 1727: len--; 1728: } 1729: } 1730: *out = 0; 1731: return(ret); 1732: } 1733: 1734: /** 1735: * xmlURIEscapeStr: 1736: * @str: string to escape 1737: * @list: exception list string of chars not to escape 1738: * 1739: * This routine escapes a string to hex, ignoring reserved characters (a-z) 1740: * and the characters in the exception list. 1741: * 1742: * Returns a new escaped string or NULL in case of error. 
1743: */ 1744: xmlChar * 1745: xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) { 1746: xmlChar *ret, ch; 1747: xmlChar *temp; 1748: const xmlChar *in; 1749: 1750: unsigned int len, out; 1751: 1752: if (str == NULL) 1753: return(NULL); 1754: if (str[0] == 0) 1755: return(xmlStrdup(str)); 1756: len = xmlStrlen(str); 1757: if (!(len > 0)) return(NULL); 1758: 1759: len += 20; 1760: ret = (xmlChar *) xmlMallocAtomic(len); 1761: if (ret == NULL) { 1762: xmlGenericError(xmlGenericErrorContext, 1763: "xmlURIEscapeStr: out of memory\n"); 1764: return(NULL); 1765: } 1766: in = (const xmlChar *) str; 1767: out = 0; 1768: while(*in != 0) { 1769: if (len - out <= 3) { 1770: len += 20; 1771: temp = (xmlChar *) xmlRealloc(ret, len); 1772: if (temp == NULL) { 1773: xmlGenericError(xmlGenericErrorContext, 1774: "xmlURIEscapeStr: out of memory\n"); 1775: xmlFree(ret); 1776: return(NULL); 1777: } 1778: ret = temp; 1779: } 1780: 1781: ch = *in; 1782: 1783: if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) { 1784: unsigned char val; 1785: ret[out++] = '%'; 1786: val = ch >> 4; 1787: if (val <= 9) 1788: ret[out++] = '0' + val; 1789: else 1790: ret[out++] = 'A' + val - 0xA; 1791: val = ch & 0xF; 1792: if (val <= 9) 1793: ret[out++] = '0' + val; 1794: else 1795: ret[out++] = 'A' + val - 0xA; 1796: in++; 1797: } else { 1798: ret[out++] = *in++; 1799: } 1800: 1801: } 1802: ret[out] = 0; 1803: return(ret); 1804: } 1805: 1806: /** 1807: * xmlURIEscape: 1808: * @str: the string of the URI to escape 1809: * 1810: * Escaping routine, does not do validity checks ! 1811: * It will try to escape the chars needing this, but this is heuristic 1812: * based it's impossible to be sure. 1813: * 1814: * Returns an copy of the string, but escaped 1815: * 1816: * 25 May 2001 1817: * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly 1818: * according to RFC2396. 
1819: * - Carl Douglas 1820: */ 1821: xmlChar * 1822: xmlURIEscape(const xmlChar * str) 1823: { 1824: xmlChar *ret, *segment = NULL; 1825: xmlURIPtr uri; 1826: int ret2; 1827: 1828: #define NULLCHK(p) if(!p) { \ 1829: xmlGenericError(xmlGenericErrorContext, \ 1830: "xmlURIEscape: out of memory\n"); \ 1831: xmlFreeURI(uri); \ 1832: return NULL; } \ 1833: 1834: if (str == NULL) 1835: return (NULL); 1836: 1837: uri = xmlCreateURI(); 1838: if (uri != NULL) { 1839: /* 1840: * Allow escaping errors in the unescaped form 1841: */ 1842: uri->cleanup = 1; 1843: ret2 = xmlParseURIReference(uri, (const char *)str); 1844: if (ret2) { 1845: xmlFreeURI(uri); 1846: return (NULL); 1847: } 1848: } 1849: 1850: if (!uri) 1851: return NULL; 1852: 1853: ret = NULL; 1854: 1855: if (uri->scheme) { 1856: segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-."); 1857: NULLCHK(segment) 1858: ret = xmlStrcat(ret, segment); 1859: ret = xmlStrcat(ret, BAD_CAST ":"); 1860: xmlFree(segment); 1861: } 1862: 1863: if (uri->authority) { 1864: segment = 1865: xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@"); 1866: NULLCHK(segment) 1867: ret = xmlStrcat(ret, BAD_CAST "//"); 1868: ret = xmlStrcat(ret, segment); 1869: xmlFree(segment); 1870: } 1871: 1872: if (uri->user) { 1873: segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,"); 1874: NULLCHK(segment) 1875: ret = xmlStrcat(ret,BAD_CAST "//"); 1876: ret = xmlStrcat(ret, segment); 1877: ret = xmlStrcat(ret, BAD_CAST "@"); 1878: xmlFree(segment); 1879: } 1880: 1881: if (uri->server) { 1882: segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@"); 1883: NULLCHK(segment) 1884: if (uri->user == NULL) 1885: ret = xmlStrcat(ret, BAD_CAST "//"); 1886: ret = xmlStrcat(ret, segment); 1887: xmlFree(segment); 1888: } 1889: 1890: if (uri->port) { 1891: xmlChar port[10]; 1892: 1893: snprintf((char *) port, 10, "%d", uri->port); 1894: ret = xmlStrcat(ret, BAD_CAST ":"); 1895: ret = xmlStrcat(ret, port); 1896: } 1897: 1898: if 
(uri->path) { 1899: segment = 1900: xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;"); 1901: NULLCHK(segment) 1902: ret = xmlStrcat(ret, segment); 1903: xmlFree(segment); 1904: } 1905: 1906: if (uri->query_raw) { 1907: ret = xmlStrcat(ret, BAD_CAST "?"); 1908: ret = xmlStrcat(ret, BAD_CAST uri->query_raw); 1909: } 1910: else if (uri->query) { 1911: segment = 1912: xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$"); 1913: NULLCHK(segment) 1914: ret = xmlStrcat(ret, BAD_CAST "?"); 1915: ret = xmlStrcat(ret, segment); 1916: xmlFree(segment); 1917: } 1918: 1919: if (uri->opaque) { 1920: segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST ""); 1921: NULLCHK(segment) 1922: ret = xmlStrcat(ret, segment); 1923: xmlFree(segment); 1924: } 1925: 1926: if (uri->fragment) { 1927: segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#"); 1928: NULLCHK(segment) 1929: ret = xmlStrcat(ret, BAD_CAST "#"); 1930: ret = xmlStrcat(ret, segment); 1931: xmlFree(segment); 1932: } 1933: 1934: xmlFreeURI(uri); 1935: #undef NULLCHK 1936: 1937: return (ret); 1938: } 1939: 1940: /************************************************************************ 1941: * * 1942: * Public functions * 1943: * * 1944: ************************************************************************/ 1945: 1946: /** 1947: * xmlBuildURI: 1948: * @URI: the URI instance found in the document 1949: * @base: the base value 1950: * 1951: * Computes he final URI of the reference done by checking that 1952: * the given URI is valid, and building the final URI using the 1953: * base URI. This is processed according to section 5.2 of the 1954: * RFC 2396 1955: * 1956: * 5.2. Resolving Relative References to Absolute Form 1957: * 1958: * Returns a new URI string (to be freed by the caller) or NULL in case 1959: * of error. 
1960: */ 1961: xmlChar * 1962: xmlBuildURI(const xmlChar *URI, const xmlChar *base) { 1963: xmlChar *val = NULL; 1964: int ret, len, indx, cur, out; 1965: xmlURIPtr ref = NULL; 1966: xmlURIPtr bas = NULL; 1967: xmlURIPtr res = NULL; 1968: 1969: /* 1970: * 1) The URI reference is parsed into the potential four components and 1971: * fragment identifier, as described in Section 4.3. 1972: * 1973: * NOTE that a completely empty URI is treated by modern browsers 1974: * as a reference to "." rather than as a synonym for the current 1975: * URI. Should we do that here? 1976: */ 1977: if (URI == NULL) 1978: ret = -1; 1979: else { 1980: if (*URI) { 1981: ref = xmlCreateURI(); 1982: if (ref == NULL) 1983: goto done; 1984: ret = xmlParseURIReference(ref, (const char *) URI); 1985: } 1986: else 1987: ret = 0; 1988: } 1989: if (ret != 0) 1990: goto done; 1991: if ((ref != NULL) && (ref->scheme != NULL)) { 1992: /* 1993: * The URI is absolute don't modify. 1994: */ 1995: val = xmlStrdup(URI); 1996: goto done; 1997: } 1998: if (base == NULL) 1999: ret = -1; 2000: else { 2001: bas = xmlCreateURI(); 2002: if (bas == NULL) 2003: goto done; 2004: ret = xmlParseURIReference(bas, (const char *) base); 2005: } 2006: if (ret != 0) { 2007: if (ref) 2008: val = xmlSaveUri(ref); 2009: goto done; 2010: } 2011: if (ref == NULL) { 2012: /* 2013: * the base fragment must be ignored 2014: */ 2015: if (bas->fragment != NULL) { 2016: xmlFree(bas->fragment); 2017: bas->fragment = NULL; 2018: } 2019: val = xmlSaveUri(bas); 2020: goto done; 2021: } 2022: 2023: /* 2024: * 2) If the path component is empty and the scheme, authority, and 2025: * query components are undefined, then it is a reference to the 2026: * current document and we are done. Otherwise, the reference URI's 2027: * query and fragment components are defined as found (or not found) 2028: * within the URI reference and not inherited from the base URI. 
2029: * 2030: * NOTE that in modern browsers, the parsing differs from the above 2031: * in the following aspect: the query component is allowed to be 2032: * defined while still treating this as a reference to the current 2033: * document. 2034: */ 2035: res = xmlCreateURI(); 2036: if (res == NULL) 2037: goto done; 2038: if ((ref->scheme == NULL) && (ref->path == NULL) && 2039: ((ref->authority == NULL) && (ref->server == NULL))) { 2040: if (bas->scheme != NULL) 2041: res->scheme = xmlMemStrdup(bas->scheme); 2042: if (bas->authority != NULL) 2043: res->authority = xmlMemStrdup(bas->authority); 2044: else if (bas->server != NULL) { 2045: res->server = xmlMemStrdup(bas->server); 2046: if (bas->user != NULL) 2047: res->user = xmlMemStrdup(bas->user); 2048: res->port = bas->port; 2049: } 2050: if (bas->path != NULL) 2051: res->path = xmlMemStrdup(bas->path); 2052: if (ref->query_raw != NULL) 2053: res->query_raw = xmlMemStrdup (ref->query_raw); 2054: else if (ref->query != NULL) 2055: res->query = xmlMemStrdup(ref->query); 2056: else if (bas->query_raw != NULL) 2057: res->query_raw = xmlMemStrdup(bas->query_raw); 2058: else if (bas->query != NULL) 2059: res->query = xmlMemStrdup(bas->query); 2060: if (ref->fragment != NULL) 2061: res->fragment = xmlMemStrdup(ref->fragment); 2062: goto step_7; 2063: } 2064: 2065: /* 2066: * 3) If the scheme component is defined, indicating that the reference 2067: * starts with a scheme name, then the reference is interpreted as an 2068: * absolute URI and we are done. Otherwise, the reference URI's 2069: * scheme is inherited from the base URI's scheme component. 
2070: */ 2071: if (ref->scheme != NULL) { 2072: val = xmlSaveUri(ref); 2073: goto done; 2074: } 2075: if (bas->scheme != NULL) 2076: res->scheme = xmlMemStrdup(bas->scheme); 2077: 2078: if (ref->query_raw != NULL) 2079: res->query_raw = xmlMemStrdup(ref->query_raw); 2080: else if (ref->query != NULL) 2081: res->query = xmlMemStrdup(ref->query); 2082: if (ref->fragment != NULL) 2083: res->fragment = xmlMemStrdup(ref->fragment); 2084: 2085: /* 2086: * 4) If the authority component is defined, then the reference is a 2087: * network-path and we skip to step 7. Otherwise, the reference 2088: * URI's authority is inherited from the base URI's authority 2089: * component, which will also be undefined if the URI scheme does not 2090: * use an authority component. 2091: */ 2092: if ((ref->authority != NULL) || (ref->server != NULL)) { 2093: if (ref->authority != NULL) 2094: res->authority = xmlMemStrdup(ref->authority); 2095: else { 2096: res->server = xmlMemStrdup(ref->server); 2097: if (ref->user != NULL) 2098: res->user = xmlMemStrdup(ref->user); 2099: res->port = ref->port; 2100: } 2101: if (ref->path != NULL) 2102: res->path = xmlMemStrdup(ref->path); 2103: goto step_7; 2104: } 2105: if (bas->authority != NULL) 2106: res->authority = xmlMemStrdup(bas->authority); 2107: else if (bas->server != NULL) { 2108: res->server = xmlMemStrdup(bas->server); 2109: if (bas->user != NULL) 2110: res->user = xmlMemStrdup(bas->user); 2111: res->port = bas->port; 2112: } 2113: 2114: /* 2115: * 5) If the path component begins with a slash character ("/"), then 2116: * the reference is an absolute-path and we skip to step 7. 2117: */ 2118: if ((ref->path != NULL) && (ref->path[0] == '/')) { 2119: res->path = xmlMemStrdup(ref->path); 2120: goto step_7; 2121: } 2122: 2123: 2124: /* 2125: * 6) If this step is reached, then we are resolving a relative-path 2126: * reference. The relative path needs to be merged with the base 2127: * URI's path. 
Although there are many ways to do this, we will 2128: * describe a simple method using a separate string buffer. 2129: * 2130: * Allocate a buffer large enough for the result string. 2131: */ 2132: len = 2; /* extra / and 0 */ 2133: if (ref->path != NULL) 2134: len += strlen(ref->path); 2135: if (bas->path != NULL) 2136: len += strlen(bas->path); 2137: res->path = (char *) xmlMallocAtomic(len); 2138: if (res->path == NULL) { 2139: xmlGenericError(xmlGenericErrorContext, 2140: "xmlBuildURI: out of memory\n"); 2141: goto done; 2142: } 2143: res->path[0] = 0; 2144: 2145: /* 2146: * a) All but the last segment of the base URI's path component is 2147: * copied to the buffer. In other words, any characters after the 2148: * last (right-most) slash character, if any, are excluded. 2149: */ 2150: cur = 0; 2151: out = 0; 2152: if (bas->path != NULL) { 2153: while (bas->path[cur] != 0) { 2154: while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) 2155: cur++; 2156: if (bas->path[cur] == 0) 2157: break; 2158: 2159: cur++; 2160: while (out < cur) { 2161: res->path[out] = bas->path[out]; 2162: out++; 2163: } 2164: } 2165: } 2166: res->path[out] = 0; 2167: 2168: /* 2169: * b) The reference's path component is appended to the buffer 2170: * string. 2171: */ 2172: if (ref->path != NULL && ref->path[0] != 0) { 2173: indx = 0; 2174: /* 2175: * Ensure the path includes a '/' 2176: */ 2177: if ((out == 0) && (bas->server != NULL)) 2178: res->path[out++] = '/'; 2179: while (ref->path[indx] != 0) { 2180: res->path[out++] = ref->path[indx++]; 2181: } 2182: } 2183: res->path[out] = 0; 2184: 2185: /* 2186: * Steps c) to h) are really path normalization steps 2187: */ 2188: xmlNormalizeURIPath(res->path); 2189: 2190: step_7: 2191: 2192: /* 2193: * 7) The resulting URI components, including any inherited from the 2194: * base URI, are recombined to give the absolute form of the URI 2195: * reference. 
2196: */ 2197: val = xmlSaveUri(res); 2198: 2199: done: 2200: if (ref != NULL) 2201: xmlFreeURI(ref); 2202: if (bas != NULL) 2203: xmlFreeURI(bas); 2204: if (res != NULL) 2205: xmlFreeURI(res); 2206: return(val); 2207: } 2208: 2209: /** 2210: * xmlBuildRelativeURI: 2211: * @URI: the URI reference under consideration 2212: * @base: the base value 2213: * 2214: * Expresses the URI of the reference in terms relative to the 2215: * base. Some examples of this operation include: 2216: * base = "http://site1.com/docs/book1.html" 2217: * URI input URI returned 2218: * docs/pic1.gif pic1.gif 2219: * docs/img/pic1.gif img/pic1.gif 2220: * img/pic1.gif ../img/pic1.gif 2221: * http://site1.com/docs/pic1.gif pic1.gif 2222: * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif 2223: * 2224: * base = "docs/book1.html" 2225: * URI input URI returned 2226: * docs/pic1.gif pic1.gif 2227: * docs/img/pic1.gif img/pic1.gif 2228: * img/pic1.gif ../img/pic1.gif 2229: * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif 2230: * 2231: * 2232: * Note: if the URI reference is really wierd or complicated, it may be 2233: * worthwhile to first convert it into a "nice" one by calling 2234: * xmlBuildURI (using 'base') before calling this routine, 2235: * since this routine (for reasonable efficiency) assumes URI has 2236: * already been through some validation. 2237: * 2238: * Returns a new URI string (to be freed by the caller) or NULL in case 2239: * error. 
2240: */ 2241: xmlChar * 2242: xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base) 2243: { 2244: xmlChar *val = NULL; 2245: int ret; 2246: int ix; 2247: int pos = 0; 2248: int nbslash = 0; 2249: int len; 2250: xmlURIPtr ref = NULL; 2251: xmlURIPtr bas = NULL; 2252: xmlChar *bptr, *uptr, *vptr; 2253: int remove_path = 0; 2254: 2255: if ((URI == NULL) || (*URI == 0)) 2256: return NULL; 2257: 2258: /* 2259: * First parse URI into a standard form 2260: */ 2261: ref = xmlCreateURI (); 2262: if (ref == NULL) 2263: return NULL; 2264: /* If URI not already in "relative" form */ 2265: if (URI[0] != '.') { 2266: ret = xmlParseURIReference (ref, (const char *) URI); 2267: if (ret != 0) 2268: goto done; /* Error in URI, return NULL */ 2269: } else 2270: ref->path = (char *)xmlStrdup(URI); 2271: 2272: /* 2273: * Next parse base into the same standard form 2274: */ 2275: if ((base == NULL) || (*base == 0)) { 2276: val = xmlStrdup (URI); 2277: goto done; 2278: } 2279: bas = xmlCreateURI (); 2280: if (bas == NULL) 2281: goto done; 2282: if (base[0] != '.') { 2283: ret = xmlParseURIReference (bas, (const char *) base); 2284: if (ret != 0) 2285: goto done; /* Error in base, return NULL */ 2286: } else 2287: bas->path = (char *)xmlStrdup(base); 2288: 2289: /* 2290: * If the scheme / server on the URI differs from the base, 2291: * just return the URI 2292: */ 2293: if ((ref->scheme != NULL) && 2294: ((bas->scheme == NULL) || 2295: (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) || 2296: (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) { 2297: val = xmlStrdup (URI); 2298: goto done; 2299: } 2300: if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) { 2301: val = xmlStrdup(BAD_CAST ""); 2302: goto done; 2303: } 2304: if (bas->path == NULL) { 2305: val = xmlStrdup((xmlChar *)ref->path); 2306: goto done; 2307: } 2308: if (ref->path == NULL) { 2309: ref->path = (char *) "/"; 2310: remove_path = 1; 2311: } 2312: 2313: /* 2314: * At this point 
(at last!) we can compare the two paths 2315: * 2316: * First we take care of the special case where either of the 2317: * two path components may be missing (bug 316224) 2318: */ 2319: if (bas->path == NULL) { 2320: if (ref->path != NULL) { 2321: uptr = (xmlChar *) ref->path; 2322: if (*uptr == '/') 2323: uptr++; 2324: /* exception characters from xmlSaveUri */ 2325: val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,"); 2326: } 2327: goto done; 2328: } 2329: bptr = (xmlChar *)bas->path; 2330: if (ref->path == NULL) { 2331: for (ix = 0; bptr[ix] != 0; ix++) { 2332: if (bptr[ix] == '/') 2333: nbslash++; 2334: } 2335: uptr = NULL; 2336: len = 1; /* this is for a string terminator only */ 2337: } else { 2338: /* 2339: * Next we compare the two strings and find where they first differ 2340: */ 2341: if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/')) 2342: pos += 2; 2343: if ((*bptr == '.') && (bptr[1] == '/')) 2344: bptr += 2; 2345: else if ((*bptr == '/') && (ref->path[pos] != '/')) 2346: bptr++; 2347: while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0)) 2348: pos++; 2349: 2350: if (bptr[pos] == ref->path[pos]) { 2351: val = xmlStrdup(BAD_CAST ""); 2352: goto done; /* (I can't imagine why anyone would do this) */ 2353: } 2354: 2355: /* 2356: * In URI, "back up" to the last '/' encountered. 
This will be the 2357: * beginning of the "unique" suffix of URI 2358: */ 2359: ix = pos; 2360: if ((ref->path[ix] == '/') && (ix > 0)) 2361: ix--; 2362: else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/')) 2363: ix -= 2; 2364: for (; ix > 0; ix--) { 2365: if (ref->path[ix] == '/') 2366: break; 2367: } 2368: if (ix == 0) { 2369: uptr = (xmlChar *)ref->path; 2370: } else { 2371: ix++; 2372: uptr = (xmlChar *)&ref->path[ix]; 2373: } 2374: 2375: /* 2376: * In base, count the number of '/' from the differing point 2377: */ 2378: if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */ 2379: for (; bptr[ix] != 0; ix++) { 2380: if (bptr[ix] == '/') 2381: nbslash++; 2382: } 2383: } 2384: len = xmlStrlen (uptr) + 1; 2385: } 2386: 2387: if (nbslash == 0) { 2388: if (uptr != NULL) 2389: /* exception characters from xmlSaveUri */ 2390: val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,"); 2391: goto done; 2392: } 2393: 2394: /* 2395: * Allocate just enough space for the returned string - 2396: * length of the remainder of the URI, plus enough space 2397: * for the "../" groups, plus one for the terminator 2398: */ 2399: val = (xmlChar *) xmlMalloc (len + 3 * nbslash); 2400: if (val == NULL) { 2401: xmlGenericError(xmlGenericErrorContext, 2402: "xmlBuildRelativeURI: out of memory\n"); 2403: goto done; 2404: } 2405: vptr = val; 2406: /* 2407: * Put in as many "../" as needed 2408: */ 2409: for (; nbslash>0; nbslash--) { 2410: *vptr++ = '.'; 2411: *vptr++ = '.'; 2412: *vptr++ = '/'; 2413: } 2414: /* 2415: * Finish up with the end of the URI 2416: */ 2417: if (uptr != NULL) { 2418: if ((vptr > val) && (len > 0) && 2419: (uptr[0] == '/') && (vptr[-1] == '/')) { 2420: memcpy (vptr, uptr + 1, len - 1); 2421: vptr[len - 2] = 0; 2422: } else { 2423: memcpy (vptr, uptr, len); 2424: vptr[len - 1] = 0; 2425: } 2426: } else { 2427: vptr[len - 1] = 0; 2428: } 2429: 2430: /* escape the freshly-built path */ 2431: vptr = val; 2432: /* exception characters from 
xmlSaveUri */ 2433: val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,"); 2434: xmlFree(vptr); 2435: 2436: done: 2437: /* 2438: * Free the working variables 2439: */ 2440: if (remove_path != 0) 2441: ref->path = NULL; 2442: if (ref != NULL) 2443: xmlFreeURI (ref); 2444: if (bas != NULL) 2445: xmlFreeURI (bas); 2446: 2447: return val; 2448: } 2449: 2450: /** 2451: * xmlCanonicPath: 2452: * @path: the resource locator in a filesystem notation 2453: * 2454: * Constructs a canonic path from the specified path. 2455: * 2456: * Returns a new canonic path, or a duplicate of the path parameter if the 2457: * construction fails. The caller is responsible for freeing the memory occupied 2458: * by the returned string. If there is insufficient memory available, or the 2459: * argument is NULL, the function returns NULL. 2460: */ 2461: #define IS_WINDOWS_PATH(p) \ 2462: ((p != NULL) && \ 2463: (((p[0] >= 'a') && (p[0] <= 'z')) || \ 2464: ((p[0] >= 'A') && (p[0] <= 'Z'))) && \ 2465: (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\'))) 2466: xmlChar * 2467: xmlCanonicPath(const xmlChar *path) 2468: { 2469: /* 2470: * For Windows implementations, additional work needs to be done to 2471: * replace backslashes in pathnames with "forward slashes" 2472: */ 2473: #if defined(_WIN32) && !defined(__CYGWIN__) 2474: int len = 0; 2475: int i = 0; 2476: xmlChar *p = NULL; 2477: #endif 2478: xmlURIPtr uri; 2479: xmlChar *ret; 2480: const xmlChar *absuri; 2481: 2482: if (path == NULL) 2483: return(NULL); 2484: 2485: /* sanitize filename starting with // so it can be used as URI */ 2486: if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/')) 2487: path++; 2488: 2489: if ((uri = xmlParseURI((const char *) path)) != NULL) { 2490: xmlFreeURI(uri); 2491: return xmlStrdup(path); 2492: } 2493: 2494: /* Check if this is an "absolute uri" */ 2495: absuri = xmlStrstr(path, BAD_CAST "://"); 2496: if (absuri != NULL) { 2497: int l, j; 2498: unsigned char c; 2499: xmlChar *escURI; 2500: 2501: /* 2502: * 
this looks like an URI where some parts have not been 2503: * escaped leading to a parsing problem. Check that the first 2504: * part matches a protocol. 2505: */ 2506: l = absuri - path; 2507: /* Bypass if first part (part before the '://') is > 20 chars */ 2508: if ((l <= 0) || (l > 20)) 2509: goto path_processing; 2510: /* Bypass if any non-alpha characters are present in first part */ 2511: for (j = 0;j < l;j++) { 2512: c = path[j]; 2513: if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')))) 2514: goto path_processing; 2515: } 2516: 2517: /* Escape all except the characters specified in the supplied path */ 2518: escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;="); 2519: if (escURI != NULL) { 2520: /* Try parsing the escaped path */ 2521: uri = xmlParseURI((const char *) escURI); 2522: /* If successful, return the escaped string */ 2523: if (uri != NULL) { 2524: xmlFreeURI(uri); 2525: return escURI; 2526: } 2527: } 2528: } 2529: 2530: path_processing: 2531: /* For Windows implementations, replace backslashes with 'forward slashes' */ 2532: #if defined(_WIN32) && !defined(__CYGWIN__) 2533: /* 2534: * Create a URI structure 2535: */ 2536: uri = xmlCreateURI(); 2537: if (uri == NULL) { /* Guard against 'out of memory' */ 2538: return(NULL); 2539: } 2540: 2541: len = xmlStrlen(path); 2542: if ((len > 2) && IS_WINDOWS_PATH(path)) { 2543: /* make the scheme 'file' */ 2544: uri->scheme = xmlStrdup(BAD_CAST "file"); 2545: /* allocate space for leading '/' + path + string terminator */ 2546: uri->path = xmlMallocAtomic(len + 2); 2547: if (uri->path == NULL) { 2548: xmlFreeURI(uri); /* Guard agains 'out of memory' */ 2549: return(NULL); 2550: } 2551: /* Put in leading '/' plus path */ 2552: uri->path[0] = '/'; 2553: p = uri->path + 1; 2554: strncpy(p, path, len + 1); 2555: } else { 2556: uri->path = xmlStrdup(path); 2557: if (uri->path == NULL) { 2558: xmlFreeURI(uri); 2559: return(NULL); 2560: } 2561: p = uri->path; 2562: } 2563: /* Now change all occurences 
of '\' to '/' */ 2564: while (*p != '\0') { 2565: if (*p == '\\') 2566: *p = '/'; 2567: p++; 2568: } 2569: 2570: if (uri->scheme == NULL) { 2571: ret = xmlStrdup((const xmlChar *) uri->path); 2572: } else { 2573: ret = xmlSaveUri(uri); 2574: } 2575: 2576: xmlFreeURI(uri); 2577: #else 2578: ret = xmlStrdup((const xmlChar *) path); 2579: #endif 2580: return(ret); 2581: } 2582: 2583: /** 2584: * xmlPathToURI: 2585: * @path: the resource locator in a filesystem notation 2586: * 2587: * Constructs an URI expressing the existing path 2588: * 2589: * Returns a new URI, or a duplicate of the path parameter if the 2590: * construction fails. The caller is responsible for freeing the memory 2591: * occupied by the returned string. If there is insufficient memory available, 2592: * or the argument is NULL, the function returns NULL. 2593: */ 2594: xmlChar * 2595: xmlPathToURI(const xmlChar *path) 2596: { 2597: xmlURIPtr uri; 2598: xmlURI temp; 2599: xmlChar *ret, *cal; 2600: 2601: if (path == NULL) 2602: return(NULL); 2603: 2604: if ((uri = xmlParseURI((const char *) path)) != NULL) { 2605: xmlFreeURI(uri); 2606: return xmlStrdup(path); 2607: } 2608: cal = xmlCanonicPath(path); 2609: if (cal == NULL) 2610: return(NULL); 2611: #if defined(_WIN32) && !defined(__CYGWIN__) 2612: /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?) 2613: If 'cal' is a valid URI allready then we are done here, as continuing would make 2614: it invalid. */ 2615: if ((uri = xmlParseURI((const char *) cal)) != NULL) { 2616: xmlFreeURI(uri); 2617: return cal; 2618: } 2619: /* 'cal' can contain a relative path with backslashes. If that is processed 2620: by xmlSaveURI, they will be escaped and the external entity loader machinery 2621: will fail. So convert them to slashes. Misuse 'ret' for walking. 
*/ 2622: ret = cal; 2623: while (*ret != '\0') { 2624: if (*ret == '\\') 2625: *ret = '/'; 2626: ret++; 2627: } 2628: #endif 2629: memset(&temp, 0, sizeof(temp)); 2630: temp.path = (char *) cal; 2631: ret = xmlSaveUri(&temp); 2632: xmlFree(cal); 2633: return(ret); 2634: } 2635: #define bottom_uri 2636: #include "elfgcchack.h"