embedaddon/libxml2/uri.c - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / uri.c
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:37:58 2012 UTC (13 years, 4 months ago) by misho
CVS tags: MAIN, HEAD

Initial revision

1: /** 2: * uri.c: set of generic URI related routines 3: * 4: * Reference: RFCs 3986, 2732 and 2373 5: * 6: * See Copyright for the status of this software. 7: * 8: * daniel@veillard.com 9: */ 10: 11: #define IN_LIBXML 12: #include "libxml.h" 13: 14: #include <string.h> 15: 16: #include <libxml/xmlmemory.h> 17: #include <libxml/uri.h> 18: #include <libxml/globals.h> 19: #include <libxml/xmlerror.h> 20: 21: static void xmlCleanURI(xmlURIPtr uri); 22: 23: /* 24: * Old rule from 2396 used in legacy handling code 25: * alpha = lowalpha | upalpha 26: */ 27: #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x)) 28: 29: 30: /* 31: * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | 32: * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | 33: * "u" | "v" | "w" | "x" | "y" | "z" 34: */ 35: 36: #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z')) 37: 38: /* 39: * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | 40: * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | 41: * "U" | "V" | "W" | "X" | "Y" | "Z" 42: */ 43: #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z')) 44: 45: #ifdef IS_DIGIT 46: #undef IS_DIGIT 47: #endif 48: /* 49: * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 50: */ 51: #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9')) 52: 53: /* 54: * alphanum = alpha | digit 55: */ 56: 57: #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x)) 58: 59: /* 60: * mark = "-" | "_" | "." | "!" 
| "~" | "*" | "'" | "(" | ")" 61: */ 62: 63: #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \ 64: ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \ 65: ((x) == '(') || ((x) == ')')) 66: 67: /* 68: * unwise = "{" | "}" | "|" | "\" | "^" | "`" 69: */ 70: 71: #define IS_UNWISE(p) \ 72: (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \ 73: ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \ 74: ((*(p) == ']')) || ((*(p) == '`'))) 75: /* 76: * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," | 77: * "[" | "]" 78: */ 79: 80: #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \ 81: ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \ 82: ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \ 83: ((x) == ']')) 84: 85: /* 86: * unreserved = alphanum | mark 87: */ 88: 89: #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x)) 90: 91: /* 92: * Skip to next pointer char, handle escaped sequences 93: */ 94: 95: #define NEXT(p) ((*p == '%')? p += 3 : p++) 96: 97: /* 98: * Productions from the spec. 99: * 100: * authority = server | reg_name 101: * reg_name = 1*( unreserved | escaped | "$" | "," | 102: * ";" | ":" | "@" | "&" | "=" | "+" ) 103: * 104: * path = [ abs_path | opaque_part ] 105: */ 106: 107: #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n)) 108: 109: /************************************************************************ 110: * * 111: * RFC 3986 parser * 112: * * 113: ************************************************************************/ 114: 115: #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9')) 116: #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \ 117: ((*(p) >= 'A') && (*(p) <= 'Z'))) 118: #define ISA_HEXDIG(p) \ 119: (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \ 120: ((*(p) >= 'A') && (*(p) <= 'F'))) 121: 122: /* 123: * sub-delims = "!" 
/ "$" / "&" / "'" / "(" / ")" 124: * / "*" / "+" / "," / ";" / "=" 125: */ 126: #define ISA_SUB_DELIM(p) \ 127: (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \ 128: ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \ 129: ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \ 130: ((*(p) == '=')) || ((*(p) == '\''))) 131: 132: /* 133: * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" 134: */ 135: #define ISA_GEN_DELIM(p) \ 136: (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \ 137: ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \ 138: ((*(p) == '@'))) 139: 140: /* 141: * reserved = gen-delims / sub-delims 142: */ 143: #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p))) 144: 145: /* 146: * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 147: */ 148: #define ISA_UNRESERVED(p) \ 149: ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \ 150: ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~'))) 151: 152: /* 153: * pct-encoded = "%" HEXDIG HEXDIG 154: */ 155: #define ISA_PCT_ENCODED(p) \ 156: ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2))) 157: 158: /* 159: * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 160: */ 161: #define ISA_PCHAR(p) \ 162: (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \ 163: ((*(p) == ':')) || ((*(p) == '@'))) 164: 165: /** 166: * xmlParse3986Scheme: 167: * @uri: pointer to an URI structure 168: * @str: pointer to the string to analyze 169: * 170: * Parse an URI scheme 171: * 172: * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." 
) 173: * 174: * Returns 0 or the error code 175: */ 176: static int 177: xmlParse3986Scheme(xmlURIPtr uri, const char **str) { 178: const char *cur; 179: 180: if (str == NULL) 181: return(-1); 182: 183: cur = *str; 184: if (!ISA_ALPHA(cur)) 185: return(2); 186: cur++; 187: while (ISA_ALPHA(cur) || ISA_DIGIT(cur) || 188: (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++; 189: if (uri != NULL) { 190: if (uri->scheme != NULL) xmlFree(uri->scheme); 191: uri->scheme = STRNDUP(*str, cur - *str); 192: } 193: *str = cur; 194: return(0); 195: } 196: 197: /** 198: * xmlParse3986Fragment: 199: * @uri: pointer to an URI structure 200: * @str: pointer to the string to analyze 201: * 202: * Parse the query part of an URI 203: * 204: * fragment = *( pchar / "/" / "?" ) 205: * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']' 206: * in the fragment identifier but this is used very broadly for 207: * xpointer scheme selection, so we are allowing it here to not break 208: * for example all the DocBook processing chains. 
209: * 210: * Returns 0 or the error code 211: */ 212: static int 213: xmlParse3986Fragment(xmlURIPtr uri, const char **str) 214: { 215: const char *cur; 216: 217: if (str == NULL) 218: return (-1); 219: 220: cur = *str; 221: 222: while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || 223: (*cur == '[') || (*cur == ']') || 224: ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 225: NEXT(cur); 226: if (uri != NULL) { 227: if (uri->fragment != NULL) 228: xmlFree(uri->fragment); 229: if (uri->cleanup & 2) 230: uri->fragment = STRNDUP(*str, cur - *str); 231: else 232: uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL); 233: } 234: *str = cur; 235: return (0); 236: } 237: 238: /** 239: * xmlParse3986Query: 240: * @uri: pointer to an URI structure 241: * @str: pointer to the string to analyze 242: * 243: * Parse the query part of an URI 244: * 245: * query = *uric 246: * 247: * Returns 0 or the error code 248: */ 249: static int 250: xmlParse3986Query(xmlURIPtr uri, const char **str) 251: { 252: const char *cur; 253: 254: if (str == NULL) 255: return (-1); 256: 257: cur = *str; 258: 259: while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || 260: ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 261: NEXT(cur); 262: if (uri != NULL) { 263: if (uri->query != NULL) 264: xmlFree(uri->query); 265: if (uri->cleanup & 2) 266: uri->query = STRNDUP(*str, cur - *str); 267: else 268: uri->query = xmlURIUnescapeString(*str, cur - *str, NULL); 269: 270: /* Save the raw bytes of the query as well. 
271: * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114 272: */ 273: if (uri->query_raw != NULL) 274: xmlFree (uri->query_raw); 275: uri->query_raw = STRNDUP (*str, cur - *str); 276: } 277: *str = cur; 278: return (0); 279: } 280: 281: /** 282: * xmlParse3986Port: 283: * @uri: pointer to an URI structure 284: * @str: the string to analyze 285: * 286: * Parse a port part and fills in the appropriate fields 287: * of the @uri structure 288: * 289: * port = *DIGIT 290: * 291: * Returns 0 or the error code 292: */ 293: static int 294: xmlParse3986Port(xmlURIPtr uri, const char **str) 295: { 296: const char *cur = *str; 297: 298: if (ISA_DIGIT(cur)) { 299: if (uri != NULL) 300: uri->port = 0; 301: while (ISA_DIGIT(cur)) { 302: if (uri != NULL) 303: uri->port = uri->port * 10 + (*cur - '0'); 304: cur++; 305: } 306: *str = cur; 307: return(0); 308: } 309: return(1); 310: } 311: 312: /** 313: * xmlParse3986Userinfo: 314: * @uri: pointer to an URI structure 315: * @str: the string to analyze 316: * 317: * Parse an user informations part and fills in the appropriate fields 318: * of the @uri structure 319: * 320: * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) 321: * 322: * Returns 0 or the error code 323: */ 324: static int 325: xmlParse3986Userinfo(xmlURIPtr uri, const char **str) 326: { 327: const char *cur; 328: 329: cur = *str; 330: while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || 331: ISA_SUB_DELIM(cur) || (*cur == ':')) 332: NEXT(cur); 333: if (*cur == '@') { 334: if (uri != NULL) { 335: if (uri->user != NULL) xmlFree(uri->user); 336: if (uri->cleanup & 2) 337: uri->user = STRNDUP(*str, cur - *str); 338: else 339: uri->user = xmlURIUnescapeString(*str, cur - *str, NULL); 340: } 341: *str = cur; 342: return(0); 343: } 344: return(1); 345: } 346: 347: /** 348: * xmlParse3986DecOctet: 349: * @str: the string to analyze 350: * 351: * dec-octet = DIGIT ; 0-9 352: * / %x31-39 DIGIT ; 10-99 353: * / "1" 2DIGIT ; 100-199 354: * / "2" 
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet.  One- and two-digit forms must not be followed by
 * another digit; no leading zero is allowed.  *@str is only updated on
 * success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    char c0, c1, c2;

    c0 = cur[0];
    if ((c0 < '0') || (c0 > '9'))
        return(1);

    /* DIGIT ; 0-9 */
    c1 = cur[1];
    if ((c1 < '0') || (c1 > '9')) {
        *str = cur + 1;
        return(0);
    }

    /*
     * cur[1] is a digit, hence not the terminator: cur[2] is readable.
     * %x31-39 DIGIT ; 10-99
     */
    c2 = cur[2];
    if ((c2 < '0') || (c2 > '9')) {
        if (c0 == '0')
            return(1);
        *str = cur + 2;
        return(0);
    }

    /* Three digits: only 100-255 are valid. */
    if (c0 == '1') {
        /* "1" 2DIGIT ; 100-199 */
    } else if ((c0 == '2') && (c1 <= '4')) {
        /* "2" %x30-34 DIGIT ; 200-249 */
    } else if ((c0 == '2') && (c1 == '5') && (c2 <= '5')) {
        /* "25" %x30-35 ; 250-255 -- the LAST digit is the bounded one */
    } else {
        return(1);
    }
    *str = cur + 3;
    return(0);
}

/**
 * xmlParse3986Host:
 * @uri:  pointer to an URI structure
 * @str:  the string to analyze
 *
 * Parse an host part and fills in the appropriate fields
 * of the @uri structure
 *
 * host          = IP-literal / IPv4address / reg-name
 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 * IPv4address   = dec-octet "." dec-octet "." dec-octet "."
 */
dec-octet 395: * reg-name = *( unreserved / pct-encoded / sub-delims ) 396: * 397: * Returns 0 or the error code 398: */ 399: static int 400: xmlParse3986Host(xmlURIPtr uri, const char **str) 401: { 402: const char *cur = *str; 403: const char *host; 404: 405: host = cur; 406: /* 407: * IPv6 and future adressing scheme are enclosed between brackets 408: */ 409: if (*cur == '[') { 410: cur++; 411: while ((*cur != ']') && (*cur != 0)) 412: cur++; 413: if (*cur != ']') 414: return(1); 415: cur++; 416: goto found; 417: } 418: /* 419: * try to parse an IPv4 420: */ 421: if (ISA_DIGIT(cur)) { 422: if (xmlParse3986DecOctet(&cur) != 0) 423: goto not_ipv4; 424: if (*cur != '.') 425: goto not_ipv4; 426: cur++; 427: if (xmlParse3986DecOctet(&cur) != 0) 428: goto not_ipv4; 429: if (*cur != '.') 430: goto not_ipv4; 431: if (xmlParse3986DecOctet(&cur) != 0) 432: goto not_ipv4; 433: if (*cur != '.') 434: goto not_ipv4; 435: if (xmlParse3986DecOctet(&cur) != 0) 436: goto not_ipv4; 437: goto found; 438: not_ipv4: 439: cur = *str; 440: } 441: /* 442: * then this should be a hostname which can be empty 443: */ 444: while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur)) 445: NEXT(cur); 446: found: 447: if (uri != NULL) { 448: if (uri->authority != NULL) xmlFree(uri->authority); 449: uri->authority = NULL; 450: if (uri->server != NULL) xmlFree(uri->server); 451: if (cur != host) { 452: if (uri->cleanup & 2) 453: uri->server = STRNDUP(host, cur - host); 454: else 455: uri->server = xmlURIUnescapeString(host, cur - host, NULL); 456: } else 457: uri->server = NULL; 458: } 459: *str = cur; 460: return(0); 461: } 462: 463: /** 464: * xmlParse3986Authority: 465: * @uri: pointer to an URI structure 466: * @str: the string to analyze 467: * 468: * Parse an authority part and fills in the appropriate fields 469: * of the @uri structure 470: * 471: * authority = [ userinfo "@" ] host [ ":" port ] 472: * 473: * Returns 0 or the error code 474: */ 475: static int 476: 
xmlParse3986Authority(xmlURIPtr uri, const char **str) 477: { 478: const char *cur; 479: int ret; 480: 481: cur = *str; 482: /* 483: * try to parse an userinfo and check for the trailing @ 484: */ 485: ret = xmlParse3986Userinfo(uri, &cur); 486: if ((ret != 0) || (*cur != '@')) 487: cur = *str; 488: else 489: cur++; 490: ret = xmlParse3986Host(uri, &cur); 491: if (ret != 0) return(ret); 492: if (*cur == ':') { 493: cur++; 494: ret = xmlParse3986Port(uri, &cur); 495: if (ret != 0) return(ret); 496: } 497: *str = cur; 498: return(0); 499: } 500: 501: /** 502: * xmlParse3986Segment: 503: * @str: the string to analyze 504: * @forbid: an optional forbidden character 505: * @empty: allow an empty segment 506: * 507: * Parse a segment and fills in the appropriate fields 508: * of the @uri structure 509: * 510: * segment = *pchar 511: * segment-nz = 1*pchar 512: * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) 513: * ; non-zero-length segment without any colon ":" 514: * 515: * Returns 0 or the error code 516: */ 517: static int 518: xmlParse3986Segment(const char **str, char forbid, int empty) 519: { 520: const char *cur; 521: 522: cur = *str; 523: if (!ISA_PCHAR(cur)) { 524: if (empty) 525: return(0); 526: return(1); 527: } 528: while (ISA_PCHAR(cur) && (*cur != forbid)) 529: NEXT(cur); 530: *str = cur; 531: return (0); 532: } 533: 534: /** 535: * xmlParse3986PathAbEmpty: 536: * @uri: pointer to an URI structure 537: * @str: the string to analyze 538: * 539: * Parse an path absolute or empty and fills in the appropriate fields 540: * of the @uri structure 541: * 542: * path-abempty = *( "/" segment ) 543: * 544: * Returns 0 or the error code 545: */ 546: static int 547: xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str) 548: { 549: const char *cur; 550: int ret; 551: 552: cur = *str; 553: 554: while (*cur == '/') { 555: cur++; 556: ret = xmlParse3986Segment(&cur, 0, 1); 557: if (ret != 0) return(ret); 558: } 559: if (uri != NULL) { 560: if 
(uri->path != NULL) xmlFree(uri->path); 561: if (*str != cur) { 562: if (uri->cleanup & 2) 563: uri->path = STRNDUP(*str, cur - *str); 564: else 565: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 566: } else { 567: uri->path = NULL; 568: } 569: } 570: *str = cur; 571: return (0); 572: } 573: 574: /** 575: * xmlParse3986PathAbsolute: 576: * @uri: pointer to an URI structure 577: * @str: the string to analyze 578: * 579: * Parse an path absolute and fills in the appropriate fields 580: * of the @uri structure 581: * 582: * path-absolute = "/" [ segment-nz *( "/" segment ) ] 583: * 584: * Returns 0 or the error code 585: */ 586: static int 587: xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str) 588: { 589: const char *cur; 590: int ret; 591: 592: cur = *str; 593: 594: if (*cur != '/') 595: return(1); 596: cur++; 597: ret = xmlParse3986Segment(&cur, 0, 0); 598: if (ret == 0) { 599: while (*cur == '/') { 600: cur++; 601: ret = xmlParse3986Segment(&cur, 0, 1); 602: if (ret != 0) return(ret); 603: } 604: } 605: if (uri != NULL) { 606: if (uri->path != NULL) xmlFree(uri->path); 607: if (cur != *str) { 608: if (uri->cleanup & 2) 609: uri->path = STRNDUP(*str, cur - *str); 610: else 611: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 612: } else { 613: uri->path = NULL; 614: } 615: } 616: *str = cur; 617: return (0); 618: } 619: 620: /** 621: * xmlParse3986PathRootless: 622: * @uri: pointer to an URI structure 623: * @str: the string to analyze 624: * 625: * Parse an path without root and fills in the appropriate fields 626: * of the @uri structure 627: * 628: * path-rootless = segment-nz *( "/" segment ) 629: * 630: * Returns 0 or the error code 631: */ 632: static int 633: xmlParse3986PathRootless(xmlURIPtr uri, const char **str) 634: { 635: const char *cur; 636: int ret; 637: 638: cur = *str; 639: 640: ret = xmlParse3986Segment(&cur, 0, 0); 641: if (ret != 0) return(ret); 642: while (*cur == '/') { 643: cur++; 644: ret = 
xmlParse3986Segment(&cur, 0, 1); 645: if (ret != 0) return(ret); 646: } 647: if (uri != NULL) { 648: if (uri->path != NULL) xmlFree(uri->path); 649: if (cur != *str) { 650: if (uri->cleanup & 2) 651: uri->path = STRNDUP(*str, cur - *str); 652: else 653: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 654: } else { 655: uri->path = NULL; 656: } 657: } 658: *str = cur; 659: return (0); 660: } 661: 662: /** 663: * xmlParse3986PathNoScheme: 664: * @uri: pointer to an URI structure 665: * @str: the string to analyze 666: * 667: * Parse an path which is not a scheme and fills in the appropriate fields 668: * of the @uri structure 669: * 670: * path-noscheme = segment-nz-nc *( "/" segment ) 671: * 672: * Returns 0 or the error code 673: */ 674: static int 675: xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str) 676: { 677: const char *cur; 678: int ret; 679: 680: cur = *str; 681: 682: ret = xmlParse3986Segment(&cur, ':', 0); 683: if (ret != 0) return(ret); 684: while (*cur == '/') { 685: cur++; 686: ret = xmlParse3986Segment(&cur, 0, 1); 687: if (ret != 0) return(ret); 688: } 689: if (uri != NULL) { 690: if (uri->path != NULL) xmlFree(uri->path); 691: if (cur != *str) { 692: if (uri->cleanup & 2) 693: uri->path = STRNDUP(*str, cur - *str); 694: else 695: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 696: } else { 697: uri->path = NULL; 698: } 699: } 700: *str = cur; 701: return (0); 702: } 703: 704: /** 705: * xmlParse3986HierPart: 706: * @uri: pointer to an URI structure 707: * @str: the string to analyze 708: * 709: * Parse an hierarchical part and fills in the appropriate fields 710: * of the @uri structure 711: * 712: * hier-part = "//" authority path-abempty 713: * / path-absolute 714: * / path-rootless 715: * / path-empty 716: * 717: * Returns 0 or the error code 718: */ 719: static int 720: xmlParse3986HierPart(xmlURIPtr uri, const char **str) 721: { 722: const char *cur; 723: int ret; 724: 725: cur = *str; 726: 727: if ((*cur == '/') && 
(*(cur + 1) == '/')) { 728: cur += 2; 729: ret = xmlParse3986Authority(uri, &cur); 730: if (ret != 0) return(ret); 731: ret = xmlParse3986PathAbEmpty(uri, &cur); 732: if (ret != 0) return(ret); 733: *str = cur; 734: return(0); 735: } else if (*cur == '/') { 736: ret = xmlParse3986PathAbsolute(uri, &cur); 737: if (ret != 0) return(ret); 738: } else if (ISA_PCHAR(cur)) { 739: ret = xmlParse3986PathRootless(uri, &cur); 740: if (ret != 0) return(ret); 741: } else { 742: /* path-empty is effectively empty */ 743: if (uri != NULL) { 744: if (uri->path != NULL) xmlFree(uri->path); 745: uri->path = NULL; 746: } 747: } 748: *str = cur; 749: return (0); 750: } 751: 752: /** 753: * xmlParse3986RelativeRef: 754: * @uri: pointer to an URI structure 755: * @str: the string to analyze 756: * 757: * Parse an URI string and fills in the appropriate fields 758: * of the @uri structure 759: * 760: * relative-ref = relative-part [ "?" query ] [ "#" fragment ] 761: * relative-part = "//" authority path-abempty 762: * / path-absolute 763: * / path-noscheme 764: * / path-empty 765: * 766: * Returns 0 or the error code 767: */ 768: static int 769: xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) { 770: int ret; 771: 772: if ((*str == '/') && (*(str + 1) == '/')) { 773: str += 2; 774: ret = xmlParse3986Authority(uri, &str); 775: if (ret != 0) return(ret); 776: ret = xmlParse3986PathAbEmpty(uri, &str); 777: if (ret != 0) return(ret); 778: } else if (*str == '/') { 779: ret = xmlParse3986PathAbsolute(uri, &str); 780: if (ret != 0) return(ret); 781: } else if (ISA_PCHAR(str)) { 782: ret = xmlParse3986PathNoScheme(uri, &str); 783: if (ret != 0) return(ret); 784: } else { 785: /* path-empty is effectively empty */ 786: if (uri != NULL) { 787: if (uri->path != NULL) xmlFree(uri->path); 788: uri->path = NULL; 789: } 790: } 791: 792: if (*str == '?') { 793: str++; 794: ret = xmlParse3986Query(uri, &str); 795: if (ret != 0) return(ret); 796: } 797: if (*str == '#') { 798: str++; 799: ret = 
xmlParse3986Fragment(uri, &str); 800: if (ret != 0) return(ret); 801: } 802: if (*str != 0) { 803: xmlCleanURI(uri); 804: return(1); 805: } 806: return(0); 807: } 808: 809: 810: /** 811: * xmlParse3986URI: 812: * @uri: pointer to an URI structure 813: * @str: the string to analyze 814: * 815: * Parse an URI string and fills in the appropriate fields 816: * of the @uri structure 817: * 818: * scheme ":" hier-part [ "?" query ] [ "#" fragment ] 819: * 820: * Returns 0 or the error code 821: */ 822: static int 823: xmlParse3986URI(xmlURIPtr uri, const char *str) { 824: int ret; 825: 826: ret = xmlParse3986Scheme(uri, &str); 827: if (ret != 0) return(ret); 828: if (*str != ':') { 829: return(1); 830: } 831: str++; 832: ret = xmlParse3986HierPart(uri, &str); 833: if (ret != 0) return(ret); 834: if (*str == '?') { 835: str++; 836: ret = xmlParse3986Query(uri, &str); 837: if (ret != 0) return(ret); 838: } 839: if (*str == '#') { 840: str++; 841: ret = xmlParse3986Fragment(uri, &str); 842: if (ret != 0) return(ret); 843: } 844: if (*str != 0) { 845: xmlCleanURI(uri); 846: return(1); 847: } 848: return(0); 849: } 850: 851: /** 852: * xmlParse3986URIReference: 853: * @uri: pointer to an URI structure 854: * @str: the string to analyze 855: * 856: * Parse an URI reference string and fills in the appropriate fields 857: * of the @uri structure 858: * 859: * URI-reference = URI / relative-ref 860: * 861: * Returns 0 or the error code 862: */ 863: static int 864: xmlParse3986URIReference(xmlURIPtr uri, const char *str) { 865: int ret; 866: 867: if (str == NULL) 868: return(-1); 869: xmlCleanURI(uri); 870: 871: /* 872: * Try first to parse absolute refs, then fallback to relative if 873: * it fails. 
874: */ 875: ret = xmlParse3986URI(uri, str); 876: if (ret != 0) { 877: xmlCleanURI(uri); 878: ret = xmlParse3986RelativeRef(uri, str); 879: if (ret != 0) { 880: xmlCleanURI(uri); 881: return(ret); 882: } 883: } 884: return(0); 885: } 886: 887: /** 888: * xmlParseURI: 889: * @str: the URI string to analyze 890: * 891: * Parse an URI based on RFC 3986 892: * 893: * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 894: * 895: * Returns a newly built xmlURIPtr or NULL in case of error 896: */ 897: xmlURIPtr 898: xmlParseURI(const char *str) { 899: xmlURIPtr uri; 900: int ret; 901: 902: if (str == NULL) 903: return(NULL); 904: uri = xmlCreateURI(); 905: if (uri != NULL) { 906: ret = xmlParse3986URIReference(uri, str); 907: if (ret) { 908: xmlFreeURI(uri); 909: return(NULL); 910: } 911: } 912: return(uri); 913: } 914: 915: /** 916: * xmlParseURIReference: 917: * @uri: pointer to an URI structure 918: * @str: the string to analyze 919: * 920: * Parse an URI reference string based on RFC 3986 and fills in the 921: * appropriate fields of the @uri structure 922: * 923: * URI-reference = URI / relative-ref 924: * 925: * Returns 0 or the error code 926: */ 927: int 928: xmlParseURIReference(xmlURIPtr uri, const char *str) { 929: return(xmlParse3986URIReference(uri, str)); 930: } 931: 932: /** 933: * xmlParseURIRaw: 934: * @str: the URI string to analyze 935: * @raw: if 1 unescaping of URI pieces are disabled 936: * 937: * Parse an URI but allows to keep intact the original fragments. 
938: * 939: * URI-reference = URI / relative-ref 940: * 941: * Returns a newly built xmlURIPtr or NULL in case of error 942: */ 943: xmlURIPtr 944: xmlParseURIRaw(const char *str, int raw) { 945: xmlURIPtr uri; 946: int ret; 947: 948: if (str == NULL) 949: return(NULL); 950: uri = xmlCreateURI(); 951: if (uri != NULL) { 952: if (raw) { 953: uri->cleanup |= 2; 954: } 955: ret = xmlParseURIReference(uri, str); 956: if (ret) { 957: xmlFreeURI(uri); 958: return(NULL); 959: } 960: } 961: return(uri); 962: } 963: 964: /************************************************************************ 965: * * 966: * Generic URI structure functions * 967: * * 968: ************************************************************************/ 969: 970: /** 971: * xmlCreateURI: 972: * 973: * Simply creates an empty xmlURI 974: * 975: * Returns the new structure or NULL in case of error 976: */ 977: xmlURIPtr 978: xmlCreateURI(void) { 979: xmlURIPtr ret; 980: 981: ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI)); 982: if (ret == NULL) { 983: xmlGenericError(xmlGenericErrorContext, 984: "xmlCreateURI: out of memory\n"); 985: return(NULL); 986: } 987: memset(ret, 0, sizeof(xmlURI)); 988: return(ret); 989: } 990: 991: /** 992: * xmlSaveUri: 993: * @uri: pointer to an xmlURI 994: * 995: * Save the URI as an escaped string 996: * 997: * Returns a new string (to be deallocated by caller) 998: */ 999: xmlChar * 1000: xmlSaveUri(xmlURIPtr uri) { 1001: xmlChar *ret = NULL; 1002: xmlChar *temp; 1003: const char *p; 1004: int len; 1005: int max; 1006: 1007: if (uri == NULL) return(NULL); 1008: 1009: 1010: max = 80; 1011: ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar)); 1012: if (ret == NULL) { 1013: xmlGenericError(xmlGenericErrorContext, 1014: "xmlSaveUri: out of memory\n"); 1015: return(NULL); 1016: } 1017: len = 0; 1018: 1019: if (uri->scheme != NULL) { 1020: p = uri->scheme; 1021: while (*p != 0) { 1022: if (len >= max) { 1023: max *= 2; 1024: temp = (xmlChar *) xmlRealloc(ret, (max + 
1) * sizeof(xmlChar)); 1025: if (temp == NULL) { 1026: xmlGenericError(xmlGenericErrorContext, 1027: "xmlSaveUri: out of memory\n"); 1028: xmlFree(ret); 1029: return(NULL); 1030: } 1031: ret = temp; 1032: } 1033: ret[len++] = *p++; 1034: } 1035: if (len >= max) { 1036: max *= 2; 1037: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 1038: if (temp == NULL) { 1039: xmlGenericError(xmlGenericErrorContext, 1040: "xmlSaveUri: out of memory\n"); 1041: xmlFree(ret); 1042: return(NULL); 1043: } 1044: ret = temp; 1045: } 1046: ret[len++] = ':'; 1047: } 1048: if (uri->opaque != NULL) { 1049: p = uri->opaque; 1050: while (*p != 0) { 1051: if (len + 3 >= max) { 1052: max *= 2; 1053: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 1054: if (temp == NULL) { 1055: xmlGenericError(xmlGenericErrorContext, 1056: "xmlSaveUri: out of memory\n"); 1057: xmlFree(ret); 1058: return(NULL); 1059: } 1060: ret = temp; 1061: } 1062: if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) 1063: ret[len++] = *p++; 1064: else { 1065: int val = *(unsigned char *)p++; 1066: int hi = val / 0x10, lo = val % 0x10; 1067: ret[len++] = '%'; 1068: ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1069: ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 1070: } 1071: } 1072: } else { 1073: if (uri->server != NULL) { 1074: if (len + 3 >= max) { 1075: max *= 2; 1076: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 1077: if (temp == NULL) { 1078: xmlGenericError(xmlGenericErrorContext, 1079: "xmlSaveUri: out of memory\n"); 1080: xmlFree(ret); 1081: return(NULL); 1082: } 1083: ret = temp; 1084: } 1085: ret[len++] = '/'; 1086: ret[len++] = '/'; 1087: if (uri->user != NULL) { 1088: p = uri->user; 1089: while (*p != 0) { 1090: if (len + 3 >= max) { 1091: max *= 2; 1092: temp = (xmlChar *) xmlRealloc(ret, 1093: (max + 1) * sizeof(xmlChar)); 1094: if (temp == NULL) { 1095: xmlGenericError(xmlGenericErrorContext, 1096: "xmlSaveUri: out of memory\n"); 1097: xmlFree(ret); 1098: return(NULL); 1099: } 1100: ret = temp; 1101: } 1102: if ((IS_UNRESERVED(*(p))) || 1103: ((*(p) == ';')) || ((*(p) == ':')) || 1104: ((*(p) == '&')) || ((*(p) == '=')) || 1105: ((*(p) == '+')) || ((*(p) == '$')) || 1106: ((*(p) == ','))) 1107: ret[len++] = *p++; 1108: else { 1109: int val = *(unsigned char *)p++; 1110: int hi = val / 0x10, lo = val % 0x10; 1111: ret[len++] = '%'; 1112: ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1113: ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 1114: } 1115: } 1116: if (len + 3 >= max) { 1117: max *= 2; 1118: temp = (xmlChar *) xmlRealloc(ret, 1119: (max + 1) * sizeof(xmlChar)); 1120: if (temp == NULL) { 1121: xmlGenericError(xmlGenericErrorContext, 1122: "xmlSaveUri: out of memory\n"); 1123: xmlFree(ret); 1124: return(NULL); 1125: } 1126: ret = temp; 1127: } 1128: ret[len++] = '@'; 1129: } 1130: p = uri->server; 1131: while (*p != 0) { 1132: if (len >= max) { 1133: max *= 2; 1134: temp = (xmlChar *) xmlRealloc(ret, 1135: (max + 1) * sizeof(xmlChar)); 1136: if (temp == NULL) { 1137: xmlGenericError(xmlGenericErrorContext, 1138: "xmlSaveUri: out of memory\n"); 1139: xmlFree(ret); 1140: return(NULL); 1141: } 1142: ret = temp; 1143: } 1144: ret[len++] = *p++; 1145: } 1146: if (uri->port > 0) { 1147: if (len + 10 >= max) { 1148: max *= 2; 1149: temp = (xmlChar *) xmlRealloc(ret, 1150: (max + 1) * sizeof(xmlChar)); 1151: if (temp == NULL) { 1152: xmlGenericError(xmlGenericErrorContext, 1153: "xmlSaveUri: out of memory\n"); 1154: xmlFree(ret); 1155: return(NULL); 1156: } 1157: ret = temp; 1158: } 1159: len += snprintf((char *) &ret[len], max - len, ":%d", uri->port); 1160: } 1161: } else if (uri->authority != NULL) { 1162: if (len + 3 >= max) { 1163: max *= 2; 1164: temp = (xmlChar *) xmlRealloc(ret, 1165: (max + 1) * sizeof(xmlChar)); 1166: if (temp == NULL) { 1167: xmlGenericError(xmlGenericErrorContext, 1168: "xmlSaveUri: out of memory\n"); 1169: xmlFree(ret); 1170: return(NULL); 1171: } 1172: ret = temp; 1173: } 1174: ret[len++] = '/'; 1175: ret[len++] = '/'; 1176: p = uri->authority; 1177: while (*p != 0) { 1178: if (len + 3 >= max) { 1179: max *= 2; 1180: temp = (xmlChar *) xmlRealloc(ret, 1181: (max + 1) * sizeof(xmlChar)); 1182: if (temp == NULL) { 1183: xmlGenericError(xmlGenericErrorContext, 1184: "xmlSaveUri: out of memory\n"); 1185: xmlFree(ret); 1186: return(NULL); 1187: } 1188: ret = temp; 1189: } 1190: if ((IS_UNRESERVED(*(p))) || 1191: ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) 
== ';')) || 1192: ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || 1193: ((*(p) == '=')) || ((*(p) == '+'))) 1194: ret[len++] = *p++; 1195: else { 1196: int val = *(unsigned char *)p++; 1197: int hi = val / 0x10, lo = val % 0x10; 1198: ret[len++] = '%'; 1199: ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1200: ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1201: } 1202: } 1203: } else if (uri->scheme != NULL) { 1204: if (len + 3 >= max) { 1205: max *= 2; 1206: temp = (xmlChar *) xmlRealloc(ret, 1207: (max + 1) * sizeof(xmlChar)); 1208: if (temp == NULL) { 1209: xmlGenericError(xmlGenericErrorContext, 1210: "xmlSaveUri: out of memory\n"); 1211: xmlFree(ret); 1212: return(NULL); 1213: } 1214: ret = temp; 1215: } 1216: ret[len++] = '/'; 1217: ret[len++] = '/'; 1218: } 1219: if (uri->path != NULL) { 1220: p = uri->path; 1221: /* 1222: * the colon in file:///d: should not be escaped or 1223: * Windows accesses fail later. 1224: */ 1225: if ((uri->scheme != NULL) && 1226: (p[0] == '/') && 1227: (((p[1] >= 'a') && (p[1] <= 'z')) || 1228: ((p[1] >= 'A') && (p[1] <= 'Z'))) && 1229: (p[2] == ':') && 1230: (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) { 1231: if (len + 3 >= max) { 1232: max *= 2; 1233: ret = (xmlChar *) xmlRealloc(ret, 1234: (max + 1) * sizeof(xmlChar)); 1235: if (ret == NULL) { 1236: xmlGenericError(xmlGenericErrorContext, 1237: "xmlSaveUri: out of memory\n"); 1238: return(NULL); 1239: } 1240: } 1241: ret[len++] = *p++; 1242: ret[len++] = *p++; 1243: ret[len++] = *p++; 1244: } 1245: while (*p != 0) { 1246: if (len + 3 >= max) { 1247: max *= 2; 1248: temp = (xmlChar *) xmlRealloc(ret, 1249: (max + 1) * sizeof(xmlChar)); 1250: if (temp == NULL) { 1251: xmlGenericError(xmlGenericErrorContext, 1252: "xmlSaveUri: out of memory\n"); 1253: xmlFree(ret); 1254: return(NULL); 1255: } 1256: ret = temp; 1257: } 1258: if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) || 1259: ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || 1260: ((*(p) == '=')) || ((*(p) == 
'+')) || ((*(p) == '$')) || 1261: ((*(p) == ','))) 1262: ret[len++] = *p++; 1263: else { 1264: int val = *(unsigned char *)p++; 1265: int hi = val / 0x10, lo = val % 0x10; 1266: ret[len++] = '%'; 1267: ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1268: ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1269: } 1270: } 1271: } 1272: if (uri->query_raw != NULL) { 1273: if (len + 1 >= max) { 1274: max *= 2; 1275: temp = (xmlChar *) xmlRealloc(ret, 1276: (max + 1) * sizeof(xmlChar)); 1277: if (temp == NULL) { 1278: xmlGenericError(xmlGenericErrorContext, 1279: "xmlSaveUri: out of memory\n"); 1280: xmlFree(ret); 1281: return(NULL); 1282: } 1283: ret = temp; 1284: } 1285: ret[len++] = '?'; 1286: p = uri->query_raw; 1287: while (*p != 0) { 1288: if (len + 1 >= max) { 1289: max *= 2; 1290: temp = (xmlChar *) xmlRealloc(ret, 1291: (max + 1) * sizeof(xmlChar)); 1292: if (temp == NULL) { 1293: xmlGenericError(xmlGenericErrorContext, 1294: "xmlSaveUri: out of memory\n"); 1295: xmlFree(ret); 1296: return(NULL); 1297: } 1298: ret = temp; 1299: } 1300: ret[len++] = *p++; 1301: } 1302: } else if (uri->query != NULL) { 1303: if (len + 3 >= max) { 1304: max *= 2; 1305: temp = (xmlChar *) xmlRealloc(ret, 1306: (max + 1) * sizeof(xmlChar)); 1307: if (temp == NULL) { 1308: xmlGenericError(xmlGenericErrorContext, 1309: "xmlSaveUri: out of memory\n"); 1310: xmlFree(ret); 1311: return(NULL); 1312: } 1313: ret = temp; 1314: } 1315: ret[len++] = '?'; 1316: p = uri->query; 1317: while (*p != 0) { 1318: if (len + 3 >= max) { 1319: max *= 2; 1320: temp = (xmlChar *) xmlRealloc(ret, 1321: (max + 1) * sizeof(xmlChar)); 1322: if (temp == NULL) { 1323: xmlGenericError(xmlGenericErrorContext, 1324: "xmlSaveUri: out of memory\n"); 1325: xmlFree(ret); 1326: return(NULL); 1327: } 1328: ret = temp; 1329: } 1330: if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 1331: ret[len++] = *p++; 1332: else { 1333: int val = *(unsigned char *)p++; 1334: int hi = val / 0x10, lo = val % 0x10; 1335: ret[len++] = '%'; 1336: 
ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1337: ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1338: } 1339: } 1340: } 1341: } 1342: if (uri->fragment != NULL) { 1343: if (len + 3 >= max) { 1344: max *= 2; 1345: temp = (xmlChar *) xmlRealloc(ret, 1346: (max + 1) * sizeof(xmlChar)); 1347: if (temp == NULL) { 1348: xmlGenericError(xmlGenericErrorContext, 1349: "xmlSaveUri: out of memory\n"); 1350: xmlFree(ret); 1351: return(NULL); 1352: } 1353: ret = temp; 1354: } 1355: ret[len++] = '#'; 1356: p = uri->fragment; 1357: while (*p != 0) { 1358: if (len + 3 >= max) { 1359: max *= 2; 1360: temp = (xmlChar *) xmlRealloc(ret, 1361: (max + 1) * sizeof(xmlChar)); 1362: if (temp == NULL) { 1363: xmlGenericError(xmlGenericErrorContext, 1364: "xmlSaveUri: out of memory\n"); 1365: xmlFree(ret); 1366: return(NULL); 1367: } 1368: ret = temp; 1369: } 1370: if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 1371: ret[len++] = *p++; 1372: else { 1373: int val = *(unsigned char *)p++; 1374: int hi = val / 0x10, lo = val % 0x10; 1375: ret[len++] = '%'; 1376: ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1377: ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1378: } 1379: } 1380: } 1381: if (len >= max) { 1382: max *= 2; 1383: temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar)); 1384: if (temp == NULL) { 1385: xmlGenericError(xmlGenericErrorContext, 1386: "xmlSaveUri: out of memory\n"); 1387: xmlFree(ret); 1388: return(NULL); 1389: } 1390: ret = temp; 1391: } 1392: ret[len] = 0; 1393: return(ret); 1394: } 1395: 1396: /** 1397: * xmlPrintURI: 1398: * @stream: a FILE* for the output 1399: * @uri: pointer to an xmlURI 1400: * 1401: * Prints the URI in the stream @stream. 
1402: */ 1403: void 1404: xmlPrintURI(FILE *stream, xmlURIPtr uri) { 1405: xmlChar *out; 1406: 1407: out = xmlSaveUri(uri); 1408: if (out != NULL) { 1409: fprintf(stream, "%s", (char *) out); 1410: xmlFree(out); 1411: } 1412: } 1413: 1414: /** 1415: * xmlCleanURI: 1416: * @uri: pointer to an xmlURI 1417: * 1418: * Make sure the xmlURI struct is free of content 1419: */ 1420: static void 1421: xmlCleanURI(xmlURIPtr uri) { 1422: if (uri == NULL) return; 1423: 1424: if (uri->scheme != NULL) xmlFree(uri->scheme); 1425: uri->scheme = NULL; 1426: if (uri->server != NULL) xmlFree(uri->server); 1427: uri->server = NULL; 1428: if (uri->user != NULL) xmlFree(uri->user); 1429: uri->user = NULL; 1430: if (uri->path != NULL) xmlFree(uri->path); 1431: uri->path = NULL; 1432: if (uri->fragment != NULL) xmlFree(uri->fragment); 1433: uri->fragment = NULL; 1434: if (uri->opaque != NULL) xmlFree(uri->opaque); 1435: uri->opaque = NULL; 1436: if (uri->authority != NULL) xmlFree(uri->authority); 1437: uri->authority = NULL; 1438: if (uri->query != NULL) xmlFree(uri->query); 1439: uri->query = NULL; 1440: if (uri->query_raw != NULL) xmlFree(uri->query_raw); 1441: uri->query_raw = NULL; 1442: } 1443: 1444: /** 1445: * xmlFreeURI: 1446: * @uri: pointer to an xmlURI 1447: * 1448: * Free up the xmlURI struct 1449: */ 1450: void 1451: xmlFreeURI(xmlURIPtr uri) { 1452: if (uri == NULL) return; 1453: 1454: if (uri->scheme != NULL) xmlFree(uri->scheme); 1455: if (uri->server != NULL) xmlFree(uri->server); 1456: if (uri->user != NULL) xmlFree(uri->user); 1457: if (uri->path != NULL) xmlFree(uri->path); 1458: if (uri->fragment != NULL) xmlFree(uri->fragment); 1459: if (uri->opaque != NULL) xmlFree(uri->opaque); 1460: if (uri->authority != NULL) xmlFree(uri->authority); 1461: if (uri->query != NULL) xmlFree(uri->query); 1462: if (uri->query_raw != NULL) xmlFree(uri->query_raw); 1463: xmlFree(uri); 1464: } 1465: 1466: /************************************************************************ 1467: * 
* 1468: * Helper functions * 1469: * * 1470: ************************************************************************/ 1471: 1472: /** 1473: * xmlNormalizeURIPath: 1474: * @path: pointer to the path string 1475: * 1476: * Applies the 5 normalization steps to a path string--that is, RFC 2396 1477: * Section 5.2, steps 6.c through 6.g. 1478: * 1479: * Normalization occurs directly on the string, no new allocation is done 1480: * 1481: * Returns 0 or an error code 1482: */ 1483: int 1484: xmlNormalizeURIPath(char *path) { 1485: char *cur, *out; 1486: 1487: if (path == NULL) 1488: return(-1); 1489: 1490: /* Skip all initial "/" chars. We want to get to the beginning of the 1491: * first non-empty segment. 1492: */ 1493: cur = path; 1494: while (cur[0] == '/') 1495: ++cur; 1496: if (cur[0] == '\0') 1497: return(0); 1498: 1499: /* Keep everything we've seen so far. */ 1500: out = cur; 1501: 1502: /* 1503: * Analyze each segment in sequence for cases (c) and (d). 1504: */ 1505: while (cur[0] != '\0') { 1506: /* 1507: * c) All occurrences of "./", where "." is a complete path segment, 1508: * are removed from the buffer string. 1509: */ 1510: if ((cur[0] == '.') && (cur[1] == '/')) { 1511: cur += 2; 1512: /* '//' normalization should be done at this point too */ 1513: while (cur[0] == '/') 1514: cur++; 1515: continue; 1516: } 1517: 1518: /* 1519: * d) If the buffer string ends with "." as a complete path segment, 1520: * that "." is removed. 1521: */ 1522: if ((cur[0] == '.') && (cur[1] == '\0')) 1523: break; 1524: 1525: /* Otherwise keep the segment. */ 1526: while (cur[0] != '/') { 1527: if (cur[0] == '\0') 1528: goto done_cd; 1529: (out++)[0] = (cur++)[0]; 1530: } 1531: /* nomalize // */ 1532: while ((cur[0] == '/') && (cur[1] == '/')) 1533: cur++; 1534: 1535: (out++)[0] = (cur++)[0]; 1536: } 1537: done_cd: 1538: out[0] = '\0'; 1539: 1540: /* Reset to the beginning of the first segment for the next sequence. 
*/ 1541: cur = path; 1542: while (cur[0] == '/') 1543: ++cur; 1544: if (cur[0] == '\0') 1545: return(0); 1546: 1547: /* 1548: * Analyze each segment in sequence for cases (e) and (f). 1549: * 1550: * e) All occurrences of "<segment>/../", where <segment> is a 1551: * complete path segment not equal to "..", are removed from the 1552: * buffer string. Removal of these path segments is performed 1553: * iteratively, removing the leftmost matching pattern on each 1554: * iteration, until no matching pattern remains. 1555: * 1556: * f) If the buffer string ends with "<segment>/..", where <segment> 1557: * is a complete path segment not equal to "..", that 1558: * "<segment>/.." is removed. 1559: * 1560: * To satisfy the "iterative" clause in (e), we need to collapse the 1561: * string every time we find something that needs to be removed. Thus, 1562: * we don't need to keep two pointers into the string: we only need a 1563: * "current position" pointer. 1564: */ 1565: while (1) { 1566: char *segp, *tmp; 1567: 1568: /* At the beginning of each iteration of this loop, "cur" points to 1569: * the first character of the segment we want to examine. 1570: */ 1571: 1572: /* Find the end of the current segment. */ 1573: segp = cur; 1574: while ((segp[0] != '/') && (segp[0] != '\0')) 1575: ++segp; 1576: 1577: /* If this is the last segment, we're done (we need at least two 1578: * segments to meet the criteria for the (e) and (f) cases). 1579: */ 1580: if (segp[0] == '\0') 1581: break; 1582: 1583: /* If the first segment is "..", or if the next segment _isn't_ "..", 1584: * keep this segment and try the next one. 
1585: */ 1586: ++segp; 1587: if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3)) 1588: || ((segp[0] != '.') || (segp[1] != '.') 1589: || ((segp[2] != '/') && (segp[2] != '\0')))) { 1590: cur = segp; 1591: continue; 1592: } 1593: 1594: /* If we get here, remove this segment and the next one and back up 1595: * to the previous segment (if there is one), to implement the 1596: * "iteratively" clause. It's pretty much impossible to back up 1597: * while maintaining two pointers into the buffer, so just compact 1598: * the whole buffer now. 1599: */ 1600: 1601: /* If this is the end of the buffer, we're done. */ 1602: if (segp[2] == '\0') { 1603: cur[0] = '\0'; 1604: break; 1605: } 1606: /* Valgrind complained, strcpy(cur, segp + 3); */ 1607: /* string will overlap, do not use strcpy */ 1608: tmp = cur; 1609: segp += 3; 1610: while ((*tmp++ = *segp++) != 0); 1611: 1612: /* If there are no previous segments, then keep going from here. */ 1613: segp = cur; 1614: while ((segp > path) && ((--segp)[0] == '/')) 1615: ; 1616: if (segp == path) 1617: continue; 1618: 1619: /* "segp" is pointing to the end of a previous segment; find it's 1620: * start. We need to back up to the previous segment and start 1621: * over with that to handle things like "foo/bar/../..". If we 1622: * don't do this, then on the first pass we'll remove the "bar/..", 1623: * but be pointing at the second ".." so we won't realize we can also 1624: * remove the "foo/..". 1625: */ 1626: cur = segp; 1627: while ((cur > path) && (cur[-1] != '/')) 1628: --cur; 1629: } 1630: out[0] = '\0'; 1631: 1632: /* 1633: * g) If the resulting buffer string still begins with one or more 1634: * complete path segments of "..", then the reference is 1635: * considered to be in error. 
Implementations may handle this 1636: * error by retaining these components in the resolved path (i.e., 1637: * treating them as part of the final URI), by removing them from 1638: * the resolved path (i.e., discarding relative levels above the 1639: * root), or by avoiding traversal of the reference. 1640: * 1641: * We discard them from the final path. 1642: */ 1643: if (path[0] == '/') { 1644: cur = path; 1645: while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.') 1646: && ((cur[3] == '/') || (cur[3] == '\0'))) 1647: cur += 3; 1648: 1649: if (cur != path) { 1650: out = path; 1651: while (cur[0] != '\0') 1652: (out++)[0] = (cur++)[0]; 1653: out[0] = 0; 1654: } 1655: } 1656: 1657: return(0); 1658: } 1659: 1660: static int is_hex(char c) { 1661: if (((c >= '0') && (c <= '9')) || 1662: ((c >= 'a') && (c <= 'f')) || 1663: ((c >= 'A') && (c <= 'F'))) 1664: return(1); 1665: return(0); 1666: } 1667: 1668: /** 1669: * xmlURIUnescapeString: 1670: * @str: the string to unescape 1671: * @len: the length in bytes to unescape (or <= 0 to indicate full string) 1672: * @target: optional destination buffer 1673: * 1674: * Unescaping routine, but does not check that the string is an URI. The 1675: * output is a direct unsigned char translation of %XX values (no encoding) 1676: * Note that the length of the result can only be smaller or same size as 1677: * the input string. 
1678: * 1679: * Returns a copy of the string, but unescaped, will return NULL only in case 1680: * of error 1681: */ 1682: char * 1683: xmlURIUnescapeString(const char *str, int len, char *target) { 1684: char *ret, *out; 1685: const char *in; 1686: 1687: if (str == NULL) 1688: return(NULL); 1689: if (len <= 0) len = strlen(str); 1690: if (len < 0) return(NULL); 1691: 1692: if (target == NULL) { 1693: ret = (char *) xmlMallocAtomic(len + 1); 1694: if (ret == NULL) { 1695: xmlGenericError(xmlGenericErrorContext, 1696: "xmlURIUnescapeString: out of memory\n"); 1697: return(NULL); 1698: } 1699: } else 1700: ret = target; 1701: in = str; 1702: out = ret; 1703: while(len > 0) { 1704: if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) { 1705: in++; 1706: if ((*in >= '0') && (*in <= '9')) 1707: *out = (*in - '0'); 1708: else if ((*in >= 'a') && (*in <= 'f')) 1709: *out = (*in - 'a') + 10; 1710: else if ((*in >= 'A') && (*in <= 'F')) 1711: *out = (*in - 'A') + 10; 1712: in++; 1713: if ((*in >= '0') && (*in <= '9')) 1714: *out = *out * 16 + (*in - '0'); 1715: else if ((*in >= 'a') && (*in <= 'f')) 1716: *out = *out * 16 + (*in - 'a') + 10; 1717: else if ((*in >= 'A') && (*in <= 'F')) 1718: *out = *out * 16 + (*in - 'A') + 10; 1719: in++; 1720: len -= 3; 1721: out++; 1722: } else { 1723: *out++ = *in++; 1724: len--; 1725: } 1726: } 1727: *out = 0; 1728: return(ret); 1729: } 1730: 1731: /** 1732: * xmlURIEscapeStr: 1733: * @str: string to escape 1734: * @list: exception list string of chars not to escape 1735: * 1736: * This routine escapes a string to hex, ignoring reserved characters (a-z) 1737: * and the characters in the exception list. 1738: * 1739: * Returns a new escaped string or NULL in case of error. 
1740: */ 1741: xmlChar * 1742: xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) { 1743: xmlChar *ret, ch; 1744: xmlChar *temp; 1745: const xmlChar *in; 1746: 1747: unsigned int len, out; 1748: 1749: if (str == NULL) 1750: return(NULL); 1751: if (str[0] == 0) 1752: return(xmlStrdup(str)); 1753: len = xmlStrlen(str); 1754: if (!(len > 0)) return(NULL); 1755: 1756: len += 20; 1757: ret = (xmlChar *) xmlMallocAtomic(len); 1758: if (ret == NULL) { 1759: xmlGenericError(xmlGenericErrorContext, 1760: "xmlURIEscapeStr: out of memory\n"); 1761: return(NULL); 1762: } 1763: in = (const xmlChar *) str; 1764: out = 0; 1765: while(*in != 0) { 1766: if (len - out <= 3) { 1767: len += 20; 1768: temp = (xmlChar *) xmlRealloc(ret, len); 1769: if (temp == NULL) { 1770: xmlGenericError(xmlGenericErrorContext, 1771: "xmlURIEscapeStr: out of memory\n"); 1772: xmlFree(ret); 1773: return(NULL); 1774: } 1775: ret = temp; 1776: } 1777: 1778: ch = *in; 1779: 1780: if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) { 1781: unsigned char val; 1782: ret[out++] = '%'; 1783: val = ch >> 4; 1784: if (val <= 9) 1785: ret[out++] = '0' + val; 1786: else 1787: ret[out++] = 'A' + val - 0xA; 1788: val = ch & 0xF; 1789: if (val <= 9) 1790: ret[out++] = '0' + val; 1791: else 1792: ret[out++] = 'A' + val - 0xA; 1793: in++; 1794: } else { 1795: ret[out++] = *in++; 1796: } 1797: 1798: } 1799: ret[out] = 0; 1800: return(ret); 1801: } 1802: 1803: /** 1804: * xmlURIEscape: 1805: * @str: the string of the URI to escape 1806: * 1807: * Escaping routine, does not do validity checks ! 1808: * It will try to escape the chars needing this, but this is heuristic 1809: * based it's impossible to be sure. 1810: * 1811: * Returns an copy of the string, but escaped 1812: * 1813: * 25 May 2001 1814: * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly 1815: * according to RFC2396. 
1816: * - Carl Douglas 1817: */ 1818: xmlChar * 1819: xmlURIEscape(const xmlChar * str) 1820: { 1821: xmlChar *ret, *segment = NULL; 1822: xmlURIPtr uri; 1823: int ret2; 1824: 1825: #define NULLCHK(p) if(!p) { \ 1826: xmlGenericError(xmlGenericErrorContext, \ 1827: "xmlURIEscape: out of memory\n"); \ 1828: xmlFreeURI(uri); \ 1829: return NULL; } \ 1830: 1831: if (str == NULL) 1832: return (NULL); 1833: 1834: uri = xmlCreateURI(); 1835: if (uri != NULL) { 1836: /* 1837: * Allow escaping errors in the unescaped form 1838: */ 1839: uri->cleanup = 1; 1840: ret2 = xmlParseURIReference(uri, (const char *)str); 1841: if (ret2) { 1842: xmlFreeURI(uri); 1843: return (NULL); 1844: } 1845: } 1846: 1847: if (!uri) 1848: return NULL; 1849: 1850: ret = NULL; 1851: 1852: if (uri->scheme) { 1853: segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-."); 1854: NULLCHK(segment) 1855: ret = xmlStrcat(ret, segment); 1856: ret = xmlStrcat(ret, BAD_CAST ":"); 1857: xmlFree(segment); 1858: } 1859: 1860: if (uri->authority) { 1861: segment = 1862: xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@"); 1863: NULLCHK(segment) 1864: ret = xmlStrcat(ret, BAD_CAST "//"); 1865: ret = xmlStrcat(ret, segment); 1866: xmlFree(segment); 1867: } 1868: 1869: if (uri->user) { 1870: segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,"); 1871: NULLCHK(segment) 1872: ret = xmlStrcat(ret,BAD_CAST "//"); 1873: ret = xmlStrcat(ret, segment); 1874: ret = xmlStrcat(ret, BAD_CAST "@"); 1875: xmlFree(segment); 1876: } 1877: 1878: if (uri->server) { 1879: segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@"); 1880: NULLCHK(segment) 1881: if (uri->user == NULL) 1882: ret = xmlStrcat(ret, BAD_CAST "//"); 1883: ret = xmlStrcat(ret, segment); 1884: xmlFree(segment); 1885: } 1886: 1887: if (uri->port) { 1888: xmlChar port[10]; 1889: 1890: snprintf((char *) port, 10, "%d", uri->port); 1891: ret = xmlStrcat(ret, BAD_CAST ":"); 1892: ret = xmlStrcat(ret, port); 1893: } 1894: 1895: if 
(uri->path) { 1896: segment = 1897: xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;"); 1898: NULLCHK(segment) 1899: ret = xmlStrcat(ret, segment); 1900: xmlFree(segment); 1901: } 1902: 1903: if (uri->query_raw) { 1904: ret = xmlStrcat(ret, BAD_CAST "?"); 1905: ret = xmlStrcat(ret, BAD_CAST uri->query_raw); 1906: } 1907: else if (uri->query) { 1908: segment = 1909: xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$"); 1910: NULLCHK(segment) 1911: ret = xmlStrcat(ret, BAD_CAST "?"); 1912: ret = xmlStrcat(ret, segment); 1913: xmlFree(segment); 1914: } 1915: 1916: if (uri->opaque) { 1917: segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST ""); 1918: NULLCHK(segment) 1919: ret = xmlStrcat(ret, segment); 1920: xmlFree(segment); 1921: } 1922: 1923: if (uri->fragment) { 1924: segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#"); 1925: NULLCHK(segment) 1926: ret = xmlStrcat(ret, BAD_CAST "#"); 1927: ret = xmlStrcat(ret, segment); 1928: xmlFree(segment); 1929: } 1930: 1931: xmlFreeURI(uri); 1932: #undef NULLCHK 1933: 1934: return (ret); 1935: } 1936: 1937: /************************************************************************ 1938: * * 1939: * Public functions * 1940: * * 1941: ************************************************************************/ 1942: 1943: /** 1944: * xmlBuildURI: 1945: * @URI: the URI instance found in the document 1946: * @base: the base value 1947: * 1948: * Computes he final URI of the reference done by checking that 1949: * the given URI is valid, and building the final URI using the 1950: * base URI. This is processed according to section 5.2 of the 1951: * RFC 2396 1952: * 1953: * 5.2. Resolving Relative References to Absolute Form 1954: * 1955: * Returns a new URI string (to be freed by the caller) or NULL in case 1956: * of error. 
1957: */ 1958: xmlChar * 1959: xmlBuildURI(const xmlChar *URI, const xmlChar *base) { 1960: xmlChar *val = NULL; 1961: int ret, len, indx, cur, out; 1962: xmlURIPtr ref = NULL; 1963: xmlURIPtr bas = NULL; 1964: xmlURIPtr res = NULL; 1965: 1966: /* 1967: * 1) The URI reference is parsed into the potential four components and 1968: * fragment identifier, as described in Section 4.3. 1969: * 1970: * NOTE that a completely empty URI is treated by modern browsers 1971: * as a reference to "." rather than as a synonym for the current 1972: * URI. Should we do that here? 1973: */ 1974: if (URI == NULL) 1975: ret = -1; 1976: else { 1977: if (*URI) { 1978: ref = xmlCreateURI(); 1979: if (ref == NULL) 1980: goto done; 1981: ret = xmlParseURIReference(ref, (const char *) URI); 1982: } 1983: else 1984: ret = 0; 1985: } 1986: if (ret != 0) 1987: goto done; 1988: if ((ref != NULL) && (ref->scheme != NULL)) { 1989: /* 1990: * The URI is absolute don't modify. 1991: */ 1992: val = xmlStrdup(URI); 1993: goto done; 1994: } 1995: if (base == NULL) 1996: ret = -1; 1997: else { 1998: bas = xmlCreateURI(); 1999: if (bas == NULL) 2000: goto done; 2001: ret = xmlParseURIReference(bas, (const char *) base); 2002: } 2003: if (ret != 0) { 2004: if (ref) 2005: val = xmlSaveUri(ref); 2006: goto done; 2007: } 2008: if (ref == NULL) { 2009: /* 2010: * the base fragment must be ignored 2011: */ 2012: if (bas->fragment != NULL) { 2013: xmlFree(bas->fragment); 2014: bas->fragment = NULL; 2015: } 2016: val = xmlSaveUri(bas); 2017: goto done; 2018: } 2019: 2020: /* 2021: * 2) If the path component is empty and the scheme, authority, and 2022: * query components are undefined, then it is a reference to the 2023: * current document and we are done. Otherwise, the reference URI's 2024: * query and fragment components are defined as found (or not found) 2025: * within the URI reference and not inherited from the base URI. 
2026: * 2027: * NOTE that in modern browsers, the parsing differs from the above 2028: * in the following aspect: the query component is allowed to be 2029: * defined while still treating this as a reference to the current 2030: * document. 2031: */ 2032: res = xmlCreateURI(); 2033: if (res == NULL) 2034: goto done; 2035: if ((ref->scheme == NULL) && (ref->path == NULL) && 2036: ((ref->authority == NULL) && (ref->server == NULL))) { 2037: if (bas->scheme != NULL) 2038: res->scheme = xmlMemStrdup(bas->scheme); 2039: if (bas->authority != NULL) 2040: res->authority = xmlMemStrdup(bas->authority); 2041: else if (bas->server != NULL) { 2042: res->server = xmlMemStrdup(bas->server); 2043: if (bas->user != NULL) 2044: res->user = xmlMemStrdup(bas->user); 2045: res->port = bas->port; 2046: } 2047: if (bas->path != NULL) 2048: res->path = xmlMemStrdup(bas->path); 2049: if (ref->query_raw != NULL) 2050: res->query_raw = xmlMemStrdup (ref->query_raw); 2051: else if (ref->query != NULL) 2052: res->query = xmlMemStrdup(ref->query); 2053: else if (bas->query_raw != NULL) 2054: res->query_raw = xmlMemStrdup(bas->query_raw); 2055: else if (bas->query != NULL) 2056: res->query = xmlMemStrdup(bas->query); 2057: if (ref->fragment != NULL) 2058: res->fragment = xmlMemStrdup(ref->fragment); 2059: goto step_7; 2060: } 2061: 2062: /* 2063: * 3) If the scheme component is defined, indicating that the reference 2064: * starts with a scheme name, then the reference is interpreted as an 2065: * absolute URI and we are done. Otherwise, the reference URI's 2066: * scheme is inherited from the base URI's scheme component. 
2067: */ 2068: if (ref->scheme != NULL) { 2069: val = xmlSaveUri(ref); 2070: goto done; 2071: } 2072: if (bas->scheme != NULL) 2073: res->scheme = xmlMemStrdup(bas->scheme); 2074: 2075: if (ref->query_raw != NULL) 2076: res->query_raw = xmlMemStrdup(ref->query_raw); 2077: else if (ref->query != NULL) 2078: res->query = xmlMemStrdup(ref->query); 2079: if (ref->fragment != NULL) 2080: res->fragment = xmlMemStrdup(ref->fragment); 2081: 2082: /* 2083: * 4) If the authority component is defined, then the reference is a 2084: * network-path and we skip to step 7. Otherwise, the reference 2085: * URI's authority is inherited from the base URI's authority 2086: * component, which will also be undefined if the URI scheme does not 2087: * use an authority component. 2088: */ 2089: if ((ref->authority != NULL) || (ref->server != NULL)) { 2090: if (ref->authority != NULL) 2091: res->authority = xmlMemStrdup(ref->authority); 2092: else { 2093: res->server = xmlMemStrdup(ref->server); 2094: if (ref->user != NULL) 2095: res->user = xmlMemStrdup(ref->user); 2096: res->port = ref->port; 2097: } 2098: if (ref->path != NULL) 2099: res->path = xmlMemStrdup(ref->path); 2100: goto step_7; 2101: } 2102: if (bas->authority != NULL) 2103: res->authority = xmlMemStrdup(bas->authority); 2104: else if (bas->server != NULL) { 2105: res->server = xmlMemStrdup(bas->server); 2106: if (bas->user != NULL) 2107: res->user = xmlMemStrdup(bas->user); 2108: res->port = bas->port; 2109: } 2110: 2111: /* 2112: * 5) If the path component begins with a slash character ("/"), then 2113: * the reference is an absolute-path and we skip to step 7. 2114: */ 2115: if ((ref->path != NULL) && (ref->path[0] == '/')) { 2116: res->path = xmlMemStrdup(ref->path); 2117: goto step_7; 2118: } 2119: 2120: 2121: /* 2122: * 6) If this step is reached, then we are resolving a relative-path 2123: * reference. The relative path needs to be merged with the base 2124: * URI's path. 
Although there are many ways to do this, we will 2125: * describe a simple method using a separate string buffer. 2126: * 2127: * Allocate a buffer large enough for the result string. 2128: */ 2129: len = 2; /* extra / and 0 */ 2130: if (ref->path != NULL) 2131: len += strlen(ref->path); 2132: if (bas->path != NULL) 2133: len += strlen(bas->path); 2134: res->path = (char *) xmlMallocAtomic(len); 2135: if (res->path == NULL) { 2136: xmlGenericError(xmlGenericErrorContext, 2137: "xmlBuildURI: out of memory\n"); 2138: goto done; 2139: } 2140: res->path[0] = 0; 2141: 2142: /* 2143: * a) All but the last segment of the base URI's path component is 2144: * copied to the buffer. In other words, any characters after the 2145: * last (right-most) slash character, if any, are excluded. 2146: */ 2147: cur = 0; 2148: out = 0; 2149: if (bas->path != NULL) { 2150: while (bas->path[cur] != 0) { 2151: while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) 2152: cur++; 2153: if (bas->path[cur] == 0) 2154: break; 2155: 2156: cur++; 2157: while (out < cur) { 2158: res->path[out] = bas->path[out]; 2159: out++; 2160: } 2161: } 2162: } 2163: res->path[out] = 0; 2164: 2165: /* 2166: * b) The reference's path component is appended to the buffer 2167: * string. 2168: */ 2169: if (ref->path != NULL && ref->path[0] != 0) { 2170: indx = 0; 2171: /* 2172: * Ensure the path includes a '/' 2173: */ 2174: if ((out == 0) && (bas->server != NULL)) 2175: res->path[out++] = '/'; 2176: while (ref->path[indx] != 0) { 2177: res->path[out++] = ref->path[indx++]; 2178: } 2179: } 2180: res->path[out] = 0; 2181: 2182: /* 2183: * Steps c) to h) are really path normalization steps 2184: */ 2185: xmlNormalizeURIPath(res->path); 2186: 2187: step_7: 2188: 2189: /* 2190: * 7) The resulting URI components, including any inherited from the 2191: * base URI, are recombined to give the absolute form of the URI 2192: * reference. 
2193: */ 2194: val = xmlSaveUri(res); 2195: 2196: done: 2197: if (ref != NULL) 2198: xmlFreeURI(ref); 2199: if (bas != NULL) 2200: xmlFreeURI(bas); 2201: if (res != NULL) 2202: xmlFreeURI(res); 2203: return(val); 2204: } 2205: 2206: /** 2207: * xmlBuildRelativeURI: 2208: * @URI: the URI reference under consideration 2209: * @base: the base value 2210: * 2211: * Expresses the URI of the reference in terms relative to the 2212: * base. Some examples of this operation include: 2213: * base = "http://site1.com/docs/book1.html" 2214: * URI input URI returned 2215: * docs/pic1.gif pic1.gif 2216: * docs/img/pic1.gif img/pic1.gif 2217: * img/pic1.gif ../img/pic1.gif 2218: * http://site1.com/docs/pic1.gif pic1.gif 2219: * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif 2220: * 2221: * base = "docs/book1.html" 2222: * URI input URI returned 2223: * docs/pic1.gif pic1.gif 2224: * docs/img/pic1.gif img/pic1.gif 2225: * img/pic1.gif ../img/pic1.gif 2226: * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif 2227: * 2228: * 2229: * Note: if the URI reference is really wierd or complicated, it may be 2230: * worthwhile to first convert it into a "nice" one by calling 2231: * xmlBuildURI (using 'base') before calling this routine, 2232: * since this routine (for reasonable efficiency) assumes URI has 2233: * already been through some validation. 2234: * 2235: * Returns a new URI string (to be freed by the caller) or NULL in case 2236: * error. 
2237: */ 2238: xmlChar * 2239: xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base) 2240: { 2241: xmlChar *val = NULL; 2242: int ret; 2243: int ix; 2244: int pos = 0; 2245: int nbslash = 0; 2246: int len; 2247: xmlURIPtr ref = NULL; 2248: xmlURIPtr bas = NULL; 2249: xmlChar *bptr, *uptr, *vptr; 2250: int remove_path = 0; 2251: 2252: if ((URI == NULL) || (*URI == 0)) 2253: return NULL; 2254: 2255: /* 2256: * First parse URI into a standard form 2257: */ 2258: ref = xmlCreateURI (); 2259: if (ref == NULL) 2260: return NULL; 2261: /* If URI not already in "relative" form */ 2262: if (URI[0] != '.') { 2263: ret = xmlParseURIReference (ref, (const char *) URI); 2264: if (ret != 0) 2265: goto done; /* Error in URI, return NULL */ 2266: } else 2267: ref->path = (char *)xmlStrdup(URI); 2268: 2269: /* 2270: * Next parse base into the same standard form 2271: */ 2272: if ((base == NULL) || (*base == 0)) { 2273: val = xmlStrdup (URI); 2274: goto done; 2275: } 2276: bas = xmlCreateURI (); 2277: if (bas == NULL) 2278: goto done; 2279: if (base[0] != '.') { 2280: ret = xmlParseURIReference (bas, (const char *) base); 2281: if (ret != 0) 2282: goto done; /* Error in base, return NULL */ 2283: } else 2284: bas->path = (char *)xmlStrdup(base); 2285: 2286: /* 2287: * If the scheme / server on the URI differs from the base, 2288: * just return the URI 2289: */ 2290: if ((ref->scheme != NULL) && 2291: ((bas->scheme == NULL) || 2292: (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) || 2293: (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) { 2294: val = xmlStrdup (URI); 2295: goto done; 2296: } 2297: if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) { 2298: val = xmlStrdup(BAD_CAST ""); 2299: goto done; 2300: } 2301: if (bas->path == NULL) { 2302: val = xmlStrdup((xmlChar *)ref->path); 2303: goto done; 2304: } 2305: if (ref->path == NULL) { 2306: ref->path = (char *) "/"; 2307: remove_path = 1; 2308: } 2309: 2310: /* 2311: * At this point 
(at last!) we can compare the two paths 2312: * 2313: * First we take care of the special case where either of the 2314: * two path components may be missing (bug 316224) 2315: */ 2316: if (bas->path == NULL) { 2317: if (ref->path != NULL) { 2318: uptr = (xmlChar *) ref->path; 2319: if (*uptr == '/') 2320: uptr++; 2321: /* exception characters from xmlSaveUri */ 2322: val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,"); 2323: } 2324: goto done; 2325: } 2326: bptr = (xmlChar *)bas->path; 2327: if (ref->path == NULL) { 2328: for (ix = 0; bptr[ix] != 0; ix++) { 2329: if (bptr[ix] == '/') 2330: nbslash++; 2331: } 2332: uptr = NULL; 2333: len = 1; /* this is for a string terminator only */ 2334: } else { 2335: /* 2336: * Next we compare the two strings and find where they first differ 2337: */ 2338: if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/')) 2339: pos += 2; 2340: if ((*bptr == '.') && (bptr[1] == '/')) 2341: bptr += 2; 2342: else if ((*bptr == '/') && (ref->path[pos] != '/')) 2343: bptr++; 2344: while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0)) 2345: pos++; 2346: 2347: if (bptr[pos] == ref->path[pos]) { 2348: val = xmlStrdup(BAD_CAST ""); 2349: goto done; /* (I can't imagine why anyone would do this) */ 2350: } 2351: 2352: /* 2353: * In URI, "back up" to the last '/' encountered. 
This will be the 2354: * beginning of the "unique" suffix of URI 2355: */ 2356: ix = pos; 2357: if ((ref->path[ix] == '/') && (ix > 0)) 2358: ix--; 2359: else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/')) 2360: ix -= 2; 2361: for (; ix > 0; ix--) { 2362: if (ref->path[ix] == '/') 2363: break; 2364: } 2365: if (ix == 0) { 2366: uptr = (xmlChar *)ref->path; 2367: } else { 2368: ix++; 2369: uptr = (xmlChar *)&ref->path[ix]; 2370: } 2371: 2372: /* 2373: * In base, count the number of '/' from the differing point 2374: */ 2375: if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */ 2376: for (; bptr[ix] != 0; ix++) { 2377: if (bptr[ix] == '/') 2378: nbslash++; 2379: } 2380: } 2381: len = xmlStrlen (uptr) + 1; 2382: } 2383: 2384: if (nbslash == 0) { 2385: if (uptr != NULL) 2386: /* exception characters from xmlSaveUri */ 2387: val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,"); 2388: goto done; 2389: } 2390: 2391: /* 2392: * Allocate just enough space for the returned string - 2393: * length of the remainder of the URI, plus enough space 2394: * for the "../" groups, plus one for the terminator 2395: */ 2396: val = (xmlChar *) xmlMalloc (len + 3 * nbslash); 2397: if (val == NULL) { 2398: xmlGenericError(xmlGenericErrorContext, 2399: "xmlBuildRelativeURI: out of memory\n"); 2400: goto done; 2401: } 2402: vptr = val; 2403: /* 2404: * Put in as many "../" as needed 2405: */ 2406: for (; nbslash>0; nbslash--) { 2407: *vptr++ = '.'; 2408: *vptr++ = '.'; 2409: *vptr++ = '/'; 2410: } 2411: /* 2412: * Finish up with the end of the URI 2413: */ 2414: if (uptr != NULL) { 2415: if ((vptr > val) && (len > 0) && 2416: (uptr[0] == '/') && (vptr[-1] == '/')) { 2417: memcpy (vptr, uptr + 1, len - 1); 2418: vptr[len - 2] = 0; 2419: } else { 2420: memcpy (vptr, uptr, len); 2421: vptr[len - 1] = 0; 2422: } 2423: } else { 2424: vptr[len - 1] = 0; 2425: } 2426: 2427: /* escape the freshly-built path */ 2428: vptr = val; 2429: /* exception characters from 
xmlSaveUri */ 2430: val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,"); 2431: xmlFree(vptr); 2432: 2433: done: 2434: /* 2435: * Free the working variables 2436: */ 2437: if (remove_path != 0) 2438: ref->path = NULL; 2439: if (ref != NULL) 2440: xmlFreeURI (ref); 2441: if (bas != NULL) 2442: xmlFreeURI (bas); 2443: 2444: return val; 2445: } 2446: 2447: /** 2448: * xmlCanonicPath: 2449: * @path: the resource locator in a filesystem notation 2450: * 2451: * Constructs a canonic path from the specified path. 2452: * 2453: * Returns a new canonic path, or a duplicate of the path parameter if the 2454: * construction fails. The caller is responsible for freeing the memory occupied 2455: * by the returned string. If there is insufficient memory available, or the 2456: * argument is NULL, the function returns NULL. 2457: */ 2458: #define IS_WINDOWS_PATH(p) \ 2459: ((p != NULL) && \ 2460: (((p[0] >= 'a') && (p[0] <= 'z')) || \ 2461: ((p[0] >= 'A') && (p[0] <= 'Z'))) && \ 2462: (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\'))) 2463: xmlChar * 2464: xmlCanonicPath(const xmlChar *path) 2465: { 2466: /* 2467: * For Windows implementations, additional work needs to be done to 2468: * replace backslashes in pathnames with "forward slashes" 2469: */ 2470: #if defined(_WIN32) && !defined(__CYGWIN__) 2471: int len = 0; 2472: int i = 0; 2473: xmlChar *p = NULL; 2474: #endif 2475: xmlURIPtr uri; 2476: xmlChar *ret; 2477: const xmlChar *absuri; 2478: 2479: if (path == NULL) 2480: return(NULL); 2481: 2482: /* sanitize filename starting with // so it can be used as URI */ 2483: if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/')) 2484: path++; 2485: 2486: if ((uri = xmlParseURI((const char *) path)) != NULL) { 2487: xmlFreeURI(uri); 2488: return xmlStrdup(path); 2489: } 2490: 2491: /* Check if this is an "absolute uri" */ 2492: absuri = xmlStrstr(path, BAD_CAST "://"); 2493: if (absuri != NULL) { 2494: int l, j; 2495: unsigned char c; 2496: xmlChar *escURI; 2497: 2498: /* 2499: * 
this looks like an URI where some parts have not been 2500: * escaped leading to a parsing problem. Check that the first 2501: * part matches a protocol. 2502: */ 2503: l = absuri - path; 2504: /* Bypass if first part (part before the '://') is > 20 chars */ 2505: if ((l <= 0) || (l > 20)) 2506: goto path_processing; 2507: /* Bypass if any non-alpha characters are present in first part */ 2508: for (j = 0;j < l;j++) { 2509: c = path[j]; 2510: if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')))) 2511: goto path_processing; 2512: } 2513: 2514: /* Escape all except the characters specified in the supplied path */ 2515: escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;="); 2516: if (escURI != NULL) { 2517: /* Try parsing the escaped path */ 2518: uri = xmlParseURI((const char *) escURI); 2519: /* If successful, return the escaped string */ 2520: if (uri != NULL) { 2521: xmlFreeURI(uri); 2522: return escURI; 2523: } 2524: } 2525: } 2526: 2527: path_processing: 2528: /* For Windows implementations, replace backslashes with 'forward slashes' */ 2529: #if defined(_WIN32) && !defined(__CYGWIN__) 2530: /* 2531: * Create a URI structure 2532: */ 2533: uri = xmlCreateURI(); 2534: if (uri == NULL) { /* Guard against 'out of memory' */ 2535: return(NULL); 2536: } 2537: 2538: len = xmlStrlen(path); 2539: if ((len > 2) && IS_WINDOWS_PATH(path)) { 2540: /* make the scheme 'file' */ 2541: uri->scheme = xmlStrdup(BAD_CAST "file"); 2542: /* allocate space for leading '/' + path + string terminator */ 2543: uri->path = xmlMallocAtomic(len + 2); 2544: if (uri->path == NULL) { 2545: xmlFreeURI(uri); /* Guard agains 'out of memory' */ 2546: return(NULL); 2547: } 2548: /* Put in leading '/' plus path */ 2549: uri->path[0] = '/'; 2550: p = uri->path + 1; 2551: strncpy(p, path, len + 1); 2552: } else { 2553: uri->path = xmlStrdup(path); 2554: if (uri->path == NULL) { 2555: xmlFreeURI(uri); 2556: return(NULL); 2557: } 2558: p = uri->path; 2559: } 2560: /* Now change all occurences 
of '\' to '/' */ 2561: while (*p != '\0') { 2562: if (*p == '\\') 2563: *p = '/'; 2564: p++; 2565: } 2566: 2567: if (uri->scheme == NULL) { 2568: ret = xmlStrdup((const xmlChar *) uri->path); 2569: } else { 2570: ret = xmlSaveUri(uri); 2571: } 2572: 2573: xmlFreeURI(uri); 2574: #else 2575: ret = xmlStrdup((const xmlChar *) path); 2576: #endif 2577: return(ret); 2578: } 2579: 2580: /** 2581: * xmlPathToURI: 2582: * @path: the resource locator in a filesystem notation 2583: * 2584: * Constructs an URI expressing the existing path 2585: * 2586: * Returns a new URI, or a duplicate of the path parameter if the 2587: * construction fails. The caller is responsible for freeing the memory 2588: * occupied by the returned string. If there is insufficient memory available, 2589: * or the argument is NULL, the function returns NULL. 2590: */ 2591: xmlChar * 2592: xmlPathToURI(const xmlChar *path) 2593: { 2594: xmlURIPtr uri; 2595: xmlURI temp; 2596: xmlChar *ret, *cal; 2597: 2598: if (path == NULL) 2599: return(NULL); 2600: 2601: if ((uri = xmlParseURI((const char *) path)) != NULL) { 2602: xmlFreeURI(uri); 2603: return xmlStrdup(path); 2604: } 2605: cal = xmlCanonicPath(path); 2606: if (cal == NULL) 2607: return(NULL); 2608: #if defined(_WIN32) && !defined(__CYGWIN__) 2609: /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?) 2610: If 'cal' is a valid URI allready then we are done here, as continuing would make 2611: it invalid. */ 2612: if ((uri = xmlParseURI((const char *) cal)) != NULL) { 2613: xmlFreeURI(uri); 2614: return cal; 2615: } 2616: /* 'cal' can contain a relative path with backslashes. If that is processed 2617: by xmlSaveURI, they will be escaped and the external entity loader machinery 2618: will fail. So convert them to slashes. Misuse 'ret' for walking. 
*/ 2619: ret = cal; 2620: while (*ret != '\0') { 2621: if (*ret == '\\') 2622: *ret = '/'; 2623: ret++; 2624: } 2625: #endif 2626: memset(&temp, 0, sizeof(temp)); 2627: temp.path = (char *) cal; 2628: ret = xmlSaveUri(&temp); 2629: xmlFree(cal); 2630: return(ret); 2631: } 2632: #define bottom_uri 2633: #include "elfgcchack.h"