embedaddon/libxml2/uri.c - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / uri.c
Revision 1.1.1.3 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Sun Jun 15 19:53:31 2014 UTC (10 years ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_9_1p0, v2_9_1, HEAD

libxml2 2.9.1

1: /** 2: * uri.c: set of generic URI related routines 3: * 4: * Reference: RFCs 3986, 2732 and 2373 5: * 6: * See Copyright for the status of this software. 7: * 8: * daniel@veillard.com 9: */ 10: 11: #define IN_LIBXML 12: #include "libxml.h" 13: 14: #include <string.h> 15: 16: #include <libxml/xmlmemory.h> 17: #include <libxml/uri.h> 18: #include <libxml/globals.h> 19: #include <libxml/xmlerror.h> 20: 21: /** 22: * MAX_URI_LENGTH: 23: * 24: * The definition of the URI regexp in the above RFC has no size limit 25: * In practice they are usually relativey short except for the 26: * data URI scheme as defined in RFC 2397. Even for data URI the usual 27: * maximum size before hitting random practical limits is around 64 KB 28: * and 4KB is usually a maximum admitted limit for proper operations. 29: * The value below is more a security limit than anything else and 30: * really should never be hit by 'normal' operations 31: * Set to 1 MByte in 2012, this is only enforced on output 32: */ 33: #define MAX_URI_LENGTH 1024 * 1024 34: 35: static void 36: xmlURIErrMemory(const char *extra) 37: { 38: if (extra) 39: __xmlRaiseError(NULL, NULL, NULL, 40: NULL, NULL, XML_FROM_URI, 41: XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, 42: extra, NULL, NULL, 0, 0, 43: "Memory allocation failed : %s\n", extra); 44: else 45: __xmlRaiseError(NULL, NULL, NULL, 46: NULL, NULL, XML_FROM_URI, 47: XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, 48: NULL, NULL, NULL, 0, 0, 49: "Memory allocation failed\n"); 50: } 51: 52: static void xmlCleanURI(xmlURIPtr uri); 53: 54: /* 55: * Old rule from 2396 used in legacy handling code 56: * alpha = lowalpha | upalpha 57: */ 58: #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x)) 59: 60: 61: /* 62: * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | 63: * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | 64: * "u" | "v" | "w" | "x" | "y" | "z" 65: */ 66: 67: #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z')) 68: 69: /* 70: * 
upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | 71: * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | 72: * "U" | "V" | "W" | "X" | "Y" | "Z" 73: */ 74: #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z')) 75: 76: #ifdef IS_DIGIT 77: #undef IS_DIGIT 78: #endif 79: /* 80: * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 81: */ 82: #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9')) 83: 84: /* 85: * alphanum = alpha | digit 86: */ 87: 88: #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x)) 89: 90: /* 91: * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" 92: */ 93: 94: #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \ 95: ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \ 96: ((x) == '(') || ((x) == ')')) 97: 98: /* 99: * unwise = "{" | "}" | "|" | "\" | "^" | "`" 100: */ 101: 102: #define IS_UNWISE(p) \ 103: (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \ 104: ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \ 105: ((*(p) == ']')) || ((*(p) == '`'))) 106: /* 107: * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," | 108: * "[" | "]" 109: */ 110: 111: #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \ 112: ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \ 113: ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \ 114: ((x) == ']')) 115: 116: /* 117: * unreserved = alphanum | mark 118: */ 119: 120: #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x)) 121: 122: /* 123: * Skip to next pointer char, handle escaped sequences 124: */ 125: 126: #define NEXT(p) ((*p == '%')? p += 3 : p++) 127: 128: /* 129: * Productions from the spec. 
130: * 131: * authority = server | reg_name 132: * reg_name = 1*( unreserved | escaped | "$" | "," | 133: * ";" | ":" | "@" | "&" | "=" | "+" ) 134: * 135: * path = [ abs_path | opaque_part ] 136: */ 137: 138: #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n)) 139: 140: /************************************************************************ 141: * * 142: * RFC 3986 parser * 143: * * 144: ************************************************************************/ 145: 146: #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9')) 147: #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \ 148: ((*(p) >= 'A') && (*(p) <= 'Z'))) 149: #define ISA_HEXDIG(p) \ 150: (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \ 151: ((*(p) >= 'A') && (*(p) <= 'F'))) 152: 153: /* 154: * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 155: * / "*" / "+" / "," / ";" / "=" 156: */ 157: #define ISA_SUB_DELIM(p) \ 158: (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \ 159: ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \ 160: ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \ 161: ((*(p) == '=')) || ((*(p) == '\''))) 162: 163: /* 164: * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" 165: */ 166: #define ISA_GEN_DELIM(p) \ 167: (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \ 168: ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \ 169: ((*(p) == '@'))) 170: 171: /* 172: * reserved = gen-delims / sub-delims 173: */ 174: #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p))) 175: 176: /* 177: * unreserved = ALPHA / DIGIT / "-" / "." 
/ "_" / "~" 178: */ 179: #define ISA_UNRESERVED(p) \ 180: ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \ 181: ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~'))) 182: 183: /* 184: * pct-encoded = "%" HEXDIG HEXDIG 185: */ 186: #define ISA_PCT_ENCODED(p) \ 187: ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2))) 188: 189: /* 190: * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 191: */ 192: #define ISA_PCHAR(p) \ 193: (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \ 194: ((*(p) == ':')) || ((*(p) == '@'))) 195: 196: /** 197: * xmlParse3986Scheme: 198: * @uri: pointer to an URI structure 199: * @str: pointer to the string to analyze 200: * 201: * Parse an URI scheme 202: * 203: * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 204: * 205: * Returns 0 or the error code 206: */ 207: static int 208: xmlParse3986Scheme(xmlURIPtr uri, const char **str) { 209: const char *cur; 210: 211: if (str == NULL) 212: return(-1); 213: 214: cur = *str; 215: if (!ISA_ALPHA(cur)) 216: return(2); 217: cur++; 218: while (ISA_ALPHA(cur) || ISA_DIGIT(cur) || 219: (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++; 220: if (uri != NULL) { 221: if (uri->scheme != NULL) xmlFree(uri->scheme); 222: uri->scheme = STRNDUP(*str, cur - *str); 223: } 224: *str = cur; 225: return(0); 226: } 227: 228: /** 229: * xmlParse3986Fragment: 230: * @uri: pointer to an URI structure 231: * @str: pointer to the string to analyze 232: * 233: * Parse the query part of an URI 234: * 235: * fragment = *( pchar / "/" / "?" ) 236: * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']' 237: * in the fragment identifier but this is used very broadly for 238: * xpointer scheme selection, so we are allowing it here to not break 239: * for example all the DocBook processing chains. 
240: * 241: * Returns 0 or the error code 242: */ 243: static int 244: xmlParse3986Fragment(xmlURIPtr uri, const char **str) 245: { 246: const char *cur; 247: 248: if (str == NULL) 249: return (-1); 250: 251: cur = *str; 252: 253: while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || 254: (*cur == '[') || (*cur == ']') || 255: ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 256: NEXT(cur); 257: if (uri != NULL) { 258: if (uri->fragment != NULL) 259: xmlFree(uri->fragment); 260: if (uri->cleanup & 2) 261: uri->fragment = STRNDUP(*str, cur - *str); 262: else 263: uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL); 264: } 265: *str = cur; 266: return (0); 267: } 268: 269: /** 270: * xmlParse3986Query: 271: * @uri: pointer to an URI structure 272: * @str: pointer to the string to analyze 273: * 274: * Parse the query part of an URI 275: * 276: * query = *uric 277: * 278: * Returns 0 or the error code 279: */ 280: static int 281: xmlParse3986Query(xmlURIPtr uri, const char **str) 282: { 283: const char *cur; 284: 285: if (str == NULL) 286: return (-1); 287: 288: cur = *str; 289: 290: while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || 291: ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 292: NEXT(cur); 293: if (uri != NULL) { 294: if (uri->query != NULL) 295: xmlFree(uri->query); 296: if (uri->cleanup & 2) 297: uri->query = STRNDUP(*str, cur - *str); 298: else 299: uri->query = xmlURIUnescapeString(*str, cur - *str, NULL); 300: 301: /* Save the raw bytes of the query as well. 
302: * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114 303: */ 304: if (uri->query_raw != NULL) 305: xmlFree (uri->query_raw); 306: uri->query_raw = STRNDUP (*str, cur - *str); 307: } 308: *str = cur; 309: return (0); 310: } 311: 312: /** 313: * xmlParse3986Port: 314: * @uri: pointer to an URI structure 315: * @str: the string to analyze 316: * 317: * Parse a port part and fills in the appropriate fields 318: * of the @uri structure 319: * 320: * port = *DIGIT 321: * 322: * Returns 0 or the error code 323: */ 324: static int 325: xmlParse3986Port(xmlURIPtr uri, const char **str) 326: { 327: const char *cur = *str; 328: 329: if (ISA_DIGIT(cur)) { 330: if (uri != NULL) 331: uri->port = 0; 332: while (ISA_DIGIT(cur)) { 333: if (uri != NULL) 334: uri->port = uri->port * 10 + (*cur - '0'); 335: cur++; 336: } 337: *str = cur; 338: return(0); 339: } 340: return(1); 341: } 342: 343: /** 344: * xmlParse3986Userinfo: 345: * @uri: pointer to an URI structure 346: * @str: the string to analyze 347: * 348: * Parse an user informations part and fills in the appropriate fields 349: * of the @uri structure 350: * 351: * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) 352: * 353: * Returns 0 or the error code 354: */ 355: static int 356: xmlParse3986Userinfo(xmlURIPtr uri, const char **str) 357: { 358: const char *cur; 359: 360: cur = *str; 361: while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || 362: ISA_SUB_DELIM(cur) || (*cur == ':')) 363: NEXT(cur); 364: if (*cur == '@') { 365: if (uri != NULL) { 366: if (uri->user != NULL) xmlFree(uri->user); 367: if (uri->cleanup & 2) 368: uri->user = STRNDUP(*str, cur - *str); 369: else 370: uri->user = xmlURIUnescapeString(*str, cur - *str, NULL); 371: } 372: *str = cur; 373: return(0); 374: } 375: return(1); 376: } 377: 378: /** 379: * xmlParse3986DecOctet: 380: * @str: the string to analyze 381: * 382: * dec-octet = DIGIT ; 0-9 383: * / %x31-39 DIGIT ; 10-99 384: * / "1" 2DIGIT ; 100-199 385: * / "2" 
%x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;

    if (!(ISA_DIGIT(cur)))
        return(1);
    /*
     * Each test below only looks one byte further than a byte already
     * known to be a digit, so the string terminator is never passed.
     */
    if (!ISA_DIGIT(cur+1))
        cur++;                                  /* 0-9 */
    else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
        cur += 2;                               /* 10-99 */
    else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
        cur += 3;                               /* 100-199 */
    else if ((*cur == '2') && (*(cur + 1) >= '0') &&
             (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
        cur += 3;                               /* 200-249 */
    else if ((*cur == '2') && (*(cur + 1) == '5') &&
             (*(cur + 2) >= '0') && (*(cur + 2) <= '5'))
        cur += 3;                               /* 250-255 */
    else
        return(1);
    *str = cur;
    return(0);
}
/**
 * xmlParse3986Host:
 * @uri:  pointer to an URI structure
 * @str:  the string to analyze
 *
 * Parse an host part and fills in the appropriate fields
 * of the @uri structure
 *
 * host          = IP-literal / IPv4address / reg-name
 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 * IPv4address   = dec-octet "." dec-octet "." dec-octet "."
dec-octet 426: * reg-name = *( unreserved / pct-encoded / sub-delims ) 427: * 428: * Returns 0 or the error code 429: */ 430: static int 431: xmlParse3986Host(xmlURIPtr uri, const char **str) 432: { 433: const char *cur = *str; 434: const char *host; 435: 436: host = cur; 437: /* 438: * IPv6 and future adressing scheme are enclosed between brackets 439: */ 440: if (*cur == '[') { 441: cur++; 442: while ((*cur != ']') && (*cur != 0)) 443: cur++; 444: if (*cur != ']') 445: return(1); 446: cur++; 447: goto found; 448: } 449: /* 450: * try to parse an IPv4 451: */ 452: if (ISA_DIGIT(cur)) { 453: if (xmlParse3986DecOctet(&cur) != 0) 454: goto not_ipv4; 455: if (*cur != '.') 456: goto not_ipv4; 457: cur++; 458: if (xmlParse3986DecOctet(&cur) != 0) 459: goto not_ipv4; 460: if (*cur != '.') 461: goto not_ipv4; 462: if (xmlParse3986DecOctet(&cur) != 0) 463: goto not_ipv4; 464: if (*cur != '.') 465: goto not_ipv4; 466: if (xmlParse3986DecOctet(&cur) != 0) 467: goto not_ipv4; 468: goto found; 469: not_ipv4: 470: cur = *str; 471: } 472: /* 473: * then this should be a hostname which can be empty 474: */ 475: while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur)) 476: NEXT(cur); 477: found: 478: if (uri != NULL) { 479: if (uri->authority != NULL) xmlFree(uri->authority); 480: uri->authority = NULL; 481: if (uri->server != NULL) xmlFree(uri->server); 482: if (cur != host) { 483: if (uri->cleanup & 2) 484: uri->server = STRNDUP(host, cur - host); 485: else 486: uri->server = xmlURIUnescapeString(host, cur - host, NULL); 487: } else 488: uri->server = NULL; 489: } 490: *str = cur; 491: return(0); 492: } 493: 494: /** 495: * xmlParse3986Authority: 496: * @uri: pointer to an URI structure 497: * @str: the string to analyze 498: * 499: * Parse an authority part and fills in the appropriate fields 500: * of the @uri structure 501: * 502: * authority = [ userinfo "@" ] host [ ":" port ] 503: * 504: * Returns 0 or the error code 505: */ 506: static int 507: 
xmlParse3986Authority(xmlURIPtr uri, const char **str) 508: { 509: const char *cur; 510: int ret; 511: 512: cur = *str; 513: /* 514: * try to parse an userinfo and check for the trailing @ 515: */ 516: ret = xmlParse3986Userinfo(uri, &cur); 517: if ((ret != 0) || (*cur != '@')) 518: cur = *str; 519: else 520: cur++; 521: ret = xmlParse3986Host(uri, &cur); 522: if (ret != 0) return(ret); 523: if (*cur == ':') { 524: cur++; 525: ret = xmlParse3986Port(uri, &cur); 526: if (ret != 0) return(ret); 527: } 528: *str = cur; 529: return(0); 530: } 531: 532: /** 533: * xmlParse3986Segment: 534: * @str: the string to analyze 535: * @forbid: an optional forbidden character 536: * @empty: allow an empty segment 537: * 538: * Parse a segment and fills in the appropriate fields 539: * of the @uri structure 540: * 541: * segment = *pchar 542: * segment-nz = 1*pchar 543: * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) 544: * ; non-zero-length segment without any colon ":" 545: * 546: * Returns 0 or the error code 547: */ 548: static int 549: xmlParse3986Segment(const char **str, char forbid, int empty) 550: { 551: const char *cur; 552: 553: cur = *str; 554: if (!ISA_PCHAR(cur)) { 555: if (empty) 556: return(0); 557: return(1); 558: } 559: while (ISA_PCHAR(cur) && (*cur != forbid)) 560: NEXT(cur); 561: *str = cur; 562: return (0); 563: } 564: 565: /** 566: * xmlParse3986PathAbEmpty: 567: * @uri: pointer to an URI structure 568: * @str: the string to analyze 569: * 570: * Parse an path absolute or empty and fills in the appropriate fields 571: * of the @uri structure 572: * 573: * path-abempty = *( "/" segment ) 574: * 575: * Returns 0 or the error code 576: */ 577: static int 578: xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str) 579: { 580: const char *cur; 581: int ret; 582: 583: cur = *str; 584: 585: while (*cur == '/') { 586: cur++; 587: ret = xmlParse3986Segment(&cur, 0, 1); 588: if (ret != 0) return(ret); 589: } 590: if (uri != NULL) { 591: if 
(uri->path != NULL) xmlFree(uri->path); 592: if (*str != cur) { 593: if (uri->cleanup & 2) 594: uri->path = STRNDUP(*str, cur - *str); 595: else 596: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 597: } else { 598: uri->path = NULL; 599: } 600: } 601: *str = cur; 602: return (0); 603: } 604: 605: /** 606: * xmlParse3986PathAbsolute: 607: * @uri: pointer to an URI structure 608: * @str: the string to analyze 609: * 610: * Parse an path absolute and fills in the appropriate fields 611: * of the @uri structure 612: * 613: * path-absolute = "/" [ segment-nz *( "/" segment ) ] 614: * 615: * Returns 0 or the error code 616: */ 617: static int 618: xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str) 619: { 620: const char *cur; 621: int ret; 622: 623: cur = *str; 624: 625: if (*cur != '/') 626: return(1); 627: cur++; 628: ret = xmlParse3986Segment(&cur, 0, 0); 629: if (ret == 0) { 630: while (*cur == '/') { 631: cur++; 632: ret = xmlParse3986Segment(&cur, 0, 1); 633: if (ret != 0) return(ret); 634: } 635: } 636: if (uri != NULL) { 637: if (uri->path != NULL) xmlFree(uri->path); 638: if (cur != *str) { 639: if (uri->cleanup & 2) 640: uri->path = STRNDUP(*str, cur - *str); 641: else 642: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 643: } else { 644: uri->path = NULL; 645: } 646: } 647: *str = cur; 648: return (0); 649: } 650: 651: /** 652: * xmlParse3986PathRootless: 653: * @uri: pointer to an URI structure 654: * @str: the string to analyze 655: * 656: * Parse an path without root and fills in the appropriate fields 657: * of the @uri structure 658: * 659: * path-rootless = segment-nz *( "/" segment ) 660: * 661: * Returns 0 or the error code 662: */ 663: static int 664: xmlParse3986PathRootless(xmlURIPtr uri, const char **str) 665: { 666: const char *cur; 667: int ret; 668: 669: cur = *str; 670: 671: ret = xmlParse3986Segment(&cur, 0, 0); 672: if (ret != 0) return(ret); 673: while (*cur == '/') { 674: cur++; 675: ret = 
xmlParse3986Segment(&cur, 0, 1); 676: if (ret != 0) return(ret); 677: } 678: if (uri != NULL) { 679: if (uri->path != NULL) xmlFree(uri->path); 680: if (cur != *str) { 681: if (uri->cleanup & 2) 682: uri->path = STRNDUP(*str, cur - *str); 683: else 684: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 685: } else { 686: uri->path = NULL; 687: } 688: } 689: *str = cur; 690: return (0); 691: } 692: 693: /** 694: * xmlParse3986PathNoScheme: 695: * @uri: pointer to an URI structure 696: * @str: the string to analyze 697: * 698: * Parse an path which is not a scheme and fills in the appropriate fields 699: * of the @uri structure 700: * 701: * path-noscheme = segment-nz-nc *( "/" segment ) 702: * 703: * Returns 0 or the error code 704: */ 705: static int 706: xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str) 707: { 708: const char *cur; 709: int ret; 710: 711: cur = *str; 712: 713: ret = xmlParse3986Segment(&cur, ':', 0); 714: if (ret != 0) return(ret); 715: while (*cur == '/') { 716: cur++; 717: ret = xmlParse3986Segment(&cur, 0, 1); 718: if (ret != 0) return(ret); 719: } 720: if (uri != NULL) { 721: if (uri->path != NULL) xmlFree(uri->path); 722: if (cur != *str) { 723: if (uri->cleanup & 2) 724: uri->path = STRNDUP(*str, cur - *str); 725: else 726: uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 727: } else { 728: uri->path = NULL; 729: } 730: } 731: *str = cur; 732: return (0); 733: } 734: 735: /** 736: * xmlParse3986HierPart: 737: * @uri: pointer to an URI structure 738: * @str: the string to analyze 739: * 740: * Parse an hierarchical part and fills in the appropriate fields 741: * of the @uri structure 742: * 743: * hier-part = "//" authority path-abempty 744: * / path-absolute 745: * / path-rootless 746: * / path-empty 747: * 748: * Returns 0 or the error code 749: */ 750: static int 751: xmlParse3986HierPart(xmlURIPtr uri, const char **str) 752: { 753: const char *cur; 754: int ret; 755: 756: cur = *str; 757: 758: if ((*cur == '/') && 
(*(cur + 1) == '/')) { 759: cur += 2; 760: ret = xmlParse3986Authority(uri, &cur); 761: if (ret != 0) return(ret); 762: ret = xmlParse3986PathAbEmpty(uri, &cur); 763: if (ret != 0) return(ret); 764: *str = cur; 765: return(0); 766: } else if (*cur == '/') { 767: ret = xmlParse3986PathAbsolute(uri, &cur); 768: if (ret != 0) return(ret); 769: } else if (ISA_PCHAR(cur)) { 770: ret = xmlParse3986PathRootless(uri, &cur); 771: if (ret != 0) return(ret); 772: } else { 773: /* path-empty is effectively empty */ 774: if (uri != NULL) { 775: if (uri->path != NULL) xmlFree(uri->path); 776: uri->path = NULL; 777: } 778: } 779: *str = cur; 780: return (0); 781: } 782: 783: /** 784: * xmlParse3986RelativeRef: 785: * @uri: pointer to an URI structure 786: * @str: the string to analyze 787: * 788: * Parse an URI string and fills in the appropriate fields 789: * of the @uri structure 790: * 791: * relative-ref = relative-part [ "?" query ] [ "#" fragment ] 792: * relative-part = "//" authority path-abempty 793: * / path-absolute 794: * / path-noscheme 795: * / path-empty 796: * 797: * Returns 0 or the error code 798: */ 799: static int 800: xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) { 801: int ret; 802: 803: if ((*str == '/') && (*(str + 1) == '/')) { 804: str += 2; 805: ret = xmlParse3986Authority(uri, &str); 806: if (ret != 0) return(ret); 807: ret = xmlParse3986PathAbEmpty(uri, &str); 808: if (ret != 0) return(ret); 809: } else if (*str == '/') { 810: ret = xmlParse3986PathAbsolute(uri, &str); 811: if (ret != 0) return(ret); 812: } else if (ISA_PCHAR(str)) { 813: ret = xmlParse3986PathNoScheme(uri, &str); 814: if (ret != 0) return(ret); 815: } else { 816: /* path-empty is effectively empty */ 817: if (uri != NULL) { 818: if (uri->path != NULL) xmlFree(uri->path); 819: uri->path = NULL; 820: } 821: } 822: 823: if (*str == '?') { 824: str++; 825: ret = xmlParse3986Query(uri, &str); 826: if (ret != 0) return(ret); 827: } 828: if (*str == '#') { 829: str++; 830: ret = 
xmlParse3986Fragment(uri, &str); 831: if (ret != 0) return(ret); 832: } 833: if (*str != 0) { 834: xmlCleanURI(uri); 835: return(1); 836: } 837: return(0); 838: } 839: 840: 841: /** 842: * xmlParse3986URI: 843: * @uri: pointer to an URI structure 844: * @str: the string to analyze 845: * 846: * Parse an URI string and fills in the appropriate fields 847: * of the @uri structure 848: * 849: * scheme ":" hier-part [ "?" query ] [ "#" fragment ] 850: * 851: * Returns 0 or the error code 852: */ 853: static int 854: xmlParse3986URI(xmlURIPtr uri, const char *str) { 855: int ret; 856: 857: ret = xmlParse3986Scheme(uri, &str); 858: if (ret != 0) return(ret); 859: if (*str != ':') { 860: return(1); 861: } 862: str++; 863: ret = xmlParse3986HierPart(uri, &str); 864: if (ret != 0) return(ret); 865: if (*str == '?') { 866: str++; 867: ret = xmlParse3986Query(uri, &str); 868: if (ret != 0) return(ret); 869: } 870: if (*str == '#') { 871: str++; 872: ret = xmlParse3986Fragment(uri, &str); 873: if (ret != 0) return(ret); 874: } 875: if (*str != 0) { 876: xmlCleanURI(uri); 877: return(1); 878: } 879: return(0); 880: } 881: 882: /** 883: * xmlParse3986URIReference: 884: * @uri: pointer to an URI structure 885: * @str: the string to analyze 886: * 887: * Parse an URI reference string and fills in the appropriate fields 888: * of the @uri structure 889: * 890: * URI-reference = URI / relative-ref 891: * 892: * Returns 0 or the error code 893: */ 894: static int 895: xmlParse3986URIReference(xmlURIPtr uri, const char *str) { 896: int ret; 897: 898: if (str == NULL) 899: return(-1); 900: xmlCleanURI(uri); 901: 902: /* 903: * Try first to parse absolute refs, then fallback to relative if 904: * it fails. 
905: */ 906: ret = xmlParse3986URI(uri, str); 907: if (ret != 0) { 908: xmlCleanURI(uri); 909: ret = xmlParse3986RelativeRef(uri, str); 910: if (ret != 0) { 911: xmlCleanURI(uri); 912: return(ret); 913: } 914: } 915: return(0); 916: } 917: 918: /** 919: * xmlParseURI: 920: * @str: the URI string to analyze 921: * 922: * Parse an URI based on RFC 3986 923: * 924: * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 925: * 926: * Returns a newly built xmlURIPtr or NULL in case of error 927: */ 928: xmlURIPtr 929: xmlParseURI(const char *str) { 930: xmlURIPtr uri; 931: int ret; 932: 933: if (str == NULL) 934: return(NULL); 935: uri = xmlCreateURI(); 936: if (uri != NULL) { 937: ret = xmlParse3986URIReference(uri, str); 938: if (ret) { 939: xmlFreeURI(uri); 940: return(NULL); 941: } 942: } 943: return(uri); 944: } 945: 946: /** 947: * xmlParseURIReference: 948: * @uri: pointer to an URI structure 949: * @str: the string to analyze 950: * 951: * Parse an URI reference string based on RFC 3986 and fills in the 952: * appropriate fields of the @uri structure 953: * 954: * URI-reference = URI / relative-ref 955: * 956: * Returns 0 or the error code 957: */ 958: int 959: xmlParseURIReference(xmlURIPtr uri, const char *str) { 960: return(xmlParse3986URIReference(uri, str)); 961: } 962: 963: /** 964: * xmlParseURIRaw: 965: * @str: the URI string to analyze 966: * @raw: if 1 unescaping of URI pieces are disabled 967: * 968: * Parse an URI but allows to keep intact the original fragments. 
969: * 970: * URI-reference = URI / relative-ref 971: * 972: * Returns a newly built xmlURIPtr or NULL in case of error 973: */ 974: xmlURIPtr 975: xmlParseURIRaw(const char *str, int raw) { 976: xmlURIPtr uri; 977: int ret; 978: 979: if (str == NULL) 980: return(NULL); 981: uri = xmlCreateURI(); 982: if (uri != NULL) { 983: if (raw) { 984: uri->cleanup |= 2; 985: } 986: ret = xmlParseURIReference(uri, str); 987: if (ret) { 988: xmlFreeURI(uri); 989: return(NULL); 990: } 991: } 992: return(uri); 993: } 994: 995: /************************************************************************ 996: * * 997: * Generic URI structure functions * 998: * * 999: ************************************************************************/ 1000: 1001: /** 1002: * xmlCreateURI: 1003: * 1004: * Simply creates an empty xmlURI 1005: * 1006: * Returns the new structure or NULL in case of error 1007: */ 1008: xmlURIPtr 1009: xmlCreateURI(void) { 1010: xmlURIPtr ret; 1011: 1012: ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI)); 1013: if (ret == NULL) { 1014: xmlURIErrMemory("creating URI structure\n"); 1015: return(NULL); 1016: } 1017: memset(ret, 0, sizeof(xmlURI)); 1018: return(ret); 1019: } 1020: 1021: /** 1022: * xmlSaveUriRealloc: 1023: * 1024: * Function to handle properly a reallocation when saving an URI 1025: * Also imposes some limit on the length of an URI string output 1026: */ 1027: static xmlChar * 1028: xmlSaveUriRealloc(xmlChar *ret, int *max) { 1029: xmlChar *temp; 1030: int tmp; 1031: 1032: if (*max > MAX_URI_LENGTH) { 1033: xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n"); 1034: return(NULL); 1035: } 1036: tmp = *max * 2; 1037: temp = (xmlChar *) xmlRealloc(ret, (tmp + 1)); 1038: if (temp == NULL) { 1039: xmlURIErrMemory("saving URI\n"); 1040: return(NULL); 1041: } 1042: *max = tmp; 1043: return(temp); 1044: } 1045: 1046: /** 1047: * xmlSaveUri: 1048: * @uri: pointer to an xmlURI 1049: * 1050: * Save the URI as an escaped string 1051: * 1052: * Returns a new string 
(to be deallocated by caller) 1053: */ 1054: xmlChar * 1055: xmlSaveUri(xmlURIPtr uri) { 1056: xmlChar *ret = NULL; 1057: xmlChar *temp; 1058: const char *p; 1059: int len; 1060: int max; 1061: 1062: if (uri == NULL) return(NULL); 1063: 1064: 1065: max = 80; 1066: ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar)); 1067: if (ret == NULL) { 1068: xmlURIErrMemory("saving URI\n"); 1069: return(NULL); 1070: } 1071: len = 0; 1072: 1073: if (uri->scheme != NULL) { 1074: p = uri->scheme; 1075: while (*p != 0) { 1076: if (len >= max) { 1077: temp = xmlSaveUriRealloc(ret, &max); 1078: if (temp == NULL) goto mem_error; 1079: ret = temp; 1080: } 1081: ret[len++] = *p++; 1082: } 1083: if (len >= max) { 1084: temp = xmlSaveUriRealloc(ret, &max); 1085: if (temp == NULL) goto mem_error; 1086: ret = temp; 1087: } 1088: ret[len++] = ':'; 1089: } 1090: if (uri->opaque != NULL) { 1091: p = uri->opaque; 1092: while (*p != 0) { 1093: if (len + 3 >= max) { 1094: temp = xmlSaveUriRealloc(ret, &max); 1095: if (temp == NULL) goto mem_error; 1096: ret = temp; 1097: } 1098: if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) 1099: ret[len++] = *p++; 1100: else { 1101: int val = *(unsigned char *)p++; 1102: int hi = val / 0x10, lo = val % 0x10; 1103: ret[len++] = '%'; 1104: ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1105: ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 1106: } 1107: } 1108: } else { 1109: if (uri->server != NULL) { 1110: if (len + 3 >= max) { 1111: temp = xmlSaveUriRealloc(ret, &max); 1112: if (temp == NULL) goto mem_error; 1113: ret = temp; 1114: } 1115: ret[len++] = '/'; 1116: ret[len++] = '/'; 1117: if (uri->user != NULL) { 1118: p = uri->user; 1119: while (*p != 0) { 1120: if (len + 3 >= max) { 1121: temp = xmlSaveUriRealloc(ret, &max); 1122: if (temp == NULL) goto mem_error; 1123: ret = temp; 1124: } 1125: if ((IS_UNRESERVED(*(p))) || 1126: ((*(p) == ';')) || ((*(p) == ':')) || 1127: ((*(p) == '&')) || ((*(p) == '=')) || 1128: ((*(p) == '+')) || ((*(p) == '$')) || 1129: ((*(p) == ','))) 1130: ret[len++] = *p++; 1131: else { 1132: int val = *(unsigned char *)p++; 1133: int hi = val / 0x10, lo = val % 0x10; 1134: ret[len++] = '%'; 1135: ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1136: ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1137: } 1138: } 1139: if (len + 3 >= max) { 1140: temp = xmlSaveUriRealloc(ret, &max); 1141: if (temp == NULL) goto mem_error; 1142: ret = temp; 1143: } 1144: ret[len++] = '@'; 1145: } 1146: p = uri->server; 1147: while (*p != 0) { 1148: if (len >= max) { 1149: temp = xmlSaveUriRealloc(ret, &max); 1150: if (temp == NULL) goto mem_error; 1151: ret = temp; 1152: } 1153: ret[len++] = *p++; 1154: } 1155: if (uri->port > 0) { 1156: if (len + 10 >= max) { 1157: temp = xmlSaveUriRealloc(ret, &max); 1158: if (temp == NULL) goto mem_error; 1159: ret = temp; 1160: } 1161: len += snprintf((char *) &ret[len], max - len, ":%d", uri->port); 1162: } 1163: } else if (uri->authority != NULL) { 1164: if (len + 3 >= max) { 1165: temp = xmlSaveUriRealloc(ret, &max); 1166: if (temp == NULL) goto mem_error; 1167: ret = temp; 1168: } 1169: ret[len++] = '/'; 1170: ret[len++] = '/'; 1171: p = uri->authority; 1172: while (*p != 0) { 1173: if (len + 3 >= max) { 1174: temp = xmlSaveUriRealloc(ret, &max); 1175: if (temp == NULL) goto mem_error; 1176: ret = temp; 1177: } 1178: if ((IS_UNRESERVED(*(p))) 
|| 1179: ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || 1180: ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || 1181: ((*(p) == '=')) || ((*(p) == '+'))) 1182: ret[len++] = *p++; 1183: else { 1184: int val = *(unsigned char *)p++; 1185: int hi = val / 0x10, lo = val % 0x10; 1186: ret[len++] = '%'; 1187: ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1188: ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1189: } 1190: } 1191: } else if (uri->scheme != NULL) { 1192: if (len + 3 >= max) { 1193: temp = xmlSaveUriRealloc(ret, &max); 1194: if (temp == NULL) goto mem_error; 1195: ret = temp; 1196: } 1197: ret[len++] = '/'; 1198: ret[len++] = '/'; 1199: } 1200: if (uri->path != NULL) { 1201: p = uri->path; 1202: /* 1203: * the colon in file:///d: should not be escaped or 1204: * Windows accesses fail later. 1205: */ 1206: if ((uri->scheme != NULL) && 1207: (p[0] == '/') && 1208: (((p[1] >= 'a') && (p[1] <= 'z')) || 1209: ((p[1] >= 'A') && (p[1] <= 'Z'))) && 1210: (p[2] == ':') && 1211: (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) { 1212: if (len + 3 >= max) { 1213: temp = xmlSaveUriRealloc(ret, &max); 1214: if (temp == NULL) goto mem_error; 1215: ret = temp; 1216: } 1217: ret[len++] = *p++; 1218: ret[len++] = *p++; 1219: ret[len++] = *p++; 1220: } 1221: while (*p != 0) { 1222: if (len + 3 >= max) { 1223: temp = xmlSaveUriRealloc(ret, &max); 1224: if (temp == NULL) goto mem_error; 1225: ret = temp; 1226: } 1227: if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) || 1228: ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || 1229: ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || 1230: ((*(p) == ','))) 1231: ret[len++] = *p++; 1232: else { 1233: int val = *(unsigned char *)p++; 1234: int hi = val / 0x10, lo = val % 0x10; 1235: ret[len++] = '%'; 1236: ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1237: ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 1238: } 1239: } 1240: } 1241: if (uri->query_raw != NULL) { 1242: if (len + 1 >= max) { 1243: temp = xmlSaveUriRealloc(ret, &max); 1244: if (temp == NULL) goto mem_error; 1245: ret = temp; 1246: } 1247: ret[len++] = '?'; 1248: p = uri->query_raw; 1249: while (*p != 0) { 1250: if (len + 1 >= max) { 1251: temp = xmlSaveUriRealloc(ret, &max); 1252: if (temp == NULL) goto mem_error; 1253: ret = temp; 1254: } 1255: ret[len++] = *p++; 1256: } 1257: } else if (uri->query != NULL) { 1258: if (len + 3 >= max) { 1259: temp = xmlSaveUriRealloc(ret, &max); 1260: if (temp == NULL) goto mem_error; 1261: ret = temp; 1262: } 1263: ret[len++] = '?'; 1264: p = uri->query; 1265: while (*p != 0) { 1266: if (len + 3 >= max) { 1267: temp = xmlSaveUriRealloc(ret, &max); 1268: if (temp == NULL) goto mem_error; 1269: ret = temp; 1270: } 1271: if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 1272: ret[len++] = *p++; 1273: else { 1274: int val = *(unsigned char *)p++; 1275: int hi = val / 0x10, lo = val % 0x10; 1276: ret[len++] = '%'; 1277: ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1278: ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1279: } 1280: } 1281: } 1282: } 1283: if (uri->fragment != NULL) { 1284: if (len + 3 >= max) { 1285: temp = xmlSaveUriRealloc(ret, &max); 1286: if (temp == NULL) goto mem_error; 1287: ret = temp; 1288: } 1289: ret[len++] = '#'; 1290: p = uri->fragment; 1291: while (*p != 0) { 1292: if (len + 3 >= max) { 1293: temp = xmlSaveUriRealloc(ret, &max); 1294: if (temp == NULL) goto mem_error; 1295: ret = temp; 1296: } 1297: if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 1298: ret[len++] = *p++; 1299: else { 1300: int val = *(unsigned char *)p++; 1301: int hi = val / 0x10, lo = val % 0x10; 1302: ret[len++] = '%'; 1303: ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1304: ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 1305: } 1306: } 1307: } 1308: if (len >= max) { 1309: temp = xmlSaveUriRealloc(ret, &max); 1310: if (temp == NULL) goto mem_error; 1311: ret = temp; 1312: } 1313: ret[len] = 0; 1314: return(ret); 1315: 1316: mem_error: 1317: xmlFree(ret); 1318: return(NULL); 1319: } 1320: 1321: /** 1322: * xmlPrintURI: 1323: * @stream: a FILE* for the output 1324: * @uri: pointer to an xmlURI 1325: * 1326: * Prints the URI in the stream @stream. 1327: */ 1328: void 1329: xmlPrintURI(FILE *stream, xmlURIPtr uri) { 1330: xmlChar *out; 1331: 1332: out = xmlSaveUri(uri); 1333: if (out != NULL) { 1334: fprintf(stream, "%s", (char *) out); 1335: xmlFree(out); 1336: } 1337: } 1338: 1339: /** 1340: * xmlCleanURI: 1341: * @uri: pointer to an xmlURI 1342: * 1343: * Make sure the xmlURI struct is free of content 1344: */ 1345: static void 1346: xmlCleanURI(xmlURIPtr uri) { 1347: if (uri == NULL) return; 1348: 1349: if (uri->scheme != NULL) xmlFree(uri->scheme); 1350: uri->scheme = NULL; 1351: if (uri->server != NULL) xmlFree(uri->server); 1352: uri->server = NULL; 1353: if (uri->user != NULL) xmlFree(uri->user); 1354: uri->user = NULL; 1355: if (uri->path != NULL) xmlFree(uri->path); 1356: uri->path = NULL; 1357: if (uri->fragment != NULL) xmlFree(uri->fragment); 1358: uri->fragment = NULL; 1359: if (uri->opaque != NULL) xmlFree(uri->opaque); 1360: uri->opaque = NULL; 1361: if (uri->authority != NULL) xmlFree(uri->authority); 1362: uri->authority = NULL; 1363: if (uri->query != NULL) xmlFree(uri->query); 1364: uri->query = NULL; 1365: if (uri->query_raw != NULL) xmlFree(uri->query_raw); 1366: uri->query_raw = NULL; 1367: } 1368: 1369: /** 1370: * xmlFreeURI: 1371: * @uri: pointer to an xmlURI 1372: * 1373: * Free up the xmlURI struct 1374: */ 1375: void 1376: xmlFreeURI(xmlURIPtr uri) { 1377: if (uri == NULL) return; 1378: 1379: if (uri->scheme != NULL) xmlFree(uri->scheme); 1380: if (uri->server != NULL) xmlFree(uri->server); 1381: if (uri->user != NULL) xmlFree(uri->user); 
1382: if (uri->path != NULL) xmlFree(uri->path); 1383: if (uri->fragment != NULL) xmlFree(uri->fragment); 1384: if (uri->opaque != NULL) xmlFree(uri->opaque); 1385: if (uri->authority != NULL) xmlFree(uri->authority); 1386: if (uri->query != NULL) xmlFree(uri->query); 1387: if (uri->query_raw != NULL) xmlFree(uri->query_raw); 1388: xmlFree(uri); 1389: } 1390: 1391: /************************************************************************ 1392: * * 1393: * Helper functions * 1394: * * 1395: ************************************************************************/ 1396: 1397: /** 1398: * xmlNormalizeURIPath: 1399: * @path: pointer to the path string 1400: * 1401: * Applies the 5 normalization steps to a path string--that is, RFC 2396 1402: * Section 5.2, steps 6.c through 6.g. 1403: * 1404: * Normalization occurs directly on the string, no new allocation is done 1405: * 1406: * Returns 0 or an error code 1407: */ 1408: int 1409: xmlNormalizeURIPath(char *path) { 1410: char *cur, *out; 1411: 1412: if (path == NULL) 1413: return(-1); 1414: 1415: /* Skip all initial "/" chars. We want to get to the beginning of the 1416: * first non-empty segment. 1417: */ 1418: cur = path; 1419: while (cur[0] == '/') 1420: ++cur; 1421: if (cur[0] == '\0') 1422: return(0); 1423: 1424: /* Keep everything we've seen so far. */ 1425: out = cur; 1426: 1427: /* 1428: * Analyze each segment in sequence for cases (c) and (d). 1429: */ 1430: while (cur[0] != '\0') { 1431: /* 1432: * c) All occurrences of "./", where "." is a complete path segment, 1433: * are removed from the buffer string. 1434: */ 1435: if ((cur[0] == '.') && (cur[1] == '/')) { 1436: cur += 2; 1437: /* '//' normalization should be done at this point too */ 1438: while (cur[0] == '/') 1439: cur++; 1440: continue; 1441: } 1442: 1443: /* 1444: * d) If the buffer string ends with "." as a complete path segment, 1445: * that "." is removed. 
1446: */ 1447: if ((cur[0] == '.') && (cur[1] == '\0')) 1448: break; 1449: 1450: /* Otherwise keep the segment. */ 1451: while (cur[0] != '/') { 1452: if (cur[0] == '\0') 1453: goto done_cd; 1454: (out++)[0] = (cur++)[0]; 1455: } 1456: /* nomalize // */ 1457: while ((cur[0] == '/') && (cur[1] == '/')) 1458: cur++; 1459: 1460: (out++)[0] = (cur++)[0]; 1461: } 1462: done_cd: 1463: out[0] = '\0'; 1464: 1465: /* Reset to the beginning of the first segment for the next sequence. */ 1466: cur = path; 1467: while (cur[0] == '/') 1468: ++cur; 1469: if (cur[0] == '\0') 1470: return(0); 1471: 1472: /* 1473: * Analyze each segment in sequence for cases (e) and (f). 1474: * 1475: * e) All occurrences of "<segment>/../", where <segment> is a 1476: * complete path segment not equal to "..", are removed from the 1477: * buffer string. Removal of these path segments is performed 1478: * iteratively, removing the leftmost matching pattern on each 1479: * iteration, until no matching pattern remains. 1480: * 1481: * f) If the buffer string ends with "<segment>/..", where <segment> 1482: * is a complete path segment not equal to "..", that 1483: * "<segment>/.." is removed. 1484: * 1485: * To satisfy the "iterative" clause in (e), we need to collapse the 1486: * string every time we find something that needs to be removed. Thus, 1487: * we don't need to keep two pointers into the string: we only need a 1488: * "current position" pointer. 1489: */ 1490: while (1) { 1491: char *segp, *tmp; 1492: 1493: /* At the beginning of each iteration of this loop, "cur" points to 1494: * the first character of the segment we want to examine. 1495: */ 1496: 1497: /* Find the end of the current segment. */ 1498: segp = cur; 1499: while ((segp[0] != '/') && (segp[0] != '\0')) 1500: ++segp; 1501: 1502: /* If this is the last segment, we're done (we need at least two 1503: * segments to meet the criteria for the (e) and (f) cases). 
1504: */ 1505: if (segp[0] == '\0') 1506: break; 1507: 1508: /* If the first segment is "..", or if the next segment _isn't_ "..", 1509: * keep this segment and try the next one. 1510: */ 1511: ++segp; 1512: if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3)) 1513: || ((segp[0] != '.') || (segp[1] != '.') 1514: || ((segp[2] != '/') && (segp[2] != '\0')))) { 1515: cur = segp; 1516: continue; 1517: } 1518: 1519: /* If we get here, remove this segment and the next one and back up 1520: * to the previous segment (if there is one), to implement the 1521: * "iteratively" clause. It's pretty much impossible to back up 1522: * while maintaining two pointers into the buffer, so just compact 1523: * the whole buffer now. 1524: */ 1525: 1526: /* If this is the end of the buffer, we're done. */ 1527: if (segp[2] == '\0') { 1528: cur[0] = '\0'; 1529: break; 1530: } 1531: /* Valgrind complained, strcpy(cur, segp + 3); */ 1532: /* string will overlap, do not use strcpy */ 1533: tmp = cur; 1534: segp += 3; 1535: while ((*tmp++ = *segp++) != 0) 1536: ; 1537: 1538: /* If there are no previous segments, then keep going from here. */ 1539: segp = cur; 1540: while ((segp > path) && ((--segp)[0] == '/')) 1541: ; 1542: if (segp == path) 1543: continue; 1544: 1545: /* "segp" is pointing to the end of a previous segment; find it's 1546: * start. We need to back up to the previous segment and start 1547: * over with that to handle things like "foo/bar/../..". If we 1548: * don't do this, then on the first pass we'll remove the "bar/..", 1549: * but be pointing at the second ".." so we won't realize we can also 1550: * remove the "foo/..". 1551: */ 1552: cur = segp; 1553: while ((cur > path) && (cur[-1] != '/')) 1554: --cur; 1555: } 1556: out[0] = '\0'; 1557: 1558: /* 1559: * g) If the resulting buffer string still begins with one or more 1560: * complete path segments of "..", then the reference is 1561: * considered to be in error. 
Implementations may handle this 1562: * error by retaining these components in the resolved path (i.e., 1563: * treating them as part of the final URI), by removing them from 1564: * the resolved path (i.e., discarding relative levels above the 1565: * root), or by avoiding traversal of the reference. 1566: * 1567: * We discard them from the final path. 1568: */ 1569: if (path[0] == '/') { 1570: cur = path; 1571: while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.') 1572: && ((cur[3] == '/') || (cur[3] == '\0'))) 1573: cur += 3; 1574: 1575: if (cur != path) { 1576: out = path; 1577: while (cur[0] != '\0') 1578: (out++)[0] = (cur++)[0]; 1579: out[0] = 0; 1580: } 1581: } 1582: 1583: return(0); 1584: } 1585: 1586: static int is_hex(char c) { 1587: if (((c >= '0') && (c <= '9')) || 1588: ((c >= 'a') && (c <= 'f')) || 1589: ((c >= 'A') && (c <= 'F'))) 1590: return(1); 1591: return(0); 1592: } 1593: 1594: /** 1595: * xmlURIUnescapeString: 1596: * @str: the string to unescape 1597: * @len: the length in bytes to unescape (or <= 0 to indicate full string) 1598: * @target: optional destination buffer 1599: * 1600: * Unescaping routine, but does not check that the string is an URI. The 1601: * output is a direct unsigned char translation of %XX values (no encoding) 1602: * Note that the length of the result can only be smaller or same size as 1603: * the input string. 
1604: * 1605: * Returns a copy of the string, but unescaped, will return NULL only in case 1606: * of error 1607: */ 1608: char * 1609: xmlURIUnescapeString(const char *str, int len, char *target) { 1610: char *ret, *out; 1611: const char *in; 1612: 1613: if (str == NULL) 1614: return(NULL); 1615: if (len <= 0) len = strlen(str); 1616: if (len < 0) return(NULL); 1617: 1618: if (target == NULL) { 1619: ret = (char *) xmlMallocAtomic(len + 1); 1620: if (ret == NULL) { 1621: xmlURIErrMemory("unescaping URI value\n"); 1622: return(NULL); 1623: } 1624: } else 1625: ret = target; 1626: in = str; 1627: out = ret; 1628: while(len > 0) { 1629: if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) { 1630: in++; 1631: if ((*in >= '0') && (*in <= '9')) 1632: *out = (*in - '0'); 1633: else if ((*in >= 'a') && (*in <= 'f')) 1634: *out = (*in - 'a') + 10; 1635: else if ((*in >= 'A') && (*in <= 'F')) 1636: *out = (*in - 'A') + 10; 1637: in++; 1638: if ((*in >= '0') && (*in <= '9')) 1639: *out = *out * 16 + (*in - '0'); 1640: else if ((*in >= 'a') && (*in <= 'f')) 1641: *out = *out * 16 + (*in - 'a') + 10; 1642: else if ((*in >= 'A') && (*in <= 'F')) 1643: *out = *out * 16 + (*in - 'A') + 10; 1644: in++; 1645: len -= 3; 1646: out++; 1647: } else { 1648: *out++ = *in++; 1649: len--; 1650: } 1651: } 1652: *out = 0; 1653: return(ret); 1654: } 1655: 1656: /** 1657: * xmlURIEscapeStr: 1658: * @str: string to escape 1659: * @list: exception list string of chars not to escape 1660: * 1661: * This routine escapes a string to hex, ignoring reserved characters (a-z) 1662: * and the characters in the exception list. 1663: * 1664: * Returns a new escaped string or NULL in case of error. 
1665: */ 1666: xmlChar * 1667: xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) { 1668: xmlChar *ret, ch; 1669: xmlChar *temp; 1670: const xmlChar *in; 1671: int len, out; 1672: 1673: if (str == NULL) 1674: return(NULL); 1675: if (str[0] == 0) 1676: return(xmlStrdup(str)); 1677: len = xmlStrlen(str); 1678: if (!(len > 0)) return(NULL); 1679: 1680: len += 20; 1681: ret = (xmlChar *) xmlMallocAtomic(len); 1682: if (ret == NULL) { 1683: xmlURIErrMemory("escaping URI value\n"); 1684: return(NULL); 1685: } 1686: in = (const xmlChar *) str; 1687: out = 0; 1688: while(*in != 0) { 1689: if (len - out <= 3) { 1690: temp = xmlSaveUriRealloc(ret, &len); 1691: if (temp == NULL) { 1692: xmlURIErrMemory("escaping URI value\n"); 1693: xmlFree(ret); 1694: return(NULL); 1695: } 1696: ret = temp; 1697: } 1698: 1699: ch = *in; 1700: 1701: if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) { 1702: unsigned char val; 1703: ret[out++] = '%'; 1704: val = ch >> 4; 1705: if (val <= 9) 1706: ret[out++] = '0' + val; 1707: else 1708: ret[out++] = 'A' + val - 0xA; 1709: val = ch & 0xF; 1710: if (val <= 9) 1711: ret[out++] = '0' + val; 1712: else 1713: ret[out++] = 'A' + val - 0xA; 1714: in++; 1715: } else { 1716: ret[out++] = *in++; 1717: } 1718: 1719: } 1720: ret[out] = 0; 1721: return(ret); 1722: } 1723: 1724: /** 1725: * xmlURIEscape: 1726: * @str: the string of the URI to escape 1727: * 1728: * Escaping routine, does not do validity checks ! 1729: * It will try to escape the chars needing this, but this is heuristic 1730: * based it's impossible to be sure. 1731: * 1732: * Returns an copy of the string, but escaped 1733: * 1734: * 25 May 2001 1735: * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly 1736: * according to RFC2396. 
1737: * - Carl Douglas 1738: */ 1739: xmlChar * 1740: xmlURIEscape(const xmlChar * str) 1741: { 1742: xmlChar *ret, *segment = NULL; 1743: xmlURIPtr uri; 1744: int ret2; 1745: 1746: #define NULLCHK(p) if(!p) { \ 1747: xmlURIErrMemory("escaping URI value\n"); \ 1748: xmlFreeURI(uri); \ 1749: return NULL; } \ 1750: 1751: if (str == NULL) 1752: return (NULL); 1753: 1754: uri = xmlCreateURI(); 1755: if (uri != NULL) { 1756: /* 1757: * Allow escaping errors in the unescaped form 1758: */ 1759: uri->cleanup = 1; 1760: ret2 = xmlParseURIReference(uri, (const char *)str); 1761: if (ret2) { 1762: xmlFreeURI(uri); 1763: return (NULL); 1764: } 1765: } 1766: 1767: if (!uri) 1768: return NULL; 1769: 1770: ret = NULL; 1771: 1772: if (uri->scheme) { 1773: segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-."); 1774: NULLCHK(segment) 1775: ret = xmlStrcat(ret, segment); 1776: ret = xmlStrcat(ret, BAD_CAST ":"); 1777: xmlFree(segment); 1778: } 1779: 1780: if (uri->authority) { 1781: segment = 1782: xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@"); 1783: NULLCHK(segment) 1784: ret = xmlStrcat(ret, BAD_CAST "//"); 1785: ret = xmlStrcat(ret, segment); 1786: xmlFree(segment); 1787: } 1788: 1789: if (uri->user) { 1790: segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,"); 1791: NULLCHK(segment) 1792: ret = xmlStrcat(ret,BAD_CAST "//"); 1793: ret = xmlStrcat(ret, segment); 1794: ret = xmlStrcat(ret, BAD_CAST "@"); 1795: xmlFree(segment); 1796: } 1797: 1798: if (uri->server) { 1799: segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@"); 1800: NULLCHK(segment) 1801: if (uri->user == NULL) 1802: ret = xmlStrcat(ret, BAD_CAST "//"); 1803: ret = xmlStrcat(ret, segment); 1804: xmlFree(segment); 1805: } 1806: 1807: if (uri->port) { 1808: xmlChar port[10]; 1809: 1810: snprintf((char *) port, 10, "%d", uri->port); 1811: ret = xmlStrcat(ret, BAD_CAST ":"); 1812: ret = xmlStrcat(ret, port); 1813: } 1814: 1815: if (uri->path) { 1816: segment = 1817: 
xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;"); 1818: NULLCHK(segment) 1819: ret = xmlStrcat(ret, segment); 1820: xmlFree(segment); 1821: } 1822: 1823: if (uri->query_raw) { 1824: ret = xmlStrcat(ret, BAD_CAST "?"); 1825: ret = xmlStrcat(ret, BAD_CAST uri->query_raw); 1826: } 1827: else if (uri->query) { 1828: segment = 1829: xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$"); 1830: NULLCHK(segment) 1831: ret = xmlStrcat(ret, BAD_CAST "?"); 1832: ret = xmlStrcat(ret, segment); 1833: xmlFree(segment); 1834: } 1835: 1836: if (uri->opaque) { 1837: segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST ""); 1838: NULLCHK(segment) 1839: ret = xmlStrcat(ret, segment); 1840: xmlFree(segment); 1841: } 1842: 1843: if (uri->fragment) { 1844: segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#"); 1845: NULLCHK(segment) 1846: ret = xmlStrcat(ret, BAD_CAST "#"); 1847: ret = xmlStrcat(ret, segment); 1848: xmlFree(segment); 1849: } 1850: 1851: xmlFreeURI(uri); 1852: #undef NULLCHK 1853: 1854: return (ret); 1855: } 1856: 1857: /************************************************************************ 1858: * * 1859: * Public functions * 1860: * * 1861: ************************************************************************/ 1862: 1863: /** 1864: * xmlBuildURI: 1865: * @URI: the URI instance found in the document 1866: * @base: the base value 1867: * 1868: * Computes he final URI of the reference done by checking that 1869: * the given URI is valid, and building the final URI using the 1870: * base URI. This is processed according to section 5.2 of the 1871: * RFC 2396 1872: * 1873: * 5.2. Resolving Relative References to Absolute Form 1874: * 1875: * Returns a new URI string (to be freed by the caller) or NULL in case 1876: * of error. 
1877: */ 1878: xmlChar * 1879: xmlBuildURI(const xmlChar *URI, const xmlChar *base) { 1880: xmlChar *val = NULL; 1881: int ret, len, indx, cur, out; 1882: xmlURIPtr ref = NULL; 1883: xmlURIPtr bas = NULL; 1884: xmlURIPtr res = NULL; 1885: 1886: /* 1887: * 1) The URI reference is parsed into the potential four components and 1888: * fragment identifier, as described in Section 4.3. 1889: * 1890: * NOTE that a completely empty URI is treated by modern browsers 1891: * as a reference to "." rather than as a synonym for the current 1892: * URI. Should we do that here? 1893: */ 1894: if (URI == NULL) 1895: ret = -1; 1896: else { 1897: if (*URI) { 1898: ref = xmlCreateURI(); 1899: if (ref == NULL) 1900: goto done; 1901: ret = xmlParseURIReference(ref, (const char *) URI); 1902: } 1903: else 1904: ret = 0; 1905: } 1906: if (ret != 0) 1907: goto done; 1908: if ((ref != NULL) && (ref->scheme != NULL)) { 1909: /* 1910: * The URI is absolute don't modify. 1911: */ 1912: val = xmlStrdup(URI); 1913: goto done; 1914: } 1915: if (base == NULL) 1916: ret = -1; 1917: else { 1918: bas = xmlCreateURI(); 1919: if (bas == NULL) 1920: goto done; 1921: ret = xmlParseURIReference(bas, (const char *) base); 1922: } 1923: if (ret != 0) { 1924: if (ref) 1925: val = xmlSaveUri(ref); 1926: goto done; 1927: } 1928: if (ref == NULL) { 1929: /* 1930: * the base fragment must be ignored 1931: */ 1932: if (bas->fragment != NULL) { 1933: xmlFree(bas->fragment); 1934: bas->fragment = NULL; 1935: } 1936: val = xmlSaveUri(bas); 1937: goto done; 1938: } 1939: 1940: /* 1941: * 2) If the path component is empty and the scheme, authority, and 1942: * query components are undefined, then it is a reference to the 1943: * current document and we are done. Otherwise, the reference URI's 1944: * query and fragment components are defined as found (or not found) 1945: * within the URI reference and not inherited from the base URI. 
1946: * 1947: * NOTE that in modern browsers, the parsing differs from the above 1948: * in the following aspect: the query component is allowed to be 1949: * defined while still treating this as a reference to the current 1950: * document. 1951: */ 1952: res = xmlCreateURI(); 1953: if (res == NULL) 1954: goto done; 1955: if ((ref->scheme == NULL) && (ref->path == NULL) && 1956: ((ref->authority == NULL) && (ref->server == NULL))) { 1957: if (bas->scheme != NULL) 1958: res->scheme = xmlMemStrdup(bas->scheme); 1959: if (bas->authority != NULL) 1960: res->authority = xmlMemStrdup(bas->authority); 1961: else if (bas->server != NULL) { 1962: res->server = xmlMemStrdup(bas->server); 1963: if (bas->user != NULL) 1964: res->user = xmlMemStrdup(bas->user); 1965: res->port = bas->port; 1966: } 1967: if (bas->path != NULL) 1968: res->path = xmlMemStrdup(bas->path); 1969: if (ref->query_raw != NULL) 1970: res->query_raw = xmlMemStrdup (ref->query_raw); 1971: else if (ref->query != NULL) 1972: res->query = xmlMemStrdup(ref->query); 1973: else if (bas->query_raw != NULL) 1974: res->query_raw = xmlMemStrdup(bas->query_raw); 1975: else if (bas->query != NULL) 1976: res->query = xmlMemStrdup(bas->query); 1977: if (ref->fragment != NULL) 1978: res->fragment = xmlMemStrdup(ref->fragment); 1979: goto step_7; 1980: } 1981: 1982: /* 1983: * 3) If the scheme component is defined, indicating that the reference 1984: * starts with a scheme name, then the reference is interpreted as an 1985: * absolute URI and we are done. Otherwise, the reference URI's 1986: * scheme is inherited from the base URI's scheme component. 
1987: */ 1988: if (ref->scheme != NULL) { 1989: val = xmlSaveUri(ref); 1990: goto done; 1991: } 1992: if (bas->scheme != NULL) 1993: res->scheme = xmlMemStrdup(bas->scheme); 1994: 1995: if (ref->query_raw != NULL) 1996: res->query_raw = xmlMemStrdup(ref->query_raw); 1997: else if (ref->query != NULL) 1998: res->query = xmlMemStrdup(ref->query); 1999: if (ref->fragment != NULL) 2000: res->fragment = xmlMemStrdup(ref->fragment); 2001: 2002: /* 2003: * 4) If the authority component is defined, then the reference is a 2004: * network-path and we skip to step 7. Otherwise, the reference 2005: * URI's authority is inherited from the base URI's authority 2006: * component, which will also be undefined if the URI scheme does not 2007: * use an authority component. 2008: */ 2009: if ((ref->authority != NULL) || (ref->server != NULL)) { 2010: if (ref->authority != NULL) 2011: res->authority = xmlMemStrdup(ref->authority); 2012: else { 2013: res->server = xmlMemStrdup(ref->server); 2014: if (ref->user != NULL) 2015: res->user = xmlMemStrdup(ref->user); 2016: res->port = ref->port; 2017: } 2018: if (ref->path != NULL) 2019: res->path = xmlMemStrdup(ref->path); 2020: goto step_7; 2021: } 2022: if (bas->authority != NULL) 2023: res->authority = xmlMemStrdup(bas->authority); 2024: else if (bas->server != NULL) { 2025: res->server = xmlMemStrdup(bas->server); 2026: if (bas->user != NULL) 2027: res->user = xmlMemStrdup(bas->user); 2028: res->port = bas->port; 2029: } 2030: 2031: /* 2032: * 5) If the path component begins with a slash character ("/"), then 2033: * the reference is an absolute-path and we skip to step 7. 2034: */ 2035: if ((ref->path != NULL) && (ref->path[0] == '/')) { 2036: res->path = xmlMemStrdup(ref->path); 2037: goto step_7; 2038: } 2039: 2040: 2041: /* 2042: * 6) If this step is reached, then we are resolving a relative-path 2043: * reference. The relative path needs to be merged with the base 2044: * URI's path. 
Although there are many ways to do this, we will 2045: * describe a simple method using a separate string buffer. 2046: * 2047: * Allocate a buffer large enough for the result string. 2048: */ 2049: len = 2; /* extra / and 0 */ 2050: if (ref->path != NULL) 2051: len += strlen(ref->path); 2052: if (bas->path != NULL) 2053: len += strlen(bas->path); 2054: res->path = (char *) xmlMallocAtomic(len); 2055: if (res->path == NULL) { 2056: xmlURIErrMemory("resolving URI against base\n"); 2057: goto done; 2058: } 2059: res->path[0] = 0; 2060: 2061: /* 2062: * a) All but the last segment of the base URI's path component is 2063: * copied to the buffer. In other words, any characters after the 2064: * last (right-most) slash character, if any, are excluded. 2065: */ 2066: cur = 0; 2067: out = 0; 2068: if (bas->path != NULL) { 2069: while (bas->path[cur] != 0) { 2070: while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) 2071: cur++; 2072: if (bas->path[cur] == 0) 2073: break; 2074: 2075: cur++; 2076: while (out < cur) { 2077: res->path[out] = bas->path[out]; 2078: out++; 2079: } 2080: } 2081: } 2082: res->path[out] = 0; 2083: 2084: /* 2085: * b) The reference's path component is appended to the buffer 2086: * string. 2087: */ 2088: if (ref->path != NULL && ref->path[0] != 0) { 2089: indx = 0; 2090: /* 2091: * Ensure the path includes a '/' 2092: */ 2093: if ((out == 0) && (bas->server != NULL)) 2094: res->path[out++] = '/'; 2095: while (ref->path[indx] != 0) { 2096: res->path[out++] = ref->path[indx++]; 2097: } 2098: } 2099: res->path[out] = 0; 2100: 2101: /* 2102: * Steps c) to h) are really path normalization steps 2103: */ 2104: xmlNormalizeURIPath(res->path); 2105: 2106: step_7: 2107: 2108: /* 2109: * 7) The resulting URI components, including any inherited from the 2110: * base URI, are recombined to give the absolute form of the URI 2111: * reference. 
2112: */ 2113: val = xmlSaveUri(res); 2114: 2115: done: 2116: if (ref != NULL) 2117: xmlFreeURI(ref); 2118: if (bas != NULL) 2119: xmlFreeURI(bas); 2120: if (res != NULL) 2121: xmlFreeURI(res); 2122: return(val); 2123: } 2124: 2125: /** 2126: * xmlBuildRelativeURI: 2127: * @URI: the URI reference under consideration 2128: * @base: the base value 2129: * 2130: * Expresses the URI of the reference in terms relative to the 2131: * base. Some examples of this operation include: 2132: * base = "http://site1.com/docs/book1.html" 2133: * URI input URI returned 2134: * docs/pic1.gif pic1.gif 2135: * docs/img/pic1.gif img/pic1.gif 2136: * img/pic1.gif ../img/pic1.gif 2137: * http://site1.com/docs/pic1.gif pic1.gif 2138: * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif 2139: * 2140: * base = "docs/book1.html" 2141: * URI input URI returned 2142: * docs/pic1.gif pic1.gif 2143: * docs/img/pic1.gif img/pic1.gif 2144: * img/pic1.gif ../img/pic1.gif 2145: * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif 2146: * 2147: * 2148: * Note: if the URI reference is really wierd or complicated, it may be 2149: * worthwhile to first convert it into a "nice" one by calling 2150: * xmlBuildURI (using 'base') before calling this routine, 2151: * since this routine (for reasonable efficiency) assumes URI has 2152: * already been through some validation. 2153: * 2154: * Returns a new URI string (to be freed by the caller) or NULL in case 2155: * error. 
2156: */ 2157: xmlChar * 2158: xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base) 2159: { 2160: xmlChar *val = NULL; 2161: int ret; 2162: int ix; 2163: int pos = 0; 2164: int nbslash = 0; 2165: int len; 2166: xmlURIPtr ref = NULL; 2167: xmlURIPtr bas = NULL; 2168: xmlChar *bptr, *uptr, *vptr; 2169: int remove_path = 0; 2170: 2171: if ((URI == NULL) || (*URI == 0)) 2172: return NULL; 2173: 2174: /* 2175: * First parse URI into a standard form 2176: */ 2177: ref = xmlCreateURI (); 2178: if (ref == NULL) 2179: return NULL; 2180: /* If URI not already in "relative" form */ 2181: if (URI[0] != '.') { 2182: ret = xmlParseURIReference (ref, (const char *) URI); 2183: if (ret != 0) 2184: goto done; /* Error in URI, return NULL */ 2185: } else 2186: ref->path = (char *)xmlStrdup(URI); 2187: 2188: /* 2189: * Next parse base into the same standard form 2190: */ 2191: if ((base == NULL) || (*base == 0)) { 2192: val = xmlStrdup (URI); 2193: goto done; 2194: } 2195: bas = xmlCreateURI (); 2196: if (bas == NULL) 2197: goto done; 2198: if (base[0] != '.') { 2199: ret = xmlParseURIReference (bas, (const char *) base); 2200: if (ret != 0) 2201: goto done; /* Error in base, return NULL */ 2202: } else 2203: bas->path = (char *)xmlStrdup(base); 2204: 2205: /* 2206: * If the scheme / server on the URI differs from the base, 2207: * just return the URI 2208: */ 2209: if ((ref->scheme != NULL) && 2210: ((bas->scheme == NULL) || 2211: (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) || 2212: (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) { 2213: val = xmlStrdup (URI); 2214: goto done; 2215: } 2216: if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) { 2217: val = xmlStrdup(BAD_CAST ""); 2218: goto done; 2219: } 2220: if (bas->path == NULL) { 2221: val = xmlStrdup((xmlChar *)ref->path); 2222: goto done; 2223: } 2224: if (ref->path == NULL) { 2225: ref->path = (char *) "/"; 2226: remove_path = 1; 2227: } 2228: 2229: /* 2230: * At this point 
(at last!) we can compare the two paths 2231: * 2232: * First we take care of the special case where either of the 2233: * two path components may be missing (bug 316224) 2234: */ 2235: if (bas->path == NULL) { 2236: if (ref->path != NULL) { 2237: uptr = (xmlChar *) ref->path; 2238: if (*uptr == '/') 2239: uptr++; 2240: /* exception characters from xmlSaveUri */ 2241: val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,"); 2242: } 2243: goto done; 2244: } 2245: bptr = (xmlChar *)bas->path; 2246: if (ref->path == NULL) { 2247: for (ix = 0; bptr[ix] != 0; ix++) { 2248: if (bptr[ix] == '/') 2249: nbslash++; 2250: } 2251: uptr = NULL; 2252: len = 1; /* this is for a string terminator only */ 2253: } else { 2254: /* 2255: * Next we compare the two strings and find where they first differ 2256: */ 2257: if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/')) 2258: pos += 2; 2259: if ((*bptr == '.') && (bptr[1] == '/')) 2260: bptr += 2; 2261: else if ((*bptr == '/') && (ref->path[pos] != '/')) 2262: bptr++; 2263: while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0)) 2264: pos++; 2265: 2266: if (bptr[pos] == ref->path[pos]) { 2267: val = xmlStrdup(BAD_CAST ""); 2268: goto done; /* (I can't imagine why anyone would do this) */ 2269: } 2270: 2271: /* 2272: * In URI, "back up" to the last '/' encountered. 
This will be the 2273: * beginning of the "unique" suffix of URI 2274: */ 2275: ix = pos; 2276: if ((ref->path[ix] == '/') && (ix > 0)) 2277: ix--; 2278: else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/')) 2279: ix -= 2; 2280: for (; ix > 0; ix--) { 2281: if (ref->path[ix] == '/') 2282: break; 2283: } 2284: if (ix == 0) { 2285: uptr = (xmlChar *)ref->path; 2286: } else { 2287: ix++; 2288: uptr = (xmlChar *)&ref->path[ix]; 2289: } 2290: 2291: /* 2292: * In base, count the number of '/' from the differing point 2293: */ 2294: if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */ 2295: for (; bptr[ix] != 0; ix++) { 2296: if (bptr[ix] == '/') 2297: nbslash++; 2298: } 2299: } 2300: len = xmlStrlen (uptr) + 1; 2301: } 2302: 2303: if (nbslash == 0) { 2304: if (uptr != NULL) 2305: /* exception characters from xmlSaveUri */ 2306: val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,"); 2307: goto done; 2308: } 2309: 2310: /* 2311: * Allocate just enough space for the returned string - 2312: * length of the remainder of the URI, plus enough space 2313: * for the "../" groups, plus one for the terminator 2314: */ 2315: val = (xmlChar *) xmlMalloc (len + 3 * nbslash); 2316: if (val == NULL) { 2317: xmlURIErrMemory("building relative URI\n"); 2318: goto done; 2319: } 2320: vptr = val; 2321: /* 2322: * Put in as many "../" as needed 2323: */ 2324: for (; nbslash>0; nbslash--) { 2325: *vptr++ = '.'; 2326: *vptr++ = '.'; 2327: *vptr++ = '/'; 2328: } 2329: /* 2330: * Finish up with the end of the URI 2331: */ 2332: if (uptr != NULL) { 2333: if ((vptr > val) && (len > 0) && 2334: (uptr[0] == '/') && (vptr[-1] == '/')) { 2335: memcpy (vptr, uptr + 1, len - 1); 2336: vptr[len - 2] = 0; 2337: } else { 2338: memcpy (vptr, uptr, len); 2339: vptr[len - 1] = 0; 2340: } 2341: } else { 2342: vptr[len - 1] = 0; 2343: } 2344: 2345: /* escape the freshly-built path */ 2346: vptr = val; 2347: /* exception characters from xmlSaveUri */ 2348: val = 
xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,"); 2349: xmlFree(vptr); 2350: 2351: done: 2352: /* 2353: * Free the working variables 2354: */ 2355: if (remove_path != 0) 2356: ref->path = NULL; 2357: if (ref != NULL) 2358: xmlFreeURI (ref); 2359: if (bas != NULL) 2360: xmlFreeURI (bas); 2361: 2362: return val; 2363: } 2364: 2365: /** 2366: * xmlCanonicPath: 2367: * @path: the resource locator in a filesystem notation 2368: * 2369: * Constructs a canonic path from the specified path. 2370: * 2371: * Returns a new canonic path, or a duplicate of the path parameter if the 2372: * construction fails. The caller is responsible for freeing the memory occupied 2373: * by the returned string. If there is insufficient memory available, or the 2374: * argument is NULL, the function returns NULL. 2375: */ 2376: #define IS_WINDOWS_PATH(p) \ 2377: ((p != NULL) && \ 2378: (((p[0] >= 'a') && (p[0] <= 'z')) || \ 2379: ((p[0] >= 'A') && (p[0] <= 'Z'))) && \ 2380: (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\'))) 2381: xmlChar * 2382: xmlCanonicPath(const xmlChar *path) 2383: { 2384: /* 2385: * For Windows implementations, additional work needs to be done to 2386: * replace backslashes in pathnames with "forward slashes" 2387: */ 2388: #if defined(_WIN32) && !defined(__CYGWIN__) 2389: int len = 0; 2390: int i = 0; 2391: xmlChar *p = NULL; 2392: #endif 2393: xmlURIPtr uri; 2394: xmlChar *ret; 2395: const xmlChar *absuri; 2396: 2397: if (path == NULL) 2398: return(NULL); 2399: 2400: #if defined(_WIN32) 2401: /* 2402: * We must not change the backslashes to slashes if the the path 2403: * starts with \\?\ 2404: * Those paths can be up to 32k characters long. 2405: * Was added specifically for OpenOffice, those paths can't be converted 2406: * to URIs anyway. 
2407: */ 2408: if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') && 2409: (path[3] == '\\') ) 2410: return xmlStrdup((const xmlChar *) path); 2411: #endif 2412: 2413: /* sanitize filename starting with // so it can be used as URI */ 2414: if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/')) 2415: path++; 2416: 2417: if ((uri = xmlParseURI((const char *) path)) != NULL) { 2418: xmlFreeURI(uri); 2419: return xmlStrdup(path); 2420: } 2421: 2422: /* Check if this is an "absolute uri" */ 2423: absuri = xmlStrstr(path, BAD_CAST "://"); 2424: if (absuri != NULL) { 2425: int l, j; 2426: unsigned char c; 2427: xmlChar *escURI; 2428: 2429: /* 2430: * this looks like an URI where some parts have not been 2431: * escaped leading to a parsing problem. Check that the first 2432: * part matches a protocol. 2433: */ 2434: l = absuri - path; 2435: /* Bypass if first part (part before the '://') is > 20 chars */ 2436: if ((l <= 0) || (l > 20)) 2437: goto path_processing; 2438: /* Bypass if any non-alpha characters are present in first part */ 2439: for (j = 0;j < l;j++) { 2440: c = path[j]; 2441: if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')))) 2442: goto path_processing; 2443: } 2444: 2445: /* Escape all except the characters specified in the supplied path */ 2446: escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;="); 2447: if (escURI != NULL) { 2448: /* Try parsing the escaped path */ 2449: uri = xmlParseURI((const char *) escURI); 2450: /* If successful, return the escaped string */ 2451: if (uri != NULL) { 2452: xmlFreeURI(uri); 2453: return escURI; 2454: } 2455: } 2456: } 2457: 2458: path_processing: 2459: /* For Windows implementations, replace backslashes with 'forward slashes' */ 2460: #if defined(_WIN32) && !defined(__CYGWIN__) 2461: /* 2462: * Create a URI structure 2463: */ 2464: uri = xmlCreateURI(); 2465: if (uri == NULL) { /* Guard against 'out of memory' */ 2466: return(NULL); 2467: } 2468: 2469: len = xmlStrlen(path); 2470: if 
((len > 2) && IS_WINDOWS_PATH(path)) { 2471: /* make the scheme 'file' */ 2472: uri->scheme = xmlStrdup(BAD_CAST "file"); 2473: /* allocate space for leading '/' + path + string terminator */ 2474: uri->path = xmlMallocAtomic(len + 2); 2475: if (uri->path == NULL) { 2476: xmlFreeURI(uri); /* Guard agains 'out of memory' */ 2477: return(NULL); 2478: } 2479: /* Put in leading '/' plus path */ 2480: uri->path[0] = '/'; 2481: p = uri->path + 1; 2482: strncpy(p, path, len + 1); 2483: } else { 2484: uri->path = xmlStrdup(path); 2485: if (uri->path == NULL) { 2486: xmlFreeURI(uri); 2487: return(NULL); 2488: } 2489: p = uri->path; 2490: } 2491: /* Now change all occurences of '\' to '/' */ 2492: while (*p != '\0') { 2493: if (*p == '\\') 2494: *p = '/'; 2495: p++; 2496: } 2497: 2498: if (uri->scheme == NULL) { 2499: ret = xmlStrdup((const xmlChar *) uri->path); 2500: } else { 2501: ret = xmlSaveUri(uri); 2502: } 2503: 2504: xmlFreeURI(uri); 2505: #else 2506: ret = xmlStrdup((const xmlChar *) path); 2507: #endif 2508: return(ret); 2509: } 2510: 2511: /** 2512: * xmlPathToURI: 2513: * @path: the resource locator in a filesystem notation 2514: * 2515: * Constructs an URI expressing the existing path 2516: * 2517: * Returns a new URI, or a duplicate of the path parameter if the 2518: * construction fails. The caller is responsible for freeing the memory 2519: * occupied by the returned string. If there is insufficient memory available, 2520: * or the argument is NULL, the function returns NULL. 
2521: */ 2522: xmlChar * 2523: xmlPathToURI(const xmlChar *path) 2524: { 2525: xmlURIPtr uri; 2526: xmlURI temp; 2527: xmlChar *ret, *cal; 2528: 2529: if (path == NULL) 2530: return(NULL); 2531: 2532: if ((uri = xmlParseURI((const char *) path)) != NULL) { 2533: xmlFreeURI(uri); 2534: return xmlStrdup(path); 2535: } 2536: cal = xmlCanonicPath(path); 2537: if (cal == NULL) 2538: return(NULL); 2539: #if defined(_WIN32) && !defined(__CYGWIN__) 2540: /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?) 2541: If 'cal' is a valid URI allready then we are done here, as continuing would make 2542: it invalid. */ 2543: if ((uri = xmlParseURI((const char *) cal)) != NULL) { 2544: xmlFreeURI(uri); 2545: return cal; 2546: } 2547: /* 'cal' can contain a relative path with backslashes. If that is processed 2548: by xmlSaveURI, they will be escaped and the external entity loader machinery 2549: will fail. So convert them to slashes. Misuse 'ret' for walking. */ 2550: ret = cal; 2551: while (*ret != '\0') { 2552: if (*ret == '\\') 2553: *ret = '/'; 2554: ret++; 2555: } 2556: #endif 2557: memset(&temp, 0, sizeof(temp)); 2558: temp.path = (char *) cal; 2559: ret = xmlSaveUri(&temp); 2560: xmlFree(cal); 2561: return(ret); 2562: } 2563: #define bottom_uri 2564: #include "elfgcchack.h"