File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2 / uri.c
Revision 1.1.1.2 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Mon Jul 22 01:22:20 2013 UTC (10 years, 11 months ago) by misho
Branches: libxml2, MAIN
CVS tags: v2_8_0p0, v2_8_0, HEAD
2.8.0

    1: /**
    2:  * uri.c: set of generic URI related routines 
    3:  *
    4:  * Reference: RFCs 3986, 2732 and 2373
    5:  *
    6:  * See Copyright for the status of this software.
    7:  *
    8:  * TODO: this module behaves really badly in out-of-memory situations
    9:  *
   10:  * daniel@veillard.com
   11:  */
   12: 
   13: #define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
   22: 
   23: static void xmlCleanURI(xmlURIPtr uri);
   24: 
   25: /*
   26:  * Old rule from 2396 used in legacy handling code
   27:  * alpha    = lowalpha | upalpha
   28:  */
   29: #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
   30: 
   31: 
   32: /*
   33:  * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
   34:  *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
   35:  *            "u" | "v" | "w" | "x" | "y" | "z"
   36:  */
   37: 
   38: #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
   39: 
   40: /*
   41:  * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
   42:  *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
   43:  *           "U" | "V" | "W" | "X" | "Y" | "Z"
   44:  */
   45: #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
   46: 
   47: #ifdef IS_DIGIT
   48: #undef IS_DIGIT
   49: #endif
   50: /*
   51:  * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
   52:  */
   53: #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
   54: 
   55: /*
   56:  * alphanum = alpha | digit
   57:  */
   58: 
   59: #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
   60: 
   61: /*
   62:  * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
   63:  */
   64: 
   65: #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
   66:     ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
   67:     ((x) == '(') || ((x) == ')'))
   68: 
   69: /*
   70:  * unwise = "{" | "}" | "|" | "\" | "^" | "`"
   71:  */
   72: 
   73: #define IS_UNWISE(p)                                                    \
   74:       (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
   75:        ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
   76:        ((*(p) == ']')) || ((*(p) == '`')))
   77: /*
   78:  * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
   79:  *            "[" | "]"
   80:  */
   81: 
   82: #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
   83:         ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
   84:         ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
   85:         ((x) == ']'))
   86: 
   87: /*
   88:  * unreserved = alphanum | mark
   89:  */
   90: 
   91: #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
   92: 
   93: /*
   94:  * Skip to next pointer char, handle escaped sequences
   95:  */
   96: 
   97: #define NEXT(p) ((*p == '%')? p += 3 : p++)
   98: 
   99: /*
  100:  * Productions from the spec.
  101:  *
  102:  *    authority     = server | reg_name
  103:  *    reg_name      = 1*( unreserved | escaped | "$" | "," |
  104:  *                        ";" | ":" | "@" | "&" | "=" | "+" )
  105:  *
  106:  * path          = [ abs_path | opaque_part ]
  107:  */
  108: 
  109: #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
  110: 
  111: /************************************************************************
  112:  *									*
  113:  *                         RFC 3986 parser				*
  114:  *									*
  115:  ************************************************************************/
  116: 
  117: #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
  118: #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
  119:                       ((*(p) >= 'A') && (*(p) <= 'Z')))
  120: #define ISA_HEXDIG(p)							\
  121:        (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
  122:         ((*(p) >= 'A') && (*(p) <= 'F')))
  123: 
  124: /*
  125:  *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
  126:  *                     / "*" / "+" / "," / ";" / "="
  127:  */
  128: #define ISA_SUB_DELIM(p)						\
  129:       (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
  130:        ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
  131:        ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
  132:        ((*(p) == '=')) || ((*(p) == '\'')))
  133: 
  134: /*
  135:  *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
  136:  */
  137: #define ISA_GEN_DELIM(p)						\
  138:       (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
  139:        ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
  140:        ((*(p) == '@')))
  141: 
  142: /*
  143:  *    reserved      = gen-delims / sub-delims
  144:  */
  145: #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
  146: 
  147: /*
  148:  *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
  149:  */
  150: #define ISA_UNRESERVED(p)						\
  151:       ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
  152:        ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
  153: 
  154: /*
  155:  *    pct-encoded   = "%" HEXDIG HEXDIG
  156:  */
  157: #define ISA_PCT_ENCODED(p)						\
  158:      ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
  159: 
  160: /*
  161:  *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
  162:  */
  163: #define ISA_PCHAR(p)							\
  164:      (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
  165:       ((*(p) == ':')) || ((*(p) == '@')))
  166: 
  167: /**
  168:  * xmlParse3986Scheme:
  169:  * @uri:  pointer to an URI structure
  170:  * @str:  pointer to the string to analyze
  171:  *
  172:  * Parse an URI scheme
  173:  *
  174:  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  175:  *
  176:  * Returns 0 or the error code
  177:  */
  178: static int
  179: xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
  180:     const char *cur;
  181: 
  182:     if (str == NULL)
  183: 	return(-1);
  184: 
  185:     cur = *str;
  186:     if (!ISA_ALPHA(cur))
  187: 	return(2);
  188:     cur++;
  189:     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
  190:            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
  191:     if (uri != NULL) {
  192: 	if (uri->scheme != NULL) xmlFree(uri->scheme);
  193: 	uri->scheme = STRNDUP(*str, cur - *str);
  194:     }
  195:     *str = cur;
  196:     return(0);
  197: }
  198: 
  199: /**
  200:  * xmlParse3986Fragment:
  201:  * @uri:  pointer to an URI structure
  202:  * @str:  pointer to the string to analyze
  203:  *
  204:  * Parse the query part of an URI
  205:  *
  206:  * fragment      = *( pchar / "/" / "?" )
  207:  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
  208:  *       in the fragment identifier but this is used very broadly for
  209:  *       xpointer scheme selection, so we are allowing it here to not break
  210:  *       for example all the DocBook processing chains.
  211:  *
  212:  * Returns 0 or the error code
  213:  */
  214: static int
  215: xmlParse3986Fragment(xmlURIPtr uri, const char **str)
  216: {
  217:     const char *cur;
  218: 
  219:     if (str == NULL)
  220:         return (-1);
  221: 
  222:     cur = *str;
  223: 
  224:     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
  225:            (*cur == '[') || (*cur == ']') ||
  226:            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
  227:         NEXT(cur);
  228:     if (uri != NULL) {
  229:         if (uri->fragment != NULL)
  230:             xmlFree(uri->fragment);
  231: 	if (uri->cleanup & 2)
  232: 	    uri->fragment = STRNDUP(*str, cur - *str);
  233: 	else
  234: 	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
  235:     }
  236:     *str = cur;
  237:     return (0);
  238: }
  239: 
  240: /**
  241:  * xmlParse3986Query:
  242:  * @uri:  pointer to an URI structure
  243:  * @str:  pointer to the string to analyze
  244:  *
  245:  * Parse the query part of an URI
  246:  *
  247:  * query = *uric
  248:  *
  249:  * Returns 0 or the error code
  250:  */
  251: static int
  252: xmlParse3986Query(xmlURIPtr uri, const char **str)
  253: {
  254:     const char *cur;
  255: 
  256:     if (str == NULL)
  257:         return (-1);
  258: 
  259:     cur = *str;
  260: 
  261:     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
  262:            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
  263:         NEXT(cur);
  264:     if (uri != NULL) {
  265:         if (uri->query != NULL)
  266:             xmlFree(uri->query);
  267: 	if (uri->cleanup & 2)
  268: 	    uri->query = STRNDUP(*str, cur - *str);
  269: 	else
  270: 	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
  271: 
  272: 	/* Save the raw bytes of the query as well.
  273: 	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
  274: 	 */
  275: 	if (uri->query_raw != NULL)
  276: 	    xmlFree (uri->query_raw);
  277: 	uri->query_raw = STRNDUP (*str, cur - *str);
  278:     }
  279:     *str = cur;
  280:     return (0);
  281: }
  282: 
  283: /**
  284:  * xmlParse3986Port:
  285:  * @uri:  pointer to an URI structure
  286:  * @str:  the string to analyze
  287:  *
  288:  * Parse a port  part and fills in the appropriate fields
  289:  * of the @uri structure
  290:  *
  291:  * port          = *DIGIT
  292:  *
  293:  * Returns 0 or the error code
  294:  */
  295: static int
  296: xmlParse3986Port(xmlURIPtr uri, const char **str)
  297: {
  298:     const char *cur = *str;
  299: 
  300:     if (ISA_DIGIT(cur)) {
  301: 	if (uri != NULL)
  302: 	    uri->port = 0;
  303: 	while (ISA_DIGIT(cur)) {
  304: 	    if (uri != NULL)
  305: 		uri->port = uri->port * 10 + (*cur - '0');
  306: 	    cur++;
  307: 	}
  308: 	*str = cur;
  309: 	return(0);
  310:     }
  311:     return(1);
  312: }
  313: 
  314: /**
  315:  * xmlParse3986Userinfo:
  316:  * @uri:  pointer to an URI structure
  317:  * @str:  the string to analyze
  318:  *
  319:  * Parse an user informations part and fills in the appropriate fields
  320:  * of the @uri structure
  321:  *
  322:  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
  323:  *
  324:  * Returns 0 or the error code
  325:  */
  326: static int
  327: xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
  328: {
  329:     const char *cur;
  330: 
  331:     cur = *str;
  332:     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
  333:            ISA_SUB_DELIM(cur) || (*cur == ':'))
  334: 	NEXT(cur);
  335:     if (*cur == '@') {
  336: 	if (uri != NULL) {
  337: 	    if (uri->user != NULL) xmlFree(uri->user);
  338: 	    if (uri->cleanup & 2)
  339: 		uri->user = STRNDUP(*str, cur - *str);
  340: 	    else
  341: 		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
  342: 	}
  343: 	*str = cur;
  344: 	return(0);
  345:     }
  346:     return(1);
  347: }
  348: 
  349: /**
  350:  * xmlParse3986DecOctet:
  351:  * @str:  the string to analyze
  352:  *
  353:  *    dec-octet     = DIGIT                 ; 0-9
  354:  *                  / %x31-39 DIGIT         ; 10-99
  355:  *                  / "1" 2DIGIT            ; 100-199
  356:  *                  / "2" %x30-34 DIGIT     ; 200-249
  357:  *                  / "25" %x30-35          ; 250-255
  358:  *
  359:  * Skip a dec-octet.
  360:  *
  361:  * Returns 0 if found and skipped, 1 otherwise
  362:  */
  363: static int
  364: xmlParse3986DecOctet(const char **str) {
  365:     const char *cur = *str;
  366: 
  367:     if (!(ISA_DIGIT(cur)))
  368:         return(1);
  369:     if (!ISA_DIGIT(cur+1))
  370: 	cur++;
  371:     else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
  372: 	cur += 2;
  373:     else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
  374: 	cur += 3;
  375:     else if ((*cur == '2') && (*(cur + 1) >= '0') &&
  376: 	     (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
  377: 	cur += 3;
  378:     else if ((*cur == '2') && (*(cur + 1) == '5') &&
  379: 	     (*(cur + 2) >= '0') && (*(cur + 1) <= '5'))
  380: 	cur += 3;
  381:     else
  382:         return(1);
  383:     *str = cur;
  384:     return(0);
  385: }
  386: /**
  387:  * xmlParse3986Host:
  388:  * @uri:  pointer to an URI structure
  389:  * @str:  the string to analyze
  390:  *
  391:  * Parse an host part and fills in the appropriate fields
  392:  * of the @uri structure
  393:  *
  394:  * host          = IP-literal / IPv4address / reg-name
  395:  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
  396:  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
  397:  * reg-name      = *( unreserved / pct-encoded / sub-delims )
  398:  *
  399:  * Returns 0 or the error code
  400:  */
  401: static int
  402: xmlParse3986Host(xmlURIPtr uri, const char **str)
  403: {
  404:     const char *cur = *str;
  405:     const char *host;
  406: 
  407:     host = cur;
  408:     /*
  409:      * IPv6 and future adressing scheme are enclosed between brackets
  410:      */
  411:     if (*cur == '[') {
  412:         cur++;
  413: 	while ((*cur != ']') && (*cur != 0))
  414: 	    cur++;
  415: 	if (*cur != ']')
  416: 	    return(1);
  417: 	cur++;
  418: 	goto found;
  419:     }
  420:     /*
  421:      * try to parse an IPv4
  422:      */
  423:     if (ISA_DIGIT(cur)) {
  424:         if (xmlParse3986DecOctet(&cur) != 0)
  425: 	    goto not_ipv4;
  426: 	if (*cur != '.')
  427: 	    goto not_ipv4;
  428: 	cur++;
  429:         if (xmlParse3986DecOctet(&cur) != 0)
  430: 	    goto not_ipv4;
  431: 	if (*cur != '.')
  432: 	    goto not_ipv4;
  433:         if (xmlParse3986DecOctet(&cur) != 0)
  434: 	    goto not_ipv4;
  435: 	if (*cur != '.')
  436: 	    goto not_ipv4;
  437:         if (xmlParse3986DecOctet(&cur) != 0)
  438: 	    goto not_ipv4;
  439: 	goto found;
  440: not_ipv4:
  441:         cur = *str;
  442:     }
  443:     /*
  444:      * then this should be a hostname which can be empty
  445:      */
  446:     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
  447:         NEXT(cur);
  448: found:
  449:     if (uri != NULL) {
  450: 	if (uri->authority != NULL) xmlFree(uri->authority);
  451: 	uri->authority = NULL;
  452: 	if (uri->server != NULL) xmlFree(uri->server);
  453: 	if (cur != host) {
  454: 	    if (uri->cleanup & 2)
  455: 		uri->server = STRNDUP(host, cur - host);
  456: 	    else
  457: 		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
  458: 	} else
  459: 	    uri->server = NULL;
  460:     }
  461:     *str = cur;
  462:     return(0);
  463: }
  464: 
  465: /**
  466:  * xmlParse3986Authority:
  467:  * @uri:  pointer to an URI structure
  468:  * @str:  the string to analyze
  469:  *
  470:  * Parse an authority part and fills in the appropriate fields
  471:  * of the @uri structure
  472:  *
  473:  * authority     = [ userinfo "@" ] host [ ":" port ]
  474:  *
  475:  * Returns 0 or the error code
  476:  */
  477: static int
  478: xmlParse3986Authority(xmlURIPtr uri, const char **str)
  479: {
  480:     const char *cur;
  481:     int ret;
  482: 
  483:     cur = *str;
  484:     /*
  485:      * try to parse an userinfo and check for the trailing @
  486:      */
  487:     ret = xmlParse3986Userinfo(uri, &cur);
  488:     if ((ret != 0) || (*cur != '@'))
  489:         cur = *str;
  490:     else
  491:         cur++;
  492:     ret = xmlParse3986Host(uri, &cur);
  493:     if (ret != 0) return(ret);
  494:     if (*cur == ':') {
  495:         cur++;
  496:         ret = xmlParse3986Port(uri, &cur);
  497: 	if (ret != 0) return(ret);
  498:     }
  499:     *str = cur;
  500:     return(0);
  501: }
  502: 
  503: /**
  504:  * xmlParse3986Segment:
  505:  * @str:  the string to analyze
  506:  * @forbid: an optional forbidden character
  507:  * @empty: allow an empty segment
  508:  *
  509:  * Parse a segment and fills in the appropriate fields
  510:  * of the @uri structure
  511:  *
  512:  * segment       = *pchar
  513:  * segment-nz    = 1*pchar
  514:  * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
  515:  *               ; non-zero-length segment without any colon ":"
  516:  *
  517:  * Returns 0 or the error code
  518:  */
  519: static int
  520: xmlParse3986Segment(const char **str, char forbid, int empty)
  521: {
  522:     const char *cur;
  523: 
  524:     cur = *str;
  525:     if (!ISA_PCHAR(cur)) {
  526:         if (empty)
  527: 	    return(0);
  528: 	return(1);
  529:     }
  530:     while (ISA_PCHAR(cur) && (*cur != forbid))
  531:         NEXT(cur);
  532:     *str = cur;
  533:     return (0);
  534: }
  535: 
  536: /**
  537:  * xmlParse3986PathAbEmpty:
  538:  * @uri:  pointer to an URI structure
  539:  * @str:  the string to analyze
  540:  *
  541:  * Parse an path absolute or empty and fills in the appropriate fields
  542:  * of the @uri structure
  543:  *
  544:  * path-abempty  = *( "/" segment )
  545:  *
  546:  * Returns 0 or the error code
  547:  */
  548: static int
  549: xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
  550: {
  551:     const char *cur;
  552:     int ret;
  553: 
  554:     cur = *str;
  555: 
  556:     while (*cur == '/') {
  557:         cur++;
  558: 	ret = xmlParse3986Segment(&cur, 0, 1);
  559: 	if (ret != 0) return(ret);
  560:     }
  561:     if (uri != NULL) {
  562: 	if (uri->path != NULL) xmlFree(uri->path);
  563:         if (*str != cur) {
  564:             if (uri->cleanup & 2)
  565:                 uri->path = STRNDUP(*str, cur - *str);
  566:             else
  567:                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
  568:         } else {
  569:             uri->path = NULL;
  570:         }
  571:     }
  572:     *str = cur;
  573:     return (0);
  574: }
  575: 
  576: /**
  577:  * xmlParse3986PathAbsolute:
  578:  * @uri:  pointer to an URI structure
  579:  * @str:  the string to analyze
  580:  *
  581:  * Parse an path absolute and fills in the appropriate fields
  582:  * of the @uri structure
  583:  *
  584:  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
  585:  *
  586:  * Returns 0 or the error code
  587:  */
  588: static int
  589: xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
  590: {
  591:     const char *cur;
  592:     int ret;
  593: 
  594:     cur = *str;
  595: 
  596:     if (*cur != '/')
  597:         return(1);
  598:     cur++;
  599:     ret = xmlParse3986Segment(&cur, 0, 0);
  600:     if (ret == 0) {
  601: 	while (*cur == '/') {
  602: 	    cur++;
  603: 	    ret = xmlParse3986Segment(&cur, 0, 1);
  604: 	    if (ret != 0) return(ret);
  605: 	}
  606:     }
  607:     if (uri != NULL) {
  608: 	if (uri->path != NULL) xmlFree(uri->path);
  609:         if (cur != *str) {
  610:             if (uri->cleanup & 2)
  611:                 uri->path = STRNDUP(*str, cur - *str);
  612:             else
  613:                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
  614:         } else {
  615:             uri->path = NULL;
  616:         }
  617:     }
  618:     *str = cur;
  619:     return (0);
  620: }
  621: 
  622: /**
  623:  * xmlParse3986PathRootless:
  624:  * @uri:  pointer to an URI structure
  625:  * @str:  the string to analyze
  626:  *
  627:  * Parse an path without root and fills in the appropriate fields
  628:  * of the @uri structure
  629:  *
  630:  * path-rootless = segment-nz *( "/" segment )
  631:  *
  632:  * Returns 0 or the error code
  633:  */
  634: static int
  635: xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
  636: {
  637:     const char *cur;
  638:     int ret;
  639: 
  640:     cur = *str;
  641: 
  642:     ret = xmlParse3986Segment(&cur, 0, 0);
  643:     if (ret != 0) return(ret);
  644:     while (*cur == '/') {
  645:         cur++;
  646: 	ret = xmlParse3986Segment(&cur, 0, 1);
  647: 	if (ret != 0) return(ret);
  648:     }
  649:     if (uri != NULL) {
  650: 	if (uri->path != NULL) xmlFree(uri->path);
  651:         if (cur != *str) {
  652:             if (uri->cleanup & 2)
  653:                 uri->path = STRNDUP(*str, cur - *str);
  654:             else
  655:                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
  656:         } else {
  657:             uri->path = NULL;
  658:         }
  659:     }
  660:     *str = cur;
  661:     return (0);
  662: }
  663: 
  664: /**
  665:  * xmlParse3986PathNoScheme:
  666:  * @uri:  pointer to an URI structure
  667:  * @str:  the string to analyze
  668:  *
  669:  * Parse an path which is not a scheme and fills in the appropriate fields
  670:  * of the @uri structure
  671:  *
  672:  * path-noscheme = segment-nz-nc *( "/" segment )
  673:  *
  674:  * Returns 0 or the error code
  675:  */
  676: static int
  677: xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
  678: {
  679:     const char *cur;
  680:     int ret;
  681: 
  682:     cur = *str;
  683: 
  684:     ret = xmlParse3986Segment(&cur, ':', 0);
  685:     if (ret != 0) return(ret);
  686:     while (*cur == '/') {
  687:         cur++;
  688: 	ret = xmlParse3986Segment(&cur, 0, 1);
  689: 	if (ret != 0) return(ret);
  690:     }
  691:     if (uri != NULL) {
  692: 	if (uri->path != NULL) xmlFree(uri->path);
  693:         if (cur != *str) {
  694:             if (uri->cleanup & 2)
  695:                 uri->path = STRNDUP(*str, cur - *str);
  696:             else
  697:                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
  698:         } else {
  699:             uri->path = NULL;
  700:         }
  701:     }
  702:     *str = cur;
  703:     return (0);
  704: }
  705: 
  706: /**
  707:  * xmlParse3986HierPart:
  708:  * @uri:  pointer to an URI structure
  709:  * @str:  the string to analyze
  710:  *
  711:  * Parse an hierarchical part and fills in the appropriate fields
  712:  * of the @uri structure
  713:  *
  714:  * hier-part     = "//" authority path-abempty
  715:  *                / path-absolute
  716:  *                / path-rootless
  717:  *                / path-empty
  718:  *
  719:  * Returns 0 or the error code
  720:  */
  721: static int
  722: xmlParse3986HierPart(xmlURIPtr uri, const char **str)
  723: {
  724:     const char *cur;
  725:     int ret;
  726: 
  727:     cur = *str;
  728: 
  729:     if ((*cur == '/') && (*(cur + 1) == '/')) {
  730:         cur += 2;
  731: 	ret = xmlParse3986Authority(uri, &cur);
  732: 	if (ret != 0) return(ret);
  733: 	ret = xmlParse3986PathAbEmpty(uri, &cur);
  734: 	if (ret != 0) return(ret);
  735: 	*str = cur;
  736: 	return(0);
  737:     } else if (*cur == '/') {
  738:         ret = xmlParse3986PathAbsolute(uri, &cur);
  739: 	if (ret != 0) return(ret);
  740:     } else if (ISA_PCHAR(cur)) {
  741:         ret = xmlParse3986PathRootless(uri, &cur);
  742: 	if (ret != 0) return(ret);
  743:     } else {
  744: 	/* path-empty is effectively empty */
  745: 	if (uri != NULL) {
  746: 	    if (uri->path != NULL) xmlFree(uri->path);
  747: 	    uri->path = NULL;
  748: 	}
  749:     }
  750:     *str = cur;
  751:     return (0);
  752: }
  753: 
  754: /**
  755:  * xmlParse3986RelativeRef:
  756:  * @uri:  pointer to an URI structure
  757:  * @str:  the string to analyze
  758:  *
  759:  * Parse an URI string and fills in the appropriate fields
  760:  * of the @uri structure
  761:  *
  762:  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
  763:  * relative-part = "//" authority path-abempty
  764:  *               / path-absolute
  765:  *               / path-noscheme
  766:  *               / path-empty
  767:  *
  768:  * Returns 0 or the error code
  769:  */
  770: static int
  771: xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
  772:     int ret;
  773: 
  774:     if ((*str == '/') && (*(str + 1) == '/')) {
  775:         str += 2;
  776: 	ret = xmlParse3986Authority(uri, &str);
  777: 	if (ret != 0) return(ret);
  778: 	ret = xmlParse3986PathAbEmpty(uri, &str);
  779: 	if (ret != 0) return(ret);
  780:     } else if (*str == '/') {
  781: 	ret = xmlParse3986PathAbsolute(uri, &str);
  782: 	if (ret != 0) return(ret);
  783:     } else if (ISA_PCHAR(str)) {
  784:         ret = xmlParse3986PathNoScheme(uri, &str);
  785: 	if (ret != 0) return(ret);
  786:     } else {
  787: 	/* path-empty is effectively empty */
  788: 	if (uri != NULL) {
  789: 	    if (uri->path != NULL) xmlFree(uri->path);
  790: 	    uri->path = NULL;
  791: 	}
  792:     }
  793: 
  794:     if (*str == '?') {
  795: 	str++;
  796: 	ret = xmlParse3986Query(uri, &str);
  797: 	if (ret != 0) return(ret);
  798:     }
  799:     if (*str == '#') {
  800: 	str++;
  801: 	ret = xmlParse3986Fragment(uri, &str);
  802: 	if (ret != 0) return(ret);
  803:     }
  804:     if (*str != 0) {
  805: 	xmlCleanURI(uri);
  806: 	return(1);
  807:     }
  808:     return(0);
  809: }
  810: 
  811: 
  812: /**
  813:  * xmlParse3986URI:
  814:  * @uri:  pointer to an URI structure
  815:  * @str:  the string to analyze
  816:  *
  817:  * Parse an URI string and fills in the appropriate fields
  818:  * of the @uri structure
  819:  *
  820:  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
  821:  *
  822:  * Returns 0 or the error code
  823:  */
  824: static int
  825: xmlParse3986URI(xmlURIPtr uri, const char *str) {
  826:     int ret;
  827: 
  828:     ret = xmlParse3986Scheme(uri, &str);
  829:     if (ret != 0) return(ret);
  830:     if (*str != ':') {
  831: 	return(1);
  832:     }
  833:     str++;
  834:     ret = xmlParse3986HierPart(uri, &str);
  835:     if (ret != 0) return(ret);
  836:     if (*str == '?') {
  837: 	str++;
  838: 	ret = xmlParse3986Query(uri, &str);
  839: 	if (ret != 0) return(ret);
  840:     }
  841:     if (*str == '#') {
  842: 	str++;
  843: 	ret = xmlParse3986Fragment(uri, &str);
  844: 	if (ret != 0) return(ret);
  845:     }
  846:     if (*str != 0) {
  847: 	xmlCleanURI(uri);
  848: 	return(1);
  849:     }
  850:     return(0);
  851: }
  852: 
  853: /**
  854:  * xmlParse3986URIReference:
  855:  * @uri:  pointer to an URI structure
  856:  * @str:  the string to analyze
  857:  *
  858:  * Parse an URI reference string and fills in the appropriate fields
  859:  * of the @uri structure
  860:  *
  861:  * URI-reference = URI / relative-ref
  862:  *
  863:  * Returns 0 or the error code
  864:  */
  865: static int
  866: xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
  867:     int ret;
  868: 
  869:     if (str == NULL)
  870: 	return(-1);
  871:     xmlCleanURI(uri);
  872: 
  873:     /*
  874:      * Try first to parse absolute refs, then fallback to relative if
  875:      * it fails.
  876:      */
  877:     ret = xmlParse3986URI(uri, str);
  878:     if (ret != 0) {
  879: 	xmlCleanURI(uri);
  880:         ret = xmlParse3986RelativeRef(uri, str);
  881: 	if (ret != 0) {
  882: 	    xmlCleanURI(uri);
  883: 	    return(ret);
  884: 	}
  885:     }
  886:     return(0);
  887: }
  888: 
  889: /**
  890:  * xmlParseURI:
  891:  * @str:  the URI string to analyze
  892:  *
  893:  * Parse an URI based on RFC 3986
  894:  *
  895:  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
  896:  *
  897:  * Returns a newly built xmlURIPtr or NULL in case of error
  898:  */
  899: xmlURIPtr
  900: xmlParseURI(const char *str) {
  901:     xmlURIPtr uri;
  902:     int ret;
  903: 
  904:     if (str == NULL)
  905: 	return(NULL);
  906:     uri = xmlCreateURI();
  907:     if (uri != NULL) {
  908: 	ret = xmlParse3986URIReference(uri, str);
  909:         if (ret) {
  910: 	    xmlFreeURI(uri);
  911: 	    return(NULL);
  912: 	}
  913:     }
  914:     return(uri);
  915: }
  916: 
  917: /**
  918:  * xmlParseURIReference:
  919:  * @uri:  pointer to an URI structure
  920:  * @str:  the string to analyze
  921:  *
  922:  * Parse an URI reference string based on RFC 3986 and fills in the
  923:  * appropriate fields of the @uri structure
  924:  *
  925:  * URI-reference = URI / relative-ref
  926:  *
  927:  * Returns 0 or the error code
  928:  */
  929: int
  930: xmlParseURIReference(xmlURIPtr uri, const char *str) {
  931:     return(xmlParse3986URIReference(uri, str));
  932: }
  933: 
  934: /**
  935:  * xmlParseURIRaw:
  936:  * @str:  the URI string to analyze
  937:  * @raw:  if 1 unescaping of URI pieces are disabled
  938:  *
  939:  * Parse an URI but allows to keep intact the original fragments.
  940:  *
  941:  * URI-reference = URI / relative-ref
  942:  *
  943:  * Returns a newly built xmlURIPtr or NULL in case of error
  944:  */
  945: xmlURIPtr
  946: xmlParseURIRaw(const char *str, int raw) {
  947:     xmlURIPtr uri;
  948:     int ret;
  949: 
  950:     if (str == NULL)
  951: 	return(NULL);
  952:     uri = xmlCreateURI();
  953:     if (uri != NULL) {
  954:         if (raw) {
  955: 	    uri->cleanup |= 2;
  956: 	}
  957: 	ret = xmlParseURIReference(uri, str);
  958:         if (ret) {
  959: 	    xmlFreeURI(uri);
  960: 	    return(NULL);
  961: 	}
  962:     }
  963:     return(uri);
  964: }
  965: 
  966: /************************************************************************
  967:  *									*
  968:  *			Generic URI structure functions			*
  969:  *									*
  970:  ************************************************************************/
  971: 
  972: /**
  973:  * xmlCreateURI:
  974:  *
  975:  * Simply creates an empty xmlURI
  976:  *
  977:  * Returns the new structure or NULL in case of error
  978:  */
  979: xmlURIPtr
  980: xmlCreateURI(void) {
  981:     xmlURIPtr ret;
  982: 
  983:     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
  984:     if (ret == NULL) {
  985: 	xmlGenericError(xmlGenericErrorContext,
  986: 		"xmlCreateURI: out of memory\n");
  987: 	return(NULL);
  988:     }
  989:     memset(ret, 0, sizeof(xmlURI));
  990:     return(ret);
  991: }
  992: 
  993: /**
  994:  * xmlSaveUri:
  995:  * @uri:  pointer to an xmlURI
  996:  *
  997:  * Save the URI as an escaped string
  998:  *
  999:  * Returns a new string (to be deallocated by caller)
 1000:  */
 1001: xmlChar *
 1002: xmlSaveUri(xmlURIPtr uri) {
 1003:     xmlChar *ret = NULL;
 1004:     xmlChar *temp;
 1005:     const char *p;
 1006:     int len;
 1007:     int max;
 1008: 
 1009:     if (uri == NULL) return(NULL);
 1010: 
 1011: 
 1012:     max = 80;
 1013:     ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
 1014:     if (ret == NULL) {
 1015: 	xmlGenericError(xmlGenericErrorContext,
 1016: 		"xmlSaveUri: out of memory\n");
 1017: 	return(NULL);
 1018:     }
 1019:     len = 0;
 1020: 
 1021:     if (uri->scheme != NULL) {
 1022: 	p = uri->scheme;
 1023: 	while (*p != 0) {
 1024: 	    if (len >= max) {
 1025: 		max *= 2;
 1026: 		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 1027: 		if (temp == NULL) {
 1028: 		    xmlGenericError(xmlGenericErrorContext,
 1029: 			    "xmlSaveUri: out of memory\n");
 1030: 		    xmlFree(ret);
 1031: 		    return(NULL);
 1032: 		}
 1033: 		ret = temp;
 1034: 	    }
 1035: 	    ret[len++] = *p++;
 1036: 	}
 1037: 	if (len >= max) {
 1038: 	    max *= 2;
 1039: 	    temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 1040: 	    if (temp == NULL) {
 1041: 		xmlGenericError(xmlGenericErrorContext,
 1042: 			"xmlSaveUri: out of memory\n");
 1043: 		xmlFree(ret);
 1044: 		return(NULL);
 1045: 	    }
 1046: 	    ret = temp;
 1047: 	}
 1048: 	ret[len++] = ':';
 1049:     }
 1050:     if (uri->opaque != NULL) {
 1051: 	p = uri->opaque;
 1052: 	while (*p != 0) {
 1053: 	    if (len + 3 >= max) {
 1054: 		max *= 2;
 1055: 		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 1056: 		if (temp == NULL) {
 1057: 		    xmlGenericError(xmlGenericErrorContext,
 1058: 			    "xmlSaveUri: out of memory\n");
 1059: 		    xmlFree(ret);
 1060: 		    return(NULL);
 1061: 		}
 1062: 		ret = temp;
 1063: 	    }
 1064: 	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
 1065: 		ret[len++] = *p++;
 1066: 	    else {
 1067: 		int val = *(unsigned char *)p++;
 1068: 		int hi = val / 0x10, lo = val % 0x10;
 1069: 		ret[len++] = '%';
 1070: 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
 1071: 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
 1072: 	    }
 1073: 	}
 1074:     } else {
 1075: 	if (uri->server != NULL) {
 1076: 	    if (len + 3 >= max) {
 1077: 		max *= 2;
 1078: 		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 1079: 		if (temp == NULL) {
 1080: 		    xmlGenericError(xmlGenericErrorContext,
 1081: 			    "xmlSaveUri: out of memory\n");
 1082:                   xmlFree(ret);  
 1083: 		    return(NULL);
 1084: 		}
 1085: 		ret = temp;
 1086: 	    }
 1087: 	    ret[len++] = '/';
 1088: 	    ret[len++] = '/';
 1089: 	    if (uri->user != NULL) {
 1090: 		p = uri->user;
 1091: 		while (*p != 0) {
 1092: 		    if (len + 3 >= max) {
 1093: 			max *= 2;
 1094: 			temp = (xmlChar *) xmlRealloc(ret,
 1095: 				(max + 1) * sizeof(xmlChar));
 1096: 			if (temp == NULL) {
 1097: 			    xmlGenericError(xmlGenericErrorContext,
 1098: 				    "xmlSaveUri: out of memory\n");
 1099: 			    xmlFree(ret);
 1100: 			    return(NULL);
 1101: 			}
 1102: 			ret = temp;
 1103: 		    }
 1104: 		    if ((IS_UNRESERVED(*(p))) ||
 1105: 			((*(p) == ';')) || ((*(p) == ':')) ||
 1106: 			((*(p) == '&')) || ((*(p) == '=')) ||
 1107: 			((*(p) == '+')) || ((*(p) == '$')) ||
 1108: 			((*(p) == ',')))
 1109: 			ret[len++] = *p++;
 1110: 		    else {
 1111: 			int val = *(unsigned char *)p++;
 1112: 			int hi = val / 0x10, lo = val % 0x10;
 1113: 			ret[len++] = '%';
 1114: 			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
 1115: 			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
 1116: 		    }
 1117: 		}
 1118: 		if (len + 3 >= max) {
 1119: 		    max *= 2;
 1120: 		    temp = (xmlChar *) xmlRealloc(ret,
 1121: 			    (max + 1) * sizeof(xmlChar));
 1122: 		    if (temp == NULL) {
 1123: 			xmlGenericError(xmlGenericErrorContext,
 1124: 				"xmlSaveUri: out of memory\n");
 1125: 			xmlFree(ret);
 1126: 			return(NULL);
 1127: 		    }
 1128: 		    ret = temp;
 1129: 		}
 1130: 		ret[len++] = '@';
 1131: 	    }
 1132: 	    p = uri->server;
 1133: 	    while (*p != 0) {
 1134: 		if (len >= max) {
 1135: 		    max *= 2;
 1136: 		    temp = (xmlChar *) xmlRealloc(ret,
 1137: 			    (max + 1) * sizeof(xmlChar));
 1138: 		    if (temp == NULL) {
 1139: 			xmlGenericError(xmlGenericErrorContext,
 1140: 				"xmlSaveUri: out of memory\n");
 1141: 			xmlFree(ret);
 1142: 			return(NULL);
 1143: 		    }
 1144: 		    ret = temp;
 1145: 		}
 1146: 		ret[len++] = *p++;
 1147: 	    }
 1148: 	    if (uri->port > 0) {
 1149: 		if (len + 10 >= max) {
 1150: 		    max *= 2;
 1151: 		    temp = (xmlChar *) xmlRealloc(ret,
 1152: 			    (max + 1) * sizeof(xmlChar));
 1153: 		    if (temp == NULL) {
 1154: 			xmlGenericError(xmlGenericErrorContext,
 1155: 				"xmlSaveUri: out of memory\n");
 1156:                      xmlFree(ret);
 1157: 			return(NULL);
 1158: 		    }
 1159: 		    ret = temp;
 1160: 		}
 1161: 		len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
 1162: 	    }
 1163: 	} else if (uri->authority != NULL) {
 1164: 	    if (len + 3 >= max) {
 1165: 		max *= 2;
 1166: 		temp = (xmlChar *) xmlRealloc(ret,
 1167: 			(max + 1) * sizeof(xmlChar));
 1168: 		if (temp == NULL) {
 1169: 			xmlGenericError(xmlGenericErrorContext,
 1170: 				"xmlSaveUri: out of memory\n");
 1171:                      xmlFree(ret);
 1172: 			return(NULL);
 1173: 		    }
 1174: 		    ret = temp;
 1175: 	    }
 1176: 	    ret[len++] = '/';
 1177: 	    ret[len++] = '/';
 1178: 	    p = uri->authority;
 1179: 	    while (*p != 0) {
 1180: 		if (len + 3 >= max) {
 1181: 		    max *= 2;
 1182: 		    temp = (xmlChar *) xmlRealloc(ret,
 1183: 			    (max + 1) * sizeof(xmlChar));
 1184: 		    if (temp == NULL) {
 1185: 			xmlGenericError(xmlGenericErrorContext,
 1186: 				"xmlSaveUri: out of memory\n");
 1187:                      xmlFree(ret);
 1188: 			return(NULL);
 1189: 		    }
 1190: 		    ret = temp;
 1191: 		}
 1192: 		if ((IS_UNRESERVED(*(p))) ||
 1193:                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
 1194:                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
 1195:                     ((*(p) == '=')) || ((*(p) == '+')))
 1196: 		    ret[len++] = *p++;
 1197: 		else {
 1198: 		    int val = *(unsigned char *)p++;
 1199: 		    int hi = val / 0x10, lo = val % 0x10;
 1200: 		    ret[len++] = '%';
 1201: 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
 1202: 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
 1203: 		}
 1204: 	    }
 1205: 	} else if (uri->scheme != NULL) {
 1206: 	    if (len + 3 >= max) {
 1207: 		max *= 2;
 1208: 		temp = (xmlChar *) xmlRealloc(ret,
 1209: 			(max + 1) * sizeof(xmlChar));
 1210: 		if (temp == NULL) {
 1211: 			xmlGenericError(xmlGenericErrorContext,
 1212: 				"xmlSaveUri: out of memory\n");
 1213:                      xmlFree(ret);
 1214: 			return(NULL);
 1215: 		    }
 1216: 		    ret = temp;
 1217: 	    }
 1218: 	    ret[len++] = '/';
 1219: 	    ret[len++] = '/';
 1220: 	}
 1221: 	if (uri->path != NULL) {
 1222: 	    p = uri->path;
 1223: 	    /*
 1224: 	     * the colon in file:///d: should not be escaped or
 1225: 	     * Windows accesses fail later.
 1226: 	     */
 1227: 	    if ((uri->scheme != NULL) &&
 1228: 		(p[0] == '/') &&
 1229: 		(((p[1] >= 'a') && (p[1] <= 'z')) ||
 1230: 		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
 1231: 		(p[2] == ':') &&
 1232: 	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
 1233: 		if (len + 3 >= max) {
 1234: 		    max *= 2;
 1235: 		    ret = (xmlChar *) xmlRealloc(ret,
 1236: 			    (max + 1) * sizeof(xmlChar));
 1237: 		    if (ret == NULL) {
 1238: 			xmlGenericError(xmlGenericErrorContext,
 1239: 				"xmlSaveUri: out of memory\n");
 1240: 			return(NULL);
 1241: 		    }
 1242: 		}
 1243: 		ret[len++] = *p++;
 1244: 		ret[len++] = *p++;
 1245: 		ret[len++] = *p++;
 1246: 	    }
 1247: 	    while (*p != 0) {
 1248: 		if (len + 3 >= max) {
 1249: 		    max *= 2;
 1250: 		    temp = (xmlChar *) xmlRealloc(ret,
 1251: 			    (max + 1) * sizeof(xmlChar));
 1252: 		    if (temp == NULL) {
 1253: 			xmlGenericError(xmlGenericErrorContext,
 1254: 				"xmlSaveUri: out of memory\n");
 1255:                      xmlFree(ret);
 1256: 			return(NULL);
 1257: 		    }
 1258: 		    ret = temp;
 1259: 		}
 1260: 		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
 1261:                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
 1262: 	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
 1263: 	            ((*(p) == ',')))
 1264: 		    ret[len++] = *p++;
 1265: 		else {
 1266: 		    int val = *(unsigned char *)p++;
 1267: 		    int hi = val / 0x10, lo = val % 0x10;
 1268: 		    ret[len++] = '%';
 1269: 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
 1270: 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
 1271: 		}
 1272: 	    }
 1273: 	}
 1274: 	if (uri->query_raw != NULL) {
 1275: 	    if (len + 1 >= max) {
 1276: 		max *= 2;
 1277: 		temp = (xmlChar *) xmlRealloc(ret,
 1278: 			(max + 1) * sizeof(xmlChar));
 1279: 		if (temp == NULL) {
 1280: 			xmlGenericError(xmlGenericErrorContext,
 1281: 				"xmlSaveUri: out of memory\n");
 1282:                      xmlFree(ret);
 1283: 			return(NULL);
 1284: 		    }
 1285: 		    ret = temp;
 1286: 	    }
 1287: 	    ret[len++] = '?';
 1288: 	    p = uri->query_raw;
 1289: 	    while (*p != 0) {
 1290: 		if (len + 1 >= max) {
 1291: 		    max *= 2;
 1292: 		    temp = (xmlChar *) xmlRealloc(ret,
 1293: 			    (max + 1) * sizeof(xmlChar));
 1294: 		    if (temp == NULL) {
 1295: 			xmlGenericError(xmlGenericErrorContext,
 1296: 				"xmlSaveUri: out of memory\n");
 1297:                      xmlFree(ret);
 1298: 			return(NULL);
 1299: 		    }
 1300: 		    ret = temp;
 1301: 		}
 1302: 		ret[len++] = *p++;
 1303: 	    }
 1304: 	} else if (uri->query != NULL) {
 1305: 	    if (len + 3 >= max) {
 1306: 		max *= 2;
 1307: 		temp = (xmlChar *) xmlRealloc(ret,
 1308: 			(max + 1) * sizeof(xmlChar));
 1309: 		if (temp == NULL) {
 1310: 			xmlGenericError(xmlGenericErrorContext,
 1311: 				"xmlSaveUri: out of memory\n");
 1312:                      xmlFree(ret);
 1313: 			return(NULL);
 1314: 		    }
 1315: 		    ret = temp;
 1316: 	    }
 1317: 	    ret[len++] = '?';
 1318: 	    p = uri->query;
 1319: 	    while (*p != 0) {
 1320: 		if (len + 3 >= max) {
 1321: 		    max *= 2;
 1322: 		    temp = (xmlChar *) xmlRealloc(ret,
 1323: 			    (max + 1) * sizeof(xmlChar));
 1324: 		    if (temp == NULL) {
 1325: 			xmlGenericError(xmlGenericErrorContext,
 1326: 				"xmlSaveUri: out of memory\n");
 1327:                      xmlFree(ret);
 1328: 			return(NULL);
 1329: 		    }
 1330: 		    ret = temp;
 1331: 		}
 1332: 		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 
 1333: 		    ret[len++] = *p++;
 1334: 		else {
 1335: 		    int val = *(unsigned char *)p++;
 1336: 		    int hi = val / 0x10, lo = val % 0x10;
 1337: 		    ret[len++] = '%';
 1338: 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
 1339: 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
 1340: 		}
 1341: 	    }
 1342: 	}
 1343:     }
 1344:     if (uri->fragment != NULL) {
 1345: 	if (len + 3 >= max) {
 1346: 	    max *= 2;
 1347: 	    temp = (xmlChar *) xmlRealloc(ret,
 1348: 		    (max + 1) * sizeof(xmlChar));
 1349: 	    if (temp == NULL) {
 1350: 			xmlGenericError(xmlGenericErrorContext,
 1351: 				"xmlSaveUri: out of memory\n");
 1352:                      xmlFree(ret);
 1353: 			return(NULL);
 1354: 		    }
 1355: 		    ret = temp;
 1356: 	}
 1357: 	ret[len++] = '#';
 1358: 	p = uri->fragment;
 1359: 	while (*p != 0) {
 1360: 	    if (len + 3 >= max) {
 1361: 		max *= 2;
 1362: 		temp = (xmlChar *) xmlRealloc(ret,
 1363: 			(max + 1) * sizeof(xmlChar));
 1364: 		if (temp == NULL) {
 1365: 			xmlGenericError(xmlGenericErrorContext,
 1366: 				"xmlSaveUri: out of memory\n");
 1367:                      xmlFree(ret);
 1368: 			return(NULL);
 1369: 		    }
 1370: 		    ret = temp;
 1371: 	    }
 1372: 	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 
 1373: 		ret[len++] = *p++;
 1374: 	    else {
 1375: 		int val = *(unsigned char *)p++;
 1376: 		int hi = val / 0x10, lo = val % 0x10;
 1377: 		ret[len++] = '%';
 1378: 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
 1379: 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
 1380: 	    }
 1381: 	}
 1382:     }
 1383:     if (len >= max) {
 1384: 	max *= 2;
 1385: 	temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
 1386: 	if (temp == NULL) {
 1387: 			xmlGenericError(xmlGenericErrorContext,
 1388: 				"xmlSaveUri: out of memory\n");
 1389:                      xmlFree(ret);
 1390: 			return(NULL);
 1391: 		    }
 1392: 		    ret = temp;
 1393:     }
 1394:     ret[len] = 0;
 1395:     return(ret);
 1396: }
 1397: 
 1398: /**
 1399:  * xmlPrintURI:
 1400:  * @stream:  a FILE* for the output
 1401:  * @uri:  pointer to an xmlURI
 1402:  *
 1403:  * Prints the URI in the stream @stream.
 1404:  */
 1405: void
 1406: xmlPrintURI(FILE *stream, xmlURIPtr uri) {
 1407:     xmlChar *out;
 1408: 
 1409:     out = xmlSaveUri(uri);
 1410:     if (out != NULL) {
 1411: 	fprintf(stream, "%s", (char *) out);
 1412: 	xmlFree(out);
 1413:     }
 1414: }
 1415: 
 1416: /**
 1417:  * xmlCleanURI:
 1418:  * @uri:  pointer to an xmlURI
 1419:  *
 1420:  * Make sure the xmlURI struct is free of content
 1421:  */
 1422: static void
 1423: xmlCleanURI(xmlURIPtr uri) {
 1424:     if (uri == NULL) return;
 1425: 
 1426:     if (uri->scheme != NULL) xmlFree(uri->scheme);
 1427:     uri->scheme = NULL;
 1428:     if (uri->server != NULL) xmlFree(uri->server);
 1429:     uri->server = NULL;
 1430:     if (uri->user != NULL) xmlFree(uri->user);
 1431:     uri->user = NULL;
 1432:     if (uri->path != NULL) xmlFree(uri->path);
 1433:     uri->path = NULL;
 1434:     if (uri->fragment != NULL) xmlFree(uri->fragment);
 1435:     uri->fragment = NULL;
 1436:     if (uri->opaque != NULL) xmlFree(uri->opaque);
 1437:     uri->opaque = NULL;
 1438:     if (uri->authority != NULL) xmlFree(uri->authority);
 1439:     uri->authority = NULL;
 1440:     if (uri->query != NULL) xmlFree(uri->query);
 1441:     uri->query = NULL;
 1442:     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
 1443:     uri->query_raw = NULL;
 1444: }
 1445: 
 1446: /**
 1447:  * xmlFreeURI:
 1448:  * @uri:  pointer to an xmlURI
 1449:  *
 1450:  * Free up the xmlURI struct
 1451:  */
 1452: void
 1453: xmlFreeURI(xmlURIPtr uri) {
 1454:     if (uri == NULL) return;
 1455: 
 1456:     if (uri->scheme != NULL) xmlFree(uri->scheme);
 1457:     if (uri->server != NULL) xmlFree(uri->server);
 1458:     if (uri->user != NULL) xmlFree(uri->user);
 1459:     if (uri->path != NULL) xmlFree(uri->path);
 1460:     if (uri->fragment != NULL) xmlFree(uri->fragment);
 1461:     if (uri->opaque != NULL) xmlFree(uri->opaque);
 1462:     if (uri->authority != NULL) xmlFree(uri->authority);
 1463:     if (uri->query != NULL) xmlFree(uri->query);
 1464:     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
 1465:     xmlFree(uri);
 1466: }
 1467: 
 1468: /************************************************************************
 1469:  *									*
 1470:  *			Helper functions				*
 1471:  *									*
 1472:  ************************************************************************/
 1473: 
 1474: /**
 1475:  * xmlNormalizeURIPath:
 1476:  * @path:  pointer to the path string
 1477:  *
 1478:  * Applies the 5 normalization steps to a path string--that is, RFC 2396
 1479:  * Section 5.2, steps 6.c through 6.g.
 1480:  *
 1481:  * Normalization occurs directly on the string, no new allocation is done
 1482:  *
 1483:  * Returns 0 or an error code
 1484:  */
 1485: int
 1486: xmlNormalizeURIPath(char *path) {
 1487:     char *cur, *out;
 1488: 
 1489:     if (path == NULL)
 1490: 	return(-1);
 1491: 
 1492:     /* Skip all initial "/" chars.  We want to get to the beginning of the
 1493:      * first non-empty segment.
 1494:      */
 1495:     cur = path;
 1496:     while (cur[0] == '/')
 1497:       ++cur;
 1498:     if (cur[0] == '\0')
 1499:       return(0);
 1500: 
 1501:     /* Keep everything we've seen so far.  */
 1502:     out = cur;
 1503: 
 1504:     /*
 1505:      * Analyze each segment in sequence for cases (c) and (d).
 1506:      */
 1507:     while (cur[0] != '\0') {
 1508: 	/*
 1509: 	 * c) All occurrences of "./", where "." is a complete path segment,
 1510: 	 *    are removed from the buffer string.
 1511: 	 */
 1512: 	if ((cur[0] == '.') && (cur[1] == '/')) {
 1513: 	    cur += 2;
 1514: 	    /* '//' normalization should be done at this point too */
 1515: 	    while (cur[0] == '/')
 1516: 		cur++;
 1517: 	    continue;
 1518: 	}
 1519: 
 1520: 	/*
 1521: 	 * d) If the buffer string ends with "." as a complete path segment,
 1522: 	 *    that "." is removed.
 1523: 	 */
 1524: 	if ((cur[0] == '.') && (cur[1] == '\0'))
 1525: 	    break;
 1526: 
 1527: 	/* Otherwise keep the segment.  */
 1528: 	while (cur[0] != '/') {
 1529:             if (cur[0] == '\0')
 1530:               goto done_cd;
 1531: 	    (out++)[0] = (cur++)[0];
 1532: 	}
 1533: 	/* nomalize // */
 1534: 	while ((cur[0] == '/') && (cur[1] == '/'))
 1535: 	    cur++;
 1536: 
 1537:         (out++)[0] = (cur++)[0];
 1538:     }
 1539:  done_cd:
 1540:     out[0] = '\0';
 1541: 
 1542:     /* Reset to the beginning of the first segment for the next sequence.  */
 1543:     cur = path;
 1544:     while (cur[0] == '/')
 1545:       ++cur;
 1546:     if (cur[0] == '\0')
 1547: 	return(0);
 1548: 
 1549:     /*
 1550:      * Analyze each segment in sequence for cases (e) and (f).
 1551:      *
 1552:      * e) All occurrences of "<segment>/../", where <segment> is a
 1553:      *    complete path segment not equal to "..", are removed from the
 1554:      *    buffer string.  Removal of these path segments is performed
 1555:      *    iteratively, removing the leftmost matching pattern on each
 1556:      *    iteration, until no matching pattern remains.
 1557:      *
 1558:      * f) If the buffer string ends with "<segment>/..", where <segment>
 1559:      *    is a complete path segment not equal to "..", that
 1560:      *    "<segment>/.." is removed.
 1561:      *
 1562:      * To satisfy the "iterative" clause in (e), we need to collapse the
 1563:      * string every time we find something that needs to be removed.  Thus,
 1564:      * we don't need to keep two pointers into the string: we only need a
 1565:      * "current position" pointer.
 1566:      */
 1567:     while (1) {
 1568:         char *segp, *tmp;
 1569: 
 1570:         /* At the beginning of each iteration of this loop, "cur" points to
 1571:          * the first character of the segment we want to examine.
 1572:          */
 1573: 
 1574:         /* Find the end of the current segment.  */
 1575:         segp = cur;
 1576:         while ((segp[0] != '/') && (segp[0] != '\0'))
 1577:           ++segp;
 1578: 
 1579:         /* If this is the last segment, we're done (we need at least two
 1580:          * segments to meet the criteria for the (e) and (f) cases).
 1581:          */
 1582:         if (segp[0] == '\0')
 1583:           break;
 1584: 
 1585:         /* If the first segment is "..", or if the next segment _isn't_ "..",
 1586:          * keep this segment and try the next one.
 1587:          */
 1588:         ++segp;
 1589:         if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
 1590:             || ((segp[0] != '.') || (segp[1] != '.')
 1591:                 || ((segp[2] != '/') && (segp[2] != '\0')))) {
 1592:           cur = segp;
 1593:           continue;
 1594:         }
 1595: 
 1596:         /* If we get here, remove this segment and the next one and back up
 1597:          * to the previous segment (if there is one), to implement the
 1598:          * "iteratively" clause.  It's pretty much impossible to back up
 1599:          * while maintaining two pointers into the buffer, so just compact
 1600:          * the whole buffer now.
 1601:          */
 1602: 
 1603:         /* If this is the end of the buffer, we're done.  */
 1604:         if (segp[2] == '\0') {
 1605:           cur[0] = '\0';
 1606:           break;
 1607:         }
 1608:         /* Valgrind complained, strcpy(cur, segp + 3); */
 1609:         /* string will overlap, do not use strcpy */
 1610:         tmp = cur;
 1611:         segp += 3;
 1612:         while ((*tmp++ = *segp++) != 0)
 1613:           ;
 1614: 
 1615:         /* If there are no previous segments, then keep going from here.  */
 1616:         segp = cur;
 1617:         while ((segp > path) && ((--segp)[0] == '/'))
 1618:           ;
 1619:         if (segp == path)
 1620:           continue;
 1621: 
 1622:         /* "segp" is pointing to the end of a previous segment; find it's
 1623:          * start.  We need to back up to the previous segment and start
 1624:          * over with that to handle things like "foo/bar/../..".  If we
 1625:          * don't do this, then on the first pass we'll remove the "bar/..",
 1626:          * but be pointing at the second ".." so we won't realize we can also
 1627:          * remove the "foo/..".
 1628:          */
 1629:         cur = segp;
 1630:         while ((cur > path) && (cur[-1] != '/'))
 1631:           --cur;
 1632:     }
 1633:     out[0] = '\0';
 1634: 
 1635:     /*
 1636:      * g) If the resulting buffer string still begins with one or more
 1637:      *    complete path segments of "..", then the reference is
 1638:      *    considered to be in error. Implementations may handle this
 1639:      *    error by retaining these components in the resolved path (i.e.,
 1640:      *    treating them as part of the final URI), by removing them from
 1641:      *    the resolved path (i.e., discarding relative levels above the
 1642:      *    root), or by avoiding traversal of the reference.
 1643:      *
 1644:      * We discard them from the final path.
 1645:      */
 1646:     if (path[0] == '/') {
 1647:       cur = path;
 1648:       while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
 1649:              && ((cur[3] == '/') || (cur[3] == '\0')))
 1650: 	cur += 3;
 1651: 
 1652:       if (cur != path) {
 1653: 	out = path;
 1654: 	while (cur[0] != '\0')
 1655:           (out++)[0] = (cur++)[0];
 1656: 	out[0] = 0;
 1657:       }
 1658:     }
 1659: 
 1660:     return(0);
 1661: }
 1662: 
 1663: static int is_hex(char c) {
 1664:     if (((c >= '0') && (c <= '9')) ||
 1665:         ((c >= 'a') && (c <= 'f')) ||
 1666:         ((c >= 'A') && (c <= 'F')))
 1667: 	return(1);
 1668:     return(0);
 1669: }
 1670: 
 1671: /**
 1672:  * xmlURIUnescapeString:
 1673:  * @str:  the string to unescape
 1674:  * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 1675:  * @target:  optional destination buffer
 1676:  *
 1677:  * Unescaping routine, but does not check that the string is an URI. The
 1678:  * output is a direct unsigned char translation of %XX values (no encoding)
 1679:  * Note that the length of the result can only be smaller or same size as
 1680:  * the input string.
 1681:  *
 1682:  * Returns a copy of the string, but unescaped, will return NULL only in case
 1683:  * of error
 1684:  */
 1685: char *
 1686: xmlURIUnescapeString(const char *str, int len, char *target) {
 1687:     char *ret, *out;
 1688:     const char *in;
 1689: 
 1690:     if (str == NULL)
 1691: 	return(NULL);
 1692:     if (len <= 0) len = strlen(str);
 1693:     if (len < 0) return(NULL);
 1694: 
 1695:     if (target == NULL) {
 1696: 	ret = (char *) xmlMallocAtomic(len + 1);
 1697: 	if (ret == NULL) {
 1698: 	    xmlGenericError(xmlGenericErrorContext,
 1699: 		    "xmlURIUnescapeString: out of memory\n");
 1700: 	    return(NULL);
 1701: 	}
 1702:     } else
 1703: 	ret = target;
 1704:     in = str;
 1705:     out = ret;
 1706:     while(len > 0) {
 1707: 	if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
 1708: 	    in++;
 1709: 	    if ((*in >= '0') && (*in <= '9')) 
 1710: 	        *out = (*in - '0');
 1711: 	    else if ((*in >= 'a') && (*in <= 'f'))
 1712: 	        *out = (*in - 'a') + 10;
 1713: 	    else if ((*in >= 'A') && (*in <= 'F'))
 1714: 	        *out = (*in - 'A') + 10;
 1715: 	    in++;
 1716: 	    if ((*in >= '0') && (*in <= '9')) 
 1717: 	        *out = *out * 16 + (*in - '0');
 1718: 	    else if ((*in >= 'a') && (*in <= 'f'))
 1719: 	        *out = *out * 16 + (*in - 'a') + 10;
 1720: 	    else if ((*in >= 'A') && (*in <= 'F'))
 1721: 	        *out = *out * 16 + (*in - 'A') + 10;
 1722: 	    in++;
 1723: 	    len -= 3;
 1724: 	    out++;
 1725: 	} else {
 1726: 	    *out++ = *in++;
 1727: 	    len--;
 1728: 	}
 1729:     }
 1730:     *out = 0;
 1731:     return(ret);
 1732: }
 1733: 
 1734: /**
 1735:  * xmlURIEscapeStr:
 1736:  * @str:  string to escape
 1737:  * @list: exception list string of chars not to escape
 1738:  *
 1739:  * This routine escapes a string to hex, ignoring reserved characters (a-z)
 1740:  * and the characters in the exception list.
 1741:  *
 1742:  * Returns a new escaped string or NULL in case of error.
 1743:  */
 1744: xmlChar *
 1745: xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
 1746:     xmlChar *ret, ch;
 1747:     xmlChar *temp;
 1748:     const xmlChar *in;
 1749: 
 1750:     unsigned int len, out;
 1751: 
 1752:     if (str == NULL)
 1753: 	return(NULL);
 1754:     if (str[0] == 0)
 1755: 	return(xmlStrdup(str));
 1756:     len = xmlStrlen(str);
 1757:     if (!(len > 0)) return(NULL);
 1758: 
 1759:     len += 20;
 1760:     ret = (xmlChar *) xmlMallocAtomic(len);
 1761:     if (ret == NULL) {
 1762: 	xmlGenericError(xmlGenericErrorContext,
 1763: 		"xmlURIEscapeStr: out of memory\n");
 1764: 	return(NULL);
 1765:     }
 1766:     in = (const xmlChar *) str;
 1767:     out = 0;
 1768:     while(*in != 0) {
 1769: 	if (len - out <= 3) {
 1770: 	    len += 20;
 1771: 	    temp = (xmlChar *) xmlRealloc(ret, len);
 1772: 	    if (temp == NULL) {
 1773: 		xmlGenericError(xmlGenericErrorContext,
 1774: 			"xmlURIEscapeStr: out of memory\n");
 1775: 		xmlFree(ret);
 1776: 		return(NULL);
 1777: 	    }
 1778: 	    ret = temp;
 1779: 	}
 1780: 
 1781: 	ch = *in;
 1782: 
 1783: 	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
 1784: 	    unsigned char val;
 1785: 	    ret[out++] = '%';
 1786: 	    val = ch >> 4;
 1787: 	    if (val <= 9)
 1788: 		ret[out++] = '0' + val;
 1789: 	    else
 1790: 		ret[out++] = 'A' + val - 0xA;
 1791: 	    val = ch & 0xF;
 1792: 	    if (val <= 9)
 1793: 		ret[out++] = '0' + val;
 1794: 	    else
 1795: 		ret[out++] = 'A' + val - 0xA;
 1796: 	    in++;
 1797: 	} else {
 1798: 	    ret[out++] = *in++;
 1799: 	}
 1800: 
 1801:     }
 1802:     ret[out] = 0;
 1803:     return(ret);
 1804: }
 1805: 
 1806: /**
 1807:  * xmlURIEscape:
 1808:  * @str:  the string of the URI to escape
 1809:  *
 1810:  * Escaping routine, does not do validity checks !
 1811:  * It will try to escape the chars needing this, but this is heuristic
 1812:  * based it's impossible to be sure.
 1813:  *
 1814:  * Returns an copy of the string, but escaped
 1815:  *
 1816:  * 25 May 2001
 1817:  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
 1818:  * according to RFC2396.
 1819:  *   - Carl Douglas
 1820:  */
 1821: xmlChar *
 1822: xmlURIEscape(const xmlChar * str)
 1823: {
 1824:     xmlChar *ret, *segment = NULL;
 1825:     xmlURIPtr uri;
 1826:     int ret2;
 1827: 
 1828: #define NULLCHK(p) if(!p) { \
 1829:                    xmlGenericError(xmlGenericErrorContext, \
 1830:                         "xmlURIEscape: out of memory\n"); \
 1831:                         xmlFreeURI(uri); \
 1832:                         return NULL; } \
 1833: 
 1834:     if (str == NULL)
 1835:         return (NULL);
 1836: 
 1837:     uri = xmlCreateURI();
 1838:     if (uri != NULL) {
 1839: 	/*
 1840: 	 * Allow escaping errors in the unescaped form
 1841: 	 */
 1842:         uri->cleanup = 1;
 1843:         ret2 = xmlParseURIReference(uri, (const char *)str);
 1844:         if (ret2) {
 1845:             xmlFreeURI(uri);
 1846:             return (NULL);
 1847:         }
 1848:     }
 1849: 
 1850:     if (!uri)
 1851:         return NULL;
 1852: 
 1853:     ret = NULL;
 1854: 
 1855:     if (uri->scheme) {
 1856:         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
 1857:         NULLCHK(segment)
 1858:         ret = xmlStrcat(ret, segment);
 1859:         ret = xmlStrcat(ret, BAD_CAST ":");
 1860:         xmlFree(segment);
 1861:     }
 1862: 
 1863:     if (uri->authority) {
 1864:         segment =
 1865:             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
 1866:         NULLCHK(segment)
 1867:         ret = xmlStrcat(ret, BAD_CAST "//");
 1868:         ret = xmlStrcat(ret, segment);
 1869:         xmlFree(segment);
 1870:     }
 1871: 
 1872:     if (uri->user) {
 1873:         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
 1874:         NULLCHK(segment)
 1875: 		ret = xmlStrcat(ret,BAD_CAST "//");	
 1876:         ret = xmlStrcat(ret, segment);
 1877:         ret = xmlStrcat(ret, BAD_CAST "@");
 1878:         xmlFree(segment);
 1879:     }
 1880: 
 1881:     if (uri->server) {
 1882:         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
 1883:         NULLCHK(segment)
 1884: 		if (uri->user == NULL)
 1885: 		ret = xmlStrcat(ret, BAD_CAST "//");
 1886:         ret = xmlStrcat(ret, segment);
 1887:         xmlFree(segment);
 1888:     }
 1889: 
 1890:     if (uri->port) {
 1891:         xmlChar port[10];
 1892: 
 1893:         snprintf((char *) port, 10, "%d", uri->port);
 1894:         ret = xmlStrcat(ret, BAD_CAST ":");
 1895:         ret = xmlStrcat(ret, port);
 1896:     }
 1897: 
 1898:     if (uri->path) {
 1899:         segment =
 1900:             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
 1901:         NULLCHK(segment)
 1902:         ret = xmlStrcat(ret, segment);
 1903:         xmlFree(segment);
 1904:     }
 1905: 
 1906:     if (uri->query_raw) {
 1907:         ret = xmlStrcat(ret, BAD_CAST "?");
 1908:         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
 1909:     }
 1910:     else if (uri->query) {
 1911:         segment =
 1912:             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
 1913:         NULLCHK(segment)
 1914:         ret = xmlStrcat(ret, BAD_CAST "?");
 1915:         ret = xmlStrcat(ret, segment);
 1916:         xmlFree(segment);
 1917:     }
 1918: 
 1919:     if (uri->opaque) {
 1920:         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
 1921:         NULLCHK(segment)
 1922:         ret = xmlStrcat(ret, segment);
 1923:         xmlFree(segment);
 1924:     }
 1925: 
 1926:     if (uri->fragment) {
 1927:         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
 1928:         NULLCHK(segment)
 1929:         ret = xmlStrcat(ret, BAD_CAST "#");
 1930:         ret = xmlStrcat(ret, segment);
 1931:         xmlFree(segment);
 1932:     }
 1933: 
 1934:     xmlFreeURI(uri);
 1935: #undef NULLCHK
 1936: 
 1937:     return (ret);
 1938: }
 1939: 
 1940: /************************************************************************
 1941:  *									*
 1942:  *			Public functions				*
 1943:  *									*
 1944:  ************************************************************************/
 1945: 
 1946: /**
 1947:  * xmlBuildURI:
 1948:  * @URI:  the URI instance found in the document
 1949:  * @base:  the base value
 1950:  *
  1951:  * Computes the final URI of the reference done by checking that
 1952:  * the given URI is valid, and building the final URI using the
 1953:  * base URI. This is processed according to section 5.2 of the 
 1954:  * RFC 2396
 1955:  *
 1956:  * 5.2. Resolving Relative References to Absolute Form
 1957:  *
 1958:  * Returns a new URI string (to be freed by the caller) or NULL in case
 1959:  *         of error.
 1960:  */
 1961: xmlChar *
 1962: xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
 1963:     xmlChar *val = NULL;
 1964:     int ret, len, indx, cur, out;
 1965:     xmlURIPtr ref = NULL;
 1966:     xmlURIPtr bas = NULL;
 1967:     xmlURIPtr res = NULL;
 1968: 
 1969:     /*
 1970:      * 1) The URI reference is parsed into the potential four components and
 1971:      *    fragment identifier, as described in Section 4.3.
 1972:      *
 1973:      *    NOTE that a completely empty URI is treated by modern browsers
 1974:      *    as a reference to "." rather than as a synonym for the current
 1975:      *    URI.  Should we do that here?
 1976:      */
 1977:     if (URI == NULL) 
 1978: 	ret = -1;
 1979:     else {
 1980: 	if (*URI) {
 1981: 	    ref = xmlCreateURI();
 1982: 	    if (ref == NULL)
 1983: 		goto done;
 1984: 	    ret = xmlParseURIReference(ref, (const char *) URI);
 1985: 	}
 1986: 	else
 1987: 	    ret = 0;
 1988:     }
 1989:     if (ret != 0)
 1990: 	goto done;
 1991:     if ((ref != NULL) && (ref->scheme != NULL)) {
 1992: 	/*
 1993: 	 * The URI is absolute don't modify.
 1994: 	 */
 1995: 	val = xmlStrdup(URI);
 1996: 	goto done;
 1997:     }
 1998:     if (base == NULL)
 1999: 	ret = -1;
 2000:     else {
 2001: 	bas = xmlCreateURI();
 2002: 	if (bas == NULL)
 2003: 	    goto done;
 2004: 	ret = xmlParseURIReference(bas, (const char *) base);
 2005:     }
 2006:     if (ret != 0) {
 2007: 	if (ref)
 2008: 	    val = xmlSaveUri(ref);
 2009: 	goto done;
 2010:     }
 2011:     if (ref == NULL) {
 2012: 	/*
 2013: 	 * the base fragment must be ignored
 2014: 	 */
 2015: 	if (bas->fragment != NULL) {
 2016: 	    xmlFree(bas->fragment);
 2017: 	    bas->fragment = NULL;
 2018: 	}
 2019: 	val = xmlSaveUri(bas);
 2020: 	goto done;
 2021:     }
 2022: 
 2023:     /*
 2024:      * 2) If the path component is empty and the scheme, authority, and
 2025:      *    query components are undefined, then it is a reference to the
 2026:      *    current document and we are done.  Otherwise, the reference URI's
 2027:      *    query and fragment components are defined as found (or not found)
 2028:      *    within the URI reference and not inherited from the base URI.
 2029:      *
 2030:      *    NOTE that in modern browsers, the parsing differs from the above
 2031:      *    in the following aspect:  the query component is allowed to be
 2032:      *    defined while still treating this as a reference to the current
 2033:      *    document.
 2034:      */
 2035:     res = xmlCreateURI();
 2036:     if (res == NULL)
 2037: 	goto done;
 2038:     if ((ref->scheme == NULL) && (ref->path == NULL) &&
 2039: 	((ref->authority == NULL) && (ref->server == NULL))) {
 2040: 	if (bas->scheme != NULL)
 2041: 	    res->scheme = xmlMemStrdup(bas->scheme);
 2042: 	if (bas->authority != NULL)
 2043: 	    res->authority = xmlMemStrdup(bas->authority);
 2044: 	else if (bas->server != NULL) {
 2045: 	    res->server = xmlMemStrdup(bas->server);
 2046: 	    if (bas->user != NULL)
 2047: 		res->user = xmlMemStrdup(bas->user);
 2048: 	    res->port = bas->port;		
 2049: 	}
 2050: 	if (bas->path != NULL)
 2051: 	    res->path = xmlMemStrdup(bas->path);
 2052: 	if (ref->query_raw != NULL)
 2053: 	    res->query_raw = xmlMemStrdup (ref->query_raw);
 2054: 	else if (ref->query != NULL)
 2055: 	    res->query = xmlMemStrdup(ref->query);
 2056: 	else if (bas->query_raw != NULL)
 2057: 	    res->query_raw = xmlMemStrdup(bas->query_raw);
 2058: 	else if (bas->query != NULL)
 2059: 	    res->query = xmlMemStrdup(bas->query);
 2060: 	if (ref->fragment != NULL)
 2061: 	    res->fragment = xmlMemStrdup(ref->fragment);
 2062: 	goto step_7;
 2063:     }
 2064: 
 2065:     /*
 2066:      * 3) If the scheme component is defined, indicating that the reference
 2067:      *    starts with a scheme name, then the reference is interpreted as an
 2068:      *    absolute URI and we are done.  Otherwise, the reference URI's
 2069:      *    scheme is inherited from the base URI's scheme component.
 2070:      */
 2071:     if (ref->scheme != NULL) {
 2072: 	val = xmlSaveUri(ref);
 2073: 	goto done;
 2074:     }
 2075:     if (bas->scheme != NULL)
 2076: 	res->scheme = xmlMemStrdup(bas->scheme);
 2077:  
 2078:     if (ref->query_raw != NULL)
 2079: 	res->query_raw = xmlMemStrdup(ref->query_raw);
 2080:     else if (ref->query != NULL)
 2081: 	res->query = xmlMemStrdup(ref->query);
 2082:     if (ref->fragment != NULL)
 2083: 	res->fragment = xmlMemStrdup(ref->fragment);
 2084: 
 2085:     /*
 2086:      * 4) If the authority component is defined, then the reference is a
 2087:      *    network-path and we skip to step 7.  Otherwise, the reference
 2088:      *    URI's authority is inherited from the base URI's authority
 2089:      *    component, which will also be undefined if the URI scheme does not
 2090:      *    use an authority component.
 2091:      */
 2092:     if ((ref->authority != NULL) || (ref->server != NULL)) {
 2093: 	if (ref->authority != NULL)
 2094: 	    res->authority = xmlMemStrdup(ref->authority);
 2095: 	else {
 2096: 	    res->server = xmlMemStrdup(ref->server);
 2097: 	    if (ref->user != NULL)
 2098: 		res->user = xmlMemStrdup(ref->user);
 2099:             res->port = ref->port;		
 2100: 	}
 2101: 	if (ref->path != NULL)
 2102: 	    res->path = xmlMemStrdup(ref->path);
 2103: 	goto step_7;
 2104:     }
 2105:     if (bas->authority != NULL)
 2106: 	res->authority = xmlMemStrdup(bas->authority);
 2107:     else if (bas->server != NULL) {
 2108: 	res->server = xmlMemStrdup(bas->server);
 2109: 	if (bas->user != NULL)
 2110: 	    res->user = xmlMemStrdup(bas->user);
 2111: 	res->port = bas->port;		
 2112:     }
 2113: 
 2114:     /*
 2115:      * 5) If the path component begins with a slash character ("/"), then
 2116:      *    the reference is an absolute-path and we skip to step 7.
 2117:      */
 2118:     if ((ref->path != NULL) && (ref->path[0] == '/')) {
 2119: 	res->path = xmlMemStrdup(ref->path);
 2120: 	goto step_7;
 2121:     }
 2122: 
 2123: 
 2124:     /*
 2125:      * 6) If this step is reached, then we are resolving a relative-path
 2126:      *    reference.  The relative path needs to be merged with the base
 2127:      *    URI's path.  Although there are many ways to do this, we will
 2128:      *    describe a simple method using a separate string buffer.
 2129:      *
 2130:      * Allocate a buffer large enough for the result string.
 2131:      */
 2132:     len = 2; /* extra / and 0 */
 2133:     if (ref->path != NULL)
 2134: 	len += strlen(ref->path);
 2135:     if (bas->path != NULL)
 2136: 	len += strlen(bas->path);
 2137:     res->path = (char *) xmlMallocAtomic(len);
 2138:     if (res->path == NULL) {
 2139: 	xmlGenericError(xmlGenericErrorContext,
 2140: 		"xmlBuildURI: out of memory\n");
 2141: 	goto done;
 2142:     }
 2143:     res->path[0] = 0;
 2144: 
 2145:     /*
 2146:      * a) All but the last segment of the base URI's path component is
 2147:      *    copied to the buffer.  In other words, any characters after the
 2148:      *    last (right-most) slash character, if any, are excluded.
 2149:      */
 2150:     cur = 0;
 2151:     out = 0;
 2152:     if (bas->path != NULL) {
 2153: 	while (bas->path[cur] != 0) {
 2154: 	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
 2155: 		cur++;
 2156: 	    if (bas->path[cur] == 0)
 2157: 		break;
 2158: 
 2159: 	    cur++;
 2160: 	    while (out < cur) {
 2161: 		res->path[out] = bas->path[out];
 2162: 		out++;
 2163: 	    }
 2164: 	}
 2165:     }
 2166:     res->path[out] = 0;
 2167: 
 2168:     /*
 2169:      * b) The reference's path component is appended to the buffer
 2170:      *    string.
 2171:      */
 2172:     if (ref->path != NULL && ref->path[0] != 0) {
 2173: 	indx = 0;
 2174: 	/*
 2175: 	 * Ensure the path includes a '/'
 2176: 	 */
 2177: 	if ((out == 0) && (bas->server != NULL))
 2178: 	    res->path[out++] = '/';
 2179: 	while (ref->path[indx] != 0) {
 2180: 	    res->path[out++] = ref->path[indx++];
 2181: 	}
 2182:     }
 2183:     res->path[out] = 0;
 2184: 
 2185:     /*
 2186:      * Steps c) to h) are really path normalization steps
 2187:      */
 2188:     xmlNormalizeURIPath(res->path);
 2189: 
 2190: step_7:
 2191: 
 2192:     /*
 2193:      * 7) The resulting URI components, including any inherited from the
 2194:      *    base URI, are recombined to give the absolute form of the URI
 2195:      *    reference.
 2196:      */
 2197:     val = xmlSaveUri(res);
 2198: 
 2199: done:
 2200:     if (ref != NULL)
 2201: 	xmlFreeURI(ref);
 2202:     if (bas != NULL)
 2203: 	xmlFreeURI(bas);
 2204:     if (res != NULL)
 2205: 	xmlFreeURI(res);
 2206:     return(val);
 2207: }
 2208: 
 2209: /**
 2210:  * xmlBuildRelativeURI:
 2211:  * @URI:  the URI reference under consideration
 2212:  * @base:  the base value
 2213:  *
 2214:  * Expresses the URI of the reference in terms relative to the
 2215:  * base.  Some examples of this operation include:
 2216:  *     base = "http://site1.com/docs/book1.html"
 2217:  *        URI input                        URI returned
 2218:  *     docs/pic1.gif                    pic1.gif
 2219:  *     docs/img/pic1.gif                img/pic1.gif
 2220:  *     img/pic1.gif                     ../img/pic1.gif
 2221:  *     http://site1.com/docs/pic1.gif   pic1.gif
 2222:  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
 2223:  *
 2224:  *     base = "docs/book1.html"
 2225:  *        URI input                        URI returned
 2226:  *     docs/pic1.gif                    pic1.gif
 2227:  *     docs/img/pic1.gif                img/pic1.gif
 2228:  *     img/pic1.gif                     ../img/pic1.gif
 2229:  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
 2230:  *
 2231:  *
 2232:  * Note: if the URI reference is really wierd or complicated, it may be
 2233:  *       worthwhile to first convert it into a "nice" one by calling
 2234:  *       xmlBuildURI (using 'base') before calling this routine,
 2235:  *       since this routine (for reasonable efficiency) assumes URI has
 2236:  *       already been through some validation.
 2237:  *
 2238:  * Returns a new URI string (to be freed by the caller) or NULL in case
 2239:  * error.
 2240:  */
 2241: xmlChar *
 2242: xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
 2243: {
 2244:     xmlChar *val = NULL;
 2245:     int ret;
 2246:     int ix;
 2247:     int pos = 0;
 2248:     int nbslash = 0;
 2249:     int len;
 2250:     xmlURIPtr ref = NULL;
 2251:     xmlURIPtr bas = NULL;
 2252:     xmlChar *bptr, *uptr, *vptr;
 2253:     int remove_path = 0;
 2254: 
 2255:     if ((URI == NULL) || (*URI == 0))
 2256: 	return NULL;
 2257: 
 2258:     /*
 2259:      * First parse URI into a standard form
 2260:      */
 2261:     ref = xmlCreateURI ();
 2262:     if (ref == NULL)
 2263: 	return NULL;
 2264:     /* If URI not already in "relative" form */
 2265:     if (URI[0] != '.') {
 2266: 	ret = xmlParseURIReference (ref, (const char *) URI);
 2267: 	if (ret != 0)
 2268: 	    goto done;		/* Error in URI, return NULL */
 2269:     } else
 2270: 	ref->path = (char *)xmlStrdup(URI);
 2271: 
 2272:     /*
 2273:      * Next parse base into the same standard form
 2274:      */
 2275:     if ((base == NULL) || (*base == 0)) {
 2276: 	val = xmlStrdup (URI);
 2277: 	goto done;
 2278:     }
 2279:     bas = xmlCreateURI ();
 2280:     if (bas == NULL)
 2281: 	goto done;
 2282:     if (base[0] != '.') {
 2283: 	ret = xmlParseURIReference (bas, (const char *) base);
 2284: 	if (ret != 0)
 2285: 	    goto done;		/* Error in base, return NULL */
 2286:     } else
 2287: 	bas->path = (char *)xmlStrdup(base);
 2288: 
 2289:     /*
 2290:      * If the scheme / server on the URI differs from the base,
 2291:      * just return the URI
 2292:      */
 2293:     if ((ref->scheme != NULL) &&
 2294: 	((bas->scheme == NULL) ||
 2295: 	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
 2296: 	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
 2297: 	val = xmlStrdup (URI);
 2298: 	goto done;
 2299:     }
 2300:     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
 2301: 	val = xmlStrdup(BAD_CAST "");
 2302: 	goto done;
 2303:     }
 2304:     if (bas->path == NULL) {
 2305: 	val = xmlStrdup((xmlChar *)ref->path);
 2306: 	goto done;
 2307:     }
 2308:     if (ref->path == NULL) {
 2309:         ref->path = (char *) "/";
 2310: 	remove_path = 1;
 2311:     }
 2312: 
 2313:     /*
 2314:      * At this point (at last!) we can compare the two paths
 2315:      *
 2316:      * First we take care of the special case where either of the
 2317:      * two path components may be missing (bug 316224)
 2318:      */
 2319:     if (bas->path == NULL) {
 2320: 	if (ref->path != NULL) {
 2321: 	    uptr = (xmlChar *) ref->path;
 2322: 	    if (*uptr == '/')
 2323: 		uptr++;
 2324: 	    /* exception characters from xmlSaveUri */
 2325: 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
 2326: 	}
 2327: 	goto done;
 2328:     }
 2329:     bptr = (xmlChar *)bas->path;
 2330:     if (ref->path == NULL) {
 2331: 	for (ix = 0; bptr[ix] != 0; ix++) {
 2332: 	    if (bptr[ix] == '/')
 2333: 		nbslash++;
 2334: 	}
 2335: 	uptr = NULL;
 2336: 	len = 1;	/* this is for a string terminator only */
 2337:     } else {
 2338:     /*
 2339:      * Next we compare the two strings and find where they first differ
 2340:      */
 2341: 	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
 2342:             pos += 2;
 2343: 	if ((*bptr == '.') && (bptr[1] == '/'))
 2344:             bptr += 2;
 2345: 	else if ((*bptr == '/') && (ref->path[pos] != '/'))
 2346: 	    bptr++;
 2347: 	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
 2348: 	    pos++;
 2349: 
 2350: 	if (bptr[pos] == ref->path[pos]) {
 2351: 	    val = xmlStrdup(BAD_CAST "");
 2352: 	    goto done;		/* (I can't imagine why anyone would do this) */
 2353: 	}
 2354: 
 2355: 	/*
 2356: 	 * In URI, "back up" to the last '/' encountered.  This will be the
 2357: 	 * beginning of the "unique" suffix of URI
 2358: 	 */
 2359: 	ix = pos;
 2360: 	if ((ref->path[ix] == '/') && (ix > 0))
 2361: 	    ix--;
 2362: 	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
 2363: 	    ix -= 2;
 2364: 	for (; ix > 0; ix--) {
 2365: 	    if (ref->path[ix] == '/')
 2366: 		break;
 2367: 	}
 2368: 	if (ix == 0) {
 2369: 	    uptr = (xmlChar *)ref->path;
 2370: 	} else {
 2371: 	    ix++;
 2372: 	    uptr = (xmlChar *)&ref->path[ix];
 2373: 	}
 2374: 
 2375: 	/*
 2376: 	 * In base, count the number of '/' from the differing point
 2377: 	 */
 2378: 	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
 2379: 	    for (; bptr[ix] != 0; ix++) {
 2380: 		if (bptr[ix] == '/')
 2381: 		    nbslash++;
 2382: 	    }
 2383: 	}
 2384: 	len = xmlStrlen (uptr) + 1;
 2385:     }
 2386:     
 2387:     if (nbslash == 0) {
 2388: 	if (uptr != NULL)
 2389: 	    /* exception characters from xmlSaveUri */
 2390: 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
 2391: 	goto done;
 2392:     }
 2393: 
 2394:     /*
 2395:      * Allocate just enough space for the returned string -
 2396:      * length of the remainder of the URI, plus enough space
 2397:      * for the "../" groups, plus one for the terminator
 2398:      */
 2399:     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
 2400:     if (val == NULL) {
 2401: 	xmlGenericError(xmlGenericErrorContext,
 2402: 		"xmlBuildRelativeURI: out of memory\n");
 2403: 	goto done;
 2404:     }
 2405:     vptr = val;
 2406:     /*
 2407:      * Put in as many "../" as needed
 2408:      */
 2409:     for (; nbslash>0; nbslash--) {
 2410: 	*vptr++ = '.';
 2411: 	*vptr++ = '.';
 2412: 	*vptr++ = '/';
 2413:     }
 2414:     /*
 2415:      * Finish up with the end of the URI
 2416:      */
 2417:     if (uptr != NULL) {
 2418:         if ((vptr > val) && (len > 0) &&
 2419: 	    (uptr[0] == '/') && (vptr[-1] == '/')) {
 2420: 	    memcpy (vptr, uptr + 1, len - 1);
 2421: 	    vptr[len - 2] = 0;
 2422: 	} else {
 2423: 	    memcpy (vptr, uptr, len);
 2424: 	    vptr[len - 1] = 0;
 2425: 	}
 2426:     } else {
 2427: 	vptr[len - 1] = 0;
 2428:     }
 2429: 
 2430:     /* escape the freshly-built path */
 2431:     vptr = val;
 2432: 	/* exception characters from xmlSaveUri */
 2433:     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
 2434:     xmlFree(vptr);
 2435: 
 2436: done:
 2437:     /*
 2438:      * Free the working variables
 2439:      */
 2440:     if (remove_path != 0)
 2441:         ref->path = NULL;
 2442:     if (ref != NULL)
 2443: 	xmlFreeURI (ref);
 2444:     if (bas != NULL)
 2445: 	xmlFreeURI (bas);
 2446: 
 2447:     return val;
 2448: }
 2449: 
 2450: /**
 2451:  * xmlCanonicPath:
 2452:  * @path:  the resource locator in a filesystem notation
 2453:  *
 2454:  * Constructs a canonic path from the specified path. 
 2455:  *
 2456:  * Returns a new canonic path, or a duplicate of the path parameter if the 
 2457:  * construction fails. The caller is responsible for freeing the memory occupied
 2458:  * by the returned string. If there is insufficient memory available, or the 
 2459:  * argument is NULL, the function returns NULL.
 2460:  */
 2461: #define IS_WINDOWS_PATH(p) 					\
 2462: 	((p != NULL) &&						\
 2463: 	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
 2464: 	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
 2465: 	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
 2466: xmlChar *
 2467: xmlCanonicPath(const xmlChar *path)
 2468: {
 2469: /*
 2470:  * For Windows implementations, additional work needs to be done to
 2471:  * replace backslashes in pathnames with "forward slashes"
 2472:  */
 2473: #if defined(_WIN32) && !defined(__CYGWIN__)    
 2474:     int len = 0;
 2475:     int i = 0;
 2476:     xmlChar *p = NULL;
 2477: #endif
 2478:     xmlURIPtr uri;
 2479:     xmlChar *ret;
 2480:     const xmlChar *absuri;
 2481: 
 2482:     if (path == NULL)
 2483: 	return(NULL);
 2484: 
 2485:     /* sanitize filename starting with // so it can be used as URI */
 2486:     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
 2487:         path++;
 2488: 
 2489:     if ((uri = xmlParseURI((const char *) path)) != NULL) {
 2490: 	xmlFreeURI(uri);
 2491: 	return xmlStrdup(path);
 2492:     }
 2493: 
 2494:     /* Check if this is an "absolute uri" */
 2495:     absuri = xmlStrstr(path, BAD_CAST "://");
 2496:     if (absuri != NULL) {
 2497:         int l, j;
 2498: 	unsigned char c;
 2499: 	xmlChar *escURI;
 2500: 
 2501:         /*
 2502: 	 * this looks like an URI where some parts have not been
 2503: 	 * escaped leading to a parsing problem.  Check that the first
 2504: 	 * part matches a protocol.
 2505: 	 */
 2506: 	l = absuri - path;
 2507: 	/* Bypass if first part (part before the '://') is > 20 chars */
 2508: 	if ((l <= 0) || (l > 20))
 2509: 	    goto path_processing;
 2510: 	/* Bypass if any non-alpha characters are present in first part */
 2511: 	for (j = 0;j < l;j++) {
 2512: 	    c = path[j];
 2513: 	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
 2514: 	        goto path_processing;
 2515: 	}
 2516: 
 2517: 	/* Escape all except the characters specified in the supplied path */
 2518:         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
 2519: 	if (escURI != NULL) {
 2520: 	    /* Try parsing the escaped path */
 2521: 	    uri = xmlParseURI((const char *) escURI);
 2522: 	    /* If successful, return the escaped string */
 2523: 	    if (uri != NULL) {
 2524: 	        xmlFreeURI(uri);
 2525: 		return escURI;
 2526: 	    }
 2527: 	}
 2528:     }
 2529: 
 2530: path_processing:
 2531: /* For Windows implementations, replace backslashes with 'forward slashes' */
 2532: #if defined(_WIN32) && !defined(__CYGWIN__)    
 2533:     /*
 2534:      * Create a URI structure
 2535:      */
 2536:     uri = xmlCreateURI();
 2537:     if (uri == NULL) {		/* Guard against 'out of memory' */
 2538:         return(NULL);
 2539:     }
 2540: 
 2541:     len = xmlStrlen(path);
 2542:     if ((len > 2) && IS_WINDOWS_PATH(path)) {
 2543:         /* make the scheme 'file' */
 2544: 	uri->scheme = xmlStrdup(BAD_CAST "file");
 2545: 	/* allocate space for leading '/' + path + string terminator */
 2546: 	uri->path = xmlMallocAtomic(len + 2);
 2547: 	if (uri->path == NULL) {
 2548: 	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
 2549: 	    return(NULL);
 2550: 	}
 2551: 	/* Put in leading '/' plus path */
 2552: 	uri->path[0] = '/';
 2553: 	p = uri->path + 1;
 2554: 	strncpy(p, path, len + 1);
 2555:     } else {
 2556: 	uri->path = xmlStrdup(path);
 2557: 	if (uri->path == NULL) {
 2558: 	    xmlFreeURI(uri);
 2559: 	    return(NULL);
 2560: 	}
 2561: 	p = uri->path;
 2562:     }
 2563:     /* Now change all occurences of '\' to '/' */
 2564:     while (*p != '\0') {
 2565: 	if (*p == '\\')
 2566: 	    *p = '/';
 2567: 	p++;
 2568:     }
 2569: 
 2570:     if (uri->scheme == NULL) {
 2571: 	ret = xmlStrdup((const xmlChar *) uri->path);
 2572:     } else {
 2573: 	ret = xmlSaveUri(uri);
 2574:     }
 2575: 
 2576:     xmlFreeURI(uri);
 2577: #else
 2578:     ret = xmlStrdup((const xmlChar *) path);
 2579: #endif
 2580:     return(ret);
 2581: }
 2582: 
 2583: /**
 2584:  * xmlPathToURI:
 2585:  * @path:  the resource locator in a filesystem notation
 2586:  *
 2587:  * Constructs an URI expressing the existing path
 2588:  *
 2589:  * Returns a new URI, or a duplicate of the path parameter if the 
 2590:  * construction fails. The caller is responsible for freeing the memory
 2591:  * occupied by the returned string. If there is insufficient memory available,
 2592:  * or the argument is NULL, the function returns NULL.
 2593:  */
 2594: xmlChar *
 2595: xmlPathToURI(const xmlChar *path)
 2596: {
 2597:     xmlURIPtr uri;
 2598:     xmlURI temp;
 2599:     xmlChar *ret, *cal;
 2600: 
 2601:     if (path == NULL)
 2602:         return(NULL);
 2603: 
 2604:     if ((uri = xmlParseURI((const char *) path)) != NULL) {
 2605: 	xmlFreeURI(uri);
 2606: 	return xmlStrdup(path);
 2607:     }
 2608:     cal = xmlCanonicPath(path);
 2609:     if (cal == NULL)
 2610:         return(NULL);
 2611: #if defined(_WIN32) && !defined(__CYGWIN__)
 2612:     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?) 
 2613:        If 'cal' is a valid URI allready then we are done here, as continuing would make
 2614:        it invalid. */
 2615:     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
 2616: 	xmlFreeURI(uri);
 2617: 	return cal;
 2618:     }
 2619:     /* 'cal' can contain a relative path with backslashes. If that is processed
 2620:        by xmlSaveURI, they will be escaped and the external entity loader machinery
 2621:        will fail. So convert them to slashes. Misuse 'ret' for walking. */
 2622:     ret = cal;
 2623:     while (*ret != '\0') {
 2624: 	if (*ret == '\\')
 2625: 	    *ret = '/';
 2626: 	ret++;
 2627:     }
 2628: #endif
 2629:     memset(&temp, 0, sizeof(temp));
 2630:     temp.path = (char *) cal;
 2631:     ret = xmlSaveUri(&temp);
 2632:     xmlFree(cal);
 2633:     return(ret);
 2634: }
 2635: #define bottom_uri
 2636: #include "elfgcchack.h"

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>