embedaddon/libxml2/uri.c - annotate

Return to uri.c CVS log
Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / libxml2
Annotation of embedaddon/libxml2/uri.c, revision 1.1.1.3

1.1       misho       1: /**
1.1.1.3 ! misho       2:  * uri.c: set of generic URI related routines
1.1       misho       3:  *
                      4:  * Reference: RFCs 3986, 2732 and 2373
                      5:  *
                      6:  * See Copyright for the status of this software.
                      7:  *
                      8:  * daniel@veillard.com
                      9:  */
                     10: 
                     11: #define IN_LIBXML
                     12: #include "libxml.h"
                     13: 
                     14: #include <string.h>
                     15: 
                     16: #include <libxml/xmlmemory.h>
                     17: #include <libxml/uri.h>
                     18: #include <libxml/globals.h>
                     19: #include <libxml/xmlerror.h>
                     20: 
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually a maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value in any arithmetic context (e.g. "len / MAX_URI_LENGTH").
 */
#define MAX_URI_LENGTH (1024 * 1024)
        !            34: 
        !            35: static void
        !            36: xmlURIErrMemory(const char *extra)
        !            37: {
        !            38:     if (extra)
        !            39:         __xmlRaiseError(NULL, NULL, NULL,
        !            40:                         NULL, NULL, XML_FROM_URI,
        !            41:                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
        !            42:                         extra, NULL, NULL, 0, 0,
        !            43:                         "Memory allocation failed : %s\n", extra);
        !            44:     else
        !            45:         __xmlRaiseError(NULL, NULL, NULL,
        !            46:                         NULL, NULL, XML_FROM_URI,
        !            47:                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
        !            48:                         NULL, NULL, NULL, 0, 0,
        !            49:                         "Memory allocation failed\n");
        !            50: }
        !            51: 
1.1       misho      52: static void xmlCleanURI(xmlURIPtr uri);
                     53: 
                     54: /*
                     55:  * Old rule from 2396 used in legacy handling code
                     56:  * alpha    = lowalpha | upalpha
                     57:  */
                     58: #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
                     59: 
                     60: 
                     61: /*
                     62:  * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
                     63:  *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
                     64:  *            "u" | "v" | "w" | "x" | "y" | "z"
                     65:  */
                     66: 
                     67: #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
                     68: 
                     69: /*
                     70:  * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
                     71:  *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
                     72:  *           "U" | "V" | "W" | "X" | "Y" | "Z"
                     73:  */
                     74: #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
                     75: 
                     76: #ifdef IS_DIGIT
                     77: #undef IS_DIGIT
                     78: #endif
                     79: /*
                     80:  * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
                     81:  */
                     82: #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
                     83: 
                     84: /*
                     85:  * alphanum = alpha | digit
                     86:  */
                     87: 
                     88: #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
                     89: 
                     90: /*
                     91:  * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
                     92:  */
                     93: 
                     94: #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
                     95:     ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
                     96:     ((x) == '(') || ((x) == ')'))
                     97: 
                     98: /*
                     99:  * unwise = "{" | "}" | "|" | "\" | "^" | "`"
                    100:  */
                    101: 
                    102: #define IS_UNWISE(p)                                                    \
                    103:       (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
                    104:        ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
                    105:        ((*(p) == ']')) || ((*(p) == '`')))
                    106: /*
                    107:  * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
                    108:  *            "[" | "]"
                    109:  */
                    110: 
                    111: #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
                    112:         ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
                    113:         ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
                    114:         ((x) == ']'))
                    115: 
                    116: /*
                    117:  * unreserved = alphanum | mark
                    118:  */
                    119: 
                    120: #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
                    121: 
                    122: /*
                    123:  * Skip to next pointer char, handle escaped sequences
                    124:  */
                    125: 
                    126: #define NEXT(p) ((*p == '%')? p += 3 : p++)
                    127: 
                    128: /*
                    129:  * Productions from the spec.
                    130:  *
                    131:  *    authority     = server | reg_name
                    132:  *    reg_name      = 1*( unreserved | escaped | "$" | "," |
                    133:  *                        ";" | ":" | "@" | "&" | "=" | "+" )
                    134:  *
                    135:  * path          = [ abs_path | opaque_part ]
                    136:  */
                    137: 
                    138: #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
                    139: 
                    140: /************************************************************************
                    141:  *                                                                     *
                    142:  *                         RFC 3986 parser                             *
                    143:  *                                                                     *
                    144:  ************************************************************************/
                    145: 
                    146: #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
                    147: #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||              \
                    148:                       ((*(p) >= 'A') && (*(p) <= 'Z')))
                    149: #define ISA_HEXDIG(p)                                                  \
                    150:        (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||            \
                    151:         ((*(p) >= 'A') && (*(p) <= 'F')))
                    152: 
                    153: /*
                    154:  *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
                    155:  *                     / "*" / "+" / "," / ";" / "="
                    156:  */
                    157: #define ISA_SUB_DELIM(p)                                               \
                    158:       (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||                \
                    159:        ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||                \
                    160:        ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||                \
                    161:        ((*(p) == '=')) || ((*(p) == '\'')))
                    162: 
                    163: /*
                    164:  *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
                    165:  */
                    166: #define ISA_GEN_DELIM(p)                                               \
                    167:       (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
                    168:        ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
                    169:        ((*(p) == '@')))
                    170: 
                    171: /*
                    172:  *    reserved      = gen-delims / sub-delims
                    173:  */
                    174: #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
                    175: 
                    176: /*
                    177:  *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
                    178:  */
                    179: #define ISA_UNRESERVED(p)                                              \
                    180:       ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||          \
                    181:        ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
                    182: 
                    183: /*
                    184:  *    pct-encoded   = "%" HEXDIG HEXDIG
                    185:  */
                    186: #define ISA_PCT_ENCODED(p)                                             \
                    187:      ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
                    188: 
                    189: /*
                    190:  *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
                    191:  */
                    192: #define ISA_PCHAR(p)                                                   \
                    193:      (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||   \
                    194:       ((*(p) == ':')) || ((*(p) == '@')))
                    195: 
                    196: /**
                    197:  * xmlParse3986Scheme:
                    198:  * @uri:  pointer to an URI structure
                    199:  * @str:  pointer to the string to analyze
                    200:  *
                    201:  * Parse an URI scheme
                    202:  *
                    203:  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
                    204:  *
                    205:  * Returns 0 or the error code
                    206:  */
                    207: static int
                    208: xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
                    209:     const char *cur;
                    210: 
                    211:     if (str == NULL)
                    212:        return(-1);
                    213: 
                    214:     cur = *str;
                    215:     if (!ISA_ALPHA(cur))
                    216:        return(2);
                    217:     cur++;
                    218:     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
                    219:            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
                    220:     if (uri != NULL) {
                    221:        if (uri->scheme != NULL) xmlFree(uri->scheme);
                    222:        uri->scheme = STRNDUP(*str, cur - *str);
                    223:     }
                    224:     *str = cur;
                    225:     return(0);
                    226: }
                    227: 
                    228: /**
                    229:  * xmlParse3986Fragment:
                    230:  * @uri:  pointer to an URI structure
                    231:  * @str:  pointer to the string to analyze
                    232:  *
                    233:  * Parse the query part of an URI
                    234:  *
                    235:  * fragment      = *( pchar / "/" / "?" )
                    236:  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
                    237:  *       in the fragment identifier but this is used very broadly for
                    238:  *       xpointer scheme selection, so we are allowing it here to not break
                    239:  *       for example all the DocBook processing chains.
                    240:  *
                    241:  * Returns 0 or the error code
                    242:  */
                    243: static int
                    244: xmlParse3986Fragment(xmlURIPtr uri, const char **str)
                    245: {
                    246:     const char *cur;
                    247: 
                    248:     if (str == NULL)
                    249:         return (-1);
                    250: 
                    251:     cur = *str;
                    252: 
                    253:     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
                    254:            (*cur == '[') || (*cur == ']') ||
                    255:            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
                    256:         NEXT(cur);
                    257:     if (uri != NULL) {
                    258:         if (uri->fragment != NULL)
                    259:             xmlFree(uri->fragment);
                    260:        if (uri->cleanup & 2)
                    261:            uri->fragment = STRNDUP(*str, cur - *str);
                    262:        else
                    263:            uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
                    264:     }
                    265:     *str = cur;
                    266:     return (0);
                    267: }
                    268: 
                    269: /**
                    270:  * xmlParse3986Query:
                    271:  * @uri:  pointer to an URI structure
                    272:  * @str:  pointer to the string to analyze
                    273:  *
                    274:  * Parse the query part of an URI
                    275:  *
                    276:  * query = *uric
                    277:  *
                    278:  * Returns 0 or the error code
                    279:  */
                    280: static int
                    281: xmlParse3986Query(xmlURIPtr uri, const char **str)
                    282: {
                    283:     const char *cur;
                    284: 
                    285:     if (str == NULL)
                    286:         return (-1);
                    287: 
                    288:     cur = *str;
                    289: 
                    290:     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
                    291:            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
                    292:         NEXT(cur);
                    293:     if (uri != NULL) {
                    294:         if (uri->query != NULL)
                    295:             xmlFree(uri->query);
                    296:        if (uri->cleanup & 2)
                    297:            uri->query = STRNDUP(*str, cur - *str);
                    298:        else
                    299:            uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
                    300: 
                    301:        /* Save the raw bytes of the query as well.
                    302:         * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
                    303:         */
                    304:        if (uri->query_raw != NULL)
                    305:            xmlFree (uri->query_raw);
                    306:        uri->query_raw = STRNDUP (*str, cur - *str);
                    307:     }
                    308:     *str = cur;
                    309:     return (0);
                    310: }
                    311: 
                    312: /**
                    313:  * xmlParse3986Port:
                    314:  * @uri:  pointer to an URI structure
                    315:  * @str:  the string to analyze
                    316:  *
                    317:  * Parse a port  part and fills in the appropriate fields
                    318:  * of the @uri structure
                    319:  *
                    320:  * port          = *DIGIT
                    321:  *
                    322:  * Returns 0 or the error code
                    323:  */
                    324: static int
                    325: xmlParse3986Port(xmlURIPtr uri, const char **str)
                    326: {
                    327:     const char *cur = *str;
                    328: 
                    329:     if (ISA_DIGIT(cur)) {
                    330:        if (uri != NULL)
                    331:            uri->port = 0;
                    332:        while (ISA_DIGIT(cur)) {
                    333:            if (uri != NULL)
                    334:                uri->port = uri->port * 10 + (*cur - '0');
                    335:            cur++;
                    336:        }
                    337:        *str = cur;
                    338:        return(0);
                    339:     }
                    340:     return(1);
                    341: }
                    342: 
                    343: /**
                    344:  * xmlParse3986Userinfo:
                    345:  * @uri:  pointer to an URI structure
                    346:  * @str:  the string to analyze
                    347:  *
                    348:  * Parse an user informations part and fills in the appropriate fields
                    349:  * of the @uri structure
                    350:  *
                    351:  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
                    352:  *
                    353:  * Returns 0 or the error code
                    354:  */
                    355: static int
                    356: xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
                    357: {
                    358:     const char *cur;
                    359: 
                    360:     cur = *str;
                    361:     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
                    362:            ISA_SUB_DELIM(cur) || (*cur == ':'))
                    363:        NEXT(cur);
                    364:     if (*cur == '@') {
                    365:        if (uri != NULL) {
                    366:            if (uri->user != NULL) xmlFree(uri->user);
                    367:            if (uri->cleanup & 2)
                    368:                uri->user = STRNDUP(*str, cur - *str);
                    369:            else
                    370:                uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
                    371:        }
                    372:        *str = cur;
                    373:        return(0);
                    374:     }
                    375:     return(1);
                    376: }
                    377: 
                    378: /**
                    379:  * xmlParse3986DecOctet:
                    380:  * @str:  the string to analyze
                    381:  *
                    382:  *    dec-octet     = DIGIT                 ; 0-9
                    383:  *                  / %x31-39 DIGIT         ; 10-99
                    384:  *                  / "1" 2DIGIT            ; 100-199
                    385:  *                  / "2" %x30-34 DIGIT     ; 200-249
                    386:  *                  / "25" %x30-35          ; 250-255
                    387:  *
                    388:  * Skip a dec-octet.
                    389:  *
                    390:  * Returns 0 if found and skipped, 1 otherwise
                    391:  */
                    392: static int
                    393: xmlParse3986DecOctet(const char **str) {
                    394:     const char *cur = *str;
                    395: 
                    396:     if (!(ISA_DIGIT(cur)))
                    397:         return(1);
                    398:     if (!ISA_DIGIT(cur+1))
                    399:        cur++;
                    400:     else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
                    401:        cur += 2;
                    402:     else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
                    403:        cur += 3;
                    404:     else if ((*cur == '2') && (*(cur + 1) >= '0') &&
                    405:             (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
                    406:        cur += 3;
                    407:     else if ((*cur == '2') && (*(cur + 1) == '5') &&
                    408:             (*(cur + 2) >= '0') && (*(cur + 1) <= '5'))
                    409:        cur += 3;
                    410:     else
                    411:         return(1);
                    412:     *str = cur;
                    413:     return(0);
                    414: }
                    415: /**
                    416:  * xmlParse3986Host:
                    417:  * @uri:  pointer to an URI structure
                    418:  * @str:  the string to analyze
                    419:  *
                    420:  * Parse an host part and fills in the appropriate fields
                    421:  * of the @uri structure
                    422:  *
                    423:  * host          = IP-literal / IPv4address / reg-name
                    424:  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
                    425:  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
                    426:  * reg-name      = *( unreserved / pct-encoded / sub-delims )
                    427:  *
                    428:  * Returns 0 or the error code
                    429:  */
                    430: static int
                    431: xmlParse3986Host(xmlURIPtr uri, const char **str)
                    432: {
                    433:     const char *cur = *str;
                    434:     const char *host;
                    435: 
                    436:     host = cur;
                    437:     /*
                    438:      * IPv6 and future adressing scheme are enclosed between brackets
                    439:      */
                    440:     if (*cur == '[') {
                    441:         cur++;
                    442:        while ((*cur != ']') && (*cur != 0))
                    443:            cur++;
                    444:        if (*cur != ']')
                    445:            return(1);
                    446:        cur++;
                    447:        goto found;
                    448:     }
                    449:     /*
                    450:      * try to parse an IPv4
                    451:      */
                    452:     if (ISA_DIGIT(cur)) {
                    453:         if (xmlParse3986DecOctet(&cur) != 0)
                    454:            goto not_ipv4;
                    455:        if (*cur != '.')
                    456:            goto not_ipv4;
                    457:        cur++;
                    458:         if (xmlParse3986DecOctet(&cur) != 0)
                    459:            goto not_ipv4;
                    460:        if (*cur != '.')
                    461:            goto not_ipv4;
                    462:         if (xmlParse3986DecOctet(&cur) != 0)
                    463:            goto not_ipv4;
                    464:        if (*cur != '.')
                    465:            goto not_ipv4;
                    466:         if (xmlParse3986DecOctet(&cur) != 0)
                    467:            goto not_ipv4;
                    468:        goto found;
                    469: not_ipv4:
                    470:         cur = *str;
                    471:     }
                    472:     /*
                    473:      * then this should be a hostname which can be empty
                    474:      */
                    475:     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
                    476:         NEXT(cur);
                    477: found:
                    478:     if (uri != NULL) {
                    479:        if (uri->authority != NULL) xmlFree(uri->authority);
                    480:        uri->authority = NULL;
                    481:        if (uri->server != NULL) xmlFree(uri->server);
                    482:        if (cur != host) {
                    483:            if (uri->cleanup & 2)
                    484:                uri->server = STRNDUP(host, cur - host);
                    485:            else
                    486:                uri->server = xmlURIUnescapeString(host, cur - host, NULL);
                    487:        } else
                    488:            uri->server = NULL;
                    489:     }
                    490:     *str = cur;
                    491:     return(0);
                    492: }
                    493: 
                    494: /**
                    495:  * xmlParse3986Authority:
                    496:  * @uri:  pointer to an URI structure
                    497:  * @str:  the string to analyze
                    498:  *
                    499:  * Parse an authority part and fills in the appropriate fields
                    500:  * of the @uri structure
                    501:  *
                    502:  * authority     = [ userinfo "@" ] host [ ":" port ]
                    503:  *
                    504:  * Returns 0 or the error code
                    505:  */
                    506: static int
                    507: xmlParse3986Authority(xmlURIPtr uri, const char **str)
                    508: {
                    509:     const char *cur;
                    510:     int ret;
                    511: 
                    512:     cur = *str;
                    513:     /*
                    514:      * try to parse an userinfo and check for the trailing @
                    515:      */
                    516:     ret = xmlParse3986Userinfo(uri, &cur);
                    517:     if ((ret != 0) || (*cur != '@'))
                    518:         cur = *str;
                    519:     else
                    520:         cur++;
                    521:     ret = xmlParse3986Host(uri, &cur);
                    522:     if (ret != 0) return(ret);
                    523:     if (*cur == ':') {
                    524:         cur++;
                    525:         ret = xmlParse3986Port(uri, &cur);
                    526:        if (ret != 0) return(ret);
                    527:     }
                    528:     *str = cur;
                    529:     return(0);
                    530: }
                    531: 
                    532: /**
                    533:  * xmlParse3986Segment:
                    534:  * @str:  the string to analyze
                    535:  * @forbid: an optional forbidden character
                    536:  * @empty: allow an empty segment
                    537:  *
                    538:  * Parse a segment and fills in the appropriate fields
                    539:  * of the @uri structure
                    540:  *
                    541:  * segment       = *pchar
                    542:  * segment-nz    = 1*pchar
                    543:  * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
                    544:  *               ; non-zero-length segment without any colon ":"
                    545:  *
                    546:  * Returns 0 or the error code
                    547:  */
                    548: static int
                    549: xmlParse3986Segment(const char **str, char forbid, int empty)
                    550: {
                    551:     const char *cur;
                    552: 
                    553:     cur = *str;
                    554:     if (!ISA_PCHAR(cur)) {
                    555:         if (empty)
                    556:            return(0);
                    557:        return(1);
                    558:     }
                    559:     while (ISA_PCHAR(cur) && (*cur != forbid))
                    560:         NEXT(cur);
                    561:     *str = cur;
                    562:     return (0);
                    563: }
                    564: 
                    565: /**
                    566:  * xmlParse3986PathAbEmpty:
                    567:  * @uri:  pointer to an URI structure
                    568:  * @str:  the string to analyze
                    569:  *
                    570:  * Parse an path absolute or empty and fills in the appropriate fields
                    571:  * of the @uri structure
                    572:  *
                    573:  * path-abempty  = *( "/" segment )
                    574:  *
                    575:  * Returns 0 or the error code
                    576:  */
                    577: static int
                    578: xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
                    579: {
                    580:     const char *cur;
                    581:     int ret;
                    582: 
                    583:     cur = *str;
                    584: 
                    585:     while (*cur == '/') {
                    586:         cur++;
                    587:        ret = xmlParse3986Segment(&cur, 0, 1);
                    588:        if (ret != 0) return(ret);
                    589:     }
                    590:     if (uri != NULL) {
                    591:        if (uri->path != NULL) xmlFree(uri->path);
                    592:         if (*str != cur) {
                    593:             if (uri->cleanup & 2)
                    594:                 uri->path = STRNDUP(*str, cur - *str);
                    595:             else
                    596:                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
                    597:         } else {
                    598:             uri->path = NULL;
                    599:         }
                    600:     }
                    601:     *str = cur;
                    602:     return (0);
                    603: }
                    604: 
                    605: /**
                    606:  * xmlParse3986PathAbsolute:
                    607:  * @uri:  pointer to an URI structure
                    608:  * @str:  the string to analyze
                    609:  *
                    610:  * Parse an path absolute and fills in the appropriate fields
                    611:  * of the @uri structure
                    612:  *
                    613:  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
                    614:  *
                    615:  * Returns 0 or the error code
                    616:  */
                    617: static int
                    618: xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
                    619: {
                    620:     const char *cur;
                    621:     int ret;
                    622: 
                    623:     cur = *str;
                    624: 
                    625:     if (*cur != '/')
                    626:         return(1);
                    627:     cur++;
                    628:     ret = xmlParse3986Segment(&cur, 0, 0);
                    629:     if (ret == 0) {
                    630:        while (*cur == '/') {
                    631:            cur++;
                    632:            ret = xmlParse3986Segment(&cur, 0, 1);
                    633:            if (ret != 0) return(ret);
                    634:        }
                    635:     }
                    636:     if (uri != NULL) {
                    637:        if (uri->path != NULL) xmlFree(uri->path);
                    638:         if (cur != *str) {
                    639:             if (uri->cleanup & 2)
                    640:                 uri->path = STRNDUP(*str, cur - *str);
                    641:             else
                    642:                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
                    643:         } else {
                    644:             uri->path = NULL;
                    645:         }
                    646:     }
                    647:     *str = cur;
                    648:     return (0);
                    649: }
                    650: 
                    651: /**
                    652:  * xmlParse3986PathRootless:
                    653:  * @uri:  pointer to an URI structure
                    654:  * @str:  the string to analyze
                    655:  *
                    656:  * Parse an path without root and fills in the appropriate fields
                    657:  * of the @uri structure
                    658:  *
                    659:  * path-rootless = segment-nz *( "/" segment )
                    660:  *
                    661:  * Returns 0 or the error code
                    662:  */
                    663: static int
                    664: xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
                    665: {
                    666:     const char *cur;
                    667:     int ret;
                    668: 
                    669:     cur = *str;
                    670: 
                    671:     ret = xmlParse3986Segment(&cur, 0, 0);
                    672:     if (ret != 0) return(ret);
                    673:     while (*cur == '/') {
                    674:         cur++;
                    675:        ret = xmlParse3986Segment(&cur, 0, 1);
                    676:        if (ret != 0) return(ret);
                    677:     }
                    678:     if (uri != NULL) {
                    679:        if (uri->path != NULL) xmlFree(uri->path);
                    680:         if (cur != *str) {
                    681:             if (uri->cleanup & 2)
                    682:                 uri->path = STRNDUP(*str, cur - *str);
                    683:             else
                    684:                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
                    685:         } else {
                    686:             uri->path = NULL;
                    687:         }
                    688:     }
                    689:     *str = cur;
                    690:     return (0);
                    691: }
                    692: 
                    693: /**
                    694:  * xmlParse3986PathNoScheme:
                    695:  * @uri:  pointer to an URI structure
                    696:  * @str:  the string to analyze
                    697:  *
                    698:  * Parse an path which is not a scheme and fills in the appropriate fields
                    699:  * of the @uri structure
                    700:  *
                    701:  * path-noscheme = segment-nz-nc *( "/" segment )
                    702:  *
                    703:  * Returns 0 or the error code
                    704:  */
                    705: static int
                    706: xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
                    707: {
                    708:     const char *cur;
                    709:     int ret;
                    710: 
                    711:     cur = *str;
                    712: 
                    713:     ret = xmlParse3986Segment(&cur, ':', 0);
                    714:     if (ret != 0) return(ret);
                    715:     while (*cur == '/') {
                    716:         cur++;
                    717:        ret = xmlParse3986Segment(&cur, 0, 1);
                    718:        if (ret != 0) return(ret);
                    719:     }
                    720:     if (uri != NULL) {
                    721:        if (uri->path != NULL) xmlFree(uri->path);
                    722:         if (cur != *str) {
                    723:             if (uri->cleanup & 2)
                    724:                 uri->path = STRNDUP(*str, cur - *str);
                    725:             else
                    726:                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
                    727:         } else {
                    728:             uri->path = NULL;
                    729:         }
                    730:     }
                    731:     *str = cur;
                    732:     return (0);
                    733: }
                    734: 
                    735: /**
                    736:  * xmlParse3986HierPart:
                    737:  * @uri:  pointer to an URI structure
                    738:  * @str:  the string to analyze
                    739:  *
                    740:  * Parse an hierarchical part and fills in the appropriate fields
                    741:  * of the @uri structure
                    742:  *
                    743:  * hier-part     = "//" authority path-abempty
                    744:  *                / path-absolute
                    745:  *                / path-rootless
                    746:  *                / path-empty
                    747:  *
                    748:  * Returns 0 or the error code
                    749:  */
                    750: static int
                    751: xmlParse3986HierPart(xmlURIPtr uri, const char **str)
                    752: {
                    753:     const char *cur;
                    754:     int ret;
                    755: 
                    756:     cur = *str;
                    757: 
                    758:     if ((*cur == '/') && (*(cur + 1) == '/')) {
                    759:         cur += 2;
                    760:        ret = xmlParse3986Authority(uri, &cur);
                    761:        if (ret != 0) return(ret);
                    762:        ret = xmlParse3986PathAbEmpty(uri, &cur);
                    763:        if (ret != 0) return(ret);
                    764:        *str = cur;
                    765:        return(0);
                    766:     } else if (*cur == '/') {
                    767:         ret = xmlParse3986PathAbsolute(uri, &cur);
                    768:        if (ret != 0) return(ret);
                    769:     } else if (ISA_PCHAR(cur)) {
                    770:         ret = xmlParse3986PathRootless(uri, &cur);
                    771:        if (ret != 0) return(ret);
                    772:     } else {
                    773:        /* path-empty is effectively empty */
                    774:        if (uri != NULL) {
                    775:            if (uri->path != NULL) xmlFree(uri->path);
                    776:            uri->path = NULL;
                    777:        }
                    778:     }
                    779:     *str = cur;
                    780:     return (0);
                    781: }
                    782: 
                    783: /**
                    784:  * xmlParse3986RelativeRef:
                    785:  * @uri:  pointer to an URI structure
                    786:  * @str:  the string to analyze
                    787:  *
                    788:  * Parse an URI string and fills in the appropriate fields
                    789:  * of the @uri structure
                    790:  *
                    791:  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
                    792:  * relative-part = "//" authority path-abempty
                    793:  *               / path-absolute
                    794:  *               / path-noscheme
                    795:  *               / path-empty
                    796:  *
                    797:  * Returns 0 or the error code
                    798:  */
                    799: static int
                    800: xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
                    801:     int ret;
                    802: 
                    803:     if ((*str == '/') && (*(str + 1) == '/')) {
                    804:         str += 2;
                    805:        ret = xmlParse3986Authority(uri, &str);
                    806:        if (ret != 0) return(ret);
                    807:        ret = xmlParse3986PathAbEmpty(uri, &str);
                    808:        if (ret != 0) return(ret);
                    809:     } else if (*str == '/') {
                    810:        ret = xmlParse3986PathAbsolute(uri, &str);
                    811:        if (ret != 0) return(ret);
                    812:     } else if (ISA_PCHAR(str)) {
                    813:         ret = xmlParse3986PathNoScheme(uri, &str);
                    814:        if (ret != 0) return(ret);
                    815:     } else {
                    816:        /* path-empty is effectively empty */
                    817:        if (uri != NULL) {
                    818:            if (uri->path != NULL) xmlFree(uri->path);
                    819:            uri->path = NULL;
                    820:        }
                    821:     }
                    822: 
                    823:     if (*str == '?') {
                    824:        str++;
                    825:        ret = xmlParse3986Query(uri, &str);
                    826:        if (ret != 0) return(ret);
                    827:     }
                    828:     if (*str == '#') {
                    829:        str++;
                    830:        ret = xmlParse3986Fragment(uri, &str);
                    831:        if (ret != 0) return(ret);
                    832:     }
                    833:     if (*str != 0) {
                    834:        xmlCleanURI(uri);
                    835:        return(1);
                    836:     }
                    837:     return(0);
                    838: }
                    839: 
                    840: 
                    841: /**
                    842:  * xmlParse3986URI:
                    843:  * @uri:  pointer to an URI structure
                    844:  * @str:  the string to analyze
                    845:  *
                    846:  * Parse an URI string and fills in the appropriate fields
                    847:  * of the @uri structure
                    848:  *
                    849:  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
                    850:  *
                    851:  * Returns 0 or the error code
                    852:  */
                    853: static int
                    854: xmlParse3986URI(xmlURIPtr uri, const char *str) {
                    855:     int ret;
                    856: 
                    857:     ret = xmlParse3986Scheme(uri, &str);
                    858:     if (ret != 0) return(ret);
                    859:     if (*str != ':') {
                    860:        return(1);
                    861:     }
                    862:     str++;
                    863:     ret = xmlParse3986HierPart(uri, &str);
                    864:     if (ret != 0) return(ret);
                    865:     if (*str == '?') {
                    866:        str++;
                    867:        ret = xmlParse3986Query(uri, &str);
                    868:        if (ret != 0) return(ret);
                    869:     }
                    870:     if (*str == '#') {
                    871:        str++;
                    872:        ret = xmlParse3986Fragment(uri, &str);
                    873:        if (ret != 0) return(ret);
                    874:     }
                    875:     if (*str != 0) {
                    876:        xmlCleanURI(uri);
                    877:        return(1);
                    878:     }
                    879:     return(0);
                    880: }
                    881: 
                    882: /**
                    883:  * xmlParse3986URIReference:
                    884:  * @uri:  pointer to an URI structure
                    885:  * @str:  the string to analyze
                    886:  *
                    887:  * Parse an URI reference string and fills in the appropriate fields
                    888:  * of the @uri structure
                    889:  *
                    890:  * URI-reference = URI / relative-ref
                    891:  *
                    892:  * Returns 0 or the error code
                    893:  */
                    894: static int
                    895: xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
                    896:     int ret;
                    897: 
                    898:     if (str == NULL)
                    899:        return(-1);
                    900:     xmlCleanURI(uri);
                    901: 
                    902:     /*
                    903:      * Try first to parse absolute refs, then fallback to relative if
                    904:      * it fails.
                    905:      */
                    906:     ret = xmlParse3986URI(uri, str);
                    907:     if (ret != 0) {
                    908:        xmlCleanURI(uri);
                    909:         ret = xmlParse3986RelativeRef(uri, str);
                    910:        if (ret != 0) {
                    911:            xmlCleanURI(uri);
                    912:            return(ret);
                    913:        }
                    914:     }
                    915:     return(0);
                    916: }
                    917: 
                    918: /**
                    919:  * xmlParseURI:
                    920:  * @str:  the URI string to analyze
                    921:  *
                    922:  * Parse an URI based on RFC 3986
                    923:  *
                    924:  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
                    925:  *
                    926:  * Returns a newly built xmlURIPtr or NULL in case of error
                    927:  */
                    928: xmlURIPtr
                    929: xmlParseURI(const char *str) {
                    930:     xmlURIPtr uri;
                    931:     int ret;
                    932: 
                    933:     if (str == NULL)
                    934:        return(NULL);
                    935:     uri = xmlCreateURI();
                    936:     if (uri != NULL) {
                    937:        ret = xmlParse3986URIReference(uri, str);
                    938:         if (ret) {
                    939:            xmlFreeURI(uri);
                    940:            return(NULL);
                    941:        }
                    942:     }
                    943:     return(uri);
                    944: }
                    945: 
                    946: /**
                    947:  * xmlParseURIReference:
                    948:  * @uri:  pointer to an URI structure
                    949:  * @str:  the string to analyze
                    950:  *
                    951:  * Parse an URI reference string based on RFC 3986 and fills in the
                    952:  * appropriate fields of the @uri structure
                    953:  *
                    954:  * URI-reference = URI / relative-ref
                    955:  *
                    956:  * Returns 0 or the error code
                    957:  */
                    958: int
                    959: xmlParseURIReference(xmlURIPtr uri, const char *str) {
                    960:     return(xmlParse3986URIReference(uri, str));
                    961: }
                    962: 
                    963: /**
                    964:  * xmlParseURIRaw:
                    965:  * @str:  the URI string to analyze
                    966:  * @raw:  if 1 unescaping of URI pieces are disabled
                    967:  *
                    968:  * Parse an URI but allows to keep intact the original fragments.
                    969:  *
                    970:  * URI-reference = URI / relative-ref
                    971:  *
                    972:  * Returns a newly built xmlURIPtr or NULL in case of error
                    973:  */
                    974: xmlURIPtr
                    975: xmlParseURIRaw(const char *str, int raw) {
                    976:     xmlURIPtr uri;
                    977:     int ret;
                    978: 
                    979:     if (str == NULL)
                    980:        return(NULL);
                    981:     uri = xmlCreateURI();
                    982:     if (uri != NULL) {
                    983:         if (raw) {
                    984:            uri->cleanup |= 2;
                    985:        }
                    986:        ret = xmlParseURIReference(uri, str);
                    987:         if (ret) {
                    988:            xmlFreeURI(uri);
                    989:            return(NULL);
                    990:        }
                    991:     }
                    992:     return(uri);
                    993: }
                    994: 
                    995: /************************************************************************
                    996:  *                                                                     *
                    997:  *                     Generic URI structure functions                 *
                    998:  *                                                                     *
                    999:  ************************************************************************/
                   1000: 
                   1001: /**
                   1002:  * xmlCreateURI:
                   1003:  *
                   1004:  * Simply creates an empty xmlURI
                   1005:  *
                   1006:  * Returns the new structure or NULL in case of error
                   1007:  */
                   1008: xmlURIPtr
                   1009: xmlCreateURI(void) {
                   1010:     xmlURIPtr ret;
                   1011: 
                   1012:     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
                   1013:     if (ret == NULL) {
1.1.1.3 ! misho    1014:         xmlURIErrMemory("creating URI structure\n");
1.1       misho    1015:        return(NULL);
                   1016:     }
                   1017:     memset(ret, 0, sizeof(xmlURI));
                   1018:     return(ret);
                   1019: }
                   1020: 
                   1021: /**
1.1.1.3 ! misho    1022:  * xmlSaveUriRealloc:
        !          1023:  *
        !          1024:  * Function to handle properly a reallocation when saving an URI
        !          1025:  * Also imposes some limit on the length of an URI string output
        !          1026:  */
        !          1027: static xmlChar *
        !          1028: xmlSaveUriRealloc(xmlChar *ret, int *max) {
        !          1029:     xmlChar *temp;
        !          1030:     int tmp;
        !          1031: 
        !          1032:     if (*max > MAX_URI_LENGTH) {
        !          1033:         xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
        !          1034:         return(NULL);
        !          1035:     }
        !          1036:     tmp = *max * 2;
        !          1037:     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
        !          1038:     if (temp == NULL) {
        !          1039:         xmlURIErrMemory("saving URI\n");
        !          1040:         return(NULL);
        !          1041:     }
        !          1042:     *max = tmp;
        !          1043:     return(temp);
        !          1044: }
        !          1045: 
        !          1046: /**
1.1       misho    1047:  * xmlSaveUri:
                   1048:  * @uri:  pointer to an xmlURI
                   1049:  *
                   1050:  * Save the URI as an escaped string
                   1051:  *
                   1052:  * Returns a new string (to be deallocated by caller)
                   1053:  */
                   1054: xmlChar *
                   1055: xmlSaveUri(xmlURIPtr uri) {
                   1056:     xmlChar *ret = NULL;
                   1057:     xmlChar *temp;
                   1058:     const char *p;
                   1059:     int len;
                   1060:     int max;
                   1061: 
                   1062:     if (uri == NULL) return(NULL);
                   1063: 
                   1064: 
                   1065:     max = 80;
                   1066:     ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
                   1067:     if (ret == NULL) {
1.1.1.3 ! misho    1068:         xmlURIErrMemory("saving URI\n");
1.1       misho    1069:        return(NULL);
                   1070:     }
                   1071:     len = 0;
                   1072: 
                   1073:     if (uri->scheme != NULL) {
                   1074:        p = uri->scheme;
                   1075:        while (*p != 0) {
                   1076:            if (len >= max) {
1.1.1.3 ! misho    1077:                 temp = xmlSaveUriRealloc(ret, &max);
        !          1078:                 if (temp == NULL) goto mem_error;
1.1       misho    1079:                ret = temp;
                   1080:            }
                   1081:            ret[len++] = *p++;
                   1082:        }
                   1083:        if (len >= max) {
1.1.1.3 ! misho    1084:             temp = xmlSaveUriRealloc(ret, &max);
        !          1085:             if (temp == NULL) goto mem_error;
        !          1086:             ret = temp;
1.1       misho    1087:        }
                   1088:        ret[len++] = ':';
                   1089:     }
                   1090:     if (uri->opaque != NULL) {
                   1091:        p = uri->opaque;
                   1092:        while (*p != 0) {
                   1093:            if (len + 3 >= max) {
1.1.1.3 ! misho    1094:                 temp = xmlSaveUriRealloc(ret, &max);
        !          1095:                 if (temp == NULL) goto mem_error;
        !          1096:                 ret = temp;
1.1       misho    1097:            }
                   1098:            if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
                   1099:                ret[len++] = *p++;
                   1100:            else {
                   1101:                int val = *(unsigned char *)p++;
                   1102:                int hi = val / 0x10, lo = val % 0x10;
                   1103:                ret[len++] = '%';
                   1104:                ret[len++] = hi + (hi > 9? 'A'-10 : '0');
                   1105:                ret[len++] = lo + (lo > 9? 'A'-10 : '0');
                   1106:            }
                   1107:        }
                   1108:     } else {
                   1109:        if (uri->server != NULL) {
                   1110:            if (len + 3 >= max) {
1.1.1.3 ! misho    1111:                 temp = xmlSaveUriRealloc(ret, &max);
        !          1112:                 if (temp == NULL) goto mem_error;
        !          1113:                 ret = temp;
1.1       misho    1114:            }
                   1115:            ret[len++] = '/';
                   1116:            ret[len++] = '/';
                   1117:            if (uri->user != NULL) {
                   1118:                p = uri->user;
                   1119:                while (*p != 0) {
                   1120:                    if (len + 3 >= max) {
1.1.1.3 ! misho    1121:                         temp = xmlSaveUriRealloc(ret, &max);
        !          1122:                         if (temp == NULL) goto mem_error;
        !          1123:                         ret = temp;
1.1       misho    1124:                    }
                   1125:                    if ((IS_UNRESERVED(*(p))) ||
                   1126:                        ((*(p) == ';')) || ((*(p) == ':')) ||
                   1127:                        ((*(p) == '&')) || ((*(p) == '=')) ||
                   1128:                        ((*(p) == '+')) || ((*(p) == '$')) ||
                   1129:                        ((*(p) == ',')))
                   1130:                        ret[len++] = *p++;
                   1131:                    else {
                   1132:                        int val = *(unsigned char *)p++;
                   1133:                        int hi = val / 0x10, lo = val % 0x10;
                   1134:                        ret[len++] = '%';
                   1135:                        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
                   1136:                        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
                   1137:                    }
                   1138:                }
                   1139:                if (len + 3 >= max) {
1.1.1.3 ! misho    1140:                     temp = xmlSaveUriRealloc(ret, &max);
        !          1141:                     if (temp == NULL) goto mem_error;
        !          1142:                     ret = temp;
1.1       misho    1143:                }
                   1144:                ret[len++] = '@';
                   1145:            }
                   1146:            p = uri->server;
                   1147:            while (*p != 0) {
                   1148:                if (len >= max) {
1.1.1.3 ! misho    1149:                     temp = xmlSaveUriRealloc(ret, &max);
        !          1150:                     if (temp == NULL) goto mem_error;
        !          1151:                     ret = temp;
1.1       misho    1152:                }
                   1153:                ret[len++] = *p++;
                   1154:            }
                   1155:            if (uri->port > 0) {
                   1156:                if (len + 10 >= max) {
1.1.1.3 ! misho    1157:                     temp = xmlSaveUriRealloc(ret, &max);
        !          1158:                     if (temp == NULL) goto mem_error;
        !          1159:                     ret = temp;
1.1       misho    1160:                }
                   1161:                len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
                   1162:            }
                   1163:        } else if (uri->authority != NULL) {
                   1164:            if (len + 3 >= max) {
1.1.1.3 ! misho    1165:                 temp = xmlSaveUriRealloc(ret, &max);
        !          1166:                 if (temp == NULL) goto mem_error;
        !          1167:                 ret = temp;
1.1       misho    1168:            }
                   1169:            ret[len++] = '/';
                   1170:            ret[len++] = '/';
                   1171:            p = uri->authority;
                   1172:            while (*p != 0) {
                   1173:                if (len + 3 >= max) {
1.1.1.3 ! misho    1174:                     temp = xmlSaveUriRealloc(ret, &max);
        !          1175:                     if (temp == NULL) goto mem_error;
        !          1176:                     ret = temp;
1.1       misho    1177:                }
                   1178:                if ((IS_UNRESERVED(*(p))) ||
                   1179:                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
                   1180:                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
                   1181:                     ((*(p) == '=')) || ((*(p) == '+')))
                   1182:                    ret[len++] = *p++;
                   1183:                else {
                   1184:                    int val = *(unsigned char *)p++;
                   1185:                    int hi = val / 0x10, lo = val % 0x10;
                   1186:                    ret[len++] = '%';
                   1187:                    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
                   1188:                    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
                   1189:                }
                   1190:            }
                   1191:        } else if (uri->scheme != NULL) {
                   1192:            if (len + 3 >= max) {
1.1.1.3 ! misho    1193:                 temp = xmlSaveUriRealloc(ret, &max);
        !          1194:                 if (temp == NULL) goto mem_error;
        !          1195:                 ret = temp;
1.1       misho    1196:            }
                   1197:            ret[len++] = '/';
                   1198:            ret[len++] = '/';
                   1199:        }
                   1200:        if (uri->path != NULL) {
                   1201:            p = uri->path;
                   1202:            /*
                   1203:             * the colon in file:///d: should not be escaped or
                   1204:             * Windows accesses fail later.
                   1205:             */
                   1206:            if ((uri->scheme != NULL) &&
                   1207:                (p[0] == '/') &&
                   1208:                (((p[1] >= 'a') && (p[1] <= 'z')) ||
                   1209:                 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
                   1210:                (p[2] == ':') &&
                   1211:                (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
                   1212:                if (len + 3 >= max) {
1.1.1.3 ! misho    1213:                     temp = xmlSaveUriRealloc(ret, &max);
        !          1214:                     if (temp == NULL) goto mem_error;
        !          1215:                     ret = temp;
1.1       misho    1216:                }
                   1217:                ret[len++] = *p++;
                   1218:                ret[len++] = *p++;
                   1219:                ret[len++] = *p++;
                   1220:            }
                   1221:            while (*p != 0) {
                   1222:                if (len + 3 >= max) {
1.1.1.3 ! misho    1223:                     temp = xmlSaveUriRealloc(ret, &max);
        !          1224:                     if (temp == NULL) goto mem_error;
        !          1225:                     ret = temp;
1.1       misho    1226:                }
                   1227:                if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
                   1228:                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
                   1229:                    ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
                   1230:                    ((*(p) == ',')))
                   1231:                    ret[len++] = *p++;
                   1232:                else {
                   1233:                    int val = *(unsigned char *)p++;
                   1234:                    int hi = val / 0x10, lo = val % 0x10;
                   1235:                    ret[len++] = '%';
                   1236:                    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
                   1237:                    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
                   1238:                }
                   1239:            }
                   1240:        }
                   1241:        if (uri->query_raw != NULL) {
                   1242:            if (len + 1 >= max) {
1.1.1.3 ! misho    1243:                 temp = xmlSaveUriRealloc(ret, &max);
        !          1244:                 if (temp == NULL) goto mem_error;
        !          1245:                 ret = temp;
1.1       misho    1246:            }
                   1247:            ret[len++] = '?';
                   1248:            p = uri->query_raw;
                   1249:            while (*p != 0) {
                   1250:                if (len + 1 >= max) {
1.1.1.3 ! misho    1251:                     temp = xmlSaveUriRealloc(ret, &max);
        !          1252:                     if (temp == NULL) goto mem_error;
        !          1253:                     ret = temp;
1.1       misho    1254:                }
                   1255:                ret[len++] = *p++;
                   1256:            }
                   1257:        } else if (uri->query != NULL) {
                   1258:            if (len + 3 >= max) {
1.1.1.3 ! misho    1259:                 temp = xmlSaveUriRealloc(ret, &max);
        !          1260:                 if (temp == NULL) goto mem_error;
        !          1261:                 ret = temp;
1.1       misho    1262:            }
                   1263:            ret[len++] = '?';
                   1264:            p = uri->query;
                   1265:            while (*p != 0) {
                   1266:                if (len + 3 >= max) {
1.1.1.3 ! misho    1267:                     temp = xmlSaveUriRealloc(ret, &max);
        !          1268:                     if (temp == NULL) goto mem_error;
        !          1269:                     ret = temp;
1.1       misho    1270:                }
1.1.1.3 ! misho    1271:                if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1.1       misho    1272:                    ret[len++] = *p++;
                   1273:                else {
                   1274:                    int val = *(unsigned char *)p++;
                   1275:                    int hi = val / 0x10, lo = val % 0x10;
                   1276:                    ret[len++] = '%';
                   1277:                    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
                   1278:                    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
                   1279:                }
                   1280:            }
                   1281:        }
                   1282:     }
                   1283:     if (uri->fragment != NULL) {
                   1284:        if (len + 3 >= max) {
1.1.1.3 ! misho    1285:             temp = xmlSaveUriRealloc(ret, &max);
        !          1286:             if (temp == NULL) goto mem_error;
        !          1287:             ret = temp;
1.1       misho    1288:        }
                   1289:        ret[len++] = '#';
                   1290:        p = uri->fragment;
                   1291:        while (*p != 0) {
                   1292:            if (len + 3 >= max) {
1.1.1.3 ! misho    1293:                 temp = xmlSaveUriRealloc(ret, &max);
        !          1294:                 if (temp == NULL) goto mem_error;
        !          1295:                 ret = temp;
1.1       misho    1296:            }
1.1.1.3 ! misho    1297:            if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1.1       misho    1298:                ret[len++] = *p++;
                   1299:            else {
                   1300:                int val = *(unsigned char *)p++;
                   1301:                int hi = val / 0x10, lo = val % 0x10;
                   1302:                ret[len++] = '%';
                   1303:                ret[len++] = hi + (hi > 9? 'A'-10 : '0');
                   1304:                ret[len++] = lo + (lo > 9? 'A'-10 : '0');
                   1305:            }
                   1306:        }
                   1307:     }
                   1308:     if (len >= max) {
1.1.1.3 ! misho    1309:         temp = xmlSaveUriRealloc(ret, &max);
        !          1310:         if (temp == NULL) goto mem_error;
        !          1311:         ret = temp;
1.1       misho    1312:     }
                   1313:     ret[len] = 0;
                   1314:     return(ret);
1.1.1.3 ! misho    1315: 
        !          1316: mem_error:
        !          1317:     xmlFree(ret);
        !          1318:     return(NULL);
1.1       misho    1319: }
                   1320: 
                   1321: /**
                   1322:  * xmlPrintURI:
                   1323:  * @stream:  a FILE* for the output
                   1324:  * @uri:  pointer to an xmlURI
                   1325:  *
                   1326:  * Prints the URI in the stream @stream.
                   1327:  */
                   1328: void
                   1329: xmlPrintURI(FILE *stream, xmlURIPtr uri) {
                   1330:     xmlChar *out;
                   1331: 
                   1332:     out = xmlSaveUri(uri);
                   1333:     if (out != NULL) {
                   1334:        fprintf(stream, "%s", (char *) out);
                   1335:        xmlFree(out);
                   1336:     }
                   1337: }
                   1338: 
                   1339: /**
                   1340:  * xmlCleanURI:
                   1341:  * @uri:  pointer to an xmlURI
                   1342:  *
                   1343:  * Make sure the xmlURI struct is free of content
                   1344:  */
                   1345: static void
                   1346: xmlCleanURI(xmlURIPtr uri) {
                   1347:     if (uri == NULL) return;
                   1348: 
                   1349:     if (uri->scheme != NULL) xmlFree(uri->scheme);
                   1350:     uri->scheme = NULL;
                   1351:     if (uri->server != NULL) xmlFree(uri->server);
                   1352:     uri->server = NULL;
                   1353:     if (uri->user != NULL) xmlFree(uri->user);
                   1354:     uri->user = NULL;
                   1355:     if (uri->path != NULL) xmlFree(uri->path);
                   1356:     uri->path = NULL;
                   1357:     if (uri->fragment != NULL) xmlFree(uri->fragment);
                   1358:     uri->fragment = NULL;
                   1359:     if (uri->opaque != NULL) xmlFree(uri->opaque);
                   1360:     uri->opaque = NULL;
                   1361:     if (uri->authority != NULL) xmlFree(uri->authority);
                   1362:     uri->authority = NULL;
                   1363:     if (uri->query != NULL) xmlFree(uri->query);
                   1364:     uri->query = NULL;
                   1365:     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
                   1366:     uri->query_raw = NULL;
                   1367: }
                   1368: 
                   1369: /**
                   1370:  * xmlFreeURI:
                   1371:  * @uri:  pointer to an xmlURI
                   1372:  *
                   1373:  * Free up the xmlURI struct
                   1374:  */
                   1375: void
                   1376: xmlFreeURI(xmlURIPtr uri) {
                   1377:     if (uri == NULL) return;
                   1378: 
                   1379:     if (uri->scheme != NULL) xmlFree(uri->scheme);
                   1380:     if (uri->server != NULL) xmlFree(uri->server);
                   1381:     if (uri->user != NULL) xmlFree(uri->user);
                   1382:     if (uri->path != NULL) xmlFree(uri->path);
                   1383:     if (uri->fragment != NULL) xmlFree(uri->fragment);
                   1384:     if (uri->opaque != NULL) xmlFree(uri->opaque);
                   1385:     if (uri->authority != NULL) xmlFree(uri->authority);
                   1386:     if (uri->query != NULL) xmlFree(uri->query);
                   1387:     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
                   1388:     xmlFree(uri);
                   1389: }
                   1390: 
                   1391: /************************************************************************
                   1392:  *                                                                     *
                   1393:  *                     Helper functions                                *
                   1394:  *                                                                     *
                   1395:  ************************************************************************/
                   1396: 
                   /**
                    * xmlNormalizeURIPath:
                    * @path:  pointer to the path string
                    *
                    * Applies the 5 normalization steps to a path string--that is, RFC 2396
                    * Section 5.2, steps 6.c through 6.g.
                    *
                    * The string is rewritten in place. Characters are only ever removed, so
                    * the result fits in the caller's buffer and nothing is allocated.
                    *
                    * Returns 0 on success, -1 when @path is NULL
                    */
                   int
                   xmlNormalizeURIPath(char *path) {
                       char *src, *dst;

                       if (path == NULL)
                           return(-1);

                       /*
                        * Leading "/" characters are left untouched: move to the first
                        * character of the first non-empty segment.
                        */
                       src = path;
                       while (*src == '/')
                           src++;
                       if (*src == '\0')
                           return(0);

                       /* Everything before this point is kept as is. */
                       dst = src;

                       /*
                        * First pass, cases (c) and (d): copy the segments down to dst,
                        * dropping "." segments and squeezing runs of "/".
                        */
                       while (*src != '\0') {
                           if (*src == '.') {
                               /*
                                * c) All occurrences of "./", where "." is a complete path
                                *    segment, are removed. Extra "/" following it are
                                *    skipped at the same time.
                                */
                               if (src[1] == '/') {
                                   src += 2;
                                   while (*src == '/')
                                       src++;
                                   continue;
                               }
                               /*
                                * d) If the string ends with "." as a complete path
                                *    segment, that "." is removed.
                                */
                               if (src[1] == '\0')
                                   break;
                           }

                           /* Any other segment is kept. */
                           while ((*src != '/') && (*src != '\0'))
                               *dst++ = *src++;
                           if (*src == '\0')
                               break;

                           /* normalize "//": src sits on a "/", keep only the last one */
                           while (src[1] == '/')
                               src++;
                           *dst++ = *src++;
                       }
                       *dst = '\0';

                       /* Go back to the start of the first segment for the second pass. */
                       src = path;
                       while (*src == '/')
                           src++;
                       if (*src == '\0')
                           return(0);

                       /*
                        * Second pass, cases (e) and (f).
                        *
                        * e) All occurrences of "<segment>/../", where <segment> is a
                        *    complete path segment not equal to "..", are removed from the
                        *    buffer string.  Removal is performed iteratively, removing the
                        *    leftmost matching pattern on each iteration, until no matching
                        *    pattern remains.
                        *
                        * f) If the buffer string ends with "<segment>/..", where <segment>
                        *    is a complete path segment not equal to "..", that
                        *    "<segment>/.." is removed.
                        *
                        * The buffer is compacted as soon as a pattern is found, so a single
                        * "current segment" pointer (src) is enough to drive the iteration.
                        */
                       for (;;) {
                           char *next, *scan;

                           /* src is the first character of the segment under examination;
                            * look for the end of that segment. */
                           next = src;
                           while ((*next != '/') && (*next != '\0'))
                               next++;

                           /* Last segment: at least two segments are needed for (e)/(f). */
                           if (*next == '\0')
                               break;

                           /* next becomes the first character of the following segment. */
                           next++;

                           /*
                            * Keep the current segment when it is ".." itself, or when the
                            * following segment is not a complete "..".
                            */
                           if (((src[0] == '.') && (src[1] == '.') && (next == src + 3)) ||
                               !((next[0] == '.') && (next[1] == '.') &&
                                 ((next[2] == '/') || (next[2] == '\0')))) {
                               src = next;
                               continue;
                           }

                           /* f) "<segment>/.." closes the buffer: cut at the segment. */
                           if (next[2] == '\0') {
                               *src = '\0';
                               break;
                           }

                           /*
                            * e) Remove "<segment>/../" by moving the tail of the string
                            *    over it. Source and destination overlap, hence the manual
                            *    forward copy (terminator included) instead of strcpy().
                            */
                           scan = src;
                           next += 3;
                           do {
                               *scan = *next++;
                           } while (*scan++ != '\0');

                           /*
                            * Walk back over the separators preceding src. Reaching the
                            * start of the buffer means there is no previous segment, so
                            * the examination simply resumes from src.
                            */
                           scan = src;
                           while (scan > path) {
                               scan--;
                               if (*scan != '/')
                                   break;
                           }
                           if (scan == path)
                               continue;

                           /*
                            * scan is on the last character of the previous segment; back
                            * up to its start and examine it again. This is what handles
                            * inputs like "foo/bar/../..": once "bar/.." is removed, "foo"
                            * is followed by ".." and must be removed as well.
                            */
                           src = scan;
                           while ((src > path) && (src[-1] != '/'))
                               src--;
                       }
                       /*
                        * dst still designates the end of the string as it stood after the
                        * first pass. The second pass can only have shortened the string, so
                        * this store lands on or past the current terminator.
                        */
                       *dst = '\0';

                       /*
                        * g) If the resulting buffer string still begins with one or more
                        *    complete path segments of "..", then the reference is
                        *    considered to be in error. Implementations may handle this
                        *    error by retaining these components in the resolved path, by
                        *    removing them from the resolved path, or by avoiding traversal
                        *    of the reference.
                        *
                        * They are discarded here, for absolute paths only.
                        */
                       if (path[0] == '/') {
                           src = path;
                           while ((src[0] == '/') && (src[1] == '.') && (src[2] == '.') &&
                                  ((src[3] == '/') || (src[3] == '\0')))
                               src += 3;

                           if (src != path) {
                               dst = path;
                               while (*src != '\0')
                                   *dst++ = *src++;
                               *dst = 0;
                           }
                       }

                       return(0);
                   }
                   1585: 
                   /*
                    * is_hex:
                    * @c:  the character to test
                    *
                    * Returns 1 when @c is one of 0-9, a-f or A-F, 0 otherwise
                    */
                   static int is_hex(char c) {
                       switch (c) {
                           case '0': case '1': case '2': case '3': case '4':
                           case '5': case '6': case '7': case '8': case '9':
                           case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
                           case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
                               return(1);
                           default:
                               return(0);
                       }
                   }
                   1593: 
                   /**
                    * xmlURIUnescapeString:
                    * @str:  the string to unescape
                    * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
                    * @target:  optional destination buffer
                    *
                    * Unescaping routine, but does not check that the string is an URI. Each
                    * "%XX" sequence made of two hexadecimal digits is replaced by the byte
                    * it encodes (no character encoding is applied); every other byte,
                    * including a '%' not followed by two hexadecimal digits, is copied
                    * unchanged. The result is therefore never longer than the input.
                    *
                    * When @target is NULL the result is stored in a new buffer of @len + 1
                    * bytes obtained from xmlMallocAtomic(); otherwise it is written to
                    * @target, which receives up to @len bytes plus a terminating 0.
                    *
                    * Returns the unescaped string (@target when it was provided), or NULL
                    * in case of error
                    */
                   char *
                   xmlURIUnescapeString(const char *str, int len, char *target) {
                       char *ret, *out;
                       const char *in;

                       if (str == NULL)
                           return(NULL);
                       if (len <= 0)
                           len = strlen(str);
                       if (len < 0)
                           return(NULL);

                       ret = target;
                       if (ret == NULL) {
                           ret = (char *) xmlMallocAtomic(len + 1);
                           if (ret == NULL) {
                               xmlURIErrMemory("unescaping URI value\n");
                               return(NULL);
                           }
                       }

                       in = str;
                       out = ret;
                       while (len > 0) {
                           int value, i;

                           /* Anything that is not a complete "%XX" triplet is copied. */
                           if ((len <= 2) || (*in != '%') ||
                               (!is_hex(in[1])) || (!is_hex(in[2]))) {
                               *out++ = *in++;
                               len--;
                               continue;
                           }

                           /* Fold the two hexadecimal digits into a single byte value. */
                           value = 0;
                           for (i = 1; i <= 2; i++) {
                               char digit = in[i];

                               value *= 16;
                               if ((digit >= '0') && (digit <= '9'))
                                   value += digit - '0';
                               else if ((digit >= 'a') && (digit <= 'f'))
                                   value += (digit - 'a') + 10;
                               else if ((digit >= 'A') && (digit <= 'F'))
                                   value += (digit - 'A') + 10;
                           }
                           *out++ = value;
                           in += 3;
                           len -= 3;
                       }
                       *out = 0;
                       return(ret);
                   }
                   1655: 
                   1656: /**
                   1657:  * xmlURIEscapeStr:
                   1658:  * @str:  string to escape
                   1659:  * @list: exception list string of chars not to escape
                   1660:  *
                   1661:  * This routine escapes a string to hex, ignoring reserved characters (a-z)
                   1662:  * and the characters in the exception list.
                   1663:  *
                   1664:  * Returns a new escaped string or NULL in case of error.
                   1665:  */
                   1666: xmlChar *
                   1667: xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
                   1668:     xmlChar *ret, ch;
                   1669:     xmlChar *temp;
                   1670:     const xmlChar *in;
1.1.1.3 ! misho    1671:     int len, out;
1.1       misho    1672: 
                   1673:     if (str == NULL)
                   1674:        return(NULL);
                   1675:     if (str[0] == 0)
                   1676:        return(xmlStrdup(str));
                   1677:     len = xmlStrlen(str);
                   1678:     if (!(len > 0)) return(NULL);
                   1679: 
                   1680:     len += 20;
                   1681:     ret = (xmlChar *) xmlMallocAtomic(len);
                   1682:     if (ret == NULL) {
1.1.1.3 ! misho    1683:         xmlURIErrMemory("escaping URI value\n");
1.1       misho    1684:        return(NULL);
                   1685:     }
                   1686:     in = (const xmlChar *) str;
                   1687:     out = 0;
                   1688:     while(*in != 0) {
                   1689:        if (len - out <= 3) {
1.1.1.3 ! misho    1690:             temp = xmlSaveUriRealloc(ret, &len);
1.1       misho    1691:            if (temp == NULL) {
1.1.1.3 ! misho    1692:                 xmlURIErrMemory("escaping URI value\n");
1.1       misho    1693:                xmlFree(ret);
                   1694:                return(NULL);
                   1695:            }
                   1696:            ret = temp;
                   1697:        }
                   1698: 
                   1699:        ch = *in;
                   1700: 
                   1701:        if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
                   1702:            unsigned char val;
                   1703:            ret[out++] = '%';
                   1704:            val = ch >> 4;
                   1705:            if (val <= 9)
                   1706:                ret[out++] = '0' + val;
                   1707:            else
                   1708:                ret[out++] = 'A' + val - 0xA;
                   1709:            val = ch & 0xF;
                   1710:            if (val <= 9)
                   1711:                ret[out++] = '0' + val;
                   1712:            else
                   1713:                ret[out++] = 'A' + val - 0xA;
                   1714:            in++;
                   1715:        } else {
                   1716:            ret[out++] = *in++;
                   1717:        }
                   1718: 
                   1719:     }
                   1720:     ret[out] = 0;
                   1721:     return(ret);
                   1722: }
                   1723: 
                   1724: /**
                   1725:  * xmlURIEscape:
                   1726:  * @str:  the string of the URI to escape
                   1727:  *
                   1728:  * Escaping routine, does not do validity checks !
                   1729:  * It will try to escape the chars needing this, but this is heuristic
                   1730:  * based it's impossible to be sure.
                   1731:  *
                   1732:  * Returns an copy of the string, but escaped
                   1733:  *
                   1734:  * 25 May 2001
                   1735:  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
                   1736:  * according to RFC2396.
                   1737:  *   - Carl Douglas
                   1738:  */
                   1739: xmlChar *
                   1740: xmlURIEscape(const xmlChar * str)
                   1741: {
                   1742:     xmlChar *ret, *segment = NULL;
                   1743:     xmlURIPtr uri;
                   1744:     int ret2;
                   1745: 
                   1746: #define NULLCHK(p) if(!p) { \
1.1.1.3 ! misho    1747:          xmlURIErrMemory("escaping URI value\n"); \
        !          1748:          xmlFreeURI(uri); \
        !          1749:          return NULL; } \
1.1       misho    1750: 
                   1751:     if (str == NULL)
                   1752:         return (NULL);
                   1753: 
                   1754:     uri = xmlCreateURI();
                   1755:     if (uri != NULL) {
                   1756:        /*
                   1757:         * Allow escaping errors in the unescaped form
                   1758:         */
                   1759:         uri->cleanup = 1;
                   1760:         ret2 = xmlParseURIReference(uri, (const char *)str);
                   1761:         if (ret2) {
                   1762:             xmlFreeURI(uri);
                   1763:             return (NULL);
                   1764:         }
                   1765:     }
                   1766: 
                   1767:     if (!uri)
                   1768:         return NULL;
                   1769: 
                   1770:     ret = NULL;
                   1771: 
                   1772:     if (uri->scheme) {
                   1773:         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
                   1774:         NULLCHK(segment)
                   1775:         ret = xmlStrcat(ret, segment);
                   1776:         ret = xmlStrcat(ret, BAD_CAST ":");
                   1777:         xmlFree(segment);
                   1778:     }
                   1779: 
                   1780:     if (uri->authority) {
                   1781:         segment =
                   1782:             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
                   1783:         NULLCHK(segment)
                   1784:         ret = xmlStrcat(ret, BAD_CAST "//");
                   1785:         ret = xmlStrcat(ret, segment);
                   1786:         xmlFree(segment);
                   1787:     }
                   1788: 
                   1789:     if (uri->user) {
                   1790:         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
                   1791:         NULLCHK(segment)
1.1.1.3 ! misho    1792:                ret = xmlStrcat(ret,BAD_CAST "//");
1.1       misho    1793:         ret = xmlStrcat(ret, segment);
                   1794:         ret = xmlStrcat(ret, BAD_CAST "@");
                   1795:         xmlFree(segment);
                   1796:     }
                   1797: 
                   1798:     if (uri->server) {
                   1799:         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
                   1800:         NULLCHK(segment)
                   1801:                if (uri->user == NULL)
                   1802:                ret = xmlStrcat(ret, BAD_CAST "//");
                   1803:         ret = xmlStrcat(ret, segment);
                   1804:         xmlFree(segment);
                   1805:     }
                   1806: 
                   1807:     if (uri->port) {
                   1808:         xmlChar port[10];
                   1809: 
                   1810:         snprintf((char *) port, 10, "%d", uri->port);
                   1811:         ret = xmlStrcat(ret, BAD_CAST ":");
                   1812:         ret = xmlStrcat(ret, port);
                   1813:     }
                   1814: 
                   1815:     if (uri->path) {
                   1816:         segment =
                   1817:             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
                   1818:         NULLCHK(segment)
                   1819:         ret = xmlStrcat(ret, segment);
                   1820:         xmlFree(segment);
                   1821:     }
                   1822: 
                   1823:     if (uri->query_raw) {
                   1824:         ret = xmlStrcat(ret, BAD_CAST "?");
                   1825:         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
                   1826:     }
                   1827:     else if (uri->query) {
                   1828:         segment =
                   1829:             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
                   1830:         NULLCHK(segment)
                   1831:         ret = xmlStrcat(ret, BAD_CAST "?");
                   1832:         ret = xmlStrcat(ret, segment);
                   1833:         xmlFree(segment);
                   1834:     }
                   1835: 
                   1836:     if (uri->opaque) {
                   1837:         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
                   1838:         NULLCHK(segment)
                   1839:         ret = xmlStrcat(ret, segment);
                   1840:         xmlFree(segment);
                   1841:     }
                   1842: 
                   1843:     if (uri->fragment) {
                   1844:         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
                   1845:         NULLCHK(segment)
                   1846:         ret = xmlStrcat(ret, BAD_CAST "#");
                   1847:         ret = xmlStrcat(ret, segment);
                   1848:         xmlFree(segment);
                   1849:     }
                   1850: 
                   1851:     xmlFreeURI(uri);
                   1852: #undef NULLCHK
                   1853: 
                   1854:     return (ret);
                   1855: }
                   1856: 
                   1857: /************************************************************************
                   1858:  *                                                                     *
                   1859:  *                     Public functions                                *
                   1860:  *                                                                     *
                   1861:  ************************************************************************/
                   1862: 
                   1863: /**
                   1864:  * xmlBuildURI:
                   1865:  * @URI:  the URI instance found in the document
                   1866:  * @base:  the base value
                   1867:  *
                   1868:  * Computes he final URI of the reference done by checking that
                   1869:  * the given URI is valid, and building the final URI using the
1.1.1.3 ! misho    1870:  * base URI. This is processed according to section 5.2 of the
1.1       misho    1871:  * RFC 2396
                   1872:  *
                   1873:  * 5.2. Resolving Relative References to Absolute Form
                   1874:  *
                   1875:  * Returns a new URI string (to be freed by the caller) or NULL in case
                   1876:  *         of error.
                   1877:  */
                   1878: xmlChar *
                   1879: xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
                   1880:     xmlChar *val = NULL;
                   1881:     int ret, len, indx, cur, out;
                   1882:     xmlURIPtr ref = NULL;
                   1883:     xmlURIPtr bas = NULL;
                   1884:     xmlURIPtr res = NULL;
                   1885: 
                   1886:     /*
                   1887:      * 1) The URI reference is parsed into the potential four components and
                   1888:      *    fragment identifier, as described in Section 4.3.
                   1889:      *
                   1890:      *    NOTE that a completely empty URI is treated by modern browsers
                   1891:      *    as a reference to "." rather than as a synonym for the current
                   1892:      *    URI.  Should we do that here?
                   1893:      */
1.1.1.3 ! misho    1894:     if (URI == NULL)
1.1       misho    1895:        ret = -1;
                   1896:     else {
                   1897:        if (*URI) {
                   1898:            ref = xmlCreateURI();
                   1899:            if (ref == NULL)
                   1900:                goto done;
                   1901:            ret = xmlParseURIReference(ref, (const char *) URI);
                   1902:        }
                   1903:        else
                   1904:            ret = 0;
                   1905:     }
                   1906:     if (ret != 0)
                   1907:        goto done;
                   1908:     if ((ref != NULL) && (ref->scheme != NULL)) {
                   1909:        /*
                   1910:         * The URI is absolute don't modify.
                   1911:         */
                   1912:        val = xmlStrdup(URI);
                   1913:        goto done;
                   1914:     }
                   1915:     if (base == NULL)
                   1916:        ret = -1;
                   1917:     else {
                   1918:        bas = xmlCreateURI();
                   1919:        if (bas == NULL)
                   1920:            goto done;
                   1921:        ret = xmlParseURIReference(bas, (const char *) base);
                   1922:     }
                   1923:     if (ret != 0) {
                   1924:        if (ref)
                   1925:            val = xmlSaveUri(ref);
                   1926:        goto done;
                   1927:     }
                   1928:     if (ref == NULL) {
                   1929:        /*
                   1930:         * the base fragment must be ignored
                   1931:         */
                   1932:        if (bas->fragment != NULL) {
                   1933:            xmlFree(bas->fragment);
                   1934:            bas->fragment = NULL;
                   1935:        }
                   1936:        val = xmlSaveUri(bas);
                   1937:        goto done;
                   1938:     }
                   1939: 
                   1940:     /*
                   1941:      * 2) If the path component is empty and the scheme, authority, and
                   1942:      *    query components are undefined, then it is a reference to the
                   1943:      *    current document and we are done.  Otherwise, the reference URI's
                   1944:      *    query and fragment components are defined as found (or not found)
                   1945:      *    within the URI reference and not inherited from the base URI.
                   1946:      *
                   1947:      *    NOTE that in modern browsers, the parsing differs from the above
                   1948:      *    in the following aspect:  the query component is allowed to be
                   1949:      *    defined while still treating this as a reference to the current
                   1950:      *    document.
                   1951:      */
                   1952:     res = xmlCreateURI();
                   1953:     if (res == NULL)
                   1954:        goto done;
                   1955:     if ((ref->scheme == NULL) && (ref->path == NULL) &&
                   1956:        ((ref->authority == NULL) && (ref->server == NULL))) {
                   1957:        if (bas->scheme != NULL)
                   1958:            res->scheme = xmlMemStrdup(bas->scheme);
                   1959:        if (bas->authority != NULL)
                   1960:            res->authority = xmlMemStrdup(bas->authority);
                   1961:        else if (bas->server != NULL) {
                   1962:            res->server = xmlMemStrdup(bas->server);
                   1963:            if (bas->user != NULL)
                   1964:                res->user = xmlMemStrdup(bas->user);
1.1.1.3 ! misho    1965:            res->port = bas->port;
1.1       misho    1966:        }
                   1967:        if (bas->path != NULL)
                   1968:            res->path = xmlMemStrdup(bas->path);
                   1969:        if (ref->query_raw != NULL)
                   1970:            res->query_raw = xmlMemStrdup (ref->query_raw);
                   1971:        else if (ref->query != NULL)
                   1972:            res->query = xmlMemStrdup(ref->query);
                   1973:        else if (bas->query_raw != NULL)
                   1974:            res->query_raw = xmlMemStrdup(bas->query_raw);
                   1975:        else if (bas->query != NULL)
                   1976:            res->query = xmlMemStrdup(bas->query);
                   1977:        if (ref->fragment != NULL)
                   1978:            res->fragment = xmlMemStrdup(ref->fragment);
                   1979:        goto step_7;
                   1980:     }
                   1981: 
                   1982:     /*
                   1983:      * 3) If the scheme component is defined, indicating that the reference
                   1984:      *    starts with a scheme name, then the reference is interpreted as an
                   1985:      *    absolute URI and we are done.  Otherwise, the reference URI's
                   1986:      *    scheme is inherited from the base URI's scheme component.
                   1987:      */
                   1988:     if (ref->scheme != NULL) {
                   1989:        val = xmlSaveUri(ref);
                   1990:        goto done;
                   1991:     }
                   1992:     if (bas->scheme != NULL)
                   1993:        res->scheme = xmlMemStrdup(bas->scheme);
1.1.1.3 ! misho    1994: 
1.1       misho    1995:     if (ref->query_raw != NULL)
                   1996:        res->query_raw = xmlMemStrdup(ref->query_raw);
                   1997:     else if (ref->query != NULL)
                   1998:        res->query = xmlMemStrdup(ref->query);
                   1999:     if (ref->fragment != NULL)
                   2000:        res->fragment = xmlMemStrdup(ref->fragment);
                   2001: 
                   2002:     /*
                   2003:      * 4) If the authority component is defined, then the reference is a
                   2004:      *    network-path and we skip to step 7.  Otherwise, the reference
                   2005:      *    URI's authority is inherited from the base URI's authority
                   2006:      *    component, which will also be undefined if the URI scheme does not
                   2007:      *    use an authority component.
                   2008:      */
                   2009:     if ((ref->authority != NULL) || (ref->server != NULL)) {
                   2010:        if (ref->authority != NULL)
                   2011:            res->authority = xmlMemStrdup(ref->authority);
                   2012:        else {
                   2013:            res->server = xmlMemStrdup(ref->server);
                   2014:            if (ref->user != NULL)
                   2015:                res->user = xmlMemStrdup(ref->user);
1.1.1.3 ! misho    2016:             res->port = ref->port;
1.1       misho    2017:        }
                   2018:        if (ref->path != NULL)
                   2019:            res->path = xmlMemStrdup(ref->path);
                   2020:        goto step_7;
                   2021:     }
                   2022:     if (bas->authority != NULL)
                   2023:        res->authority = xmlMemStrdup(bas->authority);
                   2024:     else if (bas->server != NULL) {
                   2025:        res->server = xmlMemStrdup(bas->server);
                   2026:        if (bas->user != NULL)
                   2027:            res->user = xmlMemStrdup(bas->user);
1.1.1.3 ! misho    2028:        res->port = bas->port;
1.1       misho    2029:     }
                   2030: 
                   2031:     /*
                   2032:      * 5) If the path component begins with a slash character ("/"), then
                   2033:      *    the reference is an absolute-path and we skip to step 7.
                   2034:      */
                   2035:     if ((ref->path != NULL) && (ref->path[0] == '/')) {
                   2036:        res->path = xmlMemStrdup(ref->path);
                   2037:        goto step_7;
                   2038:     }
                   2039: 
                   2040: 
                   2041:     /*
                   2042:      * 6) If this step is reached, then we are resolving a relative-path
                   2043:      *    reference.  The relative path needs to be merged with the base
                   2044:      *    URI's path.  Although there are many ways to do this, we will
                   2045:      *    describe a simple method using a separate string buffer.
                   2046:      *
                   2047:      * Allocate a buffer large enough for the result string.
                   2048:      */
                   2049:     len = 2; /* extra / and 0 */
                   2050:     if (ref->path != NULL)
                   2051:        len += strlen(ref->path);
                   2052:     if (bas->path != NULL)
                   2053:        len += strlen(bas->path);
                   2054:     res->path = (char *) xmlMallocAtomic(len);
                   2055:     if (res->path == NULL) {
1.1.1.3 ! misho    2056:         xmlURIErrMemory("resolving URI against base\n");
1.1       misho    2057:        goto done;
                   2058:     }
                   2059:     res->path[0] = 0;
                   2060: 
                   2061:     /*
                   2062:      * a) All but the last segment of the base URI's path component is
                   2063:      *    copied to the buffer.  In other words, any characters after the
                   2064:      *    last (right-most) slash character, if any, are excluded.
                   2065:      */
                   2066:     cur = 0;
                   2067:     out = 0;
                   2068:     if (bas->path != NULL) {
                   2069:        while (bas->path[cur] != 0) {
                   2070:            while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
                   2071:                cur++;
                   2072:            if (bas->path[cur] == 0)
                   2073:                break;
                   2074: 
                   2075:            cur++;
                   2076:            while (out < cur) {
                   2077:                res->path[out] = bas->path[out];
                   2078:                out++;
                   2079:            }
                   2080:        }
                   2081:     }
                   2082:     res->path[out] = 0;
                   2083: 
                   2084:     /*
                   2085:      * b) The reference's path component is appended to the buffer
                   2086:      *    string.
                   2087:      */
                   2088:     if (ref->path != NULL && ref->path[0] != 0) {
                   2089:        indx = 0;
                   2090:        /*
                   2091:         * Ensure the path includes a '/'
                   2092:         */
                   2093:        if ((out == 0) && (bas->server != NULL))
                   2094:            res->path[out++] = '/';
                   2095:        while (ref->path[indx] != 0) {
                   2096:            res->path[out++] = ref->path[indx++];
                   2097:        }
                   2098:     }
                   2099:     res->path[out] = 0;
                   2100: 
                   2101:     /*
                   2102:      * Steps c) to h) are really path normalization steps
                   2103:      */
                   2104:     xmlNormalizeURIPath(res->path);
                   2105: 
                   2106: step_7:
                   2107: 
                   2108:     /*
                   2109:      * 7) The resulting URI components, including any inherited from the
                   2110:      *    base URI, are recombined to give the absolute form of the URI
                   2111:      *    reference.
                   2112:      */
                   2113:     val = xmlSaveUri(res);
                   2114: 
                   2115: done:
                   2116:     if (ref != NULL)
                   2117:        xmlFreeURI(ref);
                   2118:     if (bas != NULL)
                   2119:        xmlFreeURI(bas);
                   2120:     if (res != NULL)
                   2121:        xmlFreeURI(res);
                   2122:     return(val);
                   2123: }
                   2124: 
                   2125: /**
                   2126:  * xmlBuildRelativeURI:
                   2127:  * @URI:  the URI reference under consideration
                   2128:  * @base:  the base value
                   2129:  *
                   2130:  * Expresses the URI of the reference in terms relative to the
                   2131:  * base.  Some examples of this operation include:
                   2132:  *     base = "http://site1.com/docs/book1.html"
                   2133:  *        URI input                        URI returned
                   2134:  *     docs/pic1.gif                    pic1.gif
                   2135:  *     docs/img/pic1.gif                img/pic1.gif
                   2136:  *     img/pic1.gif                     ../img/pic1.gif
                   2137:  *     http://site1.com/docs/pic1.gif   pic1.gif
                   2138:  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
                   2139:  *
                   2140:  *     base = "docs/book1.html"
                   2141:  *        URI input                        URI returned
                   2142:  *     docs/pic1.gif                    pic1.gif
                   2143:  *     docs/img/pic1.gif                img/pic1.gif
                   2144:  *     img/pic1.gif                     ../img/pic1.gif
                   2145:  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
                   2146:  *
                   2147:  *
                   2148:  * Note: if the URI reference is really wierd or complicated, it may be
                   2149:  *       worthwhile to first convert it into a "nice" one by calling
                   2150:  *       xmlBuildURI (using 'base') before calling this routine,
                   2151:  *       since this routine (for reasonable efficiency) assumes URI has
                   2152:  *       already been through some validation.
                   2153:  *
                   2154:  * Returns a new URI string (to be freed by the caller) or NULL in case
                   2155:  * error.
                   2156:  */
                   2157: xmlChar *
                   2158: xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
                   2159: {
                   2160:     xmlChar *val = NULL;
                   2161:     int ret;
                   2162:     int ix;
                   2163:     int pos = 0;
                   2164:     int nbslash = 0;
                   2165:     int len;
                   2166:     xmlURIPtr ref = NULL;
                   2167:     xmlURIPtr bas = NULL;
                   2168:     xmlChar *bptr, *uptr, *vptr;
                   2169:     int remove_path = 0;
                   2170: 
                   2171:     if ((URI == NULL) || (*URI == 0))
                   2172:        return NULL;
                   2173: 
                   2174:     /*
                   2175:      * First parse URI into a standard form
                   2176:      */
                   2177:     ref = xmlCreateURI ();
                   2178:     if (ref == NULL)
                   2179:        return NULL;
                   2180:     /* If URI not already in "relative" form */
                   2181:     if (URI[0] != '.') {
                   2182:        ret = xmlParseURIReference (ref, (const char *) URI);
                   2183:        if (ret != 0)
                   2184:            goto done;          /* Error in URI, return NULL */
                   2185:     } else
                   2186:        ref->path = (char *)xmlStrdup(URI);
                   2187: 
                   2188:     /*
                   2189:      * Next parse base into the same standard form
                   2190:      */
                   2191:     if ((base == NULL) || (*base == 0)) {
                   2192:        val = xmlStrdup (URI);
                   2193:        goto done;
                   2194:     }
                   2195:     bas = xmlCreateURI ();
                   2196:     if (bas == NULL)
                   2197:        goto done;
                   2198:     if (base[0] != '.') {
                   2199:        ret = xmlParseURIReference (bas, (const char *) base);
                   2200:        if (ret != 0)
                   2201:            goto done;          /* Error in base, return NULL */
                   2202:     } else
                   2203:        bas->path = (char *)xmlStrdup(base);
                   2204: 
                   2205:     /*
                   2206:      * If the scheme / server on the URI differs from the base,
                   2207:      * just return the URI
                   2208:      */
                   2209:     if ((ref->scheme != NULL) &&
                   2210:        ((bas->scheme == NULL) ||
                   2211:         (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
                   2212:         (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
                   2213:        val = xmlStrdup (URI);
                   2214:        goto done;
                   2215:     }
                   2216:     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
                   2217:        val = xmlStrdup(BAD_CAST "");
                   2218:        goto done;
                   2219:     }
                   2220:     if (bas->path == NULL) {
                   2221:        val = xmlStrdup((xmlChar *)ref->path);
                   2222:        goto done;
                   2223:     }
                   2224:     if (ref->path == NULL) {
                   2225:         ref->path = (char *) "/";
                   2226:        remove_path = 1;
                   2227:     }
                   2228: 
                   2229:     /*
                   2230:      * At this point (at last!) we can compare the two paths
                   2231:      *
                   2232:      * First we take care of the special case where either of the
                   2233:      * two path components may be missing (bug 316224)
                   2234:      */
                   2235:     if (bas->path == NULL) {
                   2236:        if (ref->path != NULL) {
                   2237:            uptr = (xmlChar *) ref->path;
                   2238:            if (*uptr == '/')
                   2239:                uptr++;
                   2240:            /* exception characters from xmlSaveUri */
                   2241:            val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
                   2242:        }
                   2243:        goto done;
                   2244:     }
                   2245:     bptr = (xmlChar *)bas->path;
                   2246:     if (ref->path == NULL) {
                   2247:        for (ix = 0; bptr[ix] != 0; ix++) {
                   2248:            if (bptr[ix] == '/')
                   2249:                nbslash++;
                   2250:        }
                   2251:        uptr = NULL;
                   2252:        len = 1;        /* this is for a string terminator only */
                   2253:     } else {
                   2254:     /*
                   2255:      * Next we compare the two strings and find where they first differ
                   2256:      */
                   2257:        if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
                   2258:             pos += 2;
                   2259:        if ((*bptr == '.') && (bptr[1] == '/'))
                   2260:             bptr += 2;
                   2261:        else if ((*bptr == '/') && (ref->path[pos] != '/'))
                   2262:            bptr++;
                   2263:        while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
                   2264:            pos++;
                   2265: 
                   2266:        if (bptr[pos] == ref->path[pos]) {
                   2267:            val = xmlStrdup(BAD_CAST "");
                   2268:            goto done;          /* (I can't imagine why anyone would do this) */
                   2269:        }
                   2270: 
                   2271:        /*
                   2272:         * In URI, "back up" to the last '/' encountered.  This will be the
                   2273:         * beginning of the "unique" suffix of URI
                   2274:         */
                   2275:        ix = pos;
                   2276:        if ((ref->path[ix] == '/') && (ix > 0))
                   2277:            ix--;
                   2278:        else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
                   2279:            ix -= 2;
                   2280:        for (; ix > 0; ix--) {
                   2281:            if (ref->path[ix] == '/')
                   2282:                break;
                   2283:        }
                   2284:        if (ix == 0) {
                   2285:            uptr = (xmlChar *)ref->path;
                   2286:        } else {
                   2287:            ix++;
                   2288:            uptr = (xmlChar *)&ref->path[ix];
                   2289:        }
                   2290: 
                   2291:        /*
                   2292:         * In base, count the number of '/' from the differing point
                   2293:         */
                   2294:        if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
                   2295:            for (; bptr[ix] != 0; ix++) {
                   2296:                if (bptr[ix] == '/')
                   2297:                    nbslash++;
                   2298:            }
                   2299:        }
                   2300:        len = xmlStrlen (uptr) + 1;
                   2301:     }
1.1.1.3 ! misho    2302: 
1.1       misho    2303:     if (nbslash == 0) {
                   2304:        if (uptr != NULL)
                   2305:            /* exception characters from xmlSaveUri */
                   2306:            val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
                   2307:        goto done;
                   2308:     }
                   2309: 
                   2310:     /*
                   2311:      * Allocate just enough space for the returned string -
                   2312:      * length of the remainder of the URI, plus enough space
                   2313:      * for the "../" groups, plus one for the terminator
                   2314:      */
                   2315:     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
                   2316:     if (val == NULL) {
1.1.1.3 ! misho    2317:         xmlURIErrMemory("building relative URI\n");
1.1       misho    2318:        goto done;
                   2319:     }
                   2320:     vptr = val;
                   2321:     /*
                   2322:      * Put in as many "../" as needed
                   2323:      */
                   2324:     for (; nbslash>0; nbslash--) {
                   2325:        *vptr++ = '.';
                   2326:        *vptr++ = '.';
                   2327:        *vptr++ = '/';
                   2328:     }
                   2329:     /*
                   2330:      * Finish up with the end of the URI
                   2331:      */
                   2332:     if (uptr != NULL) {
                   2333:         if ((vptr > val) && (len > 0) &&
                   2334:            (uptr[0] == '/') && (vptr[-1] == '/')) {
                   2335:            memcpy (vptr, uptr + 1, len - 1);
                   2336:            vptr[len - 2] = 0;
                   2337:        } else {
                   2338:            memcpy (vptr, uptr, len);
                   2339:            vptr[len - 1] = 0;
                   2340:        }
                   2341:     } else {
                   2342:        vptr[len - 1] = 0;
                   2343:     }
                   2344: 
                   2345:     /* escape the freshly-built path */
                   2346:     vptr = val;
                   2347:        /* exception characters from xmlSaveUri */
                   2348:     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
                   2349:     xmlFree(vptr);
                   2350: 
                   2351: done:
                   2352:     /*
                   2353:      * Free the working variables
                   2354:      */
                   2355:     if (remove_path != 0)
                   2356:         ref->path = NULL;
                   2357:     if (ref != NULL)
                   2358:        xmlFreeURI (ref);
                   2359:     if (bas != NULL)
                   2360:        xmlFreeURI (bas);
                   2361: 
                   2362:     return val;
                   2363: }
                   2364: 
                   2365: /**
                   2366:  * xmlCanonicPath:
                   2367:  * @path:  the resource locator in a filesystem notation
                   2368:  *
1.1.1.3 ! misho    2369:  * Constructs a canonic path from the specified path.
1.1       misho    2370:  *
1.1.1.3 ! misho    2371:  * Returns a new canonic path, or a duplicate of the path parameter if the
1.1       misho    2372:  * construction fails. The caller is responsible for freeing the memory occupied
1.1.1.3 ! misho    2373:  * by the returned string. If there is insufficient memory available, or the
1.1       misho    2374:  * argument is NULL, the function returns NULL.
                   2375:  */
/*
 * IS_WINDOWS_PATH:
 * @p:  a character pointer, may be NULL
 *
 * True when @p starts with an ASCII drive letter followed by ':' and
 * either '/' or '\\' (e.g. "C:/dir" or "c:\\dir"). The argument is
 * parenthesized so any pointer expression can be passed, but it is
 * evaluated several times: do not pass expressions with side effects.
 */
#define IS_WINDOWS_PATH(p)                                      \
        (((p) != NULL) &&                                       \
         ((((p)[0] >= 'a') && ((p)[0] <= 'z')) ||               \
          (((p)[0] >= 'A') && ((p)[0] <= 'Z'))) &&              \
         ((p)[1] == ':') && (((p)[2] == '/') || ((p)[2] == '\\')))
                   2381: xmlChar *
                   2382: xmlCanonicPath(const xmlChar *path)
                   2383: {
                   2384: /*
                   2385:  * For Windows implementations, additional work needs to be done to
                   2386:  * replace backslashes in pathnames with "forward slashes"
                   2387:  */
1.1.1.3 ! misho    2388: #if defined(_WIN32) && !defined(__CYGWIN__)
1.1       misho    2389:     int len = 0;
                   2390:     int i = 0;
                   2391:     xmlChar *p = NULL;
                   2392: #endif
                   2393:     xmlURIPtr uri;
                   2394:     xmlChar *ret;
                   2395:     const xmlChar *absuri;
                   2396: 
                   2397:     if (path == NULL)
                   2398:        return(NULL);
                   2399: 
1.1.1.3 ! misho    2400: #if defined(_WIN32)
        !          2401:     /*
        !          2402:      * We must not change the backslashes to slashes if the the path
        !          2403:      * starts with \\?\
        !          2404:      * Those paths can be up to 32k characters long.
        !          2405:      * Was added specifically for OpenOffice, those paths can't be converted
        !          2406:      * to URIs anyway.
        !          2407:      */
        !          2408:     if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
        !          2409:         (path[3] == '\\') )
        !          2410:        return xmlStrdup((const xmlChar *) path);
        !          2411: #endif
        !          2412: 
        !          2413:        /* sanitize filename starting with // so it can be used as URI */
1.1       misho    2414:     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
                   2415:         path++;
                   2416: 
                   2417:     if ((uri = xmlParseURI((const char *) path)) != NULL) {
                   2418:        xmlFreeURI(uri);
                   2419:        return xmlStrdup(path);
                   2420:     }
                   2421: 
                   2422:     /* Check if this is an "absolute uri" */
                   2423:     absuri = xmlStrstr(path, BAD_CAST "://");
                   2424:     if (absuri != NULL) {
                   2425:         int l, j;
                   2426:        unsigned char c;
                   2427:        xmlChar *escURI;
                   2428: 
                   2429:         /*
                   2430:         * this looks like an URI where some parts have not been
                   2431:         * escaped leading to a parsing problem.  Check that the first
                   2432:         * part matches a protocol.
                   2433:         */
                   2434:        l = absuri - path;
                   2435:        /* Bypass if first part (part before the '://') is > 20 chars */
                   2436:        if ((l <= 0) || (l > 20))
                   2437:            goto path_processing;
                   2438:        /* Bypass if any non-alpha characters are present in first part */
                   2439:        for (j = 0;j < l;j++) {
                   2440:            c = path[j];
                   2441:            if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
                   2442:                goto path_processing;
                   2443:        }
                   2444: 
                   2445:        /* Escape all except the characters specified in the supplied path */
                   2446:         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
                   2447:        if (escURI != NULL) {
                   2448:            /* Try parsing the escaped path */
                   2449:            uri = xmlParseURI((const char *) escURI);
                   2450:            /* If successful, return the escaped string */
                   2451:            if (uri != NULL) {
                   2452:                xmlFreeURI(uri);
                   2453:                return escURI;
                   2454:            }
                   2455:        }
                   2456:     }
                   2457: 
                   2458: path_processing:
                   2459: /* For Windows implementations, replace backslashes with 'forward slashes' */
1.1.1.3 ! misho    2460: #if defined(_WIN32) && !defined(__CYGWIN__)
1.1       misho    2461:     /*
                   2462:      * Create a URI structure
                   2463:      */
                   2464:     uri = xmlCreateURI();
                   2465:     if (uri == NULL) {         /* Guard against 'out of memory' */
                   2466:         return(NULL);
                   2467:     }
                   2468: 
                   2469:     len = xmlStrlen(path);
                   2470:     if ((len > 2) && IS_WINDOWS_PATH(path)) {
                   2471:         /* make the scheme 'file' */
                   2472:        uri->scheme = xmlStrdup(BAD_CAST "file");
                   2473:        /* allocate space for leading '/' + path + string terminator */
                   2474:        uri->path = xmlMallocAtomic(len + 2);
                   2475:        if (uri->path == NULL) {
                   2476:            xmlFreeURI(uri);    /* Guard agains 'out of memory' */
                   2477:            return(NULL);
                   2478:        }
                   2479:        /* Put in leading '/' plus path */
                   2480:        uri->path[0] = '/';
                   2481:        p = uri->path + 1;
                   2482:        strncpy(p, path, len + 1);
                   2483:     } else {
                   2484:        uri->path = xmlStrdup(path);
                   2485:        if (uri->path == NULL) {
                   2486:            xmlFreeURI(uri);
                   2487:            return(NULL);
                   2488:        }
                   2489:        p = uri->path;
                   2490:     }
                   2491:     /* Now change all occurences of '\' to '/' */
                   2492:     while (*p != '\0') {
                   2493:        if (*p == '\\')
                   2494:            *p = '/';
                   2495:        p++;
                   2496:     }
                   2497: 
                   2498:     if (uri->scheme == NULL) {
                   2499:        ret = xmlStrdup((const xmlChar *) uri->path);
                   2500:     } else {
                   2501:        ret = xmlSaveUri(uri);
                   2502:     }
                   2503: 
                   2504:     xmlFreeURI(uri);
                   2505: #else
                   2506:     ret = xmlStrdup((const xmlChar *) path);
                   2507: #endif
                   2508:     return(ret);
                   2509: }
                   2510: 
                   2511: /**
                   2512:  * xmlPathToURI:
                   2513:  * @path:  the resource locator in a filesystem notation
                   2514:  *
                   2515:  * Constructs an URI expressing the existing path
                   2516:  *
1.1.1.3 ! misho    2517:  * Returns a new URI, or a duplicate of the path parameter if the
1.1       misho    2518:  * construction fails. The caller is responsible for freeing the memory
                   2519:  * occupied by the returned string. If there is insufficient memory available,
                   2520:  * or the argument is NULL, the function returns NULL.
                   2521:  */
                   2522: xmlChar *
                   2523: xmlPathToURI(const xmlChar *path)
                   2524: {
                   2525:     xmlURIPtr uri;
                   2526:     xmlURI temp;
                   2527:     xmlChar *ret, *cal;
                   2528: 
                   2529:     if (path == NULL)
                   2530:         return(NULL);
                   2531: 
                   2532:     if ((uri = xmlParseURI((const char *) path)) != NULL) {
                   2533:        xmlFreeURI(uri);
                   2534:        return xmlStrdup(path);
                   2535:     }
                   2536:     cal = xmlCanonicPath(path);
                   2537:     if (cal == NULL)
                   2538:         return(NULL);
                   2539: #if defined(_WIN32) && !defined(__CYGWIN__)
1.1.1.3 ! misho    2540:     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
1.1       misho    2541:        If 'cal' is a valid URI allready then we are done here, as continuing would make
                   2542:        it invalid. */
                   2543:     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
                   2544:        xmlFreeURI(uri);
                   2545:        return cal;
                   2546:     }
                   2547:     /* 'cal' can contain a relative path with backslashes. If that is processed
                   2548:        by xmlSaveURI, they will be escaped and the external entity loader machinery
                   2549:        will fail. So convert them to slashes. Misuse 'ret' for walking. */
                   2550:     ret = cal;
                   2551:     while (*ret != '\0') {
                   2552:        if (*ret == '\\')
                   2553:            *ret = '/';
                   2554:        ret++;
                   2555:     }
                   2556: #endif
                   2557:     memset(&temp, 0, sizeof(temp));
                   2558:     temp.path = (char *) cal;
                   2559:     ret = xmlSaveUri(&temp);
                   2560:     xmlFree(cal);
                   2561:     return(ret);
                   2562: }
                   2563: #define bottom_uri
                   2564: #include "elfgcchack.h"
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>