Annotation of embedaddon/curl/lib/urlapi.c, revision 1.1.1.1

1.1       misho       1: /***************************************************************************
                      2:  *                                  _   _ ____  _
                      3:  *  Project                     ___| | | |  _ \| |
                      4:  *                             / __| | | | |_) | |
                      5:  *                            | (__| |_| |  _ <| |___
                      6:  *                             \___|\___/|_| \_\_____|
                      7:  *
                      8:  * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al.
                      9:  *
                     10:  * This software is licensed as described in the file COPYING, which
                     11:  * you should have received as part of this distribution. The terms
                     12:  * are also available at https://curl.haxx.se/docs/copyright.html.
                     13:  *
                     14:  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
                     15:  * copies of the Software, and permit persons to whom the Software is
                     16:  * furnished to do so, under the terms of the COPYING file.
                     17:  *
                     18:  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
                     19:  * KIND, either express or implied.
                     20:  *
                     21:  ***************************************************************************/
                     22: 
                     23: #include "curl_setup.h"
                     24: 
                     25: #include "urldata.h"
                     26: #include "urlapi-int.h"
                     27: #include "strcase.h"
                     28: #include "dotdot.h"
                     29: #include "url.h"
                     30: #include "escape.h"
                     31: #include "curl_ctype.h"
                     32: #include "inet_pton.h"
                     33: 
                     34: /* The last 3 #include files should be in this order */
                     35: #include "curl_printf.h"
                     36: #include "curl_memory.h"
                     37: #include "memdebug.h"
                     38: 
                     39:   /* MSDOS/Windows style drive prefix, eg c: in c:foo */
                     40: #define STARTS_WITH_DRIVE_PREFIX(str) \
                     41:   ((('a' <= str[0] && str[0] <= 'z') || \
                     42:     ('A' <= str[0] && str[0] <= 'Z')) && \
                     43:    (str[1] == ':'))
                     44: 
                     45:   /* MSDOS/Windows style drive prefix, optionally with
                     46:    * a '|' instead of ':', followed by a slash or NUL */
                     47: #define STARTS_WITH_URL_DRIVE_PREFIX(str) \
                     48:   ((('a' <= (str)[0] && (str)[0] <= 'z') || \
                     49:     ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
                     50:    ((str)[1] == ':' || (str)[1] == '|') && \
                     51:    ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
                     52: 
                     53: /* Internal representation of CURLU. Point to URL-encoded strings. */
                     54: struct Curl_URL {
                     55:   char *scheme;
                     56:   char *user;
                     57:   char *password;
                     58:   char *options; /* IMAP only? */
                     59:   char *host;
                     60:   char *zoneid; /* for numerical IPv6 addresses */
                     61:   char *port;
                     62:   char *path;
                     63:   char *query;
                     64:   char *fragment;
                     65: 
                     66:   char *scratch; /* temporary scratch area */
                     67:   char *temppath; /* temporary path pointer */
                     68:   long portnum; /* the numerical version */
                     69: };
                     70: 
                     71: #define DEFAULT_SCHEME "https"
                     72: 
                     73: static void free_urlhandle(struct Curl_URL *u)
                     74: {
                     75:   free(u->scheme);
                     76:   free(u->user);
                     77:   free(u->password);
                     78:   free(u->options);
                     79:   free(u->host);
                     80:   free(u->zoneid);
                     81:   free(u->port);
                     82:   free(u->path);
                     83:   free(u->query);
                     84:   free(u->fragment);
                     85:   free(u->scratch);
                     86:   free(u->temppath);
                     87: }
                     88: 
                     89: /* move the full contents of one handle onto another and
                     90:    free the original */
                     91: static void mv_urlhandle(struct Curl_URL *from,
                     92:                          struct Curl_URL *to)
                     93: {
                     94:   free_urlhandle(to);
                     95:   *to = *from;
                     96:   free(from);
                     97: }
                     98: 
                     99: /*
                    100:  * Find the separator at the end of the host name, or the '?' in cases like
                    101:  * http://www.url.com?id=2380
                    102:  */
                    103: static const char *find_host_sep(const char *url)
                    104: {
                    105:   const char *sep;
                    106:   const char *query;
                    107: 
                    108:   /* Find the start of the hostname */
                    109:   sep = strstr(url, "//");
                    110:   if(!sep)
                    111:     sep = url;
                    112:   else
                    113:     sep += 2;
                    114: 
                    115:   query = strchr(sep, '?');
                    116:   sep = strchr(sep, '/');
                    117: 
                    118:   if(!sep)
                    119:     sep = url + strlen(url);
                    120: 
                    121:   if(!query)
                    122:     query = url + strlen(url);
                    123: 
                    124:   return sep < query ? sep : query;
                    125: }
                    126: 
                    127: /*
                    128:  * Decide in an encoding-independent manner whether a character in an
                    129:  * URL must be escaped. The same criterion must be used in strlen_url()
                    130:  * and strcpy_url().
                    131:  */
                    132: static bool urlchar_needs_escaping(int c)
                    133: {
                    134:     return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
                    135: }
                    136: 
                    137: /*
                    138:  * strlen_url() returns the length of the given URL if the spaces within the
                    139:  * URL were properly URL encoded.
                    140:  * URL encoding should be skipped for host names, otherwise IDN resolution
                    141:  * will fail.
                    142:  */
                    143: static size_t strlen_url(const char *url, bool relative)
                    144: {
                    145:   const unsigned char *ptr;
                    146:   size_t newlen = 0;
                    147:   bool left = TRUE; /* left side of the ? */
                    148:   const unsigned char *host_sep = (const unsigned char *) url;
                    149: 
                    150:   if(!relative)
                    151:     host_sep = (const unsigned char *) find_host_sep(url);
                    152: 
                    153:   for(ptr = (unsigned char *)url; *ptr; ptr++) {
                    154: 
                    155:     if(ptr < host_sep) {
                    156:       ++newlen;
                    157:       continue;
                    158:     }
                    159: 
                    160:     switch(*ptr) {
                    161:     case '?':
                    162:       left = FALSE;
                    163:       /* FALLTHROUGH */
                    164:     default:
                    165:       if(urlchar_needs_escaping(*ptr))
                    166:         newlen += 2;
                    167:       newlen++;
                    168:       break;
                    169:     case ' ':
                    170:       if(left)
                    171:         newlen += 3;
                    172:       else
                    173:         newlen++;
                    174:       break;
                    175:     }
                    176:   }
                    177:   return newlen;
                    178: }
                    179: 
                    180: /* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in
                    181:  * the source URL accordingly.
                    182:  * URL encoding should be skipped for host names, otherwise IDN resolution
                    183:  * will fail.
                    184:  */
                    185: static void strcpy_url(char *output, const char *url, bool relative)
                    186: {
                    187:   /* we must add this with whitespace-replacing */
                    188:   bool left = TRUE;
                    189:   const unsigned char *iptr;
                    190:   char *optr = output;
                    191:   const unsigned char *host_sep = (const unsigned char *) url;
                    192: 
                    193:   if(!relative)
                    194:     host_sep = (const unsigned char *) find_host_sep(url);
                    195: 
                    196:   for(iptr = (unsigned char *)url;    /* read from here */
                    197:       *iptr;         /* until zero byte */
                    198:       iptr++) {
                    199: 
                    200:     if(iptr < host_sep) {
                    201:       *optr++ = *iptr;
                    202:       continue;
                    203:     }
                    204: 
                    205:     switch(*iptr) {
                    206:     case '?':
                    207:       left = FALSE;
                    208:       /* FALLTHROUGH */
                    209:     default:
                    210:       if(urlchar_needs_escaping(*iptr)) {
                    211:         msnprintf(optr, 4, "%%%02x", *iptr);
                    212:         optr += 3;
                    213:       }
                    214:       else
                    215:         *optr++=*iptr;
                    216:       break;
                    217:     case ' ':
                    218:       if(left) {
                    219:         *optr++='%'; /* add a '%' */
                    220:         *optr++='2'; /* add a '2' */
                    221:         *optr++='0'; /* add a '0' */
                    222:       }
                    223:       else
                    224:         *optr++='+'; /* add a '+' here */
                    225:       break;
                    226:     }
                    227:   }
                    228:   *optr = 0; /* zero terminate output buffer */
                    229: 
                    230: }
                    231: 
                    232: /*
                    233:  * Returns true if the given URL is absolute (as opposed to relative) within
                    234:  * the buffer size. Returns the scheme in the buffer if TRUE and 'buf' is
                    235:  * non-NULL.
                    236:  */
                    237: bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
                    238: {
                    239:   size_t i;
                    240: #ifdef WIN32
                    241:   if(STARTS_WITH_DRIVE_PREFIX(url))
                    242:     return FALSE;
                    243: #endif
                    244:   for(i = 0; i < buflen && url[i]; ++i) {
                    245:     char s = url[i];
                    246:     if((s == ':') && (url[i + 1] == '/')) {
                    247:       if(buf)
                    248:         buf[i] = 0;
                    249:       return TRUE;
                    250:     }
                    251:     /* RFC 3986 3.1 explains:
                    252:       scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
                    253:     */
                    254:     else if(ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') ) {
                    255:       if(buf)
                    256:         buf[i] = (char)TOLOWER(s);
                    257:     }
                    258:     else
                    259:       break;
                    260:   }
                    261:   return FALSE;
                    262: }
                    263: 
                    264: /*
                    265:  * Concatenate a relative URL to a base URL making it absolute.
                    266:  * URL-encodes any spaces.
                    267:  * The returned pointer must be freed by the caller unless NULL
                    268:  * (returns NULL on out of memory).
                    269:  */
                    270: static char *concat_url(const char *base, const char *relurl)
                    271: {
                    272:   /***
                    273:    TRY to append this new path to the old URL
                    274:    to the right of the host part. Oh crap, this is doomed to cause
                    275:    problems in the future...
                    276:   */
                    277:   char *newest;
                    278:   char *protsep;
                    279:   char *pathsep;
                    280:   size_t newlen;
                    281:   bool host_changed = FALSE;
                    282: 
                    283:   const char *useurl = relurl;
                    284:   size_t urllen;
                    285: 
                    286:   /* we must make our own copy of the URL to play with, as it may
                    287:      point to read-only data */
                    288:   char *url_clone = strdup(base);
                    289: 
                    290:   if(!url_clone)
                    291:     return NULL; /* skip out of this NOW */
                    292: 
                    293:   /* protsep points to the start of the host name */
                    294:   protsep = strstr(url_clone, "//");
                    295:   if(!protsep)
                    296:     protsep = url_clone;
                    297:   else
                    298:     protsep += 2; /* pass the slashes */
                    299: 
                    300:   if('/' != relurl[0]) {
                    301:     int level = 0;
                    302: 
                    303:     /* First we need to find out if there's a ?-letter in the URL,
                    304:        and cut it and the right-side of that off */
                    305:     pathsep = strchr(protsep, '?');
                    306:     if(pathsep)
                    307:       *pathsep = 0;
                    308: 
                    309:     /* we have a relative path to append to the last slash if there's one
                    310:        available, or if the new URL is just a query string (starts with a
                    311:        '?')  we append the new one at the end of the entire currently worked
                    312:        out URL */
                    313:     if(useurl[0] != '?') {
                    314:       pathsep = strrchr(protsep, '/');
                    315:       if(pathsep)
                    316:         *pathsep = 0;
                    317:     }
                    318: 
                    319:     /* Check if there's any slash after the host name, and if so, remember
                    320:        that position instead */
                    321:     pathsep = strchr(protsep, '/');
                    322:     if(pathsep)
                    323:       protsep = pathsep + 1;
                    324:     else
                    325:       protsep = NULL;
                    326: 
                    327:     /* now deal with one "./" or any amount of "../" in the newurl
                    328:        and act accordingly */
                    329: 
                    330:     if((useurl[0] == '.') && (useurl[1] == '/'))
                    331:       useurl += 2; /* just skip the "./" */
                    332: 
                    333:     while((useurl[0] == '.') &&
                    334:           (useurl[1] == '.') &&
                    335:           (useurl[2] == '/')) {
                    336:       level++;
                    337:       useurl += 3; /* pass the "../" */
                    338:     }
                    339: 
                    340:     if(protsep) {
                    341:       while(level--) {
                    342:         /* cut off one more level from the right of the original URL */
                    343:         pathsep = strrchr(protsep, '/');
                    344:         if(pathsep)
                    345:           *pathsep = 0;
                    346:         else {
                    347:           *protsep = 0;
                    348:           break;
                    349:         }
                    350:       }
                    351:     }
                    352:   }
                    353:   else {
                    354:     /* We got a new absolute path for this server */
                    355: 
                    356:     if(relurl[1] == '/') {
                    357:       /* the new URL starts with //, just keep the protocol part from the
                    358:          original one */
                    359:       *protsep = 0;
                    360:       useurl = &relurl[2]; /* we keep the slashes from the original, so we
                    361:                               skip the new ones */
                    362:       host_changed = TRUE;
                    363:     }
                    364:     else {
                    365:       /* cut off the original URL from the first slash, or deal with URLs
                    366:          without slash */
                    367:       pathsep = strchr(protsep, '/');
                    368:       if(pathsep) {
                    369:         /* When people use badly formatted URLs, such as
                    370:            "http://www.url.com?dir=/home/daniel" we must not use the first
                    371:            slash, if there's a ?-letter before it! */
                    372:         char *sep = strchr(protsep, '?');
                    373:         if(sep && (sep < pathsep))
                    374:           pathsep = sep;
                    375:         *pathsep = 0;
                    376:       }
                    377:       else {
                    378:         /* There was no slash. Now, since we might be operating on a badly
                    379:            formatted URL, such as "http://www.url.com?id=2380" which doesn't
                    380:            use a slash separator as it is supposed to, we need to check for a
                    381:            ?-letter as well! */
                    382:         pathsep = strchr(protsep, '?');
                    383:         if(pathsep)
                    384:           *pathsep = 0;
                    385:       }
                    386:     }
                    387:   }
                    388: 
                    389:   /* If the new part contains a space, this is a mighty stupid redirect
                    390:      but we still make an effort to do "right". To the left of a '?'
                    391:      letter we replace each space with %20 while it is replaced with '+'
                    392:      on the right side of the '?' letter.
                    393:   */
                    394:   newlen = strlen_url(useurl, !host_changed);
                    395: 
                    396:   urllen = strlen(url_clone);
                    397: 
                    398:   newest = malloc(urllen + 1 + /* possible slash */
                    399:                   newlen + 1 /* zero byte */);
                    400: 
                    401:   if(!newest) {
                    402:     free(url_clone); /* don't leak this */
                    403:     return NULL;
                    404:   }
                    405: 
                    406:   /* copy over the root url part */
                    407:   memcpy(newest, url_clone, urllen);
                    408: 
                    409:   /* check if we need to append a slash */
                    410:   if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
                    411:     ;
                    412:   else
                    413:     newest[urllen++]='/';
                    414: 
                    415:   /* then append the new piece on the right side */
                    416:   strcpy_url(&newest[urllen], useurl, !host_changed);
                    417: 
                    418:   free(url_clone);
                    419: 
                    420:   return newest;
                    421: }
                    422: 
                    423: /*
                    424:  * parse_hostname_login()
                    425:  *
                    426:  * Parse the login details (user name, password and options) from the URL and
                    427:  * strip them out of the host name
                    428:  *
                    429:  */
                    430: static CURLUcode parse_hostname_login(struct Curl_URL *u,
                    431:                                       char **hostname,
                    432:                                       unsigned int flags)
                    433: {
                    434:   CURLUcode result = CURLUE_OK;
                    435:   CURLcode ccode;
                    436:   char *userp = NULL;
                    437:   char *passwdp = NULL;
                    438:   char *optionsp = NULL;
                    439:   const struct Curl_handler *h = NULL;
                    440: 
                    441:   /* At this point, we're hoping all the other special cases have
                    442:    * been taken care of, so conn->host.name is at most
                    443:    *    [user[:password][;options]]@]hostname
                    444:    *
                    445:    * We need somewhere to put the embedded details, so do that first.
                    446:    */
                    447: 
                    448:   char *ptr = strchr(*hostname, '@');
                    449:   char *login = *hostname;
                    450: 
                    451:   if(!ptr)
                    452:     goto out;
                    453: 
                    454:   /* We will now try to extract the
                    455:    * possible login information in a string like:
                    456:    * ftp://user:password@ftp.my.site:8021/README */
                    457:   *hostname = ++ptr;
                    458: 
                    459:   /* if this is a known scheme, get some details */
                    460:   if(u->scheme)
                    461:     h = Curl_builtin_scheme(u->scheme);
                    462: 
                    463:   /* We could use the login information in the URL so extract it. Only parse
                    464:      options if the handler says we should. Note that 'h' might be NULL! */
                    465:   ccode = Curl_parse_login_details(login, ptr - login - 1,
                    466:                                    &userp, &passwdp,
                    467:                                    (h && (h->flags & PROTOPT_URLOPTIONS)) ?
                    468:                                    &optionsp:NULL);
                    469:   if(ccode) {
                    470:     result = CURLUE_MALFORMED_INPUT;
                    471:     goto out;
                    472:   }
                    473: 
                    474:   if(userp) {
                    475:     if(flags & CURLU_DISALLOW_USER) {
                    476:       /* Option DISALLOW_USER is set and url contains username. */
                    477:       result = CURLUE_USER_NOT_ALLOWED;
                    478:       goto out;
                    479:     }
                    480: 
                    481:     u->user = userp;
                    482:   }
                    483: 
                    484:   if(passwdp)
                    485:     u->password = passwdp;
                    486: 
                    487:   if(optionsp)
                    488:     u->options = optionsp;
                    489: 
                    490:   return CURLUE_OK;
                    491:   out:
                    492: 
                    493:   free(userp);
                    494:   free(passwdp);
                    495:   free(optionsp);
                    496: 
                    497:   return result;
                    498: }
                    499: 
                    500: UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname)
                    501: {
                    502:   char *portptr = NULL;
                    503:   char endbracket;
                    504:   int len;
                    505: 
                    506:   /*
                    507:    * Find the end of an IPv6 address, either on the ']' ending bracket or
                    508:    * a percent-encoded zone index.
                    509:    */
                    510:   if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
                    511:                  &endbracket, &len)) {
                    512:     if(']' == endbracket)
                    513:       portptr = &hostname[len];
                    514:     else if('%' == endbracket) {
                    515:       int zonelen = len;
                    516:       if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
                    517:         if(']' != endbracket)
                    518:           return CURLUE_MALFORMED_INPUT;
                    519:         portptr = &hostname[--zonelen + len + 1];
                    520:       }
                    521:       else
                    522:         return CURLUE_MALFORMED_INPUT;
                    523:     }
                    524:     else
                    525:       return CURLUE_MALFORMED_INPUT;
                    526: 
                    527:     /* this is a RFC2732-style specified IP-address */
                    528:     if(portptr && *portptr) {
                    529:       if(*portptr != ':')
                    530:         return CURLUE_MALFORMED_INPUT;
                    531:     }
                    532:     else
                    533:       portptr = NULL;
                    534:   }
                    535:   else
                    536:     portptr = strchr(hostname, ':');
                    537: 
                    538:   if(portptr) {
                    539:     char *rest;
                    540:     long port;
                    541:     char portbuf[7];
                    542: 
                    543:     /* Browser behavior adaptation. If there's a colon with no digits after,
                    544:        just cut off the name there which makes us ignore the colon and just
                    545:        use the default port. Firefox, Chrome and Safari all do that. */
                    546:     if(!portptr[1]) {
                    547:       *portptr = '\0';
                    548:       return CURLUE_OK;
                    549:     }
                    550: 
                    551:     if(!ISDIGIT(portptr[1]))
                    552:       return CURLUE_BAD_PORT_NUMBER;
                    553: 
                    554:     port = strtol(portptr + 1, &rest, 10);  /* Port number must be decimal */
                    555: 
                    556:     if((port <= 0) || (port > 0xffff))
                    557:       /* Single unix standard says port numbers are 16 bits long, but we don't
                    558:          treat port zero as OK. */
                    559:       return CURLUE_BAD_PORT_NUMBER;
                    560: 
                    561:     if(rest[0])
                    562:       return CURLUE_BAD_PORT_NUMBER;
                    563: 
                    564:     *portptr++ = '\0'; /* cut off the name there */
                    565:     *rest = 0;
                    566:     /* generate a new port number string to get rid of leading zeroes etc */
                    567:     msnprintf(portbuf, sizeof(portbuf), "%ld", port);
                    568:     u->portnum = port;
                    569:     u->port = strdup(portbuf);
                    570:     if(!u->port)
                    571:       return CURLUE_OUT_OF_MEMORY;
                    572:   }
                    573: 
                    574:   return CURLUE_OK;
                    575: }
                    576: 
                    577: /* scan for byte values < 31 or 127 */
                    578: static CURLUcode junkscan(const char *part)
                    579: {
                    580:   if(part) {
                    581:     static const char badbytes[]={
                    582:       /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
                    583:       0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
                    584:       0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
                    585:       0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
                    586:       0x7f,
                    587:       0x00 /* zero terminate */
                    588:     };
                    589:     size_t n = strlen(part);
                    590:     size_t nfine = strcspn(part, badbytes);
                    591:     if(nfine != n)
                    592:       /* since we don't know which part is scanned, return a generic error
                    593:          code */
                    594:       return CURLUE_MALFORMED_INPUT;
                    595:   }
                    596:   return CURLUE_OK;
                    597: }
                    598: 
                    599: static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
                    600: {
                    601:   size_t len;
                    602:   size_t hlen = strlen(hostname);
                    603: 
                    604:   if(hostname[0] == '[') {
                    605: #ifdef ENABLE_IPV6
                    606:     char dest[16]; /* fits a binary IPv6 address */
                    607: #endif
                    608:     const char *l = "0123456789abcdefABCDEF:.";
                    609:     if(hlen < 5) /* '[::1]' is the shortest possible valid string */
                    610:       return CURLUE_MALFORMED_INPUT;
                    611:     hostname++;
                    612:     hlen -= 2;
                    613: 
                    614:     if(hostname[hlen] != ']')
                    615:       return CURLUE_MALFORMED_INPUT;
                    616: 
                    617:     /* only valid letters are ok */
                    618:     len = strspn(hostname, l);
                    619:     if(hlen != len) {
                    620:       hlen = len;
                    621:       if(hostname[len] == '%') {
                    622:         /* this could now be '%[zone id]' */
                    623:         char zoneid[16];
                    624:         int i = 0;
                    625:         char *h = &hostname[len + 1];
                    626:         /* pass '25' if present and is a url encoded percent sign */
                    627:         if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
                    628:           h += 2;
                    629:         while(*h && (*h != ']') && (i < 15))
                    630:           zoneid[i++] = *h++;
                    631:         if(!i || (']' != *h))
                    632:           return CURLUE_MALFORMED_INPUT;
                    633:         zoneid[i] = 0;
                    634:         u->zoneid = strdup(zoneid);
                    635:         if(!u->zoneid)
                    636:           return CURLUE_OUT_OF_MEMORY;
                    637:         hostname[len] = ']'; /* insert end bracket */
                    638:         hostname[len + 1] = 0; /* terminate the hostname */
                    639:       }
                    640:       else
                    641:         return CURLUE_MALFORMED_INPUT;
                    642:       /* hostname is fine */
                    643:     }
                    644: #ifdef ENABLE_IPV6
                    645:     hostname[hlen] = 0; /* end the address there */
                    646:     if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
                    647:       return CURLUE_MALFORMED_INPUT;
                    648:     hostname[hlen] = ']'; /* restore ending bracket */
                    649: #endif
                    650:   }
                    651:   else {
                    652:     /* letters from the second string is not ok */
                    653:     len = strcspn(hostname, " ");
                    654:     if(hlen != len)
                    655:       /* hostname with bad content */
                    656:       return CURLUE_MALFORMED_INPUT;
                    657:   }
                    658:   if(!hostname[0])
                    659:     return CURLUE_NO_HOST;
                    660:   return CURLUE_OK;
                    661: }
                    662: 
                    663: #define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
                    664: 
                    665: static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
                    666: {
                    667:   char *path;
                    668:   bool path_alloced = FALSE;
                    669:   char *hostname;
                    670:   char *query = NULL;
                    671:   char *fragment = NULL;
                    672:   CURLUcode result;
                    673:   bool url_has_scheme = FALSE;
                    674:   char schemebuf[MAX_SCHEME_LEN + 1];
                    675:   const char *schemep = NULL;
                    676:   size_t schemelen = 0;
                    677:   size_t urllen;
                    678: 
                    679:   if(!url)
                    680:     return CURLUE_MALFORMED_INPUT;
                    681: 
                    682:   /*************************************************************
                    683:    * Parse the URL.
                    684:    ************************************************************/
                    685:   /* allocate scratch area */
                    686:   urllen = strlen(url);
                    687:   if(urllen > CURL_MAX_INPUT_LENGTH)
                    688:     /* excessive input length */
                    689:     return CURLUE_MALFORMED_INPUT;
                    690: 
                    691:   path = u->scratch = malloc(urllen * 2 + 2);
                    692:   if(!path)
                    693:     return CURLUE_OUT_OF_MEMORY;
                    694: 
                    695:   hostname = &path[urllen + 1];
                    696:   hostname[0] = 0;
                    697: 
                    698:   if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
                    699:     url_has_scheme = TRUE;
                    700:     schemelen = strlen(schemebuf);
                    701:   }
                    702: 
                    703:   /* handle the file: scheme */
                    704:   if(url_has_scheme && strcasecompare(schemebuf, "file")) {
                    705:     /* path has been allocated large enough to hold this */
                    706:     strcpy(path, &url[5]);
                    707: 
                    708:     hostname = NULL; /* no host for file: URLs */
                    709:     u->scheme = strdup("file");
                    710:     if(!u->scheme)
                    711:       return CURLUE_OUT_OF_MEMORY;
                    712: 
                    713:     /* Extra handling URLs with an authority component (i.e. that start with
                    714:      * "file://")
                    715:      *
                    716:      * We allow omitted hostname (e.g. file:/<path>) -- valid according to
                    717:      * RFC 8089, but not the (current) WHAT-WG URL spec.
                    718:      */
                    719:     if(path[0] == '/' && path[1] == '/') {
                    720:       /* swallow the two slashes */
                    721:       char *ptr = &path[2];
                    722: 
                    723:       /*
                    724:        * According to RFC 8089, a file: URL can be reliably dereferenced if:
                    725:        *
                    726:        *  o it has no/blank hostname, or
                    727:        *
                    728:        *  o the hostname matches "localhost" (case-insensitively), or
                    729:        *
                    730:        *  o the hostname is a FQDN that resolves to this machine.
                    731:        *
                    732:        * For brevity, we only consider URLs with empty, "localhost", or
                    733:        * "127.0.0.1" hostnames as local.
                    734:        *
                    735:        * Additionally, there is an exception for URLs with a Windows drive
                    736:        * letter in the authority (which was accidentally omitted from RFC 8089
                    737:        * Appendix E, but believe me, it was meant to be there. --MK)
                    738:        */
                    739:       if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
                    740:         /* the URL includes a host name, it must match "localhost" or
                    741:            "127.0.0.1" to be valid */
                    742:         if(!checkprefix("localhost/", ptr) &&
                    743:            !checkprefix("127.0.0.1/", ptr)) {
                    744:           /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
                    745:              none */
                    746:           return CURLUE_MALFORMED_INPUT;
                    747:         }
                    748:         ptr += 9; /* now points to the slash after the host */
                    749:       }
                    750: 
                    751:       path = ptr;
                    752:     }
                    753: 
                    754: #if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
                    755:     /* Don't allow Windows drive letters when not in Windows.
                    756:      * This catches both "file:/c:" and "file:c:" */
                    757:     if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
                    758:        STARTS_WITH_URL_DRIVE_PREFIX(path)) {
                    759:       /* File drive letters are only accepted in MSDOS/Windows */
                    760:       return CURLUE_MALFORMED_INPUT;
                    761:     }
                    762: #else
                    763:     /* If the path starts with a slash and a drive letter, ditch the slash */
                    764:     if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
                    765:       /* This cannot be done with strcpy, as the memory chunks overlap! */
                    766:       memmove(path, &path[1], strlen(&path[1]) + 1);
                    767:     }
                    768: #endif
                    769: 
                    770:   }
                    771:   else {
                    772:     /* clear path */
                    773:     const char *p;
                    774:     const char *hostp;
                    775:     size_t len;
                    776:     path[0] = 0;
                    777: 
                    778:     if(url_has_scheme) {
                    779:       int i = 0;
                    780:       p = &url[schemelen + 1];
                    781:       while(p && (*p == '/') && (i < 4)) {
                    782:         p++;
                    783:         i++;
                    784:       }
                    785:       if((i < 1) || (i>3))
                    786:         /* less than one or more than three slashes */
                    787:         return CURLUE_MALFORMED_INPUT;
                    788: 
                    789:       schemep = schemebuf;
                    790:       if(!Curl_builtin_scheme(schemep) &&
                    791:          !(flags & CURLU_NON_SUPPORT_SCHEME))
                    792:         return CURLUE_UNSUPPORTED_SCHEME;
                    793: 
                    794:       if(junkscan(schemep))
                    795:         return CURLUE_MALFORMED_INPUT;
                    796: 
                    797:     }
                    798:     else {
                    799:       /* no scheme! */
                    800: 
                    801:       if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))
                    802:         return CURLUE_MALFORMED_INPUT;
                    803:       if(flags & CURLU_DEFAULT_SCHEME)
                    804:         schemep = DEFAULT_SCHEME;
                    805: 
                    806:       /*
                    807:        * The URL was badly formatted, let's try without scheme specified.
                    808:        */
                    809:       p = url;
                    810:     }
                    811:     hostp = p; /* host name starts here */
                    812: 
                    813:     while(*p && !HOSTNAME_END(*p)) /* find end of host name */
                    814:       p++;
                    815: 
                    816:     len = p - hostp;
                    817:     if(len) {
                    818:       memcpy(hostname, hostp, len);
                    819:       hostname[len] = 0;
                    820:     }
                    821:     else {
                    822:       if(!(flags & CURLU_NO_AUTHORITY))
                    823:         return CURLUE_MALFORMED_INPUT;
                    824:     }
                    825: 
                    826:     len = strlen(p);
                    827:     memcpy(path, p, len);
                    828:     path[len] = 0;
                    829: 
                    830:     if(schemep) {
                    831:       u->scheme = strdup(schemep);
                    832:       if(!u->scheme)
                    833:         return CURLUE_OUT_OF_MEMORY;
                    834:     }
                    835:   }
                    836: 
                    837:   if(junkscan(path))
                    838:     return CURLUE_MALFORMED_INPUT;
                    839: 
                    840:   if((flags & CURLU_URLENCODE) && path[0]) {
                    841:     /* worst case output length is 3x the original! */
                    842:     char *newp = malloc(strlen(path) * 3);
                    843:     if(!newp)
                    844:       return CURLUE_OUT_OF_MEMORY;
                    845:     path_alloced = TRUE;
                    846:     strcpy_url(newp, path, TRUE); /* consider it relative */
                    847:     u->temppath = path = newp;
                    848:   }
                    849: 
                    850:   fragment = strchr(path, '#');
                    851:   if(fragment) {
                    852:     *fragment++ = 0;
                    853:     if(fragment[0]) {
                    854:       u->fragment = strdup(fragment);
                    855:       if(!u->fragment)
                    856:         return CURLUE_OUT_OF_MEMORY;
                    857:     }
                    858:   }
                    859: 
                    860:   query = strchr(path, '?');
                    861:   if(query) {
                    862:     *query++ = 0;
                    863:     /* done even if the query part is a blank string */
                    864:     u->query = strdup(query);
                    865:     if(!u->query)
                    866:       return CURLUE_OUT_OF_MEMORY;
                    867:   }
                    868: 
                    869:   if(!path[0])
                    870:     /* if there's no path left set, unset */
                    871:     path = NULL;
                    872:   else {
                    873:     if(!(flags & CURLU_PATH_AS_IS)) {
                    874:       /* remove ../ and ./ sequences according to RFC3986 */
                    875:       char *newp = Curl_dedotdotify(path);
                    876:       if(!newp)
                    877:         return CURLUE_OUT_OF_MEMORY;
                    878: 
                    879:       if(strcmp(newp, path)) {
                    880:         /* if we got a new version */
                    881:         if(path_alloced)
                    882:           Curl_safefree(u->temppath);
                    883:         u->temppath = path = newp;
                    884:         path_alloced = TRUE;
                    885:       }
                    886:       else
                    887:         free(newp);
                    888:     }
                    889: 
                    890:     u->path = path_alloced?path:strdup(path);
                    891:     if(!u->path)
                    892:       return CURLUE_OUT_OF_MEMORY;
                    893:     u->temppath = NULL; /* used now */
                    894:   }
                    895: 
                    896:   if(hostname) {
                    897:     /*
                    898:      * Parse the login details and strip them out of the host name.
                    899:      */
                    900:     if(junkscan(hostname))
                    901:       return CURLUE_MALFORMED_INPUT;
                    902: 
                    903:     result = parse_hostname_login(u, &hostname, flags);
                    904:     if(result)
                    905:       return result;
                    906: 
                    907:     result = Curl_parse_port(u, hostname);
                    908:     if(result)
                    909:       return result;
                    910: 
                    911:     if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
                    912:       /* Skip hostname check, it's allowed to be empty. */
                    913:     }
                    914:     else {
                    915:       result = hostname_check(u, hostname);
                    916:       if(result)
                    917:         return result;
                    918:     }
                    919: 
                    920:     u->host = strdup(hostname);
                    921:     if(!u->host)
                    922:       return CURLUE_OUT_OF_MEMORY;
                    923: 
                    924:     if((flags & CURLU_GUESS_SCHEME) && !schemep) {
                    925:       /* legacy curl-style guess based on host name */
                    926:       if(checkprefix("ftp.", hostname))
                    927:         schemep = "ftp";
                    928:       else if(checkprefix("dict.", hostname))
                    929:         schemep = "dict";
                    930:       else if(checkprefix("ldap.", hostname))
                    931:         schemep = "ldap";
                    932:       else if(checkprefix("imap.", hostname))
                    933:         schemep = "imap";
                    934:       else if(checkprefix("smtp.", hostname))
                    935:         schemep = "smtp";
                    936:       else if(checkprefix("pop3.", hostname))
                    937:         schemep = "pop3";
                    938:       else
                    939:         schemep = "http";
                    940: 
                    941:       u->scheme = strdup(schemep);
                    942:       if(!u->scheme)
                    943:         return CURLUE_OUT_OF_MEMORY;
                    944:     }
                    945:   }
                    946: 
                    947:   Curl_safefree(u->scratch);
                    948:   Curl_safefree(u->temppath);
                    949: 
                    950:   return CURLUE_OK;
                    951: }
                    952: 
                    953: /*
                    954:  * Parse the URL and set the relevant members of the Curl_URL struct.
                    955:  */
                    956: static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
                    957: {
                    958:   CURLUcode result = seturl(url, u, flags);
                    959:   if(result) {
                    960:     free_urlhandle(u);
                    961:     memset(u, 0, sizeof(struct Curl_URL));
                    962:   }
                    963:   return result;
                    964: }
                    965: 
                    966: /*
                    967:  */
                    968: CURLU *curl_url(void)
                    969: {
                    970:   return calloc(sizeof(struct Curl_URL), 1);
                    971: }
                    972: 
                    973: void curl_url_cleanup(CURLU *u)
                    974: {
                    975:   if(u) {
                    976:     free_urlhandle(u);
                    977:     free(u);
                    978:   }
                    979: }
                    980: 
                    981: #define DUP(dest, src, name)         \
                    982:   if(src->name) {                    \
                    983:     dest->name = strdup(src->name);  \
                    984:     if(!dest->name)                  \
                    985:       goto fail;                     \
                    986:   }
                    987: 
                    988: CURLU *curl_url_dup(CURLU *in)
                    989: {
                    990:   struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
                    991:   if(u) {
                    992:     DUP(u, in, scheme);
                    993:     DUP(u, in, user);
                    994:     DUP(u, in, password);
                    995:     DUP(u, in, options);
                    996:     DUP(u, in, host);
                    997:     DUP(u, in, port);
                    998:     DUP(u, in, path);
                    999:     DUP(u, in, query);
                   1000:     DUP(u, in, fragment);
                   1001:     u->portnum = in->portnum;
                   1002:   }
                   1003:   return u;
                   1004:   fail:
                   1005:   curl_url_cleanup(u);
                   1006:   return NULL;
                   1007: }
                   1008: 
                   1009: CURLUcode curl_url_get(CURLU *u, CURLUPart what,
                   1010:                        char **part, unsigned int flags)
                   1011: {
                   1012:   char *ptr;
                   1013:   CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
                   1014:   char portbuf[7];
                   1015:   bool urldecode = (flags & CURLU_URLDECODE)?1:0;
                   1016:   bool plusdecode = FALSE;
                   1017:   (void)flags;
                   1018:   if(!u)
                   1019:     return CURLUE_BAD_HANDLE;
                   1020:   if(!part)
                   1021:     return CURLUE_BAD_PARTPOINTER;
                   1022:   *part = NULL;
                   1023: 
                   1024:   switch(what) {
                   1025:   case CURLUPART_SCHEME:
                   1026:     ptr = u->scheme;
                   1027:     ifmissing = CURLUE_NO_SCHEME;
                   1028:     urldecode = FALSE; /* never for schemes */
                   1029:     break;
                   1030:   case CURLUPART_USER:
                   1031:     ptr = u->user;
                   1032:     ifmissing = CURLUE_NO_USER;
                   1033:     break;
                   1034:   case CURLUPART_PASSWORD:
                   1035:     ptr = u->password;
                   1036:     ifmissing = CURLUE_NO_PASSWORD;
                   1037:     break;
                   1038:   case CURLUPART_OPTIONS:
                   1039:     ptr = u->options;
                   1040:     ifmissing = CURLUE_NO_OPTIONS;
                   1041:     break;
                   1042:   case CURLUPART_HOST:
                   1043:     ptr = u->host;
                   1044:     ifmissing = CURLUE_NO_HOST;
                   1045:     break;
                   1046:   case CURLUPART_ZONEID:
                   1047:     ptr = u->zoneid;
                   1048:     break;
                   1049:   case CURLUPART_PORT:
                   1050:     ptr = u->port;
                   1051:     ifmissing = CURLUE_NO_PORT;
                   1052:     urldecode = FALSE; /* never for port */
                   1053:     if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
                   1054:       /* there's no stored port number, but asked to deliver
                   1055:          a default one for the scheme */
                   1056:       const struct Curl_handler *h =
                   1057:         Curl_builtin_scheme(u->scheme);
                   1058:       if(h) {
                   1059:         msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport);
                   1060:         ptr = portbuf;
                   1061:       }
                   1062:     }
                   1063:     else if(ptr && u->scheme) {
                   1064:       /* there is a stored port number, but ask to inhibit if
                   1065:          it matches the default one for the scheme */
                   1066:       const struct Curl_handler *h =
                   1067:         Curl_builtin_scheme(u->scheme);
                   1068:       if(h && (h->defport == u->portnum) &&
                   1069:          (flags & CURLU_NO_DEFAULT_PORT))
                   1070:         ptr = NULL;
                   1071:     }
                   1072:     break;
                   1073:   case CURLUPART_PATH:
                   1074:     ptr = u->path;
                   1075:     if(!ptr) {
                   1076:       ptr = u->path = strdup("/");
                   1077:       if(!u->path)
                   1078:         return CURLUE_OUT_OF_MEMORY;
                   1079:     }
                   1080:     break;
                   1081:   case CURLUPART_QUERY:
                   1082:     ptr = u->query;
                   1083:     ifmissing = CURLUE_NO_QUERY;
                   1084:     plusdecode = urldecode;
                   1085:     break;
                   1086:   case CURLUPART_FRAGMENT:
                   1087:     ptr = u->fragment;
                   1088:     ifmissing = CURLUE_NO_FRAGMENT;
                   1089:     break;
                   1090:   case CURLUPART_URL: {
                   1091:     char *url;
                   1092:     char *scheme;
                   1093:     char *options = u->options;
                   1094:     char *port = u->port;
                   1095:     char *allochost = NULL;
                   1096:     if(u->scheme && strcasecompare("file", u->scheme)) {
                   1097:       url = aprintf("file://%s%s%s",
                   1098:                     u->path,
                   1099:                     u->fragment? "#": "",
                   1100:                     u->fragment? u->fragment : "");
                   1101:     }
                   1102:     else if(!u->host)
                   1103:       return CURLUE_NO_HOST;
                   1104:     else {
                   1105:       const struct Curl_handler *h = NULL;
                   1106:       if(u->scheme)
                   1107:         scheme = u->scheme;
                   1108:       else if(flags & CURLU_DEFAULT_SCHEME)
                   1109:         scheme = (char *) DEFAULT_SCHEME;
                   1110:       else
                   1111:         return CURLUE_NO_SCHEME;
                   1112: 
                   1113:       h = Curl_builtin_scheme(scheme);
                   1114:       if(!port && (flags & CURLU_DEFAULT_PORT)) {
                   1115:         /* there's no stored port number, but asked to deliver
                   1116:            a default one for the scheme */
                   1117:         if(h) {
                   1118:           msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport);
                   1119:           port = portbuf;
                   1120:         }
                   1121:       }
                   1122:       else if(port) {
                   1123:         /* there is a stored port number, but asked to inhibit if it matches
                   1124:            the default one for the scheme */
                   1125:         if(h && (h->defport == u->portnum) &&
                   1126:            (flags & CURLU_NO_DEFAULT_PORT))
                   1127:           port = NULL;
                   1128:       }
                   1129: 
                   1130:       if(h && !(h->flags & PROTOPT_URLOPTIONS))
                   1131:         options = NULL;
                   1132: 
                   1133:       if((u->host[0] == '[') && u->zoneid) {
                   1134:         /* make it '[ host %25 zoneid ]' */
                   1135:         size_t hostlen = strlen(u->host);
                   1136:         size_t alen = hostlen + 3 + strlen(u->zoneid) + 1;
                   1137:         allochost = malloc(alen);
                   1138:         if(!allochost)
                   1139:           return CURLUE_OUT_OF_MEMORY;
                   1140:         memcpy(allochost, u->host, hostlen - 1);
                   1141:         msnprintf(&allochost[hostlen - 1], alen - hostlen + 1,
                   1142:                   "%%25%s]", u->zoneid);
                   1143:       }
                   1144: 
                   1145:       url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                   1146:                     scheme,
                   1147:                     u->user ? u->user : "",
                   1148:                     u->password ? ":": "",
                   1149:                     u->password ? u->password : "",
                   1150:                     options ? ";" : "",
                   1151:                     options ? options : "",
                   1152:                     (u->user || u->password || options) ? "@": "",
                   1153:                     allochost ? allochost : u->host,
                   1154:                     port ? ":": "",
                   1155:                     port ? port : "",
                   1156:                     (u->path && (u->path[0] != '/')) ? "/": "",
                   1157:                     u->path ? u->path : "/",
                   1158:                     (u->query && u->query[0]) ? "?": "",
                   1159:                     (u->query && u->query[0]) ? u->query : "",
                   1160:                     u->fragment? "#": "",
                   1161:                     u->fragment? u->fragment : "");
                   1162:       free(allochost);
                   1163:     }
                   1164:     if(!url)
                   1165:       return CURLUE_OUT_OF_MEMORY;
                   1166:     *part = url;
                   1167:     return CURLUE_OK;
                   1168:   }
                   1169:   default:
                   1170:     ptr = NULL;
                   1171:     break;
                   1172:   }
                   1173:   if(ptr) {
                   1174:     *part = strdup(ptr);
                   1175:     if(!*part)
                   1176:       return CURLUE_OUT_OF_MEMORY;
                   1177:     if(plusdecode) {
                   1178:       /* convert + to space */
                   1179:       char *plus;
                   1180:       for(plus = *part; *plus; ++plus) {
                   1181:         if(*plus == '+')
                   1182:           *plus = ' ';
                   1183:       }
                   1184:     }
                   1185:     if(urldecode) {
                   1186:       char *decoded;
                   1187:       size_t dlen;
                   1188:       CURLcode res = Curl_urldecode(NULL, *part, 0, &decoded, &dlen, TRUE);
                   1189:       free(*part);
                   1190:       if(res) {
                   1191:         *part = NULL;
                   1192:         return CURLUE_URLDECODE;
                   1193:       }
                   1194:       *part = decoded;
                   1195:     }
                   1196:     return CURLUE_OK;
                   1197:   }
                   1198:   else
                   1199:     return ifmissing;
                   1200: }
                   1201: 
                   /*
                    * curl_url_set() stores, replaces or clears one part of the URL handle 'u'.
                    *
                    * u      the handle to modify. NULL makes this return CURLUE_BAD_HANDLE.
                    * what   the part to act on. A value not handled by the switches below
                    *        returns CURLUE_UNKNOWN_PART.
                    * part   the new contents as a zero terminated string, or NULL to clear
                    *        the stored part. Clearing CURLUPART_URL is a no-op that returns
                    *        CURLUE_OK. For the individual parts the handle keeps its own
                    *        allocated copy, never the 'part' pointer itself.
                    * flags  bitmask. This function itself looks at CURLU_URLENCODE,
                    *        CURLU_NON_SUPPORT_SCHEME, CURLU_APPENDQUERY and
                    *        CURLU_NO_AUTHORITY; for CURLUPART_URL the whole mask is handed on
                    *        to parseurl() and curl_url_get().
                    *
                    * Returns CURLUE_OK on success, otherwise CURLUE_MALFORMED_INPUT,
                    * CURLUE_UNSUPPORTED_SCHEME, CURLUE_BAD_PORT_NUMBER, CURLUE_OUT_OF_MEMORY
                    * or, for CURLUPART_URL, whatever parseurl() returned.
                    */
                   1202: CURLUcode curl_url_set(CURLU *u, CURLUPart what,
                   1203:                        const char *part, unsigned int flags)
                   1204: {
                   1205:   char **storep = NULL;
                   1206:   long port = 0;
                   1207:   bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
                   1208:   bool plusencode = FALSE;
                   1209:   bool urlskipslash = FALSE;
                   1210:   bool appendquery = FALSE;
                   1211:   bool equalsencode = FALSE;
                   1212: 
                   1213:   if(!u)
                   1214:     return CURLUE_BAD_HANDLE;
                   1215:   if(!part) {
                   1216:     /* setting a part to NULL clears it */
                   1217:     switch(what) {
                   1218:     case CURLUPART_URL:
                               /* storep stays NULL, so nothing is freed below */
                   1219:       break;
                   1220:     case CURLUPART_SCHEME:
                   1221:       storep = &u->scheme;
                   1222:       break;
                   1223:     case CURLUPART_USER:
                   1224:       storep = &u->user;
                   1225:       break;
                   1226:     case CURLUPART_PASSWORD:
                   1227:       storep = &u->password;
                   1228:       break;
                   1229:     case CURLUPART_OPTIONS:
                   1230:       storep = &u->options;
                   1231:       break;
                   1232:     case CURLUPART_HOST:
                   1233:       storep = &u->host;
                   1234:       break;
                   1235:     case CURLUPART_ZONEID:
                   1236:       storep = &u->zoneid;
                   1237:       break;
                   1238:     case CURLUPART_PORT:
                               /* the numerical port is reset together with its string form */
                   1239:       u->portnum = 0;
                   1240:       storep = &u->port;
                   1241:       break;
                   1242:     case CURLUPART_PATH:
                   1243:       storep = &u->path;
                   1244:       break;
                   1245:     case CURLUPART_QUERY:
                   1246:       storep = &u->query;
                   1247:       break;
                   1248:     case CURLUPART_FRAGMENT:
                   1249:       storep = &u->fragment;
                   1250:       break;
                   1251:     default:
                   1252:       return CURLUE_UNKNOWN_PART;
                   1253:     }
                   1254:     if(storep && *storep) {
                   1255:       free(*storep);
                   1256:       *storep = NULL;
                   1257:     }
                   1258:     return CURLUE_OK;
                   1259:   }
                   1260: 
                           /* First pass: validate the input where needed, pick the field to store
                              into and decide which encoding rules apply to this part. Setting a
                              whole URL is handled completely inside its case and returns from
                              there. */
                   1261:   switch(what) {
                   1262:   case CURLUPART_SCHEME:
                   1263:     if(strlen(part) > MAX_SCHEME_LEN)
                   1264:       /* too long */
                   1265:       return CURLUE_MALFORMED_INPUT;
                   1266:     if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
                   1267:        /* verify that it is a fine scheme */
                   1268:        !Curl_builtin_scheme(part))
                   1269:       return CURLUE_UNSUPPORTED_SCHEME;
                   1270:     storep = &u->scheme;
                   1271:     urlencode = FALSE; /* never */
                   1272:     break;
                   1273:   case CURLUPART_USER:
                   1274:     storep = &u->user;
                   1275:     break;
                   1276:   case CURLUPART_PASSWORD:
                   1277:     storep = &u->password;
                   1278:     break;
                   1279:   case CURLUPART_OPTIONS:
                   1280:     storep = &u->options;
                   1281:     break;
                   1282:   case CURLUPART_HOST:
                   1283:     storep = &u->host;
                             /* the stored zone id is dropped right away, before the new host name
                                has been validated further down */
                   1284:     free(u->zoneid);
                   1285:     u->zoneid = NULL;
                   1286:     break;
                   1287:   case CURLUPART_ZONEID:
                   1288:     storep = &u->zoneid;
                   1289:     break;
                   1290:   case CURLUPART_PORT:
                   1291:   {
                   1292:     char *endp;
                   1293:     urlencode = FALSE; /* never */
                             /* strtol() skips leading white space and accepts a sign, which would
                                let strings like " 80" or "+80" pass the checks below and get
                                stored verbatim as the port. Insist on a leading digit. */
                             if(!ISDIGIT(part[0]))
                               return CURLUE_BAD_PORT_NUMBER;
                   1294:     port = strtol(part, &endp, 10);  /* Port number must be decimal */
                   1295:     if((port <= 0) || (port > 0xffff))
                   1296:       return CURLUE_BAD_PORT_NUMBER;
                   1297:     if(*endp)
                   1298:       /* weirdly provided number, not good! */
                   1299:       return CURLUE_MALFORMED_INPUT;
                   1300:     storep = &u->port;
                   1301:   }
                   1302:   break;
                   1303:   case CURLUPART_PATH:
                             /* slashes in a path are kept as-is when URL encoding */
                   1304:     urlskipslash = TRUE;
                   1305:     storep = &u->path;
                   1306:     break;
                   1307:   case CURLUPART_QUERY:
                             /* space-to-plus conversion only happens when URL encoding was asked
                                for, and the first '=' is only left alone when appending */
                   1308:     plusencode = urlencode;
                   1309:     appendquery = (flags & CURLU_APPENDQUERY)?1:0;
                   1310:     equalsencode = appendquery;
                   1311:     storep = &u->query;
                   1312:     break;
                   1313:   case CURLUPART_FRAGMENT:
                   1314:     storep = &u->fragment;
                   1315:     break;
                   1316:   case CURLUPART_URL: {
                   1317:     /*
                   1318:      * Allow a new URL to replace the existing (if any) contents.
                   1319:      *
                   1320:      * If the existing contents is enough for a URL, allow a relative URL to
                   1321:      * replace it.
                   1322:      */
                   1323:     CURLUcode result;
                   1324:     char *oldurl;
                   1325:     char *redired_url;
                   1326:     CURLU *handle2;
                   1327: 
                             /* In all three paths below the URL is parsed into a scratch handle
                                that is only moved into 'u' when parsing succeeded; on failure the
                                scratch handle is cleaned up instead. */
                   1328:     if(Curl_is_absolute_url(part, NULL, MAX_SCHEME_LEN + 1)) {
                   1329:       handle2 = curl_url();
                   1330:       if(!handle2)
                   1331:         return CURLUE_OUT_OF_MEMORY;
                   1332:       result = parseurl(part, handle2, flags);
                   1333:       if(!result)
                   1334:         mv_urlhandle(handle2, u);
                   1335:       else
                   1336:         curl_url_cleanup(handle2);
                   1337:       return result;
                   1338:     }
                   1339:     /* extract the full "old" URL to do the redirect on */
                   1340:     result = curl_url_get(u, CURLUPART_URL, &oldurl, flags);
                   1341:     if(result) {
                   1342:       /* couldn't get the old URL, just use the new! */
                   1343:       handle2 = curl_url();
                   1344:       if(!handle2)
                   1345:         return CURLUE_OUT_OF_MEMORY;
                   1346:       result = parseurl(part, handle2, flags);
                   1347:       if(!result)
                   1348:         mv_urlhandle(handle2, u);
                   1349:       else
                   1350:         curl_url_cleanup(handle2);
                   1351:       return result;
                   1352:     }
                   1353: 
                   1354:     /* apply the relative part to create a new URL */
                   1355:     redired_url = concat_url(oldurl, part);
                   1356:     free(oldurl);
                   1357:     if(!redired_url)
                   1358:       return CURLUE_OUT_OF_MEMORY;
                   1359: 
                   1360:     /* now parse the new URL */
                   1361:     handle2 = curl_url();
                   1362:     if(!handle2) {
                   1363:       free(redired_url);
                   1364:       return CURLUE_OUT_OF_MEMORY;
                   1365:     }
                   1366:     result = parseurl(redired_url, handle2, flags);
                   1367:     free(redired_url);
                   1368:     if(!result)
                   1369:       mv_urlhandle(handle2, u);
                   1370:     else
                   1371:       curl_url_cleanup(handle2);
                   1372:     return result;
                   1373:   }
                   1374:   default:
                   1375:     return CURLUE_UNKNOWN_PART;
                   1376:   }
                   1377:   DEBUGASSERT(storep);
                           /* Second pass: build the allocated string ('newp') that ends up in
                              *storep, either URL encoded or as a plain copy. */
                   1378:   {
                   1379:     const char *newp = part;
                   1380:     size_t nalloc = strlen(part);
                   1381: 
                   1382:     if(nalloc > CURL_MAX_INPUT_LENGTH)
                   1383:       /* excessive input length */
                   1384:       return CURLUE_MALFORMED_INPUT;
                   1385: 
                   1386:     if(urlencode) {
                   1387:       const unsigned char *i;
                   1388:       char *o;
                   1389:       bool free_part = FALSE;
                   1390:       char *enc = malloc(nalloc * 3 + 1); /* for worst case! */
                   1391:       if(!enc)
                   1392:         return CURLUE_OUT_OF_MEMORY;
                   1393:       if(plusencode) {
                   1394:         /* space to plus */
                   1395:         i = (const unsigned char *)part;
                   1396:         for(o = enc; *i; ++o, ++i)
                   1397:           *o = (*i == ' ') ? '+' : *i;
                   1398:         *o = 0; /* zero terminate */
                                 /* 'enc' was only scratch space for the conversion. From here on
                                    'part' points to a heap copy of the converted string (released
                                    at the end of this branch) and 'enc' is reused as the output
                                    buffer of the encoding loop. */
                   1399:         part = strdup(enc);
                   1400:         if(!part) {
                   1401:           free(enc);
                   1402:           return CURLUE_OUT_OF_MEMORY;
                   1403:         }
                   1404:         free_part = TRUE;
                   1405:       }
                               /* copy bytes that may stay as they are, write all others as %xx
                                  with lower case hex digits */
                   1406:       for(i = (const unsigned char *)part, o = enc; *i; i++) {
                   1407:         if(Curl_isunreserved(*i) ||
                   1408:            ((*i == '/') && urlskipslash) ||
                   1409:            ((*i == '=') && equalsencode) ||
                   1410:            ((*i == '+') && plusencode)) {
                   1411:           if((*i == '=') && equalsencode)
                   1412:             /* only skip the first equals sign */
                   1413:             equalsencode = FALSE;
                   1414:           *o = *i;
                   1415:           o++;
                   1416:         }
                   1417:         else {
                   1418:           msnprintf(o, 4, "%%%02x", *i);
                   1419:           o += 3;
                   1420:         }
                   1421:       }
                   1422:       *o = 0; /* zero terminate */
                   1423:       newp = enc;
                   1424:       if(free_part)
                   1425:         free((char *)part);
                   1426:     }
                   1427:     else {
                   1428:       char *p;
                   1429:       newp = strdup(part);
                   1430:       if(!newp)
                   1431:         return CURLUE_OUT_OF_MEMORY;
                   1432:       p = (char *)newp;
                   1433:       while(*p) {
                   1434:         /* make sure percent encoded are lower case */
                   1435:         if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
                   1436:            (ISUPPER(p[1]) || ISUPPER(p[2]))) {
                   1437:           p[1] = (char)TOLOWER(p[1]);
                   1438:           p[2] = (char)TOLOWER(p[2]);
                   1439:           p += 3;
                   1440:         }
                   1441:         else
                   1442:           p++;
                   1443:       }
                   1444:     }
                   1445: 
                   1446:     if(appendquery) {
                   1447:       /* Append the string onto the old query. Add a '&' separator if none is
                   1448:          present at the end of the existing query already */
                   1449:       size_t querylen = u->query ? strlen(u->query) : 0;
                   1450:       bool addamperand = querylen && (u->query[querylen -1] != '&');
                               /* without an old query there is nothing to append to; fall through
                                  and store newp as the complete query instead */
                   1451:       if(querylen) {
                   1452:         size_t newplen = strlen(newp);
                   1453:         char *p = malloc(querylen + addamperand + newplen + 1);
                   1454:         if(!p) {
                   1455:           free((char *)newp);
                   1456:           return CURLUE_OUT_OF_MEMORY;
                   1457:         }
                   1458:         strcpy(p, u->query); /* original query */
                   1459:         if(addamperand)
                   1460:           p[querylen] = '&'; /* ampersand */
                   1461:         strcpy(&p[querylen + addamperand], newp); /* new suffix */
                   1462:         free((char *)newp);
                   1463:         free(*storep);
                   1464:         *storep = p;
                   1465:         return CURLUE_OK;
                   1466:       }
                   1467:     }
                   1468: 
                   1469:     if(what == CURLUPART_HOST) {
                   1470:       if(0 == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) {
                   1471:         /* Skip hostname check, it's allowed to be empty. */
                   1472:       }
                   1473:       else {
                                 /* a non-zero return from hostname_check() rejects the name */
                   1474:         if(hostname_check(u, (char *)newp)) {
                   1475:           free((char *)newp);
                   1476:           return CURLUE_MALFORMED_INPUT;
                   1477:         }
                   1478:       }
                   1479:     }
                   1480: 
                             /* the handle takes ownership of newp, replacing the old contents */
                   1481:     free(*storep);
                   1482:     *storep = (char *)newp;
                   1483:   }
                   1484:   /* set after the string, to make it not assigned if the allocation above
                   1485:      fails */
                   1486:   if(port)
                   1487:     u->portnum = port;
                   1488:   return CURLUE_OK;
                   1489: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>