1: /***************************************************************************
2: * _ _ ____ _
3: * Project ___| | | | _ \| |
4: * / __| | | | |_) | |
5: * | (__| |_| | _ <| |___
6: * \___|\___/|_| \_\_____|
7: *
8: * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al.
9: *
10: * This software is licensed as described in the file COPYING, which
11: * you should have received as part of this distribution. The terms
12: * are also available at https://curl.haxx.se/docs/copyright.html.
13: *
14: * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15: * copies of the Software, and permit persons to whom the Software is
16: * furnished to do so, under the terms of the COPYING file.
17: *
18: * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19: * KIND, either express or implied.
20: *
21: ***************************************************************************/
22: #include "tool_setup.h"
23:
24: #define ENABLE_CURLX_PRINTF
25: /* use our own printf() functions */
26: #include "curlx.h"
27: #include "tool_cfgable.h"
28: #include "tool_doswin.h"
29: #include "tool_urlglob.h"
30: #include "tool_vms.h"
31:
32: #include "memdebug.h" /* keep this as LAST include */
33:
/*
 * GLOBERROR() records an error message and the URL column it refers to in
 * the URLGlob that the local variable 'glob' points to, and evaluates to
 * 'code' so that it can be used directly as a return value.
 *
 * The whole expansion and each argument are parenthesized so the macro acts
 * as a single expression regardless of the context it is used in.
 */
#define GLOBERROR(string, column, code) \
  (glob->error = (string), glob->pos = (column), (code))
36:
37: static CURLcode glob_fixed(URLGlob *glob, char *fixed, size_t len)
38: {
39: URLPattern *pat = &glob->pattern[glob->size];
40: pat->type = UPTSet;
41: pat->content.Set.size = 1;
42: pat->content.Set.ptr_s = 0;
43: pat->globindex = -1;
44:
45: pat->content.Set.elements = malloc(sizeof(char *));
46:
47: if(!pat->content.Set.elements)
48: return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
49:
50: pat->content.Set.elements[0] = malloc(len + 1);
51: if(!pat->content.Set.elements[0])
52: return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
53:
54: memcpy(pat->content.Set.elements[0], fixed, len);
55: pat->content.Set.elements[0][len] = 0;
56:
57: return CURLE_OK;
58: }
59:
/*
 * multiply()
 *
 * Multiplies *amount by 'with' and stores the product back in *amount.
 *
 * Returns 0 on success and 1 if the product does not fit in an unsigned
 * long, in which case *amount is left untouched. Multiplying by zero always
 * succeeds and sets *amount to zero.
 */
static int multiply(unsigned long *amount, long with)
{
  unsigned long product;

  if(with == 0) {
    *amount = 0;
    return 0;
  }

  product = *amount * with;
  if(product / with != *amount)
    /* dividing back did not give the original value: it wrapped */
    return 1;

  *amount = product;
  return 0;
}
76:
77: static CURLcode glob_set(URLGlob *glob, char **patternp,
78: size_t *posp, unsigned long *amount,
79: int globindex)
80: {
81: /* processes a set expression with the point behind the opening '{'
82: ','-separated elements are collected until the next closing '}'
83: */
84: URLPattern *pat;
85: bool done = FALSE;
86: char *buf = glob->glob_buffer;
87: char *pattern = *patternp;
88: char *opattern = pattern;
89: size_t opos = *posp-1;
90:
91: pat = &glob->pattern[glob->size];
92: /* patterns 0,1,2,... correspond to size=1,3,5,... */
93: pat->type = UPTSet;
94: pat->content.Set.size = 0;
95: pat->content.Set.ptr_s = 0;
96: pat->content.Set.elements = NULL;
97: pat->globindex = globindex;
98:
99: while(!done) {
100: switch (*pattern) {
101: case '\0': /* URL ended while set was still open */
102: return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT);
103:
104: case '{':
105: case '[': /* no nested expressions at this time */
106: return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT);
107:
108: case '}': /* set element completed */
109: if(opattern == pattern)
110: return GLOBERROR("empty string within braces", *posp,
111: CURLE_URL_MALFORMAT);
112:
113: /* add 1 to size since it'll be incremented below */
114: if(multiply(amount, pat->content.Set.size + 1))
115: return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT);
116:
117: /* FALLTHROUGH */
118: case ',':
119:
120: *buf = '\0';
121: if(pat->content.Set.elements) {
122: char **new_arr = realloc(pat->content.Set.elements,
123: (pat->content.Set.size + 1) * sizeof(char *));
124: if(!new_arr)
125: return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
126:
127: pat->content.Set.elements = new_arr;
128: }
129: else
130: pat->content.Set.elements = malloc(sizeof(char *));
131:
132: if(!pat->content.Set.elements)
133: return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
134:
135: pat->content.Set.elements[pat->content.Set.size] =
136: strdup(glob->glob_buffer);
137: if(!pat->content.Set.elements[pat->content.Set.size])
138: return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
139: ++pat->content.Set.size;
140:
141: if(*pattern == '}') {
142: pattern++; /* pass the closing brace */
143: done = TRUE;
144: continue;
145: }
146:
147: buf = glob->glob_buffer;
148: ++pattern;
149: ++(*posp);
150: break;
151:
152: case ']': /* illegal closing bracket */
153: return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT);
154:
155: case '\\': /* escaped character, skip '\' */
156: if(pattern[1]) {
157: ++pattern;
158: ++(*posp);
159: }
160: /* FALLTHROUGH */
161: default:
162: *buf++ = *pattern++; /* copy character to set element */
163: ++(*posp);
164: }
165: }
166:
167: *patternp = pattern; /* return with the new position */
168: return CURLE_OK;
169: }
170:
171: static CURLcode glob_range(URLGlob *glob, char **patternp,
172: size_t *posp, unsigned long *amount,
173: int globindex)
174: {
175: /* processes a range expression with the point behind the opening '['
176: - char range: e.g. "a-z]", "B-Q]"
177: - num range: e.g. "0-9]", "17-2000]"
178: - num range with leading zeros: e.g. "001-999]"
179: expression is checked for well-formedness and collected until the next ']'
180: */
181: URLPattern *pat;
182: int rc;
183: char *pattern = *patternp;
184: char *c;
185:
186: pat = &glob->pattern[glob->size];
187: pat->globindex = globindex;
188:
189: if(ISALPHA(*pattern)) {
190: /* character range detected */
191: char min_c;
192: char max_c;
193: char end_c;
194: unsigned long step = 1;
195:
196: pat->type = UPTCharRange;
197:
198: rc = sscanf(pattern, "%c-%c%c", &min_c, &max_c, &end_c);
199:
200: if(rc == 3) {
201: if(end_c == ':') {
202: char *endp;
203: errno = 0;
204: step = strtoul(&pattern[4], &endp, 10);
205: if(errno || &pattern[4] == endp || *endp != ']')
206: step = 0;
207: else
208: pattern = endp + 1;
209: }
210: else if(end_c != ']')
211: /* then this is wrong */
212: rc = 0;
213: else
214: /* end_c == ']' */
215: pattern += 4;
216: }
217:
218: *posp += (pattern - *patternp);
219:
220: if(rc != 3 || !step || step > (unsigned)INT_MAX ||
221: (min_c == max_c && step != 1) ||
222: (min_c != max_c && (min_c > max_c || step > (unsigned)(max_c - min_c) ||
223: (max_c - min_c) > ('z' - 'a'))))
224: /* the pattern is not well-formed */
225: return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
226:
227: /* if there was a ":[num]" thing, use that as step or else use 1 */
228: pat->content.CharRange.step = (int)step;
229: pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
230: pat->content.CharRange.max_c = max_c;
231:
232: if(multiply(amount, ((pat->content.CharRange.max_c -
233: pat->content.CharRange.min_c) /
234: pat->content.CharRange.step + 1)))
235: return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
236: }
237: else if(ISDIGIT(*pattern)) {
238: /* numeric range detected */
239: unsigned long min_n;
240: unsigned long max_n = 0;
241: unsigned long step_n = 0;
242: char *endp;
243:
244: pat->type = UPTNumRange;
245: pat->content.NumRange.padlength = 0;
246:
247: if(*pattern == '0') {
248: /* leading zero specified, count them! */
249: c = pattern;
250: while(ISDIGIT(*c)) {
251: c++;
252: ++pat->content.NumRange.padlength; /* padding length is set for all
253: instances of this pattern */
254: }
255: }
256:
257: errno = 0;
258: min_n = strtoul(pattern, &endp, 10);
259: if(errno || (endp == pattern))
260: endp = NULL;
261: else {
262: if(*endp != '-')
263: endp = NULL;
264: else {
265: pattern = endp + 1;
266: while(*pattern && ISBLANK(*pattern))
267: pattern++;
268: if(!ISDIGIT(*pattern)) {
269: endp = NULL;
270: goto fail;
271: }
272: errno = 0;
273: max_n = strtoul(pattern, &endp, 10);
274: if(errno)
275: /* overflow */
276: endp = NULL;
277: else if(*endp == ':') {
278: pattern = endp + 1;
279: errno = 0;
280: step_n = strtoul(pattern, &endp, 10);
281: if(errno)
282: /* over/underflow situation */
283: endp = NULL;
284: }
285: else
286: step_n = 1;
287: if(endp && (*endp == ']')) {
288: pattern = endp + 1;
289: }
290: else
291: endp = NULL;
292: }
293: }
294:
295: fail:
296: *posp += (pattern - *patternp);
297:
298: if(!endp || !step_n ||
299: (min_n == max_n && step_n != 1) ||
300: (min_n != max_n && (min_n > max_n || step_n > (max_n - min_n))))
301: /* the pattern is not well-formed */
302: return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
303:
304: /* typecasting to ints are fine here since we make sure above that we
305: are within 31 bits */
306: pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
307: pat->content.NumRange.max_n = max_n;
308: pat->content.NumRange.step = step_n;
309:
310: if(multiply(amount, ((pat->content.NumRange.max_n -
311: pat->content.NumRange.min_n) /
312: pat->content.NumRange.step + 1)))
313: return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
314: }
315: else
316: return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT);
317:
318: *patternp = pattern;
319: return CURLE_OK;
320: }
321:
322: static bool peek_ipv6(const char *str, size_t *skip)
323: {
324: /*
325: * Scan for a potential IPv6 literal.
326: * - Valid globs contain a hyphen and <= 1 colon.
327: * - IPv6 literals contain no hyphens and >= 2 colons.
328: */
329: size_t i = 0;
330: size_t colons = 0;
331: if(str[i++] != '[') {
332: return FALSE;
333: }
334: for(;;) {
335: const char c = str[i++];
336: if(ISALNUM(c) || c == '.' || c == '%') {
337: /* ok */
338: }
339: else if(c == ':') {
340: colons++;
341: }
342: else if(c == ']') {
343: *skip = i;
344: return colons >= 2 ? TRUE : FALSE;
345: }
346: else {
347: return FALSE;
348: }
349: }
350: }
351:
352: static CURLcode glob_parse(URLGlob *glob, char *pattern,
353: size_t pos, unsigned long *amount)
354: {
355: /* processes a literal string component of a URL
356: special characters '{' and '[' branch to set/range processing functions
357: */
358: CURLcode res = CURLE_OK;
359: int globindex = 0; /* count "actual" globs */
360:
361: *amount = 1;
362:
363: while(*pattern && !res) {
364: char *buf = glob->glob_buffer;
365: size_t sublen = 0;
366: while(*pattern && *pattern != '{') {
367: if(*pattern == '[') {
368: /* skip over IPv6 literals and [] */
369: size_t skip = 0;
370: if(!peek_ipv6(pattern, &skip) && (pattern[1] == ']'))
371: skip = 2;
372: if(skip) {
373: memcpy(buf, pattern, skip);
374: buf += skip;
375: pattern += skip;
376: sublen += skip;
377: continue;
378: }
379: break;
380: }
381: if(*pattern == '}' || *pattern == ']')
382: return GLOBERROR("unmatched close brace/bracket", pos,
383: CURLE_URL_MALFORMAT);
384:
385: /* only allow \ to escape known "special letters" */
386: if(*pattern == '\\' &&
387: (*(pattern + 1) == '{' || *(pattern + 1) == '[' ||
388: *(pattern + 1) == '}' || *(pattern + 1) == ']') ) {
389:
390: /* escape character, skip '\' */
391: ++pattern;
392: ++pos;
393: }
394: *buf++ = *pattern++; /* copy character to literal */
395: ++pos;
396: sublen++;
397: }
398: if(sublen) {
399: /* we got a literal string, add it as a single-item list */
400: *buf = '\0';
401: res = glob_fixed(glob, glob->glob_buffer, sublen);
402: }
403: else {
404: switch (*pattern) {
405: case '\0': /* done */
406: break;
407:
408: case '{':
409: /* process set pattern */
410: pattern++;
411: pos++;
412: res = glob_set(glob, &pattern, &pos, amount, globindex++);
413: break;
414:
415: case '[':
416: /* process range pattern */
417: pattern++;
418: pos++;
419: res = glob_range(glob, &pattern, &pos, amount, globindex++);
420: break;
421: }
422: }
423:
424: if(++glob->size >= GLOB_PATTERN_NUM)
425: return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT);
426: }
427: return res;
428: }
429:
430: CURLcode glob_url(URLGlob **glob, char *url, unsigned long *urlnum,
431: FILE *error)
432: {
433: /*
434: * We can deal with any-size, just make a buffer with the same length
435: * as the specified URL!
436: */
437: URLGlob *glob_expand;
438: unsigned long amount = 0;
439: char *glob_buffer;
440: CURLcode res;
441:
442: *glob = NULL;
443:
444: glob_buffer = malloc(strlen(url) + 1);
445: if(!glob_buffer)
446: return CURLE_OUT_OF_MEMORY;
447: glob_buffer[0] = 0;
448:
449: glob_expand = calloc(1, sizeof(URLGlob));
450: if(!glob_expand) {
451: Curl_safefree(glob_buffer);
452: return CURLE_OUT_OF_MEMORY;
453: }
454: glob_expand->urllen = strlen(url);
455: glob_expand->glob_buffer = glob_buffer;
456:
457: res = glob_parse(glob_expand, url, 1, &amount);
458: if(!res)
459: *urlnum = amount;
460: else {
461: if(error && glob_expand->error) {
462: char text[512];
463: const char *t;
464: if(glob_expand->pos) {
465: msnprintf(text, sizeof(text), "%s in URL position %zu:\n%s\n%*s^",
466: glob_expand->error,
467: glob_expand->pos, url, glob_expand->pos - 1, " ");
468: t = text;
469: }
470: else
471: t = glob_expand->error;
472:
473: /* send error description to the error-stream */
474: fprintf(error, "curl: (%d) %s\n", res, t);
475: }
476: /* it failed, we cleanup */
477: glob_cleanup(glob_expand);
478: *urlnum = 1;
479: return res;
480: }
481:
482: *glob = glob_expand;
483: return CURLE_OK;
484: }
485:
486: void glob_cleanup(URLGlob* glob)
487: {
488: size_t i;
489: int elem;
490:
491: if(!glob)
492: return;
493:
494: for(i = 0; i < glob->size; i++) {
495: if((glob->pattern[i].type == UPTSet) &&
496: (glob->pattern[i].content.Set.elements)) {
497: for(elem = glob->pattern[i].content.Set.size - 1;
498: elem >= 0;
499: --elem) {
500: Curl_safefree(glob->pattern[i].content.Set.elements[elem]);
501: }
502: Curl_safefree(glob->pattern[i].content.Set.elements);
503: }
504: }
505: Curl_safefree(glob->glob_buffer);
506: Curl_safefree(glob);
507: }
508:
509: CURLcode glob_next_url(char **globbed, URLGlob *glob)
510: {
511: URLPattern *pat;
512: size_t i;
513: size_t len;
514: size_t buflen = glob->urllen + 1;
515: char *buf = glob->glob_buffer;
516:
517: *globbed = NULL;
518:
519: if(!glob->beenhere)
520: glob->beenhere = 1;
521: else {
522: bool carry = TRUE;
523:
524: /* implement a counter over the index ranges of all patterns, starting
525: with the rightmost pattern */
526: for(i = 0; carry && (i < glob->size); i++) {
527: carry = FALSE;
528: pat = &glob->pattern[glob->size - 1 - i];
529: switch(pat->type) {
530: case UPTSet:
531: if((pat->content.Set.elements) &&
532: (++pat->content.Set.ptr_s == pat->content.Set.size)) {
533: pat->content.Set.ptr_s = 0;
534: carry = TRUE;
535: }
536: break;
537: case UPTCharRange:
538: pat->content.CharRange.ptr_c =
539: (char)(pat->content.CharRange.step +
540: (int)((unsigned char)pat->content.CharRange.ptr_c));
541: if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
542: pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
543: carry = TRUE;
544: }
545: break;
546: case UPTNumRange:
547: pat->content.NumRange.ptr_n += pat->content.NumRange.step;
548: if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
549: pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
550: carry = TRUE;
551: }
552: break;
553: default:
554: printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
555: return CURLE_FAILED_INIT;
556: }
557: }
558: if(carry) { /* first pattern ptr has run into overflow, done! */
559: return CURLE_OK;
560: }
561: }
562:
563: for(i = 0; i < glob->size; ++i) {
564: pat = &glob->pattern[i];
565: switch(pat->type) {
566: case UPTSet:
567: if(pat->content.Set.elements) {
568: msnprintf(buf, buflen, "%s",
569: pat->content.Set.elements[pat->content.Set.ptr_s]);
570: len = strlen(buf);
571: buf += len;
572: buflen -= len;
573: }
574: break;
575: case UPTCharRange:
576: if(buflen) {
577: *buf++ = pat->content.CharRange.ptr_c;
578: *buf = '\0';
579: buflen--;
580: }
581: break;
582: case UPTNumRange:
583: msnprintf(buf, buflen, "%0*lu",
584: pat->content.NumRange.padlength,
585: pat->content.NumRange.ptr_n);
586: len = strlen(buf);
587: buf += len;
588: buflen -= len;
589: break;
590: default:
591: printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
592: return CURLE_FAILED_INIT;
593: }
594: }
595:
596: *globbed = strdup(glob->glob_buffer);
597: if(!*globbed)
598: return CURLE_OUT_OF_MEMORY;
599:
600: return CURLE_OK;
601: }
602:
603: CURLcode glob_match_url(char **result, char *filename, URLGlob *glob)
604: {
605: char *target;
606: size_t allocsize;
607: char numbuf[18];
608: char *appendthis = (char *)"";
609: size_t appendlen = 0;
610: size_t stringlen = 0;
611:
612: *result = NULL;
613:
614: /* We cannot use the glob_buffer for storage here since the filename may
615: * be longer than the URL we use. We allocate a good start size, then
616: * we need to realloc in case of need.
617: */
618: allocsize = strlen(filename) + 1; /* make it at least one byte to store the
619: trailing zero */
620: target = malloc(allocsize);
621: if(!target)
622: return CURLE_OUT_OF_MEMORY;
623:
624: while(*filename) {
625: if(*filename == '#' && ISDIGIT(filename[1])) {
626: char *ptr = filename;
627: unsigned long num = strtoul(&filename[1], &filename, 10);
628: URLPattern *pat = NULL;
629:
630: if(num && (num < glob->size)) {
631: unsigned long i;
632: num--; /* make it zero based */
633: /* find the correct glob entry */
634: for(i = 0; i<glob->size; i++) {
635: if(glob->pattern[i].globindex == (int)num) {
636: pat = &glob->pattern[i];
637: break;
638: }
639: }
640: }
641:
642: if(pat) {
643: switch(pat->type) {
644: case UPTSet:
645: if(pat->content.Set.elements) {
646: appendthis = pat->content.Set.elements[pat->content.Set.ptr_s];
647: appendlen =
648: strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
649: }
650: break;
651: case UPTCharRange:
652: numbuf[0] = pat->content.CharRange.ptr_c;
653: numbuf[1] = 0;
654: appendthis = numbuf;
655: appendlen = 1;
656: break;
657: case UPTNumRange:
658: msnprintf(numbuf, sizeof(numbuf), "%0*lu",
659: pat->content.NumRange.padlength,
660: pat->content.NumRange.ptr_n);
661: appendthis = numbuf;
662: appendlen = strlen(numbuf);
663: break;
664: default:
665: fprintf(stderr, "internal error: invalid pattern type (%d)\n",
666: (int)pat->type);
667: Curl_safefree(target);
668: return CURLE_FAILED_INIT;
669: }
670: }
671: else {
672: /* #[num] out of range, use the #[num] in the output */
673: filename = ptr;
674: appendthis = filename++;
675: appendlen = 1;
676: }
677: }
678: else {
679: appendthis = filename++;
680: appendlen = 1;
681: }
682: if(appendlen + stringlen >= allocsize) {
683: char *newstr;
684: /* we append a single byte to allow for the trailing byte to be appended
685: at the end of this function outside the while() loop */
686: allocsize = (appendlen + stringlen) * 2;
687: newstr = realloc(target, allocsize + 1);
688: if(!newstr) {
689: Curl_safefree(target);
690: return CURLE_OUT_OF_MEMORY;
691: }
692: target = newstr;
693: }
694: memcpy(&target[stringlen], appendthis, appendlen);
695: stringlen += appendlen;
696: }
697: target[stringlen]= '\0';
698:
699: #if defined(MSDOS) || defined(WIN32)
700: {
701: char *sanitized;
702: SANITIZEcode sc = sanitize_file_name(&sanitized, target,
703: (SANITIZE_ALLOW_PATH |
704: SANITIZE_ALLOW_RESERVED));
705: Curl_safefree(target);
706: if(sc)
707: return CURLE_URL_MALFORMAT;
708: target = sanitized;
709: }
710: #endif /* MSDOS || WIN32 */
711:
712: *result = target;
713: return CURLE_OK;
714: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>