Annotation of embedaddon/ntp/sntp/libopts/tokenize.c, revision 1.1
1.1 ! misho 1: /*
! 2: * This file defines the string_tokenize interface
! 3: * Time-stamp: "2010-07-17 10:40:26 bkorb"
! 4: *
! 5: * This file is part of AutoOpts, a companion to AutoGen.
! 6: * AutoOpts is free software.
! 7: * AutoOpts is Copyright (c) 1992-2011 by Bruce Korb - all rights reserved
! 8: *
! 9: * AutoOpts is available under any one of two licenses. The license
! 10: * in use must be one of these two and the choice is under the control
! 11: * of the user of the license.
! 12: *
! 13: * The GNU Lesser General Public License, version 3 or later
! 14: * See the files "COPYING.lgplv3" and "COPYING.gplv3"
! 15: *
! 16: * The Modified Berkeley Software Distribution License
! 17: * See the file "COPYING.mbsd"
! 18: *
! 19: * These files have the following md5sums:
! 20: *
! 21: * 43b91e8ca915626ed3818ffb1b71248b pkg/libopts/COPYING.gplv3
! 22: * 06a1a2e4760c90ea5e1dad8dfaac4d39 pkg/libopts/COPYING.lgplv3
! 23: * 66a5cedaf62c4b2637025f049f9b826f pkg/libopts/COPYING.mbsd
! 24: */
! 25:
! 26: #include <errno.h>
! 27: #include <stdlib.h>
! 28:
! 29: #define cc_t const unsigned char
! 30: #define ch_t unsigned char
! 31:
! 32: /* = = = START-STATIC-FORWARD = = = */
! 33: static void
! 34: copy_cooked(ch_t** ppDest, char const ** ppSrc);
! 35:
! 36: static void
! 37: copy_raw(ch_t** ppDest, char const ** ppSrc);
! 38:
! 39: static token_list_t *
! 40: alloc_token_list(char const * str);
! 41: /* = = = END-STATIC-FORWARD = = = */
! 42:
! 43: static void
! 44: copy_cooked(ch_t** ppDest, char const ** ppSrc)
! 45: {
! 46: ch_t* pDest = (ch_t*)*ppDest;
! 47: const ch_t* pSrc = (const ch_t*)(*ppSrc + 1);
! 48:
! 49: for (;;) {
! 50: ch_t ch = *(pSrc++);
! 51: switch (ch) {
! 52: case NUL: *ppSrc = NULL; return;
! 53: case '"': goto done;
! 54: case '\\':
! 55: pSrc += ao_string_cook_escape_char((char*)pSrc, (char*)&ch, 0x7F);
! 56: if (ch == 0x7F)
! 57: break;
! 58: /* FALLTHROUGH */
! 59:
! 60: default:
! 61: *(pDest++) = ch;
! 62: }
! 63: }
! 64:
! 65: done:
! 66: *ppDest = (ch_t*)pDest; /* next spot for storing character */
! 67: *ppSrc = (char const *)pSrc; /* char following closing quote */
! 68: }
! 69:
! 70:
! 71: static void
! 72: copy_raw(ch_t** ppDest, char const ** ppSrc)
! 73: {
! 74: ch_t* pDest = *ppDest;
! 75: cc_t* pSrc = (cc_t*) (*ppSrc + 1);
! 76:
! 77: for (;;) {
! 78: ch_t ch = *(pSrc++);
! 79: switch (ch) {
! 80: case NUL: *ppSrc = NULL; return;
! 81: case '\'': goto done;
! 82: case '\\':
! 83: /*
! 84: * *Four* escapes are handled: newline removal, escape char
! 85: * quoting and apostrophe quoting
! 86: */
! 87: switch (*pSrc) {
! 88: case NUL: *ppSrc = NULL; return;
! 89: case '\r':
! 90: if (*(++pSrc) == '\n')
! 91: ++pSrc;
! 92: continue;
! 93:
! 94: case '\n':
! 95: ++pSrc;
! 96: continue;
! 97:
! 98: case '\'':
! 99: ch = '\'';
! 100: /* FALLTHROUGH */
! 101:
! 102: case '\\':
! 103: ++pSrc;
! 104: break;
! 105: }
! 106: /* FALLTHROUGH */
! 107:
! 108: default:
! 109: *(pDest++) = ch;
! 110: }
! 111: }
! 112:
! 113: done:
! 114: *ppDest = pDest; /* next spot for storing character */
! 115: *ppSrc = (char const *) pSrc; /* char following closing quote */
! 116: }
! 117:
! 118: static token_list_t *
! 119: alloc_token_list(char const * str)
! 120: {
! 121: token_list_t * res;
! 122:
! 123: int max_token_ct = 2; /* allow for trailing NULL pointer & NUL on string */
! 124:
! 125: if (str == NULL) goto enoent_res;
! 126:
! 127: /*
! 128: * Trim leading white space. Use "ENOENT" and a NULL return to indicate
! 129: * an empty string was passed.
! 130: */
! 131: while (IS_WHITESPACE_CHAR(*str)) str++;
! 132: if (*str == NUL) goto enoent_res;
! 133:
! 134: /*
! 135: * Take an approximate count of tokens. If no quoted strings are used,
! 136: * it will be accurate. If quoted strings are used, it will be a little
! 137: * high and we'll squander the space for a few extra pointers.
! 138: */
! 139: {
! 140: cc_t* pz = (cc_t*)str;
! 141:
! 142: do {
! 143: max_token_ct++;
! 144: while (! IS_WHITESPACE_CHAR(*++pz))
! 145: if (*pz == NUL) goto found_nul;
! 146: while (IS_WHITESPACE_CHAR(*pz)) pz++;
! 147: } while (*pz != NUL);
! 148:
! 149: found_nul:
! 150: res = malloc(sizeof(*res) + (pz - (cc_t*)str)
! 151: + (max_token_ct * sizeof(ch_t*)));
! 152: }
! 153:
! 154: if (res == NULL)
! 155: errno = ENOMEM;
! 156: else res->tkn_list[0] = (ch_t*)(res->tkn_list + (max_token_ct - 1));
! 157:
! 158: return res;
! 159:
! 160: enoent_res:
! 161:
! 162: errno = ENOENT;
! 163: return NULL;
! 164: }
! 165:
! 166: /*=export_func ao_string_tokenize
! 167: *
! 168: * what: tokenize an input string
! 169: *
! 170: * arg: + char const* + string + string to be tokenized +
! 171: *
! 172: * ret_type: token_list_t*
! 173: * ret_desc: pointer to a structure that lists each token
! 174: *
! 175: * doc:
! 176: *
! 177: * This function will convert one input string into a list of strings.
! 178: * The list of strings is derived by separating the input based on
! 179: * white space separation. However, if the input contains either single
! 180: * or double quote characters, then the text after that character up to
! 181: * a matching quote will become the string in the list.
! 182: *
! 183: * The returned pointer should be deallocated with @code{free(3C)} when
! 184: * you are done using the data. The data are placed in a single block of
! 185: * allocated memory. Do not deallocate individual token/strings.
! 186: *
! 187: * The structure pointed to will contain at least these two fields:
! 188: * @table @samp
! 189: * @item tkn_ct
! 190: * The number of tokens found in the input string.
! 191: * @item tkn_list
! 192: * An array of @code{tkn_ct + 1} pointers to substring tokens, with
! 193: * the last pointer set to NULL.
! 194: * @end table
! 195: *
! 196: * There are two types of quoted strings: single quoted (@code{'}) and
! 197: * double quoted (@code{"}). Singly quoted strings are fairly raw in that
! 198: * escape characters (@code{\\}) are simply another character, except when
! 199: * preceding the following characters:
! 200: * @example
! 201: * @code{\\} double backslashes reduce to one
! 202: * @code{'} incorporates the single quote into the string
! 203: * @code{\n} suppresses both the backslash and newline character
! 204: * @end example
! 205: *
! 206: * Double quote strings are formed according to the rules of string
! 207: * constants in ANSI-C programs.
! 208: *
! 209: * example:
! 210: * @example
! 211: * #include <stdlib.h>
! 212: * int ix;
! 213: * token_list_t* ptl = ao_string_tokenize(some_string);
! 214: * for (ix = 0; ix < ptl->tkn_ct; ix++)
! 215: * do_something_with_tkn(ptl->tkn_list[ix]);
! 216: * free(ptl);
! 217: * @end example
! 218: * Note that everything is freed with the one call to @code{free(3C)}.
! 219: *
! 220: * err:
! 221: * NULL is returned and @code{errno} will be set to indicate the problem:
! 222: * @itemize @bullet
! 223: * @item
! 224: * @code{EINVAL} - There was an unterminated quoted string.
! 225: * @item
! 226: * @code{ENOENT} - The input string was empty.
! 227: * @item
! 228: * @code{ENOMEM} - There is not enough memory.
! 229: * @end itemize
! 230: =*/
! 231: token_list_t*
! 232: ao_string_tokenize(char const* str)
! 233: {
! 234: token_list_t* res = alloc_token_list(str);
! 235: ch_t* pzDest;
! 236:
! 237: /*
! 238: * Now copy each token into the output buffer.
! 239: */
! 240: if (res == NULL)
! 241: return res;
! 242:
! 243: pzDest = (ch_t*)(res->tkn_list[0]);
! 244: res->tkn_ct = 0;
! 245:
! 246: do {
! 247: res->tkn_list[ res->tkn_ct++ ] = pzDest;
! 248: for (;;) {
! 249: int ch = (ch_t)*str;
! 250: if (IS_WHITESPACE_CHAR(ch)) {
! 251: found_white_space:
! 252: while (IS_WHITESPACE_CHAR(*++str)) ;
! 253: break;
! 254: }
! 255:
! 256: switch (ch) {
! 257: case '"':
! 258: copy_cooked(&pzDest, &str);
! 259: if (str == NULL) {
! 260: free(res);
! 261: errno = EINVAL;
! 262: return NULL;
! 263: }
! 264: if (IS_WHITESPACE_CHAR(*str))
! 265: goto found_white_space;
! 266: break;
! 267:
! 268: case '\'':
! 269: copy_raw(&pzDest, &str);
! 270: if (str == NULL) {
! 271: free(res);
! 272: errno = EINVAL;
! 273: return NULL;
! 274: }
! 275: if (IS_WHITESPACE_CHAR(*str))
! 276: goto found_white_space;
! 277: break;
! 278:
! 279: case NUL:
! 280: goto copy_done;
! 281:
! 282: default:
! 283: str++;
! 284: *(pzDest++) = ch;
! 285: }
! 286: } copy_done:;
! 287:
! 288: /*
! 289: * NUL terminate the last token and see if we have any more tokens.
! 290: */
! 291: *(pzDest++) = NUL;
! 292: } while (*str != NUL);
! 293:
! 294: res->tkn_list[ res->tkn_ct ] = NULL;
! 295:
! 296: return res;
! 297: }
! 298:
! 299: #ifdef TEST
! 300: #include <stdio.h>
! 301: #include <string.h>
! 302:
! 303: int
! 304: main(int argc, char** argv)
! 305: {
! 306: if (argc == 1) {
! 307: printf("USAGE: %s arg [ ... ]\n", *argv);
! 308: return 1;
! 309: }
! 310: while (--argc > 0) {
! 311: char* arg = *(++argv);
! 312: token_list_t* p = ao_string_tokenize(arg);
! 313: if (p == NULL) {
! 314: printf("Parsing string ``%s'' failed:\n\terrno %d (%s)\n",
! 315: arg, errno, strerror(errno));
! 316: } else {
! 317: int ix = 0;
! 318: printf("Parsed string ``%s''\ninto %d tokens:\n", arg, p->tkn_ct);
! 319: do {
! 320: printf(" %3d: ``%s''\n", ix+1, p->tkn_list[ix]);
! 321: } while (++ix < p->tkn_ct);
! 322: free(p);
! 323: }
! 324: }
! 325: return 0;
! 326: }
! 327: #endif
! 328:
! 329: /*
! 330: * Local Variables:
! 331: * mode: C
! 332: * c-file-style: "stroustrup"
! 333: * indent-tabs-mode: nil
! 334: * End:
! 335: * end of autoopts/tokenize.c */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>