Return to tokenize.c CVS log | Up to [ELWIX - Embedded LightWeight unIX -] / embedaddon / ntp / sntp / libopts |
1.1 ! misho 1: /* ! 2: * This file defines the string_tokenize interface ! 3: * Time-stamp: "2010-07-17 10:40:26 bkorb" ! 4: * ! 5: * This file is part of AutoOpts, a companion to AutoGen. ! 6: * AutoOpts is free software. ! 7: * AutoOpts is Copyright (c) 1992-2011 by Bruce Korb - all rights reserved ! 8: * ! 9: * AutoOpts is available under any one of two licenses. The license ! 10: * in use must be one of these two and the choice is under the control ! 11: * of the user of the license. ! 12: * ! 13: * The GNU Lesser General Public License, version 3 or later ! 14: * See the files "COPYING.lgplv3" and "COPYING.gplv3" ! 15: * ! 16: * The Modified Berkeley Software Distribution License ! 17: * See the file "COPYING.mbsd" ! 18: * ! 19: * These files have the following md5sums: ! 20: * ! 21: * 43b91e8ca915626ed3818ffb1b71248b pkg/libopts/COPYING.gplv3 ! 22: * 06a1a2e4760c90ea5e1dad8dfaac4d39 pkg/libopts/COPYING.lgplv3 ! 23: * 66a5cedaf62c4b2637025f049f9b826f pkg/libopts/COPYING.mbsd ! 24: */ ! 25: ! 26: #include <errno.h> ! 27: #include <stdlib.h> ! 28: ! 29: #define cc_t const unsigned char ! 30: #define ch_t unsigned char ! 31: ! 32: /* = = = START-STATIC-FORWARD = = = */ ! 33: static void ! 34: copy_cooked(ch_t** ppDest, char const ** ppSrc); ! 35: ! 36: static void ! 37: copy_raw(ch_t** ppDest, char const ** ppSrc); ! 38: ! 39: static token_list_t * ! 40: alloc_token_list(char const * str); ! 41: /* = = = END-STATIC-FORWARD = = = */ ! 42: ! 43: static void ! 44: copy_cooked(ch_t** ppDest, char const ** ppSrc) ! 45: { ! 46: ch_t* pDest = (ch_t*)*ppDest; ! 47: const ch_t* pSrc = (const ch_t*)(*ppSrc + 1); ! 48: ! 49: for (;;) { ! 50: ch_t ch = *(pSrc++); ! 51: switch (ch) { ! 52: case NUL: *ppSrc = NULL; return; ! 53: case '"': goto done; ! 54: case '\\': ! 55: pSrc += ao_string_cook_escape_char((char*)pSrc, (char*)&ch, 0x7F); ! 56: if (ch == 0x7F) ! 57: break; ! 58: /* FALLTHROUGH */ ! 59: ! 60: default: ! 61: *(pDest++) = ch; ! 62: } ! 63: } ! 64: ! 65: done: ! 66: *ppDest = (ch_t*)pDest; /* next spot for storing character */ ! 67: *ppSrc = (char const *)pSrc; /* char following closing quote */ ! 68: } ! 69: ! 70: ! 71: static void ! 72: copy_raw(ch_t** ppDest, char const ** ppSrc) ! 73: { ! 74: ch_t* pDest = *ppDest; ! 75: cc_t* pSrc = (cc_t*) (*ppSrc + 1); ! 76: ! 77: for (;;) { ! 78: ch_t ch = *(pSrc++); ! 79: switch (ch) { ! 80: case NUL: *ppSrc = NULL; return; ! 81: case '\'': goto done; ! 82: case '\\': ! 83: /* ! 84: * *Four* escapes are handled: newline removal, escape char ! 85: * quoting and apostrophe quoting ! 86: */ ! 87: switch (*pSrc) { ! 88: case NUL: *ppSrc = NULL; return; ! 89: case '\r': ! 90: if (*(++pSrc) == '\n') ! 91: ++pSrc; ! 92: continue; ! 93: ! 94: case '\n': ! 95: ++pSrc; ! 96: continue; ! 97: ! 98: case '\'': ! 99: ch = '\''; ! 100: /* FALLTHROUGH */ ! 101: ! 102: case '\\': ! 103: ++pSrc; ! 104: break; ! 105: } ! 106: /* FALLTHROUGH */ ! 107: ! 108: default: ! 109: *(pDest++) = ch; ! 110: } ! 111: } ! 112: ! 113: done: ! 114: *ppDest = pDest; /* next spot for storing character */ ! 115: *ppSrc = (char const *) pSrc; /* char following closing quote */ ! 116: } ! 117: ! 118: static token_list_t * ! 119: alloc_token_list(char const * str) ! 120: { ! 121: token_list_t * res; ! 122: ! 123: int max_token_ct = 2; /* allow for trailing NULL pointer & NUL on string */ ! 124: ! 125: if (str == NULL) goto enoent_res; ! 126: ! 127: /* ! 128: * Trim leading white space. Use "ENOENT" and a NULL return to indicate ! 129: * an empty string was passed. ! 130: */ ! 131: while (IS_WHITESPACE_CHAR(*str)) str++; ! 132: if (*str == NUL) goto enoent_res; ! 133: ! 134: /* ! 135: * Take an approximate count of tokens. If no quoted strings are used, ! 136: * it will be accurate. If quoted strings are used, it will be a little ! 137: * high and we'll squander the space for a few extra pointers. ! 138: */ ! 139: { ! 140: cc_t* pz = (cc_t*)str; ! 141: ! 142: do { ! 143: max_token_ct++; ! 144: while (! IS_WHITESPACE_CHAR(*++pz)) ! 145: if (*pz == NUL) goto found_nul; ! 146: while (IS_WHITESPACE_CHAR(*pz)) pz++; ! 147: } while (*pz != NUL); ! 148: ! 149: found_nul: ! 150: res = malloc(sizeof(*res) + (pz - (cc_t*)str) ! 151: + (max_token_ct * sizeof(ch_t*))); ! 152: } ! 153: ! 154: if (res == NULL) ! 155: errno = ENOMEM; ! 156: else res->tkn_list[0] = (ch_t*)(res->tkn_list + (max_token_ct - 1)); ! 157: ! 158: return res; ! 159: ! 160: enoent_res: ! 161: ! 162: errno = ENOENT; ! 163: return NULL; ! 164: } ! 165: ! 166: /*=export_func ao_string_tokenize ! 167: * ! 168: * what: tokenize an input string ! 169: * ! 170: * arg: + char const* + string + string to be tokenized + ! 171: * ! 172: * ret_type: token_list_t* ! 173: * ret_desc: pointer to a structure that lists each token ! 174: * ! 175: * doc: ! 176: * ! 177: * This function will convert one input string into a list of strings. ! 178: * The list of strings is derived by separating the input based on ! 179: * white space separation. However, if the input contains either single ! 180: * or double quote characters, then the text after that character up to ! 181: * a matching quote will become the string in the list. ! 182: * ! 183: * The returned pointer should be deallocated with @code{free(3C)} when ! 184: * are done using the data. The data are placed in a single block of ! 185: * allocated memory. Do not deallocate individual token/strings. ! 186: * ! 187: * The structure pointed to will contain at least these two fields: ! 188: * @table @samp ! 189: * @item tkn_ct ! 190: * The number of tokens found in the input string. ! 191: * @item tok_list ! 192: * An array of @code{tkn_ct + 1} pointers to substring tokens, with ! 193: * the last pointer set to NULL. ! 194: * @end table ! 195: * ! 196: * There are two types of quoted strings: single quoted (@code{'}) and ! 197: * double quoted (@code{"}). Singly quoted strings are fairly raw in that ! 198: * escape characters (@code{\\}) are simply another character, except when ! 199: * preceding the following characters: ! 200: * @example ! 201: * @code{\\} double backslashes reduce to one ! 202: * @code{'} incorporates the single quote into the string ! 203: * @code{\n} suppresses both the backslash and newline character ! 204: * @end example ! 205: * ! 206: * Double quote strings are formed according to the rules of string ! 207: * constants in ANSI-C programs. ! 208: * ! 209: * example: ! 210: * @example ! 211: * #include <stdlib.h> ! 212: * int ix; ! 213: * token_list_t* ptl = ao_string_tokenize(some_string) ! 214: * for (ix = 0; ix < ptl->tkn_ct; ix++) ! 215: * do_something_with_tkn(ptl->tkn_list[ix]); ! 216: * free(ptl); ! 217: * @end example ! 218: * Note that everything is freed with the one call to @code{free(3C)}. ! 219: * ! 220: * err: ! 221: * NULL is returned and @code{errno} will be set to indicate the problem: ! 222: * @itemize @bullet ! 223: * @item ! 224: * @code{EINVAL} - There was an unterminated quoted string. ! 225: * @item ! 226: * @code{ENOENT} - The input string was empty. ! 227: * @item ! 228: * @code{ENOMEM} - There is not enough memory. ! 229: * @end itemize ! 230: =*/ ! 231: token_list_t* ! 232: ao_string_tokenize(char const* str) ! 233: { ! 234: token_list_t* res = alloc_token_list(str); ! 235: ch_t* pzDest; ! 236: ! 237: /* ! 238: * Now copy each token into the output buffer. ! 239: */ ! 240: if (res == NULL) ! 241: return res; ! 242: ! 243: pzDest = (ch_t*)(res->tkn_list[0]); ! 244: res->tkn_ct = 0; ! 245: ! 246: do { ! 247: res->tkn_list[ res->tkn_ct++ ] = pzDest; ! 248: for (;;) { ! 249: int ch = (ch_t)*str; ! 250: if (IS_WHITESPACE_CHAR(ch)) { ! 251: found_white_space: ! 252: while (IS_WHITESPACE_CHAR(*++str)) ; ! 253: break; ! 254: } ! 255: ! 256: switch (ch) { ! 257: case '"': ! 258: copy_cooked(&pzDest, &str); ! 259: if (str == NULL) { ! 260: free(res); ! 261: errno = EINVAL; ! 262: return NULL; ! 263: } ! 264: if (IS_WHITESPACE_CHAR(*str)) ! 265: goto found_white_space; ! 266: break; ! 267: ! 268: case '\'': ! 269: copy_raw(&pzDest, &str); ! 270: if (str == NULL) { ! 271: free(res); ! 272: errno = EINVAL; ! 273: return NULL; ! 274: } ! 275: if (IS_WHITESPACE_CHAR(*str)) ! 276: goto found_white_space; ! 277: break; ! 278: ! 279: case NUL: ! 280: goto copy_done; ! 281: ! 282: default: ! 283: str++; ! 284: *(pzDest++) = ch; ! 285: } ! 286: } copy_done:; ! 287: ! 288: /* ! 289: * NUL terminate the last token and see if we have any more tokens. ! 290: */ ! 291: *(pzDest++) = NUL; ! 292: } while (*str != NUL); ! 293: ! 294: res->tkn_list[ res->tkn_ct ] = NULL; ! 295: ! 296: return res; ! 297: } ! 298: ! 299: #ifdef TEST ! 300: #include <stdio.h> ! 301: #include <string.h> ! 302: ! 303: int ! 304: main(int argc, char** argv) ! 305: { ! 306: if (argc == 1) { ! 307: printf("USAGE: %s arg [ ... ]\n", *argv); ! 308: return 1; ! 309: } ! 310: while (--argc > 0) { ! 311: char* arg = *(++argv); ! 312: token_list_t* p = ao_string_tokenize(arg); ! 313: if (p == NULL) { ! 314: printf("Parsing string ``%s'' failed:\n\terrno %d (%s)\n", ! 315: arg, errno, strerror(errno)); ! 316: } else { ! 317: int ix = 0; ! 318: printf("Parsed string ``%s''\ninto %d tokens:\n", arg, p->tkn_ct); ! 319: do { ! 320: printf(" %3d: ``%s''\n", ix+1, p->tkn_list[ix]); ! 321: } while (++ix < p->tkn_ct); ! 322: free(p); ! 323: } ! 324: } ! 325: return 0; ! 326: } ! 327: #endif ! 328: ! 329: /* ! 330: * Local Variables: ! 331: * mode: C ! 332: * c-file-style: "stroustrup" ! 333: * indent-tabs-mode: nil ! 334: * End: ! 335: * end of autoopts/tokenize.c */