embedaddon/ntp/sntp/libopts/tokenize.c - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / ntp / sntp / libopts / tokenize.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue May 29 12:08:38 2012 UTC (13 years, 1 month ago) by misho
Branches: ntp, MAIN
CVS tags: v4_2_6p5p0, v4_2_6p5, HEAD

ntp 4.2.6p5

1: /* 2: * This file defines the string_tokenize interface 3: * Time-stamp: "2010-07-17 10:40:26 bkorb" 4: * 5: * This file is part of AutoOpts, a companion to AutoGen. 6: * AutoOpts is free software. 7: * AutoOpts is Copyright (c) 1992-2011 by Bruce Korb - all rights reserved 8: * 9: * AutoOpts is available under any one of two licenses. The license 10: * in use must be one of these two and the choice is under the control 11: * of the user of the license. 12: * 13: * The GNU Lesser General Public License, version 3 or later 14: * See the files "COPYING.lgplv3" and "COPYING.gplv3" 15: * 16: * The Modified Berkeley Software Distribution License 17: * See the file "COPYING.mbsd" 18: * 19: * These files have the following md5sums: 20: * 21: * 43b91e8ca915626ed3818ffb1b71248b pkg/libopts/COPYING.gplv3 22: * 06a1a2e4760c90ea5e1dad8dfaac4d39 pkg/libopts/COPYING.lgplv3 23: * 66a5cedaf62c4b2637025f049f9b826f pkg/libopts/COPYING.mbsd 24: */ 25: 26: #include <errno.h> 27: #include <stdlib.h> 28: 29: #define cc_t const unsigned char 30: #define ch_t unsigned char 31: 32: /* = = = START-STATIC-FORWARD = = = */ 33: static void 34: copy_cooked(ch_t** ppDest, char const ** ppSrc); 35: 36: static void 37: copy_raw(ch_t** ppDest, char const ** ppSrc); 38: 39: static token_list_t * 40: alloc_token_list(char const * str); 41: /* = = = END-STATIC-FORWARD = = = */ 42: 43: static void 44: copy_cooked(ch_t** ppDest, char const ** ppSrc) 45: { 46: ch_t* pDest = (ch_t*)*ppDest; 47: const ch_t* pSrc = (const ch_t*)(*ppSrc + 1); 48: 49: for (;;) { 50: ch_t ch = *(pSrc++); 51: switch (ch) { 52: case NUL: *ppSrc = NULL; return; 53: case '"': goto done; 54: case '\\': 55: pSrc += ao_string_cook_escape_char((char*)pSrc, (char*)&ch, 0x7F); 56: if (ch == 0x7F) 57: break; 58: /* FALLTHROUGH */ 59: 60: default: 61: *(pDest++) = ch; 62: } 63: } 64: 65: done: 66: *ppDest = (ch_t*)pDest; /* next spot for storing character */ 67: *ppSrc = (char const *)pSrc; /* char following closing quote */ 68: } 69: 70: 71: static void 72: copy_raw(ch_t** ppDest, char const ** ppSrc) 73: { 74: ch_t* pDest = *ppDest; 75: cc_t* pSrc = (cc_t*) (*ppSrc + 1); 76: 77: for (;;) { 78: ch_t ch = *(pSrc++); 79: switch (ch) { 80: case NUL: *ppSrc = NULL; return; 81: case '\'': goto done; 82: case '\\': 83: /* 84: * *Four* escapes are handled: newline removal, escape char 85: * quoting and apostrophe quoting 86: */ 87: switch (*pSrc) { 88: case NUL: *ppSrc = NULL; return; 89: case '\r': 90: if (*(++pSrc) == '\n') 91: ++pSrc; 92: continue; 93: 94: case '\n': 95: ++pSrc; 96: continue; 97: 98: case '\'': 99: ch = '\''; 100: /* FALLTHROUGH */ 101: 102: case '\\': 103: ++pSrc; 104: break; 105: } 106: /* FALLTHROUGH */ 107: 108: default: 109: *(pDest++) = ch; 110: } 111: } 112: 113: done: 114: *ppDest = pDest; /* next spot for storing character */ 115: *ppSrc = (char const *) pSrc; /* char following closing quote */ 116: } 117: 118: static token_list_t * 119: alloc_token_list(char const * str) 120: { 121: token_list_t * res; 122: 123: int max_token_ct = 2; /* allow for trailing NULL pointer & NUL on string */ 124: 125: if (str == NULL) goto enoent_res; 126: 127: /* 128: * Trim leading white space. Use "ENOENT" and a NULL return to indicate 129: * an empty string was passed. 130: */ 131: while (IS_WHITESPACE_CHAR(*str)) str++; 132: if (*str == NUL) goto enoent_res; 133: 134: /* 135: * Take an approximate count of tokens. If no quoted strings are used, 136: * it will be accurate. If quoted strings are used, it will be a little 137: * high and we'll squander the space for a few extra pointers. 138: */ 139: { 140: cc_t* pz = (cc_t*)str; 141: 142: do { 143: max_token_ct++; 144: while (! IS_WHITESPACE_CHAR(*++pz)) 145: if (*pz == NUL) goto found_nul; 146: while (IS_WHITESPACE_CHAR(*pz)) pz++; 147: } while (*pz != NUL); 148: 149: found_nul: 150: res = malloc(sizeof(*res) + (pz - (cc_t*)str) 151: + (max_token_ct * sizeof(ch_t*))); 152: } 153: 154: if (res == NULL) 155: errno = ENOMEM; 156: else res->tkn_list[0] = (ch_t*)(res->tkn_list + (max_token_ct - 1)); 157: 158: return res; 159: 160: enoent_res: 161: 162: errno = ENOENT; 163: return NULL; 164: } 165: 166: /*=export_func ao_string_tokenize 167: * 168: * what: tokenize an input string 169: * 170: * arg: + char const* + string + string to be tokenized + 171: * 172: * ret_type: token_list_t* 173: * ret_desc: pointer to a structure that lists each token 174: * 175: * doc: 176: * 177: * This function will convert one input string into a list of strings. 178: * The list of strings is derived by separating the input based on 179: * white space separation. However, if the input contains either single 180: * or double quote characters, then the text after that character up to 181: * a matching quote will become the string in the list. 182: * 183: * The returned pointer should be deallocated with @code{free(3C)} when 184: * are done using the data. The data are placed in a single block of 185: * allocated memory. Do not deallocate individual token/strings. 186: * 187: * The structure pointed to will contain at least these two fields: 188: * @table @samp 189: * @item tkn_ct 190: * The number of tokens found in the input string. 191: * @item tok_list 192: * An array of @code{tkn_ct + 1} pointers to substring tokens, with 193: * the last pointer set to NULL. 194: * @end table 195: * 196: * There are two types of quoted strings: single quoted (@code{'}) and 197: * double quoted (@code{"}). Singly quoted strings are fairly raw in that 198: * escape characters (@code{\\}) are simply another character, except when 199: * preceding the following characters: 200: * @example 201: * @code{\\} double backslashes reduce to one 202: * @code{'} incorporates the single quote into the string 203: * @code{\n} suppresses both the backslash and newline character 204: * @end example 205: * 206: * Double quote strings are formed according to the rules of string 207: * constants in ANSI-C programs. 208: * 209: * example: 210: * @example 211: * #include <stdlib.h> 212: * int ix; 213: * token_list_t* ptl = ao_string_tokenize(some_string) 214: * for (ix = 0; ix < ptl->tkn_ct; ix++) 215: * do_something_with_tkn(ptl->tkn_list[ix]); 216: * free(ptl); 217: * @end example 218: * Note that everything is freed with the one call to @code{free(3C)}. 219: * 220: * err: 221: * NULL is returned and @code{errno} will be set to indicate the problem: 222: * @itemize @bullet 223: * @item 224: * @code{EINVAL} - There was an unterminated quoted string. 225: * @item 226: * @code{ENOENT} - The input string was empty. 227: * @item 228: * @code{ENOMEM} - There is not enough memory. 229: * @end itemize 230: =*/ 231: token_list_t* 232: ao_string_tokenize(char const* str) 233: { 234: token_list_t* res = alloc_token_list(str); 235: ch_t* pzDest; 236: 237: /* 238: * Now copy each token into the output buffer. 239: */ 240: if (res == NULL) 241: return res; 242: 243: pzDest = (ch_t*)(res->tkn_list[0]); 244: res->tkn_ct = 0; 245: 246: do { 247: res->tkn_list[ res->tkn_ct++ ] = pzDest; 248: for (;;) { 249: int ch = (ch_t)*str; 250: if (IS_WHITESPACE_CHAR(ch)) { 251: found_white_space: 252: while (IS_WHITESPACE_CHAR(*++str)) ; 253: break; 254: } 255: 256: switch (ch) { 257: case '"': 258: copy_cooked(&pzDest, &str); 259: if (str == NULL) { 260: free(res); 261: errno = EINVAL; 262: return NULL; 263: } 264: if (IS_WHITESPACE_CHAR(*str)) 265: goto found_white_space; 266: break; 267: 268: case '\'': 269: copy_raw(&pzDest, &str); 270: if (str == NULL) { 271: free(res); 272: errno = EINVAL; 273: return NULL; 274: } 275: if (IS_WHITESPACE_CHAR(*str)) 276: goto found_white_space; 277: break; 278: 279: case NUL: 280: goto copy_done; 281: 282: default: 283: str++; 284: *(pzDest++) = ch; 285: } 286: } copy_done:; 287: 288: /* 289: * NUL terminate the last token and see if we have any more tokens. 290: */ 291: *(pzDest++) = NUL; 292: } while (*str != NUL); 293: 294: res->tkn_list[ res->tkn_ct ] = NULL; 295: 296: return res; 297: } 298: 299: #ifdef TEST 300: #include <stdio.h> 301: #include <string.h> 302: 303: int 304: main(int argc, char** argv) 305: { 306: if (argc == 1) { 307: printf("USAGE: %s arg [ ... ]\n", *argv); 308: return 1; 309: } 310: while (--argc > 0) { 311: char* arg = *(++argv); 312: token_list_t* p = ao_string_tokenize(arg); 313: if (p == NULL) { 314: printf("Parsing string ``%s'' failed:\n\terrno %d (%s)\n", 315: arg, errno, strerror(errno)); 316: } else { 317: int ix = 0; 318: printf("Parsed string ``%s''\ninto %d tokens:\n", arg, p->tkn_ct); 319: do { 320: printf(" %3d: ``%s''\n", ix+1, p->tkn_list[ix]); 321: } while (++ix < p->tkn_ct); 322: free(p); 323: } 324: } 325: return 0; 326: } 327: #endif 328: 329: /* 330: * Local Variables: 331: * mode: C 332: * c-file-style: "stroustrup" 333: * indent-tabs-mode: nil 334: * End: 335: * end of autoopts/tokenize.c */