Annotation of embedaddon/ntp/sntp/libopts/tokenize.c, revision 1.1.1.1

1.1       misho       1: /*
                      2:  *  This file defines the string_tokenize interface
                      3:  * Time-stamp:      "2010-07-17 10:40:26 bkorb"
                      4:  *
                      5:  *  This file is part of AutoOpts, a companion to AutoGen.
                      6:  *  AutoOpts is free software.
                      7:  *  AutoOpts is Copyright (c) 1992-2011 by Bruce Korb - all rights reserved
                      8:  *
                      9:  *  AutoOpts is available under any one of two licenses.  The license
                     10:  *  in use must be one of these two and the choice is under the control
                     11:  *  of the user of the license.
                     12:  *
                     13:  *   The GNU Lesser General Public License, version 3 or later
                     14:  *      See the files "COPYING.lgplv3" and "COPYING.gplv3"
                     15:  *
                     16:  *   The Modified Berkeley Software Distribution License
                     17:  *      See the file "COPYING.mbsd"
                     18:  *
                     19:  *  These files have the following md5sums:
                     20:  *
                     21:  *  43b91e8ca915626ed3818ffb1b71248b pkg/libopts/COPYING.gplv3
                     22:  *  06a1a2e4760c90ea5e1dad8dfaac4d39 pkg/libopts/COPYING.lgplv3
                     23:  *  66a5cedaf62c4b2637025f049f9b826f pkg/libopts/COPYING.mbsd
                     24:  */
                     25: 
                     26: #include <errno.h>
                     27: #include <stdlib.h>
                     28: 
/*
 *  Shorthand for the byte types used while scanning and copying text.
 *  These are macros, not typedefs: "cc_t" expands to a const-qualified
 *  unsigned char and "ch_t" to a plain unsigned char.
 */
#define cc_t   const unsigned char
#define ch_t   unsigned char

/* = = = START-STATIC-FORWARD = = = */
static void
copy_cooked(ch_t** ppDest, char const ** ppSrc);

static void
copy_raw(ch_t** ppDest, char const ** ppSrc);

static token_list_t *
alloc_token_list(char const * str);
/* = = = END-STATIC-FORWARD = = = */
                     42: 
                     43: static void
                     44: copy_cooked(ch_t** ppDest, char const ** ppSrc)
                     45: {
                     46:     ch_t* pDest = (ch_t*)*ppDest;
                     47:     const ch_t* pSrc  = (const ch_t*)(*ppSrc + 1);
                     48: 
                     49:     for (;;) {
                     50:         ch_t ch = *(pSrc++);
                     51:         switch (ch) {
                     52:         case NUL:   *ppSrc = NULL; return;
                     53:         case '"':   goto done;
                     54:         case '\\':
                     55:             pSrc += ao_string_cook_escape_char((char*)pSrc, (char*)&ch, 0x7F);
                     56:             if (ch == 0x7F)
                     57:                 break;
                     58:             /* FALLTHROUGH */
                     59: 
                     60:         default:
                     61:             *(pDest++) = ch;
                     62:         }
                     63:     }
                     64: 
                     65:  done:
                     66:     *ppDest = (ch_t*)pDest; /* next spot for storing character */
                     67:     *ppSrc  = (char const *)pSrc;  /* char following closing quote    */
                     68: }
                     69: 
                     70: 
                     71: static void
                     72: copy_raw(ch_t** ppDest, char const ** ppSrc)
                     73: {
                     74:     ch_t* pDest = *ppDest;
                     75:     cc_t* pSrc  = (cc_t*) (*ppSrc + 1);
                     76: 
                     77:     for (;;) {
                     78:         ch_t ch = *(pSrc++);
                     79:         switch (ch) {
                     80:         case NUL:   *ppSrc = NULL; return;
                     81:         case '\'':  goto done;
                     82:         case '\\':
                     83:             /*
                     84:              *  *Four* escapes are handled:  newline removal, escape char
                     85:              *  quoting and apostrophe quoting
                     86:              */
                     87:             switch (*pSrc) {
                     88:             case NUL:   *ppSrc = NULL; return;
                     89:             case '\r':
                     90:                 if (*(++pSrc) == '\n')
                     91:                     ++pSrc;
                     92:                 continue;
                     93: 
                     94:             case '\n':
                     95:                 ++pSrc;
                     96:                 continue;
                     97: 
                     98:             case '\'':
                     99:                 ch = '\'';
                    100:                 /* FALLTHROUGH */
                    101: 
                    102:             case '\\':
                    103:                 ++pSrc;
                    104:                 break;
                    105:             }
                    106:             /* FALLTHROUGH */
                    107: 
                    108:         default:
                    109:             *(pDest++) = ch;
                    110:         }
                    111:     }
                    112: 
                    113:  done:
                    114:     *ppDest = pDest; /* next spot for storing character */
                    115:     *ppSrc  = (char const *) pSrc;  /* char following closing quote    */
                    116: }
                    117: 
                    118: static token_list_t *
                    119: alloc_token_list(char const * str)
                    120: {
                    121:     token_list_t * res;
                    122: 
                    123:     int max_token_ct = 2; /* allow for trailing NULL pointer & NUL on string */
                    124: 
                    125:     if (str == NULL) goto enoent_res;
                    126: 
                    127:     /*
                    128:      *  Trim leading white space.  Use "ENOENT" and a NULL return to indicate
                    129:      *  an empty string was passed.
                    130:      */
                    131:     while (IS_WHITESPACE_CHAR(*str))  str++;
                    132:     if (*str == NUL)  goto enoent_res;
                    133: 
                    134:     /*
                    135:      *  Take an approximate count of tokens.  If no quoted strings are used,
                    136:      *  it will be accurate.  If quoted strings are used, it will be a little
                    137:      *  high and we'll squander the space for a few extra pointers.
                    138:      */
                    139:     {
                    140:         cc_t* pz = (cc_t*)str;
                    141: 
                    142:         do {
                    143:             max_token_ct++;
                    144:             while (! IS_WHITESPACE_CHAR(*++pz))
                    145:                 if (*pz == NUL) goto found_nul;
                    146:             while (IS_WHITESPACE_CHAR(*pz))  pz++;
                    147:         } while (*pz != NUL);
                    148: 
                    149:     found_nul:
                    150:         res = malloc(sizeof(*res) + (pz - (cc_t*)str)
                    151:                      + (max_token_ct * sizeof(ch_t*)));
                    152:     }
                    153: 
                    154:     if (res == NULL)
                    155:         errno = ENOMEM;
                    156:     else res->tkn_list[0] = (ch_t*)(res->tkn_list + (max_token_ct - 1));
                    157: 
                    158:     return res;
                    159: 
                    160:     enoent_res:
                    161: 
                    162:     errno = ENOENT;
                    163:     return NULL;
                    164: }
                    165: 
                    166: /*=export_func ao_string_tokenize
                    167:  *
                    168:  * what: tokenize an input string
                    169:  *
                    170:  * arg:  + char const* + string + string to be tokenized +
                    171:  *
                    172:  * ret_type:  token_list_t*
                    173:  * ret_desc:  pointer to a structure that lists each token
                    174:  *
                    175:  * doc:
                    176:  *
                    177:  * This function will convert one input string into a list of strings.
                    178:  * The list of strings is derived by separating the input based on
                    179:  * white space separation.  However, if the input contains either single
                    180:  * or double quote characters, then the text after that character up to
                    181:  * a matching quote will become the string in the list.
                    182:  *
                    183:  *  The returned pointer should be deallocated with @code{free(3C)} when
                    184:  *  you are done using the data.  The data are placed in a single block of
                    185:  *  allocated memory.  Do not deallocate individual token/strings.
                    186:  *
                    187:  *  The structure pointed to will contain at least these two fields:
                    188:  *  @table @samp
                    189:  *  @item tkn_ct
                    190:  *  The number of tokens found in the input string.
                    191:  *  @item tkn_list
                    192:  *  An array of @code{tkn_ct + 1} pointers to substring tokens, with
                    193:  *  the last pointer set to NULL.
                    194:  *  @end table
                    195:  *
                    196:  * There are two types of quoted strings: single quoted (@code{'}) and
                    197:  * double quoted (@code{"}).  Singly quoted strings are fairly raw in that
                    198:  * escape characters (@code{\\}) are simply another character, except when
                    199:  * preceding the following characters:
                    200:  * @example
                    201:  * @code{\\}  double backslashes reduce to one
                    202:  * @code{'}   incorporates the single quote into the string
                    203:  * @code{\n}  suppresses both the backslash and newline character
                    204:  * @end example
                    205:  *
                    206:  * Double quote strings are formed according to the rules of string
                    207:  * constants in ANSI-C programs.
                    208:  *
                    209:  * example:
                    210:  * @example
                    211:  *    #include <stdlib.h>
                    212:  *    int ix;
                    213:  *    token_list_t* ptl = ao_string_tokenize(some_string);
                    214:  *    for (ix = 0; ix < ptl->tkn_ct; ix++)
                    215:  *       do_something_with_tkn(ptl->tkn_list[ix]);
                    216:  *    free(ptl);
                    217:  * @end example
                    218:  * Note that everything is freed with the one call to @code{free(3C)}.
                    219:  *
                    220:  * err:
                    221:  *  NULL is returned and @code{errno} will be set to indicate the problem:
                    222:  *  @itemize @bullet
                    223:  *  @item
                    224:  *  @code{EINVAL} - There was an unterminated quoted string.
                    225:  *  @item
                    226:  *  @code{ENOENT} - The input string was empty.
                    227:  *  @item
                    228:  *  @code{ENOMEM} - There is not enough memory.
                    229:  *  @end itemize
                    230: =*/
                    231: token_list_t*
                    232: ao_string_tokenize(char const* str)
                    233: {
                    234:     token_list_t* res = alloc_token_list(str);
                    235:     ch_t* pzDest;
                    236: 
                    237:     /*
                    238:      *  Now copy each token into the output buffer.
                    239:      */
                    240:     if (res == NULL)
                    241:         return res;
                    242: 
                    243:     pzDest = (ch_t*)(res->tkn_list[0]);
                    244:     res->tkn_ct  = 0;
                    245: 
                    246:     do  {
                    247:         res->tkn_list[ res->tkn_ct++ ] = pzDest;
                    248:         for (;;) {
                    249:             int ch = (ch_t)*str;
                    250:             if (IS_WHITESPACE_CHAR(ch)) {
                    251:             found_white_space:
                    252:                 while (IS_WHITESPACE_CHAR(*++str))  ;
                    253:                 break;
                    254:             }
                    255: 
                    256:             switch (ch) {
                    257:             case '"':
                    258:                 copy_cooked(&pzDest, &str);
                    259:                 if (str == NULL) {
                    260:                     free(res);
                    261:                     errno = EINVAL;
                    262:                     return NULL;
                    263:                 }
                    264:                 if (IS_WHITESPACE_CHAR(*str))
                    265:                     goto found_white_space;
                    266:                 break;
                    267: 
                    268:             case '\'':
                    269:                 copy_raw(&pzDest, &str);
                    270:                 if (str == NULL) {
                    271:                     free(res);
                    272:                     errno = EINVAL;
                    273:                     return NULL;
                    274:                 }
                    275:                 if (IS_WHITESPACE_CHAR(*str))
                    276:                     goto found_white_space;
                    277:                 break;
                    278: 
                    279:             case NUL:
                    280:                 goto copy_done;
                    281: 
                    282:             default:
                    283:                 str++;
                    284:                 *(pzDest++) = ch;
                    285:             }
                    286:         } copy_done:;
                    287: 
                    288:         /*
                    289:          * NUL terminate the last token and see if we have any more tokens.
                    290:          */
                    291:         *(pzDest++) = NUL;
                    292:     } while (*str != NUL);
                    293: 
                    294:     res->tkn_list[ res->tkn_ct ] = NULL;
                    295: 
                    296:     return res;
                    297: }
                    298: 
                    299: #ifdef TEST
                    300: #include <stdio.h>
                    301: #include <string.h>
                    302: 
                    303: int
                    304: main(int argc, char** argv)
                    305: {
                    306:     if (argc == 1) {
                    307:         printf("USAGE:  %s arg [ ... ]\n", *argv);
                    308:         return 1;
                    309:     }
                    310:     while (--argc > 0) {
                    311:         char* arg = *(++argv);
                    312:         token_list_t* p = ao_string_tokenize(arg);
                    313:         if (p == NULL) {
                    314:             printf("Parsing string ``%s'' failed:\n\terrno %d (%s)\n",
                    315:                    arg, errno, strerror(errno));
                    316:         } else {
                    317:             int ix = 0;
                    318:             printf("Parsed string ``%s''\ninto %d tokens:\n", arg, p->tkn_ct);
                    319:             do {
                    320:                 printf(" %3d:  ``%s''\n", ix+1, p->tkn_list[ix]);
                    321:             } while (++ix < p->tkn_ct);
                    322:             free(p);
                    323:         }
                    324:     }
                    325:     return 0;
                    326: }
                    327: #endif
                    328: 
                    329: /*
                    330:  * Local Variables:
                    331:  * mode: C
                    332:  * c-file-style: "stroustrup"
                    333:  * indent-tabs-mode: nil
                    334:  * End:
                    335:  * end of autoopts/tokenize.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>