File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / ntp / sntp / libopts / tokenize.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue May 29 12:08:38 2012 UTC (12 years, 7 months ago) by misho
Branches: ntp, MAIN
CVS tags: v4_2_6p5p0, v4_2_6p5, HEAD
ntp 4.2.6p5

    1: /*
    2:  *  This file defines the string_tokenize interface
    3:  * Time-stamp:      "2010-07-17 10:40:26 bkorb"
    4:  *
    5:  *  This file is part of AutoOpts, a companion to AutoGen.
    6:  *  AutoOpts is free software.
    7:  *  AutoOpts is Copyright (c) 1992-2011 by Bruce Korb - all rights reserved
    8:  *
    9:  *  AutoOpts is available under any one of two licenses.  The license
   10:  *  in use must be one of these two and the choice is under the control
   11:  *  of the user of the license.
   12:  *
   13:  *   The GNU Lesser General Public License, version 3 or later
   14:  *      See the files "COPYING.lgplv3" and "COPYING.gplv3"
   15:  *
   16:  *   The Modified Berkeley Software Distribution License
   17:  *      See the file "COPYING.mbsd"
   18:  *
   19:  *  These files have the following md5sums:
   20:  *
   21:  *  43b91e8ca915626ed3818ffb1b71248b pkg/libopts/COPYING.gplv3
   22:  *  06a1a2e4760c90ea5e1dad8dfaac4d39 pkg/libopts/COPYING.lgplv3
   23:  *  66a5cedaf62c4b2637025f049f9b826f pkg/libopts/COPYING.mbsd
   24:  */
   25: 
   26: #include <errno.h>
   27: #include <stdlib.h>
   28: 
   29: #define cc_t   const unsigned char
   30: #define ch_t   unsigned char
   31: 
   32: /* = = = START-STATIC-FORWARD = = = */
   33: static void
   34: copy_cooked(ch_t** ppDest, char const ** ppSrc);
   35: 
   36: static void
   37: copy_raw(ch_t** ppDest, char const ** ppSrc);
   38: 
   39: static token_list_t *
   40: alloc_token_list(char const * str);
   41: /* = = = END-STATIC-FORWARD = = = */
   42: 
   43: static void
   44: copy_cooked(ch_t** ppDest, char const ** ppSrc)
   45: {
   46:     ch_t* pDest = (ch_t*)*ppDest;
   47:     const ch_t* pSrc  = (const ch_t*)(*ppSrc + 1);
   48: 
   49:     for (;;) {
   50:         ch_t ch = *(pSrc++);
   51:         switch (ch) {
   52:         case NUL:   *ppSrc = NULL; return;
   53:         case '"':   goto done;
   54:         case '\\':
   55:             pSrc += ao_string_cook_escape_char((char*)pSrc, (char*)&ch, 0x7F);
   56:             if (ch == 0x7F)
   57:                 break;
   58:             /* FALLTHROUGH */
   59: 
   60:         default:
   61:             *(pDest++) = ch;
   62:         }
   63:     }
   64: 
   65:  done:
   66:     *ppDest = (ch_t*)pDest; /* next spot for storing character */
   67:     *ppSrc  = (char const *)pSrc;  /* char following closing quote    */
   68: }
   69: 
   70: 
   71: static void
   72: copy_raw(ch_t** ppDest, char const ** ppSrc)
   73: {
   74:     ch_t* pDest = *ppDest;
   75:     cc_t* pSrc  = (cc_t*) (*ppSrc + 1);
   76: 
   77:     for (;;) {
   78:         ch_t ch = *(pSrc++);
   79:         switch (ch) {
   80:         case NUL:   *ppSrc = NULL; return;
   81:         case '\'':  goto done;
   82:         case '\\':
   83:             /*
   84:              *  *Four* escapes are handled:  newline removal, escape char
   85:              *  quoting and apostrophe quoting
   86:              */
   87:             switch (*pSrc) {
   88:             case NUL:   *ppSrc = NULL; return;
   89:             case '\r':
   90:                 if (*(++pSrc) == '\n')
   91:                     ++pSrc;
   92:                 continue;
   93: 
   94:             case '\n':
   95:                 ++pSrc;
   96:                 continue;
   97: 
   98:             case '\'':
   99:                 ch = '\'';
  100:                 /* FALLTHROUGH */
  101: 
  102:             case '\\':
  103:                 ++pSrc;
  104:                 break;
  105:             }
  106:             /* FALLTHROUGH */
  107: 
  108:         default:
  109:             *(pDest++) = ch;
  110:         }
  111:     }
  112: 
  113:  done:
  114:     *ppDest = pDest; /* next spot for storing character */
  115:     *ppSrc  = (char const *) pSrc;  /* char following closing quote    */
  116: }
  117: 
  118: static token_list_t *
  119: alloc_token_list(char const * str)
  120: {
  121:     token_list_t * res;
  122: 
  123:     int max_token_ct = 2; /* allow for trailing NULL pointer & NUL on string */
  124: 
  125:     if (str == NULL) goto enoent_res;
  126: 
  127:     /*
  128:      *  Trim leading white space.  Use "ENOENT" and a NULL return to indicate
  129:      *  an empty string was passed.
  130:      */
  131:     while (IS_WHITESPACE_CHAR(*str))  str++;
  132:     if (*str == NUL)  goto enoent_res;
  133: 
  134:     /*
  135:      *  Take an approximate count of tokens.  If no quoted strings are used,
  136:      *  it will be accurate.  If quoted strings are used, it will be a little
  137:      *  high and we'll squander the space for a few extra pointers.
  138:      */
  139:     {
  140:         cc_t* pz = (cc_t*)str;
  141: 
  142:         do {
  143:             max_token_ct++;
  144:             while (! IS_WHITESPACE_CHAR(*++pz))
  145:                 if (*pz == NUL) goto found_nul;
  146:             while (IS_WHITESPACE_CHAR(*pz))  pz++;
  147:         } while (*pz != NUL);
  148: 
  149:     found_nul:
  150:         res = malloc(sizeof(*res) + (pz - (cc_t*)str)
  151:                      + (max_token_ct * sizeof(ch_t*)));
  152:     }
  153: 
  154:     if (res == NULL)
  155:         errno = ENOMEM;
  156:     else res->tkn_list[0] = (ch_t*)(res->tkn_list + (max_token_ct - 1));
  157: 
  158:     return res;
  159: 
  160:     enoent_res:
  161: 
  162:     errno = ENOENT;
  163:     return NULL;
  164: }
  165: 
  166: /*=export_func ao_string_tokenize
  167:  *
  168:  * what: tokenize an input string
  169:  *
  170:  * arg:  + char const* + string + string to be tokenized +
  171:  *
  172:  * ret_type:  token_list_t*
  173:  * ret_desc:  pointer to a structure that lists each token
  174:  *
  175:  * doc:
  176:  *
  177:  * This function will convert one input string into a list of strings.
  178:  * The list of strings is derived by separating the input based on
  179:  * white space separation.  However, if the input contains either single
  180:  * or double quote characters, then the text after that character up to
  181:  * a matching quote will become the string in the list.
  182:  *
  183:  *  The returned pointer should be deallocated with @code{free(3C)} when
  184:  *  are done using the data.  The data are placed in a single block of
  185:  *  allocated memory.  Do not deallocate individual token/strings.
  186:  *
  187:  *  The structure pointed to will contain at least these two fields:
  188:  *  @table @samp
  189:  *  @item tkn_ct
  190:  *  The number of tokens found in the input string.
  191:  *  @item tok_list
  192:  *  An array of @code{tkn_ct + 1} pointers to substring tokens, with
  193:  *  the last pointer set to NULL.
  194:  *  @end table
  195:  *
  196:  * There are two types of quoted strings: single quoted (@code{'}) and
  197:  * double quoted (@code{"}).  Singly quoted strings are fairly raw in that
  198:  * escape characters (@code{\\}) are simply another character, except when
  199:  * preceding the following characters:
  200:  * @example
  201:  * @code{\\}  double backslashes reduce to one
  202:  * @code{'}   incorporates the single quote into the string
  203:  * @code{\n}  suppresses both the backslash and newline character
  204:  * @end example
  205:  *
  206:  * Double quote strings are formed according to the rules of string
  207:  * constants in ANSI-C programs.
  208:  *
  209:  * example:
  210:  * @example
  211:  *    #include <stdlib.h>
  212:  *    int ix;
  213:  *    token_list_t* ptl = ao_string_tokenize(some_string)
  214:  *    for (ix = 0; ix < ptl->tkn_ct; ix++)
  215:  *       do_something_with_tkn(ptl->tkn_list[ix]);
  216:  *    free(ptl);
  217:  * @end example
  218:  * Note that everything is freed with the one call to @code{free(3C)}.
  219:  *
  220:  * err:
  221:  *  NULL is returned and @code{errno} will be set to indicate the problem:
  222:  *  @itemize @bullet
  223:  *  @item
  224:  *  @code{EINVAL} - There was an unterminated quoted string.
  225:  *  @item
  226:  *  @code{ENOENT} - The input string was empty.
  227:  *  @item
  228:  *  @code{ENOMEM} - There is not enough memory.
  229:  *  @end itemize
  230: =*/
  231: token_list_t*
  232: ao_string_tokenize(char const* str)
  233: {
  234:     token_list_t* res = alloc_token_list(str);
  235:     ch_t* pzDest;
  236: 
  237:     /*
  238:      *  Now copy each token into the output buffer.
  239:      */
  240:     if (res == NULL)
  241:         return res;
  242: 
  243:     pzDest = (ch_t*)(res->tkn_list[0]);
  244:     res->tkn_ct  = 0;
  245: 
  246:     do  {
  247:         res->tkn_list[ res->tkn_ct++ ] = pzDest;
  248:         for (;;) {
  249:             int ch = (ch_t)*str;
  250:             if (IS_WHITESPACE_CHAR(ch)) {
  251:             found_white_space:
  252:                 while (IS_WHITESPACE_CHAR(*++str))  ;
  253:                 break;
  254:             }
  255: 
  256:             switch (ch) {
  257:             case '"':
  258:                 copy_cooked(&pzDest, &str);
  259:                 if (str == NULL) {
  260:                     free(res);
  261:                     errno = EINVAL;
  262:                     return NULL;
  263:                 }
  264:                 if (IS_WHITESPACE_CHAR(*str))
  265:                     goto found_white_space;
  266:                 break;
  267: 
  268:             case '\'':
  269:                 copy_raw(&pzDest, &str);
  270:                 if (str == NULL) {
  271:                     free(res);
  272:                     errno = EINVAL;
  273:                     return NULL;
  274:                 }
  275:                 if (IS_WHITESPACE_CHAR(*str))
  276:                     goto found_white_space;
  277:                 break;
  278: 
  279:             case NUL:
  280:                 goto copy_done;
  281: 
  282:             default:
  283:                 str++;
  284:                 *(pzDest++) = ch;
  285:             }
  286:         } copy_done:;
  287: 
  288:         /*
  289:          * NUL terminate the last token and see if we have any more tokens.
  290:          */
  291:         *(pzDest++) = NUL;
  292:     } while (*str != NUL);
  293: 
  294:     res->tkn_list[ res->tkn_ct ] = NULL;
  295: 
  296:     return res;
  297: }
  298: 
  299: #ifdef TEST
  300: #include <stdio.h>
  301: #include <string.h>
  302: 
  303: int
  304: main(int argc, char** argv)
  305: {
  306:     if (argc == 1) {
  307:         printf("USAGE:  %s arg [ ... ]\n", *argv);
  308:         return 1;
  309:     }
  310:     while (--argc > 0) {
  311:         char* arg = *(++argv);
  312:         token_list_t* p = ao_string_tokenize(arg);
  313:         if (p == NULL) {
  314:             printf("Parsing string ``%s'' failed:\n\terrno %d (%s)\n",
  315:                    arg, errno, strerror(errno));
  316:         } else {
  317:             int ix = 0;
  318:             printf("Parsed string ``%s''\ninto %d tokens:\n", arg, p->tkn_ct);
  319:             do {
  320:                 printf(" %3d:  ``%s''\n", ix+1, p->tkn_list[ix]);
  321:             } while (++ix < p->tkn_ct);
  322:             free(p);
  323:         }
  324:     }
  325:     return 0;
  326: }
  327: #endif
  328: 
  329: /*
  330:  * Local Variables:
  331:  * mode: C
  332:  * c-file-style: "stroustrup"
  333:  * indent-tabs-mode: nil
  334:  * End:
  335:  * end of autoopts/tokenize.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>