Annotation of embedaddon/ntp/sntp/libopts/tokenize.c, revision 1.1

1.1     ! misho       1: /*
        !             2:  *  This file defines the string_tokenize interface
        !             3:  * Time-stamp:      "2010-07-17 10:40:26 bkorb"
        !             4:  *
        !             5:  *  This file is part of AutoOpts, a companion to AutoGen.
        !             6:  *  AutoOpts is free software.
        !             7:  *  AutoOpts is Copyright (c) 1992-2011 by Bruce Korb - all rights reserved
        !             8:  *
        !             9:  *  AutoOpts is available under any one of two licenses.  The license
        !            10:  *  in use must be one of these two and the choice is under the control
        !            11:  *  of the user of the license.
        !            12:  *
        !            13:  *   The GNU Lesser General Public License, version 3 or later
        !            14:  *      See the files "COPYING.lgplv3" and "COPYING.gplv3"
        !            15:  *
        !            16:  *   The Modified Berkeley Software Distribution License
        !            17:  *      See the file "COPYING.mbsd"
        !            18:  *
        !            19:  *  These files have the following md5sums:
        !            20:  *
        !            21:  *  43b91e8ca915626ed3818ffb1b71248b pkg/libopts/COPYING.gplv3
        !            22:  *  06a1a2e4760c90ea5e1dad8dfaac4d39 pkg/libopts/COPYING.lgplv3
        !            23:  *  66a5cedaf62c4b2637025f049f9b826f pkg/libopts/COPYING.mbsd
        !            24:  */
        !            25: 
        !            26: #include <errno.h>
        !            27: #include <stdlib.h>
        !            28: 
        !            29: #define cc_t   const unsigned char
        !            30: #define ch_t   unsigned char
        !            31: 
        !            32: /* = = = START-STATIC-FORWARD = = = */
        !            33: static void
        !            34: copy_cooked(ch_t** ppDest, char const ** ppSrc);
        !            35: 
        !            36: static void
        !            37: copy_raw(ch_t** ppDest, char const ** ppSrc);
        !            38: 
        !            39: static token_list_t *
        !            40: alloc_token_list(char const * str);
        !            41: /* = = = END-STATIC-FORWARD = = = */
        !            42: 
        !            43: static void
        !            44: copy_cooked(ch_t** ppDest, char const ** ppSrc)
        !            45: {
        !            46:     ch_t* pDest = (ch_t*)*ppDest;
        !            47:     const ch_t* pSrc  = (const ch_t*)(*ppSrc + 1);
        !            48: 
        !            49:     for (;;) {
        !            50:         ch_t ch = *(pSrc++);
        !            51:         switch (ch) {
        !            52:         case NUL:   *ppSrc = NULL; return;
        !            53:         case '"':   goto done;
        !            54:         case '\\':
        !            55:             pSrc += ao_string_cook_escape_char((char*)pSrc, (char*)&ch, 0x7F);
        !            56:             if (ch == 0x7F)
        !            57:                 break;
        !            58:             /* FALLTHROUGH */
        !            59: 
        !            60:         default:
        !            61:             *(pDest++) = ch;
        !            62:         }
        !            63:     }
        !            64: 
        !            65:  done:
        !            66:     *ppDest = (ch_t*)pDest; /* next spot for storing character */
        !            67:     *ppSrc  = (char const *)pSrc;  /* char following closing quote    */
        !            68: }
        !            69: 
        !            70: 
        !            71: static void
        !            72: copy_raw(ch_t** ppDest, char const ** ppSrc)
        !            73: {
        !            74:     ch_t* pDest = *ppDest;
        !            75:     cc_t* pSrc  = (cc_t*) (*ppSrc + 1);
        !            76: 
        !            77:     for (;;) {
        !            78:         ch_t ch = *(pSrc++);
        !            79:         switch (ch) {
        !            80:         case NUL:   *ppSrc = NULL; return;
        !            81:         case '\'':  goto done;
        !            82:         case '\\':
        !            83:             /*
        !            84:              *  *Four* escapes are handled:  newline removal, escape char
        !            85:              *  quoting and apostrophe quoting
        !            86:              */
        !            87:             switch (*pSrc) {
        !            88:             case NUL:   *ppSrc = NULL; return;
        !            89:             case '\r':
        !            90:                 if (*(++pSrc) == '\n')
        !            91:                     ++pSrc;
        !            92:                 continue;
        !            93: 
        !            94:             case '\n':
        !            95:                 ++pSrc;
        !            96:                 continue;
        !            97: 
        !            98:             case '\'':
        !            99:                 ch = '\'';
        !           100:                 /* FALLTHROUGH */
        !           101: 
        !           102:             case '\\':
        !           103:                 ++pSrc;
        !           104:                 break;
        !           105:             }
        !           106:             /* FALLTHROUGH */
        !           107: 
        !           108:         default:
        !           109:             *(pDest++) = ch;
        !           110:         }
        !           111:     }
        !           112: 
        !           113:  done:
        !           114:     *ppDest = pDest; /* next spot for storing character */
        !           115:     *ppSrc  = (char const *) pSrc;  /* char following closing quote    */
        !           116: }
        !           117: 
        !           118: static token_list_t *
        !           119: alloc_token_list(char const * str)
        !           120: {
        !           121:     token_list_t * res;
        !           122: 
        !           123:     int max_token_ct = 2; /* allow for trailing NULL pointer & NUL on string */
        !           124: 
        !           125:     if (str == NULL) goto enoent_res;
        !           126: 
        !           127:     /*
        !           128:      *  Trim leading white space.  Use "ENOENT" and a NULL return to indicate
        !           129:      *  an empty string was passed.
        !           130:      */
        !           131:     while (IS_WHITESPACE_CHAR(*str))  str++;
        !           132:     if (*str == NUL)  goto enoent_res;
        !           133: 
        !           134:     /*
        !           135:      *  Take an approximate count of tokens.  If no quoted strings are used,
        !           136:      *  it will be accurate.  If quoted strings are used, it will be a little
        !           137:      *  high and we'll squander the space for a few extra pointers.
        !           138:      */
        !           139:     {
        !           140:         cc_t* pz = (cc_t*)str;
        !           141: 
        !           142:         do {
        !           143:             max_token_ct++;
        !           144:             while (! IS_WHITESPACE_CHAR(*++pz))
        !           145:                 if (*pz == NUL) goto found_nul;
        !           146:             while (IS_WHITESPACE_CHAR(*pz))  pz++;
        !           147:         } while (*pz != NUL);
        !           148: 
        !           149:     found_nul:
        !           150:         res = malloc(sizeof(*res) + (pz - (cc_t*)str)
        !           151:                      + (max_token_ct * sizeof(ch_t*)));
        !           152:     }
        !           153: 
        !           154:     if (res == NULL)
        !           155:         errno = ENOMEM;
        !           156:     else res->tkn_list[0] = (ch_t*)(res->tkn_list + (max_token_ct - 1));
        !           157: 
        !           158:     return res;
        !           159: 
        !           160:     enoent_res:
        !           161: 
        !           162:     errno = ENOENT;
        !           163:     return NULL;
        !           164: }
        !           165: 
        !           166: /*=export_func ao_string_tokenize
        !           167:  *
        !           168:  * what: tokenize an input string
        !           169:  *
        !           170:  * arg:  + char const* + string + string to be tokenized +
        !           171:  *
        !           172:  * ret_type:  token_list_t*
        !           173:  * ret_desc:  pointer to a structure that lists each token
        !           174:  *
        !           175:  * doc:
        !           176:  *
        !           177:  * This function will convert one input string into a list of strings.
        !           178:  * The list of strings is derived by separating the input based on
        !           179:  * white space separation.  However, if the input contains either single
        !           180:  * or double quote characters, then the text after that character up to
        !           181:  * a matching quote will become the string in the list.
        !           182:  *
        !           183:  *  The returned pointer should be deallocated with @code{free(3C)} when
        !           184:  *  are done using the data.  The data are placed in a single block of
        !           185:  *  allocated memory.  Do not deallocate individual token/strings.
        !           186:  *
        !           187:  *  The structure pointed to will contain at least these two fields:
        !           188:  *  @table @samp
        !           189:  *  @item tkn_ct
        !           190:  *  The number of tokens found in the input string.
        !           191:  *  @item tok_list
        !           192:  *  An array of @code{tkn_ct + 1} pointers to substring tokens, with
        !           193:  *  the last pointer set to NULL.
        !           194:  *  @end table
        !           195:  *
        !           196:  * There are two types of quoted strings: single quoted (@code{'}) and
        !           197:  * double quoted (@code{"}).  Singly quoted strings are fairly raw in that
        !           198:  * escape characters (@code{\\}) are simply another character, except when
        !           199:  * preceding the following characters:
        !           200:  * @example
        !           201:  * @code{\\}  double backslashes reduce to one
        !           202:  * @code{'}   incorporates the single quote into the string
        !           203:  * @code{\n}  suppresses both the backslash and newline character
        !           204:  * @end example
        !           205:  *
        !           206:  * Double quote strings are formed according to the rules of string
        !           207:  * constants in ANSI-C programs.
        !           208:  *
        !           209:  * example:
        !           210:  * @example
        !           211:  *    #include <stdlib.h>
        !           212:  *    int ix;
        !           213:  *    token_list_t* ptl = ao_string_tokenize(some_string)
        !           214:  *    for (ix = 0; ix < ptl->tkn_ct; ix++)
        !           215:  *       do_something_with_tkn(ptl->tkn_list[ix]);
        !           216:  *    free(ptl);
        !           217:  * @end example
        !           218:  * Note that everything is freed with the one call to @code{free(3C)}.
        !           219:  *
        !           220:  * err:
        !           221:  *  NULL is returned and @code{errno} will be set to indicate the problem:
        !           222:  *  @itemize @bullet
        !           223:  *  @item
        !           224:  *  @code{EINVAL} - There was an unterminated quoted string.
        !           225:  *  @item
        !           226:  *  @code{ENOENT} - The input string was empty.
        !           227:  *  @item
        !           228:  *  @code{ENOMEM} - There is not enough memory.
        !           229:  *  @end itemize
        !           230: =*/
        !           231: token_list_t*
        !           232: ao_string_tokenize(char const* str)
        !           233: {
        !           234:     token_list_t* res = alloc_token_list(str);
        !           235:     ch_t* pzDest;
        !           236: 
        !           237:     /*
        !           238:      *  Now copy each token into the output buffer.
        !           239:      */
        !           240:     if (res == NULL)
        !           241:         return res;
        !           242: 
        !           243:     pzDest = (ch_t*)(res->tkn_list[0]);
        !           244:     res->tkn_ct  = 0;
        !           245: 
        !           246:     do  {
        !           247:         res->tkn_list[ res->tkn_ct++ ] = pzDest;
        !           248:         for (;;) {
        !           249:             int ch = (ch_t)*str;
        !           250:             if (IS_WHITESPACE_CHAR(ch)) {
        !           251:             found_white_space:
        !           252:                 while (IS_WHITESPACE_CHAR(*++str))  ;
        !           253:                 break;
        !           254:             }
        !           255: 
        !           256:             switch (ch) {
        !           257:             case '"':
        !           258:                 copy_cooked(&pzDest, &str);
        !           259:                 if (str == NULL) {
        !           260:                     free(res);
        !           261:                     errno = EINVAL;
        !           262:                     return NULL;
        !           263:                 }
        !           264:                 if (IS_WHITESPACE_CHAR(*str))
        !           265:                     goto found_white_space;
        !           266:                 break;
        !           267: 
        !           268:             case '\'':
        !           269:                 copy_raw(&pzDest, &str);
        !           270:                 if (str == NULL) {
        !           271:                     free(res);
        !           272:                     errno = EINVAL;
        !           273:                     return NULL;
        !           274:                 }
        !           275:                 if (IS_WHITESPACE_CHAR(*str))
        !           276:                     goto found_white_space;
        !           277:                 break;
        !           278: 
        !           279:             case NUL:
        !           280:                 goto copy_done;
        !           281: 
        !           282:             default:
        !           283:                 str++;
        !           284:                 *(pzDest++) = ch;
        !           285:             }
        !           286:         } copy_done:;
        !           287: 
        !           288:         /*
        !           289:          * NUL terminate the last token and see if we have any more tokens.
        !           290:          */
        !           291:         *(pzDest++) = NUL;
        !           292:     } while (*str != NUL);
        !           293: 
        !           294:     res->tkn_list[ res->tkn_ct ] = NULL;
        !           295: 
        !           296:     return res;
        !           297: }
        !           298: 
        !           299: #ifdef TEST
        !           300: #include <stdio.h>
        !           301: #include <string.h>
        !           302: 
        !           303: int
        !           304: main(int argc, char** argv)
        !           305: {
        !           306:     if (argc == 1) {
        !           307:         printf("USAGE:  %s arg [ ... ]\n", *argv);
        !           308:         return 1;
        !           309:     }
        !           310:     while (--argc > 0) {
        !           311:         char* arg = *(++argv);
        !           312:         token_list_t* p = ao_string_tokenize(arg);
        !           313:         if (p == NULL) {
        !           314:             printf("Parsing string ``%s'' failed:\n\terrno %d (%s)\n",
        !           315:                    arg, errno, strerror(errno));
        !           316:         } else {
        !           317:             int ix = 0;
        !           318:             printf("Parsed string ``%s''\ninto %d tokens:\n", arg, p->tkn_ct);
        !           319:             do {
        !           320:                 printf(" %3d:  ``%s''\n", ix+1, p->tkn_list[ix]);
        !           321:             } while (++ix < p->tkn_ct);
        !           322:             free(p);
        !           323:         }
        !           324:     }
        !           325:     return 0;
        !           326: }
        !           327: #endif
        !           328: 
        !           329: /*
        !           330:  * Local Variables:
        !           331:  * mode: C
        !           332:  * c-file-style: "stroustrup"
        !           333:  * indent-tabs-mode: nil
        !           334:  * End:
        !           335:  * end of autoopts/tokenize.c */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>