File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / expat / xmlwf / ct.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:00:54 2012 UTC (12 years, 7 months ago) by misho
Branches: expat, MAIN
CVS tags: v2_1_0, v2_0_1p0, v2_0_1, HEAD
expat

#include <stdio.h>
#include <string.h>

#define CHARSET_MAX 41
    2: 
    3: static const char *
    4: getTok(const char **pp)
    5: {
    6:   enum { inAtom, inString, init, inComment };
    7:   int state = init;
    8:   const char *tokStart = 0;
    9:   for (;;) {
   10:     switch (**pp) {
   11:     case '\0':
   12:       return 0;
   13:     case ' ':
   14:     case '\r':
   15:     case '\t':
   16:     case '\n':
   17:       if (state == inAtom)
   18:         return tokStart;
   19:       break;
   20:     case '(':
   21:       if (state == inAtom)
   22:         return tokStart;
   23:       if (state != inString)
   24:         state++;
   25:       break;
   26:     case ')':
   27:       if (state > init)
   28:         --state;
   29:       else if (state != inString)
   30:         return 0;
   31:       break;
   32:     case ';':
   33:     case '/':
   34:     case '=':
   35:       if (state == inAtom)
   36:         return tokStart;
   37:       if (state == init)
   38:         return (*pp)++;
   39:       break;
   40:     case '\\':
   41:       ++*pp;
   42:       if (**pp == '\0')
   43:         return 0;
   44:       break;
   45:     case '"':
   46:       switch (state) {
   47:       case inString:
   48:         ++*pp;
   49:         return tokStart;
   50:       case inAtom:
   51:         return tokStart;
   52:       case init:
   53:         tokStart = *pp;
   54:         state = inString;
   55:         break;
   56:       }
   57:       break;
   58:     default:
   59:       if (state == init) {
   60:         tokStart = *pp;
   61:         state = inAtom;
   62:       }
   63:       break;
   64:     }
   65:     ++*pp;
   66:   }
   67:   /* not reached */
   68: }
   69: 
   70: /* key must be lowercase ASCII */
   71: 
   72: static int
   73: matchkey(const char *start, const char *end, const char *key)
   74: {
   75:   if (!start)
   76:     return 0;
   77:   for (; start != end; start++, key++)
   78:     if (*start != *key && *start != 'A' + (*key - 'a'))
   79:       return 0;
   80:   return *key == '\0';
   81: }
   82: 
   83: void
   84: getXMLCharset(const char *buf, char *charset)
   85: {
   86:   const char *next, *p;
   87: 
   88:   charset[0] = '\0';
   89:   next = buf;
   90:   p = getTok(&next);
   91:   if (matchkey(p, next, "text"))
   92:     strcpy(charset, "us-ascii");
   93:   else if (!matchkey(p, next, "application"))
   94:     return;
   95:   p = getTok(&next);
   96:   if (!p || *p != '/')
   97:     return;
   98:   p = getTok(&next);
   99:   if (matchkey(p, next, "xml"))
  100:     isXml = 1;
  101:   p = getTok(&next);
  102:   while (p) {
  103:     if (*p == ';') {
  104:       p = getTok(&next);
  105:       if (matchkey(p, next, "charset")) {
  106:         p = getTok(&next);
  107:         if (p && *p == '=') {
  108:           p = getTok(&next);
  109:           if (p) {
  110:             char *s = charset;
  111:             if (*p == '"') {
  112:               while (++p != next - 1) {
  113:                 if (*p == '\\')
  114:                   ++p;
  115:                 if (s == charset + CHARSET_MAX - 1) {
  116:                   charset[0] = '\0';
  117:                   break;
  118:                 }
  119:                 *s++ = *p;
  120:               }
  121:               *s++ = '\0';
  122:             }
  123:             else {
  124:               if (next - p > CHARSET_MAX - 1)
  125:                 break;
  126:               while (p != next)
  127:                 *s++ = *p++;
  128:               *s = 0;
  129:               break;
  130:             }
  131:           }
  132:         }
  133:       }
  134:     }
  135:   else
  136:     p = getTok(&next);
  137:   }
  138: }
  139: 
  140: int
  141: main(int argc, char **argv)
  142: {
  143:   char buf[CHARSET_MAX];
  144:   getXMLCharset(argv[1], buf);
  145:   printf("charset = \"%s\"\n", buf);
  146:   return 0;
  147: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>