File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / expat / lib / xmltok.h
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 23:00:54 2012 UTC (12 years, 6 months ago) by misho
Branches: expat, MAIN
CVS tags: v2_1_0, v2_0_1p0, v2_0_1, HEAD
expat

    1: /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
    2:    See the file COPYING for copying permission.
    3: */
    4: 
    5: #ifndef XmlTok_INCLUDED
    6: #define XmlTok_INCLUDED 1
    7: 
    8: #ifdef __cplusplus
    9: extern "C" {
   10: #endif
   11: 
   12: /* The following token may be returned by XmlContentTok */
   13: #define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be
   14:                                     the start of an illegal ]]> sequence */
   15: /* The following tokens may be returned by both XmlPrologTok and
   16:    XmlContentTok.
   17: */
   18: #define XML_TOK_NONE -4          /* The string to be scanned is empty */
   19: #define XML_TOK_TRAILING_CR -3   /* A CR at the end of the scan;
   20:                                     might be part of a CRLF sequence */
   21: #define XML_TOK_PARTIAL_CHAR -2  /* only part of a multibyte sequence */
   22: #define XML_TOK_PARTIAL -1       /* only part of a token */
   23: #define XML_TOK_INVALID 0        /* the string does not start a valid token */
   24: 
   25: /* The following tokens are returned by XmlContentTok; some are also
   26:    returned by XmlAttributeValueTok, XmlEntityValueTok, XmlCdataSectionTok.
   27: */
   28: #define XML_TOK_START_TAG_WITH_ATTS 1
   29: #define XML_TOK_START_TAG_NO_ATTS 2
   30: #define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */
   31: #define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4
   32: #define XML_TOK_END_TAG 5
   33: #define XML_TOK_DATA_CHARS 6
   34: #define XML_TOK_DATA_NEWLINE 7
   35: #define XML_TOK_CDATA_SECT_OPEN 8
   36: #define XML_TOK_ENTITY_REF 9
   37: #define XML_TOK_CHAR_REF 10               /* numeric character reference */
   38: 
   39: /* The following tokens may be returned by both XmlPrologTok and
   40:    XmlContentTok.
   41: */
   42: #define XML_TOK_PI 11                     /* processing instruction */
   43: #define XML_TOK_XML_DECL 12               /* XML decl or text decl */
   44: #define XML_TOK_COMMENT 13
   45: #define XML_TOK_BOM 14                    /* Byte order mark */
   46: 
   47: /* The following tokens are returned only by XmlPrologTok */
   48: #define XML_TOK_PROLOG_S 15
   49: #define XML_TOK_DECL_OPEN 16              /* <!foo */
   50: #define XML_TOK_DECL_CLOSE 17             /* > */
   51: #define XML_TOK_NAME 18
   52: #define XML_TOK_NMTOKEN 19
   53: #define XML_TOK_POUND_NAME 20             /* #name */
   54: #define XML_TOK_OR 21                     /* | */
   55: #define XML_TOK_PERCENT 22                /* % */
   56: #define XML_TOK_OPEN_PAREN 23             /* ( */
   57: #define XML_TOK_CLOSE_PAREN 24            /* ) */
   58: #define XML_TOK_OPEN_BRACKET 25           /* [ */
   59: #define XML_TOK_CLOSE_BRACKET 26          /* ] */
   60: #define XML_TOK_LITERAL 27
   61: #define XML_TOK_PARAM_ENTITY_REF 28
   62: #define XML_TOK_INSTANCE_START 29
   63: 
   64: /* The following occur only in element type declarations */
   65: #define XML_TOK_NAME_QUESTION 30          /* name? */
   66: #define XML_TOK_NAME_ASTERISK 31          /* name* */
   67: #define XML_TOK_NAME_PLUS 32              /* name+ */
   68: #define XML_TOK_COND_SECT_OPEN 33         /* <![ */
   69: #define XML_TOK_COND_SECT_CLOSE 34        /* ]]> */
   70: #define XML_TOK_CLOSE_PAREN_QUESTION 35   /* )? */
   71: #define XML_TOK_CLOSE_PAREN_ASTERISK 36   /* )* */
   72: #define XML_TOK_CLOSE_PAREN_PLUS 37       /* )+ */
   73: #define XML_TOK_COMMA 38                  /* , */
   74: 
   75: /* The following token is returned only by XmlAttributeValueTok */
   76: #define XML_TOK_ATTRIBUTE_VALUE_S 39
   77: 
   78: /* The following token is returned only by XmlCdataSectionTok */
   79: #define XML_TOK_CDATA_SECT_CLOSE 40
   80: 
   81: /* With namespace processing this is returned by XmlPrologTok for a
   82:    name with a colon.
   83: */
   84: #define XML_TOK_PREFIXED_NAME 41
   85: /* Only defined when XML_DTD is defined. NOTE(review): presumably returned by XmlIgnoreSectionTok - confirm. */
   86: #ifdef XML_DTD
   87: #define XML_TOK_IGNORE_SECT 42
   88: #endif /* XML_DTD */
   89: /* Number of entries in the scanners array of struct encoding. */
   90: #ifdef XML_DTD
   91: #define XML_N_STATES 4
   92: #else /* not XML_DTD */
   93: #define XML_N_STATES 3
   94: #endif /* not XML_DTD */
   95: /* State values passed to XmlTok; each one indexes the scanners array. */
   96: #define XML_PROLOG_STATE 0
   97: #define XML_CONTENT_STATE 1
   98: #define XML_CDATA_SECTION_STATE 2
   99: #ifdef XML_DTD
  100: #define XML_IGNORE_SECTION_STATE 3
  101: #endif /* XML_DTD */
  102: /* Literal types passed to XmlLiteralTok; each one indexes the literalScanners array. */
  103: #define XML_N_LITERAL_TYPES 2         /* number of entries in literalScanners */
  104: #define XML_ATTRIBUTE_VALUE_LITERAL 0 /* selected by XmlAttributeValueTok */
  105: #define XML_ENTITY_VALUE_LITERAL 1    /* selected by XmlEntityValueTok */
  106: 
  107: /* The size of the buffer passed to XmlUtf8Encode must be at least this. */
  108: #define XML_UTF8_ENCODE_MAX 4
  109: /* The size of the buffer passed to XmlUtf16Encode must be at least this. */
  110: #define XML_UTF16_ENCODE_MAX 2
  111: /* Position record handed to XmlUpdatePosition. XML_Size is not declared in this header, so it must be declared before this header is included. */
  112: typedef struct position {
  113:   /* both counters are zero-based: the first line and first column are 0, not 1 */
  114:   XML_Size lineNumber;
  115:   XML_Size columnNumber;
  116: } POSITION;
  117: /* Element type of the atts array passed to XmlGetAttributes. */
  118: typedef struct {
  119:   const char *name;     /* NOTE(review): presumably points at the attribute name - confirm */
  120:   const char *valuePtr; /* NOTE(review): presumably the start of the attribute value - confirm */
  121:   const char *valueEnd; /* NOTE(review): presumably the end of the attribute value - confirm */
  122:   char normalized;      /* NOTE(review): presumably a flag about value normalization - confirm */
  123: } ATTRIBUTE;
  124: 
  125: struct encoding;
  126: typedef struct encoding ENCODING;
  127: /* Scanner callback type held in the scanners and literalScanners arrays; XmlTok and XmlLiteralTok call it as (enc, ptr, end, nextTokPtr). */
  128: typedef int (PTRCALL *SCANNER)(const ENCODING *,   /* enc */
  129:                                const char *,       /* ptr */
  130:                                const char *,       /* end */
  131:                                const char **);     /* nextTokPtr */
  132: /* Per-encoding table. Every function pointer takes the ENCODING as its first argument and has an Xml* wrapper macro in this header. */
  133: struct encoding {
  134:   SCANNER scanners[XML_N_STATES];               /* indexed by the XML_*_STATE values; see XmlTok */
  135:   SCANNER literalScanners[XML_N_LITERAL_TYPES]; /* indexed by the XML_*_LITERAL values; see XmlLiteralTok */
  136:   int (PTRCALL *sameName)(const ENCODING *,     /* see XmlSameName(enc, ptr1, ptr2) */
  137:                           const char *,
  138:                           const char *);
  139:   int (PTRCALL *nameMatchesAscii)(const ENCODING *, /* see XmlNameMatchesAscii(enc, ptr1, end1, ptr2) */
  140:                                   const char *,
  141:                                   const char *,
  142:                                   const char *);
  143:   int (PTRFASTCALL *nameLength)(const ENCODING *, const char *); /* see XmlNameLength(enc, ptr) */
  144:   const char *(PTRFASTCALL *skipS)(const ENCODING *, const char *); /* see XmlSkipS(enc, ptr) */
  145:   int (PTRCALL *getAtts)(const ENCODING *enc,   /* see XmlGetAttributes */
  146:                          const char *ptr,
  147:                          int attsMax,
  148:                          ATTRIBUTE *atts);
  149:   int (PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr); /* see XmlCharRefNumber */
  150:   int (PTRCALL *predefinedEntityName)(const ENCODING *, /* see XmlPredefinedEntityName(enc, ptr, end) */
  151:                                       const char *,
  152:                                       const char *);
  153:   void (PTRCALL *updatePosition)(const ENCODING *, /* see XmlUpdatePosition(enc, ptr, end, pos) */
  154:                                  const char *ptr,
  155:                                  const char *end,
  156:                                  POSITION *);
  157:   int (PTRCALL *isPublicId)(const ENCODING *enc, /* see XmlIsPublicId */
  158:                             const char *ptr,
  159:                             const char *end,
  160:                             const char **badPtr);
  161:   void (PTRCALL *utf8Convert)(const ENCODING *enc, /* see XmlUtf8Convert */
  162:                               const char **fromP,
  163:                               const char *fromLim,
  164:                               char **toP,
  165:                               const char *toLim);
  166:   void (PTRCALL *utf16Convert)(const ENCODING *enc, /* see XmlUtf16Convert */
  167:                                const char **fromP,
  168:                                const char *fromLim,
  169:                                unsigned short **toP,
  170:                                const unsigned short *toLim);
  171:   int minBytesPerChar; /* NOTE(review): presumably the smallest number of bytes per character in this encoding - confirm */
  172:   char isUtf8;         /* NOTE(review): presumably nonzero when the encoding is UTF-8 - confirm */
  173:   char isUtf16;        /* NOTE(review): presumably nonzero when the encoding is UTF-16 - confirm */
  174: };
  175: 
  176: /* Scan the string starting at ptr until the end of the next complete
  177:    token, but do not scan past end.  Return an integer giving the
  178:    type of token.
  179: 
  180:    Return XML_TOK_NONE when ptr == end; nextTokPtr will not be set.
  181: 
  182:    Return XML_TOK_PARTIAL when the string does not contain a complete
  183:    token; nextTokPtr will not be set.
  184: 
  185:    Return XML_TOK_INVALID when the string does not start a valid
  186:    token; nextTokPtr will be set to point to the character which made
  187:    the token invalid.
  188: 
  189:    Otherwise the string starts with a valid token; nextTokPtr will be
  190:    set to point to the character following the end of that token.
  191: 
  192:    Each data character counts as a single token, but adjacent data
  193:    characters may be returned together.  Similarly for characters in
  194:    the prolog outside literals, comments and processing instructions.
  195: */
  196: /* Note: each macro below expands its enc argument twice, so enc must not have side effects. */
  197: 
  198: #define XmlTok(enc, state, ptr, end, nextTokPtr) \
  199:   (((enc)->scanners[state])(enc, ptr, end, nextTokPtr))
  200: 
  201: #define XmlPrologTok(enc, ptr, end, nextTokPtr) \
  202:    XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr)
  203: 
  204: #define XmlContentTok(enc, ptr, end, nextTokPtr) \
  205:    XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr)
  206: 
  207: #define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \
  208:    XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr)
  209: 
  210: #ifdef XML_DTD
  211: 
  212: #define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \
  213:    XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr)
  214: 
  215: #endif /* XML_DTD */
  216: 
  217: /* This is used for performing a 2nd-level tokenization on the content
  218:    of a literal that has already been returned by XmlTok.  literalType indexes
  219:    literalScanners (XML_ATTRIBUTE_VALUE_LITERAL or XML_ENTITY_VALUE_LITERAL); enc is expanded twice. */
  220: #define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
  221:   (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr))
  222: 
  223: #define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \
  224:    XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr)
  225: 
  226: #define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \
  227:    XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr)
  228: /* Wrappers that call the like-named function pointer of enc, passing enc as the first argument. enc is expanded twice, so it must not have side effects. */
  229: #define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2))
  230: 
  231: #define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \
  232:   (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2))
  233: 
  234: #define XmlNameLength(enc, ptr) \
  235:   (((enc)->nameLength)(enc, ptr))
  236: 
  237: #define XmlSkipS(enc, ptr) \
  238:   (((enc)->skipS)(enc, ptr))
  239: 
  240: #define XmlGetAttributes(enc, ptr, attsMax, atts) \
  241:   (((enc)->getAtts)(enc, ptr, attsMax, atts))
  242: 
  243: #define XmlCharRefNumber(enc, ptr) \
  244:   (((enc)->charRefNumber)(enc, ptr))
  245: 
  246: #define XmlPredefinedEntityName(enc, ptr, end) \
  247:   (((enc)->predefinedEntityName)(enc, ptr, end))
  248: 
  249: #define XmlUpdatePosition(enc, ptr, end, pos) \
  250:   (((enc)->updatePosition)(enc, ptr, end, pos))
  251: 
  252: #define XmlIsPublicId(enc, ptr, end, badPtr) \
  253:   (((enc)->isPublicId)(enc, ptr, end, badPtr))
  254: 
  255: #define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \
  256:   (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim))
  257: 
  258: #define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \
  259:   (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim))
  260: /* Type of the first argument of XmlInitEncoding and XmlInitEncodingNS. */
  261: typedef struct {
  262:   ENCODING initEnc;
  263:   const ENCODING **encPtr; /* NOTE(review): presumably the ENCODING pointer slot given to XmlInitEncoding - confirm */
  264: } INIT_ENCODING;
  265: /* NOTE(review): judging by the name this parses an XML declaration lying between ptr and end; the meaning of the return value and of the out-parameters is not visible in this header - confirm in the implementation. */
  266: int XmlParseXmlDecl(int isGeneralTextEntity,
  267:                     const ENCODING *enc,
  268:                     const char *ptr,
  269:                     const char *end,
  270:                     const char **badPtr,
  271:                     const char **versionPtr,
  272:                     const char **versionEndPtr,
  273:                     const char **encodingNamePtr,
  274:                     const ENCODING **namedEncodingPtr,
  275:                     int *standalonePtr);
  276: 
  277: int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name);
  278: const ENCODING *XmlGetUtf8InternalEncoding(void);
  279: const ENCODING *XmlGetUtf16InternalEncoding(void);
  280: int FASTCALL XmlUtf8Encode(int charNumber, char *buf); /* size of buf must be at least XML_UTF8_ENCODE_MAX */
  281: int FASTCALL XmlUtf16Encode(int charNumber, unsigned short *buf); /* size of buf must be at least XML_UTF16_ENCODE_MAX */
  282: int XmlSizeOfUnknownEncoding(void); /* NOTE(review): presumably the size needed for the mem argument of XmlInitUnknownEncoding - confirm */
  283: 
  284: /* Callback type of the convert argument of XmlInitUnknownEncoding and XmlInitUnknownEncodingNS. */
  285: typedef int (XMLCALL *CONVERTER) (void *userData, const char *p);
  286: 
  287: ENCODING *
  288: XmlInitUnknownEncoding(void *mem,   /* NOTE(review): presumably at least XmlSizeOfUnknownEncoding() bytes - confirm */
  289:                        int *table,
  290:                        CONVERTER convert,
  291:                        void *userData); /* NOTE(review): presumably handed to convert as its userData argument - confirm */
  292: /* The *NS declarations below have the same signatures as XmlParseXmlDecl, XmlInitEncoding, XmlGetUtf8InternalEncoding, XmlGetUtf16InternalEncoding and XmlInitUnknownEncoding. NOTE(review): presumably the variants used with namespace processing - confirm. */
  293: int XmlParseXmlDeclNS(int isGeneralTextEntity,
  294:                       const ENCODING *enc,
  295:                       const char *ptr,
  296:                       const char *end,
  297:                       const char **badPtr,
  298:                       const char **versionPtr,
  299:                       const char **versionEndPtr,
  300:                       const char **encodingNamePtr,
  301:                       const ENCODING **namedEncodingPtr,
  302:                       int *standalonePtr);
  303: 
  304: int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name);
  305: const ENCODING *XmlGetUtf8InternalEncodingNS(void);
  306: const ENCODING *XmlGetUtf16InternalEncodingNS(void);
  307: ENCODING *
  308: XmlInitUnknownEncodingNS(void *mem,
  309:                          int *table,
  310:                          CONVERTER convert,
  311:                          void *userData);
  312: #ifdef __cplusplus
  313: }
  314: #endif
  315: 
  316: #endif /* not XmlTok_INCLUDED */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>