Annotation of embedaddon/expat/lib/xmltok.h, revision 1.1.1.1

1.1       misho       1: /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
                      2:    See the file COPYING for copying permission.
                      3: */
                      4: 
                      5: #ifndef XmlTok_INCLUDED
                      6: #define XmlTok_INCLUDED 1
                      7: 
                      8: #ifdef __cplusplus
                      9: extern "C" {
                     10: #endif
                     11: 
                     12: /* The following token may be returned by XmlContentTok */
                     13: #define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be
                     14:                                     start of illegal ]]> sequence */
                     15: /* The following tokens may be returned by both XmlPrologTok and
                     16:    XmlContentTok.
                            Every result code defined from XML_TOK_TRAILING_RSQB down to
                            XML_TOK_INVALID is <= 0; the token types defined after this group
                            are all positive.
                     17: */
                     18: #define XML_TOK_NONE -4          /* The string to be scanned is empty */
                     19: #define XML_TOK_TRAILING_CR -3   /* A CR at the end of the scan;
                     20:                                     might be part of CRLF sequence */
                     21: #define XML_TOK_PARTIAL_CHAR -2  /* only part of a multibyte sequence */
                     22: #define XML_TOK_PARTIAL -1       /* only part of a token */
                     23: #define XML_TOK_INVALID 0        /* the string does not start a valid token */
                     24: 
                     25: /* The following tokens are returned by XmlContentTok; some are also
                     26:    returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok.
                     27: */
                     28: #define XML_TOK_START_TAG_WITH_ATTS 1
                     29: #define XML_TOK_START_TAG_NO_ATTS 2
                     30: #define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */
                     31: #define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4
                         /* NOTE(review): going by the names, the start-tag and empty-element
                            tokens each come in a WITH_ATTS and a NO_ATTS variant depending on
                            whether the tag carries attributes; the tokenizer sources are not
                            visible from this header, so confirm there. */
                     32: #define XML_TOK_END_TAG 5
                     33: #define XML_TOK_DATA_CHARS 6
                     34: #define XML_TOK_DATA_NEWLINE 7
                     35: #define XML_TOK_CDATA_SECT_OPEN 8         /* see also XML_TOK_CDATA_SECT_CLOSE */
                     36: #define XML_TOK_ENTITY_REF 9
                     37: #define XML_TOK_CHAR_REF 10               /* numeric character reference */
                     38: 
                     39: /* The following tokens may be returned by both XmlPrologTok and
                     40:    XmlContentTok.
                     41: */
                     42: #define XML_TOK_PI 11                     /* processing instruction */
                     43: #define XML_TOK_XML_DECL 12               /* XML decl or text decl */
                     44: #define XML_TOK_COMMENT 13                /* comment */
                     45: #define XML_TOK_BOM 14                    /* Byte order mark */
                     46: 
                     47: /* The following tokens are returned only by XmlPrologTok */
                     48: #define XML_TOK_PROLOG_S 15               /* NOTE(review): presumably white space (S) - confirm */
                     49: #define XML_TOK_DECL_OPEN 16              /* <!foo */
                     50: #define XML_TOK_DECL_CLOSE 17             /* > */
                     51: #define XML_TOK_NAME 18
                     52: #define XML_TOK_NMTOKEN 19
                     53: #define XML_TOK_POUND_NAME 20             /* #name */
                     54: #define XML_TOK_OR 21                     /* | */
                     55: #define XML_TOK_PERCENT 22                /* % */
                     56: #define XML_TOK_OPEN_PAREN 23             /* ( */
                     57: #define XML_TOK_CLOSE_PAREN 24            /* ) */
                     58: #define XML_TOK_OPEN_BRACKET 25           /* [ */
                     59: #define XML_TOK_CLOSE_BRACKET 26          /* ] */
                     60: #define XML_TOK_LITERAL 27                /* NOTE(review): presumably the literal that
                                                                      XmlLiteralTok re-scans - confirm */
                     61: #define XML_TOK_PARAM_ENTITY_REF 28
                     62: #define XML_TOK_INSTANCE_START 29
                     63: 
                     64: /* The following occur only in element type declarations */
                     65: #define XML_TOK_NAME_QUESTION 30          /* name? */
                     66: #define XML_TOK_NAME_ASTERISK 31          /* name* */
                     67: #define XML_TOK_NAME_PLUS 32              /* name+ */
                     68: #define XML_TOK_COND_SECT_OPEN 33         /* <![ */
                     69: #define XML_TOK_COND_SECT_CLOSE 34        /* ]]> */
                     70: #define XML_TOK_CLOSE_PAREN_QUESTION 35   /* )? */
                     71: #define XML_TOK_CLOSE_PAREN_ASTERISK 36   /* )* */
                     72: #define XML_TOK_CLOSE_PAREN_PLUS 37       /* )+ */
                     73: #define XML_TOK_COMMA 38                  /* , */
                     74: 
                     75: /* The following token is returned only by XmlAttributeValueTok */
                     76: #define XML_TOK_ATTRIBUTE_VALUE_S 39
                     77: 
                     78: /* The following token is returned only by XmlCdataSectionTok */
                     79: #define XML_TOK_CDATA_SECT_CLOSE 40
                     80: 
                     81: /* With namespace processing this is returned by XmlPrologTok for a
                     82:    name with a colon.
                     83: */
                     84: #define XML_TOK_PREFIXED_NAME 41
                     85: 
                         /* XML_TOK_IGNORE_SECT exists only when the build defines XML_DTD, just
                            like XML_IGNORE_SECTION_STATE and XmlIgnoreSectionTok further down.
                            NOTE(review): presumably it is a token produced by
                            XmlIgnoreSectionTok; confirm against the tokenizer implementation. */
                     86: #ifdef XML_DTD
                     87: #define XML_TOK_IGNORE_SECT 42
                     88: #endif /* XML_DTD */
                     89: 
                     90: #ifdef XML_DTD
                     91: #define XML_N_STATES 4
                     92: #else /* not XML_DTD */
                     93: #define XML_N_STATES 3
                     94: #endif /* not XML_DTD */
                     95: 
                         /* Scanner states.  XML_N_STATES is the length of the scanners[] array
                            in struct encoding, and each XML_*_STATE value below is the index
                            that the XmlTok wrappers pass to select an entry of that array.  The
                            fourth state is defined only when XML_DTD is, which is why
                            XML_N_STATES differs between the two configurations. */
                     96: #define XML_PROLOG_STATE 0
                     97: #define XML_CONTENT_STATE 1
                     98: #define XML_CDATA_SECTION_STATE 2
                     99: #ifdef XML_DTD
                    100: #define XML_IGNORE_SECTION_STATE 3
                    101: #endif /* XML_DTD */
                    102: 
                    103: #define XML_N_LITERAL_TYPES 2
                         /* XML_N_LITERAL_TYPES is the length of literalScanners[] in struct
                            encoding; the two values below are the indices that
                            XmlAttributeValueTok and XmlEntityValueTok pass to XmlLiteralTok. */
                    104: #define XML_ATTRIBUTE_VALUE_LITERAL 0
                    105: #define XML_ENTITY_VALUE_LITERAL 1
                    106: 
                    107: /* The size of the buffer passed to XmlUtf8Encode must be at least this. */
                    108: #define XML_UTF8_ENCODE_MAX 4  /* XmlUtf8Encode's buf is a char *; presumably counted in chars - TODO confirm */
                    109: /* The size of the buffer passed to XmlUtf16Encode must be at least this. */
                    110: #define XML_UTF16_ENCODE_MAX 2 /* XmlUtf16Encode's buf is an unsigned short *; presumably counted in unsigned shorts - TODO confirm */
                    111: 
                    112: typedef struct position {
                    113:   /* first line and first column are 0 not 1 */
                           /* Line/column pair.  A pointer to one of these is the last argument
                              of the updatePosition member of struct encoding (reached through
                              the XmlUpdatePosition macro).  XML_Size is not declared in this
                              header; it must be visible before this header is included. */
                    114:   XML_Size lineNumber;
                    115:   XML_Size columnNumber;
                    116: } POSITION;
                    117: 
                    118: typedef struct {
                           /* One attribute record.  The getAtts member of struct encoding
                              (reached through XmlGetAttributes) takes a pointer to these as its
                              atts argument, together with an attsMax count. */
                    119:   const char *name;
                    120:   const char *valuePtr;   /* NOTE(review): presumably where the value text starts - confirm */
                    121:   const char *valueEnd;   /* NOTE(review): presumably where the value text ends - confirm */
                    122:   char normalized;        /* NOTE(review): looks like a boolean flag; its exact
                                                      meaning is not visible in this header */
                    123: } ATTRIBUTE;
                    124: 
                    125: struct encoding;
                    126: typedef struct encoding ENCODING;
                    127: 
                         /* Type of the tokenizing functions stored in scanners[] and
                            literalScanners[] of struct encoding.  XmlTok and XmlLiteralTok call
                            them as (enc, ptr, end, nextTokPtr): the encoding, the start of the
                            input, the limit of the scan, and the location that receives the
                            next-token pointer.  The int result is the token type.  PTRCALL is
                            defined outside this header. */
                    128: typedef int (PTRCALL *SCANNER)(const ENCODING *,
                    129:                                const char *,
                    130:                                const char *,
                    131:                                const char **);
                    132: 
                    133: struct encoding {
                           /* Table of per-encoding operations followed by three properties.
                              The Xml* macros defined later in this header each invoke one
                              member and pass the encoding itself as the first argument.
                              PTRCALL and PTRFASTCALL are defined outside this header. */
                    134:   SCANNER scanners[XML_N_STATES];               /* indexed by XML_*_STATE, see XmlTok */
                    135:   SCANNER literalScanners[XML_N_LITERAL_TYPES]; /* indexed by XML_*_LITERAL, see XmlLiteralTok */
                           /* invoked by XmlSameName */
                    136:   int (PTRCALL *sameName)(const ENCODING *,
                    137:                           const char *,
                    138:                           const char *);
                           /* invoked by XmlNameMatchesAscii(enc, ptr1, end1, ptr2) */
                    139:   int (PTRCALL *nameMatchesAscii)(const ENCODING *,
                    140:                                   const char *,
                    141:                                   const char *,
                    142:                                   const char *);
                    143:   int (PTRFASTCALL *nameLength)(const ENCODING *, const char *);    /* invoked by XmlNameLength */
                    144:   const char *(PTRFASTCALL *skipS)(const ENCODING *, const char *); /* invoked by XmlSkipS */
                           /* invoked by XmlGetAttributes (note the differing name) */
                    145:   int (PTRCALL *getAtts)(const ENCODING *enc,
                    146:                          const char *ptr,
                    147:                          int attsMax,
                    148:                          ATTRIBUTE *atts);
                    149:   int (PTRFASTCALL *charRefNumber)(const ENCODING *enc, const char *ptr); /* invoked by XmlCharRefNumber */
                           /* invoked by XmlPredefinedEntityName(enc, ptr, end) */
                    150:   int (PTRCALL *predefinedEntityName)(const ENCODING *,
                    151:                                       const char *,
                    152:                                       const char *);
                           /* invoked by XmlUpdatePosition; the POSITION is passed by pointer */
                    153:   void (PTRCALL *updatePosition)(const ENCODING *,
                    154:                                  const char *ptr,
                    155:                                  const char *end,
                    156:                                  POSITION *);
                           /* invoked by XmlIsPublicId */
                    157:   int (PTRCALL *isPublicId)(const ENCODING *enc,
                    158:                             const char *ptr,
                    159:                             const char *end,
                    160:                             const char **badPtr);
                           /* invoked by XmlUtf8Convert; fromP and toP are pointers to the
                              source and destination cursors, fromLim and toLim the limits.
                              Returns nothing. */
                    161:   void (PTRCALL *utf8Convert)(const ENCODING *enc,
                    162:                               const char **fromP,
                    163:                               const char *fromLim,
                    164:                               char **toP,
                    165:                               const char *toLim);
                           /* invoked by XmlUtf16Convert; same shape as utf8Convert but the
                              destination is unsigned short */
                    166:   void (PTRCALL *utf16Convert)(const ENCODING *enc,
                    167:                                const char **fromP,
                    168:                                const char *fromLim,
                    169:                                unsigned short **toP,
                    170:                                const unsigned short *toLim);
                           /* NOTE(review): the names suggest the smallest number of bytes a
                              character occupies in this encoding and flags for UTF-8 / UTF-16;
                              nothing in this header reads them - confirm in the implementation. */
                    171:   int minBytesPerChar;
                    172:   char isUtf8;
                    173:   char isUtf16;
                    174: };
                    175: 
                    176: /* Scan the string starting at ptr until the end of the next complete
                    177:    token, but do not scan past eptr.  Return an integer giving the
                    178:    type of token.
                    179: 
                    180:    Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set.
                    181: 
                    182:    Return XML_TOK_PARTIAL when the string does not contain a complete
                    183:    token; nextTokPtr will not be set.
                    184: 
                    185:    Return XML_TOK_INVALID when the string does not start a valid
                    186:    token; nextTokPtr will be set to point to the character which made
                    187:    the token invalid.
                    188: 
                    189:    Otherwise the string starts with a valid token; nextTokPtr will be
                    190:    set to point to the character following the end of that token.
                    191: 
                    192:    Each data character counts as a single token, but adjacent data
                    193:    characters may be returned together.  Similarly for characters in
                    194:    the prolog outside literals, comments and processing instructions.

                            Implementation notes: the scan limit called eptr above is the
                            macro parameter named end.  XmlTok calls the function stored in
                            enc->scanners[state]; the Xml*Tok wrappers below fix state to one
                            of the XML_*_STATE constants.  enc appears twice in the expansion,
                            so an enc argument with side effects is evaluated twice.
                            XmlIgnoreSectionTok is available only when XML_DTD is defined.
                    195: */
                    196: 
                    197: 
                    198: #define XmlTok(enc, state, ptr, end, nextTokPtr) \
                    199:   (((enc)->scanners[state])(enc, ptr, end, nextTokPtr))
                    200: 
                    201: #define XmlPrologTok(enc, ptr, end, nextTokPtr) \
                    202:    XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr)
                    203: 
                    204: #define XmlContentTok(enc, ptr, end, nextTokPtr) \
                    205:    XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr)
                    206: 
                    207: #define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \
                    208:    XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr)
                    209: 
                    210: #ifdef XML_DTD
                    211: 
                    212: #define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \
                    213:    XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr)
                    214: 
                    215: #endif /* XML_DTD */
                    214: 
                    215: #endif /* XML_DTD */
                    216: 
                    217: /* This is used for performing a 2nd-level tokenization on the content
                    218:    of a literal that has already been returned by XmlTok.

                            literalType selects the entry of enc->literalScanners that is
                            called; XmlAttributeValueTok and XmlEntityValueTok fix it to
                            XML_ATTRIBUTE_VALUE_LITERAL and XML_ENTITY_VALUE_LITERAL.  enc
                            appears twice in the expansion, so it is evaluated twice.
                    219: */
                    220: #define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \
                    221:   (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr))
                    222: 
                    223: #define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \
                    224:    XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr)
                    225: 
                    226: #define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \
                    227:    XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr)
                    228: 
                    229: #define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2))
                         /* XmlSameName above and every macro down to XmlUtf16Convert follow one
                            pattern: call the struct encoding member named in the expansion and
                            pass enc itself as the first argument.  enc therefore appears twice
                            in each expansion and is evaluated twice, so avoid enc arguments
                            with side effects.  XmlGetAttributes maps to the getAtts member; the
                            other macro names mirror their member names. */
                    230: 
                    231: #define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \
                    232:   (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2))
                    233: 
                    234: #define XmlNameLength(enc, ptr) \
                    235:   (((enc)->nameLength)(enc, ptr))
                    236: 
                    237: #define XmlSkipS(enc, ptr) \
                    238:   (((enc)->skipS)(enc, ptr))
                    239: 
                    240: #define XmlGetAttributes(enc, ptr, attsMax, atts) \
                    241:   (((enc)->getAtts)(enc, ptr, attsMax, atts))
                    242: 
                    243: #define XmlCharRefNumber(enc, ptr) \
                    244:   (((enc)->charRefNumber)(enc, ptr))
                    245: 
                    246: #define XmlPredefinedEntityName(enc, ptr, end) \
                    247:   (((enc)->predefinedEntityName)(enc, ptr, end))
                    248: 
                    249: #define XmlUpdatePosition(enc, ptr, end, pos) \
                    250:   (((enc)->updatePosition)(enc, ptr, end, pos))
                    251: 
                    252: #define XmlIsPublicId(enc, ptr, end, badPtr) \
                    253:   (((enc)->isPublicId)(enc, ptr, end, badPtr))
                    254: 
                    255: #define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \
                    256:   (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim))
                    257: 
                    258: #define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \
                    259:   (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim))
                    260: 
                    261: typedef struct {
                           /* Passed by pointer as the first parameter of XmlInitEncoding and
                              XmlInitEncodingNS.  It embeds a complete ENCODING by value.
                              NOTE(review): how initEnc and encPtr are used is not visible in
                              this header; encPtr has the same type as the second parameter of
                              those two functions - confirm the relationship in the
                              implementation. */
                    262:   ENCODING initEnc;
                    263:   const ENCODING **encPtr;
                    264: } INIT_ENCODING;
                    265: 
                    266: int XmlParseXmlDecl(int isGeneralTextEntity,
                         /* Declaration only; the definition lives outside this header.  The
                            parameter list is identical to that of XmlParseXmlDeclNS.
                            NOTE(review): judging by the names and pointer types, badPtr and the
                            parameters from versionPtr onward look like outputs filled in from
                            an XML or text declaration (cf. XML_TOK_XML_DECL) located in
                            [ptr, end) - confirm against the implementation, including the
                            meaning of the int result. */
                    267:                     const ENCODING *enc,
                    268:                     const char *ptr,
                    269:                     const char *end,
                    270:                     const char **badPtr,
                    271:                     const char **versionPtr,
                    272:                     const char **versionEndPtr,
                    273:                     const char **encodingNamePtr,
                    274:                     const ENCODING **namedEncodingPtr,
                    275:                     int *standalonePtr);
                    276: 
                    277: int XmlInitEncoding(INIT_ENCODING *, const ENCODING **, const char *name);
                         /* This declaration and the ones down to XmlSizeOfUnknownEncoding are
                            prototypes only; the definitions live outside this header, as does
                            the FASTCALL macro. */
                    278: const ENCODING *XmlGetUtf8InternalEncoding(void);
                    279: const ENCODING *XmlGetUtf16InternalEncoding(void);
                         /* Per the comments on XML_UTF8_ENCODE_MAX and XML_UTF16_ENCODE_MAX, the
                            buffer passed as buf must be at least that large.
                            NOTE(review): the meaning of the int result is not visible here. */
                    280: int FASTCALL XmlUtf8Encode(int charNumber, char *buf);
                    281: int FASTCALL XmlUtf16Encode(int charNumber, unsigned short *buf);
                    282: int XmlSizeOfUnknownEncoding(void);
                    283: 
                    284: 
                    285: typedef int (XMLCALL *CONVERTER) (void *userData, const char *p);
                         /* CONVERTER is the callback type of the convert parameter of
                            XmlInitUnknownEncoding and XmlInitUnknownEncodingNS; both take a
                            userData pointer right after it.  XMLCALL is defined outside this
                            header.
                            NOTE(review): presumably mem must provide at least
                            XmlSizeOfUnknownEncoding() bytes and userData is handed back to
                            convert on each call - confirm in the implementation. */
                    286: 
                    287: ENCODING *
                    288: XmlInitUnknownEncoding(void *mem,
                    289:                        int *table,
                    290:                        CONVERTER convert,
                    291:                        void *userData);
                    292: 
                    293: int XmlParseXmlDeclNS(int isGeneralTextEntity,
                         /* Declaration only.  The parameter list is identical to that of
                            XmlParseXmlDecl.
                            NOTE(review): the NS suffix presumably marks the variant used with
                            namespace processing (cf. the XML_TOK_PREFIXED_NAME comment) -
                            confirm against the implementation. */
                    294:                       const ENCODING *enc,
                    295:                       const char *ptr,
                    296:                       const char *end,
                    297:                       const char **badPtr,
                    298:                       const char **versionPtr,
                    299:                       const char **versionEndPtr,
                    300:                       const char **encodingNamePtr,
                    301:                       const ENCODING **namedEncodingPtr,
                    302:                       int *standalonePtr);
                    303: 
                    304: int XmlInitEncodingNS(INIT_ENCODING *, const ENCODING **, const char *name);
                         /* The declarations from XmlInitEncodingNS to XmlInitUnknownEncodingNS
                            repeat the signatures of XmlInitEncoding, XmlGetUtf8InternalEncoding,
                            XmlGetUtf16InternalEncoding and XmlInitUnknownEncoding under an
                            NS-suffixed name.  Prototypes only; definitions are elsewhere.
                            NOTE(review): NS presumably means "with namespace processing" -
                            confirm. */
                    305: const ENCODING *XmlGetUtf8InternalEncodingNS(void);
                    306: const ENCODING *XmlGetUtf16InternalEncodingNS(void);
                    307: ENCODING *
                    308: XmlInitUnknownEncodingNS(void *mem,
                    309:                          int *table,
                    310:                          CONVERTER convert,
                    311:                          void *userData);
                    312: #ifdef __cplusplus
                    313: }
                    314: #endif
                    315: 
                    316: #endif /* not XmlTok_INCLUDED */

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>