Annotation of embedaddon/php/ext/mbstring/oniguruma/regparse.h, revision 1.1
1.1 ! misho 1: #ifndef REGPARSE_H
! 2: #define REGPARSE_H
! 3: /**********************************************************************
! 4: regparse.h - Oniguruma (regular expression library)
! 5: **********************************************************************/
! 6: /*-
! 7: * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
! 8: * All rights reserved.
! 9: *
! 10: * Redistribution and use in source and binary forms, with or without
! 11: * modification, are permitted provided that the following conditions
! 12: * are met:
! 13: * 1. Redistributions of source code must retain the above copyright
! 14: * notice, this list of conditions and the following disclaimer.
! 15: * 2. Redistributions in binary form must reproduce the above copyright
! 16: * notice, this list of conditions and the following disclaimer in the
! 17: * documentation and/or other materials provided with the distribution.
! 18: *
! 19: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
! 20: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
! 21: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
! 22: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
! 23: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
! 24: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
! 25: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
! 26: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
! 27: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
! 28: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
! 29: * SUCH DAMAGE.
! 30: */
! 31:
! 32: #include "regint.h"
! 33:
! 34: /* node type */
! 35: #define N_STRING (1<< 0)
! 36: #define N_CCLASS (1<< 1)
! 37: #define N_CTYPE (1<< 2)
! 38: #define N_ANYCHAR (1<< 3)
! 39: #define N_BACKREF (1<< 4)
! 40: #define N_QUANTIFIER (1<< 5)
! 41: #define N_EFFECT (1<< 6)
! 42: #define N_ANCHOR (1<< 7)
! 43: #define N_LIST (1<< 8)
! 44: #define N_ALT (1<< 9)
! 45: #define N_CALL (1<<10)
! 46:
! 47: #define IS_NODE_TYPE_SIMPLE(type) \
! 48: (((type) & (N_STRING | N_CCLASS | N_CTYPE | N_ANYCHAR | N_BACKREF)) != 0)
! 49:
! 50: #define NTYPE(node) ((node)->type)
! 51: #define NCONS(node) ((node)->u.cons)
! 52: #define NSTRING(node) ((node)->u.str)
! 53: #define NCCLASS(node) ((node)->u.cclass)
! 54: #define NCTYPE(node) ((node)->u.ctype)
! 55: #define NQUANTIFIER(node) ((node)->u.quantifier)
! 56: #define NANCHOR(node) ((node)->u.anchor)
! 57: #define NBACKREF(node) ((node)->u.backref)
! 58: #define NEFFECT(node) ((node)->u.effect)
! 59: #define NCALL(node) ((node)->u.call)
! 60:
! 61: #define CTYPE_WORD (1<<0)
! 62: #define CTYPE_NOT_WORD (1<<1)
! 63: #define CTYPE_WHITE_SPACE (1<<2)
! 64: #define CTYPE_NOT_WHITE_SPACE (1<<3)
! 65: #define CTYPE_DIGIT (1<<4)
! 66: #define CTYPE_NOT_DIGIT (1<<5)
! 67: #define CTYPE_XDIGIT (1<<6)
! 68: #define CTYPE_NOT_XDIGIT (1<<7)
! 69:
! 70: #define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
! 71: #define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
! 72:
! 73: #define EFFECT_MEMORY (1<<0)
! 74: #define EFFECT_OPTION (1<<1)
! 75: #define EFFECT_STOP_BACKTRACK (1<<2)
! 76:
! 77: #define NODE_STR_MARGIN 16
! 78: #define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
! 79: #define NODE_BACKREFS_SIZE 6
! 80:
! 81: #define NSTR_RAW (1<<0) /* by backslashed number */
! 82: #define NSTR_AMBIG (1<<1)
! 83: #define NSTR_AMBIG_REDUCE (1<<2)
! 84:
! 85: #define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
! 86: #define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
! 87: #define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
! 88: #define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
! 89: #define NSTRING_SET_AMBIG_REDUCE(node) (node)->u.str.flag |= NSTR_AMBIG_REDUCE
! 90: #define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
! 91: #define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
! 92: #define NSTRING_IS_AMBIG_REDUCE(node) \
! 93: (((node)->u.str.flag & NSTR_AMBIG_REDUCE) != 0)
! 94:
! 95: #define BACKREFS_P(br) \
! 96: (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
! 97:
! 98: #define NQ_TARGET_ISNOT_EMPTY 0
! 99: #define NQ_TARGET_IS_EMPTY 1
! 100: #define NQ_TARGET_IS_EMPTY_MEM 2
! 101: #define NQ_TARGET_IS_EMPTY_REC 3
! 102:
! 103:
! 104: typedef struct {
! 105: UChar* s;
! 106: UChar* end;
! 107: unsigned int flag;
! 108: int capa; /* (allocated size - 1) or 0: use buf[] */
! 109: UChar buf[NODE_STR_BUF_SIZE];
! 110: } StrNode;
! 111:
! 112: /* move to regint.h */
! 113: #if 0
! 114: typedef struct {
! 115: int flags;
! 116: BitSet bs;
! 117: BBuf* mbuf; /* multi-byte info or NULL */
! 118: } CClassNode;
! 119: #endif
! 120:
! 121: typedef struct {
! 122: int state;
! 123: struct _Node* target;
! 124: int lower;
! 125: int upper;
! 126: int greedy;
! 127: int target_empty_info;
! 128: struct _Node* head_exact;
! 129: struct _Node* next_head_exact;
! 130: int is_refered; /* include called node. don't eliminate even if {0} */
! 131: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 132: int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
! 133: #endif
! 134: } QuantifierNode;
! 135:
! 136: /* status bits */
! 137: #define NST_MIN_FIXED (1<<0)
! 138: #define NST_MAX_FIXED (1<<1)
! 139: #define NST_CLEN_FIXED (1<<2)
! 140: #define NST_MARK1 (1<<3)
! 141: #define NST_MARK2 (1<<4)
! 142: #define NST_MEM_BACKREFED (1<<5)
! 143: #define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
! 144: #define NST_RECURSION (1<<7)
! 145: #define NST_CALLED (1<<8)
! 146: #define NST_ADDR_FIXED (1<<9)
! 147: #define NST_NAMED_GROUP (1<<10)
! 148: #define NST_NAME_REF (1<<11)
! 149: #define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
! 150: #define NST_NEST_LEVEL (1<<13)
! 151: #define NST_BY_NUMBER (1<<14) /* {n,m} */
! 152:
! 153: #define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f)
! 154: #define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f)
! 155:
! 156: #define IS_EFFECT_CALLED(en) (((en)->state & NST_CALLED) != 0)
! 157: #define IS_EFFECT_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
! 158: #define IS_EFFECT_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
! 159: #define IS_EFFECT_MARK1(en) (((en)->state & NST_MARK1) != 0)
! 160: #define IS_EFFECT_MARK2(en) (((en)->state & NST_MARK2) != 0)
! 161: #define IS_EFFECT_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
! 162: #define IS_EFFECT_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
! 163: #define IS_EFFECT_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
! 164: #define IS_EFFECT_STOP_BT_SIMPLE_REPEAT(en) \
! 165: (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
! 166: #define IS_EFFECT_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
! 167:
! 168: #define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
! 169: #define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
! 170: #define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
! 171: #define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
! 172: #define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
! 173: #define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
! 174: #define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
! 175:
! 176: typedef struct {
! 177: int state;
! 178: int type;
! 179: int regnum;
! 180: OnigOptionType option;
! 181: struct _Node* target;
! 182: AbsAddrType call_addr;
! 183: /* for multiple call reference */
! 184: OnigDistance min_len; /* min length (byte) */
! 185: OnigDistance max_len; /* max length (byte) */
! 186: int char_len; /* character length */
! 187: int opt_count; /* referenced count in optimize_node_left() */
! 188: } EffectNode;
! 189:
! 190: #define CALLNODE_REFNUM_UNDEF -1
! 191:
! 192: #ifdef USE_SUBEXP_CALL
! 193:
! 194: typedef struct {
! 195: int offset;
! 196: struct _Node* target;
! 197: } UnsetAddr;
! 198:
! 199: typedef struct {
! 200: int num;
! 201: int alloc;
! 202: UnsetAddr* us;
! 203: } UnsetAddrList;
! 204:
! 205: typedef struct {
! 206: int state;
! 207: int ref_num;
! 208: UChar* name;
! 209: UChar* name_end;
! 210: struct _Node* target; /* EffectNode : EFFECT_MEMORY */
! 211: UnsetAddrList* unset_addr_list;
! 212: } CallNode;
! 213:
! 214: #endif
! 215:
! 216: typedef struct {
! 217: int state;
! 218: int back_num;
! 219: int back_static[NODE_BACKREFS_SIZE];
! 220: int* back_dynamic;
! 221: int nest_level;
! 222: } BackrefNode;
! 223:
! 224: typedef struct {
! 225: int type;
! 226: struct _Node* target;
! 227: int char_len;
! 228: } AnchorNode;
! 229:
! 230: typedef struct _Node {
! 231: int type;
! 232: union {
! 233: StrNode str;
! 234: CClassNode cclass;
! 235: QuantifierNode quantifier;
! 236: EffectNode effect;
! 237: #ifdef USE_SUBEXP_CALL
! 238: CallNode call;
! 239: #endif
! 240: BackrefNode backref;
! 241: AnchorNode anchor;
! 242: struct {
! 243: struct _Node* left;
! 244: struct _Node* right;
! 245: } cons;
! 246: struct {
! 247: int type;
! 248: } ctype;
! 249: } u;
! 250: } Node;
! 251:
! 252: #define NULL_NODE ((Node* )0)
! 253:
! 254: #define SCANENV_MEMNODES_SIZE 8
! 255: #define SCANENV_MEM_NODES(senv) \
! 256: (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
! 257: (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
! 258:
! 259: typedef struct {
! 260: OnigOptionType option;
! 261: OnigAmbigType ambig_flag;
! 262: OnigEncoding enc;
! 263: OnigSyntaxType* syntax;
! 264: BitStatusType capture_history;
! 265: BitStatusType bt_mem_start;
! 266: BitStatusType bt_mem_end;
! 267: BitStatusType backrefed_mem;
! 268: UChar* pattern;
! 269: UChar* pattern_end;
! 270: UChar* error;
! 271: UChar* error_end;
! 272: regex_t* reg; /* for reg->names only */
! 273: int num_call;
! 274: #ifdef USE_SUBEXP_CALL
! 275: UnsetAddrList* unset_addr_list;
! 276: #endif
! 277: int num_mem;
! 278: #ifdef USE_NAMED_GROUP
! 279: int num_named;
! 280: #endif
! 281: int mem_alloc;
! 282: Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
! 283: Node** mem_nodes_dynamic;
! 284: #ifdef USE_COMBINATION_EXPLOSION_CHECK
! 285: int num_comb_exp_check;
! 286: int comb_exp_max_regnum;
! 287: int curr_max_regnum;
! 288: int has_recursion;
! 289: #endif
! 290: } ScanEnv;
! 291:
! 292:
! 293: #define IS_SYNTAX_OP(syn, opm) (((syn)->op & (opm)) != 0)
! 294: #define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
! 295: #define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
! 296:
! 297:
! 298: #ifdef USE_NAMED_GROUP
! 299: typedef struct {
! 300: int new_val;
! 301: } GroupNumRemap;
! 302:
! 303: extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
! 304: #endif
! 305:
! 306: extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
! 307: extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
! 308: extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
! 309: extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
! 310: extern void onig_node_conv_to_str_node P_((Node* node, int raw));
! 311: extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
! 312: extern void onig_node_free P_((Node* node));
! 313: extern Node* onig_node_new_effect P_((int type));
! 314: extern Node* onig_node_new_anchor P_((int type));
! 315: extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
! 316: extern Node* onig_node_new_list P_((Node* left, Node* right));
! 317: extern void onig_node_str_clear P_((Node* node));
! 318: extern int onig_free_node_list P_((void));
! 319: extern int onig_names_free P_((regex_t* reg));
! 320: extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
! 321:
! 322: #ifdef ONIG_DEBUG
! 323: #ifdef USE_NAMED_GROUP
! 324: extern int onig_print_names(FILE*, regex_t*);
! 325: #endif
! 326: #endif
! 327:
! 328: #endif /* REGPARSE_H */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>