embedaddon/sqlite3/src/tokenize.c - view

File: [ELWIX - Embedded LightWeight unIX -] / embedaddon / sqlite3 / src / tokenize.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 17:04:17 2012 UTC (14 years, 4 months ago) by misho
Branches: sqlite3, MAIN
CVS tags: v3_7_10, HEAD

sqlite3

1: /* 2: ** 2001 September 15 3: ** 4: ** The author disclaims copyright to this source code. In place of 5: ** a legal notice, here is a blessing: 6: ** 7: ** May you do good and not evil. 8: ** May you find forgiveness for yourself and forgive others. 9: ** May you share freely, never taking more than you give. 10: ** 11: ************************************************************************* 12: ** An tokenizer for SQL 13: ** 14: ** This file contains C code that splits an SQL input string up into 15: ** individual tokens and sends those tokens one-by-one over to the 16: ** parser for analysis. 17: */ 18: #include "sqliteInt.h" 19: #include <stdlib.h> 20: 21: /* 22: ** The charMap() macro maps alphabetic characters into their 23: ** lower-case ASCII equivalent. On ASCII machines, this is just 24: ** an upper-to-lower case map. On EBCDIC machines we also need 25: ** to adjust the encoding. Only alphabetic characters and underscores 26: ** need to be translated. 27: */ 28: #ifdef SQLITE_ASCII 29: # define charMap(X) sqlite3UpperToLower[(unsigned char)X] 30: #endif 31: #ifdef SQLITE_EBCDIC 32: # define charMap(X) ebcdicToAscii[(unsigned char)X] 33: const unsigned char ebcdicToAscii[] = { 34: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ 35: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ 36: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ 37: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ 38: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3x */ 39: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */ 40: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5x */ 41: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 0, /* 6x */ 42: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7x */ 43: 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* 8x */ 44: 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* 9x */ 45: 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ax */ 46: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ 47: 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* Cx */ 48: 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* Dx */ 49: 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ex */ 50: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Fx */ 51: }; 52: #endif 53: 54: /* 55: ** The sqlite3KeywordCode function looks up an identifier to determine if 56: ** it is a keyword. If it is a keyword, the token code of that keyword is 57: ** returned. If the input is not a keyword, TK_ID is returned. 58: ** 59: ** The implementation of this routine was generated by a program, 60: ** mkkeywordhash.h, located in the tool subdirectory of the distribution. 61: ** The output of the mkkeywordhash.c program is written into a file 62: ** named keywordhash.h and then included into this source file by 63: ** the #include below. 64: */ 65: #include "keywordhash.h" 66: 67: 68: /* 69: ** If X is a character that can be used in an identifier then 70: ** IdChar(X) will be true. Otherwise it is false. 71: ** 72: ** For ASCII, any character with the high-order bit set is 73: ** allowed in an identifier. For 7-bit characters, 74: ** sqlite3IsIdChar[X] must be 1. 75: ** 76: ** For EBCDIC, the rules are more complex but have the same 77: ** end result. 78: ** 79: ** Ticket #1066. the SQL standard does not allow '$' in the 80: ** middle of identfiers. But many SQL implementations do. 81: ** SQLite will allow '$' in identifiers for compatibility. 82: ** But the feature is undocumented. 83: */ 84: #ifdef SQLITE_ASCII 85: #define IdChar(C) ((sqlite3CtypeMap[(unsigned char)C]&0x46)!=0) 86: #endif 87: #ifdef SQLITE_EBCDIC 88: const char sqlite3IsEbcdicIdChar[] = { 89: /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ 90: 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 4x */ 91: 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, /* 5x */ 92: 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, /* 6x */ 93: 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, /* 7x */ 94: 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, /* 8x */ 95: 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, /* 9x */ 96: 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, /* Ax */ 97: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */ 98: 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Cx */ 99: 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Dx */ 100: 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Ex */ 101: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, /* Fx */ 102: }; 103: #define IdChar(C) (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40])) 104: #endif 105: 106: 107: /* 108: ** Return the length of the token that begins at z[0]. 109: ** Store the token type in *tokenType before returning. 110: */ 111: int sqlite3GetToken(const unsigned char *z, int *tokenType){ 112: int i, c; 113: switch( *z ){ 114: case ' ': case '\t': case '\n': case '\f': case '\r': { 115: testcase( z[0]==' ' ); 116: testcase( z[0]=='\t' ); 117: testcase( z[0]=='\n' ); 118: testcase( z[0]=='\f' ); 119: testcase( z[0]=='\r' ); 120: for(i=1; sqlite3Isspace(z[i]); i++){} 121: *tokenType = TK_SPACE; 122: return i; 123: } 124: case '-': { 125: if( z[1]=='-' ){ 126: /* IMP: R-50417-27976 -- syntax diagram for comments */ 127: for(i=2; (c=z[i])!=0 && c!='\n'; i++){} 128: *tokenType = TK_SPACE; /* IMP: R-22934-25134 */ 129: return i; 130: } 131: *tokenType = TK_MINUS; 132: return 1; 133: } 134: case '(': { 135: *tokenType = TK_LP; 136: return 1; 137: } 138: case ')': { 139: *tokenType = TK_RP; 140: return 1; 141: } 142: case ';': { 143: *tokenType = TK_SEMI; 144: return 1; 145: } 146: case '+': { 147: *tokenType = TK_PLUS; 148: return 1; 149: } 150: case '*': { 151: *tokenType = TK_STAR; 152: return 1; 153: } 154: case '/': { 155: if( z[1]!='*' || z[2]==0 ){ 156: *tokenType = TK_SLASH; 157: return 1; 158: } 159: /* IMP: R-50417-27976 -- syntax diagram for comments */ 160: for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){} 161: if( c ) i++; 162: *tokenType = TK_SPACE; /* IMP: R-22934-25134 */ 163: return i; 164: } 165: case '%': { 166: *tokenType = TK_REM; 167: return 1; 168: } 169: case '=': { 170: *tokenType = TK_EQ; 171: return 1 + (z[1]=='='); 172: } 173: case '<': { 174: if( (c=z[1])=='=' ){ 175: *tokenType = TK_LE; 176: return 2; 177: }else if( c=='>' ){ 178: *tokenType = TK_NE; 179: return 2; 180: }else if( c=='<' ){ 181: *tokenType = TK_LSHIFT; 182: return 2; 183: }else{ 184: *tokenType = TK_LT; 185: return 1; 186: } 187: } 188: case '>': { 189: if( (c=z[1])=='=' ){ 190: *tokenType = TK_GE; 191: return 2; 192: }else if( c=='>' ){ 193: *tokenType = TK_RSHIFT; 194: return 2; 195: }else{ 196: *tokenType = TK_GT; 197: return 1; 198: } 199: } 200: case '!': { 201: if( z[1]!='=' ){ 202: *tokenType = TK_ILLEGAL; 203: return 2; 204: }else{ 205: *tokenType = TK_NE; 206: return 2; 207: } 208: } 209: case '|': { 210: if( z[1]!='|' ){ 211: *tokenType = TK_BITOR; 212: return 1; 213: }else{ 214: *tokenType = TK_CONCAT; 215: return 2; 216: } 217: } 218: case ',': { 219: *tokenType = TK_COMMA; 220: return 1; 221: } 222: case '&': { 223: *tokenType = TK_BITAND; 224: return 1; 225: } 226: case '~': { 227: *tokenType = TK_BITNOT; 228: return 1; 229: } 230: case '`': 231: case '\'': 232: case '"': { 233: int delim = z[0]; 234: testcase( delim=='`' ); 235: testcase( delim=='\'' ); 236: testcase( delim=='"' ); 237: for(i=1; (c=z[i])!=0; i++){ 238: if( c==delim ){ 239: if( z[i+1]==delim ){ 240: i++; 241: }else{ 242: break; 243: } 244: } 245: } 246: if( c=='\'' ){ 247: *tokenType = TK_STRING; 248: return i+1; 249: }else if( c!=0 ){ 250: *tokenType = TK_ID; 251: return i+1; 252: }else{ 253: *tokenType = TK_ILLEGAL; 254: return i; 255: } 256: } 257: case '.': { 258: #ifndef SQLITE_OMIT_FLOATING_POINT 259: if( !sqlite3Isdigit(z[1]) ) 260: #endif 261: { 262: *tokenType = TK_DOT; 263: return 1; 264: } 265: /* If the next character is a digit, this is a floating point 266: ** number that begins with ".". Fall thru into the next case */ 267: } 268: case '0': case '1': case '2': case '3': case '4': 269: case '5': case '6': case '7': case '8': case '9': { 270: testcase( z[0]=='0' ); testcase( z[0]=='1' ); testcase( z[0]=='2' ); 271: testcase( z[0]=='3' ); testcase( z[0]=='4' ); testcase( z[0]=='5' ); 272: testcase( z[0]=='6' ); testcase( z[0]=='7' ); testcase( z[0]=='8' ); 273: testcase( z[0]=='9' ); 274: *tokenType = TK_INTEGER; 275: for(i=0; sqlite3Isdigit(z[i]); i++){} 276: #ifndef SQLITE_OMIT_FLOATING_POINT 277: if( z[i]=='.' ){ 278: i++; 279: while( sqlite3Isdigit(z[i]) ){ i++; } 280: *tokenType = TK_FLOAT; 281: } 282: if( (z[i]=='e' || z[i]=='E') && 283: ( sqlite3Isdigit(z[i+1]) 284: || ((z[i+1]=='+' || z[i+1]=='-') && sqlite3Isdigit(z[i+2])) 285: ) 286: ){ 287: i += 2; 288: while( sqlite3Isdigit(z[i]) ){ i++; } 289: *tokenType = TK_FLOAT; 290: } 291: #endif 292: while( IdChar(z[i]) ){ 293: *tokenType = TK_ILLEGAL; 294: i++; 295: } 296: return i; 297: } 298: case '[': { 299: for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){} 300: *tokenType = c==']' ? TK_ID : TK_ILLEGAL; 301: return i; 302: } 303: case '?': { 304: *tokenType = TK_VARIABLE; 305: for(i=1; sqlite3Isdigit(z[i]); i++){} 306: return i; 307: } 308: case '#': { 309: for(i=1; sqlite3Isdigit(z[i]); i++){} 310: if( i>1 ){ 311: /* Parameters of the form #NNN (where NNN is a number) are used 312: ** internally by sqlite3NestedParse. */ 313: *tokenType = TK_REGISTER; 314: return i; 315: } 316: /* Fall through into the next case if the '#' is not followed by 317: ** a digit. Try to match #AAAA where AAAA is a parameter name. */ 318: } 319: #ifndef SQLITE_OMIT_TCL_VARIABLE 320: case '$': 321: #endif 322: case '@': /* For compatibility with MS SQL Server */ 323: case ':': { 324: int n = 0; 325: testcase( z[0]=='$' ); testcase( z[0]=='@' ); testcase( z[0]==':' ); 326: *tokenType = TK_VARIABLE; 327: for(i=1; (c=z[i])!=0; i++){ 328: if( IdChar(c) ){ 329: n++; 330: #ifndef SQLITE_OMIT_TCL_VARIABLE 331: }else if( c=='(' && n>0 ){ 332: do{ 333: i++; 334: }while( (c=z[i])!=0 && !sqlite3Isspace(c) && c!=')' ); 335: if( c==')' ){ 336: i++; 337: }else{ 338: *tokenType = TK_ILLEGAL; 339: } 340: break; 341: }else if( c==':' && z[i+1]==':' ){ 342: i++; 343: #endif 344: }else{ 345: break; 346: } 347: } 348: if( n==0 ) *tokenType = TK_ILLEGAL; 349: return i; 350: } 351: #ifndef SQLITE_OMIT_BLOB_LITERAL 352: case 'x': case 'X': { 353: testcase( z[0]=='x' ); testcase( z[0]=='X' ); 354: if( z[1]=='\'' ){ 355: *tokenType = TK_BLOB; 356: for(i=2; sqlite3Isxdigit(z[i]); i++){} 357: if( z[i]!='\'' || i%2 ){ 358: *tokenType = TK_ILLEGAL; 359: while( z[i] && z[i]!='\'' ){ i++; } 360: } 361: if( z[i] ) i++; 362: return i; 363: } 364: /* Otherwise fall through to the next case */ 365: } 366: #endif 367: default: { 368: if( !IdChar(*z) ){ 369: break; 370: } 371: for(i=1; IdChar(z[i]); i++){} 372: *tokenType = keywordCode((char*)z, i); 373: return i; 374: } 375: } 376: *tokenType = TK_ILLEGAL; 377: return 1; 378: } 379: 380: /* 381: ** Run the parser on the given SQL string. The parser structure is 382: ** passed in. An SQLITE_ status code is returned. If an error occurs 383: ** then an and attempt is made to write an error message into 384: ** memory obtained from sqlite3_malloc() and to make *pzErrMsg point to that 385: ** error message. 386: */ 387: int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){ 388: int nErr = 0; /* Number of errors encountered */ 389: int i; /* Loop counter */ 390: void *pEngine; /* The LEMON-generated LALR(1) parser */ 391: int tokenType; /* type of the next token */ 392: int lastTokenParsed = -1; /* type of the previous token */ 393: u8 enableLookaside; /* Saved value of db->lookaside.bEnabled */ 394: sqlite3 *db = pParse->db; /* The database connection */ 395: int mxSqlLen; /* Max length of an SQL string */ 396: 397: 398: mxSqlLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH]; 399: if( db->activeVdbeCnt==0 ){ 400: db->u1.isInterrupted = 0; 401: } 402: pParse->rc = SQLITE_OK; 403: pParse->zTail = zSql; 404: i = 0; 405: assert( pzErrMsg!=0 ); 406: pEngine = sqlite3ParserAlloc((void*(*)(size_t))sqlite3Malloc); 407: if( pEngine==0 ){ 408: db->mallocFailed = 1; 409: return SQLITE_NOMEM; 410: } 411: assert( pParse->pNewTable==0 ); 412: assert( pParse->pNewTrigger==0 ); 413: assert( pParse->nVar==0 ); 414: assert( pParse->nzVar==0 ); 415: assert( pParse->azVar==0 ); 416: enableLookaside = db->lookaside.bEnabled; 417: if( db->lookaside.pStart ) db->lookaside.bEnabled = 1; 418: while( !db->mallocFailed && zSql[i]!=0 ){ 419: assert( i>=0 ); 420: pParse->sLastToken.z = &zSql[i]; 421: pParse->sLastToken.n = sqlite3GetToken((unsigned char*)&zSql[i],&tokenType); 422: i += pParse->sLastToken.n; 423: if( i>mxSqlLen ){ 424: pParse->rc = SQLITE_TOOBIG; 425: break; 426: } 427: switch( tokenType ){ 428: case TK_SPACE: { 429: if( db->u1.isInterrupted ){ 430: sqlite3ErrorMsg(pParse, "interrupt"); 431: pParse->rc = SQLITE_INTERRUPT; 432: goto abort_parse; 433: } 434: break; 435: } 436: case TK_ILLEGAL: { 437: sqlite3DbFree(db, *pzErrMsg); 438: *pzErrMsg = sqlite3MPrintf(db, "unrecognized token: \"%T\"", 439: &pParse->sLastToken); 440: nErr++; 441: goto abort_parse; 442: } 443: case TK_SEMI: { 444: pParse->zTail = &zSql[i]; 445: /* Fall thru into the default case */ 446: } 447: default: { 448: sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse); 449: lastTokenParsed = tokenType; 450: if( pParse->rc!=SQLITE_OK ){ 451: goto abort_parse; 452: } 453: break; 454: } 455: } 456: } 457: abort_parse: 458: if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){ 459: if( lastTokenParsed!=TK_SEMI ){ 460: sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse); 461: pParse->zTail = &zSql[i]; 462: } 463: sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse); 464: } 465: #ifdef YYTRACKMAXSTACKDEPTH 466: sqlite3StatusSet(SQLITE_STATUS_PARSER_STACK, 467: sqlite3ParserStackPeak(pEngine) 468: ); 469: #endif /* YYDEBUG */ 470: sqlite3ParserFree(pEngine, sqlite3_free); 471: db->lookaside.bEnabled = enableLookaside; 472: if( db->mallocFailed ){ 473: pParse->rc = SQLITE_NOMEM; 474: } 475: if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ 476: sqlite3SetString(&pParse->zErrMsg, db, "%s", sqlite3ErrStr(pParse->rc)); 477: } 478: assert( pzErrMsg!=0 ); 479: if( pParse->zErrMsg ){ 480: *pzErrMsg = pParse->zErrMsg; 481: sqlite3_log(pParse->rc, "%s", *pzErrMsg); 482: pParse->zErrMsg = 0; 483: nErr++; 484: } 485: if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){ 486: sqlite3VdbeDelete(pParse->pVdbe); 487: pParse->pVdbe = 0; 488: } 489: #ifndef SQLITE_OMIT_SHARED_CACHE 490: if( pParse->nested==0 ){ 491: sqlite3DbFree(db, pParse->aTableLock); 492: pParse->aTableLock = 0; 493: pParse->nTableLock = 0; 494: } 495: #endif 496: #ifndef SQLITE_OMIT_VIRTUALTABLE 497: sqlite3_free(pParse->apVtabLock); 498: #endif 499: 500: if( !IN_DECLARE_VTAB ){ 501: /* If the pParse->declareVtab flag is set, do not delete any table 502: ** structure built up in pParse->pNewTable. The calling code (see vtab.c) 503: ** will take responsibility for freeing the Table structure. 504: */ 505: sqlite3DeleteTable(db, pParse->pNewTable); 506: } 507: 508: sqlite3DeleteTrigger(db, pParse->pNewTrigger); 509: for(i=pParse->nzVar-1; i>=0; i--) sqlite3DbFree(db, pParse->azVar[i]); 510: sqlite3DbFree(db, pParse->azVar); 511: sqlite3DbFree(db, pParse->aAlias); 512: while( pParse->pAinc ){ 513: AutoincInfo *p = pParse->pAinc; 514: pParse->pAinc = p->pNext; 515: sqlite3DbFree(db, p); 516: } 517: while( pParse->pZombieTab ){ 518: Table *p = pParse->pZombieTab; 519: pParse->pZombieTab = p->pNextZombie; 520: sqlite3DeleteTable(db, p); 521: } 522: if( nErr>0 && pParse->rc==SQLITE_OK ){ 523: pParse->rc = SQLITE_ERROR; 524: } 525: return nErr; 526: }