File:  [ELWIX - Embedded LightWeight unIX -] / embedaddon / sqlite3 / src / tokenize.c
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Tue Feb 21 17:04:17 2012 UTC (12 years, 8 months ago) by misho
Branches: sqlite3, MAIN
CVS tags: v3_7_10, HEAD
sqlite3

    1: /*
    2: ** 2001 September 15
    3: **
    4: ** The author disclaims copyright to this source code.  In place of
    5: ** a legal notice, here is a blessing:
    6: **
    7: **    May you do good and not evil.
    8: **    May you find forgiveness for yourself and forgive others.
    9: **    May you share freely, never taking more than you give.
   10: **
   11: *************************************************************************
   12: ** A tokenizer for SQL
   13: **
   14: ** This file contains C code that splits an SQL input string up into
   15: ** individual tokens and sends those tokens one-by-one over to the
   16: ** parser for analysis.
   17: */
   18: #include "sqliteInt.h"
   19: #include <stdlib.h>
   20: 
   21: /*
   22: ** The charMap() macro maps alphabetic characters into their
   23: ** lower-case ASCII equivalent.  On ASCII machines, this is just
   24: ** an upper-to-lower case map.  On EBCDIC machines we also need
   25: ** to adjust the encoding.  Only alphabetic characters and underscores
   26: ** need to be translated.
   27: */
   28: #ifdef SQLITE_ASCII
   29: # define charMap(X) sqlite3UpperToLower[(unsigned char)X]
   30: #endif
   31: #ifdef SQLITE_EBCDIC
   32: # define charMap(X) ebcdicToAscii[(unsigned char)X]
   33: const unsigned char ebcdicToAscii[] = {
   34: /* 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
   35:    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 0x */
   36:    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 1x */
   37:    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 2x */
   38:    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 3x */
   39:    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 4x */
   40:    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 5x */
   41:    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 95,  0,  0,  /* 6x */
   42:    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* 7x */
   43:    0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* 8x */
   44:    0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* 9x */
   45:    0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ax */
   46:    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Bx */
   47:    0, 97, 98, 99,100,101,102,103,104,105,  0,  0,  0,  0,  0,  0,  /* Cx */
   48:    0,106,107,108,109,110,111,112,113,114,  0,  0,  0,  0,  0,  0,  /* Dx */
   49:    0,  0,115,116,117,118,119,120,121,122,  0,  0,  0,  0,  0,  0,  /* Ex */
   50:    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  /* Fx */
   51: };
   52: #endif
   53: 
   54: /*
   55: ** The sqlite3KeywordCode function looks up an identifier to determine if
   56: ** it is a keyword.  If it is a keyword, the token code of that keyword is 
   57: ** returned.  If the input is not a keyword, TK_ID is returned.
   58: **
   59: ** The implementation of this routine was generated by a program,
   60: ** mkkeywordhash.c, located in the tool subdirectory of the distribution.
   61: ** The output of the mkkeywordhash.c program is written into a file
   62: ** named keywordhash.h and then included into this source file by
   63: ** the #include below.
   64: */
   65: #include "keywordhash.h"
   66: 
   67: 
   68: /*
   69: ** If X is a character that can be used in an identifier then
   70: ** IdChar(X) will be true.  Otherwise it is false.
   71: **
   72: ** For ASCII, any character with the high-order bit set is
   73: ** allowed in an identifier.  For 7-bit characters, 
   74: ** sqlite3IsIdChar[X] must be 1.
   75: **
   76: ** For EBCDIC, the rules are more complex but have the same
   77: ** end result.
   78: **
   79: ** Ticket #1066.  the SQL standard does not allow '$' in the
   80: ** middle of identifiers.  But many SQL implementations do.
   81: ** SQLite will allow '$' in identifiers for compatibility.
   82: ** But the feature is undocumented.
   83: */
   84: #ifdef SQLITE_ASCII
   85: #define IdChar(C)  ((sqlite3CtypeMap[(unsigned char)C]&0x46)!=0)
   86: #endif
   87: #ifdef SQLITE_EBCDIC
   88: const char sqlite3IsEbcdicIdChar[] = {
   89: /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
   90:     0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  /* 4x */
   91:     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,  /* 5x */
   92:     0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0,  /* 6x */
   93:     0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,  /* 7x */
   94:     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0,  /* 8x */
   95:     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0,  /* 9x */
   96:     1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,  /* Ax */
   97:     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* Bx */
   98:     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Cx */
   99:     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Dx */
  100:     0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,  /* Ex */
  101:     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0,  /* Fx */
  102: };
  103: #define IdChar(C)  (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40]))
  104: #endif
  105: 
  106: 
  107: /*
  108: ** Return the length of the token that begins at z[0]. 
  109: ** Store the token type in *tokenType before returning.
  110: */
  111: int sqlite3GetToken(const unsigned char *z, int *tokenType){
  112:   int i, c;
  113:   switch( *z ){
  114:     case ' ': case '\t': case '\n': case '\f': case '\r': {
  115:       testcase( z[0]==' ' );
  116:       testcase( z[0]=='\t' );
  117:       testcase( z[0]=='\n' );
  118:       testcase( z[0]=='\f' );
  119:       testcase( z[0]=='\r' );
  120:       for(i=1; sqlite3Isspace(z[i]); i++){}
  121:       *tokenType = TK_SPACE;
  122:       return i;
  123:     }
  124:     case '-': {
  125:       if( z[1]=='-' ){
  126:         /* IMP: R-50417-27976 -- syntax diagram for comments */
  127:         for(i=2; (c=z[i])!=0 && c!='\n'; i++){}
  128:         *tokenType = TK_SPACE;   /* IMP: R-22934-25134 */
  129:         return i;
  130:       }
  131:       *tokenType = TK_MINUS;
  132:       return 1;
  133:     }
  134:     case '(': {
  135:       *tokenType = TK_LP;
  136:       return 1;
  137:     }
  138:     case ')': {
  139:       *tokenType = TK_RP;
  140:       return 1;
  141:     }
  142:     case ';': {
  143:       *tokenType = TK_SEMI;
  144:       return 1;
  145:     }
  146:     case '+': {
  147:       *tokenType = TK_PLUS;
  148:       return 1;
  149:     }
  150:     case '*': {
  151:       *tokenType = TK_STAR;
  152:       return 1;
  153:     }
  154:     case '/': {
  155:       if( z[1]!='*' || z[2]==0 ){
  156:         *tokenType = TK_SLASH;
  157:         return 1;
  158:       }
  159:       /* IMP: R-50417-27976 -- syntax diagram for comments */
  160:       for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){}
  161:       if( c ) i++;
  162:       *tokenType = TK_SPACE;   /* IMP: R-22934-25134 */
  163:       return i;
  164:     }
  165:     case '%': {
  166:       *tokenType = TK_REM;
  167:       return 1;
  168:     }
  169:     case '=': {
  170:       *tokenType = TK_EQ;
  171:       return 1 + (z[1]=='=');
  172:     }
  173:     case '<': {
  174:       if( (c=z[1])=='=' ){
  175:         *tokenType = TK_LE;
  176:         return 2;
  177:       }else if( c=='>' ){
  178:         *tokenType = TK_NE;
  179:         return 2;
  180:       }else if( c=='<' ){
  181:         *tokenType = TK_LSHIFT;
  182:         return 2;
  183:       }else{
  184:         *tokenType = TK_LT;
  185:         return 1;
  186:       }
  187:     }
  188:     case '>': {
  189:       if( (c=z[1])=='=' ){
  190:         *tokenType = TK_GE;
  191:         return 2;
  192:       }else if( c=='>' ){
  193:         *tokenType = TK_RSHIFT;
  194:         return 2;
  195:       }else{
  196:         *tokenType = TK_GT;
  197:         return 1;
  198:       }
  199:     }
  200:     case '!': {
  201:       if( z[1]!='=' ){
  202:         *tokenType = TK_ILLEGAL;
  203:         return 2;
  204:       }else{
  205:         *tokenType = TK_NE;
  206:         return 2;
  207:       }
  208:     }
  209:     case '|': {
  210:       if( z[1]!='|' ){
  211:         *tokenType = TK_BITOR;
  212:         return 1;
  213:       }else{
  214:         *tokenType = TK_CONCAT;
  215:         return 2;
  216:       }
  217:     }
  218:     case ',': {
  219:       *tokenType = TK_COMMA;
  220:       return 1;
  221:     }
  222:     case '&': {
  223:       *tokenType = TK_BITAND;
  224:       return 1;
  225:     }
  226:     case '~': {
  227:       *tokenType = TK_BITNOT;
  228:       return 1;
  229:     }
  230:     case '`':
  231:     case '\'':
  232:     case '"': {
  233:       int delim = z[0];
  234:       testcase( delim=='`' );
  235:       testcase( delim=='\'' );
  236:       testcase( delim=='"' );
  237:       for(i=1; (c=z[i])!=0; i++){
  238:         if( c==delim ){
  239:           if( z[i+1]==delim ){
  240:             i++;
  241:           }else{
  242:             break;
  243:           }
  244:         }
  245:       }
  246:       if( c=='\'' ){
  247:         *tokenType = TK_STRING;
  248:         return i+1;
  249:       }else if( c!=0 ){
  250:         *tokenType = TK_ID;
  251:         return i+1;
  252:       }else{
  253:         *tokenType = TK_ILLEGAL;
  254:         return i;
  255:       }
  256:     }
  257:     case '.': {
  258: #ifndef SQLITE_OMIT_FLOATING_POINT
  259:       if( !sqlite3Isdigit(z[1]) )
  260: #endif
  261:       {
  262:         *tokenType = TK_DOT;
  263:         return 1;
  264:       }
  265:       /* If the next character is a digit, this is a floating point
  266:       ** number that begins with ".".  Fall thru into the next case */
  267:     }
  268:     case '0': case '1': case '2': case '3': case '4':
  269:     case '5': case '6': case '7': case '8': case '9': {
  270:       testcase( z[0]=='0' );  testcase( z[0]=='1' );  testcase( z[0]=='2' );
  271:       testcase( z[0]=='3' );  testcase( z[0]=='4' );  testcase( z[0]=='5' );
  272:       testcase( z[0]=='6' );  testcase( z[0]=='7' );  testcase( z[0]=='8' );
  273:       testcase( z[0]=='9' );
  274:       *tokenType = TK_INTEGER;
  275:       for(i=0; sqlite3Isdigit(z[i]); i++){}
  276: #ifndef SQLITE_OMIT_FLOATING_POINT
  277:       if( z[i]=='.' ){
  278:         i++;
  279:         while( sqlite3Isdigit(z[i]) ){ i++; }
  280:         *tokenType = TK_FLOAT;
  281:       }
  282:       if( (z[i]=='e' || z[i]=='E') &&
  283:            ( sqlite3Isdigit(z[i+1]) 
  284:             || ((z[i+1]=='+' || z[i+1]=='-') && sqlite3Isdigit(z[i+2]))
  285:            )
  286:       ){
  287:         i += 2;
  288:         while( sqlite3Isdigit(z[i]) ){ i++; }
  289:         *tokenType = TK_FLOAT;
  290:       }
  291: #endif
  292:       while( IdChar(z[i]) ){
  293:         *tokenType = TK_ILLEGAL;
  294:         i++;
  295:       }
  296:       return i;
  297:     }
  298:     case '[': {
  299:       for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
  300:       *tokenType = c==']' ? TK_ID : TK_ILLEGAL;
  301:       return i;
  302:     }
  303:     case '?': {
  304:       *tokenType = TK_VARIABLE;
  305:       for(i=1; sqlite3Isdigit(z[i]); i++){}
  306:       return i;
  307:     }
  308:     case '#': {
  309:       for(i=1; sqlite3Isdigit(z[i]); i++){}
  310:       if( i>1 ){
  311:         /* Parameters of the form #NNN (where NNN is a number) are used
  312:         ** internally by sqlite3NestedParse.  */
  313:         *tokenType = TK_REGISTER;
  314:         return i;
  315:       }
  316:       /* Fall through into the next case if the '#' is not followed by
  317:       ** a digit. Try to match #AAAA where AAAA is a parameter name. */
  318:     }
  319: #ifndef SQLITE_OMIT_TCL_VARIABLE
  320:     case '$':
  321: #endif
  322:     case '@':  /* For compatibility with MS SQL Server */
  323:     case ':': {
  324:       int n = 0;
  325:       testcase( z[0]=='$' );  testcase( z[0]=='@' );  testcase( z[0]==':' );
  326:       *tokenType = TK_VARIABLE;
  327:       for(i=1; (c=z[i])!=0; i++){
  328:         if( IdChar(c) ){
  329:           n++;
  330: #ifndef SQLITE_OMIT_TCL_VARIABLE
  331:         }else if( c=='(' && n>0 ){
  332:           do{
  333:             i++;
  334:           }while( (c=z[i])!=0 && !sqlite3Isspace(c) && c!=')' );
  335:           if( c==')' ){
  336:             i++;
  337:           }else{
  338:             *tokenType = TK_ILLEGAL;
  339:           }
  340:           break;
  341:         }else if( c==':' && z[i+1]==':' ){
  342:           i++;
  343: #endif
  344:         }else{
  345:           break;
  346:         }
  347:       }
  348:       if( n==0 ) *tokenType = TK_ILLEGAL;
  349:       return i;
  350:     }
  351: #ifndef SQLITE_OMIT_BLOB_LITERAL
  352:     case 'x': case 'X': {
  353:       testcase( z[0]=='x' ); testcase( z[0]=='X' );
  354:       if( z[1]=='\'' ){
  355:         *tokenType = TK_BLOB;
  356:         for(i=2; sqlite3Isxdigit(z[i]); i++){}
  357:         if( z[i]!='\'' || i%2 ){
  358:           *tokenType = TK_ILLEGAL;
  359:           while( z[i] && z[i]!='\'' ){ i++; }
  360:         }
  361:         if( z[i] ) i++;
  362:         return i;
  363:       }
  364:       /* Otherwise fall through to the next case */
  365:     }
  366: #endif
  367:     default: {
  368:       if( !IdChar(*z) ){
  369:         break;
  370:       }
  371:       for(i=1; IdChar(z[i]); i++){}
  372:       *tokenType = keywordCode((char*)z, i);
  373:       return i;
  374:     }
  375:   }
  376:   *tokenType = TK_ILLEGAL;
  377:   return 1;
  378: }
  379: 
  380: /*
  381: ** Run the parser on the given SQL string.  The parser structure is
  382: ** passed in.  An SQLITE_ status code is returned.  If an error occurs
  383: ** then an and attempt is made to write an error message into 
  384: ** memory obtained from sqlite3_malloc() and to make *pzErrMsg point to that
  385: ** error message.
  386: */
  387: int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
  388:   int nErr = 0;                   /* Number of errors encountered */
  389:   int i;                          /* Loop counter */
  390:   void *pEngine;                  /* The LEMON-generated LALR(1) parser */
  391:   int tokenType;                  /* type of the next token */
  392:   int lastTokenParsed = -1;       /* type of the previous token */
  393:   u8 enableLookaside;             /* Saved value of db->lookaside.bEnabled */
  394:   sqlite3 *db = pParse->db;       /* The database connection */
  395:   int mxSqlLen;                   /* Max length of an SQL string */
  396: 
  397: 
  398:   mxSqlLen = db->aLimit[SQLITE_LIMIT_SQL_LENGTH];
  399:   if( db->activeVdbeCnt==0 ){
  400:     db->u1.isInterrupted = 0;
  401:   }
  402:   pParse->rc = SQLITE_OK;
  403:   pParse->zTail = zSql;
  404:   i = 0;
  405:   assert( pzErrMsg!=0 );
  406:   pEngine = sqlite3ParserAlloc((void*(*)(size_t))sqlite3Malloc);
  407:   if( pEngine==0 ){
  408:     db->mallocFailed = 1;
  409:     return SQLITE_NOMEM;
  410:   }
  411:   assert( pParse->pNewTable==0 );
  412:   assert( pParse->pNewTrigger==0 );
  413:   assert( pParse->nVar==0 );
  414:   assert( pParse->nzVar==0 );
  415:   assert( pParse->azVar==0 );
  416:   enableLookaside = db->lookaside.bEnabled;
  417:   if( db->lookaside.pStart ) db->lookaside.bEnabled = 1;
  418:   while( !db->mallocFailed && zSql[i]!=0 ){
  419:     assert( i>=0 );
  420:     pParse->sLastToken.z = &zSql[i];
  421:     pParse->sLastToken.n = sqlite3GetToken((unsigned char*)&zSql[i],&tokenType);
  422:     i += pParse->sLastToken.n;
  423:     if( i>mxSqlLen ){
  424:       pParse->rc = SQLITE_TOOBIG;
  425:       break;
  426:     }
  427:     switch( tokenType ){
  428:       case TK_SPACE: {
  429:         if( db->u1.isInterrupted ){
  430:           sqlite3ErrorMsg(pParse, "interrupt");
  431:           pParse->rc = SQLITE_INTERRUPT;
  432:           goto abort_parse;
  433:         }
  434:         break;
  435:       }
  436:       case TK_ILLEGAL: {
  437:         sqlite3DbFree(db, *pzErrMsg);
  438:         *pzErrMsg = sqlite3MPrintf(db, "unrecognized token: \"%T\"",
  439:                         &pParse->sLastToken);
  440:         nErr++;
  441:         goto abort_parse;
  442:       }
  443:       case TK_SEMI: {
  444:         pParse->zTail = &zSql[i];
  445:         /* Fall thru into the default case */
  446:       }
  447:       default: {
  448:         sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse);
  449:         lastTokenParsed = tokenType;
  450:         if( pParse->rc!=SQLITE_OK ){
  451:           goto abort_parse;
  452:         }
  453:         break;
  454:       }
  455:     }
  456:   }
  457: abort_parse:
  458:   if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){
  459:     if( lastTokenParsed!=TK_SEMI ){
  460:       sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse);
  461:       pParse->zTail = &zSql[i];
  462:     }
  463:     sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse);
  464:   }
  465: #ifdef YYTRACKMAXSTACKDEPTH
  466:   sqlite3StatusSet(SQLITE_STATUS_PARSER_STACK,
  467:       sqlite3ParserStackPeak(pEngine)
  468:   );
  469: #endif /* YYDEBUG */
  470:   sqlite3ParserFree(pEngine, sqlite3_free);
  471:   db->lookaside.bEnabled = enableLookaside;
  472:   if( db->mallocFailed ){
  473:     pParse->rc = SQLITE_NOMEM;
  474:   }
  475:   if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){
  476:     sqlite3SetString(&pParse->zErrMsg, db, "%s", sqlite3ErrStr(pParse->rc));
  477:   }
  478:   assert( pzErrMsg!=0 );
  479:   if( pParse->zErrMsg ){
  480:     *pzErrMsg = pParse->zErrMsg;
  481:     sqlite3_log(pParse->rc, "%s", *pzErrMsg);
  482:     pParse->zErrMsg = 0;
  483:     nErr++;
  484:   }
  485:   if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){
  486:     sqlite3VdbeDelete(pParse->pVdbe);
  487:     pParse->pVdbe = 0;
  488:   }
  489: #ifndef SQLITE_OMIT_SHARED_CACHE
  490:   if( pParse->nested==0 ){
  491:     sqlite3DbFree(db, pParse->aTableLock);
  492:     pParse->aTableLock = 0;
  493:     pParse->nTableLock = 0;
  494:   }
  495: #endif
  496: #ifndef SQLITE_OMIT_VIRTUALTABLE
  497:   sqlite3_free(pParse->apVtabLock);
  498: #endif
  499: 
  500:   if( !IN_DECLARE_VTAB ){
  501:     /* If the pParse->declareVtab flag is set, do not delete any table 
  502:     ** structure built up in pParse->pNewTable. The calling code (see vtab.c)
  503:     ** will take responsibility for freeing the Table structure.
  504:     */
  505:     sqlite3DeleteTable(db, pParse->pNewTable);
  506:   }
  507: 
  508:   sqlite3DeleteTrigger(db, pParse->pNewTrigger);
  509:   for(i=pParse->nzVar-1; i>=0; i--) sqlite3DbFree(db, pParse->azVar[i]);
  510:   sqlite3DbFree(db, pParse->azVar);
  511:   sqlite3DbFree(db, pParse->aAlias);
  512:   while( pParse->pAinc ){
  513:     AutoincInfo *p = pParse->pAinc;
  514:     pParse->pAinc = p->pNext;
  515:     sqlite3DbFree(db, p);
  516:   }
  517:   while( pParse->pZombieTab ){
  518:     Table *p = pParse->pZombieTab;
  519:     pParse->pZombieTab = p->pNextZombie;
  520:     sqlite3DeleteTable(db, p);
  521:   }
  522:   if( nErr>0 && pParse->rc==SQLITE_OK ){
  523:     pParse->rc = SQLITE_ERROR;
  524:   }
  525:   return nErr;
  526: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>