Annotation of embedaddon/sqlite3/src/vdbesort.c, revision 1.1
1.1 ! misho 1: /*
! 2: ** 2011 July 9
! 3: **
! 4: ** The author disclaims copyright to this source code. In place of
! 5: ** a legal notice, here is a blessing:
! 6: **
! 7: ** May you do good and not evil.
! 8: ** May you find forgiveness for yourself and forgive others.
! 9: ** May you share freely, never taking more than you give.
! 10: **
! 11: *************************************************************************
! 12: ** This file contains code for the VdbeSorter object, used in concert with
! 13: ** a VdbeCursor to sort large numbers of keys (as may be required, for
! 14: ** example, by CREATE INDEX statements on tables too large to fit in main
! 15: ** memory).
! 16: */
! 17:
! 18: #include "sqliteInt.h"
! 19: #include "vdbeInt.h"
! 20:
! 21: #ifndef SQLITE_OMIT_MERGE_SORT
! 22:
! 23: typedef struct VdbeSorterIter VdbeSorterIter;
! 24: typedef struct SorterRecord SorterRecord;
! 25:
! 26: /*
! 27: ** NOTES ON DATA STRUCTURE USED FOR N-WAY MERGES:
! 28: **
! 29: ** As keys are added to the sorter, they are written to disk in a series
! 30: ** of sorted packed-memory-arrays (PMAs). The size of each PMA is roughly
! 31: ** the same as the cache-size allowed for temporary databases. In order
! 32: ** to allow the caller to extract keys from the sorter in sorted order,
! 33: ** all PMAs currently stored on disk must be merged together. This comment
! 34: ** describes the data structure used to do so. The structure supports
! 35: ** merging any number of arrays in a single pass with no redundant comparison
! 36: ** operations.
! 37: **
! 38: ** The aIter[] array contains an iterator for each of the PMAs being merged.
! 39: ** An aIter[] iterator either points to a valid key or else is at EOF. For
! 40: ** the purposes of the paragraphs below, we assume that the array is actually
! 41: ** N elements in size, where N is the smallest power of 2 greater to or equal
! 42: ** to the number of iterators being merged. The extra aIter[] elements are
! 43: ** treated as if they are empty (always at EOF).
! 44: **
! 45: ** The aTree[] array is also N elements in size. The value of N is stored in
! 46: ** the VdbeSorter.nTree variable.
! 47: **
! 48: ** The final (N/2) elements of aTree[] contain the results of comparing
! 49: ** pairs of iterator keys together. Element i contains the result of
! 50: ** comparing aIter[2*i-N] and aIter[2*i-N+1]. Whichever key is smaller, the
! 51: ** aTree element is set to the index of it.
! 52: **
! 53: ** For the purposes of this comparison, EOF is considered greater than any
! 54: ** other key value. If the keys are equal (only possible with two EOF
! 55: ** values), it doesn't matter which index is stored.
! 56: **
! 57: ** The (N/4) elements of aTree[] that preceed the final (N/2) described
! 58: ** above contains the index of the smallest of each block of 4 iterators.
! 59: ** And so on. So that aTree[1] contains the index of the iterator that
! 60: ** currently points to the smallest key value. aTree[0] is unused.
! 61: **
! 62: ** Example:
! 63: **
! 64: ** aIter[0] -> Banana
! 65: ** aIter[1] -> Feijoa
! 66: ** aIter[2] -> Elderberry
! 67: ** aIter[3] -> Currant
! 68: ** aIter[4] -> Grapefruit
! 69: ** aIter[5] -> Apple
! 70: ** aIter[6] -> Durian
! 71: ** aIter[7] -> EOF
! 72: **
! 73: ** aTree[] = { X, 5 0, 5 0, 3, 5, 6 }
! 74: **
! 75: ** The current element is "Apple" (the value of the key indicated by
! 76: ** iterator 5). When the Next() operation is invoked, iterator 5 will
! 77: ** be advanced to the next key in its segment. Say the next key is
! 78: ** "Eggplant":
! 79: **
! 80: ** aIter[5] -> Eggplant
! 81: **
! 82: ** The contents of aTree[] are updated first by comparing the new iterator
! 83: ** 5 key to the current key of iterator 4 (still "Grapefruit"). The iterator
! 84: ** 5 value is still smaller, so aTree[6] is set to 5. And so on up the tree.
! 85: ** The value of iterator 6 - "Durian" - is now smaller than that of iterator
! 86: ** 5, so aTree[3] is set to 6. Key 0 is smaller than key 6 (Banana<Durian),
! 87: ** so the value written into element 1 of the array is 0. As follows:
! 88: **
! 89: ** aTree[] = { X, 0 0, 6 0, 3, 5, 6 }
! 90: **
! 91: ** In other words, each time we advance to the next sorter element, log2(N)
! 92: ** key comparison operations are required, where N is the number of segments
! 93: ** being merged (rounded up to the next power of 2).
! 94: */
! 95: struct VdbeSorter {
! 96: int nInMemory; /* Current size of pRecord list as PMA */
! 97: int nTree; /* Used size of aTree/aIter (power of 2) */
! 98: VdbeSorterIter *aIter; /* Array of iterators to merge */
! 99: int *aTree; /* Current state of incremental merge */
! 100: i64 iWriteOff; /* Current write offset within file pTemp1 */
! 101: i64 iReadOff; /* Current read offset within file pTemp1 */
! 102: sqlite3_file *pTemp1; /* PMA file 1 */
! 103: int nPMA; /* Number of PMAs stored in pTemp1 */
! 104: SorterRecord *pRecord; /* Head of in-memory record list */
! 105: int mnPmaSize; /* Minimum PMA size, in bytes */
! 106: int mxPmaSize; /* Maximum PMA size, in bytes. 0==no limit */
! 107: UnpackedRecord *pUnpacked; /* Used to unpack keys */
! 108: };
! 109:
! 110: /*
! 111: ** The following type is an iterator for a PMA. It caches the current key in
! 112: ** variables nKey/aKey. If the iterator is at EOF, pFile==0.
! 113: */
! 114: struct VdbeSorterIter {
! 115: i64 iReadOff; /* Current read offset */
! 116: i64 iEof; /* 1 byte past EOF for this iterator */
! 117: sqlite3_file *pFile; /* File iterator is reading from */
! 118: int nAlloc; /* Bytes of space at aAlloc */
! 119: u8 *aAlloc; /* Allocated space */
! 120: int nKey; /* Number of bytes in key */
! 121: u8 *aKey; /* Pointer to current key */
! 122: };
! 123:
! 124: /*
! 125: ** A structure to store a single record. All in-memory records are connected
! 126: ** together into a linked list headed at VdbeSorter.pRecord using the
! 127: ** SorterRecord.pNext pointer.
! 128: */
! 129: struct SorterRecord {
! 130: void *pVal;
! 131: int nVal;
! 132: SorterRecord *pNext;
! 133: };
! 134:
! 135: /* Minimum allowable value for the VdbeSorter.nWorking variable */
! 136: #define SORTER_MIN_WORKING 10
! 137:
! 138: /* Maximum number of segments to merge in a single pass. */
! 139: #define SORTER_MAX_MERGE_COUNT 16
! 140:
! 141: /*
! 142: ** Free all memory belonging to the VdbeSorterIter object passed as the second
! 143: ** argument. All structure fields are set to zero before returning.
! 144: */
! 145: static void vdbeSorterIterZero(sqlite3 *db, VdbeSorterIter *pIter){
! 146: sqlite3DbFree(db, pIter->aAlloc);
! 147: memset(pIter, 0, sizeof(VdbeSorterIter));
! 148: }
! 149:
! 150: /*
! 151: ** Advance iterator pIter to the next key in its PMA. Return SQLITE_OK if
! 152: ** no error occurs, or an SQLite error code if one does.
! 153: */
! 154: static int vdbeSorterIterNext(
! 155: sqlite3 *db, /* Database handle (for sqlite3DbMalloc() ) */
! 156: VdbeSorterIter *pIter /* Iterator to advance */
! 157: ){
! 158: int rc; /* Return Code */
! 159: int nRead; /* Number of bytes read */
! 160: int nRec = 0; /* Size of record in bytes */
! 161: int iOff = 0; /* Size of serialized size varint in bytes */
! 162:
! 163: assert( pIter->iEof>=pIter->iReadOff );
! 164: if( pIter->iEof-pIter->iReadOff>5 ){
! 165: nRead = 5;
! 166: }else{
! 167: nRead = (int)(pIter->iEof - pIter->iReadOff);
! 168: }
! 169: if( nRead<=0 ){
! 170: /* This is an EOF condition */
! 171: vdbeSorterIterZero(db, pIter);
! 172: return SQLITE_OK;
! 173: }
! 174:
! 175: rc = sqlite3OsRead(pIter->pFile, pIter->aAlloc, nRead, pIter->iReadOff);
! 176: if( rc==SQLITE_OK ){
! 177: iOff = getVarint32(pIter->aAlloc, nRec);
! 178: if( (iOff+nRec)>nRead ){
! 179: int nRead2; /* Number of extra bytes to read */
! 180: if( (iOff+nRec)>pIter->nAlloc ){
! 181: int nNew = pIter->nAlloc*2;
! 182: while( (iOff+nRec)>nNew ) nNew = nNew*2;
! 183: pIter->aAlloc = sqlite3DbReallocOrFree(db, pIter->aAlloc, nNew);
! 184: if( !pIter->aAlloc ) return SQLITE_NOMEM;
! 185: pIter->nAlloc = nNew;
! 186: }
! 187:
! 188: nRead2 = iOff + nRec - nRead;
! 189: rc = sqlite3OsRead(
! 190: pIter->pFile, &pIter->aAlloc[nRead], nRead2, pIter->iReadOff+nRead
! 191: );
! 192: }
! 193: }
! 194:
! 195: assert( rc!=SQLITE_OK || nRec>0 );
! 196: pIter->iReadOff += iOff+nRec;
! 197: pIter->nKey = nRec;
! 198: pIter->aKey = &pIter->aAlloc[iOff];
! 199: return rc;
! 200: }
! 201:
! 202: /*
! 203: ** Write a single varint, value iVal, to file-descriptor pFile. Return
! 204: ** SQLITE_OK if successful, or an SQLite error code if some error occurs.
! 205: **
! 206: ** The value of *piOffset when this function is called is used as the byte
! 207: ** offset in file pFile to write to. Before returning, *piOffset is
! 208: ** incremented by the number of bytes written.
! 209: */
! 210: static int vdbeSorterWriteVarint(
! 211: sqlite3_file *pFile, /* File to write to */
! 212: i64 iVal, /* Value to write as a varint */
! 213: i64 *piOffset /* IN/OUT: Write offset in file pFile */
! 214: ){
! 215: u8 aVarint[9]; /* Buffer large enough for a varint */
! 216: int nVarint; /* Number of used bytes in varint */
! 217: int rc; /* Result of write() call */
! 218:
! 219: nVarint = sqlite3PutVarint(aVarint, iVal);
! 220: rc = sqlite3OsWrite(pFile, aVarint, nVarint, *piOffset);
! 221: *piOffset += nVarint;
! 222:
! 223: return rc;
! 224: }
! 225:
! 226: /*
! 227: ** Read a single varint from file-descriptor pFile. Return SQLITE_OK if
! 228: ** successful, or an SQLite error code if some error occurs.
! 229: **
! 230: ** The value of *piOffset when this function is called is used as the
! 231: ** byte offset in file pFile from whence to read the varint. If successful
! 232: ** (i.e. if no IO error occurs), then *piOffset is set to the offset of
! 233: ** the first byte past the end of the varint before returning. *piVal is
! 234: ** set to the integer value read. If an error occurs, the final values of
! 235: ** both *piOffset and *piVal are undefined.
! 236: */
! 237: static int vdbeSorterReadVarint(
! 238: sqlite3_file *pFile, /* File to read from */
! 239: i64 *piOffset, /* IN/OUT: Read offset in pFile */
! 240: i64 *piVal /* OUT: Value read from file */
! 241: ){
! 242: u8 aVarint[9]; /* Buffer large enough for a varint */
! 243: i64 iOff = *piOffset; /* Offset in file to read from */
! 244: int rc; /* Return code */
! 245:
! 246: rc = sqlite3OsRead(pFile, aVarint, 9, iOff);
! 247: if( rc==SQLITE_OK ){
! 248: *piOffset += getVarint(aVarint, (u64 *)piVal);
! 249: }
! 250:
! 251: return rc;
! 252: }
! 253:
! 254: /*
! 255: ** Initialize iterator pIter to scan through the PMA stored in file pFile
! 256: ** starting at offset iStart and ending at offset iEof-1. This function
! 257: ** leaves the iterator pointing to the first key in the PMA (or EOF if the
! 258: ** PMA is empty).
! 259: */
! 260: static int vdbeSorterIterInit(
! 261: sqlite3 *db, /* Database handle */
! 262: VdbeSorter *pSorter, /* Sorter object */
! 263: i64 iStart, /* Start offset in pFile */
! 264: VdbeSorterIter *pIter, /* Iterator to populate */
! 265: i64 *pnByte /* IN/OUT: Increment this value by PMA size */
! 266: ){
! 267: int rc;
! 268:
! 269: assert( pSorter->iWriteOff>iStart );
! 270: assert( pIter->aAlloc==0 );
! 271: pIter->pFile = pSorter->pTemp1;
! 272: pIter->iReadOff = iStart;
! 273: pIter->nAlloc = 128;
! 274: pIter->aAlloc = (u8 *)sqlite3DbMallocRaw(db, pIter->nAlloc);
! 275: if( !pIter->aAlloc ){
! 276: rc = SQLITE_NOMEM;
! 277: }else{
! 278: i64 nByte; /* Total size of PMA in bytes */
! 279: rc = vdbeSorterReadVarint(pSorter->pTemp1, &pIter->iReadOff, &nByte);
! 280: *pnByte += nByte;
! 281: pIter->iEof = pIter->iReadOff + nByte;
! 282: }
! 283: if( rc==SQLITE_OK ){
! 284: rc = vdbeSorterIterNext(db, pIter);
! 285: }
! 286: return rc;
! 287: }
! 288:
! 289:
! 290: /*
! 291: ** Compare key1 (buffer pKey1, size nKey1 bytes) with key2 (buffer pKey2,
! 292: ** size nKey2 bytes). Argument pKeyInfo supplies the collation functions
! 293: ** used by the comparison. If an error occurs, return an SQLite error code.
! 294: ** Otherwise, return SQLITE_OK and set *pRes to a negative, zero or positive
! 295: ** value, depending on whether key1 is smaller, equal to or larger than key2.
! 296: **
! 297: ** If the bOmitRowid argument is non-zero, assume both keys end in a rowid
! 298: ** field. For the purposes of the comparison, ignore it. Also, if bOmitRowid
! 299: ** is true and key1 contains even a single NULL value, it is considered to
! 300: ** be less than key2. Even if key2 also contains NULL values.
! 301: **
! 302: ** If pKey2 is passed a NULL pointer, then it is assumed that the pCsr->aSpace
! 303: ** has been allocated and contains an unpacked record that is used as key2.
! 304: */
! 305: static void vdbeSorterCompare(
! 306: VdbeCursor *pCsr, /* Cursor object (for pKeyInfo) */
! 307: int bOmitRowid, /* Ignore rowid field at end of keys */
! 308: void *pKey1, int nKey1, /* Left side of comparison */
! 309: void *pKey2, int nKey2, /* Right side of comparison */
! 310: int *pRes /* OUT: Result of comparison */
! 311: ){
! 312: KeyInfo *pKeyInfo = pCsr->pKeyInfo;
! 313: VdbeSorter *pSorter = pCsr->pSorter;
! 314: UnpackedRecord *r2 = pSorter->pUnpacked;
! 315: int i;
! 316:
! 317: if( pKey2 ){
! 318: sqlite3VdbeRecordUnpack(pKeyInfo, nKey2, pKey2, r2);
! 319: }
! 320:
! 321: if( bOmitRowid ){
! 322: r2->nField = pKeyInfo->nField;
! 323: assert( r2->nField>0 );
! 324: for(i=0; i<r2->nField; i++){
! 325: if( r2->aMem[i].flags & MEM_Null ){
! 326: *pRes = -1;
! 327: return;
! 328: }
! 329: }
! 330: r2->flags |= UNPACKED_PREFIX_MATCH;
! 331: }
! 332:
! 333: *pRes = sqlite3VdbeRecordCompare(nKey1, pKey1, r2);
! 334: }
! 335:
! 336: /*
! 337: ** This function is called to compare two iterator keys when merging
! 338: ** multiple b-tree segments. Parameter iOut is the index of the aTree[]
! 339: ** value to recalculate.
! 340: */
! 341: static int vdbeSorterDoCompare(VdbeCursor *pCsr, int iOut){
! 342: VdbeSorter *pSorter = pCsr->pSorter;
! 343: int i1;
! 344: int i2;
! 345: int iRes;
! 346: VdbeSorterIter *p1;
! 347: VdbeSorterIter *p2;
! 348:
! 349: assert( iOut<pSorter->nTree && iOut>0 );
! 350:
! 351: if( iOut>=(pSorter->nTree/2) ){
! 352: i1 = (iOut - pSorter->nTree/2) * 2;
! 353: i2 = i1 + 1;
! 354: }else{
! 355: i1 = pSorter->aTree[iOut*2];
! 356: i2 = pSorter->aTree[iOut*2+1];
! 357: }
! 358:
! 359: p1 = &pSorter->aIter[i1];
! 360: p2 = &pSorter->aIter[i2];
! 361:
! 362: if( p1->pFile==0 ){
! 363: iRes = i2;
! 364: }else if( p2->pFile==0 ){
! 365: iRes = i1;
! 366: }else{
! 367: int res;
! 368: assert( pCsr->pSorter->pUnpacked!=0 ); /* allocated in vdbeSorterMerge() */
! 369: vdbeSorterCompare(
! 370: pCsr, 0, p1->aKey, p1->nKey, p2->aKey, p2->nKey, &res
! 371: );
! 372: if( res<=0 ){
! 373: iRes = i1;
! 374: }else{
! 375: iRes = i2;
! 376: }
! 377: }
! 378:
! 379: pSorter->aTree[iOut] = iRes;
! 380: return SQLITE_OK;
! 381: }
! 382:
! 383: /*
! 384: ** Initialize the temporary index cursor just opened as a sorter cursor.
! 385: */
! 386: int sqlite3VdbeSorterInit(sqlite3 *db, VdbeCursor *pCsr){
! 387: int pgsz; /* Page size of main database */
! 388: int mxCache; /* Cache size */
! 389: VdbeSorter *pSorter; /* The new sorter */
! 390: char *d; /* Dummy */
! 391:
! 392: assert( pCsr->pKeyInfo && pCsr->pBt==0 );
! 393: pCsr->pSorter = pSorter = sqlite3DbMallocZero(db, sizeof(VdbeSorter));
! 394: if( pSorter==0 ){
! 395: return SQLITE_NOMEM;
! 396: }
! 397:
! 398: pSorter->pUnpacked = sqlite3VdbeAllocUnpackedRecord(pCsr->pKeyInfo, 0, 0, &d);
! 399: if( pSorter->pUnpacked==0 ) return SQLITE_NOMEM;
! 400: assert( pSorter->pUnpacked==(UnpackedRecord *)d );
! 401:
! 402: if( !sqlite3TempInMemory(db) ){
! 403: pgsz = sqlite3BtreeGetPageSize(db->aDb[0].pBt);
! 404: pSorter->mnPmaSize = SORTER_MIN_WORKING * pgsz;
! 405: mxCache = db->aDb[0].pSchema->cache_size;
! 406: if( mxCache<SORTER_MIN_WORKING ) mxCache = SORTER_MIN_WORKING;
! 407: pSorter->mxPmaSize = mxCache * pgsz;
! 408: }
! 409:
! 410: return SQLITE_OK;
! 411: }
! 412:
! 413: /*
! 414: ** Free the list of sorted records starting at pRecord.
! 415: */
! 416: static void vdbeSorterRecordFree(sqlite3 *db, SorterRecord *pRecord){
! 417: SorterRecord *p;
! 418: SorterRecord *pNext;
! 419: for(p=pRecord; p; p=pNext){
! 420: pNext = p->pNext;
! 421: sqlite3DbFree(db, p);
! 422: }
! 423: }
! 424:
! 425: /*
! 426: ** Free any cursor components allocated by sqlite3VdbeSorterXXX routines.
! 427: */
! 428: void sqlite3VdbeSorterClose(sqlite3 *db, VdbeCursor *pCsr){
! 429: VdbeSorter *pSorter = pCsr->pSorter;
! 430: if( pSorter ){
! 431: if( pSorter->aIter ){
! 432: int i;
! 433: for(i=0; i<pSorter->nTree; i++){
! 434: vdbeSorterIterZero(db, &pSorter->aIter[i]);
! 435: }
! 436: sqlite3DbFree(db, pSorter->aIter);
! 437: }
! 438: if( pSorter->pTemp1 ){
! 439: sqlite3OsCloseFree(pSorter->pTemp1);
! 440: }
! 441: vdbeSorterRecordFree(db, pSorter->pRecord);
! 442: sqlite3DbFree(db, pSorter->pUnpacked);
! 443: sqlite3DbFree(db, pSorter);
! 444: pCsr->pSorter = 0;
! 445: }
! 446: }
! 447:
! 448: /*
! 449: ** Allocate space for a file-handle and open a temporary file. If successful,
! 450: ** set *ppFile to point to the malloc'd file-handle and return SQLITE_OK.
! 451: ** Otherwise, set *ppFile to 0 and return an SQLite error code.
! 452: */
! 453: static int vdbeSorterOpenTempFile(sqlite3 *db, sqlite3_file **ppFile){
! 454: int dummy;
! 455: return sqlite3OsOpenMalloc(db->pVfs, 0, ppFile,
! 456: SQLITE_OPEN_TEMP_JOURNAL |
! 457: SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE |
! 458: SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE, &dummy
! 459: );
! 460: }
! 461:
! 462: /*
! 463: ** Merge the two sorted lists p1 and p2 into a single list.
! 464: ** Set *ppOut to the head of the new list.
! 465: */
! 466: static void vdbeSorterMerge(
! 467: VdbeCursor *pCsr, /* For pKeyInfo */
! 468: SorterRecord *p1, /* First list to merge */
! 469: SorterRecord *p2, /* Second list to merge */
! 470: SorterRecord **ppOut /* OUT: Head of merged list */
! 471: ){
! 472: SorterRecord *pFinal = 0;
! 473: SorterRecord **pp = &pFinal;
! 474: void *pVal2 = p2 ? p2->pVal : 0;
! 475:
! 476: while( p1 && p2 ){
! 477: int res;
! 478: vdbeSorterCompare(pCsr, 0, p1->pVal, p1->nVal, pVal2, p2->nVal, &res);
! 479: if( res<=0 ){
! 480: *pp = p1;
! 481: pp = &p1->pNext;
! 482: p1 = p1->pNext;
! 483: pVal2 = 0;
! 484: }else{
! 485: *pp = p2;
! 486: pp = &p2->pNext;
! 487: p2 = p2->pNext;
! 488: if( p2==0 ) break;
! 489: pVal2 = p2->pVal;
! 490: }
! 491: }
! 492: *pp = p1 ? p1 : p2;
! 493: *ppOut = pFinal;
! 494: }
! 495:
! 496: /*
! 497: ** Sort the linked list of records headed at pCsr->pRecord. Return SQLITE_OK
! 498: ** if successful, or an SQLite error code (i.e. SQLITE_NOMEM) if an error
! 499: ** occurs.
! 500: */
! 501: static int vdbeSorterSort(VdbeCursor *pCsr){
! 502: int i;
! 503: SorterRecord **aSlot;
! 504: SorterRecord *p;
! 505: VdbeSorter *pSorter = pCsr->pSorter;
! 506:
! 507: aSlot = (SorterRecord **)sqlite3MallocZero(64 * sizeof(SorterRecord *));
! 508: if( !aSlot ){
! 509: return SQLITE_NOMEM;
! 510: }
! 511:
! 512: p = pSorter->pRecord;
! 513: while( p ){
! 514: SorterRecord *pNext = p->pNext;
! 515: p->pNext = 0;
! 516: for(i=0; aSlot[i]; i++){
! 517: vdbeSorterMerge(pCsr, p, aSlot[i], &p);
! 518: aSlot[i] = 0;
! 519: }
! 520: aSlot[i] = p;
! 521: p = pNext;
! 522: }
! 523:
! 524: p = 0;
! 525: for(i=0; i<64; i++){
! 526: vdbeSorterMerge(pCsr, p, aSlot[i], &p);
! 527: }
! 528: pSorter->pRecord = p;
! 529:
! 530: sqlite3_free(aSlot);
! 531: return SQLITE_OK;
! 532: }
! 533:
! 534:
! 535: /*
! 536: ** Write the current contents of the in-memory linked-list to a PMA. Return
! 537: ** SQLITE_OK if successful, or an SQLite error code otherwise.
! 538: **
! 539: ** The format of a PMA is:
! 540: **
! 541: ** * A varint. This varint contains the total number of bytes of content
! 542: ** in the PMA (not including the varint itself).
! 543: **
! 544: ** * One or more records packed end-to-end in order of ascending keys.
! 545: ** Each record consists of a varint followed by a blob of data (the
! 546: ** key). The varint is the number of bytes in the blob of data.
! 547: */
! 548: static int vdbeSorterListToPMA(sqlite3 *db, VdbeCursor *pCsr){
! 549: int rc = SQLITE_OK; /* Return code */
! 550: VdbeSorter *pSorter = pCsr->pSorter;
! 551:
! 552: if( pSorter->nInMemory==0 ){
! 553: assert( pSorter->pRecord==0 );
! 554: return rc;
! 555: }
! 556:
! 557: rc = vdbeSorterSort(pCsr);
! 558:
! 559: /* If the first temporary PMA file has not been opened, open it now. */
! 560: if( rc==SQLITE_OK && pSorter->pTemp1==0 ){
! 561: rc = vdbeSorterOpenTempFile(db, &pSorter->pTemp1);
! 562: assert( rc!=SQLITE_OK || pSorter->pTemp1 );
! 563: assert( pSorter->iWriteOff==0 );
! 564: assert( pSorter->nPMA==0 );
! 565: }
! 566:
! 567: if( rc==SQLITE_OK ){
! 568: i64 iOff = pSorter->iWriteOff;
! 569: SorterRecord *p;
! 570: SorterRecord *pNext = 0;
! 571: static const char eightZeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
! 572:
! 573: pSorter->nPMA++;
! 574: rc = vdbeSorterWriteVarint(pSorter->pTemp1, pSorter->nInMemory, &iOff);
! 575: for(p=pSorter->pRecord; rc==SQLITE_OK && p; p=pNext){
! 576: pNext = p->pNext;
! 577: rc = vdbeSorterWriteVarint(pSorter->pTemp1, p->nVal, &iOff);
! 578:
! 579: if( rc==SQLITE_OK ){
! 580: rc = sqlite3OsWrite(pSorter->pTemp1, p->pVal, p->nVal, iOff);
! 581: iOff += p->nVal;
! 582: }
! 583:
! 584: sqlite3DbFree(db, p);
! 585: }
! 586:
! 587: /* This assert verifies that unless an error has occurred, the size of
! 588: ** the PMA on disk is the same as the expected size stored in
! 589: ** pSorter->nInMemory. */
! 590: assert( rc!=SQLITE_OK || pSorter->nInMemory==(
! 591: iOff-pSorter->iWriteOff-sqlite3VarintLen(pSorter->nInMemory)
! 592: ));
! 593:
! 594: pSorter->iWriteOff = iOff;
! 595: if( rc==SQLITE_OK ){
! 596: /* Terminate each file with 8 extra bytes so that from any offset
! 597: ** in the file we can always read 9 bytes without a SHORT_READ error */
! 598: rc = sqlite3OsWrite(pSorter->pTemp1, eightZeros, 8, iOff);
! 599: }
! 600: pSorter->pRecord = p;
! 601: }
! 602:
! 603: return rc;
! 604: }
! 605:
! 606: /*
! 607: ** Add a record to the sorter.
! 608: */
! 609: int sqlite3VdbeSorterWrite(
! 610: sqlite3 *db, /* Database handle */
! 611: VdbeCursor *pCsr, /* Sorter cursor */
! 612: Mem *pVal /* Memory cell containing record */
! 613: ){
! 614: VdbeSorter *pSorter = pCsr->pSorter;
! 615: int rc = SQLITE_OK; /* Return Code */
! 616: SorterRecord *pNew; /* New list element */
! 617:
! 618: assert( pSorter );
! 619: pSorter->nInMemory += sqlite3VarintLen(pVal->n) + pVal->n;
! 620:
! 621: pNew = (SorterRecord *)sqlite3DbMallocRaw(db, pVal->n + sizeof(SorterRecord));
! 622: if( pNew==0 ){
! 623: rc = SQLITE_NOMEM;
! 624: }else{
! 625: pNew->pVal = (void *)&pNew[1];
! 626: memcpy(pNew->pVal, pVal->z, pVal->n);
! 627: pNew->nVal = pVal->n;
! 628: pNew->pNext = pSorter->pRecord;
! 629: pSorter->pRecord = pNew;
! 630: }
! 631:
! 632: /* See if the contents of the sorter should now be written out. They
! 633: ** are written out when either of the following are true:
! 634: **
! 635: ** * The total memory allocated for the in-memory list is greater
! 636: ** than (page-size * cache-size), or
! 637: **
! 638: ** * The total memory allocated for the in-memory list is greater
! 639: ** than (page-size * 10) and sqlite3HeapNearlyFull() returns true.
! 640: */
! 641: if( rc==SQLITE_OK && pSorter->mxPmaSize>0 && (
! 642: (pSorter->nInMemory>pSorter->mxPmaSize)
! 643: || (pSorter->nInMemory>pSorter->mnPmaSize && sqlite3HeapNearlyFull())
! 644: )){
! 645: rc = vdbeSorterListToPMA(db, pCsr);
! 646: pSorter->nInMemory = 0;
! 647: }
! 648:
! 649: return rc;
! 650: }
! 651:
! 652: /*
! 653: ** Helper function for sqlite3VdbeSorterRewind().
! 654: */
! 655: static int vdbeSorterInitMerge(
! 656: sqlite3 *db, /* Database handle */
! 657: VdbeCursor *pCsr, /* Cursor handle for this sorter */
! 658: i64 *pnByte /* Sum of bytes in all opened PMAs */
! 659: ){
! 660: VdbeSorter *pSorter = pCsr->pSorter;
! 661: int rc = SQLITE_OK; /* Return code */
! 662: int i; /* Used to iterator through aIter[] */
! 663: i64 nByte = 0; /* Total bytes in all opened PMAs */
! 664:
! 665: /* Initialize the iterators. */
! 666: for(i=0; i<SORTER_MAX_MERGE_COUNT; i++){
! 667: VdbeSorterIter *pIter = &pSorter->aIter[i];
! 668: rc = vdbeSorterIterInit(db, pSorter, pSorter->iReadOff, pIter, &nByte);
! 669: pSorter->iReadOff = pIter->iEof;
! 670: assert( rc!=SQLITE_OK || pSorter->iReadOff<=pSorter->iWriteOff );
! 671: if( rc!=SQLITE_OK || pSorter->iReadOff>=pSorter->iWriteOff ) break;
! 672: }
! 673:
! 674: /* Initialize the aTree[] array. */
! 675: for(i=pSorter->nTree-1; rc==SQLITE_OK && i>0; i--){
! 676: rc = vdbeSorterDoCompare(pCsr, i);
! 677: }
! 678:
! 679: *pnByte = nByte;
! 680: return rc;
! 681: }
! 682:
! 683: /*
! 684: ** Once the sorter has been populated, this function is called to prepare
! 685: ** for iterating through its contents in sorted order.
! 686: */
! 687: int sqlite3VdbeSorterRewind(sqlite3 *db, VdbeCursor *pCsr, int *pbEof){
! 688: VdbeSorter *pSorter = pCsr->pSorter;
! 689: int rc; /* Return code */
! 690: sqlite3_file *pTemp2 = 0; /* Second temp file to use */
! 691: i64 iWrite2 = 0; /* Write offset for pTemp2 */
! 692: int nIter; /* Number of iterators used */
! 693: int nByte; /* Bytes of space required for aIter/aTree */
! 694: int N = 2; /* Power of 2 >= nIter */
! 695:
! 696: assert( pSorter );
! 697:
! 698: /* If no data has been written to disk, then do not do so now. Instead,
! 699: ** sort the VdbeSorter.pRecord list. The vdbe layer will read data directly
! 700: ** from the in-memory list. */
! 701: if( pSorter->nPMA==0 ){
! 702: *pbEof = !pSorter->pRecord;
! 703: assert( pSorter->aTree==0 );
! 704: return vdbeSorterSort(pCsr);
! 705: }
! 706:
! 707: /* Write the current b-tree to a PMA. Close the b-tree cursor. */
! 708: rc = vdbeSorterListToPMA(db, pCsr);
! 709: if( rc!=SQLITE_OK ) return rc;
! 710:
! 711: /* Allocate space for aIter[] and aTree[]. */
! 712: nIter = pSorter->nPMA;
! 713: if( nIter>SORTER_MAX_MERGE_COUNT ) nIter = SORTER_MAX_MERGE_COUNT;
! 714: assert( nIter>0 );
! 715: while( N<nIter ) N += N;
! 716: nByte = N * (sizeof(int) + sizeof(VdbeSorterIter));
! 717: pSorter->aIter = (VdbeSorterIter *)sqlite3DbMallocZero(db, nByte);
! 718: if( !pSorter->aIter ) return SQLITE_NOMEM;
! 719: pSorter->aTree = (int *)&pSorter->aIter[N];
! 720: pSorter->nTree = N;
! 721:
! 722: do {
! 723: int iNew; /* Index of new, merged, PMA */
! 724:
! 725: for(iNew=0;
! 726: rc==SQLITE_OK && iNew*SORTER_MAX_MERGE_COUNT<pSorter->nPMA;
! 727: iNew++
! 728: ){
! 729: i64 nWrite; /* Number of bytes in new PMA */
! 730:
! 731: /* If there are SORTER_MAX_MERGE_COUNT or less PMAs in file pTemp1,
! 732: ** initialize an iterator for each of them and break out of the loop.
! 733: ** These iterators will be incrementally merged as the VDBE layer calls
! 734: ** sqlite3VdbeSorterNext().
! 735: **
! 736: ** Otherwise, if pTemp1 contains more than SORTER_MAX_MERGE_COUNT PMAs,
! 737: ** initialize interators for SORTER_MAX_MERGE_COUNT of them. These PMAs
! 738: ** are merged into a single PMA that is written to file pTemp2.
! 739: */
! 740: rc = vdbeSorterInitMerge(db, pCsr, &nWrite);
! 741: assert( rc!=SQLITE_OK || pSorter->aIter[ pSorter->aTree[1] ].pFile );
! 742: if( rc!=SQLITE_OK || pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){
! 743: break;
! 744: }
! 745:
! 746: /* Open the second temp file, if it is not already open. */
! 747: if( pTemp2==0 ){
! 748: assert( iWrite2==0 );
! 749: rc = vdbeSorterOpenTempFile(db, &pTemp2);
! 750: }
! 751:
! 752: if( rc==SQLITE_OK ){
! 753: rc = vdbeSorterWriteVarint(pTemp2, nWrite, &iWrite2);
! 754: }
! 755:
! 756: if( rc==SQLITE_OK ){
! 757: int bEof = 0;
! 758: while( rc==SQLITE_OK && bEof==0 ){
! 759: int nToWrite;
! 760: VdbeSorterIter *pIter = &pSorter->aIter[ pSorter->aTree[1] ];
! 761: assert( pIter->pFile );
! 762: nToWrite = pIter->nKey + sqlite3VarintLen(pIter->nKey);
! 763: rc = sqlite3OsWrite(pTemp2, pIter->aAlloc, nToWrite, iWrite2);
! 764: iWrite2 += nToWrite;
! 765: if( rc==SQLITE_OK ){
! 766: rc = sqlite3VdbeSorterNext(db, pCsr, &bEof);
! 767: }
! 768: }
! 769: }
! 770: }
! 771:
! 772: if( pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){
! 773: break;
! 774: }else{
! 775: sqlite3_file *pTmp = pSorter->pTemp1;
! 776: pSorter->nPMA = iNew;
! 777: pSorter->pTemp1 = pTemp2;
! 778: pTemp2 = pTmp;
! 779: pSorter->iWriteOff = iWrite2;
! 780: pSorter->iReadOff = 0;
! 781: iWrite2 = 0;
! 782: }
! 783: }while( rc==SQLITE_OK );
! 784:
! 785: if( pTemp2 ){
! 786: sqlite3OsCloseFree(pTemp2);
! 787: }
! 788: *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
! 789: return rc;
! 790: }
! 791:
! 792: /*
! 793: ** Advance to the next element in the sorter.
! 794: */
! 795: int sqlite3VdbeSorterNext(sqlite3 *db, VdbeCursor *pCsr, int *pbEof){
! 796: VdbeSorter *pSorter = pCsr->pSorter;
! 797: int rc; /* Return code */
! 798:
! 799: if( pSorter->aTree ){
! 800: int iPrev = pSorter->aTree[1];/* Index of iterator to advance */
! 801: int i; /* Index of aTree[] to recalculate */
! 802:
! 803: rc = vdbeSorterIterNext(db, &pSorter->aIter[iPrev]);
! 804: for(i=(pSorter->nTree+iPrev)/2; rc==SQLITE_OK && i>0; i=i/2){
! 805: rc = vdbeSorterDoCompare(pCsr, i);
! 806: }
! 807:
! 808: *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
! 809: }else{
! 810: SorterRecord *pFree = pSorter->pRecord;
! 811: pSorter->pRecord = pFree->pNext;
! 812: pFree->pNext = 0;
! 813: vdbeSorterRecordFree(db, pFree);
! 814: *pbEof = !pSorter->pRecord;
! 815: rc = SQLITE_OK;
! 816: }
! 817: return rc;
! 818: }
! 819:
! 820: /*
! 821: ** Return a pointer to a buffer owned by the sorter that contains the
! 822: ** current key.
! 823: */
! 824: static void *vdbeSorterRowkey(
! 825: VdbeSorter *pSorter, /* Sorter object */
! 826: int *pnKey /* OUT: Size of current key in bytes */
! 827: ){
! 828: void *pKey;
! 829: if( pSorter->aTree ){
! 830: VdbeSorterIter *pIter;
! 831: pIter = &pSorter->aIter[ pSorter->aTree[1] ];
! 832: *pnKey = pIter->nKey;
! 833: pKey = pIter->aKey;
! 834: }else{
! 835: *pnKey = pSorter->pRecord->nVal;
! 836: pKey = pSorter->pRecord->pVal;
! 837: }
! 838: return pKey;
! 839: }
! 840:
! 841: /*
! 842: ** Copy the current sorter key into the memory cell pOut.
! 843: */
! 844: int sqlite3VdbeSorterRowkey(VdbeCursor *pCsr, Mem *pOut){
! 845: VdbeSorter *pSorter = pCsr->pSorter;
! 846: void *pKey; int nKey; /* Sorter key to copy into pOut */
! 847:
! 848: pKey = vdbeSorterRowkey(pSorter, &nKey);
! 849: if( sqlite3VdbeMemGrow(pOut, nKey, 0) ){
! 850: return SQLITE_NOMEM;
! 851: }
! 852: pOut->n = nKey;
! 853: MemSetTypeFlag(pOut, MEM_Blob);
! 854: memcpy(pOut->z, pKey, nKey);
! 855:
! 856: return SQLITE_OK;
! 857: }
! 858:
! 859: /*
! 860: ** Compare the key in memory cell pVal with the key that the sorter cursor
! 861: ** passed as the first argument currently points to. For the purposes of
! 862: ** the comparison, ignore the rowid field at the end of each record.
! 863: **
! 864: ** If an error occurs, return an SQLite error code (i.e. SQLITE_NOMEM).
! 865: ** Otherwise, set *pRes to a negative, zero or positive value if the
! 866: ** key in pVal is smaller than, equal to or larger than the current sorter
! 867: ** key.
! 868: */
! 869: int sqlite3VdbeSorterCompare(
! 870: VdbeCursor *pCsr, /* Sorter cursor */
! 871: Mem *pVal, /* Value to compare to current sorter key */
! 872: int *pRes /* OUT: Result of comparison */
! 873: ){
! 874: VdbeSorter *pSorter = pCsr->pSorter;
! 875: void *pKey; int nKey; /* Sorter key to compare pVal with */
! 876:
! 877: pKey = vdbeSorterRowkey(pSorter, &nKey);
! 878: vdbeSorterCompare(pCsr, 1, pVal->z, pVal->n, pKey, nKey, pRes);
! 879: return SQLITE_OK;
! 880: }
! 881:
! 882: #endif /* #ifndef SQLITE_OMIT_MERGE_SORT */
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>