Annotation of embedaddon/php/ext/sqlite/libsqlite/src/btree.c, revision 1.1

1.1     ! misho       1: /*
        !             2: ** 2001 September 15
        !             3: **
        !             4: ** The author disclaims copyright to this source code.  In place of
        !             5: ** a legal notice, here is a blessing:
        !             6: **
        !             7: **    May you do good and not evil.
        !             8: **    May you find forgiveness for yourself and forgive others.
        !             9: **    May you share freely, never taking more than you give.
        !            10: **
        !            11: *************************************************************************
        !            12: ** $Id: btree.c 195361 2005-09-07 15:11:33Z iliaa $
        !            13: **
        !            14: ** This file implements a external (disk-based) database using BTrees.
        !            15: ** For a detailed discussion of BTrees, refer to
        !            16: **
        !            17: **     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
        !            18: **     "Sorting And Searching", pages 473-480. Addison-Wesley
        !            19: **     Publishing Company, Reading, Massachusetts.
        !            20: **
        !            21: ** The basic idea is that each page of the file contains N database
        !            22: ** entries and N+1 pointers to subpages.
        !            23: **
        !            24: **   ----------------------------------------------------------------
        !            25: **   |  Ptr(0) | Key(0) | Ptr(1) | Key(1) | ... | Key(N) | Ptr(N+1) |
        !            26: **   ----------------------------------------------------------------
        !            27: **
        !            28: ** All of the keys on the page that Ptr(0) points to have values less
        !            29: ** than Key(0).  All of the keys on page Ptr(1) and its subpages have
        !            30: ** values greater than Key(0) and less than Key(1).  All of the keys
        !            31: ** on Ptr(N+1) and its subpages have values greater than Key(N).  And
        !            32: ** so forth.
        !            33: **
        !            34: ** Finding a particular key requires reading O(log(M)) pages from the 
        !            35: ** disk where M is the number of entries in the tree.
        !            36: **
        !            37: ** In this implementation, a single file can hold one or more separate 
        !            38: ** BTrees.  Each BTree is identified by the index of its root page.  The
        !            39: ** key and data for any entry are combined to form the "payload".  Up to
        !            40: ** MX_LOCAL_PAYLOAD bytes of payload can be carried directly on the
        !            41: ** database page.  If the payload is larger than MX_LOCAL_PAYLOAD bytes
        !            42: ** then surplus bytes are stored on overflow pages.  The payload for an
        !            43: ** entry and the preceding pointer are combined to form a "Cell".  Each 
        !            44: ** page has a small header which contains the Ptr(N+1) pointer.
        !            45: **
        !            46: ** The first page of the file contains a magic string used to verify that
        !            47: ** the file really is a valid BTree database, a pointer to a list of unused
        !            48: ** pages in the file, and some meta information.  The root of the first
        !            49: ** BTree begins on page 2 of the file.  (Pages are numbered beginning with
        !            50: ** 1, not 0.)  Thus a minimum database contains 2 pages.
        !            51: */
        !            52: #include "sqliteInt.h"
        !            53: #include "pager.h"
        !            54: #include "btree.h"
        !            55: #include <assert.h>
        !            56: 
        !            57: /* Forward declarations */
        !            58: static BtOps sqliteBtreeOps;
        !            59: static BtCursorOps sqliteBtreeCursorOps;
        !            60: 
        !            61: /*
        !            62: ** Macros used for byteswapping.  B is a pointer to the Btree
        !            63: ** structure.  This is needed to access the Btree.needSwab boolean
        !            64: ** in order to tell if byte swapping is needed or not.
        !            65: ** X is an unsigned integer.  SWAB16 byte swaps a 16-bit integer.
        !            66: ** SWAB32 byteswaps a 32-bit integer.
        !            67: */
        !            68: #define SWAB16(B,X)   ((B)->needSwab? swab16((u16)X) : ((u16)X))
        !            69: #define SWAB32(B,X)   ((B)->needSwab? swab32(X) : (X))
        !            70: #define SWAB_ADD(B,X,A) \
        !            71:    if((B)->needSwab){ X=swab32(swab32(X)+A); }else{ X += (A); }
        !            72: 
        !            73: /*
        !            74: ** The following global variable - available only if SQLITE_TEST is
        !            75: ** defined - is used to determine whether new databases are created in
        !            76: ** native byte order or in non-native byte order.  Non-native byte order
        !            77: ** databases are created for testing purposes only.  Under normal operation,
        !            78: ** only native byte-order databases should be created, but we should be
        !            79: ** able to read or write existing databases regardless of the byteorder.
        !            80: */
        !            81: #ifdef SQLITE_TEST
        !            82: int btree_native_byte_order = 1;
        !            83: #else
        !            84: # define btree_native_byte_order 1
        !            85: #endif
        !            86: 
        !            87: /*
        !            88: ** Forward declarations of structures used only in this file.
        !            89: */
        !            90: typedef struct PageOne PageOne;
        !            91: typedef struct MemPage MemPage;
        !            92: typedef struct PageHdr PageHdr;
        !            93: typedef struct Cell Cell;
        !            94: typedef struct CellHdr CellHdr;
        !            95: typedef struct FreeBlk FreeBlk;
        !            96: typedef struct OverflowPage OverflowPage;
        !            97: typedef struct FreelistInfo FreelistInfo;
        !            98: 
        !            99: /*
        !           100: ** All structures on a database page are aligned to 4-byte boundries.
        !           101: ** This routine rounds up a number of bytes to the next multiple of 4.
        !           102: **
        !           103: ** This might need to change for computer architectures that require
        !           104: ** and 8-byte alignment boundry for structures.
        !           105: */
        !           106: #define ROUNDUP(X)  ((X+3) & ~3)
        !           107: 
        !           108: /*
        !           109: ** This is a magic string that appears at the beginning of every
        !           110: ** SQLite database in order to identify the file as a real database.
        !           111: */
        !           112: static const char zMagicHeader[] = 
        !           113:    "** This file contains an SQLite 2.1 database **";
        !           114: #define MAGIC_SIZE (sizeof(zMagicHeader))
        !           115: 
        !           116: /*
        !           117: ** This is a magic integer also used to test the integrity of the database
        !           118: ** file.  This integer is used in addition to the string above so that
        !           119: ** if the file is written on a little-endian architecture and read
        !           120: ** on a big-endian architectures (or vice versa) we can detect the
        !           121: ** problem.
        !           122: **
        !           123: ** The number used was obtained at random and has no special
        !           124: ** significance other than the fact that it represents a different
        !           125: ** integer on little-endian and big-endian machines.
        !           126: */
        !           127: #define MAGIC 0xdae37528
        !           128: 
        !           129: /*
        !           130: ** The first page of the database file contains a magic header string
        !           131: ** to identify the file as an SQLite database file.  It also contains
        !           132: ** a pointer to the first free page of the file.  Page 2 contains the
        !           133: ** root of the principle BTree.  The file might contain other BTrees
        !           134: ** rooted on pages above 2.
        !           135: **
        !           136: ** The first page also contains SQLITE_N_BTREE_META integers that
        !           137: ** can be used by higher-level routines.
        !           138: **
        !           139: ** Remember that pages are numbered beginning with 1.  (See pager.c
        !           140: ** for additional information.)  Page 0 does not exist and a page
        !           141: ** number of 0 is used to mean "no such page".
        !           142: */
        !           143: struct PageOne {
        !           144:   char zMagic[MAGIC_SIZE]; /* String that identifies the file as a database */
        !           145:   int iMagic;              /* Integer to verify correct byte order */
        !           146:   Pgno freeList;           /* First free page in a list of all free pages */
        !           147:   int nFree;               /* Number of pages on the free list */
        !           148:   int aMeta[SQLITE_N_BTREE_META-1];  /* User defined integers */
        !           149: };
        !           150: 
        !           151: /*
        !           152: ** Each database page has a header that is an instance of this
        !           153: ** structure.
        !           154: **
        !           155: ** PageHdr.firstFree is 0 if there is no free space on this page.
        !           156: ** Otherwise, PageHdr.firstFree is the index in MemPage.u.aDisk[] of a 
        !           157: ** FreeBlk structure that describes the first block of free space.  
        !           158: ** All free space is defined by a linked list of FreeBlk structures.
        !           159: **
        !           160: ** Data is stored in a linked list of Cell structures.  PageHdr.firstCell
        !           161: ** is the index into MemPage.u.aDisk[] of the first cell on the page.  The
        !           162: ** Cells are kept in sorted order.
        !           163: **
        !           164: ** A Cell contains all information about a database entry and a pointer
        !           165: ** to a child page that contains other entries less than itself.  In
        !           166: ** other words, the i-th Cell contains both Ptr(i) and Key(i).  The
        !           167: ** right-most pointer of the page is contained in PageHdr.rightChild.
        !           168: */
        !           169: struct PageHdr {
        !           170:   Pgno rightChild;  /* Child page that comes after all cells on this page */
        !           171:   u16 firstCell;    /* Index in MemPage.u.aDisk[] of the first cell */
        !           172:   u16 firstFree;    /* Index in MemPage.u.aDisk[] of the first free block */
        !           173: };
        !           174: 
        !           175: /*
        !           176: ** Entries on a page of the database are called "Cells".  Each Cell
        !           177: ** has a header and data.  This structure defines the header.  The
        !           178: ** key and data (collectively the "payload") follow this header on
        !           179: ** the database page.
        !           180: **
        !           181: ** A definition of the complete Cell structure is given below.  The
        !           182: ** header for the cell must be defined first in order to do some
        !           183: ** of the sizing #defines that follow.
        !           184: */
        !           185: struct CellHdr {
        !           186:   Pgno leftChild; /* Child page that comes before this cell */
        !           187:   u16 nKey;       /* Number of bytes in the key */
        !           188:   u16 iNext;      /* Index in MemPage.u.aDisk[] of next cell in sorted order */
        !           189:   u8 nKeyHi;      /* Upper 8 bits of key size for keys larger than 64K bytes */
        !           190:   u8 nDataHi;     /* Upper 8 bits of data size when the size is more than 64K */
        !           191:   u16 nData;      /* Number of bytes of data */
        !           192: };
        !           193: 
        !           194: /*
        !           195: ** The key and data size are split into a lower 16-bit segment and an
        !           196: ** upper 8-bit segment in order to pack them together into a smaller
        !           197: ** space.  The following macros reassembly a key or data size back
        !           198: ** into an integer.
        !           199: */
        !           200: #define NKEY(b,h)  (SWAB16(b,h.nKey) + h.nKeyHi*65536)
        !           201: #define NDATA(b,h) (SWAB16(b,h.nData) + h.nDataHi*65536)
        !           202: 
        !           203: /*
        !           204: ** The minimum size of a complete Cell.  The Cell must contain a header
        !           205: ** and at least 4 bytes of payload.
        !           206: */
        !           207: #define MIN_CELL_SIZE  (sizeof(CellHdr)+4)
        !           208: 
        !           209: /*
        !           210: ** The maximum number of database entries that can be held in a single
        !           211: ** page of the database. 
        !           212: */
        !           213: #define MX_CELL ((SQLITE_USABLE_SIZE-sizeof(PageHdr))/MIN_CELL_SIZE)
        !           214: 
        !           215: /*
        !           216: ** The amount of usable space on a single page of the BTree.  This is the
        !           217: ** page size minus the overhead of the page header.
        !           218: */
        !           219: #define USABLE_SPACE  (SQLITE_USABLE_SIZE - sizeof(PageHdr))
        !           220: 
        !           221: /*
        !           222: ** The maximum amount of payload (in bytes) that can be stored locally for
        !           223: ** a database entry.  If the entry contains more data than this, the
        !           224: ** extra goes onto overflow pages.
        !           225: **
        !           226: ** This number is chosen so that at least 4 cells will fit on every page.
        !           227: */
        !           228: #define MX_LOCAL_PAYLOAD ((USABLE_SPACE/4-(sizeof(CellHdr)+sizeof(Pgno)))&~3)
        !           229: 
        !           230: /*
        !           231: ** Data on a database page is stored as a linked list of Cell structures.
        !           232: ** Both the key and the data are stored in aPayload[].  The key always comes
        !           233: ** first.  The aPayload[] field grows as necessary to hold the key and data,
        !           234: ** up to a maximum of MX_LOCAL_PAYLOAD bytes.  If the size of the key and
        !           235: ** data combined exceeds MX_LOCAL_PAYLOAD bytes, then Cell.ovfl is the
        !           236: ** page number of the first overflow page.
        !           237: **
        !           238: ** Though this structure is fixed in size, the Cell on the database
        !           239: ** page varies in size.  Every cell has a CellHdr and at least 4 bytes
        !           240: ** of payload space.  Additional payload bytes (up to the maximum of
        !           241: ** MX_LOCAL_PAYLOAD) and the Cell.ovfl value are allocated only as
        !           242: ** needed.
        !           243: */
        !           244: struct Cell {
        !           245:   CellHdr h;                        /* The cell header */
        !           246:   char aPayload[MX_LOCAL_PAYLOAD];  /* Key and data */
        !           247:   Pgno ovfl;                        /* The first overflow page */
        !           248: };
        !           249: 
        !           250: /*
        !           251: ** Free space on a page is remembered using a linked list of the FreeBlk
        !           252: ** structures.  Space on a database page is allocated in increments of
        !           253: ** at least 4 bytes and is always aligned to a 4-byte boundry.  The
        !           254: ** linked list of FreeBlks is always kept in order by address.
        !           255: */
        !           256: struct FreeBlk {
        !           257:   u16 iSize;      /* Number of bytes in this block of free space */
        !           258:   u16 iNext;      /* Index in MemPage.u.aDisk[] of the next free block */
        !           259: };
        !           260: 
        !           261: /*
        !           262: ** The number of bytes of payload that will fit on a single overflow page.
        !           263: */
        !           264: #define OVERFLOW_SIZE (SQLITE_USABLE_SIZE-sizeof(Pgno))
        !           265: 
        !           266: /*
        !           267: ** When the key and data for a single entry in the BTree will not fit in
        !           268: ** the MX_LOCAL_PAYLOAD bytes of space available on the database page,
        !           269: ** then all extra bytes are written to a linked list of overflow pages.
        !           270: ** Each overflow page is an instance of the following structure.
        !           271: **
        !           272: ** Unused pages in the database are also represented by instances of
        !           273: ** the OverflowPage structure.  The PageOne.freeList field is the
        !           274: ** page number of the first page in a linked list of unused database
        !           275: ** pages.
        !           276: */
        !           277: struct OverflowPage {
        !           278:   Pgno iNext;
        !           279:   char aPayload[OVERFLOW_SIZE];
        !           280: };
        !           281: 
        !           282: /*
        !           283: ** The PageOne.freeList field points to a linked list of overflow pages
        !           284: ** hold information about free pages.  The aPayload section of each
        !           285: ** overflow page contains an instance of the following structure.  The
        !           286: ** aFree[] array holds the page number of nFree unused pages in the disk
        !           287: ** file.
        !           288: */
        !           289: struct FreelistInfo {
        !           290:   int nFree;
        !           291:   Pgno aFree[(OVERFLOW_SIZE-sizeof(int))/sizeof(Pgno)];
        !           292: };
        !           293: 
        !           294: /*
        !           295: ** For every page in the database file, an instance of the following structure
        !           296: ** is stored in memory.  The u.aDisk[] array contains the raw bits read from
        !           297: ** the disk.  The rest is auxiliary information held in memory only. The
        !           298: ** auxiliary info is only valid for regular database pages - it is not
        !           299: ** used for overflow pages and pages on the freelist.
        !           300: **
        !           301: ** Of particular interest in the auxiliary info is the apCell[] entry.  Each
        !           302: ** apCell[] entry is a pointer to a Cell structure in u.aDisk[].  The cells are
        !           303: ** put in this array so that they can be accessed in constant time, rather
        !           304: ** than in linear time which would be needed if we had to walk the linked 
        !           305: ** list on every access.
        !           306: **
        !           307: ** Note that apCell[] contains enough space to hold up to two more Cells
        !           308: ** than can possibly fit on one page.  In the steady state, every apCell[]
        !           309: ** points to memory inside u.aDisk[].  But in the middle of an insert
        !           310: ** operation, some apCell[] entries may temporarily point to data space
        !           311: ** outside of u.aDisk[].  This is a transient situation that is quickly
        !           312: ** resolved.  But while it is happening, it is possible for a database
        !           313: ** page to hold as many as two more cells than it might otherwise hold.
        !           314: ** The extra two entries in apCell[] are an allowance for this situation.
        !           315: **
        !           316: ** The pParent field points back to the parent page.  This allows us to
        !           317: ** walk up the BTree from any leaf to the root.  Care must be taken to
        !           318: ** unref() the parent page pointer when this page is no longer referenced.
        !           319: ** The pageDestructor() routine handles that chore.
        !           320: */
        !           321: struct MemPage {
        !           322:   union u_page_data {
        !           323:     char aDisk[SQLITE_PAGE_SIZE];  /* Page data stored on disk */
        !           324:     PageHdr hdr;                   /* Overlay page header */
        !           325:   } u;
        !           326:   u8 isInit;                     /* True if auxiliary data is initialized */
        !           327:   u8 idxShift;                   /* True if apCell[] indices have changed */
        !           328:   u8 isOverfull;                 /* Some apCell[] points outside u.aDisk[] */
        !           329:   MemPage *pParent;              /* The parent of this page.  NULL for root */
        !           330:   int idxParent;                 /* Index in pParent->apCell[] of this node */
        !           331:   int nFree;                     /* Number of free bytes in u.aDisk[] */
        !           332:   int nCell;                     /* Number of entries on this page */
        !           333:   Cell *apCell[MX_CELL+2];       /* All data entires in sorted order */
        !           334: };
        !           335: 
        !           336: /*
        !           337: ** The in-memory image of a disk page has the auxiliary information appended
        !           338: ** to the end.  EXTRA_SIZE is the number of bytes of space needed to hold
        !           339: ** that extra information.
        !           340: */
        !           341: #define EXTRA_SIZE (sizeof(MemPage)-sizeof(union u_page_data))
        !           342: 
        !           343: /*
        !           344: ** Everything we need to know about an open database
        !           345: */
        !           346: struct Btree {
        !           347:   BtOps *pOps;          /* Function table */
        !           348:   Pager *pPager;        /* The page cache */
        !           349:   BtCursor *pCursor;    /* A list of all open cursors */
        !           350:   PageOne *page1;       /* First page of the database */
        !           351:   u8 inTrans;           /* True if a transaction is in progress */
        !           352:   u8 inCkpt;            /* True if there is a checkpoint on the transaction */
        !           353:   u8 readOnly;          /* True if the underlying file is readonly */
        !           354:   u8 needSwab;          /* Need to byte-swapping */
        !           355: };
        !           356: typedef Btree Bt;
        !           357: 
        !           358: /*
        !           359: ** A cursor is a pointer to a particular entry in the BTree.
        !           360: ** The entry is identified by its MemPage and the index in
        !           361: ** MemPage.apCell[] of the entry.
        !           362: */
        !           363: struct BtCursor {
        !           364:   BtCursorOps *pOps;        /* Function table */
        !           365:   Btree *pBt;               /* The Btree to which this cursor belongs */
        !           366:   BtCursor *pNext, *pPrev;  /* Forms a linked list of all cursors */
        !           367:   BtCursor *pShared;        /* Loop of cursors with the same root page */
        !           368:   Pgno pgnoRoot;            /* The root page of this tree */
        !           369:   MemPage *pPage;           /* Page that contains the entry */
        !           370:   int idx;                  /* Index of the entry in pPage->apCell[] */
        !           371:   u8 wrFlag;                /* True if writable */
        !           372:   u8 eSkip;                 /* Determines if next step operation is a no-op */
        !           373:   u8 iMatch;                /* compare result from last sqliteBtreeMoveto() */
        !           374: };
        !           375: 
        !           376: /*
        !           377: ** Legal values for BtCursor.eSkip.
        !           378: */
        !           379: #define SKIP_NONE     0   /* Always step the cursor */
        !           380: #define SKIP_NEXT     1   /* The next sqliteBtreeNext() is a no-op */
        !           381: #define SKIP_PREV     2   /* The next sqliteBtreePrevious() is a no-op */
        !           382: #define SKIP_INVALID  3   /* Calls to Next() and Previous() are invalid */
        !           383: 
        !           384: /* Forward declarations */
        !           385: static int fileBtreeCloseCursor(BtCursor *pCur);
        !           386: 
        !           387: /*
        !           388: ** Routines for byte swapping.
        !           389: */
        !           390: u16 swab16(u16 x){
        !           391:   return ((x & 0xff)<<8) | ((x>>8)&0xff);
        !           392: }
        !           393: u32 swab32(u32 x){
        !           394:   return ((x & 0xff)<<24) | ((x & 0xff00)<<8) |
        !           395:          ((x>>8) & 0xff00) | ((x>>24)&0xff);
        !           396: }
        !           397: 
        !           398: /*
        !           399: ** Compute the total number of bytes that a Cell needs on the main
        !           400: ** database page.  The number returned includes the Cell header,
        !           401: ** local payload storage, and the pointer to overflow pages (if
        !           402: ** applicable).  Additional space allocated on overflow pages
        !           403: ** is NOT included in the value returned from this routine.
        !           404: */
        !           405: static int cellSize(Btree *pBt, Cell *pCell){
        !           406:   int n = NKEY(pBt, pCell->h) + NDATA(pBt, pCell->h);
        !           407:   if( n>MX_LOCAL_PAYLOAD ){
        !           408:     n = MX_LOCAL_PAYLOAD + sizeof(Pgno);
        !           409:   }else{
        !           410:     n = ROUNDUP(n);
        !           411:   }
        !           412:   n += sizeof(CellHdr);
        !           413:   return n;
        !           414: }
        !           415: 
        !           416: /*
        !           417: ** Defragment the page given.  All Cells are moved to the
        !           418: ** beginning of the page and all free space is collected 
        !           419: ** into one big FreeBlk at the end of the page.
        !           420: */
        !           421: static void defragmentPage(Btree *pBt, MemPage *pPage){
        !           422:   int pc, i, n;
        !           423:   FreeBlk *pFBlk;
        !           424:   char newPage[SQLITE_USABLE_SIZE];
        !           425: 
        !           426:   assert( sqlitepager_iswriteable(pPage) );
        !           427:   assert( pPage->isInit );
        !           428:   pc = sizeof(PageHdr);
        !           429:   pPage->u.hdr.firstCell = SWAB16(pBt, pc);
        !           430:   memcpy(newPage, pPage->u.aDisk, pc);
        !           431:   for(i=0; i<pPage->nCell; i++){
        !           432:     Cell *pCell = pPage->apCell[i];
        !           433: 
        !           434:     /* This routine should never be called on an overfull page.  The
        !           435:     ** following asserts verify that constraint. */
        !           436:     assert( Addr(pCell) > Addr(pPage) );
        !           437:     assert( Addr(pCell) < Addr(pPage) + SQLITE_USABLE_SIZE );
        !           438: 
        !           439:     n = cellSize(pBt, pCell);
        !           440:     pCell->h.iNext = SWAB16(pBt, pc + n);
        !           441:     memcpy(&newPage[pc], pCell, n);
        !           442:     pPage->apCell[i] = (Cell*)&pPage->u.aDisk[pc];
        !           443:     pc += n;
        !           444:   }
        !           445:   assert( pPage->nFree==SQLITE_USABLE_SIZE-pc );
        !           446:   memcpy(pPage->u.aDisk, newPage, pc);
        !           447:   if( pPage->nCell>0 ){
        !           448:     pPage->apCell[pPage->nCell-1]->h.iNext = 0;
        !           449:   }
        !           450:   pFBlk = (FreeBlk*)&pPage->u.aDisk[pc];
        !           451:   pFBlk->iSize = SWAB16(pBt, SQLITE_USABLE_SIZE - pc);
        !           452:   pFBlk->iNext = 0;
        !           453:   pPage->u.hdr.firstFree = SWAB16(pBt, pc);
        !           454:   memset(&pFBlk[1], 0, SQLITE_USABLE_SIZE - pc - sizeof(FreeBlk));
        !           455: }
        !           456: 
        !           457: /*
        !           458: ** Allocate nByte bytes of space on a page.  nByte must be a 
        !           459: ** multiple of 4.
        !           460: **
        !           461: ** Return the index into pPage->u.aDisk[] of the first byte of
        !           462: ** the new allocation. Or return 0 if there is not enough free
        !           463: ** space on the page to satisfy the allocation request.
        !           464: **
        !           465: ** If the page contains nBytes of free space but does not contain
        !           466: ** nBytes of contiguous free space, then this routine automatically
        !           467: ** calls defragementPage() to consolidate all free space before 
        !           468: ** allocating the new chunk.
        !           469: */
        !           470: static int allocateSpace(Btree *pBt, MemPage *pPage, int nByte){
        !           471:   FreeBlk *p;
        !           472:   u16 *pIdx;
        !           473:   int start;
        !           474:   int iSize;
        !           475: #ifndef NDEBUG
        !           476:   int cnt = 0;
        !           477: #endif
        !           478: 
        !           479:   assert( sqlitepager_iswriteable(pPage) );
        !           480:   assert( nByte==ROUNDUP(nByte) );
        !           481:   assert( pPage->isInit );
        !           482:   if( pPage->nFree<nByte || pPage->isOverfull ) return 0;
        !           483:   pIdx = &pPage->u.hdr.firstFree;
        !           484:   p = (FreeBlk*)&pPage->u.aDisk[SWAB16(pBt, *pIdx)];
        !           485:   while( (iSize = SWAB16(pBt, p->iSize))<nByte ){
        !           486:     assert( cnt++ < SQLITE_USABLE_SIZE/4 );
        !           487:     if( p->iNext==0 ){
        !           488:       defragmentPage(pBt, pPage);
        !           489:       pIdx = &pPage->u.hdr.firstFree;
        !           490:     }else{
        !           491:       pIdx = &p->iNext;
        !           492:     }
        !           493:     p = (FreeBlk*)&pPage->u.aDisk[SWAB16(pBt, *pIdx)];
        !           494:   }
        !           495:   if( iSize==nByte ){
        !           496:     start = SWAB16(pBt, *pIdx);
        !           497:     *pIdx = p->iNext;
        !           498:   }else{
        !           499:     FreeBlk *pNew;
        !           500:     start = SWAB16(pBt, *pIdx);
        !           501:     pNew = (FreeBlk*)&pPage->u.aDisk[start + nByte];
        !           502:     pNew->iNext = p->iNext;
        !           503:     pNew->iSize = SWAB16(pBt, iSize - nByte);
        !           504:     *pIdx = SWAB16(pBt, start + nByte);
        !           505:   }
        !           506:   pPage->nFree -= nByte;
        !           507:   return start;
        !           508: }
        !           509: 
        !           510: /*
        !           511: ** Return a section of the MemPage.u.aDisk[] to the freelist.
        !           512: ** The first byte of the new free block is pPage->u.aDisk[start]
        !           513: ** and the size of the block is "size" bytes.  Size must be
        !           514: ** a multiple of 4.
        !           515: **
        !           516: ** Most of the effort here is involved in coalesing adjacent
        !           517: ** free blocks into a single big free block.
        !           518: */
        !           519: static void freeSpace(Btree *pBt, MemPage *pPage, int start, int size){
        !           520:   int end = start + size;
        !           521:   u16 *pIdx, idx;
        !           522:   FreeBlk *pFBlk;
        !           523:   FreeBlk *pNew;
        !           524:   FreeBlk *pNext;
        !           525:   int iSize;
        !           526: 
        !           527:   assert( sqlitepager_iswriteable(pPage) );
        !           528:   assert( size == ROUNDUP(size) );
        !           529:   assert( start == ROUNDUP(start) );
        !           530:   assert( pPage->isInit );
        !           531:   pIdx = &pPage->u.hdr.firstFree;
        !           532:   idx = SWAB16(pBt, *pIdx);
        !           533:   while( idx!=0 && idx<start ){
        !           534:     pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
        !           535:     iSize = SWAB16(pBt, pFBlk->iSize);
        !           536:     if( idx + iSize == start ){
        !           537:       pFBlk->iSize = SWAB16(pBt, iSize + size);
        !           538:       if( idx + iSize + size == SWAB16(pBt, pFBlk->iNext) ){
        !           539:         pNext = (FreeBlk*)&pPage->u.aDisk[idx + iSize + size];
        !           540:         if( pBt->needSwab ){
        !           541:           pFBlk->iSize = swab16((u16)swab16(pNext->iSize)+iSize+size);
        !           542:         }else{
        !           543:           pFBlk->iSize += pNext->iSize;
        !           544:         }
        !           545:         pFBlk->iNext = pNext->iNext;
        !           546:       }
        !           547:       pPage->nFree += size;
        !           548:       return;
        !           549:     }
        !           550:     pIdx = &pFBlk->iNext;
        !           551:     idx = SWAB16(pBt, *pIdx);
        !           552:   }
        !           553:   pNew = (FreeBlk*)&pPage->u.aDisk[start];
        !           554:   if( idx != end ){
        !           555:     pNew->iSize = SWAB16(pBt, size);
        !           556:     pNew->iNext = SWAB16(pBt, idx);
        !           557:   }else{
        !           558:     pNext = (FreeBlk*)&pPage->u.aDisk[idx];
        !           559:     pNew->iSize = SWAB16(pBt, size + SWAB16(pBt, pNext->iSize));
        !           560:     pNew->iNext = pNext->iNext;
        !           561:   }
        !           562:   *pIdx = SWAB16(pBt, start);
        !           563:   pPage->nFree += size;
        !           564: }
        !           565: 
        !           566: /*
        !           567: ** Initialize the auxiliary information for a disk block.
        !           568: **
        !           569: ** The pParent parameter must be a pointer to the MemPage which
        !           570: ** is the parent of the page being initialized.  The root of the
        !           571: ** BTree (usually page 2) has no parent and so for that page, 
        !           572: ** pParent==NULL.
        !           573: **
        !           574: ** Return SQLITE_OK on success.  If we see that the page does
        !           575: ** not contain a well-formed database page, then return 
        !           576: ** SQLITE_CORRUPT.  Note that a return of SQLITE_OK does not
        !           577: ** guarantee that the page is well-formed.  It only shows that
        !           578: ** we failed to detect any corruption.
        !           579: */
        !           580: static int initPage(Bt *pBt, MemPage *pPage, Pgno pgnoThis, MemPage *pParent){
        !           581:   int idx;           /* An index into pPage->u.aDisk[] */
        !           582:   Cell *pCell;       /* A pointer to a Cell in pPage->u.aDisk[] */
        !           583:   FreeBlk *pFBlk;    /* A pointer to a free block in pPage->u.aDisk[] */
        !           584:   int sz;            /* The size of a Cell in bytes */
        !           585:   int freeSpace;     /* Amount of free space on the page */
        !           586: 
        !           587:   if( pPage->pParent ){
        !           588:     assert( pPage->pParent==pParent );
        !           589:     return SQLITE_OK;
        !           590:   }
        !           591:   if( pParent ){
        !           592:     pPage->pParent = pParent;
        !           593:     sqlitepager_ref(pParent);
        !           594:   }
        !           595:   if( pPage->isInit ) return SQLITE_OK;
        !           596:   pPage->isInit = 1;
        !           597:   pPage->nCell = 0;
        !           598:   freeSpace = USABLE_SPACE;
        !           599:   idx = SWAB16(pBt, pPage->u.hdr.firstCell);
        !           600:   while( idx!=0 ){
        !           601:     if( idx>SQLITE_USABLE_SIZE-MIN_CELL_SIZE ) goto page_format_error;
        !           602:     if( idx<sizeof(PageHdr) ) goto page_format_error;
        !           603:     if( idx!=ROUNDUP(idx) ) goto page_format_error;
        !           604:     pCell = (Cell*)&pPage->u.aDisk[idx];
        !           605:     sz = cellSize(pBt, pCell);
        !           606:     if( idx+sz > SQLITE_USABLE_SIZE ) goto page_format_error;
        !           607:     freeSpace -= sz;
        !           608:     pPage->apCell[pPage->nCell++] = pCell;
        !           609:     idx = SWAB16(pBt, pCell->h.iNext);
        !           610:   }
        !           611:   pPage->nFree = 0;
        !           612:   idx = SWAB16(pBt, pPage->u.hdr.firstFree);
        !           613:   while( idx!=0 ){
        !           614:     int iNext;
        !           615:     if( idx>SQLITE_USABLE_SIZE-sizeof(FreeBlk) ) goto page_format_error;
        !           616:     if( idx<sizeof(PageHdr) ) goto page_format_error;
        !           617:     pFBlk = (FreeBlk*)&pPage->u.aDisk[idx];
        !           618:     pPage->nFree += SWAB16(pBt, pFBlk->iSize);
        !           619:     iNext = SWAB16(pBt, pFBlk->iNext);
        !           620:     if( iNext>0 && iNext <= idx ) goto page_format_error;
        !           621:     idx = iNext;
        !           622:   }
        !           623:   if( pPage->nCell==0 && pPage->nFree==0 ){
        !           624:     /* As a special case, an uninitialized root page appears to be
        !           625:     ** an empty database */
        !           626:     return SQLITE_OK;
        !           627:   }
        !           628:   if( pPage->nFree!=freeSpace ) goto page_format_error;
        !           629:   return SQLITE_OK;
        !           630: 
        !           631: page_format_error:
        !           632:   return SQLITE_CORRUPT;
        !           633: }
        !           634: 
        !           635: /*
        !           636: ** Set up a raw page so that it looks like a database page holding
        !           637: ** no entries.
        !           638: */
        !           639: static void zeroPage(Btree *pBt, MemPage *pPage){
        !           640:   PageHdr *pHdr;
        !           641:   FreeBlk *pFBlk;
        !           642:   assert( sqlitepager_iswriteable(pPage) );
        !           643:   memset(pPage, 0, SQLITE_USABLE_SIZE);
        !           644:   pHdr = &pPage->u.hdr;
        !           645:   pHdr->firstCell = 0;
        !           646:   pHdr->firstFree = SWAB16(pBt, sizeof(*pHdr));
        !           647:   pFBlk = (FreeBlk*)&pHdr[1];
        !           648:   pFBlk->iNext = 0;
        !           649:   pPage->nFree = SQLITE_USABLE_SIZE - sizeof(*pHdr);
        !           650:   pFBlk->iSize = SWAB16(pBt, pPage->nFree);
        !           651:   pPage->nCell = 0;
        !           652:   pPage->isOverfull = 0;
        !           653: }
        !           654: 
        !           655: /*
        !           656: ** This routine is called when the reference count for a page
        !           657: ** reaches zero.  We need to unref the pParent pointer when that
        !           658: ** happens.
        !           659: */
        !           660: static void pageDestructor(void *pData){
        !           661:   MemPage *pPage = (MemPage*)pData;
        !           662:   if( pPage->pParent ){
        !           663:     MemPage *pParent = pPage->pParent;
        !           664:     pPage->pParent = 0;
        !           665:     sqlitepager_unref(pParent);
        !           666:   }
        !           667: }
        !           668: 
        !           669: /*
        !           670: ** Open a new database.
        !           671: **
        !           672: ** Actually, this routine just sets up the internal data structures
        !           673: ** for accessing the database.  We do not open the database file 
        !           674: ** until the first page is loaded.
        !           675: **
        !           676: ** zFilename is the name of the database file.  If zFilename is NULL
        !           677: ** a new database with a random name is created.  This randomly named
        !           678: ** database file will be deleted when sqliteBtreeClose() is called.
        !           679: */
        !           680: int sqliteBtreeOpen(
        !           681:   const char *zFilename,    /* Name of the file containing the BTree database */
        !           682:   int omitJournal,          /* if TRUE then do not journal this file */
        !           683:   int nCache,               /* How many pages in the page cache */
        !           684:   Btree **ppBtree           /* Pointer to new Btree object written here */
        !           685: ){
        !           686:   Btree *pBt;
        !           687:   int rc;
        !           688: 
        !           689:   /*
        !           690:   ** The following asserts make sure that structures used by the btree are
        !           691:   ** the right size.  This is to guard against size changes that result
        !           692:   ** when compiling on a different architecture.
        !           693:   */
        !           694:   assert( sizeof(u32)==4 );
        !           695:   assert( sizeof(u16)==2 );
        !           696:   assert( sizeof(Pgno)==4 );
        !           697:   assert( sizeof(PageHdr)==8 );
        !           698:   assert( sizeof(CellHdr)==12 );
        !           699:   assert( sizeof(FreeBlk)==4 );
        !           700:   assert( sizeof(OverflowPage)==SQLITE_USABLE_SIZE );
        !           701:   assert( sizeof(FreelistInfo)==OVERFLOW_SIZE );
        !           702:   assert( sizeof(ptr)==sizeof(char*) );
        !           703:   assert( sizeof(uptr)==sizeof(ptr) );
        !           704: 
        !           705:   pBt = sqliteMalloc( sizeof(*pBt) );
        !           706:   if( pBt==0 ){
        !           707:     *ppBtree = 0;
        !           708:     return SQLITE_NOMEM;
        !           709:   }
        !           710:   if( nCache<10 ) nCache = 10;
        !           711:   rc = sqlitepager_open(&pBt->pPager, zFilename, nCache, EXTRA_SIZE,
        !           712:                         !omitJournal);
        !           713:   if( rc!=SQLITE_OK ){
        !           714:     if( pBt->pPager ) sqlitepager_close(pBt->pPager);
        !           715:     sqliteFree(pBt);
        !           716:     *ppBtree = 0;
        !           717:     return rc;
        !           718:   }
        !           719:   sqlitepager_set_destructor(pBt->pPager, pageDestructor);
        !           720:   pBt->pCursor = 0;
        !           721:   pBt->page1 = 0;
        !           722:   pBt->readOnly = sqlitepager_isreadonly(pBt->pPager);
        !           723:   pBt->pOps = &sqliteBtreeOps;
        !           724:   *ppBtree = pBt;
        !           725:   return SQLITE_OK;
        !           726: }
        !           727: 
        !           728: /*
        !           729: ** Close an open database and invalidate all cursors.
        !           730: */
        !           731: static int fileBtreeClose(Btree *pBt){
        !           732:   while( pBt->pCursor ){
        !           733:     fileBtreeCloseCursor(pBt->pCursor);
        !           734:   }
        !           735:   sqlitepager_close(pBt->pPager);
        !           736:   sqliteFree(pBt);
        !           737:   return SQLITE_OK;
        !           738: }
        !           739: 
        !           740: /*
        !           741: ** Change the limit on the number of pages allowed in the cache.
        !           742: **
        !           743: ** The maximum number of cache pages is set to the absolute
        !           744: ** value of mxPage.  If mxPage is negative, the pager will
        !           745: ** operate asynchronously - it will not stop to do fsync()s
        !           746: ** to insure data is written to the disk surface before
        !           747: ** continuing.  Transactions still work if synchronous is off,
        !           748: ** and the database cannot be corrupted if this program
        !           749: ** crashes.  But if the operating system crashes or there is
        !           750: ** an abrupt power failure when synchronous is off, the database
        !           751: ** could be left in an inconsistent and unrecoverable state.
        !           752: ** Synchronous is on by default so database corruption is not
        !           753: ** normally a worry.
        !           754: */
        !           755: static int fileBtreeSetCacheSize(Btree *pBt, int mxPage){
        !           756:   sqlitepager_set_cachesize(pBt->pPager, mxPage);
        !           757:   return SQLITE_OK;
        !           758: }
        !           759: 
        !           760: /*
        !           761: ** Change the way data is synced to disk in order to increase or decrease
        !           762: ** how well the database resists damage due to OS crashes and power
        !           763: ** failures.  Level 1 is the same as asynchronous (no syncs() occur and
        !           764: ** there is a high probability of damage)  Level 2 is the default.  There
        !           765: ** is a very low but non-zero probability of damage.  Level 3 reduces the
        !           766: ** probability of damage to near zero but with a write performance reduction.
        !           767: */
        !           768: static int fileBtreeSetSafetyLevel(Btree *pBt, int level){
        !           769:   sqlitepager_set_safety_level(pBt->pPager, level);
        !           770:   return SQLITE_OK;
        !           771: }
        !           772: 
        !           773: /*
        !           774: ** Get a reference to page1 of the database file.  This will
        !           775: ** also acquire a readlock on that file.
        !           776: **
        !           777: ** SQLITE_OK is returned on success.  If the file is not a
        !           778: ** well-formed database file, then SQLITE_CORRUPT is returned.
        !           779: ** SQLITE_BUSY is returned if the database is locked.  SQLITE_NOMEM
        !           780: ** is returned if we run out of memory.  SQLITE_PROTOCOL is returned
        !           781: ** if there is a locking protocol violation.
        !           782: */
        !           783: static int lockBtree(Btree *pBt){
        !           784:   int rc;
        !           785:   if( pBt->page1 ) return SQLITE_OK;
        !           786:   rc = sqlitepager_get(pBt->pPager, 1, (void**)&pBt->page1);
        !           787:   if( rc!=SQLITE_OK ) return rc;
        !           788: 
        !           789:   /* Do some checking to help insure the file we opened really is
        !           790:   ** a valid database file. 
        !           791:   */
        !           792:   if( sqlitepager_pagecount(pBt->pPager)>0 ){
        !           793:     PageOne *pP1 = pBt->page1;
        !           794:     if( strcmp(pP1->zMagic,zMagicHeader)!=0 ||
        !           795:           (pP1->iMagic!=MAGIC && swab32(pP1->iMagic)!=MAGIC) ){
        !           796:       rc = SQLITE_NOTADB;
        !           797:       goto page1_init_failed;
        !           798:     }
        !           799:     pBt->needSwab = pP1->iMagic!=MAGIC;
        !           800:   }
        !           801:   return rc;
        !           802: 
        !           803: page1_init_failed:
        !           804:   sqlitepager_unref(pBt->page1);
        !           805:   pBt->page1 = 0;
        !           806:   return rc;
        !           807: }
        !           808: 
        !           809: /*
        !           810: ** If there are no outstanding cursors and we are not in the middle
        !           811: ** of a transaction but there is a read lock on the database, then
        !           812: ** this routine unrefs the first page of the database file which 
        !           813: ** has the effect of releasing the read lock.
        !           814: **
        !           815: ** If there are any outstanding cursors, this routine is a no-op.
        !           816: **
        !           817: ** If there is a transaction in progress, this routine is a no-op.
        !           818: */
        !           819: static void unlockBtreeIfUnused(Btree *pBt){
        !           820:   if( pBt->inTrans==0 && pBt->pCursor==0 && pBt->page1!=0 ){
        !           821:     sqlitepager_unref(pBt->page1);
        !           822:     pBt->page1 = 0;
        !           823:     pBt->inTrans = 0;
        !           824:     pBt->inCkpt = 0;
        !           825:   }
        !           826: }
        !           827: 
        !           828: /*
        !           829: ** Create a new database by initializing the first two pages of the
        !           830: ** file.
        !           831: */
        !           832: static int newDatabase(Btree *pBt){
        !           833:   MemPage *pRoot;
        !           834:   PageOne *pP1;
        !           835:   int rc;
        !           836:   if( sqlitepager_pagecount(pBt->pPager)>1 ) return SQLITE_OK;
        !           837:   pP1 = pBt->page1;
        !           838:   rc = sqlitepager_write(pBt->page1);
        !           839:   if( rc ) return rc;
        !           840:   rc = sqlitepager_get(pBt->pPager, 2, (void**)&pRoot);
        !           841:   if( rc ) return rc;
        !           842:   rc = sqlitepager_write(pRoot);
        !           843:   if( rc ){
        !           844:     sqlitepager_unref(pRoot);
        !           845:     return rc;
        !           846:   }
        !           847:   strcpy(pP1->zMagic, zMagicHeader);
        !           848:   if( btree_native_byte_order ){
        !           849:     pP1->iMagic = MAGIC;
        !           850:     pBt->needSwab = 0;
        !           851:   }else{
        !           852:     pP1->iMagic = swab32(MAGIC);
        !           853:     pBt->needSwab = 1;
        !           854:   }
        !           855:   zeroPage(pBt, pRoot);
        !           856:   sqlitepager_unref(pRoot);
        !           857:   return SQLITE_OK;
        !           858: }
        !           859: 
        !           860: /*
        !           861: ** Attempt to start a new transaction.
        !           862: **
        !           863: ** A transaction must be started before attempting any changes
        !           864: ** to the database.  None of the following routines will work
        !           865: ** unless a transaction is started first:
        !           866: **
        !           867: **      sqliteBtreeCreateTable()
        !           868: **      sqliteBtreeCreateIndex()
        !           869: **      sqliteBtreeClearTable()
        !           870: **      sqliteBtreeDropTable()
        !           871: **      sqliteBtreeInsert()
        !           872: **      sqliteBtreeDelete()
        !           873: **      sqliteBtreeUpdateMeta()
        !           874: */
        !           875: static int fileBtreeBeginTrans(Btree *pBt){
        !           876:   int rc;
        !           877:   if( pBt->inTrans ) return SQLITE_ERROR;
        !           878:   if( pBt->readOnly ) return SQLITE_READONLY;
        !           879:   if( pBt->page1==0 ){
        !           880:     rc = lockBtree(pBt);
        !           881:     if( rc!=SQLITE_OK ){
        !           882:       return rc;
        !           883:     }
        !           884:   }
        !           885:   rc = sqlitepager_begin(pBt->page1);
        !           886:   if( rc==SQLITE_OK ){
        !           887:     rc = newDatabase(pBt);
        !           888:   }
        !           889:   if( rc==SQLITE_OK ){
        !           890:     pBt->inTrans = 1;
        !           891:     pBt->inCkpt = 0;
        !           892:   }else{
        !           893:     unlockBtreeIfUnused(pBt);
        !           894:   }
        !           895:   return rc;
        !           896: }
        !           897: 
        !           898: /*
        !           899: ** Commit the transaction currently in progress.
        !           900: **
        !           901: ** This will release the write lock on the database file.  If there
        !           902: ** are no active cursors, it also releases the read lock.
        !           903: */
        !           904: static int fileBtreeCommit(Btree *pBt){
        !           905:   int rc;
        !           906:   rc = pBt->readOnly ? SQLITE_OK : sqlitepager_commit(pBt->pPager);
        !           907:   pBt->inTrans = 0;
        !           908:   pBt->inCkpt = 0;
        !           909:   unlockBtreeIfUnused(pBt);
        !           910:   return rc;
        !           911: }
        !           912: 
        !           913: /*
        !           914: ** Rollback the transaction in progress.  All cursors will be
        !           915: ** invalided by this operation.  Any attempt to use a cursor
        !           916: ** that was open at the beginning of this operation will result
        !           917: ** in an error.
        !           918: **
        !           919: ** This will release the write lock on the database file.  If there
        !           920: ** are no active cursors, it also releases the read lock.
        !           921: */
        !           922: static int fileBtreeRollback(Btree *pBt){
        !           923:   int rc;
        !           924:   BtCursor *pCur;
        !           925:   if( pBt->inTrans==0 ) return SQLITE_OK;
        !           926:   pBt->inTrans = 0;
        !           927:   pBt->inCkpt = 0;
        !           928:   rc = pBt->readOnly ? SQLITE_OK : sqlitepager_rollback(pBt->pPager);
        !           929:   for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
        !           930:     if( pCur->pPage && pCur->pPage->isInit==0 ){
        !           931:       sqlitepager_unref(pCur->pPage);
        !           932:       pCur->pPage = 0;
        !           933:     }
        !           934:   }
        !           935:   unlockBtreeIfUnused(pBt);
        !           936:   return rc;
        !           937: }
        !           938: 
        !           939: /*
        !           940: ** Set the checkpoint for the current transaction.  The checkpoint serves
        !           941: ** as a sub-transaction that can be rolled back independently of the
        !           942: ** main transaction.  You must start a transaction before starting a
        !           943: ** checkpoint.  The checkpoint is ended automatically if the transaction
        !           944: ** commits or rolls back.
        !           945: **
        !           946: ** Only one checkpoint may be active at a time.  It is an error to try
        !           947: ** to start a new checkpoint if another checkpoint is already active.
        !           948: */
        !           949: static int fileBtreeBeginCkpt(Btree *pBt){
        !           950:   int rc;
        !           951:   if( !pBt->inTrans || pBt->inCkpt ){
        !           952:     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
        !           953:   }
        !           954:   rc = pBt->readOnly ? SQLITE_OK : sqlitepager_ckpt_begin(pBt->pPager);
        !           955:   pBt->inCkpt = 1;
        !           956:   return rc;
        !           957: }
        !           958: 
        !           959: 
        !           960: /*
        !           961: ** Commit a checkpoint to transaction currently in progress.  If no
        !           962: ** checkpoint is active, this is a no-op.
        !           963: */
        !           964: static int fileBtreeCommitCkpt(Btree *pBt){
        !           965:   int rc;
        !           966:   if( pBt->inCkpt && !pBt->readOnly ){
        !           967:     rc = sqlitepager_ckpt_commit(pBt->pPager);
        !           968:   }else{
        !           969:     rc = SQLITE_OK;
        !           970:   }
        !           971:   pBt->inCkpt = 0;
        !           972:   return rc;
        !           973: }
        !           974: 
        !           975: /*
        !           976: ** Rollback the checkpoint to the current transaction.  If there
        !           977: ** is no active checkpoint or transaction, this routine is a no-op.
        !           978: **
        !           979: ** All cursors will be invalided by this operation.  Any attempt
        !           980: ** to use a cursor that was open at the beginning of this operation
        !           981: ** will result in an error.
        !           982: */
        !           983: static int fileBtreeRollbackCkpt(Btree *pBt){
        !           984:   int rc;
        !           985:   BtCursor *pCur;
        !           986:   if( pBt->inCkpt==0 || pBt->readOnly ) return SQLITE_OK;
        !           987:   rc = sqlitepager_ckpt_rollback(pBt->pPager);
        !           988:   for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
        !           989:     if( pCur->pPage && pCur->pPage->isInit==0 ){
        !           990:       sqlitepager_unref(pCur->pPage);
        !           991:       pCur->pPage = 0;
        !           992:     }
        !           993:   }
        !           994:   pBt->inCkpt = 0;
        !           995:   return rc;
        !           996: }
        !           997: 
        !           998: /*
        !           999: ** Create a new cursor for the BTree whose root is on the page
        !          1000: ** iTable.  The act of acquiring a cursor gets a read lock on 
        !          1001: ** the database file.
        !          1002: **
        !          1003: ** If wrFlag==0, then the cursor can only be used for reading.
        !          1004: ** If wrFlag==1, then the cursor can be used for reading or for
        !          1005: ** writing if other conditions for writing are also met.  These
        !          1006: ** are the conditions that must be met in order for writing to
        !          1007: ** be allowed:
        !          1008: **
        !          1009: ** 1:  The cursor must have been opened with wrFlag==1
        !          1010: **
        !          1011: ** 2:  No other cursors may be open with wrFlag==0 on the same table
        !          1012: **
        !          1013: ** 3:  The database must be writable (not on read-only media)
        !          1014: **
        !          1015: ** 4:  There must be an active transaction.
        !          1016: **
        !          1017: ** Condition 2 warrants further discussion.  If any cursor is opened
        !          1018: ** on a table with wrFlag==0, that prevents all other cursors from
        !          1019: ** writing to that table.  This is a kind of "read-lock".  When a cursor
        !          1020: ** is opened with wrFlag==0 it is guaranteed that the table will not
        !          1021: ** change as long as the cursor is open.  This allows the cursor to
        !          1022: ** do a sequential scan of the table without having to worry about
        !          1023: ** entries being inserted or deleted during the scan.  Cursors should
        !          1024: ** be opened with wrFlag==0 only if this read-lock property is needed.
        !          1025: ** That is to say, cursors should be opened with wrFlag==0 only if they
        !          1026: ** intend to use the sqliteBtreeNext() system call.  All other cursors
        !          1027: ** should be opened with wrFlag==1 even if they never really intend
        !          1028: ** to write.
        !          1029: ** 
        !          1030: ** No checking is done to make sure that page iTable really is the
        !          1031: ** root page of a b-tree.  If it is not, then the cursor acquired
        !          1032: ** will not work correctly.
        !          1033: */
        !          1034: static 
        !          1035: int fileBtreeCursor(Btree *pBt, int iTable, int wrFlag, BtCursor **ppCur){
        !          1036:   int rc;
        !          1037:   BtCursor *pCur, *pRing;
        !          1038: 
        !          1039:   if( pBt->readOnly && wrFlag ){
        !          1040:     *ppCur = 0;
        !          1041:     return SQLITE_READONLY;
        !          1042:   }
        !          1043:   if( pBt->page1==0 ){
        !          1044:     rc = lockBtree(pBt);
        !          1045:     if( rc!=SQLITE_OK ){
        !          1046:       *ppCur = 0;
        !          1047:       return rc;
        !          1048:     }
        !          1049:   }
        !          1050:   pCur = sqliteMalloc( sizeof(*pCur) );
        !          1051:   if( pCur==0 ){
        !          1052:     rc = SQLITE_NOMEM;
        !          1053:     goto create_cursor_exception;
        !          1054:   }
        !          1055:   pCur->pgnoRoot = (Pgno)iTable;
        !          1056:   rc = sqlitepager_get(pBt->pPager, pCur->pgnoRoot, (void**)&pCur->pPage);
        !          1057:   if( rc!=SQLITE_OK ){
        !          1058:     goto create_cursor_exception;
        !          1059:   }
        !          1060:   rc = initPage(pBt, pCur->pPage, pCur->pgnoRoot, 0);
        !          1061:   if( rc!=SQLITE_OK ){
        !          1062:     goto create_cursor_exception;
        !          1063:   }
        !          1064:   pCur->pOps = &sqliteBtreeCursorOps;
        !          1065:   pCur->pBt = pBt;
        !          1066:   pCur->wrFlag = wrFlag;
        !          1067:   pCur->idx = 0;
        !          1068:   pCur->eSkip = SKIP_INVALID;
        !          1069:   pCur->pNext = pBt->pCursor;
        !          1070:   if( pCur->pNext ){
        !          1071:     pCur->pNext->pPrev = pCur;
        !          1072:   }
        !          1073:   pCur->pPrev = 0;
        !          1074:   pRing = pBt->pCursor;
        !          1075:   while( pRing && pRing->pgnoRoot!=pCur->pgnoRoot ){ pRing = pRing->pNext; }
        !          1076:   if( pRing ){
        !          1077:     pCur->pShared = pRing->pShared;
        !          1078:     pRing->pShared = pCur;
        !          1079:   }else{
        !          1080:     pCur->pShared = pCur;
        !          1081:   }
        !          1082:   pBt->pCursor = pCur;
        !          1083:   *ppCur = pCur;
        !          1084:   return SQLITE_OK;
        !          1085: 
        !          1086: create_cursor_exception:
        !          1087:   *ppCur = 0;
        !          1088:   if( pCur ){
        !          1089:     if( pCur->pPage ) sqlitepager_unref(pCur->pPage);
        !          1090:     sqliteFree(pCur);
        !          1091:   }
        !          1092:   unlockBtreeIfUnused(pBt);
        !          1093:   return rc;
        !          1094: }
        !          1095: 
        !          1096: /*
        !          1097: ** Close a cursor.  The read lock on the database file is released
        !          1098: ** when the last cursor is closed.
        !          1099: */
        !          1100: static int fileBtreeCloseCursor(BtCursor *pCur){
        !          1101:   Btree *pBt = pCur->pBt;
        !          1102:   if( pCur->pPrev ){
        !          1103:     pCur->pPrev->pNext = pCur->pNext;
        !          1104:   }else{
        !          1105:     pBt->pCursor = pCur->pNext;
        !          1106:   }
        !          1107:   if( pCur->pNext ){
        !          1108:     pCur->pNext->pPrev = pCur->pPrev;
        !          1109:   }
        !          1110:   if( pCur->pPage ){
        !          1111:     sqlitepager_unref(pCur->pPage);
        !          1112:   }
        !          1113:   if( pCur->pShared!=pCur ){
        !          1114:     BtCursor *pRing = pCur->pShared;
        !          1115:     while( pRing->pShared!=pCur ){ pRing = pRing->pShared; }
        !          1116:     pRing->pShared = pCur->pShared;
        !          1117:   }
        !          1118:   unlockBtreeIfUnused(pBt);
        !          1119:   sqliteFree(pCur);
        !          1120:   return SQLITE_OK;
        !          1121: }
        !          1122: 
        !          1123: /*
        !          1124: ** Make a temporary cursor by filling in the fields of pTempCur.
        !          1125: ** The temporary cursor is not on the cursor list for the Btree.
        !          1126: */
        !          1127: static void getTempCursor(BtCursor *pCur, BtCursor *pTempCur){
        !          1128:   memcpy(pTempCur, pCur, sizeof(*pCur));
        !          1129:   pTempCur->pNext = 0;
        !          1130:   pTempCur->pPrev = 0;
        !          1131:   if( pTempCur->pPage ){
        !          1132:     sqlitepager_ref(pTempCur->pPage);
        !          1133:   }
        !          1134: }
        !          1135: 
        !          1136: /*
        !          1137: ** Delete a temporary cursor such as was made by the CreateTemporaryCursor()
        !          1138: ** function above.
        !          1139: */
        !          1140: static void releaseTempCursor(BtCursor *pCur){
        !          1141:   if( pCur->pPage ){
        !          1142:     sqlitepager_unref(pCur->pPage);
        !          1143:   }
        !          1144: }
        !          1145: 
        !          1146: /*
        !          1147: ** Set *pSize to the number of bytes of key in the entry the
        !          1148: ** cursor currently points to.  Always return SQLITE_OK.
        !          1149: ** Failure is not possible.  If the cursor is not currently
        !          1150: ** pointing to an entry (which can happen, for example, if
        !          1151: ** the database is empty) then *pSize is set to 0.
        !          1152: */
        !          1153: static int fileBtreeKeySize(BtCursor *pCur, int *pSize){
        !          1154:   Cell *pCell;
        !          1155:   MemPage *pPage;
        !          1156: 
        !          1157:   pPage = pCur->pPage;
        !          1158:   assert( pPage!=0 );
        !          1159:   if( pCur->idx >= pPage->nCell ){
        !          1160:     *pSize = 0;
        !          1161:   }else{
        !          1162:     pCell = pPage->apCell[pCur->idx];
        !          1163:     *pSize = NKEY(pCur->pBt, pCell->h);
        !          1164:   }
        !          1165:   return SQLITE_OK;
        !          1166: }
        !          1167: 
        !          1168: /*
        !          1169: ** Read payload information from the entry that the pCur cursor is
        !          1170: ** pointing to.  Begin reading the payload at "offset" and read
        !          1171: ** a total of "amt" bytes.  Put the result in zBuf.
        !          1172: **
        !          1173: ** This routine does not make a distinction between key and data.
        !          1174: ** It just reads bytes from the payload area.
        !          1175: */
        !          1176: static int getPayload(BtCursor *pCur, int offset, int amt, char *zBuf){
        !          1177:   char *aPayload;
        !          1178:   Pgno nextPage;
        !          1179:   int rc;
        !          1180:   Btree *pBt = pCur->pBt;
        !          1181:   assert( pCur!=0 && pCur->pPage!=0 );
        !          1182:   assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
        !          1183:   aPayload = pCur->pPage->apCell[pCur->idx]->aPayload;
        !          1184:   if( offset<MX_LOCAL_PAYLOAD ){
        !          1185:     int a = amt;
        !          1186:     if( a+offset>MX_LOCAL_PAYLOAD ){
        !          1187:       a = MX_LOCAL_PAYLOAD - offset;
        !          1188:     }
        !          1189:     memcpy(zBuf, &aPayload[offset], a);
        !          1190:     if( a==amt ){
        !          1191:       return SQLITE_OK;
        !          1192:     }
        !          1193:     offset = 0;
        !          1194:     zBuf += a;
        !          1195:     amt -= a;
        !          1196:   }else{
        !          1197:     offset -= MX_LOCAL_PAYLOAD;
        !          1198:   }
        !          1199:   if( amt>0 ){
        !          1200:     nextPage = SWAB32(pBt, pCur->pPage->apCell[pCur->idx]->ovfl);
        !          1201:   }
        !          1202:   while( amt>0 && nextPage ){
        !          1203:     OverflowPage *pOvfl;
        !          1204:     rc = sqlitepager_get(pBt->pPager, nextPage, (void**)&pOvfl);
        !          1205:     if( rc!=0 ){
        !          1206:       return rc;
        !          1207:     }
        !          1208:     nextPage = SWAB32(pBt, pOvfl->iNext);
        !          1209:     if( offset<OVERFLOW_SIZE ){
        !          1210:       int a = amt;
        !          1211:       if( a + offset > OVERFLOW_SIZE ){
        !          1212:         a = OVERFLOW_SIZE - offset;
        !          1213:       }
        !          1214:       memcpy(zBuf, &pOvfl->aPayload[offset], a);
        !          1215:       offset = 0;
        !          1216:       amt -= a;
        !          1217:       zBuf += a;
        !          1218:     }else{
        !          1219:       offset -= OVERFLOW_SIZE;
        !          1220:     }
        !          1221:     sqlitepager_unref(pOvfl);
        !          1222:   }
        !          1223:   if( amt>0 ){
        !          1224:     return SQLITE_CORRUPT;
        !          1225:   }
        !          1226:   return SQLITE_OK;
        !          1227: }
        !          1228: 
        !          1229: /*
        !          1230: ** Read part of the key associated with cursor pCur.  A maximum
        !          1231: ** of "amt" bytes will be transfered into zBuf[].  The transfer
        !          1232: ** begins at "offset".  The number of bytes actually read is
        !          1233: ** returned. 
        !          1234: **
        !          1235: ** Change:  It used to be that the amount returned will be smaller
        !          1236: ** than the amount requested if there are not enough bytes in the key
        !          1237: ** to satisfy the request.  But now, it must be the case that there
        !          1238: ** is enough data available to satisfy the request.  If not, an exception
        !          1239: ** is raised.  The change was made in an effort to boost performance
        !          1240: ** by eliminating unneeded tests.
        !          1241: */
        !          1242: static int fileBtreeKey(BtCursor *pCur, int offset, int amt, char *zBuf){
        !          1243:   MemPage *pPage;
        !          1244: 
        !          1245:   assert( amt>=0 );
        !          1246:   assert( offset>=0 );
        !          1247:   assert( pCur->pPage!=0 );
        !          1248:   pPage = pCur->pPage;
        !          1249:   if( pCur->idx >= pPage->nCell ){
        !          1250:     return 0;
        !          1251:   }
        !          1252:   assert( amt+offset <= NKEY(pCur->pBt, pPage->apCell[pCur->idx]->h) );
        !          1253:   getPayload(pCur, offset, amt, zBuf);
        !          1254:   return amt;
        !          1255: }
        !          1256: 
        !          1257: /*
        !          1258: ** Set *pSize to the number of bytes of data in the entry the
        !          1259: ** cursor currently points to.  Always return SQLITE_OK.
        !          1260: ** Failure is not possible.  If the cursor is not currently
        !          1261: ** pointing to an entry (which can happen, for example, if
        !          1262: ** the database is empty) then *pSize is set to 0.
        !          1263: */
        !          1264: static int fileBtreeDataSize(BtCursor *pCur, int *pSize){
        !          1265:   Cell *pCell;
        !          1266:   MemPage *pPage;
        !          1267: 
        !          1268:   pPage = pCur->pPage;
        !          1269:   assert( pPage!=0 );
        !          1270:   if( pCur->idx >= pPage->nCell ){
        !          1271:     *pSize = 0;
        !          1272:   }else{
        !          1273:     pCell = pPage->apCell[pCur->idx];
        !          1274:     *pSize = NDATA(pCur->pBt, pCell->h);
        !          1275:   }
        !          1276:   return SQLITE_OK;
        !          1277: }
        !          1278: 
        !          1279: /*
        !          1280: ** Read part of the data associated with cursor pCur.  A maximum
        !          1281: ** of "amt" bytes will be transfered into zBuf[].  The transfer
        !          1282: ** begins at "offset".  The number of bytes actually read is
        !          1283: ** returned.  The amount returned will be smaller than the
        !          1284: ** amount requested if there are not enough bytes in the data
        !          1285: ** to satisfy the request.
        !          1286: */
        !          1287: static int fileBtreeData(BtCursor *pCur, int offset, int amt, char *zBuf){
        !          1288:   Cell *pCell;
        !          1289:   MemPage *pPage;
        !          1290: 
        !          1291:   assert( amt>=0 );
        !          1292:   assert( offset>=0 );
        !          1293:   assert( pCur->pPage!=0 );
        !          1294:   pPage = pCur->pPage;
        !          1295:   if( pCur->idx >= pPage->nCell ){
        !          1296:     return 0;
        !          1297:   }
        !          1298:   pCell = pPage->apCell[pCur->idx];
        !          1299:   assert( amt+offset <= NDATA(pCur->pBt, pCell->h) );
        !          1300:   getPayload(pCur, offset + NKEY(pCur->pBt, pCell->h), amt, zBuf);
        !          1301:   return amt;
        !          1302: }
        !          1303: 
        !          1304: /*
        !          1305: ** Compare an external key against the key on the entry that pCur points to.
        !          1306: **
        !          1307: ** The external key is pKey and is nKey bytes long.  The last nIgnore bytes
        !          1308: ** of the key associated with pCur are ignored, as if they do not exist.
        !          1309: ** (The normal case is for nIgnore to be zero in which case the entire
        !          1310: ** internal key is used in the comparison.)
        !          1311: **
        !          1312: ** The comparison result is written to *pRes as follows:
        !          1313: **
        !          1314: **    *pRes<0    This means pCur<pKey
        !          1315: **
        !          1316: **    *pRes==0   This means pCur==pKey for all nKey bytes
        !          1317: **
        !          1318: **    *pRes>0    This means pCur>pKey
        !          1319: **
        !          1320: ** When one key is an exact prefix of the other, the shorter key is
        !          1321: ** considered less than the longer one.  In order to be equal the
        !          1322: ** keys must be exactly the same length. (The length of the pCur key
        !          1323: ** is the actual key length minus nIgnore bytes.)
        !          1324: */
        !          1325: static int fileBtreeKeyCompare(
        !          1326:   BtCursor *pCur,       /* Pointer to entry to compare against */
        !          1327:   const void *pKey,     /* Key to compare against entry that pCur points to */
        !          1328:   int nKey,             /* Number of bytes in pKey */
        !          1329:   int nIgnore,          /* Ignore this many bytes at the end of pCur */
        !          1330:   int *pResult          /* Write the result here */
        !          1331: ){
        !          1332:   Pgno nextPage;
        !          1333:   int n, c, rc, nLocal;
        !          1334:   Cell *pCell;
        !          1335:   Btree *pBt = pCur->pBt;
        !          1336:   const char *zKey  = (const char*)pKey;
        !          1337: 
        !          1338:   assert( pCur->pPage );
        !          1339:   assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
        !          1340:   pCell = pCur->pPage->apCell[pCur->idx];
        !          1341:   nLocal = NKEY(pBt, pCell->h) - nIgnore;
        !          1342:   if( nLocal<0 ) nLocal = 0;
        !          1343:   n = nKey<nLocal ? nKey : nLocal;
        !          1344:   if( n>MX_LOCAL_PAYLOAD ){
        !          1345:     n = MX_LOCAL_PAYLOAD;
        !          1346:   }
        !          1347:   c = memcmp(pCell->aPayload, zKey, n);
        !          1348:   if( c!=0 ){
        !          1349:     *pResult = c;
        !          1350:     return SQLITE_OK;
        !          1351:   }
        !          1352:   zKey += n;
        !          1353:   nKey -= n;
        !          1354:   nLocal -= n;
        !          1355:   nextPage = SWAB32(pBt, pCell->ovfl);
        !          1356:   while( nKey>0 && nLocal>0 ){
        !          1357:     OverflowPage *pOvfl;
        !          1358:     if( nextPage==0 ){
        !          1359:       return SQLITE_CORRUPT;
        !          1360:     }
        !          1361:     rc = sqlitepager_get(pBt->pPager, nextPage, (void**)&pOvfl);
        !          1362:     if( rc ){
        !          1363:       return rc;
        !          1364:     }
        !          1365:     nextPage = SWAB32(pBt, pOvfl->iNext);
        !          1366:     n = nKey<nLocal ? nKey : nLocal;
        !          1367:     if( n>OVERFLOW_SIZE ){
        !          1368:       n = OVERFLOW_SIZE;
        !          1369:     }
        !          1370:     c = memcmp(pOvfl->aPayload, zKey, n);
        !          1371:     sqlitepager_unref(pOvfl);
        !          1372:     if( c!=0 ){
        !          1373:       *pResult = c;
        !          1374:       return SQLITE_OK;
        !          1375:     }
        !          1376:     nKey -= n;
        !          1377:     nLocal -= n;
        !          1378:     zKey += n;
        !          1379:   }
        !          1380:   if( c==0 ){
        !          1381:     c = nLocal - nKey;
        !          1382:   }
        !          1383:   *pResult = c;
        !          1384:   return SQLITE_OK;
        !          1385: }
        !          1386: 
        !          1387: /*
        !          1388: ** Move the cursor down to a new child page.  The newPgno argument is the
        !          1389: ** page number of the child page in the byte order of the disk image.
        !          1390: */
        !          1391: static int moveToChild(BtCursor *pCur, int newPgno){
        !          1392:   int rc;
        !          1393:   MemPage *pNewPage;
        !          1394:   Btree *pBt = pCur->pBt;
        !          1395: 
        !          1396:   newPgno = SWAB32(pBt, newPgno);
        !          1397:   rc = sqlitepager_get(pBt->pPager, newPgno, (void**)&pNewPage);
        !          1398:   if( rc ) return rc;
        !          1399:   rc = initPage(pBt, pNewPage, newPgno, pCur->pPage);
        !          1400:   if( rc ) return rc;
        !          1401:   assert( pCur->idx>=pCur->pPage->nCell
        !          1402:           || pCur->pPage->apCell[pCur->idx]->h.leftChild==SWAB32(pBt,newPgno) );
        !          1403:   assert( pCur->idx<pCur->pPage->nCell
        !          1404:           || pCur->pPage->u.hdr.rightChild==SWAB32(pBt,newPgno) );
        !          1405:   pNewPage->idxParent = pCur->idx;
        !          1406:   pCur->pPage->idxShift = 0;
        !          1407:   sqlitepager_unref(pCur->pPage);
        !          1408:   pCur->pPage = pNewPage;
        !          1409:   pCur->idx = 0;
        !          1410:   if( pNewPage->nCell<1 ){
        !          1411:     return SQLITE_CORRUPT;
        !          1412:   }
        !          1413:   return SQLITE_OK;
        !          1414: }
        !          1415: 
        !          1416: /*
        !          1417: ** Move the cursor up to the parent page.
        !          1418: **
        !          1419: ** pCur->idx is set to the cell index that contains the pointer
        !          1420: ** to the page we are coming from.  If we are coming from the
        !          1421: ** right-most child page then pCur->idx is set to one more than
        !          1422: ** the largest cell index.
        !          1423: */
        !          1424: static void moveToParent(BtCursor *pCur){
        !          1425:   Pgno oldPgno;
        !          1426:   MemPage *pParent;
        !          1427:   MemPage *pPage;
        !          1428:   int idxParent;
        !          1429:   pPage = pCur->pPage;
        !          1430:   assert( pPage!=0 );
        !          1431:   pParent = pPage->pParent;
        !          1432:   assert( pParent!=0 );
        !          1433:   idxParent = pPage->idxParent;
        !          1434:   sqlitepager_ref(pParent);
        !          1435:   sqlitepager_unref(pPage);
        !          1436:   pCur->pPage = pParent;
        !          1437:   assert( pParent->idxShift==0 );
        !          1438:   if( pParent->idxShift==0 ){
        !          1439:     pCur->idx = idxParent;
        !          1440: #ifndef NDEBUG  
        !          1441:     /* Verify that pCur->idx is the correct index to point back to the child
        !          1442:     ** page we just came from 
        !          1443:     */
        !          1444:     oldPgno = SWAB32(pCur->pBt, sqlitepager_pagenumber(pPage));
        !          1445:     if( pCur->idx<pParent->nCell ){
        !          1446:       assert( pParent->apCell[idxParent]->h.leftChild==oldPgno );
        !          1447:     }else{
        !          1448:       assert( pParent->u.hdr.rightChild==oldPgno );
        !          1449:     }
        !          1450: #endif
        !          1451:   }else{
        !          1452:     /* The MemPage.idxShift flag indicates that cell indices might have 
        !          1453:     ** changed since idxParent was set and hence idxParent might be out
        !          1454:     ** of date.  So recompute the parent cell index by scanning all cells
        !          1455:     ** and locating the one that points to the child we just came from.
        !          1456:     */
        !          1457:     int i;
        !          1458:     pCur->idx = pParent->nCell;
        !          1459:     oldPgno = SWAB32(pCur->pBt, sqlitepager_pagenumber(pPage));
        !          1460:     for(i=0; i<pParent->nCell; i++){
        !          1461:       if( pParent->apCell[i]->h.leftChild==oldPgno ){
        !          1462:         pCur->idx = i;
        !          1463:         break;
        !          1464:       }
        !          1465:     }
        !          1466:   }
        !          1467: }
        !          1468: 
        !          1469: /*
        !          1470: ** Move the cursor to the root page
        !          1471: */
        !          1472: static int moveToRoot(BtCursor *pCur){
        !          1473:   MemPage *pNew;
        !          1474:   int rc;
        !          1475:   Btree *pBt = pCur->pBt;
        !          1476: 
        !          1477:   rc = sqlitepager_get(pBt->pPager, pCur->pgnoRoot, (void**)&pNew);
        !          1478:   if( rc ) return rc;
        !          1479:   rc = initPage(pBt, pNew, pCur->pgnoRoot, 0);
        !          1480:   if( rc ) return rc;
        !          1481:   sqlitepager_unref(pCur->pPage);
        !          1482:   pCur->pPage = pNew;
        !          1483:   pCur->idx = 0;
        !          1484:   return SQLITE_OK;
        !          1485: }
        !          1486: 
        !          1487: /*
        !          1488: ** Move the cursor down to the left-most leaf entry beneath the
        !          1489: ** entry to which it is currently pointing.
        !          1490: */
        !          1491: static int moveToLeftmost(BtCursor *pCur){
        !          1492:   Pgno pgno;
        !          1493:   int rc;
        !          1494: 
        !          1495:   while( (pgno = pCur->pPage->apCell[pCur->idx]->h.leftChild)!=0 ){
        !          1496:     rc = moveToChild(pCur, pgno);
        !          1497:     if( rc ) return rc;
        !          1498:   }
        !          1499:   return SQLITE_OK;
        !          1500: }
        !          1501: 
        !          1502: /*
        !          1503: ** Move the cursor down to the right-most leaf entry beneath the
        !          1504: ** page to which it is currently pointing.  Notice the difference
        !          1505: ** between moveToLeftmost() and moveToRightmost().  moveToLeftmost()
        !          1506: ** finds the left-most entry beneath the *entry* whereas moveToRightmost()
        !          1507: ** finds the right-most entry beneath the *page*.
        !          1508: */
        !          1509: static int moveToRightmost(BtCursor *pCur){
        !          1510:   Pgno pgno;
        !          1511:   int rc;
        !          1512: 
        !          1513:   while( (pgno = pCur->pPage->u.hdr.rightChild)!=0 ){
        !          1514:     pCur->idx = pCur->pPage->nCell;
        !          1515:     rc = moveToChild(pCur, pgno);
        !          1516:     if( rc ) return rc;
        !          1517:   }
        !          1518:   pCur->idx = pCur->pPage->nCell - 1;
        !          1519:   return SQLITE_OK;
        !          1520: }
        !          1521: 
        !          1522: /* Move the cursor to the first entry in the table.  Return SQLITE_OK
        !          1523: ** on success.  Set *pRes to 0 if the cursor actually points to something
        !          1524: ** or set *pRes to 1 if the table is empty.
        !          1525: */
        !          1526: static int fileBtreeFirst(BtCursor *pCur, int *pRes){
        !          1527:   int rc;
        !          1528:   if( pCur->pPage==0 ) return SQLITE_ABORT;
        !          1529:   rc = moveToRoot(pCur);
        !          1530:   if( rc ) return rc;
        !          1531:   if( pCur->pPage->nCell==0 ){
        !          1532:     *pRes = 1;
        !          1533:     return SQLITE_OK;
        !          1534:   }
        !          1535:   *pRes = 0;
        !          1536:   rc = moveToLeftmost(pCur);
        !          1537:   pCur->eSkip = SKIP_NONE;
        !          1538:   return rc;
        !          1539: }
        !          1540: 
        !          1541: /* Move the cursor to the last entry in the table.  Return SQLITE_OK
        !          1542: ** on success.  Set *pRes to 0 if the cursor actually points to something
        !          1543: ** or set *pRes to 1 if the table is empty.
        !          1544: */
        !          1545: static int fileBtreeLast(BtCursor *pCur, int *pRes){
        !          1546:   int rc;
        !          1547:   if( pCur->pPage==0 ) return SQLITE_ABORT;
        !          1548:   rc = moveToRoot(pCur);
        !          1549:   if( rc ) return rc;
        !          1550:   assert( pCur->pPage->isInit );
        !          1551:   if( pCur->pPage->nCell==0 ){
        !          1552:     *pRes = 1;
        !          1553:     return SQLITE_OK;
        !          1554:   }
        !          1555:   *pRes = 0;
        !          1556:   rc = moveToRightmost(pCur);
        !          1557:   pCur->eSkip = SKIP_NONE;
        !          1558:   return rc;
        !          1559: }
        !          1560: 
        !          1561: /* Move the cursor so that it points to an entry near pKey.
        !          1562: ** Return a success code.
        !          1563: **
        !          1564: ** If an exact match is not found, then the cursor is always
        !          1565: ** left pointing at a leaf page which would hold the entry if it
        !          1566: ** were present.  The cursor might point to an entry that comes
        !          1567: ** before or after the key.
        !          1568: **
        !          1569: ** The result of comparing the key with the entry to which the
        !          1570: ** cursor is left pointing is stored in pCur->iMatch.  The same
        !          1571: ** value is also written to *pRes if pRes!=NULL.  The meaning of
        !          1572: ** this value is as follows:
        !          1573: **
        !          1574: **     *pRes<0      The cursor is left pointing at an entry that
        !          1575: **                  is smaller than pKey or if the table is empty
        !          1576: **                  and the cursor is therefore left point to nothing.
        !          1577: **
        !          1578: **     *pRes==0     The cursor is left pointing at an entry that
        !          1579: **                  exactly matches pKey.
        !          1580: **
        !          1581: **     *pRes>0      The cursor is left pointing at an entry that
        !          1582: **                  is larger than pKey.
        !          1583: */
        !          1584: static
        !          1585: int fileBtreeMoveto(BtCursor *pCur, const void *pKey, int nKey, int *pRes){
        !          1586:   int rc;
        !          1587:   if( pCur->pPage==0 ) return SQLITE_ABORT;
        !          1588:   pCur->eSkip = SKIP_NONE;
        !          1589:   rc = moveToRoot(pCur);
        !          1590:   if( rc ) return rc;
        !          1591:   for(;;){
        !          1592:     int lwr, upr;
        !          1593:     Pgno chldPg;
        !          1594:     MemPage *pPage = pCur->pPage;
        !          1595:     int c = -1;  /* pRes return if table is empty must be -1 */
        !          1596:     lwr = 0;
        !          1597:     upr = pPage->nCell-1;
        !          1598:     while( lwr<=upr ){
        !          1599:       pCur->idx = (lwr+upr)/2;
        !          1600:       rc = fileBtreeKeyCompare(pCur, pKey, nKey, 0, &c);
        !          1601:       if( rc ) return rc;
        !          1602:       if( c==0 ){
        !          1603:         pCur->iMatch = c;
        !          1604:         if( pRes ) *pRes = 0;
        !          1605:         return SQLITE_OK;
        !          1606:       }
        !          1607:       if( c<0 ){
        !          1608:         lwr = pCur->idx+1;
        !          1609:       }else{
        !          1610:         upr = pCur->idx-1;
        !          1611:       }
        !          1612:     }
        !          1613:     assert( lwr==upr+1 );
        !          1614:     assert( pPage->isInit );
        !          1615:     if( lwr>=pPage->nCell ){
        !          1616:       chldPg = pPage->u.hdr.rightChild;
        !          1617:     }else{
        !          1618:       chldPg = pPage->apCell[lwr]->h.leftChild;
        !          1619:     }
        !          1620:     if( chldPg==0 ){
        !          1621:       pCur->iMatch = c;
        !          1622:       if( pRes ) *pRes = c;
        !          1623:       return SQLITE_OK;
        !          1624:     }
        !          1625:     pCur->idx = lwr;
        !          1626:     rc = moveToChild(pCur, chldPg);
        !          1627:     if( rc ) return rc;
        !          1628:   }
        !          1629:   /* NOT REACHED */
        !          1630: }
        !          1631: 
        !          1632: /*
        !          1633: ** Advance the cursor to the next entry in the database.  If
        !          1634: ** successful then set *pRes=0.  If the cursor
        !          1635: ** was already pointing to the last entry in the database before
        !          1636: ** this routine was called, then set *pRes=1.
        !          1637: */
        !          1638: static int fileBtreeNext(BtCursor *pCur, int *pRes){
        !          1639:   int rc;
        !          1640:   MemPage *pPage = pCur->pPage;
        !          1641:   assert( pRes!=0 );
        !          1642:   if( pPage==0 ){
        !          1643:     *pRes = 1;
        !          1644:     return SQLITE_ABORT;
        !          1645:   }
        !          1646:   assert( pPage->isInit );
        !          1647:   assert( pCur->eSkip!=SKIP_INVALID );
        !          1648:   if( pPage->nCell==0 ){
        !          1649:     *pRes = 1;
        !          1650:     return SQLITE_OK;
        !          1651:   }
        !          1652:   assert( pCur->idx<pPage->nCell );
        !          1653:   if( pCur->eSkip==SKIP_NEXT ){
        !          1654:     pCur->eSkip = SKIP_NONE;
        !          1655:     *pRes = 0;
        !          1656:     return SQLITE_OK;
        !          1657:   }
        !          1658:   pCur->eSkip = SKIP_NONE;
        !          1659:   pCur->idx++;
        !          1660:   if( pCur->idx>=pPage->nCell ){
        !          1661:     if( pPage->u.hdr.rightChild ){
        !          1662:       rc = moveToChild(pCur, pPage->u.hdr.rightChild);
        !          1663:       if( rc ) return rc;
        !          1664:       rc = moveToLeftmost(pCur);
        !          1665:       *pRes = 0;
        !          1666:       return rc;
        !          1667:     }
        !          1668:     do{
        !          1669:       if( pPage->pParent==0 ){
        !          1670:         *pRes = 1;
        !          1671:         return SQLITE_OK;
        !          1672:       }
        !          1673:       moveToParent(pCur);
        !          1674:       pPage = pCur->pPage;
        !          1675:     }while( pCur->idx>=pPage->nCell );
        !          1676:     *pRes = 0;
        !          1677:     return SQLITE_OK;
        !          1678:   }
        !          1679:   *pRes = 0;
        !          1680:   if( pPage->u.hdr.rightChild==0 ){
        !          1681:     return SQLITE_OK;
        !          1682:   }
        !          1683:   rc = moveToLeftmost(pCur);
        !          1684:   return rc;
        !          1685: }
        !          1686: 
        !          1687: /*
        !          1688: ** Step the cursor to the back to the previous entry in the database.  If
        !          1689: ** successful then set *pRes=0.  If the cursor
        !          1690: ** was already pointing to the first entry in the database before
        !          1691: ** this routine was called, then set *pRes=1.
        !          1692: */
        !          1693: static int fileBtreePrevious(BtCursor *pCur, int *pRes){
        !          1694:   int rc;
        !          1695:   Pgno pgno;
        !          1696:   MemPage *pPage;
        !          1697:   pPage = pCur->pPage;
        !          1698:   if( pPage==0 ){
        !          1699:     *pRes = 1;
        !          1700:     return SQLITE_ABORT;
        !          1701:   }
        !          1702:   assert( pPage->isInit );
        !          1703:   assert( pCur->eSkip!=SKIP_INVALID );
        !          1704:   if( pPage->nCell==0 ){
        !          1705:     *pRes = 1;
        !          1706:     return SQLITE_OK;
        !          1707:   }
        !          1708:   if( pCur->eSkip==SKIP_PREV ){
        !          1709:     pCur->eSkip = SKIP_NONE;
        !          1710:     *pRes = 0;
        !          1711:     return SQLITE_OK;
        !          1712:   }
        !          1713:   pCur->eSkip = SKIP_NONE;
        !          1714:   assert( pCur->idx>=0 );
        !          1715:   if( (pgno = pPage->apCell[pCur->idx]->h.leftChild)!=0 ){
        !          1716:     rc = moveToChild(pCur, pgno);
        !          1717:     if( rc ) return rc;
        !          1718:     rc = moveToRightmost(pCur);
        !          1719:   }else{
        !          1720:     while( pCur->idx==0 ){
        !          1721:       if( pPage->pParent==0 ){
        !          1722:         if( pRes ) *pRes = 1;
        !          1723:         return SQLITE_OK;
        !          1724:       }
        !          1725:       moveToParent(pCur);
        !          1726:       pPage = pCur->pPage;
        !          1727:     }
        !          1728:     pCur->idx--;
        !          1729:     rc = SQLITE_OK;
        !          1730:   }
        !          1731:   *pRes = 0;
        !          1732:   return rc;
        !          1733: }
        !          1734: 
        !          1735: /*
        !          1736: ** Allocate a new page from the database file.
        !          1737: **
        !          1738: ** The new page is marked as dirty.  (In other words, sqlitepager_write()
        !          1739: ** has already been called on the new page.)  The new page has also
        !          1740: ** been referenced and the calling routine is responsible for calling
        !          1741: ** sqlitepager_unref() on the new page when it is done.
        !          1742: **
        !          1743: ** SQLITE_OK is returned on success.  Any other return value indicates
        !          1744: ** an error.  *ppPage and *pPgno are undefined in the event of an error.
        !          1745: ** Do not invoke sqlitepager_unref() on *ppPage if an error is returned.
        !          1746: **
        !          1747: ** If the "nearby" parameter is not 0, then a (feeble) effort is made to 
        !          1748: ** locate a page close to the page number "nearby".  This can be used in an
        !          1749: ** attempt to keep related pages close to each other in the database file,
        !          1750: ** which in turn can make database access faster.
        !          1751: */
        !          1752: static int allocatePage(Btree *pBt, MemPage **ppPage, Pgno *pPgno, Pgno nearby){
        !          1753:   PageOne *pPage1 = pBt->page1;
        !          1754:   int rc;
        !          1755:   if( pPage1->freeList ){
        !          1756:     OverflowPage *pOvfl;
        !          1757:     FreelistInfo *pInfo;
        !          1758: 
        !          1759:     rc = sqlitepager_write(pPage1);
        !          1760:     if( rc ) return rc;
        !          1761:     SWAB_ADD(pBt, pPage1->nFree, -1);
        !          1762:     rc = sqlitepager_get(pBt->pPager, SWAB32(pBt, pPage1->freeList),
        !          1763:                         (void**)&pOvfl);
        !          1764:     if( rc ) return rc;
        !          1765:     rc = sqlitepager_write(pOvfl);
        !          1766:     if( rc ){
        !          1767:       sqlitepager_unref(pOvfl);
        !          1768:       return rc;
        !          1769:     }
        !          1770:     pInfo = (FreelistInfo*)pOvfl->aPayload;
        !          1771:     if( pInfo->nFree==0 ){
        !          1772:       *pPgno = SWAB32(pBt, pPage1->freeList);
        !          1773:       pPage1->freeList = pOvfl->iNext;
        !          1774:       *ppPage = (MemPage*)pOvfl;
        !          1775:     }else{
        !          1776:       int closest, n;
        !          1777:       n = SWAB32(pBt, pInfo->nFree);
        !          1778:       if( n>1 && nearby>0 ){
        !          1779:         int i, dist;
        !          1780:         closest = 0;
        !          1781:         dist = SWAB32(pBt, pInfo->aFree[0]) - nearby;
        !          1782:         if( dist<0 ) dist = -dist;
        !          1783:         for(i=1; i<n; i++){
        !          1784:           int d2 = SWAB32(pBt, pInfo->aFree[i]) - nearby;
        !          1785:           if( d2<0 ) d2 = -d2;
        !          1786:           if( d2<dist ) closest = i;
        !          1787:         }
        !          1788:       }else{
        !          1789:         closest = 0;
        !          1790:       }
        !          1791:       SWAB_ADD(pBt, pInfo->nFree, -1);
        !          1792:       *pPgno = SWAB32(pBt, pInfo->aFree[closest]);
        !          1793:       pInfo->aFree[closest] = pInfo->aFree[n-1];
        !          1794:       rc = sqlitepager_get(pBt->pPager, *pPgno, (void**)ppPage);
        !          1795:       sqlitepager_unref(pOvfl);
        !          1796:       if( rc==SQLITE_OK ){
        !          1797:         sqlitepager_dont_rollback(*ppPage);
        !          1798:         rc = sqlitepager_write(*ppPage);
        !          1799:       }
        !          1800:     }
        !          1801:   }else{
        !          1802:     *pPgno = sqlitepager_pagecount(pBt->pPager) + 1;
        !          1803:     rc = sqlitepager_get(pBt->pPager, *pPgno, (void**)ppPage);
        !          1804:     if( rc ) return rc;
        !          1805:     rc = sqlitepager_write(*ppPage);
        !          1806:   }
        !          1807:   return rc;
        !          1808: }
        !          1809: 
        !          1810: /*
        !          1811: ** Add a page of the database file to the freelist.  Either pgno or
        !          1812: ** pPage but not both may be 0. 
        !          1813: **
        !          1814: ** sqlitepager_unref() is NOT called for pPage.
        !          1815: */
        !          1816: static int freePage(Btree *pBt, void *pPage, Pgno pgno){
        !          1817:   PageOne *pPage1 = pBt->page1;
        !          1818:   OverflowPage *pOvfl = (OverflowPage*)pPage;
        !          1819:   int rc;
        !          1820:   int needUnref = 0;
        !          1821:   MemPage *pMemPage;
        !          1822: 
        !          1823:   if( pgno==0 ){
        !          1824:     assert( pOvfl!=0 );
        !          1825:     pgno = sqlitepager_pagenumber(pOvfl);
        !          1826:   }
        !          1827:   assert( pgno>2 );
        !          1828:   assert( sqlitepager_pagenumber(pOvfl)==pgno );
        !          1829:   pMemPage = (MemPage*)pPage;
        !          1830:   pMemPage->isInit = 0;
        !          1831:   if( pMemPage->pParent ){
        !          1832:     sqlitepager_unref(pMemPage->pParent);
        !          1833:     pMemPage->pParent = 0;
        !          1834:   }
        !          1835:   rc = sqlitepager_write(pPage1);
        !          1836:   if( rc ){
        !          1837:     return rc;
        !          1838:   }
        !          1839:   SWAB_ADD(pBt, pPage1->nFree, 1);
        !          1840:   if( pPage1->nFree!=0 && pPage1->freeList!=0 ){
        !          1841:     OverflowPage *pFreeIdx;
        !          1842:     rc = sqlitepager_get(pBt->pPager, SWAB32(pBt, pPage1->freeList),
        !          1843:                         (void**)&pFreeIdx);
        !          1844:     if( rc==SQLITE_OK ){
        !          1845:       FreelistInfo *pInfo = (FreelistInfo*)pFreeIdx->aPayload;
        !          1846:       int n = SWAB32(pBt, pInfo->nFree);
        !          1847:       if( n<(sizeof(pInfo->aFree)/sizeof(pInfo->aFree[0])) ){
        !          1848:         rc = sqlitepager_write(pFreeIdx);
        !          1849:         if( rc==SQLITE_OK ){
        !          1850:           pInfo->aFree[n] = SWAB32(pBt, pgno);
        !          1851:           SWAB_ADD(pBt, pInfo->nFree, 1);
        !          1852:           sqlitepager_unref(pFreeIdx);
        !          1853:           sqlitepager_dont_write(pBt->pPager, pgno);
        !          1854:           return rc;
        !          1855:         }
        !          1856:       }
        !          1857:       sqlitepager_unref(pFreeIdx);
        !          1858:     }
        !          1859:   }
        !          1860:   if( pOvfl==0 ){
        !          1861:     assert( pgno>0 );
        !          1862:     rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pOvfl);
        !          1863:     if( rc ) return rc;
        !          1864:     needUnref = 1;
        !          1865:   }
        !          1866:   rc = sqlitepager_write(pOvfl);
        !          1867:   if( rc ){
        !          1868:     if( needUnref ) sqlitepager_unref(pOvfl);
        !          1869:     return rc;
        !          1870:   }
        !          1871:   pOvfl->iNext = pPage1->freeList;
        !          1872:   pPage1->freeList = SWAB32(pBt, pgno);
        !          1873:   memset(pOvfl->aPayload, 0, OVERFLOW_SIZE);
        !          1874:   if( needUnref ) rc = sqlitepager_unref(pOvfl);
        !          1875:   return rc;
        !          1876: }
        !          1877: 
        !          1878: /*
        !          1879: ** Erase all the data out of a cell.  This involves returning overflow
        !          1880: ** pages back the freelist.
        !          1881: */
        !          1882: static int clearCell(Btree *pBt, Cell *pCell){
        !          1883:   Pager *pPager = pBt->pPager;
        !          1884:   OverflowPage *pOvfl;
        !          1885:   Pgno ovfl, nextOvfl;
        !          1886:   int rc;
        !          1887: 
        !          1888:   if( NKEY(pBt, pCell->h) + NDATA(pBt, pCell->h) <= MX_LOCAL_PAYLOAD ){
        !          1889:     return SQLITE_OK;
        !          1890:   }
        !          1891:   ovfl = SWAB32(pBt, pCell->ovfl);
        !          1892:   pCell->ovfl = 0;
        !          1893:   while( ovfl ){
        !          1894:     rc = sqlitepager_get(pPager, ovfl, (void**)&pOvfl);
        !          1895:     if( rc ) return rc;
        !          1896:     nextOvfl = SWAB32(pBt, pOvfl->iNext);
        !          1897:     rc = freePage(pBt, pOvfl, ovfl);
        !          1898:     if( rc ) return rc;
        !          1899:     sqlitepager_unref(pOvfl);
        !          1900:     ovfl = nextOvfl;
        !          1901:   }
        !          1902:   return SQLITE_OK;
        !          1903: }
        !          1904: 
        !          1905: /*
        !          1906: ** Create a new cell from key and data.  Overflow pages are allocated as
        !          1907: ** necessary and linked to this cell.  
        !          1908: */
        !          1909: static int fillInCell(
        !          1910:   Btree *pBt,              /* The whole Btree.  Needed to allocate pages */
        !          1911:   Cell *pCell,             /* Populate this Cell structure */
        !          1912:   const void *pKey, int nKey,    /* The key */
        !          1913:   const void *pData,int nData    /* The data */
        !          1914: ){
        !          1915:   OverflowPage *pOvfl, *pPrior;
        !          1916:   Pgno *pNext;
        !          1917:   int spaceLeft;
        !          1918:   int n, rc;
        !          1919:   int nPayload;
        !          1920:   const char *pPayload;
        !          1921:   char *pSpace;
        !          1922:   Pgno nearby = 0;
        !          1923: 
        !          1924:   pCell->h.leftChild = 0;
        !          1925:   pCell->h.nKey = SWAB16(pBt, nKey & 0xffff);
        !          1926:   pCell->h.nKeyHi = nKey >> 16;
        !          1927:   pCell->h.nData = SWAB16(pBt, nData & 0xffff);
        !          1928:   pCell->h.nDataHi = nData >> 16;
        !          1929:   pCell->h.iNext = 0;
        !          1930: 
        !          1931:   pNext = &pCell->ovfl;
        !          1932:   pSpace = pCell->aPayload;
        !          1933:   spaceLeft = MX_LOCAL_PAYLOAD;
        !          1934:   pPayload = pKey;
        !          1935:   pKey = 0;
        !          1936:   nPayload = nKey;
        !          1937:   pPrior = 0;
        !          1938:   while( nPayload>0 ){
        !          1939:     if( spaceLeft==0 ){
        !          1940:       rc = allocatePage(pBt, (MemPage**)&pOvfl, pNext, nearby);
        !          1941:       if( rc ){
        !          1942:         *pNext = 0;
        !          1943:       }else{
        !          1944:         nearby = *pNext;
        !          1945:       }
        !          1946:       if( pPrior ) sqlitepager_unref(pPrior);
        !          1947:       if( rc ){
        !          1948:         clearCell(pBt, pCell);
        !          1949:         return rc;
        !          1950:       }
        !          1951:       if( pBt->needSwab ) *pNext = swab32(*pNext);
        !          1952:       pPrior = pOvfl;
        !          1953:       spaceLeft = OVERFLOW_SIZE;
        !          1954:       pSpace = pOvfl->aPayload;
        !          1955:       pNext = &pOvfl->iNext;
        !          1956:     }
        !          1957:     n = nPayload;
        !          1958:     if( n>spaceLeft ) n = spaceLeft;
        !          1959:     memcpy(pSpace, pPayload, n);
        !          1960:     nPayload -= n;
        !          1961:     if( nPayload==0 && pData ){
        !          1962:       pPayload = pData;
        !          1963:       nPayload = nData;
        !          1964:       pData = 0;
        !          1965:     }else{
        !          1966:       pPayload += n;
        !          1967:     }
        !          1968:     spaceLeft -= n;
        !          1969:     pSpace += n;
        !          1970:   }
        !          1971:   *pNext = 0;
        !          1972:   if( pPrior ){
        !          1973:     sqlitepager_unref(pPrior);
        !          1974:   }
        !          1975:   return SQLITE_OK;
        !          1976: }
        !          1977: 
        !          1978: /*
        !          1979: ** Change the MemPage.pParent pointer on the page whose number is
        !          1980: ** given in the second argument so that MemPage.pParent holds the
        !          1981: ** pointer in the third argument.
        !          1982: */
        !          1983: static void reparentPage(Pager *pPager, Pgno pgno, MemPage *pNewParent,int idx){
        !          1984:   MemPage *pThis;
        !          1985: 
        !          1986:   if( pgno==0 ) return;
        !          1987:   assert( pPager!=0 );
        !          1988:   pThis = sqlitepager_lookup(pPager, pgno);
        !          1989:   if( pThis && pThis->isInit ){
        !          1990:     if( pThis->pParent!=pNewParent ){
        !          1991:       if( pThis->pParent ) sqlitepager_unref(pThis->pParent);
        !          1992:       pThis->pParent = pNewParent;
        !          1993:       if( pNewParent ) sqlitepager_ref(pNewParent);
        !          1994:     }
        !          1995:     pThis->idxParent = idx;
        !          1996:     sqlitepager_unref(pThis);
        !          1997:   }
        !          1998: }
        !          1999: 
        !          2000: /*
        !          2001: ** Reparent all children of the given page to be the given page.
        !          2002: ** In other words, for every child of pPage, invoke reparentPage()
        !          2003: ** to make sure that each child knows that pPage is its parent.
        !          2004: **
        !          2005: ** This routine gets called after you memcpy() one page into
        !          2006: ** another.
        !          2007: */
        !          2008: static void reparentChildPages(Btree *pBt, MemPage *pPage){
        !          2009:   int i;
        !          2010:   Pager *pPager = pBt->pPager;
        !          2011:   for(i=0; i<pPage->nCell; i++){
        !          2012:     reparentPage(pPager, SWAB32(pBt, pPage->apCell[i]->h.leftChild), pPage, i);
        !          2013:   }
        !          2014:   reparentPage(pPager, SWAB32(pBt, pPage->u.hdr.rightChild), pPage, i);
        !          2015:   pPage->idxShift = 0;
        !          2016: }
        !          2017: 
        !          2018: /*
        !          2019: ** Remove the i-th cell from pPage.  This routine effects pPage only.
        !          2020: ** The cell content is not freed or deallocated.  It is assumed that
        !          2021: ** the cell content has been copied someplace else.  This routine just
        !          2022: ** removes the reference to the cell from pPage.
        !          2023: **
        !          2024: ** "sz" must be the number of bytes in the cell.
        !          2025: **
        !          2026: ** Do not bother maintaining the integrity of the linked list of Cells.
        !          2027: ** Only the pPage->apCell[] array is important.  The relinkCellList() 
        !          2028: ** routine will be called soon after this routine in order to rebuild 
        !          2029: ** the linked list.
        !          2030: */
        !          2031: static void dropCell(Btree *pBt, MemPage *pPage, int idx, int sz){
        !          2032:   int j;
        !          2033:   assert( idx>=0 && idx<pPage->nCell );
        !          2034:   assert( sz==cellSize(pBt, pPage->apCell[idx]) );
        !          2035:   assert( sqlitepager_iswriteable(pPage) );
        !          2036:   freeSpace(pBt, pPage, Addr(pPage->apCell[idx]) - Addr(pPage), sz);
        !          2037:   for(j=idx; j<pPage->nCell-1; j++){
        !          2038:     pPage->apCell[j] = pPage->apCell[j+1];
        !          2039:   }
        !          2040:   pPage->nCell--;
        !          2041:   pPage->idxShift = 1;
        !          2042: }
        !          2043: 
        !          2044: /*
        !          2045: ** Insert a new cell on pPage at cell index "i".  pCell points to the
        !          2046: ** content of the cell.
        !          2047: **
        !          2048: ** If the cell content will fit on the page, then put it there.  If it
        !          2049: ** will not fit, then just make pPage->apCell[i] point to the content
        !          2050: ** and set pPage->isOverfull.  
        !          2051: **
        !          2052: ** Do not bother maintaining the integrity of the linked list of Cells.
        !          2053: ** Only the pPage->apCell[] array is important.  The relinkCellList() 
        !          2054: ** routine will be called soon after this routine in order to rebuild 
        !          2055: ** the linked list.
        !          2056: */
        !          2057: static void insertCell(Btree *pBt, MemPage *pPage, int i, Cell *pCell, int sz){
        !          2058:   int idx, j;
        !          2059:   assert( i>=0 && i<=pPage->nCell );
        !          2060:   assert( sz==cellSize(pBt, pCell) );
        !          2061:   assert( sqlitepager_iswriteable(pPage) );
        !          2062:   idx = allocateSpace(pBt, pPage, sz);
        !          2063:   for(j=pPage->nCell; j>i; j--){
        !          2064:     pPage->apCell[j] = pPage->apCell[j-1];
        !          2065:   }
        !          2066:   pPage->nCell++;
        !          2067:   if( idx<=0 ){
        !          2068:     pPage->isOverfull = 1;
        !          2069:     pPage->apCell[i] = pCell;
        !          2070:   }else{
        !          2071:     memcpy(&pPage->u.aDisk[idx], pCell, sz);
        !          2072:     pPage->apCell[i] = (Cell*)&pPage->u.aDisk[idx];
        !          2073:   }
        !          2074:   pPage->idxShift = 1;
        !          2075: }
        !          2076: 
        !          2077: /*
        !          2078: ** Rebuild the linked list of cells on a page so that the cells
        !          2079: ** occur in the order specified by the pPage->apCell[] array.  
        !          2080: ** Invoke this routine once to repair damage after one or more
        !          2081: ** invocations of either insertCell() or dropCell().
        !          2082: */
        !          2083: static void relinkCellList(Btree *pBt, MemPage *pPage){
        !          2084:   int i;
        !          2085:   u16 *pIdx;
        !          2086:   assert( sqlitepager_iswriteable(pPage) );
        !          2087:   pIdx = &pPage->u.hdr.firstCell;
        !          2088:   for(i=0; i<pPage->nCell; i++){
        !          2089:     int idx = Addr(pPage->apCell[i]) - Addr(pPage);
        !          2090:     assert( idx>0 && idx<SQLITE_USABLE_SIZE );
        !          2091:     *pIdx = SWAB16(pBt, idx);
        !          2092:     pIdx = &pPage->apCell[i]->h.iNext;
        !          2093:   }
        !          2094:   *pIdx = 0;
        !          2095: }
        !          2096: 
        !          2097: /*
        !          2098: ** Make a copy of the contents of pFrom into pTo.  The pFrom->apCell[]
        !          2099: ** pointers that point into pFrom->u.aDisk[] must be adjusted to point
        !          2100: ** into pTo->u.aDisk[] instead.  But some pFrom->apCell[] entries might
        !          2101: ** not point to pFrom->u.aDisk[].  Those are unchanged.
        !          2102: */
        !          2103: static void copyPage(MemPage *pTo, MemPage *pFrom){
        !          2104:   uptr from, to;
        !          2105:   int i;
        !          2106:   memcpy(pTo->u.aDisk, pFrom->u.aDisk, SQLITE_USABLE_SIZE);
        !          2107:   pTo->pParent = 0;
        !          2108:   pTo->isInit = 1;
        !          2109:   pTo->nCell = pFrom->nCell;
        !          2110:   pTo->nFree = pFrom->nFree;
        !          2111:   pTo->isOverfull = pFrom->isOverfull;
        !          2112:   to = Addr(pTo);
        !          2113:   from = Addr(pFrom);
        !          2114:   for(i=0; i<pTo->nCell; i++){
        !          2115:     uptr x = Addr(pFrom->apCell[i]);
        !          2116:     if( x>from && x<from+SQLITE_USABLE_SIZE ){
        !          2117:       *((uptr*)&pTo->apCell[i]) = x + to - from;
        !          2118:     }else{
        !          2119:       pTo->apCell[i] = pFrom->apCell[i];
        !          2120:     }
        !          2121:   }
        !          2122: }
        !          2123: 
        !          2124: /*
        !          2125: ** The following parameters determine how many adjacent pages get involved
        !          2126: ** in a balancing operation.  NN is the number of neighbors on either side
        !          2127: ** of the page that participate in the balancing operation.  NB is the
        !          2128: ** total number of pages that participate, including the target page and
        !          2129: ** NN neighbors on either side.
        !          2130: **
        !          2131: ** The minimum value of NN is 1 (of course).  Increasing NN above 1
        !          2132: ** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance
        !          2133: ** in exchange for a larger degradation in INSERT and UPDATE performance.
        !          2134: ** The value of NN appears to give the best results overall.
        !          2135: */
        !          2136: #define NN 1             /* Number of neighbors on either side of pPage */
        !          2137: #define NB (NN*2+1)      /* Total pages involved in the balance */
        !          2138: 
        !          2139: /*
        !          2140: ** This routine redistributes Cells on pPage and up to two siblings
        !          2141: ** of pPage so that all pages have about the same amount of free space.
        !          2142: ** Usually one sibling on either side of pPage is used in the balancing,
        !          2143: ** though both siblings might come from one side if pPage is the first
        !          2144: ** or last child of its parent.  If pPage has fewer than two siblings
        !          2145: ** (something which can only happen if pPage is the root page or a 
        !          2146: ** child of root) then all available siblings participate in the balancing.
        !          2147: **
        !          2148: ** The number of siblings of pPage might be increased or decreased by
        !          2149: ** one in an effort to keep pages between 66% and 100% full. The root page
        !          2150: ** is special and is allowed to be less than 66% full. If pPage is 
        !          2151: ** the root page, then the depth of the tree might be increased
        !          2152: ** or decreased by one, as necessary, to keep the root page from being
        !          2153: ** overfull or empty.
        !          2154: **
        !          2155: ** This routine calls relinkCellList() on its input page regardless of
        !          2156: ** whether or not it does any real balancing.  Client routines will typically
        !          2157: ** invoke insertCell() or dropCell() before calling this routine, so we
        !          2158: ** need to call relinkCellList() to clean up the mess that those other
        !          2159: ** routines left behind.
        !          2160: **
        !          2161: ** pCur is left pointing to the same cell as when this routine was called
        !          2162: ** even if that cell gets moved to a different page.  pCur may be NULL.
        !          2163: ** Set the pCur parameter to NULL if you do not care about keeping track
        !          2164: ** of a cell as that will save this routine the work of keeping track of it.
        !          2165: **
        !          2166: ** Note that when this routine is called, some of the Cells on pPage
        !          2167: ** might not actually be stored in pPage->u.aDisk[].  This can happen
        !          2168: ** if the page is overfull.  Part of the job of this routine is to
        !          2169: ** make sure all Cells for pPage once again fit in pPage->u.aDisk[].
        !          2170: **
        !          2171: ** In the course of balancing the siblings of pPage, the parent of pPage
        !          2172: ** might become overfull or underfull.  If that happens, then this routine
        !          2173: ** is called recursively on the parent.
        !          2174: **
        !          2175: ** If this routine fails for any reason, it might leave the database
        !          2176: ** in a corrupted state.  So if this routine fails, the database should
        !          2177: ** be rolled back.
        !          2178: */
        !          2179: static int balance(Btree *pBt, MemPage *pPage, BtCursor *pCur){
        !          2180:   MemPage *pParent;            /* The parent of pPage */
        !          2181:   int nCell;                   /* Number of cells in apCell[] */
        !          2182:   int nOld;                    /* Number of pages in apOld[] */
        !          2183:   int nNew;                    /* Number of pages in apNew[] */
        !          2184:   int nDiv;                    /* Number of cells in apDiv[] */
        !          2185:   int i, j, k;                 /* Loop counters */
        !          2186:   int idx;                     /* Index of pPage in pParent->apCell[] */
        !          2187:   int nxDiv;                   /* Next divider slot in pParent->apCell[] */
        !          2188:   int rc;                      /* The return code */
        !          2189:   int iCur;                    /* apCell[iCur] is the cell of the cursor */
        !          2190:   MemPage *pOldCurPage;        /* The cursor originally points to this page */
        !          2191:   int subtotal;                /* Subtotal of bytes in cells on one page */
        !          2192:   MemPage *extraUnref = 0;     /* A page that needs to be unref-ed */
        !          2193:   MemPage *apOld[NB];          /* pPage and up to two siblings */
        !          2194:   Pgno pgnoOld[NB];            /* Page numbers for each page in apOld[] */
        !          2195:   MemPage *apNew[NB+1];        /* pPage and up to NB siblings after balancing */
        !          2196:   Pgno pgnoNew[NB+1];          /* Page numbers for each page in apNew[] */
        !          2197:   int idxDiv[NB];              /* Indices of divider cells in pParent */
        !          2198:   Cell *apDiv[NB];             /* Divider cells in pParent */
        !          2199:   Cell aTemp[NB];              /* Temporary holding area for apDiv[] */
        !          2200:   int cntNew[NB+1];            /* Index in apCell[] of cell after i-th page */
        !          2201:   int szNew[NB+1];             /* Combined size of cells place on i-th page */
        !          2202:   MemPage aOld[NB];            /* Temporary copies of pPage and its siblings */
        !          2203:   Cell *apCell[(MX_CELL+2)*NB]; /* All cells from pages being balanced */
        !          2204:   int szCell[(MX_CELL+2)*NB];  /* Local size of all cells */
        !          2205: 
        !          2206:   /* 
        !          2207:   ** Return without doing any work if pPage is neither overfull nor
        !          2208:   ** underfull.
        !          2209:   */
        !          2210:   assert( sqlitepager_iswriteable(pPage) );
        !          2211:   if( !pPage->isOverfull && pPage->nFree<SQLITE_USABLE_SIZE/2 
        !          2212:         && pPage->nCell>=2){
        !          2213:     relinkCellList(pBt, pPage);
        !          2214:     return SQLITE_OK;
        !          2215:   }
        !          2216: 
        !          2217:   /*
        !          2218:   ** Find the parent of the page to be balanceed.
        !          2219:   ** If there is no parent, it means this page is the root page and
        !          2220:   ** special rules apply.
        !          2221:   */
        !          2222:   pParent = pPage->pParent;
        !          2223:   if( pParent==0 ){
        !          2224:     Pgno pgnoChild;
        !          2225:     MemPage *pChild;
        !          2226:     assert( pPage->isInit );
        !          2227:     if( pPage->nCell==0 ){
        !          2228:       if( pPage->u.hdr.rightChild ){
        !          2229:         /*
        !          2230:         ** The root page is empty.  Copy the one child page
        !          2231:         ** into the root page and return.  This reduces the depth
        !          2232:         ** of the BTree by one.
        !          2233:         */
        !          2234:         pgnoChild = SWAB32(pBt, pPage->u.hdr.rightChild);
        !          2235:         rc = sqlitepager_get(pBt->pPager, pgnoChild, (void**)&pChild);
        !          2236:         if( rc ) return rc;
        !          2237:         memcpy(pPage, pChild, SQLITE_USABLE_SIZE);
        !          2238:         pPage->isInit = 0;
        !          2239:         rc = initPage(pBt, pPage, sqlitepager_pagenumber(pPage), 0);
        !          2240:         assert( rc==SQLITE_OK );
        !          2241:         reparentChildPages(pBt, pPage);
        !          2242:         if( pCur && pCur->pPage==pChild ){
        !          2243:           sqlitepager_unref(pChild);
        !          2244:           pCur->pPage = pPage;
        !          2245:           sqlitepager_ref(pPage);
        !          2246:         }
        !          2247:         freePage(pBt, pChild, pgnoChild);
        !          2248:         sqlitepager_unref(pChild);
        !          2249:       }else{
        !          2250:         relinkCellList(pBt, pPage);
        !          2251:       }
        !          2252:       return SQLITE_OK;
        !          2253:     }
        !          2254:     if( !pPage->isOverfull ){
        !          2255:       /* It is OK for the root page to be less than half full.
        !          2256:       */
        !          2257:       relinkCellList(pBt, pPage);
        !          2258:       return SQLITE_OK;
        !          2259:     }
        !          2260:     /*
        !          2261:     ** If we get to here, it means the root page is overfull.
        !          2262:     ** When this happens, Create a new child page and copy the
        !          2263:     ** contents of the root into the child.  Then make the root
        !          2264:     ** page an empty page with rightChild pointing to the new
        !          2265:     ** child.  Then fall thru to the code below which will cause
        !          2266:     ** the overfull child page to be split.
        !          2267:     */
        !          2268:     rc = sqlitepager_write(pPage);
        !          2269:     if( rc ) return rc;
        !          2270:     rc = allocatePage(pBt, &pChild, &pgnoChild, sqlitepager_pagenumber(pPage));
        !          2271:     if( rc ) return rc;
        !          2272:     assert( sqlitepager_iswriteable(pChild) );
        !          2273:     copyPage(pChild, pPage);
        !          2274:     pChild->pParent = pPage;
        !          2275:     pChild->idxParent = 0;
        !          2276:     sqlitepager_ref(pPage);
        !          2277:     pChild->isOverfull = 1;
        !          2278:     if( pCur && pCur->pPage==pPage ){
        !          2279:       sqlitepager_unref(pPage);
        !          2280:       pCur->pPage = pChild;
        !          2281:     }else{
        !          2282:       extraUnref = pChild;
        !          2283:     }
        !          2284:     zeroPage(pBt, pPage);
        !          2285:     pPage->u.hdr.rightChild = SWAB32(pBt, pgnoChild);
        !          2286:     pParent = pPage;
        !          2287:     pPage = pChild;
        !          2288:   }
        !          2289:   rc = sqlitepager_write(pParent);
        !          2290:   if( rc ) return rc;
        !          2291:   assert( pParent->isInit );
        !          2292:   
        !          2293:   /*
        !          2294:   ** Find the Cell in the parent page whose h.leftChild points back
        !          2295:   ** to pPage.  The "idx" variable is the index of that cell.  If pPage
        !          2296:   ** is the rightmost child of pParent then set idx to pParent->nCell 
        !          2297:   */
        !          2298:   if( pParent->idxShift ){
        !          2299:     Pgno pgno, swabPgno;
        !          2300:     pgno = sqlitepager_pagenumber(pPage);
        !          2301:     swabPgno = SWAB32(pBt, pgno);
        !          2302:     for(idx=0; idx<pParent->nCell; idx++){
        !          2303:       if( pParent->apCell[idx]->h.leftChild==swabPgno ){
        !          2304:         break;
        !          2305:       }
        !          2306:     }
        !          2307:     assert( idx<pParent->nCell || pParent->u.hdr.rightChild==swabPgno );
        !          2308:   }else{
        !          2309:     idx = pPage->idxParent;
        !          2310:   }
        !          2311: 
        !          2312:   /*
        !          2313:   ** Initialize variables so that it will be safe to jump
        !          2314:   ** directly to balance_cleanup at any moment.
        !          2315:   */
        !          2316:   nOld = nNew = 0;
        !          2317:   sqlitepager_ref(pParent);
        !          2318: 
        !          2319:   /*
        !          2320:   ** Find sibling pages to pPage and the Cells in pParent that divide
        !          2321:   ** the siblings.  An attempt is made to find NN siblings on either
        !          2322:   ** side of pPage.  More siblings are taken from one side, however, if
        !          2323:   ** pPage there are fewer than NN siblings on the other side.  If pParent
        !          2324:   ** has NB or fewer children then all children of pParent are taken.
        !          2325:   */
        !          2326:   nxDiv = idx - NN;
        !          2327:   if( nxDiv + NB > pParent->nCell ){
        !          2328:     nxDiv = pParent->nCell - NB + 1;
        !          2329:   }
        !          2330:   if( nxDiv<0 ){
        !          2331:     nxDiv = 0;
        !          2332:   }
        !          2333:   nDiv = 0;
        !          2334:   for(i=0, k=nxDiv; i<NB; i++, k++){
        !          2335:     if( k<pParent->nCell ){
        !          2336:       idxDiv[i] = k;
        !          2337:       apDiv[i] = pParent->apCell[k];
        !          2338:       nDiv++;
        !          2339:       pgnoOld[i] = SWAB32(pBt, apDiv[i]->h.leftChild);
        !          2340:     }else if( k==pParent->nCell ){
        !          2341:       pgnoOld[i] = SWAB32(pBt, pParent->u.hdr.rightChild);
        !          2342:     }else{
        !          2343:       break;
        !          2344:     }
        !          2345:     rc = sqlitepager_get(pBt->pPager, pgnoOld[i], (void**)&apOld[i]);
        !          2346:     if( rc ) goto balance_cleanup;
        !          2347:     rc = initPage(pBt, apOld[i], pgnoOld[i], pParent);
        !          2348:     if( rc ) goto balance_cleanup;
        !          2349:     apOld[i]->idxParent = k;
        !          2350:     nOld++;
        !          2351:   }
        !          2352: 
        !          2353:   /*
        !          2354:   ** Set iCur to be the index in apCell[] of the cell that the cursor
        !          2355:   ** is pointing to.  We will need this later on in order to keep the
        !          2356:   ** cursor pointing at the same cell.  If pCur points to a page that
        !          2357:   ** has no involvement with this rebalancing, then set iCur to a large
        !          2358:   ** number so that the iCur==j tests always fail in the main cell
        !          2359:   ** distribution loop below.
        !          2360:   */
        !          2361:   if( pCur ){
        !          2362:     iCur = 0;
        !          2363:     for(i=0; i<nOld; i++){
        !          2364:       if( pCur->pPage==apOld[i] ){
        !          2365:         iCur += pCur->idx;
        !          2366:         break;
        !          2367:       }
        !          2368:       iCur += apOld[i]->nCell;
        !          2369:       if( i<nOld-1 && pCur->pPage==pParent && pCur->idx==idxDiv[i] ){
        !          2370:         break;
        !          2371:       }
        !          2372:       iCur++;
        !          2373:     }
        !          2374:     pOldCurPage = pCur->pPage;
        !          2375:   }
        !          2376: 
        !          2377:   /*
        !          2378:   ** Make copies of the content of pPage and its siblings into aOld[].
        !          2379:   ** The rest of this function will use data from the copies rather
        !          2380:   ** that the original pages since the original pages will be in the
        !          2381:   ** process of being overwritten.
        !          2382:   */
        !          2383:   for(i=0; i<nOld; i++){
        !          2384:     copyPage(&aOld[i], apOld[i]);
        !          2385:   }
        !          2386: 
        !          2387:   /*
        !          2388:   ** Load pointers to all cells on sibling pages and the divider cells
        !          2389:   ** into the local apCell[] array.  Make copies of the divider cells
        !          2390:   ** into aTemp[] and remove the the divider Cells from pParent.
        !          2391:   */
        !          2392:   nCell = 0;
        !          2393:   for(i=0; i<nOld; i++){
        !          2394:     MemPage *pOld = &aOld[i];
        !          2395:     for(j=0; j<pOld->nCell; j++){
        !          2396:       apCell[nCell] = pOld->apCell[j];
        !          2397:       szCell[nCell] = cellSize(pBt, apCell[nCell]);
        !          2398:       nCell++;
        !          2399:     }
        !          2400:     if( i<nOld-1 ){
        !          2401:       szCell[nCell] = cellSize(pBt, apDiv[i]);
        !          2402:       memcpy(&aTemp[i], apDiv[i], szCell[nCell]);
        !          2403:       apCell[nCell] = &aTemp[i];
        !          2404:       dropCell(pBt, pParent, nxDiv, szCell[nCell]);
        !          2405:       assert( SWAB32(pBt, apCell[nCell]->h.leftChild)==pgnoOld[i] );
        !          2406:       apCell[nCell]->h.leftChild = pOld->u.hdr.rightChild;
        !          2407:       nCell++;
        !          2408:     }
        !          2409:   }
        !          2410: 
        !          2411:   /*
        !          2412:   ** Figure out the number of pages needed to hold all nCell cells.
        !          2413:   ** Store this number in "k".  Also compute szNew[] which is the total
        !          2414:   ** size of all cells on the i-th page and cntNew[] which is the index
        !          2415:   ** in apCell[] of the cell that divides path i from path i+1.  
        !          2416:   ** cntNew[k] should equal nCell.
        !          2417:   **
        !          2418:   ** This little patch of code is critical for keeping the tree
        !          2419:   ** balanced. 
        !          2420:   */
        !          2421:   for(subtotal=k=i=0; i<nCell; i++){
        !          2422:     subtotal += szCell[i];
        !          2423:     if( subtotal > USABLE_SPACE ){
        !          2424:       szNew[k] = subtotal - szCell[i];
        !          2425:       cntNew[k] = i;
        !          2426:       subtotal = 0;
        !          2427:       k++;
        !          2428:     }
        !          2429:   }
        !          2430:   szNew[k] = subtotal;
        !          2431:   cntNew[k] = nCell;
        !          2432:   k++;
        !          2433:   for(i=k-1; i>0; i--){
        !          2434:     while( szNew[i]<USABLE_SPACE/2 ){
        !          2435:       cntNew[i-1]--;
        !          2436:       assert( cntNew[i-1]>0 );
        !          2437:       szNew[i] += szCell[cntNew[i-1]];
        !          2438:       szNew[i-1] -= szCell[cntNew[i-1]-1];
        !          2439:     }
        !          2440:   }
        !          2441:   assert( cntNew[0]>0 );
        !          2442: 
        !          2443:   /*
        !          2444:   ** Allocate k new pages.  Reuse old pages where possible.
        !          2445:   */
        !          2446:   for(i=0; i<k; i++){
        !          2447:     if( i<nOld ){
        !          2448:       apNew[i] = apOld[i];
        !          2449:       pgnoNew[i] = pgnoOld[i];
        !          2450:       apOld[i] = 0;
        !          2451:       sqlitepager_write(apNew[i]);
        !          2452:     }else{
        !          2453:       rc = allocatePage(pBt, &apNew[i], &pgnoNew[i], pgnoNew[i-1]);
        !          2454:       if( rc ) goto balance_cleanup;
        !          2455:     }
        !          2456:     nNew++;
        !          2457:     zeroPage(pBt, apNew[i]);
        !          2458:     apNew[i]->isInit = 1;
        !          2459:   }
        !          2460: 
        !          2461:   /* Free any old pages that were not reused as new pages.
        !          2462:   */
        !          2463:   while( i<nOld ){
        !          2464:     rc = freePage(pBt, apOld[i], pgnoOld[i]);
        !          2465:     if( rc ) goto balance_cleanup;
        !          2466:     sqlitepager_unref(apOld[i]);
        !          2467:     apOld[i] = 0;
        !          2468:     i++;
        !          2469:   }
        !          2470: 
        !          2471:   /*
        !          2472:   ** Put the new pages in accending order.  This helps to
        !          2473:   ** keep entries in the disk file in order so that a scan
        !          2474:   ** of the table is a linear scan through the file.  That
        !          2475:   ** in turn helps the operating system to deliver pages
        !          2476:   ** from the disk more rapidly.
        !          2477:   **
        !          2478:   ** An O(n^2) insertion sort algorithm is used, but since
        !          2479:   ** n is never more than NB (a small constant), that should
        !          2480:   ** not be a problem.
        !          2481:   **
        !          2482:   ** When NB==3, this one optimization makes the database
        !          2483:   ** about 25% faster for large insertions and deletions.
        !          2484:   */
        !          2485:   for(i=0; i<k-1; i++){
        !          2486:     int minV = pgnoNew[i];
        !          2487:     int minI = i;
        !          2488:     for(j=i+1; j<k; j++){
        !          2489:       if( pgnoNew[j]<(unsigned)minV ){
        !          2490:         minI = j;
        !          2491:         minV = pgnoNew[j];
        !          2492:       }
        !          2493:     }
        !          2494:     if( minI>i ){
        !          2495:       int t;
        !          2496:       MemPage *pT;
        !          2497:       t = pgnoNew[i];
        !          2498:       pT = apNew[i];
        !          2499:       pgnoNew[i] = pgnoNew[minI];
        !          2500:       apNew[i] = apNew[minI];
        !          2501:       pgnoNew[minI] = t;
        !          2502:       apNew[minI] = pT;
        !          2503:     }
        !          2504:   }
        !          2505: 
        !          2506:   /*
        !          2507:   ** Evenly distribute the data in apCell[] across the new pages.
        !          2508:   ** Insert divider cells into pParent as necessary.
        !          2509:   */
        !          2510:   j = 0;
        !          2511:   for(i=0; i<nNew; i++){
        !          2512:     MemPage *pNew = apNew[i];
        !          2513:     while( j<cntNew[i] ){
        !          2514:       assert( pNew->nFree>=szCell[j] );
        !          2515:       if( pCur && iCur==j ){ pCur->pPage = pNew; pCur->idx = pNew->nCell; }
        !          2516:       insertCell(pBt, pNew, pNew->nCell, apCell[j], szCell[j]);
        !          2517:       j++;
        !          2518:     }
        !          2519:     assert( pNew->nCell>0 );
        !          2520:     assert( !pNew->isOverfull );
        !          2521:     relinkCellList(pBt, pNew);
        !          2522:     if( i<nNew-1 && j<nCell ){
        !          2523:       pNew->u.hdr.rightChild = apCell[j]->h.leftChild;
        !          2524:       apCell[j]->h.leftChild = SWAB32(pBt, pgnoNew[i]);
        !          2525:       if( pCur && iCur==j ){ pCur->pPage = pParent; pCur->idx = nxDiv; }
        !          2526:       insertCell(pBt, pParent, nxDiv, apCell[j], szCell[j]);
        !          2527:       j++;
        !          2528:       nxDiv++;
        !          2529:     }
        !          2530:   }
        !          2531:   assert( j==nCell );
        !          2532:   apNew[nNew-1]->u.hdr.rightChild = aOld[nOld-1].u.hdr.rightChild;
        !          2533:   if( nxDiv==pParent->nCell ){
        !          2534:     pParent->u.hdr.rightChild = SWAB32(pBt, pgnoNew[nNew-1]);
        !          2535:   }else{
        !          2536:     pParent->apCell[nxDiv]->h.leftChild = SWAB32(pBt, pgnoNew[nNew-1]);
        !          2537:   }
        !          2538:   if( pCur ){
        !          2539:     if( j<=iCur && pCur->pPage==pParent && pCur->idx>idxDiv[nOld-1] ){
        !          2540:       assert( pCur->pPage==pOldCurPage );
        !          2541:       pCur->idx += nNew - nOld;
        !          2542:     }else{
        !          2543:       assert( pOldCurPage!=0 );
        !          2544:       sqlitepager_ref(pCur->pPage);
        !          2545:       sqlitepager_unref(pOldCurPage);
        !          2546:     }
        !          2547:   }
        !          2548: 
        !          2549:   /*
        !          2550:   ** Reparent children of all cells.
        !          2551:   */
        !          2552:   for(i=0; i<nNew; i++){
        !          2553:     reparentChildPages(pBt, apNew[i]);
        !          2554:   }
        !          2555:   reparentChildPages(pBt, pParent);
        !          2556: 
        !          2557:   /*
        !          2558:   ** balance the parent page.
        !          2559:   */
        !          2560:   rc = balance(pBt, pParent, pCur);
        !          2561: 
        !          2562:   /*
        !          2563:   ** Cleanup before returning.
        !          2564:   */
        !          2565: balance_cleanup:
        !          2566:   if( extraUnref ){
        !          2567:     sqlitepager_unref(extraUnref);
        !          2568:   }
        !          2569:   for(i=0; i<nOld; i++){
        !          2570:     if( apOld[i]!=0 && apOld[i]!=&aOld[i] ) sqlitepager_unref(apOld[i]);
        !          2571:   }
        !          2572:   for(i=0; i<nNew; i++){
        !          2573:     sqlitepager_unref(apNew[i]);
        !          2574:   }
        !          2575:   if( pCur && pCur->pPage==0 ){
        !          2576:     pCur->pPage = pParent;
        !          2577:     pCur->idx = 0;
        !          2578:   }else{
        !          2579:     sqlitepager_unref(pParent);
        !          2580:   }
        !          2581:   return rc;
        !          2582: }
        !          2583: 
        !          2584: /*
        !          2585: ** This routine checks all cursors that point to the same table
        !          2586: ** as pCur points to.  If any of those cursors were opened with
        !          2587: ** wrFlag==0 then this routine returns SQLITE_LOCKED.  If all
        !          2588: ** cursors point to the same table were opened with wrFlag==1
        !          2589: ** then this routine returns SQLITE_OK.
        !          2590: **
        !          2591: ** In addition to checking for read-locks (where a read-lock 
        !          2592: ** means a cursor opened with wrFlag==0) this routine also moves
        !          2593: ** all cursors other than pCur so that they are pointing to the 
        !          2594: ** first Cell on root page.  This is necessary because an insert 
        !          2595: ** or delete might change the number of cells on a page or delete
        !          2596: ** a page entirely and we do not want to leave any cursors 
        !          2597: ** pointing to non-existant pages or cells.
        !          2598: */
        !          2599: static int checkReadLocks(BtCursor *pCur){
        !          2600:   BtCursor *p;
        !          2601:   assert( pCur->wrFlag );
        !          2602:   for(p=pCur->pShared; p!=pCur; p=p->pShared){
        !          2603:     assert( p );
        !          2604:     assert( p->pgnoRoot==pCur->pgnoRoot );
        !          2605:     if( p->wrFlag==0 ) return SQLITE_LOCKED;
        !          2606:     if( sqlitepager_pagenumber(p->pPage)!=p->pgnoRoot ){
        !          2607:       moveToRoot(p);
        !          2608:     }
        !          2609:   }
        !          2610:   return SQLITE_OK;
        !          2611: }
        !          2612: 
        !          2613: /*
        !          2614: ** Insert a new record into the BTree.  The key is given by (pKey,nKey)
        !          2615: ** and the data is given by (pData,nData).  The cursor is used only to
        !          2616: ** define what database the record should be inserted into.  The cursor
        !          2617: ** is left pointing at the new record.
        !          2618: */
        !          2619: static int fileBtreeInsert(
        !          2620:   BtCursor *pCur,                /* Insert data into the table of this cursor */
        !          2621:   const void *pKey, int nKey,    /* The key of the new record */
        !          2622:   const void *pData, int nData   /* The data of the new record */
        !          2623: ){
        !          2624:   Cell newCell;
        !          2625:   int rc;
        !          2626:   int loc;
        !          2627:   int szNew;
        !          2628:   MemPage *pPage;
        !          2629:   Btree *pBt = pCur->pBt;
        !          2630: 
        !          2631:   if( pCur->pPage==0 ){
        !          2632:     return SQLITE_ABORT;  /* A rollback destroyed this cursor */
        !          2633:   }
        !          2634:   if( !pBt->inTrans || nKey+nData==0 ){
        !          2635:     /* Must start a transaction before doing an insert */
        !          2636:     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
        !          2637:   }
        !          2638:   assert( !pBt->readOnly );
        !          2639:   if( !pCur->wrFlag ){
        !          2640:     return SQLITE_PERM;   /* Cursor not open for writing */
        !          2641:   }
        !          2642:   if( checkReadLocks(pCur) ){
        !          2643:     return SQLITE_LOCKED; /* The table pCur points to has a read lock */
        !          2644:   }
        !          2645:   rc = fileBtreeMoveto(pCur, pKey, nKey, &loc);
        !          2646:   if( rc ) return rc;
        !          2647:   pPage = pCur->pPage;
        !          2648:   assert( pPage->isInit );
        !          2649:   rc = sqlitepager_write(pPage);
        !          2650:   if( rc ) return rc;
        !          2651:   rc = fillInCell(pBt, &newCell, pKey, nKey, pData, nData);
        !          2652:   if( rc ) return rc;
        !          2653:   szNew = cellSize(pBt, &newCell);
        !          2654:   if( loc==0 ){
        !          2655:     newCell.h.leftChild = pPage->apCell[pCur->idx]->h.leftChild;
        !          2656:     rc = clearCell(pBt, pPage->apCell[pCur->idx]);
        !          2657:     if( rc ) return rc;
        !          2658:     dropCell(pBt, pPage, pCur->idx, cellSize(pBt, pPage->apCell[pCur->idx]));
        !          2659:   }else if( loc<0 && pPage->nCell>0 ){
        !          2660:     assert( pPage->u.hdr.rightChild==0 );  /* Must be a leaf page */
        !          2661:     pCur->idx++;
        !          2662:   }else{
        !          2663:     assert( pPage->u.hdr.rightChild==0 );  /* Must be a leaf page */
        !          2664:   }
        !          2665:   insertCell(pBt, pPage, pCur->idx, &newCell, szNew);
        !          2666:   rc = balance(pCur->pBt, pPage, pCur);
        !          2667:   /* sqliteBtreePageDump(pCur->pBt, pCur->pgnoRoot, 1); */
        !          2668:   /* fflush(stdout); */
        !          2669:   pCur->eSkip = SKIP_INVALID;
        !          2670:   return rc;
        !          2671: }
        !          2672: 
        !          2673: /*
        !          2674: ** Delete the entry that the cursor is pointing to.
        !          2675: **
        !          2676: ** The cursor is left pointing at either the next or the previous
        !          2677: ** entry.  If the cursor is left pointing to the next entry, then 
        !          2678: ** the pCur->eSkip flag is set to SKIP_NEXT which forces the next call to 
        !          2679: ** sqliteBtreeNext() to be a no-op.  That way, you can always call
        !          2680: ** sqliteBtreeNext() after a delete and the cursor will be left
        !          2681: ** pointing to the first entry after the deleted entry.  Similarly,
        !          2682: ** pCur->eSkip is set to SKIP_PREV is the cursor is left pointing to
        !          2683: ** the entry prior to the deleted entry so that a subsequent call to
        !          2684: ** sqliteBtreePrevious() will always leave the cursor pointing at the
        !          2685: ** entry immediately before the one that was deleted.
        !          2686: */
        !          2687: static int fileBtreeDelete(BtCursor *pCur){
        !          2688:   MemPage *pPage = pCur->pPage;
        !          2689:   Cell *pCell;
        !          2690:   int rc;
        !          2691:   Pgno pgnoChild;
        !          2692:   Btree *pBt = pCur->pBt;
        !          2693: 
        !          2694:   assert( pPage->isInit );
        !          2695:   if( pCur->pPage==0 ){
        !          2696:     return SQLITE_ABORT;  /* A rollback destroyed this cursor */
        !          2697:   }
        !          2698:   if( !pBt->inTrans ){
        !          2699:     /* Must start a transaction before doing a delete */
        !          2700:     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
        !          2701:   }
        !          2702:   assert( !pBt->readOnly );
        !          2703:   if( pCur->idx >= pPage->nCell ){
        !          2704:     return SQLITE_ERROR;  /* The cursor is not pointing to anything */
        !          2705:   }
        !          2706:   if( !pCur->wrFlag ){
        !          2707:     return SQLITE_PERM;   /* Did not open this cursor for writing */
        !          2708:   }
        !          2709:   if( checkReadLocks(pCur) ){
        !          2710:     return SQLITE_LOCKED; /* The table pCur points to has a read lock */
        !          2711:   }
        !          2712:   rc = sqlitepager_write(pPage);
        !          2713:   if( rc ) return rc;
        !          2714:   pCell = pPage->apCell[pCur->idx];
        !          2715:   pgnoChild = SWAB32(pBt, pCell->h.leftChild);
        !          2716:   clearCell(pBt, pCell);
        !          2717:   if( pgnoChild ){
        !          2718:     /*
        !          2719:     ** The entry we are about to delete is not a leaf so if we do not
        !          2720:     ** do something we will leave a hole on an internal page.
        !          2721:     ** We have to fill the hole by moving in a cell from a leaf.  The
        !          2722:     ** next Cell after the one to be deleted is guaranteed to exist and
        !          2723:     ** to be a leaf so we can use it.
        !          2724:     */
        !          2725:     BtCursor leafCur;
        !          2726:     Cell *pNext;
        !          2727:     int szNext;
        !          2728:     int notUsed;
        !          2729:     getTempCursor(pCur, &leafCur);
        !          2730:     rc = fileBtreeNext(&leafCur, &notUsed);
        !          2731:     if( rc!=SQLITE_OK ){
        !          2732:       if( rc!=SQLITE_NOMEM ) rc = SQLITE_CORRUPT;
        !          2733:       return rc;
        !          2734:     }
        !          2735:     rc = sqlitepager_write(leafCur.pPage);
        !          2736:     if( rc ) return rc;
        !          2737:     dropCell(pBt, pPage, pCur->idx, cellSize(pBt, pCell));
        !          2738:     pNext = leafCur.pPage->apCell[leafCur.idx];
        !          2739:     szNext = cellSize(pBt, pNext);
        !          2740:     pNext->h.leftChild = SWAB32(pBt, pgnoChild);
        !          2741:     insertCell(pBt, pPage, pCur->idx, pNext, szNext);
        !          2742:     rc = balance(pBt, pPage, pCur);
        !          2743:     if( rc ) return rc;
        !          2744:     pCur->eSkip = SKIP_NEXT;
        !          2745:     dropCell(pBt, leafCur.pPage, leafCur.idx, szNext);
        !          2746:     rc = balance(pBt, leafCur.pPage, pCur);
        !          2747:     releaseTempCursor(&leafCur);
        !          2748:   }else{
        !          2749:     dropCell(pBt, pPage, pCur->idx, cellSize(pBt, pCell));
        !          2750:     if( pCur->idx>=pPage->nCell ){
        !          2751:       pCur->idx = pPage->nCell-1;
        !          2752:       if( pCur->idx<0 ){ 
        !          2753:         pCur->idx = 0;
        !          2754:         pCur->eSkip = SKIP_NEXT;
        !          2755:       }else{
        !          2756:         pCur->eSkip = SKIP_PREV;
        !          2757:       }
        !          2758:     }else{
        !          2759:       pCur->eSkip = SKIP_NEXT;
        !          2760:     }
        !          2761:     rc = balance(pBt, pPage, pCur);
        !          2762:   }
        !          2763:   return rc;
        !          2764: }
        !          2765: 
        !          2766: /*
        !          2767: ** Create a new BTree table.  Write into *piTable the page
        !          2768: ** number for the root page of the new table.
        !          2769: **
        !          2770: ** In the current implementation, BTree tables and BTree indices are the 
        !          2771: ** the same.  In the future, we may change this so that BTree tables
        !          2772: ** are restricted to having a 4-byte integer key and arbitrary data and
        !          2773: ** BTree indices are restricted to having an arbitrary key and no data.
        !          2774: ** But for now, this routine also serves to create indices.
        !          2775: */
        !          2776: static int fileBtreeCreateTable(Btree *pBt, int *piTable){
        !          2777:   MemPage *pRoot;
        !          2778:   Pgno pgnoRoot;
        !          2779:   int rc;
        !          2780:   if( !pBt->inTrans ){
        !          2781:     /* Must start a transaction first */
        !          2782:     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
        !          2783:   }
        !          2784:   if( pBt->readOnly ){
        !          2785:     return SQLITE_READONLY;
        !          2786:   }
        !          2787:   rc = allocatePage(pBt, &pRoot, &pgnoRoot, 0);
        !          2788:   if( rc ) return rc;
        !          2789:   assert( sqlitepager_iswriteable(pRoot) );
        !          2790:   zeroPage(pBt, pRoot);
        !          2791:   sqlitepager_unref(pRoot);
        !          2792:   *piTable = (int)pgnoRoot;
        !          2793:   return SQLITE_OK;
        !          2794: }
        !          2795: 
        !          2796: /*
        !          2797: ** Erase the given database page and all its children.  Return
        !          2798: ** the page to the freelist.
        !          2799: */
        !          2800: static int clearDatabasePage(Btree *pBt, Pgno pgno, int freePageFlag){
        !          2801:   MemPage *pPage;
        !          2802:   int rc;
        !          2803:   Cell *pCell;
        !          2804:   int idx;
        !          2805: 
        !          2806:   rc = sqlitepager_get(pBt->pPager, pgno, (void**)&pPage);
        !          2807:   if( rc ) return rc;
        !          2808:   rc = sqlitepager_write(pPage);
        !          2809:   if( rc ) return rc;
        !          2810:   rc = initPage(pBt, pPage, pgno, 0);
        !          2811:   if( rc ) return rc;
        !          2812:   idx = SWAB16(pBt, pPage->u.hdr.firstCell);
        !          2813:   while( idx>0 ){
        !          2814:     pCell = (Cell*)&pPage->u.aDisk[idx];
        !          2815:     idx = SWAB16(pBt, pCell->h.iNext);
        !          2816:     if( pCell->h.leftChild ){
        !          2817:       rc = clearDatabasePage(pBt, SWAB32(pBt, pCell->h.leftChild), 1);
        !          2818:       if( rc ) return rc;
        !          2819:     }
        !          2820:     rc = clearCell(pBt, pCell);
        !          2821:     if( rc ) return rc;
        !          2822:   }
        !          2823:   if( pPage->u.hdr.rightChild ){
        !          2824:     rc = clearDatabasePage(pBt, SWAB32(pBt, pPage->u.hdr.rightChild), 1);
        !          2825:     if( rc ) return rc;
        !          2826:   }
        !          2827:   if( freePageFlag ){
        !          2828:     rc = freePage(pBt, pPage, pgno);
        !          2829:   }else{
        !          2830:     zeroPage(pBt, pPage);
        !          2831:   }
        !          2832:   sqlitepager_unref(pPage);
        !          2833:   return rc;
        !          2834: }
        !          2835: 
        !          2836: /*
        !          2837: ** Delete all information from a single table in the database.
        !          2838: */
        !          2839: static int fileBtreeClearTable(Btree *pBt, int iTable){
        !          2840:   int rc;
        !          2841:   BtCursor *pCur;
        !          2842:   if( !pBt->inTrans ){
        !          2843:     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
        !          2844:   }
        !          2845:   for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
        !          2846:     if( pCur->pgnoRoot==(Pgno)iTable ){
        !          2847:       if( pCur->wrFlag==0 ) return SQLITE_LOCKED;
        !          2848:       moveToRoot(pCur);
        !          2849:     }
        !          2850:   }
        !          2851:   rc = clearDatabasePage(pBt, (Pgno)iTable, 0);
        !          2852:   if( rc ){
        !          2853:     fileBtreeRollback(pBt);
        !          2854:   }
        !          2855:   return rc;
        !          2856: }
        !          2857: 
        !          2858: /*
        !          2859: ** Erase all information in a table and add the root of the table to
        !          2860: ** the freelist.  Except, the root of the principle table (the one on
        !          2861: ** page 2) is never added to the freelist.
        !          2862: */
        !          2863: static int fileBtreeDropTable(Btree *pBt, int iTable){
        !          2864:   int rc;
        !          2865:   MemPage *pPage;
        !          2866:   BtCursor *pCur;
        !          2867:   if( !pBt->inTrans ){
        !          2868:     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
        !          2869:   }
        !          2870:   for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
        !          2871:     if( pCur->pgnoRoot==(Pgno)iTable ){
        !          2872:       return SQLITE_LOCKED;  /* Cannot drop a table that has a cursor */
        !          2873:     }
        !          2874:   }
        !          2875:   rc = sqlitepager_get(pBt->pPager, (Pgno)iTable, (void**)&pPage);
        !          2876:   if( rc ) return rc;
        !          2877:   rc = fileBtreeClearTable(pBt, iTable);
        !          2878:   if( rc ) return rc;
        !          2879:   if( iTable>2 ){
        !          2880:     rc = freePage(pBt, pPage, iTable);
        !          2881:   }else{
        !          2882:     zeroPage(pBt, pPage);
        !          2883:   }
        !          2884:   sqlitepager_unref(pPage);
        !          2885:   return rc;  
        !          2886: }
        !          2887: 
        !          2888: #if 0 /* UNTESTED */
        !          2889: /*
        !          2890: ** Copy all cell data from one database file into another.
        !          2891: ** pages back the freelist.
        !          2892: */
        !          2893: static int copyCell(Btree *pBtFrom, BTree *pBtTo, Cell *pCell){
        !          2894:   Pager *pFromPager = pBtFrom->pPager;
        !          2895:   OverflowPage *pOvfl;
        !          2896:   Pgno ovfl, nextOvfl;
        !          2897:   Pgno *pPrev;
        !          2898:   int rc = SQLITE_OK;
        !          2899:   MemPage *pNew, *pPrevPg;
        !          2900:   Pgno new;
        !          2901: 
        !          2902:   if( NKEY(pBtTo, pCell->h) + NDATA(pBtTo, pCell->h) <= MX_LOCAL_PAYLOAD ){
        !          2903:     return SQLITE_OK;
        !          2904:   }
        !          2905:   pPrev = &pCell->ovfl;
        !          2906:   pPrevPg = 0;
        !          2907:   ovfl = SWAB32(pBtTo, pCell->ovfl);
        !          2908:   while( ovfl && rc==SQLITE_OK ){
        !          2909:     rc = sqlitepager_get(pFromPager, ovfl, (void**)&pOvfl);
        !          2910:     if( rc ) return rc;
        !          2911:     nextOvfl = SWAB32(pBtFrom, pOvfl->iNext);
        !          2912:     rc = allocatePage(pBtTo, &pNew, &new, 0);
        !          2913:     if( rc==SQLITE_OK ){
        !          2914:       rc = sqlitepager_write(pNew);
        !          2915:       if( rc==SQLITE_OK ){
        !          2916:         memcpy(pNew, pOvfl, SQLITE_USABLE_SIZE);
        !          2917:         *pPrev = SWAB32(pBtTo, new);
        !          2918:         if( pPrevPg ){
        !          2919:           sqlitepager_unref(pPrevPg);
        !          2920:         }
        !          2921:         pPrev = &pOvfl->iNext;
        !          2922:         pPrevPg = pNew;
        !          2923:       }
        !          2924:     }
        !          2925:     sqlitepager_unref(pOvfl);
        !          2926:     ovfl = nextOvfl;
        !          2927:   }
        !          2928:   if( pPrevPg ){
        !          2929:     sqlitepager_unref(pPrevPg);
        !          2930:   }
        !          2931:   return rc;
        !          2932: }
        !          2933: #endif
        !          2934: 
        !          2935: 
        !          2936: #if 0 /* UNTESTED */
        !          2937: /*
        !          2938: ** Copy a page of data from one database over to another.
        !          2939: */
        !          2940: static int copyDatabasePage(
        !          2941:   Btree *pBtFrom,
        !          2942:   Pgno pgnoFrom,
        !          2943:   Btree *pBtTo,
        !          2944:   Pgno *pTo
        !          2945: ){
        !          2946:   MemPage *pPageFrom, *pPage;
        !          2947:   Pgno to;
        !          2948:   int rc;
        !          2949:   Cell *pCell;
        !          2950:   int idx;
        !          2951: 
        !          2952:   rc = sqlitepager_get(pBtFrom->pPager, pgno, (void**)&pPageFrom);
        !          2953:   if( rc ) return rc;
        !          2954:   rc = allocatePage(pBt, &pPage, pTo, 0);
        !          2955:   if( rc==SQLITE_OK ){
        !          2956:     rc = sqlitepager_write(pPage);
        !          2957:   }
        !          2958:   if( rc==SQLITE_OK ){
        !          2959:     memcpy(pPage, pPageFrom, SQLITE_USABLE_SIZE);
        !          2960:     idx = SWAB16(pBt, pPage->u.hdr.firstCell);
        !          2961:     while( idx>0 ){
        !          2962:       pCell = (Cell*)&pPage->u.aDisk[idx];
        !          2963:       idx = SWAB16(pBt, pCell->h.iNext);
        !          2964:       if( pCell->h.leftChild ){
        !          2965:         Pgno newChld;
        !          2966:         rc = copyDatabasePage(pBtFrom, SWAB32(pBtFrom, pCell->h.leftChild),
        !          2967:                               pBtTo, &newChld);
        !          2968:         if( rc ) return rc;
        !          2969:         pCell->h.leftChild = SWAB32(pBtFrom, newChld);
        !          2970:       }
        !          2971:       rc = copyCell(pBtFrom, pBtTo, pCell);
        !          2972:       if( rc ) return rc;
        !          2973:     }
        !          2974:     if( pPage->u.hdr.rightChild ){
        !          2975:       Pgno newChld;
        !          2976:       rc = copyDatabasePage(pBtFrom, SWAB32(pBtFrom, pPage->u.hdr.rightChild), 
        !          2977:                             pBtTo, &newChld);
        !          2978:       if( rc ) return rc;
        !          2979:       pPage->u.hdr.rightChild = SWAB32(pBtTo, newChild);
        !          2980:     }
        !          2981:   }
        !          2982:   sqlitepager_unref(pPage);
        !          2983:   return rc;
        !          2984: }
        !          2985: #endif
        !          2986: 
        !          2987: /*
        !          2988: ** Read the meta-information out of a database file.
        !          2989: */
        !          2990: static int fileBtreeGetMeta(Btree *pBt, int *aMeta){
        !          2991:   PageOne *pP1;
        !          2992:   int rc;
        !          2993:   int i;
        !          2994: 
        !          2995:   rc = sqlitepager_get(pBt->pPager, 1, (void**)&pP1);
        !          2996:   if( rc ) return rc;
        !          2997:   aMeta[0] = SWAB32(pBt, pP1->nFree);
        !          2998:   for(i=0; i<sizeof(pP1->aMeta)/sizeof(pP1->aMeta[0]); i++){
        !          2999:     aMeta[i+1] = SWAB32(pBt, pP1->aMeta[i]);
        !          3000:   }
        !          3001:   sqlitepager_unref(pP1);
        !          3002:   return SQLITE_OK;
        !          3003: }
        !          3004: 
        !          3005: /*
        !          3006: ** Write meta-information back into the database.
        !          3007: */
        !          3008: static int fileBtreeUpdateMeta(Btree *pBt, int *aMeta){
        !          3009:   PageOne *pP1;
        !          3010:   int rc, i;
        !          3011:   if( !pBt->inTrans ){
        !          3012:     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
        !          3013:   }
        !          3014:   pP1 = pBt->page1;
        !          3015:   rc = sqlitepager_write(pP1);
        !          3016:   if( rc ) return rc;   
        !          3017:   for(i=0; i<sizeof(pP1->aMeta)/sizeof(pP1->aMeta[0]); i++){
        !          3018:     pP1->aMeta[i] = SWAB32(pBt, aMeta[i+1]);
        !          3019:   }
        !          3020:   return SQLITE_OK;
        !          3021: }
        !          3022: 
        !          3023: /******************************************************************************
        !          3024: ** The complete implementation of the BTree subsystem is above this line.
        !          3025: ** All the code the follows is for testing and troubleshooting the BTree
        !          3026: ** subsystem.  None of the code that follows is used during normal operation.
        !          3027: ******************************************************************************/
        !          3028: 
        !          3029: /*
        !          3030: ** Print a disassembly of the given page on standard output.  This routine
        !          3031: ** is used for debugging and testing only.
        !          3032: */
        !          3033: #ifdef SQLITE_TEST
        !          3034: static int fileBtreePageDump(Btree *pBt, int pgno, int recursive){
        !          3035:   int rc;
        !          3036:   MemPage *pPage;
        !          3037:   int i, j;
        !          3038:   int nFree;
        !          3039:   u16 idx;
        !          3040:   char range[20];
        !          3041:   unsigned char payload[20];
        !          3042:   rc = sqlitepager_get(pBt->pPager, (Pgno)pgno, (void**)&pPage);
        !          3043:   if( rc ){
        !          3044:     return rc;
        !          3045:   }
        !          3046:   if( recursive ) printf("PAGE %d:\n", pgno);
        !          3047:   i = 0;
        !          3048:   idx = SWAB16(pBt, pPage->u.hdr.firstCell);
        !          3049:   while( idx>0 && idx<=SQLITE_USABLE_SIZE-MIN_CELL_SIZE ){
        !          3050:     Cell *pCell = (Cell*)&pPage->u.aDisk[idx];
        !          3051:     int sz = cellSize(pBt, pCell);
        !          3052:     sprintf(range,"%d..%d", idx, idx+sz-1);
        !          3053:     sz = NKEY(pBt, pCell->h) + NDATA(pBt, pCell->h);
        !          3054:     if( sz>sizeof(payload)-1 ) sz = sizeof(payload)-1;
        !          3055:     memcpy(payload, pCell->aPayload, sz);
        !          3056:     for(j=0; j<sz; j++){
        !          3057:       if( payload[j]<0x20 || payload[j]>0x7f ) payload[j] = '.';
        !          3058:     }
        !          3059:     payload[sz] = 0;
        !          3060:     printf(
        !          3061:       "cell %2d: i=%-10s chld=%-4d nk=%-4d nd=%-4d payload=%s\n",
        !          3062:       i, range, (int)pCell->h.leftChild, 
        !          3063:       NKEY(pBt, pCell->h), NDATA(pBt, pCell->h),
        !          3064:       payload
        !          3065:     );
        !          3066:     if( pPage->isInit && pPage->apCell[i]!=pCell ){
        !          3067:       printf("**** apCell[%d] does not match on prior entry ****\n", i);
        !          3068:     }
        !          3069:     i++;
        !          3070:     idx = SWAB16(pBt, pCell->h.iNext);
        !          3071:   }
        !          3072:   if( idx!=0 ){
        !          3073:     printf("ERROR: next cell index out of range: %d\n", idx);
        !          3074:   }
        !          3075:   printf("right_child: %d\n", SWAB32(pBt, pPage->u.hdr.rightChild));
        !          3076:   nFree = 0;
        !          3077:   i = 0;
        !          3078:   idx = SWAB16(pBt, pPage->u.hdr.firstFree);
        !          3079:   while( idx>0 && idx<SQLITE_USABLE_SIZE ){
        !          3080:     FreeBlk *p = (FreeBlk*)&pPage->u.aDisk[idx];
        !          3081:     sprintf(range,"%d..%d", idx, idx+p->iSize-1);
        !          3082:     nFree += SWAB16(pBt, p->iSize);
        !          3083:     printf("freeblock %2d: i=%-10s size=%-4d total=%d\n",
        !          3084:        i, range, SWAB16(pBt, p->iSize), nFree);
        !          3085:     idx = SWAB16(pBt, p->iNext);
        !          3086:     i++;
        !          3087:   }
        !          3088:   if( idx!=0 ){
        !          3089:     printf("ERROR: next freeblock index out of range: %d\n", idx);
        !          3090:   }
        !          3091:   if( recursive && pPage->u.hdr.rightChild!=0 ){
        !          3092:     idx = SWAB16(pBt, pPage->u.hdr.firstCell);
        !          3093:     while( idx>0 && idx<SQLITE_USABLE_SIZE-MIN_CELL_SIZE ){
        !          3094:       Cell *pCell = (Cell*)&pPage->u.aDisk[idx];
        !          3095:       fileBtreePageDump(pBt, SWAB32(pBt, pCell->h.leftChild), 1);
        !          3096:       idx = SWAB16(pBt, pCell->h.iNext);
        !          3097:     }
        !          3098:     fileBtreePageDump(pBt, SWAB32(pBt, pPage->u.hdr.rightChild), 1);
        !          3099:   }
        !          3100:   sqlitepager_unref(pPage);
        !          3101:   return SQLITE_OK;
        !          3102: }
        !          3103: #endif
        !          3104: 
        !          3105: #ifdef SQLITE_TEST
        !          3106: /*
        !          3107: ** Fill aResult[] with information about the entry and page that the
        !          3108: ** cursor is pointing to.
        !          3109: ** 
        !          3110: **   aResult[0] =  The page number
        !          3111: **   aResult[1] =  The entry number
        !          3112: **   aResult[2] =  Total number of entries on this page
        !          3113: **   aResult[3] =  Size of this entry
        !          3114: **   aResult[4] =  Number of free bytes on this page
        !          3115: **   aResult[5] =  Number of free blocks on the page
        !          3116: **   aResult[6] =  Page number of the left child of this entry
        !          3117: **   aResult[7] =  Page number of the right child for the whole page
        !          3118: **
        !          3119: ** This routine is used for testing and debugging only.
        !          3120: */
        !          3121: static int fileBtreeCursorDump(BtCursor *pCur, int *aResult){
        !          3122:   int cnt, idx;
        !          3123:   MemPage *pPage = pCur->pPage;
        !          3124:   Btree *pBt = pCur->pBt;
        !          3125:   aResult[0] = sqlitepager_pagenumber(pPage);
        !          3126:   aResult[1] = pCur->idx;
        !          3127:   aResult[2] = pPage->nCell;
        !          3128:   if( pCur->idx>=0 && pCur->idx<pPage->nCell ){
        !          3129:     aResult[3] = cellSize(pBt, pPage->apCell[pCur->idx]);
        !          3130:     aResult[6] = SWAB32(pBt, pPage->apCell[pCur->idx]->h.leftChild);
        !          3131:   }else{
        !          3132:     aResult[3] = 0;
        !          3133:     aResult[6] = 0;
        !          3134:   }
        !          3135:   aResult[4] = pPage->nFree;
        !          3136:   cnt = 0;
        !          3137:   idx = SWAB16(pBt, pPage->u.hdr.firstFree);
        !          3138:   while( idx>0 && idx<SQLITE_USABLE_SIZE ){
        !          3139:     cnt++;
        !          3140:     idx = SWAB16(pBt, ((FreeBlk*)&pPage->u.aDisk[idx])->iNext);
        !          3141:   }
        !          3142:   aResult[5] = cnt;
        !          3143:   aResult[7] = SWAB32(pBt, pPage->u.hdr.rightChild);
        !          3144:   return SQLITE_OK;
        !          3145: }
        !          3146: #endif
        !          3147: 
        !          3148: /*
        !          3149: ** Return the pager associated with a BTree.  This routine is used for
        !          3150: ** testing and debugging only.
        !          3151: */
        !          3152: static Pager *fileBtreePager(Btree *pBt){
        !          3153:   return pBt->pPager;
        !          3154: }
        !          3155: 
        !          3156: /*
        !          3157: ** This structure is passed around through all the sanity checking routines
        !          3158: ** in order to keep track of some global state information.
        !          3159: */
        !          3160: typedef struct IntegrityCk IntegrityCk;
        !          3161: struct IntegrityCk {
        !          3162:   Btree *pBt;    /* The tree being checked out */
        !          3163:   Pager *pPager; /* The associated pager.  Also accessible by pBt->pPager */
        !          3164:   int nPage;     /* Number of pages in the database */
        !          3165:   int *anRef;    /* Number of times each page is referenced */
        !          3166:   char *zErrMsg; /* An error message.  NULL of no errors seen. */
        !          3167: };
        !          3168: 
        !          3169: /*
        !          3170: ** Append a message to the error message string.
        !          3171: */
        !          3172: static void checkAppendMsg(IntegrityCk *pCheck, char *zMsg1, char *zMsg2){
        !          3173:   if( pCheck->zErrMsg ){
        !          3174:     char *zOld = pCheck->zErrMsg;
        !          3175:     pCheck->zErrMsg = 0;
        !          3176:     sqliteSetString(&pCheck->zErrMsg, zOld, "\n", zMsg1, zMsg2, (char*)0);
        !          3177:     sqliteFree(zOld);
        !          3178:   }else{
        !          3179:     sqliteSetString(&pCheck->zErrMsg, zMsg1, zMsg2, (char*)0);
        !          3180:   }
        !          3181: }
        !          3182: 
        !          3183: /*
        !          3184: ** Add 1 to the reference count for page iPage.  If this is the second
        !          3185: ** reference to the page, add an error message to pCheck->zErrMsg.
        !          3186: ** Return 1 if there are 2 ore more references to the page and 0 if
        !          3187: ** if this is the first reference to the page.
        !          3188: **
        !          3189: ** Also check that the page number is in bounds.
        !          3190: */
        !          3191: static int checkRef(IntegrityCk *pCheck, int iPage, char *zContext){
        !          3192:   if( iPage==0 ) return 1;
        !          3193:   if( iPage>pCheck->nPage || iPage<0 ){
        !          3194:     char zBuf[100];
        !          3195:     sprintf(zBuf, "invalid page number %d", iPage);
        !          3196:     checkAppendMsg(pCheck, zContext, zBuf);
        !          3197:     return 1;
        !          3198:   }
        !          3199:   if( pCheck->anRef[iPage]==1 ){
        !          3200:     char zBuf[100];
        !          3201:     sprintf(zBuf, "2nd reference to page %d", iPage);
        !          3202:     checkAppendMsg(pCheck, zContext, zBuf);
        !          3203:     return 1;
        !          3204:   }
        !          3205:   return  (pCheck->anRef[iPage]++)>1;
        !          3206: }
        !          3207: 
        !          3208: /*
        !          3209: ** Check the integrity of the freelist or of an overflow page list.
        !          3210: ** Verify that the number of pages on the list is N.
        !          3211: */
        !          3212: static void checkList(
        !          3213:   IntegrityCk *pCheck,  /* Integrity checking context */
        !          3214:   int isFreeList,       /* True for a freelist.  False for overflow page list */
        !          3215:   int iPage,            /* Page number for first page in the list */
        !          3216:   int N,                /* Expected number of pages in the list */
        !          3217:   char *zContext        /* Context for error messages */
        !          3218: ){
        !          3219:   int i;
        !          3220:   char zMsg[100];
        !          3221:   while( N-- > 0 ){
        !          3222:     OverflowPage *pOvfl;
        !          3223:     if( iPage<1 ){
        !          3224:       sprintf(zMsg, "%d pages missing from overflow list", N+1);
        !          3225:       checkAppendMsg(pCheck, zContext, zMsg);
        !          3226:       break;
        !          3227:     }
        !          3228:     if( checkRef(pCheck, iPage, zContext) ) break;
        !          3229:     if( sqlitepager_get(pCheck->pPager, (Pgno)iPage, (void**)&pOvfl) ){
        !          3230:       sprintf(zMsg, "failed to get page %d", iPage);
        !          3231:       checkAppendMsg(pCheck, zContext, zMsg);
        !          3232:       break;
        !          3233:     }
        !          3234:     if( isFreeList ){
        !          3235:       FreelistInfo *pInfo = (FreelistInfo*)pOvfl->aPayload;
        !          3236:       int n = SWAB32(pCheck->pBt, pInfo->nFree);
        !          3237:       for(i=0; i<n; i++){
        !          3238:         checkRef(pCheck, SWAB32(pCheck->pBt, pInfo->aFree[i]), zContext);
        !          3239:       }
        !          3240:       N -= n;
        !          3241:     }
        !          3242:     iPage = SWAB32(pCheck->pBt, pOvfl->iNext);
        !          3243:     sqlitepager_unref(pOvfl);
        !          3244:   }
        !          3245: }
        !          3246: 
        !          3247: /*
        !          3248: ** Return negative if zKey1<zKey2.
        !          3249: ** Return zero if zKey1==zKey2.
        !          3250: ** Return positive if zKey1>zKey2.
        !          3251: */
        !          3252: static int keyCompare(
        !          3253:   const char *zKey1, int nKey1,
        !          3254:   const char *zKey2, int nKey2
        !          3255: ){
        !          3256:   int min = nKey1>nKey2 ? nKey2 : nKey1;
        !          3257:   int c = memcmp(zKey1, zKey2, min);
        !          3258:   if( c==0 ){
        !          3259:     c = nKey1 - nKey2;
        !          3260:   }
        !          3261:   return c;
        !          3262: }
        !          3263: 
        !          3264: /*
        !          3265: ** Do various sanity checks on a single page of a tree.  Return
        !          3266: ** the tree depth.  Root pages return 0.  Parents of root pages
        !          3267: ** return 1, and so forth.
        !          3268: ** 
        !          3269: ** These checks are done:
        !          3270: **
        !          3271: **      1.  Make sure that cells and freeblocks do not overlap
        !          3272: **          but combine to completely cover the page.
        !          3273: **      2.  Make sure cell keys are in order.
        !          3274: **      3.  Make sure no key is less than or equal to zLowerBound.
        !          3275: **      4.  Make sure no key is greater than or equal to zUpperBound.
        !          3276: **      5.  Check the integrity of overflow pages.
        !          3277: **      6.  Recursively call checkTreePage on all children.
        !          3278: **      7.  Verify that the depth of all children is the same.
        !          3279: **      8.  Make sure this page is at least 33% full or else it is
        !          3280: **          the root of the tree.
        !          3281: */
        !          3282: static int checkTreePage(
        !          3283:   IntegrityCk *pCheck,  /* Context for the sanity check */
        !          3284:   int iPage,            /* Page number of the page to check */
        !          3285:   MemPage *pParent,     /* Parent page */
        !          3286:   char *zParentContext, /* Parent context */
        !          3287:   char *zLowerBound,    /* All keys should be greater than this, if not NULL */
        !          3288:   int nLower,           /* Number of characters in zLowerBound */
        !          3289:   char *zUpperBound,    /* All keys should be less than this, if not NULL */
        !          3290:   int nUpper            /* Number of characters in zUpperBound */
        !          3291: ){
        !          3292:   MemPage *pPage;
        !          3293:   int i, rc, depth, d2, pgno;
        !          3294:   char *zKey1, *zKey2;
        !          3295:   int nKey1, nKey2;
        !          3296:   BtCursor cur;
        !          3297:   Btree *pBt;
        !          3298:   char zMsg[100];
        !          3299:   char zContext[100];
        !          3300:   char hit[SQLITE_USABLE_SIZE];
        !          3301: 
        !          3302:   /* Check that the page exists
        !          3303:   */
        !          3304:   cur.pBt = pBt = pCheck->pBt;
        !          3305:   if( iPage==0 ) return 0;
        !          3306:   if( checkRef(pCheck, iPage, zParentContext) ) return 0;
        !          3307:   sprintf(zContext, "On tree page %d: ", iPage);
        !          3308:   if( (rc = sqlitepager_get(pCheck->pPager, (Pgno)iPage, (void**)&pPage))!=0 ){
        !          3309:     sprintf(zMsg, "unable to get the page. error code=%d", rc);
        !          3310:     checkAppendMsg(pCheck, zContext, zMsg);
        !          3311:     return 0;
        !          3312:   }
        !          3313:   if( (rc = initPage(pBt, pPage, (Pgno)iPage, pParent))!=0 ){
        !          3314:     sprintf(zMsg, "initPage() returns error code %d", rc);
        !          3315:     checkAppendMsg(pCheck, zContext, zMsg);
        !          3316:     sqlitepager_unref(pPage);
        !          3317:     return 0;
        !          3318:   }
        !          3319: 
        !          3320:   /* Check out all the cells.
        !          3321:   */
        !          3322:   depth = 0;
        !          3323:   if( zLowerBound ){
        !          3324:     zKey1 = sqliteMalloc( nLower+1 );
        !          3325:     memcpy(zKey1, zLowerBound, nLower);
        !          3326:     zKey1[nLower] = 0;
        !          3327:   }else{
        !          3328:     zKey1 = 0;
        !          3329:   }
        !          3330:   nKey1 = nLower;
        !          3331:   cur.pPage = pPage;
        !          3332:   for(i=0; i<pPage->nCell; i++){
        !          3333:     Cell *pCell = pPage->apCell[i];
        !          3334:     int sz;
        !          3335: 
        !          3336:     /* Check payload overflow pages
        !          3337:     */
        !          3338:     nKey2 = NKEY(pBt, pCell->h);
        !          3339:     sz = nKey2 + NDATA(pBt, pCell->h);
        !          3340:     sprintf(zContext, "On page %d cell %d: ", iPage, i);
        !          3341:     if( sz>MX_LOCAL_PAYLOAD ){
        !          3342:       int nPage = (sz - MX_LOCAL_PAYLOAD + OVERFLOW_SIZE - 1)/OVERFLOW_SIZE;
        !          3343:       checkList(pCheck, 0, SWAB32(pBt, pCell->ovfl), nPage, zContext);
        !          3344:     }
        !          3345: 
        !          3346:     /* Check that keys are in the right order
        !          3347:     */
        !          3348:     cur.idx = i;
        !          3349:     zKey2 = sqliteMallocRaw( nKey2+1 );
        !          3350:     getPayload(&cur, 0, nKey2, zKey2);
        !          3351:     if( zKey1 && keyCompare(zKey1, nKey1, zKey2, nKey2)>=0 ){
        !          3352:       checkAppendMsg(pCheck, zContext, "Key is out of order");
        !          3353:     }
        !          3354: 
        !          3355:     /* Check sanity of left child page.
        !          3356:     */
        !          3357:     pgno = SWAB32(pBt, pCell->h.leftChild);
        !          3358:     d2 = checkTreePage(pCheck, pgno, pPage, zContext, zKey1,nKey1,zKey2,nKey2);
        !          3359:     if( i>0 && d2!=depth ){
        !          3360:       checkAppendMsg(pCheck, zContext, "Child page depth differs");
        !          3361:     }
        !          3362:     depth = d2;
        !          3363:     sqliteFree(zKey1);
        !          3364:     zKey1 = zKey2;
        !          3365:     nKey1 = nKey2;
        !          3366:   }
        !          3367:   pgno = SWAB32(pBt, pPage->u.hdr.rightChild);
        !          3368:   sprintf(zContext, "On page %d at right child: ", iPage);
        !          3369:   checkTreePage(pCheck, pgno, pPage, zContext, zKey1,nKey1,zUpperBound,nUpper);
        !          3370:   sqliteFree(zKey1);
        !          3371:  
        !          3372:   /* Check for complete coverage of the page
        !          3373:   */
        !          3374:   memset(hit, 0, sizeof(hit));
        !          3375:   memset(hit, 1, sizeof(PageHdr));
        !          3376:   for(i=SWAB16(pBt, pPage->u.hdr.firstCell); i>0 && i<SQLITE_USABLE_SIZE; ){
        !          3377:     Cell *pCell = (Cell*)&pPage->u.aDisk[i];
        !          3378:     int j;
        !          3379:     for(j=i+cellSize(pBt, pCell)-1; j>=i; j--) hit[j]++;
        !          3380:     i = SWAB16(pBt, pCell->h.iNext);
        !          3381:   }
        !          3382:   for(i=SWAB16(pBt,pPage->u.hdr.firstFree); i>0 && i<SQLITE_USABLE_SIZE; ){
        !          3383:     FreeBlk *pFBlk = (FreeBlk*)&pPage->u.aDisk[i];
        !          3384:     int j;
        !          3385:     for(j=i+SWAB16(pBt,pFBlk->iSize)-1; j>=i; j--) hit[j]++;
        !          3386:     i = SWAB16(pBt,pFBlk->iNext);
        !          3387:   }
        !          3388:   for(i=0; i<SQLITE_USABLE_SIZE; i++){
        !          3389:     if( hit[i]==0 ){
        !          3390:       sprintf(zMsg, "Unused space at byte %d of page %d", i, iPage);
        !          3391:       checkAppendMsg(pCheck, zMsg, 0);
        !          3392:       break;
        !          3393:     }else if( hit[i]>1 ){
        !          3394:       sprintf(zMsg, "Multiple uses for byte %d of page %d", i, iPage);
        !          3395:       checkAppendMsg(pCheck, zMsg, 0);
        !          3396:       break;
        !          3397:     }
        !          3398:   }
        !          3399: 
        !          3400:   /* Check that free space is kept to a minimum
        !          3401:   */
        !          3402: #if 0
        !          3403:   if( pParent && pParent->nCell>2 && pPage->nFree>3*SQLITE_USABLE_SIZE/4 ){
        !          3404:     sprintf(zMsg, "free space (%d) greater than max (%d)", pPage->nFree,
        !          3405:        SQLITE_USABLE_SIZE/3);
        !          3406:     checkAppendMsg(pCheck, zContext, zMsg);
        !          3407:   }
        !          3408: #endif
        !          3409: 
        !          3410:   sqlitepager_unref(pPage);
        !          3411:   return depth;
        !          3412: }
        !          3413: 
        !          3414: /*
        !          3415: ** This routine does a complete check of the given BTree file.  aRoot[] is
        !          3416: ** an array of pages numbers were each page number is the root page of
        !          3417: ** a table.  nRoot is the number of entries in aRoot.
        !          3418: **
        !          3419: ** If everything checks out, this routine returns NULL.  If something is
        !          3420: ** amiss, an error message is written into memory obtained from malloc()
        !          3421: ** and a pointer to that error message is returned.  The calling function
        !          3422: ** is responsible for freeing the error message when it is done.
        !          3423: */
        !          3424: char *fileBtreeIntegrityCheck(Btree *pBt, int *aRoot, int nRoot){
        !          3425:   int i;
        !          3426:   int nRef;
        !          3427:   IntegrityCk sCheck;
        !          3428: 
        !          3429:   nRef = *sqlitepager_stats(pBt->pPager);
        !          3430:   if( lockBtree(pBt)!=SQLITE_OK ){
        !          3431:     return sqliteStrDup("Unable to acquire a read lock on the database");
        !          3432:   }
        !          3433:   sCheck.pBt = pBt;
        !          3434:   sCheck.pPager = pBt->pPager;
        !          3435:   sCheck.nPage = sqlitepager_pagecount(sCheck.pPager);
        !          3436:   if( sCheck.nPage==0 ){
        !          3437:     unlockBtreeIfUnused(pBt);
        !          3438:     return 0;
        !          3439:   }
        !          3440:   sCheck.anRef = sqliteMallocRaw( (sCheck.nPage+1)*sizeof(sCheck.anRef[0]) );
        !          3441:   sCheck.anRef[1] = 1;
        !          3442:   for(i=2; i<=sCheck.nPage; i++){ sCheck.anRef[i] = 0; }
        !          3443:   sCheck.zErrMsg = 0;
        !          3444: 
        !          3445:   /* Check the integrity of the freelist
        !          3446:   */
        !          3447:   checkList(&sCheck, 1, SWAB32(pBt, pBt->page1->freeList),
        !          3448:             SWAB32(pBt, pBt->page1->nFree), "Main freelist: ");
        !          3449: 
        !          3450:   /* Check all the tables.
        !          3451:   */
        !          3452:   for(i=0; i<nRoot; i++){
        !          3453:     if( aRoot[i]==0 ) continue;
        !          3454:     checkTreePage(&sCheck, aRoot[i], 0, "List of tree roots: ", 0,0,0,0);
        !          3455:   }
        !          3456: 
        !          3457:   /* Make sure every page in the file is referenced
        !          3458:   */
        !          3459:   for(i=1; i<=sCheck.nPage; i++){
        !          3460:     if( sCheck.anRef[i]==0 ){
        !          3461:       char zBuf[100];
        !          3462:       sprintf(zBuf, "Page %d is never used", i);
        !          3463:       checkAppendMsg(&sCheck, zBuf, 0);
        !          3464:     }
        !          3465:   }
        !          3466: 
        !          3467:   /* Make sure this analysis did not leave any unref() pages
        !          3468:   */
        !          3469:   unlockBtreeIfUnused(pBt);
        !          3470:   if( nRef != *sqlitepager_stats(pBt->pPager) ){
        !          3471:     char zBuf[100];
        !          3472:     sprintf(zBuf, 
        !          3473:       "Outstanding page count goes from %d to %d during this analysis",
        !          3474:       nRef, *sqlitepager_stats(pBt->pPager)
        !          3475:     );
        !          3476:     checkAppendMsg(&sCheck, zBuf, 0);
        !          3477:   }
        !          3478: 
        !          3479:   /* Clean  up and report errors.
        !          3480:   */
        !          3481:   sqliteFree(sCheck.anRef);
        !          3482:   return sCheck.zErrMsg;
        !          3483: }
        !          3484: 
        !          3485: /*
        !          3486: ** Return the full pathname of the underlying database file.
        !          3487: */
        !          3488: static const char *fileBtreeGetFilename(Btree *pBt){
        !          3489:   assert( pBt->pPager!=0 );
        !          3490:   return sqlitepager_filename(pBt->pPager);
        !          3491: }
        !          3492: 
        !          3493: /*
        !          3494: ** Copy the complete content of pBtFrom into pBtTo.  A transaction
        !          3495: ** must be active for both files.
        !          3496: **
        !          3497: ** The size of file pBtFrom may be reduced by this operation.
        !          3498: ** If anything goes wrong, the transaction on pBtFrom is rolled back.
        !          3499: */
        !          3500: static int fileBtreeCopyFile(Btree *pBtTo, Btree *pBtFrom){
        !          3501:   int rc = SQLITE_OK;
        !          3502:   Pgno i, nPage, nToPage;
        !          3503: 
        !          3504:   if( !pBtTo->inTrans || !pBtFrom->inTrans ) return SQLITE_ERROR;
        !          3505:   if( pBtTo->needSwab!=pBtFrom->needSwab ) return SQLITE_ERROR;
        !          3506:   if( pBtTo->pCursor ) return SQLITE_BUSY;
        !          3507:   memcpy(pBtTo->page1, pBtFrom->page1, SQLITE_USABLE_SIZE);
        !          3508:   rc = sqlitepager_overwrite(pBtTo->pPager, 1, pBtFrom->page1);
        !          3509:   nToPage = sqlitepager_pagecount(pBtTo->pPager);
        !          3510:   nPage = sqlitepager_pagecount(pBtFrom->pPager);
        !          3511:   for(i=2; rc==SQLITE_OK && i<=nPage; i++){
        !          3512:     void *pPage;
        !          3513:     rc = sqlitepager_get(pBtFrom->pPager, i, &pPage);
        !          3514:     if( rc ) break;
        !          3515:     rc = sqlitepager_overwrite(pBtTo->pPager, i, pPage);
        !          3516:     if( rc ) break;
        !          3517:     sqlitepager_unref(pPage);
        !          3518:   }
        !          3519:   for(i=nPage+1; rc==SQLITE_OK && i<=nToPage; i++){
        !          3520:     void *pPage;
        !          3521:     rc = sqlitepager_get(pBtTo->pPager, i, &pPage);
        !          3522:     if( rc ) break;
        !          3523:     rc = sqlitepager_write(pPage);
        !          3524:     sqlitepager_unref(pPage);
        !          3525:     sqlitepager_dont_write(pBtTo->pPager, i);
        !          3526:   }
        !          3527:   if( !rc && nPage<nToPage ){
        !          3528:     rc = sqlitepager_truncate(pBtTo->pPager, nPage);
        !          3529:   }
        !          3530:   if( rc ){
        !          3531:     fileBtreeRollback(pBtTo);
        !          3532:   }
        !          3533:   return rc;  
        !          3534: }
        !          3535: 
        !          3536: /*
        !          3537: ** The following tables contain pointers to all of the interface
        !          3538: ** routines for this implementation of the B*Tree backend.  To
        !          3539: ** substitute a different implemention of the backend, one has merely
        !          3540: ** to provide pointers to alternative functions in similar tables.
        !          3541: */
        !          3542: static BtOps sqliteBtreeOps = {
        !          3543:     fileBtreeClose,
        !          3544:     fileBtreeSetCacheSize,
        !          3545:     fileBtreeSetSafetyLevel,
        !          3546:     fileBtreeBeginTrans,
        !          3547:     fileBtreeCommit,
        !          3548:     fileBtreeRollback,
        !          3549:     fileBtreeBeginCkpt,
        !          3550:     fileBtreeCommitCkpt,
        !          3551:     fileBtreeRollbackCkpt,
        !          3552:     fileBtreeCreateTable,
        !          3553:     fileBtreeCreateTable,  /* Really sqliteBtreeCreateIndex() */
        !          3554:     fileBtreeDropTable,
        !          3555:     fileBtreeClearTable,
        !          3556:     fileBtreeCursor,
        !          3557:     fileBtreeGetMeta,
        !          3558:     fileBtreeUpdateMeta,
        !          3559:     fileBtreeIntegrityCheck,
        !          3560:     fileBtreeGetFilename,
        !          3561:     fileBtreeCopyFile,
        !          3562:     fileBtreePager,
        !          3563: #ifdef SQLITE_TEST
        !          3564:     fileBtreePageDump,
        !          3565: #endif
        !          3566: };
        !          3567: static BtCursorOps sqliteBtreeCursorOps = {
        !          3568:     fileBtreeMoveto,
        !          3569:     fileBtreeDelete,
        !          3570:     fileBtreeInsert,
        !          3571:     fileBtreeFirst,
        !          3572:     fileBtreeLast,
        !          3573:     fileBtreeNext,
        !          3574:     fileBtreePrevious,
        !          3575:     fileBtreeKeySize,
        !          3576:     fileBtreeKey,
        !          3577:     fileBtreeKeyCompare,
        !          3578:     fileBtreeDataSize,
        !          3579:     fileBtreeData,
        !          3580:     fileBtreeCloseCursor,
        !          3581: #ifdef SQLITE_TEST
        !          3582:     fileBtreeCursorDump,
        !          3583: #endif
        !          3584: };

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>