1: /*
2: ** 2005 December 14
3: **
4: ** The author disclaims copyright to this source code. In place of
5: ** a legal notice, here is a blessing:
6: **
7: ** May you do good and not evil.
8: ** May you find forgiveness for yourself and forgive others.
9: ** May you share freely, never taking more than you give.
10: **
11: *************************************************************************
12: **
13: ** $Id: sqlite3async.c,v 1.1.1.1 2012/02/21 17:04:17 misho Exp $
14: **
15: ** This file contains the implementation of an asynchronous IO backend
16: ** for SQLite.
17: */
18:
19: #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO)
20:
21: #include "sqlite3async.h"
22: #include "sqlite3.h"
23: #include <stdarg.h>
24: #include <string.h>
25: #include <assert.h>
26:
/* Small helper macros used throughout this file. Each argument may be
** evaluated more than once, so avoid arguments with side effects. */
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))

#ifndef SQLITE_AMALGAMATION
/* Evaluate and discard an intentionally unused parameter so that the
** compiler does not warn about it. */
#define UNUSED_PARAMETER(p) (void)(p)
#endif
35:
/* Forward declarations of the structure types defined further down. */
typedef struct AsyncFile AsyncFile;
typedef struct AsyncFileData AsyncFileData;
typedef struct AsyncFileLock AsyncFileLock;
typedef struct AsyncLock AsyncLock;
typedef struct AsyncWrite AsyncWrite;
42:
/*
** Debug tracing.
**
** Unless NDEBUG is defined, ASYNC_TRACE((zFormat, ...)) formats a message
** with sqlite3_vmprintf() and writes it to stderr, but only while the
** sqlite3async_trace flag is non-zero. Note the double parentheses at the
** call site: the complete argument list is passed as the single macro
** argument X. When NDEBUG is defined the macro expands to nothing.
*/
#ifndef NDEBUG
#include <stdio.h>
static int sqlite3async_trace = 0;
/* Wrapped in do{}while(0) so that the macro is a single statement and
** cannot capture the "else" of an enclosing if-statement. */
# define ASYNC_TRACE(X) do{ if( sqlite3async_trace ) asyncTrace X; }while(0)
static void asyncTrace(const char *zFormat, ...){
  char *z;
  va_list ap;
  va_start(ap, zFormat);
  z = sqlite3_vmprintf(zFormat, ap);
  va_end(ap);
  /* sqlite3_vmprintf() returns NULL if it cannot allocate memory. Passing
  ** NULL to a %s conversion is undefined, so drop the message instead. */
  if( z ){
    fprintf(stderr, "[%d] %s", 0 /* (int)pthread_self() */, z);
    sqlite3_free(z);
  }
}
#else
# define ASYNC_TRACE(X)
#endif
60:
61: /*
62: ** THREAD SAFETY NOTES
63: **
64: ** Basic rules:
65: **
66: ** * Both read and write access to the global write-op queue must be
67: ** protected by the async.queueMutex. As are the async.ioError and
68: ** async.nFile variables.
69: **
70: ** * The async.pLock list and all AsyncLock and AsyncFileLock
71: ** structures must be protected by the async.lockMutex mutex.
72: **
73: ** * The file handles from the underlying system are not assumed to
74: ** be thread safe.
75: **
76: ** * See the last two paragraphs under "The Writer Thread" for
77: ** an assumption to do with file-handle synchronization by the Os.
78: **
79: ** Deadlock prevention:
80: **
** There are three mutexes used by the system: the "writer" mutex,
** the "queue" mutex and the "lock" mutex. Rules are:
83: **
** * It is illegal to block on the writer mutex when any other mutex
** is held, and
86: **
87: ** * It is illegal to block on the queue mutex when the lock mutex
88: ** is held.
89: **
** i.e. mutexes must be grabbed in the order "writer", "queue", "lock".
91: **
92: ** File system operations (invoked by SQLite thread):
93: **
94: ** xOpen
95: ** xDelete
96: ** xFileExists
97: **
98: ** File handle operations (invoked by SQLite thread):
99: **
100: ** asyncWrite, asyncClose, asyncTruncate, asyncSync
101: **
102: ** The operations above add an entry to the global write-op list. They
103: ** prepare the entry, acquire the async.queueMutex momentarily while
104: ** list pointers are manipulated to insert the new entry, then release
105: ** the mutex and signal the writer thread to wake up in case it happens
106: ** to be asleep.
107: **
108: **
109: ** asyncRead, asyncFileSize.
110: **
** Read operations. Both of these read from the underlying file
** first, then adjust their result based on pending writes in the
** write-op queue. So async.queueMutex is held for the duration
** of these operations to prevent other threads from changing the
** queue in mid operation.
116: **
117: **
118: ** asyncLock, asyncUnlock, asyncCheckReservedLock
119: **
120: ** These primitives implement in-process locking using a hash table
121: ** on the file name. Files are locked correctly for connections coming
122: ** from the same process. But other processes cannot see these locks
123: ** and will therefore not honor them.
124: **
125: **
126: ** The writer thread:
127: **
** The async.writerMutex is used to make sure there is only
** a single writer thread running at a time.
130: **
131: ** Inside the writer thread is a loop that works like this:
132: **
133: ** WHILE (write-op list is not empty)
134: ** Do IO operation at head of write-op list
135: ** Remove entry from head of write-op list
136: ** END WHILE
137: **
138: ** The async.queueMutex is always held during the <write-op list is
139: ** not empty> test, and when the entry is removed from the head
140: ** of the write-op list. Sometimes it is held for the interim
141: ** period (while the IO is performed), and sometimes it is
142: ** relinquished. It is relinquished if (a) the IO op is an
143: ** ASYNC_CLOSE or (b) when the file handle was opened, two of
144: ** the underlying systems handles were opened on the same
145: ** file-system entry.
146: **
147: ** If condition (b) above is true, then one file-handle
148: ** (AsyncFile.pBaseRead) is used exclusively by sqlite threads to read the
149: ** file, the other (AsyncFile.pBaseWrite) by sqlite3_async_flush()
150: ** threads to perform write() operations. This means that read
151: ** operations are not blocked by asynchronous writes (although
152: ** asynchronous writes may still be blocked by reads).
153: **
154: ** This assumes that the OS keeps two handles open on the same file
155: ** properly in sync. That is, any read operation that starts after a
156: ** write operation on the same file system entry has completed returns
157: ** data consistent with the write. We also assume that if one thread
158: ** reads a file while another is writing it all bytes other than the
159: ** ones actually being written contain valid data.
160: **
161: ** If the above assumptions are not true, set the preprocessor symbol
162: ** SQLITE_ASYNC_TWO_FILEHANDLES to 0.
163: */
164:
165:
/* TESTONLY(X) expands to X in builds where assert() is active (NDEBUG is
** not defined) and to nothing otherwise. */
#ifdef NDEBUG
# define TESTONLY( X )
#else
# define TESTONLY( X ) X
#endif
171:
172: /*
173: ** PORTING FUNCTIONS
174: **
175: ** There are two definitions of the following functions. One for pthreads
176: ** compatible systems and one for Win32. These functions isolate the OS
177: ** specific code required by each platform.
178: **
179: ** The system uses three mutexes and a single condition variable. To
180: ** block on a mutex, async_mutex_enter() is called. The parameter passed
181: ** to async_mutex_enter(), which must be one of ASYNC_MUTEX_LOCK,
182: ** ASYNC_MUTEX_QUEUE or ASYNC_MUTEX_WRITER, identifies which of the three
183: ** mutexes to lock. Similarly, to unlock a mutex, async_mutex_leave() is
184: ** called with a parameter identifying the mutex being unlocked. Mutexes
185: ** are not recursive - it is an error to call async_mutex_enter() to
186: ** lock a mutex that is already locked, or to call async_mutex_leave()
187: ** to unlock a mutex that is not currently locked.
188: **
189: ** The async_cond_wait() and async_cond_signal() functions are modelled
190: ** on the pthreads functions with similar names. The first parameter to
191: ** both functions is always ASYNC_COND_QUEUE. When async_cond_wait()
192: ** is called the mutex identified by the second parameter must be held.
193: ** The mutex is unlocked, and the calling thread simultaneously begins
194: ** waiting for the condition variable to be signalled by another thread.
195: ** After another thread signals the condition variable, the calling
196: ** thread stops waiting, locks mutex eMutex and returns. The
197: ** async_cond_signal() function is used to signal the condition variable.
198: ** It is assumed that the mutex used by the thread calling async_cond_wait()
199: ** is held by the caller of async_cond_signal() (otherwise there would be
200: ** a race condition).
201: **
202: ** It is guaranteed that no other thread will call async_cond_wait() when
203: ** there is already a thread waiting on the condition variable.
204: **
** The async_sched_yield() function is called to suggest to the operating
** system that it would be a good time to shift the current thread off the
** CPU. The system will still work if this function is not implemented
** (the win32 version calls Sleep(0), the pthreads version sched_yield()),
** but it might be marginally more efficient if it is.
210: */
/* Mutex primitives. eMutex is one of the ASYNC_MUTEX_XXX values below. */
static void async_mutex_enter(int eMutex);
static void async_mutex_leave(int eMutex);

/* Condition variable primitives. eCond is one of the ASYNC_COND_XXX values. */
static void async_cond_wait(int eCond, int eMutex);
static void async_cond_signal(int eCond);

/* Suggest to the operating system that this thread may be descheduled. */
static void async_sched_yield(void);

/*
** Each platform also supplies the following pair. async_os_initialize()
** is called when the asynchronous VFS is first installed, and
** async_os_shutdown() is called when it is uninstalled (from within
** sqlite3async_shutdown()).
**
** The pthreads versions are both no-ops. The win32 versions create and
** destroy the mutexes and the condition variable.
**
** A non-zero return from async_os_initialize() means that initialization
** failed, in which case SQLITE_ERROR is returned to the user.
*/
static int async_os_initialize(void);
static void async_os_shutdown(void);

/*
** Legal values for the 'eMutex' argument. The numeric values matter: the
** assert() statements in async_mutex_enter() use them to check that
** mutexes are taken in a safe order. It is unsafe to try to lock mutex N
** while holding mutex M if (M<=N).
*/
#define ASYNC_MUTEX_LOCK    0
#define ASYNC_MUTEX_QUEUE   1
#define ASYNC_MUTEX_WRITER  2

/* Legal values for the 'eCond' argument. */
#define ASYNC_COND_QUEUE    0
244:
245: /*************************************************************************
246: ** Start of OS specific code.
247: */
248: #if SQLITE_OS_WIN || defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) || defined(__BORLANDC__)
249:
250: #include <windows.h>
251:
252: /* The following block contains the win32 specific code. */
253:
254: #define mutex_held(X) (GetCurrentThreadId()==primitives.aHolder[X])
255:
256: static struct AsyncPrimitives {
257: int isInit;
258: DWORD aHolder[3];
259: CRITICAL_SECTION aMutex[3];
260: HANDLE aCond[1];
261: } primitives = { 0 };
262:
263: static int async_os_initialize(void){
264: if( !primitives.isInit ){
265: primitives.aCond[0] = CreateEvent(NULL, TRUE, FALSE, 0);
266: if( primitives.aCond[0]==NULL ){
267: return 1;
268: }
269: InitializeCriticalSection(&primitives.aMutex[0]);
270: InitializeCriticalSection(&primitives.aMutex[1]);
271: InitializeCriticalSection(&primitives.aMutex[2]);
272: primitives.isInit = 1;
273: }
274: return 0;
275: }
276: static void async_os_shutdown(void){
277: if( primitives.isInit ){
278: DeleteCriticalSection(&primitives.aMutex[0]);
279: DeleteCriticalSection(&primitives.aMutex[1]);
280: DeleteCriticalSection(&primitives.aMutex[2]);
281: CloseHandle(primitives.aCond[0]);
282: primitives.isInit = 0;
283: }
284: }
285:
286: /* The following block contains the Win32 specific code. */
287: static void async_mutex_enter(int eMutex){
288: assert( eMutex==0 || eMutex==1 || eMutex==2 );
289: assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
290: assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
291: assert( eMutex!=0 || (!mutex_held(0)) );
292: EnterCriticalSection(&primitives.aMutex[eMutex]);
293: TESTONLY( primitives.aHolder[eMutex] = GetCurrentThreadId(); )
294: }
295: static void async_mutex_leave(int eMutex){
296: assert( eMutex==0 || eMutex==1 || eMutex==2 );
297: assert( mutex_held(eMutex) );
298: TESTONLY( primitives.aHolder[eMutex] = 0; )
299: LeaveCriticalSection(&primitives.aMutex[eMutex]);
300: }
301: static void async_cond_wait(int eCond, int eMutex){
302: ResetEvent(primitives.aCond[eCond]);
303: async_mutex_leave(eMutex);
304: WaitForSingleObject(primitives.aCond[eCond], INFINITE);
305: async_mutex_enter(eMutex);
306: }
307: static void async_cond_signal(int eCond){
308: assert( mutex_held(ASYNC_MUTEX_QUEUE) );
309: SetEvent(primitives.aCond[eCond]);
310: }
311: static void async_sched_yield(void){
312: Sleep(0);
313: }
314: #else
315:
316: /* The following block contains the pthreads specific code. */
317: #include <pthread.h>
318: #include <sched.h>
319:
/* True if the calling thread is recorded as the holder of mutex X. The
** aHolder[] entries are only written inside TESTONLY(), so this is only
** meaningful inside assert(). */
#define mutex_held(X) pthread_equal(pthread_self(), primitives.aHolder[X])

/* The pthreads primitives are statically initialized, so there is nothing
** to set up or tear down. */
static int async_os_initialize(void){
  return 0;
}
static void async_os_shutdown(void){
}
324:
/* pthreads synchronization objects used by this module, all statically
** initialized. */
static struct AsyncPrimitives {
  pthread_mutex_t aMutex[3];    /* Indexed by the ASYNC_MUTEX_XXX values */
  pthread_cond_t aCond[1];      /* Indexed by the ASYNC_COND_XXX values */
  pthread_t aHolder[3];         /* Thread holding each mutex */
} primitives = {
  /* aMutex[] */
  { PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER,
    PTHREAD_MUTEX_INITIALIZER },
  /* aCond[] */
  { PTHREAD_COND_INITIALIZER },
  /* aHolder[] */
  { 0, 0, 0 }
};
337:
338: static void async_mutex_enter(int eMutex){
339: assert( eMutex==0 || eMutex==1 || eMutex==2 );
340: assert( eMutex!=2 || (!mutex_held(0) && !mutex_held(1) && !mutex_held(2)) );
341: assert( eMutex!=1 || (!mutex_held(0) && !mutex_held(1)) );
342: assert( eMutex!=0 || (!mutex_held(0)) );
343: pthread_mutex_lock(&primitives.aMutex[eMutex]);
344: TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
345: }
346: static void async_mutex_leave(int eMutex){
347: assert( eMutex==0 || eMutex==1 || eMutex==2 );
348: assert( mutex_held(eMutex) );
349: TESTONLY( primitives.aHolder[eMutex] = 0; )
350: pthread_mutex_unlock(&primitives.aMutex[eMutex]);
351: }
352: static void async_cond_wait(int eCond, int eMutex){
353: assert( eMutex==0 || eMutex==1 || eMutex==2 );
354: assert( mutex_held(eMutex) );
355: TESTONLY( primitives.aHolder[eMutex] = 0; )
356: pthread_cond_wait(&primitives.aCond[eCond], &primitives.aMutex[eMutex]);
357: TESTONLY( primitives.aHolder[eMutex] = pthread_self(); )
358: }
359: static void async_cond_signal(int eCond){
360: assert( mutex_held(ASYNC_MUTEX_QUEUE) );
361: pthread_cond_signal(&primitives.aCond[eCond]);
362: }
/*
** pthreads: offer the CPU to other runnable threads. The result of
** sched_yield() is deliberately ignored.
*/
static void async_sched_yield(void){
  (void)sched_yield();
}
366: #endif
367: /*
368: ** End of OS specific code.
369: *************************************************************************/
370:
/* Assert that the calling thread holds mutex X. */
#define assert_mutex_is_held(X) assert( mutex_held(X) )

/* Unless overridden at compile time, SQLITE_ASYNC_TWO_FILEHANDLES is 1.
** Define it to 0 if the assumptions described under "The writer thread"
** in the notes near the top of this file do not hold. */
#if !defined(SQLITE_ASYNC_TWO_FILEHANDLES)
# define SQLITE_ASYNC_TWO_FILEHANDLES 1
#endif
378:
379: /*
380: ** State information is held in the static variable "async" defined
381: ** as the following structure.
382: **
383: ** Both async.ioError and async.nFile are protected by async.queueMutex.
384: */
385: static struct TestAsyncStaticData {
386: AsyncWrite *pQueueFirst; /* Next write operation to be processed */
387: AsyncWrite *pQueueLast; /* Last write operation on the list */
388: AsyncLock *pLock; /* Linked list of all AsyncLock structures */
389: volatile int ioDelay; /* Extra delay between write operations */
390: volatile int eHalt; /* One of the SQLITEASYNC_HALT_XXX values */
391: volatile int bLockFiles; /* Current value of "lockfiles" parameter */
392: int ioError; /* True if an IO error has occurred */
393: int nFile; /* Number of open files (from sqlite pov) */
394: } async = { 0,0,0,0,0,1,0,0 };
395:
/* Legal values for AsyncWrite.op */
#define ASYNC_NOOP           0
#define ASYNC_WRITE          1
#define ASYNC_SYNC           2
#define ASYNC_TRUNCATE       3
#define ASYNC_CLOSE          4
#define ASYNC_DELETE         5
#define ASYNC_OPENEXCLUSIVE  6
#define ASYNC_UNLOCK         7

/* Printable opcode names, indexed by the ASYNC_XXX values above. Used
** for debugging only. Keep the order in sync with the macros! */
static const char *azOpcodeName[] = {
  "NOOP",       /* ASYNC_NOOP */
  "WRITE",      /* ASYNC_WRITE */
  "SYNC",       /* ASYNC_SYNC */
  "TRUNCATE",   /* ASYNC_TRUNCATE */
  "CLOSE",      /* ASYNC_CLOSE */
  "DELETE",     /* ASYNC_DELETE */
  "OPENEX",     /* ASYNC_OPENEXCLUSIVE */
  "UNLOCK"      /* ASYNC_UNLOCK */
};
412:
413: /*
414: ** Entries on the write-op queue are instances of the AsyncWrite
415: ** structure, defined here.
416: **
417: ** The interpretation of the iOffset and nByte variables varies depending
418: ** on the value of AsyncWrite.op:
419: **
420: ** ASYNC_NOOP:
421: ** No values used.
422: **
423: ** ASYNC_WRITE:
424: ** iOffset -> Offset in file to write to.
425: ** nByte -> Number of bytes of data to write (pointed to by zBuf).
426: **
427: ** ASYNC_SYNC:
428: ** nByte -> flags to pass to sqlite3OsSync().
429: **
430: ** ASYNC_TRUNCATE:
431: ** iOffset -> Size to truncate file to.
432: ** nByte -> Unused.
433: **
434: ** ASYNC_CLOSE:
435: ** iOffset -> Unused.
436: ** nByte -> Unused.
437: **
438: ** ASYNC_DELETE:
439: ** iOffset -> Contains the "syncDir" flag.
440: ** nByte -> Number of bytes of zBuf points to (file name).
441: **
442: ** ASYNC_OPENEXCLUSIVE:
443: ** iOffset -> Value of "delflag".
444: ** nByte -> Number of bytes of zBuf points to (file name).
445: **
446: ** ASYNC_UNLOCK:
447: ** nByte -> Argument to sqlite3OsUnlock().
448: **
449: **
450: ** For an ASYNC_WRITE operation, zBuf points to the data to write to the file.
451: ** This space is sqlite3_malloc()d along with the AsyncWrite structure in a
452: ** single blob, so is deleted when sqlite3_free() is called on the parent
453: ** structure.
454: */
455: struct AsyncWrite {
456: AsyncFileData *pFileData; /* File to write data to or sync */
457: int op; /* One of ASYNC_xxx etc. */
458: sqlite_int64 iOffset; /* See above */
459: int nByte; /* See above */
460: char *zBuf; /* Data to write to file (or NULL if op!=ASYNC_WRITE) */
461: AsyncWrite *pNext; /* Next write operation (to any file) */
462: };
463:
464: /*
465: ** An instance of this structure is created for each distinct open file
466: ** (i.e. if two handles are opened on the one file, only one of these
467: ** structures is allocated) and stored in the async.aLock hash table. The
468: ** keys for async.aLock are the full pathnames of the opened files.
469: **
470: ** AsyncLock.pList points to the head of a linked list of AsyncFileLock
471: ** structures, one for each handle currently open on the file.
472: **
473: ** If the opened file is not a main-database (the SQLITE_OPEN_MAIN_DB is
474: ** not passed to the sqlite3OsOpen() call), or if async.bLockFiles is
475: ** false, variables AsyncLock.pFile and AsyncLock.eLock are never used.
476: ** Otherwise, pFile is a file handle opened on the file in question and
477: ** used to obtain the file-system locks required by database connections
478: ** within this process.
479: **
480: ** See comments above the asyncLock() function for more details on
481: ** the implementation of database locking used by this backend.
482: */
483: struct AsyncLock {
484: char *zFile;
485: int nFile;
486: sqlite3_file *pFile;
487: int eLock;
488: AsyncFileLock *pList;
489: AsyncLock *pNext; /* Next in linked list headed by async.pLock */
490: };
491:
492: /*
493: ** An instance of the following structure is allocated along with each
494: ** AsyncFileData structure (see AsyncFileData.lock), but is only used if the
495: ** file was opened with the SQLITE_OPEN_MAIN_DB.
496: */
497: struct AsyncFileLock {
498: int eLock; /* Internally visible lock state (sqlite pov) */
499: int eAsyncLock; /* Lock-state with write-queue unlock */
500: AsyncFileLock *pNext;
501: };
502:
503: /*
504: ** The AsyncFile structure is a subclass of sqlite3_file used for
505: ** asynchronous IO.
506: **
507: ** All of the actual data for the structure is stored in the structure
508: ** pointed to by AsyncFile.pData, which is allocated as part of the
509: ** sqlite3OsOpen() using sqlite3_malloc(). The reason for this is that the
510: ** lifetime of the AsyncFile structure is ended by the caller after OsClose()
511: ** is called, but the data in AsyncFileData may be required by the
512: ** writer thread after that point.
513: */
514: struct AsyncFile {
515: sqlite3_io_methods *pMethod;
516: AsyncFileData *pData;
517: };
518: struct AsyncFileData {
519: char *zName; /* Underlying OS filename - used for debugging */
520: int nName; /* Number of characters in zName */
521: sqlite3_file *pBaseRead; /* Read handle to the underlying Os file */
522: sqlite3_file *pBaseWrite; /* Write handle to the underlying Os file */
523: AsyncFileLock lock; /* Lock state for this handle */
524: AsyncLock *pLock; /* AsyncLock object for this file system entry */
525: AsyncWrite closeOp; /* Preallocated close operation */
526: };
527:
528: /*
529: ** Add an entry to the end of the global write-op list. pWrite should point
530: ** to an AsyncWrite structure allocated using sqlite3_malloc(). The writer
531: ** thread will call sqlite3_free() to free the structure after the specified
532: ** operation has been completed.
533: **
534: ** Once an AsyncWrite structure has been added to the list, it becomes the
535: ** property of the writer thread and must not be read or modified by the
536: ** caller.
537: */
538: static void addAsyncWrite(AsyncWrite *pWrite){
539: /* We must hold the queue mutex in order to modify the queue pointers */
540: if( pWrite->op!=ASYNC_UNLOCK ){
541: async_mutex_enter(ASYNC_MUTEX_QUEUE);
542: }
543:
544: /* Add the record to the end of the write-op queue */
545: assert( !pWrite->pNext );
546: if( async.pQueueLast ){
547: assert( async.pQueueFirst );
548: async.pQueueLast->pNext = pWrite;
549: }else{
550: async.pQueueFirst = pWrite;
551: }
552: async.pQueueLast = pWrite;
553: ASYNC_TRACE(("PUSH %p (%s %s %d)\n", pWrite, azOpcodeName[pWrite->op],
554: pWrite->pFileData ? pWrite->pFileData->zName : "-", pWrite->iOffset));
555:
556: if( pWrite->op==ASYNC_CLOSE ){
557: async.nFile--;
558: }
559:
560: /* The writer thread might have been idle because there was nothing
561: ** on the write-op queue for it to do. So wake it up. */
562: async_cond_signal(ASYNC_COND_QUEUE);
563:
564: /* Drop the queue mutex */
565: if( pWrite->op!=ASYNC_UNLOCK ){
566: async_mutex_leave(ASYNC_MUTEX_QUEUE);
567: }
568: }
569:
570: /*
571: ** Increment async.nFile in a thread-safe manner.
572: */
573: static void incrOpenFileCount(void){
574: /* We must hold the queue mutex in order to modify async.nFile */
575: async_mutex_enter(ASYNC_MUTEX_QUEUE);
576: if( async.nFile==0 ){
577: async.ioError = SQLITE_OK;
578: }
579: async.nFile++;
580: async_mutex_leave(ASYNC_MUTEX_QUEUE);
581: }
582:
583: /*
584: ** This is a utility function to allocate and populate a new AsyncWrite
585: ** structure and insert it (via addAsyncWrite() ) into the global list.
586: */
587: static int addNewAsyncWrite(
588: AsyncFileData *pFileData,
589: int op,
590: sqlite3_int64 iOffset,
591: int nByte,
592: const char *zByte
593: ){
594: AsyncWrite *p;
595: if( op!=ASYNC_CLOSE && async.ioError ){
596: return async.ioError;
597: }
598: p = sqlite3_malloc(sizeof(AsyncWrite) + (zByte?nByte:0));
599: if( !p ){
600: /* The upper layer does not expect operations like OsWrite() to
601: ** return SQLITE_NOMEM. This is partly because under normal conditions
602: ** SQLite is required to do rollback without calling malloc(). So
603: ** if malloc() fails here, treat it as an I/O error. The above
604: ** layer knows how to handle that.
605: */
606: return SQLITE_IOERR;
607: }
608: p->op = op;
609: p->iOffset = iOffset;
610: p->nByte = nByte;
611: p->pFileData = pFileData;
612: p->pNext = 0;
613: if( zByte ){
614: p->zBuf = (char *)&p[1];
615: memcpy(p->zBuf, zByte, nByte);
616: }else{
617: p->zBuf = 0;
618: }
619: addAsyncWrite(p);
620: return SQLITE_OK;
621: }
622:
623: /*
624: ** Close the file. This just adds an entry to the write-op list, the file is
625: ** not actually closed.
626: */
627: static int asyncClose(sqlite3_file *pFile){
628: AsyncFileData *p = ((AsyncFile *)pFile)->pData;
629:
630: /* Unlock the file, if it is locked */
631: async_mutex_enter(ASYNC_MUTEX_LOCK);
632: p->lock.eLock = 0;
633: async_mutex_leave(ASYNC_MUTEX_LOCK);
634:
635: addAsyncWrite(&p->closeOp);
636: return SQLITE_OK;
637: }
638:
639: /*
640: ** Implementation of sqlite3OsWrite() for asynchronous files. Instead of
641: ** writing to the underlying file, this function adds an entry to the end of
642: ** the global AsyncWrite list. Either SQLITE_OK or SQLITE_NOMEM may be
643: ** returned.
644: */
645: static int asyncWrite(
646: sqlite3_file *pFile,
647: const void *pBuf,
648: int amt,
649: sqlite3_int64 iOff
650: ){
651: AsyncFileData *p = ((AsyncFile *)pFile)->pData;
652: return addNewAsyncWrite(p, ASYNC_WRITE, iOff, amt, pBuf);
653: }
654:
655: /*
656: ** Read data from the file. First we read from the filesystem, then adjust
657: ** the contents of the buffer based on ASYNC_WRITE operations in the
658: ** write-op queue.
659: **
660: ** This method holds the mutex from start to finish.
661: */
662: static int asyncRead(
663: sqlite3_file *pFile,
664: void *zOut,
665: int iAmt,
666: sqlite3_int64 iOffset
667: ){
668: AsyncFileData *p = ((AsyncFile *)pFile)->pData;
669: int rc = SQLITE_OK;
670: sqlite3_int64 filesize = 0;
671: sqlite3_file *pBase = p->pBaseRead;
672: sqlite3_int64 iAmt64 = (sqlite3_int64)iAmt;
673:
674: /* Grab the write queue mutex for the duration of the call */
675: async_mutex_enter(ASYNC_MUTEX_QUEUE);
676:
677: /* If an I/O error has previously occurred in this virtual file
678: ** system, then all subsequent operations fail.
679: */
680: if( async.ioError!=SQLITE_OK ){
681: rc = async.ioError;
682: goto asyncread_out;
683: }
684:
685: if( pBase->pMethods ){
686: sqlite3_int64 nRead;
687: rc = pBase->pMethods->xFileSize(pBase, &filesize);
688: if( rc!=SQLITE_OK ){
689: goto asyncread_out;
690: }
691: nRead = MIN(filesize - iOffset, iAmt64);
692: if( nRead>0 ){
693: rc = pBase->pMethods->xRead(pBase, zOut, (int)nRead, iOffset);
694: ASYNC_TRACE(("READ %s %d bytes at %d\n", p->zName, nRead, iOffset));
695: }
696: }
697:
698: if( rc==SQLITE_OK ){
699: AsyncWrite *pWrite;
700: char *zName = p->zName;
701:
702: for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
703: if( pWrite->op==ASYNC_WRITE && (
704: (pWrite->pFileData==p) ||
705: (zName && pWrite->pFileData->zName==zName)
706: )){
707: sqlite3_int64 nCopy;
708: sqlite3_int64 nByte64 = (sqlite3_int64)pWrite->nByte;
709:
710: /* Set variable iBeginIn to the offset in buffer pWrite->zBuf[] from
711: ** which data should be copied. Set iBeginOut to the offset within
712: ** the output buffer to which data should be copied. If either of
713: ** these offsets is a negative number, set them to 0.
714: */
715: sqlite3_int64 iBeginOut = (pWrite->iOffset-iOffset);
716: sqlite3_int64 iBeginIn = -iBeginOut;
717: if( iBeginIn<0 ) iBeginIn = 0;
718: if( iBeginOut<0 ) iBeginOut = 0;
719:
720: filesize = MAX(filesize, pWrite->iOffset+nByte64);
721:
722: nCopy = MIN(nByte64-iBeginIn, iAmt64-iBeginOut);
723: if( nCopy>0 ){
724: memcpy(&((char *)zOut)[iBeginOut], &pWrite->zBuf[iBeginIn], (size_t)nCopy);
725: ASYNC_TRACE(("OVERREAD %d bytes at %d\n", nCopy, iBeginOut+iOffset));
726: }
727: }
728: }
729: }
730:
731: asyncread_out:
732: async_mutex_leave(ASYNC_MUTEX_QUEUE);
733: if( rc==SQLITE_OK && filesize<(iOffset+iAmt) ){
734: rc = SQLITE_IOERR_SHORT_READ;
735: }
736: return rc;
737: }
738:
739: /*
740: ** Truncate the file to nByte bytes in length. This just adds an entry to
741: ** the write-op list, no IO actually takes place.
742: */
743: static int asyncTruncate(sqlite3_file *pFile, sqlite3_int64 nByte){
744: AsyncFileData *p = ((AsyncFile *)pFile)->pData;
745: return addNewAsyncWrite(p, ASYNC_TRUNCATE, nByte, 0, 0);
746: }
747:
748: /*
749: ** Sync the file. This just adds an entry to the write-op list, the
750: ** sync() is done later by sqlite3_async_flush().
751: */
752: static int asyncSync(sqlite3_file *pFile, int flags){
753: AsyncFileData *p = ((AsyncFile *)pFile)->pData;
754: return addNewAsyncWrite(p, ASYNC_SYNC, 0, flags, 0);
755: }
756:
757: /*
758: ** Read the size of the file. First we read the size of the file system
759: ** entry, then adjust for any ASYNC_WRITE or ASYNC_TRUNCATE operations
760: ** currently in the write-op list.
761: **
762: ** This method holds the mutex from start to finish.
763: */
764: int asyncFileSize(sqlite3_file *pFile, sqlite3_int64 *piSize){
765: AsyncFileData *p = ((AsyncFile *)pFile)->pData;
766: int rc = SQLITE_OK;
767: sqlite3_int64 s = 0;
768: sqlite3_file *pBase;
769:
770: async_mutex_enter(ASYNC_MUTEX_QUEUE);
771:
772: /* Read the filesystem size from the base file. If pMethods is NULL, this
773: ** means the file hasn't been opened yet. In this case all relevant data
774: ** must be in the write-op queue anyway, so we can omit reading from the
775: ** file-system.
776: */
777: pBase = p->pBaseRead;
778: if( pBase->pMethods ){
779: rc = pBase->pMethods->xFileSize(pBase, &s);
780: }
781:
782: if( rc==SQLITE_OK ){
783: AsyncWrite *pWrite;
784: for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
785: if( pWrite->op==ASYNC_DELETE
786: && p->zName
787: && strcmp(p->zName, pWrite->zBuf)==0
788: ){
789: s = 0;
790: }else if( pWrite->pFileData && (
791: (pWrite->pFileData==p)
792: || (p->zName && pWrite->pFileData->zName==p->zName)
793: )){
794: switch( pWrite->op ){
795: case ASYNC_WRITE:
796: s = MAX(pWrite->iOffset + (sqlite3_int64)(pWrite->nByte), s);
797: break;
798: case ASYNC_TRUNCATE:
799: s = MIN(s, pWrite->iOffset);
800: break;
801: }
802: }
803: }
804: *piSize = s;
805: }
806: async_mutex_leave(ASYNC_MUTEX_QUEUE);
807: return rc;
808: }
809:
810: /*
811: ** Lock or unlock the actual file-system entry.
812: */
813: static int getFileLock(AsyncLock *pLock){
814: int rc = SQLITE_OK;
815: AsyncFileLock *pIter;
816: int eRequired = 0;
817:
818: if( pLock->pFile ){
819: for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
820: assert(pIter->eAsyncLock>=pIter->eLock);
821: if( pIter->eAsyncLock>eRequired ){
822: eRequired = pIter->eAsyncLock;
823: assert(eRequired>=0 && eRequired<=SQLITE_LOCK_EXCLUSIVE);
824: }
825: }
826:
827: if( eRequired>pLock->eLock ){
828: rc = pLock->pFile->pMethods->xLock(pLock->pFile, eRequired);
829: if( rc==SQLITE_OK ){
830: pLock->eLock = eRequired;
831: }
832: }
833: else if( eRequired<pLock->eLock && eRequired<=SQLITE_LOCK_SHARED ){
834: rc = pLock->pFile->pMethods->xUnlock(pLock->pFile, eRequired);
835: if( rc==SQLITE_OK ){
836: pLock->eLock = eRequired;
837: }
838: }
839: }
840:
841: return rc;
842: }
843:
844: /*
845: ** Return the AsyncLock structure from the global async.pLock list
846: ** associated with the file-system entry identified by path zName
847: ** (a string of nName bytes). If no such structure exists, return 0.
848: */
849: static AsyncLock *findLock(const char *zName, int nName){
850: AsyncLock *p = async.pLock;
851: while( p && (p->nFile!=nName || memcmp(p->zFile, zName, nName)) ){
852: p = p->pNext;
853: }
854: return p;
855: }
856:
857: /*
858: ** The following two methods - asyncLock() and asyncUnlock() - are used
859: ** to obtain and release locks on database files opened with the
860: ** asynchronous backend.
861: */
862: static int asyncLock(sqlite3_file *pFile, int eLock){
863: int rc = SQLITE_OK;
864: AsyncFileData *p = ((AsyncFile *)pFile)->pData;
865:
866: if( p->zName ){
867: async_mutex_enter(ASYNC_MUTEX_LOCK);
868: if( p->lock.eLock<eLock ){
869: AsyncLock *pLock = p->pLock;
870: AsyncFileLock *pIter;
871: assert(pLock && pLock->pList);
872: for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
873: if( pIter!=&p->lock && (
874: (eLock==SQLITE_LOCK_EXCLUSIVE && pIter->eLock>=SQLITE_LOCK_SHARED) ||
875: (eLock==SQLITE_LOCK_PENDING && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
876: (eLock==SQLITE_LOCK_RESERVED && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
877: (eLock==SQLITE_LOCK_SHARED && pIter->eLock>=SQLITE_LOCK_PENDING)
878: )){
879: rc = SQLITE_BUSY;
880: }
881: }
882: if( rc==SQLITE_OK ){
883: p->lock.eLock = eLock;
884: p->lock.eAsyncLock = MAX(p->lock.eAsyncLock, eLock);
885: }
886: assert(p->lock.eAsyncLock>=p->lock.eLock);
887: if( rc==SQLITE_OK ){
888: rc = getFileLock(pLock);
889: }
890: }
891: async_mutex_leave(ASYNC_MUTEX_LOCK);
892: }
893:
894: ASYNC_TRACE(("LOCK %d (%s) rc=%d\n", eLock, p->zName, rc));
895: return rc;
896: }
897: static int asyncUnlock(sqlite3_file *pFile, int eLock){
898: int rc = SQLITE_OK;
899: AsyncFileData *p = ((AsyncFile *)pFile)->pData;
900: if( p->zName ){
901: AsyncFileLock *pLock = &p->lock;
902: async_mutex_enter(ASYNC_MUTEX_QUEUE);
903: async_mutex_enter(ASYNC_MUTEX_LOCK);
904: pLock->eLock = MIN(pLock->eLock, eLock);
905: rc = addNewAsyncWrite(p, ASYNC_UNLOCK, 0, eLock, 0);
906: async_mutex_leave(ASYNC_MUTEX_LOCK);
907: async_mutex_leave(ASYNC_MUTEX_QUEUE);
908: }
909: return rc;
910: }
911:
912: /*
913: ** This function is called when the pager layer first opens a database file
914: ** and is checking for a hot-journal.
915: */
916: static int asyncCheckReservedLock(sqlite3_file *pFile, int *pResOut){
917: int ret = 0;
918: AsyncFileLock *pIter;
919: AsyncFileData *p = ((AsyncFile *)pFile)->pData;
920:
921: async_mutex_enter(ASYNC_MUTEX_LOCK);
922: for(pIter=p->pLock->pList; pIter; pIter=pIter->pNext){
923: if( pIter->eLock>=SQLITE_LOCK_RESERVED ){
924: ret = 1;
925: break;
926: }
927: }
928: async_mutex_leave(ASYNC_MUTEX_LOCK);
929:
930: ASYNC_TRACE(("CHECK-LOCK %d (%s)\n", ret, p->zName));
931: *pResOut = ret;
932: return SQLITE_OK;
933: }
934:
935: /*
936: ** sqlite3_file_control() implementation.
937: */
938: static int asyncFileControl(sqlite3_file *id, int op, void *pArg){
939: switch( op ){
940: case SQLITE_FCNTL_LOCKSTATE: {
941: async_mutex_enter(ASYNC_MUTEX_LOCK);
942: *(int*)pArg = ((AsyncFile*)id)->pData->lock.eLock;
943: async_mutex_leave(ASYNC_MUTEX_LOCK);
944: return SQLITE_OK;
945: }
946: }
947: return SQLITE_ERROR;
948: }
949:
950: /*
951: ** Return the device characteristics and sector-size of the device. It
952: ** is tricky to implement these correctly, as this backend might
953: ** not have an open file handle at this point.
954: */
955: static int asyncSectorSize(sqlite3_file *pFile){
956: UNUSED_PARAMETER(pFile);
957: return 512;
958: }
959: static int asyncDeviceCharacteristics(sqlite3_file *pFile){
960: UNUSED_PARAMETER(pFile);
961: return 0;
962: }
963:
964: static int unlinkAsyncFile(AsyncFileData *pData){
965: AsyncFileLock **ppIter;
966: int rc = SQLITE_OK;
967:
968: if( pData->zName ){
969: AsyncLock *pLock = pData->pLock;
970: for(ppIter=&pLock->pList; *ppIter; ppIter=&((*ppIter)->pNext)){
971: if( (*ppIter)==&pData->lock ){
972: *ppIter = pData->lock.pNext;
973: break;
974: }
975: }
976: if( !pLock->pList ){
977: AsyncLock **pp;
978: if( pLock->pFile ){
979: pLock->pFile->pMethods->xClose(pLock->pFile);
980: }
981: for(pp=&async.pLock; *pp!=pLock; pp=&((*pp)->pNext));
982: *pp = pLock->pNext;
983: sqlite3_free(pLock);
984: }else{
985: rc = getFileLock(pLock);
986: }
987: }
988:
989: return rc;
990: }
991:
992: /*
993: ** The parameter passed to this function is a copy of a 'flags' parameter
994: ** passed to this modules xOpen() method. This function returns true
995: ** if the file should be opened asynchronously, or false if it should
996: ** be opened immediately.
997: **
998: ** If the file is to be opened asynchronously, then asyncOpen() will add
999: ** an entry to the event queue and the file will not actually be opened
1000: ** until the event is processed. Otherwise, the file is opened directly
1001: ** by the caller.
1002: */
1003: static int doAsynchronousOpen(int flags){
1004: return (flags&SQLITE_OPEN_CREATE) && (
1005: (flags&SQLITE_OPEN_MAIN_JOURNAL) ||
1006: (flags&SQLITE_OPEN_TEMP_JOURNAL) ||
1007: (flags&SQLITE_OPEN_DELETEONCLOSE)
1008: );
1009: }
1010:
1011: /*
1012: ** Open a file.
1013: */
1014: static int asyncOpen(
1015: sqlite3_vfs *pAsyncVfs,
1016: const char *zName,
1017: sqlite3_file *pFile,
1018: int flags,
1019: int *pOutFlags
1020: ){
1021: static sqlite3_io_methods async_methods = {
1022: 1, /* iVersion */
1023: asyncClose, /* xClose */
1024: asyncRead, /* xRead */
1025: asyncWrite, /* xWrite */
1026: asyncTruncate, /* xTruncate */
1027: asyncSync, /* xSync */
1028: asyncFileSize, /* xFileSize */
1029: asyncLock, /* xLock */
1030: asyncUnlock, /* xUnlock */
1031: asyncCheckReservedLock, /* xCheckReservedLock */
1032: asyncFileControl, /* xFileControl */
1033: asyncSectorSize, /* xSectorSize */
1034: asyncDeviceCharacteristics /* xDeviceCharacteristics */
1035: };
1036:
1037: sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1038: AsyncFile *p = (AsyncFile *)pFile;
1039: int nName = 0;
1040: int rc = SQLITE_OK;
1041: int nByte;
1042: AsyncFileData *pData;
1043: AsyncLock *pLock = 0;
1044: char *z;
1045: int isAsyncOpen = doAsynchronousOpen(flags);
1046:
1047: /* If zName is NULL, then the upper layer is requesting an anonymous file */
1048: if( zName ){
1049: nName = (int)strlen(zName)+1;
1050: }
1051:
1052: nByte = (
1053: sizeof(AsyncFileData) + /* AsyncFileData structure */
1054: 2 * pVfs->szOsFile + /* AsyncFileData.pBaseRead and pBaseWrite */
1055: nName /* AsyncFileData.zName */
1056: );
1057: z = sqlite3_malloc(nByte);
1058: if( !z ){
1059: return SQLITE_NOMEM;
1060: }
1061: memset(z, 0, nByte);
1062: pData = (AsyncFileData*)z;
1063: z += sizeof(pData[0]);
1064: pData->pBaseRead = (sqlite3_file*)z;
1065: z += pVfs->szOsFile;
1066: pData->pBaseWrite = (sqlite3_file*)z;
1067: pData->closeOp.pFileData = pData;
1068: pData->closeOp.op = ASYNC_CLOSE;
1069:
1070: if( zName ){
1071: z += pVfs->szOsFile;
1072: pData->zName = z;
1073: pData->nName = nName;
1074: memcpy(pData->zName, zName, nName);
1075: }
1076:
1077: if( !isAsyncOpen ){
1078: int flagsout;
1079: rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, &flagsout);
1080: if( rc==SQLITE_OK
1081: && (flagsout&SQLITE_OPEN_READWRITE)
1082: && (flags&SQLITE_OPEN_EXCLUSIVE)==0
1083: ){
1084: rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseWrite, flags, 0);
1085: }
1086: if( pOutFlags ){
1087: *pOutFlags = flagsout;
1088: }
1089: }
1090:
1091: async_mutex_enter(ASYNC_MUTEX_LOCK);
1092:
1093: if( zName && rc==SQLITE_OK ){
1094: pLock = findLock(pData->zName, pData->nName);
1095: if( !pLock ){
1096: int nByte = pVfs->szOsFile + sizeof(AsyncLock) + pData->nName + 1;
1097: pLock = (AsyncLock *)sqlite3_malloc(nByte);
1098: if( pLock ){
1099: memset(pLock, 0, nByte);
1100: if( async.bLockFiles && (flags&SQLITE_OPEN_MAIN_DB) ){
1101: pLock->pFile = (sqlite3_file *)&pLock[1];
1102: rc = pVfs->xOpen(pVfs, pData->zName, pLock->pFile, flags, 0);
1103: if( rc!=SQLITE_OK ){
1104: sqlite3_free(pLock);
1105: pLock = 0;
1106: }
1107: }
1108: if( pLock ){
1109: pLock->nFile = pData->nName;
1110: pLock->zFile = &((char *)(&pLock[1]))[pVfs->szOsFile];
1111: memcpy(pLock->zFile, pData->zName, pLock->nFile);
1112: pLock->pNext = async.pLock;
1113: async.pLock = pLock;
1114: }
1115: }else{
1116: rc = SQLITE_NOMEM;
1117: }
1118: }
1119: }
1120:
1121: if( rc==SQLITE_OK ){
1122: p->pMethod = &async_methods;
1123: p->pData = pData;
1124:
1125: /* Link AsyncFileData.lock into the linked list of
1126: ** AsyncFileLock structures for this file.
1127: */
1128: if( zName ){
1129: pData->lock.pNext = pLock->pList;
1130: pLock->pList = &pData->lock;
1131: pData->zName = pLock->zFile;
1132: }
1133: }else{
1134: if( pData->pBaseRead->pMethods ){
1135: pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
1136: }
1137: if( pData->pBaseWrite->pMethods ){
1138: pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
1139: }
1140: sqlite3_free(pData);
1141: }
1142:
1143: async_mutex_leave(ASYNC_MUTEX_LOCK);
1144:
1145: if( rc==SQLITE_OK ){
1146: pData->pLock = pLock;
1147: }
1148:
1149: if( rc==SQLITE_OK && isAsyncOpen ){
1150: rc = addNewAsyncWrite(pData, ASYNC_OPENEXCLUSIVE, (sqlite3_int64)flags,0,0);
1151: if( rc==SQLITE_OK ){
1152: if( pOutFlags ) *pOutFlags = flags;
1153: }else{
1154: async_mutex_enter(ASYNC_MUTEX_LOCK);
1155: unlinkAsyncFile(pData);
1156: async_mutex_leave(ASYNC_MUTEX_LOCK);
1157: sqlite3_free(pData);
1158: }
1159: }
1160: if( rc!=SQLITE_OK ){
1161: p->pMethod = 0;
1162: }else{
1163: incrOpenFileCount();
1164: }
1165:
1166: return rc;
1167: }
1168:
1169: /*
1170: ** Implementation of sqlite3OsDelete. Add an entry to the end of the
1171: ** write-op queue to perform the delete.
1172: */
1173: static int asyncDelete(sqlite3_vfs *pAsyncVfs, const char *z, int syncDir){
1174: UNUSED_PARAMETER(pAsyncVfs);
1175: return addNewAsyncWrite(0, ASYNC_DELETE, syncDir, (int)strlen(z)+1, z);
1176: }
1177:
1178: /*
1179: ** Implementation of sqlite3OsAccess. This method holds the mutex from
1180: ** start to finish.
1181: */
1182: static int asyncAccess(
1183: sqlite3_vfs *pAsyncVfs,
1184: const char *zName,
1185: int flags,
1186: int *pResOut
1187: ){
1188: int rc;
1189: int ret;
1190: AsyncWrite *p;
1191: sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1192:
1193: assert(flags==SQLITE_ACCESS_READWRITE
1194: || flags==SQLITE_ACCESS_READ
1195: || flags==SQLITE_ACCESS_EXISTS
1196: );
1197:
1198: async_mutex_enter(ASYNC_MUTEX_QUEUE);
1199: rc = pVfs->xAccess(pVfs, zName, flags, &ret);
1200: if( rc==SQLITE_OK && flags==SQLITE_ACCESS_EXISTS ){
1201: for(p=async.pQueueFirst; p; p = p->pNext){
1202: if( p->op==ASYNC_DELETE && 0==strcmp(p->zBuf, zName) ){
1203: ret = 0;
1204: }else if( p->op==ASYNC_OPENEXCLUSIVE
1205: && p->pFileData->zName
1206: && 0==strcmp(p->pFileData->zName, zName)
1207: ){
1208: ret = 1;
1209: }
1210: }
1211: }
1212: ASYNC_TRACE(("ACCESS(%s): %s = %d\n",
1213: flags==SQLITE_ACCESS_READWRITE?"read-write":
1214: flags==SQLITE_ACCESS_READ?"read":"exists"
1215: , zName, ret)
1216: );
1217: async_mutex_leave(ASYNC_MUTEX_QUEUE);
1218: *pResOut = ret;
1219: return rc;
1220: }
1221:
1222: /*
1223: ** Fill in zPathOut with the full path to the file identified by zPath.
1224: */
1225: static int asyncFullPathname(
1226: sqlite3_vfs *pAsyncVfs,
1227: const char *zPath,
1228: int nPathOut,
1229: char *zPathOut
1230: ){
1231: int rc;
1232: sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1233: rc = pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut);
1234:
1235: /* Because of the way intra-process file locking works, this backend
1236: ** needs to return a canonical path. The following block assumes the
1237: ** file-system uses unix style paths.
1238: */
1239: if( rc==SQLITE_OK ){
1240: int i, j;
1241: char *z = zPathOut;
1242: int n = (int)strlen(z);
1243: while( n>1 && z[n-1]=='/' ){ n--; }
1244: for(i=j=0; i<n; i++){
1245: if( z[i]=='/' ){
1246: if( z[i+1]=='/' ) continue;
1247: if( z[i+1]=='.' && i+2<n && z[i+2]=='/' ){
1248: i += 1;
1249: continue;
1250: }
1251: if( z[i+1]=='.' && i+3<n && z[i+2]=='.' && z[i+3]=='/' ){
1252: while( j>0 && z[j-1]!='/' ){ j--; }
1253: if( j>0 ){ j--; }
1254: i += 2;
1255: continue;
1256: }
1257: }
1258: z[j++] = z[i];
1259: }
1260: z[j] = 0;
1261: }
1262:
1263: return rc;
1264: }
1265: static void *asyncDlOpen(sqlite3_vfs *pAsyncVfs, const char *zPath){
1266: sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1267: return pVfs->xDlOpen(pVfs, zPath);
1268: }
1269: static void asyncDlError(sqlite3_vfs *pAsyncVfs, int nByte, char *zErrMsg){
1270: sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1271: pVfs->xDlError(pVfs, nByte, zErrMsg);
1272: }
1273: static void (*asyncDlSym(
1274: sqlite3_vfs *pAsyncVfs,
1275: void *pHandle,
1276: const char *zSymbol
1277: ))(void){
1278: sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1279: return pVfs->xDlSym(pVfs, pHandle, zSymbol);
1280: }
1281: static void asyncDlClose(sqlite3_vfs *pAsyncVfs, void *pHandle){
1282: sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1283: pVfs->xDlClose(pVfs, pHandle);
1284: }
1285: static int asyncRandomness(sqlite3_vfs *pAsyncVfs, int nByte, char *zBufOut){
1286: sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1287: return pVfs->xRandomness(pVfs, nByte, zBufOut);
1288: }
1289: static int asyncSleep(sqlite3_vfs *pAsyncVfs, int nMicro){
1290: sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1291: return pVfs->xSleep(pVfs, nMicro);
1292: }
1293: static int asyncCurrentTime(sqlite3_vfs *pAsyncVfs, double *pTimeOut){
1294: sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
1295: return pVfs->xCurrentTime(pVfs, pTimeOut);
1296: }
1297:
/*
** The sqlite3_vfs object for the asynchronous backend. The mxPathname and
** pAppData fields start out as 0; sqlite3async_initialize() fills them in
** with the parent VFS's mxPathname and a pointer to the parent VFS.
*/
static sqlite3_vfs async_vfs = {
  1,                    /* iVersion */
  sizeof(AsyncFile),    /* szOsFile */
  0,                    /* mxPathname */
  0,                    /* pNext */
  SQLITEASYNC_VFSNAME,  /* zName */
  0,                    /* pAppData */
  asyncOpen,            /* xOpen */
  asyncDelete,          /* xDelete */
  asyncAccess,          /* xAccess */
  asyncFullPathname,    /* xFullPathname */
  asyncDlOpen,          /* xDlOpen */
  asyncDlError,         /* xDlError */
  asyncDlSym,           /* xDlSym */
  asyncDlClose,         /* xDlClose */
  asyncRandomness,      /* xRandomness */
  asyncSleep,           /* xSleep */
  asyncCurrentTime      /* xCurrentTime */
};
1317:
1318: /*
1319: ** This procedure runs in a separate thread, reading messages off of the
1320: ** write queue and processing them one by one.
1321: **
1322: ** If async.writerHaltNow is true, then this procedure exits
1323: ** after processing a single message.
1324: **
1325: ** If async.writerHaltWhenIdle is true, then this procedure exits when
1326: ** the write queue is empty.
1327: **
1328: ** If both of the above variables are false, this procedure runs
1329: ** indefinately, waiting for operations to be added to the write queue
1330: ** and processing them in the order in which they arrive.
1331: **
1332: ** An artifical delay of async.ioDelay milliseconds is inserted before
1333: ** each write operation in order to simulate the effect of a slow disk.
1334: **
1335: ** Only one instance of this procedure may be running at a time.
1336: */
1337: static void asyncWriterThread(void){
1338: sqlite3_vfs *pVfs = (sqlite3_vfs *)(async_vfs.pAppData);
1339: AsyncWrite *p = 0;
1340: int rc = SQLITE_OK;
1341: int holdingMutex = 0;
1342:
1343: async_mutex_enter(ASYNC_MUTEX_WRITER);
1344:
1345: while( async.eHalt!=SQLITEASYNC_HALT_NOW ){
1346: int doNotFree = 0;
1347: sqlite3_file *pBase = 0;
1348:
1349: if( !holdingMutex ){
1350: async_mutex_enter(ASYNC_MUTEX_QUEUE);
1351: }
1352: while( (p = async.pQueueFirst)==0 ){
1353: if( async.eHalt!=SQLITEASYNC_HALT_NEVER ){
1354: async_mutex_leave(ASYNC_MUTEX_QUEUE);
1355: break;
1356: }else{
1357: ASYNC_TRACE(("IDLE\n"));
1358: async_cond_wait(ASYNC_COND_QUEUE, ASYNC_MUTEX_QUEUE);
1359: ASYNC_TRACE(("WAKEUP\n"));
1360: }
1361: }
1362: if( p==0 ) break;
1363: holdingMutex = 1;
1364:
1365: /* Right now this thread is holding the mutex on the write-op queue.
1366: ** Variable 'p' points to the first entry in the write-op queue. In
1367: ** the general case, we hold on to the mutex for the entire body of
1368: ** the loop.
1369: **
1370: ** However in the cases enumerated below, we relinquish the mutex,
1371: ** perform the IO, and then re-request the mutex before removing 'p' from
1372: ** the head of the write-op queue. The idea is to increase concurrency with
1373: ** sqlite threads.
1374: **
1375: ** * An ASYNC_CLOSE operation.
1376: ** * An ASYNC_OPENEXCLUSIVE operation. For this one, we relinquish
1377: ** the mutex, call the underlying xOpenExclusive() function, then
1378: ** re-aquire the mutex before seting the AsyncFile.pBaseRead
1379: ** variable.
1380: ** * ASYNC_SYNC and ASYNC_WRITE operations, if
1381: ** SQLITE_ASYNC_TWO_FILEHANDLES was set at compile time and two
1382: ** file-handles are open for the particular file being "synced".
1383: */
1384: if( async.ioError!=SQLITE_OK && p->op!=ASYNC_CLOSE ){
1385: p->op = ASYNC_NOOP;
1386: }
1387: if( p->pFileData ){
1388: pBase = p->pFileData->pBaseWrite;
1389: if(
1390: p->op==ASYNC_CLOSE ||
1391: p->op==ASYNC_OPENEXCLUSIVE ||
1392: (pBase->pMethods && (p->op==ASYNC_SYNC || p->op==ASYNC_WRITE) )
1393: ){
1394: async_mutex_leave(ASYNC_MUTEX_QUEUE);
1395: holdingMutex = 0;
1396: }
1397: if( !pBase->pMethods ){
1398: pBase = p->pFileData->pBaseRead;
1399: }
1400: }
1401:
1402: switch( p->op ){
1403: case ASYNC_NOOP:
1404: break;
1405:
1406: case ASYNC_WRITE:
1407: assert( pBase );
1408: ASYNC_TRACE(("WRITE %s %d bytes at %d\n",
1409: p->pFileData->zName, p->nByte, p->iOffset));
1410: rc = pBase->pMethods->xWrite(pBase, (void *)(p->zBuf), p->nByte, p->iOffset);
1411: break;
1412:
1413: case ASYNC_SYNC:
1414: assert( pBase );
1415: ASYNC_TRACE(("SYNC %s\n", p->pFileData->zName));
1416: rc = pBase->pMethods->xSync(pBase, p->nByte);
1417: break;
1418:
1419: case ASYNC_TRUNCATE:
1420: assert( pBase );
1421: ASYNC_TRACE(("TRUNCATE %s to %d bytes\n",
1422: p->pFileData->zName, p->iOffset));
1423: rc = pBase->pMethods->xTruncate(pBase, p->iOffset);
1424: break;
1425:
1426: case ASYNC_CLOSE: {
1427: AsyncFileData *pData = p->pFileData;
1428: ASYNC_TRACE(("CLOSE %s\n", p->pFileData->zName));
1429: if( pData->pBaseWrite->pMethods ){
1430: pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
1431: }
1432: if( pData->pBaseRead->pMethods ){
1433: pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
1434: }
1435:
1436: /* Unlink AsyncFileData.lock from the linked list of AsyncFileLock
1437: ** structures for this file. Obtain the async.lockMutex mutex
1438: ** before doing so.
1439: */
1440: async_mutex_enter(ASYNC_MUTEX_LOCK);
1441: rc = unlinkAsyncFile(pData);
1442: async_mutex_leave(ASYNC_MUTEX_LOCK);
1443:
1444: if( !holdingMutex ){
1445: async_mutex_enter(ASYNC_MUTEX_QUEUE);
1446: holdingMutex = 1;
1447: }
1448: assert_mutex_is_held(ASYNC_MUTEX_QUEUE);
1449: async.pQueueFirst = p->pNext;
1450: sqlite3_free(pData);
1451: doNotFree = 1;
1452: break;
1453: }
1454:
1455: case ASYNC_UNLOCK: {
1456: AsyncWrite *pIter;
1457: AsyncFileData *pData = p->pFileData;
1458: int eLock = p->nByte;
1459:
1460: /* When a file is locked by SQLite using the async backend, it is
1461: ** locked within the 'real' file-system synchronously. When it is
1462: ** unlocked, an ASYNC_UNLOCK event is added to the write-queue to
1463: ** unlock the file asynchronously. The design of the async backend
1464: ** requires that the 'real' file-system file be locked from the
1465: ** time that SQLite first locks it (and probably reads from it)
1466: ** until all asynchronous write events that were scheduled before
1467: ** SQLite unlocked the file have been processed.
1468: **
1469: ** This is more complex if SQLite locks and unlocks the file multiple
1470: ** times in quick succession. For example, if SQLite does:
1471: **
1472: ** lock, write, unlock, lock, write, unlock
1473: **
1474: ** Each "lock" operation locks the file immediately. Each "write"
1475: ** and "unlock" operation adds an event to the event queue. If the
1476: ** second "lock" operation is performed before the first "unlock"
1477: ** operation has been processed asynchronously, then the first
1478: ** "unlock" cannot be safely processed as is, since this would mean
1479: ** the file was unlocked when the second "write" operation is
1480: ** processed. To work around this, when processing an ASYNC_UNLOCK
1481: ** operation, SQLite:
1482: **
1483: ** 1) Unlocks the file to the minimum of the argument passed to
1484: ** the xUnlock() call and the current lock from SQLite's point
1485: ** of view, and
1486: **
1487: ** 2) Only unlocks the file at all if this event is the last
1488: ** ASYNC_UNLOCK event on this file in the write-queue.
1489: */
1490: assert( holdingMutex==1 );
1491: assert( async.pQueueFirst==p );
1492: for(pIter=async.pQueueFirst->pNext; pIter; pIter=pIter->pNext){
1493: if( pIter->pFileData==pData && pIter->op==ASYNC_UNLOCK ) break;
1494: }
1495: if( !pIter ){
1496: async_mutex_enter(ASYNC_MUTEX_LOCK);
1497: pData->lock.eAsyncLock = MIN(
1498: pData->lock.eAsyncLock, MAX(pData->lock.eLock, eLock)
1499: );
1500: assert(pData->lock.eAsyncLock>=pData->lock.eLock);
1501: rc = getFileLock(pData->pLock);
1502: async_mutex_leave(ASYNC_MUTEX_LOCK);
1503: }
1504: break;
1505: }
1506:
1507: case ASYNC_DELETE:
1508: ASYNC_TRACE(("DELETE %s\n", p->zBuf));
1509: rc = pVfs->xDelete(pVfs, p->zBuf, (int)p->iOffset);
1510: break;
1511:
1512: case ASYNC_OPENEXCLUSIVE: {
1513: int flags = (int)p->iOffset;
1514: AsyncFileData *pData = p->pFileData;
1515: ASYNC_TRACE(("OPEN %s flags=%d\n", p->zBuf, (int)p->iOffset));
1516: assert(pData->pBaseRead->pMethods==0 && pData->pBaseWrite->pMethods==0);
1517: rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, 0);
1518: assert( holdingMutex==0 );
1519: async_mutex_enter(ASYNC_MUTEX_QUEUE);
1520: holdingMutex = 1;
1521: break;
1522: }
1523:
1524: default: assert(!"Illegal value for AsyncWrite.op");
1525: }
1526:
1527: /* If we didn't hang on to the mutex during the IO op, obtain it now
1528: ** so that the AsyncWrite structure can be safely removed from the
1529: ** global write-op queue.
1530: */
1531: if( !holdingMutex ){
1532: async_mutex_enter(ASYNC_MUTEX_QUEUE);
1533: holdingMutex = 1;
1534: }
1535: /* ASYNC_TRACE(("UNLINK %p\n", p)); */
1536: if( p==async.pQueueLast ){
1537: async.pQueueLast = 0;
1538: }
1539: if( !doNotFree ){
1540: assert_mutex_is_held(ASYNC_MUTEX_QUEUE);
1541: async.pQueueFirst = p->pNext;
1542: sqlite3_free(p);
1543: }
1544: assert( holdingMutex );
1545:
1546: /* An IO error has occurred. We cannot report the error back to the
1547: ** connection that requested the I/O since the error happened
1548: ** asynchronously. The connection has already moved on. There
1549: ** really is nobody to report the error to.
1550: **
1551: ** The file for which the error occurred may have been a database or
1552: ** journal file. Regardless, none of the currently queued operations
1553: ** associated with the same database should now be performed. Nor should
1554: ** any subsequently requested IO on either a database or journal file
1555: ** handle for the same database be accepted until the main database
1556: ** file handle has been closed and reopened.
1557: **
1558: ** Furthermore, no further IO should be queued or performed on any file
1559: ** handle associated with a database that may have been part of a
1560: ** multi-file transaction that included the database associated with
1561: ** the IO error (i.e. a database ATTACHed to the same handle at some
1562: ** point in time).
1563: */
1564: if( rc!=SQLITE_OK ){
1565: async.ioError = rc;
1566: }
1567:
1568: if( async.ioError && !async.pQueueFirst ){
1569: async_mutex_enter(ASYNC_MUTEX_LOCK);
1570: if( 0==async.pLock ){
1571: async.ioError = SQLITE_OK;
1572: }
1573: async_mutex_leave(ASYNC_MUTEX_LOCK);
1574: }
1575:
1576: /* Drop the queue mutex before continuing to the next write operation
1577: ** in order to give other threads a chance to work with the write queue.
1578: */
1579: if( !async.pQueueFirst || !async.ioError ){
1580: async_mutex_leave(ASYNC_MUTEX_QUEUE);
1581: holdingMutex = 0;
1582: if( async.ioDelay>0 ){
1583: pVfs->xSleep(pVfs, async.ioDelay*1000);
1584: }else{
1585: async_sched_yield();
1586: }
1587: }
1588: }
1589:
1590: async_mutex_leave(ASYNC_MUTEX_WRITER);
1591: return;
1592: }
1593:
1594: /*
1595: ** Install the asynchronous VFS.
1596: */
1597: int sqlite3async_initialize(const char *zParent, int isDefault){
1598: int rc = SQLITE_OK;
1599: if( async_vfs.pAppData==0 ){
1600: sqlite3_vfs *pParent = sqlite3_vfs_find(zParent);
1601: if( !pParent || async_os_initialize() ){
1602: rc = SQLITE_ERROR;
1603: }else if( SQLITE_OK!=(rc = sqlite3_vfs_register(&async_vfs, isDefault)) ){
1604: async_os_shutdown();
1605: }else{
1606: async_vfs.pAppData = (void *)pParent;
1607: async_vfs.mxPathname = ((sqlite3_vfs *)async_vfs.pAppData)->mxPathname;
1608: }
1609: }
1610: return rc;
1611: }
1612:
1613: /*
1614: ** Uninstall the asynchronous VFS.
1615: */
1616: void sqlite3async_shutdown(void){
1617: if( async_vfs.pAppData ){
1618: async_os_shutdown();
1619: sqlite3_vfs_unregister((sqlite3_vfs *)&async_vfs);
1620: async_vfs.pAppData = 0;
1621: }
1622: }
1623:
/*
** Public entry point for the writer: process the events on the
** write-queue by running asyncWriterThread() in the calling thread.
** Returns when asyncWriterThread() returns.
*/
void sqlite3async_run(void){
  asyncWriterThread();
  return;
}
1630:
1631: /*
1632: ** Control/configure the asynchronous IO system.
1633: */
1634: int sqlite3async_control(int op, ...){
1635: va_list ap;
1636: va_start(ap, op);
1637: switch( op ){
1638: case SQLITEASYNC_HALT: {
1639: int eWhen = va_arg(ap, int);
1640: if( eWhen!=SQLITEASYNC_HALT_NEVER
1641: && eWhen!=SQLITEASYNC_HALT_NOW
1642: && eWhen!=SQLITEASYNC_HALT_IDLE
1643: ){
1644: return SQLITE_MISUSE;
1645: }
1646: async.eHalt = eWhen;
1647: async_mutex_enter(ASYNC_MUTEX_QUEUE);
1648: async_cond_signal(ASYNC_COND_QUEUE);
1649: async_mutex_leave(ASYNC_MUTEX_QUEUE);
1650: break;
1651: }
1652:
1653: case SQLITEASYNC_DELAY: {
1654: int iDelay = va_arg(ap, int);
1655: if( iDelay<0 ){
1656: return SQLITE_MISUSE;
1657: }
1658: async.ioDelay = iDelay;
1659: break;
1660: }
1661:
1662: case SQLITEASYNC_LOCKFILES: {
1663: int bLock = va_arg(ap, int);
1664: async_mutex_enter(ASYNC_MUTEX_QUEUE);
1665: if( async.nFile || async.pQueueFirst ){
1666: async_mutex_leave(ASYNC_MUTEX_QUEUE);
1667: return SQLITE_MISUSE;
1668: }
1669: async.bLockFiles = bLock;
1670: async_mutex_leave(ASYNC_MUTEX_QUEUE);
1671: break;
1672: }
1673:
1674: case SQLITEASYNC_GET_HALT: {
1675: int *peWhen = va_arg(ap, int *);
1676: *peWhen = async.eHalt;
1677: break;
1678: }
1679: case SQLITEASYNC_GET_DELAY: {
1680: int *piDelay = va_arg(ap, int *);
1681: *piDelay = async.ioDelay;
1682: break;
1683: }
1684: case SQLITEASYNC_GET_LOCKFILES: {
1685: int *piDelay = va_arg(ap, int *);
1686: *piDelay = async.bLockFiles;
1687: break;
1688: }
1689:
1690: default:
1691: return SQLITE_ERROR;
1692: }
1693: return SQLITE_OK;
1694: }
1695:
1696: #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ASYNCIO) */
1697:
/* FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org> */