Annotation of embedaddon/sqlite3/tool/spaceanal.tcl, revision 1.1.1.1

1.1       misho       1: # Run this TCL script using "testfixture" in order get a report that shows
                      2: # how much disk space is used by a particular data to actually store data
                      3: # versus how much space is unused.
                      4: #
                      5: 
                      6: if {[catch {
                      7: # Get the name of the database to analyze
                      8: #
                      9: proc usage {} {
                     10:   set argv0 [file rootname [file tail [info nameofexecutable]]]
                     11:   puts stderr "Usage: $argv0 database-name"
                     12:   exit 1
                     13: }
                     14: set file_to_analyze {}
                     15: set flags(-pageinfo) 0
                     16: set flags(-stats) 0
                     17: append argv {}
                     18: foreach arg $argv {
                     19:   if {[regexp {^-+pageinfo$} $arg]} {
                     20:     set flags(-pageinfo) 1
                     21:   } elseif {[regexp {^-+stats$} $arg]} {
                     22:     set flags(-stats) 1
                     23:   } elseif {[regexp {^-} $arg]} {
                     24:     puts stderr "Unknown option: $arg"
                     25:     usage
                     26:   } elseif {$file_to_analyze!=""} {
                     27:     usage
                     28:   } else {
                     29:     set file_to_analyze $arg
                     30:   }
                     31: }
                     32: if {$file_to_analyze==""} usage
                     33: if {![file exists $file_to_analyze]} {
                     34:   puts stderr "No such file: $file_to_analyze"
                     35:   exit 1
                     36: }
                     37: if {![file readable $file_to_analyze]} {
                     38:   puts stderr "File is not readable: $file_to_analyze"
                     39:   exit 1
                     40: }
                     41: set true_file_size [file size $file_to_analyze]
                     42: if {$true_file_size<512} {
                     43:   puts stderr "Empty or malformed database: $file_to_analyze"
                     44:   exit 1
                     45: }
                     46: 
                     47: # Compute the total file size assuming test_multiplexor is being used.
                     48: # Assume that SQLITE_ENABLE_8_3_NAMES might be enabled
                     49: #
                     50: set extension [file extension $file_to_analyze]
                     51: set pattern $file_to_analyze
                     52: append pattern {[0-9][0-9]}
                     53: foreach f [glob -nocomplain $pattern] {
                     54:   incr true_file_size [file size $f]
                     55:   set extension {}
                     56: }
                     57: if {[string length $extension]>=2 && [string length $extension]<=4} {
                     58:   set pattern [file rootname $file_to_analyze]
                     59:   append pattern [string range $extension 0 1]
                     60:   append pattern {[0-9][0-9]}
                     61:   foreach f [glob -nocomplain $pattern] {
                     62:     incr true_file_size [file size $f]
                     63:   }
                     64: }
                     65: 
                     66: # Open the database
                     67: #
                     68: sqlite3 db $file_to_analyze
                     69: register_dbstat_vtab db
                     70: 
                     71: db eval {SELECT count(*) FROM sqlite_master}
                     72: set pageSize [expr {wide([db one {PRAGMA page_size}])}]
                     73: 
                     74: if {$flags(-pageinfo)} {
                     75:   db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
                     76:   db eval {SELECT name, path, pageno FROM temp.stat ORDER BY pageno} {
                     77:     puts "$pageno $name $path"
                     78:   }
                     79:   exit 0
                     80: }
                     81: if {$flags(-stats)} {
                     82:   db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
                     83:   puts "BEGIN;"
                     84:   puts "CREATE TABLE stats("
                     85:   puts "  name       STRING,           /* Name of table or index */"
                     86:   puts "  path       INTEGER,          /* Path to page from root */"
                     87:   puts "  pageno     INTEGER,          /* Page number */"
                     88:   puts "  pagetype   STRING,           /* 'internal', 'leaf' or 'overflow' */"
                     89:   puts "  ncell      INTEGER,          /* Cells on page (0 for overflow) */"
                     90:   puts "  payload    INTEGER,          /* Bytes of payload on this page */"
                     91:   puts "  unused     INTEGER,          /* Bytes of unused space on this page */"
                     92:   puts "  mx_payload INTEGER,          /* Largest payload size of all cells */"
                     93:   puts "  pgoffset   INTEGER,          /* Offset of page in file */"
                     94:   puts "  pgsize     INTEGER           /* Size of the page */"
                     95:   puts ");"
                     96:   db eval {SELECT quote(name) || ',' ||
                     97:                   quote(path) || ',' ||
                     98:                   quote(pageno) || ',' ||
                     99:                   quote(pagetype) || ',' ||
                    100:                   quote(ncell) || ',' ||
                    101:                   quote(payload) || ',' ||
                    102:                   quote(unused) || ',' ||
                    103:                   quote(mx_payload) || ',' ||
                    104:                   quote(pgoffset) || ',' ||
                    105:                   quote(pgsize) AS x FROM stat} {
                    106:     puts "INSERT INTO stats VALUES($x);"
                    107:   }
                    108:   puts "COMMIT;"
                    109:   exit 0
                    110: }
                    111: 
                    112: # In-memory database for collecting statistics. This script loops through
                    113: # the tables and indices in the database being analyzed, adding a row for each
                    114: # to an in-memory database (for which the schema is shown below). It then
                    115: # queries the in-memory db to produce the space-analysis report.
                    116: #
                    117: sqlite3 mem :memory:
                    118: set tabledef {CREATE TABLE space_used(
                    119:    name clob,        -- Name of a table or index in the database file
                    120:    tblname clob,     -- Name of associated table
                    121:    is_index boolean, -- TRUE if it is an index, false for a table
                    122:    nentry int,       -- Number of entries in the BTree
                    123:    leaf_entries int, -- Number of leaf entries
                    124:    payload int,      -- Total amount of data stored in this table or index
                    125:    ovfl_payload int, -- Total amount of data stored on overflow pages
                    126:    ovfl_cnt int,     -- Number of entries that use overflow
                    127:    mx_payload int,   -- Maximum payload size
                    128:    int_pages int,    -- Number of interior pages used
                    129:    leaf_pages int,   -- Number of leaf pages used
                    130:    ovfl_pages int,   -- Number of overflow pages used
                    131:    int_unused int,   -- Number of unused bytes on interior pages
                    132:    leaf_unused int,  -- Number of unused bytes on primary pages
                    133:    ovfl_unused int,  -- Number of unused bytes on overflow pages
                    134:    gap_cnt int,      -- Number of gaps in the page layout
                    135:    compressed_size int  -- Total bytes stored on disk
                    136: );}
                    137: mem eval $tabledef
                    138: 
                    139: # Create a temporary "dbstat" virtual table.
                    140: #
                    141: db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
                    142: db eval {CREATE TEMP TABLE dbstat AS SELECT * FROM temp.stat
                    143:          ORDER BY name, path}
                    144: db eval {DROP TABLE temp.stat}
                    145: 
                    146: proc isleaf {pagetype is_index} {
                    147:   return [expr {$pagetype == "leaf" || ($pagetype == "internal" && $is_index)}]
                    148: }
                    149: proc isoverflow {pagetype is_index} {
                    150:   return [expr {$pagetype == "overflow"}]
                    151: }
                    152: proc isinternal {pagetype is_index} {
                    153:   return [expr {$pagetype == "internal" && $is_index==0}]
                    154: }
                    155: 
                    156: db func isleaf isleaf
                    157: db func isinternal isinternal
                    158: db func isoverflow isoverflow
                    159: 
                    160: set isCompressed 0
                    161: set compressOverhead 0
                    162: set sql { SELECT name, tbl_name FROM sqlite_master WHERE rootpage>0 }
                    163: foreach {name tblname} [concat sqlite_master sqlite_master [db eval $sql]] {
                    164: 
                    165:   set is_index [expr {$name!=$tblname}]
                    166:   db eval {
                    167:     SELECT 
                    168:       sum(ncell) AS nentry,
                    169:       sum(isleaf(pagetype, $is_index) * ncell) AS leaf_entries,
                    170:       sum(payload) AS payload,
                    171:       sum(isoverflow(pagetype, $is_index) * payload) AS ovfl_payload,
                    172:       sum(path LIKE '%+000000') AS ovfl_cnt,
                    173:       max(mx_payload) AS mx_payload,
                    174:       sum(isinternal(pagetype, $is_index)) AS int_pages,
                    175:       sum(isleaf(pagetype, $is_index)) AS leaf_pages,
                    176:       sum(isoverflow(pagetype, $is_index)) AS ovfl_pages,
                    177:       sum(isinternal(pagetype, $is_index) * unused) AS int_unused,
                    178:       sum(isleaf(pagetype, $is_index) * unused) AS leaf_unused,
                    179:       sum(isoverflow(pagetype, $is_index) * unused) AS ovfl_unused,
                    180:       sum(pgsize) AS compressed_size
                    181:     FROM temp.dbstat WHERE name = $name
                    182:   } break
                    183: 
                    184:   set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
                    185:   set storage [expr {$total_pages*$pageSize}]
                    186:   if {!$isCompressed && $storage>$compressed_size} {
                    187:     set isCompressed 1
                    188:     set compressOverhead 14
                    189:   }
                    190: 
                    191:   # Column 'gap_cnt' is set to the number of non-contiguous entries in the
                    192:   # list of pages visited if the b-tree structure is traversed in a top-down
                    193:   # fashion (each node visited before its child-tree is passed). Any overflow
                    194:   # chains present are traversed from start to finish before any child-tree
                    195:   # is.
                    196:   #
                    197:   set gap_cnt 0
                    198:   set pglist [db eval {
                    199:     SELECT pageno FROM temp.dbstat WHERE name = $name ORDER BY rowid
                    200:   }]
                    201:   set prev [lindex $pglist 0]
                    202:   foreach pgno [lrange $pglist 1 end] {
                    203:     if {$pgno != $prev+1} {incr gap_cnt}
                    204:     set prev $pgno
                    205:   }
                    206: 
                    207:   mem eval {
                    208:     INSERT INTO space_used VALUES(
                    209:       $name,
                    210:       $tblname,
                    211:       $is_index,
                    212:       $nentry,
                    213:       $leaf_entries,
                    214:       $payload,     
                    215:       $ovfl_payload,
                    216:       $ovfl_cnt,   
                    217:       $mx_payload,
                    218:       $int_pages,
                    219:       $leaf_pages,  
                    220:       $ovfl_pages, 
                    221:       $int_unused, 
                    222:       $leaf_unused,
                    223:       $ovfl_unused,
                    224:       $gap_cnt,
                    225:       $compressed_size
                    226:     );
                    227:   }
                    228: }
                    229: 
                    230: proc integerify {real} {
                    231:   if {[string is double -strict $real]} {
                    232:     return [expr {wide($real)}]
                    233:   } else {
                    234:     return 0
                    235:   }
                    236: }
                    237: mem function int integerify
                    238: 
                    239: # Quote a string for use in an SQL query. Examples:
                    240: #
                    241: # [quote {hello world}]   == {'hello world'}
                    242: # [quote {hello world's}] == {'hello world''s'}
                    243: #
                    244: proc quote {txt} {
                    245:   regsub -all ' $txt '' q
                    246:   return '$q'
                    247: }
                    248: 
                    249: # Generate a single line of output in the statistics section of the
                    250: # report.
                    251: #
                    252: proc statline {title value {extra {}}} {
                    253:   set len [string length $title]
                    254:   set dots [string range {......................................} $len end]
                    255:   set len [string length $value]
                    256:   set sp2 [string range {          } $len end]
                    257:   if {$extra ne ""} {
                    258:     set extra " $extra"
                    259:   }
                    260:   puts "$title$dots $value$sp2$extra"
                    261: }
                    262: 
                    263: # Generate a formatted percentage value for $num/$denom
                    264: #
                    265: proc percent {num denom {of {}}} {
                    266:   if {$denom==0.0} {return ""}
                    267:   set v [expr {$num*100.0/$denom}]
                    268:   set of {}
                    269:   if {$v==100.0 || $v<0.001 || ($v>1.0 && $v<99.0)} {
                    270:     return [format {%5.1f%% %s} $v $of]
                    271:   } elseif {$v<0.1 || $v>99.9} {
                    272:     return [format {%7.3f%% %s} $v $of]
                    273:   } else {
                    274:     return [format {%6.2f%% %s} $v $of]
                    275:   }
                    276: }
                    277: 
                    278: proc divide {num denom} {
                    279:   if {$denom==0} {return 0.0}
                    280:   return [format %.2f [expr double($num)/double($denom)]]
                    281: }
                    282: 
                    283: # Generate a subreport that covers some subset of the database.
                    284: # the $where clause determines which subset to analyze.
                    285: #
                    286: proc subreport {title where} {
                    287:   global pageSize file_pgcnt compressOverhead
                    288: 
                    289:   # Query the in-memory database for the sum of various statistics 
                    290:   # for the subset of tables/indices identified by the WHERE clause in
                    291:   # $where. Note that even if the WHERE clause matches no rows, the
                    292:   # following query returns exactly one row (because it is an aggregate).
                    293:   #
                    294:   # The results of the query are stored directly by SQLite into local 
                    295:   # variables (i.e. $nentry, $nleaf etc.).
                    296:   #
                    297:   mem eval "
                    298:     SELECT
                    299:       int(sum(nentry)) AS nentry,
                    300:       int(sum(leaf_entries)) AS nleaf,
                    301:       int(sum(payload)) AS payload,
                    302:       int(sum(ovfl_payload)) AS ovfl_payload,
                    303:       max(mx_payload) AS mx_payload,
                    304:       int(sum(ovfl_cnt)) as ovfl_cnt,
                    305:       int(sum(leaf_pages)) AS leaf_pages,
                    306:       int(sum(int_pages)) AS int_pages,
                    307:       int(sum(ovfl_pages)) AS ovfl_pages,
                    308:       int(sum(leaf_unused)) AS leaf_unused,
                    309:       int(sum(int_unused)) AS int_unused,
                    310:       int(sum(ovfl_unused)) AS ovfl_unused,
                    311:       int(sum(gap_cnt)) AS gap_cnt,
                    312:       int(sum(compressed_size)) AS compressed_size
                    313:     FROM space_used WHERE $where" {} {}
                    314: 
                    315:   # Output the sub-report title, nicely decorated with * characters.
                    316:   #
                    317:   puts ""
                    318:   set len [string length $title]
                    319:   set stars [string repeat * [expr 65-$len]]
                    320:   puts "*** $title $stars"
                    321:   puts ""
                    322: 
                    323:   # Calculate statistics and store the results in TCL variables, as follows:
                    324:   #
                    325:   # total_pages: Database pages consumed.
                    326:   # total_pages_percent: Pages consumed as a percentage of the file.
                    327:   # storage: Bytes consumed.
                    328:   # payload_percent: Payload bytes used as a percentage of $storage.
                    329:   # total_unused: Unused bytes on pages.
                    330:   # avg_payload: Average payload per btree entry.
                    331:   # avg_fanout: Average fanout for internal pages.
                    332:   # avg_unused: Average unused bytes per btree entry.
                    333:   # ovfl_cnt_percent: Percentage of btree entries that use overflow pages.
                    334:   #
                    335:   set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
                    336:   set total_pages_percent [percent $total_pages $file_pgcnt]
                    337:   set storage [expr {$total_pages*$pageSize}]
                    338:   set payload_percent [percent $payload $storage {of storage consumed}]
                    339:   set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}]
                    340:   set avg_payload [divide $payload $nleaf]
                    341:   set avg_unused [divide $total_unused $nleaf]
                    342:   if {$int_pages>0} {
                    343:     # TODO: Is this formula correct?
                    344:     set nTab [mem eval "
                    345:       SELECT count(*) FROM (
                    346:           SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0
                    347:       )
                    348:     "]
                    349:     set avg_fanout [mem eval "
                    350:       SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used
                    351:           WHERE $where AND is_index = 0
                    352:     "]
                    353:     set avg_fanout [format %.2f $avg_fanout]
                    354:   }
                    355:   set ovfl_cnt_percent [percent $ovfl_cnt $nleaf {of all entries}]
                    356: 
                    357:   # Print out the sub-report statistics.
                    358:   #
                    359:   statline {Percentage of total database} $total_pages_percent
                    360:   statline {Number of entries} $nleaf
                    361:   statline {Bytes of storage consumed} $storage
                    362:   if {$compressed_size!=$storage} {
                    363:     set compressed_size [expr {$compressed_size+$compressOverhead*$total_pages}]
                    364:     set pct [expr {$compressed_size*100.0/$storage}]
                    365:     set pct [format {%5.1f%%} $pct]
                    366:     statline {Bytes used after compression} $compressed_size $pct
                    367:   }
                    368:   statline {Bytes of payload} $payload $payload_percent
                    369:   statline {Average payload per entry} $avg_payload
                    370:   statline {Average unused bytes per entry} $avg_unused
                    371:   if {[info exists avg_fanout]} {
                    372:     statline {Average fanout} $avg_fanout
                    373:   }
                    374:   if {$total_pages>1} {
                    375:     set fragmentation [percent $gap_cnt [expr {$total_pages-1}] {fragmentation}]
                    376:     statline {Fragmentation} $fragmentation
                    377:   }
                    378:   statline {Maximum payload per entry} $mx_payload
                    379:   statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent
                    380:   if {$int_pages>0} {
                    381:     statline {Index pages used} $int_pages
                    382:   }
                    383:   statline {Primary pages used} $leaf_pages
                    384:   statline {Overflow pages used} $ovfl_pages
                    385:   statline {Total pages used} $total_pages
                    386:   if {$int_unused>0} {
                    387:     set int_unused_percent [
                    388:          percent $int_unused [expr {$int_pages*$pageSize}] {of index space}]
                    389:     statline "Unused bytes on index pages" $int_unused $int_unused_percent
                    390:   }
                    391:   statline "Unused bytes on primary pages" $leaf_unused [
                    392:      percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}]
                    393:   statline "Unused bytes on overflow pages" $ovfl_unused [
                    394:      percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}]
                    395:   statline "Unused bytes on all pages" $total_unused [
                    396:                percent $total_unused $storage {of all space}]
                    397:   return 1
                    398: }
                    399: 
                    400: # Calculate the overhead in pages caused by auto-vacuum. 
                    401: #
                    402: # This procedure calculates and returns the number of pages used by the 
                    403: # auto-vacuum 'pointer-map'. If the database does not support auto-vacuum,
                    404: # then 0 is returned. The two arguments are the size of the database file in
                    405: # pages and the page size used by the database (in bytes).
                    406: proc autovacuum_overhead {filePages pageSize} {
                    407: 
                    408:   # Set $autovacuum to non-zero for databases that support auto-vacuum.
                    409:   set autovacuum [db one {PRAGMA auto_vacuum}]
                    410: 
                    411:   # If the database is not an auto-vacuum database or the file consists
                    412:   # of one page only then there is no overhead for auto-vacuum. Return zero.
                    413:   if {0==$autovacuum || $filePages==1} {
                    414:     return 0
                    415:   }
                    416: 
                    417:   # The number of entries on each pointer map page. The layout of the
                    418:   # database file is one pointer-map page, followed by $ptrsPerPage other
                    419:   # pages, followed by a pointer-map page etc. The first pointer-map page
                    420:   # is the second page of the file overall.
                    421:   set ptrsPerPage [expr double($pageSize/5)]
                    422: 
                    423:   # Return the number of pointer map pages in the database.
                    424:   return [expr wide(ceil( ($filePages-1.0)/($ptrsPerPage+1.0) ))]
                    425: }
                    426: 
                    427: 
                    428: # Calculate the summary statistics for the database and store the results
                    429: # in TCL variables. They are output below. Variables are as follows:
                    430: #
                    431: # pageSize:      Size of each page in bytes.
                    432: # file_bytes:    File size in bytes.
                    433: # file_pgcnt:    Number of pages in the file.
                    434: # file_pgcnt2:   Number of pages in the file (calculated).
                    435: # av_pgcnt:      Pages consumed by the auto-vacuum pointer-map.
                    436: # av_percent:    Percentage of the file consumed by auto-vacuum pointer-map.
                    437: # inuse_pgcnt:   Data pages in the file.
                    438: # inuse_percent: Percentage of pages used to store data.
                    439: # free_pgcnt:    Free pages calculated as (<total pages> - <in-use pages>)
                    440: # free_pgcnt2:   Free pages in the file according to the file header.
                    441: # free_percent:  Percentage of file consumed by free pages (calculated).
                    442: # free_percent2: Percentage of file consumed by free pages (header).
                    443: # ntable:        Number of tables in the db.
                    444: # nindex:        Number of indices in the db.
                    445: # nautoindex:    Number of indices created automatically.
                    446: # nmanindex:     Number of indices created manually.
                    447: # user_payload:  Number of bytes of payload in table btrees 
                    448: #                (not including sqlite_master)
                    449: # user_percent:  $user_payload as a percentage of total file size.
                    450: 
                    451: ### The following, setting $file_bytes based on the actual size of the file
                    452: ### on disk, causes this tool to choke on zipvfs databases. So set it based
                    453: ### on the return of [PRAGMA page_count] instead.
                    454: if 0 {
                    455:   set file_bytes  [file size $file_to_analyze]
                    456:   set file_pgcnt  [expr {$file_bytes/$pageSize}]
                    457: }
                    458: set file_pgcnt  [db one {PRAGMA page_count}]
                    459: set file_bytes  [expr {$file_pgcnt * $pageSize}]
                    460: 
                    461: set av_pgcnt    [autovacuum_overhead $file_pgcnt $pageSize]
                    462: set av_percent  [percent $av_pgcnt $file_pgcnt]
                    463: 
                    464: set sql {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used}
                    465: set inuse_pgcnt   [expr wide([mem eval $sql])]
                    466: set inuse_percent [percent $inuse_pgcnt $file_pgcnt]
                    467: 
                    468: set free_pgcnt    [expr {$file_pgcnt-$inuse_pgcnt-$av_pgcnt}]
                    469: set free_percent  [percent $free_pgcnt $file_pgcnt]
                    470: set free_pgcnt2   [db one {PRAGMA freelist_count}]
                    471: set free_percent2 [percent $free_pgcnt2 $file_pgcnt]
                    472: 
                    473: set file_pgcnt2 [expr {$inuse_pgcnt+$free_pgcnt2+$av_pgcnt}]
                    474: 
                    475: set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}]
                    476: set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}]
                    477: set sql {SELECT count(*) FROM sqlite_master WHERE name LIKE 'sqlite_autoindex%'}
                    478: set nautoindex [db eval $sql]
                    479: set nmanindex [expr {$nindex-$nautoindex}]
                    480: 
                    481: # set total_payload [mem eval "SELECT sum(payload) FROM space_used"]
                    482: set user_payload [mem one {SELECT int(sum(payload)) FROM space_used
                    483:      WHERE NOT is_index AND name NOT LIKE 'sqlite_master'}]
                    484: set user_percent [percent $user_payload $file_bytes]
                    485: 
                    486: # Output the summary statistics calculated above.
                    487: #
                    488: puts "/** Disk-Space Utilization Report For $file_to_analyze"
                    489: catch {
                    490:   puts "*** As of [clock format [clock seconds] -format {%Y-%b-%d %H:%M:%S}]"
                    491: }
                    492: puts ""
                    493: statline {Page size in bytes} $pageSize
                    494: statline {Pages in the whole file (measured)} $file_pgcnt
                    495: statline {Pages in the whole file (calculated)} $file_pgcnt2
                    496: statline {Pages that store data} $inuse_pgcnt $inuse_percent
                    497: statline {Pages on the freelist (per header)} $free_pgcnt2 $free_percent2
                    498: statline {Pages on the freelist (calculated)} $free_pgcnt $free_percent
                    499: statline {Pages of auto-vacuum overhead} $av_pgcnt $av_percent
                    500: statline {Number of tables in the database} $ntable
                    501: statline {Number of indices} $nindex
                    502: statline {Number of named indices} $nmanindex
                    503: statline {Automatically generated indices} $nautoindex
                    504: if {$isCompressed} {
                    505:   statline {Size of uncompressed content in bytes} $file_bytes
                    506:   set efficiency [percent $true_file_size $file_bytes]
                    507:   statline {Size of compressed file on disk} $true_file_size $efficiency
                    508: } else {
                    509:   statline {Size of the file in bytes} $file_bytes
                    510: }
                    511: statline {Bytes of user payload stored} $user_payload $user_percent
                    512: 
                    513: # Output table rankings
                    514: #
                    515: puts ""
                    516: puts "*** Page counts for all tables with their indices ********************"
                    517: puts ""
                    518: mem eval {SELECT tblname, count(*) AS cnt, 
                    519:               int(sum(int_pages+leaf_pages+ovfl_pages)) AS size
                    520:           FROM space_used GROUP BY tblname ORDER BY size+0 DESC, tblname} {} {
                    521:   statline [string toupper $tblname] $size [percent $size $file_pgcnt]
                    522: }
                    523: if {$isCompressed} {
                    524:   puts ""
                    525:   puts "*** Bytes of disk space used after compression ***********************"
                    526:   puts ""
                    527:   set csum 0
                    528:   mem eval {SELECT tblname,
                    529:                   int(sum(compressed_size)) +
                    530:                          $compressOverhead*sum(int_pages+leaf_pages+ovfl_pages)
                    531:                         AS csize
                    532:           FROM space_used GROUP BY tblname ORDER BY csize+0 DESC, tblname} {} {
                    533:     incr csum $csize
                    534:     statline [string toupper $tblname] $csize [percent $csize $true_file_size]
                    535:   }
                    536:   set overhead [expr {$true_file_size - $csum}]
                    537:   if {$overhead>0} {
                    538:     statline {Header and free space} $overhead [percent $overhead $true_file_size]
                    539:   }
                    540: }
                    541: 
                    542: # Output subreports
                    543: #
                    544: if {$nindex>0} {
                    545:   subreport {All tables and indices} 1
                    546: }
                    547: subreport {All tables} {NOT is_index}
                    548: if {$nindex>0} {
                    549:   subreport {All indices} {is_index}
                    550: }
                    551: foreach tbl [mem eval {SELECT name FROM space_used WHERE NOT is_index
                    552:                        ORDER BY name}] {
                    553:   regsub ' $tbl '' qn
                    554:   set name [string toupper $tbl]
                    555:   set n [mem eval "SELECT count(*) FROM space_used WHERE tblname='$qn'"]
                    556:   if {$n>1} {
                    557:     subreport "Table $name and all its indices" "tblname='$qn'"
                    558:     subreport "Table $name w/o any indices" "name='$qn'"
                    559:     subreport "Indices of table $name" "tblname='$qn' AND is_index"
                    560:   } else {
                    561:     subreport "Table $name" "name='$qn'"
                    562:   }
                    563: }
                    564: 
                    565: # Output instructions on what the numbers above mean.
                    566: #
                    567: puts {
                    568: *** Definitions ******************************************************
                    569: 
                    570: Page size in bytes
                    571: 
                    572:     The number of bytes in a single page of the database file.  
                    573:     Usually 1024.
                    574: 
                    575: Number of pages in the whole file
                    576: }
                    577: puts "    The number of $pageSize-byte pages that go into forming the complete
                    578:     database"
                    579: puts {
                    580: Pages that store data
                    581: 
                    582:     The number of pages that store data, either as primary B*Tree pages or
                    583:     as overflow pages.  The number at the right is the data pages divided by
                    584:     the total number of pages in the file.
                    585: 
                    586: Pages on the freelist
                    587: 
                    588:     The number of pages that are not currently in use but are reserved for
                    589:     future use.  The percentage at the right is the number of freelist pages
                    590:     divided by the total number of pages in the file.
                    591: 
                    592: Pages of auto-vacuum overhead
                    593: 
                    594:     The number of pages that store data used by the database to facilitate
                    595:     auto-vacuum. This is zero for databases that do not support auto-vacuum.
                    596: 
                    597: Number of tables in the database
                    598: 
                    599:     The number of tables in the database, including the SQLITE_MASTER table
                    600:     used to store schema information.
                    601: 
                    602: Number of indices
                    603: 
                    604:     The total number of indices in the database.
                    605: 
                    606: Number of named indices
                    607: 
                    608:     The number of indices created using an explicit CREATE INDEX statement.
                    609: 
                    610: Automatically generated indices
                    611: 
                    612:     The number of indices used to implement PRIMARY KEY or UNIQUE constraints
                    613:     on tables.
                    614: 
                    615: Size of the file in bytes
                    616: 
                    617:     The total amount of disk space used by the entire database files.
                    618: 
                    619: Bytes of user payload stored
                    620: 
                    621:     The total number of bytes of user payload stored in the database. The
                    622:     schema information in the SQLITE_MASTER table is not counted when
                    623:     computing this number.  The percentage at the right shows the payload
                    624:     divided by the total file size.
                    625: 
                    626: Percentage of total database
                    627: 
                    628:     The amount of the complete database file that is devoted to storing
                    629:     information described by this category.
                    630: 
                    631: Number of entries
                    632: 
                    633:     The total number of B-Tree key/value pairs stored under this category.
                    634: 
                    635: Bytes of storage consumed
                    636: 
                    637:     The total amount of disk space required to store all B-Tree entries
                    638:     under this category.  The is the total number of pages used times
                    639:     the pages size.
                    640: 
                    641: Bytes of payload
                    642: 
                    643:     The amount of payload stored under this category.  Payload is the data
                    644:     part of table entries and the key part of index entries.  The percentage
                    645:     at the right is the bytes of payload divided by the bytes of storage 
                    646:     consumed.
                    647: 
                    648: Average payload per entry
                    649: 
                    650:     The average amount of payload on each entry.  This is just the bytes of
                    651:     payload divided by the number of entries.
                    652: 
                    653: Average unused bytes per entry
                    654: 
                    655:     The average amount of free space remaining on all pages under this
                    656:     category on a per-entry basis.  This is the number of unused bytes on
                    657:     all pages divided by the number of entries.
                    658: 
                    659: Fragmentation
                    660: 
                    661:     The percentage of pages in the table or index that are not
                    662:     consecutive in the disk file.  Many filesystems are optimized
                    663:     for sequential file access so smaller fragmentation numbers 
                    664:     sometimes result in faster queries, especially for larger
                    665:     database files that do not fit in the disk cache.
                    666: 
                    667: Maximum payload per entry
                    668: 
                    669:     The largest payload size of any entry.
                    670: 
                    671: Entries that use overflow
                    672: 
                    673:     The number of entries that user one or more overflow pages.
                    674: 
                    675: Total pages used
                    676: 
                    677:     This is the number of pages used to hold all information in the current
                    678:     category.  This is the sum of index, primary, and overflow pages.
                    679: 
                    680: Index pages used
                    681: 
                    682:     This is the number of pages in a table B-tree that hold only key (rowid)
                    683:     information and no data.
                    684: 
                    685: Primary pages used
                    686: 
                    687:     This is the number of B-tree pages that hold both key and data.
                    688: 
                    689: Overflow pages used
                    690: 
                    691:     The total number of overflow pages used for this category.
                    692: 
                    693: Unused bytes on index pages
                    694: 
                    695:     The total number of bytes of unused space on all index pages.  The
                    696:     percentage at the right is the number of unused bytes divided by the
                    697:     total number of bytes on index pages.
                    698: 
                    699: Unused bytes on primary pages
                    700: 
                    701:     The total number of bytes of unused space on all primary pages.  The
                    702:     percentage at the right is the number of unused bytes divided by the
                    703:     total number of bytes on primary pages.
                    704: 
                    705: Unused bytes on overflow pages
                    706: 
                    707:     The total number of bytes of unused space on all overflow pages.  The
                    708:     percentage at the right is the number of unused bytes divided by the
                    709:     total number of bytes on overflow pages.
                    710: 
                    711: Unused bytes on all pages
                    712: 
                    713:     The total number of bytes of unused space on all primary and overflow 
                    714:     pages.  The percentage at the right is the number of unused bytes 
                    715:     divided by the total number of bytes.
                    716: }
                    717: 
                    718: # Output a dump of the in-memory database. This can be used for more
                    719: # complex offline analysis.
                    720: #
                    721: puts "**********************************************************************"
                    722: puts "The entire text of this report can be sourced into any SQL database"
                    723: puts "engine for further analysis.  All of the text above is an SQL comment."
                    724: puts "The data used to generate this report follows:"
                    725: puts "*/"
                    726: puts "BEGIN;"
                    727: puts $tabledef
                    728: unset -nocomplain x
                    729: mem eval {SELECT * FROM space_used} x {
                    730:   puts -nonewline "INSERT INTO space_used VALUES"
                    731:   set sep (
                    732:   foreach col $x(*) {
                    733:     set v $x($col)
                    734:     if {$v=="" || ![string is double $v]} {set v [quote $v]}
                    735:     puts -nonewline $sep$v
                    736:     set sep ,
                    737:   }
                    738:   puts ");"
                    739: }
                    740: puts "COMMIT;"
                    741: 
                    742: } err]} {
                    743:   puts "ERROR: $err"
                    744:   puts $errorInfo
                    745:   exit 1
                    746: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>