diff --git a/Makefile.msc b/Makefile.msc index 561d5f58e0..0c657fef81 100644 --- a/Makefile.msc +++ b/Makefile.msc @@ -1578,6 +1578,7 @@ TESTEXT = \ $(TOP)\ext\misc\amatch.c \ $(TOP)\ext\misc\appendvfs.c \ $(TOP)\ext\misc\basexx.c \ + $(TOP)\ext\misc\bgckpt.c \ $(TOP)\ext\misc\cksumvfs.c \ $(TOP)\ext\misc\closure.c \ $(TOP)\ext\misc\csv.c \ diff --git a/autosetup/teaish/core.tcl b/autosetup/teaish/core.tcl index c9abfa0626..381597ec56 100644 --- a/autosetup/teaish/core.tcl +++ b/autosetup/teaish/core.tcl @@ -92,7 +92,6 @@ array set teaish__Config [proj-strip-hash-comments { -tm.tcl.in TEAISH_TM_TCL_IN -options {} -pragmas {} - -src {} } # @@ -220,9 +219,7 @@ proc teaish-configure-core {} { => {Full pathname of tclsh to use. It is used for trying to find tclConfig.sh. Warning: if its containing dir has multiple tclsh versions, it may select the wrong tclConfig.sh! - Defaults to the $TCLSH environment variable.} - - tcl-stubs=0 => {Enable use of Tcl stubs library.} + Defaults to the $TCLSH environment variable.} # TEA has --with-tclinclude but it appears to only be useful for # building an extension against an uninstalled copy of TCL's own @@ -334,33 +331,29 @@ proc teaish-configure-core {} { -url - -v "" -tm.tcl - -v "" -tm.tcl.in - -v "" - -src - -v "" } { - #proj-assert 0 {Just testing} set isPIFlag [expr {"-" ne $pflag}] if {$isPIFlag} { if {[info exists ::teaish__PkgInfo($pflag)]} { # Was already set - skip it. continue; } - proj-assert {{-} eq $key};# "Unexpected pflag=$pflag key=$key type=$type val=$val" + proj-assert {{-} eq $key} set key $f2d($pflag) } - if {"" ne $key} { - if {"" ne [get-define $key ""]} { - # Was already set - skip it. - continue - } + proj-assert {"" ne $key} + set got [get-define $key ""] + if {"" ne $got} { + # Was already set - skip it. 
+ continue } switch -exact -- $type { -v {} -e { set val [eval $val] } default { proj-error "Invalid type flag: $type" } } - #puts "***** defining default $pflag $key {$val} isPIFlag=$isPIFlag" - if {$key ne ""} { - define $key $val - } + #puts "***** defining default $pflag $key {$val} isPIFlag=$isPIFlag got=$got" + define $key $val if {$isPIFlag} { set ::teaish__PkgInfo($pflag) $val } @@ -500,8 +493,6 @@ proc teaish__configure_phase1 {} { } teaish-checks-run -post - define TEAISH_USE_STUBS [opt-bool tcl-stubs] - apply {{} { # Set up "vsatisfies" code for pkgIndex.tcl.in, # _teaish.tester.tcl.in, and for a configure-time check. We would @@ -531,7 +522,7 @@ proc teaish__configure_phase1 {} { set vputs "puts \[ $vsat \]" #puts "*** vputs = $vputs" scan [exec echo $vputs | $tclsh] %d vvcheck - if {![info exists vvcheck] || 0 == $vvcheck} { + if {0 == $vvcheck} { proj-fatal -up $tclsh "check failed:" $vsat } } @@ -594,8 +585,7 @@ proc teaish__configure_phase1 {} { # if {0x0f & $::teaish__Config(pkginit-policy)} { file delete -force -- [get-define TEAISH_PKGINIT_TCL] - proj-dot-ins-append [get-define TEAISH_PKGINIT_TCL_IN] \ - [get-define TEAISH_PKGINIT_TCL] + proj-dot-ins-append [get-define TEAISH_PKGINIT_TCL_IN] } if {0x0f & $::teaish__Config(tm-policy)} { file delete -force -- [get-define TEAISH_TM_TCL] @@ -605,20 +595,17 @@ proc teaish__configure_phase1 {} { apply {{} { # Queue up any remaining dot-in files set dotIns [list] - foreach {dIn => dOut} { - TEAISH_TESTER_TCL_IN => TEAISH_TESTER_TCL - TEAISH_TEST_TCL_IN => TEAISH_TEST_TCL - TEAISH_MAKEFILE_IN => TEAISH_MAKEFILE + foreach d { + TEAISH_TESTER_TCL_IN + TEAISH_TEST_TCL_IN + TEAISH_MAKEFILE_IN } { - lappend dotIns [get-define $dIn ""] [get-define $dOut ""] - } - lappend dotIns $::autosetup(srcdir)/Makefile.in Makefile; # must be after TEAISH_MAKEFILE_IN. - # Much later: probably because of timestamps for deps purposes :-? 
- #puts "dotIns=$dotIns" - foreach {i o} $dotIns { - if {"" ne $i && "" ne $o} { - #puts " pre-dot-ins-append: \[$i\] -> \[$o\]" - proj-dot-ins-append $i $o + lappend dotIns [get-define $d ""] + } + lappend dotIns $::autosetup(srcdir)/Makefile.in; # must be after TEAISH_MAKEFILE_IN + foreach f $dotIns { + if {"" ne $f} { + proj-dot-ins-append $f } } }} @@ -653,10 +640,10 @@ proc teaish__configure_phase1 {} { # # NO [define]s after this point! # + proj-dot-ins-process -validate proj-if-opt-truthy teaish-dump-defines { proj-file-write config.defines.txt $tdefs } - proj-dot-ins-process -validate }; # teaish__configure_phase1 @@ -830,9 +817,7 @@ proc teaish__check_tcl {} { if {"" ne $withSh && [catch {exec echo "puts stdout \$auto_path" | "$withSh"} result] == 0} { foreach i $result { - if {![string match //zip* $i] && [file isdirectory $i]} { - # isdirectory actually passes on //zipfs:/..., but those are - # useless for our purposes + if {[file isdirectory $i]} { set tcllibdir $i/$extDirName break } @@ -1081,7 +1066,7 @@ If you are attempting an out-of-tree build, use ]]} { if {[string match *.in $extM]} { define TEAISH_MAKEFILE_IN $extM - define TEAISH_MAKEFILE _[file rootname [file tail $extM]] + define TEAISH_MAKEFILE [file rootname [file tail $extM]] } else { define TEAISH_MAKEFILE_IN "" define TEAISH_MAKEFILE $extM @@ -1149,8 +1134,8 @@ If you are attempting an out-of-tree build, use set flist [list $dirExt/teaish.test.tcl.in $dirExt/teaish.test.tcl] if {[proj-first-file-found ttt $flist]} { if {[string match *.in $ttt]} { - # Generate _teaish.test.tcl from $ttt - set xt _[file rootname [file tail $ttt]] + # Generate teaish.test.tcl from $ttt + set xt [file rootname [file tail $ttt]] file delete -force -- $xt; # ensure no stale copy is used define TEAISH_TEST_TCL $xt define TEAISH_TEST_TCL_IN $ttt @@ -1317,6 +1302,7 @@ proc teaish-ldflags-prepend {args} { # object files (which are typically in the build tree)). 
# proc teaish-src-add {args} { + set i 0 proj-parse-simple-flags args flags { -dist 0 {expr 1} -dir 0 {expr 1} @@ -1401,7 +1387,7 @@ proc teaish__cleanup_rule {{tgt clean}} { return ${tgt}-_${x}_ } -# @teaish-make-obj ?flags? ?...args? +# @teaish-make-obj objfile srcfile ?...args? # # Uses teaish-make-add to inject makefile rules for $objfile from # $srcfile, which is assumed to be C code which uses libtcl. Unless @@ -1415,45 +1401,43 @@ proc teaish__cleanup_rule {{tgt clean}} { # Any arguments after the 2nd may be flags described below or, if no # -recipe is provided, flags for the compiler call. # -# -obj obj-filename.o -# -# -src src-filename.c -# # -recipe {...} # Uses the trimmed value of {...} as the recipe, prefixing it with # a single hard-tab character. # # -deps {...} -# List of extra files to list as dependencies of $o. +# List of extra files to list as dependencies of $o. Good luck +# escaping non-trivial cases properly. # # -clean # Generate cleanup rules as well. -proc teaish-make-obj {args} { - proj-parse-simple-flags args flags { - -clean 0 {expr 1} - -recipe => {} - -deps => {} - -obj => {} - -src => {} - } - #parray flags - if {"" eq $flags(-obj)} { - set args [lassign $args flags(-obj)] - if {"" eq $flags(-obj)} { - proj-error "Missing -obj flag." 
+proc teaish-make-obj {o src args} { + set consume 0 + set clean 0 + set flag "" + array set flags {} + set xargs {} + foreach arg $args { + if {$consume} { + set consume 0 + set flags($flag) $arg + continue + } + switch -exact -- $arg { + -clean {incr clean} + -recipe - + -deps { + set flag $arg + incr consume + } + default { + lappend xargs $arg + } } } - foreach f {-deps -src} { - set flags($f) [string trim [string map {\n " "} $flags($f)]] - } - foreach f {-deps -src} { - set flags($f) [string trim $flags($f)] - } - #parray flags - #puts "-- args=$args" teaish-make-add \ - "# [proj-scope 1] -> [proj-scope] $flags(-obj) $flags(-src)" -nl \ - "$flags(-obj): $flags(-src) $::teaish__Config(teaish.tcl)" + "# [proj-scope 1] -> [proj-scope] $o $src" -nl \ + "$o: $src $::teaish__Config(teaish.tcl)" if {[info exists flags(-deps)]} { teaish-make-add " " [join $flags(-deps)] } @@ -1461,12 +1445,12 @@ proc teaish-make-obj {args} { if {[info exists flags(-recipe)]} { teaish-make-add [string trim $flags(-recipe)] -nl } else { - teaish-make-add [join [list \$(CC.tcl) -c $flags(-src) {*}$args]] -nl + teaish-make-add [join [list \$(CC.tcl) -c $src {*}$xargs]] -nl } - if {$flags(-clean)} { + if {$clean} { set rule [teaish__cleanup_rule] teaish-make-add \ - "clean: $rule\n$rule:\n\trm -f \"$flags(-obj)\"\n" + "clean: $rule\n$rule:\n\trm -f \"$o\"\n" } } @@ -2094,17 +2078,6 @@ proc teaish-pkginfo-set {args} { set v $x } - -src { - set d $::teaish__Config(extension-dir) - foreach f $v { - lappend ::teaish__Config(dist-files) $f - lappend ::teaish__Config(extension-src) $d/$f - lappend ::teaish__PkgInfo(-src) $f - # ^^^ so that default-value initialization in - # teaish-configure-core recognizes that it's been set. - } - } - -tm.tcl - -tm.tcl.in { if {0x30 & $::teaish__Config(pkgindex-policy)} { @@ -2542,7 +2515,7 @@ proc teaish__install {{dDest ""}} { ] { teaish__verbose 1 msg-result "Copying files to $destDir..." 
file mkdir $destDir - foreach f [glob -nocomplain -directory $srcDir *] { + foreach f [glob -directory $srcDir *] { if {[string match {*~} $f] || [string match "#*#" [file tail $f]]} { # Editor-generated backups and emacs lock files continue diff --git a/autosetup/teaish/tester.tcl b/autosetup/teaish/tester.tcl index a25b366e8d..d8b5f7a0e8 100644 --- a/autosetup/teaish/tester.tcl +++ b/autosetup/teaish/tester.tcl @@ -99,7 +99,7 @@ proc test__affert {failMode args} { lassign $args script msg } incr ::test__Counters($what) - if {![uplevel 1 expr [list $script]]} { + if {![uplevel 1 [concat expr [list $script]]]} { if {"" eq $msg} { set msg $script } @@ -136,40 +136,6 @@ proc assert {args} { tailcall test__affert 1 {*}$args } -# -# @assert-matches ?-e? pattern ?-e? rhs ?msg? -# -# Equivalent to assert {[string match $pattern $rhs]} except that -# if either of those are prefixed with an -e flag, they are eval'd -# and their results are used. -# -proc assert-matches {args} { - set evalLhs 0 - set evalRhs 0 - if {"-e" eq [lindex $args 0]} { - incr evalLhs - set args [lassign $args -] - } - set args [lassign $args pattern] - if {"-e" eq [lindex $args 0]} { - incr evalRhs - set args [lassign $args -] - } - set args [lassign $args rhs msg] - - if {$evalLhs} { - set pattern [uplevel 1 $pattern] - } - if {$evalRhs} { - set rhs [uplevel 1 $rhs] - } - #puts "***pattern=$pattern\n***rhs=$rhs" - tailcall test__affert 1 \ - [join [list \[ string match [list $pattern] [list $rhs] \]]] $msg - # why does this not work? [list \[ string match [list $pattern] [list $rhs] \]] $msg - # "\[string match [list $pattern] [list $rhs]\]" -} - # # @test-assert testId script ?msg? 
# @@ -191,7 +157,7 @@ proc test-expect {testId script result} { puts "test $testId" set x [string trim [uplevel 1 $script]] set result [string trim $result] - tailcall test__affert 0 [list "{$x}" eq "{$result}"] \ + tailcall test__affert 0 [list $x eq $result] \ "\nEXPECTED: <<$result>>\nGOT: <<$x>>" } @@ -203,7 +169,7 @@ proc test-expect {testId script result} { # proc test-catch {cmd args} { if {[catch { - uplevel 1 $cmd {*}$args + $cmd {*}$args } rc xopts]} { puts "[test-current-scope] ignoring failure of: $cmd [lindex $args 0]: $rc" return 1 @@ -211,37 +177,6 @@ proc test-catch {cmd args} { return 0 } -# -# @test-catch-matching pattern (script|cmd args...) -# -# Works like test-catch, but it expects its argument(s) to to throw an -# error matching the given string (checked with [string match]). If -# they do not throw, or the error does not match $pattern, this -# function throws, else it returns 1. -# -# If there is no second argument, the $cmd is assumed to be a script, -# and will be eval'd in the caller's scope. -# -# TODO: add -glob and -regex flags to control matching flavor. -# -proc test-catch-matching {pattern cmd args} { - if {[catch { - #puts "**** catch-matching cmd=$cmd args=$args" - if {0 == [llength $args]} { - uplevel 1 $cmd {*}$args - } else { - $cmd {*}$args - } - } rc xopts]} { - if {[string match $pattern $rc]} { - return 1 - } else { - error "[test-current-scope] exception does not match {$pattern}: {$rc}" - } - } - error "[test-current-scope] expecting to see an error matching {$pattern}" -} - if {![array exists ::teaish__BuildFlags]} { array set ::teaish__BuildFlags {} } diff --git a/doc/begin_concurrent.md b/doc/begin_concurrent.md new file mode 100644 index 0000000000..2a06b5872e --- /dev/null +++ b/doc/begin_concurrent.md @@ -0,0 +1,107 @@ + +Begin Concurrent +================ + +## Overview + +Usually, SQLite allows at most one writer to proceed concurrently. 
The +BEGIN CONCURRENT enhancement allows multiple writers to process write +transactions simultaneously if the database is in "wal" or "wal2" mode, +although the system still serializes COMMIT commands. + +When a write-transaction is opened with "BEGIN CONCURRENT", actually +locking the database is deferred until a COMMIT is executed. This means +that any number of transactions started with BEGIN CONCURRENT may proceed +concurrently. The system uses optimistic page-level-locking to prevent +conflicting concurrent transactions from being committed. + +When a BEGIN CONCURRENT transaction is committed, the system checks whether +or not any of the database pages that the transaction has read have been +modified since the BEGIN CONCURRENT was opened. In other words - it asks +if the transaction being committed operates on a different set of data than +all other concurrently executing transactions. If the answer is "yes, this +transaction did not read or modify any data modified by any concurrent +transaction", then the transaction is committed as normal. Otherwise, if the +transaction does conflict, it cannot be committed and an SQLITE_BUSY_SNAPSHOT +error is returned. At this point, all the client can do is ROLLBACK the +transaction. + +If SQLITE_BUSY_SNAPSHOT is returned, messages are output via the sqlite3_log +mechanism indicating the page and table or index on which the conflict +occurred. This can be useful when optimizing concurrency. + +## Application Programming Notes + +In order to serialize COMMIT processing, SQLite takes a lock on the database +as part of each COMMIT command and releases it before returning. At most one +writer may hold this lock at any one time. If a writer cannot obtain the lock, +it uses SQLite's busy-handler to pause and retry for a while: + + + https://www.sqlite.org/c3ref/busy_handler.html + + +If there is significant contention for the writer lock, this mechanism can be +inefficient. 
In this case it is better for the application to use a mutex or +some other mechanism that supports blocking to ensure that at most one writer +is attempting to COMMIT a BEGIN CONCURRENT transaction at a time. This is +usually easier if all writers are part of the same operating system process. + +If all database clients (readers and writers) are located in the same OS +process, and if that OS is a Unix variant, then it can be more efficient to +use the built-in VFS "unix-excl" instead of the default "unix". This is because it +uses more efficient locking primitives. + +The key to maximizing concurrency using BEGIN CONCURRENT is to ensure that +there are a large number of non-conflicting transactions. In SQLite, each +table and each index is stored as a separate b-tree, each of which is +distributed over a discrete set of database pages. This means that: + + * Two transactions that write to different sets of tables never + conflict, and that + + * Two transactions that write to the same tables or indexes only + conflict if the values of the keys (either primary keys or indexed + rows) are fairly close together. For example, given a large + table with the schema: + +
     CREATE TABLE t1(a INTEGER PRIMARY KEY, b BLOB);
+ + writing two rows with adjacent values for "a" probably will cause a + conflict (as the two keys are stored on the same page), but writing two + rows with vastly different values for "a" will not (as the keys will likely + be stored on different pages). + +Note that, in SQLite, if values are not explicitly supplied for an INTEGER +PRIMARY KEY, as for example in: + +> + INSERT INTO t1(b) VALUES(&lt;blob-value&gt;); + +then monotonically increasing values are assigned automatically. This is +terrible for concurrency, as it all but ensures that all new rows are +added to the same database page. In such situations, it is better to +explicitly assign random values to INTEGER PRIMARY KEY fields. + +This problem also comes up for non-WITHOUT ROWID tables that do not have an +explicit INTEGER PRIMARY KEY column. In these cases each table has an implicit +INTEGER PRIMARY KEY column that is assigned increasing values, leading to the +same problem as omitting to assign a value to an explicit INTEGER PRIMARY KEY +column. + +For both explicit and implicit INTEGER PRIMARY KEYs, it is possible to have +SQLite assign values at random (instead of the monotonically increasing +values) by writing a row with a rowid equal to the largest possible signed +64-bit integer to the table. For example: + + INSERT INTO t1(a) VALUES(9223372036854775807); + +Applications should take care not to malfunction due to the presence of such +rows. + +The nature of some types of indexes, for example indexes on timestamp fields, +can also cause problems (as concurrent transactions may assign similar +timestamps that will be stored on the same db page to new records). In these +cases the database schema may need to be rethought to increase the concurrency +provided by page-level-locking. 
+ diff --git a/doc/wal2.md b/doc/wal2.md new file mode 100644 index 0000000000..5afd8ae606 --- /dev/null +++ b/doc/wal2.md @@ -0,0 +1,98 @@ + +Wal2 Mode Notes +=============== + +## Activating/Deactivating Wal2 Mode + +"Wal2" mode is very similar to "wal" mode. To change a database to wal2 mode, +use the command: + +> + PRAGMA journal_mode = wal2; + +It is not possible to change a database directly from "wal" mode to "wal2" +mode. Instead, it must first be changed to rollback mode. So, to change a wal +mode database to wal2 mode, the following two commands may be used: + +> + PRAGMA journal_mode = delete; + PRAGMA journal_mode = wal2; + +A database in wal2 mode may only be accessed by versions of SQLite compiled +from this branch. Attempting to use any other version of SQLite results in an +SQLITE_NOTADB error. A wal2 mode database may be changed back to rollback mode +(making it accessible by all versions of SQLite) using: + +> + PRAGMA journal_mode = delete; + +## The Advantage of Wal2 Mode + +In legacy wal mode, when a writer writes data to the database, it doesn't +modify the database file directly. Instead, it appends new data to the +"&lt;database&gt;-wal" file. Readers read data from both the original database +file and the "&lt;database&gt;-wal" file. At some point, data is copied from the +"&lt;database&gt;-wal" file into the database file, after which the wal file can +be deleted or overwritten. Copying data from the wal file into the database +file is called a "checkpoint", and may be done explicitly (either by "PRAGMA +wal_checkpoint" or sqlite3_wal_checkpoint_v2()), or +automatically (by configuring "PRAGMA wal_autocheckpoint" - this is the +default). + +Checkpointers do not block writers, and writers do not block checkpointers. +However, if a writer writes to the database while a checkpoint is ongoing, +then the new data is appended to the end of the wal file. 
This means that, +even following the checkpoint, the wal file cannot be overwritten or deleted, +and so all subsequent transactions must also be appended to the wal file. The +work of the checkpointer is not wasted - SQLite remembers which parts of the +wal file have already been copied into the db file so that the next checkpoint +does not have to do so again - but it does mean that the wal file may grow +indefinitely if the checkpointer never gets a chance to finish without a +writer appending to the wal file. There are also circumstances in which +long-running readers may prevent a checkpointer from checkpointing the entire +wal file - also causing the wal file to grow indefinitely in a busy system. + +Wal2 mode does not have this problem. In wal2 mode, wal files do not grow +indefinitely even if the checkpointer never has a chance to finish +uninterrupted. + +In wal2 mode, the system uses two wal files instead of one. The files are named +"<database>-wal" and "<database>-wal2", where "<database>" is of +course the name of the database file. When data is written to the database, the +writer begins by appending the new data to the first wal file. Once the first +wal file has grown large enough, writers switch to appending data to the second +wal file. At this point the first wal file can be checkpointed (after which it +can be overwritten). Then, once the second wal file has grown large enough and +the first wal file has been checkpointed, writers switch back to the first wal +file. And so on. + +## Application Programming + +From the point of view of the user, the main differences between wal and +wal2 mode are to do with checkpointing: + + * In wal mode, a checkpoint may be attempted at any time. In wal2 + mode, the checkpointer has to wait until writers have switched + to the "other" wal file before a checkpoint can take place. 
+ + * In wal mode, the wal-hook (callback registered using + sqlite3_wal_hook()) is invoked after a transaction is committed + with the total number of pages in the wal file as an argument. In wal2 + mode, the argument is either the total number of uncheckpointed pages in + both wal files, or - if the "other" wal file is empty or already + checkpointed - 0. + +Clients are recommended to use the same strategies for checkpointing wal2 mode +databases as for wal databases - by registering a wal-hook using +sqlite3_wal_hook() and attempting a checkpoint when the parameter +exceeds a certain threshold. + +However, it should be noted that although the wal-hook is invoked after each +transaction is committed to disk and database locks released, it is still +invoked from within the sqlite3_step() call used to execute the "COMMIT" +command. In BEGIN CONCURRENT systems, where the "COMMIT" is often protected by +an application mutex, this may reduce concurrency. In such systems, instead of +executing a checkpoint from within the wal-hook, a thread might defer this +action until after the application mutex has been released. + + diff --git a/ext/fts5/test/fts5concurrent.test b/ext/fts5/test/fts5concurrent.test new file mode 100644 index 0000000000..3ac21cfb58 --- /dev/null +++ b/ext/fts5/test/fts5concurrent.test @@ -0,0 +1,55 @@ +# 2022 May 09 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. The +# focus of this script is testing the FTS5 module. +# + +source [file join [file dirname [info script]] fts5_common.tcl] +set testprefix fts5concurrent + +# If SQLITE_ENABLE_FTS5 is not defined, omit this file. 
+ifcapable !fts5 { + finish_test + return +} + +do_execsql_test 1.0 { + CREATE VIRTUAL TABLE ft USING fts5(line, tokenize=trigram); +} + +do_execsql_test 1.1 { + BEGIN CONCURRENT; + INSERT INTO ft VALUES( hex(randomblob(50)) ); + COMMIT +} {} + +do_execsql_test 1.2 { + BEGIN CONCURRENT; + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<50 + ) + INSERT INTO ft SELECT hex(randomblob(50)) FROM s; + + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<50 + ) + INSERT INTO ft SELECT hex(randomblob(50)) FROM s; + + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<50 + ) + INSERT INTO ft SELECT hex(randomblob(50)) FROM s; + COMMIT; +} + + +finish_test + diff --git a/ext/fts5/test/fts5secure3.test b/ext/fts5/test/fts5secure3.test index 49347144f3..5c9a473a3d 100644 --- a/ext/fts5/test/fts5secure3.test +++ b/ext/fts5/test/fts5secure3.test @@ -85,7 +85,6 @@ do_execsql_test 2.8 { #------------------------------------------------------------------------- # Tests with large/small rowid values. # - foreach {tn cfg} { 1 "" 2 "INSERT INTO fff(fff, rank) VALUES('secure-delete', 1)" @@ -111,6 +110,13 @@ foreach {tn cfg} { set ret } db func newdoc newdoc + + proc random {} { + set res [expr { int(rand() * 0x7FFFFFFFFFFFFFFF) }] + if { int(rand() * 2) } { set res [expr $res*-1] } + return $res + } + db func random random do_execsql_test 3.$tn.0 { CREATE VIRTUAL TABLE fff USING fts5(y); @@ -168,4 +174,3 @@ foreach {tn cfg} { finish_test - diff --git a/ext/misc/bgckpt.c b/ext/misc/bgckpt.c new file mode 100644 index 0000000000..cac3b84831 --- /dev/null +++ b/ext/misc/bgckpt.c @@ -0,0 +1,228 @@ +/* +** 2017-10-11 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. 
+** +****************************************************************************** +** +*/ + +#if !defined(SQLITE_TEST) || defined(SQLITE_OS_UNIX) + +#include "sqlite3.h" +#include +#include + +/* +** API declarations. +*/ +typedef struct Checkpointer Checkpointer; +int sqlite3_bgckpt_create(const char *zFilename, Checkpointer **pp); +int sqlite3_bgckpt_checkpoint(Checkpointer *p, int bBlock); +void sqlite3_bgckpt_destroy(Checkpointer *p); + + +struct Checkpointer { + sqlite3 *db; /* Database handle */ + + pthread_t thread; /* Background thread */ + pthread_mutex_t mutex; + pthread_cond_t cond; + + int rc; /* Error from "PRAGMA wal_checkpoint" */ + int bCkpt; /* True if checkpoint requested */ + int bExit; /* True if exit requested */ +}; + +static void *bgckptThreadMain(void *pCtx){ + int rc = SQLITE_OK; + Checkpointer *p = (Checkpointer*)pCtx; + + while( rc==SQLITE_OK ){ + int bExit; + + pthread_mutex_lock(&p->mutex); + if( p->bCkpt==0 && p->bExit==0 ){ + pthread_cond_wait(&p->cond, &p->mutex); + } + p->bCkpt = 0; + bExit = p->bExit; + pthread_mutex_unlock(&p->mutex); + + if( bExit ) break; + rc = sqlite3_exec(p->db, "PRAGMA wal_checkpoint", 0, 0, 0); + if( rc==SQLITE_BUSY ){ + rc = SQLITE_OK; + } + } + + pthread_mutex_lock(&p->mutex); + p->rc = rc; + pthread_mutex_unlock(&p->mutex); + return 0; +} + +void sqlite3_bgckpt_destroy(Checkpointer *p){ + if( p ){ + void *ret = 0; + + /* Signal the background thread to exit */ + pthread_mutex_lock(&p->mutex); + p->bExit = 1; + pthread_cond_broadcast(&p->cond); + pthread_mutex_unlock(&p->mutex); + + pthread_join(p->thread, &ret); + sqlite3_close(p->db); + sqlite3_free(p); + } +} + + +int sqlite3_bgckpt_create(const char *zFilename, Checkpointer **pp){ + Checkpointer *pNew = 0; + int rc; + + pNew = (Checkpointer*)sqlite3_malloc(sizeof(Checkpointer)); + if( pNew==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pNew, 0, sizeof(Checkpointer)); + rc = sqlite3_open(zFilename, &pNew->db); + } + + if( rc==SQLITE_OK ){ + 
pthread_mutex_init(&pNew->mutex, 0); + pthread_cond_init(&pNew->cond, 0); + pthread_create(&pNew->thread, 0, bgckptThreadMain, (void*)pNew); + } + + if( rc!=SQLITE_OK ){ + sqlite3_bgckpt_destroy(pNew); + pNew = 0; + } + *pp = pNew; + return rc; +} + +int sqlite3_bgckpt_checkpoint(Checkpointer *p, int bBlock){ + int rc; + pthread_mutex_lock(&p->mutex); + rc = p->rc; + if( rc==SQLITE_OK ){ + p->bCkpt = 1; + pthread_cond_broadcast(&p->cond); + } + pthread_mutex_unlock(&p->mutex); + return rc; +} + +#ifdef SQLITE_TEST +#include "tclsqlite.h" + +const char *sqlite3ErrName(int rc); + +static void SQLITE_TCLAPI bgckpt_del(void * clientData){ + Checkpointer *pCkpt = (Checkpointer*)clientData; + sqlite3_bgckpt_destroy(pCkpt); +} + +/* +** Tclcmd: $ckpt SUBCMD ... +*/ +static int SQLITE_TCLAPI bgckpt_obj_cmd( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + Checkpointer *pCkpt = (Checkpointer*)clientData; + const char *aCmd[] = { "checkpoint", "destroy", 0 }; + int iCmd; + + if( objc<2 ){ + Tcl_WrongNumArgs(interp, 1, objv, "SUBCMD ..."); + return TCL_ERROR; + } + + if( Tcl_GetIndexFromObj(interp, objv[1], aCmd, "sub-command", 0, &iCmd) ){ + return TCL_ERROR; + } + + switch( iCmd ){ + case 0: { + int rc; + int bBlock = 0; + + if( objc>3 ){ + Tcl_WrongNumArgs(interp, 2, objv, "?BLOCKING?"); + return TCL_ERROR; + } + if( objc==3 && Tcl_GetBooleanFromObj(interp, objv[2], &bBlock) ){ + return TCL_ERROR; + } + + rc = sqlite3_bgckpt_checkpoint(pCkpt, bBlock); + if( rc!=SQLITE_OK ){ + Tcl_SetObjResult(interp, Tcl_NewStringObj(sqlite3ErrName(rc), -1)); + return TCL_ERROR; + } + break; + } + + case 1: { + Tcl_DeleteCommand(interp, Tcl_GetString(objv[0])); + break; + } + } + + return TCL_OK; +} + +/* +** Tclcmd: bgckpt CMDNAME FILENAME +*/ +static int SQLITE_TCLAPI bgckpt_cmd( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + const char *zCmd; + const char *zFilename; + int rc; + Checkpointer *pCkpt; + + if( 
objc!=3 ){ + Tcl_WrongNumArgs(interp, 1, objv, "CMDNAME FILENAME"); + return TCL_ERROR; + } + zCmd = Tcl_GetString(objv[1]); + zFilename = Tcl_GetString(objv[2]); + + rc = sqlite3_bgckpt_create(zFilename, &pCkpt); + if( rc!=SQLITE_OK ){ + Tcl_SetObjResult(interp, Tcl_NewStringObj(sqlite3ErrName(rc), -1)); + return TCL_ERROR; + } + + Tcl_CreateObjCommand(interp, zCmd, bgckpt_obj_cmd, (void*)pCkpt, bgckpt_del); + Tcl_SetObjResult(interp, objv[1]); + return TCL_OK; +} + +int Bgckpt_Init(Tcl_Interp *interp){ + Tcl_CreateObjCommand(interp, "bgckpt", bgckpt_cmd, 0, 0); + return TCL_OK; +} +#endif /* SQLITE_TEST */ + +#else +# include "tclsqlite.h" +int Bgckpt_Init(Tcl_Interp *interp){ return TCL_OK; } +#endif diff --git a/ext/rbu/sqlite3rbu.c b/ext/rbu/sqlite3rbu.c index e3bcd5fc79..b8f79e033c 100644 --- a/ext/rbu/sqlite3rbu.c +++ b/ext/rbu/sqlite3rbu.c @@ -89,6 +89,10 @@ #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_RBU) #include "sqlite3rbu.h" +#ifdef SQLITE_ENABLE_WAL_BIGHASH +# error "may not use SQLITE_ENABLE_RBU with SQLITE_ENABLE_WAL_BIGHASH" +#endif + #if defined(_WIN32_WCE) #include "windows.h" #endif diff --git a/ext/session/changebatch1.test b/ext/session/changebatch1.test new file mode 100644 index 0000000000..a4c0d3bd9b --- /dev/null +++ b/ext/session/changebatch1.test @@ -0,0 +1,223 @@ +# 2016 August 23 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. +# + +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. 
test] +} +source $testdir/tester.tcl +ifcapable !session {finish_test; return} + +set testprefix changebatch1 + + +proc sql_to_changeset {method sql} { + sqlite3session S db main + S attach * + execsql $sql + set ret [S $method] + S delete + return $ret +} + +proc do_changebatch_test {tn method args} { + set C [list] + foreach a $args { + lappend C [sql_to_changeset $method $a] + } + + sqlite3changebatch cb db + set i 1 + foreach ::cs [lrange $C 0 end-1] { + set rc [cb add $::cs] + if {$rc!="SQLITE_OK"} { error "expected SQLITE_OK, got $rc (i=$i)" } + incr i + } + + set ::cs [lindex $C end] + do_test $tn { cb add [set ::cs] } SQLITE_CONSTRAINT + cb delete +} + +proc do_changebatch_test1 {tn args} { + uplevel do_changebatch_test $tn changeset $args +} +proc do_changebatch_test2 {tn args} { + uplevel do_changebatch_test $tn fullchangeset $args +} + +#------------------------------------------------------------------------- +# The body of the following loop contains tests for database schemas +# that do not feature multi-column UNIQUE constraints. In this case +# it doesn't matter if the changesets are generated using +# sqlite3session_changeset() or sqlite3session_fullchangeset(). 
+# +foreach {tn testfunction} { + 1 do_changebatch_test1 + 2 do_changebatch_test2 +} { + reset_db + + #------------------------------------------------------------------------- + # + do_execsql_test $tn.1.0 { + CREATE TABLE t1(a PRIMARY KEY, b); + } + + $testfunction $tn.1.1 { + INSERT INTO t1 VALUES(1, 1); + } { + DELETE FROM t1 WHERE a=1; + } + + do_execsql_test $tn.1.2.0 { + INSERT INTO t1 VALUES(1, 1); + INSERT INTO t1 VALUES(2, 2); + INSERT INTO t1 VALUES(3, 3); + } + $testfunction $tn.1.2.1 { + DELETE FROM t1 WHERE a=2; + } { + INSERT INTO t1 VALUES(2, 2); + } + + #------------------------------------------------------------------------- + # + do_execsql_test $tn.2.0 { + CREATE TABLE x1(a, b PRIMARY KEY, c UNIQUE); + CREATE TABLE x2(a PRIMARY KEY, b UNIQUE, c UNIQUE); + CREATE INDEX x1a ON x1(a); + + INSERT INTO x1 VALUES(1, 1, 'a'); + INSERT INTO x1 VALUES(1, 2, 'b'); + INSERT INTO x1 VALUES(1, 3, 'c'); + } + + $testfunction $tn.2.1 { + DELETE FROM x1 WHERE b=2; + } { + UPDATE x1 SET c='b' WHERE b=3; + } + + $testfunction $tn.2.2 { + DELETE FROM x1 WHERE b=1; + } { + INSERT INTO x1 VALUES(1, 5, 'a'); + } + + set L [list] + for {set i 1000} {$i < 10000} {incr i} { + lappend L "INSERT INTO x2 VALUES($i, $i, 'x' || $i)" + } + lappend L "DELETE FROM x2 WHERE b=1005" + $testfunction $tn.2.3 {*}$L + + execsql { INSERT INTO x1 VALUES('f', 'f', 'f') } + $testfunction $tn.2.4 { + INSERT INTO x2 VALUES('f', 'f', 'f'); + } { + INSERT INTO x1 VALUES('g', 'g', 'g'); + } { + DELETE FROM x1 WHERE b='f'; + } { + INSERT INTO x2 VALUES('g', 'g', 'g'); + } { + INSERT INTO x1 VALUES('f', 'f', 'f'); + } + + execsql { + DELETE FROM x1; + INSERT INTO x1 VALUES(1.5, 1.5, 1.5); + } + $testfunction $tn.2.5 { + DELETE FROM x1 WHERE b BETWEEN 1 AND 2; + } { + INSERT INTO x1 VALUES(2.5, 2.5, 2.5); + } { + INSERT INTO x1 VALUES(1.5, 1.5, 1.5); + } + + execsql { + DELETE FROM x2; + INSERT INTO x2 VALUES(X'abcd', X'1234', X'7890'); + INSERT INTO x2 VALUES(X'0000', X'0000', X'0000'); + } + 
$testfunction $tn.2.6 { + UPDATE x2 SET c = X'1234' WHERE a=X'abcd'; + INSERT INTO x2 VALUES(X'1234', X'abcd', X'7890'); + } { + DELETE FROM x2 WHERE b=X'0000'; + } { + INSERT INTO x2 VALUES(1, X'0000', NULL); + } +} + +#------------------------------------------------------------------------- +# Test some multi-column UNIQUE constraints. First using _changeset() to +# demonstrate the problem, then using _fullchangeset() to show that it has +# been fixed. +# +reset_db +do_execsql_test 3.0 { + CREATE TABLE y1(a PRIMARY KEY, b, c, UNIQUE(b, c)); + INSERT INTO y1 VALUES(1, 1, 1); + INSERT INTO y1 VALUES(2, 2, 2); + INSERT INTO y1 VALUES(3, 3, 3); + INSERT INTO y1 VALUES(4, 3, 4); + BEGIN; +} + +do_test 3.1.1 { + set c1 [sql_to_changeset changeset { DELETE FROM y1 WHERE a=4 }] + set c2 [sql_to_changeset changeset { UPDATE y1 SET c=4 WHERE a=3 }] + sqlite3changebatch cb db + cb add $c1 + cb add $c2 +} {SQLITE_OK} +do_test 3.1.2 { + cb delete + execsql ROLLBACK +} {} + +do_test 3.2.1 { + set c1 [sql_to_changeset fullchangeset { DELETE FROM y1 WHERE a=4 }] + set c2 [sql_to_changeset fullchangeset { UPDATE y1 SET c=4 WHERE a=3 }] + sqlite3changebatch cb db + cb add $c1 + cb add $c2 +} {SQLITE_CONSTRAINT} +do_test 3.2.2 { + cb delete +} {} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 4.0 { + CREATE TABLE t1(x, y, z, PRIMARY KEY(x, y), UNIQUE(z)); +} + +do_test 4.1 { + set c1 [sql_to_changeset fullchangeset { INSERT INTO t1 VALUES(1, 2, 3) }] + execsql { + DROP TABLE t1; + CREATE TABLE t1(w, x, y, z, PRIMARY KEY(x, y), UNIQUE(z)); + } + sqlite3changebatch cb db + list [catch { cb add $c1 } msg] $msg +} {1 SQLITE_RANGE} + +cb delete + + + +finish_test diff --git a/ext/session/changebatchfault.test b/ext/session/changebatchfault.test new file mode 100644 index 0000000000..209b60e0c8 --- /dev/null +++ b/ext/session/changebatchfault.test @@ -0,0 +1,42 @@ +# 2011 Mar 21 +# +# The author disclaims copyright to this
source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# The focus of this file is testing the session module. +# + +if {![info exists testdir]} { + set testdir [file join [file dirname [info script]] .. .. test] +} +source [file join [file dirname [info script]] session_common.tcl] +source $testdir/tester.tcl +ifcapable !session {finish_test; return} +set testprefix changebatchfault + +do_execsql_test 1.0 { + CREATE TABLE t1(a, b, c PRIMARY KEY, UNIQUE(a, b)); + INSERT INTO t1 VALUES('a', 'a', 'a'); + INSERT INTO t1 VALUES('b', 'b', 'b'); +} + +set ::c1 [changeset_from_sql { delete from t1 where c='a' }] +set ::c2 [changeset_from_sql { insert into t1 values('c', 'c', 'c') }] + +do_faultsim_test 1 -faults oom-* -body { + sqlite3changebatch cb db + cb add $::c1 + cb add $::c2 +} -test { + faultsim_test_result {0 SQLITE_OK} {1 SQLITE_NOMEM} + catch { cb delete } +} + + +finish_test diff --git a/ext/session/sessionH.test b/ext/session/sessionH.test index e1b12571c6..c8b41600ea 100644 --- a/ext/session/sessionH.test +++ b/ext/session/sessionH.test @@ -29,7 +29,7 @@ do_test 1.0 { WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERe i<10000 ) - INSERT INTO t1 SELECT 'abcde', randomblob(16), i FROM s; + INSERT INTO t1 SELECT 'abcde', randomblob(18), i FROM s; } compare_db db db2 } {} diff --git a/ext/session/sqlite3changebatch.c b/ext/session/sqlite3changebatch.c new file mode 100644 index 0000000000..b3fcd731ee --- /dev/null +++ b/ext/session/sqlite3changebatch.c @@ -0,0 +1,485 @@ + +#if !defined(SQLITE_TEST) || (defined(SQLITE_ENABLE_SESSION) && defined(SQLITE_ENABLE_PREUPDATE_HOOK)) + +#include "sqlite3session.h" +#include "sqlite3changebatch.h" + +#include +#include + +typedef struct BatchTable BatchTable; +typedef struct 
BatchIndex BatchIndex; +typedef struct BatchIndexEntry BatchIndexEntry; +typedef struct BatchHash BatchHash; + +struct sqlite3_changebatch { + sqlite3 *db; /* Database handle used to read schema */ + BatchTable *pTab; /* First in linked list of tables */ + int iChangesetId; /* Current changeset id */ + int iNextIdxId; /* Next available index id */ + int nEntry; /* Number of entries in hash table */ + int nHash; /* Number of hash buckets */ + BatchIndexEntry **apHash; /* Array of hash buckets */ +}; + +struct BatchTable { + BatchIndex *pIdx; /* First in linked list of UNIQUE indexes */ + BatchTable *pNext; /* Next table */ + char zTab[1]; /* Table name */ +}; + +struct BatchIndex { + BatchIndex *pNext; /* Next index on same table */ + int iId; /* Index id (assigned internally) */ + int bPk; /* True for PK index */ + int nCol; /* Size of aiCol[] array */ + int *aiCol; /* Array of columns that make up index */ +}; + +struct BatchIndexEntry { + BatchIndexEntry *pNext; /* Next colliding hash table entry */ + int iChangesetId; /* Id of associated changeset */ + int iIdxId; /* Id of index this key is from */ + int szRecord; + char aRecord[1]; +}; + +/* +** Allocate and zero a block of nByte bytes. Must be freed using cbFree(). +*/ +static void *cbMalloc(int *pRc, int nByte){ + void *pRet; + + if( *pRc ){ + pRet = 0; + }else{ + pRet = sqlite3_malloc(nByte); + if( pRet ){ + memset(pRet, 0, nByte); + }else{ + *pRc = SQLITE_NOMEM; + } + } + + return pRet; +} + +/* +** Free an allocation made by cbMalloc(). +*/ +static void cbFree(void *p){ + sqlite3_free(p); +} + +/* +** Return the hash bucket that pEntry belongs in. +*/ +static int cbHash(sqlite3_changebatch *p, BatchIndexEntry *pEntry){ + unsigned int iHash = (unsigned int)pEntry->iIdxId; + unsigned char *pEnd = (unsigned char*)&pEntry->aRecord[pEntry->szRecord]; + unsigned char *pIter; + + for(pIter=(unsigned char*)pEntry->aRecord; pIternHash); +} + +/* +** Resize the hash table. 
+*/ +static int cbHashResize(sqlite3_changebatch *p){ + int rc = SQLITE_OK; + BatchIndexEntry **apNew; + int nNew = (p->nHash ? p->nHash*2 : 512); /* double, or start at 512 */ + int i; + + apNew = cbMalloc(&rc, sizeof(BatchIndexEntry*) * nNew); + if( rc==SQLITE_OK ){ + int nHash = p->nHash; /* old bucket count */ + p->nHash = nNew; /* cbHash() must use the new size */ + for(i=0; i<nHash; i++){ + BatchIndexEntry *pEntry; + while( (pEntry=p->apHash[i])!=0 ){ + int iHash = cbHash(p, pEntry); + p->apHash[i] = pEntry->pNext; + pEntry->pNext = apNew[iHash]; + apNew[iHash] = pEntry; + } + } + + cbFree(p->apHash); + p->apHash = apNew; + } + + return rc; +} + + +/* +** Allocate a new sqlite3_changebatch object. +*/ +int sqlite3changebatch_new(sqlite3 *db, sqlite3_changebatch **pp){ + sqlite3_changebatch *pRet; + int rc = SQLITE_OK; + *pp = pRet = (sqlite3_changebatch*)cbMalloc(&rc, sizeof(sqlite3_changebatch)); + if( pRet ){ + pRet->db = db; + } + return rc; +} + +/* +** Add a BatchIndex entry for index zIdx to table pTab. +*/ +static int cbAddIndex( + sqlite3_changebatch *p, + BatchTable *pTab, + const char *zIdx, + int bPk +){ + int nCol = 0; + sqlite3_stmt *pIndexInfo = 0; + BatchIndex *pNew = 0; + int rc; + char *zIndexInfo; + + zIndexInfo = (char*)sqlite3_mprintf("PRAGMA main.index_info = %Q", zIdx); + if( zIndexInfo ){ + rc = sqlite3_prepare_v2(p->db, zIndexInfo, -1, &pIndexInfo, 0); + sqlite3_free(zIndexInfo); + }else{ + rc = SQLITE_NOMEM; + } + + if( rc==SQLITE_OK ){ + while( SQLITE_ROW==sqlite3_step(pIndexInfo) ){ nCol++; } + rc = sqlite3_reset(pIndexInfo); + } + + pNew = (BatchIndex*)cbMalloc(&rc, sizeof(BatchIndex) + sizeof(int) * nCol); + if( rc==SQLITE_OK ){ + pNew->nCol = nCol; + pNew->bPk = bPk; + pNew->aiCol = (int*)&pNew[1]; + pNew->iId = p->iNextIdxId++; + while( SQLITE_ROW==sqlite3_step(pIndexInfo) ){ + int i = sqlite3_column_int(pIndexInfo, 0); + int j = sqlite3_column_int(pIndexInfo, 1); + pNew->aiCol[i] = j; + } + rc = sqlite3_reset(pIndexInfo); + } + + if( rc==SQLITE_OK ){ + pNew->pNext = pTab->pIdx; + pTab->pIdx = pNew; + }else{ + cbFree(pNew); + } + sqlite3_finalize(pIndexInfo); + + return rc; +}
+ +/* +** Free the object passed as the first argument. +*/ +static void cbFreeTable(BatchTable *pTab){ + BatchIndex *pIdx; + BatchIndex *pIdxNext; + for(pIdx=pTab->pIdx; pIdx; pIdx=pIdxNext){ + pIdxNext = pIdx->pNext; + cbFree(pIdx); + } + cbFree(pTab); +} + +/* +** Find or create the BatchTable object named zTab. +*/ +static int cbFindTable( + sqlite3_changebatch *p, + const char *zTab, + BatchTable **ppTab +){ + BatchTable *pRet = 0; + int rc = SQLITE_OK; + + for(pRet=p->pTab; pRet; pRet=pRet->pNext){ + if( 0==sqlite3_stricmp(zTab, pRet->zTab) ) break; + } + + if( pRet==0 ){ + int nTab = (int)strlen(zTab); + pRet = (BatchTable*)cbMalloc(&rc, nTab + sizeof(BatchTable)); + if( pRet ){ + sqlite3_stmt *pIndexList = 0; + char *zIndexList = 0; + int rc2; + memcpy(pRet->zTab, zTab, nTab); + + zIndexList = sqlite3_mprintf("PRAGMA main.index_list = %Q", zTab); + if( zIndexList==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(p->db, zIndexList, -1, &pIndexList, 0); + sqlite3_free(zIndexList); + } + + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pIndexList) ){ + if( sqlite3_column_int(pIndexList, 2) ){ + const char *zIdx = (const char*)sqlite3_column_text(pIndexList, 1); + const char *zTyp = (const char*)sqlite3_column_text(pIndexList, 3); + rc = (zIdx && zTyp) ? cbAddIndex(p, pRet, zIdx, (zTyp[0]=='p')) : SQLITE_NOMEM; /* column_text() is NULL on OOM */ + } + } + rc2 = sqlite3_finalize(pIndexList); + if( rc==SQLITE_OK ) rc = rc2; + + if( rc==SQLITE_OK ){ + pRet->pNext = p->pTab; + p->pTab = pRet; + }else{ + cbFreeTable(pRet); + pRet = 0; + } + } + } + + *ppTab = pRet; + return rc; +} + +/* +** Extract value iVal from the changeset iterator passed as the first +** argument. Set *ppVal to point to the value before returning. +** +** This function attempts to extract the value using function xVal +** (which is always either sqlite3changeset_new or sqlite3changeset_old). +** If the call returns SQLITE_OK but does not supply an sqlite3_value* +** pointer, an attempt to extract the value is made using the xFallback +** function.
+*/ +static int cbGetChangesetValue( + sqlite3_changeset_iter *pIter, + int (*xVal)(sqlite3_changeset_iter*,int,sqlite3_value**), + int (*xFallback)(sqlite3_changeset_iter*,int,sqlite3_value**), + int iVal, + sqlite3_value **ppVal +){ + int rc = xVal(pIter, iVal, ppVal); + if( rc==SQLITE_OK && *ppVal==0 && xFallback ){ + rc = xFallback(pIter, iVal, ppVal); + } + return rc; +} + +static int cbAddToHash( + sqlite3_changebatch *p, + sqlite3_changeset_iter *pIter, + BatchIndex *pIdx, + int (*xVal)(sqlite3_changeset_iter*,int,sqlite3_value**), + int (*xFallback)(sqlite3_changeset_iter*,int,sqlite3_value**), + int *pbConf +){ + BatchIndexEntry *pNew; + int sz = pIdx->nCol; /* one type byte per column */ + int i; + int iOut = 0; + int rc = SQLITE_OK; + + for(i=0; rc==SQLITE_OK && i<pIdx->nCol; i++){ + sqlite3_value *pVal; + rc = cbGetChangesetValue(pIter, xVal, xFallback, pIdx->aiCol[i], &pVal); + if( rc==SQLITE_OK ){ + int eType = 0; + if( pVal ) eType = sqlite3_value_type(pVal); + switch( eType ){ + case 0: + case SQLITE_NULL: + return SQLITE_OK; + + case SQLITE_INTEGER: + sz += 8; + break; + case SQLITE_FLOAT: + sz += 8; + break; + + default: + assert( eType==SQLITE_TEXT || eType==SQLITE_BLOB ); + sz += sqlite3_value_bytes(pVal); + break; + } + } + } + + pNew = cbMalloc(&rc, sizeof(BatchIndexEntry) + sz); + if( pNew ){ + pNew->iChangesetId = p->iChangesetId; + pNew->iIdxId = pIdx->iId; + pNew->szRecord = sz; + + for(i=0; i<pIdx->nCol; i++){ + int eType; + sqlite3_value *pVal; + rc = cbGetChangesetValue(pIter, xVal, xFallback, pIdx->aiCol[i], &pVal); + if( rc!=SQLITE_OK ) break; /* coverage: condition is never true */ + eType = sqlite3_value_type(pVal); + pNew->aRecord[iOut++] = eType; + switch( eType ){ + case SQLITE_INTEGER: { + sqlite3_int64 i64 = sqlite3_value_int64(pVal); + memcpy(&pNew->aRecord[iOut], &i64, 8); + iOut += 8; + break; + } + case SQLITE_FLOAT: { + double d64 = sqlite3_value_double(pVal); + memcpy(&pNew->aRecord[iOut], &d64, sizeof(double)); + iOut += sizeof(double); + break; + } + + default: {
+ int nByte = sqlite3_value_bytes(pVal); + const char *z = (const char*)sqlite3_value_blob(pVal); + memcpy(&pNew->aRecord[iOut], z, nByte); + iOut += nByte; + break; + } + } + } + } + + if( rc==SQLITE_OK && p->nEntry>=(p->nHash/2) ){ + rc = cbHashResize(p); + } + + if( rc==SQLITE_OK ){ + BatchIndexEntry *pIter; + int iHash = cbHash(p, pNew); + + assert( iHash>=0 && iHash<p->nHash ); + for(pIter=p->apHash[iHash]; pIter; pIter=pIter->pNext){ + if( pNew->iIdxId==pIter->iIdxId && pNew->szRecord==pIter->szRecord /* same index, same key */ + && 0==memcmp(pNew->aRecord, pIter->aRecord, pNew->szRecord) + ){ + if( pNew->iChangesetId!=pIter->iChangesetId ){ + *pbConf = 1; + } + cbFree(pNew); + pNew = 0; + break; + } + } + + if( pNew ){ + pNew->pNext = p->apHash[iHash]; + p->apHash[iHash] = pNew; + p->nEntry++; + } + }else{ + cbFree(pNew); + } + + return rc; +} + + +/* +** Add a changeset to the current batch. +*/ +int sqlite3changebatch_add(sqlite3_changebatch *p, void *pBuf, int nBuf){ + sqlite3_changeset_iter *pIter; /* Iterator opened on pBuf/nBuf */ + int rc; /* Return code */ + int bConf = 0; /* Conflict was detected */ + + rc = sqlite3changeset_start_v2(&pIter, nBuf, pBuf,SQLITE_CHANGESETSTART_FULL); + if( rc==SQLITE_OK ){ + int rc2; + for(rc2 = sqlite3changeset_next(pIter); + rc2==SQLITE_ROW; + rc2 = sqlite3changeset_next(pIter) + ){ + BatchTable *pTab; + BatchIndex *pIdx; + const char *zTab; /* Table this change applies to */ + int nCol; /* Number of columns in table */ + int op; /* UPDATE, INSERT or DELETE */ + + sqlite3changeset_op(pIter, &zTab, &nCol, &op, 0); + assert( op==SQLITE_INSERT || op==SQLITE_UPDATE || op==SQLITE_DELETE ); + + rc = cbFindTable(p, zTab, &pTab); + assert( pTab || rc!=SQLITE_OK ); + if( pTab ){ + for(pIdx=pTab->pIdx; pIdx && rc==SQLITE_OK; pIdx=pIdx->pNext){ + if( op==SQLITE_UPDATE && pIdx->bPk ) continue; + if( op==SQLITE_UPDATE || op==SQLITE_DELETE ){ + rc = cbAddToHash(p, pIter, pIdx, sqlite3changeset_old, 0, &bConf); + } + if( rc==SQLITE_OK && (op==SQLITE_UPDATE || op==SQLITE_INSERT) ){ + rc = cbAddToHash(p, pIter,
pIdx, + sqlite3changeset_new, sqlite3changeset_old, &bConf + ); + } + } + } + if( rc!=SQLITE_OK ) break; + } + + rc2 = sqlite3changeset_finalize(pIter); + if( rc==SQLITE_OK ) rc = rc2; + } + + if( rc==SQLITE_OK && bConf ){ + rc = SQLITE_CONSTRAINT; + } + p->iChangesetId++; + return rc; +} + +/* +** Zero an existing changebatch object. Frees every hash entry and the bucket array. +*/ +void sqlite3changebatch_zero(sqlite3_changebatch *p){ + int i; + for(i=0; i<p->nHash; i++){ + BatchIndexEntry *pEntry; + BatchIndexEntry *pNext; + for(pEntry=p->apHash[i]; pEntry; pEntry=pNext){ + pNext = pEntry->pNext; + cbFree(pEntry); + } + } + cbFree(p->apHash); + p->nHash = p->nEntry = 0; /* entry count must track the (now empty) table */ + p->apHash = 0; +} + +/* +** Delete a changebatch object. +*/ +void sqlite3changebatch_delete(sqlite3_changebatch *p){ + BatchTable *pTab; + BatchTable *pTabNext; + + sqlite3changebatch_zero(p); + for(pTab=p->pTab; pTab; pTab=pTabNext){ + pTabNext = pTab->pNext; + cbFreeTable(pTab); + } + cbFree(p); +} + +/* +** Return the db handle. +*/ +sqlite3 *sqlite3changebatch_db(sqlite3_changebatch *p){ + return p->db; +} + +#endif /* SQLITE_ENABLE_SESSION && SQLITE_ENABLE_PREUPDATE_HOOK */ diff --git a/ext/session/sqlite3changebatch.h b/ext/session/sqlite3changebatch.h new file mode 100644 index 0000000000..3476629e58 --- /dev/null +++ b/ext/session/sqlite3changebatch.h @@ -0,0 +1,82 @@ + +#if !defined(SQLITECHANGEBATCH_H_) +#define SQLITECHANGEBATCH_H_ 1 + +typedef struct sqlite3_changebatch sqlite3_changebatch; + +/* +** Create a new changebatch object for detecting conflicts between +** changesets associated with a schema equivalent to that of the "main" +** database of the open database handle db passed as the first +** parameter. It is the responsibility of the caller to ensure that +** the database handle is not closed until after the changebatch +** object has been deleted. +** +** A changebatch object is used to detect batches of non-conflicting +** changesets.
Changesets that do not conflict may be applied to the +** target database in any order without affecting the final state of +** the database. +** +** The changebatch object only works reliably if PRIMARY KEY and UNIQUE +** constraints on tables affected by the changesets use collation +** sequences that are equivalent to built-in collation sequence +** BINARY for the == operation. +** +** If successful, SQLITE_OK is returned and (*pp) set to point to +** the new changebatch object. If an error occurs, an SQLite error +** code is returned and the final value of (*pp) is undefined. +*/ +int sqlite3changebatch_new(sqlite3 *db, sqlite3_changebatch **pp); + +/* +** Argument p points to a buffer containing a changeset n bytes in +** size. Assuming no error occurs, this function returns SQLITE_OK +** if the changeset does not conflict with any changeset passed +** to an sqlite3changebatch_add() call made on the same +** sqlite3_changebatch* handle since the most recent call to +** sqlite3changebatch_zero(). If the changeset does conflict with +** an earlier such changeset, SQLITE_CONSTRAINT is returned. Or, +** if an error occurs, some other SQLite error code may be returned. +** +** One changeset is said to conflict with another if +** either: +** +** * the two changesets contain operations (INSERT, UPDATE or +** DELETE) on the same row, identified by primary key, or +** +** * the two changesets contain operations (INSERT, UPDATE or +** DELETE) on rows with identical values in any combination +** of fields constrained by a UNIQUE constraint. +** +** Even if this function returns SQLITE_CONSTRAINT, the current +** changeset is added to the internal data structures - so future +** calls to this function may conflict with it. If this function +** returns any result code other than SQLITE_OK or SQLITE_CONSTRAINT, +** the result of any future call to sqlite3changebatch_add() is +** undefined. +** +** Only changesets may be passed to this function.
Passing a +** patchset to this function results in an SQLITE_MISUSE error. +*/ +int sqlite3changebatch_add(sqlite3_changebatch*, void *p, int n); + +/* +** Zero a changebatch object. This causes the records of all earlier +** calls to sqlite3changebatch_add() to be discarded. +*/ +void sqlite3changebatch_zero(sqlite3_changebatch*); + +/* +** Return a copy of the first argument passed to the sqlite3changebatch_new() +** call used to create the changebatch object passed as the only argument +** to this function. +*/ +sqlite3 *sqlite3changebatch_db(sqlite3_changebatch*); + +/* +** Delete a changebatch object. +*/ +void sqlite3changebatch_delete(sqlite3_changebatch*); + +#endif /* !defined(SQLITECHANGEBATCH_H_) */ + diff --git a/ext/session/sqlite3session.c b/ext/session/sqlite3session.c index 90fedc6db4..952baae1e0 100644 --- a/ext/session/sqlite3session.c +++ b/ext/session/sqlite3session.c @@ -27,6 +27,13 @@ typedef struct SessionInput SessionInput; #define SESSIONS_ROWID "_rowid_" +/* +** The three different types of changesets generated. 
+*/ +#define SESSIONS_PATCHSET 0 +#define SESSIONS_CHANGESET 1 +#define SESSIONS_FULLCHANGESET 2 + static int sessions_strm_chunk_size = SESSIONS_STRM_CHUNK_SIZE; typedef struct SessionHook SessionHook; @@ -98,6 +105,7 @@ struct SessionInput { struct sqlite3_changeset_iter { SessionInput in; /* Input buffer or stream */ SessionBuffer tblhdr; /* Buffer to hold apValue/zTab/abPK/ */ + int bChangebatch; /* True for changebatch_add() */ int bPatchset; /* True if this is a patchset */ int bInvert; /* True to invert changeset */ int bSkipEmpty; /* Skip noop UPDATE changes */ @@ -2668,7 +2676,7 @@ static void sessionAppendCol( */ static int sessionAppendUpdate( SessionBuffer *pBuf, /* Buffer to append to */ - int bPatchset, /* True for "patchset", 0 for "changeset" */ + int ePatchset, /* True for "patchset", 0 for "changeset" */ sqlite3_stmt *pStmt, /* Statement handle pointing at new row */ SessionChange *p, /* Object containing old values */ u8 *abPK /* Boolean array - true for PK columns */ @@ -2732,8 +2740,8 @@ static int sessionAppendUpdate( /* Add a field to the old.* record. This is omitted if this module is ** currently generating a patchset. */ - if( bPatchset==0 ){ - if( bChanged || abPK[i] ){ + if( ePatchset!=SESSIONS_PATCHSET ){ + if( ePatchset==SESSIONS_FULLCHANGESET || bChanged || abPK[i] ){ sessionAppendBlob(pBuf, pCsr, nAdvance, &rc); }else{ sessionAppendByte(pBuf, 0, &rc); @@ -2742,7 +2750,7 @@ static int sessionAppendUpdate( /* Add a field to the new.* record. Or the only record if currently ** generating a patchset. */ - if( bChanged || (bPatchset && abPK[i]) ){ + if( bChanged || (ePatchset==SESSIONS_PATCHSET && abPK[i]) ){ sessionAppendCol(&buf2, pStmt, i, &rc); }else{ sessionAppendByte(&buf2, 0, &rc); @@ -2768,7 +2776,7 @@ static int sessionAppendUpdate( */ static int sessionAppendDelete( SessionBuffer *pBuf, /* Buffer to append to */ - int bPatchset, /* True for "patchset", 0 for "changeset" */ + int eChangeset, /* One of SESSIONS_CHANGESET etc. 
*/ SessionChange *p, /* Object containing old values */ int nCol, /* Number of columns in table */ u8 *abPK /* Boolean array - true for PK columns */ @@ -2778,7 +2786,7 @@ static int sessionAppendDelete( sessionAppendByte(pBuf, SQLITE_DELETE, &rc); sessionAppendByte(pBuf, p->bIndirect, &rc); - if( bPatchset==0 ){ + if( eChangeset!=SESSIONS_PATCHSET ){ sessionAppendBlob(pBuf, p->aRecord, p->nRecord, &rc); }else{ int i; @@ -3025,12 +3033,12 @@ static int sessionSelectBind( */ static void sessionAppendTableHdr( SessionBuffer *pBuf, /* Append header to this buffer */ - int bPatchset, /* Use the patchset format if true */ + int ePatchset, /* Use the patchset format if true */ SessionTable *pTab, /* Table object to append header for */ int *pRc /* IN/OUT: Error code */ ){ /* Write a table header */ - sessionAppendByte(pBuf, (bPatchset ? 'P' : 'T'), pRc); + sessionAppendByte(pBuf, (ePatchset==SESSIONS_PATCHSET) ? 'P' : 'T', pRc); sessionAppendVarint(pBuf, pTab->nCol, pRc); sessionAppendBlob(pBuf, pTab->abPK, pTab->nCol, pRc); sessionAppendBlob(pBuf, (u8 *)pTab->zName, (int)strlen(pTab->zName)+1, pRc); @@ -3048,7 +3056,7 @@ static void sessionAppendTableHdr( */ static int sessionGenerateChangeset( sqlite3_session *pSession, /* Session object */ - int bPatchset, /* True for patchset, false for changeset */ + int ePatchset, /* One of SESSIONS_CHANGESET etc. 
*/ int (*xOutput)(void *pOut, const void *pData, int nData), void *pOut, /* First argument for xOutput */ int *pnChangeset, /* OUT: Size of buffer at *ppChangeset */ @@ -3093,7 +3101,7 @@ static int sessionGenerateChangeset( } /* Write a table header */ - sessionAppendTableHdr(&buf, bPatchset, pTab, &rc); + sessionAppendTableHdr(&buf, ePatchset, pTab, &rc); /* Build and compile a statement to execute: */ if( rc==SQLITE_OK ){ @@ -3119,10 +3127,10 @@ static int sessionGenerateChangeset( } }else{ assert( pTab->abPK!=0 ); - rc = sessionAppendUpdate(&buf, bPatchset, pSel, p, pTab->abPK); + rc = sessionAppendUpdate(&buf, ePatchset, pSel, p, pTab->abPK); } }else if( p->op!=SQLITE_INSERT ){ - rc = sessionAppendDelete(&buf, bPatchset, p, pTab->nCol,pTab->abPK); + rc = sessionAppendDelete(&buf, ePatchset, p, pTab->nCol,pTab->abPK); } if( rc==SQLITE_OK ){ rc = sqlite3_reset(pSel); @@ -3181,7 +3189,8 @@ int sqlite3session_changeset( int rc; if( pnChangeset==0 || ppChangeset==0 ) return SQLITE_MISUSE; - rc = sessionGenerateChangeset(pSession, 0, 0, 0, pnChangeset, ppChangeset); + rc = sessionGenerateChangeset( + pSession, SESSIONS_CHANGESET, 0, 0, pnChangeset, ppChangeset); assert( rc || pnChangeset==0 || pSession->bEnableSize==0 || *pnChangeset<=pSession->nMaxChangesetSize ); @@ -3197,7 +3206,8 @@ int sqlite3session_changeset_strm( void *pOut ){ if( xOutput==0 ) return SQLITE_MISUSE; - return sessionGenerateChangeset(pSession, 0, xOutput, pOut, 0, 0); + return sessionGenerateChangeset( + pSession, SESSIONS_CHANGESET, xOutput, pOut, 0, 0); } /* @@ -3209,7 +3219,8 @@ int sqlite3session_patchset_strm( void *pOut ){ if( xOutput==0 ) return SQLITE_MISUSE; - return sessionGenerateChangeset(pSession, 1, xOutput, pOut, 0, 0); + return sessionGenerateChangeset( + pSession, SESSIONS_PATCHSET, xOutput, pOut, 0, 0); } /* @@ -3225,9 +3236,20 @@ int sqlite3session_patchset( void **ppPatchset /* OUT: Buffer containing changeset */ ){ if( pnPatchset==0 || ppPatchset==0 ) return SQLITE_MISUSE; 
- return sessionGenerateChangeset(pSession, 1, 0, 0, pnPatchset, ppPatchset); + return sessionGenerateChangeset( + pSession, SESSIONS_PATCHSET, 0, 0, pnPatchset, ppPatchset); +} + +int sqlite3session_fullchangeset( + sqlite3_session *pSession, /* Session object */ + int *pnChangeset, /* OUT: Size of buffer at *ppChangeset */ + void **ppChangeset /* OUT: Buffer containing changeset */ +){ + return sessionGenerateChangeset( + pSession, SESSIONS_FULLCHANGESET, 0, 0, pnChangeset, ppChangeset); } + /* ** Enable or disable the session object passed as the first argument. */ @@ -3380,8 +3402,13 @@ int sqlite3changeset_start_v2( void *pChangeset, /* Pointer to buffer containing changeset */ int flags ){ + int rc; + sqlite3_changeset_iter *pIter = 0; int bInvert = !!(flags & SQLITE_CHANGESETSTART_INVERT); - return sessionChangesetStart(pp, 0, 0, nChangeset, pChangeset, bInvert, 0); + rc = sessionChangesetStart(&pIter, 0, 0, nChangeset, pChangeset, bInvert, 0); + if( pIter && (flags & SQLITE_CHANGESETSTART_FULL) ) pIter->bChangebatch = 1; + *pp = pIter; + return rc; } /* @@ -3859,8 +3886,12 @@ static int sessionChangesetNextOne( ** ** Such records are technically corrupt, but the rebaser was at one ** point generating them. Under most circumstances this is benign, but - ** can cause spurious SQLITE_RANGE errors when applying the changeset. */ - if( p->bPatchset==0 && p->op==SQLITE_UPDATE){ + ** can cause spurious SQLITE_RANGE errors when applying the changeset. + ** + ** Update for bedrock branch: Do not do this for changebatch_add() on + ** this branch, as changesets generated by sqlite3sessions_fullchangeset() + ** also have this property. */ + if( p->bChangebatch==0 && p->bPatchset==0 && p->op==SQLITE_UPDATE){ for(i=0; inCol; i++){ if( p->abPK[i]==0 && p->apValue[i+p->nCol]==0 ){ sqlite3ValueFree(p->apValue[i]); @@ -6171,10 +6202,11 @@ static int sessionChangegroupOutput( ** hash tables attached to the SessionTable objects in list p->pList. 
*/ for(pTab=pGrp->pList; rc==SQLITE_OK && pTab; pTab=pTab->pNext){ + int eChangeset = pGrp->bPatch ? SESSIONS_PATCHSET : SESSIONS_CHANGESET; int i; if( pTab->nEntry==0 ) continue; - sessionAppendTableHdr(&buf, pGrp->bPatch, pTab, &rc); + sessionAppendTableHdr(&buf, eChangeset, pTab, &rc); for(i=0; inChange; i++){ SessionChange *p; for(p=pTab->apChange[i]; p; p=p->pNext){ diff --git a/ext/session/sqlite3session.h b/ext/session/sqlite3session.h index 28b90eb6b5..1b18e630b5 100644 --- a/ext/session/sqlite3session.h +++ b/ext/session/sqlite3session.h @@ -369,6 +369,32 @@ int sqlite3session_changeset( void **ppChangeset /* OUT: Buffer containing changeset */ ); +/* +** CAPI3REF: Generate A Full Changeset From A Session Object +** +** This function is similar to sqlite3session_changeset(), except that for +** each row affected by an UPDATE statement, all old.* values are recorded +** as part of the changeset, not just those modified. +*/ +int sqlite3session_fullchangeset( + sqlite3_session *pSession, /* Session object */ + int *pnChangeset, /* OUT: Size of buffer at *ppChangeset */ + void **ppChangeset /* OUT: Buffer containing changeset */ +); + +/* +** CAPI3REF: Generate A Full Changeset From A Session Object +** +** This function is similar to sqlite3session_changeset(), except that for +** each row affected by an UPDATE statement, all old.* values are recorded +** as part of the changeset, not just those modified. +*/ +int sqlite3session_fullchangeset( + sqlite3_session *pSession, /* Session object */ + int *pnChangeset, /* OUT: Size of buffer at *ppChangeset */ + void **ppChangeset /* OUT: Buffer containing changeset */ +); + /* ** CAPI3REF: Return An Upper-limit For The Size Of The Changeset ** METHOD: sqlite3_session @@ -574,8 +600,12 @@ int sqlite3changeset_start_v2( ** Invert the changeset while iterating through it. This is equivalent to ** inverting a changeset using sqlite3changeset_invert() before applying it. 
** It is an error to specify this flag with a patchset. +** +**
<dt>SQLITE_CHANGESETSTART_FULL <dd>
+** Do not trim extra fields added to fullchangeset changesets. */ #define SQLITE_CHANGESETSTART_INVERT 0x0002 +#define SQLITE_CHANGESETSTART_FULL 0x0004 /* diff --git a/ext/session/test_session.c b/ext/session/test_session.c index 6ad5b37749..60577c12c5 100644 --- a/ext/session/test_session.c +++ b/ext/session/test_session.c @@ -229,6 +229,7 @@ static int testStreamOutput( ** $session indirect INTEGER ** $session patchset ** $session table_filter SCRIPT +** $session fullchangeset */ static int SQLITE_TCLAPI test_session_cmd( void *clientData, @@ -242,20 +243,20 @@ static int SQLITE_TCLAPI test_session_cmd( const char *zSub; int nArg; const char *zMsg; - int iSub; } aSub[] = { - { "attach", 1, "TABLE", }, /* 0 */ - { "changeset", 0, "", }, /* 1 */ - { "delete", 0, "", }, /* 2 */ - { "enable", 1, "BOOL", }, /* 3 */ - { "indirect", 1, "BOOL", }, /* 4 */ - { "isempty", 0, "", }, /* 5 */ - { "table_filter", 1, "SCRIPT", }, /* 6 */ + { "attach", 1, "TABLE" }, /* 0 */ + { "changeset", 0, "" }, /* 1 */ + { "delete", 0, "" }, /* 2 */ + { "enable", 1, "BOOL" }, /* 3 */ + { "indirect", 1, "BOOL" }, /* 4 */ + { "isempty", 0, "" }, /* 5 */ + { "table_filter", 1, "SCRIPT" }, /* 6 */ { "patchset", 0, "", }, /* 7 */ - { "diff", 2, "FROMDB TBL", }, /* 8 */ - { "memory_used", 0, "", }, /* 9 */ - { "changeset_size", 0, "", }, /* 10 */ - { "object_config", 2, "OPTION INTEGER", }, /* 11 */ + { "diff", 2, "FROMDB TBL" }, /* 8 */ + { "fullchangeset",0, "" }, /* 9 */ + { "memory_used", 0, "", }, /* 10 */ + { "changeset_size", 0, "", }, /* 11 */ + { "object_config", 2, "OPTION INTEGER", }, /* 12 */ { 0 } }; int iSub; @@ -285,10 +286,11 @@ static int SQLITE_TCLAPI test_session_cmd( break; } + case 9: /* fullchangeset */ case 7: /* patchset */ case 1: { /* changeset */ TestSessionsBlob o = {0, 0}; - if( test_tcl_integer(interp, SESSION_STREAM_TCL_VAR) ){ + if( iSub!=9 && test_tcl_integer(interp, SESSION_STREAM_TCL_VAR) ){ void *pCtx = (void*)&o; if( iSub==7 ){ rc = 
sqlite3session_patchset_strm(pSession, testStreamOutput, pCtx); @@ -298,12 +300,15 @@ static int SQLITE_TCLAPI test_session_cmd( }else{ if( iSub==7 ){ rc = sqlite3session_patchset(pSession, &o.n, &o.p); + assert_changeset_is_ok(o.n, o.p); + }else if( iSub==9 ){ + rc = sqlite3session_fullchangeset(pSession, &o.n, &o.p); }else{ rc = sqlite3session_changeset(pSession, &o.n, &o.p); + assert_changeset_is_ok(o.n, o.p); } } if( rc==SQLITE_OK ){ - assert_changeset_is_ok(o.n, o.p); Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(o.p, o.n)); } sqlite3_free(o.p); @@ -313,6 +318,7 @@ static int SQLITE_TCLAPI test_session_cmd( break; } + case 2: /* delete */ Tcl_DeleteCommand(interp, Tcl_GetString(objv[0])); break; @@ -363,18 +369,18 @@ static int SQLITE_TCLAPI test_session_cmd( break; } - case 9: { /* memory_used */ + case 10: { /* memory_used */ sqlite3_int64 nMalloc = sqlite3session_memory_used(pSession); Tcl_SetObjResult(interp, Tcl_NewWideIntObj(nMalloc)); break; } - case 10: { + case 11: { sqlite3_int64 nSize = sqlite3session_changeset_size(pSession); Tcl_SetObjResult(interp, Tcl_NewWideIntObj(nSize)); break; } - case 11: { /* object_config */ + case 12: { /* object_config */ struct ObjConfOpt { const char *zName; int opt; @@ -384,7 +390,6 @@ static int SQLITE_TCLAPI test_session_cmd( { 0, 0 } }; int sz = (int)sizeof(aOpt[0]); - int iArg; Tcl_Size iOpt; if( Tcl_GetIndexFromObjStruct(interp,objv[2],aOpt,sz,"option",0,&iOpt) ){ @@ -623,7 +628,7 @@ static int test_filter_v3_handler( Tcl_DecrRefCount(pEval); return res; -} +} static int test_conflict_handler( void *pCtx, /* Pointer to TestConflictHandler structure */ @@ -1245,6 +1250,127 @@ static int SQLITE_TCLAPI test_sqlite3session_foreach( return TCL_OK; } +#include "sqlite3changebatch.h" + +typedef struct TestChangebatch TestChangebatch; +struct TestChangebatch { + sqlite3_changebatch *pChangebatch; +}; + +/* +** Tclcmd: $changebatch add BLOB +** $changebatch zero +** $changebatch delete +*/ +static int SQLITE_TCLAPI 
test_changebatch_cmd( + void *clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + TestChangebatch *p = (TestChangebatch*)clientData; + sqlite3_changebatch *pChangebatch = p->pChangebatch; + struct SessionSubcmd { + const char *zSub; + int nArg; + const char *zMsg; + int iSub; + } aSub[] = { + { "add", 1, "CHANGESET", }, /* 0 */ + { "zero", 0, "", }, /* 1 */ + { "delete", 0, "", }, /* 2 */ + { 0 } + }; + int iSub; + int rc; + + if( objc<2 ){ + Tcl_WrongNumArgs(interp, 1, objv, "SUBCOMMAND ..."); + return TCL_ERROR; + } + rc = Tcl_GetIndexFromObjStruct(interp, + objv[1], aSub, sizeof(aSub[0]), "sub-command", 0, &iSub + ); + if( rc!=TCL_OK ) return rc; + if( objc!=2+aSub[iSub].nArg ){ + Tcl_WrongNumArgs(interp, 2, objv, aSub[iSub].zMsg); + return TCL_ERROR; + } + + switch( iSub ){ + case 0: { /* add */ + int nArg; + unsigned char *pArg = Tcl_GetByteArrayFromObj(objv[2], &nArg); + rc = sqlite3changebatch_add(pChangebatch, pArg, nArg); + if( rc!=SQLITE_OK && rc!=SQLITE_CONSTRAINT ){ + return test_session_error(interp, rc, 0); + }else{ + extern const char *sqlite3ErrName(int); + Tcl_SetObjResult(interp, Tcl_NewStringObj(sqlite3ErrName(rc), -1)); + } + break; + } + + case 1: { /* zero */ + sqlite3changebatch_zero(pChangebatch); + break; + } + + case 2: /* delete */ + Tcl_DeleteCommand(interp, Tcl_GetString(objv[0])); + break; + } + + return TCL_OK; +} + +static void SQLITE_TCLAPI test_changebatch_del(void *clientData){ + TestChangebatch *p = (TestChangebatch*)clientData; + sqlite3changebatch_delete(p->pChangebatch); + ckfree((char*)p); +} + +/* +** Tclcmd: sqlite3changebatch CMD DB-HANDLE +*/ +static int SQLITE_TCLAPI test_sqlite3changebatch( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + sqlite3 *db; + Tcl_CmdInfo info; + int rc; /* sqlite3session_create() return code */ + TestChangebatch *p; /* New wrapper object */ + + if( objc!=3 ){ + Tcl_WrongNumArgs(interp, 1, objv, "CMD DB-HANDLE"); + return TCL_ERROR; + 
} + + if( 0==Tcl_GetCommandInfo(interp, Tcl_GetString(objv[2]), &info) ){ + Tcl_AppendResult(interp, "no such handle: ", Tcl_GetString(objv[2]), 0); + return TCL_ERROR; + } + db = *(sqlite3 **)info.objClientData; + + p = (TestChangebatch*)ckalloc(sizeof(TestChangebatch)); + memset(p, 0, sizeof(TestChangebatch)); + rc = sqlite3changebatch_new(db, &p->pChangebatch); + if( rc!=SQLITE_OK ){ + ckfree((char*)p); + return test_session_error(interp, rc, 0); + } + + Tcl_CreateObjCommand( + interp, Tcl_GetString(objv[1]), test_changebatch_cmd, (ClientData)p, + test_changebatch_del + ); + Tcl_SetObjResult(interp, objv[1]); + return TCL_OK; +} + /* ** tclcmd: CMD configure REBASE-BLOB ** tclcmd: CMD rebase CHANGESET @@ -1822,6 +1948,10 @@ int TestSession_Init(Tcl_Interp *interp){ Tcl_CreateObjCommand(interp, p->zCmd, p->xProc, 0, 0); } + + Tcl_CreateObjCommand( + interp, "sqlite3changebatch", test_sqlite3changebatch, 0, 0 + ); return TCL_OK; } diff --git a/ext/wasm/api/sqlite3-worker1-promiser.c-pp.js b/ext/wasm/api/sqlite3-worker1-promiser.c-pp.js index b282c5e6e1..55e497ead5 100644 --- a/ext/wasm/api/sqlite3-worker1-promiser.c-pp.js +++ b/ext/wasm/api/sqlite3-worker1-promiser.c-pp.js @@ -1,4 +1,4 @@ -//#if not omit-oo1 +//#ifnot omit-oo1 /* 2022-08-24 @@ -19,12 +19,10 @@ slightly simpler client-side interface than the slightly-lower-level Worker API does. - In non-ESM builds this file necessarily exposes one global symbol, - but clients may freely `delete` that symbol after calling it. + This script necessarily exposes one global symbol, but clients may + freely `delete` that symbol after calling it. 
*/ -//#if not defined target:es6-module 'use strict'; -//#endif /** Configures an sqlite3 Worker API #1 Worker such that it can be manipulated via a Promise-based interface and returns a factory @@ -111,12 +109,10 @@ the callback is called one time for each row of the result set, passed the same worker message format as the worker API emits: - { - type:typeString, + {type:typeString, row:VALUE, rowNumber:1-based-#, - columnNames: array - } + columnNames: array} Where `typeString` is an internally-synthesized message type string used temporarily for worker message dispatching. It can be ignored @@ -127,16 +123,19 @@ callback. At the end of the result set, the same event is fired with - (row=undefined, rowNumber=null) to indicate that the end of the - result set has been reached. The rows arrive via worker-posted - messages, with all the implications of that. + (row=undefined, rowNumber=null) to indicate that + the end of the result set has been reached. Note that the rows + arrive via worker-posted messages, with all the implications + of that. Notable shortcomings: - - "v1" of this this API is not suitable for use as an ESM module - because ESM worker modules were not widely supported when it was - developed. For use as an ESM module, see the "v2" interface later - on in this file. + - This API was not designed with ES6 modules in mind. 
Neither Firefox + nor Safari support, as of March 2023, the {type:"module"} flag to the + Worker constructor, so that particular usage is not something we're going + to target for the time being: + + https://developer.mozilla.org/en-US/docs/Web/API/Worker/Worker */ globalThis.sqlite3Worker1Promiser = function callee(config = callee.defaultConfig){ // Inspired by: https://stackoverflow.com/a/52439530 @@ -255,14 +254,10 @@ globalThis.sqlite3Worker1Promiser = function callee(config = callee.defaultConfi globalThis.sqlite3Worker1Promiser.defaultConfig = { worker: function(){ -//#if target:es6-bundler-friendly +//#if target=es6-module return new Worker(new URL("sqlite3-worker1-bundler-friendly.mjs", import.meta.url),{ type: 'module' }); -//#elif target:es6-module - return new Worker(new URL("sqlite3-worker1.mjs", import.meta.url),{ - type: 'module' - }); //#else let theJs = "sqlite3-worker1.js"; if(this.currentScript){ @@ -280,13 +275,13 @@ globalThis.sqlite3Worker1Promiser.defaultConfig = { return new Worker(theJs + globalThis.location.search); //#endif } -//#if not target:es6-module +//#ifnot target=es6-module .bind({ currentScript: globalThis?.document?.currentScript }) //#endif , - onerror: (...args)=>console.error('sqlite3Worker1Promiser():',...args) + onerror: (...args)=>console.error('worker1 promiser error',...args) }/*defaultConfig*/; /** @@ -301,7 +296,7 @@ globalThis.sqlite3Worker1Promiser.defaultConfig = { after calling the original function and will reject if that function throws. */ -globalThis.sqlite3Worker1Promiser.v2 = function callee(config = callee.defaultConfig){ +sqlite3Worker1Promiser.v2 = function(config){ let oldFunc; if( 'function' == typeof config ){ oldFunc = config; @@ -331,25 +326,21 @@ globalThis.sqlite3Worker1Promiser.v2 = function callee(config = callee.defaultCo } return p; }.bind({ - /* We do this because clients are recommended to delete - globalThis.sqlite3Worker1Promiser. 
*/ + /* We do this because clients are + recommended to delete globalThis.sqlite3Worker1Promiser. */ original: sqlite3Worker1Promiser }); -globalThis.sqlite3Worker1Promiser.v2.defaultConfig = - globalThis.sqlite3Worker1Promiser.defaultConfig; - -//#if target:es6-module +//#if target=es6-module /** When built as a module, we export sqlite3Worker1Promiser.v2() instead of sqlite3Worker1Promise() because (A) its interface is more - conventional for ESM usage and (B) the ESM export option for this - API did not exist until v2 was created, so there's no backwards + conventional for ESM usage and (B) the ESM option export option for + this API did not exist until v2 was created, so there's no backwards incompatibility. */ export default sqlite3Worker1Promiser.v2; -delete globalThis.sqlite3Worker1Promiser; -//#endif /* target:es6-module */ +//#endif /* target=es6-module */ //#else /* Built with the omit-oo1 flag. */ -//#endif if not omit-oo1 +//#endif ifnot omit-oo1 diff --git a/ext/wasm/demo-worker1-promiser.c-pp.html b/ext/wasm/demo-worker1-promiser.c-pp.html index a1005beb93..e0b487bdf3 100644 --- a/ext/wasm/demo-worker1-promiser.c-pp.html +++ b/ext/wasm/demo-worker1-promiser.c-pp.html @@ -6,10 +6,10 @@ -//#if target:es6-module - Worker1-promiser (ESM) tests +//#if target=es6-module + worker-promise (via ESM) tests //#else - Worker1-promiser tests + worker-promise tests //#endif @@ -32,7 +32,7 @@
-//#if target:es6-module +//#if target=es6-module //#else diff --git a/ext/wasm/demo-worker1-promiser.c-pp.js b/ext/wasm/demo-worker1-promiser.c-pp.js index c129e21281..f6fc9568ae 100644 --- a/ext/wasm/demo-worker1-promiser.c-pp.js +++ b/ext/wasm/demo-worker1-promiser.c-pp.js @@ -13,7 +13,7 @@ Demonstration of the sqlite3 Worker API #1 Promiser: a Promise-based proxy for for the sqlite3 Worker #1 API. */ -//#if target:es6-module +//#if target=es6-module import {default as promiserFactory} from "./jswasm/sqlite3-worker1-promiser.mjs"; //#else "use strict"; @@ -40,7 +40,7 @@ delete globalThis.sqlite3Worker1Promiser; }; const promiserConfig = { -//#if not target:es6-module +//#ifnot target=es6-module /** The v1 interfaces uses an onready function. The v2 interface optionally accepts one but does not require it. If provided, it is called _before_ @@ -115,7 +115,6 @@ delete globalThis.sqlite3Worker1Promiser; "insert into t(a,b) values(1,2),(3,4),(5,6)" ].join(';'), resultRows: [], columnNames: [], - lastInsertRowId: true, countChanges: sqConfig.bigIntEnabled ? 64 : true }, function(ev){ ev = ev.result; @@ -123,9 +122,7 @@ delete globalThis.sqlite3Worker1Promiser; .assert(0===ev.columnNames.length) .assert(sqConfig.bigIntEnabled ? 
(3n===ev.changeCount) - : (3===ev.changeCount)) - .assert('bigint'===typeof ev.lastInsertRowId) - .assert(ev.lastInsertRowId>=3); + : (3===ev.changeCount)); }); await wtest('exec',{ diff --git a/main.mk b/main.mk index 8b5d2821cf..e7091e0b0d 100644 --- a/main.mk +++ b/main.mk @@ -779,6 +779,7 @@ TESTSRC = \ $(TOP)/ext/fts3/fts3_term.c \ $(TOP)/ext/fts3/fts3_test.c \ $(TOP)/ext/session/test_session.c \ + $(TOP)/ext/session/sqlite3changebatch.c \ $(TOP)/ext/recover/sqlite3recover.c \ $(TOP)/ext/recover/dbdata.c \ $(TOP)/ext/recover/test_recover.c \ @@ -794,6 +795,7 @@ TESTSRC += \ $(TOP)/ext/misc/amatch.c \ $(TOP)/ext/misc/appendvfs.c \ $(TOP)/ext/misc/basexx.c \ + $(TOP)/ext/misc/bgckpt.c \ $(TOP)/ext/misc/cksumvfs.c \ $(TOP)/ext/misc/closure.c \ $(TOP)/ext/misc/csv.c \ @@ -875,8 +877,8 @@ TESTSRC2 = \ $(TOP)/ext/fts3/fts3_expr.c \ $(TOP)/ext/fts3/fts3_tokenizer.c \ $(TOP)/ext/fts3/fts3_write.c \ - $(TOP)/ext/session/sqlite3session.c \ $(TOP)/ext/misc/stmt.c \ + $(TOP)/ext/session/sqlite3session.c \ fts5.c # Header files used by all library source files. diff --git a/manifest b/manifest index 6319be53e6..0708ded1e4 100644 --- a/manifest +++ b/manifest @@ -1,12 +1,12 @@ -C wasm:\sfilter\sthe\scustom\sModule.instantiateWasm()\sout\sof\snode\sbuilds,\sper\srequest\sfrom\sthe\snpm\sproject. -D 2026-01-12T15:43:18.126 +C Merge\sall\sthe\slatest\strunk\senhancements\sinto\sthe\sbedrock\sbranch. 
+D 2026-01-12T17:18:58.486 F .fossil-settings/binary-glob 61195414528fb3ea9693577e1980230d78a1f8b0a54c78cf1b9b24d0a409ed6a x F .fossil-settings/empty-dirs dbb81e8fc0401ac46a1491ab34a7f2c7c0452f2f06b54ebb845d024ca8283ef1 F .fossil-settings/ignore-glob 35175cdfcf539b2318cb04a9901442804be81cd677d8b889fcc9149c21f239ea F LICENSE.md 6bc480fc673fb4acbc4094e77edb326267dd460162d7723c7f30bee2d3d9e97d F Makefile.in 3ce07126d7e87c7464301482e161fdae6a51d0a2aa06b200b8f0000ef4d6163b F Makefile.linux-generic bd3e3cacd369821a6241d4ea1967395c962dfe3057e38cb0a435cee0e8b789d0 -F Makefile.msc 1fa179beafd6d438b8479146cae77fb1724dd35b330b09dbfebd8a2f0823c62a +F Makefile.msc 651f037e143d940b996b48fad0f773eb3bf5fab148af3e98aa2730c812c16e8b F README.md 3fa51fc7ababc32edd175ae8b2986c86d5ea120c1cb1e57c7f7849492d1405ec F VERSION 74672bfd4c7826c0fc6f84762488a707c52e7d2d94af42ccb0edcc6c74311c41 F art/icon-243x273.gif 9750b734f82fdb3dc43127753d5e6fbf3b62c9f4e136c2fbf573b2f57ea87af5 @@ -50,12 +50,13 @@ F autosetup/proj.tcl ce301197f364f7ce2acabbbd84b43d19e917ec73653157ca134a06f32d3 F autosetup/sqlite-config.tcl 7463d59c9c5e86ca286ea16fdab943058beb9346110049eca435154795890f71 F autosetup/system.tcl 51d4be76cd9a9074704b584e5c9cbba616202c8468cf9ba8a4f8294a7ab1dba9 F autosetup/teaish/README.txt b40071e6f8506500a2f7f71d5fc69e0bf87b9d7678dd9da1e5b4d0acbf40b1ca -F autosetup/teaish/core.tcl e014dd95900c7f9a34e8e0f460f47e94841059827bce8b4c49668b0c7ae3f1a0 +F autosetup/teaish/core.tcl 1ebbe849d8e716424a3ffe9384c7e8b352b3e1194d3d4a153b125cc5176b3715 F autosetup/teaish/feature.tcl 18194fb79a24d30e5bbdeab40999616f39278b53a27525349ded033af2fd73be -F autosetup/teaish/tester.tcl 1799514c2652db49561b3386c5242b94534d1663f2cfac861a955e071895fdd0 +F autosetup/teaish/tester.tcl 091745984473faea6985254b9986c6dfd0cce06f68bc515ba4afc1e6b3742fa8 F configure 9a00b21dfd13757bbfb8d89b30660a89ec1f8f3a79402b8f9f9b6fc475c3303a x F contrib/sqlitecon.tcl eb4c6578e08dd353263958da0dc620f8400b869a50d06e271ab0be85a51a08d3 F 
doc/F2FS.txt c1d4a0ae9711cfe0e1d8b019d154f1c29e0d3abfe820787ba1e9ed7691160fcd +F doc/begin_concurrent.md 4bee2c3990d1eb800f1ce3726a911292a8e4b889300b2ffd4b08d357370db299 F doc/compile-for-unix.md c8f05bf9ff8c588c501515eb11642540572203e53d0b5eb5bf60983acdd47643 F doc/compile-for-windows.md 36601c95fa4070eebfe757684271d17a7c4a586912ba706d0b5e7817e1df54ad F doc/json-enhancements.md e356fc834781f1f1aa22ee300027a270b2c960122468499bf347bb123ce1ea4f @@ -68,6 +69,7 @@ F doc/trusted-schema.md 33625008620e879c7bcfbbfa079587612c434fa094d338b08242288d F doc/vdbesort-memory.md 4da2639c14cd24a31e0af694b1a8dd37eaf277aff3867e9a8cc14046bc49df56 F doc/vfs-shm.txt 1a55f3f0e7b6745931b117ba5c9df3640d7a0536f532ef0052563100f4416f86 F doc/wal-lock.md 7db0cd61e2000b545b78ce89b0c2a9a8dd8d64c097839258ac10d7c5c4156ec1 +F doc/wal2.md a807405a05e19a4945c5905a9ffa0fe45b8560dd7572461192501f565c19cdb5 F ext/README.md 6eb1ac267d917767952ed0ef63f55de003b6a5da433ce1fa389e1a9532e73132 F ext/expert/README.md b321c2762bb93c18ea102d5a5f7753a4b8bac646cb392b3b437f633caf2020c3 F ext/expert/expert.c d548d603a4cc9e61f446cc179c120c6713511c413f82a4a32b1e1e69d3f086a4 @@ -151,6 +153,7 @@ F ext/fts5/test/fts5cat.test bf67dd335f964482ee658287521b81e2b88697b45eb7f73933e F ext/fts5/test/fts5circref.test 0918c69440a73fff429bc9797b07086fc74d018eb3abb1cf9738980390bb2713 F ext/fts5/test/fts5colset.test 544f4998cdbfe06a3123887fc0221612e8aa8192cdaff152872f1aadb10e6897 F ext/fts5/test/fts5columnsize.test 0af91d63985afdf663455d4b572b935238380140d74079eac362760866d3297b +F ext/fts5/test/fts5concurrent.test eaaba0037090138bc86d581ca95f3be7ef8eff60155b5b8fa3014acee4a595c2 F ext/fts5/test/fts5config.test 017daf10d2642496e97402baa0134de8b5b46b9c37e53c229cd9ab711d21522c F ext/fts5/test/fts5conflict.test bf6030a77dbb1bedfcc42e589ed7980846c995765d77460551e448b56d741244 F ext/fts5/test/fts5connect.test 08030168fc96fc278fa81f28654fb7e90566f33aff269c073e19b3ae9126b2f4 @@ -237,7 +240,7 @@ F ext/fts5/test/fts5rowid.test 
8632829fec04996832a4cfb4f0bd89721ba65b7e398c17317 F ext/fts5/test/fts5savepoint.test 1447758d7900afe903cef08b4524c5331fb60c1126ae6fba7f4d8704268013c5 F ext/fts5/test/fts5secure.test a02f771742fb2b1b9bdcb4bf523bcf2d0aa1ff597831d40fe3e72aaa6d0ec40f F ext/fts5/test/fts5secure2.test 2e961d7eef939f294c56b5d895cac7f1c3a60b934ee2cfd5e5e620bdf1ba6bbc -F ext/fts5/test/fts5secure3.test 6d066828d225b0dbe5db818d4d6165df7bb70210e68a577e858e8762400d5a23 +F ext/fts5/test/fts5secure3.test e29f7e92af78a35e1c0f7461dd598f910036d588437db50242d5823576f3d6ee F ext/fts5/test/fts5secure4.test 0d10a80590c07891478700af7793b232962042677432b9846cf7fc8337b67c97 F ext/fts5/test/fts5secure5.test c07a68ced5951567ac116c22f2d2aafae497e47fe9fcb6a335c22f9c7a4f2c3a F ext/fts5/test/fts5secure6.test 74bf04733cc523bccca519bb03d3b4e2ed6f6e3db7c59bf6be82c88a0ac857fd @@ -361,6 +364,7 @@ F ext/misc/appendvfs.c 9642c7a194a2a25dca7ad3e36af24a0a46d7702168c4ad7e59c9f9b0e F ext/misc/base64.c 8dc0a08cee11722822858a62625f1b63e5d5f1adac1cf4492d5732b571e37aa0 F ext/misc/base85.c ff54cc676c6ec86231f75ecc86ea45416fcb69751dfb79690d5f5da5f7d39867 F ext/misc/basexx.c 89ad6b76558efbceb627afd5e2ef1d84b2e96d9aaf9b7ecb20e3d00b51be6fcf +F ext/misc/bgckpt.c 18cfc9c39ffab3299f730f86ae2991c8574c0bd9ec80efd2f89196798a7b7181 F ext/misc/blobio.c a867c4c4617f6ec223a307ebfe0eabb45e0992f74dd47722b96f3e631c0edb2a F ext/misc/btreeinfo.c 13bc9e9f1c13cde370d0e4a6a2683e9f1926a4cead7fb72c71871b11a06d78a1 F ext/misc/cksumvfs.c 9d7d0cf1a8893ac5d48922bfe9f3f217b4a61a6265f559263a02bb2001259913 @@ -465,7 +469,7 @@ F ext/rbu/rbuvacuum.test 542561741ff2b262e3694bc6012b44694ee62c545845319a06f3237 F ext/rbu/rbuvacuum2.test 1a9bd41f127be2826de2a65204df9118525a8af8d16e61e6bc63ba3ac0010a23 F ext/rbu/rbuvacuum3.test 3ce42695fdf21aaa3499e857d7d4253bc499ad759bcd6c9362042c13cd37d8de F ext/rbu/rbuvacuum4.test ffccd22f67e2d0b380d2889685742159dfe0d19a3880ca3d2d1d69eefaebb205 -F ext/rbu/sqlite3rbu.c 
3fb2390575b261c365d3f6fea61ff15e74d5d89e373f2a2bfa4d80c24321e793 +F ext/rbu/sqlite3rbu.c 62e4f13c70e40d42370946c4ed545fffa9fc9d85cf0c5d084f4a0acc53ea31b8 F ext/rbu/sqlite3rbu.h e3a5bf21e09ca93ce4e8740e00d6a853e90a697968ec0ea98f40826938bdb68e F ext/rbu/test_rbu.c 8b6e64e486c28c41ef29f6f4ea6be7b3091958987812784904f5e903f6b56418 F ext/recover/dbdata.c 10d3c56968a9af6853722a47280805ad1564714d79ea45ac6f7da14bb57fd137 @@ -534,6 +538,8 @@ F ext/rtree/tkt3363.test 142ab96eded44a3615ec79fba98c7bde7d0f96de F ext/rtree/util/randomshape.tcl 54ee03d0d4a1c621806f7f44d5b78d2db8fac26e0e8687c36c4bd0203b27dbff F ext/rtree/viewrtree.tcl eea6224b3553599ae665b239bd827e182b466024 F ext/rtree/visual01.txt e9c2564083bcd30ec51b07f881bffbf0e12b50a3f6fced0c222c5c1d2f94ac66 +F ext/session/changebatch1.test 51694900ccbdf144c2e403f99358b7a3f137354e2ba8d1033ef88a53f1a494f2 +F ext/session/changebatchfault.test be49c793219bf387ad692a60856b921f0854ad6d F ext/session/changeset.c 06b585d977391d498746f002b2d5f9315d0d37888ce9551bd0cb30bfe9a4cf47 F ext/session/changesetfuzz.c 227076ab0ae4447d742c01ee88a564da6478bbf26b65108bf8fac9cd8b0b24aa F ext/session/changesetfuzz1.test 15b629004e58d5ffcc852e6842a603775bb64b1ce51254831f3d12b113b616cd @@ -552,7 +558,7 @@ F ext/session/sessionD.test 470ff917dc849e2eb78142ade63aaabd729d773833cff0ff01bc F ext/session/sessionE.test b2010949c9d7415306f64e3c2072ddabc4b8250c98478d3c0c4d064bce83111d F ext/session/sessionF.test d37ed800881e742c208df443537bf29aa49fd56eac520d0f0c6df3e6320f3401 F ext/session/sessionG.test 3efe388282d641b65485b5462e67851002cd91a282dc95b685d085eb8efdad0a -F ext/session/sessionH.test 71bbff6b1abb2c4ac62b84dee53273c37e0b21e5fde3aed80929403e091ef859 +F ext/session/sessionH.test 29a5441c3dc0a63fa596d745e64bc6c636e062ae04cd89bc84e32c7d98b1fa9b F ext/session/sessionI.test 11e7b6729fc942982a5104a40132f70a2e964d64d60dc5809b8206465af74822 F ext/session/session_common.tcl a31f537a929a695a852d241c9434f2847cadf329856401921139fbb03a5a7697 F 
ext/session/session_gen.test 942a0002df10da53c45b40b581cc3ed25e7ff42bda1e7ba497273dc2887aa8e6 @@ -577,9 +583,11 @@ F ext/session/sessionrowid.test 85187c2f1b38861a5844868126f69f9ec62223a03449a98a F ext/session/sessionsize.test 8fcf4685993c3dbaa46a24183940ab9f5aa9ed0d23e5fb63bfffbdb56134b795 F ext/session/sessionstat1.test 5e718d5888c0c49bbb33a7a4f816366db85f59f6a4f97544a806421b85dc2dec F ext/session/sessionwor.test 6fd9a2256442cebde5b2284936ae9e0d54bde692d0f5fd009ecef8511f4cf3fc -F ext/session/sqlite3session.c b3de195ce668cace9b324599bf6255a70290cbfb5451e826e946f3aee6e64c54 -F ext/session/sqlite3session.h 7404723606074fcb2afdc6b72c206072cdb2b7d8ba097ca1559174a80bc26f7a -F ext/session/test_session.c 8766b5973a6323934cb51248f621c3dc87ad2a98f023c3cc280d79e7d78d36fb +F ext/session/sqlite3changebatch.c d488b42d8fd49fb013a1e9c4535232680dabeb28ae8f9421b65ea0ccc3b430f7 +F ext/session/sqlite3changebatch.h e72016998c9a22d439ddfd547b69e1ebac810c24 +F ext/session/sqlite3session.c 5566dab489b5d252db61f96bc74ebcf5bf0a18135dc6ee11009dd1e80826e5e6 +F ext/session/sqlite3session.h 120bd92f96f78d3b79848feb8c539350a15da8e9803226750d337f9a943dd86b +F ext/session/test_session.c eea930cdd55dbf960a2f6b35d7050d118a44bbe9bca1e66c6793909f02783404 F ext/wasm/GNUmakefile c3d007dd181527283d8674c812cc60518353f1f69c9a9d3008f10f53cea4a3c1 F ext/wasm/README-dist.txt f01081a850ce38a56706af6b481e3a7878e24e42b314cfcd4b129f0f8427066a F ext/wasm/README.md 2e87804e12c98f1d194b7a06162a88441d33bb443efcfe00dc6565a780d2f259 @@ -607,7 +615,7 @@ F ext/wasm/api/sqlite3-vfs-opfs-sahpool.c-pp.js a2eea6442556867b589e04107796c6e1 F ext/wasm/api/sqlite3-vfs-opfs.c-pp.js 88ce2078267a2d1af57525a32d896295f4a8db7664de0e17e82dc9ff006ed8d3 F ext/wasm/api/sqlite3-vtab-helper.c-pp.js 366596d8ff73d4cefb938bbe95bc839d503c3fab6c8335ce4bf52f0d8a7dee81 F ext/wasm/api/sqlite3-wasm.c 6d69ca57b772a5a194b3acf879ef39b865f634c5bc92718c6ef56cb4cd7dc0b2 -F ext/wasm/api/sqlite3-worker1-promiser.c-pp.js 
aa9715f661fb700459a5a6cb1c32a4d6a770723b47aa9ac0e16c2cf87d622a66 +F ext/wasm/api/sqlite3-worker1-promiser.c-pp.js 46f303ba8ddd1b2f0a391798837beddfa72e8c897038c8047eda49ce7d5ed46b F ext/wasm/api/sqlite3-worker1.c-pp.js bd0655687090e3b1657268a6a9cacde1ea2a734079d194e16dbbed9083e51b38 F ext/wasm/c-pp-lite.c f38254fba42561728c2e4764a7ba8d68700091e7c2f4418112868c0daba16783 F ext/wasm/common/SqliteTestUtil.js dae753b95e72248c4395d8de8359e0d055cd9928488e8dd84aef89e46d23b32e @@ -620,8 +628,8 @@ F ext/wasm/demo-123.html 8c70a412ce386bd3796534257935eb1e3ea5c581e5d5aea0490b823 F ext/wasm/demo-123.js c7b3cca50c55841c381a9ca4f9396e5bbdc6114273d0b10a43e378e32e7be5bf F ext/wasm/demo-jsstorage.html 409c4be4af5f207fb2877160724b91b33ea36a3cd8c204e8da1acb828ffe588e F ext/wasm/demo-jsstorage.js 467cb4126ff679ebcdb112d100d073af26b9808d0a0b52d66a40e28f59c5099b -F ext/wasm/demo-worker1-promiser.c-pp.html f73b0b98457e7fdad40d8353cb9b2919391da180f49549a86f3d58b4e5a010eb -F ext/wasm/demo-worker1-promiser.c-pp.js f40ec65810048e368896be71461028bd10de01e24277208c59266edf23bb9f52 +F ext/wasm/demo-worker1-promiser.c-pp.html 635cf90685805e21772a5f7a35d1ace80f98a9ef7c42ff04d7a125ddca7e5db8 +F ext/wasm/demo-worker1-promiser.c-pp.js fcc628cb42fcfaf07d250477801de1e6deb1e319d003976612a0db8d76b9fccc F ext/wasm/demo-worker1.html 2c178c1890a2beb5a5fecb1453e796d067a4b8d3d2a04d65ca2eb1ab2c68ef5d F ext/wasm/demo-worker1.js fdfa90aa9d6b402bfed802cf1595fe4da6cc834ac38c8ff854bf1ee01f5ff9bb F ext/wasm/example_extra_init.c 2347cd69d19d839ef4e5e77b7855103a7fe3ef2af86f2e8c95839afd8b05862f @@ -659,7 +667,7 @@ F ext/wasm/tests/opfs/sahpool/index.html be736567fd92d3ecb9754c145755037cbbd2bca F ext/wasm/tests/opfs/sahpool/sahpool-pausing.js f264925cfc82155de38cecb3d204c36e0f6991460fff0cb7c15079454679a4e2 F ext/wasm/tests/opfs/sahpool/sahpool-worker.js bd25a43fc2ab2d1bafd8f2854ad3943ef673f7c3be03e95ecf1612ff6e8e2a61 F magic.txt 5ade0bc977aa135e79e3faaea894d5671b26107cc91e70783aa7dc83f22f3ba0 -F main.mk 
9393d5982db60f26e72c5af24a8c11cf39374ff5e695fadb5a4e7376f28150c6 +F main.mk 3e58a98a5ac521d949d076be7b009464609e79e5de898dc95f893388b1ae2ffa F make.bat a136fd0b1c93e89854a86d5f4edcf0386d211e5d5ec2434480f6eea436c7420c F mptest/config01.test 3c6adcbc50b991866855f1977ff172eb6d901271 F mptest/config02.test 4415dfe36c48785f751e16e32c20b077c28ae504 @@ -671,17 +679,17 @@ F sqlite.pc.in 42b7bf0d02e08b9e77734a47798d1a55a9e0716b F sqlite3.1 1b9c24374a85dfc7eb8fa7c4266ee0db4f9609cceecfc5481cd8307e5af04366 F sqlite3.pc.in e6dee284fba59ef500092fdc1843df3be8433323a3733c91da96690a50a5b398 F src/alter.c fc36b19273ffe364aeb4d00ba04bda8798ad7a67fec7a035ee8ee56272e1bdbe -F src/analyze.c 03bcfc083fc0cccaa9ded93604e1d4244ea245c17285d463ef6a60425fcb247d +F src/analyze.c ebfe8c9c55e44c5ca2bb885f84b133cde7a37e99ea5e3bf1c2782fb7e34b0882 F src/attach.c 7cf07d4fa42b9fc8662237c60c40b730326c30aa90ae5fffc0b18b2d726ebf61 F src/auth.c 54ab9c6c5803b47c0d45b76ce27eff22a03b4b1f767c5945a3a4eb13aa4c78dc F src/backup.c 5c97e8023aab1ce14a42387eb3ae00ba5a0644569e3476f38661fa6f824c3523 -F src/bitvec.c e242d4496774dfc88fa278177dd23b607dce369ccafb3f61b41638eea2c9b399 +F src/bitvec.c 1a048c91b2fbccbbc32dcefa757f93216c0f518ed2bc0b4db39947264479ad06 F src/btmutex.c 30dada73a819a1ef5b7583786370dce1842e12e1ad941e4d05ac29695528daea -F src/btree.c 8850125300b9780fa54bc45a41af88eb2796b90f2f97942279094beef9b0e971 -F src/btree.h e823c46d87f63d904d735a24b76146d19f51f04445ea561f71cc3382fd1307f0 -F src/btreeInt.h 9c0f9ea5c9b5f4dcaea18111d43efe95f2ac276cd86d770dce10fd99ccc93886 -F src/build.c 4e1afafc56504ed6253e1b115c1502de4243c2287a0c799f4967fcd2d7716ad9 -F src/callback.c 3605bbf02bd7ed46c79cd48346db4a32fc51d67624400539c0532f4eead804ad +F src/btree.c 0c0710584f65ffbc73e478dfb84babec1ffe7cf17682489c26c026c0f7404b6f +F src/btree.h dcda10d5a5d29690dc2739b8d4d416d136760ab1dbe88a2dd417855e1775e37a +F src/btreeInt.h c3b8750a8dd8bc9a38840be463e7d23899b9da08276fa442e2bb8efd1052bc48 +F src/build.c 
323ce9b3de09f4578f7f4f725239ac15b3182ccb7ce3dcb79293730148365569 +F src/callback.c 6987b99f738d9f68fe95ff482cdc10a9bf3df2051319d3d418b7cd22e725825e F src/carray.c ff6081a31878fc34df8fa1052a9cbf17ddc22652544dcb3e2326886ed1053b55 F src/complete.c a3634ab1e687055cd002e11b8f43eb75c17da23e F src/date.c e19e0cfff9a41bfdd884c655755f6f00bca4c1a22272b56e0dd6667b7ea893a2 @@ -691,7 +699,7 @@ F src/delete.c e020dde34838369e2f0eff75f25c44a4e56a41262593f7c48d1223689d674e4d F src/expr.c 252e62742f5bb01517377c93057b6040ab954034ec3dde4d6fc583565d859a9c F src/fault.c 460f3e55994363812d9d60844b2a6de88826e007 F src/fkey.c c065da737307a29e4d240ac727758dbf4102cb3218a1f651eb689b6a6fa12531 -F src/func.c 0b802107498048d3dcac0b757720bcb8506507ce02159e213ab8161458eb293b +F src/func.c 13ab81ae9e23657de777837b1a909a6913d28283615132be88144fc8c1830bbb F src/global.c a19e4b1ca1335f560e9560e590fc13081e21f670643367f99cb9e8f9dc7d615b F src/hash.c 03c8c0f4be9e8bcb6de65aa26d34a61d48a9430747084a69f9469fbb00ea52ca F src/hash.h 46b92795a95bfefb210f52f0c316e9d7cdbcdd7e7fcfb0d8be796d3a5767cddf @@ -701,7 +709,7 @@ F src/insert.c dfd311b0ac2d4f6359e62013db67799757f4d2cc56cca5c10f4888acfbbfa3fd F src/json.c fb031340edee159c07ad37dbe668ffe945ed86f525b0eb3822e4a67cbc498a72 F src/legacy.c d7874bc885906868cd51e6c2156698f2754f02d9eee1bae2d687323c3ca8e5aa F src/loadext.c d6559d2b39c9bde6b104b83adeafbe5db3a514aae4d3d40afc58de522a03043b -F src/main.c 21fb86045bbf6b6329251a0ce6771735b6c71287cc9fcda1f2005d4ac5f25b52 +F src/main.c ce34cbd2bd5beccdf98aa127179bc6ecea3716d86f9f6b00724aa2034fdae5c5 F src/malloc.c 410e570b30c26cc36e3372577df50f7a96ee3eed5b2b161c6b6b48773c650c5e F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645 F src/mem1.c 3bb59158c38e05f6270e761a9f435bf19827a264c13d1631c58b84bdc96d73b2 @@ -722,33 +730,33 @@ F src/os.h 1ff5ae51d339d0e30d8a9d814f4b8f8e448169304d83a7ed9db66a65732f3e63 F src/os_common.h 6c0eb8dd40ef3e12fe585a13e709710267a258e2c8dd1c40b1948a1d14582e06 F src/os_kv.c 
e7d96727db5b67e39d590a68cc61c86daf4c093c36c011a09ebfb521182ec28d F src/os_setup.h 8efc64eda6a6c2f221387eefc2e7e45fd5a3d5c8337a7a83519ba4fbd2957ae2 -F src/os_unix.c dcf7988ddbdd68619b821c9a722f9377abb46f1d26c9279eb5a50402fd43d749 +F src/os_unix.c 54ab9a0a780e73c740fd01cdd13b2cba192d7b2b50d5c6820ee661e02e6f488e F src/os_win.c 0d553b6e8b92c8eb85e7f1b4a8036fe8638c8b32c9ad8d9d72a861c10f81b4c5 F src/os_win.h 5e168adf482484327195d10f9c3bce3520f598e04e07ffe62c9c5a8067c1037b -F src/pager.c a81461de271ac4886ad75b7ca2cca8157a48635820c4646cd2714acdc2c17e5f -F src/pager.h 6137149346e6c8a3ddc1eeb40aee46381e9bc8b0fcc6dda8a1efde993c2275b8 -F src/parse.y 7c2184b5665c671258c4e96a10bbc9dbf7e1ede462ebc4e614249de0d54c8a26 +F src/pager.c 93e9cf4fe32606a29ed446ef5577eaed7e7e7802b01773d96312742bbfce45ed +F src/pager.h dd6ade22dd303a8ca6c34f1ff0f299add7191c1bff65f0289b7fd7c3460f9551 +F src/parse.y aa52bf080906d3214e01343a67feabbc330f9c7e7f1db6126a3b722cbf58c01d F src/pcache.c 588cc3c5ccaaadde689ed35ce5c5c891a1f7b1f4d1f56f6cf0143b74d8ee6484 F src/pcache.h 1497ce1b823cf00094bb0cf3bac37b345937e6f910890c626b16512316d3abf5 -F src/pcache1.c 131ca0daf4e66b4608d2945ae76d6ed90de3f60539afbd5ef9ec65667a5f2fcd -F src/pragma.c ecec75795c1821520266e4f93fa8840cce48979af532db06f085e36a7813860f +F src/pcache1.c 69b3a3e00e56dd03749af545084f4c823690400e8b12fcefba948f8fd975b84a +F src/pragma.c ab340f5e412cb46da58d159fd57a70d6696578b5b39664fb44064279b4d20a33 F src/prepare.c f6a6e28a281bd1d1da12f47d370a81af46159b40f73bf7fa0b276b664f9c8b7d F src/printf.c b1b29b5e58e1530d5daeee5963d3c318d8ab2d7e38437580e28755753e0c1ded -F src/random.c 606b00941a1d7dd09c381d3279a058d771f406c5213c9932bbd93d5587be4b9c +F src/random.c 9bd018738ec450bf35d28050b4b33fa9a6eebf3aaefb1a1cff42dc14a7725673 F src/resolve.c 47aa7fdc9ec4c19b103ac5e79d7887d30119b5675309facf5eed1118391c868b F src/rowset.c 8432130e6c344b3401a8874c3cb49fefe6873fec593294de077afea2dce5ec97 -F src/select.c 
4d45a04431db072040d6625ee21c1dc483c9b2b64a5ab419f4a4e05aabed1204 +F src/select.c 25073e43299e869b003af054bbb38c26b8de89fe1dccc0fe506b8bdf9692ae44 F src/shell.c.in 3bc4c6aa962bdf950dc144c2138eb4bc734bf7e305f2ac42dbda5a83c4a4c660 -F src/sqlite.h.in 476f3efeb5dd26ad94dcbce262ca7eb9d042d797a92d624059c67ef37d5b3ab4 +F src/sqlite.h.in 91b7a209629dd57f534585fa08752bf154b53b63ee596c8e41275b8c636bb300 F src/sqlite3.rc 015537e6ac1eec6c7050e17b616c2ffe6f70fca241835a84a4f0d5937383c479 F src/sqlite3ext.h f590cd8cb4c36fc727632c9b5fbbafc85f7efe2c6890f9958d7e711dc26ec01e -F src/sqliteInt.h af67bc95fa6b66cd3c7f3d18d2d040ad386e4cbb02965ee318cc721ee9d5fa45 +F src/sqliteInt.h fc560d28bf9e287cece657914685b8d82679268a6a92b20b6a013300b7526b81 F src/sqliteLimit.h 7e705474d59912388832cc5465edbc0dbb552872e23452812846e90d280987f3 F src/status.c 7565d63a79aa2f326339a24a0461a60096d0bd2bce711fefb50b5c89335f3592 F src/table.c 0f141b58a16de7e2fbe81c308379e7279f4c6b50eb08efeec5892794a0ba30d1 F src/tclsqlite.c 85b5a20df96016e5d1d8fdc68c8a4c279c5b93e2049b77cd806c2cc50b9d8c56 F src/tclsqlite.h 614b3780a62522bc9f8f2b9fb22689e8009958e7aa77e572d0f3149050af348a -F src/test1.c 2986148389a86555db7e6b731767154f165a2c7bc690131f90596fa6263fe79d +F src/test1.c c77a5c418701277d015a4f3ce5d046eda21f0f9b8e2098f4a3b32bd82eea4f4f F src/test2.c 62f0830958f9075692c29c6de51b495ae8969e1bef85f239ffcd9ba5fb44a5ff F src/test3.c 432646f581d8af1bb495e58fc98234380250954f5d5535e507fc785eccc3987a F src/test4.c 0ac87fc13cdb334ab3a71823f99b6c32a6bebe5d603cd6a71d84c823d43a25a0 @@ -761,7 +769,7 @@ F src/test_backup.c a2bfd90d2ff2511b8635507bdb30fa9b605ade19c16b533066cae3077f5b F src/test_bestindex.c d75fad21369d80910238032bcf8d9ca1f2bffda13c1ceec63bfbb7f704448b15 F src/test_blob.c 77b994e17f2c87055f44fd96c9a206c5a7155bae2cda2769af60c2f3582f962c F src/test_btree.c 28283787d32b8fa953eb77412ad0de2c9895260e4e5bd5a94b3c7411664f90d5 -F src/test_config.c 9b6dac5bc4ab56c9c6289ca9a6a73a156e178e58bcfdac19a69f6e17e28ad5ac +F src/test_config.c 
0cac3f68f571bcd26b9618883e2794006dfccd9067fb81adf977743a48588af4 F src/test_delete.c d0e8f6dc55cfc98a7c27c057fb88d512260564bf0b611482656c68b8f7f401ed F src/test_demovfs.c 3efa2adf4f21e10d95521721687d5ca047aea91fa62dd8cc22ac9e5a9c942383 F src/test_devsym.c 649434ed34d0b03fbd5a6b42df80f0f9a7e53f94dd1710aad5dd8831e91c4e86 @@ -786,9 +794,9 @@ F src/test_quota.h 2a8ad1952d1d2ca9af0ce0465e56e6c023b5e15d F src/test_rtree.c d844d746a3cc027247318b970025a927f14772339c991f40e7911583ea5ed0d9 F src/test_schema.c b06d3ddc3edc173c143878f3edb869dd200d57d918ae2f38820534f9a5e3d7d9 F src/test_sqllog.c 5abf04865758c0a3915b4ec2b2ee5ab75f74c00e2f05bf503b9083e0ab6829d7 -F src/test_superlock.c 3387fc794a68d8c6b6ed059aabacbfe870dc502c5cf65562f36aac78b4a4d629 +F src/test_superlock.c d511389fdd930690779711206ac77f15ce65b5767cb626dfbf80d149e96dc3b2 F src/test_syscall.c c5bf039261973135068aa68f4d185a6147333dcf266977989f8245b3a1968f1b -F src/test_tclsh.c c01706ac60bd3176754d3ccd37da74c6ad97c2e14489f8ed71b497c1c0ac0dd4 +F src/test_tclsh.c 179e8c9fc722240275d5d9c5d9ff0d8e652c7614f601b8699c41d9c703f7ec38 F src/test_tclvar.c ae873248a0188459b1c16ca7cc431265dacce524399e8b46725c2b3b7e048424 F src/test_thread.c 3edb4a5b5aeb1a6e9a275dccc848ac95acab7f496b3e9230f6d2d04953a2b862 F src/test_vdbecov.c 5c426d9cd2b351f5f9ceb30cabf8c64a63bfcad644c507e0bd9ce2f6ae1a3bf3 @@ -799,16 +807,16 @@ F src/threads.c 4ae07fa022a3dc7c5beb373cf744a85d3c5c6c3c F src/tokenize.c f297bbf02037639e7a93b37d9c6e4415b3de1273395ee8fa8183e741e1e7fb72 F src/treeview.c 3ce7ac9835d2d70cc1c868b01b747ae8a062322e155701e58e3d62ca79aada7a F src/trigger.c cb894db98083c68c4a17c72566ed39434aa91286db20d2891279ee49e6bfec0c -F src/update.c 3e5e7ff66fa19ebe4d1b113d480639a24cc1175adbefabbd1a948a07f28e37cf +F src/update.c e1a8b1876e6f2a1824a6b9f1999a03d5dd50ce250a1bb86ea133e5b4b359488b F src/upsert.c 215328c3f91623c520ec8672c44323553f12caeb4f01b1090ebdca99fdf7b4f1 F src/utf.c 7267c3fb9e2467020507601af3354c2446c61f444387e094c779dccd5ca62165 F 
src/util.c 36fb1150062957280777655976f3f9a75db236cb8207a0770ceae8d5ec17fcd3 -F src/vacuum.c d3d35d8ae893d419ade5fa196d761a83bddcbb62137a1a157ae751ef38b26e82 -F src/vdbe.c b44c366e83412d3b8c190feb1f029b7d02e1bd69252a57b32f195107f0d03964 +F src/vacuum.c 09d61b53904979f2c6acb740121fd0d1b5371f962baaaa2dbbe449b5535ff581 +F src/vdbe.c 427dd374b2f545f317faaff61a1f37f744fe8ca8e32f946841c3a29fefafb932 F src/vdbe.h 966d0677a540b7ea6549b7c4e1312fc0d830fce3a235a58c801f2cc31cf5ecf9 F src/vdbeInt.h 2aaeb6df2938b181b4700a9328688a3986f2bba71e8b96f6a80671316618fa49 F src/vdbeapi.c 6a2181cfd27c86b4cc1d8abb27ae11f3b3f0357567814fa276ec37b043542938 -F src/vdbeaux.c 908d8a191aed444b2e4c920159249127f3ff67b94c56a16fad1dfdf9c7488f20 +F src/vdbeaux.c cc8d2a621f6be605cc3f8f147f30556d611731f937771176d02dbf70e9a188ff F src/vdbeblob.c b3f0640db9642fbdc88bd6ebcc83d6009514cafc98f062f675f2c8d505d82692 F src/vdbemem.c 48e562ff27e6386eb8613207ac27d3d98c1f67fdc4775a1ab13759d2c2a1c021 F src/vdbesort.c b69220f4ea9ffea5fdef34d968c60305444eea909252a81933b54c296d9cca70 @@ -816,8 +824,8 @@ F src/vdbetrace.c 49e689f751505839742f4a243a1a566e57d5c9eaf0d33bbaa26e2de3febf7b F src/vdbevtab.c fc46b9cbd759dc013f0b3724549cc0d71379183c667df3a5988f7e2f1bd485f3 F src/vtab.c 5437ce986db2f70e639ce8a3fe68dcdfe64b0f1abb14eaebecdabd5e0766cc68 F src/vxworks.h 9d18819c5235b49c2340a8a4d48195ec5d5afb637b152406de95a9436beeaeab -F src/wal.c 505a98fbc599a971d92cb90371cf54546c404cd61e04fd093e7b0c8ff978f9b6 -F src/wal.h ba252daaa94f889f4b2c17c027e823d9be47ce39da1d3799886bbd51f0490452 +F src/wal.c 8f0f8aed7bafc4f0fa9b0e1e5d70d4f12022b46509936b3f671f41246e6ad728 +F src/wal.h 8d02ab8c2a93a941f5898eb3345bf711c1d3f8f86f4be8d5428fb6c074962d8a F src/walker.c d5006d6b005e4ea7302ad390957a8d41ed83faa177e412f89bc5600a7462a014 F src/where.c 0079b6ba463ae806b99b20cb335729dcce5f3e496b81cccf6441dc11f8c5bf92 F src/whereInt.h 8d94cb116c9e06205c3d5ac87af065fc044f8cf08bfdccd94b6ea1c1308e65da @@ -905,6 +913,7 @@ F test/badutf.test 
d5360fc31f643d37a973ab0d8b4fb85799c3169f F test/badutf2.test f310fd3b24a491b6b77bccdf14923b85d6ebcce751068c180d93a6b8ff854399 F test/basexx1.test 4ae6ddbd92a7ebcabb5d844664c3e755d29fb69c8ddcf0c8d59bbe4e07c23919 F test/bc_common.tcl c70b896d1d4ce72f769d2c7c1fc15b2cb07559eb2093f2736c8ca51664b29ff5 +F test/bc_test1.c e0a092579552e066ed4ce7bcdaecfa69c4aacc8d F test/bestindex1.test 856a453dff8c68b4568601eed5a8b5e20b4763af9229f3947c215729ed878db0 F test/bestindex2.test 394ff8fbf34703391247116d6a44e1c50ee7282236ee77909044573cefc37bc0 F test/bestindex3.test 34bea272b0e0f835651b16a3931dbe7ac927039be6b2e1cb617bbe1d584b492b @@ -982,6 +991,18 @@ F test/collateB.test 9c840e21f9aead6fc533cea310f0bd202d5c11511088811b7e93ae7b47f F test/colmeta.test 248a644cec4c7c371cf1e107fd8fdba708dc290866c572672b6260e3466cce79 F test/colname.test 0e32125de701c6bd86247194c6f5639b740b4f71a0d88ee1e67ff3bda9ae99ca F test/columncount.test 6fe99c2f35738b0129357a1cf3fa483f76140f4cd8a89014c88c33c876d2638f +F test/commitstatus.test d5a871506ce5944a29afb7e65ce47ca7f76cadc1d09775022830258fdd6168a1 +F test/concfault.test 500f17c3fcfe7705114422bcc6ddd3c740001a43 +F test/concfault2.test 34b3fd258836aa305475d00e804c7450ade92f0de0bf9fa620e701446669bb12 +F test/concurrent.test fb624ddac9b008f347685911f90b6b5a022fd0a3f884c0ffef8056bc440e5d76 +F test/concurrent2.test 847cd11edc82229a95e1eaf88b55c974430b0f669cefd67a453d274e3480981c +F test/concurrent3.test 82923fc2ea7321144b4448f98ea38aa316ddceef9020a392c5f6dea536506434 +F test/concurrent4.test e0b12cd467137e50259df3b4f837507e82aaa07c35941c88664dc8ed1d089c44 +F test/concurrent5.test 5031c87134fee85352ac33ad33c81c6ec4f07d5547fe2429e1d38492a797f6bc +F test/concurrent6.test a7860e9ca13bb5fb76bcf41c5524fbfa9c37e6e258ecf84ffb5748a272488c67 +F test/concurrent7.test b96fa5c4cfdf8d5c0bc66b6934214500bad0260884a736f054ccc76e81aae85d +F test/concurrent8.test b93937e74a8efb8b84f2fea7595b53418c5f29777bbe9cbdb5dc219b3dd72a7d +F test/concurrent9.test 
4b59e327c524d09c992f94b5cc7202cfed124ecbb85823c30308b5e1c7e16dca F test/conflict.test 3307ffdf988e04b01c4e942d8aa369a977f085bf629f43a627c9a77f39d65926 F test/conflict2.test 5557909ce683b1073982f5d1b61dfb1d41e369533bfdaf003180c5bc87282dd1 F test/conflict3.test 81865d9599609aca394fb3b9cd5f561d4729ea5b176bece3644f6ecb540f88ac @@ -995,7 +1016,7 @@ F test/corrupt6.test fc6a891716139665dae0073b6945e3670bf92568 F test/corrupt7.test ffa86896fe63a3d00b0a131e1e64f402e4da9f7e5d89609d6501c851e511d73a F test/corrupt8.test 2399dfe40d2c0c63af86706e30f3e6302a8d0516 F test/corrupt9.test 730a3db08d4ab9aa43392ea30d9c2b4879cbff85 -F test/corruptA.test 112f4b2ae0b95ebf3ea63718642fb969a93acea557ace3a307234d19c245989b +F test/corruptA.test 56e8f321adaf3411960e9d2c7136669d8e1a91cbde6cf401ea84e6d6c7ccbe10 F test/corruptB.test 73a8d6c0b9833697ecf16b63e3c5c05c945b5dec F test/corruptC.test 7d6d9e907334ea3ccb7111a0656cafa30a28f8a5f2aaf1c45ad712236302856a F test/corruptD.test 614320aa519f6bf6c7dd2f581f9513ff7b6826954180cca1a606d0e25ea084a3 @@ -1008,7 +1029,7 @@ F test/corruptJ.test 4d5ccc4bf959464229a836d60142831ef76a5aa4 F test/corruptK.test ac13504593d89d69690d45479547616ed12644d42b5cb7eeb2e759a76fc23dcb F test/corruptL.test 652fc8ac0763a6fd3eb28b951d481924167b2d9936083bcc68253b2274a0c8fe F test/corruptM.test 7d574320e08c1b36caa3e47262061f186367d593a7e305d35f15289cc2c3e067 -F test/corruptN.test a034bb217bebd8d007625dfb078e76ec3d53515052dbceb68bd47b2c27674d5c +F test/corruptN.test eddd3b01283d63442c65f4921f5b6864531832ede030f8af52374220ed28c607 F test/cost.test cc434a026b1e9d0d98137a147e24e5daf1b1ad09e9ff7da63b34c83ddd136d92 F test/count.test cd4bd531066e8d77ef8fe1e3fc8253d042072e117ccab214b290cf83f1602249 F test/countofview.test 4088e461a10ee33e69803c177a69aa1d7bba81a9ffc2df66d76465a22ca7fdfc @@ -1159,7 +1180,7 @@ F test/fts3conf.test c9cd45433b6787d48a43e84949aa2eb8b3b3d242bac7276731c1476290d F test/fts3corrupt.test 6732477c5ace050c5758a40a8b5706c8c0cccd416b9c558e0e15224805a40e57 F 
test/fts3corrupt2.test e318f0676e5e78d5a4b702637e2bb25265954c08a1b1e4aaf93c7880bb0c67d0 F test/fts3corrupt3.test 0d5b69a0998b4adf868cc301fc78f3d0707745f1d984ce044c205cdb764b491f -F test/fts3corrupt4.test c7f414fe29b97a478d15c90382c4ae077a2bbd2283bf8c63bf66dadaaed3edb8 +F test/fts3corrupt4.test bf9eabf113474f0ae73c9e001aeb3fcf2150b260f2309f300379cbd46b45bdfb F test/fts3corrupt5.test 0549f85ec4bd22e992f645f13c59b99d652f2f5e643dac75568bfd23a6db7ed5 F test/fts3corrupt6.test f417c910254f32c0bc9ead7affa991a1d5aec35b3b32a183ffb05eea78289525 F test/fts3corrupt7.test 1da31776e24bb91d3c028e663456b61280b121a74496ccf2fef3fe33790ad2b0 @@ -1445,7 +1466,7 @@ F test/mmap4.test 2e2b4e32555b58da15176e6fe750f17c9dcf7f93 F test/mmapcorrupt.test 470fb44fe92e99c1d23701d156f8c17865f5b027063c9119dcfdb842791f4465 F test/mmapfault.test d4c9eff9cd8c2dc14bc43e71e042f175b0a26fe3 F test/mmapwarm.test 2272005969cd17a910077bd5082f70bc1fefad9a875afec7fc9af483898ecaf3 -F test/modeA.sql 3f2b5a7ce7074a52b2b7ec07b07dc1a08edba19e40bce9b4d65d3965413bbea3 +F test/modeA.sql 3f2b5a7ce7074a52b2b7ec07b07dc1a08edba19e40bce9b4d65d3965413bbea3 w test/modeA.clitest F test/multiplex.test d74c034e52805f6de8cc5432cef8c9eb774bb64ec29b83a22effc8ca4dac1f08 F test/multiplex2.test 580ca5817c7edbe4cc68fa150609c9473393003a F test/multiplex3.test fac575e0b1b852025575a6a8357701d80933e98b5d2fe6d35ddaa68f92f6a1f7 @@ -1495,12 +1516,11 @@ F test/pagerfault2.test caf4c7facb914fd3b03a17b31ae2b180c8d6ca1f F test/pagerfault3.test 1003fcda009bf48a8e22a516e193b6ef0dd1bbd8 F test/pageropt.test 84e4cc5cbca285357f7906e99b21be4f2bf5abc0 F test/pagesize.test 5769fc62d8c890a83a503f67d47508dfdc543305 -F test/parser1.test 131f4733472252d53d8ed681115257866f55740ab697fa05900d766049348f27 F test/pcache.test c8acbedd3b6fd0f9a7ca887a83b11d24a007972b F test/pcache2.test af7f3deb1a819f77a6d0d81534e97d1cf62cd442 F test/pendingrace.test e99efc5ab3584da3dfc8cd6a0ec4e5a42214820574f5ea24ee93f1d84655f463 F test/percentile.test 
eaee1ff3e35d5fe933ac98d927c9a7d2f4f1a1a19ee22e7d45e97a6d9ee32077 -F test/permutations.test e6de4f5777f7785737ac3d1d964b8656e5477a134665b2fe8a91884ab9b685b3 +F test/permutations.test 965b59d667eac286a6dc74c7cc93943683834708bb6870028b6f12af8faec68f F test/pg_common.tcl 3b27542224db1e713ae387459b5d117c836a5f6e328846922993b6d2b7640d9f F test/pragma.test 7d07b7bb76e273215d6a20c4f83c3062cc28976c737ccb70a686025801e86c8f F test/pragma2.test e5d5c176360c321344249354c0c16aec46214c9f @@ -1531,7 +1551,7 @@ F test/quote.test 7b01b2a261bc26d9821aea9f4941ce1e08191d62fc55ba8862440fb3a59197 F test/randexpr1.tcl 40dec52119ed3a2b8b2a773bce24b63a3a746459 F test/randexpr1.test eda062a97e60f9c38ae8d806b03b0ddf23d796df F test/rbu.test 168573d353cd0fd10196b87b0caa322c144ef736 -F test/rdonly.test 64e2696c322e3538df0b1ed624e21f9a23ed9ff8 +F test/rdonly.test 21e99ee237265d0cf95a0c84b50c784e834acaa4ef05d92a27b262626a656682 F test/readonly.test 0d307c335b3421898cfe64a783a376138aa003849b6bff61ee2d21e805bc0051 F test/recover.test 643139b911ac880a1e881d7621f02cfb546b608b8f2494d7d26fd5ed103b1ceb F test/regexp1.sql de2b5b33b16b664d655b41e780f2efca38de3e5559fc254b4c9783ff0bea96b0 @@ -1564,7 +1584,7 @@ F test/rowvaluefault.test 963ae9cdaed30a85a29668dd514e639f3556cae903ee9f172ea972 F test/rowvaluevtab.test cd9747bb3f308086944c07968f547ad6b05022e698d80b9ffbdfe09ce0b8da6f F test/rtree.test 0c8d9dd458d6824e59683c19ab2ffa9ef946f798 F test/run-wordcount.sh 891e89c4c2d16e629cd45951d4ed899ad12afc09 -F test/savepoint.test 6e9804a17767f08432c7a5e738b9a8f4b891d243110b63d3a41d270d3d1378ec +F test/savepoint.test 63a120ec4fbbd5025b238c259d12ed0516fbf4bca6384041cb995ade9a5f00d2 F test/savepoint2.test 9b8543940572a2f01a18298c3135ad0c9f4f67d7 F test/savepoint4.test c8f8159ade6d2acd9128be61e1230f1c1edc6cc0 F test/savepoint5.test 0735db177e0ebbaedc39812c8d065075d563c4fd @@ -1684,7 +1704,7 @@ F test/subquery2.test ab96ff3fa9c4e3dce0d699f74e61c50250ed4335bc8f400e127707d552 F test/subselect.test 
0966aa8e720224dbd6a5e769a3ec2a723e332303 F test/substr.test a673e3763e247e9b5e497a6cacbaf3da2bd8ec8921c0677145c109f2e633f36b F test/subtype1.test 96fd2a59bfc845c955b5f339d23b37ef4d50de5f8a04acd1450a68605fa2e3e7 -F test/superlock.test 85256830339a6871ce36a2ef591c3f67716a701b5497788fb2068b90159c2442 +F test/superlock.test e47035e7fa127642781808ce4121c1e5df89b330f87116327d7f909342826f0e F test/swarmvtab.test 250231404fcac88f61a6c147bb0e3a118ed879278cd3ccb0ae2d3a729e1e8e26 F test/swarmvtab2.test c948cb2fdfc5b01d85e8f6d6504854202dc1a0782ab2a0ed61538f27cbd0aa5c F test/swarmvtab3.test 41a3ab47cb7a834d4e5336425103b617410a67bb95d335ef536f887587ece073 @@ -1708,7 +1728,7 @@ F test/temptable2.test 76821347810ecc88203e6ef0dd6897b6036ac788e9dd3e6b04fd4d163 F test/temptable3.test d11a0974e52b347e45ee54ef1923c91ed91e4637 F test/temptrigfault.tes fc5918e64f3867156fefe7cfca9d8e1f495134a5229b2b511b0dc11c07f2eab4 F test/temptrigger.test a00f258ed8d21a0e8fd4f322f15e8cfb5cef2e43655670e07a753e3fb4769d61 -F test/tester.tcl 2d943f60200e0a36bcd3f1f0baf181a751cd3604ef6b6bd4c8dc39b4e8a53116 +F test/tester.tcl 7a86598eed39bfbfc5dea5f1c35c3e6b0843b2155188ec6f5ed4d2c9fb9d841c F test/testloadext.c 862b848783eaed9985fbce46c65cd214664376b549fae252b364d5d1ef350a27 F test/testrunner.tcl 78d67079fc39caf2af3fd9d4c30bdac78dae7ec50b9fc802835e7a5189581e07 x F test/testrunner_data.tcl 87b8afd37c8e517fa87b7936540b2fc1ede8291f0567fb88744b9bff272a2e8b @@ -1724,7 +1744,7 @@ F test/thread3.test a12656a56cdf67acb6a2ff7638826c6d6a645f79909d86df521045ad31cf F test/thread_common.tcl b3b19a769fe30ef5537cdfa60acd49b78f771301627720d1add2d3bac77d9039 F test/threadtest1.c 6029d9c5567db28e6dc908a0c63099c3ba6c383b F test/threadtest2.c a70a8e94bef23339d34226eb9521015ef99f4df8 -F test/threadtest3.c 655bff6c0895ec03f014126aa65e808fac9aae8c5a7a7da58a510cbe8b43b781 +F test/threadtest3.c e947b396444f7992a942cd8db0f01589ede90dd250ec802fe800cc90bbee21e3 F test/threadtest4.c c1e67136ceb6c7ec8184e56ac61db28f96bd2925 F 
test/threadtest5.c 9b4d782c58d8915d7e955ff8051f3d03628bda0d33b82971ea8c0f2f2808c421 F test/time-wordcount.sh 8e0b0f8109367827ad5d58f5cc849705731e4b90 @@ -1899,7 +1919,9 @@ F test/triggerF.test 5d76f0a8c428ff87a4d5ed52da06f6096a2c787a1e21b846111dfac4123 F test/triggerG.test b4e3fbccde6cf8995177cd6cad880256c8c00e407e07d8c67149f46106292a2c F test/triggerupfrom.test d1f9e56090408115c522bee626cc33a2f3370f627a5e341d832589d72e3aa271 F test/trustschema1.test d2996bb284859c99956ac706160eab9f086919da738d19bfef3ac431cce8fd47 +F test/tt3_bcwal2.c 8351577477ce58c3b21a1772c28e73ec58538c44be4a183ff7485d6814bd5385 F test/tt3_checkpoint.c ac7ca661d739280c89d9c253897df64a59a49369bd1247207ac0f655b622579d +F test/tt3_core.c 8cd89ead95410f70e7fb02c79f1e040f9c5ad5cf F test/tt3_index.c 39eec10a35f57672225be4d182862152896dee4a F test/tt3_lookaside1.c 0377e202c3c2a50d688cb65ba203afeda6fafeb9 F test/tt3_shared.c b37d22defc944a2ac4c91c927fd06c1d48cd51e2ce9d004fe868625bd2399f93 @@ -1932,7 +1954,7 @@ F test/upsert3.test 88d7d590a1948a9cb6eac1b54b0642f67a9f35a1fc0f19b200e97d5d39e3 F test/upsert4.test 25d2a1da92f149331ae0c51ca6e3eee78189577585eab92de149900d62994fa5 F test/upsert5.test 9953b180d02d1369cdbb6c73c900834e5fef8cb78e98e07511c8762ec21cc176 F test/upsertfault.test f21ca47740841fdb4d61acfa7b17646d773e67724fe8c185b71c018db8a94b35 -F test/uri.test 1250724af9beeed2d6c3716f5b990c483200c54f408d3c0ec9543a3c7961f8fc +F test/uri.test 234147e5eda1ef143a8b74ac8f8e9bae363aafa6b8c5813119052f268765d62f F test/uri2.test 9d3ba7a53ee167572d53a298ee4a5d38ec4a8fb7 F test/utf16align.test 9fde0bb5d3a821594aa68c6829ab9c5453a084384137ebb9f6153e2d678039da F test/vacuum-into.test 5a489714feecfdabfc7b293be4111564a173dee92c0d6818dd0207f3ade65783 @@ -1978,13 +2000,26 @@ F test/vtab_shared.test 5253bff2355a9a3f014c15337da7e177ab0ef8ad F test/vtabdistinct.test 7688f0889358f849fd60bbfde1ded38b014b18066076d4bfbb75395804dfe072 F test/vtabdrop.test 65d4cf6722972e5499bdaf0c0d70ee3b8133944a4e4bc31862563f32a7edca12 F 
test/vtabrhs1.test 9b5ecbc74a689500c33a4b2b36761f9bcc22fcc4e3f9d21066ee0c9c74cf5f6c -F test/wal.test 519c550255c78f55959e9159b93ebbfad2b4e9f36f5b76284da41f572f9d27da -F test/wal2.test f058016abe4627d2664db4b4b87990298d925e66a4c5a2c8e674a0ff6f4c841d +F test/wal.test a5d6c7f4bd79251ed344229d96d44ecdfb896bdbd32b7e65f118756699c7e473 +F test/wal2.test 7d4265b33fe4804f125acd53733388909bb7c64c84690e1ab2b36dc493d0464f +F test/wal2big.test 829141cbecdda4329db8fa38705424c4a73db72a06b9540b06811a825d330409 +F test/wal2concurrent.test 7fc3e570073683a2a28f42bda46ecf516f5bc82afd703c1fbf4aa38e18fb3361 +F test/wal2fault.test 2e8e60cacd5bcd451618aeffd05f676894d17202d3e2986e288d36e2c5993249 +F test/wal2lock.test 0ef98d72dc6bcf7711dedd684760488400d9a9a6eec0dc5d3822060437793552 +F test/wal2openclose.test 2b26be723ea7f4263c8d5d70b37efd1c359561a0526e39466c45fe8e6478daee +F test/wal2recover.test 348a3f2a4c79359790f70fd692dcd0c8f04e42a85365e688778c945e10bae02b +F test/wal2recover2.test e849fc5a202ae49dc561a3d47270accf159227c9e7663594fab0eac212480d28 +F test/wal2recover3.test 4a91689e165a38bc401736e6518188c2b0ff4fa1566d1810b8867536db128177 +F test/wal2rewrite.test 6ca6f631ffcf871240beab5f02608913fd075c6d0d31310b026c8383c65c9f9c +F test/wal2rollback.test 23adc4a099b23f6aaea8b04fdca1c35861d887dd80f8be7da2d5273eb777e428 +F test/wal2savepoint.test c2410acac73bfa63827f9346d060205080b747f0414ff27273d417dadfea28d0 +F test/wal2simple.test d2d94fe1f30700c9f30e46c16f1c3d7d245b861a1be2bfe5ead701484c6dd38e +F test/wal2snapshot.test f6c3945bea572fd8745774e95b8dca1e5832c696e2251bb0db33391ee567fe60 F test/wal3.test 5de023bb862fd1eb9d2ad26fa8d9c43abb5370582e5b08b2ae0d6f93661bc310 F test/wal4.test 4744e155cd6299c6bd99d3eab1c82f77db9cdb3c F test/wal5.test 9c11da7aeccd83a46d79a556ad11a18d3cb15aa9 F test/wal6.test 6a773eff47b989c5142d17f2a7778c02d8260149a648d44ef8345aa080e428e3 -F test/wal64k.test bb8c52f0140aae1de877ffed86e2a97d903f98cf9ac263f185d51c58cde92327 +F test/wal64k.test 
87eef68151bc7119f7a2dc9ba173a2405122d38ee520cfc8c304d6f35b378e99 F test/wal7.test 2ae8f427d240099cc4b2dfef63cff44e2a68a1bd F test/wal8.test d9df3fba4caad5854ed69ed673c68482514203c8 F test/wal9.test 378e76a9ad09cd9bee06c172ad3547b0129a6750 @@ -1992,7 +2027,7 @@ F test/wal_common.tcl 204d1721ac13c5e0c7fae6380315b5ab7f4e8423f580d826c5e9df1995 F test/walbak.test 018d4e5a3d45c6298d11b99f09a8ef6876527946 F test/walbig.test f437473a16cfb314867c6b5d1dbcd519e73e3434 F test/walblock.test 6bb472e82730e7e4e81395e907a01d8cfc2bd9e1f01f8a9184ca572e2955a4bf -F test/walckptnoop.test 5f6123750f40cb86633a7e014f9fb805d0eb494b811840086dc72e554e68c7c1 +F test/walckptnoop.test 72456e04a712b9e8daea5a7ee28588b708455c108d4552e5134b503f4e856024 F test/walcksum.test 50e204500eed9c691b6045e467bb2923f49aa93d8adf315e2be135fdb202c1c2 F test/walcrash.test 21038858cc552077b0522f50b0fa87e38139306a F test/walcrash2.test a0edab4e5390f03b99a790de89aad15d6ec70b36 @@ -2006,10 +2041,10 @@ F test/walnoshm.test 844b3eb7d8e8ee76c834ef723babec57b0be51fa52ef7e321c289ed0fe3 F test/waloverwrite.test dad2f26567f1b45174e54fbf9a8dc1cb876a7f03 F test/walpersist.test 8d78a1ec91299163451417b451a2bac3481f8eb9f455b1ca507a6625c927ca6e F test/walprotocol.test 1b3f922125e341703f6e946d77fdc564d38fb3e07a9385cfdc6c99cac1ecf878 -F test/walprotocol2.test 7d3b6b4bf0b12f8007121b1e6ef714bc99101fb3b48e46371df1db868eebc131 +F test/walprotocol2.test 7e4bedd5ee83607e2928ac438bf7332a396b980d3e02aa0746509ce11ad1f13c F test/walro.test 78a84bc0fdae1385c06b017215c426b6845734d6a5a3ac75c918dd9b801b1b9d F test/walro2.test 33955a6fd874dd9724005e17f77fef89d334b3171454a1256fe4941a96766cdc -F test/walrofault.test c70cb6e308c443867701856cce92ad8288cd99488fa52afab77cca6cfd51af68 +F test/walrofault.test 5a25f91c16a68bae65edec7cdef4495e5c6494c8408743fe9b29045fa6665cd0 F test/walseh1.test bae700eb99519b6d5cd3f893c04759accc5a59c391d4189fe4dd6995a533442b F test/walsetlk.test 9079cd8ef82570b8cf0067f31e049a72bec353fb2d5f0cc88f1736dc42ba9704 F 
test/walsetlk2.test 4a67823b1e759ac5a4fe78a83c1f857c3c5761bf8d755421c8b55907957f23dd @@ -2134,13 +2169,13 @@ F tool/mkamalzip.tcl 8aa5ebe7973c8b8774062d34e15fea9815c4cc2ceea3a9b184695f00591 F tool/mkautoconfamal.sh 06fbe090b81c24e592c1f22b404334f805ba74d482a9260f2ac81e6f3d3386d8 F tool/mkccode.tcl c42a8f8cf78f92e83795d5447460dbce7aaf78a3bbf9082f1507dc71a3665f3c x F tool/mkcombo.tcl 2a5189b219c4a495e1ff7fc980bd568d3cfb82ae9d50c84e77f7a161e96fc132 -F tool/mkctimec.tcl 3fb5cad05922f5da61262cb6bcd5868a34e94a49ca8833ae2d7796e7df075576 x +F tool/mkctimec.tcl 936c2d30a06c5231a1dc8b7c978f902d49335e54092e761ddf167490a5e94902 x F tool/mkkeywordhash.c 82d5af1d0e677900739fba59155cddac172d8c712c2d91ab73d6e6bcb30060f0 F tool/mkmsvcmin.tcl d76c45efda1cce2d4005bcea7b8a22bb752e3256009f331120fb4fecb14ebb7a F tool/mkopcodec.tcl 33d20791e191df43209b77d37f0ff0904620b28465cca6990cf8d60da61a07ef F tool/mkopcodeh.tcl 2b4e6967a670ef21bf53a164964c35c6163277d002a4c6f56fa231d68c88d023 F tool/mkopts.tcl 680f785fdb09729fd9ac50632413da4eadbdf9071535e3f26d03795828ab07fa -F tool/mkpragmatab.tcl 3801ce32f8c55fe63a3b279f231fb26c2c1a2ea9a09d2dd599239d87a609acec +F tool/mkpragmatab.tcl 10694206dfe9d1f6e24d5876d52bb56e67706ef20bc479002bb88b67b90be045 F tool/mkshellc.tcl da6918b128e928a8f0d663519e14829153e59465bd5eb596442e99fa10a411b7 F tool/mksourceid.c 36aa8020014aed0836fd13c51d6dc9219b0df1761d6b5f58ff5b616211b079b9 F tool/mksqlite3c-noext.tcl 351c55256213154cabb051a3c870ef9f4487de905015141ae50dc7578a901b84 @@ -2183,16 +2218,18 @@ F tool/stripccomments.c 68d2aa8cb504439f541ce66b8f128067612bdd16f5fb7bfe540f3fcb F tool/symbols-mingw.sh 4dbcea7e74768305384c9fd2ed2b41bbf9f0414d F tool/symbols.sh 1612bd947750e21e7b47befad5f6b3825b06cce0705441f903bf35ced65ae9b9 F tool/tclConfigShToMake.sh 7c065d81c2d178e15e45a77372c6e5a38b5a1b08755301cd6f20a3a862db7312 x +F tool/tserver.c 17b7f0b06f4e776e26220889941a86936b3c56ad18608baadc8faa00b7bd46ee +F tool/tserver_test.tcl 
64415a134749ac3f38c9abd2bb95c7387a9b44e5116419487fd008cff8a459db F tool/varint.c 5d94cb5003db9dbbcbcc5df08d66f16071aee003 F tool/vdbe-compress.tcl fa2f37ab39b2a0087fafb6a7f3ce19503e25e624ffa8ed9951717ab72920c088 F tool/vdbe_profile.tcl 3ac5a4a9449f4baf77059358ea050db3e34395ccf59c5464d29b91746d5b961e -F tool/version-info.c 33d0390ef484b3b1cb685d59362be891ea162123cea181cb8e6d2cf6ddf2700c +F tool/version-info.c 3b36468a90faf1bbd59c65fd0eb66522d9f941eedd364fabccd72273503ae7d5 F tool/warnings-clang.sh bbf6a1e685e534c92ec2bfba5b1745f34fb6f0bc2a362850723a9ee87c1b31a7 F tool/warnings.sh d924598cf2f55a4ecbc2aeb055c10bd5f48114793e7ba25f9585435da29e7e98 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f F tool/winmain.c 00c8fb88e365c9017db14c73d3c78af62194d9644feaf60e220ab0f411f3604c -P 70b1da718c176b8eb154fe087af4352eb6f55c9c0d1f09fc625d073d9f8075f4 -R 00bfa5500890db22f6793537f5f0589c -U stephan -Z e94aded409cc10495252667056d88c93 +P f273f6b8245c5dcaf5642a83afaa015efd5b6115d099ddd1325db53c51338f63 b57a8215f4259a0aae188b7ee5060f8ff48919303179aae80b58b43ed3b991f5 +R b94b8b995e69f821ef662acbd14c9693 +U drh +Z 4983067014ca9c3566faa2245d09fda6 # Remove this line to create a well-formed Fossil manifest. diff --git a/manifest.tags b/manifest.tags index bec971799f..d2f1286592 100644 --- a/manifest.tags +++ b/manifest.tags @@ -1,2 +1,2 @@ -branch trunk -tag trunk +branch bedrock +tag bedrock diff --git a/manifest.uuid b/manifest.uuid index d1c286cbbc..d205fc0354 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -b57a8215f4259a0aae188b7ee5060f8ff48919303179aae80b58b43ed3b991f5 +d577b2a2b2b04ff7b7b07c0fce6c9e3a910ae0a2a46d3524e688c1530155e3c2 diff --git a/src/analyze.c b/src/analyze.c index 2721f25234..b24be3e2b8 100644 --- a/src/analyze.c +++ b/src/analyze.c @@ -257,6 +257,13 @@ static void openStatTable( # define SQLITE_STAT4_SAMPLES 24 #endif +/* +** Assumed number of of samples when loading sqlite_stat4 data. 
It doesn't +** matter if there are more or fewer samples than this, but is more efficient +** if this estimate turns out to be true. +*/ +#define SQLITE_STAT4_EST_SAMPLES SQLITE_STAT4_SAMPLES + /* ** Three SQL functions - stat_init(), stat_push(), and stat_get() - ** share an instance of the following structure to hold their state @@ -1551,6 +1558,9 @@ static void decodeIntArray( #endif if( *z==' ' ) z++; } + if( aOut ){ + for(/* no-op */; iaSample[j]; sqlite3DbFree(db, p->p); } - sqlite3DbFree(db, pIdx->aSample); + if( pIdx->nSampleAlloc!=SQLITE_STAT4_EST_SAMPLES ){ + sqlite3DbFree(db, pIdx->aSample); + } } if( db->pnBytesFreed==0 ){ pIdx->nSample = 0; pIdx->aSample = 0; + pIdx->nSampleAlloc = 0; } #else UNUSED_PARAMETER(db); @@ -1753,8 +1766,110 @@ static Index *findIndexOrPrimaryKey( } /* -** Load the content from either the sqlite_stat4 -** into the relevant Index.aSample[] arrays. +** Grow the pIdx->aSample[] array. Return SQLITE_OK if successful, or +** SQLITE_NOMEM otherwise. +*/ +static int growSampleArray(sqlite3 *db, Index *pIdx, int *piOff){ + int nIdxCol = pIdx->nSampleCol; + int nNew = 0; + IndexSample *aNew = 0; + int nByte = 0; + tRowcnt *pSpace; /* Available allocated memory space */ + u8 *pPtr; /* Available memory as a u8 for easier manipulation */ + int i; + u64 t; + + assert( pIdx->nSample==pIdx->nSampleAlloc ); + nNew = SQLITE_STAT4_EST_SAMPLES; + if( pIdx->nSample ){ + nNew = pIdx->nSample*2; + } + + /* Set nByte to the required amount of space */ + nByte = ROUND8(sizeof(IndexSample) * nNew); + nByte += sizeof(tRowcnt) * nIdxCol * 3 * nNew; + nByte += nIdxCol * sizeof(tRowcnt); /* Space for Index.aAvgEq[] */ + + if( nNew==SQLITE_STAT4_EST_SAMPLES ){ + aNew = (IndexSample*)&((u8*)pIdx->pSchema->pStat4Space)[*piOff]; + *piOff += nByte; + assert( *piOff<=sqlite3_msize(pIdx->pSchema->pStat4Space) ); + }else{ + aNew = (IndexSample*)sqlite3DbMallocRaw(db, nByte); + if( aNew==0 ) return SQLITE_NOMEM_BKPT; + } + + pPtr = (u8*)aNew; + pPtr += 
ROUND8(nNew*sizeof(pIdx->aSample[0])); + pSpace = (tRowcnt*)pPtr; + + pIdx->aAvgEq = pSpace; pSpace += nIdxCol; + assert( EIGHT_BYTE_ALIGNMENT( pSpace ) ); + + if( pIdx->nSample ){ + /* Copy the contents of the anEq[], anLt[], anDLt[] arrays for all + ** extant samples to the new location. */ + int nByte = nIdxCol * 3 * sizeof(tRowcnt) * pIdx->nSample; + memcpy(pSpace, pIdx->aSample[0].anEq, nByte); + } + for(i=0; inSample ){ + aNew[i].p = pIdx->aSample[i].p; + aNew[i].n = pIdx->aSample[i].n; + } + } + assert( ((u8*)pSpace)-nByte==(u8*)aNew ); + + if( pIdx->nSample!=SQLITE_STAT4_EST_SAMPLES ){ + sqlite3DbFree(db, pIdx->aSample); + } + pIdx->aSample = aNew; + pIdx->nSampleAlloc = nNew; + return SQLITE_OK; +} + +/* +** Allocate the space that will likely be required for the Index.aSample[] +** arrays populated by loading data from the sqlite_stat4 table. Return +** SQLITE_OK if successful, or SQLITE_NOMEM otherwise. +*/ +static int stat4AllocSpace(sqlite3 *db, const char *zDb){ + int iDb = sqlite3FindDbName(db, zDb); + Schema *pSchema = db->aDb[iDb].pSchema; + int nByte = 0; + HashElem *k; + + assert( iDb>=0 ); + assert( pSchema->pStat4Space==0 ); + for(k=sqliteHashFirst(&pSchema->idxHash); k; k=sqliteHashNext(k)){ + Index *pIdx = sqliteHashData(k); + int nIdxCol; + if( !HasRowid(pIdx->pTable) && IsPrimaryKeyIndex(pIdx) ){ + nIdxCol = pIdx->nKeyCol; + }else{ + nIdxCol = pIdx->nColumn; + } + nByte += ROUND8(sizeof(IndexSample) * SQLITE_STAT4_EST_SAMPLES); + nByte += sizeof(tRowcnt) * nIdxCol * 3 * SQLITE_STAT4_EST_SAMPLES; + nByte += nIdxCol * sizeof(tRowcnt); /* Space for Index.aAvgEq[] */ + } + + if( nByte>0 ){ + pSchema->pStat4Space = sqlite3_malloc(nByte); + if( pSchema->pStat4Space==0 ){ + return SQLITE_NOMEM_BKPT; + } + } + + return SQLITE_OK; +} + +/* +** Load the content from the sqlite_stat4 into the relevant Index.aSample[] +** arrays. 
** ** Arguments zSql1 and zSql2 must point to SQL statements that return ** data equivalent to the following: @@ -1775,69 +1890,14 @@ static int loadStatTbl( char *zSql; /* Text of the SQL statement */ Index *pPrevIdx = 0; /* Previous index in the loop */ IndexSample *pSample; /* A slot in pIdx->aSample[] */ + int iBlockOff = 0; /* Offset into Schema.pStat4Space */ assert( db->lookaside.bDisable ); - zSql = sqlite3MPrintf(db, zSql1, zDb); - if( !zSql ){ - return SQLITE_NOMEM_BKPT; - } - rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); - sqlite3DbFree(db, zSql); - if( rc ) return rc; - - while( sqlite3_step(pStmt)==SQLITE_ROW ){ - int nIdxCol = 1; /* Number of columns in stat4 records */ - char *zIndex; /* Index name */ - Index *pIdx; /* Pointer to the index object */ - int nSample; /* Number of samples */ - i64 nByte; /* Bytes of space required */ - i64 i; /* Bytes of space required */ - tRowcnt *pSpace; /* Available allocated memory space */ - u8 *pPtr; /* Available memory as a u8 for easier manipulation */ - - zIndex = (char *)sqlite3_column_text(pStmt, 0); - if( zIndex==0 ) continue; - nSample = sqlite3_column_int(pStmt, 1); - pIdx = findIndexOrPrimaryKey(db, zIndex, zDb); - assert( pIdx==0 || pIdx->nSample==0 ); - if( pIdx==0 ) continue; - if( pIdx->aSample!=0 ){ - /* The same index appears in sqlite_stat4 under multiple names */ - continue; - } - assert( !HasRowid(pIdx->pTable) || pIdx->nColumn==pIdx->nKeyCol+1 ); - if( !HasRowid(pIdx->pTable) && IsPrimaryKeyIndex(pIdx) ){ - nIdxCol = pIdx->nKeyCol; - }else{ - nIdxCol = pIdx->nColumn; - } - pIdx->nSampleCol = nIdxCol; - pIdx->mxSample = nSample; - nByte = ROUND8(sizeof(IndexSample) * nSample); - nByte += sizeof(tRowcnt) * nIdxCol * 3 * nSample; - nByte += nIdxCol * sizeof(tRowcnt); /* Space for Index.aAvgEq[] */ - - pIdx->aSample = sqlite3DbMallocZero(db, nByte); - if( pIdx->aSample==0 ){ - sqlite3_finalize(pStmt); - return SQLITE_NOMEM_BKPT; - } - pPtr = (u8*)pIdx->aSample; - pPtr += 
ROUND8(nSample*sizeof(pIdx->aSample[0])); - pSpace = (tRowcnt*)pPtr; - assert( EIGHT_BYTE_ALIGNMENT( pSpace ) ); - pIdx->aAvgEq = pSpace; pSpace += nIdxCol; - pIdx->pTable->tabFlags |= TF_HasStat4; - for(i=0; iaSample[i].anEq = pSpace; pSpace += nIdxCol; - pIdx->aSample[i].anLt = pSpace; pSpace += nIdxCol; - pIdx->aSample[i].anDLt = pSpace; pSpace += nIdxCol; - } - assert( ((u8*)pSpace)-nByte==(u8*)(pIdx->aSample) ); - } - rc = sqlite3_finalize(pStmt); - if( rc ) return rc; + /* Allocate the Schema.pStat4Space block that will be used for the + ** Index.aSample[] arrays populated by this call. */ + rc = stat4AllocSpace(db, zDb); + if( rc!=SQLITE_OK ) return rc; zSql = sqlite3MPrintf(db, zSql2, zDb); if( !zSql ){ @@ -1856,18 +1916,23 @@ static int loadStatTbl( if( zIndex==0 ) continue; pIdx = findIndexOrPrimaryKey(db, zIndex, zDb); if( pIdx==0 ) continue; - if( pIdx->nSample>=pIdx->mxSample ){ - /* Too many slots used because the same index appears in - ** sqlite_stat4 using multiple names */ - continue; + + if( pIdx->nSample==pIdx->nSampleAlloc ){ + pIdx->pTable->tabFlags |= TF_HasStat4; + assert( !HasRowid(pIdx->pTable) || pIdx->nColumn==pIdx->nKeyCol+1 ); + if( !HasRowid(pIdx->pTable) && IsPrimaryKeyIndex(pIdx) ){ + pIdx->nSampleCol = pIdx->nKeyCol; + }else{ + pIdx->nSampleCol = pIdx->nColumn; + } + if( growSampleArray(db, pIdx, &iBlockOff) ) break; } - /* This next condition is true if data has already been loaded from - ** the sqlite_stat4 table. 
*/ - nCol = pIdx->nSampleCol; + if( pIdx!=pPrevIdx ){ initAvgEq(pPrevIdx); pPrevIdx = pIdx; } + nCol = pIdx->nSampleCol; pSample = &pIdx->aSample[pIdx->nSample]; decodeIntArray((char*)sqlite3_column_text(pStmt,1),nCol,pSample->anEq,0,0); decodeIntArray((char*)sqlite3_column_text(pStmt,2),nCol,pSample->anLt,0,0); @@ -1964,6 +2029,10 @@ int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ pIdx->aSample = 0; #endif } +#ifdef SQLITE_ENABLE_STAT4 + sqlite3_free(pSchema->pStat4Space); + pSchema->pStat4Space = 0; +#endif /* Load new statistics out of the sqlite_stat1 table */ sInfo.db = db; diff --git a/src/bitvec.c b/src/bitvec.c index 7c5fa71d9b..a9314a4fd3 100644 --- a/src/bitvec.c +++ b/src/bitvec.c @@ -172,6 +172,12 @@ int sqlite3BitvecSet(Bitvec *p, u32 i){ if( p==0 ) return SQLITE_OK; assert( i>0 ); assert( i<=p->iSize ); + if( i>p->iSize || i==0 ){ + sqlite3_log(SQLITE_ERROR, + "Bitvec: setting bit %d of bitvec size %d\n", (int)i, (int)p->iSize + ); + abort(); + } i--; while((p->iSize > BITVEC_NBIT) && p->iDivisor) { u32 bin = i/p->iDivisor; diff --git a/src/btree.c b/src/btree.c index a3cbc6e3f8..c507457037 100644 --- a/src/btree.c +++ b/src/btree.c @@ -528,7 +528,240 @@ static void downgradeAllSharedCacheTableLocks(Btree *p){ #endif /* SQLITE_OMIT_SHARED_CACHE */ -static void releasePage(MemPage *pPage); /* Forward reference */ +#ifndef SQLITE_OMIT_CONCURRENT +/* +** The following structure - BtreePtrmap - stores the in-memory pointer map +** used for newly allocated pages in CONCURRENT transactions. Such pages are +** always allocated in a contiguous block (from the end of the file) starting +** with page BtreePtrmap.iFirst. 
+*/ +typedef struct RollbackEntry RollbackEntry; +typedef struct PtrmapEntry PtrmapEntry; +struct PtrmapEntry { + Pgno parent; + u8 eType; +}; +struct RollbackEntry { + Pgno pgno; + Pgno parent; + u8 eType; +}; +struct BtreePtrmap { + Pgno iFirst; /* First new page number aPtr[0] */ + + int nPtrAlloc; /* Allocated size of aPtr[] array */ + PtrmapEntry *aPtr; /* Array of parent page numbers */ + + int nSvpt; /* Used size of aSvpt[] array */ + int nSvptAlloc; /* Allocated size of aSvpt[] */ + int *aSvpt; /* First aRollback[] entry for savepoint i */ + + int nRollback; /* Used size of aRollback[] array */ + int nRollbackAlloc; /* Allocated size of aRollback[] array */ + RollbackEntry *aRollback; /* Array of rollback entries */ +}; + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** If page number pgno is greater than or equal to BtreePtrmap.iFirst, +** store an entry for it in the pointer-map structure. +*/ +static int btreePtrmapStore( + BtShared *pBt, + Pgno pgno, + u8 eType, + Pgno parent +){ + BtreePtrmap *pMap = pBt->pMap; + if( pgno>=pMap->iFirst ){ + int iEntry = pgno - pMap->iFirst; + + /* Grow the aPtr[] array as required */ + while( iEntry>=pMap->nPtrAlloc ){ + int nNew = pMap->nPtrAlloc ? pMap->nPtrAlloc*2 : 16; + PtrmapEntry *aNew = (PtrmapEntry*)sqlite3_realloc( + pMap->aPtr, nNew*sizeof(PtrmapEntry) + ); + if( aNew==0 ){ + return SQLITE_NOMEM; + }else{ + int nByte = (nNew-pMap->nPtrAlloc)*sizeof(PtrmapEntry); + memset(&aNew[pMap->nPtrAlloc], 0, nByte); + pMap->aPtr = aNew; + pMap->nPtrAlloc = nNew; + } + } + + /* Add an entry to the rollback log if required */ + if( pMap->nSvpt>0 && pMap->aPtr[iEntry].parent ){ + if( pMap->nRollback>=pMap->nRollbackAlloc ){ + int nNew = pMap->nRollback ? 
pMap->nRollback*2 : 16; + RollbackEntry *aNew = (RollbackEntry*)sqlite3_realloc( + pMap->aRollback, nNew*sizeof(RollbackEntry) + ); + if( aNew==0 ){ + return SQLITE_NOMEM; + }else{ + pMap->aRollback = aNew; + pMap->nRollbackAlloc = nNew; + } + } + + pMap->aRollback[pMap->nRollback].pgno = pgno; + pMap->aRollback[pMap->nRollback].parent = pMap->aPtr[iEntry].parent; + pMap->aRollback[pMap->nRollback].eType = pMap->aPtr[iEntry].eType; + pMap->nRollback++; + } + + /* Update the aPtr[] array */ + pMap->aPtr[iEntry].parent = parent; + pMap->aPtr[iEntry].eType = eType; + } + + return SQLITE_OK; +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** Open savepoint iSavepoint, if it is not already open. +*/ +static int btreePtrmapBegin(BtShared *pBt, int nSvpt){ + BtreePtrmap *pMap = pBt->pMap; + if( pMap && nSvpt>pMap->nSvpt ){ + int i; + if( nSvpt>=pMap->nSvptAlloc ){ + int nNew = pMap->nSvptAlloc ? pMap->nSvptAlloc*2 : 16; + int *aNew = sqlite3_realloc(pMap->aSvpt, sizeof(int) * nNew); + if( aNew==0 ){ + return SQLITE_NOMEM; + }else{ + pMap->aSvpt = aNew; + pMap->nSvptAlloc = nNew; + } + } + + for(i=pMap->nSvpt; iaSvpt[i] = pMap->nRollback; + } + pMap->nSvpt = nSvpt; + } + + return SQLITE_OK; +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** Rollback (if op==SAVEPOINT_ROLLBACK) or release (if op==SAVEPOINT_RELEASE) +** savepoint iSvpt. 
+*/ +static void btreePtrmapEnd(BtShared *pBt, int op, int iSvpt){ + BtreePtrmap *pMap = pBt->pMap; + if( pMap ){ + assert( op==SAVEPOINT_ROLLBACK || op==SAVEPOINT_RELEASE ); + assert( iSvpt>=0 || (iSvpt==-1 && op==SAVEPOINT_ROLLBACK) ); + if( iSvpt<0 ){ + pMap->nSvpt = 0; + pMap->nRollback = 0; + memset(pMap->aPtr, 0, sizeof(Pgno) * pMap->nPtrAlloc); + }else if( iSvptnSvpt ){ + if( op==SAVEPOINT_ROLLBACK ){ + int ii; + for(ii=pMap->nRollback-1; ii>=pMap->aSvpt[iSvpt]; ii--){ + RollbackEntry *p = &pMap->aRollback[ii]; + PtrmapEntry *pEntry = &pMap->aPtr[p->pgno - pMap->iFirst]; + pEntry->parent = p->parent; + pEntry->eType = p->eType; + } + } + pMap->nSvpt = iSvpt + (op==SAVEPOINT_ROLLBACK); + pMap->nRollback = pMap->aSvpt[iSvpt]; + } + } +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** This function is called after an CONCURRENT transaction is opened on the +** database. It allocates the BtreePtrmap structure used to track pointers +** to allocated pages and zeroes the nFree/iTrunk fields in the database +** header on page 1. +*/ +static int btreePtrmapAllocate(BtShared *pBt){ + int rc = SQLITE_OK; + if( pBt->pMap==0 ){ + BtreePtrmap *pMap = sqlite3_malloc(sizeof(BtreePtrmap)); + if( pMap==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(&pBt->pPage1->aData[32], 0, sizeof(u32)*2); + memset(pMap, 0, sizeof(BtreePtrmap)); + pMap->iFirst = pBt->nPage + 1; + pBt->pMap = pMap; + } + } + return rc; +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** Free any BtreePtrmap structure allocated by an earlier call to +** btreePtrmapAllocate(). +*/ +static void btreePtrmapDelete(BtShared *pBt){ + BtreePtrmap *pMap = pBt->pMap; + if( pMap ){ + sqlite3_free(pMap->aRollback); + sqlite3_free(pMap->aPtr); + sqlite3_free(pMap->aSvpt); + sqlite3_free(pMap); + pBt->pMap = 0; + } +} + +/* +** Check that the pointer-map does not contain any entries with a parent +** page of 0. Call sqlite3_log() multiple times to output the entire +** data structure if it does. 
+*/ +static void btreePtrmapCheck(BtShared *pBt, Pgno nPage){ + Pgno i; + int bProblem = 0; + BtreePtrmap *p = pBt->pMap; + + for(i=p->iFirst; i<=nPage; i++){ + PtrmapEntry *pEntry = &p->aPtr[i-p->iFirst]; + if( pEntry->eType==PTRMAP_OVERFLOW1 + || pEntry->eType==PTRMAP_OVERFLOW2 + || pEntry->eType==PTRMAP_BTREE + ){ + if( pEntry->parent==0 ){ + bProblem = 1; + break; + } + } + } + + if( bProblem ){ + for(i=p->iFirst; i<=nPage; i++){ + PtrmapEntry *pEntry = &p->aPtr[i-p->iFirst]; + sqlite3_log(SQLITE_CORRUPT, + "btreePtrmapCheck: pgno=%d eType=%d parent=%d", + (int)i, (int)pEntry->eType, (int)pEntry->parent + ); + } + abort(); + } +} + +#else /* SQLITE_OMIT_CONCURRENT */ +# define btreePtrmapAllocate(x) SQLITE_OK +# define btreePtrmapDelete(x) +# define btreePtrmapBegin(x,y) SQLITE_OK +# define btreePtrmapEnd(x,y,z) +# define btreePtrmapCheck(y,z) +#endif /* SQLITE_OMIT_CONCURRENT */ + +static void releasePage(MemPage *pPage); /* Forward reference */ static void releasePageOne(MemPage *pPage); /* Forward reference */ static void releasePageNotNull(MemPage *pPage); /* Forward reference */ @@ -1070,6 +1303,13 @@ static void ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent, int *pRC){ /* The super-journal page number must never be used as a pointer map page */ assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) ); +#ifndef SQLITE_OMIT_CONCURRENT + if( pBt->pMap ){ + *pRC = btreePtrmapStore(pBt, key, eType, parent); + return; + } +#endif + assert( pBt->autoVacuum ); if( key==0 ){ *pRC = SQLITE_CORRUPT_BKPT; @@ -2408,6 +2648,17 @@ static int getAndInitPage( return SQLITE_OK; } +#ifndef SQLITE_OMIT_CONCURRENT +/* +** Set the value of the MemPage.pgnoRoot variable, if it exists. +*/ +static void setMempageRoot(MemPage *pPg, u32 pgnoRoot){ + pPg->pgnoRoot = pgnoRoot; +} +#else +# define setMempageRoot(x,y) +#endif + /* ** Release a MemPage. This should be called once for each prior ** call to btreeGetPage. 
@@ -3319,10 +3570,10 @@ static int lockBtree(BtShared *pBt){ goto page1_init_failed; } #else - if( page1[18]>2 ){ + if( page1[18]>3 ){ pBt->btsFlags |= BTS_READ_ONLY; } - if( page1[19]>2 ){ + if( page1[19]>3 ){ goto page1_init_failed; } @@ -3334,9 +3585,9 @@ static int lockBtree(BtShared *pBt){ ** may not be the latest version - there may be a newer one in the log ** file. */ - if( page1[19]==2 && (pBt->btsFlags & BTS_NO_WAL)==0 ){ + if( page1[19]>=2 && (pBt->btsFlags & BTS_NO_WAL)==0 ){ int isOpen = 0; - rc = sqlite3PagerOpenWal(pBt->pPager, &isOpen); + rc = sqlite3PagerOpenWal(pBt->pPager, (page1[19]==3), &isOpen); if( rc!=SQLITE_OK ){ goto page1_init_failed; }else{ @@ -3599,6 +3850,7 @@ static SQLITE_NOINLINE int btreeBeginTrans( BtShared *pBt = p->pBt; Pager *pPager = pBt->pPager; int rc = SQLITE_OK; + int bConcurrent = (p->db->eConcurrent && !ISAUTOVACUUM(pBt)); sqlite3BtreeEnter(p); btreeIntegrity(p); @@ -3710,7 +3962,8 @@ static SQLITE_NOINLINE int btreeBeginTrans( if( (pBt->btsFlags & BTS_READ_ONLY)!=0 ){ rc = SQLITE_READONLY; }else{ - rc = sqlite3PagerBegin(pPager, wrflag>1, sqlite3TempInMemory(p->db)); + int exFlag = bConcurrent ? -1 : (wrflag>1); + rc = sqlite3PagerBegin(pPager, exFlag, sqlite3TempInMemory(p->db)); if( rc==SQLITE_OK ){ rc = newDatabase(pBt); }else if( rc==SQLITE_BUSY_SNAPSHOT && pBt->inTransaction==TRANS_NONE ){ @@ -3781,6 +4034,15 @@ static SQLITE_NOINLINE int btreeBeginTrans( } trans_begun: +#ifndef SQLITE_OMIT_CONCURRENT + if( bConcurrent && rc==SQLITE_OK && sqlite3PagerIsWal(pBt->pPager) ){ + rc = sqlite3PagerBeginConcurrent(pBt->pPager); + if( rc==SQLITE_OK && wrflag ){ + rc = btreePtrmapAllocate(pBt); + } + } +#endif + if( rc==SQLITE_OK ){ if( pSchemaVersion ){ *pSchemaVersion = get4byte(&pBt->pPage1->aData[40]); @@ -3790,7 +4052,11 @@ static SQLITE_NOINLINE int btreeBeginTrans( ** open savepoints. If the second parameter is greater than 0 and ** the sub-journal is not already open, then it will be opened here. 
*/ - rc = sqlite3PagerOpenSavepoint(pPager, p->db->nSavepoint); + int nSavepoint = p->db->nSavepoint; + rc = sqlite3PagerOpenSavepoint(pPager, nSavepoint); + if( rc==SQLITE_OK && nSavepoint ){ + rc = btreePtrmapBegin(pBt, nSavepoint); + } } } @@ -3803,6 +4069,15 @@ int sqlite3BtreeBeginTrans(Btree *p, int wrflag, int *pSchemaVersion){ if( p->sharable || p->inTrans==TRANS_NONE || (p->inTrans==TRANS_READ && wrflag!=0) +#ifndef SQLITE_OMIT_CONCURRENT + /* Always use the full version for "BEGIN CONCURRENT" transactions. This + ** is to ensure that any required calls to btreePtrmapBegin() are made. + ** These calls are not present on trunk (they're part of the + ** begin-concurrent patch), and so they are not present in the fast path + ** below. And it's easier just to call the full version every time than + ** to complicate the code below by adding btreePtrmapBegin() calls. */ + || p->db->eConcurrent!=CONCURRENT_NONE +#endif ){ return btreeBeginTrans(p,wrflag,pSchemaVersion); } @@ -4280,6 +4555,189 @@ static int autoVacuumCommit(Btree *p){ # define setChildPtrmaps(x) SQLITE_OK #endif +#ifndef SQLITE_OMIT_CONCURRENT +/* +** This function is called as part of merging an CONCURRENT transaction with +** the snapshot at the head of the wal file. It relocates all pages in the +** range iFirst..iLast, inclusive. It is assumed that the BtreePtrmap +** structure at BtShared.pMap contains the location of the pointers to each +** page in the range. +** +** If pnCurrent is NULL, then all pages in the range are moved to currently +** free locations (i.e. free-list entries) within the database file before page +** iFirst. +** +** Or, if pnCurrent is not NULL, then it points to a value containing the +** current size of the database file in pages. In this case, all pages are +** relocated to the end of the database file - page iFirst is relocated to +** page (*pnCurrent+1), page iFirst+1 to page (*pnCurrent+2), and so on. 
+** Value *pnCurrent is set to the new size of the database before this +** function returns. +** +** If no error occurs, SQLITE_OK is returned. Otherwise, an SQLite error code. +*/ +static int btreeRelocateRange( + BtShared *pBt, /* B-tree handle */ + Pgno iFirst, /* First page to relocate */ + Pgno iLast, /* Last page to relocate */ + Pgno *pnCurrent /* If not NULL, IN/OUT: Database size */ +){ + int rc = SQLITE_OK; + BtreePtrmap *pMap = pBt->pMap; + Pgno iPg; + + for(iPg=iFirst; iPg<=iLast && rc==SQLITE_OK; iPg++){ + MemPage *pFree = 0; /* Page allocated from free-list */ + MemPage *pPg = 0; + Pgno iNew; /* New page number for pPg */ + PtrmapEntry *pEntry; /* Pointer map entry for page iPg */ + + if( iPg==PENDING_BYTE_PAGE(pBt) ) continue; + pEntry = &pMap->aPtr[iPg - pMap->iFirst]; + + if( pEntry->eType==PTRMAP_FREEPAGE ){ + Pgno dummy; + rc = allocateBtreePage(pBt, &pFree, &dummy, iPg, BTALLOC_EXACT); + if( pFree ){ + assert( sqlite3PagerPageRefcount(pFree->pDbPage)==1 ); + sqlite3PcacheDrop(pFree->pDbPage); + } + assert( rc!=SQLITE_OK || dummy==iPg ); + }else if( pnCurrent ){ + btreeGetPage(pBt, iPg, &pPg, 0); + assert( sqlite3PagerIswriteable(pPg->pDbPage) ); + assert( sqlite3PagerPageRefcount(pPg->pDbPage)==1 ); + iNew = ++(*pnCurrent); + if( iNew==PENDING_BYTE_PAGE(pBt) ) iNew = ++(*pnCurrent); + rc = relocatePage(pBt, pPg, pEntry->eType, pEntry->parent, iNew, 1); + releasePageNotNull(pPg); + }else if( pEntry->eType!=0 ){ + + /* Allocate a new page from the free-list to move page iPg to. + ** Except - if the page allocated is within the range being relocated + ** (i.e. pgno>=iFirst), then discard it and allocate another. 
*/ + do { + rc = allocateBtreePage(pBt, &pFree, &iNew, 0, 0); + if( iNew>=iFirst ){ + assert( sqlite3PagerPageRefcount(pFree->pDbPage)==1 ); + assert( iNew>iPg ); + sqlite3PcacheDrop(pFree->pDbPage); + pMap->aPtr[iNew - pMap->iFirst].eType = 0; + pFree = 0; + } + }while( pFree==0 ); + + assert( rc!=SQLITE_OK || iNeweType, pEntry->parent,iNew,1); + releasePage(pPg); + } + } + } + return rc; +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** The b-tree handle passed as the only argument is about to commit an +** CONCURRENT transaction. At this point it is guaranteed that this is +** possible - the wal WRITER lock is held and it is known that there are +** no conflicts with committed transactions. +*/ +static int btreeFixUnlocked(Btree *p){ + BtShared *pBt = p->pBt; + MemPage *pPage1 = pBt->pPage1; + u8 *p1 = pPage1->aData; + Pager *pPager = pBt->pPager; + int rc = SQLITE_OK; + + /* If page 1 of the database is not writable, then no pages were allocated + ** or freed by this transaction. In this case no special handling is + ** required. Otherwise, if page 1 is dirty, proceed. */ + BtreePtrmap *pMap = pBt->pMap; + Pgno iTrunk = get4byte(&p1[32]); + Pgno nPage = btreePagecount(pBt); + u32 nFree = get4byte(&p1[36]); + + assert( pBt->pMap ); + rc = sqlite3PagerUpgradeSnapshot(pPager, pPage1->pDbPage); + assert( p1==pPage1->aData ); + + if( rc==SQLITE_OK ){ + Pgno nHPage = get4byte(&p1[28]); + Pgno nFin = nHPage; /* Size of db after transaction merge */ + + if( sqlite3PagerIswriteable(pPage1->pDbPage) ){ + Pgno iHTrunk = get4byte(&p1[32]); + u32 nHFree = get4byte(&p1[36]); + + btreePtrmapCheck(pBt, nPage); + + /* Attach the head database free list to the end of the current + ** transactions free-list (if any). 
*/ + if( iTrunk!=0 ){ + put4byte(&p1[36], nHFree + nFree); + put4byte(&p1[32], iTrunk); + while( iTrunk ){ + DbPage *pTrunk = sqlite3PagerLookup(pPager, iTrunk); + iTrunk = get4byte((u8*)pTrunk->pData); + if( iTrunk==0 ){ + put4byte((u8*)pTrunk->pData, iHTrunk); + } + sqlite3PagerUnref(pTrunk); + }; + } + + if( nHPage<(pMap->iFirst-1) ){ + /* The database consisted of (pMap->iFirst-1) pages when the current + ** concurrent transaction was opened. And an concurrent transaction may + ** not be executed on an auto-vacuum database - so the db should + ** not have shrunk since the transaction was opened. Therefore nHPage + ** should be set to (pMap->iFirst-1) or greater. */ + rc = SQLITE_CORRUPT_BKPT; + }else{ + /* The current transaction allocated pages pMap->iFirst through + ** nPage (inclusive) at the end of the database file. Meanwhile, + ** other transactions have allocated (iFirst..nHPage). So move + ** pages (iFirst..MIN(nPage,nHPage)) to (MAX(nPage,nHPage)+1). */ + Pgno iLast = MIN(nPage, nHPage); /* Last page to move */ + Pgno nCurrent; /* Current size of db */ + + nCurrent = MAX(nPage, nHPage); + pBt->nPage = nCurrent; + rc = btreeRelocateRange(pBt, pMap->iFirst, iLast, &nCurrent); + + /* There are now no collisions with the snapshot at the head of the + ** database file. So at this point it would be possible to write + ** the transaction out to disk. Before doing so though, attempt to + ** relocate some of the new pages to free locations within the body + ** of the database file (i.e. free-list entries). 
*/ + if( rc==SQLITE_OK ){ + assert( nCurrent!=PENDING_BYTE_PAGE(pBt) ); + sqlite3PagerSetDbsize(pBt->pPager, nCurrent); + nFree = get4byte(&p1[36]); + nFin = nCurrent-nFree; + if( nCurrent>PENDING_BYTE_PAGE(pBt) && nFin<=PENDING_BYTE_PAGE(pBt) ){ + nFin--; + } + nFin = MAX(nFin, nHPage); + rc = btreeRelocateRange(pBt, nFin+1, nCurrent, 0); + } + + put4byte(&p1[28], nFin); + } + } + sqlite3PagerSetDbsize(pPager, nFin); + } + + return rc; +} +#else +# define btreeFixUnlocked(X) SQLITE_OK +#endif /* SQLITE_OMIT_CONCURRENT */ + /* ** This routine does the first phase of a two-phase commit. This routine ** causes a rollback journal to be created (if it does not already exist) @@ -4311,8 +4769,13 @@ int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zSuperJrnl){ if( p->inTrans==TRANS_WRITE ){ BtShared *pBt = p->pBt; sqlite3BtreeEnter(p); + +#ifndef SQLITE_OMIT_CONCURRENT + memset(p->aCommit, 0, sizeof(p->aCommit)); +#endif #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum ){ + assert( ISCONCURRENT==0 ); rc = autoVacuumCommit(p); if( rc!=SQLITE_OK ){ sqlite3BtreeLeave(p); @@ -4323,7 +4786,25 @@ int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zSuperJrnl){ sqlite3PagerTruncateImage(pBt->pPager, pBt->nPage); } #endif - rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zSuperJrnl, 0); + if( rc==SQLITE_OK && ISCONCURRENT && p->db->eConcurrent==CONCURRENT_OPEN ){ + rc = btreeFixUnlocked(p); + } + if( rc==SQLITE_OK ){ + rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zSuperJrnl, 0); + } +#ifndef SQLITE_OMIT_CONCURRENT + if( rc==SQLITE_OK ){ + u32 iPrev = 0; + u32 iCurrent = 0; + sqlite3PagerWalInfo(pBt->pPager, &iPrev, &iCurrent); + if( (iPrev&0x80000000)!=(iCurrent&0x80000000) ){ + iPrev = (iPrev & 0x7FFFFFFF) | (iCurrent & 0x80000000); + } + + p->aCommit[SQLITE_COMMIT_FIRSTFRAME] = iPrev+1; + p->aCommit[SQLITE_COMMIT_NFRAME] = iCurrent-iPrev; + } +#endif sqlite3BtreeLeave(p); } return rc; @@ -4366,6 +4847,11 @@ static void btreeEndTransaction(Btree *p){ 
unlockBtreeIfUnused(pBt); } + /* If this was an CONCURRENT transaction, delete the pBt->pMap object. + ** Also call PagerEndConcurrent() to ensure that the pager has discarded + ** the record of all pages read within the transaction. */ + btreePtrmapDelete(pBt); + sqlite3PagerEndConcurrent(pBt->pPager); btreeIntegrity(p); } @@ -4595,6 +5081,9 @@ int sqlite3BtreeBeginStmt(Btree *p, int iStatement){ ** such savepoints while the statement transaction savepoint is active. */ rc = sqlite3PagerOpenSavepoint(pBt->pPager, iStatement); + if( rc==SQLITE_OK ){ + rc = btreePtrmapBegin(pBt, iStatement); + } sqlite3BtreeLeave(p); return rc; } @@ -4618,6 +5107,7 @@ int sqlite3BtreeSavepoint(Btree *p, int op, int iSavepoint){ assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK ); assert( iSavepoint>=0 || (iSavepoint==-1 && op==SAVEPOINT_ROLLBACK) ); sqlite3BtreeEnter(p); + btreePtrmapEnd(pBt, op, iSavepoint); if( op==SAVEPOINT_ROLLBACK ){ rc = saveAllCursors(pBt, 0, 0); } @@ -5278,6 +5768,8 @@ static int accessPayload( u8 *aWrite = &pBuf[-4]; assert( aWrite>=pBufStart ); /* due to (6) */ memcpy(aSave, aWrite, 4); + rc = sqlite3PagerUsePage(pBt->pPager, nextPage); + if( rc!=SQLITE_OK ) break; rc = sqlite3OsRead(fd, aWrite, a+4, (i64)pBt->pageSize*(nextPage-1)); nextPage = get4byte(aWrite); memcpy(aWrite, aSave, 4); @@ -5290,6 +5782,9 @@ static int accessPayload( (eOp==0 ? PAGER_GET_READONLY : 0) ); if( rc==SQLITE_OK ){ + setMempageRoot( + (MemPage*)sqlite3PagerGetExtra(pDbPage), pCur->pgnoRoot + ); aPayload = sqlite3PagerGetData(pDbPage); nextPage = get4byte(aPayload); rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage); @@ -5440,6 +5935,7 @@ const void *sqlite3BtreePayloadFetch(BtCursor *pCur, u32 *pAmt){ ** vice-versa). 
*/ static int moveToChild(BtCursor *pCur, u32 newPgno){ + BtShared *pBt = pCur->pBt; int rc; assert( cursorOwnsBtShared(pCur) ); assert( pCur->eState==CURSOR_VALID ); @@ -5454,13 +5950,14 @@ static int moveToChild(BtCursor *pCur, u32 newPgno){ pCur->apPage[pCur->iPage] = pCur->pPage; pCur->ix = 0; pCur->iPage++; - rc = getAndInitPage(pCur->pBt, newPgno, &pCur->pPage, pCur->curPagerFlags); - assert( pCur->pPage!=0 || rc!=SQLITE_OK ); - if( rc==SQLITE_OK - && (pCur->pPage->nCell<1 || pCur->pPage->intKey!=pCur->curIntKey) - ){ - releasePage(pCur->pPage); - rc = SQLITE_CORRUPT_PGNO(newPgno); + rc = getAndInitPage(pBt, newPgno, &pCur->pPage, pCur->curPagerFlags); + if( rc==SQLITE_OK ){ + assert( pCur->pPage!=0 ); + setMempageRoot(pCur->pPage, pCur->pgnoRoot); + if( pCur->pPage->nCell<1 || pCur->pPage->intKey!=pCur->curIntKey ){ + releasePage(pCur->pPage); + rc = SQLITE_CORRUPT_PGNO(newPgno); + } } if( rc ){ pCur->pPage = pCur->apPage[--pCur->iPage]; @@ -5577,6 +6074,7 @@ static int moveToRoot(BtCursor *pCur){ pCur->eState = CURSOR_INVALID; return rc; } + setMempageRoot(pCur->pPage, pCur->pgnoRoot); pCur->iPage = 0; pCur->curIntKey = pCur->pPage->intKey; } @@ -6242,6 +6740,7 @@ int sqlite3BtreeIndexMoveto( pCur->pPage = pCur->apPage[--pCur->iPage]; break; } + setMempageRoot(pCur->pPage, pCur->pgnoRoot); /* ***** End of in-lined moveToChild() call */ } @@ -6512,7 +7011,7 @@ static int allocateBtreePage( Pgno mxPage; /* Total size of the database file */ assert( sqlite3_mutex_held(pBt->mutex) ); - assert( eMode==BTALLOC_ANY || (nearby>0 && IfNotOmitAV(pBt->autoVacuum)) ); + assert( eMode==BTALLOC_ANY || (nearby>0 && REQUIRE_PTRMAP ) ); pPage1 = pBt->pPage1; mxPage = btreePagecount(pBt); /* EVIDENCE-OF: R-21003-45125 The 4-byte big-endian integer at offset 36 @@ -6522,6 +7021,15 @@ static int allocateBtreePage( if( n>=mxPage ){ return SQLITE_CORRUPT_BKPT; } + + /* Ensure page 1 is writable. 
This function will either change the number + ** of pages in the free-list or the size of the database file. Since both + ** of these operations involve modifying page 1 header fields, page 1 + ** will definitely be written by this transaction. If this is an CONCURRENT + ** transaction, ensure the BtreePtrmap structure has been allocated. */ + rc = sqlite3PagerWrite(pPage1->pDbPage); + if( rc ) return rc; + if( n>0 ){ /* There are pages on the freelist. Reuse one of those pages. */ Pgno iTrunk; @@ -6532,28 +7040,29 @@ static int allocateBtreePage( ** shows that the page 'nearby' is somewhere on the free-list, then ** the entire-list will be searched for that page. */ -#ifndef SQLITE_OMIT_AUTOVACUUM if( eMode==BTALLOC_EXACT ){ - if( nearby<=mxPage ){ - u8 eType; - assert( nearby>0 ); - assert( pBt->autoVacuum ); - rc = ptrmapGet(pBt, nearby, &eType, 0); - if( rc ) return rc; - if( eType==PTRMAP_FREEPAGE ){ - searchList = 1; + assert( ISAUTOVACUUM(pBt)!=ISCONCURRENT ); + if( ISAUTOVACUUM(pBt) ){ + if( nearby<=mxPage ){ + u8 eType; + assert( nearby>0 ); + assert( pBt->autoVacuum ); + rc = ptrmapGet(pBt, nearby, &eType, 0); + if( rc ) return rc; + if( eType==PTRMAP_FREEPAGE ){ + searchList = 1; + } } + }else{ + searchList = 1; } }else if( eMode==BTALLOC_LE ){ searchList = 1; } -#endif /* Decrement the free-list count by 1. Set iTrunk to the index of the ** first free-list trunk page. iPrevTrunk is initially 1. */ - rc = sqlite3PagerWrite(pPage1->pDbPage); - if( rc ) return rc; put4byte(&pPage1->aData[36], n-1); /* The code within this loop is run only once if the 'searchList' variable @@ -6861,7 +7370,7 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ /* If the database supports auto-vacuum, write an entry in the pointer-map ** to indicate that the page is free. 
*/ - if( ISAUTOVACUUM(pBt) ){ + if( REQUIRE_PTRMAP ){ ptrmapPut(pBt, iPage, PTRMAP_FREEPAGE, 0, &rc); if( rc ) goto freepage_out; } @@ -7195,7 +7704,7 @@ static int fillInCell( } #endif rc = allocateBtreePage(pBt, &pOvfl, &pgnoOvfl, pgnoOvfl, 0); -#ifndef SQLITE_OMIT_AUTOVACUUM + /* If the database supports auto-vacuum, and the second or subsequent ** overflow page is being allocated, add an entry to the pointer-map ** for that page now. @@ -7206,14 +7715,13 @@ static int fillInCell( ** may misinterpret the uninitialized values and delete the ** wrong pages from the database. */ - if( pBt->autoVacuum && rc==SQLITE_OK ){ + if( REQUIRE_PTRMAP && rc==SQLITE_OK ){ u8 eType = (pgnoPtrmap?PTRMAP_OVERFLOW2:PTRMAP_OVERFLOW1); ptrmapPut(pBt, pgnoOvfl, eType, pgnoPtrmap, &rc); if( rc ){ releasePage(pOvfl); } } -#endif if( rc ){ releasePage(pToRelease); return rc; @@ -7357,6 +7865,7 @@ static int insertCell( assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */ assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */ }else{ + BtShared *pBt = pPage->pBt; int rc = sqlite3PagerWrite(pPage->pDbPage); if( NEVER(rc!=SQLITE_OK) ){ return rc; @@ -7387,7 +7896,7 @@ static int insertCell( if( (++data[pPage->hdrOffset+4])==0 ) data[pPage->hdrOffset+3]++; assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell || CORRUPT_DB ); #ifndef SQLITE_OMIT_AUTOVACUUM - if( pPage->pBt->autoVacuum ){ + if( REQUIRE_PTRMAP ){ int rc2 = SQLITE_OK; /* The cell may contain a pointer to an overflow page. If so, write ** the entry for the overflow page into the pointer map. 
@@ -7447,6 +7956,7 @@ static int insertCellFast( assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */ assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */ }else{ + BtShared *pBt = pPage->pBt; int rc = sqlite3PagerWrite(pPage->pDbPage); if( rc!=SQLITE_OK ){ return rc; @@ -7460,7 +7970,7 @@ static int insertCellFast( ** if it returns successfully */ assert( idx >= 0 ); assert( idx >= pPage->cellOffset+2*pPage->nCell+2 || CORRUPT_DB ); - assert( idx+sz <= (int)pPage->pBt->usableSize ); + assert( idx+sz <= (int)pBt->usableSize ); pPage->nFree -= (u16)(2 + sz); memcpy(&data[idx], pCell, sz); pIns = pPage->aCellIdx + i*2; @@ -7470,8 +7980,7 @@ static int insertCellFast( /* increment the cell count */ if( (++data[pPage->hdrOffset+4])==0 ) data[pPage->hdrOffset+3]++; assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell || CORRUPT_DB ); -#ifndef SQLITE_OMIT_AUTOVACUUM - if( pPage->pBt->autoVacuum ){ + if( REQUIRE_PTRMAP ){ int rc2 = SQLITE_OK; /* The cell may contain a pointer to an overflow page. If so, write ** the entry for the overflow page into the pointer map. @@ -7479,7 +7988,6 @@ static int insertCellFast( ptrmapPutOvflPtr(pPage, pPage, pCell, &rc2); if( rc2 ) return rc2; } -#endif } return SQLITE_OK; } @@ -8043,7 +8551,7 @@ static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){ ** be marked as dirty. Returning an error code will cause a ** rollback, undoing any changes made to the parent page. */ - if( ISAUTOVACUUM(pBt) ){ + if( REQUIRE_PTRMAP ){ ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno, &rc); if( szCell>pNew->minLocal ){ ptrmapPutOvflPtr(pNew, pNew, pCell, &rc); @@ -8181,7 +8689,7 @@ static void copyNodeContent(MemPage *pFrom, MemPage *pTo, int *pRC){ /* If this is an auto-vacuum database, update the pointer-map entries ** for any b-tree or overflow pages that pTo now contains the pointers to. 
*/ - if( ISAUTOVACUUM(pBt) ){ + if( REQUIRE_PTRMAP ){ *pRC = setChildPtrmaps(pTo); } } @@ -8232,7 +8740,8 @@ static int balance_nonroot( int iParentIdx, /* Index of "the page" in pParent */ u8 *aOvflSpace, /* page-size bytes of space for parent ovfl */ int isRoot, /* True if pParent is a root-page */ - int bBulk /* True if this call is part of a bulk load */ + int bBulk, /* True if this call is part of a bulk load */ + Pgno pgnoRoot /* Root page of b-tree being balanced */ ){ BtShared *pBt; /* The whole database */ int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */ @@ -8329,6 +8838,7 @@ static int balance_nonroot( goto balance_cleanup; } } + setMempageRoot(apOld[i], pgnoRoot); nMaxCells += apOld[i]->nCell + ArraySize(pParent->apOvfl); if( (i--)==0 ) break; @@ -8671,7 +9181,7 @@ static int balance_nonroot( cntOld[i] = b.nCell; /* Set the pointer-map entry for the new sibling page. */ - if( ISAUTOVACUUM(pBt) ){ + if( REQUIRE_PTRMAP ){ ptrmapPut(pBt, pNew->pgno, PTRMAP_BTREE, pParent->pgno, &rc); if( rc!=SQLITE_OK ){ goto balance_cleanup; @@ -8769,7 +9279,7 @@ static int balance_nonroot( ** updated. This happens below, after the sibling pages have been ** populated, not here. */ - if( ISAUTOVACUUM(pBt) ){ + if( REQUIRE_PTRMAP ){ MemPage *pOld; MemPage *pNew = pOld = apNew[0]; int cntOldNext = pNew->nCell + pNew->nOverflow; @@ -8965,7 +9475,7 @@ static int balance_nonroot( ); copyNodeContent(apNew[0], pParent, &rc); freePage(apNew[0], &rc); - }else if( ISAUTOVACUUM(pBt) && !leafCorrection ){ + }else if( REQUIRE_PTRMAP && !leafCorrection ){ /* Fix the pointer map entries associated with the right-child of each ** sibling page. All other pointer map entries have already been taken ** care of. 
*/ @@ -9048,7 +9558,7 @@ static int balance_deeper(MemPage *pRoot, MemPage **ppChild){ if( rc==SQLITE_OK ){ rc = allocateBtreePage(pBt,&pChild,&pgnoChild,pRoot->pgno,0); copyNodeContent(pRoot, pChild, &rc); - if( ISAUTOVACUUM(pBt) ){ + if( REQUIRE_PTRMAP ){ ptrmapPut(pBt, pgnoChild, PTRMAP_BTREE, pRoot->pgno, &rc); } } @@ -9211,7 +9721,7 @@ static int balance(BtCursor *pCur){ */ u8 *pSpace = sqlite3PageMalloc(pCur->pBt->pageSize); rc = balance_nonroot(pParent, iIdx, pSpace, iPage==1, - pCur->hints&BTREE_BULKLOAD); + pCur->hints&BTREE_BULKLOAD, pCur->pgnoRoot); if( pFree ){ /* If pFree is not NULL, it points to the pSpace buffer used ** by a previous call to balance_nonroot(). Its contents are @@ -9319,6 +9829,7 @@ static SQLITE_NOINLINE int btreeOverwriteOverflowCell( do{ rc = btreeGetPage(pBt, ovflPgno, &pPage, 0); if( rc ) return rc; + setMempageRoot(pPage, pCur->pgnoRoot); if( sqlite3PagerPageRefcount(pPage->pDbPage)!=1 || pPage->isInit ){ rc = SQLITE_CORRUPT_PAGE(pPage); }else{ @@ -9592,6 +10103,7 @@ int sqlite3BtreeInsert( pCur->info.nSize = 0; if( loc==0 ){ CellInfo info; + BtShared *pBt = p->pBt; assert( idx>=0 ); if( idx>=pPage->nCell ){ return SQLITE_CORRUPT_PAGE(pPage); @@ -9608,7 +10120,7 @@ int sqlite3BtreeInsert( testcase( pCur->curFlags & BTCF_ValidOvfl ); invalidateOverflowCache(pCur); if( info.nSize==szNew && info.nLocal==info.nPayload - && (!ISAUTOVACUUM(p->pBt) || szNewminLocal) + && (!REQUIRE_PTRMAP || szNewminLocal) ){ /* Overwrite the old cell with the new if they are the same size. 
** We could also try to do this if the old cell is smaller, then add @@ -9773,6 +10285,9 @@ int sqlite3BtreeTransferRow(BtCursor *pDest, BtCursor *pSrc, i64 iKey){ pPageIn = 0; rc = sqlite3PagerGet(pSrcPager, ovflIn, &pPageIn, PAGER_GET_READONLY); if( rc==SQLITE_OK ){ + setMempageRoot( + (MemPage*)sqlite3PagerGetExtra(pPageIn), pSrc->pgnoRoot + ); aIn = (const u8*)sqlite3PagerGetData(pPageIn); ovflIn = get4byte(aIn); aIn += 4; @@ -10197,7 +10712,8 @@ static int clearDatabasePage( BtShared *pBt, /* The BTree that contains the table */ Pgno pgno, /* Page number to clear */ int freePageFlag, /* Deallocate page if true */ - i64 *pnChange /* Add number of Cells freed to this counter */ + i64 *pnChange, /* Add number of Cells freed to this counter */ + Pgno pgnoRoot ){ MemPage *pPage; int rc; @@ -10212,6 +10728,7 @@ static int clearDatabasePage( } rc = getAndInitPage(pBt, pgno, &pPage, 0); if( rc ) return rc; + setMempageRoot(pPage, pgnoRoot); if( (pBt->openFlags & BTREE_SINGLE)==0 && sqlite3PagerPageRefcount(pPage->pDbPage) != (1 + (pgno==1)) ){ @@ -10222,14 +10739,16 @@ static int clearDatabasePage( for(i=0; inCell; i++){ pCell = findCell(pPage, i); if( !pPage->leaf ){ - rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange); + rc = clearDatabasePage(pBt, get4byte(pCell), 1, pnChange, pgnoRoot); if( rc ) goto cleardatabasepage_out; } BTREE_CLEAR_CELL(rc, pPage, pCell, info); if( rc ) goto cleardatabasepage_out; } if( !pPage->leaf ){ - rc = clearDatabasePage(pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange); + rc = clearDatabasePage( + pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange, pgnoRoot + ); if( rc ) goto cleardatabasepage_out; if( pPage->intKey ) pnChange = 0; } @@ -10275,7 +10794,7 @@ int sqlite3BtreeClearTable(Btree *p, int iTable, i64 *pnChange){ if( p->hasIncrblobCur ){ invalidateIncrblobCursors(p, (Pgno)iTable, 0, 1); } - rc = clearDatabasePage(pBt, (Pgno)iTable, 0, pnChange); + rc = clearDatabasePage(pBt, (Pgno)iTable, 0, pnChange, (Pgno)iTable); } 
sqlite3BtreeLeave(p); return rc; @@ -11231,10 +11750,12 @@ int sqlite3BtreeIntegrityCheck( } pBt->db->flags = savedDbFlags; - /* Make sure every page in the file is referenced - */ + /* Make sure every page in the file is referenced. Skip this if the + ** database is currently being written by a CONCURRENT transaction (it + ** may fail as pages that were part of the free-list when the transaction + ** was opened cannot be counted). */ if( !bPartial ){ - for(i=1; i<=sCheck.nCkPage && sCheck.mxErr; i++){ + for(i=1; ISCONCURRENT==0 && i<=sCheck.nCkPage && sCheck.mxErr; i++){ #ifdef SQLITE_OMIT_AUTOVACUUM if( getPageReferenced(&sCheck, i)==0 ){ checkAppendMsg(&sCheck, "Page %u: never used", i); @@ -11490,7 +12011,7 @@ int sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){ BtShared *pBt = pBtree->pBt; int rc; /* Return code */ - assert( iVersion==1 || iVersion==2 ); + assert( iVersion==1 || iVersion==2 || iVersion==3 ); /* If setting the version fields to 1, do not automatically open the ** WAL connection, even if the version fields are currently set to 2. @@ -11537,6 +12058,109 @@ int sqlite3BtreeIsReadonly(Btree *p){ */ int sqlite3HeaderSizeBtree(void){ return ROUND8(sizeof(MemPage)); } +/* +** This function is called to ensure that all locks required to commit the +** current write-transaction to the database file are held. If the db is +** in rollback mode, this means the EXCLUSIVE lock on the database file. +** +** Or, if this is an CONCURRENT transaction on a wal-mode database, the WRITER +** lock on the wal file. In this case this function also checks that the +** CONCURRENT transaction can be safely committed (does not commit with any +** other transaction committed since it was opened). +** +** SQLITE_OK is returned if successful. SQLITE_BUSY if the required locks +** cannot be obtained due to a conflicting lock. 
If the locks cannot be +** obtained for an CONCURRENT transaction due to a conflict with an already +** committed transaction, SQLITE_BUSY_SNAPSHOT is returned. Otherwise, if +** some other error (OOM, IO, etc.) occurs, the relevant SQLite error code +** is returned. +*/ +int sqlite3BtreeExclusiveLock(Btree *p){ + sqlite3 *db = p->db; + int rc; + Pgno pgno = 0; + BtShared *pBt = p->pBt; + assert( p->inTrans==TRANS_WRITE && pBt->pPage1 ); + memset(db->aCommit, 0, sizeof(db->aCommit)); + sqlite3BtreeEnter(p); + rc = sqlite3PagerExclusiveLock(pBt->pPager, + (db->eConcurrent==CONCURRENT_SCHEMA) ? 0 : pBt->pPage1->pDbPage, + db->aCommit + ); +#ifdef SQLITE_OMIT_CONCURRENT + assert( db->aCommit[SQLITE_COMMIT_CONFLICT_PGNO]==0 ); +#else + if( (rc==SQLITE_BUSY_SNAPSHOT) + && (pgno = db->aCommit[SQLITE_COMMIT_CONFLICT_PGNO]) + ){ + int iDb; + PgHdr *pPg = 0; + for(iDb=0; db->aDb[iDb].pBt!=p; iDb++); + db->aCommit[SQLITE_COMMIT_CONFLICT_DB] = (u32)iDb; + (void)sqlite3PagerGet(pBt->pPager, pgno, &pPg, 0); + if( pPg ){ + int bWrite = -1; + const char *zObj = 0; + const char *zTab = 0; + char zContent[17]; + + if( pPg ){ + Pgno pgnoRoot = 0; + HashElem *pE; + Schema *pSchema; + u8 *aData = (u8*)sqlite3PagerGetData(pPg); + int i; + for(i=0; i<8; i++){ + static const char hexdigits[] = { + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' + }; + zContent[i*2] = hexdigits[(aData[i] >> 4)]; + zContent[i*2+1] = hexdigits[(aData[i] & 0xF)]; + } + zContent[16] = '\0'; + + pgnoRoot = ((MemPage*)sqlite3PagerGetExtra(pPg))->pgnoRoot; + bWrite = sqlite3PagerIswriteable(pPg); + sqlite3PagerUnref(pPg); + + pSchema = sqlite3SchemaGet(p->db, p); + if( pSchema ){ + for(pE=sqliteHashFirst(&pSchema->tblHash); pE; pE=sqliteHashNext(pE)){ + Table *pTab = (Table *)sqliteHashData(pE); + if( pTab->tnum==pgnoRoot ){ + zObj = pTab->zName; + zTab = 0; + }else{ + Index *pIdx; + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + if( pIdx->tnum==pgnoRoot ){ + zObj = 
pIdx->zName; + zTab = pTab->zName; + } + } + } + } + } + } + + sqlite3_log(SQLITE_OK, + "cannot commit CONCURRENT transaction " + "- conflict at page %d " + "(%s page; part of db %s %s%s%s; content=%s...)", + (int)pgno, + (bWrite==0?"read-only":(bWrite>0?"read/write":"unknown")), + (zTab ? "index" : "table"), + (zTab ? zTab : ""), (zTab ? "." : ""), (zObj ? zObj : "UNKNOWN"), + zContent + ); + } + } +#endif + sqlite3BtreeLeave(p); + return rc; +} + /* ** If no transaction is active and the database is not a temp-db, clear ** the in-memory pager cache. @@ -11566,3 +12190,37 @@ int sqlite3BtreeConnectionCount(Btree *p){ return p->pBt->nRef; } #endif + +/* +** Access details of recent COMMIT commands. This function allows various +** details related to the most recent COMMIT command to be accessed. +** The requested value is always returned via output parameter (*piVal). +** The specific value requested is identified by parameter op (see +** below). +** +** SQLITE_OK is returned if successful, or SQLITE_ERROR if the "op" or +** "zDb" parameters are unrecognized. +*/ +int sqlite3_commit_status( + sqlite3 *db, /* Database handle */ + const char *zDb, /* Name of database - "main" etc.
*/ + int op, /* SQLITE_COMMIT_XXX constant */ + unsigned int *piVal /* OUT: Write requested value here */ +){ + int rc = SQLITE_OK; +#ifndef SQLITE_OMIT_CONCURRENT + if( op<0 || op>SQLITE_COMMIT_CONFLICT_PGNO ){ + rc = SQLITE_ERROR; + }else if( op==SQLITE_COMMIT_FIRSTFRAME || op==SQLITE_COMMIT_NFRAME ){ + int iDb = sqlite3FindDbName(db, zDb); + if( iDb<0 ){ + rc = SQLITE_ERROR; + }else{ + *piVal = db->aDb[iDb].pBt->aCommit[op]; + } + }else{ + *piVal = db->aCommit[op]; + } +#endif + return rc; +} diff --git a/src/btree.h b/src/btree.h index 96f4c4c607..7bf113c892 100644 --- a/src/btree.h +++ b/src/btree.h @@ -361,6 +361,8 @@ sqlite3_uint64 sqlite3BtreeSeekCount(Btree*); # define sqlite3BtreeSeekCount(X) 0 #endif +int sqlite3BtreeExclusiveLock(Btree *pBt); + #ifndef NDEBUG int sqlite3BtreeCursorIsValid(BtCursor*); #endif @@ -421,5 +423,4 @@ void sqlite3BtreeClearCache(Btree*); # define sqlite3SchemaMutexHeld(X,Y,Z) 1 #endif - #endif /* SQLITE_BTREE_H */ diff --git a/src/btreeInt.h b/src/btreeInt.h index 17e3a1add5..431b1e2c09 100644 --- a/src/btreeInt.h +++ b/src/btreeInt.h @@ -232,6 +232,7 @@ typedef struct MemPage MemPage; typedef struct BtLock BtLock; typedef struct CellInfo CellInfo; +typedef struct BtreePtrmap BtreePtrmap; /* ** This is a magic string that appears at the beginning of every @@ -275,6 +276,9 @@ struct MemPage { u8 intKey; /* True if table b-trees. False for index b-trees */ u8 intKeyLeaf; /* True if the leaf of an intKey table */ Pgno pgno; /* Page number for this page */ +#ifndef SQLITE_OMIT_CONCURRENT + Pgno pgnoRoot; /* Root page of b-tree that this page belongs to */ +#endif /* Only the first 8 bytes (above) are zeroed by pager.c when a new page ** is allocated. 
All fields that follow must be initialized before use */ u8 leaf; /* True if a leaf page */ @@ -360,6 +364,12 @@ struct Btree { #ifndef SQLITE_OMIT_SHARED_CACHE BtLock lock; /* Object used to lock page 1 */ #endif +#ifndef SQLITE_OMIT_CONCURRENT + /* Return values for sqlite3_commit_status() requests: + ** SQLITE_COMMIT_FIRSTFRAME, COMMIT_NFRAME. + */ + u32 aCommit[2]; +#endif }; /* @@ -456,6 +466,9 @@ struct BtShared { Btree *pWriter; /* Btree with currently open write transaction */ #endif u8 *pTmpSpace; /* Temp space sufficient to hold a single cell */ +#ifndef SQLITE_OMIT_CONCURRENT + BtreePtrmap *pMap; +#endif int nPreformatSize; /* Size of last cell written by TransferRow() */ }; @@ -679,12 +692,19 @@ struct BtCursor { ** (sqliteMallocRaw), it is not possible to use conditional compilation. ** So, this macro is defined instead. */ -#ifndef SQLITE_OMIT_AUTOVACUUM +#ifdef SQLITE_OMIT_AUTOVACUUM +#define ISAUTOVACUUM(pBt) 0 +#else #define ISAUTOVACUUM(pBt) (pBt->autoVacuum) +#endif + +#ifdef SQLITE_OMIT_CONCURRENT +# define ISCONCURRENT 0 #else -#define ISAUTOVACUUM(pBt) 0 +# define ISCONCURRENT (pBt->pMap!=0) #endif +#define REQUIRE_PTRMAP (ISAUTOVACUUM(pBt) || ISCONCURRENT) /* ** This structure is passed around through all the PRAGMA integrity_check diff --git a/src/build.c b/src/build.c index 9af0a1635c..c4d36f03ce 100644 --- a/src/build.c +++ b/src/build.c @@ -5249,7 +5249,7 @@ void sqlite3BeginTransaction(Parse *pParse, int type){ } v = sqlite3GetVdbe(pParse); if( !v ) return; - if( type!=TK_DEFERRED ){ + if( type==TK_IMMEDIATE || type==TK_EXCLUSIVE ){ for(i=0; i<db->nDb; i++){ int eTxnType; Btree *pBt = db->aDb[i].pBt; @@ -5264,7 +5264,7 @@ void sqlite3BeginTransaction(Parse *pParse, int type){ sqlite3VdbeUsesBtree(v, i); } } - sqlite3VdbeAddOp0(v, OP_AutoCommit); + sqlite3VdbeAddOp3(v, OP_AutoCommit, 0, 0, (type==TK_CONCURRENT)); } /* diff --git a/src/callback.c b/src/callback.c index e6418097f6..98d58a1bbe 100644 --- a/src/callback.c +++ b/src/callback.c @@
-521,6 +521,10 @@ void sqlite3SchemaClear(void *p){ pSchema->iGeneration++; } pSchema->schemaFlags &= ~(DB_SchemaLoaded|DB_ResetWanted); +#ifdef SQLITE_ENABLE_STAT4 + sqlite3_free(pSchema->pStat4Space); + pSchema->pStat4Space = 0; +#endif } /* diff --git a/src/func.c b/src/func.c index 6dac7195a4..7afb9fcf38 100644 --- a/src/func.c +++ b/src/func.c @@ -559,8 +559,9 @@ static void randomFunc( sqlite3_value **NotUsed2 ){ sqlite_int64 r; + sqlite3 *db = sqlite3_context_db_handle(context); UNUSED_PARAMETER2(NotUsed, NotUsed2); - sqlite3_randomness(sizeof(r), &r); + sqlite3FastRandomness(&db->sPrng, sizeof(r), &r); if( r<0 ){ /* We need to prevent a random number of 0x8000000000000000 ** (or -9223372036854775808) since when you do abs() of that @@ -586,6 +587,7 @@ static void randomBlob( ){ sqlite3_int64 n; unsigned char *p; + sqlite3 *db = sqlite3_context_db_handle(context); assert( argc==1 ); UNUSED_PARAMETER(argc); n = sqlite3_value_int64(argv[0]); @@ -594,7 +596,7 @@ static void randomBlob( } p = contextMalloc(context, n); if( p ){ - sqlite3_randomness(n, p); + sqlite3FastRandomness(&db->sPrng, n, p); sqlite3_result_blob(context, (char*)p, n, sqlite3_free); } } diff --git a/src/main.c b/src/main.c index 8a2b43b2f9..04b781e4d6 100644 --- a/src/main.c +++ b/src/main.c @@ -3399,7 +3399,7 @@ static int openDatabase( db->aDb = db->aDbStatic; db->lookaside.bDisable = 1; db->lookaside.sz = 0; - + sqlite3FastPrngInit(&db->sPrng); assert( sizeof(db->aLimit)==sizeof(aHardLimit) ); memcpy(db->aLimit, aHardLimit, sizeof(db->aLimit)); db->aLimit[SQLITE_LIMIT_WORKER_THREADS] = SQLITE_DEFAULT_WORKER_THREADS; @@ -5121,6 +5121,35 @@ void sqlite3_snapshot_free(sqlite3_snapshot *pSnapshot){ } #endif /* SQLITE_ENABLE_SNAPSHOT */ +SQLITE_EXPERIMENTAL int sqlite3_wal_info( + sqlite3 *db, const char *zDb, + unsigned int *pnPrior, unsigned int *pnFrame +){ + int rc = SQLITE_OK; + +#ifndef SQLITE_OMIT_WAL + Btree *pBt; + int iDb; + +#ifdef SQLITE_ENABLE_API_ARMOR + if( 
!sqlite3SafetyCheckOk(db) ){ + return SQLITE_MISUSE_BKPT; + } +#endif + + sqlite3_mutex_enter(db->mutex); + iDb = sqlite3FindDbName(db, zDb); + if( iDb<0 ){ + return SQLITE_ERROR; + } + pBt = db->aDb[iDb].pBt; + rc = sqlite3PagerWalInfo(sqlite3BtreePager(pBt), pnPrior, pnFrame); + sqlite3_mutex_leave(db->mutex); +#endif /* SQLITE_OMIT_WAL */ + + return rc; +} + #ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS /* ** Given the name of a compile-time option, return true if that option diff --git a/src/os_unix.c b/src/os_unix.c index d73d899241..394339b69f 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -46,6 +46,16 @@ #include "sqliteInt.h" #if SQLITE_OS_UNIX /* This file is used on unix only */ +/* Turn this feature on in all builds for now */ +#define SQLITE_MUTEXFREE_SHMLOCK 1 +#define SQLITE_MFS_EXCLUSIVE 255 +#ifndef SQLITE_MFS_NSHARD +# define SQLITE_MFS_NSHARD 8 +#endif +#if SQLITE_MFS_NSHARD<1 +# error "SQLITE_MFS_NSHARD must be greater than 0" +#endif + /* ** There are various methods for file locking used for concurrency ** control: @@ -1339,6 +1349,10 @@ struct unixInodeInfo { sem_t *pSem; /* Named POSIX semaphore */ char aSemName[MAX_PATHNAME+2]; /* Name of that semaphore */ #endif +#ifdef SQLITE_SHARED_MAPPING + sqlite3_int64 nSharedMapping; /* Size of mapped region in bytes */ + void *pSharedMapping; /* Memory mapped region */ +#endif }; /* @@ -1495,6 +1509,13 @@ static void releaseInodeInfo(unixFile *pFile){ pInode->nRef--; if( pInode->nRef==0 ){ assert( pInode->pShmNode==0 ); +#ifdef SQLITE_SHARED_MAPPING + if( pInode->pSharedMapping ){ + osMunmap(pInode->pSharedMapping, pInode->nSharedMapping); + pInode->pSharedMapping = 0; + pInode->nSharedMapping = 0; + } +#endif sqlite3_mutex_enter(pInode->pLockMutex); closePendingFds(pFile); sqlite3_mutex_leave(pInode->pLockMutex); @@ -2408,6 +2429,14 @@ static int nolockUnlock(sqlite3_file *NotUsed, int NotUsed2){ ** Close the file. 
*/ static int nolockClose(sqlite3_file *id) { +#ifdef SQLITE_SHARED_MAPPING + unixFile *pFd = (unixFile*)id; + if( pFd->pInode ){ + unixEnterMutex(); + releaseInodeInfo(pFd); + unixLeaveMutex(); + } +#endif return closeUnixFile(id); } @@ -4242,6 +4271,9 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ *(i64*)pArg = pFile->mmapSizeMax; if( newLimit>=0 && newLimit!=pFile->mmapSizeMax && pFile->nFetchOut==0 ){ pFile->mmapSizeMax = newLimit; +#ifdef SQLITE_SHARED_MAPPING + if( pFile->pInode==0 ) +#endif if( pFile->mmapSize>0 ){ unixUnmapfile(pFile); rc = unixMapfile(pFile, -1); @@ -4572,8 +4604,41 @@ struct unixShmNode { #ifdef SQLITE_DEBUG u8 nextShmId; /* Next available unixShm.id value */ #endif + +#ifdef SQLITE_MUTEXFREE_SHMLOCK + /* In unix-excl mode, if SQLITE_MUTEXFREE_SHMLOCK is defined, all locks + ** are stored in the following 64-bit value. There are in total 8 + ** shm-locking slots, each of which are assigned 8-bits from the 64-bit + ** value. The least-significant 8 bits correspond to shm-locking slot + ** 0, and so on. + ** + ** If the 8-bits corresponding to a shm-locking locking slot are set to + ** 0xFF, then a write-lock is held on the slot. Or, if they are set to + ** a non-zero value smaller than 0xFF, then they represent the total + ** number of read-locks held on the slot. There is no way to distinguish + ** between a write-lock and 255 read-locks. */ + struct LockingSlot { + u32 nLock; + u64 aPadding[7]; + } aMFSlot[3 + SQLITE_MFS_NSHARD*5]; +#endif }; +/* +** Atomic CAS primitive used in multi-process mode. Equivalent to: +** +** int unixCompareAndSwap(u32 *ptr, u32 oldval, u32 newval){ +** if( *ptr==oldval ){ +** *ptr = newval; +** return 1; +** } +** return 0; +** } +*/ +#define unixCompareAndSwap(ptr,oldval,newval) \ + __sync_bool_compare_and_swap(ptr,oldval,newval) + + /* ** Structure used internally by this VFS to record the state of an ** open shared memory connection. 
@@ -4594,6 +4659,9 @@ struct unixShm { u8 id; /* Id of this connection within its unixShmNode */ u16 sharedMask; /* Mask of shared locks held */ u16 exclMask; /* Mask of exclusive locks held */ +#ifdef SQLITE_MUTEXFREE_SHMLOCK + u8 aMFCurrent[8]; /* Current slot used for each shared lock */ +#endif }; /* @@ -5238,6 +5306,87 @@ static int unixShmMap( return rc; } +#ifdef SQLITE_MUTEXFREE_SHMLOCK +static int unixMutexFreeShmlock( + unixFile *pFd, /* Database file holding the shared memory */ + int ofst, /* First lock to acquire or release */ + int n, /* Number of locks to acquire or release */ + int flags /* What to do with the lock */ +){ + struct LockMapEntry { + int iFirst; + int nSlot; + } aMap[9] = { + { 0, 1 }, + { 1, 1 }, + { 2, 1 }, + { 3+0*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD }, + { 3+1*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD }, + { 3+2*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD }, + { 3+3*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD }, + { 3+4*SQLITE_MFS_NSHARD, SQLITE_MFS_NSHARD }, + { 3+5*SQLITE_MFS_NSHARD, 0 }, + }; + + unixShm *p = pFd->pShm; /* The shared memory being locked */ + unixShmNode *pShmNode = p->pShmNode; /* The underlying file iNode */ + + if( flags & SQLITE_SHM_SHARED ){ + /* SHARED locks */ + u32 iOld, iNew, *ptr; + int iIncr = -1; + if( (flags & SQLITE_SHM_UNLOCK)==0 ){ + p->aMFCurrent[ofst] = (p->aMFCurrent[ofst] + 1) % aMap[ofst].nSlot; + iIncr = 1; + } + ptr = &pShmNode->aMFSlot[aMap[ofst].iFirst + p->aMFCurrent[ofst]].nLock; + do { + iOld = *ptr; + iNew = iOld + iIncr; + if( iNew>SQLITE_MFS_EXCLUSIVE ){ + return SQLITE_BUSY; + } + }while( 0==unixCompareAndSwap(ptr, iOld, iNew) ); + }else{ + /* EXCLUSIVE locks */ + u16 mask = (1<<(ofst+n)) - (1<exclMask) ){ + int iFirst = aMap[ofst].iFirst; + int iLast = aMap[ofst+n].iFirst; + int i; + for(i=iFirst; iaMFSlot[i].nLock; + if( flags & SQLITE_SHM_UNLOCK ){ + assert( (*ptr)==SQLITE_MFS_EXCLUSIVE ); + *ptr = 0; + }else{ + u32 iOld; + do { + iOld = *ptr; + if( iOld>0 ){ + while( i>iFirst ){ + i--; + 
pShmNode->aMFSlot[i].nLock = 0; + } + return SQLITE_BUSY; + } + }while( 0==unixCompareAndSwap(ptr, iOld, SQLITE_MFS_EXCLUSIVE) ); + } + } + if( flags & SQLITE_SHM_UNLOCK ){ + p->exclMask &= ~mask; + }else{ + p->exclMask |= mask; + } + } + } + + return SQLITE_OK; +} +#else +# define unixMutexFreeShmlock(a,b,c,d) SQLITE_OK +#endif + /* ** Check that the pShmNode->aLock[] array comports with the locking bitmasks ** held by each client. Return true if it does, or false otherwise. This @@ -5312,6 +5461,11 @@ static int unixShmLock( assert( pShmNode->hShm>=0 || pDbFd->pInode->bProcessLock==1 ); assert( pShmNode->hShm<0 || pDbFd->pInode->bProcessLock==0 ); + if( pDbFd->pInode->bProcessLock ){ + return unixMutexFreeShmlock(pDbFd, ofst, n, flags); + } + + /* Check that, if this to be a blocking lock, no locks that occur later ** in the following list than the lock being obtained are already held: ** @@ -5485,12 +5639,16 @@ static void unixShmBarrier( sqlite3_file *fd /* Database file holding the shared memory */ ){ UNUSED_PARAMETER(fd); +#ifdef SQLITE_MUTEXFREE_SHMLOCK + __sync_synchronize(); +#else sqlite3MemoryBarrier(); /* compiler-defined memory barrier */ assert( fd->pMethods->xLock==nolockLock || unixFileMutexNotheld((unixFile*)fd) ); unixEnterMutex(); /* Also mutex, for redundancy */ unixLeaveMutex(); +#endif } /* @@ -5559,6 +5717,9 @@ static int unixShmUnmap( */ static void unixUnmapfile(unixFile *pFd){ assert( pFd->nFetchOut==0 ); +#ifdef SQLITE_SHARED_MAPPING + if( pFd->pInode ) return; +#endif if( pFd->pMapRegion ){ osMunmap(pFd->pMapRegion, pFd->mmapSizeActual); pFd->pMapRegion = 0; @@ -5690,6 +5851,28 @@ static int unixMapfile(unixFile *pFd, i64 nMap){ nMap = pFd->mmapSizeMax; } +#ifdef SQLITE_SHARED_MAPPING + if( pFd->pInode ){ + unixInodeInfo *pInode = pFd->pInode; + if( pFd->pMapRegion ) return SQLITE_OK; + unixEnterMutex(); + if( pInode->pSharedMapping==0 ){ + u8 *pNew = osMmap(0, nMap, PROT_READ, MAP_SHARED, pFd->h, 0); + if( pNew==MAP_FAILED ){ + 
unixLogError(SQLITE_OK, "mmap", pFd->zPath); + pFd->mmapSizeMax = 0; + }else{ + pInode->pSharedMapping = pNew; + pInode->nSharedMapping = nMap; + } + } + pFd->pMapRegion = pInode->pSharedMapping; + pFd->mmapSizeActual = pFd->mmapSize = pInode->nSharedMapping; + unixLeaveMutex(); + return SQLITE_OK; + } +#endif + assert( nMap>0 || (pFd->mmapSize==0 && pFd->pMapRegion==0) ); if( nMap!=pFd->mmapSize ){ unixRemapfile(pFd, nMap); @@ -6132,6 +6315,9 @@ static int fillInUnixFile( if( pLockingStyle == &posixIoMethods #if defined(__APPLE__) && SQLITE_ENABLE_LOCKING_STYLE || pLockingStyle == &nfsIoMethods +#endif +#ifdef SQLITE_SHARED_MAPPING + || pLockingStyle == &nolockIoMethods #endif ){ unixEnterMutex(); diff --git a/src/pager.c b/src/pager.c index 1e03b87ec0..1259c711b8 100644 --- a/src/pager.c +++ b/src/pager.c @@ -658,6 +658,9 @@ struct Pager { u32 cksumInit; /* Quasi-random value added to every checksum */ u32 nSubRec; /* Number of records written to sub-journal */ Bitvec *pInJournal; /* One bit for each page in the database file */ +#ifndef SQLITE_OMIT_CONCURRENT + Bitvec *pAllRead; /* Pages read within current CONCURRENT trans. 
*/ +#endif sqlite3_file *fd; /* File descriptor for database */ sqlite3_file *jfd; /* File descriptor for main journal */ sqlite3_file *sjfd; /* File descriptor for sub-journal */ @@ -913,7 +916,9 @@ static int assert_pager_state(Pager *p){ if( !pagerUseWal(pPager) ){ assert( p->eLock>=RESERVED_LOCK ); } - assert( pPager->dbSize==pPager->dbOrigSize ); +#ifndef SQLITE_OMIT_CONCURRENT + assert( pPager->dbSize==pPager->dbOrigSize || pPager->pAllRead ); +#endif assert( pPager->dbOrigSize==pPager->dbFileSize ); assert( pPager->dbOrigSize==pPager->dbHintSize ); assert( pPager->setSuper==0 ); @@ -932,6 +937,7 @@ static int assert_pager_state(Pager *p){ assert( isOpen(p->jfd) || p->journalMode==PAGER_JOURNALMODE_OFF || p->journalMode==PAGER_JOURNALMODE_WAL + || p->journalMode==PAGER_JOURNALMODE_WAL2 ); } assert( pPager->dbOrigSize==pPager->dbFileSize ); @@ -946,6 +952,7 @@ static int assert_pager_state(Pager *p){ assert( isOpen(p->jfd) || p->journalMode==PAGER_JOURNALMODE_OFF || p->journalMode==PAGER_JOURNALMODE_WAL + || p->journalMode==PAGER_JOURNALMODE_WAL2 || (sqlite3OsDeviceCharacteristics(p->fd)&SQLITE_IOCAP_BATCH_ATOMIC) ); assert( pPager->dbOrigSize<=pPager->dbHintSize ); @@ -958,6 +965,7 @@ static int assert_pager_state(Pager *p){ assert( isOpen(p->jfd) || p->journalMode==PAGER_JOURNALMODE_OFF || p->journalMode==PAGER_JOURNALMODE_WAL + || p->journalMode==PAGER_JOURNALMODE_WAL2 || (sqlite3OsDeviceCharacteristics(p->fd)&SQLITE_IOCAP_BATCH_ATOMIC) ); break; @@ -1821,6 +1829,53 @@ static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){ return rc; } +#ifndef SQLITE_OMIT_CONCURRENT +/* +** If they are not already, begin recording all pages read from the pager layer +** by the b-tree layer This is used by concurrent transactions. Return +** SQLITE_OK if successful, or an SQLite error code (SQLITE_NOMEM) if an error +** occurs. 
+*/ +int sqlite3PagerBeginConcurrent(Pager *pPager){ + int rc = SQLITE_OK; + if( pPager->pAllRead==0 ){ + pPager->pAllRead = sqlite3BitvecCreate(pPager->dbSize); + pPager->dbOrigSize = pPager->dbSize; + if( pPager->pAllRead==0 ){ + rc = SQLITE_NOMEM; + } + } + return rc; +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** Stop recording all pages read from the pager layer by the b-tree layer +** and discard any current records. +*/ +void sqlite3PagerEndConcurrent(Pager *pPager){ + sqlite3BitvecDestroy(pPager->pAllRead); + pPager->pAllRead = 0; +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** Return true if the database is in wal mode. False otherwise. +*/ +int sqlite3PagerIsWal(Pager *pPager){ + return pPager->pWal!=0; +} +#endif /* SQLITE_OMIT_CONCURRENT */ + +/* +** Free the Pager.pInJournal and Pager.pAllRead bitvec objects. +*/ +static void pagerFreeBitvecs(Pager *pPager){ + sqlite3BitvecDestroy(pPager->pInJournal); + pPager->pInJournal = 0; + sqlite3PagerEndConcurrent(pPager); +} + /* ** This function is a no-op if the pager is in exclusive mode and not ** in the ERROR state. 
Otherwise, it switches the pager to PAGER_OPEN @@ -1845,8 +1900,7 @@ static void pager_unlock(Pager *pPager){ || pPager->eState==PAGER_ERROR ); - sqlite3BitvecDestroy(pPager->pInJournal); - pPager->pInJournal = 0; + pagerFreeBitvecs(pPager); releaseAllSavepoints(pPager); if( pagerUseWal(pPager) ){ @@ -2103,6 +2157,7 @@ static int pager_end_transaction(Pager *pPager, int hasSuper, int bCommit){ assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE || pPager->journalMode==PAGER_JOURNALMODE_MEMORY || pPager->journalMode==PAGER_JOURNALMODE_WAL + || pPager->journalMode==PAGER_JOURNALMODE_WAL2 ); sqlite3OsClose(pPager->jfd); if( bDelete ){ @@ -2122,8 +2177,7 @@ static int pager_end_transaction(Pager *pPager, int hasSuper, int bCommit){ } #endif - sqlite3BitvecDestroy(pPager->pInJournal); - pPager->pInJournal = 0; + pagerFreeBitvecs(pPager); pPager->nRec = 0; if( rc==SQLITE_OK ){ if( MEMDB || pagerFlushOnCommit(pPager, bCommit) ){ @@ -3156,8 +3210,24 @@ static int pagerRollbackWal(Pager *pPager){ ** + Reload page content from the database (if refcount>0). */ pPager->dbSize = pPager->dbOrigSize; - rc = sqlite3WalUndo(pPager->pWal, pagerUndoCallback, (void *)pPager); + rc = sqlite3WalUndo(pPager->pWal, pagerUndoCallback, (void *)pPager, +#ifdef SQLITE_OMIT_CONCURRENT + 0 +#else + pPager->pAllRead!=0 +#endif + ); pList = sqlite3PcacheDirtyList(pPager->pPCache); + +#ifndef SQLITE_OMIT_CONCURRENT + /* If this is an CONCURRENT transaction, then page 1 must be reread from + ** the db file, even if it is not dirty. This is because the b-tree layer + ** may have already zeroed the nFree and iTrunk header fields. 
*/ + if( rc==SQLITE_OK && (pList==0 || pList->pgno!=1) && pPager->pAllRead ){ + rc = pagerUndoCallback((void*)pPager, 1); + } +#endif + while( pList && rc==SQLITE_OK ){ PgHdr *pNext = pList->pDirty; rc = pagerUndoCallback((void *)pPager, pList->pgno); @@ -3207,6 +3277,8 @@ static int pagerWalFrames( if( p->pgno<=nTruncate ){ ppNext = &p->pDirty; nList++; + PAGERTRACE(("TO-WAL %d page %d hash(%08x)\n", + PAGERID(pPager), p->pgno, pager_pagehash(p))); } } assert( pList ); @@ -3261,6 +3333,10 @@ static int pagerBeginReadTransaction(Pager *pPager){ if( rc!=SQLITE_OK || changed ){ pager_reset(pPager); if( USEFETCH(pPager) ) sqlite3OsUnfetch(pPager->fd, 0, 0); + assert( pPager->journalMode==PAGER_JOURNALMODE_WAL + || pPager->journalMode==PAGER_JOURNALMODE_WAL2 + ); + pPager->journalMode = sqlite3WalJournalMode(pPager->pWal); } return rc; @@ -3356,9 +3432,9 @@ static int pagerOpenWalIfPresent(Pager *pPager){ rc = sqlite3OsDelete(pPager->pVfs, pPager->zWal, 0); }else{ testcase( sqlite3PcachePagecount(pPager->pPCache)==0 ); - rc = sqlite3PagerOpenWal(pPager, 0); + rc = sqlite3PagerOpenWal(pPager, 0, 0); } - }else if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){ + }else if( pPager->journalMode>=PAGER_JOURNALMODE_WAL ){ pPager->journalMode = PAGER_JOURNALMODE_DELETE; } } @@ -4292,7 +4368,7 @@ static int syncJournal(Pager *pPager, int newHdr){ assert( assert_pager_state(pPager) ); assert( !pagerUseWal(pPager) ); - rc = sqlite3PagerExclusiveLock(pPager); + rc = sqlite3PagerExclusiveLock(pPager, 0, 0); if( rc!=SQLITE_OK ) return rc; if( !pPager->noSync ){ @@ -4643,6 +4719,12 @@ static int pagerStress(void *p, PgHdr *pPg){ pPager->aStat[PAGER_STAT_SPILL]++; pPg->pDirty = 0; if( pagerUseWal(pPager) ){ +#ifndef SQLITE_OMIT_CONCURRENT + /* If the transaction is a "BEGIN CONCURRENT" transaction, the page + ** cannot be flushed to disk. Return early in this case. */ + if( pPager->pAllRead ) return SQLITE_OK; +#endif + /* Write a single frame for this page to the log. 
*/ rc = subjournalPageIfRequired(pPg); if( rc==SQLITE_OK ){ @@ -4879,6 +4961,7 @@ int sqlite3PagerOpen( (u64)nPathname + 8 + 1 + /* Journal filename */ #ifndef SQLITE_OMIT_WAL (u64)nPathname + 4 + 1 + /* WAL filename */ + nPathname + 5 + 1 + /* Second WAL filename */ #endif 3 /* Terminator */ ); @@ -4931,6 +5014,8 @@ int sqlite3PagerOpen( sqlite3FileSuffix3(zFilename, pPager->zWal); pPtr = (u8*)(pPager->zWal + sqlite3Strlen30(pPager->zWal)+1); #endif + memcpy(pPtr, zPathname, nPathname); pPtr += nPathname; + memcpy(pPtr, "-wal2", 5); pPtr += 5 + 1; }else{ pPager->zWal = 0; } @@ -5475,6 +5560,23 @@ static void pagerUnlockIfUnused(Pager *pPager){ } } +#ifndef SQLITE_OMIT_CONCURRENT +/* +** If this pager is currently in a concurrent transaction (pAllRead!=0), +** then set the bit in the pAllRead vector to indicate that the transaction +** read from page pgno. Return SQLITE_OK if successful, or an SQLite error +** code (i.e. SQLITE_NOMEM) if an error occurs. +*/ +int sqlite3PagerUsePage(Pager *pPager, Pgno pgno){ + int rc = SQLITE_OK; + if( pPager->pAllRead && pgno<=pPager->dbOrigSize ){ + PAGERTRACE(("USING page %d\n", pgno)); + rc = sqlite3BitvecSet(pPager->pAllRead, pgno); + } + return rc; +} +#endif + /* ** The page getter methods each try to acquire a reference to a ** page with page number pgno. If the requested reference is @@ -5548,6 +5650,14 @@ static int getPageNormal( assert( assert_pager_state(pPager) ); assert( pPager->hasHeldSharedLock==1 ); + /* If this is an CONCURRENT transaction and the page being read was + ** present in the database file when the transaction was opened, + ** mark it as read in the pAllRead vector. 
*/ + if( (rc = sqlite3PagerUsePage(pPager, pgno))!=SQLITE_OK ){ + pPg = 0; + goto pager_acquire_err; + } + if( pgno==0 ) return SQLITE_CORRUPT_BKPT; pBase = sqlite3PcacheFetch(pPager->pPCache, pgno, 3); if( pBase==0 ){ @@ -5906,11 +6016,14 @@ static int pager_open_journal(Pager *pPager){ ** Begin a write-transaction on the specified pager object. If a ** write-transaction has already been opened, this function is a no-op. ** -** If the exFlag argument is false, then acquire at least a RESERVED -** lock on the database file. If exFlag is true, then acquire at least +** If the exFlag argument is 0, then acquire at least a RESERVED +** lock on the database file. If exFlag is >0, then acquire at least ** an EXCLUSIVE lock. If such a lock is already held, no locking ** functions need be called. ** +** If (exFlag<0) and the database is in WAL mode, do not take any locks. +** The transaction will run in CONCURRENT mode instead. +** ** If the subjInMemory argument is non-zero, then any sub-journal opened ** within this transaction will be opened as an in-memory file. This ** has no effect if the sub-journal is already opened (as it may be when @@ -5928,7 +6041,6 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ if( pPager->eState==PAGER_READER ){ assert( pPager->pInJournal==0 ); - if( pagerUseWal(pPager) ){ /* If the pager is configured to use locking_mode=exclusive, and an ** exclusive lock on the database is not already held, obtain it now. @@ -5944,9 +6056,10 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ /* Grab the write lock on the log file. If successful, upgrade to ** PAGER_RESERVED state. Otherwise, return an error code to the caller. ** The busy-handler is not invoked if another connection already - ** holds the write-lock. If possible, the upper layer will call it. - */ - rc = sqlite3WalBeginWriteTransaction(pPager->pWal); + ** holds the write-lock. If possible, the upper layer will call it. 
*/ + if( exFlag>=0 ){ + rc = sqlite3WalBeginWriteTransaction(pPager->pWal); + } }else{ /* Obtain a RESERVED lock on the database file. If the exFlag parameter ** is true, then immediately upgrade this to an EXCLUSIVE lock. The @@ -5954,7 +6067,7 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ ** lock, but not when obtaining the RESERVED lock. */ rc = pagerLockDb(pPager, RESERVED_LOCK); - if( rc==SQLITE_OK && exFlag ){ + if( rc==SQLITE_OK && exFlag>0 ){ rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); } } @@ -6254,7 +6367,7 @@ int sqlite3PagerWrite(PgHdr *pPg){ ** to sqlite3PagerWrite(). In other words, return TRUE if it is ok ** to change the content of the page. */ -#ifndef NDEBUG +#if !defined(SQLITE_OMIT_CONCURRENT) || !defined(NDEBUG) int sqlite3PagerIswriteable(DbPage *pPg){ return pPg->flags & PGHDR_WRITEABLE; } @@ -6410,17 +6523,26 @@ int sqlite3PagerSync(Pager *pPager, const char *zSuper){ } /* -** This function may only be called while a write-transaction is active in -** rollback. If the connection is in WAL mode, this call is a no-op. -** Otherwise, if the connection does not already have an EXCLUSIVE lock on -** the database file, an attempt is made to obtain one. +** This function is called to ensure that all locks required to commit the +** current write-transaction to the database file are held. If the db is +** in rollback mode, this means the EXCLUSIVE lock on the database file. ** -** If the EXCLUSIVE lock is already held or the attempt to obtain it is -** successful, or the connection is in WAL mode, SQLITE_OK is returned. -** Otherwise, either SQLITE_BUSY or an SQLITE_IOERR_XXX error code is -** returned. +** Or, if this is a non-CONCURRENT transaction on a wal-mode database, this +** function is a no-op. 
+** +** If this is an CONCURRENT transaction on a wal-mode database, this function +** attempts to obtain the WRITER lock on the wal file and also checks to +** see that the transaction can be safely committed (does not commit with +** any other transaction committed since it was opened). +** +** If the required locks are already held or successfully obtained and +** the transaction can be committed, SQLITE_OK is returned. If a required lock +** cannot be obtained, SQLITE_BUSY is returned. Or, if the current transaction +** is CONCURRENT and cannot be committed due to a conflict, SQLITE_BUSY_SNAPSHOT +** is returned. Otherwise, if some other error occurs (IO error, OOM etc.), +** and SQLite error code is returned. */ -int sqlite3PagerExclusiveLock(Pager *pPager){ +int sqlite3PagerExclusiveLock(Pager *pPager, PgHdr *pPage1, u32 *aConflict){ int rc = pPager->errCode; assert( assert_pager_state(pPager) ); if( rc==SQLITE_OK ){ @@ -6432,10 +6554,73 @@ int sqlite3PagerExclusiveLock(Pager *pPager){ if( 0==pagerUseWal(pPager) ){ rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); } +#ifndef SQLITE_OMIT_CONCURRENT + else{ + if( pPager->pAllRead ){ + /* This is an CONCURRENT transaction. Attempt to lock the wal database + ** here. If SQLITE_BUSY (but not SQLITE_BUSY_SNAPSHOT) is returned, + ** invoke the busy-handler and try again for as long as it returns + ** non-zero. */ + do { + rc = sqlite3WalLockForCommit( + pPager->pWal, pPage1, pPager->pAllRead, aConflict + ); + }while( rc==SQLITE_BUSY + && pPager->xBusyHandler(pPager->pBusyHandlerArg) + ); + } + } +#endif /* SQLITE_OMIT_CONCURRENT */ } return rc; } +#ifndef SQLITE_OMIT_CONCURRENT +/* +** This function is called as part of committing an CONCURRENT transaction. +** At this point the wal WRITER lock is held, and all pages in the cache +** except for page 1 are compatible with the snapshot at the head of the +** wal file. 
+** +** This function updates the in-memory data structures and reloads the +** contents of page 1 so that the client is operating on the snapshot +** at the head of the wal file. +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. +*/ +int sqlite3PagerUpgradeSnapshot(Pager *pPager, DbPage *pPage1){ + int rc; + + assert( pPager->pWal && pPager->pAllRead ); + rc = sqlite3WalUpgradeSnapshot(pPager->pWal); + if( rc==SQLITE_OK ){ + rc = readDbPage(pPage1); + } + + return rc; +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** Set the in-memory cache of the database file size to nSz pages. +*/ +void sqlite3PagerSetDbsize(Pager *pPager, Pgno nSz){ + pPager->dbSize = nSz; +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** If this is a WAL mode connection and the WRITER lock is currently held, +** relinquish it. +*/ +void sqlite3PagerDropExclusiveLock(Pager *pPager){ + if( pagerUseWal(pPager) ){ + sqlite3WalEndWriteTransaction(pPager->pWal); + } +} +#endif /* SQLITE_OMIT_CONCURRENT */ + + /* ** Sync the database file for the pager pPager. 
zSuper points to the name ** of a super-journal file that should be written into the individual @@ -7367,7 +7552,8 @@ int sqlite3PagerSetJournalMode(Pager *pPager, int eMode){ || eMode==PAGER_JOURNALMODE_OFF /* 2 */ || eMode==PAGER_JOURNALMODE_TRUNCATE /* 3 */ || eMode==PAGER_JOURNALMODE_MEMORY /* 4 */ - || eMode==PAGER_JOURNALMODE_WAL /* 5 */ ); + || eMode==PAGER_JOURNALMODE_WAL /* 5 */ + || eMode==PAGER_JOURNALMODE_WAL2 /* 6 */ ); /* This routine is only called from the OP_JournalMode opcode, and ** the logic there will never allow a temporary file to be changed @@ -7401,9 +7587,12 @@ int sqlite3PagerSetJournalMode(Pager *pPager, int eMode){ assert( (PAGER_JOURNALMODE_MEMORY & 5)==4 ); assert( (PAGER_JOURNALMODE_OFF & 5)==0 ); assert( (PAGER_JOURNALMODE_WAL & 5)==5 ); + assert( (PAGER_JOURNALMODE_WAL2 & 5)==4 ); assert( isOpen(pPager->fd) || pPager->exclusiveMode ); - if( !pPager->exclusiveMode && (eOld & 5)==1 && (eMode & 1)==0 ){ + if( !pPager->exclusiveMode && (eOld & 5)==1 && (eMode & 1)==0 + && eMode!=PAGER_JOURNALMODE_WAL2 /* TODO: fix this if possible */ + ){ /* In this case we would like to delete the journal file. If it is ** not possible, then that is not a problem. Deleting the journal file ** here is an optimization only. @@ -7578,7 +7767,7 @@ static int pagerExclusiveLock(Pager *pPager){ ** lock on the database file and use heap-memory to store the wal-index ** in. Otherwise, use the normal shared-memory. 
*/ -static int pagerOpenWal(Pager *pPager){ +static int pagerOpenWal(Pager *pPager, int bWal2){ int rc = SQLITE_OK; assert( pPager->pWal==0 && pPager->tempFile==0 ); @@ -7599,7 +7788,7 @@ static int pagerOpenWal(Pager *pPager){ if( rc==SQLITE_OK ){ rc = sqlite3WalOpen(pPager->pVfs, pPager->fd, pPager->zWal, pPager->exclusiveMode, - pPager->journalSizeLimit, &pPager->pWal + pPager->journalSizeLimit, bWal2, &pPager->pWal ); #ifdef SQLITE_ENABLE_SETLK_TIMEOUT if( rc==SQLITE_OK ){ @@ -7630,6 +7819,7 @@ static int pagerOpenWal(Pager *pPager){ */ int sqlite3PagerOpenWal( Pager *pPager, /* Pager object */ + int bWal2, /* Open in wal2 mode if not already open */ int *pbOpen /* OUT: Set to true if call is a no-op */ ){ int rc = SQLITE_OK; /* Return code */ @@ -7646,9 +7836,9 @@ int sqlite3PagerOpenWal( /* Close any rollback journal previously open */ sqlite3OsClose(pPager->jfd); - rc = pagerOpenWal(pPager); + rc = pagerOpenWal(pPager, bWal2); if( rc==SQLITE_OK ){ - pPager->journalMode = PAGER_JOURNALMODE_WAL; + pPager->journalMode = bWal2?PAGER_JOURNALMODE_WAL2:PAGER_JOURNALMODE_WAL; pPager->eState = PAGER_OPEN; } }else{ @@ -7670,7 +7860,9 @@ int sqlite3PagerOpenWal( int sqlite3PagerCloseWal(Pager *pPager, sqlite3 *db){ int rc = SQLITE_OK; - assert( pPager->journalMode==PAGER_JOURNALMODE_WAL ); + assert( pPager->journalMode==PAGER_JOURNALMODE_WAL + || pPager->journalMode==PAGER_JOURNALMODE_WAL2 + ); /* If the log file is not already open, but does exist in the file-system, ** it may need to be checkpointed before the connection can switch to @@ -7685,7 +7877,7 @@ int sqlite3PagerCloseWal(Pager *pPager, sqlite3 *db){ ); } if( rc==SQLITE_OK && logexists ){ - rc = pagerOpenWal(pPager); + rc = pagerOpenWal(pPager, 0); } } @@ -7809,6 +8001,11 @@ void sqlite3PagerSnapshotUnlock(Pager *pPager){ } #endif /* SQLITE_ENABLE_SNAPSHOT */ + +int sqlite3PagerWalInfo(Pager *pPager, u32 *pnPrior, u32 *pnFrame){ + return sqlite3WalInfo(pPager->pWal, pnPrior, pnFrame); +} + #endif /* 
!SQLITE_OMIT_WAL */ #ifdef SQLITE_ENABLE_ZIPVFS diff --git a/src/pager.h b/src/pager.h index 9b2cfc0bcf..5828c37f86 100644 --- a/src/pager.h +++ b/src/pager.h @@ -82,8 +82,9 @@ typedef struct PgHdr DbPage; #define PAGER_JOURNALMODE_TRUNCATE 3 /* Commit by truncating journal */ #define PAGER_JOURNALMODE_MEMORY 4 /* In-memory journal file */ #define PAGER_JOURNALMODE_WAL 5 /* Use write-ahead logging */ +#define PAGER_JOURNALMODE_WAL2 6 /* Use write-ahead logging mode 2 */ -#define isWalMode(x) ((x)==PAGER_JOURNALMODE_WAL) +#define isWalMode(x) ((x)==PAGER_JOURNALMODE_WAL || (x)==PAGER_JOURNALMODE_WAL2) /* ** The argument to this macro is a file descriptor (type sqlite3_file*). @@ -179,7 +180,7 @@ void *sqlite3PagerGetExtra(DbPage *); void sqlite3PagerPagecount(Pager*, int*); int sqlite3PagerBegin(Pager*, int exFlag, int); int sqlite3PagerCommitPhaseOne(Pager*,const char *zSuper, int); -int sqlite3PagerExclusiveLock(Pager*); +int sqlite3PagerExclusiveLock(Pager*, DbPage *pPage1, u32*); int sqlite3PagerSync(Pager *pPager, const char *zSuper); int sqlite3PagerCommitPhaseTwo(Pager*); int sqlite3PagerRollback(Pager*); @@ -187,11 +188,12 @@ int sqlite3PagerOpenSavepoint(Pager *pPager, int n); int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint); int sqlite3PagerSharedLock(Pager *pPager); + #ifndef SQLITE_OMIT_WAL int sqlite3PagerCheckpoint(Pager *pPager, sqlite3*, int, int*, int*); int sqlite3PagerWalSupported(Pager *pPager); int sqlite3PagerWalCallback(Pager *pPager); - int sqlite3PagerOpenWal(Pager *pPager, int *pisOpen); + int sqlite3PagerOpenWal(Pager *pPager, int, int *pisOpen); int sqlite3PagerCloseWal(Pager *pPager, sqlite3*); # ifdef SQLITE_ENABLE_SNAPSHOT int sqlite3PagerSnapshotGet(Pager*, sqlite3_snapshot **ppSnapshot); @@ -241,10 +243,28 @@ void sqlite3PagerTruncateImage(Pager*,Pgno); void sqlite3PagerRekey(DbPage*, Pgno, u16); +#ifndef SQLITE_OMIT_CONCURRENT +int sqlite3PagerUsePage(Pager*, Pgno); +void sqlite3PagerEndConcurrent(Pager*); +int 
sqlite3PagerBeginConcurrent(Pager*); +void sqlite3PagerDropExclusiveLock(Pager*); +int sqlite3PagerUpgradeSnapshot(Pager *pPager, DbPage*); +void sqlite3PagerSetDbsize(Pager *pPager, Pgno); +int sqlite3PagerIsWal(Pager*); +#else +# define sqlite3PagerEndConcurrent(x) +# define sqlite3PagerUsePage(x, y) SQLITE_OK +#endif + +#if defined(SQLITE_DEBUG) || !defined(SQLITE_OMIT_CONCURRENT) +int sqlite3PagerIswriteable(DbPage*); +#endif + +int sqlite3PagerWalInfo(Pager*, u32 *pnPrior, u32 *pnFrame); + /* Functions to support testing and debugging. */ #if !defined(NDEBUG) || defined(SQLITE_TEST) Pgno sqlite3PagerPagenumber(DbPage*); - int sqlite3PagerIswriteable(DbPage*); #endif #ifdef SQLITE_TEST int *sqlite3PagerStats(Pager*); diff --git a/src/parse.y b/src/parse.y index cbcc9b43ae..b092928e5a 100644 --- a/src/parse.y +++ b/src/parse.y @@ -186,7 +186,16 @@ trans_opt ::= TRANSACTION nm. transtype(A) ::= . {A = TK_DEFERRED;} transtype(A) ::= DEFERRED(X). {A = @X; /*A-overwrites-X*/} transtype(A) ::= IMMEDIATE(X). {A = @X; /*A-overwrites-X*/} -transtype(A) ::= EXCLUSIVE(X). {A = @X; /*A-overwrites-X*/} +transtype(A) ::= ID(X). { + Token *p = &X; + if( p->n==9 && sqlite3_strnicmp(p->z,"exclusive",9)==0 ){ + A = TK_EXCLUSIVE; + }else if( p->n==10 && sqlite3_strnicmp(p->z,"concurrent",10)==0 ){ + A = TK_CONCURRENT; /*A-overwrites-X*/ + }else{ + parserSyntaxError(pParse, p); + } +} cmd ::= COMMIT|END(X) trans_opt. {sqlite3EndTransaction(pParse,@X);} cmd ::= ROLLBACK(X) trans_opt. {sqlite3EndTransaction(pParse,@X);} @@ -324,7 +333,6 @@ columnname(A) ::= nm(A) typetoken(Y). {sqlite3AddColumn(pParse,A,Y);} // keywords. Any non-standard keyword can also be an identifier. // %token_class id ID|INDEXED. - // And "ids" is an identifier-or-string. // %token_class ids ID|STRING. @@ -2127,6 +2135,7 @@ filter_clause(A) ::= FILTER LP WHERE expr(X) RP. 
{ A = X; } UMINUS /* Unary minus */ TRUTH /* IS TRUE or IS FALSE or IS NOT TRUE or IS NOT FALSE */ REGISTER /* Reference to a VDBE register */ + CONCURRENT /* BEGIN CONCURRENT */ VECTOR /* Vector */ SELECT_COLUMN /* Choose a single column from a multi-column SELECT */ IF_NULL_ROW /* the if-null-row operator */ diff --git a/src/pcache1.c b/src/pcache1.c index 39607328f3..1f261d6502 100644 --- a/src/pcache1.c +++ b/src/pcache1.c @@ -629,6 +629,14 @@ static void pcache1EnforceMaxPage(PCache1 *pCache){ pcache1RemoveFromHash(p, 1); } if( pCache->nPage==0 && pCache->pBulk ){ + if( pcache1.separateCache ){ + PgHdr1 *p1 = pCache->pFree; + while( p1 ){ + PgHdr1 *pNext = p1->pNext; + if( p1->isBulkLocal==0 ) pcache1Free(p1->page.pBuf); + p1 = pNext; + } + } sqlite3_free(pCache->pBulk); pCache->pBulk = pCache->pFree = 0; } @@ -674,7 +682,13 @@ static void pcache1TruncateUnsafe( pCache->nPage--; *pp = pPage->pNext; if( PAGE_IS_UNPINNED(pPage) ) pcache1PinPage(pPage); - pcache1FreePage(pPage); + if( pcache1.separateCache ){ + pPage->pNext = pCache->pFree; + pCache->pFree = pPage; + (*pCache->pnPurgeable)--; + }else{ + pcache1FreePage(pPage); + } }else{ pp = &pPage->pNext; TESTONLY( if( nPage>=0 ) nPage++; ) @@ -1172,6 +1186,15 @@ static void pcache1Destroy(sqlite3_pcache *p){ assert( pCache->bPurgeable || (pCache->nMax==0 && pCache->nMin==0) ); pcache1EnterMutex(pGroup); if( pCache->nPage ) pcache1TruncateUnsafe(pCache, 0); + if( pcache1.separateCache ){ + PgHdr1 *p1 = pCache->pFree; + while( p1 ){ + PgHdr1 *pNext = p1->pNext; + if( p1->isBulkLocal==0 ) pcache1Free(p1->page.pBuf); + p1 = pNext; + } + pCache->pFree = 0; + } assert( pGroup->nMaxPage >= pCache->nMax ); pGroup->nMaxPage -= pCache->nMax; assert( pGroup->nMinPage >= pCache->nMin ); diff --git a/src/pragma.c b/src/pragma.c index 791508ea4a..c948e59bdb 100644 --- a/src/pragma.c +++ b/src/pragma.c @@ -290,7 +290,7 @@ const char *sqlite3JournalModename(int eMode){ static char * const azModeName[] = { "delete", "persist", 
"off", "truncate", "memory" #ifndef SQLITE_OMIT_WAL - , "wal" + , "wal", "wal2" #endif }; assert( PAGER_JOURNALMODE_DELETE==0 ); @@ -299,6 +299,7 @@ const char *sqlite3JournalModename(int eMode){ assert( PAGER_JOURNALMODE_TRUNCATE==3 ); assert( PAGER_JOURNALMODE_MEMORY==4 ); assert( PAGER_JOURNALMODE_WAL==5 ); + assert( PAGER_JOURNALMODE_WAL2==6 ); assert( eMode>=0 && eMode<=ArraySize(azModeName) ); if( eMode==ArraySize(azModeName) ) return 0; diff --git a/src/random.c b/src/random.c index ea8431ba94..7209ace349 100644 --- a/src/random.c +++ b/src/random.c @@ -129,6 +129,28 @@ void sqlite3_randomness(int N, void *pBuf){ sqlite3_mutex_leave(mutex); } +/* +** Initialize a fast PRNG. A Fast PRNG is called "fast" because it does +** not need a mutex to operate, though it does use a mutex to initialize. +** The quality of the randomness is not as good as the global PRNG. +*/ +void sqlite3FastPrngInit(FastPrng *pPrng){ + sqlite3_randomness(sizeof(*pPrng), pPrng); + pPrng->x |= 1; +} + +/* +** Generate N bytes of pseudo-randomness using a FastPrng +*/ +void sqlite3FastRandomness(FastPrng *pPrng, int N, void *P){ + unsigned char *pOut = (unsigned char*)P; + while( N-->0 ){ + pPrng->x = ((pPrng->x)>>1) ^ ((1+~((pPrng->x)&1)) & 0xd0000001); + pPrng->y = (pPrng->y)*1103515245 + 12345; + *(pOut++) = (pPrng->x ^ pPrng->y) & 0xff; + } +} + #ifndef SQLITE_UNTESTABLE /* ** For testing purposes, we sometimes want to preserve the state of diff --git a/src/select.c b/src/select.c index 8fff983901..55b0a8fbd1 100644 --- a/src/select.c +++ b/src/select.c @@ -2327,7 +2327,7 @@ int sqlite3ColumnsFromExprList( zName = sqlite3MPrintf(db, "%.*z:%u", nName, zName, ++cnt); sqlite3ProgressCheck(pParse); if( cnt>3 ){ - sqlite3_randomness(sizeof(cnt), &cnt); + sqlite3FastRandomness(&db->sPrng, sizeof(cnt), &cnt); } } pCol->zCnName = zName; diff --git a/src/sqlite.h.in b/src/sqlite.h.in index 61c528b33f..caae62650a 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -11049,6 +11049,31 @@ int 
sqlite3_snapshot_cmp( */ int sqlite3_snapshot_recover(sqlite3 *db, const char *zDb); +/* +** CAPI3REF: Wal related information regarding the most recent COMMIT +** EXPERIMENTAL +** +** This function reports on the state of the wal file (if any) for database +** zDb, which should be "main", "temp", or the name of an attached database. +** Its results - the values written to the output parameters - are only +** defined if the most recent SQL command on the connection was a successful +** COMMIT that wrote data to wal-mode database zDb. +** +** Assuming the above conditions are met, output parameter (*pnFrame) is set +** to the total number of frames in the wal file. Parameter (*pnPrior) is +** set to the number of frames that were present in the wal file before the +** most recent transaction was committed. So that the number of frames written +** by the most recent transaction is (*pnFrame)-(*pnPrior). +** +** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. It +** is not an error if this function is called at a time when the results +** are undefined. +*/ +SQLITE_EXPERIMENTAL int sqlite3_wal_info( + sqlite3 *db, const char *zDb, + unsigned int *pnPrior, unsigned int *pnFrame +); + /* ** CAPI3REF: Serialize a database ** @@ -11194,6 +11219,98 @@ int sqlite3_deserialize( #define SQLITE_DESERIALIZE_RESIZEABLE 2 /* Resize using sqlite3_realloc64() */ #define SQLITE_DESERIALIZE_READONLY 4 /* Database is read-only */ +/* +** Access details of recent COMMIT commands. This function allows various +** details related to the most recent COMMIT command to be accessed. +** The requested value is always returned via output parameter (*piVal). +** The specific value requested is identified by parameter op (see +** below). +** +** SQLITE_OK is returned if successful, or SQLITE_ERROR if the "op" or +** "zDb" parameters are unrecognized. +*/ +int sqlite3_commit_status( + sqlite3 *db, /* Database handle */ + const char *zDb, /* Name of database - "main" etc.
*/ + int op, /* SQLITE_COMMIT_XXX constant */ + unsigned int *piVal /* OUT: Write requested value here */ +); + +/* +** The following describes the five requests supported by +** sqlite3_commit_status(), each identified by an SQLITE_COMMIT_XXX +** constant: +** +** SQLITE_COMMIT_FIRSTFRAME: +** In this case argument zDb must be "main", or "temp", or else the name of +** an attached database. If zDb does not correspond to any attached database, +** SQLITE_ERROR is returned. +** +** The final value of (*piVal) for this request is only defined if (a) the +** most recent attempt to write to the database connection was successful, +** (b) the most recent attempt to write to the database did write to database +** zDb, and (c) zDb is a wal mode database. +** +** If the above conditions are true, then output parameter (*piVal) is +** set to the frame number of the first frame written by the recent +** transaction. In wal mode, or in wal2 mode when a transaction is +** written into the *-wal file, the frame number indicates the frame's +** position in the wal file - frames are numbered starting from 1. In +** wal2 mode, when a transaction is written to the *-wal2 file, the frame +** number is the frame's position in the *-wal2 file, plus (1 << 31). +** +** Note: Although a database may have up to (1<<32) pages, each wal +** file may contain at most (1<<31) frames. +** +** SQLITE_COMMIT_NFRAME: +** zDb is interpreted in the same way as for SQLITE_COMMIT_FIRSTFRAME, and +** (*piVal) is undefined under the same conditions as for that request. +** +** Otherwise, (*piVal) is set to the number of frames written by the +** recent transaction. +** +** SQLITE_COMMIT_CONFLICT_DB: +** Parameter zDb is ignored for this request. The results of this +** request are only defined if the most recent attempt to write to +** the database handle was a BEGIN CONCURRENT transaction that +** failed with an SQLITE_BUSY_SNAPSHOT error.
+** +** In that case, (*piVal) is set to the index of the database +** on which the SQLITE_BUSY_SNAPSHOT error occurred (0 for main, +** a value of 2 or greater for an attached database). This value +** may be used with the sqlite3_db_name() API to find the name +** of the conflicting database. +** +** SQLITE_COMMIT_CONFLICT_FRAME: +** Parameter zDb is ignored for this request. The results of this +** request are only defined if the most recent attempt to write to +** the database handle was a BEGIN CONCURRENT transaction that +** failed with an SQLITE_BUSY_SNAPSHOT error. +** +** (*piVal) is set to the frame number of the conflicting frame for +** the recent SQLITE_BUSY_SNAPSHOT error. The conflicting transaction may +** be found by comparing this value with the FIRSTFRAME and +** NFRAME values for recent successfully committed transactions on +** the same db. If the CONFLICT_FRAME value is F, then the conflicting +** transaction is the most recent successful commit for which +** (FIRSTFRAME <= F <= FIRSTFRAME+NFRAME) is true. +** +** SQLITE_COMMIT_CONFLICT_PGNO: +** Parameter zDb is ignored for this request. The results of this +** request are only defined if the previous attempt to write to +** the database using database handle db failed with +** SQLITE_BUSY_SNAPSHOT. +** +** Return the page number of the conflicting page for the most +** recent SQLITE_BUSY_SNAPSHOT error.
+*/ +#define SQLITE_COMMIT_FIRSTFRAME 0 +#define SQLITE_COMMIT_NFRAME 1 +#define SQLITE_COMMIT_CONFLICT_DB 2 +#define SQLITE_COMMIT_CONFLICT_FRAME 3 +#define SQLITE_COMMIT_CONFLICT_PGNO 4 + + /* ** CAPI3REF: Bind array values to the CARRAY table-valued function ** diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 185b36fd4c..f04578d47f 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -1336,6 +1336,7 @@ typedef struct DbFixer DbFixer; typedef struct Schema Schema; typedef struct Expr Expr; typedef struct ExprList ExprList; +typedef struct FastPrng FastPrng; typedef struct FKey FKey; typedef struct FpDecode FpDecode; typedef struct FuncDestructor FuncDestructor; @@ -1464,6 +1465,14 @@ typedef int VList; # define SQLITE_DEFAULT_WAL_SYNCHRONOUS SQLITE_DEFAULT_SYNCHRONOUS #endif +/* +** State of a simple PRNG used for the per-connection and per-pager +** pseudo-random number generators. +*/ +struct FastPrng { + unsigned int x, y; +}; + /* ** Each database file to be accessed by the system is an instance ** of the following structure. There are normally two of these structures @@ -1508,6 +1517,9 @@ struct Schema { u8 enc; /* Text encoding used by this database */ u16 schemaFlags; /* Flags associated with this schema */ int cache_size; /* Number of pages to use in the cache */ +#ifdef SQLITE_ENABLE_STAT4 + void *pStat4Space; /* Memory for stat4 Index.aSample[] arrays */ +#endif }; /* @@ -1679,6 +1691,7 @@ struct sqlite3 { u32 dbOptFlags; /* Flags to enable/disable optimizations */ u8 enc; /* Text encoding */ u8 autoCommit; /* The auto-commit flag. 
*/ + u8 eConcurrent; /* CONCURRENT_* value */ u8 temp_store; /* 1: file 2: memory 0: default */ u8 mallocFailed; /* True if we have seen a malloc failure */ u8 bBenignMalloc; /* Do not require OOMs if true */ @@ -1692,6 +1705,7 @@ struct sqlite3 { u8 nSqlExec; /* Number of pending OP_SqlExec opcodes */ u8 eOpenState; /* Current condition of the connection */ int nextPagesize; /* Pagesize after VACUUM if >0 */ + FastPrng sPrng; /* State of the per-connection PRNG */ i64 nChange; /* Value returned by sqlite3_changes() */ i64 nTotalChange; /* Value returned by sqlite3_total_changes() */ int aLimit[SQLITE_N_LIMIT]; /* Limits */ @@ -1802,8 +1816,21 @@ struct sqlite3 { void (*xUnlockNotify)(void **, int); /* Unlock notify callback */ sqlite3 *pNextBlocked; /* Next in list of all blocked connections */ #endif +#ifndef SQLITE_OMIT_CONCURRENT + /* Return values for sqlite3_commit_status() requests: + ** SQLITE_COMMIT_CONFLICT_DB, CONFLICT_FRAME and CONFLICT_PGNO. + */ + u32 aCommit[5]; +#endif }; +/* +** Candidate values for sqlite3.eConcurrent +*/ +#define CONCURRENT_NONE 0 +#define CONCURRENT_OPEN 1 +#define CONCURRENT_SCHEMA 2 + /* ** A macro to discover the encoding of a database. 
*/ @@ -1869,6 +1896,7 @@ struct sqlite3 { #define SQLITE_AttachWrite HI(0x00020) /* ATTACH allowed to open for write */ #define SQLITE_Comments HI(0x00040) /* Enable SQL comments */ +#define SQLITE_NoopUpdate 0x01000000 /* UPDATE operations are no-ops */ /* Flags used only if debugging */ #ifdef SQLITE_DEBUG #define SQLITE_SqlTrace HI(0x0100000) /* Debug print SQL as it executes */ @@ -2817,7 +2845,7 @@ struct Index { ** expression, or a reference to a VIRTUAL column */ #ifdef SQLITE_ENABLE_STAT4 int nSample; /* Number of elements in aSample[] */ - int mxSample; /* Number of slots allocated to aSample[] */ + int nSampleAlloc; /* Allocated size of aSample[] */ int nSampleCol; /* Size of IndexSample.anEq[] and so on */ tRowcnt *aAvgEq; /* Average nEq values for keys not in aSample */ IndexSample *aSample; /* Samples of the left-most key */ @@ -5163,6 +5191,8 @@ Vdbe *sqlite3GetVdbe(Parse*); void sqlite3PrngSaveState(void); void sqlite3PrngRestoreState(void); #endif +void sqlite3FastPrngInit(FastPrng*); +void sqlite3FastRandomness(FastPrng*, int N, void *P); void sqlite3RollbackAll(sqlite3*,int); void sqlite3CodeVerifySchema(Parse*, int); void sqlite3CodeVerifyNamedSchema(Parse*, const char *zDb); diff --git a/src/test1.c b/src/test1.c index 6f2e2e94ed..7801bf0212 100644 --- a/src/test1.c +++ b/src/test1.c @@ -2595,6 +2595,83 @@ static int SQLITE_TCLAPI test_create_null_module( } #endif /* SQLITE_OMIT_VIRTUALTABLE */ +/* +** Usage: sqlite3_randomess NBYTE +*/ +static int SQLITE_TCLAPI test_sqlite3_randomness( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + int nByte = 0; + u8 *aBuf = 0; + + if( objc!=2 ){ + Tcl_WrongNumArgs(interp, 1, objv, "NBYTE"); + return TCL_ERROR; + } + if( Tcl_GetIntFromObj(interp, objv[1], &nByte) ) return TCL_ERROR; + + aBuf = ckalloc(nByte); + sqlite3_randomness(nByte, aBuf); + Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(aBuf, nByte)); + ckfree(aBuf); + + return TCL_OK; +} + +/* +** tclcmd: 
sqlite3_commit_status db DBNAME OP +*/ +static int SQLITE_TCLAPI test_commit_status( + ClientData clientData, /* Unused */ + Tcl_Interp *interp, /* The TCL interpreter that invoked this command */ + int objc, /* Number of arguments */ + Tcl_Obj *CONST objv[] /* Command arguments */ +){ + struct Op { + const char *zOp; + int op; + } aOp[] = { + { "FIRSTFRAME", SQLITE_COMMIT_FIRSTFRAME }, + { "NFRAME", SQLITE_COMMIT_NFRAME }, + { "CONFLICT_DB", SQLITE_COMMIT_CONFLICT_DB }, + { "CONFLICT_FRAME", SQLITE_COMMIT_CONFLICT_FRAME }, + { "CONFLICT_PGNO", SQLITE_COMMIT_CONFLICT_PGNO }, + { 0, 0 } + }; + sqlite3 *db = 0; + const char *zDb = 0; + int op = 0; + int rc = SQLITE_OK; + unsigned int val = 0; + + if( objc!=4 ){ + Tcl_WrongNumArgs(interp, 1, objv, "DB DBNAME OP"); + return TCL_ERROR; + } + if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ){ + return TCL_ERROR; + } + zDb = Tcl_GetString(objv[2]); + if( Tcl_GetIndexFromObjStruct( + interp, objv[3], aOp, sizeof(aOp[0]), "OP", 0, &op + )){ + return TCL_ERROR; + } + op = aOp[op].op; + + rc = sqlite3_commit_status(db, zDb, op, &val); + if( rc==SQLITE_OK ){ + Tcl_SetObjResult(interp, Tcl_NewWideIntObj((i64)val)); + return TCL_OK; + } + + Tcl_SetObjResult(interp, Tcl_NewStringObj(sqlite3ErrName(rc), -1)); + return TCL_ERROR; +} + #ifdef SQLITE_ENABLE_SNAPSHOT /* ** Usage: sqlite3_snapshot_get DB DBNAME @@ -8706,6 +8783,41 @@ static int SQLITE_TCLAPI test_dbconfig_maindbname_icecube( } } +/* +** Usage: sqlite3_wal_info DB DBNAME +*/ +static int SQLITE_TCLAPI test_wal_info( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + int rc; + sqlite3 *db; + char *zName; + unsigned int nPrior; + unsigned int nFrame; + + if( objc!=3 ){ + Tcl_WrongNumArgs(interp, 1, objv, "DB DBNAME"); + return TCL_ERROR; + } + if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ) return TCL_ERROR; + zName = Tcl_GetString(objv[2]); + + rc = sqlite3_wal_info(db, zName, &nPrior, &nFrame); + if( rc!=SQLITE_OK ){ + 
Tcl_SetObjResult(interp, Tcl_NewStringObj(sqlite3ErrName(rc), -1)); + return TCL_ERROR; + }else{ + Tcl_Obj *pNew = Tcl_NewObj(); + Tcl_ListObjAppendElement(interp, pNew, Tcl_NewWideIntObj((i64)nPrior)); + Tcl_ListObjAppendElement(interp, pNew, Tcl_NewWideIntObj((i64)nFrame)); + Tcl_SetObjResult(interp, pNew); + } + return TCL_OK; +} + /* ** Usage: sqlite3_mmap_warm DB DBNAME */ @@ -9296,8 +9408,9 @@ int Sqlitetest1_Init(Tcl_Interp *interp){ { "sqlite3_snapshot_open_blob", test_snapshot_open_blob, 0 }, { "sqlite3_snapshot_cmp_blob", test_snapshot_cmp_blob, 0 }, #endif - { "sqlite3_delete_database", test_delete_database, 0 }, - { "atomic_batch_write", test_atomic_batch_write, 0 }, + { "sqlite3_delete_database", test_delete_database, 0 }, + { "sqlite3_wal_info", test_wal_info, 0 }, + { "atomic_batch_write", test_atomic_batch_write, 0 }, { "sqlite3_mmap_warm", test_mmap_warm, 0 }, { "sqlite3_config_sorterref", test_config_sorterref, 0 }, { "sqlite3_autovacuum_pages", test_autovacuum_pages, 0 }, @@ -9309,6 +9422,8 @@ int Sqlitetest1_Init(Tcl_Interp *interp){ #ifndef SQLITE_OMIT_VIRTUALTABLE { "create_null_module", test_create_null_module, 0 }, #endif + { "sqlite3_commit_status", test_commit_status, 0 }, + { "sqlite3_randomness", test_sqlite3_randomness, 0 }, }; static int bitmask_size = sizeof(Bitmask)*8; int i; diff --git a/src/test_config.c b/src/test_config.c index 5d0f419ec3..3de22f7f74 100644 --- a/src/test_config.c +++ b/src/test_config.c @@ -622,6 +622,12 @@ Tcl_SetVar2(interp, "sqlite_options", "mergesort", "1", TCL_GLOBAL_ONLY); Tcl_SetVar2(interp, "sqlite_options", "scanstatus", "0", TCL_GLOBAL_ONLY); #endif +#ifdef SQLITE_ENABLE_WAL_BIGHASH + Tcl_SetVar2(interp, "sqlite_options", "wal_bighash", "1", TCL_GLOBAL_ONLY); +#else + Tcl_SetVar2(interp, "sqlite_options", "wal_bighash", "0", TCL_GLOBAL_ONLY); +#endif + #if !defined(SQLITE_ENABLE_LOCKING_STYLE) # if defined(__APPLE__) # define SQLITE_ENABLE_LOCKING_STYLE 1 @@ -691,6 +697,12 @@ Tcl_SetVar2(interp, 
"sqlite_options", "mergesort", "1", TCL_GLOBAL_ONLY); Tcl_SetVar2(interp, "sqlite_options", "truncate_opt", "1", TCL_GLOBAL_ONLY); #endif +#ifndef SQLITE_OMIT_CONCURRENT + Tcl_SetVar2(interp, "sqlite_options", "concurrent", "1", TCL_GLOBAL_ONLY); +#else + Tcl_SetVar2(interp, "sqlite_options", "concurrent", "0", TCL_GLOBAL_ONLY); +#endif + #ifdef SQLITE_OMIT_UTF16 Tcl_SetVar2(interp, "sqlite_options", "utf16", "0", TCL_GLOBAL_ONLY); #else @@ -781,6 +793,12 @@ Tcl_SetVar2(interp, "sqlite_options", "mergesort", "1", TCL_GLOBAL_ONLY); Tcl_SetVar2(interp, "sqlite_options", "normalize", "0", TCL_GLOBAL_ONLY); #endif +#ifdef SQLITE_ENABLE_WAL2NOCKSUM + Tcl_SetVar2(interp, "sqlite_options", "wal2nocksum", "1", TCL_GLOBAL_ONLY); +#else + Tcl_SetVar2(interp, "sqlite_options", "wal2nocksum", "0", TCL_GLOBAL_ONLY); +#endif + #ifdef SQLITE_OMIT_WINDOWFUNC Tcl_SetVar2(interp, "sqlite_options", "windowfunc", "0", TCL_GLOBAL_ONLY); #else diff --git a/src/test_superlock.c b/src/test_superlock.c index 82997927c4..b2c2a0a329 100644 --- a/src/test_superlock.c +++ b/src/test_superlock.c @@ -41,6 +41,8 @@ typedef struct SuperlockBusy SuperlockBusy; struct Superlock { sqlite3 *db; /* Database handle used to lock db */ int bWal; /* True if db is a WAL database */ + int bRecoveryLocked; /* True if WAL RECOVERY lock is held */ + int bReaderLocked; /* True if WAL READER locks are held */ }; typedef struct Superlock Superlock; @@ -107,12 +109,13 @@ static int superlockShmLock( ** Invoke the supplied busy-handler as required. */ static int superlockWalLock( - sqlite3 *db, /* Database handle open on WAL database */ + Superlock *pLock, /* Superlock handle */ SuperlockBusy *pBusy /* Busy handler wrapper object */ ){ int rc; /* Return code */ sqlite3_file *fd = 0; /* Main database file handle */ void volatile *p = 0; /* Pointer to first page of shared memory */ + sqlite3 *db = pLock->db; /* Obtain a pointer to the sqlite3_file object open on the main db file. 
*/ rc = sqlite3_file_control(db, "main", SQLITE_FCNTL_FILE_POINTER, (void *)&fd); @@ -121,8 +124,10 @@ static int superlockWalLock( /* Obtain the "recovery" lock. Normally, this lock is only obtained by ** clients running database recovery. */ + assert( pLock->bRecoveryLocked==0 ); rc = superlockShmLock(fd, 2, 1, pBusy); if( rc!=SQLITE_OK ) return rc; + pLock->bRecoveryLocked = 1; /* Zero the start of the first shared-memory page. This means that any ** clients that open read or write transactions from this point on will @@ -139,7 +144,9 @@ static int superlockWalLock( ** are held, it is guaranteed that there are no active reader, writer or ** checkpointer clients. */ + assert( pLock->bReaderLocked==0 ); rc = superlockShmLock(fd, 3, SQLITE_SHM_NLOCK-3, pBusy); + if( rc==SQLITE_OK ) pLock->bReaderLocked = 1; return rc; } @@ -156,8 +163,14 @@ void sqlite3demo_superunlock(void *pLock){ sqlite3_file *fd = 0; rc = sqlite3_file_control(p->db, "main", SQLITE_FCNTL_FILE_POINTER, (void *)&fd); if( rc==SQLITE_OK ){ - fd->pMethods->xShmLock(fd, 2, 1, flags); - fd->pMethods->xShmLock(fd, 3, SQLITE_SHM_NLOCK-3, flags); + if( p->bRecoveryLocked ){ + fd->pMethods->xShmLock(fd, 2, 1, flags); + p->bRecoveryLocked = 0; + } + if( p->bReaderLocked ){ + fd->pMethods->xShmLock(fd, 3, SQLITE_SHM_NLOCK-3, flags); + p->bReaderLocked = 0; + } } } sqlite3_close(p->db); @@ -232,7 +245,7 @@ int sqlite3demo_superlock( if( SQLITE_OK==(rc = superlockIsWal(pLock)) && pLock->bWal ){ rc = sqlite3_exec(pLock->db, "COMMIT", 0, 0, 0); if( rc==SQLITE_OK ){ - rc = superlockWalLock(pLock->db, &busy); + rc = superlockWalLock(pLock, &busy); } } } diff --git a/src/test_tclsh.c b/src/test_tclsh.c index 989cb97a62..d6892f12a7 100644 --- a/src/test_tclsh.c +++ b/src/test_tclsh.c @@ -90,6 +90,7 @@ const char *sqlite3TestInit(Tcl_Interp *interp){ extern int Fts5tcl_Init(Tcl_Interp *); extern int SqliteRbu_Init(Tcl_Interp*); extern int Sqlitetesttcl_Init(Tcl_Interp*); + extern int Bgckpt_Init(Tcl_Interp*); #if 
defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) extern int Sqlitetestfts3_Init(Tcl_Interp *interp); #endif @@ -159,6 +160,8 @@ const char *sqlite3TestInit(Tcl_Interp *interp){ Fts5tcl_Init(interp); SqliteRbu_Init(interp); Sqlitetesttcl_Init(interp); + Bgckpt_Init(interp); + #if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) Sqlitetestfts3_Init(interp); diff --git a/src/update.c b/src/update.c index 979afea1f5..998e1f7df3 100644 --- a/src/update.c +++ b/src/update.c @@ -465,6 +465,17 @@ void sqlite3Update( */ chngRowid = chngPk = 0; for(i=0; inExpr; i++){ +#if defined(SQLITE_ENABLE_NOOP_UPDATE) && !defined(SQLITE_OMIT_FLAG_PRAGMAS) + if( db->flags & SQLITE_NoopUpdate ){ + Token x; + sqlite3ExprDelete(db, pChanges->a[i].pExpr); + x.z = pChanges->a[i].zEName; + x.n = sqlite3Strlen30(x.z); + pChanges->a[i].pExpr = + sqlite3PExpr(pParse, TK_UPLUS, sqlite3ExprAlloc(db, TK_ID, &x, 0), 0); + if( db->mallocFailed ) goto update_cleanup; + } +#endif /* If this is an UPDATE with a FROM clause, do not resolve expressions ** here. The call to sqlite3Select() below will do that. */ if( nChangeFrom==0 && sqlite3ResolveExprNames(&sNC, pChanges->a[i].pExpr) ){ diff --git a/src/vacuum.c b/src/vacuum.c index 70e62e1ef1..7e82d5a8af 100644 --- a/src/vacuum.c +++ b/src/vacuum.c @@ -412,6 +412,7 @@ SQLITE_NOINLINE int sqlite3RunVacuum( ** is closed by the DETACH. */ db->autoCommit = 1; + assert( db->eConcurrent==0 ); if( pDb ){ sqlite3BtreeClose(pDb->pBt); diff --git a/src/vdbe.c b/src/vdbe.c index b5a262e636..8a24f0d7b9 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -3913,6 +3913,7 @@ case OP_Savepoint: { ** is committed. 
*/ int isTransaction = pSavepoint->pNext==0 && db->isTransactionSavepoint; + assert( db->eConcurrent==0 || db->isTransactionSavepoint==0 ); if( isTransaction && p1==SAVEPOINT_RELEASE ){ if( (rc = sqlite3VdbeCheckFkDeferred(p))!=SQLITE_OK ){ goto vdbe_return; @@ -3999,23 +4000,31 @@ case OP_Savepoint: { break; } -/* Opcode: AutoCommit P1 P2 * * * +/* Opcode: AutoCommit P1 P2 P3 * * ** ** Set the database auto-commit flag to P1 (1 or 0). If P2 is true, roll ** back any currently active btree transactions. If there are any active ** VMs (apart from this one), then a ROLLBACK fails. A COMMIT fails if ** there are active writing VMs or active VMs that use shared cache. ** +** If P3 is non-zero, then this instruction is being executed as part of +** a "BEGIN CONCURRENT" command. +** ** This instruction causes the VM to halt. */ case OP_AutoCommit: { int desiredAutoCommit; int iRollback; + int bConcurrent; + int hrc; desiredAutoCommit = pOp->p1; iRollback = pOp->p2; + bConcurrent = pOp->p3; assert( desiredAutoCommit==1 || desiredAutoCommit==0 ); assert( desiredAutoCommit==1 || iRollback==0 ); + assert( desiredAutoCommit==0 || bConcurrent==0 ); + assert( db->autoCommit==0 || db->eConcurrent==CONCURRENT_NONE ); assert( db->nVdbeActive>0 ); /* At least this one VM is active */ assert( p->bIsReader ); @@ -4024,10 +4033,17 @@ case OP_AutoCommit: { assert( desiredAutoCommit==1 ); sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); db->autoCommit = 1; - }else if( desiredAutoCommit && db->nVdbeWrite>0 ){ - /* If this instruction implements a COMMIT and other VMs are writing - ** return an error indicating that the other VMs must complete first. - */ + db->eConcurrent = CONCURRENT_NONE; + }else if( desiredAutoCommit + && (db->nVdbeWrite>0 || (db->eConcurrent && db->nVdbeActive>1)) ){ + /* A transaction may only be committed if there are no other active + ** writer VMs. 
If the transaction is CONCURRENT, then it may only be + ** committed if there are no active VMs at all (readers or writers). + ** + ** If this instruction is a COMMIT and the transaction may not be + ** committed due to one of the conditions above, return an error + ** indicating that other VMs must complete before the COMMIT can + ** be processed. */ sqlite3VdbeError(p, "cannot commit transaction - " "SQL statements in progress"); rc = SQLITE_BUSY; @@ -4037,12 +4053,16 @@ case OP_AutoCommit: { }else{ db->autoCommit = (u8)desiredAutoCommit; } - if( sqlite3VdbeHalt(p)==SQLITE_BUSY ){ + hrc = sqlite3VdbeHalt(p); + if( (hrc & 0xFF)==SQLITE_BUSY ){ p->pc = (int)(pOp - aOp); db->autoCommit = (u8)(1-desiredAutoCommit); - p->rc = rc = SQLITE_BUSY; + p->rc = hrc; + rc = SQLITE_BUSY; goto vdbe_return; } + assert( bConcurrent==CONCURRENT_NONE || bConcurrent==CONCURRENT_OPEN ); + db->eConcurrent = (u8)bConcurrent; sqlite3CloseSavepoints(db); if( p->rc==SQLITE_OK ){ rc = SQLITE_DONE; @@ -4255,6 +4275,17 @@ case OP_SetCookie: { pDb = &db->aDb[pOp->p1]; assert( pDb->pBt!=0 ); assert( sqlite3SchemaMutexHeld(db, pOp->p1, 0) ); +#ifndef SQLITE_OMIT_CONCURRENT + if( db->eConcurrent + && (pOp->p2==BTREE_USER_VERSION || pOp->p2==BTREE_APPLICATION_ID) + ){ + rc = SQLITE_ERROR; + sqlite3VdbeError(p, "cannot modify %s within CONCURRENT transaction", + pOp->p2==BTREE_USER_VERSION ? 
"user_version" : "application_id" + ); + goto abort_due_to_error; + } +#endif /* See note about index shifting on OP_ReadCookie */ rc = sqlite3BtreeUpdateMeta(pDb->pBt, pOp->p2, pOp->p3); if( pOp->p2==BTREE_SCHEMA_VERSION ){ @@ -4404,6 +4435,11 @@ case OP_OpenWrite: pX = pDb->pBt; assert( pX!=0 ); if( pOp->opcode==OP_OpenWrite ){ +#ifndef SQLITE_OMIT_CONCURRENT + if( db->eConcurrent==CONCURRENT_OPEN && p2==1 && iDb!=1 ){ + db->eConcurrent = CONCURRENT_SCHEMA; + } +#endif assert( OPFLAG_FORDELETE==BTREE_FORDELETE ); wrFlag = BTREE_WRCSR | (pOp->p5 & OPFLAG_FORDELETE); assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); @@ -8008,6 +8044,7 @@ case OP_JournalMode: { /* out2 */ || eNew==PAGER_JOURNALMODE_OFF || eNew==PAGER_JOURNALMODE_MEMORY || eNew==PAGER_JOURNALMODE_WAL + || eNew==PAGER_JOURNALMODE_WAL2 || eNew==PAGER_JOURNALMODE_QUERY ); assert( pOp->p1>=0 && pOp->p1nDb ); @@ -8026,16 +8063,25 @@ case OP_JournalMode: { /* out2 */ /* Do not allow a transition to journal_mode=WAL for a database ** in temporary storage or if the VFS does not support shared memory */ - if( eNew==PAGER_JOURNALMODE_WAL + if( isWalMode(eNew) && (sqlite3Strlen30(zFilename)==0 /* Temp file */ || !sqlite3PagerWalSupported(pPager)) /* No shared-memory support */ ){ eNew = eOld; } - if( (eNew!=eOld) - && (eOld==PAGER_JOURNALMODE_WAL || eNew==PAGER_JOURNALMODE_WAL) - ){ + if( eNew!=eOld && (isWalMode(eNew) || isWalMode(eOld)) ){ + + /* Prevent changing directly to wal2 from wal mode. And vice versa. 
*/ + if( isWalMode(eNew) && isWalMode(eOld) ){ + rc = SQLITE_ERROR; + sqlite3VdbeError(p, "cannot change from %s to %s mode", + sqlite3JournalModename(eOld), sqlite3JournalModename(eNew) + ); + goto abort_due_to_error; + } + + /* Prevent switching into or out of wal/wal2 mode mid-transaction */ if( !db->autoCommit || db->nVdbeRead>1 ){ rc = SQLITE_ERROR; sqlite3VdbeError(p, @@ -8045,7 +8091,7 @@ case OP_JournalMode: { /* out2 */ goto abort_due_to_error; }else{ - if( eOld==PAGER_JOURNALMODE_WAL ){ + if( isWalMode(eOld) ){ /* If leaving WAL mode, close the log file. If successful, the call ** to PagerCloseWal() checkpoints and deletes the write-ahead-log ** file. An EXCLUSIVE lock may still be held on the database file @@ -8066,7 +8112,10 @@ case OP_JournalMode: { /* out2 */ */ assert( sqlite3BtreeTxnState(pBt)!=SQLITE_TXN_WRITE ); if( rc==SQLITE_OK ){ - rc = sqlite3BtreeSetVersion(pBt, (eNew==PAGER_JOURNALMODE_WAL ? 2 : 1)); + /* 1==rollback, 2==wal, 3==wal2 */ + rc = sqlite3BtreeSetVersion(pBt, + 1 + isWalMode(eNew) + (eNew==PAGER_JOURNALMODE_WAL2) + ); } } } @@ -8203,6 +8252,11 @@ case OP_CursorUnlock: { */ case OP_TableLock: { u8 isWriteLock = (u8)pOp->p3; +#ifndef SQLITE_OMIT_CONCURRENT + if( isWriteLock && db->eConcurrent && pOp->p2==1 && pOp->p1!=1 ){ + db->eConcurrent = CONCURRENT_SCHEMA; + } +#endif if( isWriteLock || 0==(db->flags&SQLITE_ReadUncommit) ){ int p1 = pOp->p1; assert( p1>=0 && p1nDb ); diff --git a/src/vdbeaux.c b/src/vdbeaux.c index 5368c0c420..ce24b4b82d 100644 --- a/src/vdbeaux.c +++ b/src/vdbeaux.c @@ -2953,7 +2953,8 @@ static int vdbeCommit(sqlite3 *db, Vdbe *p){ /* OFF */ 0, /* TRUNCATE */ 1, /* MEMORY */ 0, - /* WAL */ 0 + /* WAL */ 0, + /* WAL2 */ 0 }; Pager *pPager; /* Pager associated with pBt */ needXcommit = 1; @@ -2966,10 +2967,27 @@ static int vdbeCommit(sqlite3 *db, Vdbe *p){ assert( i!=1 ); nTrans++; } - rc = sqlite3PagerExclusiveLock(pPager); + rc = sqlite3BtreeExclusiveLock(pBt); sqlite3BtreeLeave(pBt); } } + +#ifndef 
SQLITE_OMIT_CONCURRENT + if( db->eConcurrent && (rc & 0xFF)==SQLITE_BUSY ){ + /* An SQLITE_BUSY or SQLITE_BUSY_SNAPSHOT was encountered while + ** attempting to take the WRITER lock on a wal file. Release the + ** WRITER locks on all wal files and return early. */ + for(i=0; inDb; i++){ + Btree *pBt = db->aDb[i].pBt; + if( sqlite3BtreeTxnState(pBt)==SQLITE_TXN_WRITE ){ + sqlite3BtreeEnter(pBt); + sqlite3PagerDropExclusiveLock(sqlite3BtreePager(pBt)); + sqlite3BtreeLeave(pBt); + } + } + } +#endif + if( rc!=SQLITE_OK ){ return rc; } @@ -3378,6 +3396,7 @@ int sqlite3VdbeHalt(Vdbe *p){ sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); sqlite3CloseSavepoints(db); db->autoCommit = 1; + db->eConcurrent = CONCURRENT_NONE; p->nChange = 0; } } @@ -3416,9 +3435,9 @@ int sqlite3VdbeHalt(Vdbe *p){ ** is required. */ rc = vdbeCommit(db, p); } - if( rc==SQLITE_BUSY && p->readOnly ){ + if( (rc & 0xFF)==SQLITE_BUSY && p->readOnly ){ sqlite3VdbeLeave(p); - return SQLITE_BUSY; + return rc; }else if( rc!=SQLITE_OK ){ sqlite3SystemError(db, rc); p->rc = rc; @@ -3446,6 +3465,7 @@ int sqlite3VdbeHalt(Vdbe *p){ sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); sqlite3CloseSavepoints(db); db->autoCommit = 1; + db->eConcurrent = CONCURRENT_NONE; p->nChange = 0; } } @@ -3467,6 +3487,7 @@ int sqlite3VdbeHalt(Vdbe *p){ sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); sqlite3CloseSavepoints(db); db->autoCommit = 1; + db->eConcurrent = CONCURRENT_NONE; p->nChange = 0; } } diff --git a/src/wal.c b/src/wal.c index 0698521586..80360d68e0 100644 --- a/src/wal.c +++ b/src/wal.c @@ -101,7 +101,7 @@ ** ** To read a page from the database (call it page number P), a reader ** first checks the WAL to see if it contains page P. If so, then the -** last valid instance of page P that is a followed by a commit frame +** last valid instance of page P that is followed by a commit frame ** or is a commit frame itself becomes the value read. 
If the WAL ** contains no copies of page P that are valid and which are a commit ** frame or are followed by a commit frame, then page P is read from @@ -236,7 +236,7 @@ ** and to the wal-index) might be using a different value K1, where K1>K0. ** Both readers can use the same hash table and mapping section to get ** the correct result. There may be entries in the hash table with -** K>K0 but to the first reader, those entries will appear to be unused +** K>K0, but to the first reader those entries will appear to be unused ** slots in the hash table and so the first reader will get an answer as ** if no values greater than K0 had ever been inserted into the hash table ** in the first place - which is what reader one wants. Meanwhile, the @@ -247,6 +247,191 @@ ** that correspond to frames greater than the new K value are removed ** from the hash table at this point. */ + +/* +** WAL2 NOTES +** +** This file also contains the implementation of "wal2" mode - activated +** using "PRAGMA journal_mode = wal2". Wal2 mode is very similar to wal +** mode, except that it uses two wal files instead of one. Under some +** circumstances, wal2 mode provides more concurrency than legacy wal +** mode. +** +** THE PROBLEM WAL2 SOLVES: +** +** In legacy wal mode, if a writer wishes to write to the database while +** a checkpoint is ongoing, it may append frames to the existing wal file. +** This means that after the checkpoint has finished, the wal file consists +** of a large block of checkpointed frames, followed by a block of +** uncheckpointed frames. In a deployment that features a high volume of +** write traffic, this may mean that the wal file is never completely +** checkpointed. And so grows indefinitely. +** +** An alternative is to use "PRAGMA wal_checkpoint=RESTART" or similar to +** force a complete checkpoint of the wal file. 
But this must: +** +** 1) Wait on all existing readers to finish, +** 2) Wait on any existing writer, and then block all new writers, +** 3) Do the checkpoint, +** 4) Wait on any new readers that started during steps 2 and 3. Writers +** are still blocked during this step. +** +** This means that in order to avoid the wal file growing indefinitely +** in a busy system, writers must periodically pause to allow a checkpoint +** to complete. In a system with long running readers, such pauses may be +** for a non-trivial amount of time. +** +** OVERVIEW OF SOLUTION +** +** Wal2 mode uses two wal files. After writers have grown the first wal +** file to a pre-configured size, they begin appending transactions to +** the second wal file. Once all existing readers are reading snapshots +** new enough to include the entire first wal file, a checkpointer can +** checkpoint it. +** +** Meanwhile, writers are writing transactions to the second wal file. +** Once that wal file has grown larger than the pre-configured size, each +** new writer checks if: +** +** * the first wal file has been checkpointed, and if so, if +** * there are no readers still reading from the first wal file (once +** it has been checkpointed, new readers read only from the second +** wal file). +** +** If both these conditions are true, the writer may switch back to the +** first wal file. Eventually, a checkpointer can checkpoint the second +** wal file, and so on. +** +** The wal file that writers are currently appending to (the one they +** don't have to check the above two criteria before writing to) is called +** the "current" wal file. +** +** The first wal file takes the same name as the wal file in legacy wal +** mode systems - "-wal". The second is named "-wal2". + +** +** CHECKPOINTS +** +** The "pre-configured size" mentioned above is the value set by +** "PRAGMA journal_size_limit". Or, if journal_size_limit is not set, +** 1000 pages. 
+** +** There is only a single type of checkpoint in wal2 mode (no "truncate", +** "restart" etc.), and it always checkpoints the entire contents of a single +** wal file. A wal file cannot be checkpointed until after a writer has written +** the first transaction into the other wal file and all readers are reading a +** snapshot that includes at least one transaction from the other wal file. +** +** The wal-hook, if one is registered, is invoked after a write-transaction +** is committed, just as it is in legacy wal mode. The integer parameter +** passed to the wal-hook is the total number of uncheckpointed frames in both +** wal files. Except, the parameter is set to zero if there are no frames +** that may be checkpointed. This happens in two scenarios: +** +** 1. The "other" wal file (the one that the writer did not just append to) +** is completely empty, or +** +** 2. The "other" wal file (the one that the writer did not just append to) +** has already been checkpointed. +** +** +** WAL FILE FORMAT +** +** The file format used for each wal file in wal2 mode is the same as for +** legacy wal mode. Except, the file format field is set to 3021000 +** instead of 3007000. +** +** WAL-INDEX FORMAT +** +** The wal-index format is also very similar. Even though there are two +** wal files, there is still a single wal-index shared-memory area (*-shm +** file with the default unix or win32 VFS). The wal-index header is the +** same size and, with the following exceptions, has the same format: +** +** * The version field is set to 3021000 instead of 3007000. +** +** * An unused 32-bit field in the legacy wal-index header is +** now used to store (a) a single bit indicating which of the +** two wal files writers should append to and (b) the number +** of frames in the second wal file (31 bits). +** +** The first hash table in the wal-index contains entries corresponding +** to the first HASHTABLE_NPAGE_ONE frames stored in the first wal file. 
+** The second hash table in the wal-index contains entries indexing the +** first HASHTABLE_NPAGE frames in the second wal file. The third hash +** table contains the next HASHTABLE_NPAGE frames in the first wal file, +** and so on. +** +** LOCKS +** +** Read-locks are simpler than for legacy wal mode. There are no locking +** slots that contain frame numbers. Instead, there are four distinct +** combinations of read locks a reader may hold: +** +** WAL_LOCK_PART1: "part" lock on first wal, none of second. +** WAL_LOCK_PART1_FULL2: "part" lock on first wal, "full" of second. +** WAL_LOCK_PART2: no lock on first wal, "part" lock on second. +** WAL_LOCK_PART2_FULL1: "full" lock on first wal, "part" lock on second. +** +** When a reader reads the wal-index header as part of opening a read +** transaction, it takes a "part" lock on the current wal file. "Part" +** because the wal file may grow while the read transaction is active, in +** which case the reader would be reading only part of the wal file. +** A part lock prevents a checkpointer from checkpointing the wal file +** on which it is held. +** +** If there is data in the non-current wal file that has not been +** checkpointed, the reader takes a "full" lock on that wal file. A +** "full" lock indicates that the reader is using the entire wal file. +** A full lock prevents a writer from overwriting the wal file on which +** it is held, but does not prevent a checkpointer from checkpointing +** it. +** +** There is still a single WRITER and a single CHECKPOINTER lock. The +** recovery procedure still takes the same exclusive lock on the entire +** range of SQLITE_SHM_NLOCK shm-locks. This works because the read-locks +** above use four of the six read-locking slots used by legacy wal mode. +** +** STARTUP/RECOVERY +** +** The read and write version fields of the database header in a wal2 +** database are set to 0x03, instead of 0x02 as in legacy wal mode. 
+** +** The wal file format used in wal2 mode is the same as the format used +** in legacy wal mode. However, in order to support recovery, there are two +** differences in the way wal file header fields are populated, as follows: +** +** * When the first wal file is first created, the "nCkpt" field in +** the wal file header is set to 0. Thereafter, each time the writer +** switches wal file, it sets the nCkpt field in the new wal file +** header to ((nCkpt0 + 1) & 0x0F), where nCkpt0 is the value in +** the previous wal file header. This means that the first wal file +** always has an even value in the nCkpt field, and the second wal +** file always has an odd value. +** +** * When a writer switches wal file, it sets the salt values in the +** new wal file to a copy of the checksum for the final frame in +** the previous wal file. +** +** Recovery proceeds as follows: +** +** 1. Each wal file is recovered separately. Except, if the first wal +** file does not exist or is zero bytes in size, the second wal file +** is truncated to zero bytes before it is "recovered". +** +** 2. If both wal files contain valid headers, then the nCkpt fields +** are compared to see which of the two wal files is older. If the +** salt keys in the second wal file match the final frame checksum +** in the older wal file, then both wal files are used. Otherwise, +** the newer wal file is ignored. +** +** 3. Or, if only one or neither of the wal files has a valid header, +** then only a single or no wal files are recovered into the +** reconstructed wal-index. +** +** Refer to header comments for walIndexRecover() for further details. +*/ + #ifndef SQLITE_OMIT_WAL #include "wal.h" @@ -262,20 +447,39 @@ int sqlite3WalTrace = 0; #endif /* -** The maximum (and only) versions of the wal and wal-index formats -** that may be interpreted by this version of SQLite. 
-** -** If a client begins recovering a WAL file and finds that (a) the checksum -** values in the wal-header are correct and (b) the version field is not -** WAL_MAX_VERSION, recovery fails and SQLite returns SQLITE_CANTOPEN. -** -** Similarly, if a client successfully reads a wal-index header (i.e. the -** checksum test is successful) and finds that the version field is not -** WALINDEX_MAX_VERSION, then no read-transaction is opened and SQLite -** returns SQLITE_CANTOPEN. +** Both the wal-file and the wal-index contain version fields +** indicating the current version of the system. If a client +** reads the header of a wal file (as part of recovery), or the +** wal-index (as part of opening a read transaction) and (a) the +** header checksum is correct but (b) the version field is not +** recognized, the operation fails with SQLITE_CANTOPEN. +** +** Currently, clients support both version-1 ("journal_mode=wal") and +** version-2 ("journal_mode=wal2"). Legacy clients may support version-1 +** only. */ -#define WAL_MAX_VERSION 3007000 -#define WALINDEX_MAX_VERSION 3007000 +#ifdef SQLITE_ENABLE_WAL_BIGHASH +# define WAL_VERSION1 3007001 /* For "journal_mode=wal" */ +# define WAL_VERSION2 3021001 /* For "journal_mode=wal2" */ +#else +# define WAL_VERSION1 3007000 /* For "journal_mode=wal" */ +# define WAL_VERSION2 3021000 /* For "journal_mode=wal2" */ +#endif + +#ifdef SQLITE_ENABLE_WAL2NOCKSUM +# undef WAL_VERSION2 +# ifdef SQLITE_ENABLE_WAL_BIGHASH +# define WAL_VERSION2 3048001 /* wal2, big-hash, checksums on frame hdrs */ +# else +# define WAL_VERSION2 3048000 /* wal2, checksums on frame hdrs */ +# endif +# define isNocksum(pWal) isWalMode2(pWal) +#else +# define isNocksum(pWal) 0 +#endif + + + /* ** Index numbers for various locking bytes. WAL_NREADER is the number @@ -298,6 +502,39 @@ int sqlite3WalTrace = 0; #define WAL_READ_LOCK(I) (3+(I)) #define WAL_NREADER (SQLITE_SHM_NLOCK-3) +/* +** Values that may be stored in Wal.readLock in wal2 mode. 
+** +** In wal mode, the Wal.readLock member is set to -1 when no read-lock +** is held, or else is the index of the read-mark on which a lock is +** held. +** +** In wal2 mode, a value of -1 still indicates that no read-lock is held. +** And a non-negative value still represents the index of the read-mark on +** which a lock is held. There are two differences: +** +** 1. wal2 mode never uses read-mark 0. +** +** 2. locks on each read-mark have a different interpretation, as +** indicated by the symbolic names below. +*/ +#define WAL_LOCK_NONE -1 +#define WAL_LOCK_PART1 1 +#define WAL_LOCK_PART1_FULL2 2 +#define WAL_LOCK_PART2_FULL1 3 +#define WAL_LOCK_PART2 4 + +/* +** This constant is used in wal2 mode only. +** +** In wal2 mode, when committing a transaction, if the current wal file +** is sufficiently large and there are no conflicting locks held, the +** writer writes the new transaction into the start of the other wal +** file. Usually, "sufficiently large" is defined by the value configured +** using "PRAGMA journal_size_limit". However, if no such value has been +** configured, sufficiently large defaults to WAL_DEFAULT_WALSIZE frames. +*/ +#define WAL_DEFAULT_WALSIZE 1000 /* Object declarations */ typedef struct WalIndexHdr WalIndexHdr; @@ -317,21 +554,63 @@ typedef struct WalCkptInfo WalCkptInfo; ** The szPage value can be any power of 2 between 512 and 32768, inclusive. ** Or it can be 1 to represent a 65536-byte page. The latter case was ** added in 3.7.1 when support for 64K pages was added. +** +** WAL2 mode notes: Member variable mxFrame2 is only used in wal2 mode +** (when iVersion is set to WAL_VERSION2). The lower 31 bits store +** the maximum frame number in file *-wal2. The most significant bit +** is a flag - set if clients are currently appending to *-wal2, clear +** otherwise. 
*/ struct WalIndexHdr { u32 iVersion; /* Wal-index version */ - u32 unused; /* Unused (padding) field */ + u32 mxFrame2; /* See "WAL2 mode notes" above */ u32 iChange; /* Counter incremented each transaction */ u8 isInit; /* 1 when initialized */ u8 bigEndCksum; /* True if checksums in WAL are big-endian */ u16 szPage; /* Database page size in bytes. 1==64K */ - u32 mxFrame; /* Index of last valid frame in the WAL */ + u32 mxFrame; /* Index of last valid frame in each WAL */ u32 nPage; /* Size of database in pages */ u32 aFrameCksum[2]; /* Checksum of last frame in log */ u32 aSalt[2]; /* Two salt values copied from WAL header */ u32 aCksum[2]; /* Checksum over all prior fields */ }; +/* +** The following macros and functions are get/set methods for the maximum +** frame numbers and current wal file values stored in the WalIndexHdr +** structure. These are helpful because of the unorthodox way in which +** the values are stored in wal2 mode (see above). They are equivalent +** to functions with the following signatures. +** +** u32 walidxGetMxFrame(WalIndexHdr*, int iWal); // get mxFrame +** void walidxSetMxFrame(WalIndexHdr*, int iWal, u32 val); // set mxFrame +** int walidxGetFile(WalIndexHdr*) // get file +** void walidxSetFile(WalIndexHdr*, int val); // set file +*/ +#define walidxGetMxFrame(pHdr, iWal) \ + ((iWal) ? ((pHdr)->mxFrame2 & 0x7FFFFFFF) : (pHdr)->mxFrame) + +static void walidxSetMxFrame(WalIndexHdr *pHdr, int iWal, u32 mxFrame){ + if( iWal ){ + pHdr->mxFrame2 = (pHdr->mxFrame2 & 0x80000000) | mxFrame; + }else{ + pHdr->mxFrame = mxFrame; + } + assert( walidxGetMxFrame(pHdr, iWal)==mxFrame ); +} + +#define walidxGetFile(pHdr) (int)((pHdr)->mxFrame2 >> 31) +#define walidxSetFile(pHdr, iWal) ( \ + (pHdr)->mxFrame2 = ((pHdr)->mxFrame2 & 0x7FFFFFFF) | (((u32)(iWal))<<31) \ +) + +/* +** Argument is a pointer to a Wal structure. Return true if the current +** cache of the wal-index header indicates "journal_mode=wal2" mode, or +** false otherwise. 
+*/ +#define isWalMode2(pWal) ((pWal)->hdr.iVersion==WAL_VERSION2) + /* ** A copy of the following object occurs in the wal-index immediately ** following the second copy of the WalIndexHdr. This object stores @@ -511,7 +790,7 @@ struct WalCkptInfo { struct Wal { sqlite3_vfs *pVfs; /* The VFS used to create pDbFd */ sqlite3_file *pDbFd; /* File handle for the database file */ - sqlite3_file *pWalFd; /* File handle for WAL file */ + sqlite3_file *apWalFd[2]; /* File handle for "*-wal" and "*-wal2" */ u32 iCallback; /* Value to pass to log callback (or 0) */ i64 mxWalSize; /* Truncate WAL to this size upon reset */ int nWiData; /* Size of array apWiData */ @@ -531,8 +810,11 @@ struct Wal { WalIndexHdr hdr; /* Wal-index header for current transaction */ u32 minFrame; /* Ignore wal frames before this one */ u32 iReCksum; /* On commit, recalculate checksums from here */ + u32 nPriorFrame; /* For sqlite3WalInfo() */ const char *zWalName; /* Name of WAL file */ + const char *zWalName2; /* Name of second WAL file */ u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ + FastPrng sPrng; /* Random number generator */ #ifdef SQLITE_USE_SEH u32 lockMask; /* Mask of locks held */ void *pFree; /* Pointer to sqlite3_free() if exception thrown */ @@ -548,6 +830,8 @@ struct Wal { WalIndexHdr *pSnapshot; /* Start transaction here if not NULL */ int bGetSnapshot; /* Transaction opened for sqlite3_get_snapshot() */ #endif + int bClosing; /* Set to true at start of sqlite3WalClose() */ + int bWal2; /* bWal2 flag passed to WalOpen() */ #ifdef SQLITE_ENABLE_SETLK_TIMEOUT sqlite3 *db; #endif @@ -571,7 +855,11 @@ struct Wal { ** Each page of the wal-index mapping contains a hash-table made up of ** an array of HASHTABLE_NSLOT elements of the following type. 
*/ -typedef u16 ht_slot; +#ifdef SQLITE_ENABLE_WAL_BIGHASH + typedef u32 ht_slot; +#else + typedef u16 ht_slot; +#endif /* ** This structure is used to implement an iterator that loops through @@ -612,9 +900,14 @@ struct WalIterator { ** Changing any of these constants will alter the wal-index format and ** create incompatibilities. */ -#define HASHTABLE_NPAGE 4096 /* Must be power of 2 */ -#define HASHTABLE_HASH_1 383 /* Should be prime */ -#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */ +#ifdef SQLITE_ENABLE_WAL_BIGHASH +# define HASHTABLE_BITS 17 /* 128K frames per hash */ +#else +# define HASHTABLE_BITS 12 /* 4K frames per hash */ +#endif +# define HASHTABLE_NPAGE (1<nWiData>0 && pWal->apWiData[0] ); - SEH_INJECT_FAULT; return (volatile WalIndexHdr*)pWal->apWiData[0]; } @@ -945,7 +1237,7 @@ static SQLITE_NO_TSAN void walIndexWriteHdr(Wal *pWal){ assert( pWal->writeLock ); pWal->hdr.isInit = 1; - pWal->hdr.iVersion = WALINDEX_MAX_VERSION; + assert( pWal->hdr.iVersion==WAL_VERSION1||pWal->hdr.iVersion==WAL_VERSION2 ); walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum); /* Possible TSAN false-positive. 
See tag-20200519-1 */ memcpy((void*)&aHdr[1], (const void*)&pWal->hdr, sizeof(WalIndexHdr)); @@ -983,10 +1275,13 @@ static void walEncodeFrame( nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); - walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); + if( isNocksum(pWal)==0 ){ + walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); + } sqlite3Put4byte(&aFrame[16], aCksum[0]); sqlite3Put4byte(&aFrame[20], aCksum[1]); + }else{ memset(&aFrame[8], 0, 16); } @@ -1030,7 +1325,9 @@ static int walDecodeFrame( */ nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); - walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); + if( isNocksum(pWal)==0 ){ + walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); + } if( aCksum[0]!=sqlite3Get4byte(&aFrame[16]) || aCksum[1]!=sqlite3Get4byte(&aFrame[20]) ){ @@ -1072,7 +1369,7 @@ static const char *walLockName(int lockIdx){ /* ** Set or release locks on the WAL. Locks are either shared or exclusive. ** A lock cannot be moved directly between shared and exclusive - it must go -** through the unlocked state first. +** through the concurrent state first. ** ** In locking_mode=EXCLUSIVE, all of these routines become no-ops. */ @@ -1188,6 +1485,38 @@ static int walHashGet( return rc; } +static u32 walExternalEncode(int iWal, u32 iFrame){ + u32 iRet; + if( iWal ){ + iRet = HASHTABLE_NPAGE_ONE + iFrame; + iRet += ((iFrame-1) / HASHTABLE_NPAGE) * HASHTABLE_NPAGE; + }else{ + iRet = iFrame; + iFrame += HASHTABLE_NPAGE - HASHTABLE_NPAGE_ONE; + iRet += ((iFrame-1) / HASHTABLE_NPAGE) * HASHTABLE_NPAGE; + } + return iRet; +} + +/* +** Parameter iExternal is an external frame identifier. This function +** transforms it to a wal file number (0 or 1) and frame number within +** this wal file (reported via output parameter *piRead). 
+*/ +static int walExternalDecode(u32 iExternal, u32 *piRead){ + int iHash = (iExternal+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1)/HASHTABLE_NPAGE; + + if( 0==(iHash & 0x01) ){ + /* A frame in wal file 0 */ + *piRead = (iExternal <= HASHTABLE_NPAGE_ONE) ? iExternal : + iExternal - (iHash/2) * HASHTABLE_NPAGE; + return 0; + } + + *piRead = iExternal - HASHTABLE_NPAGE_ONE - ((iHash-1)/2) * HASHTABLE_NPAGE; + return 1; +} + /* ** Return the number of the wal-index page that contains the hash-table ** and page-number array that contain entries corresponding to WAL frame @@ -1206,6 +1535,22 @@ static int walFramePage(u32 iFrame){ return iHash; } +/* +** Return the index of the hash-table corresponding to frame iFrame of wal +** file iWal. +*/ +static int walFramePage2(int iWal, u32 iFrame){ + int iRet; + assert( iWal==0 || iWal==1 ); + assert( iFrame>0 ); + if( iWal==0 ){ + iRet = 2*((iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1)/HASHTABLE_NPAGE); + }else{ + iRet = 1 + 2 * ((iFrame-1) / HASHTABLE_NPAGE); + } + return iRet; +} + /* ** Return the page number associated with frame iFrame in this WAL. */ @@ -1218,6 +1563,10 @@ static u32 walFramePgno(Wal *pWal, u32 iFrame){ return pWal->apWiData[iHash][(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE]; } +static u32 walFramePgno2(Wal *pWal, int iWal, u32 iFrame){ + return walFramePgno(pWal, walExternalEncode(iWal, iFrame)); +} + /* ** Remove entries from the hash table that point to WAL slots greater ** than pWal->hdr.mxFrame. 
@@ -1235,27 +1584,37 @@ static void walCleanupHash(Wal *pWal){ int iLimit = 0; /* Zero values greater than this */ int nByte; /* Number of bytes to zero in aPgno[] */ int i; /* Used to iterate through aHash[] */ + int iWal = walidxGetFile(&pWal->hdr); + u32 mxFrame = walidxGetMxFrame(&pWal->hdr, iWal); + + u32 iExternal; + if( isWalMode2(pWal) ){ + iExternal = walExternalEncode(iWal, mxFrame); + }else{ + assert( iWal==0 ); + iExternal = mxFrame; + } assert( pWal->writeLock ); - testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE-1 ); - testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE ); - testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE+1 ); + testcase( mxFrame==HASHTABLE_NPAGE_ONE-1 ); + testcase( mxFrame==HASHTABLE_NPAGE_ONE ); + testcase( mxFrame==HASHTABLE_NPAGE_ONE+1 ); - if( pWal->hdr.mxFrame==0 ) return; + if( mxFrame==0 ) return; /* Obtain pointers to the hash-table and page-number array containing ** the entry that corresponds to frame pWal->hdr.mxFrame. It is guaranteed ** that the page said hash-table and array reside on is already mapped.(1) */ - assert( pWal->nWiData>walFramePage(pWal->hdr.mxFrame) ); - assert( pWal->apWiData[walFramePage(pWal->hdr.mxFrame)] ); - i = walHashGet(pWal, walFramePage(pWal->hdr.mxFrame), &sLoc); + assert( pWal->nWiData>walFramePage(iExternal) ); + assert( pWal->apWiData[walFramePage(iExternal)] ); + i = walHashGet(pWal, walFramePage(iExternal), &sLoc); if( NEVER(i) ) return; /* Defense-in-depth, in case (1) above is wrong */ /* Zero all hash-table entries that correspond to frame numbers greater ** than pWal->hdr.mxFrame. */ - iLimit = pWal->hdr.mxFrame - sLoc.iZero; + iLimit = iExternal - sLoc.iZero; assert( iLimit>0 ); for(i=0; iiLimit ){ @@ -1287,16 +1646,23 @@ static void walCleanupHash(Wal *pWal){ #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */ } - /* ** Set an entry in the wal-index that will map database page number ** pPage into WAL frame iFrame. 
*/ -static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ +static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){ int rc; /* Return code */ WalHashLoc sLoc; /* Wal-index hash table location */ + u32 iExternal; + + if( isWalMode2(pWal) ){ + iExternal = walExternalEncode(iWal, iFrame); + }else{ + assert( iWal==0 ); + iExternal = iFrame; + } - rc = walHashGet(pWal, walFramePage(iFrame), &sLoc); + rc = walHashGet(pWal, walFramePage(iExternal), &sLoc); /* Assuming the wal-index file was successfully mapped, populate the ** page number array and hash table entry. @@ -1306,7 +1672,7 @@ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ int idx; /* Value to write to hash-table slot */ int nCollide; /* Number of hash collisions */ - idx = iFrame - sLoc.iZero; + idx = iExternal - sLoc.iZero; assert( idx <= HASHTABLE_NSLOT/2 + 1 ); /* If this is the first entry to be added to this hash-table, zero the @@ -1370,6 +1736,217 @@ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ return rc; } +/* +** Recover a single wal file - *-wal if iWal==0, or *-wal2 if iWal==1. 
+*/ +static int walIndexRecoverOne(Wal *pWal, int iWal, u32 *pnCkpt, int *pbZero){ + i64 nSize; /* Size of log file */ + u32 aFrameCksum[2] = {0, 0}; + int rc; + sqlite3_file *pWalFd = pWal->apWalFd[iWal]; + + assert( iWal==0 || iWal==1 ); + + memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); + sqlite3FastRandomness(&pWal->sPrng, 8, pWal->hdr.aSalt); + + rc = sqlite3OsFileSize(pWalFd, &nSize); + if( rc==SQLITE_OK ){ + if( nSize>WAL_HDRSIZE ){ + u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */ + u32 *aPrivate = 0; /* Heap copy of *-shm pg being populated */ + u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ + int szFrame; /* Number of bytes in buffer aFrame[] */ + u8 *aData; /* Pointer to data part of aFrame buffer */ + int szPage; /* Page size according to the log */ + u32 magic; /* Magic value read from WAL header */ + u32 version; /* Magic value read from WAL header */ + int isValid; /* True if this frame is valid */ + int iPg; /* Current 32KB wal-index page */ + int iLastFrame; /* Last frame in wal, based on size alone */ + int iLastPg; /* Last shm page used by this wal */ + + /* Read in the WAL header. */ + rc = sqlite3OsRead(pWalFd, aBuf, WAL_HDRSIZE, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + + /* If the database page size is not a power of two, or is greater than + ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid + ** data. Similarly, if the 'magic' value is invalid, ignore the whole + ** WAL file. 
+ */ + magic = sqlite3Get4byte(&aBuf[0]); + szPage = sqlite3Get4byte(&aBuf[8]); + if( (magic&0xFFFFFFFE)!=WAL_MAGIC + || szPage&(szPage-1) + || szPage>SQLITE_MAX_PAGE_SIZE + || szPage<512 + ){ + return SQLITE_OK; + } + pWal->hdr.bigEndCksum = (u8)(magic&0x00000001); + pWal->szPage = szPage; + + /* Verify that the WAL header checksum is correct */ + walChecksumBytes(pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN, + aBuf, WAL_HDRSIZE-2*4, 0, pWal->hdr.aFrameCksum + ); + if( pWal->hdr.aFrameCksum[0]!=sqlite3Get4byte(&aBuf[24]) + || pWal->hdr.aFrameCksum[1]!=sqlite3Get4byte(&aBuf[28]) + ){ + return SQLITE_OK; + } + + memcpy(&pWal->hdr.aSalt, &aBuf[16], 8); + *pnCkpt = sqlite3Get4byte(&aBuf[12]); + + /* Verify that the version number on the WAL format is one that + ** are able to understand */ + version = sqlite3Get4byte(&aBuf[4]); + if( version!=WAL_VERSION1 && version!=WAL_VERSION2 ){ + return SQLITE_CANTOPEN_BKPT; + } + pWal->hdr.iVersion = version; + + /* Malloc a buffer to read frames into. */ + szFrame = szPage + WAL_FRAME_HDRSIZE; + aFrame = (u8 *)sqlite3_malloc64(szFrame + WALINDEX_PGSZ); + SEH_FREE_ON_ERROR(0, aFrame); + if( !aFrame ){ + return SQLITE_NOMEM_BKPT; + } + aData = &aFrame[WAL_FRAME_HDRSIZE]; + aPrivate = (u32*)&aData[szPage]; + + /* Read all frames from the log file. */ + iLastFrame = (nSize - WAL_HDRSIZE) / szFrame; + if( version==WAL_VERSION2 ){ + iLastPg = walFramePage2(iWal, iLastFrame); + }else{ + iLastPg = walFramePage(iLastFrame); + } + for(iPg=iWal; iPg<=iLastPg; iPg+=(version==WAL_VERSION2 ? 
2 : 1)){ + u32 *aShare; + int iFrame; /* Index of last frame read */ + int iLast; + int iFirst; + int nHdr, nHdr32; + + rc = walIndexPage(pWal, iPg, (volatile u32**)&aShare); + assert( aShare!=0 || rc!=SQLITE_OK ); + if( aShare==0 ) break; + SEH_SET_ON_ERROR(iPg, aShare); + pWal->apWiData[iPg] = aPrivate; + + if( iWal ){ + assert( version==WAL_VERSION2 ); + iFirst = 1 + (iPg/2)*HASHTABLE_NPAGE; + iLast = iFirst + HASHTABLE_NPAGE - 1; + }else{ + int i2 = (version==WAL_VERSION2) ? (iPg/2) : iPg; + iLast = HASHTABLE_NPAGE_ONE+i2*HASHTABLE_NPAGE; + iFirst = 1 + (i2==0?0:HASHTABLE_NPAGE_ONE+(i2-1)*HASHTABLE_NPAGE); + } + iLast = MIN(iLast, iLastFrame); + + for(iFrame=iFirst; iFrame<=iLast; iFrame++){ + i64 iOffset = walFrameOffset(iFrame, szPage); + u32 pgno; /* Database page number for frame */ + u32 nTruncate; /* dbsize field from frame header */ + + /* Read and decode the next log frame. */ + rc = sqlite3OsRead(pWalFd, aFrame, szFrame, iOffset); + if( rc!=SQLITE_OK ) break; + isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame); + if( !isValid ) break; + rc = walIndexAppend(pWal, iWal, iFrame, pgno); + if( NEVER(rc!=SQLITE_OK) ) break; + + /* If nTruncate is non-zero, this is a commit record. */ + if( nTruncate ){ + pWal->hdr.mxFrame = iFrame; + pWal->hdr.nPage = nTruncate; + pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); + testcase( szPage<=32768 ); + testcase( szPage>=65536 ); + aFrameCksum[0] = pWal->hdr.aFrameCksum[0]; + aFrameCksum[1] = pWal->hdr.aFrameCksum[1]; + } + } + pWal->apWiData[iPg] = aShare; + SEH_SET_ON_ERROR(0, 0); + nHdr = (iPg==0 ? WALINDEX_HDR_SIZE : 0); + nHdr32 = nHdr / sizeof(u32); +#ifndef SQLITE_SAFER_WALINDEX_RECOVERY + /* Memcpy() should work fine here, on all reasonable implementations. + ** Technically, memcpy() might change the destination to some + ** intermediate value before setting to the final value, and that might + ** cause a concurrent reader to malfunction. 
Memcpy() is allowed to + ** do that, according to the spec, but no memcpy() implementation that + ** we know of actually does that, which is why we say that memcpy() + ** is safe for this. Memcpy() is certainly a lot faster. + */ + memcpy(&aShare[nHdr32], &aPrivate[nHdr32], WALINDEX_PGSZ-nHdr); +#else + /* In the event that some platform is found for which memcpy() + ** changes the destination to some intermediate value before + ** setting the final value, this alternative copy routine is + ** provided. + */ + { + int i; + for(i=nHdr32; ihdr.aFrameCksum[0] = aFrameCksum[0]; + pWal->hdr.aFrameCksum[1] = aFrameCksum[1]; + + return rc; +} + +static int walOpenWal2(Wal *pWal){ + int rc = SQLITE_OK; + if( !isOpen(pWal->apWalFd[1]) ){ + int f = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL); + rc = sqlite3OsOpen(pWal->pVfs, pWal->zWalName2, pWal->apWalFd[1], f, &f); + } + return rc; +} + +static int walTruncateWal2(Wal *pWal){ + int bIs; + int rc; + assert( !isOpen(pWal->apWalFd[1]) ); + rc = sqlite3OsAccess(pWal->pVfs, pWal->zWalName2, SQLITE_ACCESS_EXISTS, &bIs); + if( rc==SQLITE_OK && bIs ){ + rc = walOpenWal2(pWal); + if( rc==SQLITE_OK ){ + rc = sqlite3OsTruncate(pWal->apWalFd[1], 0); + sqlite3OsClose(pWal->apWalFd[1]); + } + } + return rc; +} /* ** Recover the wal-index by reading the write-ahead log file. @@ -1383,14 +1960,16 @@ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ */ static int walIndexRecover(Wal *pWal){ int rc; /* Return Code */ - i64 nSize; /* Size of log file */ - u32 aFrameCksum[2] = {0, 0}; int iLock; /* Lock offset to lock for checkpoint */ + u32 nCkpt1 = 0xFFFFFFFF; + u32 nCkpt2 = 0xFFFFFFFF; + int bZero = 0; + WalIndexHdr hdr; /* Obtain an exclusive lock on all byte in the locking range not already ** locked by the caller. The caller is guaranteed to have locked the ** WAL_WRITE_LOCK byte, and may have also locked the WAL_CKPT_LOCK byte. 
- ** If successful, the same bytes that are locked here are unlocked before + ** If successful, the same bytes that are locked here are concurrent before ** this function returns. */ assert( pWal->ckptLock==1 || pWal->ckptLock==0 ); @@ -1405,164 +1984,92 @@ static int walIndexRecover(Wal *pWal){ WALTRACE(("WAL%p: recovery begin...\n", pWal)); - memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); - - rc = sqlite3OsFileSize(pWal->pWalFd, &nSize); - if( rc!=SQLITE_OK ){ - goto recovery_error; - } - - if( nSize>WAL_HDRSIZE ){ - u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */ - u32 *aPrivate = 0; /* Heap copy of *-shm hash being populated */ - u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ - int szFrame; /* Number of bytes in buffer aFrame[] */ - u8 *aData; /* Pointer to data part of aFrame buffer */ - int szPage; /* Page size according to the log */ - u32 magic; /* Magic value read from WAL header */ - u32 version; /* Magic value read from WAL header */ - int isValid; /* True if this frame is valid */ - u32 iPg; /* Current 32KB wal-index page */ - u32 iLastFrame; /* Last frame in wal, based on nSize alone */ - - /* Read in the WAL header. */ - rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); - if( rc!=SQLITE_OK ){ - goto recovery_error; - } - - /* If the database page size is not a power of two, or is greater than - ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid - ** data. Similarly, if the 'magic' value is invalid, ignore the whole - ** WAL file. - */ - magic = sqlite3Get4byte(&aBuf[0]); - szPage = sqlite3Get4byte(&aBuf[8]); - if( (magic&0xFFFFFFFE)!=WAL_MAGIC - || szPage&(szPage-1) - || szPage>SQLITE_MAX_PAGE_SIZE - || szPage<512 - ){ - goto finished; + /* Recover the *-wal file. If a valid version-1 header is recovered + ** from it, do not open the *-wal2 file. Even if it exists. 
+ ** + ** Otherwise, if the *-wal2 file exists or if the "wal2" flag was + ** specified when sqlite3WalOpen() was called, open and recover + ** the *-wal2 file. Except, if the *-wal file was zero bytes in size, + ** truncate the *-wal2 to zero bytes in size. + ** + ** After this block has run, if the *-wal2 file is open the system + ** starts up in VERSION2 mode. In this case pWal->hdr contains the + ** wal-index header considering only *-wal2. Stack variable hdr + ** contains the wal-index header considering only *-wal. The hash + ** tables are populated for both. + ** + ** Or, if the *-wal2 file is not open, start up in VERSION1 mode. + ** pWal->hdr is already populated. + */ + rc = walIndexRecoverOne(pWal, 0, &nCkpt1, &bZero); + assert( pWal->hdr.iVersion==0 + || pWal->hdr.iVersion==WAL_VERSION1 + || pWal->hdr.iVersion==WAL_VERSION2 + ); + if( rc==SQLITE_OK && bZero ){ + rc = walTruncateWal2(pWal); + } + if( rc==SQLITE_OK && pWal->hdr.iVersion!=WAL_VERSION1 ){ + int bOpen = 1; + sqlite3_vfs *pVfs = pWal->pVfs; + if( pWal->hdr.iVersion==0 && pWal->bWal2==0 ){ + rc = sqlite3OsAccess(pVfs, pWal->zWalName2, SQLITE_ACCESS_EXISTS, &bOpen); } - pWal->hdr.bigEndCksum = (u8)(magic&0x00000001); - pWal->szPage = szPage; - pWal->nCkpt = sqlite3Get4byte(&aBuf[12]); - memcpy(&pWal->hdr.aSalt, &aBuf[16], 8); - - /* Verify that the WAL header checksum is correct */ - walChecksumBytes(pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN, - aBuf, WAL_HDRSIZE-2*4, 0, pWal->hdr.aFrameCksum - ); - if( pWal->hdr.aFrameCksum[0]!=sqlite3Get4byte(&aBuf[24]) - || pWal->hdr.aFrameCksum[1]!=sqlite3Get4byte(&aBuf[28]) - ){ - goto finished; + if( rc==SQLITE_OK && bOpen ){ + rc = walOpenWal2(pWal); + if( rc==SQLITE_OK ){ + hdr = pWal->hdr; + rc = walIndexRecoverOne(pWal, 1, &nCkpt2, 0); + } } + } - /* Verify that the version number on the WAL format is one that - ** are able to understand */ - version = sqlite3Get4byte(&aBuf[4]); - if( version!=WAL_MAX_VERSION ){ - rc = SQLITE_CANTOPEN_BKPT; - goto 
finished; - } + if( rc==SQLITE_OK ){ + volatile WalCkptInfo *pInfo; - /* Malloc a buffer to read frames into. */ - szFrame = szPage + WAL_FRAME_HDRSIZE; - aFrame = (u8 *)sqlite3_malloc64(szFrame + WALINDEX_PGSZ); - SEH_FREE_ON_ERROR(0, aFrame); - if( !aFrame ){ - rc = SQLITE_NOMEM_BKPT; - goto recovery_error; - } - aData = &aFrame[WAL_FRAME_HDRSIZE]; - aPrivate = (u32*)&aData[szPage]; - - /* Read all frames from the log file. */ - iLastFrame = (nSize - WAL_HDRSIZE) / szFrame; - for(iPg=0; iPg<=(u32)walFramePage(iLastFrame); iPg++){ - u32 *aShare; - u32 iFrame; /* Index of last frame read */ - u32 iLast = MIN(iLastFrame, HASHTABLE_NPAGE_ONE+iPg*HASHTABLE_NPAGE); - u32 iFirst = 1 + (iPg==0?0:HASHTABLE_NPAGE_ONE+(iPg-1)*HASHTABLE_NPAGE); - u32 nHdr, nHdr32; - rc = walIndexPage(pWal, iPg, (volatile u32**)&aShare); - assert( aShare!=0 || rc!=SQLITE_OK ); - if( aShare==0 ) break; - SEH_SET_ON_ERROR(iPg, aShare); - pWal->apWiData[iPg] = aPrivate; - - for(iFrame=iFirst; iFrame<=iLast; iFrame++){ - i64 iOffset = walFrameOffset(iFrame, szPage); - u32 pgno; /* Database page number for frame */ - u32 nTruncate; /* dbsize field from frame header */ - - /* Read and decode the next log frame. */ - rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset); - if( rc!=SQLITE_OK ) break; - isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame); - if( !isValid ) break; - rc = walIndexAppend(pWal, iFrame, pgno); - if( NEVER(rc!=SQLITE_OK) ) break; - - /* If nTruncate is non-zero, this is a commit record. 
*/ - if( nTruncate ){ - pWal->hdr.mxFrame = iFrame; - pWal->hdr.nPage = nTruncate; - pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); - testcase( szPage<=32768 ); - testcase( szPage>=65536 ); - aFrameCksum[0] = pWal->hdr.aFrameCksum[0]; - aFrameCksum[1] = pWal->hdr.aFrameCksum[1]; + if( isOpen(pWal->apWalFd[1]) ){ + /* The case where *-wal2 may follow *-wal */ + if( nCkpt2<=0x0F && nCkpt2==nCkpt1+1 ){ + if( pWal->hdr.mxFrame + && sqlite3Get4byte((u8*)(&pWal->hdr.aSalt[0]))==hdr.aFrameCksum[0] + && sqlite3Get4byte((u8*)(&pWal->hdr.aSalt[1]))==hdr.aFrameCksum[1] + ){ + walidxSetFile(&pWal->hdr, 1); + walidxSetMxFrame(&pWal->hdr, 1, pWal->hdr.mxFrame); + walidxSetMxFrame(&pWal->hdr, 0, hdr.mxFrame); + }else{ + pWal->hdr = hdr; } - } - pWal->apWiData[iPg] = aShare; - SEH_SET_ON_ERROR(0,0); - nHdr = (iPg==0 ? WALINDEX_HDR_SIZE : 0); - nHdr32 = nHdr / sizeof(u32); -#ifndef SQLITE_SAFER_WALINDEX_RECOVERY - /* Memcpy() should work fine here, on all reasonable implementations. - ** Technically, memcpy() might change the destination to some - ** intermediate value before setting to the final value, and that might - ** cause a concurrent reader to malfunction. Memcpy() is allowed to - ** do that, according to the spec, but no memcpy() implementation that - ** we know of actually does that, which is why we say that memcpy() - ** is safe for this. Memcpy() is certainly a lot faster. - */ - memcpy(&aShare[nHdr32], &aPrivate[nHdr32], WALINDEX_PGSZ-nHdr); -#else - /* In the event that some platform is found for which memcpy() - ** changes the destination to some intermediate value before - ** setting the final value, this alternative copy routine is - ** provided. 
- */ - { - int i; - for(i=nHdr32; ihdr.aFrameCksum[0] + && sqlite3Get4byte((u8*)(&hdr.aSalt[1]))==pWal->hdr.aFrameCksum[1] + ){ + SWAP(WalIndexHdr, pWal->hdr, hdr); + walidxSetMxFrame(&pWal->hdr, 1, hdr.mxFrame); + }else{ + walidxSetFile(&pWal->hdr, 1); + walidxSetMxFrame(&pWal->hdr, 1, pWal->hdr.mxFrame); + walidxSetMxFrame(&pWal->hdr, 0, 0); } + }else + + /* Fallback */ + if( nCkpt1<=nCkpt2 ){ + pWal->hdr = hdr; + }else{ + walidxSetFile(&pWal->hdr, 1); + walidxSetMxFrame(&pWal->hdr, 1, pWal->hdr.mxFrame); + walidxSetMxFrame(&pWal->hdr, 0, 0); } -#endif - SEH_INJECT_FAULT; - if( iFrame<=iLast ) break; + pWal->hdr.iVersion = WAL_VERSION2; + }else{ + pWal->hdr.iVersion = WAL_VERSION1; } - SEH_FREE_ON_ERROR(aFrame, 0); - sqlite3_free(aFrame); - } - -finished: - if( rc==SQLITE_OK ){ - volatile WalCkptInfo *pInfo; - int i; - pWal->hdr.aFrameCksum[0] = aFrameCksum[0]; - pWal->hdr.aFrameCksum[1] = aFrameCksum[1]; walIndexWriteHdr(pWal); /* Reset the checkpoint-header. This is safe because this thread is @@ -1570,45 +2077,53 @@ static int walIndexRecover(Wal *pWal){ ** checkpointers. Then set the values of read-mark slots 1 through N. 
*/ pInfo = walCkptInfo(pWal); - pInfo->nBackfill = 0; - pInfo->nBackfillAttempted = pWal->hdr.mxFrame; - pInfo->aReadMark[0] = 0; - for(i=1; ihdr.mxFrame ){ - pInfo->aReadMark[i] = pWal->hdr.mxFrame; - }else{ - pInfo->aReadMark[i] = READMARK_NOT_USED; + memset((void*)pInfo, 0, sizeof(WalCkptInfo)); + if( 0==isWalMode2(pWal) ){ + int i; + pInfo->nBackfillAttempted = pWal->hdr.mxFrame; + pInfo->aReadMark[0] = 0; + for(i=1; ihdr.mxFrame ){ + pInfo->aReadMark[i] = pWal->hdr.mxFrame; + }else{ + pInfo->aReadMark[i] = READMARK_NOT_USED; + } + walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); + }else if( rc!=SQLITE_BUSY ){ + break; } - SEH_INJECT_FAULT; - walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); - }else if( rc!=SQLITE_BUSY ){ - goto recovery_error; } } /* If more than one frame was recovered from the log file, report an ** event via sqlite3_log(). This is to help with identifying performance ** problems caused by applications routinely shutting down without - ** checkpointing the log file. - */ + ** checkpointing the log file. */ if( pWal->hdr.nPage ){ - sqlite3_log(SQLITE_NOTICE_RECOVER_WAL, - "recovered %d frames from WAL file %s", - pWal->hdr.mxFrame, pWal->zWalName - ); + if( isWalMode2(pWal) ){ + sqlite3_log(SQLITE_NOTICE_RECOVER_WAL, + "recovered (%d,%d) frames from WAL files %s[2] (wal2 mode)", + walidxGetMxFrame(&pWal->hdr, 0), walidxGetMxFrame(&pWal->hdr, 1), + pWal->zWalName + ); + }else{ + sqlite3_log(SQLITE_NOTICE_RECOVER_WAL, + "recovered %d frames from WAL file %s", + pWal->hdr.mxFrame, pWal->zWalName + ); + } } } -recovery_error: WALTRACE(("WAL%p: recovery %s\n", pWal, rc ? "failed" : "ok")); walUnlockExclusive(pWal, iLock, WAL_READ_LOCK(0)-iLock); return rc; } /* -** Close an open wal-index. +** Close an open wal-index and wal files. 
*/ static void walIndexClose(Wal *pWal, int isDelete){ if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE || pWal->bShmUnreliable ){ @@ -1621,6 +2136,8 @@ static void walIndexClose(Wal *pWal, int isDelete){ if( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE ){ sqlite3OsShmUnmap(pWal->pDbFd, isDelete); } + sqlite3OsClose(pWal->apWalFd[0]); + sqlite3OsClose(pWal->apWalFd[1]); } /* @@ -1644,11 +2161,13 @@ int sqlite3WalOpen( const char *zWalName, /* Name of the WAL file */ int bNoShm, /* True to run in heap-memory mode */ i64 mxWalSize, /* Truncate WAL to this size on reset */ + int bWal2, /* True to open in wal2 mode */ Wal **ppWal /* OUT: Allocated Wal handle */ ){ int rc; /* Return Code */ Wal *pRet; /* Object to allocate and return */ int flags; /* Flags passed to OsOpen() */ + int nByte; /* Bytes of space to allocate */ assert( zWalName && zWalName[0] ); assert( pDbFd ); @@ -1666,11 +2185,13 @@ int sqlite3WalOpen( assert( 40 == sizeof(WalCkptInfo) ); assert( 120 == WALINDEX_LOCK_OFFSET ); assert( 136 == WALINDEX_HDR_SIZE ); +#ifndef SQLITE_ENABLE_WAL_BIGHASH assert( 4096 == HASHTABLE_NPAGE ); assert( 4062 == HASHTABLE_NPAGE_ONE ); assert( 8192 == HASHTABLE_NSLOT ); assert( 383 == HASHTABLE_HASH_1 ); assert( 32768 == WALINDEX_PGSZ ); +#endif assert( 8 == SQLITE_SHM_NLOCK ); assert( 5 == WAL_NREADER ); assert( 24 == WAL_FRAME_HDRSIZE ); @@ -1697,34 +2218,38 @@ int sqlite3WalOpen( assert( UNIX_SHM_BASE==WALINDEX_LOCK_OFFSET ); #endif + nByte = sizeof(Wal) + pVfs->szOsFile*2; /* Allocate an instance of struct Wal to return. 
*/ *ppWal = 0; - pRet = (Wal*)sqlite3MallocZero(sizeof(Wal) + pVfs->szOsFile); + pRet = (Wal*)sqlite3MallocZero(nByte); if( !pRet ){ return SQLITE_NOMEM_BKPT; } pRet->pVfs = pVfs; - pRet->pWalFd = (sqlite3_file *)&pRet[1]; + pRet->apWalFd[0] = (sqlite3_file*)((char*)pRet+sizeof(Wal)); + pRet->apWalFd[1] = (sqlite3_file*)((char*)pRet+sizeof(Wal)+pVfs->szOsFile); pRet->pDbFd = pDbFd; - pRet->readLock = -1; + pRet->readLock = WAL_LOCK_NONE; pRet->mxWalSize = mxWalSize; pRet->zWalName = zWalName; pRet->syncHeader = 1; pRet->padToSectorBoundary = 1; pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE); + sqlite3FastPrngInit(&pRet->sPrng); + pRet->bWal2 = bWal2; + pRet->zWalName2 = &zWalName[sqlite3Strlen30(zWalName)+1]; - /* Open file handle on the write-ahead log file. */ + /* Open a file handle on the first write-ahead log file. */ flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL); - rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags); + rc = sqlite3OsOpen(pVfs, zWalName, pRet->apWalFd[0], flags, &flags); if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){ pRet->readOnly = WAL_RDONLY; } if( rc!=SQLITE_OK ){ walIndexClose(pRet, 0); - sqlite3OsClose(pRet->pWalFd); sqlite3_free(pRet); }else{ int iDC = sqlite3OsDeviceCharacteristics(pDbFd); @@ -1881,7 +2406,7 @@ static void walMergesort( ht_slot *aMerge = 0; /* List to be merged */ int iList; /* Index into input list */ u32 iSub = 0; /* Index into aSub array */ - struct Sublist aSub[13]; /* Array of sub-lists */ + struct Sublist aSub[HASHTABLE_BITS+1]; /* Array of sub-lists */ memset(aSub, 0, sizeof(aSub)); assert( nList<=HASHTABLE_NPAGE && nList>0 ); @@ -1934,34 +2459,50 @@ static void walIteratorFree(WalIterator *p){ /* ** Construct a WalInterator object that can be used to loop over all -** pages in the WAL following frame nBackfill in ascending order. Frames +** pages in wal file iWal following frame nBackfill in ascending order. 
Frames ** nBackfill or earlier may be included - excluding them is an optimization ** only. The caller must hold the checkpoint lock. ** -** On success, make *pp point to the newly allocated WalInterator object -** return SQLITE_OK. Otherwise, return an error code. If this routine -** returns an error, the value of *pp is undefined. +** On success, make *pp point to the newly allocated WalIterator object +** and return SQLITE_OK. Otherwise, return an error code. If this routine +** returns an error, the final value of *pp is undefined. ** ** The calling routine should invoke walIteratorFree() to destroy the ** WalIterator object when it has finished with it. */ -static int walIteratorInit(Wal *pWal, u32 nBackfill, WalIterator **pp){ +static int walIteratorInit( + Wal *pWal, + int iWal, + u32 nBackfill, + WalIterator **pp +){ WalIterator *p; /* Return value */ int nSegment; /* Number of segments to merge */ u32 iLast; /* Last frame in log */ sqlite3_int64 nByte; /* Number of bytes to allocate */ int i; /* Iterator variable */ + int iLastSeg; /* Last hash table to iterate though */ ht_slot *aTmp; /* Temp space used by merge-sort */ int rc = SQLITE_OK; /* Return Code */ + int iMode = isWalMode2(pWal) ? 2 : 1; + + assert( isWalMode2(pWal) || iWal==0 ); + assert( 0==isWalMode2(pWal) || nBackfill==0 ); /* This routine only runs while holding the checkpoint lock. And ** it only runs if there is actually content in the log (mxFrame>0). */ - assert( pWal->ckptLock && pWal->hdr.mxFrame>0 ); - iLast = pWal->hdr.mxFrame; + iLast = walidxGetMxFrame(&pWal->hdr, iWal); + assert( pWal->ckptLock && iLast>0 ); + + if( iMode==2 ){ + iLastSeg = walFramePage2(iWal, iLast); + }else{ + iLastSeg = walFramePage(iLast); + } + nSegment = 1 + (iLastSeg/iMode); /* Allocate space for the WalIterator object. 
*/ - nSegment = walFramePage(iLast) + 1; nByte = SZ_WALITERATOR(nSegment) + iLast*sizeof(ht_slot); p = (WalIterator *)sqlite3_malloc64(nByte @@ -1974,7 +2515,8 @@ static int walIteratorInit(Wal *pWal, u32 nBackfill, WalIterator **pp){ p->nSegment = nSegment; aTmp = (ht_slot*)&(((u8*)p)[nByte]); SEH_FREE_ON_ERROR(0, p); - for(i=walFramePage(nBackfill+1); rc==SQLITE_OK && i=2 ); + }else{ + iZero = sLoc.iZero; + } - if( (i+1)==nSegment ){ - nEntry = (int)(iLast - sLoc.iZero); + if( i==iLastSeg ){ + nEntry = (int)(iLast - iZero); }else{ nEntry = (int)((u32*)sLoc.aHash - (u32*)sLoc.aPgno); } - aIndex = &((ht_slot *)&p->aSegment[p->nSegment])[sLoc.iZero]; - sLoc.iZero++; + aIndex = &((ht_slot *)&p->aSegment[p->nSegment])[iZero]; + iZero++; for(j=0; jaSegment[i].iZero = sLoc.iZero; - p->aSegment[i].nEntry = nEntry; - p->aSegment[i].aIndex = aIndex; - p->aSegment[i].aPgno = (u32 *)sLoc.aPgno; + walMergesort((u32*)sLoc.aPgno, aTmp, aIndex, &nEntry); + p->aSegment[i/iMode].iZero = iZero; + p->aSegment[i/iMode].nEntry = nEntry; + p->aSegment[i/iMode].aIndex = aIndex; + p->aSegment[i/iMode].aPgno = (u32*)sLoc.aPgno; } } if( rc!=SQLITE_OK ){ @@ -2147,6 +2698,7 @@ static void walRestartHdr(Wal *pWal, u32 salt1){ volatile WalCkptInfo *pInfo = walCkptInfo(pWal); int i; /* Loop counter */ u32 *aSalt = pWal->hdr.aSalt; /* Big-endian salt values */ + assert( isWalMode2(pWal)==0 ); pWal->nCkpt++; pWal->hdr.mxFrame = 0; sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0])); @@ -2160,14 +2712,76 @@ static void walRestartHdr(Wal *pWal, u32 salt1){ } /* -** Copy as much content as we can from the WAL back into the database file -** in response to an sqlite3_wal_checkpoint() request or the equivalent. +** This function is used in wal2 mode. ** -** The amount of information copies from WAL to database might be limited -** by active readers. This routine will never overwrite a database page -** that a concurrent reader might be using. 
+** This function is called when writer pWal is just about to start +** writing out frames. Parameter iApp is the current wal file. The "other" wal +** file (wal file !iApp) has been fully checkpointed. This function returns +** SQLITE_OK if there are no readers preventing the writer from switching to +** the other wal file. Or SQLITE_BUSY if there are. +*/ +static int wal2RestartOk(Wal *pWal, int iApp){ + /* The other wal file (wal file !iApp) can be overwritten if there + ** are no readers reading from it - no "full" or "partial" locks. + ** Technically speaking it is not possible for any reader to hold + ** a "part" lock, as this would have prevented the file from being + ** checkpointed. But checking anyway doesn't hurt. The following + ** is equivalent to: + ** + ** if( iApp==0 ) eLock = WAL_LOCK_PART1_FULL2; + ** if( iApp==1 ) eLock = WAL_LOCK_PART1; + */ + int eLock = 1 + (iApp==0); + + assert( WAL_LOCK_PART1==1 ); + assert( WAL_LOCK_PART1_FULL2==2 ); + assert( WAL_LOCK_PART2_FULL1==3 ); + assert( WAL_LOCK_PART2==4 ); + + assert( iApp!=0 || eLock==WAL_LOCK_PART1_FULL2 ); + assert( iApp!=1 || eLock==WAL_LOCK_PART1 ); + + return walLockExclusive(pWal, WAL_READ_LOCK(eLock), 3); +} +static void wal2RestartFinished(Wal *pWal, int iApp){ + walUnlockExclusive(pWal, WAL_READ_LOCK(1 + (iApp==0)), 3); +} + +/* +** This function is used in wal2 mode. ** -** All I/O barrier operations (a.k.a fsyncs) occur in this routine when +** This function is called when a checkpointer wishes to checkpoint wal +** file iCkpt. It takes the required lock and, if successful, returns +** SQLITE_OK. Otherwise, an SQLite error code (e.g. SQLITE_BUSY). If this +** function returns SQLITE_OK, it is the responsibility of the caller +** to invoke wal2CheckpointFinished() to release the lock. 
+*/ +static int wal2CheckpointOk(Wal *pWal, int iCkpt){ + int eLock = 1 + (iCkpt*2); + + assert( WAL_LOCK_PART1==1 ); + assert( WAL_LOCK_PART1_FULL2==2 ); + assert( WAL_LOCK_PART2_FULL1==3 ); + assert( WAL_LOCK_PART2==4 ); + + assert( iCkpt!=0 || eLock==WAL_LOCK_PART1 ); + assert( iCkpt!=1 || eLock==WAL_LOCK_PART2_FULL1 ); + + return walLockExclusive(pWal, WAL_READ_LOCK(eLock), 2); +} +static void wal2CheckpointFinished(Wal *pWal, int iCkpt){ + walUnlockExclusive(pWal, WAL_READ_LOCK(1 + (iCkpt*2)), 2); +} + +/* +** Copy as much content as we can from the WAL back into the database file +** in response to an sqlite3_wal_checkpoint() request or the equivalent. +** +** The amount of information copies from WAL to database might be limited +** by active readers. This routine will never overwrite a database page +** that a concurrent reader might be using. +** +** All I/O barrier operations (a.k.a fsyncs) occur in this routine when ** SQLite is in WAL-mode in synchronous=NORMAL. That means that if ** checkpoints are always run by a background thread or background ** process, foreground threads will never block on a lengthy fsync call. @@ -2208,94 +2822,121 @@ static int walCheckpoint( u32 mxPage; /* Max database page to write */ int i; /* Loop counter */ volatile WalCkptInfo *pInfo; /* The checkpoint status information */ + int bWal2 = isWalMode2(pWal); /* True for wal2 connections */ + int iCkpt = bWal2 ? !walidxGetFile(&pWal->hdr) : 0; + mxSafeFrame = walidxGetMxFrame(&pWal->hdr, iCkpt); szPage = walPagesize(pWal); testcase( szPage<=32768 ); testcase( szPage>=65536 ); pInfo = walCkptInfo(pWal); - if( pInfo->nBackfillhdr.mxFrame ){ + if( (bWal2==1 && pInfo->nBackfill==0 && mxSafeFrame) + || (bWal2==0 && pInfo->nBackfillapWalFd[iCkpt]; + mxPage = pWal->hdr.nPage; + + /* If this is a wal2 system, check for a reader holding a lock + ** preventing this checkpoint operation. If one is found, return + ** early. 
*/ + if( bWal2 ){ + rc = wal2CheckpointOk(pWal, iCkpt); + if( rc!=SQLITE_OK ) return rc; + } /* EVIDENCE-OF: R-62920-47450 The busy-handler callback is never invoked ** in the SQLITE_CHECKPOINT_PASSIVE mode. */ assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 ); - /* Compute in mxSafeFrame the index of the last frame of the WAL that is - ** safe to write into the database. Frames beyond mxSafeFrame might - ** overwrite database pages that are in use by active readers and thus - ** cannot be backfilled from the WAL. + /* If this is a wal system (not wal2), compute in mxSafeFrame the index + ** of the last frame of the WAL that is safe to write into the database. + ** Frames beyond mxSafeFrame might overwrite database pages that are in + ** use by active readers and thus cannot be backfilled from the WAL. */ - mxSafeFrame = pWal->hdr.mxFrame; - mxPage = pWal->hdr.nPage; - for(i=1; iaReadMark+i); SEH_INJECT_FAULT; - if( mxSafeFrame>y ){ - assert( y<=pWal->hdr.mxFrame ); - rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); - if( rc==SQLITE_OK ){ - u32 iMark = (i==1 ? mxSafeFrame : READMARK_NOT_USED); - AtomicStore(pInfo->aReadMark+i, iMark); SEH_INJECT_FAULT; - walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); - }else if( rc==SQLITE_BUSY ){ - mxSafeFrame = y; - xBusy = 0; - }else{ - goto walcheckpoint_out; + if( bWal2==0 ){ + mxSafeFrame = pWal->hdr.mxFrame; + mxPage = pWal->hdr.nPage; + for(i=1; iaReadMark+i); SEH_INJECT_FAULT; + if( mxSafeFrame>y ){ + assert( y<=pWal->hdr.mxFrame ); + rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); + if( rc==SQLITE_OK ){ + u32 iMark = (i==1 ? 
mxSafeFrame : READMARK_NOT_USED); + AtomicStore(pInfo->aReadMark+i, iMark); SEH_INJECT_FAULT; + walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); + }else if( rc==SQLITE_BUSY ){ + mxSafeFrame = y; + xBusy = 0; + }else{ + goto walcheckpoint_out; + } } } } /* Allocate the iterator */ - if( pInfo->nBackfillnBackfill, &pIter); + if( bWal2 || pInfo->nBackfillnBackfill==0 ); + rc = walIteratorInit(pWal, iCkpt, pInfo->nBackfill, &pIter); assert( rc==SQLITE_OK || pIter==0 ); } - if( pIter - && (rc = walBusyLock(pWal,xBusy,pBusyArg,WAL_READ_LOCK(0),1))==SQLITE_OK - ){ + if( pIter && (bWal2 + || (rc = walBusyLock(pWal, xBusy, pBusyArg,WAL_READ_LOCK(0),1))==SQLITE_OK + )){ u32 nBackfill = pInfo->nBackfill; + + assert( bWal2==0 || nBackfill==0 ); pInfo->nBackfillAttempted = mxSafeFrame; SEH_INJECT_FAULT; - /* Sync the WAL to disk */ - rc = sqlite3OsSync(pWal->pWalFd, CKPT_SYNC_FLAGS(sync_flags)); + /* Sync the wal file being checkpointed to disk */ + rc = sqlite3OsSync(pWalFd, CKPT_SYNC_FLAGS(sync_flags)); /* If the database may grow as a result of this checkpoint, hint - ** about the eventual size of the db file to the VFS layer. - */ + ** about the eventual size of the db file to the VFS layer. 
*/ if( rc==SQLITE_OK ){ i64 nReq = ((i64)mxPage * szPage); i64 nSize; /* Current size of database file */ sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_CKPT_START, 0); rc = sqlite3OsFileSize(pWal->pDbFd, &nSize); if( rc==SQLITE_OK && nSizehdr.mxFrame*szPage)hdr.mxFrame + (bWal2?walidxGetMxFrame(&pWal->hdr,1):0); + if( (nSize+65536+mx*szPage)pDbFd, SQLITE_FCNTL_SIZE_HINT,&nReq); } } - } /* Iterate through the contents of the WAL, copying data to the db file */ while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ i64 iOffset; - assert( walFramePgno(pWal, iFrame)==iDbpage ); + + assert( bWal2==1 || walFramePgno(pWal, iFrame)==iDbpage ); + assert( bWal2==0 || walFramePgno2(pWal, iCkpt, iFrame)==iDbpage ); + SEH_INJECT_FAULT; if( AtomicLoad(&db->u1.isInterrupted) ){ rc = db->mallocFailed ? SQLITE_NOMEM_BKPT : SQLITE_INTERRUPT; break; } if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ){ + assert( bWal2==0 || iDbpage>mxPage ); continue; } iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE; + WALTRACE(("WAL%p: checkpoint frame %d of wal %d to db page %d\n", + pWal, (int)iFrame, iCkpt, (int)iDbpage + )); /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */ - rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset); + rc = sqlite3OsRead(pWalFd, zBuf, szPage, iOffset); if( rc!=SQLITE_OK ) break; iOffset = (iDbpage-1)*(i64)szPage; testcase( IS_BIG_INT(iOffset) ); @@ -2304,23 +2945,27 @@ static int walCheckpoint( } sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_CKPT_DONE, 0); - /* If work was actually accomplished... */ - if( rc==SQLITE_OK ){ - if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){ + /* If work was actually accomplished, truncate the db file, sync the wal + ** file and set WalCkptInfo.nBackfill to indicate so. 
*/ + if( rc==SQLITE_OK && (bWal2 || mxSafeFrame==walIndexHdr(pWal)->mxFrame) ){ + if( !bWal2 ){ i64 szDb = pWal->hdr.nPage*(i64)szPage; testcase( IS_BIG_INT(szDb) ); rc = sqlite3OsTruncate(pWal->pDbFd, szDb); - if( rc==SQLITE_OK ){ - rc = sqlite3OsSync(pWal->pDbFd, CKPT_SYNC_FLAGS(sync_flags)); - } } if( rc==SQLITE_OK ){ - AtomicStore(&pInfo->nBackfill, mxSafeFrame); SEH_INJECT_FAULT; + rc = sqlite3OsSync(pWal->pDbFd, CKPT_SYNC_FLAGS(sync_flags)); } } + if( rc==SQLITE_OK ){ + AtomicStore(&pInfo->nBackfill, (bWal2 ? 1 : mxSafeFrame)); + SEH_INJECT_FAULT; + } /* Release the reader lock held while backfilling */ - walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1); + if( bWal2==0 ){ + walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1); + } } if( rc==SQLITE_BUSY ){ @@ -2328,6 +2973,7 @@ static int walCheckpoint( ** just because there are active readers. */ rc = SQLITE_OK; } + if( bWal2 ) wal2CheckpointFinished(pWal, iCkpt); } /* If this is an SQLITE_CHECKPOINT_RESTART or TRUNCATE operation, and the @@ -2335,14 +2981,14 @@ static int walCheckpoint( ** until all readers have finished using the wal file. This ensures that ** the next process to write to the database restarts the wal file. */ - if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ + if( bWal2==0 && rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ assert( pWal->writeLock ); SEH_INJECT_FAULT; if( pInfo->nBackfillhdr.mxFrame ){ rc = SQLITE_BUSY; }else if( eMode>=SQLITE_CHECKPOINT_RESTART ){ u32 salt1; - sqlite3_randomness(4, &salt1); + sqlite3FastRandomness(&pWal->sPrng, 4, &salt1); assert( pInfo->nBackfill==pWal->hdr.mxFrame ); rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(1), WAL_NREADER-1); if( rc==SQLITE_OK ){ @@ -2361,7 +3007,7 @@ static int walCheckpoint( ** file-system. To avoid this, update the wal-index header to ** indicate that the log file contains zero valid frames. 
*/ walRestartHdr(pWal, salt1); - rc = sqlite3OsTruncate(pWal->pWalFd, 0); + rc = sqlite3OsTruncate(pWal->apWalFd[0], 0); } walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); } @@ -2379,16 +3025,18 @@ static int walCheckpoint( ** it to exactly nMax bytes. If an error occurs while doing so, ignore it. */ static void walLimitSize(Wal *pWal, i64 nMax){ - i64 sz; - int rx; - sqlite3BeginBenignMalloc(); - rx = sqlite3OsFileSize(pWal->pWalFd, &sz); - if( rx==SQLITE_OK && (sz > nMax ) ){ - rx = sqlite3OsTruncate(pWal->pWalFd, nMax); - } - sqlite3EndBenignMalloc(); - if( rx ){ - sqlite3_log(rx, "cannot limit WAL size: %s", pWal->zWalName); + if( isWalMode2(pWal)==0 ){ + i64 sz; + int rx; + sqlite3BeginBenignMalloc(); + rx = sqlite3OsFileSize(pWal->apWalFd[0], &sz); + if( rx==SQLITE_OK && (sz > nMax ) ){ + rx = sqlite3OsTruncate(pWal->apWalFd[0], nMax); + } + sqlite3EndBenignMalloc(); + if( rx ){ + sqlite3_log(rx, "cannot limit WAL size: %s", pWal->zWalName); + } } } @@ -2496,6 +3144,7 @@ int sqlite3WalClose( int isDelete = 0; /* True to unlink wal and wal-index files */ assert( walAssertLockmask(pWal) ); + pWal->bClosing = 1; /* If an EXCLUSIVE lock can be obtained on the database file (using the ** ordinary, rollback-mode locking methods, this guarantees that the @@ -2508,39 +3157,53 @@ int sqlite3WalClose( if( zBuf!=0 && SQLITE_OK==(rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE)) ){ + int i; if( pWal->exclusiveMode==WAL_NORMAL_MODE ){ pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; } - rc = sqlite3WalCheckpoint(pWal, db, - SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0 - ); - if( rc==SQLITE_OK ){ - int bPersist = -1; - sqlite3OsFileControlHint( - pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersist + for(i=0; rc==SQLITE_OK && i<2; i++){ + rc = sqlite3WalCheckpoint(pWal, db, + SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0 ); - if( bPersist!=1 ){ - /* Try to delete the WAL file if the checkpoint completed and - ** fsynced (rc==SQLITE_OK) 
and if we are not in persistent-wal - ** mode (!bPersist) */ - isDelete = 1; - }else if( pWal->mxWalSize>=0 ){ - /* Try to truncate the WAL file to zero bytes if the checkpoint - ** completed and fsynced (rc==SQLITE_OK) and we are in persistent - ** WAL mode (bPersist) and if the PRAGMA journal_size_limit is a - ** non-negative value (pWal->mxWalSize>=0). Note that we truncate - ** to zero bytes as truncating to the journal_size_limit might - ** leave a corrupt WAL file on disk. */ - walLimitSize(pWal, 0); + if( rc==SQLITE_OK ){ + int bPersist = -1; + sqlite3OsFileControlHint( + pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersist + ); + if( bPersist!=1 ){ + /* Try to delete the WAL file if the checkpoint completed and + ** fsyned (rc==SQLITE_OK) and if we are not in persistent-wal + ** mode (!bPersist) */ + isDelete = 1; + }else if( pWal->mxWalSize>=0 ){ + /* Try to truncate the WAL file to zero bytes if the checkpoint + ** completed and fsynced (rc==SQLITE_OK) and we are in persistent + ** WAL mode (bPersist) and if the PRAGMA journal_size_limit is a + ** non-negative value (pWal->mxWalSize>=0). Note that we truncate + ** to zero bytes as truncating to the journal_size_limit might + ** leave a corrupt WAL file on disk. */ + walLimitSize(pWal, 0); + } } + + if( isWalMode2(pWal)==0 ) break; + + SEH_TRY { + walCkptInfo(pWal)->nBackfill = 0; + walidxSetFile(&pWal->hdr, !walidxGetFile(&pWal->hdr)); + pWal->writeLock = 1; + walIndexWriteHdr(pWal); + pWal->writeLock = 0; + } + SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) } } walIndexClose(pWal, isDelete); - sqlite3OsClose(pWal->pWalFd); if( isDelete ){ sqlite3BeginBenignMalloc(); sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0); + sqlite3OsDelete(pWal->pVfs, pWal->zWalName2, 0); sqlite3EndBenignMalloc(); } WALTRACE(("WAL%p: closed\n", pWal)); @@ -2551,25 +3214,14 @@ int sqlite3WalClose( } /* -** Try to read the wal-index header. Return 0 on success and 1 if -** there is a problem. -** -** The wal-index is in shared memory. 
Another thread or process might -** be writing the header at the same time this procedure is trying to -** read it, which might result in inconsistency. A dirty read is detected -** by verifying that both copies of the header are the same and also by -** a checksum on the header. -** -** If and only if the read is consistent and the header is different from -** pWal->hdr, then pWal->hdr is updated to the content of the new header -** and *pChanged is set to 1. -** -** If the checksum cannot be verified return non-zero. If the header -** is read successfully and the checksum verified, return zero. +** Try to copy the wal-index header from shared-memory into (*pHdr). Return +** zero if successful or non-zero otherwise. If the header is corrupted +** (either because the two copies are inconsistent or because the checksum +** values are incorrect), the read fails and non-zero is returned. */ -static SQLITE_NO_TSAN int walIndexTryHdr(Wal *pWal, int *pChanged){ +static int walIndexLoadHdr(Wal *pWal, WalIndexHdr *pHdr){ u32 aCksum[2]; /* Checksum on the header content */ - WalIndexHdr h1, h2; /* Two copies of the header content */ + WalIndexHdr h2; /* Second copy of the header content */ WalIndexHdr volatile *aHdr; /* Header in shared memory */ /* The first page of the wal-index must be mapped at this point. */ @@ -2580,33 +3232,54 @@ static SQLITE_NO_TSAN int walIndexTryHdr(Wal *pWal, int *pChanged){ ** meaning it is possible that an inconsistent snapshot is read ** from the file. If this happens, return non-zero. ** - ** tag-20200519-1: ** There are two copies of the header at the beginning of the wal-index. ** When reading, read [0] first then [1]. Writes are in the reverse order. ** Memory barriers are used to prevent the compiler or the hardware from - ** reordering the reads and writes. TSAN and similar tools can sometimes - ** give false-positive warnings about these accesses because the tools do not - ** account for the double-read and the memory barrier. 
The use of mutexes - ** here would be problematic as the memory being accessed is potentially - ** shared among multiple processes and not all mutex implementations work - ** reliably in that environment. + ** reordering the reads and writes. */ aHdr = walIndexHdr(pWal); - memcpy(&h1, (void *)&aHdr[0], sizeof(h1)); /* Possible TSAN false-positive */ + memcpy(pHdr, (void *)&aHdr[0], sizeof(h2)); walShmBarrier(pWal); memcpy(&h2, (void *)&aHdr[1], sizeof(h2)); - if( memcmp(&h1, &h2, sizeof(h1))!=0 ){ + if( memcmp(&h2, pHdr, sizeof(h2))!=0 ){ return 1; /* Dirty read */ - } - if( h1.isInit==0 ){ + } + if( h2.isInit==0 ){ return 1; /* Malformed header - probably all zeros */ } - walChecksumBytes(1, (u8*)&h1, sizeof(h1)-sizeof(h1.aCksum), 0, aCksum); - if( aCksum[0]!=h1.aCksum[0] || aCksum[1]!=h1.aCksum[1] ){ + walChecksumBytes(1, (u8*)&h2, sizeof(h2)-sizeof(h2.aCksum), 0, aCksum); + if( aCksum[0]!=h2.aCksum[0] || aCksum[1]!=h2.aCksum[1] ){ return 1; /* Checksum does not match */ } + return 0; +} + +/* +** Try to read the wal-index header. Return 0 on success and 1 if +** there is a problem. +** +** The wal-index is in shared memory. Another thread or process might +** be writing the header at the same time this procedure is trying to +** read it, which might result in inconsistency. A dirty read is detected +** by verifying that both copies of the header are the same and also by +** a checksum on the header. +** +** If and only if the read is consistent and the header is different from +** pWal->hdr, then pWal->hdr is updated to the content of the new header +** and *pChanged is set to 1. +** +** If the checksum cannot be verified return non-zero. If the header +** is read successfully and the checksum verified, return zero. 
+*/ +static SQLITE_NO_TSAN int walIndexTryHdr(Wal *pWal, int *pChanged){ + WalIndexHdr h1; /* Copy of the header content */ + + if( walIndexLoadHdr(pWal, &h1) ){ + return 1; + } + if( memcmp(&pWal->hdr, &h1, sizeof(WalIndexHdr)) ){ *pChanged = 1; memcpy(&pWal->hdr, &h1, sizeof(WalIndexHdr)); @@ -2723,7 +3396,9 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ ** sure the wal-index was not constructed with some future format that ** this version of SQLite cannot understand. */ - if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){ + if( badHdr==0 + && pWal->hdr.iVersion!=WAL_VERSION1 && pWal->hdr.iVersion!=WAL_VERSION2 + ){ rc = SQLITE_CANTOPEN_BKPT; } if( pWal->bShmUnreliable ){ @@ -2815,7 +3490,7 @@ static int walBeginShmUnreliable(Wal *pWal, int *pChanged){ ** even if some external agent does a "chmod" to make the shared-memory ** writable by us, until sqlite3OsShmUnmap() has been called. ** This is a requirement on the VFS implementation. - */ + */ rc = sqlite3OsShmMap(pWal->pDbFd, 0, WALINDEX_PGSZ, 0, &pDummy); assert( rc!=SQLITE_OK ); /* SQLITE_OK not possible for read-only connection */ if( rc!=SQLITE_READONLY_CANTINIT ){ @@ -2832,7 +3507,7 @@ static int walBeginShmUnreliable(Wal *pWal, int *pChanged){ /* Make sure some writer hasn't come in and changed the WAL file out ** from under us, then disconnected, while we were not looking. */ - rc = sqlite3OsFileSize(pWal->pWalFd, &szWal); + rc = sqlite3OsFileSize(pWal->apWalFd[0], &szWal); if( rc!=SQLITE_OK ){ goto begin_unreliable_shm_out; } @@ -2849,7 +3524,7 @@ static int walBeginShmUnreliable(Wal *pWal, int *pChanged){ } /* Check the salt keys at the start of the wal file still match. 
*/ - rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); + rc = sqlite3OsRead(pWal->apWalFd[0], aBuf, WAL_HDRSIZE, 0); if( rc!=SQLITE_OK ){ goto begin_unreliable_shm_out; } @@ -2886,7 +3561,7 @@ static int walBeginShmUnreliable(Wal *pWal, int *pChanged){ u32 nTruncate; /* dbsize field from frame header */ /* Read and decode the next log frame. */ - rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset); + rc = sqlite3OsRead(pWal->apWalFd[0], aFrame, szFrame, iOffset); if( rc!=SQLITE_OK ) break; if( !walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame) ) break; @@ -3004,7 +3679,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int *pCnt){ int nBlockTmout = 0; #endif - assert( pWal->readLock<0 ); /* Not currently locked */ + assert( pWal->readLock==WAL_LOCK_NONE ); /* Not currently locked */ /* useWal may only be set for read/write connections */ assert( (pWal->readOnly & WAL_SHM_RDONLY)==0 || useWal==0 ); @@ -3106,12 +3781,40 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int *pCnt){ assert( pWal->apWiData[0]!=0 ); pInfo = walCkptInfo(pWal); SEH_INJECT_FAULT; + if( isWalMode2(pWal) ){ + /* This connection needs a "part" lock on the current wal file and, + ** unless pInfo->nBackfill is set to indicate that it has already been + ** checkpointed, a "full" lock on the other wal file. */ + int iWal = walidxGetFile(&pWal->hdr); + int nBackfill = pInfo->nBackfill || walidxGetMxFrame(&pWal->hdr, !iWal)==0; + int eLock = 1 + (iWal*2) + (nBackfill==iWal); + + assert( nBackfill==0 || nBackfill==1 ); + assert( iWal==0 || iWal==1 ); + assert( iWal!=0 || nBackfill!=1 || eLock==WAL_LOCK_PART1 ); + assert( iWal!=0 || nBackfill!=0 || eLock==WAL_LOCK_PART1_FULL2 ); + assert( iWal!=1 || nBackfill!=1 || eLock==WAL_LOCK_PART2 ); + assert( iWal!=1 || nBackfill!=0 || eLock==WAL_LOCK_PART2_FULL1 ); + + rc = walLockShared(pWal, WAL_READ_LOCK(eLock)); + if( rc!=SQLITE_OK ){ + return (rc==SQLITE_BUSY ? 
WAL_RETRY : rc); + } + walShmBarrier(pWal); + if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){ + walUnlockShared(pWal, WAL_READ_LOCK(eLock)); + return WAL_RETRY; + }else{ + pWal->readLock = eLock; + } + assert( pWal->minFrame==0 && walFramePage(pWal->minFrame)==0 ); + }else { u32 mxReadMark; /* Largest aReadMark[] value */ int mxI; /* Index of largest aReadMark[] value */ int i; /* Loop counter */ u32 mxFrame; /* Wal frame to lock to */ - if( !useWal && AtomicLoad(&pInfo->nBackfill)==pWal->hdr.mxFrame + if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame #ifdef SQLITE_ENABLE_SNAPSHOT && ((pWal->bGetSnapshot==0 && pWal->pSnapshot==0) || pWal->hdr.mxFrame==0) #endif @@ -3282,7 +3985,7 @@ static int walSnapshotRecover( if( iDbOff+szPage<=szDb ){ iWalOff = walFrameOffset(i, szPage) + WAL_FRAME_HDRSIZE; - rc = sqlite3OsRead(pWal->pWalFd, pBuf1, szPage, iWalOff); + rc = sqlite3OsRead(pWal->apWalFd[0], pBuf1, szPage, iWalOff); if( rc==SQLITE_OK ){ rc = sqlite3OsRead(pWal->pDbFd, pBuf2, szPage, iDbOff); @@ -3322,6 +4025,9 @@ static int walSnapshotRecover( int sqlite3WalSnapshotRecover(Wal *pWal){ int rc; + /* Snapshots may not be used with wal2 mode databases. */ + if( isWalMode2(pWal) ) return SQLITE_ERROR; + assert( pWal->readLock>=0 ); rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); if( rc==SQLITE_OK ){ @@ -3347,6 +4053,15 @@ int sqlite3WalSnapshotRecover(Wal *pWal){ } #endif /* SQLITE_ENABLE_SNAPSHOT */ +/* +** Return the current last frame in the wal-index - mxFrame for *-wal, +** or mxFrame2 for *-wal2. If the last frame is current in wal2, return +** mxFrame2 without clearing the 0x80000000 bit. +*/ +static u32 walGetPriorFrame(WalIndexHdr *pHdr){ + return (walidxGetFile(pHdr) ? pHdr->mxFrame2 : pHdr->mxFrame); +} + /* ** This function does the work of sqlite3WalBeginReadTransaction() (see ** below). That function simply calls this one inside an SEH_TRY{...} block. 
@@ -3365,6 +4080,7 @@ static int walBeginReadTransaction(Wal *pWal, int *pChanged){ #ifdef SQLITE_ENABLE_SNAPSHOT if( pSnapshot ){ + if( isWalMode2(pWal) ) return SQLITE_ERROR; if( memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){ bChanged = 1; } @@ -3395,7 +4111,12 @@ static int walBeginReadTransaction(Wal *pWal, int *pChanged){ testcase( (rc&0xff)==SQLITE_IOERR ); testcase( rc==SQLITE_PROTOCOL ); testcase( rc==SQLITE_OK ); + + if( rc==SQLITE_OK && pWal->hdr.iVersion==WAL_VERSION2 ){ + rc = walOpenWal2(pWal); + } + pWal->nPriorFrame = walGetPriorFrame(&pWal->hdr); #ifdef SQLITE_ENABLE_SNAPSHOT if( rc==SQLITE_OK ){ if( pSnapshot && memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){ @@ -3487,13 +4208,99 @@ void sqlite3WalEndReadTransaction(Wal *pWal){ #ifndef SQLITE_ENABLE_SETLK_TIMEOUT assert( pWal->writeLock==0 || pWal->readLock<0 ); #endif - if( pWal->readLock>=0 ){ + if( pWal->readLock!=WAL_LOCK_NONE ){ (void)sqlite3WalEndWriteTransaction(pWal); walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); - pWal->readLock = -1; + pWal->readLock = WAL_LOCK_NONE; } } +/* Search hash table iHash for an entry matching page number +** pgno. Each call to this function searches a single hash table +** (each hash table indexes up to HASHTABLE_NPAGE frames). +** +** This code might run concurrently to the code in walIndexAppend() +** that adds entries to the wal-index (and possibly to this hash +** table). This means the value just read from the hash +** slot (aHash[iKey]) may have been added before or after the +** current read transaction was opened. Values added after the +** read transaction was opened may have been written incorrectly - +** i.e. these slots may contain garbage data. However, we assume +** that any slots written before the current read transaction was +** opened remain unmodified. +** +** For the reasons above, the if(...) 
condition featured in the inner +** loop of the following block is more stringent that would be required +** if we had exclusive access to the hash-table: +** +** (aPgno[iFrame]==pgno): +** This condition filters out normal hash-table collisions. +** +** (iFrame<=iLast): +** This condition filters out entries that were added to the hash +** table after the current read-transaction had started. +*/ +static int walSearchHash( + Wal *pWal, + u32 iLast, + int iHash, + Pgno pgno, + u32 *piRead +){ + WalHashLoc sLoc; /* Hash table location */ + int iKey; /* Hash slot index */ + int nCollide; /* Number of hash collisions remaining */ + int rc; /* Error code */ + u32 iH; + + rc = walHashGet(pWal, iHash, &sLoc); + if( rc!=SQLITE_OK ){ + return rc; + } + nCollide = HASHTABLE_NSLOT; + iKey = walHash(pgno); + SEH_INJECT_FAULT; + while( (iH = AtomicLoad(&sLoc.aHash[iKey]))!=0 ){ + u32 iFrame = iH + sLoc.iZero; + if( iFrame<=iLast && iFrame>=pWal->minFrame && sLoc.aPgno[iH-1]==pgno ){ + assert( iFrame>*piRead || CORRUPT_DB ); + *piRead = iFrame; + } + if( (nCollide--)==0 ){ + *piRead = 0; + return SQLITE_CORRUPT_BKPT; + } + iKey = walNextHash(iKey); + } + + return SQLITE_OK; +} + +static int walSearchWal( + Wal *pWal, + int iWal, + Pgno pgno, + u32 *piRead +){ + int rc = SQLITE_OK; + int bWal2 = isWalMode2(pWal); + u32 iLast = walidxGetMxFrame(&pWal->hdr, iWal); + if( iLast ){ + int iHash; + int iMinHash = walFramePage(pWal->minFrame); + u32 iExternal = bWal2 ? walExternalEncode(iWal, iLast) : iLast; + assert( bWal2==0 || pWal->minFrame==0 ); + for(iHash=walFramePage(iExternal); + iHash>=iMinHash && *piRead==0; + iHash-=(1+bWal2) + ){ + rc = walSearchHash(pWal, iExternal, iHash, pgno, piRead); + if( rc!=SQLITE_OK ) break; + } + } + return rc; +} + /* ** Search the wal file for page pgno. If found, set *piRead to the frame that ** contains the page. 
Otherwise, if pgno is not in the wal file, set *piRead @@ -3507,85 +4314,77 @@ static int walFindFrame( Pgno pgno, /* Database page number to read data for */ u32 *piRead /* OUT: Frame number (or zero) */ ){ + int bWal2 = isWalMode2(pWal); + int iApp = walidxGetFile(&pWal->hdr); + int rc = SQLITE_OK; u32 iRead = 0; /* If !=0, WAL frame to return data from */ - u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ - int iHash; /* Used to loop through N hash tables */ - int iMinHash; - - /* This routine is only be called from within a read transaction. */ - assert( pWal->readLock>=0 || pWal->lockError ); - - /* If the "last page" field of the wal-index header snapshot is 0, then - ** no data will be read from the wal under any circumstances. Return early - ** in this case as an optimization. Likewise, if pWal->readLock==0, - ** then the WAL is ignored by the reader so return early, as if the - ** WAL were empty. - */ - if( iLast==0 || (pWal->readLock==0 && pWal->bShmUnreliable==0) ){ + + /* This routine is only be called from within a read transaction. Or, + ** sometimes, as part of a rollback that occurs after an error reaquiring + ** a read-lock in walRestartLog(). */ + assert( pWal->readLock!=WAL_LOCK_NONE || pWal->lockError ); + + /* If this is a regular wal system, then iApp must be set to 0 (there is + ** only one wal file, after all). Or, if this is a wal2 system and the + ** write-lock is not held, the client must have a partial-wal lock on wal + ** file iApp. This is not always true if the write-lock is held and this + ** function is being called after WalLockForCommit() as part of committing + ** a CONCURRENT transaction. */ +#ifdef SQLITE_DEBUG + if( bWal2 ){ + if( pWal->writeLock==0 ){ + int l = pWal->readLock; + assert( iApp==1 || l==WAL_LOCK_PART1 || l==WAL_LOCK_PART1_FULL2 ); + assert( iApp==0 || l==WAL_LOCK_PART2 || l==WAL_LOCK_PART2_FULL1 ); + } + }else{ + assert( iApp==0 ); + } +#endif + + /* Return early if read-lock 0 is held. 
*/ + if( (pWal->readLock==0 && pWal->bShmUnreliable==0) ){ + assert( !bWal2 ); *piRead = 0; return SQLITE_OK; } - /* Search the hash table or tables for an entry matching page number - ** pgno. Each iteration of the following for() loop searches one - ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames). - ** - ** This code might run concurrently to the code in walIndexAppend() - ** that adds entries to the wal-index (and possibly to this hash - ** table). This means the value just read from the hash - ** slot (aHash[iKey]) may have been added before or after the - ** current read transaction was opened. Values added after the - ** read transaction was opened may have been written incorrectly - - ** i.e. these slots may contain garbage data. However, we assume - ** that any slots written before the current read transaction was - ** opened remain unmodified. - ** - ** For the reasons above, the if(...) condition featured in the inner - ** loop of the following block is more stringent that would be required - ** if we had exclusive access to the hash-table: - ** - ** (aPgno[iFrame]==pgno): - ** This condition filters out normal hash-table collisions. - ** - ** (iFrame<=iLast): - ** This condition filters out entries that were added to the hash - ** table after the current read-transaction had started. 
- */ - iMinHash = walFramePage(pWal->minFrame); - for(iHash=walFramePage(iLast); iHash>=iMinHash; iHash--){ - WalHashLoc sLoc; /* Hash table location */ - int iKey; /* Hash slot index */ - int nCollide; /* Number of hash collisions remaining */ - int rc; /* Error code */ - u32 iH; - - rc = walHashGet(pWal, iHash, &sLoc); - if( rc!=SQLITE_OK ){ - return rc; - } - nCollide = HASHTABLE_NSLOT; - iKey = walHash(pgno); - SEH_INJECT_FAULT; - while( (iH = AtomicLoad(&sLoc.aHash[iKey]))!=0 ){ - u32 iFrame = iH + sLoc.iZero; - if( iFrame<=iLast && iFrame>=pWal->minFrame && sLoc.aPgno[iH-1]==pgno ){ - assert( iFrame>iRead || CORRUPT_DB ); - iRead = iFrame; - } - if( (nCollide--)==0 ){ - *piRead = 0; - return SQLITE_CORRUPT_BKPT; - } - iKey = walNextHash(iKey); - } - if( iRead ) break; + /* Search the wal file that the client holds a partial lock on first. */ + rc = walSearchWal(pWal, iApp, pgno, &iRead); + + /* If the requested page was not found, no error has occured, and + ** the client holds a full-wal lock on the other wal file, search it + ** too. */ + if( rc==SQLITE_OK && bWal2 && iRead==0 && ( + pWal->readLock==WAL_LOCK_PART1_FULL2 + || pWal->readLock==WAL_LOCK_PART2_FULL1 +#ifndef SQLITE_OMIT_CONCURRENT + || (pWal->readLock==WAL_LOCK_PART1 && iApp==1) + || (pWal->readLock==WAL_LOCK_PART2 && iApp==0) +#endif + )){ + rc = walSearchWal(pWal, !iApp, pgno, &iRead); } + if( rc!=SQLITE_OK ) return rc; -#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT +#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) + if( iRead ){ + u32 iFrame; + int iWal = walExternalDecode(iRead, &iFrame); + WALTRACE(("WAL%p: page %d @ frame %d wal %d\n",pWal,(int)pgno,iFrame,iWal)); + }else{ + WALTRACE(("WAL%p: page %d not found\n", pWal, (int)pgno)); + } +#endif + +#if defined(SQLITE_ENABLE_EXPENSIVE_ASSERT) && /*TODO*/ 0 /* If expensive assert() statements are available, do a linear search ** of the wal-index file content. Make sure the results agree with the - ** result obtained using the hash indexes above. 
*/ - { + ** result obtained using the hash indexes above. + ** + ** TODO: This is broken for wal2. + */ + if( rc==SQLITE_OK ){ u32 iRead2 = 0; u32 iTest; assert( pWal->bShmUnreliable || pWal->minFrame>0 ); @@ -3634,31 +4433,74 @@ int sqlite3WalFindFrame( */ int sqlite3WalReadFrame( Wal *pWal, /* WAL handle */ - u32 iRead, /* Frame to read */ + u32 iExternal, /* Frame to read */ int nOut, /* Size of buffer pOut in bytes */ u8 *pOut /* Buffer to write page data to */ ){ int sz; + int iWal = 0; + u32 iRead; i64 iOffset; + + /* Figure out the page size */ sz = pWal->hdr.szPage; sz = (sz&0xfe00) + ((sz&0x0001)<<16); testcase( sz<=32768 ); testcase( sz>=65536 ); + + if( isWalMode2(pWal) ){ + /* Figure out which of the two wal files, and the frame within, that + ** iExternal refers to. */ + iWal = walExternalDecode(iExternal, &iRead); + }else{ + iRead = iExternal; + } + + WALTRACE(("WAL%p: reading frame %d wal %d\n", pWal, iRead, iWal)); iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE; /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */ - return sqlite3OsRead(pWal->pWalFd, pOut, (nOut>sz ? sz : nOut), iOffset); + return sqlite3OsRead(pWal->apWalFd[iWal], pOut, (nOut>sz?sz:nOut), iOffset); } /* ** Return the size of the database in pages (or zero, if unknown). */ Pgno sqlite3WalDbsize(Wal *pWal){ - if( pWal && ALWAYS(pWal->readLock>=0) ){ + if( pWal && ALWAYS(pWal->readLock!=WAL_LOCK_NONE) ){ return pWal->hdr.nPage; } return 0; } +/* +** Take the WRITER lock on the WAL file. Return SQLITE_OK if successful, +** or an SQLite error code otherwise. This routine does not invoke any +** busy-handler callbacks, that is done at a higher level. +*/ +static int walWriteLock(Wal *pWal){ + int rc; + + /* Cannot start a write transaction without first holding a read lock */ + assert( pWal->readLock>=0 ); + assert( pWal->writeLock==0 ); + assert( pWal->iReCksum==0 ); + + /* If this is a read-only connection, obtaining a write-lock is not + ** possible. 
In this case return SQLITE_READONLY. Otherwise, attempt + ** to grab the WRITER lock. Set Wal.writeLock to true and return + ** SQLITE_OK if successful, or leave Wal.writeLock clear and return + ** an SQLite error code (possibly SQLITE_BUSY) otherwise. */ + if( pWal->readOnly ){ + rc = SQLITE_READONLY; + }else{ + rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); + if( rc==SQLITE_OK ){ + pWal->writeLock = 1; + } + } + + return rc; +} /* ** This function starts a write transaction on the WAL. @@ -3685,42 +4527,303 @@ int sqlite3WalBeginWriteTransaction(Wal *pWal){ return SQLITE_OK; } #endif + + rc = walWriteLock(pWal); + if( rc==SQLITE_OK ){ + /* If another connection has written to the database file since the + ** time the read transaction on this connection was started, then + ** the write is disallowed. Release the WRITER lock and return + ** SQLITE_BUSY_SNAPSHOT in this case. */ + SEH_TRY { + if( memcmp(&pWal->hdr, (void*)walIndexHdr(pWal),sizeof(WalIndexHdr))!=0 ){ + rc = SQLITE_BUSY_SNAPSHOT; + } + } + SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) + if( rc!=SQLITE_OK ){ + walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); + pWal->writeLock = 0; + } + } + return rc; +} - /* Cannot start a write transaction without first holding a read - ** transaction. */ - assert( pWal->readLock>=0 ); - assert( pWal->writeLock==0 && pWal->iReCksum==0 ); +/* +** This function is called by a writer that has a read-lock on aReadmark[0] +** (pWal->readLock==0). This function relinquishes that lock and takes a +** lock on a different aReadmark[] slot. +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. 
+*/ +static int walUpgradeReadlock(Wal *pWal){ + int cnt; + int rc; + assert( pWal->writeLock && pWal->readLock==0 ); + assert( isWalMode2(pWal)==0 ); + walUnlockShared(pWal, WAL_READ_LOCK(0)); + pWal->readLock = -1; + cnt = 0; + do{ + int notUsed; + rc = walTryBeginRead(pWal, ¬Used, 1, &cnt); + }while( rc==WAL_RETRY ); + assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */ + testcase( (rc&0xff)==SQLITE_IOERR ); + testcase( rc==SQLITE_PROTOCOL ); + testcase( rc==SQLITE_OK ); + return rc; +} - if( pWal->readOnly ){ - return SQLITE_READONLY; - } - /* Only one writer allowed at a time. Get the write lock. Return - ** SQLITE_BUSY if unable. - */ - rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); - if( rc ){ - return rc; +#ifndef SQLITE_OMIT_CONCURRENT +/* +** A concurrent transaction has conflicted with external frame iExternal. +** Transform this value to the one required by SQLITE_COMMIT_CONFLICT_FRAME - +** the frame offset within its wal file, with the 0x80000000 bit set for +** wal2, clear for the default wal file. +*/ +static u32 walConflictFrame(Wal *pWal, u32 iExternal){ + u32 iRet = iExternal; + if( isWalMode2(pWal) ){ + int bFile = walExternalDecode(iExternal, &iRet); + iRet = (iRet | (bFile ? 0x80000000 : 0)); } - pWal->writeLock = 1; + return iRet; +} + +/* +** This function does the work of sqlite3WalLockForCommit(). The difference +** between this function and sqlite3WalLockForCommit() is that the latter +** encloses everything in a SEH_TRY {} block. +*/ +static int walLockForCommit( + Wal *pWal, + PgHdr *pPg1, + Bitvec *pAllRead, + u32 *aConflict +){ + int rc = walWriteLock(pWal); - /* If another connection has written to the database file since the - ** time the read transaction on this connection was started, then - ** the write is disallowed. + /* If the database has been modified since this transaction was started, + ** check if it is still possible to commit. 
The transaction can be + ** committed if: + ** + ** a) None of the pages in pList have been modified since the + ** transaction opened, and + ** + ** b) The database schema cookie has not been modified since the + ** transaction was started. */ - SEH_TRY { - if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){ + if( rc==SQLITE_OK ){ + WalIndexHdr head; + + if( walIndexLoadHdr(pWal, &head) ){ + /* This branch is taken if the wal-index header is corrupted. This + ** occurs if some other writer has crashed while committing a + ** transaction to this database since the current concurrent transaction + ** was opened. */ rc = SQLITE_BUSY_SNAPSHOT; + }else if( memcmp(&pWal->hdr, (void*)&head, sizeof(WalIndexHdr))!=0 ){ + int bWal2 = isWalMode2(pWal); + int iHash; + int nLoop = 1+(bWal2 && walidxGetFile(&head)!=walidxGetFile(&pWal->hdr)); + int iLoop; + + if( pPg1==0 ){ + /* If pPg1==0, then the current transaction modified the database + ** schema. This means it conflicts with all other transactions. */ + u32 bFile = walidxGetFile(&pWal->hdr); + u32 iFrame = walidxGetMxFrame(&head, bFile) | (bFile << 31); + aConflict[SQLITE_COMMIT_CONFLICT_PGNO] = 1; + aConflict[SQLITE_COMMIT_CONFLICT_FRAME] = iFrame; + rc = SQLITE_BUSY_SNAPSHOT; + } + + assert( nLoop==1 || nLoop==2 ); + for(iLoop=0; rc==SQLITE_OK && iLoophdr.mxFrame (which will be + ** set to the size of the old, now overwritten, wal file). This + ** doesn't come up in wal2 mode, as in wal2 mode the client always + ** has a PART lock on one of the wal files, preventing it from being + ** checkpointed or overwritten. 
*/ + iFirst = pWal->hdr.mxFrame+1; + if( memcmp(pWal->hdr.aSalt, (u32*)head.aSalt, sizeof(u32)*2) ){ + assert( pWal->readLock==0 ); + iFirst = 1; + } + mxFrame = head.mxFrame; + }else{ + int iA = walidxGetFile(&pWal->hdr); + if( iLoop==0 ){ + iFirst = walExternalEncode(iA, 1+walidxGetMxFrame(&pWal->hdr, iA)); + mxFrame = walExternalEncode(iA, walidxGetMxFrame(&head, iA)); + }else{ + iFirst = walExternalEncode(!iA, 1); + mxFrame = walExternalEncode(!iA, walidxGetMxFrame(&head, !iA)); + } + } + iLastHash = walFramePage(mxFrame); + + for(iHash=walFramePage(iFirst); iHash<=iLastHash; iHash += (1+bWal2)){ + WalHashLoc sLoc; + + rc = walHashGet(pWal, iHash, &sLoc); + if( rc==SQLITE_OK ){ + u32 i, iMin, iMax; + assert( mxFrame>=sLoc.iZero ); + iMin = (sLoc.iZero >= iFirst) ? 1 : (iFirst - sLoc.iZero); + iMax = (iHash==0) ? HASHTABLE_NPAGE_ONE : HASHTABLE_NPAGE; + if( iMax>(mxFrame-sLoc.iZero) ) iMax = (mxFrame-sLoc.iZero); + for(i=iMin; rc==SQLITE_OK && i<=iMax; i++){ + PgHdr *pPg; + if( sLoc.aPgno[i-1]==1 ){ + /* Check that the schema cookie has not been modified. If + ** it has not, the commit can proceed. 
*/ + u8 aNew[4]; + u8 *aOld = &((u8*)pPg1->pData)[40]; + int sz; + i64 iOff; + u32 iFrame = sLoc.iZero + i; + int iWal = 0; + if( bWal2 ){ + iWal = walExternalDecode(iFrame, &iFrame); + } + sz = head.szPage; + sz = (sz&0xfe00) + ((sz&0x0001)<<16); + iOff = walFrameOffset(iFrame, sz) + WAL_FRAME_HDRSIZE + 40; + rc = sqlite3OsRead(pWal->apWalFd[iWal],aNew,sizeof(aNew),iOff); + if( rc==SQLITE_OK && memcmp(aOld, aNew, sizeof(aNew)) ){ + u32 iFrame = walConflictFrame(pWal, sLoc.iZero+i); + aConflict[SQLITE_COMMIT_CONFLICT_PGNO] = 1; + aConflict[SQLITE_COMMIT_CONFLICT_FRAME] = iFrame; + rc = SQLITE_BUSY_SNAPSHOT; + } + }else if( sqlite3BitvecTestNotNull(pAllRead, sLoc.aPgno[i-1]) ){ + u32 iFrame = walConflictFrame(pWal, sLoc.iZero+i); + aConflict[SQLITE_COMMIT_CONFLICT_PGNO] = sLoc.aPgno[i-1]; + aConflict[SQLITE_COMMIT_CONFLICT_FRAME] = iFrame; + rc = SQLITE_BUSY_SNAPSHOT; + }else + if( (pPg = sqlite3PagerLookup(pPg1->pPager, sLoc.aPgno[i-1])) ){ + /* Page aPgno[i], which is present in the pager cache, has been + ** modified since the current CONCURRENT transaction was + ** started. However it was not read by the current + ** transaction, so is not a conflict. There are two + ** possibilities: (a) the page was allocated at the end of the file + ** by the current transaction or (b) was present in the cache + ** at the start of the transaction. + ** + ** For case (a), do nothing. This page will be moved within the + ** database file by the commit code to avoid the conflict. The + ** call to PagerUnref() is to release the reference grabbed by + ** the sqlite3PagerLookup() above. + ** + ** In case (b), drop the page from the cache - otherwise + ** following the snapshot upgrade the cache would be + ** inconsistent with the database as stored on disk.
*/ + if( sqlite3PagerIswriteable(pPg) ){ + sqlite3PagerUnref(pPg); + }else{ + sqlite3PcacheDrop(pPg); + } + } + } + } + if( rc!=SQLITE_OK ) break; + } + } } } - SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) + pWal->nPriorFrame = walGetPriorFrame(&pWal->hdr); + return rc; +} - if( rc!=SQLITE_OK ){ - walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); - pWal->writeLock = 0; - } +/* +** This function is only ever called when committing a "BEGIN CONCURRENT" +** transaction. It may be assumed that no frames have been written to +** the wal file. The second parameter is a pointer to the in-memory +** representation of page 1 of the database (which may or may not be +** dirty). The third is a bitvec with a bit set for each page in the +** database file that was read by the current concurrent transaction. +** +** This function performs three tasks: +** +** 1) It obtains the WRITER lock on the wal file, +** +** 2) It checks that there are no conflicts between the current +** transaction and any transactions committed to the wal file since +** it was opened, and +** +** 3) It ejects any non-dirty pages from the page-cache that have been +** written by another client since the CONCURRENT transaction was started +** (so as to avoid ending up with an inconsistent cache after the +** current transaction is committed). +** +** If no error occurs and the caller may proceed with committing the +** transaction, SQLITE_OK is returned. SQLITE_BUSY is returned if the WRITER +** lock cannot be obtained. Or, if the WRITER lock can be obtained but there +** are conflicts with a committed transaction, SQLITE_BUSY_SNAPSHOT. Finally, +** if an error (i.e. an OOM condition or IO error), an SQLite error code +** is returned. 
+*/ +int sqlite3WalLockForCommit( + Wal *pWal, + PgHdr *pPg1, + Bitvec *pAllRead, + Pgno *piConflict +){ + int rc = SQLITE_OK; + SEH_TRY { + rc = walLockForCommit(pWal, pPg1, pAllRead, piConflict); + } SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) + return rc; +} + +/* !defined(SQLITE_OMIT_CONCURRENT) +** +** This function is called as part of committing an CONCURRENT transaction. +** It is assumed that sqlite3WalLockForCommit() has already been successfully +** called and so (a) the WRITER lock is held and (b) it is known that the +** wal-index-header stored in shared memory is not corrupt. +** +** Before returning, this function upgrades the client so that it is +** operating on the database snapshot currently at the head of the wal file +** (even if the CONCURRENT transaction ran against an older snapshot). +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. +*/ +int sqlite3WalUpgradeSnapshot(Wal *pWal){ + int rc = SQLITE_OK; + assert( pWal->writeLock ); + + SEH_TRY { + assert( pWal->szPage==pWal->hdr.szPage ); + memcpy(&pWal->hdr, (void*)walIndexHdr(pWal), sizeof(WalIndexHdr)); + assert( pWal->szPage==pWal->hdr.szPage || pWal->szPage==0 ); + pWal->szPage = pWal->hdr.szPage; + + /* If this client has its read-lock on slot aReadmark[0] and the entire + ** wal has not been checkpointed, switch it to a different slot. Otherwise + ** any reads performed between now and committing the transaction will + ** read from the old snapshot - not the one just upgraded to. */ + if( pWal->readLock==0 && pWal->hdr.mxFrame!=walCkptInfo(pWal)->nBackfill ){ + assert( isWalMode2(pWal)==0 ); + rc = walUpgradeReadlock(pWal); + } + } SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) return rc; } +#endif /* SQLITE_OMIT_CONCURRENT */ /* ** End a write transaction. The commit has already been done. This @@ -3748,22 +4851,54 @@ int sqlite3WalEndWriteTransaction(Wal *pWal){ ** Otherwise, if the callback function does not return an error, this ** function returns SQLITE_OK. 
*/ -int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ +int sqlite3WalUndo( + Wal *pWal, + int (*xUndo)(void *, Pgno), + void *pUndoCtx, + int bConcurrent /* True if this is a CONCURRENT transaction */ +){ int rc = SQLITE_OK; - if( ALWAYS(pWal->writeLock) ){ - Pgno iMax = pWal->hdr.mxFrame; + if( pWal->writeLock ){ + int iWal = walidxGetFile(&pWal->hdr); + Pgno iMax = walidxGetMxFrame(&pWal->hdr, iWal); + Pgno iNew; Pgno iFrame; + assert( isWalMode2(pWal) || iWal==0 ); + SEH_TRY { /* Restore the clients cache of the wal-index header to the state it ** was in before the client began writing to the database. */ memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr)); - - for(iFrame=pWal->hdr.mxFrame+1; - ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; - iFrame++ - ){ + iNew = walidxGetMxFrame(&pWal->hdr, walidxGetFile(&pWal->hdr)); + + /* BEGIN CONCURRENT transactions are different, as the header just + ** memcpy()d into pWal->hdr may not be the same as the current header + ** when the transaction was started. Instead, pWal->hdr now contains + ** the header written by the most recent successful COMMIT. Because + ** Wal.writeLock is set, if this is a BEGIN CONCURRENT transaction, + ** the rollback must be taking place because an error occurred during + ** a COMMIT. + ** + ** The code below is still valid. All frames between (iNew+1) and iMax + ** must have been written by this transaction before the error occurred. + ** The exception is in wal2 mode - if the current wal file at the time + ** of the last COMMIT is not wal file iWal, then the error must have + ** occurred in WalLockForCommit(), before any pages were written + ** to the database file. In this case return early. 
*/ +#ifndef SQLITE_OMIT_CONCURRENT + if( bConcurrent ){ + pWal->hdr.aCksum[0]++; + } + if( walidxGetFile(&pWal->hdr)!=iWal ){ + assert( bConcurrent && isWalMode2(pWal) ); + return SQLITE_OK; + } +#endif + assert( walidxGetFile(&pWal->hdr)==iWal ); + + for(iFrame=iNew+1; ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; iFrame++){ /* This call cannot fail. Unless the page for which the page number ** is passed as the second argument is (a) in the cache and ** (b) has an outstanding reference, then xUndo is either a no-op @@ -3775,10 +4910,16 @@ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ ** page 1 is never written to the log until the transaction is ** committed. As a result, the call to xUndo may not fail. */ - assert( walFramePgno(pWal, iFrame)!=1 ); - rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame)); + Pgno pgno; + if( isWalMode2(pWal) ){ + pgno = walFramePgno2(pWal, iWal, iFrame); + }else{ + pgno = walFramePgno(pWal, iFrame); + } + assert( pgno!=1 ); + rc = xUndo(pUndoCtx, pgno); } - if( iMax!=pWal->hdr.mxFrame ) walCleanupHash(pWal); + if( iMax!=iNew ) walCleanupHash(pWal); } SEH_EXCEPT( rc = SQLITE_IOERR_IN_PAGE; ) pWal->iReCksum = 0; @@ -3793,11 +4934,12 @@ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ ** point in the event of a savepoint rollback (via WalSavepointUndo()). */ void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){ - assert( pWal->writeLock ); - aWalData[0] = pWal->hdr.mxFrame; + int iWal = walidxGetFile(&pWal->hdr); + assert( isWalMode2(pWal) || iWal==0 ); + aWalData[0] = walidxGetMxFrame(&pWal->hdr, iWal); aWalData[1] = pWal->hdr.aFrameCksum[0]; aWalData[2] = pWal->hdr.aFrameCksum[1]; - aWalData[3] = pWal->nCkpt; + aWalData[3] = isWalMode2(pWal) ? 
(u32)iWal : pWal->nCkpt; } /* @@ -3808,21 +4950,24 @@ void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){ */ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ int rc = SQLITE_OK; + int iWal = walidxGetFile(&pWal->hdr); + u32 iCmp = isWalMode2(pWal) ? (u32)iWal : pWal->nCkpt; - assert( pWal->writeLock ); - assert( aWalData[3]!=pWal->nCkpt || aWalData[0]<=pWal->hdr.mxFrame ); + assert( pWal->writeLock || aWalData[0]==pWal->hdr.mxFrame ); + assert( isWalMode2(pWal) || iWal==0 ); + assert( aWalData[3]!=iCmp || aWalData[0]<=walidxGetMxFrame(&pWal->hdr,iWal) ); - if( aWalData[3]!=pWal->nCkpt ){ + if( aWalData[3]!=iCmp ){ /* This savepoint was opened immediately after the write-transaction ** was started. Right after that, the writer decided to wrap around ** to the start of the log. Update the savepoint values to match. */ aWalData[0] = 0; - aWalData[3] = pWal->nCkpt; + aWalData[3] = iCmp; } - if( aWalData[0]<pWal->hdr.mxFrame ){ - pWal->hdr.mxFrame = aWalData[0]; + if( aWalData[0]<walidxGetMxFrame(&pWal->hdr, iWal) ){ + walidxSetMxFrame(&pWal->hdr, iWal, aWalData[0]); pWal->hdr.aFrameCksum[0] = aWalData[1]; pWal->hdr.aFrameCksum[1] = aWalData[2]; SEH_TRY { @@ -3840,25 +4985,85 @@ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ /* ** This function is called just before writing a set of frames to the log ** file (see sqlite3WalFrames()). It checks to see if, instead of appending -** to the current log file, it is possible to overwrite the start of the -** existing log file with the new frames (i.e. "reset" the log). If so, -** it sets pWal->hdr.mxFrame to 0. Otherwise, pWal->hdr.mxFrame is left -** unchanged. +** to the current log file, it is possible and desirable to switch to the +** other log file and write the new transaction to the start of it. +** If so, the wal-index header is updated accordingly - both in heap memory +** and in the *-shm file. ** ** SQLITE_OK is returned if no error is encountered (regardless of whether -** or not pWal->hdr.mxFrame is modified).
An SQLite error code is returned +** or not the wal-index header is modified). An SQLite error code is returned ** if an error occurs. */ static int walRestartLog(Wal *pWal){ int rc = SQLITE_OK; - int cnt; - if( pWal->readLock==0 ){ + if( isWalMode2(pWal) ){ + int iApp = walidxGetFile(&pWal->hdr); + u32 nWalSize = WAL_DEFAULT_WALSIZE; + if( pWal->mxWalSize>0 ){ + /* mxWalSize is in bytes. Convert this to a number of frames. */ + nWalSize = (pWal->mxWalSize-WAL_HDRSIZE+pWal->szPage+WAL_FRAME_HDRSIZE-1) + / (pWal->szPage+WAL_FRAME_HDRSIZE); + nWalSize = MAX(nWalSize, 1); + } + + /* With a BEGIN CONCURRENT transaction, it is possible for a connection + ** to hold the WAL_LOCK_PART1 lock even if iApp==1 (or WAL_LOCK_PART2 + ** when iApp==0). This is because a connection running concurrent to this + ** one may have switched the wal file after this connection took the + ** read lock. + ** + ** This is not a problem, as if this happens it means the current lock + ** is more restrictive, not less, than required. And a BEGIN CONCURRENT + ** transaction cannot be committed and downgraded to a read-transaction, + ** so there is no chance of continuing while holding the wrong lock. + */ + assert( iApp==0 || pWal->readLock==WAL_LOCK_PART2 + || pWal->readLock==WAL_LOCK_PART2_FULL1 + || pWal->readLock==WAL_LOCK_PART1 ); + assert( iApp==1 || pWal->readLock==WAL_LOCK_PART1 + || pWal->readLock==WAL_LOCK_PART1_FULL2 + || pWal->readLock==WAL_LOCK_PART2 ); + + /* Switch to wal file !iApp if + ** + ** (a) Wal file iApp (the current wal file) contains >= nWalSize frames. + ** (b) This client is not reading from wal file !iApp. + ** (c) No other client is reading from wal file !iApp. + ** + ** Condition (b) guarantees that wal file !iApp is either empty or + ** completely checkpointed. 
+ */ + assert( (0*3)+1==WAL_LOCK_PART1 ); /* iApp==0 -> require WAL_LOCK_PART1 */ + assert( (1*3)+1==WAL_LOCK_PART2 ); /* iApp==1 -> require WAL_LOCK_PART2 */ + if( pWal->readLock==(iApp*3)+1 + && walidxGetMxFrame(&pWal->hdr, iApp)>=nWalSize + ){ + rc = wal2RestartOk(pWal, iApp); + if( rc==SQLITE_OK ){ + volatile WalCkptInfo *pInfo = walCkptInfo(pWal); + int iNew = !iApp; + pWal->nCkpt++; + walidxSetFile(&pWal->hdr, iNew); + walidxSetMxFrame(&pWal->hdr, iNew, 0); + sqlite3Put4byte((u8*)&pWal->hdr.aSalt[0], pWal->hdr.aFrameCksum[0]); + sqlite3Put4byte((u8*)&pWal->hdr.aSalt[1], pWal->hdr.aFrameCksum[1]); + walIndexWriteHdr(pWal); + pInfo->nBackfill = 0; + wal2RestartFinished(pWal, iApp); + walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); + pWal->readLock = iNew ? WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2; + rc = walLockShared(pWal, WAL_READ_LOCK(pWal->readLock)); + }else if( rc==SQLITE_BUSY ){ + rc = SQLITE_OK; + } + } + }else if( pWal->readLock==0 ){ volatile WalCkptInfo *pInfo = walCkptInfo(pWal); assert( pInfo->nBackfill==pWal->hdr.mxFrame ); if( pInfo->nBackfill>0 ){ u32 salt1; - sqlite3_randomness(4, &salt1); + sqlite3FastRandomness(&pWal->sPrng, 4, &salt1); rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); if( rc==SQLITE_OK ){ /* If all readers are using WAL_READ_LOCK(0) (in other words if no @@ -3872,22 +5077,23 @@ static int walRestartLog(Wal *pWal){ ** to handle if this transaction is rolled back. 
*/ walRestartHdr(pWal, salt1); walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); + pWal->nPriorFrame = 0; }else if( rc!=SQLITE_BUSY ){ return rc; } } - walUnlockShared(pWal, WAL_READ_LOCK(0)); - pWal->readLock = -1; - cnt = 0; - do{ - int notUsed; - rc = walTryBeginRead(pWal, &notUsed, 1, &cnt); - }while( rc==WAL_RETRY ); - assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */ - testcase( (rc&0xff)==SQLITE_IOERR ); - testcase( rc==SQLITE_PROTOCOL ); - testcase( rc==SQLITE_OK ); + + /* Regardless of whether or not the wal file was restarted, change the + ** read-lock held by this client to a slot other than aReadmark[0]. + ** Clients with a lock on aReadmark[0] read from the database file + ** only - never from the wal file. This means that if a writer holding + ** a lock on aReadmark[0] were to commit a transaction but not close the + ** read-transaction, subsequent read operations would read directly from + ** the database file - ignoring the new pages just appended + ** to the wal file.
*/ + rc = walUpgradeReadlock(pWal); } + pWal->nPriorFrame = walGetPriorFrame(&pWal->hdr); return rc; } @@ -3946,12 +5152,35 @@ static int walWriteOneFrame( int rc; /* Result code from subfunctions */ void *pData; /* Data actually written */ u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ + +#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) + { + int iWal = walidxGetFile(&p->pWal->hdr); + int iFrame = 1 + (iOffset / (WAL_FRAME_HDRSIZE + p->pWal->szPage)); + assert( p->pWal->apWalFd[iWal]==p->pFd ); + WALTRACE(("WAL%p: page %d written to frame %d of wal %d\n", + p->pWal, (int)pPage->pgno, iFrame, iWal + )); + } +#endif + pData = pPage->pData; walEncodeFrame(p->pWal, pPage->pgno, nTruncate, pData, aFrame); - rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); - if( rc ) return rc; + + if( isNocksum(p->pWal)==0 ){ + /* Write the header in normal mode */ + rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); + if( rc ) return rc; + } + /* Write the page data */ rc = walWriteToLog(p, pData, p->szPage, iOffset+sizeof(aFrame)); + + if( isNocksum(p->pWal) ){ + /* Write the header in no-checksum mode */ + if( rc ) return rc; + rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); + } return rc; } @@ -3964,12 +5193,13 @@ static int walWriteOneFrame( ** SQLITE_OK is returned if successful, or an SQLite error code otherwise. 
*/ static int walRewriteChecksums(Wal *pWal, u32 iLast){ - const int szPage = pWal->szPage;/* Database page size */ int rc = SQLITE_OK; /* Return code */ + const int szPage = pWal->szPage;/* Database page size */ u8 *aBuf; /* Buffer to load data from wal file into */ u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-headers in */ u32 iRead; /* Next frame to read from wal file */ i64 iCksumOff; + sqlite3_file *pWalFd = pWal->apWalFd[walidxGetFile(&pWal->hdr)]; aBuf = sqlite3_malloc(szPage + WAL_FRAME_HDRSIZE); if( aBuf==0 ) return SQLITE_NOMEM_BKPT; @@ -3985,7 +5215,7 @@ static int walRewriteChecksums(Wal *pWal, u32 iLast){ }else{ iCksumOff = walFrameOffset(pWal->iReCksum-1, szPage) + 16; } - rc = sqlite3OsRead(pWal->pWalFd, aBuf, sizeof(u32)*2, iCksumOff); + rc = sqlite3OsRead(pWalFd, aBuf, sizeof(u32)*2, iCksumOff); pWal->hdr.aFrameCksum[0] = sqlite3Get4byte(aBuf); pWal->hdr.aFrameCksum[1] = sqlite3Get4byte(&aBuf[sizeof(u32)]); @@ -3993,14 +5223,14 @@ static int walRewriteChecksums(Wal *pWal, u32 iLast){ pWal->iReCksum = 0; for(; rc==SQLITE_OK && iRead<=iLast; iRead++){ i64 iOff = walFrameOffset(iRead, szPage); - rc = sqlite3OsRead(pWal->pWalFd, aBuf, szPage+WAL_FRAME_HDRSIZE, iOff); + rc = sqlite3OsRead(pWalFd, aBuf, szPage+WAL_FRAME_HDRSIZE, iOff); if( rc==SQLITE_OK ){ u32 iPgno, nDbSize; iPgno = sqlite3Get4byte(aBuf); nDbSize = sqlite3Get4byte(&aBuf[4]); walEncodeFrame(pWal, iPgno, nDbSize, &aBuf[WAL_FRAME_HDRSIZE], aFrame); - rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOff); + rc = sqlite3OsWrite(pWalFd, aFrame, sizeof(aFrame), iOff); } } @@ -4030,6 +5260,8 @@ static int walFrames( WalWriter w; /* The writer */ u32 iFirst = 0; /* First frame that may be overwritten */ WalIndexHdr *pLive; /* Pointer to shared header */ + int iApp; + int bWal2 = isWalMode2(pWal); assert( pList ); assert( pWal->writeLock ); @@ -4038,22 +5270,16 @@ static int walFrames( ** nTruncate==0 then this frame set does not complete the transaction. 
*/ assert( (isCommit!=0)==(nTruncate!=0) ); -#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) - { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} - WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n", - pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill")); - } -#endif - pLive = (WalIndexHdr*)walIndexHdr(pWal); if( memcmp(&pWal->hdr, (void *)pLive, sizeof(WalIndexHdr))!=0 ){ - iFirst = pLive->mxFrame+1; + /* if( isWalMode2(pWal)==0 ) */ + iFirst = walidxGetMxFrame(pLive, walidxGetFile(pLive))+1; } /* See if it is possible to write these frames into the start of the ** log file, instead of appending to it at pWal->hdr.mxFrame. */ - if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){ + else if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){ return rc; } @@ -4061,16 +5287,38 @@ static int walFrames( ** header to the start of the WAL file. See comments at the top of ** this source file for a description of the WAL header format. */ - iFrame = pWal->hdr.mxFrame; + iApp = walidxGetFile(&pWal->hdr); + iFrame = walidxGetMxFrame(&pWal->hdr, iApp); + assert( iApp==0 || bWal2 ); + +#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) + { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} + WALTRACE(("WAL%p: frame write begin. %d frames. iWal=%d. mxFrame=%d. %s\n", + pWal, cnt, iApp, iFrame, isCommit ? 
"Commit" : "Spill")); + } +#endif + if( iFrame==0 ){ + u32 iCkpt = 0; u8 aWalHdr[WAL_HDRSIZE]; /* Buffer to assemble wal-header in */ u32 aCksum[2]; /* Checksum for wal-header */ sqlite3Put4byte(&aWalHdr[0], (WAL_MAGIC | SQLITE_BIGENDIAN)); - sqlite3Put4byte(&aWalHdr[4], WAL_MAX_VERSION); + sqlite3Put4byte(&aWalHdr[4], pWal->hdr.iVersion); sqlite3Put4byte(&aWalHdr[8], szPage); - sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt); - if( pWal->nCkpt==0 ) sqlite3_randomness(8, pWal->hdr.aSalt); + if( bWal2 ){ + if( walidxGetMxFrame(&pWal->hdr, !iApp)>0 ){ + u8 aPrev[4]; + rc = sqlite3OsRead(pWal->apWalFd[!iApp], aPrev, 4, 12); + if( rc!=SQLITE_OK ){ + return rc; + } + iCkpt = (sqlite3Get4byte(aPrev) + 1) & 0x0F; + } + }else{ + iCkpt = pWal->nCkpt; + } + sqlite3Put4byte(&aWalHdr[12], iCkpt); memcpy(&aWalHdr[16], pWal->hdr.aSalt, 8); walChecksumBytes(1, aWalHdr, WAL_HDRSIZE-2*4, 0, aCksum); sqlite3Put4byte(&aWalHdr[24], aCksum[0]); @@ -4082,7 +5330,7 @@ static int walFrames( pWal->hdr.aFrameCksum[1] = aCksum[1]; pWal->truncateOnCommit = 1; - rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0); + rc = sqlite3OsWrite(pWal->apWalFd[iApp], aWalHdr, sizeof(aWalHdr), 0); WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok")); if( rc!=SQLITE_OK ){ return rc; @@ -4096,7 +5344,7 @@ static int walFrames( ** https://sqlite.org/src/info/ff5be73dee */ if( pWal->syncHeader ){ - rc = sqlite3OsSync(pWal->pWalFd, CKPT_SYNC_FLAGS(sync_flags)); + rc = sqlite3OsSync(pWal->apWalFd[iApp], CKPT_SYNC_FLAGS(sync_flags)); if( rc ) return rc; } } @@ -4106,7 +5354,7 @@ static int walFrames( /* Setup information needed to write frames into the WAL */ w.pWal = pWal; - w.pFd = pWal->pWalFd; + w.pFd = pWal->apWalFd[iApp]; w.iSyncPoint = 0; w.syncFlags = sync_flags; w.szPage = szPage; @@ -4123,8 +5371,11 @@ static int walFrames( ** checksums must be recomputed when the transaction is committed. 
*/ if( iFirst && (p->pDirty || isCommit==0) ){ u32 iWrite = 0; - VVA_ONLY(rc =) walFindFrame(pWal, p->pgno, &iWrite); + VVA_ONLY(rc =) walSearchWal(pWal, iApp, p->pgno, &iWrite); assert( rc==SQLITE_OK || iWrite==0 ); + if( iWrite && bWal2 ){ + walExternalDecode(iWrite, &iWrite); + } if( iWrite>=iFirst ){ i64 iOff = walFrameOffset(iWrite, szPage) + WAL_FRAME_HDRSIZE; void *pData; @@ -4132,7 +5383,7 @@ static int walFrames( pWal->iReCksum = iWrite; } pData = p->pData; - rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOff); + rc = sqlite3OsWrite(pWal->apWalFd[iApp], pData, szPage, iOff); if( rc ) return rc; p->flags &= ~PGHDR_WAL_APPEND; continue; @@ -4149,6 +5400,7 @@ static int walFrames( p->flags |= PGHDR_WAL_APPEND; } + /* Recalculate checksums within the wal file if required. */ if( isCommit && pWal->iReCksum ){ rc = walRewriteChecksums(pWal, iFrame); @@ -4172,7 +5424,7 @@ static int walFrames( if( isCommit && WAL_SYNC_FLAGS(sync_flags)!=0 ){ int bSync = 1; if( pWal->padToSectorBoundary ){ - int sectorSize = sqlite3SectorSize(pWal->pWalFd); + int sectorSize = sqlite3SectorSize(w.pFd); w.iSyncPoint = ((iOffset+sectorSize-1)/sectorSize)*sectorSize; bSync = (w.iSyncPoint==iOffset); testcase( bSync ); @@ -4208,17 +5460,17 @@ static int walFrames( ** guarantees that there are no other writers, and no data that may ** be in use by existing readers is being overwritten. 
*/ - iFrame = pWal->hdr.mxFrame; + iFrame = walidxGetMxFrame(&pWal->hdr, iApp); for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){ if( (p->flags & PGHDR_WAL_APPEND)==0 ) continue; iFrame++; - rc = walIndexAppend(pWal, iFrame, p->pgno); + rc = walIndexAppend(pWal, iApp, iFrame, p->pgno); } assert( pLast!=0 || nExtra==0 ); while( rc==SQLITE_OK && nExtra>0 ){ iFrame++; nExtra--; - rc = walIndexAppend(pWal, iFrame, pLast->pgno); + rc = walIndexAppend(pWal, iApp, iFrame, pLast->pgno); } if( rc==SQLITE_OK ){ @@ -4226,7 +5478,7 @@ static int walFrames( pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16)); testcase( szPage<=32768 ); testcase( szPage>=65536 ); - pWal->hdr.mxFrame = iFrame; + walidxSetMxFrame(&pWal->hdr, iApp, iFrame); if( isCommit ){ pWal->hdr.iChange++; pWal->hdr.nPage = nTruncate; @@ -4234,7 +5486,17 @@ static int walFrames( /* If this is a commit, update the wal-index header too. */ if( isCommit ){ walIndexWriteHdr(pWal); - pWal->iCallback = iFrame; + if( bWal2 ){ + int iOther = !walidxGetFile(&pWal->hdr); + if( walidxGetMxFrame(&pWal->hdr, iOther) + && !walCkptInfo(pWal)->nBackfill + ){ + pWal->iCallback = walidxGetMxFrame(&pWal->hdr, 0); + pWal->iCallback += walidxGetMxFrame(&pWal->hdr, 1); + } + }else{ + pWal->iCallback = iFrame; + } } } @@ -4331,7 +5593,7 @@ int sqlite3WalCheckpoint( ** writer lock retried until either the busy-handler returns 0 or the ** lock is successfully obtained. */ - if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){ + if( eMode!=SQLITE_CHECKPOINT_PASSIVE && isWalMode2(pWal)==0 ){ rc = walBusyLock(pWal, xBusy2, pBusyArg, WAL_WRITE_LOCK, 1); if( rc==SQLITE_OK ){ pWal->writeLock = 1; @@ -4365,7 +5627,9 @@ int sqlite3WalCheckpoint( /* Copy data from the log to the database file. 
*/ if( rc==SQLITE_OK ){ - if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ + if( (walPagesize(pWal)!=nBuf) + && ((pWal->hdr.mxFrame2 & 0x7FFFFFFF) || pWal->hdr.mxFrame) + ){ rc = SQLITE_CORRUPT_BKPT; }else if( eMode2!=SQLITE_CHECKPOINT_NOOP ){ rc = walCheckpoint(pWal, db, eMode2, xBusy2, pBusyArg, sync_flags,zBuf); @@ -4373,21 +5637,38 @@ int sqlite3WalCheckpoint( /* If no error occurred, set the output variables. */ if( rc==SQLITE_OK || rc==SQLITE_BUSY ){ - if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame; + if( pnLog ){ + WalIndexHdr *pHdr = &pWal->hdr; + *pnLog = walidxGetMxFrame(pHdr, 0) + walidxGetMxFrame(pHdr, 1); + } SEH_INJECT_FAULT; - if( pnCkpt ) *pnCkpt = (int)(walCkptInfo(pWal)->nBackfill); + if( pnCkpt ){ + if( isWalMode2(pWal) ){ + if( (int)(walCkptInfo(pWal)->nBackfill) ){ + *pnCkpt = walidxGetMxFrame(&pWal->hdr,!walidxGetFile(&pWal->hdr)); + }else{ + *pnCkpt = 0; + } + }else{ + *pnCkpt = walCkptInfo(pWal)->nBackfill; + } + } } } } SEH_EXCEPT( rc = walHandleException(pWal); ) - if( isChanged ){ + if( isChanged && pWal->bClosing==0 ){ /* If a new wal-index header was loaded before the checkpoint was ** performed, then the pager-cache associated with pWal is now ** out of date. So zero the cached wal-index header to ensure that ** next time the pager opens a snapshot on this database it knows that ** the cache needs to be reset. - */ + ** + ** Except, do not do this if the wal is being closed. In this case + ** the caller needs the wal-index header to check if the database is + ** in wal2 mode and the "other" wal file also needs to be checkpointed. + ** Besides, the pager cache will not be used again in this case. */ memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); } @@ -4457,14 +5738,15 @@ int sqlite3WalExclusiveMode(Wal *pWal, int op){ ** upgrade to exclusive-mode following such an error. 
*/ #ifndef SQLITE_USE_SEH - assert( pWal->readLock>=0 || pWal->lockError ); + assert( pWal->readLock!=WAL_LOCK_NONE || pWal->lockError ); #endif - assert( pWal->readLock>=0 || (op<=0 && pWal->exclusiveMode==0) ); + assert( pWal->readLock!=WAL_LOCK_NONE || (op<=0 && pWal->exclusiveMode==0) ); if( op==0 ){ - if( pWal->exclusiveMode!=WAL_NORMAL_MODE ){ + if( pWal->exclusiveMode ){ pWal->exclusiveMode = WAL_NORMAL_MODE; - if( walLockShared(pWal, WAL_READ_LOCK(pWal->readLock))!=SQLITE_OK ){ + rc = walLockShared(pWal, WAL_READ_LOCK(pWal->readLock)); + if( rc!=SQLITE_OK ){ pWal->exclusiveMode = WAL_EXCLUSIVE_MODE; } rc = pWal->exclusiveMode==WAL_NORMAL_MODE; @@ -4503,6 +5785,9 @@ int sqlite3WalSnapshotGet(Wal *pWal, sqlite3_snapshot **ppSnapshot){ WalIndexHdr *pRet; static const u32 aZero[4] = { 0, 0, 0, 0 }; + /* Snapshots may not be used with wal2 mode databases. */ + if( isWalMode2(pWal) ) return SQLITE_ERROR; + assert( pWal->readLock>=0 && pWal->writeLock==0 ); if( memcmp(&pWal->hdr.aFrameCksum[0],aZero,16)==0 ){ @@ -4572,6 +5857,10 @@ int sqlite3_snapshot_cmp(sqlite3_snapshot *p1, sqlite3_snapshot *p2){ */ int sqlite3WalSnapshotCheck(Wal *pWal, sqlite3_snapshot *pSnapshot){ int rc; + + /* Snapshots may not be used with wal2 mode databases. */ + if( isWalMode2(pWal) ) return SQLITE_ERROR; + SEH_TRY { rc = walLockShared(pWal, WAL_CKPT_LOCK); if( rc==SQLITE_OK ){ @@ -4615,7 +5904,27 @@ int sqlite3WalFramesize(Wal *pWal){ /* Return the sqlite3_file object for the WAL file */ sqlite3_file *sqlite3WalFile(Wal *pWal){ - return pWal->pWalFd; + return pWal->apWalFd[0]; +} + +/* +** Return the values required by sqlite3_wal_info(). +*/ +int sqlite3WalInfo(Wal *pWal, u32 *pnPrior, u32 *pnFrame){ + int rc = SQLITE_OK; + if( pWal ){ + *pnPrior = pWal->nPriorFrame; + *pnFrame = walGetPriorFrame(&pWal->hdr); + } + return rc; +} + +/* +** Return the journal mode used by this Wal object. +*/ +int sqlite3WalJournalMode(Wal *pWal){ + assert( pWal ); + return (isWalMode2(pWal) ? 
PAGER_JOURNALMODE_WAL2 : PAGER_JOURNALMODE_WAL); } #endif /* #ifndef SQLITE_OMIT_WAL */ diff --git a/src/wal.h b/src/wal.h index 1b17d2dfbe..cfe3dc2f69 100644 --- a/src/wal.h +++ b/src/wal.h @@ -26,7 +26,7 @@ #define CKPT_SYNC_FLAGS(X) (((X)>>2)&0x03) #ifdef SQLITE_OMIT_WAL -# define sqlite3WalOpen(x,y,z) 0 +# define sqlite3WalOpen(w,x,y,z) 0 # define sqlite3WalLimit(x,y) # define sqlite3WalClose(v,w,x,y,z) 0 # define sqlite3WalBeginReadTransaction(y,z) 0 @@ -34,7 +34,7 @@ # define sqlite3WalDbsize(y) 0 # define sqlite3WalBeginWriteTransaction(y) 0 # define sqlite3WalEndWriteTransaction(x) 0 -# define sqlite3WalUndo(x,y,z) 0 +# define sqlite3WalUndo(w,x,y,z) 0 # define sqlite3WalSavepoint(y,z) # define sqlite3WalSavepointUndo(y,z) 0 # define sqlite3WalFrames(u,v,w,x,y,z) 0 @@ -45,6 +45,7 @@ # define sqlite3WalFramesize(z) 0 # define sqlite3WalFindFrame(x,y,z) 0 # define sqlite3WalFile(x) 0 +# define sqlite3WalJournalMode(x) 0 # undef SQLITE_USE_SEH #else @@ -56,7 +57,7 @@ typedef struct Wal Wal; /* Open and close a connection to a write-ahead log. */ -int sqlite3WalOpen(sqlite3_vfs*, sqlite3_file*, const char *, int, i64, Wal**); +int sqlite3WalOpen(sqlite3_vfs*, sqlite3_file*, const char *,int,i64,int,Wal**); int sqlite3WalClose(Wal *pWal, sqlite3*, int sync_flags, int, u8 *); /* Set the limiting size of a WAL file. 
*/ @@ -84,7 +85,7 @@ int sqlite3WalBeginWriteTransaction(Wal *pWal); int sqlite3WalEndWriteTransaction(Wal *pWal); /* Undo any frames written (but not committed) to the log */ -int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx); +int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx, int); /* Return an integer that records the current (uncommitted) write ** position in the WAL */ @@ -137,6 +138,15 @@ int sqlite3WalSnapshotCheck(Wal *pWal, sqlite3_snapshot *pSnapshot); void sqlite3WalSnapshotUnlock(Wal *pWal); #endif +#ifndef SQLITE_OMIT_CONCURRENT +/* Tell the wal layer that we want to commit a concurrent transaction */ +int sqlite3WalLockForCommit(Wal *pWal, PgHdr *pPg, Bitvec *pRead, u32*); + +/* Upgrade the state of the client to take into account changes written +** by other connections */ +int sqlite3WalUpgradeSnapshot(Wal *pWal); +#endif /* SQLITE_OMIT_CONCURRENT */ + #ifdef SQLITE_ENABLE_ZIPVFS /* If the WAL file is not empty, return the number of bytes of content ** stored in each frame (i.e. the db page-size when the WAL was created). @@ -147,6 +157,9 @@ int sqlite3WalFramesize(Wal *pWal); /* Return the sqlite3_file object for the WAL file */ sqlite3_file *sqlite3WalFile(Wal *pWal); +/* Return the journal mode (WAL or WAL2) used by this Wal object. 
*/ +int sqlite3WalJournalMode(Wal *pWal); + #ifdef SQLITE_ENABLE_SETLK_TIMEOUT int sqlite3WalWriteLock(Wal *pWal, int bLock); void sqlite3WalDb(Wal *pWal, sqlite3 *db); @@ -156,5 +169,11 @@ void sqlite3WalDb(Wal *pWal, sqlite3 *db); int sqlite3WalSystemErrno(Wal*); #endif +/* sqlite3_wal_info() data */ +int sqlite3WalInfo(Wal *pWal, u32 *pnPrior, u32 *pnFrame); + +/* sqlite3_wal_info() data */ +int sqlite3WalInfo(Wal *pWal, u32 *pnPrior, u32 *pnFrame); + #endif /* ifndef SQLITE_OMIT_WAL */ #endif /* SQLITE_WAL_H */ diff --git a/test/bc_test1.c b/test/bc_test1.c new file mode 100644 index 0000000000..89135d66d0 --- /dev/null +++ b/test/bc_test1.c @@ -0,0 +1,556 @@ +/* +** 2016-05-07 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +*/ + + +#include +#include +#include +#include "tt3_core.c" + +#ifdef USE_OSINST +# include "../src/test_osinst.c" +#else +# define vfslog_time() 0 +#endif + +typedef struct Config Config; +typedef struct ThreadCtx ThreadCtx; + +#define THREAD_TIME_INSERT 0 +#define THREAD_TIME_COMMIT 1 +#define THREAD_TIME_ROLLBACK 2 +#define THREAD_TIME_WRITER 3 +#define THREAD_TIME_CKPT 4 + +struct ThreadCtx { + Config *pConfig; + Sqlite *pDb; + Error *pErr; + sqlite3_int64 aTime[5]; +}; + +struct Config { + int nIPT; /* --inserts-per-transaction */ + int nThread; /* --threads */ + int nSecond; /* --seconds */ + int bMutex; /* --mutex */ + int nAutoCkpt; /* --autockpt */ + int bRm; /* --rm */ + int bClearCache; /* --clear-cache */ + int nMmap; /* mmap limit in MB */ + char *zFile; + int bOsinst; /* True to use osinst */ + + ThreadCtx *aCtx; /* Array of size nThread */ + + pthread_cond_t cond; + pthread_mutex_t mutex; + int nCondWait; /* Number of threads 
waiting on cond */ + sqlite3_vfs *pVfs; +}; + + +typedef struct VfsWrapperFd VfsWrapperFd; +struct VfsWrapperFd { + sqlite3_file base; /* Base class */ + int bWriter; /* True if holding shm WRITER lock */ + int iTid; + Config *pConfig; + sqlite3_file *pFd; /* Underlying file descriptor */ +}; + +/* Methods of the wrapper VFS */ +static int vfsWrapOpen(sqlite3_vfs*, const char*, sqlite3_file*, int, int*); +static int vfsWrapDelete(sqlite3_vfs*, const char*, int); +static int vfsWrapAccess(sqlite3_vfs*, const char*, int, int*); +static int vfsWrapFullPathname(sqlite3_vfs*, const char *, int, char*); +static void *vfsWrapDlOpen(sqlite3_vfs*, const char*); +static void vfsWrapDlError(sqlite3_vfs*, int, char*); +static void (*vfsWrapDlSym(sqlite3_vfs*,void*, const char*))(void); +static void vfsWrapDlClose(sqlite3_vfs*, void*); +static int vfsWrapRandomness(sqlite3_vfs*, int, char*); +static int vfsWrapSleep(sqlite3_vfs*, int); +static int vfsWrapCurrentTime(sqlite3_vfs*, double*); +static int vfsWrapGetLastError(sqlite3_vfs*, int, char*); +static int vfsWrapCurrentTimeInt64(sqlite3_vfs*, sqlite3_int64*); +static int vfsWrapSetSystemCall(sqlite3_vfs*, const char*, sqlite3_syscall_ptr); +static sqlite3_syscall_ptr vfsWrapGetSystemCall(sqlite3_vfs*, const char*); +static const char *vfsWrapNextSystemCall(sqlite3_vfs*, const char*); + +/* Methods of wrapper sqlite3_io_methods object (see vfsWrapOpen()) */ +static int vfsWrapClose(sqlite3_file*); +static int vfsWrapRead(sqlite3_file*, void*, int iAmt, sqlite3_int64 iOfst); +static int vfsWrapWrite(sqlite3_file*, const void*, int iAmt, sqlite3_int64); +static int vfsWrapTruncate(sqlite3_file*, sqlite3_int64 size); +static int vfsWrapSync(sqlite3_file*, int flags); +static int vfsWrapFileSize(sqlite3_file*, sqlite3_int64 *pSize); +static int vfsWrapLock(sqlite3_file*, int); +static int vfsWrapUnlock(sqlite3_file*, int); +static int vfsWrapCheckReservedLock(sqlite3_file*, int *pResOut); +static int 
vfsWrapFileControl(sqlite3_file*, int op, void *pArg); +static int vfsWrapSectorSize(sqlite3_file*); +static int vfsWrapDeviceCharacteristics(sqlite3_file*); +static int vfsWrapShmMap(sqlite3_file*, int iPg, int, int, void volatile**); +static int vfsWrapShmLock(sqlite3_file*, int offset, int n, int flags); +static void vfsWrapShmBarrier(sqlite3_file*); +static int vfsWrapShmUnmap(sqlite3_file*, int deleteFlag); +static int vfsWrapFetch(sqlite3_file*, sqlite3_int64 iOfst, int iAmt, void **); +static int vfsWrapUnfetch(sqlite3_file*, sqlite3_int64 iOfst, void *p); + +static int vfsWrapOpen( + sqlite3_vfs *pVfs, + const char *zName, + sqlite3_file *pFd, + int flags, + int *fout +){ + static sqlite3_io_methods methods = { + 3, + vfsWrapClose, vfsWrapRead, vfsWrapWrite, + vfsWrapTruncate, vfsWrapSync, vfsWrapFileSize, + vfsWrapLock, vfsWrapUnlock, vfsWrapCheckReservedLock, + vfsWrapFileControl, vfsWrapSectorSize, vfsWrapDeviceCharacteristics, + vfsWrapShmMap, vfsWrapShmLock, vfsWrapShmBarrier, + vfsWrapShmUnmap, vfsWrapFetch, vfsWrapUnfetch + }; + + Config *pConfig = (Config*)pVfs->pAppData; + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + int rc; + + memset(pWrapper, 0, sizeof(VfsWrapperFd)); + if( flags & SQLITE_OPEN_MAIN_DB ){ + pWrapper->iTid = (int)sqlite3_uri_int64(zName, "tid", 0); + } + + pWrapper->pFd = (sqlite3_file*)&pWrapper[1]; + pWrapper->pConfig = pConfig; + rc = pConfig->pVfs->xOpen(pConfig->pVfs, zName, pWrapper->pFd, flags, fout); + if( rc==SQLITE_OK ){ + pWrapper->base.pMethods = &methods; + } + return rc; +} + +static int vfsWrapDelete(sqlite3_vfs *pVfs, const char *a, int b){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xDelete(pConfig->pVfs, a, b); +} +static int vfsWrapAccess(sqlite3_vfs *pVfs, const char *a, int b, int *c){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xAccess(pConfig->pVfs, a, b, c); +} +static int vfsWrapFullPathname(sqlite3_vfs *pVfs, const char *a, int b, char*c){ + Config 
*pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xFullPathname(pConfig->pVfs, a, b, c); +} +static void *vfsWrapDlOpen(sqlite3_vfs *pVfs, const char *a){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xDlOpen(pConfig->pVfs, a); +} +static void vfsWrapDlError(sqlite3_vfs *pVfs, int a, char *b){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xDlError(pConfig->pVfs, a, b); +} +static void (*vfsWrapDlSym(sqlite3_vfs *pVfs, void *a, const char *b))(void){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xDlSym(pConfig->pVfs, a, b); +} +static void vfsWrapDlClose(sqlite3_vfs *pVfs, void *a){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xDlClose(pConfig->pVfs, a); +} +static int vfsWrapRandomness(sqlite3_vfs *pVfs, int a, char *b){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xRandomness(pConfig->pVfs, a, b); +} +static int vfsWrapSleep(sqlite3_vfs *pVfs, int a){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xSleep(pConfig->pVfs, a); +} +static int vfsWrapCurrentTime(sqlite3_vfs *pVfs, double *a){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xCurrentTime(pConfig->pVfs, a); +} +static int vfsWrapGetLastError(sqlite3_vfs *pVfs, int a, char *b){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xGetLastError(pConfig->pVfs, a, b); +} +static int vfsWrapCurrentTimeInt64(sqlite3_vfs *pVfs, sqlite3_int64 *a){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xCurrentTimeInt64(pConfig->pVfs, a); +} +static int vfsWrapSetSystemCall( + sqlite3_vfs *pVfs, + const char *a, + sqlite3_syscall_ptr b +){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xSetSystemCall(pConfig->pVfs, a, b); +} +static sqlite3_syscall_ptr vfsWrapGetSystemCall( + sqlite3_vfs *pVfs, + const char *a +){ + Config *pConfig = (Config*)pVfs->pAppData; + return 
pConfig->pVfs->xGetSystemCall(pConfig->pVfs, a); +} +static const char *vfsWrapNextSystemCall(sqlite3_vfs *pVfs, const char *a){ + Config *pConfig = (Config*)pVfs->pAppData; + return pConfig->pVfs->xNextSystemCall(pConfig->pVfs, a); +} + +static int vfsWrapClose(sqlite3_file *pFd){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + pWrapper->pFd->pMethods->xClose(pWrapper->pFd); + pWrapper->pFd = 0; + return SQLITE_OK; +} +static int vfsWrapRead(sqlite3_file *pFd, void *a, int b, sqlite3_int64 c){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xRead(pWrapper->pFd, a, b, c); +} +static int vfsWrapWrite( + sqlite3_file *pFd, + const void *a, int b, + sqlite3_int64 c +){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xWrite(pWrapper->pFd, a, b, c); +} +static int vfsWrapTruncate(sqlite3_file *pFd, sqlite3_int64 a){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xTruncate(pWrapper->pFd, a); +} +static int vfsWrapSync(sqlite3_file *pFd, int a){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xSync(pWrapper->pFd, a); +} +static int vfsWrapFileSize(sqlite3_file *pFd, sqlite3_int64 *a){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xFileSize(pWrapper->pFd, a); +} +static int vfsWrapLock(sqlite3_file *pFd, int a){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xLock(pWrapper->pFd, a); +} +static int vfsWrapUnlock(sqlite3_file *pFd, int a){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xUnlock(pWrapper->pFd, a); +} +static int vfsWrapCheckReservedLock(sqlite3_file *pFd, int *a){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xCheckReservedLock(pWrapper->pFd, a); +} +static int vfsWrapFileControl(sqlite3_file *pFd, int a, void *b){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return 
pWrapper->pFd->pMethods->xFileControl(pWrapper->pFd, a, b); +} +static int vfsWrapSectorSize(sqlite3_file *pFd){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xSectorSize(pWrapper->pFd); +} +static int vfsWrapDeviceCharacteristics(sqlite3_file *pFd){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xDeviceCharacteristics(pWrapper->pFd); +} +static int vfsWrapShmMap( + sqlite3_file *pFd, + int a, int b, int c, + void volatile **d +){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xShmMap(pWrapper->pFd, a, b, c, d); +} +static int vfsWrapShmLock(sqlite3_file *pFd, int offset, int n, int flags){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + Config *pConfig = pWrapper->pConfig; + int bMutex = 0; + int rc; + + if( (offset==0 && n==1) + && (flags & SQLITE_SHM_LOCK) && (flags & SQLITE_SHM_EXCLUSIVE) + ){ + pthread_mutex_lock(&pConfig->mutex); + pWrapper->bWriter = 1; + bMutex = 1; + if( pWrapper->iTid ){ + sqlite3_int64 t = vfslog_time(); + pConfig->aCtx[pWrapper->iTid-1].aTime[THREAD_TIME_WRITER] -= t; + } + } + + rc = pWrapper->pFd->pMethods->xShmLock(pWrapper->pFd, offset, n, flags); + + if( (rc!=SQLITE_OK && bMutex) + || (offset==0 && (flags & SQLITE_SHM_UNLOCK) && pWrapper->bWriter) + ){ + assert( pWrapper->bWriter ); + pthread_mutex_unlock(&pConfig->mutex); + pWrapper->bWriter = 0; + if( pWrapper->iTid ){ + sqlite3_int64 t = vfslog_time(); + pConfig->aCtx[pWrapper->iTid-1].aTime[THREAD_TIME_WRITER] += t; + } + } + + return rc; +} +static void vfsWrapShmBarrier(sqlite3_file *pFd){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xShmBarrier(pWrapper->pFd); +} +static int vfsWrapShmUnmap(sqlite3_file *pFd, int a){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xShmUnmap(pWrapper->pFd, a); +} +static int vfsWrapFetch(sqlite3_file *pFd, sqlite3_int64 a, int b, void **c){ + VfsWrapperFd *pWrapper = 
(VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xFetch(pWrapper->pFd, a, b, c); +} +static int vfsWrapUnfetch(sqlite3_file *pFd, sqlite3_int64 a, void *b){ + VfsWrapperFd *pWrapper = (VfsWrapperFd*)pFd; + return pWrapper->pFd->pMethods->xUnfetch(pWrapper->pFd, a, b); +} + +static void create_vfs(Config *pConfig){ + static sqlite3_vfs vfs = { + 3, 0, 0, 0, "wrapper", 0, + vfsWrapOpen, vfsWrapDelete, vfsWrapAccess, + vfsWrapFullPathname, vfsWrapDlOpen, vfsWrapDlError, + vfsWrapDlSym, vfsWrapDlClose, vfsWrapRandomness, + vfsWrapSleep, vfsWrapCurrentTime, vfsWrapGetLastError, + vfsWrapCurrentTimeInt64, vfsWrapSetSystemCall, vfsWrapGetSystemCall, + vfsWrapNextSystemCall + }; + sqlite3_vfs *pVfs; + + pVfs = sqlite3_vfs_find(0); + vfs.mxPathname = pVfs->mxPathname; + vfs.szOsFile = pVfs->szOsFile + sizeof(VfsWrapperFd); + vfs.pAppData = (void*)pConfig; + pConfig->pVfs = pVfs; + + sqlite3_vfs_register(&vfs, 1); +} + + +/* +** Wal hook used by connections in thread_main(). +*/ +static int thread_wal_hook( + void *pArg, /* Pointer to ThreadCtx object */ + sqlite3 *db, + const char *zDb, + int nFrame +){ + ThreadCtx *pCtx = (ThreadCtx*)pArg; + Config *pConfig = pCtx->pConfig; + + if( pConfig->nAutoCkpt && nFrame>=pConfig->nAutoCkpt ){ + pCtx->aTime[THREAD_TIME_CKPT] -= vfslog_time(); + pthread_mutex_lock(&pConfig->mutex); + if( pConfig->nCondWait>=0 ){ + pConfig->nCondWait++; + if( pConfig->nCondWait==pConfig->nThread ){ + execsql(pCtx->pErr, pCtx->pDb, "PRAGMA wal_checkpoint"); + pthread_cond_broadcast(&pConfig->cond); + }else{ + pthread_cond_wait(&pConfig->cond, &pConfig->mutex); + } + pConfig->nCondWait--; + } + pthread_mutex_unlock(&pConfig->mutex); + pCtx->aTime[THREAD_TIME_CKPT] += vfslog_time(); + } + + return SQLITE_OK; +} + + +static char *thread_main(int iTid, void *pArg){ + Config *pConfig = (Config*)pArg; + Error err = {0}; /* Error code and message */ + Sqlite db = {0}; /* SQLite database connection */ + int nAttempt = 0; /* Attempted transactions */ + int 
nCommit = 0; /* Successful transactions */ + int j; + ThreadCtx *pCtx = &pConfig->aCtx[iTid-1]; + char *zUri = 0; + +#ifdef USE_OSINST + char *zOsinstName = 0; + char *zLogName = 0; + if( pConfig->bOsinst ){ + zOsinstName = sqlite3_mprintf("osinst%d", iTid); + zLogName = sqlite3_mprintf("bc_test1.log.%d.%d", (int)getpid(), iTid); + zUri = sqlite3_mprintf( + "file:%s?vfs=%s&tid=%d", pConfig->zFile, zOsinstName, iTid + ); + sqlite3_vfslog_new(zOsinstName, 0, zLogName); + opendb(&err, &db, zUri, 0); + }else +#endif + { + zUri = sqlite3_mprintf("file:%s?tid=%d", pConfig->zFile, iTid); + opendb(&err, &db, zUri, 0); + } + + sqlite3_busy_handler(db.db, 0, 0); + sql_script_printf(&err, &db, + "PRAGMA wal_autocheckpoint = 0;" + "PRAGMA synchronous = 0;" + "PRAGMA mmap_size = %lld;", + (i64)(pConfig->nMmap) * 1024 * 1024 + ); + + pCtx->pConfig = pConfig; + pCtx->pErr = &err; + pCtx->pDb = &db; + sqlite3_wal_hook(db.db, thread_wal_hook, (void*)pCtx); + + while( !timetostop(&err) ){ + execsql(&err, &db, "BEGIN CONCURRENT"); + + pCtx->aTime[THREAD_TIME_INSERT] -= vfslog_time(); + for(j=0; j<pConfig->nIPT; j++){ + execsql(&err, &db, + "INSERT INTO t1 VALUES" + "(randomblob(10), randomblob(20), randomblob(30), randomblob(200))" + ); + } + pCtx->aTime[THREAD_TIME_INSERT] += vfslog_time(); + + pCtx->aTime[THREAD_TIME_COMMIT] -= vfslog_time(); + execsql(&err, &db, "COMMIT"); + pCtx->aTime[THREAD_TIME_COMMIT] += vfslog_time(); + + pCtx->aTime[THREAD_TIME_ROLLBACK] -= vfslog_time(); + nAttempt++; + if( err.rc==SQLITE_OK ){ + nCommit++; + }else{ + clear_error(&err, SQLITE_BUSY); + execsql(&err, &db, "ROLLBACK"); + } + pCtx->aTime[THREAD_TIME_ROLLBACK] += vfslog_time(); + + if( pConfig->bClearCache ){ + sqlite3_db_release_memory(db.db); + } + } + + closedb(&err, &db); + +#ifdef USE_OSINST + if( pConfig->bOsinst ){ + sqlite3_vfslog_finalize(zOsinstName); + sqlite3_free(zOsinstName); + sqlite3_free(zLogName); + } +#endif + sqlite3_free(zUri); + + pthread_mutex_lock(&pConfig->mutex); + 
pConfig->nCondWait = -1; + pthread_cond_broadcast(&pConfig->cond); + pthread_mutex_unlock(&pConfig->mutex); + + return sqlite3_mprintf("commits: %d/%d insert: %dms" + " commit: %dms" + " rollback: %dms" + " writer: %dms" + " checkpoint: %dms", + nCommit, nAttempt, + (int)(pCtx->aTime[THREAD_TIME_INSERT]/1000), + (int)(pCtx->aTime[THREAD_TIME_COMMIT]/1000), + (int)(pCtx->aTime[THREAD_TIME_ROLLBACK]/1000), + (int)(pCtx->aTime[THREAD_TIME_WRITER]/1000), + (int)(pCtx->aTime[THREAD_TIME_CKPT]/1000) + ); +} + +int main(int argc, const char **argv){ + Error err = {0}; /* Error code and message */ + Sqlite db = {0}; /* SQLite database connection */ + Threadset threads = {0}; /* Test threads */ + Config conf = {5, 3, 5}; + int i; + + CmdlineArg apArg[] = { + { "-seconds", CMDLINE_INT, offsetof(Config, nSecond) }, + { "-inserts", CMDLINE_INT, offsetof(Config, nIPT) }, + { "-threads", CMDLINE_INT, offsetof(Config, nThread) }, + { "-mutex", CMDLINE_BOOL, offsetof(Config, bMutex) }, + { "-rm", CMDLINE_BOOL, offsetof(Config, bRm) }, + { "-autockpt",CMDLINE_INT, offsetof(Config, nAutoCkpt) }, + { "-mmap", CMDLINE_INT, offsetof(Config, nMmap) }, + { "-clear-cache", CMDLINE_BOOL, offsetof(Config, bClearCache) }, + { "-file", CMDLINE_STRING, offsetof(Config, zFile) }, + { "-osinst", CMDLINE_BOOL, offsetof(Config, bOsinst) }, + { 0, 0, 0 } + }; + + conf.nAutoCkpt = 1000; + cmdline_process(apArg, argc, argv, (void*)&conf); + if( err.rc==SQLITE_OK ){ + char *z = cmdline_construct(apArg, (void*)&conf); + printf("With: %s\n", z); + sqlite3_free(z); + } + if( conf.zFile==0 ){ + conf.zFile = "xyz.db"; + } + + /* Create the special VFS - "wrapper". And the mutex and condition + ** variable. 
*/ + create_vfs(&conf); + pthread_mutex_init(&conf.mutex, 0); + pthread_cond_init(&conf.cond, 0); + + conf.aCtx = sqlite3_malloc(sizeof(ThreadCtx) * conf.nThread); + memset(conf.aCtx, 0, sizeof(ThreadCtx) * conf.nThread); + + /* Ensure the schema has been created */ + opendb(&err, &db, conf.zFile, conf.bRm); + sql_script(&err, &db, + "PRAGMA journal_mode = wal;" + "CREATE TABLE IF NOT EXISTS t1(a PRIMARY KEY, b, c, d) WITHOUT ROWID;" + "CREATE INDEX IF NOT EXISTS t1b ON t1(b);" + "CREATE INDEX IF NOT EXISTS t1c ON t1(c);" + ); + + setstoptime(&err, conf.nSecond*1000); + if( conf.nThread==1 ){ + char *z = thread_main(1, (void*)&conf); + printf("Thread 0 says: %s\n", (z==0 ? "..." : z)); + fflush(stdout); + }else{ + for(i=0; i75000] [file size test.db-shm] +} {1 32768} + +faultsim_save_and_close + +do_faultsim_test 1 -prep { + faultsim_restore_and_reopen + execsql { + SELECT * FROM t1; + BEGIN CONCURRENT; + INSERT INTO t2 VALUES(1, 2); + } + sqlite3 db2 test.db + execsql { + PRAGMA journal_size_limit = 10000; + INSERT INTO t1 VALUES(randomblob(1000), randomblob(1000)); + } db2 + db2 close +} -body { + execsql { COMMIT } +} -test { + faultsim_test_result {0 {}} + catchsql { ROLLBACK } + set res [catchsql { SELECT count(*) FROM t1 }] + if {$res!="0 9"} { error "expected {0 9} got {$res}" } + faultsim_integrity_check +} + +finish_test + diff --git a/test/concurrent.test b/test/concurrent.test new file mode 100644 index 0000000000..ba01ee7753 --- /dev/null +++ b/test/concurrent.test @@ -0,0 +1,688 @@ +# 2015 July 26 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#*********************************************************************** +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +set ::testprefix concurrent + +ifcapable !concurrent { + finish_test + return +} + +do_execsql_test 1.0 { + PRAGMA journal_mode = wal; +} {wal} + +do_execsql_test 1.1 { + CREATE TABLE t1(k INTEGER PRIMARY KEY, v); + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(1, 'abcd'); + COMMIT; +} + +do_execsql_test 1.2 { + SELECT * FROM t1; +} {1 abcd} + +do_execsql_test 1.3 { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(2, 'efgh'); + ROLLBACK; +} + +do_execsql_test 1.4 { + SELECT * FROM t1; +} {1 abcd} + + +#------------------------------------------------------------------------- +# CONCURRENT transactions cannot do cache spills. +# +foreach {tn trans spill} { + 1 {BEGIN CONCURRENT} 0 + 2 {BEGIN} 1 +} { + do_test 1.5.$tn { + sqlite3 db2 test.db + set walsz [file size test.db-wal] + + execsql { PRAGMA cache_size = 10 } db2 + execsql $trans db2 + execsql { + WITH cnt(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM cnt WHERE i<50) + INSERT INTO t1(v) SELECT randomblob(900) FROM cnt; + } db2 + + expr {[file size test.db-wal]==$walsz} + } [expr !$spill] + + execsql ROLLBACK db2 + db2 close +} + +#------------------------------------------------------------------------- +# CONCURRENT transactions may not be committed while there are active 
+do_execsql_test 1.6.setup { + DROP TABLE t1; + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); + INSERT INTO t1 VALUES(3, 4); + INSERT INTO t1 VALUES(5, 6); +} +foreach {tn trans commit_ok} { + 1 {BEGIN CONCURRENT} 0 + 2 {BEGIN} 1 +} { + do_test 1.6.$tn.1 { + set stmt [sqlite3_prepare db "SELECT * FROM t1" -1 dummy] + sqlite3_step $stmt + } SQLITE_ROW + do_test 1.6.$tn.2 { + execsql $trans + execsql { INSERT INTO t1 VALUES(7, 8) } + } {} + + if { $commit_ok } { + do_test 1.6.$tn.3 { catchsql COMMIT } {0 {}} + } else { + do_test 1.6.$tn.4 { catchsql COMMIT } {/1 {cannot commit transaction .*}/} + } + + sqlite3_finalize $stmt + catchsql ROLLBACK +} + +#------------------------------------------------------------------------- +# CONCURRENT transactions may not modify the db schema. +# +sqlite3 db2 test.db +foreach {tn sql} { + 1 { CREATE TABLE xx(a, b) } + 2 { DROP TABLE t1 } + 3 { CREATE INDEX i1 ON t1(a) } + 4 { CREATE VIEW v1 AS SELECT * FROM t1 } +} { + do_catchsql_test 1.7.0.$tn.1 " + BEGIN CONCURRENT; + $sql + " {0 {}} + + db2 eval {INSERT INTO t1 DEFAULT VALUES} + + do_catchsql_test 1.7.0.$tn.2 { + COMMIT + } {1 {database is locked}} + + do_execsql_test 1.7.0.$tn.2 ROLLBACK + + do_execsql_test 1.7.0.$tn.3 { + SELECT sql FROM sqlite_master; + SELECT sql FROM sqlite_temp_master; + } {{CREATE TABLE t1(a, b)}} + + #do_execsql_test 1.7.0.$tn.3 COMMIT +} + +# Except the temp db schema. 
+foreach {tn sql} { + 1 { CREATE TEMP TABLE xx(a, b) } + 2 { DROP TABLE xx } + 3 { CREATE TEMP TABLE yy(a, b) } + 4 { CREATE VIEW temp.v1 AS SELECT * FROM t1 } + 5 { CREATE INDEX yyi1 ON yy(a); } + 6 { CREATE TABLE temp.zz(a, b) } +} { + do_catchsql_test 1.7.1.$tn.1 " + BEGIN CONCURRENT; + $sql + " {0 {}} + + do_execsql_test 1.7.1.$tn.2 COMMIT +} + + +do_execsql_test 1.7.1.x { + SELECT sql FROM sqlite_master; + SELECT sql FROM sqlite_temp_master; +} { + {CREATE TABLE t1(a, b)} + {CREATE TABLE yy(a, b)} + {CREATE VIEW v1 AS SELECT * FROM t1} + {CREATE INDEX yyi1 ON yy(a)} + {CREATE TABLE zz(a, b)} +} +db2 close + +#------------------------------------------------------------------------- +# If an auto-vacuum database is written within an CONCURRENT transaction, it +# is handled in the same way as for a non-CONCURRENT transaction. +# +reset_db +do_execsql_test 1.8.1 { + PRAGMA auto_vacuum = 1; + PRAGMA journal_mode = wal; + CREATE TABLE t1(x, y); + INSERT INTO t1 VALUES('x', 'y'); +} {wal} + +do_execsql_test 1.8.2 { + BEGIN CONCURRENT; + SELECT * FROM t1; + COMMIT; +} {x y} + +do_catchsql_test 1.8.3 { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES('a', 'b'); +} {0 {}} + +do_test 1.8.4 { + sqlite3 db2 test.db + catchsql { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES('c', 'd'); + } db2 +} {1 {database is locked}} + +do_test 1.8.5 { + db eval COMMIT + db2 eval COMMIT +} {} +db close +db2 close + +do_multiclient_test tn { + + #----------------------------------------------------------------------- + # 1. Start an CONCURRENT transaction using [db1]. + # + # 2. Start and then rollback a regular transaction using [db2]. This + # can be done as the ongoing [db1] transaction is CONCURRENT. + # + # 3. The [db1] transaction can now be committed, as [db2] has relinquished + # the write lock. 
+ # + do_test 2.$tn.1.1 { + sql1 { + PRAGMA journal_mode = wal; + CREATE TABLE t1(k INTEGER PRIMARY KEY, v); + INSERT INTO t1 VALUES(1, 'one'); + } + sql1 { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(2, 'two'); + } + code1 { sqlite3_get_autocommit db } + } 0 + + do_test 2.$tn.1.2 { + sql2 { + BEGIN; + INSERT INTO t1 VALUES(3, 'three'); + ROLLBACK; + } + } {} + + do_test 2.$tn.1.3 { + sql1 COMMIT + sql2 { SELECT * FROM t1 } + } {1 one 2 two} + + #----------------------------------------------------------------------- + # 1. Start an CONCURRENT transaction using [db1]. + # + # 2. Commit a transaction using [db2]. + # + # 3. Try to commit with [db1]. Check that SQLITE_BUSY_SNAPSHOT is returned, + # and the transaction is not rolled back. + # + do_test 2.$tn.2.1 { + sql1 { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(-1, 'hello world'); + } + } {} + + do_test 2.$tn.2.2 { + sql2 { + INSERT INTO t1 VALUES(3, 'three'); + } + } {} + + do_test 2.$tn.2.3.1 { + set rc [catch { sql1 COMMIT } msg] + list $rc $msg + } {1 {database is locked}} + + do_test 2.$tn.2.3.2 { + code1 { list [sqlite3_extended_errcode db] [sqlite3_get_autocommit db] } + } {SQLITE_BUSY_SNAPSHOT 0} + + do_test 2.$tn.2.3.3 { + sql1 { + SELECT * FROM t1; + ROLLBACK; + } + } {-1 {hello world} 1 one 2 two} + + #----------------------------------------------------------------------- + # 1. Start an CONCURRENT transaction using [db1]. + # + # 2. Open a transaction using [db2]. + # + # 3. Try to commit with [db1]. Check that SQLITE_BUSY is returned, + # and the transaction is not rolled back. + # + # 4. Have [db2] roll its transaction back. Then check that [db1] can + # commit. 
+ # + do_test 2.$tn.3.1 { + sql1 { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(4, 'four'); + } + } {} + + do_test 2.$tn.3.2 { + sql2 { + BEGIN; + INSERT INTO t1 VALUES(-1, 'xyz'); + } + } {} + + do_test 2.$tn.3.3.1 { + set rc [catch { sql1 COMMIT } msg] + list $rc $msg + } {1 {database is locked}} + + do_test 2.$tn.3.3.2 { + code1 { list [sqlite3_extended_errcode db] [sqlite3_get_autocommit db] } + } {SQLITE_BUSY 0} + + do_test 2.$tn.3.3.3 { + sql1 { SELECT * FROM t1; } + } {1 one 2 two 3 three 4 four} + + do_test 2.$tn.3.4 { + sql2 ROLLBACK + sql1 COMMIT + sql1 { SELECT * FROM t1; } + } {1 one 2 two 3 three 4 four} + + #----------------------------------------------------------------------- + # 1. Create a second table - t2. + # + # 2. Write to t1 with [db] and t2 with [db2]. + # + # 3. See if it worked. + # + do_test 2.$tn.4.1 { + sql1 { CREATE TABLE t2(a, b) } + } {} + do_test 2.$tn.4.2 { + sql2 { + BEGIN CONCURRENT; + INSERT INTO t2 VALUES('i', 'n'); + } + + sql1 { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(5, 'five'); + COMMIT; + } + + sql2 COMMIT + } {} + + do_test 2.$tn.4.3.1 { + sql2 {SELECT * FROM t1} + } {1 one 2 two 3 three 4 four 5 five} + do_test 2.$tn.4.3.2 { + sql1 {SELECT * FROM t1} + } {1 one 2 two 3 three 4 four 5 five} + + do_test 2.$tn.4.3.3 { sql2 {SELECT * FROM t2} } {i n} + do_test 2.$tn.4.3.4 { sql1 {SELECT * FROM t2} } {i n} + + #----------------------------------------------------------------------- + # The "schema cookie" issue. + # + # 1. Begin a CONCURRENT write to "t1" using [db] + # + # 2. Create an index on t1 using [db2]. + # + # 3. Attempt to commit the CONCURRENT write. This is an SQLITE_BUSY_SNAPSHOT, + # even though there is no page collision. 
+ # + do_test 2.$tn.5.1 { + sql1 { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(6, 'six'); + } + } {} + + do_test 2.$tn.5.2 { + sql2 { CREATE INDEX i1 ON t1(v); } + } {} + + do_test 2.$tn.5.3 { + list [catch { sql1 { COMMIT } } msg] $msg [sqlite3_errcode db] + } {1 {database is locked} SQLITE_BUSY_SNAPSHOT} + + do_test 2.$tn.5.4 { + sql2 { PRAGMA integrity_check } + } {ok} + catch { sql1 ROLLBACK } + + #----------------------------------------------------------------------- + # + # 1. Begin a CONCURRENT write to "t1" using [db] + # + # 2. Lots of inserts into t2. Enough to grow the db file and modify page 1. + # + # 3. Check that the CONCURRENT transaction can not be committed. + # + do_test 2.$tn.6.1 { + sql1 { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(6, 'six'); + } + } {} + + do_test 2.$tn.6.2 { + sql2 { + WITH src(a,b) AS ( + VALUES(1,1) UNION ALL SELECT a+1,b+1 FROM src WHERE a<10000 + ) INSERT INTO t2 SELECT * FROM src; + } + } {} + + do_test 2.$tn.6.3 { + sql1 { SELECT count(*) FROM t2 } + list [catch { sql1 { COMMIT } } msg] $msg [sqlite3_errcode db] + } {1 {database is locked} SQLITE_BUSY_SNAPSHOT} + sql1 ROLLBACK + + do_test 2.$tn.6.4 { + sql1 { + SELECT count(*) FROM t1; + SELECT count(*) FROM t2; + } + } {5 10001} + + #----------------------------------------------------------------------- + # + # 1. Begin a big CONCURRENT write to "t1" using [db] - large enough to + # grow the db file. + # + # 2. Lots of inserts into t2. Also enough to grow the db file. + # + # 3. Check that the CONCURRENT transaction can still be committed, even + # though both transactions grew the db file. 
+ # + do_test 2.$tn.7.1 { + sql1 { + BEGIN CONCURRENT; + WITH src(a,b) AS ( + VALUES(10000,10000) UNION ALL SELECT a+1,b+1 FROM src WHERE a<20000 + ) INSERT INTO t1 SELECT * FROM src; + } + } {} + + do_test 2.$tn.7.2 { + sql2 { + WITH src(a,b) AS ( + VALUES(1,1) UNION ALL SELECT a+1,b+1 FROM src WHERE a<10000 + ) INSERT INTO t2 SELECT * FROM src; + } + } {} + + do_test 2.$tn.7.3 { + list [catch { sql1 { COMMIT } } msg] $msg [sqlite3_errcode db] + } {0 {} SQLITE_OK} + + do_test 2.$tn.7.4 { sql3 { PRAGMA integrity_check } } ok +} + +#------------------------------------------------------------------------- +# Concurrent transactions may not modify the user_version or application_id. +# +reset_db +do_execsql_test 3.0 { + PRAGMA journal_mode = wal; + CREATE TABLE t1(x, y); + INSERT INTO t1 VALUES('a', 'b'); + PRAGMA user_version = 10; +} {wal} +do_execsql_test 3.1 { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES('c', 'd'); + SELECT * FROM t1; +} {a b c d} +do_catchsql_test 3.2 { + PRAGMA user_version = 11; +} {1 {cannot modify user_version within CONCURRENT transaction}} +do_execsql_test 3.3 { + PRAGMA user_version; + SELECT * FROM t1; +} {10 a b c d} +do_catchsql_test 3.4 { + PRAGMA application_id = 11; +} {1 {cannot modify application_id within CONCURRENT transaction}} +do_execsql_test 3.5 { + COMMIT; + PRAGMA user_version; + PRAGMA application_id; + SELECT * FROM t1; +} {10 0 a b c d} + +#------------------------------------------------------------------------- +# However, another transaction modifying the user_version or application_id +# should not cause a conflict. And committing a concurrent transaction does not +# clobber the modification - even if the concurrent transaction allocates or +# frees database pages. 
+# +do_multiclient_test tn { + do_test 4.$tn.1 { + sql1 { + PRAGMA journal_mode = wal; + CREATE TABLE ttt(y UNIQUE, z UNIQUE); + PRAGMA user_version = 14; + BEGIN CONCURRENT; + INSERT INTO ttt VALUES('y', 'z'); + } + } {wal} + do_test 4.$tn.2 { + sql2 { PRAGMA user_version = 16 } + sql1 COMMIT + sql1 { PRAGMA user_version } + } {16} + + do_test 4.$tn.3 { + sql1 { + BEGIN CONCURRENT; + INSERT INTO ttt VALUES(randomblob(10000), randomblob(4)); + PRAGMA user_version; + } + } {16} + do_test 4.$tn.4 { + sql2 { PRAGMA user_version = 1234 } + sql1 { + PRAGMA user_version; + COMMIT; + PRAGMA user_version; + PRAGMA integrity_check; + } + } {16 1234 ok} + + do_test 4.$tn.5 { + sql1 { + BEGIN CONCURRENT; + DELETE FROM ttt; + PRAGMA user_version; + } + } {1234} + do_test 4.$tn.4 { + sql2 { PRAGMA user_version = 5678 } + sql1 { + PRAGMA user_version; + COMMIT; + PRAGMA user_version; + PRAGMA integrity_check; + } + } {1234 5678 ok} +} + +do_multiclient_test tn { + do_test 5.$tn.1 { + sql1 { + PRAGMA journal_mode = wal; + CREATE TABLE tt(a INTEGER PRIMARY KEY, b); + CREATE TABLE t2(a INTEGER PRIMARY KEY, b); + INSERT INTO tt VALUES(1, randomblob(400)); + BEGIN CONCURRENT; + } + } {wal} + + do_test 5.$tn.2 { + sql1 { UPDATE t2 SET b=5 WHERE a=3 } + sql2 { INSERT INTO tt VALUES(2, randomblob(6000)) } + } {} + + do_test 5.$tn.3 { + sql1 { COMMIT } + } {} +} + +do_multiclient_test tn { + do_test 6.$tn.1 { + sql1 { + PRAGMA journal_mode = wal; + CREATE TABLE t1(a INTEGER PRIMARY KEY, b); + CREATE TABLE t2(a INTEGER PRIMARY KEY, b); + INSERT INTO t1 VALUES(1, 'one'); + INSERT INTO t2 VALUES(2, 'two'); + } + } {wal} + + do_test 6.$tn.2 { + sql2 { + BEGIN CONCURRENT; + SELECT * FROM t2; + INSERT INTO t1 VALUES(3, 'three'); + } + } {2 two} + + do_test 6.$tn.3 { + sql1 { + INSERT INTO t2 VALUES(3, 'three'); + } + } {} + + do_test 6.$tn.2 { + list [catch { sql2 { COMMIT } } msg] $msg + } {1 {database is locked}} +} + +do_multiclient_test tn { + do_test 7.$tn.1 { + sql1 { + PRAGMA 
journal_mode = wal; + CREATE TABLE t1(a INTEGER PRIMARY KEY, b); + WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<100) + INSERT INTO t1 SELECT NULL, randomblob(400) FROM s; + + CREATE TABLE t2(a INTEGER PRIMARY KEY, b); + WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<50000) + INSERT INTO t2 SELECT NULL, randomblob(400) FROM s; + + CREATE TABLE t3(a INTEGER PRIMARY KEY, b); + WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<100) + INSERT INTO t3 SELECT NULL, randomblob(400) FROM s; + + CREATE TABLE t4(a INTEGER PRIMARY KEY, b); + + } + set {} {} + } {} + + do_test 7.$tn.2 { + sql2 { + BEGIN CONCURRENT; + SELECT * FROM t1; + INSERT INTO t4 VALUES(1, 2); + } + set {} {} + } {} + + do_test 7.$tn.3 { + sql3 { + BEGIN CONCURRENT; + SELECT * FROM t3; + INSERT INTO t4 VALUES(1, 2); + } + set {} {} + } {} + + do_test 7.$tn.4 { + sql1 { + UPDATE t1 SET b=randomblob(400); + UPDATE t2 SET b=randomblob(400); + UPDATE t3 SET b=randomblob(400); + } + } {} + + do_test 7.$tn.5 { + csql2 { COMMIT } + } {1 {database is locked}} + + do_test 7.$tn.6 { + csql3 { COMMIT } + } {1 {database is locked}} + + + csql2 ROLLBACK + csql3 ROLLBACK + + # The following test works with $tn==1 (sql2 and sql3 use separate + # processes), but is quite slow. So only run it with $tn==2 (all + # connections in the same process). 
+ # + if {$tn==2} { + do_test 7.$tn.7 { + for {set i 1} {$i < 10000} {incr i} { + sql3 { + PRAGMA wal_checkpoint; + BEGIN CONCURRENT; + SELECT * FROM t3; + INSERT INTO t4 VALUES(1, 2); + } + + sql1 { + UPDATE t2 SET b = randomblob(400) WHERE rowid <= $i; + UPDATE t3 SET b = randomblob(400) WHERE rowid = 1; + } + + if {[csql3 COMMIT]!={1 {database is locked}}} { + error "Failed at i=$i" + } + csql3 ROLLBACK + } + } {} + } + +} + +finish_test diff --git a/test/concurrent2.test b/test/concurrent2.test new file mode 100644 index 0000000000..f6a97b8456 --- /dev/null +++ b/test/concurrent2.test @@ -0,0 +1,630 @@ +# 2015 July 26 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Miscellaneous tests for transactions started with BEGIN CONCURRENT. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +source $testdir/wal_common.tcl +set ::testprefix concurrent2 + +ifcapable !concurrent { + finish_test + return +} + +do_test 0.1 { + llength [sqlite3_wal_info db main] +} {2} + +do_multiclient_test tn { + + do_test 1.$tn.1 { + sql1 { + PRAGMA journal_mode = wal; + CREATE TABLE t1(x); + CREATE TABLE t2(y); + } + } {wal} + do_test 1.$tn.5 { sql3 { PRAGMA integrity_check } } {ok} + + # Test that an CONCURRENT transaction that allocates/frees no pages does + # not conflict with a transaction that does allocate pages. + do_test 1.$tn.2 { + sql1 { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(4); + } + sql2 { + INSERT INTO t2 VALUES(randomblob(1500)); + } + sql1 { + COMMIT; + } + } {} + do_test 1.$tn.5 { sql3 { PRAGMA integrity_check } } {ok} + + # But that an CONCURRENT transaction does conflict with a transaction + # that modifies the db schema. 
+ do_test 1.$tn.3 { + sql1 { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(5); + } + sql2 { + CREATE TABLE t3(z); + } + list [catch { sql1 COMMIT } msg] $msg + } {1 {database is locked}} + do_test 1.$tn.5 { sql3 { PRAGMA integrity_check } } {ok} + + # Test that an CONCURRENT transaction that allocates at least one page + # does not conflict with a transaction that allocates no pages. + do_test 1.$tn.4 { + sql1 { + ROLLBACK; + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(randomblob(1500)); + } + sql2 { + INSERT INTO t2 VALUES(8); + } + sql1 { + COMMIT; + } + } {} + + do_test 1.$tn.5 { sql3 { PRAGMA integrity_check } } {ok} +} + +do_multiclient_test tn { + do_test 2.$tn.1 { + sql1 { + PRAGMA journal_mode = wal; + CREATE TABLE t1(x UNIQUE); + CREATE TABLE t2(y UNIQUE); + } + } {wal} + + do_test 2.$tn.2 { + sql1 { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(randomblob(1500)); + } + sql2 { + INSERT INTO t2 VALUES(randomblob(1500)); + } + sql1 COMMIT + } {} + + do_test 2.$tn.3 { sql3 { PRAGMA integrity_check } } {ok} + + do_test 2.$tn.4 { + sql1 { + BEGIN CONCURRENT; + DELETE FROM t1; + } + sql2 { + DELETE FROM t2; + } + sql1 COMMIT + } {} + + do_test 2.$tn.5 { sql3 { PRAGMA integrity_check } } {ok} + + do_test 2.$tn.6 { + sql1 { + INSERT INTO t1 VALUES(randomblob(1500)); + INSERT INTO t1 VALUES(randomblob(1500)); + INSERT INTO t2 VALUES(randomblob(1500)); + DELETE FROM t1 WHERE rowid=1; + } + + sql1 { + BEGIN CONCURRENT; + DELETE FROM t1 WHERE rowid=2; + } + + sql2 { + DELETE FROM t2; + } + + sql1 COMMIT + } {} + + do_test 2.$tn.7 { sql3 { PRAGMA integrity_check } } {ok} +} + +#------------------------------------------------------------------------- +# When an CONCURRENT transaction is opened on a database, the nFree and +# iTrunk header fields of the cached version of page 1 are both set +# to 0. This allows an CONCURRENT transaction to use its own private +# free-page-list, which is merged with the main database free-list when +# the transaction is committed. 
+# +# The following tests check that nFree/iTrunk are correctly restored if +# an CONCURRENT transaction is rolled back, and that savepoint rollbacks +# that occur within CONCURRENT transactions do not incorrectly restore +# these fields to their on-disk values. +# +reset_db +do_execsql_test 3.0 { + PRAGMA journal_mode = wal; + CREATE TABLE t1(x, y); + INSERT INTO t1 VALUES(randomblob(1500), randomblob(1500)); + DELETE FROM t1; +} {wal} + +do_execsql_test 3.1 { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(1, 2); + ROLLBACK; +} + +do_execsql_test 3.2 { PRAGMA integrity_check } {ok} +do_execsql_test 3.3 { PRAGMA freelist_count } {2} + +do_execsql_test 3.4.1 { + BEGIN CONCURRENT; + PRAGMA freelist_count; +} {2} +do_execsql_test 3.4.2 { + SAVEPOINT xyz; + INSERT INTO t1 VALUES(randomblob(1500), NULL); + PRAGMA freelist_count; +} {0} +do_execsql_test 3.4.3 { + ROLLBACK TO xyz; +} {} +do_execsql_test 3.4.4 { PRAGMA freelist_count } {0} +do_execsql_test 3.4.5 { COMMIT; PRAGMA freelist_count } {2} +do_execsql_test 3.4.6 { PRAGMA integrity_check } {ok} + +do_execsql_test 3.5.1 { + BEGIN CONCURRENT; + UPDATE t1 SET x=randomblob(10) WHERE y=555; + PRAGMA freelist_count; +} {0} +do_execsql_test 3.5.2 { + ROLLBACK; + PRAGMA freelist_count; +} {2} +do_execsql_test 3.5.3 { PRAGMA integrity_check } {ok} + +#------------------------------------------------------------------------- +# Test that nothing goes wrong if an CONCURRENT transaction allocates a +# page at the end of the file, frees it within the same transaction, and +# then has to move the same page to avoid a conflict on COMMIT. 
+# +do_multiclient_test tn { + do_test 4.$tn.1 { + sql1 { + PRAGMA journal_mode = wal; + CREATE TABLE t1(x); + CREATE TABLE t2(x); + } + } {wal} + + do_test 4.$tn.2 { + sql1 { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(randomblob(1500)); + INSERT INTO t1 VALUES(randomblob(1500)); + DELETE FROM t1 WHERE rowid = 1; + } + + sql2 { + INSERT INTO t2 VALUES(randomblob(1500)); + INSERT INTO t2 VALUES(randomblob(1500)); + INSERT INTO t2 VALUES(randomblob(1500)); + INSERT INTO t2 VALUES(randomblob(1500)); + DELETE FROM t2 WHERE rowid IN (1, 2); + } + + sql1 COMMIT + } {} +} + +#------------------------------------------------------------------------- +# +do_multiclient_test tn { + do_test 5.$tn.1 { + sql1 { + PRAGMA journal_mode = wal; + CREATE TABLE t1(x); + CREATE TABLE t2(x); + INSERT INTO t1 VALUES(randomblob(1500)); + PRAGMA page_count; + } + } {wal 4} + + do_test 5.$tn.2 { + sql1 { + BEGIN CONCURRENT; + INSERT INTO t2 VALUES(randomblob(1500)); + PRAGMA page_count; + } + } {5} + + do_test 5.$tn.3 { + sql2 { + DELETE FROM t1; + PRAGMA freelist_count; + PRAGMA page_count; + } + } {1 4} + + do_test 5.$tn.4 { sql1 COMMIT } {} + do_test 5.$tn.5 { sql3 { PRAGMA integrity_check } } {ok} +} + +#------------------------------------------------------------------------- +# +do_multiclient_test tn { + do_test 6.$tn.1 { + sql1 { + PRAGMA journal_mode = wal; + CREATE TABLE t1(x); + INSERT INTO t1 VALUES(randomblob(1500)); + PRAGMA wal_checkpoint; + } + } {wal 0 5 5} + + do_test 6.$tn.2 { + sql1 { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(randomblob(1500)); + INSERT INTO t1 VALUES(randomblob(1500)); + } + } {} + + do_test 6.$tn.3 { + sql2 { + BEGIN; + INSERT INTO t1 VALUES(randomblob(1500)); + INSERT INTO t1 VALUES(randomblob(1500)); + COMMIT; + } + } {} + + do_test 6.$tn.4 { + list [catch { sql1 COMMIT } msg] $msg + } {1 {database is locked}} + do_test 6.$tn.5 { sql3 { PRAGMA integrity_check } } {ok} + do_test 6.$tn.5 { sql3 { SELECT count(*) from t1 } } {3} +} + 
+#------------------------------------------------------------------------- +# Test that if a corrupt wal-index-header is encountered when attempting +# to commit a CONCURRENT transaction, the transaction is not committed +# (or rolled back) and that SQLITE_BUSY_SNAPSHOT is returned to the user. +# +catch { db close } +forcedelete test.db +testvfs tvfs +sqlite3 db test.db -vfs tvfs +do_execsql_test 7.1 { + PRAGMA journal_mode = wal; + BEGIN; + CREATE TABLE t1(a, b, PRIMARY KEY(a)); + INSERT INTO t1 VALUES(1, 2); + INSERT INTO t1 VALUES(3, 4); + COMMIT; + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(5, 6); + INSERT INTO t1 VALUES(7, 8); + SELECT * FROM t1; +} {wal 1 2 3 4 5 6 7 8} + +# Corrupt the wal-index header +incr_tvfs_hdr test.db 11 1 + +do_catchsql_test 7.2.1 { COMMIT } {1 {database is locked}} +do_test 7.2.2 { sqlite3_extended_errcode db } SQLITE_BUSY_SNAPSHOT + +do_execsql_test 7.3.1 { + SELECT * FROM t1; + ROLLBACK; +} {1 2 3 4 5 6 7 8} +do_execsql_test 7.3.2 { + SELECT * FROM t1; +} {1 2 3 4} + +#------------------------------------------------------------------------- +# Test that "PRAGMA integrity_check" works within a concurrent +# transaction. Within a concurrent transaction, "PRAGMA integrity_check" +# is unable to detect unused database pages, but can detect other types +# of corruption. 
+# +reset_db +do_test 8.1 { + execsql { + PRAGMA journal_mode = wal; + CREATE TABLE kv(k INTEGER PRIMARY KEY, v UNIQUE); + INSERT INTO kv VALUES(NULL, randomblob(750)); + INSERT INTO kv SELECT NULL, randomblob(750) FROM kv; + INSERT INTO kv SELECT NULL, randomblob(750) FROM kv; + INSERT INTO kv SELECT NULL, randomblob(750) FROM kv; + INSERT INTO kv SELECT NULL, randomblob(750) FROM kv; + INSERT INTO kv SELECT NULL, randomblob(750) FROM kv; + DELETE FROM kv WHERE rowid%2; + } + set v [db one {PRAGMA freelist_count}] + expr $v==33 || $v==34 +} {1} +do_execsql_test 8.2 { PRAGMA integrity_check } ok +do_execsql_test 8.3 { + BEGIN CONCURRENT; + PRAGMA integrity_check; +} {ok} +do_execsql_test 8.4 { + INSERT INTO kv VALUES(1100, 1100); + PRAGMA integrity_check; +} {ok} +do_execsql_test 8.5 { + COMMIT; + PRAGMA integrity_check; +} {ok} + +#------------------------------------------------------------------------- +# Test that concurrent transactions do not allow foreign-key constraints +# to be bypassed. 
+# +do_multiclient_test tn { + do_test 9.$tn.1 { + sql1 { + PRAGMA journal_mode = wal; + CREATE TABLE pp(i INTEGER PRIMARY KEY, j); + CREATE TABLE cc(a, b REFERENCES pp); + + WITH seq(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM seq WHERE i<100) + INSERT INTO pp SELECT i, randomblob(1000) FROM seq; + + PRAGMA foreign_keys = 1; + } + } {wal} + + + do_test 9.$tn.2.1 { + sql1 { + BEGIN CONCURRENT; + INSERT INTO cc VALUES(42, 42); + } + } {} + do_test 9.$tn.2.2 { + sql2 { DELETE FROM pp WHERE i=42 } + list [catch { sql1 COMMIT } msg] $msg + } {1 {database is locked}} + do_test 9.$tn.2.3 { + sql1 ROLLBACK + } {} + + do_test 9.$tn.3.1 { + sql1 { + PRAGMA foreign_keys = 0; + BEGIN CONCURRENT; + INSERT INTO cc VALUES(43, 43); + } + } {} + do_test 9.$tn.3.2 { + sql2 { DELETE FROM pp WHERE i=43 } + list [catch { sql1 COMMIT } msg] $msg + } {0 {}} + + do_test 9.$tn.4.1 { + sql1 { + PRAGMA foreign_keys = on; + BEGIN CONCURRENT; + INSERT INTO cc VALUES(44, 44); + } + } {} + do_test 9.$tn.4.2 { + sql2 { DELETE FROM pp WHERE i=1 } + list [catch { sql1 COMMIT } msg] $msg + } {0 {}} +} + +#------------------------------------------------------------------------- +# Test that even if a SELECT statement appears before all writes within +# a CONCURRENT transaction, the pages it reads are still considered when +# considering whether or not the transaction may be committed. 
+# +do_multiclient_test tn { + do_test 10.$tn.1.1 { + sql1 { + PRAGMA journal_mode = wal; + CREATE TABLE t1(a); + CREATE TABLE t2(b); + CREATE TABLE t3(c); + INSERT INTO t1 VALUES(1), (2), (3); + INSERT INTO t2 VALUES(1), (2), (3); + INSERT INTO t3 VALUES(1), (2), (3); + } + } {wal} + + do_test 10.$tn.1.2 { + sql1 { + BEGIN CONCURRENT; + SELECT * FROM t1; + INSERT INTO t2 VALUES(4); + } + } {1 2 3} + + do_test 10.$tn.1.3 { + sql2 { INSERT INTO t1 VALUES(4) } + list [catch {sql1 COMMIT} msg] $msg + } {1 {database is locked}} + sql1 ROLLBACK + + # In this case, because the "SELECT * FROM t1" is first stepped before + # the "BEGIN CONCURRENT", the pages it reads are not recorded by the + # pager object. And so the transaction can be committed. Technically + # this behaviour (the effect of an ongoing SELECT on a BEGIN CONCURRENT + # transacation) is undefined. + # + do_test 10.$tn.2.1 { + code1 { + set ::stmt [sqlite3_prepare db "SELECT * FROM t1" -1 dummy] + sqlite3_step $::stmt + } + } {SQLITE_ROW} + do_test 10.$tn.2.2 { + sql1 { + BEGIN CONCURRENT; + INSERT INTO t2 VALUES(4); + } + code1 { + set res [list] + lappend res [sqlite3_column_int $::stmt 0] + while {[sqlite3_step $::stmt]=="SQLITE_ROW"} { + lappend res [sqlite3_column_int $::stmt 0] + } + sqlite3_finalize $::stmt + set res + } + } {1 2 3 4} + do_test 10.$tn.2.3 { + sql2 { INSERT INTO t1 VALUES(5) } + sql1 COMMIT + } {} + + # More tests surrounding long-lived prepared statements and concurrent + # transactions. 
+ do_test 10.$tn.3.1 { + sql1 { + BEGIN CONCURRENT; + SELECT * FROM t1; + COMMIT; + } + sql1 { + BEGIN CONCURRENT; + INSERT INTO t2 VALUES(5); + } + sql2 { + INSERT INTO t1 VALUES(5); + } + sql1 COMMIT + sql3 { + SELECT * FROM t2; + } + } {1 2 3 4 5} + do_test 10.$tn.3.2 { + sql1 { + BEGIN CONCURRENT; + SELECT * FROM t1; + ROLLBACK; + } + sql1 { + BEGIN CONCURRENT; + INSERT INTO t2 VALUES(6); + } + sql2 { + INSERT INTO t1 VALUES(6); + } + sql1 COMMIT + sql3 { SELECT * FROM t2 } + } {1 2 3 4 5 6} + do_test 10.$tn.3.3 { + sql1 { BEGIN CONCURRENT } + code1 { + set ::stmt [sqlite3_prepare db "SELECT * FROM t1" -1 dummy] + sqlite3_step $::stmt + } + sql1 { + INSERT INTO t2 VALUES(7); + SELECT * FROM t3; + ROLLBACK; + BEGIN CONCURRENT; + } + sql2 { INSERT INTO t3 VALUES(5) } + code1 { sqlite3_finalize $::stmt } + sql1 { + INSERT INTO t2 VALUES(8); + COMMIT; + } + } {} +} + +do_multiclient_test tn { + do_test 11.$tn.1 { + sql1 { + PRAGMA journal_mode = wal; + CREATE TABLE t1(a); + } + } {wal} + + do_test 11.$tn.2 { + code1 { sqlite3_wal_info db main } + } {0 2} + + do_test 11.$tn.3 { + sql1 { INSERT INTO t1 VALUES(1) } + code1 { sqlite3_wal_info db main } + } {2 3} + + do_test 11.$tn.4 { + sql2 { INSERT INTO t1 VALUES(2) } + code2 { sqlite3_wal_info db2 main } + } {3 4} + + do_test 11.$tn.5 { + sql1 { PRAGMA wal_checkpoint } + sql2 { INSERT INTO t1 VALUES(3) } + code2 { sqlite3_wal_info db2 main } + } {0 1} +} + +reset_db +do_execsql_test 12.0 { + PRAGMA journal_mode = wal; + CREATE TABLE tx(a INTEGER PRIMARY KEY, b); +} {wal} +do_test 12.1 { + for {set i 0} {$i < 50} {incr i} { + execsql { + BEGIN CONCURRENT; + INSERT INTO tx(b) VALUES( randomblob( 1200 ) ); + COMMIT; + } + } + execsql { PRAGMA page_size } +} {1024} +do_execsql_test 12.2 { + DELETE FROM tx; +} +do_test 12.3 { + for {set i 0} {$i < 50} {incr i} { + execsql { + BEGIN CONCURRENT; + INSERT INTO tx(b) VALUES( randomblob( 1200 ) ); + COMMIT; + } + } + execsql { PRAGMA page_size } +} {1024} +do_execsql_test 
12.4 { + DELETE FROM tx; +} +do_test 12.5 { + execsql { BEGIN CONCURRENT } + for {set i 0} {$i < 5000} {incr i} { + execsql { + INSERT INTO tx(b) VALUES( randomblob( 1200 ) ); + } + } + execsql { COMMIT } + execsql { PRAGMA page_size } +} {1024} + + +finish_test diff --git a/test/concurrent3.test b/test/concurrent3.test new file mode 100644 index 0000000000..8ca8a9887a --- /dev/null +++ b/test/concurrent3.test @@ -0,0 +1,234 @@ +# 2015 July 26 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# +# Tests for transactions started with BEGIN CONCURRENT. The tests in this +# file focus on testing that deferred page allocation works properly. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +set ::testprefix concurrent3 + +if {$AUTOVACUUM} { finish_test ; return } +ifcapable !concurrent { + finish_test + return +} + +db close +sqlite3_shutdown +test_sqlite3_log xLog +proc xLog {error_code msg} { + # puts "$error_code: $msg" + # Enable the previous for debugging +} +reset_db + +proc create_schema {} { + db eval { + PRAGMA journal_mode = wal; + + CREATE TABLE t1(x, y); + CREATE TABLE t2(x, y); + CREATE TABLE t3(x, y); + CREATE TABLE t4(x, y); + + CREATE INDEX i1 ON t1(y, x); + CREATE INDEX i2 ON t2(y, x); + CREATE INDEX i3 ON t3(y, x); + CREATE INDEX i4 ON t4(y, x); + } +} + +proc do_sql_op {iTbl iOp} { + set db "db$iTbl" + + switch $iOp { + "i" { + set sql " + WITH cnt(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM cnt WHERE i<10) + INSERT INTO t$iTbl SELECT randomblob(800), randomblob(800) FROM cnt; + " + } + + "d" { + set sql " + DELETE FROM t$iTbl WHERE rowid IN ( + SELECT rowid FROM t$iTbl ORDER BY 1 ASC LIMIT 10 + ) + " + } + + "D" { 
+ set sql " + DELETE FROM t$iTbl WHERE rowid IN ( + SELECT rowid FROM t$iTbl o WHERE ( + SELECT count(*) FROM t$iTbl i WHERE i.rowid$nPg + } {1} + + do_test 1.$tn.7 { + sql2 { PRAGMA integrity_check } + } {ok} + + do_test 1.$tn.8 { + sql1 { + BEGIN CONCURRENT; + CREATE TABLE t4(a, b); + } + sql2 { + INSERT INTO t1 VALUES(2, 2); + } + list [catch { sql1 COMMIT } msg] $msg + } {1 {database is locked}} + sql1 ROLLBACK + + do_test 1.$tn.9 { + sql1 { + BEGIN CONCURRENT; + CREATE TEMP TABLE t5(a, b); + INSERT INTO t2 VALUES('x', 'x'); + } + sql2 { + INSERT INTO t1 VALUES(3, 3); + CREATE TEMP TABLE t1(x, y); + } + sql1 COMMIT + } {} +} + + + +finish_test + + diff --git a/test/concurrent9.test b/test/concurrent9.test new file mode 100644 index 0000000000..2eb18c8623 --- /dev/null +++ b/test/concurrent9.test @@ -0,0 +1,152 @@ +# 2023 January 12 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#************************************************************************* +# This file implements regression tests for SQLite library. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix concurrent9 + +do_execsql_test 1.0 { + CREATE TABLE t1(x); + INSERT INTO t1 VALUES(1), (2); + CREATE TABLE t2(y); + INSERT INTO t2 VALUES('a'), ('b'); + PRAGMA journal_mode = wal; +} {wal} + +db close + +#------------------------------------------------------------------------- +# Fix a problem that may occur if a BEGIN CONCURRENT transaction is +# started when the wal file is completely empty and committed after +# it has been initialized by some other connection. 
+# +sqlite3 db test.db +sqlite3 db2 test.db + +do_execsql_test -db db 1.1 { + BEGIN CONCURRENT; + INSERT INTO t2 VALUES('c'); +} + +do_execsql_test -db db2 1.2 { + INSERT INTO t1 VALUES(3); +} + +do_execsql_test -db db 1.3 { + COMMIT; +} + +do_execsql_test -db db2 1.4 { + SELECT * FROM t1; + SELECT * FROM t2; +} {1 2 3 a b c} + +db2 close + +#------------------------------------------------------------------------- +reset_db + +do_execsql_test 2.1 { + CREATE TABLE t1(a INTEGER PRIMARY KEY, b); + PRAGMA journal_mode = wal; + WITH s(i) AS ( + SELECT 1 UNION SELECT i+1 FROM s WHERE i<500 + ) + INSERT INTO t1(b) SELECT hex(randomblob(200)) FROM s; + PRAGMA page_count; +} {wal 255} + +sqlite3 db2 test.db +do_execsql_test -db db2 2.2 { + DELETE FROM t1 WHERE a<100; + PRAGMA freelist_count; +} {49} + +do_execsql_test 2.3 { + BEGIN CONCURRENT; + WITH s(i) AS ( + SELECT 1 UNION SELECT i+1 FROM s WHERE i<100 + ) + INSERT INTO t1(b) SELECT hex(randomblob(200)) FROM s; +} + +sqlite3_db_status db CACHE_MISS 1 +do_execsql_test 2.4.1 { + COMMIT; +} + +do_test 2.4.2 { + lindex [sqlite3_db_status db CACHE_MISS 0] 1 +} {1} + +do_execsql_test -db db2 2.5 { + DELETE FROM t1 WHERE a<200; + PRAGMA freelist_count; +} {50} + +do_execsql_test 2.6 { + BEGIN CONCURRENT; + WITH s(i) AS ( + SELECT 1 UNION SELECT i+1 FROM s WHERE i<100 + ) + INSERT INTO t1(b) SELECT hex(randomblob(200)) FROM s; + DELETE FROM t1 WHERE rowid BETWEEN 600 AND 680; +} + +sqlite3_db_status db CACHE_MISS 1 +do_execsql_test 2.7.1 { + COMMIT; +} +do_test 2.7.2 { + lindex [sqlite3_db_status db CACHE_MISS 0] 1 +} {1} + +do_execsql_test 2.8 { + PRAGMA integrity_check; +} {ok} + +db2 close + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 3.0 { + PRAGMA journal_mode = wal2; + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); +} {wal2} + +sqlite3 db2 test.db + +do_execsql_test -db db2 3.1 { + BEGIN CONCURRENT; + UPDATE t1 SET b=3; +} + +do_execsql_test 3.2 { + INSERT 
INTO t1 VALUES(3, 4); +} + +do_test 3.3 { + list [catch { db2 eval COMMIT } msg] $msg +} {1 {database is locked}} + +do_test 3.4 { + list [catch { db2 eval COMMIT } msg] $msg +} {1 {database is locked}} + +db2 close + + +finish_test + + diff --git a/test/corruptA.test b/test/corruptA.test index 12d918615f..653b88b693 100644 --- a/test/corruptA.test +++ b/test/corruptA.test @@ -47,7 +47,7 @@ db close forcecopy test.db test.db-template set unreadable_version 02 -ifcapable wal { set unreadable_version 03 } +ifcapable wal { set unreadable_version 04 } do_test corruptA-2.1 { forcecopy test.db-template test.db hexio_write test.db 19 $unreadable_version ;# the read format number diff --git a/test/corruptN.test b/test/corruptN.test index 2297991aba..096940af13 100644 --- a/test/corruptN.test +++ b/test/corruptN.test @@ -141,7 +141,6 @@ do_test 2.0 { | end c-b92b.txt.db }]} {} - reset_db if {![info exists ::G(perm:presql)]} { do_execsql_test 3.0 { diff --git a/test/fts3corrupt4.test b/test/fts3corrupt4.test index 01effa0850..679df6e2e3 100644 --- a/test/fts3corrupt4.test +++ b/test/fts3corrupt4.test @@ -4376,6 +4376,9 @@ do_test 25.0 { | end crash-dde9e76ed8ab2d.db }]} {} +proc rndblob {n} { return [sqlite3_randomness $n] } +db func randomblob rndblob + reset_prng_state do_catchsql_test 25.1 { diff --git a/test/parser1.test b/test/parser1.test deleted file mode 100644 index b8d3d8b420..0000000000 --- a/test/parser1.test +++ /dev/null @@ -1,125 +0,0 @@ -# 2014-08-24 -# -# The author disclaims copyright to this source code. In place of -# a legal notice, here is a blessing: -# -# May you do good and not evil. -# May you find forgiveness for yourself and forgive others. -# May you share freely, never taking more than you give. -# -#*********************************************************************** -# This file implements regression tests for SQLite library. -# The focus of this script is testing details of the SQL language parser. 
-# - -set testdir [file dirname $argv0] -source $testdir/tester.tcl - -do_catchsql_test parser1-1.1 { - CREATE TABLE t1( - a TEXT PRIMARY KEY, - b TEXT, - FOREIGN KEY(b COLLATE nocase DESC) REFERENCES t1(a COLLATE binary ASC) - ); -} {1 {syntax error after column name "b"}} - - -# Verify that a legacy schema in the sqlite_master file is allowed to have -# COLLATE, ASC, and DESC keywords on the id list of a FK constraint, and that -# those keywords are silently ignored. -# -sqlite3_db_config db DEFENSIVE 0 -do_execsql_test parser1-1.2 { - CREATE TABLE t1( - a TEXT PRIMARY KEY, - b TEXT, - FOREIGN KEY(b) REFERENCES t1(a) - ); - INSERT INTO t1 VALUES('abc',NULL),('xyz','abc'); - PRAGMA writable_schema=on; - UPDATE sqlite_master SET sql='CREATE TABLE t1( - a TEXT PRIMARY KEY, - b TEXT, - FOREIGN KEY(b COLLATE nocase) REFERENCES t1(a) - )' WHERE name='t1'; - SELECT name FROM sqlite_master WHERE sql LIKE '%collate%'; -} {t1} -sqlite3 db2 test.db -do_test parser1-1.3 { - sqlite3 db2 test.db - db2 eval {SELECT * FROM t1 ORDER BY 1} -} {abc {} xyz abc} -db2 close - -do_execsql_test parser1-1.4 { - UPDATE sqlite_master SET sql='CREATE TABLE t1( - a TEXT PRIMARY KEY, - b TEXT, - FOREIGN KEY(b ASC) REFERENCES t1(a) - )' WHERE name='t1'; - SELECT name FROM sqlite_master WHERE sql LIKE '%ASC%'; -} {t1} -sqlite3 db2 test.db -do_test parser1-1.5 { - sqlite3 db2 test.db - db2 eval {SELECT * FROM t1 ORDER BY 1} -} {abc {} xyz abc} -db2 close - -do_catchsql_test parser1-2.1 { - WITH RECURSIVE - c(x COLLATE binary) AS (VALUES(1) UNION SELECT x+1 FROM c WHERE x<5) - SELECT x FROM c; -} {1 {syntax error after column name "x"}} -do_catchsql_test parser1-2.2 { - WITH RECURSIVE - c(x ASC) AS (VALUES(1) UNION SELECT x+1 FROM c WHERE x<5) - SELECT x FROM c; -} {1 {syntax error after column name "x"}} - -# Verify that the comma between multiple table constraints is -# optional. -# -# The missing comma is technically a syntax error. 
But we have to support -# it because there might be legacy databases that omit the commas in their -# sqlite_master tables. -# -do_execsql_test parser1-3.1 { - CREATE TABLE t300(id INTEGER PRIMARY KEY); - CREATE TABLE t301( - id INTEGER PRIMARY KEY, - c1 INTEGER NOT NULL, - c2 INTEGER NOT NULL, - c3 BOOLEAN NOT NULL DEFAULT 0, - FOREIGN KEY(c1) REFERENCES t300(id) ON DELETE CASCADE ON UPDATE RESTRICT - /* no comma */ - FOREIGN KEY(c2) REFERENCES t300(id) ON DELETE CASCADE ON UPDATE RESTRICT - /* no comma */ - UNIQUE(c1, c2) - ); - PRAGMA foreign_key_list(t301); -} {0 0 t300 c2 id RESTRICT CASCADE NONE 1 0 t300 c1 id RESTRICT CASCADE NONE} - -# 2025-07-01 https://sqlite.org/forum/forumpost/f4878de3e7dd4764 -# Do not allow parse-time optimizations to omit aggregate functions, -# because doing so can change the meaning of the query. -# -unset -nocomplain zero -set zero [expr {0+0}] -do_execsql_test parser1-4.1 { - DROP TABLE IF EXISTS t1; - CREATE TABLE t1(x); - SELECT max(x) AND $zero FROM t1; -} 0 -do_execsql_test parser1-4.2 { - SELECT max(x) AND 0 FROM t1; -} 0 -do_execsql_test parser1-4.3 { - SELECT max(x) IN () FROM t1; -} 0 -do_execsql_test parser1-4.4 { - SELECT max(x) NOT IN () FROM t1; -} 1 - - -finish_test diff --git a/test/permutations.test b/test/permutations.test index 02f4827189..8e107a46d9 100644 --- a/test/permutations.test +++ b/test/permutations.test @@ -91,7 +91,6 @@ foreach f [glob $testdir/*.test] { lappend alltests [file tail $f] } foreach f [glob -nocomplain \ $testdir/../ext/rtree/*.test \ $testdir/../ext/fts5/test/*.test \ - $testdir/../ext/expert/*.test \ $testdir/../ext/lsm1/test/*.test \ $testdir/../ext/recover/*.test \ $testdir/../ext/rbu/*.test \ @@ -465,18 +464,28 @@ lappend ::testsuitelist xxx test_suite "coverage-wal" -description { Coverage tests for file wal.c. 
} -files { - wal.test wal2.test wal3.test wal4.test wal5.test - wal64k.test wal6.test wal7.test wal8.test wal9.test - walbak.test walbig.test walblock.test walcksum.test walcrash2.test - walcrash3.test walcrash4.test walcrash.test walfault.test walhook.test - walmode.test walnoshm.test waloverwrite.test walpersist.test - walprotocol2.test walprotocol.test walro2.test walrofault.test - walro.test walshared.test walslow.test walvfs.test - walfault2.test - nockpt.test - +wal2big.test wal2recover.test wal2rewrite.test +wal2simple.test wal2snapshot.test wal2.test +wal3.test wal4.test wal5.test +wal64k.test wal6.test wal7.test wal8.test wal9.test +walbak.test walbig.test walblock.test walcksum.test +walfault.test walhook.test walmode.test walnoshm.test +waloverwrite.test walpersist.test walprotocol2.test +walprotocol.test walro2.test walrofault.test walro.test +walshared.test walslow.test wal.test +wal2savepoint.test wal2lock.test wal2recover2.test + + wal2concurrent.test + concurrent.test concurrent2.test concurrent3.test + concurrent4.test concurrent5.test concurrent6.test + concurrent7.test + concfault.test concfault2.test + + walvfs.test walfault2.test nockpt.test snapshot2.test snapshot3.test snapshot4.test snapshot_fault.test snapshot.test snapshot_up.test + walcrash2.test walcrash3.test walcrash4.test walcrash.test + wal2fault.test } test_suite "coverage-pager" -description { @@ -650,6 +659,16 @@ test_suite "onefile" -description { rollback.test select1.test select2.test select3.test } +# Run some tests using the "unix-excl" VFS. +# +test_suite "unix-excl" -description { + Run some tests using the "unix-excl" VFS +} -initialize { + set ::G(perm:sqlite3_args) [list -vfs unix-excl] +} -files { + shmlock.test +} + # Run some tests using UTF-16 databases. 
# test_suite "utf16" -description { @@ -1033,6 +1052,23 @@ test_suite "wal" -description { fts3c.test fts3d.test fts3e.test fts3query.test } +test_suite "wal2" -description { + Run tests with journal_mode=WAL2 +} -initialize { + set ::G(savepoint6_iterations) 100 +} -shutdown { + unset -nocomplain ::G(savepoint6_iterations) +} -files { + savepoint.test savepoint2.test savepoint6.test + trans.test avtrans.test + + fts3aa.test fts3ab.test fts3ac.test fts3ad.test + fts3ae.test fts3af.test fts3ag.test fts3ah.test + fts3ai.test fts3aj.test fts3ak.test fts3al.test + fts3am.test fts3an.test fts3ao.test fts3b.test + fts3c.test fts3d.test fts3e.test fts3query.test +} + test_suite "rtree" -description { All R-tree related tests. Provides coverage of source file rtree.c. } -files [glob -nocomplain $::testdir/../ext/rtree/*.test] diff --git a/test/rdonly.test b/test/rdonly.test index 404c613b21..bdc70cdd70 100644 --- a/test/rdonly.test +++ b/test/rdonly.test @@ -41,7 +41,7 @@ do_test rdonly-1.1.1 { sqlite3_db_readonly db main } {0} -# Changes the write version from 1 to 3. Verify that the database +# Changes the write version from 1 to 4. Verify that the database # can be read but not written. # do_test rdonly-1.2 { @@ -49,7 +49,7 @@ do_test rdonly-1.2 { hexio_get_int [hexio_read test.db 18 1] } 1 do_test rdonly-1.3 { - hexio_write test.db 18 03 + hexio_write test.db 18 04 sqlite3 db test.db execsql { SELECT * FROM t1; @@ -83,7 +83,7 @@ do_test rdonly-1.5 { # the database is read-only until after it is locked. 
# set ro_version 02 -ifcapable wal { set ro_version 03 } +ifcapable wal { set ro_version 04 } do_test rdonly-1.6 { hexio_write test.db 18 $ro_version ; # write-version hexio_write test.db 24 11223344 ; # change-counter diff --git a/test/savepoint.test b/test/savepoint.test index 3952981dab..048771659e 100644 --- a/test/savepoint.test +++ b/test/savepoint.test @@ -30,6 +30,7 @@ do_test savepoint-1.1 { RELEASE sp1; } } {} +wal_check_journal_mode savepoint-1.1 do_test savepoint-1.2 { execsql { SAVEPOINT sp1; @@ -807,7 +808,8 @@ do_test savepoint-11.6 { integrity_check savepoint-11.7 do_test savepoint-11.8 { execsql { ROLLBACK } - execsql { PRAGMA wal_checkpoint } + db close + sqlite3 db test.db file size test.db } {8192} diff --git a/test/superlock.test b/test/superlock.test index 10e7caa298..281083f7e7 100644 --- a/test/superlock.test +++ b/test/superlock.test @@ -14,6 +14,14 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl source $testdir/lock_common.tcl +# The superlock demo does not work with SQLITE_ENABLE_WAL_BIGHASH, as it +# assumes the *-shm page-size is 32KB. +# +ifcapable wal_bighash { + finish_test + return +} + set testprefix superlock do_not_use_codec diff --git a/test/tester.tcl b/test/tester.tcl index 856df54215..bddbe26c0c 100644 --- a/test/tester.tcl +++ b/test/tester.tcl @@ -553,6 +553,7 @@ proc reset_db {} { forcedelete test.db forcedelete test.db-journal forcedelete test.db-wal + forcedelete test.db-wal2 sqlite3 db ./test.db set ::DB [sqlite3_connection_pointer db] if {[info exists ::SETUP_SQL]} { @@ -2306,17 +2307,32 @@ proc drop_all_indexes {{db db}} { # Returns true if this test should be run in WAL mode. False otherwise. 
# proc wal_is_wal_mode {} { - expr {[permutation] eq "wal"} + if {[permutation] eq "wal"} { return 1 } + if {[permutation] eq "wal2"} { return 2 } + return 0 } proc wal_set_journal_mode {{db db}} { - if { [wal_is_wal_mode] } { - $db eval "PRAGMA journal_mode = WAL" + switch -- [wal_is_wal_mode] { + 0 { + } + + 1 { + $db eval "PRAGMA journal_mode = WAL" + } + + 2 { + $db eval "PRAGMA journal_mode = WAL2" + } } } proc wal_check_journal_mode {testname {db db}} { if { [wal_is_wal_mode] } { $db eval { SELECT * FROM sqlite_master } - do_test $testname [list $db eval "PRAGMA main.journal_mode"] {wal} + set expected "wal" + if {[wal_is_wal_mode]==2} { + set expected "wal2" + } + do_test $testname [list $db eval "PRAGMA main.journal_mode"] $expected } } diff --git a/test/threadtest3.c b/test/threadtest3.c index 3a12c5889b..da8effc67d 100644 --- a/test/threadtest3.c +++ b/test/threadtest3.c @@ -1494,6 +1494,8 @@ static void dynamic_triggers(int nMs){ #include "tt3_stress.c" #include "tt3_shared.c" +#include "tt3_bcwal2.c" + int main(int argc, char **argv){ struct ThreadTest { void (*xTest)(int); /* Routine for running this test */ @@ -1518,6 +1520,8 @@ int main(int argc, char **argv){ { stress1, "stress1", 10000 }, { stress2, "stress2", 60000 }, { shared1, "shared1", 10000 }, + + { bcwal2_1, "bcwal2_1", 100000 }, }; static char *substArgv[] = { 0, "*", 0 }; int i, iArg; diff --git a/test/tt3_bcwal2.c b/test/tt3_bcwal2.c new file mode 100644 index 0000000000..d0e3fab4d4 --- /dev/null +++ b/test/tt3_bcwal2.c @@ -0,0 +1,122 @@ +/* +** 2011-02-02 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file is part of the test program "threadtest3". 
Despite being a C +** file it is not compiled separately, but included by threadtest3.c using +** the #include directive normally used with header files. +** +** This file contains the implementation of test cases: +** +** bcwal2_1 +*/ + +static char *bcwal2_1_checkpointer(int iTid, void *pArg){ + Error err = {0}; /* Error code and message */ + Sqlite db = {0}; /* SQLite database connection */ + int nIter = 0; + + opendb(&err, &db, "test.db", 0); + while( !timetostop(&err) ){ + sql_script(&err, &db, "PRAGMA wal_checkpoint;"); + nIter++; + } + closedb(&err, &db); + + print_and_free_err(&err); + return sqlite3_mprintf("%d iterations", nIter); +} + +static char *bcwal2_1_integrity(int iTid, void *pArg){ + Error err = {0}; /* Error code and message */ + Sqlite db = {0}; /* SQLite database connection */ + int nIter = 0; + + opendb(&err, &db, "test.db", 0); + while( !timetostop(&err) ){ + // integrity_check(&err, &db); + sql_script(&err, &db, "SELECT * FROM t1;"); + nIter++; + } + closedb(&err, &db); + + print_and_free_err(&err); + return sqlite3_mprintf("%d integrity-checks", nIter); +} + +static char *bcwal2_1_writer(int iTid, void *pArg){ + Error err = {0}; /* Error code and message */ + Sqlite db = {0}; /* SQLite database connection */ + int nWrite = 0; /* Writes so far */ + int nBusy = 0; /* Busy errors so far */ + sqlite3_mutex *pMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_APP1); + + opendb(&err, &db, "test.db", 0); + while( !timetostop(&err) ){ + + sql_script(&err, &db, + "PRAGMA wal_autocheckpoint = 0;" + "BEGIN CONCURRENT;" + " REPLACE INTO t1 VALUES( abs(random() % 100000), " + " hex(randomblob( abs( random() % 200 ) + 50 ))" + " );" + ); + + if( err.rc==SQLITE_OK ){ + sqlite3_mutex_enter(pMutex); + sql_script(&err, &db, "COMMIT"); + sqlite3_mutex_leave(pMutex); + if( err.rc==SQLITE_OK ){ + nWrite++; + }else{ + clear_error(&err, SQLITE_BUSY); + sql_script(&err, &db, "ROLLBACK"); + nBusy++; + } + + assert( err.rc!=SQLITE_OK || 
sqlite3_get_autocommit(db.db)==1 ); + } + } + closedb(&err, &db); + + print_and_free_err(&err); + return sqlite3_mprintf("%d successful writes, %d busy", nWrite, nBusy); +} + +static void bcwal2_1(int nMs){ + Error err = {0}; + Sqlite db = {0}; + Threadset threads = {0}; + + opendb(&err, &db, "test.db", 1); + sql_script(&err, &db, + "PRAGMA page_size = 1024;" + "PRAGMA journal_mode = wal2;" + "CREATE TABLE t1(ii INTEGER PRIMARY KEY, tt TEXT);" + "CREATE INDEX t1tt ON t1(tt);" + ); + + setstoptime(&err, nMs); + + launch_thread(&err, &threads, bcwal2_1_writer, 0); + launch_thread(&err, &threads, bcwal2_1_writer, 0); + launch_thread(&err, &threads, bcwal2_1_writer, 0); + launch_thread(&err, &threads, bcwal2_1_integrity, 0); + launch_thread(&err, &threads, bcwal2_1_checkpointer, 0); + + join_all_threads(&err, &threads); + + /* Do a final integrity-check on the db */ + integrity_check(&err, &db); + closedb(&err, &db); + + print_and_free_err(&err); +} + diff --git a/test/tt3_core.c b/test/tt3_core.c new file mode 100644 index 0000000000..31612e9cf6 --- /dev/null +++ b/test/tt3_core.c @@ -0,0 +1,1035 @@ +/* +** 2016-05-07 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +*/ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* +** The "Set Error Line" macro. +*/ +#define SEL(e) ((e)->iLine = ((e)->rc ? 
(e)->iLine : __LINE__)) + +/* Database functions */ +#define opendb(w,x,y,z) (SEL(w), opendb_x(w,x,y,z)) +#define closedb(y,z) (SEL(y), closedb_x(y,z)) + +/* Functions to execute SQL */ +#define sql_script(x,y,z) (SEL(x), sql_script_x(x,y,z)) +#define integrity_check(x,y) (SEL(x), integrity_check_x(x,y)) +#define execsql_i64(x,y,...) (SEL(x), execsql_i64_x(x,y,__VA_ARGS__)) +#define execsql_text(x,y,z,...) (SEL(x), execsql_text_x(x,y,z,__VA_ARGS__)) +#define execsql(x,y,...) (SEL(x), (void)execsql_i64_x(x,y,__VA_ARGS__)) +#define sql_script_printf(x,y,z,...) ( \ + SEL(x), sql_script_printf_x(x,y,z,__VA_ARGS__) \ +) + +/* Thread functions */ +#define launch_thread(w,x,y,z) (SEL(w), launch_thread_x(w,x,y,z)) +#define join_all_threads(y,z) (SEL(y), join_all_threads_x(y,z)) + +/* Timer functions */ +#define setstoptime(y,z) (SEL(y), setstoptime_x(y,z)) +#define timetostop(z) (SEL(z), timetostop_x(z)) + +/* Report/clear errors. */ +#define test_error(z, ...) test_error_x(z, sqlite3_mprintf(__VA_ARGS__)) +#define clear_error(y,z) clear_error_x(y, z) + +/* File-system operations */ +#define filesize(y,z) (SEL(y), filesize_x(y,z)) +#define filecopy(x,y,z) (SEL(x), filecopy_x(x,y,z)) + +#define PTR2INT(x) ((int)((intptr_t)x)) +#define INT2PTR(x) ((void*)((intptr_t)x)) + +/* +** End of test code/infrastructure interface macros. +*************************************************************************/ + + +/************************************************************************ +** Start of command line processing utilities. +*/ +#define CMDLINE_INT 1 +#define CMDLINE_BOOL 2 +#define CMDLINE_STRING 3 + +typedef struct CmdlineArg CmdlineArg; +struct CmdlineArg { + const char *zSwitch; + int eType; + int iOffset; +}; + +static void cmdline_error(const char *zFmt, ...){ + va_list ap; /* ... 
arguments */ + char *zMsg = 0; + va_start(ap, zFmt); + zMsg = sqlite3_vmprintf(zFmt, ap); + fprintf(stderr, "%s\n", zMsg); + sqlite3_free(zMsg); + va_end(ap); + exit(-1); +} + +static void cmdline_usage(const char *zPrg, CmdlineArg *apArg){ + int i; + fprintf(stderr, "Usage: %s SWITCHES\n", zPrg); + fprintf(stderr, "\n"); + fprintf(stderr, "where switches are\n"); + for(i=0; apArg[i].zSwitch; i++){ + const char *zExtra = ""; + switch( apArg[i].eType ){ + case CMDLINE_STRING: zExtra = "STRING"; break; + case CMDLINE_INT: zExtra = "N"; break; + case CMDLINE_BOOL: zExtra = ""; break; + default: + zExtra = "???"; + break; + } + fprintf(stderr, " %s %s\n", apArg[i].zSwitch, zExtra); + } + fprintf(stderr, "\n"); + exit(-2); +} + +static char *cmdline_construct(CmdlineArg *apArg, void *pObj){ + unsigned char *p = (unsigned char*)pObj; + char *zRet = 0; + int iArg; + + for(iArg=0; apArg[iArg].zSwitch; iArg++){ + const char *zSpace = (zRet ? " " : ""); + CmdlineArg *pArg = &apArg[iArg]; + + switch( pArg->eType ){ + case CMDLINE_STRING: { + char *zVal = *(char**)(p + pArg->iOffset); + if( zVal ){ + zRet = sqlite3_mprintf("%z%s%s %s", zRet, zSpace, pArg->zSwitch,zVal); + } + break; + }; + + case CMDLINE_INT: { + zRet = sqlite3_mprintf("%z%s%s %d", zRet, zSpace, pArg->zSwitch, + *(int*)(p + pArg->iOffset) + ); + break; + }; + + case CMDLINE_BOOL: + if( *(int*)(p + pArg->iOffset) ){ + zRet = sqlite3_mprintf("%z%s%s", zRet, zSpace, pArg->zSwitch); + } + break; + + default: + zRet = sqlite3_mprintf("%z%s%s ???", zRet, zSpace, pArg->zSwitch); + } + } + + return zRet; +} + +static void cmdline_process( + CmdlineArg *apArg, + int argc, + const char **argv, + void *pObj +){ + int i; + int iArg; + unsigned char *p = (unsigned char*)pObj; + + for(i=1; i=0 ){ + cmdline_error("ambiguous switch: %s", z); + } + iOpt = iArg; + switch( apArg[iArg].eType ){ + case CMDLINE_INT: + i++; + if( i==argc ){ + cmdline_error("option requires an argument: %s", z); + } + *(int*)(p + apArg[iArg].iOffset) 
= atoi(argv[i]); + break; + + case CMDLINE_STRING: + i++; + if( i==argc ){ + cmdline_error("option requires an argument: %s", z); + } + *(char**)(p + apArg[iArg].iOffset) = sqlite3_mprintf("%s", argv[i]); + break; + + case CMDLINE_BOOL: + *(int*)(p + apArg[iArg].iOffset) = 1; + break; + + default: + assert( 0 ); + cmdline_error("internal error"); + return; + } + } + } + + if( iOpt<0 ){ + cmdline_usage(argv[0], apArg); + } + } +} + +/* +** End of command line processing utilities. +*************************************************************************/ + + +/* + * This code implements the MD5 message-digest algorithm. + * The algorithm is due to Ron Rivest. This code was + * written by Colin Plumb in 1993, no copyright is claimed. + * This code is in the public domain; do with it what you wish. + * + * Equivalent code is available from RSA Data Security, Inc. + * This code has been tested against that, and is equivalent, + * except that you don't need to include two pages of legalese + * with every copy. + * + * To compute the message digest of a chunk of bytes, declare an + * MD5Context structure, pass it to MD5Init, call MD5Update as + * needed on buffers full of bytes, and then call MD5Final, which + * will fill a supplied 16-byte array with the digest. + */ + +/* + * If compiled on a machine that doesn't have a 32-bit integer, + * you just set "uint32" to the appropriate datatype for an + * unsigned 32-bit integer. For example: + * + * cc -Duint32='unsigned long' md5.c + * + */ +#ifndef uint32 +# define uint32 unsigned int +#endif + +struct MD5Context { + int isInit; + uint32 buf[4]; + uint32 bits[2]; + union { + unsigned char in[64]; + uint32 in32[16]; + } u; +}; +typedef struct MD5Context MD5Context; + +/* + * Note: this code is harmless on little-endian machines. 
+ */ +static void byteReverse (unsigned char *buf, unsigned longs){ + uint32 t; + do { + t = (uint32)((unsigned)buf[3]<<8 | buf[2]) << 16 | + ((unsigned)buf[1]<<8 | buf[0]); + *(uint32 *)buf = t; + buf += 4; + } while (--longs); +} +/* The four core functions - F1 is optimized somewhat */ + +/* #define F1(x, y, z) (x & y | ~x & z) */ +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) + +/* This is the central step in the MD5 algorithm. */ +#define MD5STEP(f, w, x, y, z, data, s) \ + ( w += f(x, y, z) + data, w = w<>(32-s), w += x ) + +/* + * The core of the MD5 algorithm, this alters an existing MD5 hash to + * reflect the addition of 16 longwords of new data. MD5Update blocks + * the data and converts bytes into longwords for this routine. + */ +static void MD5Transform(uint32 buf[4], const uint32 in[16]){ + register uint32 a, b, c, d; + + a = buf[0]; + b = buf[1]; + c = buf[2]; + d = buf[3]; + + MD5STEP(F1, a, b, c, d, in[ 0]+0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, in[ 1]+0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, in[ 2]+0x242070db, 17); + MD5STEP(F1, b, c, d, a, in[ 3]+0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, in[ 4]+0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, in[ 5]+0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, in[ 6]+0xa8304613, 17); + MD5STEP(F1, b, c, d, a, in[ 7]+0xfd469501, 22); + MD5STEP(F1, a, b, c, d, in[ 8]+0x698098d8, 7); + MD5STEP(F1, d, a, b, c, in[ 9]+0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, in[10]+0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, in[11]+0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, in[12]+0x6b901122, 7); + MD5STEP(F1, d, a, b, c, in[13]+0xfd987193, 12); + MD5STEP(F1, c, d, a, b, in[14]+0xa679438e, 17); + MD5STEP(F1, b, c, d, a, in[15]+0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, in[ 1]+0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, in[ 6]+0xc040b340, 9); + MD5STEP(F2, c, d, a, b, in[11]+0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, in[ 0]+0xe9b6c7aa, 20); + 
MD5STEP(F2, a, b, c, d, in[ 5]+0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, in[10]+0x02441453, 9); + MD5STEP(F2, c, d, a, b, in[15]+0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, in[ 4]+0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, in[ 9]+0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, in[14]+0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, in[ 3]+0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, in[ 8]+0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, in[13]+0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, in[ 2]+0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, in[ 7]+0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, in[12]+0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, in[ 5]+0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, in[ 8]+0x8771f681, 11); + MD5STEP(F3, c, d, a, b, in[11]+0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, in[14]+0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, in[ 1]+0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, in[ 4]+0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, in[ 7]+0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, in[10]+0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, in[13]+0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, in[ 0]+0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, in[ 3]+0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, in[ 6]+0x04881d05, 23); + MD5STEP(F3, a, b, c, d, in[ 9]+0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, in[12]+0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, in[15]+0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, in[ 2]+0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, in[ 0]+0xf4292244, 6); + MD5STEP(F4, d, a, b, c, in[ 7]+0x432aff97, 10); + MD5STEP(F4, c, d, a, b, in[14]+0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, in[ 5]+0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, in[12]+0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, in[ 3]+0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, in[10]+0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, in[ 1]+0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, in[ 8]+0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, in[15]+0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, in[ 6]+0xa3014314, 15); + MD5STEP(F4, b, c, d, a, in[13]+0x4e0811a1, 21); + 
MD5STEP(F4, a, b, c, d, in[ 4]+0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, in[11]+0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, in[ 2]+0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, in[ 9]+0xeb86d391, 21); + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; +} + +/* + * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious + * initialization constants. + */ +static void MD5Init(MD5Context *ctx){ + ctx->isInit = 1; + ctx->buf[0] = 0x67452301; + ctx->buf[1] = 0xefcdab89; + ctx->buf[2] = 0x98badcfe; + ctx->buf[3] = 0x10325476; + ctx->bits[0] = 0; + ctx->bits[1] = 0; +} + +/* + * Update context to reflect the concatenation of another buffer full + * of bytes. + */ +static +void MD5Update(MD5Context *ctx, const unsigned char *buf, unsigned int len){ + uint32 t; + + /* Update bitcount */ + + t = ctx->bits[0]; + if ((ctx->bits[0] = t + ((uint32)len << 3)) < t) + ctx->bits[1]++; /* Carry from low to high */ + ctx->bits[1] += len >> 29; + + t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */ + + /* Handle any leading odd-sized chunks */ + + if ( t ) { + unsigned char *p = (unsigned char *)ctx->u.in + t; + + t = 64-t; + if (len < t) { + memcpy(p, buf, len); + return; + } + memcpy(p, buf, t); + byteReverse(ctx->u.in, 16); + MD5Transform(ctx->buf, (uint32 *)ctx->u.in); + buf += t; + len -= t; + } + + /* Process data in 64-byte chunks */ + + while (len >= 64) { + memcpy(ctx->u.in, buf, 64); + byteReverse(ctx->u.in, 16); + MD5Transform(ctx->buf, (uint32 *)ctx->u.in); + buf += 64; + len -= 64; + } + + /* Handle any remaining bytes of data. */ + + memcpy(ctx->u.in, buf, len); +} + +/* + * Final wrapup - pad to 64-byte boundary with the bit pattern + * 1 0* (64-bit count of bits processed, MSB-first) + */ +static void MD5Final(unsigned char digest[16], MD5Context *ctx){ + unsigned count; + unsigned char *p; + + /* Compute number of bytes mod 64 */ + count = (ctx->bits[0] >> 3) & 0x3F; + + /* Set the first char of padding to 0x80. 
This is safe since there is + always at least one byte free */ + p = ctx->u.in + count; + *p++ = 0x80; + + /* Bytes of padding needed to make 64 bytes */ + count = 64 - 1 - count; + + /* Pad out to 56 mod 64 */ + if (count < 8) { + /* Two lots of padding: Pad the first block to 64 bytes */ + memset(p, 0, count); + byteReverse(ctx->u.in, 16); + MD5Transform(ctx->buf, (uint32 *)ctx->u.in); + + /* Now fill the next block with 56 bytes */ + memset(ctx->u.in, 0, 56); + } else { + /* Pad block to 56 bytes */ + memset(p, 0, count-8); + } + byteReverse(ctx->u.in, 14); + + /* Append length in bits and transform */ + ctx->u.in32[14] = ctx->bits[0]; + ctx->u.in32[15] = ctx->bits[1]; + + MD5Transform(ctx->buf, (uint32 *)ctx->u.in); + byteReverse((unsigned char *)ctx->buf, 4); + memcpy(digest, ctx->buf, 16); + memset(ctx, 0, sizeof(*ctx)); /* In case it is sensitive */ +} + +/* +** Convert a 128-bit MD5 digest into a 32-digit base-16 number. +*/ +static void MD5DigestToBase16(unsigned char *digest, char *zBuf){ + static char const zEncode[] = "0123456789abcdef"; + int i, j; + + for(j=i=0; i<16; i++){ + int a = digest[i]; + zBuf[j++] = zEncode[(a>>4)&0xf]; + zBuf[j++] = zEncode[a & 0xf]; + } + zBuf[j] = 0; +} + +/* +** During testing, the special md5sum() aggregate function is available. +** inside SQLite. The following routines implement that function. +*/ +static void md5step(sqlite3_context *context, int argc, sqlite3_value **argv){ + MD5Context *p; + int i; + if( argc<1 ) return; + p = sqlite3_aggregate_context(context, sizeof(*p)); + if( p==0 ) return; + if( !p->isInit ){ + MD5Init(p); + } + for(i=0; izErr); + p->zErr = 0; + p->rc = 0; +} + +static void print_err(Error *p){ + if( p->rc!=SQLITE_OK ){ + int isWarn = 0; + if( p->rc==SQLITE_SCHEMA ) isWarn = 1; + if( sqlite3_strglob("* - no such table: *",p->zErr)==0 ) isWarn = 1; + printf("%s: (%d) \"%s\" at line %d\n", isWarn ? 
"Warning" : "Error", + p->rc, p->zErr, p->iLine); + if( !isWarn ) nGlobalErr++; + fflush(stdout); + } +} + +static void print_and_free_err(Error *p){ + print_err(p); + free_err(p); +} + +static void system_error(Error *pErr, int iSys){ + pErr->rc = iSys; + pErr->zErr = (char *)sqlite3_malloc(512); + strerror_r(iSys, pErr->zErr, 512); + pErr->zErr[511] = '\0'; +} + +static void sqlite_error( + Error *pErr, + Sqlite *pDb, + const char *zFunc +){ + pErr->rc = sqlite3_errcode(pDb->db); + pErr->zErr = sqlite3_mprintf( + "sqlite3_%s() - %s (%d)", zFunc, sqlite3_errmsg(pDb->db), + sqlite3_extended_errcode(pDb->db) + ); +} + +static void test_error_x( + Error *pErr, + char *zErr +){ + if( pErr->rc==SQLITE_OK ){ + pErr->rc = 1; + pErr->zErr = zErr; + }else{ + sqlite3_free(zErr); + } +} + +static void clear_error_x( + Error *pErr, + int rc +){ + if( pErr->rc==rc ){ + pErr->rc = SQLITE_OK; + sqlite3_free(pErr->zErr); + pErr->zErr = 0; + } +} + +static int busyhandler(void *pArg, int n){ + usleep(10*1000); + return 1; +} + +static void opendb_x( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb, /* OUT: Database handle */ + const char *zFile, /* Database file name */ + int bDelete /* True to delete db file before opening */ +){ + if( pErr->rc==SQLITE_OK ){ + int rc; + int flags = SQLITE_OPEN_CREATE | SQLITE_OPEN_READWRITE | SQLITE_OPEN_URI; + if( bDelete ) unlink(zFile); + rc = sqlite3_open_v2(zFile, &pDb->db, flags, 0); + if( rc ){ + sqlite_error(pErr, pDb, "open"); + sqlite3_close(pDb->db); + pDb->db = 0; + }else{ + sqlite3_create_function( + pDb->db, "md5sum", -1, SQLITE_UTF8, 0, 0, md5step, md5finalize + ); + sqlite3_busy_handler(pDb->db, busyhandler, 0); + sqlite3_exec(pDb->db, "PRAGMA synchronous=OFF", 0, 0, 0); + } + } +} + +static void closedb_x( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb /* OUT: Database handle */ +){ + int rc; + int i; + Statement *pIter; + Statement *pNext; + for(pIter=pDb->pCache; pIter; pIter=pNext){ + pNext = pIter->pNext; + 
sqlite3_finalize(pIter->pStmt); + sqlite3_free(pIter); + } + for(i=0; inText; i++){ + sqlite3_free(pDb->aText[i]); + } + sqlite3_free(pDb->aText); + rc = sqlite3_close(pDb->db); + if( rc && pErr->rc==SQLITE_OK ){ + pErr->zErr = sqlite3_mprintf("%s", sqlite3_errmsg(pDb->db)); + } + memset(pDb, 0, sizeof(Sqlite)); +} + +static void sql_script_x( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb, /* Database handle */ + const char *zSql /* SQL script to execute */ +){ + if( pErr->rc==SQLITE_OK ){ + pErr->rc = sqlite3_exec(pDb->db, zSql, 0, 0, &pErr->zErr); + } +} + +static void sql_script_printf_x( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb, /* Database handle */ + const char *zFormat, /* SQL printf format string */ + ... /* Printf args */ +){ + va_list ap; /* ... printf arguments */ + va_start(ap, zFormat); + if( pErr->rc==SQLITE_OK ){ + char *zSql = sqlite3_vmprintf(zFormat, ap); + pErr->rc = sqlite3_exec(pDb->db, zSql, 0, 0, &pErr->zErr); + sqlite3_free(zSql); + } + va_end(ap); +} + +static Statement *getSqlStatement( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb, /* Database handle */ + const char *zSql /* SQL statement */ +){ + Statement *pRet; + int rc; + + for(pRet=pDb->pCache; pRet; pRet=pRet->pNext){ + if( 0==strcmp(sqlite3_sql(pRet->pStmt), zSql) ){ + return pRet; + } + } + + pRet = sqlite3_malloc(sizeof(Statement)); + rc = sqlite3_prepare_v2(pDb->db, zSql, -1, &pRet->pStmt, 0); + if( rc!=SQLITE_OK ){ + sqlite_error(pErr, pDb, "prepare_v2"); + return 0; + } + assert( 0==strcmp(sqlite3_sql(pRet->pStmt), zSql) ); + + pRet->pNext = pDb->pCache; + pDb->pCache = pRet; + return pRet; +} + +static sqlite3_stmt *getAndBindSqlStatement( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb, /* Database handle */ + va_list ap /* SQL followed by parameters */ +){ + Statement *pStatement; /* The SQLite statement wrapper */ + sqlite3_stmt *pStmt; /* The SQLite statement to return */ + int i; /* Used to iterate through parameters */ + + pStatement = 
getSqlStatement(pErr, pDb, va_arg(ap, const char *)); + if( !pStatement ) return 0; + pStmt = pStatement->pStmt; + for(i=1; i<=sqlite3_bind_parameter_count(pStmt); i++){ + const char *zName = sqlite3_bind_parameter_name(pStmt, i); + void * pArg = va_arg(ap, void*); + + switch( zName[1] ){ + case 'i': + sqlite3_bind_int64(pStmt, i, *(i64 *)pArg); + break; + + default: + pErr->rc = 1; + pErr->zErr = sqlite3_mprintf("Cannot discern type: \"%s\"", zName); + pStmt = 0; + break; + } + } + + return pStmt; +} + +static i64 execsql_i64_x( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb, /* Database handle */ + ... /* SQL and pointers to parameter values */ +){ + i64 iRet = 0; + if( pErr->rc==SQLITE_OK ){ + sqlite3_stmt *pStmt; /* SQL statement to execute */ + va_list ap; /* ... arguments */ + va_start(ap, pDb); + pStmt = getAndBindSqlStatement(pErr, pDb, ap); + if( pStmt ){ + int first = 1; + while( SQLITE_ROW==sqlite3_step(pStmt) ){ + if( first && sqlite3_column_count(pStmt)>0 ){ + iRet = sqlite3_column_int64(pStmt, 0); + } + first = 0; + } + if( SQLITE_OK!=sqlite3_reset(pStmt) ){ + sqlite_error(pErr, pDb, "reset"); + } + } + va_end(ap); + } + return iRet; +} + +static char * execsql_text_x( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb, /* Database handle */ + int iSlot, /* Db handle slot to store text in */ + ... /* SQL and pointers to parameter values */ +){ + char *zRet = 0; + + if( iSlot>=pDb->nText ){ + int nByte = sizeof(char *)*(iSlot+1); + pDb->aText = (char **)sqlite3_realloc(pDb->aText, nByte); + memset(&pDb->aText[pDb->nText], 0, sizeof(char*)*(iSlot+1-pDb->nText)); + pDb->nText = iSlot+1; + } + + if( pErr->rc==SQLITE_OK ){ + sqlite3_stmt *pStmt; /* SQL statement to execute */ + va_list ap; /* ... 
arguments */ + va_start(ap, iSlot); + pStmt = getAndBindSqlStatement(pErr, pDb, ap); + if( pStmt ){ + int first = 1; + while( SQLITE_ROW==sqlite3_step(pStmt) ){ + if( first && sqlite3_column_count(pStmt)>0 ){ + zRet = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 0)); + sqlite3_free(pDb->aText[iSlot]); + pDb->aText[iSlot] = zRet; + } + first = 0; + } + if( SQLITE_OK!=sqlite3_reset(pStmt) ){ + sqlite_error(pErr, pDb, "reset"); + } + } + va_end(ap); + } + + return zRet; +} + +static void integrity_check_x( + Error *pErr, /* IN/OUT: Error code */ + Sqlite *pDb /* Database handle */ +){ + if( pErr->rc==SQLITE_OK ){ + Statement *pStatement; /* Statement to execute */ + char *zErr = 0; /* Integrity check error */ + + pStatement = getSqlStatement(pErr, pDb, "PRAGMA integrity_check"); + if( pStatement ){ + sqlite3_stmt *pStmt = pStatement->pStmt; + while( SQLITE_ROW==sqlite3_step(pStmt) ){ + const char *z = (const char*)sqlite3_column_text(pStmt, 0); + if( strcmp(z, "ok") ){ + if( zErr==0 ){ + zErr = sqlite3_mprintf("%s", z); + }else{ + zErr = sqlite3_mprintf("%z\n%s", zErr, z); + } + } + } + sqlite3_reset(pStmt); + + if( zErr ){ + pErr->zErr = zErr; + pErr->rc = 1; + } + } + } +} + +static void *launch_thread_main(void *pArg){ + Thread *p = (Thread *)pArg; + return (void *)p->xProc(p->iTid, p->pArg); +} + +static void launch_thread_x( + Error *pErr, /* IN/OUT: Error code */ + Threadset *pThreads, /* Thread set */ + char *(*xProc)(int, void*), /* Proc to run */ + void *pArg /* Argument passed to thread proc */ +){ + if( pErr->rc==SQLITE_OK ){ + int iTid = ++pThreads->iMaxTid; + Thread *p; + int rc; + + p = (Thread *)sqlite3_malloc(sizeof(Thread)); + memset(p, 0, sizeof(Thread)); + p->iTid = iTid; + p->pArg = pArg; + p->xProc = xProc; + + rc = pthread_create(&p->tid, NULL, launch_thread_main, (void *)p); + if( rc!=0 ){ + system_error(pErr, rc); + sqlite3_free(p); + }else{ + p->pNext = pThreads->pThread; + pThreads->pThread = p; + } + } +} + +static void 
join_all_threads_x( + Error *pErr, /* IN/OUT: Error code */ + Threadset *pThreads /* Thread set */ +){ + Thread *p; + Thread *pNext; + for(p=pThreads->pThread; p; p=pNext){ + void *ret; + pNext = p->pNext; + int rc; + rc = pthread_join(p->tid, &ret); + if( rc!=0 ){ + if( pErr->rc==SQLITE_OK ) system_error(pErr, rc); + }else{ + printf("Thread %d says: %s\n", p->iTid, (ret==0 ? "..." : (char *)ret)); + fflush(stdout); + } + sqlite3_free(p); + } + pThreads->pThread = 0; +} + +static i64 filesize_x( + Error *pErr, + const char *zFile +){ + i64 iRet = 0; + if( pErr->rc==SQLITE_OK ){ + struct stat sStat; + if( stat(zFile, &sStat) ){ + iRet = -1; + }else{ + iRet = sStat.st_size; + } + } + return iRet; +} + +static void filecopy_x( + Error *pErr, + const char *zFrom, + const char *zTo +){ + if( pErr->rc==SQLITE_OK ){ + i64 nByte = filesize_x(pErr, zFrom); + if( nByte<0 ){ + test_error_x(pErr, sqlite3_mprintf("no such file: %s", zFrom)); + }else{ + i64 iOff; + char aBuf[1024]; + int fd1; + int fd2; + unlink(zTo); + + fd1 = open(zFrom, O_RDONLY); + if( fd1<0 ){ + system_error(pErr, errno); + return; + } + fd2 = open(zTo, O_RDWR|O_CREAT|O_EXCL, 0644); + if( fd2<0 ){ + system_error(pErr, errno); + close(fd1); + return; + } + + iOff = 0; + while( iOffnByte ){ + nCopy = nByte - iOff; + } + if( nCopy!=read(fd1, aBuf, nCopy) ){ + system_error(pErr, errno); + break; + } + if( nCopy!=write(fd2, aBuf, nCopy) ){ + system_error(pErr, errno); + break; + } + iOff += nCopy; + } + + close(fd1); + close(fd2); + } + } +} + +/* +** Used by setstoptime() and timetostop(). 
+*/ +static double timelimit = 0.0; + +static double currentTime(void){ + double t; + static sqlite3_vfs *pTimelimitVfs = 0; + if( pTimelimitVfs==0 ) pTimelimitVfs = sqlite3_vfs_find(0); + if( pTimelimitVfs->iVersion>=2 && pTimelimitVfs->xCurrentTimeInt64!=0 ){ + sqlite3_int64 tm; + pTimelimitVfs->xCurrentTimeInt64(pTimelimitVfs, &tm); + t = tm/86400000.0; + }else{ + pTimelimitVfs->xCurrentTime(pTimelimitVfs, &t); + } + return t; +} + +static void setstoptime_x( + Error *pErr, /* IN/OUT: Error code */ + int nMs /* Milliseconds until "stop time" */ +){ + if( pErr->rc==SQLITE_OK ){ + double t = currentTime(); + timelimit = t + ((double)nMs)/(1000.0*60.0*60.0*24.0); + } +} + +static int timetostop_x( + Error *pErr /* IN/OUT: Error code */ +){ + int ret = 1; + if( pErr->rc==SQLITE_OK ){ + double t = currentTime(); + ret = (t >= timelimit); + } + return ret; +} + diff --git a/test/uri.test b/test/uri.test index 74da225acc..62090f46af 100644 --- a/test/uri.test +++ b/test/uri.test @@ -282,11 +282,11 @@ ifcapable wal { INSERT INTO t2 VALUES('x', 'y'); } lsort [array names ::T1] - } {test.db1 test.db1-journal test.db1-wal} + } {test.db1 test.db1-journal test.db1-wal test.db1-wal2} do_test 5.1.2 { lsort [array names ::T2] - } {test.db2 test.db2-journal test.db2-wal} + } {test.db2 test.db2-journal test.db2-wal test.db2-wal2} db close tvfs1 delete diff --git a/test/wal.test b/test/wal.test index 50988debe3..d5bf261ed3 100644 --- a/test/wal.test +++ b/test/wal.test @@ -1175,7 +1175,7 @@ foreach {tn pgsz works} { 9 32768 1 10 65536 1 11 131072 0 - 11 1016 0 + 12 1016 0 } { if {$::SQLITE_MAX_PAGE_SIZE < $pgsz} { @@ -1185,14 +1185,14 @@ foreach {tn pgsz works} { for {set pg 1} {$pg <= 3} {incr pg} { forcecopy testX.db test.db forcedelete test.db-wal - + # Check that the database now exists and consists of three pages. And # that there is no associated wal file. 
# do_test wal-18.2.$tn.$pg.1 { file exists test.db-wal } 0 do_test wal-18.2.$tn.$pg.2 { file exists test.db } 1 do_test wal-18.2.$tn.$pg.3 { file size test.db } [expr 1024*3] - + do_test wal-18.2.$tn.$pg.4 { # Create a wal file that contains a single frame (database page @@ -1224,16 +1224,16 @@ foreach {tn pgsz works} { puts -nonewline $fd $framehdr puts -nonewline $fd $framebody close $fd - + file size test.db-wal } [wal_file_size 1 $pgsz] - + do_test wal-18.2.$tn.$pg.5 { sqlite3 db test.db set rc [catch { db one {PRAGMA integrity_check} } msg] expr { $rc!=0 || $msg!="ok" } } $works - + db close } } diff --git a/test/wal2.test b/test/wal2.test index 064bed0b20..93a8e9ee04 100644 --- a/test/wal2.test +++ b/test/wal2.test @@ -931,6 +931,11 @@ db close # that exist are versions 3007000 (corresponding to SQLite version 3.7.0, # the first version of SQLite to feature wal mode). # +set wal_version 3007000 +ifcapable wal_bighash { + set wal_version 3007001 +} + do_test wal2-10.1.1 { faultsim_delete_and_reopen execsql { @@ -950,9 +955,9 @@ do_test wal2-10.1.3 { faultsim_restore_and_reopen set hdr [wal_set_walhdr test.db-wal] lindex $hdr 1 -} {3007000} +} $wal_version do_test wal2-10.1.4 { - lset hdr 1 3007001 + lset hdr 1 [expr $wal_version + 1] wal_set_walhdr test.db-wal $hdr catchsql { SELECT * FROM t1 } } {1 {unable to open database file}} @@ -965,9 +970,9 @@ do_test wal2-10.2.1 { do_test wal2-10.2.2 { set hdr [set_tvfs_hdr $::filename] lindex $hdr 0 -} {3007000} +} $wal_version do_test wal2-10.2.3 { - lset hdr 0 3007001 + lset hdr 0 [expr $wal_version + 1] wal_fix_walindex_cksum hdr set_tvfs_hdr $::filename $hdr catchsql { SELECT * FROM t1 } diff --git a/test/wal2big.test b/test/wal2big.test new file mode 100644 index 0000000000..77daab5af6 --- /dev/null +++ b/test/wal2big.test @@ -0,0 +1,73 @@ +# 2017 September 19 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. 
+# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# TESTRUNNER: slow +# +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL2" mode. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +source $testdir/malloc_common.tcl +source $testdir/wal_common.tcl + +set testprefix wal2big +ifcapable !wal {finish_test ; return } + +do_execsql_test 1.0 { + CREATE TABLE t1(a, b, c); + CREATE INDEX t1a ON t1(a); + CREATE INDEX t1b ON t1(b); + CREATE INDEX t1c ON t1(c); + PRAGMA journal_mode = wal2; + PRAGMA journal_size_limit = 10000000; + + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<200000 + ) + INSERT INTO t1 SELECT random(), random(), random() FROM s; +} {wal2 10000000} + +do_execsql_test 1.1 { + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<200000 + ) + INSERT INTO t1 SELECT random(), random(), random() FROM s; +} + +do_test 1.2 { + list [expr [file size test.db-wal]>10000000] \ + [expr [file size test.db-wal2]>10000000] +} {1 1} + +do_test 1.3 { + sqlite3 db2 test.db + execsql { + SELECT count(*) FROM t1; + PRAGMA integrity_check; + } db2 +} {400000 ok} + +do_test 1.4 { + db2 close + forcecopy test.db test.db2 + forcecopy test.db-wal test.db2-wal + forcecopy test.db-wal2 test.db2-wal2 + sqlite3 db2 test.db2 + execsql { + SELECT count(*) FROM t1; + PRAGMA integrity_check; + } +} {400000 ok} + +finish_test diff --git a/test/wal2concurrent.test b/test/wal2concurrent.test new file mode 100644 index 0000000000..a8b8f32d3c --- /dev/null +++ b/test/wal2concurrent.test @@ -0,0 +1,164 @@ +# 2018 December 6 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. 
+# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +set ::testprefix wal2concurrent + +ifcapable !concurrent { + finish_test + return +} + + +#------------------------------------------------------------------------- +# Warm-body test. +# +foreach tn {1 2} { + reset_db + sqlite3 db2 test.db + do_execsql_test 1.0 { + PRAGMA page_size = 1024; + CREATE TABLE t1(x); + CREATE TABLE t2(y); + PRAGMA journal_size_limit = 5000; + PRAGMA journal_mode = wal2; + } {5000 wal2} + + do_execsql_test 1.1 { + INSERT INTO t1 VALUES(1); + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(2); + } {} + + do_test 1.2 { + execsql { + PRAGMA journal_size_limit = 5000; + INSERT INTO t1 VALUES(3) + } db2 + catchsql { COMMIT } + } {1 {database is locked}} + + do_catchsql_test 1.3 { COMMIT } {1 {database is locked}} + do_catchsql_test 1.4 { ROLLBACK } {0 {}} + + do_test 1.5 { + list [file size test.db-wal] [file size test.db-wal2] + } {2128 0} + + do_execsql_test 1.6 { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(2); + } {} + + do_test 1.7 { + execsql { INSERT INTO t2 VALUES(randomblob(4000)) } db2 + list [file size test.db-wal] [file size test.db-wal2] + } {7368 0} + + if {$tn==1} { + do_test 1.8 { + execsql { + INSERT INTO t2 VALUES(1); + INSERT INTO t1 VALUES(5); + } db2 + list [file size test.db-wal] [file size test.db-wal2] + } {7368 2128} + + do_catchsql_test 1.9 { COMMIT } {1 {database is locked}} + do_catchsql_test 1.10 { ROLLBACK } {0 {}} + db close + sqlite3 db test.db + do_execsql_test 1.11 { SELECT * FROM t1 } {1 3 5} + do_execsql_test 1.12 { SELECT count(*) FROM t2 } {2} + } else { + do_test 1.8 { + execsql { + INSERT INTO t2 VALUES(1); + } db2 + list [file size test.db-wal] [file size test.db-wal2] + } {7368 1080} + + do_catchsql_test 1.9 { COMMIT 
} {0 {}} + db close + sqlite3 db test.db + do_execsql_test 1.11 { SELECT * FROM t1 } {1 3 2} + do_execsql_test 1.12 { SELECT count(*) FROM t2 } {2} + + do_test 1.13 { + list [file size test.db-wal] [file size test.db-wal2] + } {7368 2128} + } +} + +do_multiclient_test tn { + do_test 2.$tn.1 { + sql1 { + PRAGMA auto_vacuum = OFF; + CREATE TABLE t1(x UNIQUE); + CREATE TABLE t2(x UNIQUE); + PRAGMA journal_mode = wal2; + PRAGMA journal_size_limit = 15000; + } + } {wal2 15000} + + do_test 2.$tn.2 { + sql1 { + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<=10 + ) + INSERT INTO t1 SELECT randomblob(800) FROM s; + } + } {} + + do_test 2.$tn.3 { + sql1 { DELETE FROM t1 WHERE (rowid%4)==0 } + list [expr [file size test.db-wal]>15000] \ + [expr [file size test.db-wal2]>15000] + } {1 0} + + do_test 2.$tn.4 { + sql1 { PRAGMA wal_checkpoint; } + sql1 { + BEGIN CONCURRENT; + INSERT INTO t1 VALUES(randomblob(800)); + } + } {} + + do_test 2.$tn.5 { + sql2 { + PRAGMA journal_size_limit = 15000; + INSERT INTO t2 VALUES(randomblob(800)); + INSERT INTO t2 VALUES(randomblob(800)); + INSERT INTO t2 VALUES(randomblob(800)); + INSERT INTO t2 VALUES(randomblob(800)); + INSERT INTO t2 VALUES(randomblob(800)); + DELETE FROM t2; + } + list [expr [file size test.db-wal]>15000] \ + [expr [file size test.db-wal2]>15000] + } {1 1} + + do_test 2.$tn.6 { + sql1 { + INSERT INTO t1 VALUES(randomblob(800)); + COMMIT; + PRAGMA integrity_check; + } + } {ok} +} + + + +finish_test + diff --git a/test/wal2fault.test b/test/wal2fault.test new file mode 100644 index 0000000000..7067e45529 --- /dev/null +++ b/test/wal2fault.test @@ -0,0 +1,52 @@ +# 2010 May 03 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. 
+# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL2" mode. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/malloc_common.tcl +source $testdir/lock_common.tcl + +ifcapable !wal {finish_test ; return } +set testprefix wal2fault + +do_execsql_test 1.0 { + CREATE TABLE t1(x,y); + PRAGMA journal_mode = wal2; + WITH s(i) AS ( SELECT 100 UNION ALL SELECT i-1 FROM s WHERE (i-1)>0 ) + INSERT INTO t1 SELECT i, randomblob(i) FROM s; + WITH s(i) AS ( SELECT 100 UNION ALL SELECT i-1 FROM s WHERE (i-1)>0 ) + INSERT INTO t1 SELECT i, randomblob(i) FROM s; +} {wal2} + +do_test 1.1 { + expr [file size test.db-wal]>10000 +} {1} +faultsim_save_and_close + +do_faultsim_test 1 -prep { + faultsim_restore_and_reopen + execsql { + PRAGMA journal_size_limit = 10000; + SELECT count(*) FROM sqlite_master; + } +} -body { + execsql { + INSERT INTO t1 VALUES(1, 2); + } +} -test { + faultsim_test_result {0 {}} +} + +finish_test diff --git a/test/wal2lock.test b/test/wal2lock.test new file mode 100644 index 0000000000..f86cf87925 --- /dev/null +++ b/test/wal2lock.test @@ -0,0 +1,106 @@ +# 2018 December 15 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL2" mode. 
+# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +source $testdir/malloc_common.tcl +source $testdir/wal_common.tcl + +set testprefix wal2lock +ifcapable !wal {finish_test ; return } + +db close +testvfs tvfs +sqlite3 db test.db -vfs tvfs + +do_execsql_test 1.0 { + PRAGMA journal_mode = wal2; + CREATE TABLE y1(y, yy); + CREATE INDEX y1y ON y1(y); + CREATE INDEX y1yy ON y1(yy); + INSERT INTO y1 VALUES(1, 2), (3, 4), (5, 6); +} {wal2} + +tvfs script vfs_callback +tvfs filter xShmLock + +set ::lock [list] +proc vfs_callback {func file name lock} { + lappend ::lock $lock + return SQLITE_OK +} + +do_execsql_test 1.1.1 { + SELECT * FROM y1 +} {1 2 3 4 5 6} +do_test 1.1.2 { + set ::lock +} {{4 1 lock shared} {4 1 unlock shared}} + +set ::bFirst 1 +proc vfs_callback {func file name lock} { + if {$::bFirst} { + set ::bFirst 0 + return SQLITE_BUSY + } + return SQLITE_OK +} +do_execsql_test 1.2 { + SELECT * FROM y1 +} {1 2 3 4 5 6} + +set ::bFirst 1 +proc vfs_callback {func file name lock} { + if {$::bFirst} { + set ::bFirst 0 + return SQLITE_IOERR + } + return SQLITE_OK +} +do_catchsql_test 1.3 { + SELECT * FROM y1 +} {1 {disk I/O error}} + +puts "# Warning: This next test case causes SQLite to call xSleep(1) 100 times." +puts "# Normally this equates to a delay of roughly 10 seconds, but if SQLite" +puts "# is built on unix without HAVE_USLEEP defined, it may be much longer." 
+proc vfs_callback {func file name lock} { return SQLITE_BUSY } +do_catchsql_test 1.4 { + SELECT * FROM y1 +} {1 {locking protocol}} +proc vfs_callback {func file name lock} { return SQLITE_OK } + +sqlite3 db2 test.db -vfs tvfs +set ::bFirst 1 + +proc vfs_callback {func file name lock} { + if {$::bFirst} { + set ::bFirst 0 + db2 eval { INSERT INTO y1 VALUES(7, 8) } + } +} + +do_execsql_test 1.5.1 { + SELECT * FROM y1 +} {1 2 3 4 5 6 7 8} +do_execsql_test 1.5.2 { + SELECT * FROM y1 +} {1 2 3 4 5 6 7 8} + +db close +db2 close +tvfs delete +finish_test diff --git a/test/wal2openclose.test b/test/wal2openclose.test new file mode 100644 index 0000000000..66e5875813 --- /dev/null +++ b/test/wal2openclose.test @@ -0,0 +1,81 @@ +# 2017 September 19 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL2" mode. 
+# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +source $testdir/malloc_common.tcl +source $testdir/wal_common.tcl + +set testprefix wal2openclose +ifcapable !wal {finish_test ; return } + +do_execsql_test 1.0 { + CREATE TABLE t1(a, b, c); + PRAGMA journal_mode = wal2; + PRAGMA wal_autocheckpoint = 0; + PRAGMA journal_size_limit = 75000; +} {wal2 0 75000} + +do_test 1.1 { + for {set ii 1} {$ii <= 200} {incr ii} { + execsql { + INSERT INTO t1 VALUES($ii, $ii, $ii); + } + } + expr ([file size test.db-wal2] - 75000) > 30000 +} {1} + +do_test 1.2 { + db close + list [file exists test.db-wal] [file exists test.db-wal2] +} {0 0} + +sqlite3 db test.db +do_execsql_test 1.3 { + SELECT sum(c) FROM t1 +} {20100} +db close + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 2.0 { + CREATE TABLE t1(a, b, c); + PRAGMA journal_mode = wal2; + INSERT INTO t1 VALUES(1, 2, 3); +} {wal2} +db_save_and_close + +db_restore_and_reopen +do_execsql_test 2.1 { + SELECT * FROM t1; +} {1 2 3} + +do_test 2.2 { + sqlite3 db2 test.db + db2 eval {INSERT INTO t1 VALUES(4, 5, 6)} + db2 close +} {} + +breakpoint +db close +sqlite3 db test.db +do_execsql_test 2.2 { + SELECT * FROM t1; +} {1 2 3 4 5 6} + + + +finish_test diff --git a/test/wal2recover.test b/test/wal2recover.test new file mode 100644 index 0000000000..b4e1022e85 --- /dev/null +++ b/test/wal2recover.test @@ -0,0 +1,271 @@ +# 2018 December 13 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. 
The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL2" mode. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +source $testdir/malloc_common.tcl +source $testdir/wal_common.tcl + +set testprefix wal2recover +ifcapable !wal {finish_test ; return } + +proc db_copy {from to} { + forcecopy $from $to + forcecopy ${from}-wal ${to}-wal + forcecopy ${from}-wal2 ${to}-wal2 +} + +do_execsql_test 1.0 { + CREATE TABLE t1(a, b, c); + CREATE INDEX t1a ON t1(a); + CREATE INDEX t1b ON t1(b); + CREATE INDEX t1c ON t1(c); + PRAGMA journal_mode = wal2; + PRAGMA journal_size_limit = 15000; + PRAGMA wal_autocheckpoint = 0; +} {wal2 15000 0} + +do_test 1.1 { + for {set i 1} {$i <= 1000} {incr i} { + execsql { INSERT INTO t1 VALUES(random(), random(), random()) } + db_copy test.db test.db2 + sqlite3 db2 test.db + set res [execsql { + SELECT count(*) FROM t1; + PRAGMA integrity_check; + } db2] + db2 close + if {$res != [list $i ok]} { + error "failure on iteration $i" + } + } + set {} {} +} {} + +#-------------------------------------------------------------------------- +reset_db +do_execsql_test 2.0 { + CREATE TABLE t1(x UNIQUE); + CREATE TABLE t2(x UNIQUE); + PRAGMA journal_mode = wal2; + PRAGMA journal_size_limit = 10000; + PRAGMA wal_autocheckpoint = 0; + BEGIN; + INSERT INTO t1 VALUES(randomblob(4000)); + INSERT INTO t1 VALUES(randomblob(4000)); + INSERT INTO t1 VALUES(randomblob(4000)); + COMMIT; + BEGIN; + INSERT INTO t2 VALUES(randomblob(4000)); + INSERT INTO t2 VALUES(randomblob(4000)); + INSERT INTO t2 VALUES(randomblob(4000)); + COMMIT; +} {wal2 10000 0} +do_test 2.0.1 { + list [file size test.db] [file size test.db-wal] [file size test.db-wal2] +} {5120 28328 28328} + +# Test recovery with both wal files intact. 
+# +do_test 2.1 { + db_copy test.db test.db2 + sqlite3 db2 test.db2 + execsql { + SELECT count(*) FROM t1; + SELECT count(*) FROM t2; + PRAGMA integrity_check; + } db2 +} {3 3 ok} + +do_test 2.2 { + db2 close + db_copy test.db test.db2 + hexio_write test.db2-wal 16 12345678 + sqlite3 db2 test.db2 + execsql { + SELECT count(*) FROM t1; + SELECT count(*) FROM t2; + } db2 +} {0 3} + +do_test 2.3 { + db2 close + db_copy test.db test.db2 + hexio_write test.db2-wal2 16 12345678 + sqlite3 db2 test.db2 + execsql { + SELECT count(*) FROM t1; + SELECT count(*) FROM t2; + PRAGMA integrity_check; + } db2 +} {3 0 ok} + +do_test 2.4 { + db2 close + db_copy test.db test.db2 + forcecopy test.db-wal test.db2-wal2 + sqlite3 db2 test.db2 + execsql { + SELECT count(*) FROM t1; + SELECT count(*) FROM t2; + PRAGMA integrity_check; + } db2 +} {3 0 ok} + +do_test 2.5 { + db2 close + db_copy test.db test.db2 + forcecopy test.db-wal test.db2-wal2 + forcecopy test.db-wal2 test.db2-wal + sqlite3 db2 test.db2 + execsql { + SELECT count(*) FROM t1; + SELECT count(*) FROM t2; + PRAGMA integrity_check; + } db2 +} {3 3 ok} + +do_test 2.6 { + db2 close + db_copy test.db test.db2 + forcecopy test.db-wal test.db2-wal2 + close [open test.db-wal w] + sqlite3 db2 test.db2 + execsql { + SELECT count(*) FROM t1; + SELECT count(*) FROM t2; + PRAGMA integrity_check; + } db2 +} {3 0 ok} + +do_test 2.7 { + db2 close + db_copy test.db test.db2 + forcedelete test.db2-wal + sqlite3 db2 test.db2 + execsql { + SELECT count(*) FROM t1; + SELECT count(*) FROM t2; + PRAGMA integrity_check; + } db2 +} {0 0 ok} +db2 close + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 3.0 { + CREATE TABLE t1(a TEXT, b TEXT, c TEXT); + CREATE INDEX t1a ON t1(a); + CREATE INDEX t1b ON t1(b); + CREATE INDEX t1c ON t1(c); + PRAGMA journal_mode = wal2; + PRAGMA journal_size_limit = 10000; + PRAGMA wal_autocheckpoint = 0; + PRAGMA cache_size = 5; +} {wal2 10000 0} + 
+do_execsql_test 3.1 { + WITH s(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM s where i < 200) + INSERT INTO t1 SELECT i, i, i FROM s; + + INSERT INTO t1 VALUES(201, 201, 201); +} {} + +do_test 3.2 { + list [file size test.db] [file size test.db-wal] [file size test.db-wal2] +} {5120 15752 4224} + +do_test 3.3 { + forcecopy test.db test.db2 + forcecopy test.db-wal test.db2-wal + forcecopy test.db-wal2 test.db2-wal2 + sqlite3 db2 test.db2 + execsql { + PRAGMA journal_size_limit = 10000; + PRAGMA wal_autocheckpoint = 0; + PRAGMA cache_size = 5; + BEGIN; + WITH s(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM s where i < 200) + INSERT INTO t1 SELECT i, i, i FROM s; + } db2 + list [file size test.db2] [file size test.db2-wal] [file size test.db2-wal2] +} {5120 15752 23088} + + +if {$tcl_platform(platform)!="windows"} { + # These cannot be run under windows, as the *-shm file may not be read + # while it is locked by the database connection. + do_test 3.4 { + set fd [open test.db2-shm] + fconfigure $fd -translation binary + set data [read $fd] + close $fd + + set fd [open test.db-shm w] + fconfigure $fd -translation binary + puts -nonewline $fd $data + close $fd + + execsql { + WITH s(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM s where i < 10) + INSERT INTO t1 SELECT i, i, i FROM s; + SELECT count(*) FROM t1; + PRAGMA integrity_check; + } + } {211 ok} + + do_test 3.5 { + list [file size test.db] [file size test.db-wal] [file size test.db-wal2] + } {5120 15752 18896} +} + +#------------------------------------------------------------------------- +# +reset_db +do_execsql_test 4.0 { + PRAGMA journal_mode = wal2; + CREATE TABLE xyz(x, y, z); + INSERT INTO xyz VALUES('x', 'y', 'z'); +} {wal2} +db close +do_test 4.1 { + close [open test.db-wal w] + file mkdir test.db-wal2 + sqlite3 db test.db + catchsql { SELECT * FROM xyz } +} {1 {unable to open database file}} +db close +file delete test.db-wal2 +db2 close + +do_test 4.2 { + sqlite3 db test.db + execsql { + INSERT INTO xyz 
VALUES('a', 'b', 'c'); + } + forcecopy test.db test.db2 + forcecopy test.db-wal test.db2-wal + forcedelete test.db2-wal2 + file mkdir test.db2-wal2 + sqlite3 db2 test.db2 + catchsql { SELECT * FROM xyz } db2 +} {1 {unable to open database file}} +db2 close +file delete test.db2-wal2 + + +finish_test diff --git a/test/wal2recover2.test b/test/wal2recover2.test new file mode 100644 index 0000000000..6807612c95 --- /dev/null +++ b/test/wal2recover2.test @@ -0,0 +1,335 @@ +# 2018 December 13 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL2" mode. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +source $testdir/malloc_common.tcl +source $testdir/wal_common.tcl + +set testprefix wal2recover2 +ifcapable !wal {finish_test ; return } + +do_execsql_test 1.0 { + CREATE TABLE t1(x); + CREATE TABLE t2(x); + WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<1500 ) + INSERT INTO t1 SELECT i FROM s; + WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<1500 ) + INSERT INTO t2 SELECT i FROM s; + + PRAGMA journal_mode = wal2; + PRAGMA journal_size_limit = 10000; +} {wal2 10000} + +set ::L 1125750 +set ::M 1126500 +set ::H 1127250 + +do_execsql_test 1.1 { + UPDATE t1 SET x=x+1; + UPDATE t2 SET x=x+1 WHERE rowid<=750; + + SELECT sum(x) FROM t1; + SELECT sum(x) FROM t2; +} [list $H $M] + +do_test 1.2 { + list [file size test.db] [file size test.db-wal] [file size test.db-wal2] +} {31744 14704 7368} + +proc cksum {zIn data} { + if {[string length $zIn]==0} { + set s0 0 + 
set s1 0 + } else { + set s0 [hexio_get_int [string range $zIn 0 7]] + set s1 [hexio_get_int [string range $zIn 8 15]] + } + set n [expr [string length $data] / 8] + + for {set i 0} {$i < $n} {incr i 2} { + set x0 [hexio_get_int -l [string range $data [expr $i*8] [expr $i*8+7]]] + set x1 [hexio_get_int -l [string range $data [expr $i*8+8] [expr $i*8+8+7]]] + + set s0 [expr ($s0 + $x0 + $s1) & 0xFFFFFFFF] + set s1 [expr ($s1 + $x1 + $s0) & 0xFFFFFFFF] + } + + return "[hexio_render_int32 $s0][hexio_render_int32 $s1]" +} + +proc fix_wal_cksums {file} { + # Fix the checksum on the wal header. + set data [hexio_read $file 0 32] + set cksum [cksum {} [string range $data 0 47]] + set salt [hexio_read $file 16 8] + hexio_write $file 24 $cksum + + # Fix the checksums for all pages in the wal file. + set pgsz [hexio_get_int [hexio_read $file 8 4]] + set sz [file size $file] + for {set off 32} {$off < $sz} {incr off [expr $pgsz+24]} { + set e [hexio_read $file $off 8] + set cksum [cksum $cksum $e] + + ifcapable !wal2nocksum { + set p [hexio_read $file [expr $off+24] $pgsz] + set cksum [cksum $cksum $p] + } + + hexio_write $file [expr $off+8] $salt + hexio_write $file [expr $off+16] $cksum + } +} + +proc wal_incr_hdrfield {file field} { + switch -- $field { + nCkpt { set offset 12 } + salt0 { set offset 16 } + salt1 { set offset 20 } + default { + error "unknown field $field - should be \"nCkpt\", \"salt0\" or \"salt1\"" + } + } + + # Increment the value in the wal header. + set v [hexio_get_int [hexio_read $file $offset 4]] + incr v + hexio_write $file $offset [hexio_render_int32 $v] + + # Fix various checksums + fix_wal_cksums $file +} + +proc wal_set_nckpt {file val} { + # Set the nCkpt field (offset 12) in the wal header to $val. 
+ hexio_write $file 12 [hexio_render_int32 $val] + + # Fix various checksums + fix_wal_cksums $file +} + +proc wal_set_follow {file prevfile} { + set pgsz [hexio_get_int [hexio_read $prevfile 8 4]] + set sz [file size $prevfile] + set cksum [hexio_read $prevfile [expr $sz-$pgsz-8] 8] + + hexio_write $file 16 $cksum + fix_wal_cksums $file +} + +do_execsql_test 1.3.0 { + SELECT sum(x) FROM t1; + SELECT sum(x) FROM t2; +} [list $H $M] + +foreach {tn file field} { + 1 test.db2-wal salt0 + 2 test.db2-wal salt1 + 3 test.db2-wal nCkpt + 4 test.db2-wal2 salt0 + 5 test.db2-wal2 salt1 + 6 test.db2-wal2 nCkpt +} { + do_test 1.3.$tn { + forcecopy test.db test.db2 + forcecopy test.db-wal test.db2-wal + forcecopy test.db-wal2 test.db2-wal2 + wal_incr_hdrfield $file $field + sqlite3 db2 test.db2 + execsql { + SELECT sum(x) FROM t1; + SELECT sum(x) FROM t2; + } db2 + } [list $H $L] + db2 close +} + +do_test 1.4 { + forcecopy test.db test.db2 + forcecopy test.db-wal2 test.db2-wal + forcedelete test.db2-wal2 + sqlite3 db2 test.db2 + execsql { + SELECT sum(x) FROM t1; + SELECT sum(x) FROM t2; + } db2 +} [list $L $M] + +do_test 1.5 { + db2 close + forcecopy test.db test.db2 + forcecopy test.db-wal2 test.db2-wal + forcecopy test.db-wal test.db2-wal2 + sqlite3 db2 test.db2 + execsql { + SELECT sum(x) FROM t1; + SELECT sum(x) FROM t2; + } db2 +} [list $H $M] + +db2 close +foreach {tn file field} { + 1 test.db2-wal salt0 + 2 test.db2-wal salt1 + 3 test.db2-wal2 salt0 + 4 test.db2-wal2 salt1 +} { + do_test 1.6.$tn { + forcecopy test.db test.db2 + forcecopy test.db-wal2 test.db2-wal + forcecopy test.db-wal test.db2-wal2 + wal_incr_hdrfield $file $field + sqlite3 db2 test.db2 + execsql { + SELECT sum(x) FROM t1; + SELECT sum(x) FROM t2; + } db2 + } [list $H $L] + db2 close +} + +foreach {tn nCkpt1 nCkpt2 res} [list \ + 1 2 1 "$H $M" \ + 2 2 2 "$L $M" \ + 3 3 1 "$H $L" \ + 4 15 14 "$H $M" \ + 5 0 15 "$H $M" \ + 6 1 15 "$L $M" \ +] { + do_test 1.7.$tn { + forcecopy test.db test.db2 + forcecopy 
test.db-wal2 test.db2-wal + forcecopy test.db-wal test.db2-wal2 + + wal_set_nckpt test.db2-wal2 $nCkpt2 + wal_set_nckpt test.db2-wal $nCkpt1 + wal_set_follow test.db2-wal test.db2-wal2 + + + sqlite3 db2 test.db2 + execsql { + SELECT sum(x) FROM t1; + SELECT sum(x) FROM t2; + } db2 + } $res + db2 close +} + + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 1.8.1 { + PRAGMA autovacuum = 0; + PRAGMA page_size = 4096; + CREATE TABLE t1(x); + CREATE TABLE t2(x); + WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<1500 ) + INSERT INTO t1 SELECT i FROM s; + WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<1500 ) + INSERT INTO t2 SELECT i FROM s; + + PRAGMA journal_mode = wal2; + PRAGMA journal_size_limit = 10000; + + WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<1500 ) + INSERT INTO t2 SELECT i FROM s; +} {wal2 10000} + +do_test 1.8.2 { + list [file size test.db-wal] [file size test.db-wal2] +} {24752 0} + +do_execsql_test 1.8.3 { PRAGMA user_version = 123 } +do_test 1.8.4 { + list [file size test.db-wal] [file size test.db-wal2] +} {24752 4152} + +do_test 1.8.5 { + hexio_write test.db-wal2 [expr 56+16] 0400 + fix_wal_cksums test.db-wal2 +} {} + +ifcapable oversize_cell_check { + set msg {database disk image is malformed} +} else { + set msg {malformed database schema (?)} +} + +do_test 1.8.6 { + forcecopy test.db test.db2 + forcecopy test.db-wal test.db2-wal + forcecopy test.db-wal2 test.db2-wal2 + sqlite3 db2 test.db2 + catchsql { SELECT * FROM sqlite_master } db2 +} [list 1 $msg] +db2 close + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 1.0 { + CREATE TABLE t1(a, b, c); + CREATE INDEX t1a ON t1(a); + CREATE INDEX t1b ON t1(b); + CREATE INDEX t1c ON t1(c); + PRAGMA journal_mode = wal2; + + INSERT INTO t1 VALUES(randomblob(50), randomblob(50), randomblob(50)); + INSERT INTO t1 VALUES(randomblob(50), randomblob(50), 
randomblob(50)); + INSERT INTO t1 VALUES(randomblob(50), randomblob(50), randomblob(50)); + PRAGMA journal_size_limit = 5000; + INSERT INTO t1 VALUES(randomblob(50), randomblob(50), randomblob(50)); + INSERT INTO t1 VALUES(randomblob(50), randomblob(50), randomblob(50)); + INSERT INTO t1 VALUES(randomblob(50), randomblob(50), randomblob(50)); + INSERT INTO t1 VALUES(randomblob(50), randomblob(50), randomblob(50)); + INSERT INTO t1 VALUES(randomblob(50), randomblob(50), randomblob(50)); +} {wal2 5000} + +do_test 2.1 { + forcecopy test.db test.db2 + forcecopy test.db-wal2 test.db2-wal + forcecopy test.db-wal test.db2-wal2 + + ifcapable wal2nocksum { + # With wal2nocksum set, each frame checksum is calculated based on the + # previous checksum and the contents of the first 8 bytes of the frame + # header. So to get the behaviour the test requires we need to mess up + # the frame header, not the body. Hence 4224 instead of 5000. + hexio_write test.db2-wal 4224 1234567890 + } else { + hexio_write test.db2-wal 5000 1234567890 + } +} {5} + +do_test 2.2 { + sqlite3 db2 test.db2 + breakpoint + execsql { + SELECT count(*) FROM t1; + PRAGMA integrity_check + } db2 +} {4 ok} + +do_test 2.3 { + execsql { + INSERT INTO t1 VALUES(randomblob(50), randomblob(50), randomblob(50)); + SELECT count(*) FROM t1; + PRAGMA integrity_check + } db2 +} {5 ok} + + +finish_test diff --git a/test/wal2recover3.test b/test/wal2recover3.test new file mode 100644 index 0000000000..eab3011341 --- /dev/null +++ b/test/wal2recover3.test @@ -0,0 +1,52 @@ +# 2022 June 28 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. 
The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL2" mode. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +source $testdir/malloc_common.tcl +source $testdir/wal_common.tcl + +set testprefix wal2recover3 +ifcapable !wal {finish_test ; return } + +do_execsql_test 1.0 { + CREATE TABLE t1(x); + CREATE TABLE t2(x); + PRAGMA journal_mode = wal2; + PRAGMA wal_autocheckpoint = 0; + PRAGMA journal_size_limit = 10000; +} {wal2 0 10000} + +do_execsql_test 1.1 { + WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<1500 ) + INSERT INTO t1 SELECT i FROM s; + WITH s(i) AS ( VALUES(1) UNION ALL SELECT i+1 FROM s WHERE i<1500 ) + INSERT INTO t2 SELECT i FROM s; +} + +db_save_and_close +set fd [open sv_test.db-wal2 r+] +seek $fd 4000 +puts -nonewline $fd 0 +close $fd + +db_restore_and_reopen +do_execsql_test 1.2 { + SELECT sql FROM sqlite_schema; +} {{CREATE TABLE t1(x)} {CREATE TABLE t2(x)}} + +finish_test + diff --git a/test/wal2rewrite.test b/test/wal2rewrite.test new file mode 100644 index 0000000000..7e3b7b17b5 --- /dev/null +++ b/test/wal2rewrite.test @@ -0,0 +1,92 @@ +# 2017 September 19 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL2" mode. 
+# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +source $testdir/malloc_common.tcl +source $testdir/wal_common.tcl + +set testprefix wal2rewrite +ifcapable !wal {finish_test ; return } + +proc filesize {filename} { + if {[file exists $filename]} { + return [file size $filename] + } + return 0 +} + +foreach {tn jrnlmode} { + 1 wal + 2 wal2 +} { + reset_db + execsql "PRAGMA journal_mode = $jrnlmode" + do_execsql_test $tn.1 { + PRAGMA journal_size_limit = 10000; + PRAGMA cache_size = 5; + PRAGMA wal_autocheckpoint = 10; + + CREATE TABLE t1(a INTEGER PRIMARY KEY, b INTEGER, c BLOB); + CREATE INDEX t1b ON t1(b); + CREATE INDEX t1c ON t1(c); + + WITH s(i) AS ( + SELECT 1 UNION SELECT i+1 FROM s WHERE i<10 + ) + INSERT INTO t1 SELECT i, i, randomblob(800) FROM s; + } {10000 10} + + for {set i 0} {$i < 4} {incr i} { + do_execsql_test $tn.$i.1 { + UPDATE t1 SET c=randomblob(800) WHERE (b%10)==5 AND ($i%2) + } + do_execsql_test $tn.$i.2 { + BEGIN; + UPDATE t1 SET b=b+10, c=randomblob(800); + UPDATE t1 SET b=b+10, c=randomblob(800); + UPDATE t1 SET b=b+10, c=randomblob(800); + UPDATE t1 SET b=b+10, c=randomblob(800); + UPDATE t1 SET b=b+10, c=randomblob(800); + UPDATE t1 SET b=b+10, c=randomblob(800); + UPDATE t1 SET b=b+10, c=randomblob(800); + UPDATE t1 SET b=b+10, c=randomblob(800); + UPDATE t1 SET b=b+10, c=randomblob(800); + UPDATE t1 SET b=b+10, c=randomblob(800); + } + execsql COMMIT + + do_test $tn.$i.3 { expr [filesize test.db-wal] < 100000 } 1 + do_test $tn.$i.4 { expr [filesize test.db-wal2] < 100000 } 1 + + set sum [db eval {SELECT sum(b), md5sum(c) FROM t1}] + + do_test $tn.$i.5 { + foreach f [glob -nocomplain test.db2*] {forcedelete $f} + foreach f [glob -nocomplain test.db*] { + forcecopy $f [string map {test.db test.db2} $f] + } + + sqlite3 db2 test.db2 + db2 eval {SELECT sum(b), md5sum(c) FROM t1} + } $sum + db2 close + } +} + + + +finish_test diff --git a/test/wal2rollback.test b/test/wal2rollback.test new 
file mode 100644 index 0000000000..91842ee220 --- /dev/null +++ b/test/wal2rollback.test @@ -0,0 +1,62 @@ +# 2017 September 19 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL2" mode. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +source $testdir/malloc_common.tcl +source $testdir/wal_common.tcl + +set testprefix wal2rollback +ifcapable !wal {finish_test ; return } + +do_execsql_test 1.0 { + CREATE TABLE t1(a, b, c); + CREATE TABLE t2(a, b, c); + CREATE INDEX i1 ON t1(a); + CREATE INDEX i2 ON t1(b); + PRAGMA journal_mode = wal2; + PRAGMA cache_size = 5; + PRAGMA journal_size_limit = 10000; + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s LIMIT 1000 + ) + INSERT INTO t1 SELECT i, i, randomblob(200) FROM s; +} {wal2 10000} + +do_test 1.1 { + expr [file size test.db-wal] > 10000 +} 1 + +do_test 1.2 { + execsql { + BEGIN; + UPDATE t1 SET b=b+1; + INSERT INTO t2 VALUES(1,2,3); + } + expr [file size test.db-wal2] > 10000 +} {1} + +breakpoint +do_execsql_test 1.3 { + ROLLBACK; + SELECT * FROM t2; + SELECT count(*) FROM t1 WHERE a=b; + PRAGMA integrity_check; +} {1000 ok} + + + +finish_test diff --git a/test/wal2savepoint.test b/test/wal2savepoint.test new file mode 100644 index 0000000000..2b09164394 --- /dev/null +++ b/test/wal2savepoint.test @@ -0,0 +1,74 @@ +# 2018 December 13 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. 
+# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL2" mode. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +source $testdir/malloc_common.tcl +source $testdir/wal_common.tcl + +set testprefix wal2savepoint +ifcapable !wal {finish_test ; return } + +reset_prng_state +do_execsql_test 1.0 { + CREATE TABLE t1(a, b, c); + CREATE INDEX t1a ON t1(a); + CREATE INDEX t1b ON t1(b); + CREATE INDEX t1c ON t1(c); + PRAGMA journal_mode = wal2; + PRAGMA journal_size_limit = 15000; + PRAGMA wal_autocheckpoint = 0; + PRAGMA cache_size = 5; +} {wal2 15000 0} + +do_execsql_test 1.1 { + WITH s(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM s where i < 200) + INSERT INTO t1 SELECT random(), random(), random() FROM s; +} {} + +do_test 1.2 { + list [file size test.db] [file size test.db-wal2] \ + [expr [file size test.db-wal]>20000] +} {5120 0 1} + +do_execsql_test 1.3 { + BEGIN; + SAVEPOINT abc; + WITH s(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM s where i < 100) + INSERT INTO t1 SELECT random(), random(), random() FROM s; + ROLLBACK TO abc; + WITH s(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM s where i < 10) + INSERT INTO t1 SELECT random(), random(), random() FROM s; + COMMIT; + SELECT count(*) FROM t1; + PRAGMA integrity_check; +} {210 ok} + +do_execsql_test 1.4 { + BEGIN; + SAVEPOINT abc; + WITH s(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM s where i < 100) + INSERT INTO t1 SELECT random(), random(), random() FROM s; + ROLLBACK TO abc; + WITH s(i) AS ( SELECT 1 UNION ALL SELECT i+1 FROM s where i < 10) + INSERT INTO t1 SELECT random(), random(), random() FROM s; + COMMIT; + SELECT count(*) FROM t1; + PRAGMA integrity_check; +} {220 ok} + + 
+finish_test diff --git a/test/wal2simple.test b/test/wal2simple.test new file mode 100644 index 0000000000..1a8c1e63bd --- /dev/null +++ b/test/wal2simple.test @@ -0,0 +1,543 @@ +# 2017 September 19 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL2" mode. +# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +source $testdir/malloc_common.tcl +source $testdir/wal_common.tcl + +set testprefix wal2simple +ifcapable !wal {finish_test ; return } + +#------------------------------------------------------------------------- +# The following tests verify that a client can switch in and out of wal +# and wal2 mode. But that it is not possible to change directly from wal +# to wal2, or from wal2 to wal mode. 
+# +do_execsql_test 1.1.0 { + PRAGMA journal_mode = wal2 +} {wal2} +execsql { SELECT * FROM sqlite_master} +do_execsql_test 1.x { + PRAGMA journal_mode; + PRAGMA main.journal_mode; +} {wal2 wal2} +db close +do_test 1.1.1 { file size test.db } {1024} +do_test 1.1.2 { hexio_read test.db 18 2 } 0303 + +sqlite3 db test.db +do_execsql_test 1.2.0 { + SELECT * FROM sqlite_master; + PRAGMA journal_mode = delete; +} {delete} +db close +do_test 1.2.1 { file size test.db } {1024} +do_test 1.2.2 { hexio_read test.db 18 2 } 0101 + +sqlite3 db test.db +do_execsql_test 1.3.0 { + SELECT * FROM sqlite_master; + PRAGMA journal_mode = wal; +} {wal} +db close +do_test 1.3.1 { file size test.db } {1024} +do_test 1.3.2 { hexio_read test.db 18 2 } 0202 + +sqlite3 db test.db +do_catchsql_test 1.4.0 { + PRAGMA journal_mode = wal2; +} {1 {cannot change from wal to wal2 mode}} +do_execsql_test 1.4.1 { + PRAGMA journal_mode = wal; + PRAGMA journal_mode = delete; + PRAGMA journal_mode = wal2; + PRAGMA journal_mode = wal2; +} {wal delete wal2 wal2} +do_catchsql_test 1.4.2 { + PRAGMA journal_mode = wal; +} {1 {cannot change from wal2 to wal mode}} +db close +do_test 1.4.3 { hexio_read test.db 18 2 } 0303 + +#------------------------------------------------------------------------- +# Test that recovery in wal2 mode works. 
+# +forcedelete test.db test.db-wal test.db-wal2 +reset_db +do_execsql_test 2.0 { + CREATE TABLE t1(a INTEGER PRIMARY KEY, b); + PRAGMA journal_mode = wal2; + PRAGMA journal_size_limit = 5000; +} {wal2 5000} + +proc wal_hook {DB nm nFrame} { $DB eval { PRAGMA wal_checkpoint } } +db wal_hook {wal_hook db} + +for {set i 1} {$i <= 200} {incr i} { + execsql { INSERT INTO t1 VALUES(NULL, randomblob(100)) } + set res [db eval { SELECT sum(a), md5sum(b) FROM t1 }] + + do_test 2.1.$i { + foreach f [glob -nocomplain test.db2*] { forcedelete $f } + forcecopy test.db test.db2 + forcecopy test.db-wal test.db2-wal + forcecopy test.db-wal2 test.db2-wal2 + + sqlite3 db2 test.db2 + db2 eval { SELECT sum(a), md5sum(b) FROM t1 } + } $res + + db2 close +} + +#------------------------------------------------------------------------- + +reset_db +do_execsql_test 3.0 { + CREATE TABLE t1(x BLOB, y INTEGER PRIMARY KEY); + CREATE INDEX i1 ON t1(x); + PRAGMA cache_size = 5; + PRAGMA journal_mode = wal2; +} {wal2} + +do_test 3.1 { + execsql BEGIN + for {set i 1} {$i < 1000} {incr i} { + execsql { INSERT INTO t1 VALUES(randomblob(800), $i) } + } + execsql COMMIT +} {} + +do_execsql_test 3.2 { + PRAGMA integrity_check; +} {ok} + +#------------------------------------------------------------------------- +catch { db close } +foreach f [glob -nocomplain test.db*] { forcedelete $f } +reset_db +do_execsql_test 4.0 { + CREATE TABLE t1(x, y); + PRAGMA journal_mode = wal2; +} {wal2} + +do_execsql_test 4.1 { + SELECT * FROM t1; +} {} + +do_execsql_test 4.2 { + INSERT INTO t1 VALUES(1, 2); +} {} + +do_execsql_test 4.3 { + SELECT * FROM t1; +} {1 2} + +do_test 4.4 { + sqlite3 db2 test.db + execsql { SELECT * FROM t1 } db2 +} {1 2} + +do_test 4.5 { + lsort [glob test.db*] +} {test.db test.db-shm test.db-wal test.db-wal2} + +do_test 4.6 { + db close + db2 close + sqlite3 db test.db + execsql { SELECT * FROM t1 } +} {1 2} + +do_execsql_test 4.7 { + PRAGMA journal_size_limit = 4000; + INSERT INTO t1 
VALUES(3, 4); + INSERT INTO t1 VALUES(5, 6); + INSERT INTO t1 VALUES(7, 8); + INSERT INTO t1 VALUES(9, 10); + INSERT INTO t1 VALUES(11, 12); + INSERT INTO t1 VALUES(13, 14); + INSERT INTO t1 VALUES(15, 16); + INSERT INTO t1 VALUES(17, 18); + SELECT * FROM t1; +} {4000 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18} + +do_test 4.8 { + sqlite3 db2 test.db + execsql { SELECT * FROM t1 } db2 +} {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18} + +do_test 4.9 { + db close + db2 close + lsort [glob test.db*] +} {test.db} + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 5.0 { + CREATE TABLE t1(a INTEGER PRIMARY KEY, b, c); + CREATE INDEX i1 ON t1(b, c); + PRAGMA journal_mode = wal2; + PRAGMA journal_size_limit = 4000; +} {wal2 4000} + +proc wal_hook {DB nm nFrame} { + $DB eval { PRAGMA wal_checkpoint } +} +db wal_hook [list wal_hook db] + + +foreach js {4000 8000 12000} { + foreach NROW [list 100 200 300 400 500 600 1000] { + do_test 5.$js.$NROW.1 { + db eval "DELETE FROM t1" + db eval "PRAGMA journal_size_limit = $js" + set nTotal 0 + for {set i 0} {$i < $NROW} {incr i} { + db eval { INSERT INTO t1 VALUES($i, $i, randomblob(abs(random()%50))) } + incr nTotal $i + } + set {} {} + } {} + + do_test 5.$js.$NROW.2 { + sqlite3 db2 test.db + db2 eval { + PRAGMA integrity_check; + SELECT count(*), sum(b) FROM t1; + } + } [list ok $NROW $nTotal] + + db2 close + } +} + + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 6.0 { + CREATE TABLE tx(x); + PRAGMA journal_mode = wal2; + PRAGMA journal_size_limit = 3500; +} {wal2 3500} + +do_test 6.1 { + for {set i 0} {$i < 10} {incr i} { + execsql "CREATE TABLE t$i (x);" + } +} {} + +do_test 6.2.1 { + foreach f [glob -nocomplain test.db2*] { forcedelete $f } + forcecopy test.db-wal2 test.db2-wal2 + sqlite3 db2 test.db2 + db2 eval { SELECT * FROM sqlite_master } +} {} +do_test 6.2.2 { + db2 eval { + PRAGMA journal_mode = wal2; + SELECT * 
FROM sqlite_master; + } +} {wal2} + +do_test 6.3.1 { + db2 close + foreach f [glob -nocomplain test.db2*] { forcedelete $f } + forcecopy test.db-wal2 test.db2-wal2 + forcecopy test.db test.db2 + sqlite3 db2 test.db2 + db2 eval { SELECT * FROM sqlite_master } +} {table tx tx 2 {CREATE TABLE tx(x)}} +do_test 6.3.2 { + db2 eval { + PRAGMA journal_mode = wal2; + SELECT * FROM sqlite_master; + } +} {wal2 table tx tx 2 {CREATE TABLE tx(x)}} + +do_test 6.4.1 { + db2 close + foreach f [glob -nocomplain test.db2*] { forcedelete $f } + forcecopy test.db-wal2 test.db2-wal2 + forcecopy test.db-wal test.db2-wal + sqlite3 db2 test.db2 + db2 eval { SELECT * FROM sqlite_master } +} {} +do_test 6.4.2 { + db2 eval { + PRAGMA journal_mode = wal2; + SELECT * FROM sqlite_master; + } +} {wal2} +db2 close + +#------------------------------------------------------------------------- +reset_db +sqlite3 db2 test.db +do_execsql_test 7.0 { + PRAGMA journal_size_limit = 10000; + PRAGMA journal_mode = wal2; + PRAGMA wal_autocheckpoint = 0; + BEGIN; + CREATE TABLE t1(a); + INSERT INTO t1 VALUES( randomblob(8000) ); + COMMIT; +} {10000 wal2 0} + +do_test 7.1 { + list [file size test.db-wal] [file size test.db-wal2] +} {9464 0} + +# Connection db2 is holding a PART1 lock. +# +# 7.2.2: Test that the PART1 does not prevent db from switching to the +# other wal file. +# +# 7.2.3: Test that the PART1 does prevent a checkpoint of test.db-wal. +# +# 7.2.4: Test that after the PART1 is released the checkpoint is possible. 
+# +do_test 7.2.1 { + execsql { + BEGIN; + SELECT count(*) FROM t1; + } db2 +} {1} +do_test 7.2.2 { + execsql { + INSERT INTO t1 VALUES( randomblob(800) ); + INSERT INTO t1 VALUES( randomblob(800) ); + } + list [file size test.db-wal] [file size test.db-wal2] [file size test.db] +} {13656 3176 1024} +do_test 7.2.3 { + execsql { PRAGMA wal_checkpoint } + list [file size test.db-wal] [file size test.db-wal2] [file size test.db] +} {13656 3176 1024} +do_test 7.2.4 { + execsql { END } db2 + execsql { PRAGMA wal_checkpoint } + list [file size test.db-wal] [file size test.db-wal2] [file size test.db] +} {13656 3176 11264} + +# Connection db2 is holding a PART2_FULL1 lock. +# +# 7.3.2: Test that the lock does not prevent checkpointing. +# +# 7.3.3: Test that the lock does prevent the writer from overwriting +# test.db-wal. +# +# 7.3.4: Test that after the PART2_FULL1 is released the writer can +# switch wal files and overwrite test.db-wal +# +db close +db2 close +sqlite3 db test.db +sqlite3 db2 test.db +do_test 7.3.1 { + execsql { + PRAGMA wal_autocheckpoint = 0; + PRAGMA journal_size_limit = 10000; + INSERT INTO t1 VALUES(randomblob(10000)); + INSERT INTO t1 VALUES(randomblob(500)); + } + execsql { + BEGIN; + SELECT count(*) FROM t1; + } db2 + list [file size test.db-wal] [file size test.db-wal2] [file size test.db] +} {12608 3176 12288} +do_test 7.3.2 { + execsql { PRAGMA wal_checkpoint } + list [file size test.db-wal] [file size test.db-wal2] [file size test.db] +} {12608 3176 22528} +do_test 7.3.3 { + execsql { + INSERT INTO t1 VALUES(randomblob(10000)); + INSERT INTO t1 VALUES(randomblob(500)); + } + list [file size test.db-wal] [file size test.db-wal2] [file size test.db] +} {12608 18896 22528} +do_test 7.3.4 { + execsql END db2 + execsql { INSERT INTO t1 VALUES(randomblob(5000)); } + list [file size test.db-wal] [file size test.db-wal2] [file size test.db] +} {12608 18896 22528} + +# Connection db2 is holding a PART2 lock. 
+# +# 7.4.2: Test that the lock does not prevent writer switching to test.db-wal. +# +# 7.4.3: Test that the lock does prevent checkpointing of test.db-wal2. +# +# 7.4.4: Test that after the PART2 is released test.db-wal2 can be +# checkpointed. +# +db close +db2 close +breakpoint +sqlite3 db test.db +sqlite3 db2 test.db +do_test 7.4.1 { + execsql { + PRAGMA wal_autocheckpoint = 0; + PRAGMA journal_size_limit = 10000; + INSERT INTO t1 VALUES(randomblob(10000)); + INSERT INTO t1 VALUES(randomblob(10000)); + PRAGMA wal_checkpoint; + } + execsql { + BEGIN; + SELECT count(*) FROM t1; + } db2 + list [file size test.db-wal] [file size test.db-wal2] [file size test.db] +} {12608 12608 50176} +do_test 7.4.2 { + execsql { + INSERT INTO t1 VALUES(randomblob(5000)); + } + list [file size test.db-wal] [file size test.db-wal2] [file size test.db] +} {12608 12608 50176} +do_test 7.4.3 { + execsql { PRAGMA wal_checkpoint } + list [file size test.db-wal] [file size test.db-wal2] [file size test.db] +} {12608 12608 50176} +do_test 7.4.4 { + execsql END db2 + execsql { PRAGMA wal_checkpoint } + list [file size test.db-wal] [file size test.db-wal2] [file size test.db] +} {12608 12608 60416} + +# Connection db2 is holding a PART1_FULL2 lock. +# +# 7.5.2: Test that the lock does not prevent a checkpoint of test.db-wal2. +# +# 7.5.3: Test that the lock does prevent the writer from overwriting +# test.db-wal2. +# +# 7.5.4: Test that after the PART1_FULL2 lock is released, the writer +# can switch to test.db-wal2.
+# +db close +db2 close +sqlite3 db test.db +sqlite3 db2 test.db +do_test 7.5.1 { + execsql { + PRAGMA wal_autocheckpoint = 0; + PRAGMA journal_size_limit = 10000; + INSERT INTO t1 VALUES(randomblob(10000)); + INSERT INTO t1 VALUES(randomblob(10000)); + PRAGMA wal_checkpoint; + INSERT INTO t1 VALUES(randomblob(5000)); + } + execsql { + BEGIN; + SELECT count(*) FROM t1; + } db2 + list [file size test.db-wal] [file size test.db-wal2] [file size test.db] +} {12608 12608 76800} +do_test 7.5.2 { + execsql { PRAGMA wal_checkpoint } + list [file size test.db-wal] [file size test.db-wal2] [file size test.db] +} {12608 12608 87040} +do_test 7.5.3.1 { + execsql { INSERT INTO t1 VALUES(randomblob(5000)) } + list [file size test.db-wal] [file size test.db-wal2] [file size test.db] +} {14704 12608 87040} +do_test 7.5.3.2 { + execsql { INSERT INTO t1 VALUES(randomblob(5000)) } + list [file size test.db-wal] [file size test.db-wal2] [file size test.db] +} {22040 12608 87040} +do_test 7.5.4 { + execsql END db2 + execsql { INSERT INTO t1 VALUES(randomblob(5000)) } + list [file size test.db-wal] [file size test.db-wal2] [file size test.db] +} {22040 12608 87040} + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 8.0 { + PRAGMA journal_size_limit = 10000; + PRAGMA journal_mode = wal2; + CREATE TABLE t1(x); + INSERT INTO t1 VALUES( hex( randomblob(5000) ) ); + INSERT INTO t1 VALUES( hex( randomblob(5000) ) ); + INSERT INTO t1 VALUES( hex( randomblob(5000) ) ); + INSERT INTO t1 VALUES( hex( randomblob(5000) ) ); + BEGIN; + INSERT INTO t1 VALUES( hex( randomblob(5000) ) ); +} {10000 wal2} + +sqlite3 db2 test.db +do_execsql_test -db db2 8.1 { + PRAGMA wal_checkpoint; +} {0 50 13} + +do_execsql_test 8.2 { + COMMIT; +} + +db2 close + + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 9.0 { + PRAGMA journal_size_limit = 10000; + PRAGMA journal_mode = wal2; + CREATE TABLE t1(x); + 
INSERT INTO t1 VALUES( hex( randomblob(5000) ) ); + INSERT INTO t1 VALUES( hex( randomblob(5000) ) ); + INSERT INTO t1 VALUES( hex( randomblob(5000) ) ); + INSERT INTO t1 VALUES( hex( randomblob(5000) ) ); +} {10000 wal2} + +do_execsql_test 9.1 { + PRAGMA wal_checkpoint; +} {0 50 13} + + +#------------------------------------------------------------------------- +# Check that it is possible to do a non-PASSIVE checkpoint on a wal2 +# db without blocking writers. +# +reset_db +do_execsql_test 10.0 { + PRAGMA journal_size_limit = 10000; + PRAGMA journal_mode = wal2; + CREATE TABLE t1(x); + INSERT INTO t1 VALUES( hex( randomblob(5000) ) ); + INSERT INTO t1 VALUES( hex( randomblob(5000) ) ); + INSERT INTO t1 VALUES( hex( randomblob(5000) ) ); + INSERT INTO t1 VALUES( hex( randomblob(5000) ) ); + BEGIN; + INSERT INTO t1 VALUES( hex( randomblob(5000) ) ); +} {10000 wal2} + +sqlite3 db2 test.db +do_execsql_test -db db2 10.1 { + PRAGMA wal_checkpoint = FULL; +} {0 50 13} + +do_execsql_test 10.2 { + COMMIT; +} + +finish_test + diff --git a/test/wal2snapshot.test b/test/wal2snapshot.test new file mode 100644 index 0000000000..c4331f6702 --- /dev/null +++ b/test/wal2snapshot.test @@ -0,0 +1,94 @@ +# 2018 December 5 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL2" mode. 
+# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl + +set testprefix wal2snapshot +ifcapable !wal {finish_test ; return } +ifcapable !snapshot {finish_test; return} + +foreach {tn mode} {1 wal 2 wal2} { + reset_db + do_execsql_test $tn.1 "PRAGMA journal_mode = $mode" $mode + + do_execsql_test $tn.2 { + CREATE TABLE t1(a, b); + INSERT INTO t1 VALUES(1, 2); + INSERT INTO t1 VALUES(3, 4); + BEGIN; + } + + # Check that sqlite3_snapshot_get() is an error for a wal2 db. + # + if {$tn==1} { + do_test 1.3 { + set S [sqlite3_snapshot_get db main] + sqlite3_snapshot_free $S + } {} + } else { + do_test 2.3 { + list [catch { sqlite3_snapshot_get db main } msg] $msg + } {1 SQLITE_ERROR} + } + + # Check that sqlite3_snapshot_recover() is an error for a wal2 db. + # + do_execsql_test $tn.4 COMMIT + if {$tn==1} { + do_test 1.5 { + sqlite3_snapshot_recover db main + } {} + } else { + do_test 2.5 { + list [catch { sqlite3_snapshot_recover db main } msg] $msg + } {1 SQLITE_ERROR} + } + + # Check that sqlite3_snapshot_open() is an error for a wal2 db. 
+ # + if {$tn==1} { + do_test 1.6 { + execsql BEGIN + set SNAPSHOT [sqlite3_snapshot_get_blob db main] + sqlite3_snapshot_open_blob db main $SNAPSHOT + execsql COMMIT + } {} + } else { + + do_test 2.6.1 { + execsql BEGIN + set res [ + list [catch { sqlite3_snapshot_open_blob db main $SNAPSHOT } msg] $msg + ] + execsql COMMIT + set res + } {1 SQLITE_ERROR} + do_test 2.6.2 { + execsql BEGIN + execsql {SELECT * FROM sqlite_master} + set res [ + list [catch { sqlite3_snapshot_open_blob db main $SNAPSHOT } msg] $msg + ] + execsql COMMIT + set res + } {1 SQLITE_ERROR} + } +} + + +finish_test + + diff --git a/test/wal64k.test b/test/wal64k.test index bacb14328a..5b17580e42 100644 --- a/test/wal64k.test +++ b/test/wal64k.test @@ -17,7 +17,7 @@ set testdir [file dirname $argv0] source $testdir/tester.tcl set testprefix wal64k -ifcapable !wal {finish_test ; return } +ifcapable !wal||wal_bighash {finish_test ; return } if {[llength [info commands test_syscall]]==0} { finish_test diff --git a/test/walckptnoop.test b/test/walckptnoop.test index 89055316ff..3a25b1a515 100644 --- a/test/walckptnoop.test +++ b/test/walckptnoop.test @@ -112,3 +112,114 @@ do_execsql_test 1.10 { } {delete 0 -1 -1} finish_test +||||||| COMMON ANCESTOR content follows ||||||||||||||||||||||||| (line 1) +# 2025 September 5 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA wal_checkpoint = noop" mode. 
+# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +source $testdir/lock_common.tcl +source $testdir/malloc_common.tcl +source $testdir/wal_common.tcl + +set testprefix walckptnoop + +ifcapable !wal {finish_test ; return } + +set VAL 123 + +proc myrand {} { + global VAL + + set A 1103515245 + set C 12345 + set M 2147483648 + + set VAL [expr {($A * $VAL + $C) % $M}] + return $VAL +} + +proc myrandomblob {n} { + set l [list] + for {set i 0} {$i < $n} {incr i} { + lappend l [expr [myrand] % 256] + } + binary format c* $l +} + +db func myrandomblob myrandomblob + + +do_execsql_test 1.0 { + PRAGMA page_size=1024; + PRAGMA auto_vacuum=NONE; + PRAGMA secure_delete=OFF; + VACUUM; + CREATE TABLE t1(x INTEGER PRIMARY KEY, y TEXT); + CREATE INDEX i1 ON t1(y); + PRAGMA journal_mode = wal; + + WITH s(i) AS ( + SELECT 1 UNION ALL SELECT i+1 FROM s WHERE i<1000 + ) + INSERT INTO t1 SELECT NULL, hex(myrandomblob(64)) FROM s; +} {0 wal} + +do_execsql_test 1.1 { + PRAGMA wal_checkpoint = noop; +} {0 298 0} +do_execsql_test 1.2 { + PRAGMA wal_checkpoint = noop; +} {0 298 0} +do_execsql_test 1.3 { + PRAGMA wal_checkpoint = passive; +} {0 298 298} +do_execsql_test 1.4 { + PRAGMA wal_checkpoint = noop; +} {0 298 298} + +db_save_and_close +db_restore_and_reopen +do_execsql_test 1.5 { + PRAGMA wal_checkpoint = noop; +} {0 298 0} + +db close +sqlite3 db test.db +db eval { + PRAGMA auto_vacuum=NONE; + PRAGMA secure_delete=OFF; +} +do_execsql_test 1.6 { + PRAGMA wal_checkpoint = noop; +} {0 0 0} + +do_catchsql_test 1.7 { + BEGIN; + DELETE FROM t1; + PRAGMA wal_checkpoint = noop; +} {1 {database table is locked}} + +do_catchsql_test 1.8 { + COMMIT; + PRAGMA wal_checkpoint = noop; +} {0 {0 5 0}} + +do_execsql_test 1.9 { + PRAGMA journal_mode = delete; + PRAGMA wal_checkpoint = noop; +} {delete 0 -1 -1} + +finish_test diff --git a/test/walprotocol2.test b/test/walprotocol2.test index 0792c9aae0..389ccb5528 100644 --- a/test/walprotocol2.test +++ b/test/walprotocol2.test @@ 
-85,7 +85,7 @@ proc lock_callback {method filename handle lock} { db2 eval { INSERT INTO x VALUES('x') } } } -db timeout 10 +db timeout 1100 do_catchsql_test 2.4 { BEGIN EXCLUSIVE; } {0 {}} diff --git a/test/walrofault.test b/test/walrofault.test index 3e66e2d920..76f7bebffa 100644 --- a/test/walrofault.test +++ b/test/walrofault.test @@ -55,6 +55,4 @@ do_faultsim_test 1 -faults oom* -prep { faultsim_test_result {0 {hello world ! world hello}} } - - finish_test diff --git a/tool/mkctimec.tcl b/tool/mkctimec.tcl index 64d4a121a8..76e7e1b68a 100755 --- a/tool/mkctimec.tcl +++ b/tool/mkctimec.tcl @@ -394,6 +394,8 @@ set options(THREADSAFE) { #endif } +set options(WAL2) { "WAL2", } + proc trim_name {in} { set ret $in if {[string range $in 0 6]=="SQLITE_"} { diff --git a/tool/mkpragmatab.tcl b/tool/mkpragmatab.tcl index 70988cf82e..ff18de5638 100644 --- a/tool/mkpragmatab.tcl +++ b/tool/mkpragmatab.tcl @@ -115,6 +115,12 @@ set pragma_def { IF: !defined(SQLITE_OMIT_FLAG_PRAGMAS) IF: defined(SQLITE_DEBUG) + NAME: noop_update + TYPE: FLAG + ARG: SQLITE_NoopUpdate + IF: !defined(SQLITE_OMIT_FLAG_PRAGMAS) + IF: defined(SQLITE_ENABLE_NOOP_UPDATE) + NAME: ignore_check_constraints TYPE: FLAG ARG: SQLITE_IgnoreChecks diff --git a/tool/tserver.c b/tool/tserver.c new file mode 100644 index 0000000000..3c0768714b --- /dev/null +++ b/tool/tserver.c @@ -0,0 +1,643 @@ +/* +** 2017 June 7 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** Simple multi-threaded server used for informal testing of concurrency +** between connections in different threads. Listens for tcp/ip connections +** on port 9999 of the 127.0.0.1 interface only. 
To build: +** +** gcc -g $(TOP)/tool/tserver.c sqlite3.o -lpthread -o tserver +** +** To run using "x.db" as the db file: +** +** ./tserver x.db +** +** To connect, open a client socket on port 9999 and start sending commands. +** Commands are either SQL - which must be terminated by a semi-colon, or +** dot-commands, which must be terminated by a newline. If an SQL statement +** is seen, it is prepared and added to an internal list. +** +** Dot-commands are: +** +** .list Display all SQL statements in the list. +** .quit Disconnect. +** .run Run all SQL statements in the list. +** .repeats N Configure the number of repeats per ".run". +** .seconds N Configure the number of seconds to ".run" for. +** .mutex_commit Add a "COMMIT" protected by g.commit_mutex +** to the current SQL. +** .stop Stop the tserver process - exit(0). +** .checkpoint N Set this client's checkpoint threshold to N wal frames (only consulted when the global threshold is <=0). +** .integrity_check Add a "PRAGMA integrity_check" to the current SQL. +** +** Example input: +** +** BEGIN; +** INSERT INTO t1 VALUES(randomblob(10), randomblob(100)); +** INSERT INTO t1 VALUES(randomblob(10), randomblob(100)); +** INSERT INTO t1 VALUES(randomblob(10), randomblob(100)); +** COMMIT; +** .repeats 100000 +** .run +** +*/ +#define TSERVER_PORTNUMBER 9999 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sqlite3.h" + +#define TSERVER_DEFAULT_CHECKPOINT_THRESHOLD 3900 + +/* Global variables */ +struct TserverGlobal { + char *zDatabaseName; /* Database used by this server */ + char *zVfs; + sqlite3_mutex *commit_mutex; + sqlite3 *db; /* Global db handle */ + + /* The following use native pthreads instead of a portable interface. This + ** is because a condition variable, as well as a mutex, is required.
*/ + pthread_mutex_t ckpt_mutex; + pthread_cond_t ckpt_cond; + int nThreshold; /* Checkpoint when wal is this large */ + int bCkptRequired; /* True if wal checkpoint is required */ + int nRun; /* Number of clients in ".run" */ + int nWait; /* Number of clients waiting on ckpt_cond */ +}; + +static struct TserverGlobal g = {0}; + +typedef struct ClientSql ClientSql; +struct ClientSql { + sqlite3_stmt *pStmt; + int flags; +}; + +#define TSERVER_CLIENTSQL_MUTEX 0x0001 +#define TSERVER_CLIENTSQL_INTEGRITY 0x0002 + +typedef struct ClientCtx ClientCtx; +struct ClientCtx { + sqlite3 *db; /* Database handle for this client */ + int fd; /* Client fd */ + int nRepeat; /* Number of times to repeat SQL */ + int nSecond; /* Number of seconds to run for */ + ClientSql *aPrepare; /* Array of prepared statements */ + int nPrepare; /* Valid size of aPrepare[] */ + int nAlloc; /* Allocated size of aPrepare[] */ + + int nClientThreshold; /* Per-client checkpoint threshold, in wal frames */ + int bClientCkptRequired; /* True to do a checkpoint */ +}; + +static int is_eol(int i){ + return (i=='\n' || i=='\r'); +} +static int is_whitespace(int i){ + return (i==' ' || i=='\t' || is_eol(i)); +} + +/* +** Implementation of SQL scalar function usleep().
+*/ +static void usleepFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + int nUs; + sqlite3_vfs *pVfs = (sqlite3_vfs*)sqlite3_user_data(context); + assert( argc==1 ); + nUs = sqlite3_value_int64(argv[0]); + pVfs->xSleep(pVfs, nUs); +} + +static void trim_string(const char **pzStr, int *pnStr){ + const char *zStr = *pzStr; + int nStr = *pnStr; + + while( nStr>0 && is_whitespace(zStr[0]) ){ + zStr++; + nStr--; + } + while( nStr>0 && is_whitespace(zStr[nStr-1]) ){ + nStr--; + } + + *pzStr = zStr; + *pnStr = nStr; +} + +static int send_message(ClientCtx *p, const char *zFmt, ...){ + char *zMsg; + va_list ap; /* Vararg list */ + va_start(ap, zFmt); + int res = -1; + + zMsg = sqlite3_vmprintf(zFmt, ap); + if( zMsg ){ + res = write(p->fd, zMsg, strlen(zMsg)); + } + sqlite3_free(zMsg); + va_end(ap); + + return (res<0); +} + +static int handle_some_sql(ClientCtx *p, const char *zSql, int nSql){ + const char *zTail = zSql; + int nTail = nSql; + int rc = SQLITE_OK; + + while( rc==SQLITE_OK ){ + if( p->nPrepare>=p->nAlloc ){ + int nByte = (p->nPrepare+32) * sizeof(ClientSql); + ClientSql *aNew = sqlite3_realloc(p->aPrepare, nByte); + if( aNew ){ + memset(&aNew[p->nPrepare], 0, sizeof(ClientSql)*32); + p->aPrepare = aNew; + p->nAlloc = p->nPrepare+32; + }else{ + rc = SQLITE_NOMEM; + break; + } + } + rc = sqlite3_prepare_v2( + p->db, zTail, nTail, &p->aPrepare[p->nPrepare].pStmt, &zTail + ); + if( rc!=SQLITE_OK ){ + send_message(p, "error - %s (eec=%d)\n", sqlite3_errmsg(p->db), + sqlite3_extended_errcode(p->db) + ); + rc = 1; + break; + } + if( p->aPrepare[p->nPrepare].pStmt==0 ){ + break; + } + p->nPrepare++; + nTail = nSql - (zTail-zSql); + rc = send_message(p, "ok (%d SQL statements)\n", p->nPrepare); + } + + return rc; +} + +/* +** Return a micro-seconds resolution timer. 
+*/ +static sqlite3_int64 get_timer(void){ + struct timeval t; + gettimeofday(&t, 0); + return (sqlite3_int64)t.tv_usec + ((sqlite3_int64)t.tv_sec * 1000000); +} + +static void clear_sql(ClientCtx *p){ + int j; + for(j=0; jnPrepare; j++){ + sqlite3_finalize(p->aPrepare[j].pStmt); + } + p->nPrepare = 0; +} + +/* +** The sqlite3_wal_hook() callback used by all client database connections. +*/ +static int clientWalHook(void *pArg, sqlite3 *db, const char *zDb, int nFrame){ + if( g.nThreshold>0 ){ + if( nFrame>=g.nThreshold ){ + g.bCkptRequired = 1; + } + }else{ + ClientCtx *pCtx = (ClientCtx*)pArg; + if( pCtx->nClientThreshold && nFrame>=pCtx->nClientThreshold ){ + pCtx->bClientCkptRequired = 1; + } + } + return SQLITE_OK; +} + +static int handle_run_command(ClientCtx *p){ + int i, j; + int nBusy = 0; + sqlite3_int64 t0 = get_timer(); + sqlite3_int64 t1 = t0; + sqlite3_int64 tCommit = 0; + int nT1 = 0; + int nTBusy1 = 0; + int rc = SQLITE_OK; + + pthread_mutex_lock(&g.ckpt_mutex); + g.nRun++; + pthread_mutex_unlock(&g.ckpt_mutex); + + for(j=0; (p->nRepeat<=0 || jnRepeat) && rc==SQLITE_OK; j++){ + sqlite3_int64 t2; + + for(i=0; inPrepare && rc==SQLITE_OK; i++){ + sqlite3_stmt *pStmt = p->aPrepare[i].pStmt; + + /* If the MUTEX flag is set, grab g.commit_mutex before executing + ** the SQL statement (which is always "COMMIT" in this case). */ + if( p->aPrepare[i].flags & TSERVER_CLIENTSQL_MUTEX ){ + sqlite3_mutex_enter(g.commit_mutex); + tCommit -= get_timer(); + } + + /* Execute the statement */ + if( p->aPrepare[i].flags & TSERVER_CLIENTSQL_INTEGRITY ){ + sqlite3_step(pStmt); + if( sqlite3_stricmp("ok", (const char*)sqlite3_column_text(pStmt, 0)) ){ + send_message(p, "error - integrity_check failed: %s\n", + sqlite3_column_text(pStmt, 0) + ); + } + sqlite3_reset(pStmt); + } + while( sqlite3_step(pStmt)==SQLITE_ROW ); + rc = sqlite3_reset(pStmt); + + /* Relinquish the g.commit_mutex mutex if required. 
*/ + if( p->aPrepare[i].flags & TSERVER_CLIENTSQL_MUTEX ){ + tCommit += get_timer(); + sqlite3_mutex_leave(g.commit_mutex); + } + + if( (rc & 0xFF)==SQLITE_BUSY ){ + if( sqlite3_get_autocommit(p->db)==0 ){ + sqlite3_exec(p->db, "ROLLBACK", 0, 0, 0); + } + nBusy++; + rc = SQLITE_OK; + break; + } + else if( rc!=SQLITE_OK ){ + send_message(p, "error - %s (eec=%d)\n", sqlite3_errmsg(p->db), + sqlite3_extended_errcode(p->db) + ); + } + } + + t2 = get_timer(); + if( t2>=(t1+1000000) ){ + sqlite3_int64 nUs = (t2 - t1); + sqlite3_int64 nDone = (j+1 - nBusy - nT1); + + rc = send_message( + p, "(%d done @ %lld per second, %d busy)\n", + (int)nDone, (1000000*nDone + nUs/2) / nUs, nBusy - nTBusy1 + ); + t1 = t2; + nT1 = j+1 - nBusy; + nTBusy1 = nBusy; + if( p->nSecond>0 && ((sqlite3_int64)p->nSecond*1000000)<=t1-t0 ) break; + } + + /* Global checkpoint handling. */ + if( g.nThreshold>0 ){ + pthread_mutex_lock(&g.ckpt_mutex); + if( rc==SQLITE_OK && g.bCkptRequired ){ + if( g.nWait==g.nRun-1 ){ + /* All other clients are already waiting on the condition variable. + ** Run the checkpoint, signal the condition and move on. 
*/ + rc = sqlite3_wal_checkpoint(p->db, "main"); + g.bCkptRequired = 0; + pthread_cond_broadcast(&g.ckpt_cond); + }else{ + assert( g.nWaitbClientCkptRequired ){ + rc = sqlite3_wal_checkpoint(p->db, "main"); + if( rc==SQLITE_BUSY ) rc = SQLITE_OK; + assert( rc==SQLITE_OK ); + p->bClientCkptRequired = 0; + } + } + + if( rc==SQLITE_OK ){ + int nMs = (get_timer() - t0) / 1000; + send_message(p, "ok (%d/%d SQLITE_BUSY)\n", nBusy, j); + if( p->nRepeat<=0 ){ + send_message(p, "### ok %d busy %d ms %d commit-ms %d\n", + j-nBusy, nBusy, nMs, (int)(tCommit / 1000) + ); + } + } + clear_sql(p); + + pthread_mutex_lock(&g.ckpt_mutex); + g.nRun--; + pthread_mutex_unlock(&g.ckpt_mutex); + + return rc; +} + +static int handle_dot_command(ClientCtx *p, const char *zCmd, int nCmd){ + int n; + int rc = 0; + const char *z = &zCmd[1]; + const char *zArg; + int nArg; + + assert( zCmd[0]=='.' ); + for(n=0; n<(nCmd-1); n++){ + if( is_whitespace(z[n]) ) break; + } + + zArg = &z[n]; + nArg = nCmd-n; + trim_string(&zArg, &nArg); + + if( n>=1 && n<=4 && 0==strncmp(z, "list", n) ){ + int i; + for(i=0; rc==0 && inPrepare; i++){ + const char *zSql = sqlite3_sql(p->aPrepare[i].pStmt); + int nSql = strlen(zSql); + trim_string(&zSql, &nSql); + rc = send_message(p, "%d: %.*s\n", i, nSql, zSql); + } + } + + else if( n>=1 && n<=4 && 0==strncmp(z, "quit", n) ){ + rc = -1; + } + + else if( n>=2 && n<=7 && 0==strncmp(z, "repeats", n) ){ + if( nArg ){ + p->nRepeat = strtol(zArg, 0, 0); + if( p->nRepeat>0 ) p->nSecond = 0; + } + rc = send_message(p, "ok (repeat=%d)\n", p->nRepeat); + } + + else if( n>=2 && n<=3 && 0==strncmp(z, "run", n) ){ + rc = handle_run_command(p); + } + + else if( n>=2 && n<=7 && 0==strncmp(z, "seconds", n) ){ + if( nArg ){ + p->nSecond = strtol(zArg, 0, 0); + if( p->nSecond>0 ) p->nRepeat = 0; + } + rc = send_message(p, "ok (seconds=%d)\n", p->nSecond); + } + + else if( n>=1 && n<=12 && 0==strncmp(z, "mutex_commit", n) ){ + rc = handle_some_sql(p, "COMMIT;", 7); + if( rc==SQLITE_OK 
){ + p->aPrepare[p->nPrepare-1].flags |= TSERVER_CLIENTSQL_MUTEX; + } + } + + else if( n>=1 && n<=10 && 0==strncmp(z, "checkpoint", n) ){ + if( nArg ){ + p->nClientThreshold = strtol(zArg, 0, 0); + } + rc = send_message(p, "ok (checkpoint=%d)\n", p->nClientThreshold); + } + + else if( n>=2 && n<=4 && 0==strncmp(z, "stop", n) ){ + sqlite3_close(g.db); + exit(0); + } + + else if( n>=2 && n<=15 && 0==strncmp(z, "integrity_check", n) ){ + rc = handle_some_sql(p, "PRAGMA integrity_check;", 23); + if( rc==SQLITE_OK ){ + p->aPrepare[p->nPrepare-1].flags |= TSERVER_CLIENTSQL_INTEGRITY; + } + } + + else{ + send_message(p, + "unrecognized dot command: %.*s\n" + "should be \"list\", \"run\", \"repeats\", \"mutex_commit\", " + "\"checkpoint\", \"integrity_check\" or \"seconds\"\n", n, z + ); + rc = 1; + } + + return rc; +} + +static void *handle_client(void *pArg){ + char zCmd[32*1024]; /* Read buffer */ + int nCmd = 0; /* Valid bytes in zCmd[] */ + int res; /* Result of read() call */ + int rc = SQLITE_OK; + + ClientCtx ctx; + memset(&ctx, 0, sizeof(ClientCtx)); + + ctx.fd = (int)(intptr_t)pArg; + ctx.nRepeat = 1; + rc = sqlite3_open_v2(g.zDatabaseName, &ctx.db, + SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE, g.zVfs + ); + if( rc!=SQLITE_OK ){ + fprintf(stderr, "sqlite3_open(): %s\n", sqlite3_errmsg(ctx.db)); + return 0; + } + sqlite3_create_function( + ctx.db, "usleep", 1, SQLITE_UTF8, (void*)sqlite3_vfs_find(0), + usleepFunc, 0, 0 + ); + + /* Register the wal-hook with the new client connection */ + sqlite3_wal_hook(ctx.db, clientWalHook, (void*)&ctx); + + while( rc==SQLITE_OK ){ + int i; + int iStart; + int nConsume; + res = read(ctx.fd, &zCmd[nCmd], sizeof(zCmd)-nCmd-1); + if( res<=0 ) break; + nCmd += res; + if( nCmd>=sizeof(zCmd)-1 ){ + fprintf(stderr, "oversized (>32KiB) message\n"); + res = 0; + break; + } + zCmd[nCmd] = '\0'; + + do { + nConsume = 0; + + /* Gobble up any whitespace */ + iStart = 0; + while( is_whitespace(zCmd[iStart]) ) iStart++; + + if( 
zCmd[iStart]=='.' ){ + /* This is a dot-command. Search for end-of-line. */ + for(i=iStart; i0 ){ + nCmd = nCmd-nConsume; + if( nCmd>0 ){ + memmove(zCmd, &zCmd[nConsume], nCmd); + } + } + }while( rc==SQLITE_OK && nConsume>0 ); + } + + fprintf(stdout, "Client %d disconnects (rc=%d)\n", ctx.fd, rc); + fflush(stdout); + close(ctx.fd); + clear_sql(&ctx); + sqlite3_free(ctx.aPrepare); + sqlite3_close(ctx.db); + return 0; +} + +static void usage(const char *zExec){ + fprintf(stderr, "Usage: %s ?-vfs VFS? DATABASE\n", zExec); + exit(1); +} + +int main(int argc, char *argv[]) { + int sfd; + int rc; + int yes = 1; + struct sockaddr_in server; + int i; + + /* Ignore SIGPIPE. Otherwise the server exits if a client disconnects + ** abruptly. */ + signal(SIGPIPE, SIG_IGN); + + g.nThreshold = TSERVER_DEFAULT_CHECKPOINT_THRESHOLD; + if( (argc%2) ) usage(argv[0]); + for(i=1; i<(argc-1); i+=2){ + int n = strlen(argv[i]); + if( n>=2 && 0==sqlite3_strnicmp("-walautocheckpoint", argv[i], n) ){ + g.nThreshold = strtol(argv[i+1], 0, 0); + }else + if( n>=2 && 0==sqlite3_strnicmp("-vfs", argv[i], n) ){ + g.zVfs = argv[i+1]; + } + } + g.zDatabaseName = argv[argc-1]; + + g.commit_mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); + pthread_mutex_init(&g.ckpt_mutex, 0); + pthread_cond_init(&g.ckpt_cond, 0); + + rc = sqlite3_open_v2(g.zDatabaseName, &g.db, + SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE, g.zVfs + ); + if( rc!=SQLITE_OK ){ + fprintf(stderr, "sqlite3_open(): %s\n", sqlite3_errmsg(g.db)); + return 1; + } + + rc = sqlite3_exec(g.db, "SELECT * FROM sqlite_master", 0, 0, 0); + if( rc!=SQLITE_OK ){ + fprintf(stderr, "sqlite3_exec(): %s\n", sqlite3_errmsg(g.db)); + return 1; + } + + sfd = socket(AF_INET, SOCK_STREAM, 0); + if( sfd<0 ){ + fprintf(stderr, "socket() failed\n"); + return 1; + } + + rc = setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)); + if( rc<0 ){ + perror("setsockopt"); + return 1; + } + + memset(&server, 0, sizeof(server)); + server.sin_family = AF_INET; + 
server.sin_addr.s_addr = inet_addr("127.0.0.1"); + server.sin_port = htons(TSERVER_PORTNUMBER); + + rc = bind(sfd, (struct sockaddr *)&server, sizeof(struct sockaddr)); + if( rc<0 ){ + fprintf(stderr, "bind() failed\n"); + return 1; + } + + rc = listen(sfd, 8); + if( rc<0 ){ + fprintf(stderr, "listen() failed\n"); + return 1; + } + + while( 1 ){ + pthread_t tid; + int cfd = accept(sfd, NULL, NULL); + if( cfd<0 ){ + perror("accept()"); + return 1; + } + + fprintf(stdout, "Client %d connects\n", cfd); + fflush(stdout); + rc = pthread_create(&tid, NULL, handle_client, (void*)(intptr_t)cfd); + if( rc!=0 ){ + perror("pthread_create()"); + return 1; + } + + pthread_detach(tid); + } + + return 0; +} diff --git a/tool/tserver_test.tcl b/tool/tserver_test.tcl new file mode 100644 index 0000000000..5f9e763bb0 --- /dev/null +++ b/tool/tserver_test.tcl @@ -0,0 +1,304 @@ +#!/usr/bin/tclsh +# +# This script is used to run the performance test cases described in +# README-server-edition.html. +# + + +package require sqlite3 + +# Default values for command line switches: +set O(-database) "" +set O(-rows) [expr 5000000] +set O(-mode) wal2 +set O(-tserver) "./tserver" +set O(-seconds) 20 +set O(-writers) 1 +set O(-readers) 0 +set O(-integrity) 0 +set O(-verbose) 0 +set O(-external) 0 + + +proc error_out {err} { + puts stderr $err + exit -1 +} + +proc usage {} { + puts stderr "Usage: $::argv0 ?OPTIONS?" + puts stderr "" + puts stderr "Where OPTIONS are:" + puts stderr " -database (default: test.db)" + puts stderr " -mode (default: wal2)" + puts stderr " -rows (default: 5000000)" + puts stderr " -tserver (default: ./tserver)" + puts stderr " -seconds