[sqlite] improve performance of SQLite generation by setting proper PRAGMAs

Also, force a WAL checkpoint at the end; otherwise the exported DB file might be incomplete.
parent c0b775e0
Pipeline #7769 passed with stages
in 42 minutes and 33 seconds
......@@ -28,11 +28,12 @@ import Gargantext.API.Node.Corpus.Export.Types ( Corpus(..), CorpusSQLite(..) )
import Gargantext.API.Node.Corpus.Export.Utils (getContextNgrams, mkCorpusSQLite, mkCorpusSQLiteData)
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.API.Prelude (IsGargServer)
import Gargantext.API.Routes.Named.Corpus qualified as Named
import Gargantext.Core.NodeStory
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types.Main ( ListType(MapTerm) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Node (Context, CorpusId, ListId, context2node, nodeId2ContextId)
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node ( defaultList )
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
......@@ -41,7 +42,6 @@ import Gargantext.Prelude hiding (hash)
import Gargantext.Prelude.Crypto.Hash (hash)
import Servant (Headers, Header, addHeader)
import Servant.Server.Generic (AsServerT)
import qualified Gargantext.API.Routes.Named.Corpus as Named
--------------------------------------------------
-- | Hashes are ordered by Set
......
......@@ -87,3 +87,4 @@ data CorpusSQLiteData =
, _csd_stop_context_ngrams :: Map ContextId (Set NgramsTerm)
, _csd_candidate_context_ngrams :: Map ContextId (Set NgramsTerm)
} deriving (Show, Eq, Generic)
......@@ -141,7 +141,9 @@ mkCorpusSQLite (CorpusSQLiteData { .. }) = withTempSQLiteDir $ \(fp, _fname, fpa
S.withConnection fpath $ \conn -> do
-- better performance
-- https://kerkour.com/sqlite-for-servers
S.execute_ conn "PRAGMA journal_mode = WAL"
S.execute_ conn "PRAGMA journal_mode = WAL" -- overall, a good idea for sqlite
S.execute_ conn "PRAGMA synchronous = NORMAL" -- faster writes
S.execute_ conn "PRAGMA cache_size = 1000000" -- better memory usage
S.execute_ conn "CREATE TABLE info (key, value);"
S.execute conn "INSERT INTO info (key, value) VALUES ('gargVersion', ?)" (S.Only $ showVersion _csd_version)
......@@ -179,6 +181,10 @@ mkCorpusSQLite (CorpusSQLiteData { .. }) = withTempSQLiteDir $ \(fp, _fname, fpa
, iso8601Show ctxDate
, Aeson.encode ctxHyperdata )) <$> _csd_contexts)
-- Force a WAL checkpoint so that no data is left behind in the write-ahead log
-- (we only send the main sqlite file; the `-wal` and `-shm` sidecar files that WAL mode creates would not be sent)
S.execute_ conn "PRAGMA wal_checkpoint(FULL)"
bsl <- BSL.readFile fpath
pure $ CorpusSQLite { _cs_bs = bsl }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment