[sqlite] improve performance of SQLite generation by setting proper PRAGMAs

Also, force a WAL checkpoint at the end; otherwise the exported DB file might be incomplete.
parent c0b775e0
Pipeline #7769 passed with stages
in 42 minutes and 33 seconds
...@@ -28,11 +28,12 @@ import Gargantext.API.Node.Corpus.Export.Types ( Corpus(..), CorpusSQLite(..) ) ...@@ -28,11 +28,12 @@ import Gargantext.API.Node.Corpus.Export.Types ( Corpus(..), CorpusSQLite(..) )
import Gargantext.API.Node.Corpus.Export.Utils (getContextNgrams, mkCorpusSQLite, mkCorpusSQLiteData) import Gargantext.API.Node.Corpus.Export.Utils (getContextNgrams, mkCorpusSQLite, mkCorpusSQLiteData)
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.API.Prelude (IsGargServer) import Gargantext.API.Prelude (IsGargServer)
import Gargantext.API.Routes.Named.Corpus qualified as Named
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory
import Gargantext.Core.Text.Ngrams (NgramsType(..)) import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types.Main ( ListType(MapTerm) ) import Gargantext.Core.Types.Main ( ListType(MapTerm) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) ) import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node (Context, CorpusId, ListId, context2node, nodeId2ContextId)
import Gargantext.Database.Prelude import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Node ( defaultList ) import Gargantext.Database.Query.Table.Node ( defaultList )
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes) import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
...@@ -41,7 +42,6 @@ import Gargantext.Prelude hiding (hash) ...@@ -41,7 +42,6 @@ import Gargantext.Prelude hiding (hash)
import Gargantext.Prelude.Crypto.Hash (hash) import Gargantext.Prelude.Crypto.Hash (hash)
import Servant (Headers, Header, addHeader) import Servant (Headers, Header, addHeader)
import Servant.Server.Generic (AsServerT) import Servant.Server.Generic (AsServerT)
import qualified Gargantext.API.Routes.Named.Corpus as Named
-------------------------------------------------- --------------------------------------------------
-- | Hashes are ordered by Set -- | Hashes are ordered by Set
......
...@@ -87,3 +87,4 @@ data CorpusSQLiteData = ...@@ -87,3 +87,4 @@ data CorpusSQLiteData =
, _csd_stop_context_ngrams :: Map ContextId (Set NgramsTerm) , _csd_stop_context_ngrams :: Map ContextId (Set NgramsTerm)
, _csd_candidate_context_ngrams :: Map ContextId (Set NgramsTerm) , _csd_candidate_context_ngrams :: Map ContextId (Set NgramsTerm)
} deriving (Show, Eq, Generic) } deriving (Show, Eq, Generic)
...@@ -141,7 +141,9 @@ mkCorpusSQLite (CorpusSQLiteData { .. }) = withTempSQLiteDir $ \(fp, _fname, fpa ...@@ -141,7 +141,9 @@ mkCorpusSQLite (CorpusSQLiteData { .. }) = withTempSQLiteDir $ \(fp, _fname, fpa
S.withConnection fpath $ \conn -> do S.withConnection fpath $ \conn -> do
-- better performance -- better performance
-- https://kerkour.com/sqlite-for-servers -- https://kerkour.com/sqlite-for-servers
S.execute_ conn "PRAGMA journal_mode = WAL" S.execute_ conn "PRAGMA journal_mode = WAL" -- overall, a good idea for sqlite
S.execute_ conn "PRAGMA synchronous = NORMAL" -- faster writes
S.execute_ conn "PRAGMA cache_size = 1000000" -- better memory usage
S.execute_ conn "CREATE TABLE info (key, value);" S.execute_ conn "CREATE TABLE info (key, value);"
S.execute conn "INSERT INTO info (key, value) VALUES ('gargVersion', ?)" (S.Only $ showVersion _csd_version) S.execute conn "INSERT INTO info (key, value) VALUES ('gargVersion', ?)" (S.Only $ showVersion _csd_version)
...@@ -179,6 +181,10 @@ mkCorpusSQLite (CorpusSQLiteData { .. }) = withTempSQLiteDir $ \(fp, _fname, fpa ...@@ -179,6 +181,10 @@ mkCorpusSQLite (CorpusSQLiteData { .. }) = withTempSQLiteDir $ \(fp, _fname, fpa
, iso8601Show ctxDate , iso8601Show ctxDate
, Aeson.encode ctxHyperdata )) <$> _csd_contexts) , Aeson.encode ctxHyperdata )) <$> _csd_contexts)
-- Force WAL checkpoint so we don't leave any data in the WAL log
-- (we only send the sqlite file, while WAL creates additional index files which would not be sent)
S.execute_ conn "PRAGMA wal_checkpoint(FULL)"
bsl <- BSL.readFile fpath bsl <- BSL.readFile fpath
pure $ CorpusSQLite { _cs_bs = bsl } pure $ CorpusSQLite { _cs_bs = bsl }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment