Commit 019ca23d authored by Alexandre Delanoë's avatar Alexandre Delanoë

[DB][OPTIM] WIP

parent b37bd924
...@@ -155,10 +155,11 @@ CREATE UNIQUE INDEX ON public.nodes USING btree (typename, parent_id, ((hyperdat ...@@ -155,10 +155,11 @@ CREATE UNIQUE INDEX ON public.nodes USING btree (typename, parent_id, ((hyperdat
CREATE UNIQUE INDEX ON public.ngrams (terms); -- TEST GIN CREATE UNIQUE INDEX ON public.ngrams (terms); -- TEST GIN
CREATE INDEX ON public.ngrams USING btree (id, terms); CREATE INDEX ON public.ngrams USING btree (id, terms);
CREATE INDEX ON public.node_ngrams USING btree (node_id,node_subtype);
CREATE UNIQUE INDEX ON public.node_ngrams USING btree (node_id,node_subtype, ngrams_id); CREATE UNIQUE INDEX ON public.node_ngrams USING btree (node_id,node_subtype, ngrams_id);
CREATE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id, category);
CREATE UNIQUE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id); CREATE UNIQUE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id);
CREATE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id, category);
CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id, ngrams_id, ngrams_type); CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id, ngrams_id, ngrams_type);
CREATE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id); CREATE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id);
......
...@@ -208,6 +208,7 @@ library: ...@@ -208,6 +208,7 @@ library:
- transformers - transformers
- transformers-base - transformers-base
- unordered-containers - unordered-containers
- Unique
- uuid - uuid
- validity - validity
- vector - vector
......
...@@ -75,7 +75,7 @@ import Gargantext.Ext.IMT (toSchoolName) ...@@ -75,7 +75,7 @@ import Gargantext.Ext.IMT (toSchoolName)
import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile) import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Terms.Eleve (buildTries, toToken) import Gargantext.Text.Terms.Eleve (buildTries, toToken)
import Gargantext.Text.List (buildNgramsLists,StopSize(..)) --import Gargantext.Text.List (buildNgramsLists,StopSize(..))
import Gargantext.Text.Corpus.Parsers (parseFile, FileFormat) import Gargantext.Text.Corpus.Parsers (parseFile, FileFormat)
import qualified Gargantext.Text.Corpus.API.Isidore as Isidore import qualified Gargantext.Text.Corpus.API.Isidore as Isidore
import Gargantext.Text.Terms (TermType(..), tt_lang, extractTerms, uniText) import Gargantext.Text.Terms (TermType(..), tt_lang, extractTerms, uniText)
...@@ -214,7 +214,7 @@ flowCorpusUser :: (FlowCmdM env err m, MkCorpus c) ...@@ -214,7 +214,7 @@ flowCorpusUser :: (FlowCmdM env err m, MkCorpus c)
-> Maybe c -> Maybe c
-> [NodeId] -> [NodeId]
-> m CorpusId -> m CorpusId
flowCorpusUser l userName corpusName ctype ids = do flowCorpusUser _l userName corpusName ctype ids = do
-- User Flow -- User Flow
(userId, _rootId, userCorpusId) <- getOrMk_RootWithCorpus userName corpusName ctype (userId, _rootId, userCorpusId) <- getOrMk_RootWithCorpus userName corpusName ctype
listId <- getOrMkList userCorpusId userId listId <- getOrMkList userCorpusId userId
...@@ -226,9 +226,9 @@ flowCorpusUser l userName corpusName ctype ids = do ...@@ -226,9 +226,9 @@ flowCorpusUser l userName corpusName ctype ids = do
-- printDebug "Node Text Id" tId -- printDebug "Node Text Id" tId
-- User List Flow -- User List Flow
(_masterUserId, _masterRootId, masterCorpusId) <- getOrMk_RootWithCorpus userMaster (Left "") ctype (_masterUserId, _masterRootId, _masterCorpusId) <- getOrMk_RootWithCorpus userMaster (Left "") ctype
ngs <- buildNgramsLists l 2 3 (StopSize 3) userCorpusId masterCorpusId --ngs <- buildNgramsLists l 2 3 (StopSize 3) userCorpusId masterCorpusId
_userListId <- flowList_DbRepo listId ngs -- _userListId <- flowList_DbRepo listId ngs
--mastListId <- getOrMkList masterCorpusId masterUserId --mastListId <- getOrMkList masterCorpusId masterUserId
-- _ <- insertOccsUpdates userCorpusId mastListId -- _ <- insertOccsUpdates userCorpusId mastListId
-- printDebug "userListId" userListId -- printDebug "userListId" userListId
......
...@@ -28,6 +28,7 @@ module Gargantext.Database.Schema.NodeNgrams where ...@@ -28,6 +28,7 @@ module Gargantext.Database.Schema.NodeNgrams where
import Data.Map (Map) import Data.Map (Map)
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.List.UniqueStrict (sortUniq)
import qualified Data.List as List import qualified Data.List as List
import Data.Text (Text) import Data.Text (Text)
import qualified Database.PostgreSQL.Simple as PGS (Query, Only(..)) import qualified Database.PostgreSQL.Simple as PGS (Query, Only(..))
...@@ -63,9 +64,47 @@ data NodeNgramsPoly id ...@@ -63,9 +64,47 @@ data NodeNgramsPoly id
, _nng_ngrams_tag :: ngrams_tag , _nng_ngrams_tag :: ngrams_tag
, _nng_ngrams_class :: ngrams_class , _nng_ngrams_class :: ngrams_class
, _nng_ngrams_weight :: weight , _nng_ngrams_weight :: weight
} deriving (Show) } deriving (Show, Eq, Ord)
{- {-
instance ( Eq id
, Eq node_id'
, Eq node_subtype
, Eq ngrams_id
, Eq ngrams_type
, Eq ngrams_field
, Eq ngrams_tag
, Eq ngrams_class
, Eq weight
) => Eq (NodeNgramsPoly id node_id' node_subtype ngrams_id ngrams_type ngrams_field ngrams_tag ngrams_class weight) where
(==) (NodeNgrams a b c d e f g h i)
(NodeNgrams a' b' c' d' e' f' g' h' i') =
all identity [ a == a'
, b == b'
, c == c'
, d == d'
, e == e'
, f == f'
, g == g'
, h == h'
, i == i'
]
instance ( Ord id
, Ord node_id'
, Ord node_subtype
, Ord ngrams_id
, Ord ngrams_type
, Ord ngrams_field
, Ord ngrams_tag
, Ord ngrams_class
, Ord weight
) => Ord (NodeNgramsPoly id node_id' node_subtype ngrams_id ngrams_type ngrams_field ngrams_tag ngrams_class weight) where
compare (NodeNgrams a _b _c _d _e _f _g _h _i)
(NodeNgrams a' _b' _c' _d' _e' _f' _g' _h' _i') =
compare a a'
type NodeNgramsWrite = NodeNgramsPoly (Maybe (Column (PGInt4))) type NodeNgramsWrite = NodeNgramsPoly (Maybe (Column (PGInt4)))
(Column (PGInt4)) (Column (PGInt4))
(Maybe (Column (PGInt4))) (Maybe (Column (PGInt4)))
...@@ -126,7 +165,7 @@ getCgramsId mapId nt t = case Map.lookup nt mapId of ...@@ -126,7 +165,7 @@ getCgramsId mapId nt t = case Map.lookup nt mapId of
Just mapId' -> Map.lookup t mapId' Just mapId' -> Map.lookup t mapId'
-- insertDb :: ListId -> Map NgramsType [NgramsElemet] -> Cmd err [Result] -- insertDb :: ListId -> Map NgramsType [NgramsElement] -> Cmd err [Result]
listInsertDb :: Show a => ListId listInsertDb :: Show a => ListId
-> (ListId -> a -> [NodeNgramsW]) -> (ListId -> a -> [NodeNgramsW])
-> a -> a
...@@ -156,7 +195,7 @@ insertNodeNgrams nns = runPGSQuery query (PGS.Only $ Values fields nns') ...@@ -156,7 +195,7 @@ insertNodeNgrams nns = runPGSQuery query (PGS.Only $ Values fields nns')
, toField $ fromMaybe 0 ngrams_class , toField $ fromMaybe 0 ngrams_class
, toField weight , toField weight
] ]
) nns ) $ sortUniq nns
query :: PGS.Query query :: PGS.Query
query = [sql| query = [sql|
......
...@@ -23,6 +23,7 @@ import Database.PostgreSQL.Simple.SqlQQ (sql) ...@@ -23,6 +23,7 @@ import Database.PostgreSQL.Simple.SqlQQ (sql)
-- import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) -- import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.Database.Config (nodeTypeId) import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId) import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Core.Types.Main (listTypeId, ListType(CandidateTerm))
import Gargantext.Database.Utils (Cmd, execPGSQuery) import Gargantext.Database.Utils (Cmd, execPGSQuery)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Database.PostgreSQL.Simple as DPS import qualified Database.PostgreSQL.Simple as DPS
...@@ -105,7 +106,12 @@ triggerCountInsert2 = execPGSQuery query (nodeTypeId NodeCorpus, nodeTypeId Node ...@@ -105,7 +106,12 @@ triggerCountInsert2 = execPGSQuery query (nodeTypeId NodeCorpus, nodeTypeId Node
-- TODO add the groups -- TODO add the groups
triggerCoocInsert :: Cmd err Int64 triggerCoocInsert :: Cmd err Int64
triggerCoocInsert = execPGSQuery query (nodeTypeId NodeCorpus, nodeTypeId NodeDocument, nodeTypeId NodeList) triggerCoocInsert = execPGSQuery query ( nodeTypeId NodeCorpus
, nodeTypeId NodeDocument
, nodeTypeId NodeList
, listTypeId CandidateTerm
, listTypeId CandidateTerm
)
where where
query :: DPS.Query query :: DPS.Query
query = [sql| query = [sql|
...@@ -116,7 +122,7 @@ triggerCoocInsert = execPGSQuery query (nodeTypeId NodeCorpus, nodeTypeId NodeDo ...@@ -116,7 +122,7 @@ triggerCoocInsert = execPGSQuery query (nodeTypeId NodeCorpus, nodeTypeId NodeDo
END IF; END IF;
IF TG_OP = 'INSERT' THEN IF TG_OP = 'INSERT' THEN
INSERT INTO node_nodengrams_nodengrams (node_id, node_ngrams1_id, node_ngrams2_id, weight) INSERT INTO node_nodengrams_nodengrams (node_id, node_ngrams1_id, node_ngrams2_id, weight)
WITH input(corpus_id, nn1, nn2, weight) AS (
SELECT corpus.id, nng1.id, nng2.id, count(*) from NEW as new1 SELECT corpus.id, nng1.id, nng2.id, count(*) from NEW as new1
INNER JOIN node_ngrams nng1 ON nng1.id = new1.nodengrams_id INNER JOIN node_ngrams nng1 ON nng1.id = new1.nodengrams_id
INNER JOIN nodes list ON list.id = nng1.node_id INNER JOIN nodes list ON list.id = nng1.node_id
...@@ -131,8 +137,12 @@ triggerCoocInsert = execPGSQuery query (nodeTypeId NodeCorpus, nodeTypeId NodeDo ...@@ -131,8 +137,12 @@ triggerCoocInsert = execPGSQuery query (nodeTypeId NodeCorpus, nodeTypeId NodeDo
AND doc.typename = ? -- 4 -- maybe not mandatory AND doc.typename = ? -- 4 -- maybe not mandatory
AND list.typename = ? -- 5 -- list AND list.typename = ? -- 5 -- list
AND nng2.node_id = list.id AND nng2.node_id = list.id
-- AND nng1.id <> nng2.id AND nng1.id < nng2.id
AND nng1.node_subtype >= ?
AND nng2.node_subtype >= ?
GROUP BY corpus.id, nng1.id, nng2.id GROUP BY corpus.id, nng1.id, nng2.id
)
SELECT * from input where weight > 1
ON CONFLICT (node_id, node_ngrams1_id, node_ngrams2_id) ON CONFLICT (node_id, node_ngrams1_id, node_ngrams2_id)
DO UPDATE set weight = node_nodengrams_nodengrams.weight + excluded.weight DO UPDATE set weight = node_nodengrams_nodengrams.weight + excluded.weight
......
...@@ -46,6 +46,7 @@ extra-deps: ...@@ -46,6 +46,7 @@ extra-deps:
commit: 53385de076be09f728a1b58c035a18e9ff9bcfd6 commit: 53385de076be09f728a1b58c035a18e9ff9bcfd6
- git: https://github.com/delanoe/hsparql.git - git: https://github.com/delanoe/hsparql.git
commit: 308c74b71a1abb0a91546fa57d353131248e3a7f commit: 308c74b71a1abb0a91546fa57d353131248e3a7f
- Unique-0.4.7.6@sha256:a1ff411f4d68c756e01e8d532fbe8e57f1ac77f2cc0ee8a999770be2bca185c5,2723
- KMP-0.1.0.2 - KMP-0.1.0.2
- accelerate-1.2.0.1 - accelerate-1.2.0.1
- aeson-lens-0.5.0.0 - aeson-lens-0.5.0.0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment