Commit 019ca23d authored by Alexandre Delanoë

[DB][OPTIM] WIP

parent b37bd924
......@@ -155,10 +155,11 @@ CREATE UNIQUE INDEX ON public.nodes USING btree (typename, parent_id, ((hyperdat
CREATE UNIQUE INDEX ON public.ngrams (terms); -- TEST GIN
CREATE INDEX ON public.ngrams USING btree (id, terms);
CREATE INDEX ON public.node_ngrams USING btree (node_id,node_subtype);
CREATE UNIQUE INDEX ON public.node_ngrams USING btree (node_id,node_subtype, ngrams_id);
CREATE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id, category);
CREATE UNIQUE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id);
CREATE INDEX ON public.nodes_nodes USING btree (node1_id, node2_id, category);
CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id, ngrams_id, ngrams_type);
CREATE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id);
......
......@@ -208,6 +208,7 @@ library:
- transformers
- transformers-base
- unordered-containers
- Unique
- uuid
- validity
- vector
......
......@@ -75,7 +75,7 @@ import Gargantext.Ext.IMT (toSchoolName)
import Gargantext.Ext.IMTUser (deserialiseImtUsersFromFile)
import Gargantext.Prelude
import Gargantext.Text.Terms.Eleve (buildTries, toToken)
import Gargantext.Text.List (buildNgramsLists,StopSize(..))
--import Gargantext.Text.List (buildNgramsLists,StopSize(..))
import Gargantext.Text.Corpus.Parsers (parseFile, FileFormat)
import qualified Gargantext.Text.Corpus.API.Isidore as Isidore
import Gargantext.Text.Terms (TermType(..), tt_lang, extractTerms, uniText)
......@@ -214,7 +214,7 @@ flowCorpusUser :: (FlowCmdM env err m, MkCorpus c)
-> Maybe c
-> [NodeId]
-> m CorpusId
flowCorpusUser l userName corpusName ctype ids = do
flowCorpusUser _l userName corpusName ctype ids = do
-- User Flow
(userId, _rootId, userCorpusId) <- getOrMk_RootWithCorpus userName corpusName ctype
listId <- getOrMkList userCorpusId userId
......@@ -226,9 +226,9 @@ flowCorpusUser l userName corpusName ctype ids = do
-- printDebug "Node Text Id" tId
-- User List Flow
(_masterUserId, _masterRootId, masterCorpusId) <- getOrMk_RootWithCorpus userMaster (Left "") ctype
ngs <- buildNgramsLists l 2 3 (StopSize 3) userCorpusId masterCorpusId
_userListId <- flowList_DbRepo listId ngs
(_masterUserId, _masterRootId, _masterCorpusId) <- getOrMk_RootWithCorpus userMaster (Left "") ctype
--ngs <- buildNgramsLists l 2 3 (StopSize 3) userCorpusId masterCorpusId
-- _userListId <- flowList_DbRepo listId ngs
--mastListId <- getOrMkList masterCorpusId masterUserId
-- _ <- insertOccsUpdates userCorpusId mastListId
-- printDebug "userListId" userListId
......
......@@ -28,6 +28,7 @@ module Gargantext.Database.Schema.NodeNgrams where
import Data.Map (Map)
import qualified Data.Map as Map
import Data.List.UniqueStrict (sortUniq)
import qualified Data.List as List
import Data.Text (Text)
import qualified Database.PostgreSQL.Simple as PGS (Query, Only(..))
......@@ -63,9 +64,47 @@ data NodeNgramsPoly id
, _nng_ngrams_tag :: ngrams_tag
, _nng_ngrams_class :: ngrams_class
, _nng_ngrams_weight :: weight
} deriving (Show)
} deriving (Show, Eq, Ord)
{-
instance ( Eq id
, Eq node_id'
, Eq node_subtype
, Eq ngrams_id
, Eq ngrams_type
, Eq ngrams_field
, Eq ngrams_tag
, Eq ngrams_class
, Eq weight
) => Eq (NodeNgramsPoly id node_id' node_subtype ngrams_id ngrams_type ngrams_field ngrams_tag ngrams_class weight) where
(==) (NodeNgrams a b c d e f g h i)
(NodeNgrams a' b' c' d' e' f' g' h' i') =
all identity [ a == a'
, b == b'
, c == c'
, d == d'
, e == e'
, f == f'
, g == g'
, h == h'
, i == i'
]
instance ( Ord id
, Ord node_id'
, Ord node_subtype
, Ord ngrams_id
, Ord ngrams_type
, Ord ngrams_field
, Ord ngrams_tag
, Ord ngrams_class
, Ord weight
) => Ord (NodeNgramsPoly id node_id' node_subtype ngrams_id ngrams_type ngrams_field ngrams_tag ngrams_class weight) where
compare (NodeNgrams a _b _c _d _e _f _g _h _i)
(NodeNgrams a' _b' _c' _d' _e' _f' _g' _h' _i') =
compare a a'
type NodeNgramsWrite = NodeNgramsPoly (Maybe (Column (PGInt4)))
(Column (PGInt4))
(Maybe (Column (PGInt4)))
......@@ -126,7 +165,7 @@ getCgramsId mapId nt t = case Map.lookup nt mapId of
Just mapId' -> Map.lookup t mapId'
-- insertDb :: ListId -> Map NgramsType [NgramsElemet] -> Cmd err [Result]
-- insertDb :: ListId -> Map NgramsType [NgramsElement] -> Cmd err [Result]
listInsertDb :: Show a => ListId
-> (ListId -> a -> [NodeNgramsW])
-> a
......@@ -156,7 +195,7 @@ insertNodeNgrams nns = runPGSQuery query (PGS.Only $ Values fields nns')
, toField $ fromMaybe 0 ngrams_class
, toField weight
]
) nns
) $ sortUniq nns
query :: PGS.Query
query = [sql|
......
......@@ -23,6 +23,7 @@ import Database.PostgreSQL.Simple.SqlQQ (sql)
-- import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Core.Types.Main (listTypeId, ListType(CandidateTerm))
import Gargantext.Database.Utils (Cmd, execPGSQuery)
import Gargantext.Prelude
import qualified Database.PostgreSQL.Simple as DPS
......@@ -105,7 +106,12 @@ triggerCountInsert2 = execPGSQuery query (nodeTypeId NodeCorpus, nodeTypeId Node
-- TODO add the groups
triggerCoocInsert :: Cmd err Int64
triggerCoocInsert = execPGSQuery query (nodeTypeId NodeCorpus, nodeTypeId NodeDocument, nodeTypeId NodeList)
triggerCoocInsert = execPGSQuery query ( nodeTypeId NodeCorpus
, nodeTypeId NodeDocument
, nodeTypeId NodeList
, listTypeId CandidateTerm
, listTypeId CandidateTerm
)
where
query :: DPS.Query
query = [sql|
......@@ -116,7 +122,7 @@ triggerCoocInsert = execPGSQuery query (nodeTypeId NodeCorpus, nodeTypeId NodeDo
END IF;
IF TG_OP = 'INSERT' THEN
INSERT INTO node_nodengrams_nodengrams (node_id, node_ngrams1_id, node_ngrams2_id, weight)
WITH input(corpus_id, nn1, nn2, weight) AS (
SELECT corpus.id, nng1.id, nng2.id, count(*) from NEW as new1
INNER JOIN node_ngrams nng1 ON nng1.id = new1.nodengrams_id
INNER JOIN nodes list ON list.id = nng1.node_id
......@@ -131,8 +137,12 @@ triggerCoocInsert = execPGSQuery query (nodeTypeId NodeCorpus, nodeTypeId NodeDo
AND doc.typename = ? -- 4 -- maybe not mandatory
AND list.typename = ? -- 5 -- list
AND nng2.node_id = list.id
-- AND nng1.id <> nng2.id
AND nng1.id < nng2.id
AND nng1.node_subtype >= ?
AND nng2.node_subtype >= ?
GROUP BY corpus.id, nng1.id, nng2.id
)
SELECT * from input where weight > 1
ON CONFLICT (node_id, node_ngrams1_id, node_ngrams2_id)
DO UPDATE set weight = node_nodengrams_nodengrams.weight + excluded.weight
......
......@@ -46,6 +46,7 @@ extra-deps:
commit: 53385de076be09f728a1b58c035a18e9ff9bcfd6
- git: https://github.com/delanoe/hsparql.git
commit: 308c74b71a1abb0a91546fa57d353131248e3a7f
- Unique-0.4.7.6@sha256:a1ff411f4d68c756e01e8d532fbe8e57f1ac77f2cc0ee8a999770be2bca185c5,2723
- KMP-0.1.0.2
- accelerate-1.2.0.1
- aeson-lens-0.5.0.0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment