Commit 25b81234 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[Backup][WIP]

[*r*epoSaver*A cEtxicoen]p "rtepioosn/:no deSqlError Id{-0s-tqmlp-Srteatepo1 2=3 8"22-3150013."c,b osrq"l
E[wrixteecNSotdaetSutso r=i eFsa]t a[l(E)r]r
or, sqlErrorMsg = "une instruction insert ou update sur la table \194\171 context_node_ngrams \194\187 viole la contrainte de cl\195\169\n\195\169trang\195\168re \194\171 context_node_ngrams_context_id_fkey \194\187", sqlErrorDetail = "La cl\195\169 (context_id)=(135) n'est pas pr\195\169sente dans la table \194\171 nodes \194\187.", sqlErrorHint = ""}
parent ea17dccb
...@@ -166,6 +166,29 @@ CREATE TABLE public.context_node_ngrams2 ( ...@@ -166,6 +166,29 @@ CREATE TABLE public.context_node_ngrams2 (
); );
ALTER TABLE public.context_node_ngrams2 OWNER TO gargantua; ALTER TABLE public.context_node_ngrams2 OWNER TO gargantua;
--------------------------------------------------------------------
CREATE TABLE public.node_node_ngrams (
node1_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
node2_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
ngrams_id INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE,
ngrams_type INTEGER,
weight double precision,
PRIMARY KEY (node1_id, node2_id, ngrams_id, ngrams_type)
);
ALTER TABLE public.node_node_ngrams OWNER TO gargantua;
CREATE TABLE public.node_node_ngrams2 (
node_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
nodengrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE,
weight double precision,
PRIMARY KEY (node_id, nodengrams_id)
);
ALTER TABLE public.node_node_ngrams2 OWNER TO gargantua;
-------------------------------------------------------------- --------------------------------------------------------------
--CREATE TABLE public.nodes_ngrams_repo ( --CREATE TABLE public.nodes_ngrams_repo (
...@@ -243,6 +266,15 @@ CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id); ...@@ -243,6 +266,15 @@ CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id);
CREATE INDEX ON public.context_node_ngrams2 USING btree (nodengrams_id); CREATE INDEX ON public.context_node_ngrams2 USING btree (nodengrams_id);
CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id, nodengrams_id); CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id, nodengrams_id);
CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id, ngrams_id, ngrams_type);
CREATE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id);
CREATE INDEX ON public.node_node_ngrams USING btree (ngrams_id, node2_id);
CREATE INDEX ON public.node_node_ngrams USING btree (ngrams_type);
CREATE INDEX ON public.node_node_ngrams2 USING btree (node_id);
CREATE INDEX ON public.node_node_ngrams2 USING btree (nodengrams_id);
CREATE INDEX ON public.node_node_ngrams2 USING btree (node_id, nodengrams_id);
-- CREATE INDEX ON public.context_nodengrams_nodengrams USING btree (context_id, node_ngrams1_id, node_ngrams2_id); -- CREATE INDEX ON public.context_nodengrams_nodengrams USING btree (context_id, node_ngrams1_id, node_ngrams2_id);
-- CREATE INDEX ON public.context_nodengrams_nodengrams USING btree (node_ngrams1_id); -- CREATE INDEX ON public.context_nodengrams_nodengrams USING btree (node_ngrams1_id);
-- CREATE INDEX ON public.context_nodengrams_nodengrams USING btree (node_ngrams2_id); -- CREATE INDEX ON public.context_nodengrams_nodengrams USING btree (node_ngrams2_id);
......
...@@ -562,21 +562,17 @@ getTableNgrams _nType nId tabType listId limit_ offset ...@@ -562,21 +562,17 @@ getTableNgrams _nType nId tabType listId limit_ offset
setScores False table = pure table setScores False table = pure table
setScores True table = do setScores True table = do
let ngrams_terms = table ^.. each . ne_ngrams let ngrams_terms = table ^.. each . ne_ngrams
printDebug "ngrams_terms" ngrams_terms
t1 <- getTime t1 <- getTime
occurrences <- getOccByNgramsOnlyFast' nId occurrences <- getOccByNgramsOnlyFast' nId
listId listId
ngramsType ngramsType
ngrams_terms ngrams_terms
printDebug "occurrences" occurrences
t2 <- getTime t2 <- getTime
liftBase $ hprint stderr liftBase $ hprint stderr
("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n") ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
(length ngrams_terms) t1 t2 (length ngrams_terms) t1 t2
{-
occurrences <- getOccByNgramsOnlySlow nType nId
(lIds <> [listId])
ngramsType
ngrams_terms
-}
let let
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
......
...@@ -217,6 +217,9 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do ...@@ -217,6 +217,9 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
nt nt
selectedTerms selectedTerms
printDebug "mapTextDocIds" mapTextDocIds
let let
groupedTreeScores_SetNodeId :: HashMap NgramsTerm (GroupedTreeScores (Set NodeId)) groupedTreeScores_SetNodeId :: HashMap NgramsTerm (GroupedTreeScores (Set NodeId))
groupedTreeScores_SetNodeId = HashMap.filter (\g -> Set.size (view gts'_score g) > 1) -- removing hapax groupedTreeScores_SetNodeId = HashMap.filter (\g -> Set.size (view gts'_score g) > 1) -- removing hapax
......
...@@ -30,7 +30,7 @@ import Gargantext.Core.Types.Main (ListType(CandidateTerm)) ...@@ -30,7 +30,7 @@ import Gargantext.Core.Types.Main (ListType(CandidateTerm))
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory
import Gargantext.Database.Action.Flow.Types import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW{-, listInsertDb, getCgramsId -}) import Gargantext.Database.Query.Table.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW, listInsertDb,{- getCgramsId -})
-- import Gargantext.Database.Query.Table.Node_NodeNgramsNodeNgrams -- import Gargantext.Database.Query.Table.Node_NodeNgramsNodeNgrams
import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude import Gargantext.Prelude
...@@ -88,9 +88,8 @@ flowList_DbRepo :: FlowCmdM env err m ...@@ -88,9 +88,8 @@ flowList_DbRepo :: FlowCmdM env err m
-> m ListId -> m ListId
flowList_DbRepo lId ngs = do flowList_DbRepo lId ngs = do
-- printDebug "listId flowList" lId -- printDebug "listId flowList" lId
_mapCgramsId <- listInsertDb lId toNodeNgramsW (Map.toList ngs)
{- {-
mapCgramsId <- listInsertDb lId toNodeNgramsW (Map.toList ngs)
let toInsert = catMaybes [ (,) <$> (getCgramsId mapCgramsId ntype <$> (unNgramsTerm <$> parent)) let toInsert = catMaybes [ (,) <$> (getCgramsId mapCgramsId ntype <$> (unNgramsTerm <$> parent))
<*> getCgramsId mapCgramsId ntype ngram <*> getCgramsId mapCgramsId ntype ngram
| (ntype, ngs') <- Map.toList ngs | (ntype, ngs') <- Map.toList ngs
......
...@@ -31,11 +31,11 @@ data DocumentIdWithNgrams a b = ...@@ -31,11 +31,11 @@ data DocumentIdWithNgrams a b =
, documentNgrams :: HashMap b (Map NgramsType Int) , documentNgrams :: HashMap b (Map NgramsType Int)
} deriving (Show) } deriving (Show)
docNgrams2contextNodeNgrams :: CorpusId docNgrams2contextNodeNgrams :: ListId
-> DocNgrams -> DocNgrams
-> ContextNodeNgrams -> ContextNodeNgrams
docNgrams2contextNodeNgrams cId (DocNgrams d n nt w) = docNgrams2contextNodeNgrams lId (DocNgrams d n nt w) =
ContextNodeNgrams cId d n nt w ContextNodeNgrams d lId n nt w
data DocNgrams = DocNgrams { dn_doc_id :: DocId data DocNgrams = DocNgrams { dn_doc_id :: DocId
, dn_ngrams_id :: Int , dn_ngrams_id :: Int
...@@ -43,15 +43,15 @@ data DocNgrams = DocNgrams { dn_doc_id :: DocId ...@@ -43,15 +43,15 @@ data DocNgrams = DocNgrams { dn_doc_id :: DocId
, dn_weight :: Double , dn_weight :: Double
} }
insertDocNgramsOn :: CorpusId insertDocNgramsOn :: ListId
-> [DocNgrams] -> [DocNgrams]
-> Cmd err Int -> Cmd err Int
insertDocNgramsOn cId dn = insertDocNgramsOn cId dn =
insertContextNodeNgrams insertContextNodeNgrams
$ (map (docNgrams2contextNodeNgrams cId) dn) $ (map (docNgrams2contextNodeNgrams cId) dn)
insertDocNgrams :: CorpusId insertDocNgrams :: ListId
-> HashMap (Indexed Int Ngrams) (Map NgramsType (Map NodeId Int)) -> HashMap (Indexed Int Ngrams) (Map NgramsType (Map DocId Int))
-> Cmd err Int -> Cmd err Int
insertDocNgrams cId m = insertDocNgrams cId m =
insertDocNgramsOn cId [ DocNgrams { dn_doc_id = n insertDocNgramsOn cId [ DocNgrams { dn_doc_id = n
......
...@@ -24,7 +24,7 @@ import Data.Text (Text) ...@@ -24,7 +24,7 @@ import Data.Text (Text)
import Data.Tuple.Extra (first, second, swap) import Data.Tuple.Extra (first, second, swap)
import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Debug.Trace (trace) -- import Debug.Trace (trace)
import Gargantext.Core import Gargantext.Core
import Gargantext.API.Ngrams.Types (NgramsTerm(..)) import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Data.HashMap.Strict.Utils as HM import Gargantext.Data.HashMap.Strict.Utils as HM
...@@ -39,6 +39,7 @@ import qualified Database.PostgreSQL.Simple as DPS ...@@ -39,6 +39,7 @@ import qualified Database.PostgreSQL.Simple as DPS
-- | fst is size of Supra Corpus -- | fst is size of Supra Corpus
-- snd is Texts and size of Occurrences (different docs) -- snd is Texts and size of Occurrences (different docs)
countContextsByNgramsWith :: (NgramsTerm -> NgramsTerm) countContextsByNgramsWith :: (NgramsTerm -> NgramsTerm)
-> HashMap NgramsTerm (Set ContextId) -> HashMap NgramsTerm (Set ContextId)
-> (Double, HashMap NgramsTerm (Double, Set NgramsTerm)) -> (Double, HashMap NgramsTerm (Double, Set NgramsTerm))
...@@ -49,14 +50,15 @@ countContextsByNgramsWith f m = (total, m') ...@@ -49,14 +50,15 @@ countContextsByNgramsWith f m = (total, m')
$ groupContextsByNgramsWith f m $ groupContextsByNgramsWith f m
groupContextsByNgramsWith :: (NgramsTerm -> NgramsTerm) groupContextsByNgramsWith :: (NgramsTerm -> NgramsTerm)
-> HashMap NgramsTerm (Set NodeId) -> HashMap NgramsTerm (Set NodeId)
-> HashMap NgramsTerm (Set NgramsTerm, Set ContextId) -> HashMap NgramsTerm (Set NgramsTerm, Set ContextId)
groupContextsByNgramsWith f m = groupContextsByNgramsWith f' m'' =
HM.fromListWith (<>) $ map (\(t,ns) -> (f t, (Set.singleton t, ns))) HM.fromListWith (<>) $ map (\(t,ns) -> (f' t, (Set.singleton t, ns)))
$ HM.toList m $ HM.toList m''
------------------------------------------------------------------------ ------------------------------------------------------------------------
getContextsByNgramsUser :: HasDBid NodeType getContextsByNgramsUser :: HasDBid NodeType
=> CorpusId => CorpusId
-> NgramsType -> NgramsType
...@@ -81,21 +83,22 @@ getContextsByNgramsUser cId nt = ...@@ -81,21 +83,22 @@ getContextsByNgramsUser cId nt =
queryNgramsByContextUser :: DPS.Query queryNgramsByContextUser :: DPS.Query
queryNgramsByContextUser = [sql| queryNgramsByContextUser = [sql|
SELECT cng.node_id, ng.terms FROM context_node_ngrams cng SELECT cng.context_id, ng.terms FROM context_node_ngrams cng
JOIN ngrams ng ON cng.ngrams_id = ng.id JOIN ngrams ng ON cng.ngrams_id = ng.id
JOIN nodes_contexts nn ON nn.context_id = cng.context_id JOIN nodes_contexts nc ON nc.context_id = cng.context_id
JOIN contexts n ON nn.context_id = n.id JOIN contexts c ON nc.context_id = c.id
WHERE nn.node_id = ? -- CorpusId WHERE nc.node_id = ? -- CorpusId
AND n.typename = ? -- toDBid AND c.typename = ? -- toDBid
AND cng.ngrams_type = ? -- NgramsTypeId AND cng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0 AND nc.category > 0 -- is not in Trash
GROUP BY cng.node_id, ng.terms GROUP BY cng.context_id, ng.terms
ORDER BY (cng.node_id, ng.terms) DESC
-- LIMIT ?
-- OFFSET ?
|] |]
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- TODO add groups -- TODO add groups
{-
getOccByNgramsOnlyFast :: HasDBid NodeType getOccByNgramsOnlyFast :: HasDBid NodeType
=> CorpusId => CorpusId
-> NgramsType -> NgramsType
...@@ -103,7 +106,7 @@ getOccByNgramsOnlyFast :: HasDBid NodeType ...@@ -103,7 +106,7 @@ getOccByNgramsOnlyFast :: HasDBid NodeType
-> Cmd err (HashMap NgramsTerm Int) -> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlyFast cId nt ngs = getOccByNgramsOnlyFast cId nt ngs =
HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByContextUser cId nt ngs HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByContextUser cId nt ngs
-}
getOccByNgramsOnlyFast_withSample :: HasDBid NodeType getOccByNgramsOnlyFast_withSample :: HasDBid NodeType
=> CorpusId => CorpusId
...@@ -120,7 +123,7 @@ getOccByNgramsOnlyFast' :: CorpusId ...@@ -120,7 +123,7 @@ getOccByNgramsOnlyFast' :: CorpusId
-> NgramsType -> NgramsType
-> [NgramsTerm] -> [NgramsTerm]
-> Cmd err (HashMap NgramsTerm Int) -> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $ getOccByNgramsOnlyFast' cId lId nt tms = -- trace (show (cId, lId)) $
HM.fromListWith (+) <$> map (second round) <$> run cId lId nt tms HM.fromListWith (+) <$> map (second round) <$> run cId lId nt tms
where where
...@@ -131,7 +134,7 @@ getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $ ...@@ -131,7 +134,7 @@ getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $
-> NgramsType -> NgramsType
-> [NgramsTerm] -> [NgramsTerm]
-> Cmd err [(NgramsTerm, Double)] -> Cmd err [(NgramsTerm, Double)]
run cId' lId' nt' tms' = fmap (first NgramsTerm) <$> runPGSQuery query run cId' lId' nt' tms' = map (first NgramsTerm) <$> runPGSQuery query
( Values fields ((DPS.Only . unNgramsTerm) <$> tms') ( Values fields ((DPS.Only . unNgramsTerm) <$> tms')
, cId' , cId'
, lId' , lId'
...@@ -141,17 +144,19 @@ getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $ ...@@ -141,17 +144,19 @@ getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $
query :: DPS.Query query :: DPS.Query
query = [sql| query = [sql|
WITH input_rows(terms) AS (?) WITH input_rows(terms) AS (?)
SELECT ng.terms, cng.weight FROM context_node_ngrams cng SELECT ng.terms, nng.weight FROM nodes_contexts nc
JOIN ngrams ng ON cng.ngrams_id = ng.id JOIN node_node_ngrams nng ON nng.node1_id = nc.node_id
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms JOIN input_rows ir ON ir.terms = ng.terms
WHERE cng.context_id = ? -- CorpusId WHERE nng.node1_id = ? -- CorpusId
AND cng.node_id = ? -- ListId AND nng.node2_id = ? -- ListId
AND cng.ngrams_type = ? -- NgramsTypeId AND nng.ngrams_type = ? -- NgramsTypeId
-- AND nn.category > 0 -- TODO AND nc.category > 0 -- Not trash
GROUP BY ng.terms, cng.weight GROUP BY ng.terms, nng.weight
|] |]
{-
-- just slower than getOccByNgramsOnlyFast -- just slower than getOccByNgramsOnlyFast
getOccByNgramsOnlySlow :: HasDBid NodeType getOccByNgramsOnlySlow :: HasDBid NodeType
=> NodeType => NodeType
...@@ -200,8 +205,6 @@ selectNgramsOccurrencesOnlyByContextUser cId nt tms = ...@@ -200,8 +205,6 @@ selectNgramsOccurrencesOnlyByContextUser cId nt tms =
where where
fields = [QualifiedIdentifier Nothing "text"] fields = [QualifiedIdentifier Nothing "text"]
-- same as queryNgramsOnlyByNodeUser but using COUNT on the node ids. -- same as queryNgramsOnlyByNodeUser but using COUNT on the node ids.
-- Question: with the grouping is the result exactly the same (since Set NodeId for -- Question: with the grouping is the result exactly the same (since Set NodeId for
-- equivalent ngrams intersections are not empty) -- equivalent ngrams intersections are not empty)
...@@ -220,6 +223,7 @@ queryNgramsOccurrencesOnlyByContextUser = [sql| ...@@ -220,6 +223,7 @@ queryNgramsOccurrencesOnlyByContextUser = [sql|
GROUP BY cng.context_id, ng.terms GROUP BY cng.context_id, ng.terms
|] |]
-}
selectNgramsOccurrencesOnlyByContextUser_withSample :: HasDBid NodeType selectNgramsOccurrencesOnlyByContextUser_withSample :: HasDBid NodeType
=> CorpusId => CorpusId
...@@ -259,7 +263,7 @@ queryNgramsOccurrencesOnlyByContextUser_withSample = [sql| ...@@ -259,7 +263,7 @@ queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
|] |]
{-
queryNgramsOccurrencesOnlyByContextUser' :: DPS.Query queryNgramsOccurrencesOnlyByContextUser' :: DPS.Query
queryNgramsOccurrencesOnlyByContextUser' = [sql| queryNgramsOccurrencesOnlyByContextUser' = [sql|
WITH input_rows(terms) AS (?) WITH input_rows(terms) AS (?)
...@@ -274,8 +278,10 @@ queryNgramsOccurrencesOnlyByContextUser' = [sql| ...@@ -274,8 +278,10 @@ queryNgramsOccurrencesOnlyByContextUser' = [sql|
AND nn.category > 0 AND nn.category > 0
GROUP BY cng.node_id, ng.terms GROUP BY cng.node_id, ng.terms
|] |]
-}
------------------------------------------------------------------------ ------------------------------------------------------------------------
getContextsByNgramsOnlyUser :: HasDBid NodeType getContextsByNgramsOnlyUser :: HasDBid NodeType
=> CorpusId => CorpusId
-> [ListId] -> [ListId]
...@@ -289,7 +295,6 @@ getContextsByNgramsOnlyUser cId ls nt ngs = ...@@ -289,7 +295,6 @@ getContextsByNgramsOnlyUser cId ls nt ngs =
<$> mapM (selectNgramsOnlyByContextUser cId ls nt) <$> mapM (selectNgramsOnlyByContextUser cId ls nt)
(splitEvery 1000 ngs) (splitEvery 1000 ngs)
getNgramsByContextOnlyUser :: HasDBid NodeType getNgramsByContextOnlyUser :: HasDBid NodeType
=> NodeId => NodeId
-> [ListId] -> [ListId]
...@@ -306,12 +311,13 @@ getNgramsByContextOnlyUser cId ls nt ngs = ...@@ -306,12 +311,13 @@ getNgramsByContextOnlyUser cId ls nt ngs =
(splitEvery 1000 ngs) (splitEvery 1000 ngs)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- used in G.Core.Text.List
selectNgramsOnlyByContextUser :: HasDBid NodeType selectNgramsOnlyByContextUser :: HasDBid NodeType
=> CorpusId => CorpusId
-> [ListId] -> [ListId]
-> NgramsType -> NgramsType
-> [NgramsTerm] -> [NgramsTerm]
-> Cmd err [(NgramsTerm, NodeId)] -> Cmd err [(NgramsTerm, ContextId)]
selectNgramsOnlyByContextUser cId ls nt tms = selectNgramsOnlyByContextUser cId ls nt tms =
fmap (first NgramsTerm) <$> fmap (first NgramsTerm) <$>
runPGSQuery queryNgramsOnlyByContextUser runPGSQuery queryNgramsOnlyByContextUser
...@@ -329,20 +335,21 @@ queryNgramsOnlyByContextUser :: DPS.Query ...@@ -329,20 +335,21 @@ queryNgramsOnlyByContextUser :: DPS.Query
queryNgramsOnlyByContextUser = [sql| queryNgramsOnlyByContextUser = [sql|
WITH input_rows(terms) AS (?), WITH input_rows(terms) AS (?),
input_list(id) AS (?) input_list(id) AS (?)
SELECT ng.terms, cng.node_id FROM context_node_ngrams cng SELECT ng.terms, cng.context_id FROM context_node_ngrams cng
JOIN ngrams ng ON cng.ngrams_id = ng.id JOIN ngrams ng ON cng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_list il ON il.id = cng.context_id JOIN input_list il ON il.id = cng.node_id
JOIN nodes_contexts nn ON nn.context_id = cng.context_id JOIN nodes_contexts nn ON nn.context_id = cng.context_id
JOIN contexts n ON nn.context_id = n.id JOIN contexts c ON nn.context_id = c.id
WHERE nn.node_id = ? -- CorpusId WHERE nn.node_id = ? -- CorpusId
AND n.typename = ? -- toDBid AND c.typename = ? -- toDBid (maybe not useful with context table)
AND cng.ngrams_type = ? -- NgramsTypeId AND cng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0 AND nn.category > 0
GROUP BY ng.terms, cng.node_id GROUP BY ng.terms, cng.context_id
|] |]
{-
selectNgramsOnlyByContextUser' :: HasDBid NodeType selectNgramsOnlyByContextUser' :: HasDBid NodeType
=> CorpusId => CorpusId
-> [ListId] -> [ListId]
...@@ -374,7 +381,7 @@ queryNgramsOnlyByContextUser' = [sql| ...@@ -374,7 +381,7 @@ queryNgramsOnlyByContextUser' = [sql|
-- AND nn.category > 0 -- AND nn.category > 0
GROUP BY ng.terms, cng.weight GROUP BY ng.terms, cng.weight
|] |]
-}
getNgramsByDocOnlyUser :: DocId getNgramsByDocOnlyUser :: DocId
-> [ListId] -> [ListId]
...@@ -421,7 +428,9 @@ queryNgramsOnlyByDocUser = [sql| ...@@ -421,7 +428,9 @@ queryNgramsOnlyByDocUser = [sql|
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | TODO filter by language, database, any social field -- | TODO filter by language, database, any social field
getContextsByNgramsMaster :: HasDBid NodeType getContextsByNgramsMaster :: HasDBid NodeType
=> UserCorpusId -> MasterCorpusId -> Cmd err (HashMap Text (Set NodeId)) => UserCorpusId
-> MasterCorpusId
-> Cmd err (HashMap Text (Set NodeId))
getContextsByNgramsMaster ucId mcId = unionsWith (<>) getContextsByNgramsMaster ucId mcId = unionsWith (<>)
. map (HM.fromListWith (<>) . map (\(n,t) -> (t, Set.singleton n))) . map (HM.fromListWith (<>) . map (\(n,t) -> (t, Set.singleton n)))
-- . takeWhile (not . List.null) -- . takeWhile (not . List.null)
......
...@@ -21,7 +21,7 @@ import qualified Data.HashMap.Strict as HM ...@@ -21,7 +21,7 @@ import qualified Data.HashMap.Strict as HM
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Gargantext.Core import Gargantext.Core
import Gargantext.Core.Text.Metrics.TFICF import Gargantext.Core.Text.Metrics.TFICF
import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsUser, getOccByNgramsOnlyFast, getOccByNgramsOnlyFast_withSample) import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsUser, {-getOccByNgramsOnlyFast,-} getOccByNgramsOnlyFast_withSample)
import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId) import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Prelude (Cmd) import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.NodeContext (selectCountDocs) import Gargantext.Database.Query.Table.NodeContext (selectCountDocs)
...@@ -30,6 +30,7 @@ import Gargantext.API.Ngrams.Types ...@@ -30,6 +30,7 @@ import Gargantext.API.Ngrams.Types
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Set as Set import qualified Data.Set as Set
{-
getTficf :: HasDBid NodeType getTficf :: HasDBid NodeType
=> UserCorpusId => UserCorpusId
-> MasterCorpusId -> MasterCorpusId
...@@ -54,7 +55,7 @@ getTficf cId mId nt = do ...@@ -54,7 +55,7 @@ getTficf cId mId nt = do
(TficfSupra (Count $ fromMaybe 0 $ HM.lookup t mapTextDoubleGlobal) (TficfSupra (Count $ fromMaybe 0 $ HM.lookup t mapTextDoubleGlobal)
(Total $ fromIntegral countGlobal)) (Total $ fromIntegral countGlobal))
) mapTextDoubleLocal ) mapTextDoubleLocal
-}
getTficf_withSample :: HasDBid NodeType getTficf_withSample :: HasDBid NodeType
=> UserCorpusId => UserCorpusId
......
...@@ -19,7 +19,7 @@ module Gargantext.Database.Admin.Trigger.Init ...@@ -19,7 +19,7 @@ module Gargantext.Database.Admin.Trigger.Init
import Data.Text (Text) import Data.Text (Text)
import Gargantext.Database.Admin.Trigger.ContextNodeNgrams (triggerCountInsert, triggerCountInsert2) import Gargantext.Database.Admin.Trigger.ContextNodeNgrams (triggerCountInsert, triggerCountInsert2)
import Gargantext.Database.Admin.Trigger.Nodes (triggerSearchUpdate, triggerUpdateHash) import Gargantext.Database.Admin.Trigger.Nodes (triggerSearchUpdate, triggerUpdateHash)
import Gargantext.Database.Admin.Trigger.NodesNodes (triggerDeleteCount, triggerInsertCount, triggerUpdateAdd, triggerUpdateDel, MasterListId) -- , triggerCoocInsert) import Gargantext.Database.Admin.Trigger.NodesNodes ({-triggerDeleteCount,-} triggerInsertCount, triggerUpdateAdd, triggerUpdateDel, MasterListId) -- , triggerCoocInsert)
import Gargantext.Database.Prelude (Cmd) import Gargantext.Database.Prelude (Cmd)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -35,7 +35,7 @@ initLastTriggers lId = do ...@@ -35,7 +35,7 @@ initLastTriggers lId = do
t1 <- triggerCountInsert t1 <- triggerCountInsert
t1' <- triggerCountInsert2 t1' <- triggerCountInsert2
-- t1'' <- triggerCoocInsert lId -- t1'' <- triggerCoocInsert lId
t2 <- triggerDeleteCount lId -- t2 <- triggerDeleteCount lId
t3 <- triggerInsertCount lId t3 <- triggerInsertCount lId
t4 <- triggerUpdateAdd lId t4 <- triggerUpdateAdd lId
t5 <- triggerUpdateDel lId t5 <- triggerUpdateDel lId
...@@ -43,7 +43,7 @@ initLastTriggers lId = do ...@@ -43,7 +43,7 @@ initLastTriggers lId = do
,t1 ,t1
,t1' ,t1'
-- ,t1'' -- ,t1''
,t2 -- ,t2
,t3 ,t3
,t4 ,t4
,t5] ,t5]
......
...@@ -17,64 +17,32 @@ module Gargantext.Database.Admin.Trigger.NodesNodes ...@@ -17,64 +17,32 @@ module Gargantext.Database.Admin.Trigger.NodesNodes
where where
import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.SqlQQ (sql)
-- import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.Core import Gargantext.Core
import Gargantext.Database.Admin.Config import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId) import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
-- import Gargantext.Core.Types.Main (ListType(CandidateTerm))
import Gargantext.Database.Prelude (Cmd, execPGSQuery) import Gargantext.Database.Prelude (Cmd, execPGSQuery)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Database.PostgreSQL.Simple as DPS import qualified Database.PostgreSQL.Simple as DPS
type MasterListId = ListId type MasterListId = ListId
triggerDeleteCount :: MasterListId -> Cmd err Int64
triggerDeleteCount lId = execPGSQuery query (lId, toDBid NodeList)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION set_delete_count() RETURNS trigger AS $$
BEGIN
UPDATE node_node_ngrams SET weight = weight - d.delete_count
FROM (SELECT old1.node1_id as node1_id, lists.id as node2_id, nnn.ngrams_id as ngrams_id, nnn.ngrams_type as ngrams_type, count(*) as delete_count FROM OLD as old1
INNER JOIN nodes doc ON doc.id = old1.node2_id
INNER JOIN nodes lists ON lists.parent_id = old1.node1_id
INNER JOIN node_node_ngrams nnn ON nnn.node2_id = doc.id
WHERE nnn.node1_id in (?, lists.id)
AND lists.typename = ?
GROUP BY old1.node1_id, lists.id, nnn.ngrams_id, nnn.ngrams_type
) AS d
WHERE node_node_ngrams.node1_id = d.node1_id
AND node_node_ngrams.node2_id = d.node2_id
AND node_node_ngrams.ngrams_id = d.ngrams_id
AND node_node_ngrams.ngrams_type = d.ngrams_type
;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_delete_count on nodes_nodes;
CREATE TRIGGER trigger_delete_count AFTER DELETE on nodes_nodes
REFERENCING OLD TABLE AS OLD
FOR EACH STATEMENT
EXECUTE PROCEDURE set_delete_count();
|]
triggerInsertCount :: MasterListId -> Cmd err Int64 triggerInsertCount :: MasterListId -> Cmd err Int64
triggerInsertCount lId = execPGSQuery query (lId, nodeTypeId NodeList) triggerInsertCount lId = execPGSQuery query (lId, nodeTypeId NodeList)
where where
query :: DPS.Query query :: DPS.Query
query = [sql| query = [sql|
CREATE OR REPLACE FUNCTION set_insert_count() RETURNS trigger AS $$ CREATE OR REPLACE FUNCTION set_insert_count() RETURNS trigger
AS $$
BEGIN BEGIN
INSERT INTO node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight) INSERT INTO node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
SELECT new1.node1_id , lists.id, nnn.ngrams_id, nnn.ngrams_type, count(*) as weight from NEW as new1 SELECT new1.node_id, lists.id, nnn.ngrams_id, nnn.ngrams_type, count(*) as weight
INNER JOIN nodes doc ON doc.id = new1.node2_id FROM NEW as new1
INNER JOIN nodes lists ON lists.parent_id = new1.node1_id INNER JOIN contexts doc ON doc.id = new1.context_id
INNER JOIN node_node_ngrams nnn ON nnn.node2_id = doc.id INNER JOIN nodes lists ON lists.parent_id = new1.node_id
WHERE nnn.node1_id in (?, lists.id) INNER JOIN context_node_ngrams nnn ON nnn.context_id = doc.id
WHERE lists.id in (?, lists.id)
AND lists.typename = ? AND lists.typename = ?
GROUP BY new1.node1_id, lists.id, nnn.ngrams_id, nnn.ngrams_type GROUP BY new1.node_id, lists.id, nnn.ngrams_id, nnn.ngrams_type
ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type) ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
DO UPDATE set weight = node_node_ngrams.weight + excluded.weight DO UPDATE set weight = node_node_ngrams.weight + excluded.weight
; ;
...@@ -82,13 +50,14 @@ triggerInsertCount lId = execPGSQuery query (lId, nodeTypeId NodeList) ...@@ -82,13 +50,14 @@ triggerInsertCount lId = execPGSQuery query (lId, nodeTypeId NodeList)
END END
$$ LANGUAGE plpgsql; $$ LANGUAGE plpgsql;
-- DROP trigger trigger_insert_count on nodes_nodes; DROP TRIGGER IF EXISTS trigger_insert_count ON nodes_contexts;
CREATE TRIGGER trigger_insert_count AFTER INSERT on nodes_nodes CREATE TRIGGER trigger_insert_count AFTER INSERT ON nodes_contexts
REFERENCING NEW TABLE AS NEW REFERENCING NEW TABLE AS NEW
FOR EACH STATEMENT FOR EACH STATEMENT
EXECUTE PROCEDURE set_insert_count(); EXECUTE PROCEDURE set_insert_count();
|] |]
triggerUpdateAdd :: MasterListId -> Cmd err Int64 triggerUpdateAdd :: MasterListId -> Cmd err Int64
triggerUpdateAdd lId = execPGSQuery query (lId, nodeTypeId NodeList) triggerUpdateAdd lId = execPGSQuery query (lId, nodeTypeId NodeList)
where where
...@@ -97,13 +66,18 @@ triggerUpdateAdd lId = execPGSQuery query (lId, nodeTypeId NodeList) ...@@ -97,13 +66,18 @@ triggerUpdateAdd lId = execPGSQuery query (lId, nodeTypeId NodeList)
CREATE OR REPLACE FUNCTION set_update_ngrams_add() RETURNS trigger AS $$ CREATE OR REPLACE FUNCTION set_update_ngrams_add() RETURNS trigger AS $$
BEGIN BEGIN
UPDATE node_node_ngrams nnn0 SET weight = weight + d.fix_count UPDATE node_node_ngrams nnn0 SET weight = weight + d.fix_count
FROM (SELECT new1.node1_id as node1_id, lists.id as node2_id, nnn.ngrams_id as ngrams_id, nnn.ngrams_type as ngrams_type, count(*) as fix_count FROM ( SELECT lists.parent_id as node1_id
, lists.id as node2_id
, nnn.ngrams_id as ngrams_id
, nnn.ngrams_type as ngrams_type
, count(*) as fix_count
FROM NEW as new1 FROM NEW as new1
INNER JOIN nodes lists ON new1.node1_id = lists.parent_id INNER JOIN contexts doc ON doc.id = new1.context_id
INNER JOIN node_node_ngrams nnn ON new1.node2_id = nnn.node2_id INNER JOIN nodes lists ON lists.id = new1.node_id
WHERE nnn.node1_id in (?, lists.id) -- (masterList_id, userLists) INNER JOIN context_node_ngrams nnn ON nnn.context_id = doc.id
WHERE nnn.node_id in (?, lists.id) -- (masterList_id, userLists)
AND lists.typename = ? AND lists.typename = ?
GROUP BY new1.node1_id, lists.id, nnn.ngrams_id, nnn.ngrams_type GROUP BY node1_id, node2_id, ngrams_id, ngrams_type
) as d ) as d
WHERE nnn0.node1_id = d.node1_id WHERE nnn0.node1_id = d.node1_id
AND nnn0.node2_id = d.node2_id AND nnn0.node2_id = d.node2_id
...@@ -114,14 +88,16 @@ triggerUpdateAdd lId = execPGSQuery query (lId, nodeTypeId NodeList) ...@@ -114,14 +88,16 @@ triggerUpdateAdd lId = execPGSQuery query (lId, nodeTypeId NodeList)
END END
$$ LANGUAGE plpgsql; $$ LANGUAGE plpgsql;
-- DROP trigger trigger_count_update_add on nodes_nodes; DROP trigger IF EXISTS trigger_count_update_add on nodes_contexts;
CREATE TRIGGER trigger_count_update_add AFTER UPDATE on nodes_nodes CREATE TRIGGER trigger_count_update_add AFTER UPDATE on nodes_contexts
REFERENCING OLD TABLE AS OLD NEW TABLE AS NEW REFERENCING OLD TABLE AS OLD
NEW TABLE AS NEW
FOR EACH ROW FOR EACH ROW
WHEN (OLD.category <= 0 AND NEW.category >= 1) WHEN (OLD.category <= 0 AND NEW.category >= 1)
EXECUTE PROCEDURE set_update_ngrams_add(); EXECUTE PROCEDURE set_update_ngrams_add();
|] |]
triggerUpdateDel :: MasterListId -> Cmd err Int64 triggerUpdateDel :: MasterListId -> Cmd err Int64
triggerUpdateDel lId = execPGSQuery query (lId, nodeTypeId NodeList) triggerUpdateDel lId = execPGSQuery query (lId, nodeTypeId NodeList)
where where
...@@ -130,13 +106,18 @@ triggerUpdateDel lId = execPGSQuery query (lId, nodeTypeId NodeList) ...@@ -130,13 +106,18 @@ triggerUpdateDel lId = execPGSQuery query (lId, nodeTypeId NodeList)
CREATE OR REPLACE FUNCTION set_update_ngrams_count_del() RETURNS trigger AS $$ CREATE OR REPLACE FUNCTION set_update_ngrams_count_del() RETURNS trigger AS $$
BEGIN BEGIN
UPDATE node_node_ngrams nnn0 SET weight = weight - d.fix_count UPDATE node_node_ngrams nnn0 SET weight = weight - d.fix_count
FROM (SELECT new1.node1_id as node1_id, lists.id as node2_id, nnn.ngrams_id as ngrams_id, nnn.ngrams_type as ngrams_type, count(*) as fix_count FROM ( SELECT lists.parent_id AS node1_id
FROM NEW as new1 , lists.id AS node2_id
INNER JOIN nodes lists ON new1.node1_id = lists.parent_id , cnn.ngrams_id AS ngrams_id
INNER JOIN node_node_ngrams nnn ON new1.node2_id = nnn.node2_id , cnn.ngrams_type AS ngrams_type
WHERE nnn.node1_id in (?, lists.id) -- (masterList_id, userLists) , count(*) AS fix_count
FROM NEW AS new1
INNER JOIN contexts doc ON doc.id = new1.context_id
INNER JOIN nodes lists ON new1.node_id = lists.parent_id
INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id
WHERE lists.id in (?, lists.id) -- (masterList_id, userLists)
AND lists.typename = ? AND lists.typename = ?
GROUP BY new1.node1_id, lists.id, nnn.ngrams_id, nnn.ngrams_type GROUP BY node1_id, node2_id, ngrams_id, ngrams_type
) as d ) as d
WHERE nnn0.node1_id = d.node1_id WHERE nnn0.node1_id = d.node1_id
AND nnn0.node2_id = d.node2_id AND nnn0.node2_id = d.node2_id
...@@ -147,15 +128,55 @@ triggerUpdateDel lId = execPGSQuery query (lId, nodeTypeId NodeList) ...@@ -147,15 +128,55 @@ triggerUpdateDel lId = execPGSQuery query (lId, nodeTypeId NodeList)
END END
$$ LANGUAGE plpgsql; $$ LANGUAGE plpgsql;
-- DROP trigger trigger_count_delete2 on nodes_nodes; DROP TRIGGER IF EXISTS trigger_count_delete2 ON nodes_contexts;
CREATE TRIGGER trigger_count_delete2 AFTER UPDATE on nodes_nodes CREATE TRIGGER trigger_count_delete2 AFTER UPDATE ON nodes_contexts
REFERENCING OLD TABLE AS OLD NEW TABLE AS NEW REFERENCING OLD TABLE AS OLD
NEW TABLE AS NEW
FOR EACH ROW FOR EACH ROW
WHEN (OLD.category >= 1 AND NEW.category <= 0) WHEN (OLD.category >= 1 AND NEW.category <= 0)
EXECUTE PROCEDURE set_update_ngrams_count_del(); EXECUTE PROCEDURE set_update_ngrams_count_del();
|]
triggerDeleteCount :: MasterListId -> Cmd err Int64
triggerDeleteCount lId = execPGSQuery query (lId, toDBid NodeList)
where
query :: DPS.Query
query = [sql|
CREATE OR REPLACE FUNCTION set_delete_count() RETURNS trigger AS $$
BEGIN
UPDATE context_node_ngrams SET weight = weight - d.delete_count
FROM ( SELECT lists.id as node_id
, old1.context_id as context_id
, nnn.ngrams_id as ngrams_id
, nnn.ngrams_type as ngrams_type
, count(*) as delete_count FROM OLD as old1
INNER JOIN contexts doc ON doc.id = old1.context_id
INNER JOIN nodes lists ON lists.parent_id = old1.node_id
INNER JOIN context_node_ngrams nnn ON nnn.context_id = doc.id
WHERE nnn.node_id in (?, lists.id)
AND lists.typename = ?
GROUP BY old1.context_id, lists.id, nnn.ngrams_id, nnn.ngrams_type
) AS d
WHERE context_node_ngrams.context_id = d.context_id
AND context_node_ngrams.node_id = d.node_id
AND context_node_ngrams.ngrams_id = d.ngrams_id
AND context_node_ngrams.ngrams_type = d.ngrams_type
;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_delete_count on nodes_nodes;
CREATE TRIGGER trigger_delete_count AFTER DELETE on nodes_contexts
REFERENCING OLD TABLE AS OLD
FOR EACH STATEMENT
EXECUTE PROCEDURE set_delete_count();
|] |]
-- TODO add groups -- TODO add groups
{- {-
triggerCoocInsert :: MasterListId -> Cmd err Int64 triggerCoocInsert :: MasterListId -> Cmd err Int64
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment