Commit 04da4749 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] Requests with Contexts (WIP)

parent 51152a29
...@@ -103,7 +103,7 @@ import Gargantext.Core.Mail.Types (HasMail) ...@@ -103,7 +103,7 @@ import Gargantext.Core.Mail.Types (HasMail)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError) import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
import Gargantext.API.Ngrams.Tools import Gargantext.API.Ngrams.Tools
import Gargantext.Database.Action.Flow.Types import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast') import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast')
import Gargantext.Database.Admin.Config (userMaster) import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Node (NodeType(..)) import Gargantext.Database.Admin.Types.Node (NodeType(..))
import Gargantext.Database.Prelude (HasConnectionPool, HasConfig) import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
......
...@@ -36,7 +36,7 @@ import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText) ...@@ -36,7 +36,7 @@ import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText)
import Gargantext.Core.Types.Main (ListType(..)) import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Database.Action.Flow (saveDocNgramsWith) import Gargantext.Database.Action.Flow (saveDocNgramsWith)
import Gargantext.Database.Action.Flow.Types (FlowCmdM) import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast') import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast')
import Gargantext.Database.Admin.Types.Hyperdata.Document import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.NodeNode (selectDocNodes) import Gargantext.Database.Query.Table.NodeNode (selectDocNodes)
......
...@@ -32,7 +32,7 @@ import Gargantext.API.Prelude (GargNoServer) ...@@ -32,7 +32,7 @@ import Gargantext.API.Prelude (GargNoServer)
import Gargantext.Prelude.Crypto.Hash (hash) import Gargantext.Prelude.Crypto.Hash (hash)
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory
import Gargantext.Database.Action.Metrics.NgramsByNode (getNgramsByNodeOnlyUser) import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser)
import Gargantext.Database.Admin.Config (userMaster) import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Prelude (Cmd) import Gargantext.Database.Prelude (Cmd)
...@@ -94,7 +94,7 @@ getNodeNgrams cId lId nt repo = do ...@@ -94,7 +94,7 @@ getNodeNgrams cId lId nt repo = do
lIds <- selectNodesWithUsername NodeList userMaster lIds <- selectNodesWithUsername NodeList userMaster
let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
-- TODO HashMap -- TODO HashMap
r <- getNgramsByNodeOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs) r <- getNgramsByContextOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
pure r pure r
-- TODO -- TODO
......
...@@ -34,7 +34,7 @@ import Gargantext.Core.Text.List.Social.Prelude ...@@ -34,7 +34,7 @@ import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.Metrics (scored', Scored(..), scored_speExc, scored_genInc, normalizeGlobal, normalizeLocal, scored_terms) import Gargantext.Core.Text.Metrics (scored', Scored(..), scored_speExc, scored_genInc, normalizeGlobal, normalizeLocal, scored_terms)
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId) import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId)
import Gargantext.Core.Types.Individu (User(..)) import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsUser, getNodesByNgramsOnlyUser) import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsUser, getContextsByNgramsOnlyUser)
import Gargantext.Database.Action.Metrics.TFICF (getTficf_withSample) import Gargantext.Database.Action.Metrics.TFICF (getTficf_withSample)
import Gargantext.Database.Admin.Types.Node (NodeId) import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Database.Prelude (CmdM) import Gargantext.Database.Prelude (CmdM)
...@@ -98,7 +98,7 @@ buildNgramsOthersList :: ( HasNodeError err ...@@ -98,7 +98,7 @@ buildNgramsOthersList :: ( HasNodeError err
-> (NgramsType, MapListSize) -> (NgramsType, MapListSize)
-> m (Map NgramsType [NgramsElement]) -> m (Map NgramsType [NgramsElement])
buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize) = do buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize) = do
allTerms :: HashMap NgramsTerm (Set NodeId) <- getNodesByNgramsUser uCid nt allTerms :: HashMap NgramsTerm (Set NodeId) <- getContextsByNgramsUser uCid nt
-- PrivateFirst for first developments since Public NodeMode is not implemented yet -- PrivateFirst for first developments since Public NodeMode is not implemented yet
socialLists :: FlowCont NgramsTerm FlowListScores socialLists :: FlowCont NgramsTerm FlowListScores
...@@ -212,7 +212,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do ...@@ -212,7 +212,7 @@ buildNgramsTermsList user uCid mCid mfslw groupParams (nt, _mapListSize)= do
userListId <- defaultList uCid userListId <- defaultList uCid
masterListId <- defaultList mCid masterListId <- defaultList mCid
mapTextDocIds <- getNodesByNgramsOnlyUser uCid mapTextDocIds <- getContextsByNgramsOnlyUser uCid
[userListId, masterListId] [userListId, masterListId]
nt nt
selectedTerms selectedTerms
......
...@@ -36,7 +36,7 @@ import Gargantext.API.Ngrams.Tools ...@@ -36,7 +36,7 @@ import Gargantext.API.Ngrams.Tools
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Database.Action.Flow.Types import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Metrics.NgramsByNode import Gargantext.Database.Action.Metrics.NgramsByContext
import Gargantext.Database.Schema.Ngrams import Gargantext.Database.Schema.Ngrams
import Gargantext.Core.Viz.Types import Gargantext.Core.Viz.Types
import qualified Data.HashMap.Strict as HashMap import qualified Data.HashMap.Strict as HashMap
...@@ -67,8 +67,8 @@ chartData cId nt lt = do ...@@ -67,8 +67,8 @@ chartData cId nt lt = do
Nothing -> x Nothing -> x
Just x' -> maybe x identity x' Just x' -> maybe x identity x'
(_total,mapTerms) <- countNodesByNgramsWith (group dico) (_total,mapTerms) <- countContextsByNgramsWith (group dico)
<$> getNodesByNgramsOnlyUser cId (ls' <> ls) nt terms <$> getContextsByNgramsOnlyUser cId (ls' <> ls) nt terms
let (dates, count) = V.unzip $ let (dates, count) = V.unzip $
V.fromList $ V.fromList $
List.sortOn snd $ List.sortOn snd $
...@@ -89,7 +89,7 @@ treeData cId nt lt = do ...@@ -89,7 +89,7 @@ treeData cId nt lt = do
dico = filterListWithRoot lt ts dico = filterListWithRoot lt ts
terms = catMaybes $ List.concat $ map (\(a,b) -> [Just a, b]) $ HashMap.toList dico terms = catMaybes $ List.concat $ map (\(a,b) -> [Just a, b]) $ HashMap.toList dico
cs' <- getNodesByNgramsOnlyUser cId (ls' <> ls) nt terms cs' <- getContextsByNgramsOnlyUser cId (ls' <> ls) nt terms
m <- getListNgrams ls nt m <- getListNgrams ls nt
pure $ V.fromList $ toTree lt cs' m pure $ V.fromList $ toTree lt cs' m
......
...@@ -31,7 +31,7 @@ import Gargantext.Core.Types.Main ...@@ -31,7 +31,7 @@ import Gargantext.Core.Types.Main
import Gargantext.Core.Viz.Graph import Gargantext.Core.Viz.Graph
import Gargantext.Core.Viz.Graph.GEXF () import Gargantext.Core.Viz.Graph.GEXF ()
import Gargantext.Core.Viz.Graph.Tools -- (cooc2graph) import Gargantext.Core.Viz.Graph.Tools -- (cooc2graph)
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser) import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser)
import Gargantext.Database.Action.Flow.Types (FlowCmdM) import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Node (mkNodeWithParent) import Gargantext.Database.Action.Node (mkNodeWithParent)
import Gargantext.Database.Admin.Config import Gargantext.Database.Admin.Config
...@@ -179,7 +179,7 @@ computeGraph cId d nt repo = do ...@@ -179,7 +179,7 @@ computeGraph cId d nt repo = do
-- <$> getCoocByNgrams (if d == Conditional then Diagonal True else Diagonal False) -- <$> getCoocByNgrams (if d == Conditional then Diagonal True else Diagonal False)
<$> getCoocByNgrams (Diagonal True) <$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs <$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs) <$> getContextsByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
-- printDebug "myCooc" myCooc -- printDebug "myCooc" myCooc
-- saveAsFileDebug "debug/my-cooc" myCooc -- saveAsFileDebug "debug/my-cooc" myCooc
......
...@@ -28,7 +28,7 @@ import Gargantext.Core ...@@ -28,7 +28,7 @@ import Gargantext.Core
import Gargantext.Core.Types (TableResult(..)) import Gargantext.Core.Types (TableResult(..))
import Gargantext.Core.Types.Main import Gargantext.Core.Types.Main
import Gargantext.Database import Gargantext.Database
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser) import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser)
import Gargantext.Database.Admin.Config import Gargantext.Database.Admin.Config
import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..)) import Gargantext.Database.Admin.Types.Hyperdata -- (HyperdataContact(..))
import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId) import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
...@@ -190,4 +190,4 @@ getNgramsDocId cId lId nt = do ...@@ -190,4 +190,4 @@ getNgramsDocId cId lId nt = do
let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
groupNodesByNgrams ngs groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs) <$> getContextsByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)
...@@ -22,7 +22,7 @@ import Gargantext.Core.Mail.Types (HasMail) ...@@ -22,7 +22,7 @@ import Gargantext.Core.Mail.Types (HasMail)
import Gargantext.Core.Types (ListType(..), Limit, NodeType(..)) import Gargantext.Core.Types (ListType(..), Limit, NodeType(..))
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory
import Gargantext.Database.Action.Flow.Types (FlowCmdM) import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser{-, getTficfWith-}) import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser{-, getTficfWith-})
import Gargantext.Database.Admin.Config (userMaster) import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Node (ListId, CorpusId) import Gargantext.Database.Admin.Types.Node (ListId, CorpusId)
import Gargantext.Database.Query.Table.Node (defaultList) import Gargantext.Database.Query.Table.Node (defaultList)
...@@ -57,7 +57,7 @@ getNgramsCooc cId maybeListId tabType maybeLimit = do ...@@ -57,7 +57,7 @@ getNgramsCooc cId maybeListId tabType maybeLimit = do
myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True) myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs <$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) (ngramsTypeFromTabType tabType) <$> getContextsByNgramsOnlyUser cId (lIds <> [lId]) (ngramsTypeFromTabType tabType)
(take' maybeLimit $ HM.keys ngs) (take' maybeLimit $ HM.keys ngs)
pure $ (ngs', ngs, myCooc) pure $ (ngs', ngs, myCooc)
......
{-| {-|
Module : Gargantext.Database.Metrics.NgramsByNode Module : Gargantext.Database.Metrics.NgramsByContext
Description : Ngrams by Node user and master Description : Ngrams by Node user and master
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
...@@ -13,7 +13,7 @@ Ngrams by node enable contextual metrics. ...@@ -13,7 +13,7 @@ Ngrams by node enable contextual metrics.
{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Action.Metrics.NgramsByNode module Gargantext.Database.Action.Metrics.NgramsByContext
where where
--import Data.Map.Strict.Patch (PatchMap, Replace, diff) --import Data.Map.Strict.Patch (PatchMap, Replace, diff)
...@@ -39,39 +39,39 @@ import qualified Database.PostgreSQL.Simple as DPS ...@@ -39,39 +39,39 @@ import qualified Database.PostgreSQL.Simple as DPS
-- | fst is size of Supra Corpus -- | fst is size of Supra Corpus
-- snd is Texts and size of Occurrences (different docs) -- snd is Texts and size of Occurrences (different docs)
countNodesByNgramsWith :: (NgramsTerm -> NgramsTerm) countContextsByNgramsWith :: (NgramsTerm -> NgramsTerm)
-> HashMap NgramsTerm (Set NodeId) -> HashMap NgramsTerm (Set ContextId)
-> (Double, HashMap NgramsTerm (Double, Set NgramsTerm)) -> (Double, HashMap NgramsTerm (Double, Set NgramsTerm))
countNodesByNgramsWith f m = (total, m') countContextsByNgramsWith f m = (total, m')
where where
total = fromIntegral $ Set.size $ Set.unions $ HM.elems m total = fromIntegral $ Set.size $ Set.unions $ HM.elems m
m' = HM.map ( swap . second (fromIntegral . Set.size)) m' = HM.map ( swap . second (fromIntegral . Set.size))
$ groupNodesByNgramsWith f m $ groupContextsByNgramsWith f m
groupNodesByNgramsWith :: (NgramsTerm -> NgramsTerm) groupContextsByNgramsWith :: (NgramsTerm -> NgramsTerm)
-> HashMap NgramsTerm (Set NodeId) -> HashMap NgramsTerm (Set NodeId)
-> HashMap NgramsTerm (Set NgramsTerm, Set NodeId) -> HashMap NgramsTerm (Set NgramsTerm, Set ContextId)
groupNodesByNgramsWith f m = groupContextsByNgramsWith f m =
HM.fromListWith (<>) $ map (\(t,ns) -> (f t, (Set.singleton t, ns))) HM.fromListWith (<>) $ map (\(t,ns) -> (f t, (Set.singleton t, ns)))
$ HM.toList m $ HM.toList m
------------------------------------------------------------------------ ------------------------------------------------------------------------
getNodesByNgramsUser :: HasDBid NodeType getContextsByNgramsUser :: HasDBid NodeType
=> CorpusId => CorpusId
-> NgramsType -> NgramsType
-> Cmd err (HashMap NgramsTerm (Set NodeId)) -> Cmd err (HashMap NgramsTerm (Set ContextId))
getNodesByNgramsUser cId nt = getContextsByNgramsUser cId nt =
HM.fromListWith (<>) <$> map (\(n,t) -> (NgramsTerm t, Set.singleton n)) HM.fromListWith (<>) <$> map (\(n,t) -> (NgramsTerm t, Set.singleton n))
<$> selectNgramsByNodeUser cId nt <$> selectNgramsByContextUser cId nt
where where
selectNgramsByNodeUser :: HasDBid NodeType selectNgramsByContextUser :: HasDBid NodeType
=> CorpusId => CorpusId
-> NgramsType -> NgramsType
-> Cmd err [(NodeId, Text)] -> Cmd err [(NodeId, Text)]
selectNgramsByNodeUser cId' nt' = selectNgramsByContextUser cId' nt' =
runPGSQuery queryNgramsByNodeUser runPGSQuery queryNgramsByContextUser
( cId' ( cId'
, toDBid NodeDocument , toDBid NodeDocument
, ngramsTypeId nt' , ngramsTypeId nt'
...@@ -79,13 +79,13 @@ getNodesByNgramsUser cId nt = ...@@ -79,13 +79,13 @@ getNodesByNgramsUser cId nt =
-- , 0 :: Int -- offset -- , 0 :: Int -- offset
) )
queryNgramsByNodeUser :: DPS.Query queryNgramsByContextUser :: DPS.Query
queryNgramsByNodeUser = [sql| queryNgramsByContextUser = [sql|
SELECT cng.node_id, ng.terms FROM context_node_ngrams cng SELECT cng.node_id, ng.terms FROM context_node_ngrams cng
JOIN ngrams ng ON cng.ngrams_id = ng.id JOIN ngrams ng ON cng.ngrams_id = ng.id
JOIN nodes_nodes nn ON nn.node2_id = cng.node_id JOIN nodes_contexts nn ON nn.context_id = cng.context_id
JOIN nodes n ON nn.node2_id = n.id JOIN contexts n ON nn.context_id = n.id
WHERE nn.node1_id = ? -- CorpusId WHERE nn.node_id = ? -- CorpusId
AND n.typename = ? -- toDBid AND n.typename = ? -- toDBid
AND cng.ngrams_type = ? -- NgramsTypeId AND cng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0 AND nn.category > 0
...@@ -102,7 +102,7 @@ getOccByNgramsOnlyFast :: HasDBid NodeType ...@@ -102,7 +102,7 @@ getOccByNgramsOnlyFast :: HasDBid NodeType
-> [NgramsTerm] -> [NgramsTerm]
-> Cmd err (HashMap NgramsTerm Int) -> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlyFast cId nt ngs = getOccByNgramsOnlyFast cId nt ngs =
HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByContextUser cId nt ngs
getOccByNgramsOnlyFast_withSample :: HasDBid NodeType getOccByNgramsOnlyFast_withSample :: HasDBid NodeType
...@@ -112,9 +112,7 @@ getOccByNgramsOnlyFast_withSample :: HasDBid NodeType ...@@ -112,9 +112,7 @@ getOccByNgramsOnlyFast_withSample :: HasDBid NodeType
-> [NgramsTerm] -> [NgramsTerm]
-> Cmd err (HashMap NgramsTerm Int) -> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlyFast_withSample cId int nt ngs = getOccByNgramsOnlyFast_withSample cId int nt ngs =
HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser_withSample cId int nt ngs HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByContextUser_withSample cId int nt ngs
getOccByNgramsOnlyFast' :: CorpusId getOccByNgramsOnlyFast' :: CorpusId
...@@ -147,9 +145,9 @@ getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $ ...@@ -147,9 +145,9 @@ getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $
JOIN ngrams ng ON cng.ngrams_id = ng.id JOIN ngrams ng ON cng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms JOIN input_rows ir ON ir.terms = ng.terms
WHERE cng.context_id = ? -- CorpusId WHERE cng.context_id = ? -- CorpusId
AND cng.node_id = ? -- ListId AND cng.node_id = ? -- ListId
AND cng.ngrams_type = ? -- NgramsTypeId AND cng.ngrams_type = ? -- NgramsTypeId
-- AND nn.category > 0 -- TODO -- AND nn.category > 0 -- TODO
GROUP BY ng.terms, cng.weight GROUP BY ng.terms, cng.weight
|] |]
...@@ -165,9 +163,9 @@ getOccByNgramsOnlySlow :: HasDBid NodeType ...@@ -165,9 +163,9 @@ getOccByNgramsOnlySlow :: HasDBid NodeType
getOccByNgramsOnlySlow t cId ls nt ngs = getOccByNgramsOnlySlow t cId ls nt ngs =
HM.map Set.size <$> getScore' t cId ls nt ngs HM.map Set.size <$> getScore' t cId ls nt ngs
where where
getScore' NodeCorpus = getNodesByNgramsOnlyUser getScore' NodeCorpus = getContextsByNgramsOnlyUser
getScore' NodeDocument = getNgramsByDocOnlyUser getScore' NodeDocument = getNgramsByDocOnlyUser
getScore' _ = getNodesByNgramsOnlyUser getScore' _ = getContextsByNgramsOnlyUser
getOccByNgramsOnlySafe :: HasDBid NodeType getOccByNgramsOnlySafe :: HasDBid NodeType
=> CorpusId => CorpusId
...@@ -186,14 +184,14 @@ getOccByNgramsOnlySafe cId ls nt ngs = do ...@@ -186,14 +184,14 @@ getOccByNgramsOnlySafe cId ls nt ngs = do
pure slow pure slow
selectNgramsOccurrencesOnlyByNodeUser :: HasDBid NodeType selectNgramsOccurrencesOnlyByContextUser :: HasDBid NodeType
=> CorpusId => CorpusId
-> NgramsType -> NgramsType
-> [NgramsTerm] -> [NgramsTerm]
-> Cmd err [(NgramsTerm, Int)] -> Cmd err [(NgramsTerm, Int)]
selectNgramsOccurrencesOnlyByNodeUser cId nt tms = selectNgramsOccurrencesOnlyByContextUser cId nt tms =
fmap (first NgramsTerm) <$> fmap (first NgramsTerm) <$>
runPGSQuery queryNgramsOccurrencesOnlyByNodeUser runPGSQuery queryNgramsOccurrencesOnlyByContextUser
( Values fields ((DPS.Only . unNgramsTerm) <$> tms) ( Values fields ((DPS.Only . unNgramsTerm) <$> tms)
, cId , cId
, toDBid NodeDocument , toDBid NodeDocument
...@@ -205,10 +203,10 @@ selectNgramsOccurrencesOnlyByNodeUser cId nt tms = ...@@ -205,10 +203,10 @@ selectNgramsOccurrencesOnlyByNodeUser cId nt tms =
-- same as queryNgramsOnlyByNodeUser but using COUNT on the node ids. -- same as queryNgramsOnlyByNodeUser but using COUNT on the node ids.
-- Question: with the grouping is the result exactly the same (since Set NodeId for -- Question: with the grouping is the result exactly the same (since Set NodeId for
-- equivalent ngrams intersections are not empty) -- equivalent ngrams intersections are not empty)
queryNgramsOccurrencesOnlyByNodeUser :: DPS.Query queryNgramsOccurrencesOnlyByContextUser :: DPS.Query
queryNgramsOccurrencesOnlyByNodeUser = [sql| queryNgramsOccurrencesOnlyByContextUser = [sql|
WITH input_rows(terms) AS (?) WITH input_rows(terms) AS (?)
SELECT ng.terms, COUNT(cng.node_id) FROM context_node_ngrams cng SELECT ng.terms, COUNT(cng.node_id) FROM context_node_ngrams cng
JOIN ngrams ng ON cng.ngrams_id = ng.id JOIN ngrams ng ON cng.ngrams_id = ng.id
...@@ -223,15 +221,15 @@ queryNgramsOccurrencesOnlyByNodeUser = [sql| ...@@ -223,15 +221,15 @@ queryNgramsOccurrencesOnlyByNodeUser = [sql|
|] |]
selectNgramsOccurrencesOnlyByNodeUser_withSample :: HasDBid NodeType selectNgramsOccurrencesOnlyByContextUser_withSample :: HasDBid NodeType
=> CorpusId => CorpusId
-> Int -> Int
-> NgramsType -> NgramsType
-> [NgramsTerm] -> [NgramsTerm]
-> Cmd err [(NgramsTerm, Int)] -> Cmd err [(NgramsTerm, Int)]
selectNgramsOccurrencesOnlyByNodeUser_withSample cId int nt tms = selectNgramsOccurrencesOnlyByContextUser_withSample cId int nt tms =
fmap (first NgramsTerm) <$> fmap (first NgramsTerm) <$>
runPGSQuery queryNgramsOccurrencesOnlyByNodeUser_withSample runPGSQuery queryNgramsOccurrencesOnlyByContextUser_withSample
( int ( int
, toDBid NodeDocument , toDBid NodeDocument
, cId , cId
...@@ -242,8 +240,8 @@ selectNgramsOccurrencesOnlyByNodeUser_withSample cId int nt tms = ...@@ -242,8 +240,8 @@ selectNgramsOccurrencesOnlyByNodeUser_withSample cId int nt tms =
where where
fields = [QualifiedIdentifier Nothing "text"] fields = [QualifiedIdentifier Nothing "text"]
queryNgramsOccurrencesOnlyByNodeUser_withSample :: DPS.Query queryNgramsOccurrencesOnlyByContextUser_withSample :: DPS.Query
queryNgramsOccurrencesOnlyByNodeUser_withSample = [sql| queryNgramsOccurrencesOnlyByContextUser_withSample = [sql|
WITH nodes_sample AS (SELECT id FROM nodes n TABLESAMPLE SYSTEM_ROWS (?) WITH nodes_sample AS (SELECT id FROM nodes n TABLESAMPLE SYSTEM_ROWS (?)
JOIN nodes_nodes nn ON n.id = nn.node2_id JOIN nodes_nodes nn ON n.id = nn.node2_id
WHERE n.typename = ? WHERE n.typename = ?
...@@ -262,8 +260,8 @@ queryNgramsOccurrencesOnlyByNodeUser_withSample = [sql| ...@@ -262,8 +260,8 @@ queryNgramsOccurrencesOnlyByNodeUser_withSample = [sql|
queryNgramsOccurrencesOnlyByNodeUser' :: DPS.Query queryNgramsOccurrencesOnlyByContextUser' :: DPS.Query
queryNgramsOccurrencesOnlyByNodeUser' = [sql| queryNgramsOccurrencesOnlyByContextUser' = [sql|
WITH input_rows(terms) AS (?) WITH input_rows(terms) AS (?)
SELECT ng.terms, COUNT(cng.node_id) FROM context_node_ngrams cng SELECT ng.terms, COUNT(cng.node_id) FROM context_node_ngrams cng
JOIN ngrams ng ON cng.ngrams_id = ng.id JOIN ngrams ng ON cng.ngrams_id = ng.id
...@@ -278,47 +276,47 @@ queryNgramsOccurrencesOnlyByNodeUser' = [sql| ...@@ -278,47 +276,47 @@ queryNgramsOccurrencesOnlyByNodeUser' = [sql|
|] |]
------------------------------------------------------------------------ ------------------------------------------------------------------------
getNodesByNgramsOnlyUser :: HasDBid NodeType getContextsByNgramsOnlyUser :: HasDBid NodeType
=> CorpusId => CorpusId
-> [ListId] -> [ListId]
-> NgramsType -> NgramsType
-> [NgramsTerm] -> [NgramsTerm]
-> Cmd err (HashMap NgramsTerm (Set NodeId)) -> Cmd err (HashMap NgramsTerm (Set NodeId))
getNodesByNgramsOnlyUser cId ls nt ngs = getContextsByNgramsOnlyUser cId ls nt ngs =
HM.unionsWith (<>) HM.unionsWith (<>)
. map (HM.fromListWith (<>) . map (HM.fromListWith (<>)
. map (second Set.singleton)) . map (second Set.singleton))
<$> mapM (selectNgramsOnlyByNodeUser cId ls nt) <$> mapM (selectNgramsOnlyByContextUser cId ls nt)
(splitEvery 1000 ngs) (splitEvery 1000 ngs)
getNgramsByNodeOnlyUser :: HasDBid NodeType getNgramsByContextOnlyUser :: HasDBid NodeType
=> NodeId => NodeId
-> [ListId] -> [ListId]
-> NgramsType -> NgramsType
-> [NgramsTerm] -> [NgramsTerm]
-> Cmd err (Map NodeId (Set NgramsTerm)) -> Cmd err (Map NodeId (Set NgramsTerm))
getNgramsByNodeOnlyUser cId ls nt ngs = getNgramsByContextOnlyUser cId ls nt ngs =
Map.unionsWith (<>) Map.unionsWith (<>)
. map ( Map.fromListWith (<>) . map ( Map.fromListWith (<>)
. map (second Set.singleton) . map (second Set.singleton)
) )
. map (map swap) . map (map swap)
<$> mapM (selectNgramsOnlyByNodeUser cId ls nt) <$> mapM (selectNgramsOnlyByContextUser cId ls nt)
(splitEvery 1000 ngs) (splitEvery 1000 ngs)
------------------------------------------------------------------------ ------------------------------------------------------------------------
selectNgramsOnlyByNodeUser :: HasDBid NodeType selectNgramsOnlyByContextUser :: HasDBid NodeType
=> CorpusId => CorpusId
-> [ListId] -> [ListId]
-> NgramsType -> NgramsType
-> [NgramsTerm] -> [NgramsTerm]
-> Cmd err [(NgramsTerm, NodeId)] -> Cmd err [(NgramsTerm, NodeId)]
selectNgramsOnlyByNodeUser cId ls nt tms = selectNgramsOnlyByContextUser cId ls nt tms =
fmap (first NgramsTerm) <$> fmap (first NgramsTerm) <$>
runPGSQuery queryNgramsOnlyByNodeUser runPGSQuery queryNgramsOnlyByContextUser
( Values fields ((DPS.Only . unNgramsTerm) <$> tms) ( Values fields ((DPS.Only . unNgramsTerm) <$> tms)
, Values [QualifiedIdentifier Nothing "int4"] , Values [QualifiedIdentifier Nothing "int4"]
(DPS.Only <$> (map (\(NodeId n) -> n) ls)) (DPS.Only <$> (map (\(NodeId n) -> n) ls))
, cId , cId
, toDBid NodeDocument , toDBid NodeDocument
...@@ -327,17 +325,17 @@ selectNgramsOnlyByNodeUser cId ls nt tms = ...@@ -327,17 +325,17 @@ selectNgramsOnlyByNodeUser cId ls nt tms =
where where
fields = [QualifiedIdentifier Nothing "text"] fields = [QualifiedIdentifier Nothing "text"]
queryNgramsOnlyByNodeUser :: DPS.Query queryNgramsOnlyByContextUser :: DPS.Query
queryNgramsOnlyByNodeUser = [sql| queryNgramsOnlyByContextUser = [sql|
WITH input_rows(terms) AS (?), WITH input_rows(terms) AS (?),
input_list(id) AS (?) input_list(id) AS (?)
SELECT ng.terms, cng.node_id FROM context_node_ngrams cng SELECT ng.terms, cng.node_id FROM context_node_ngrams cng
JOIN ngrams ng ON cng.ngrams_id = ng.id JOIN ngrams ng ON cng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_list il ON il.id = cng.context_id JOIN input_list il ON il.id = cng.context_id
JOIN nodes_nodes nn ON nn.node2_id = cng.node_id JOIN nodes_contexts nn ON nn.context_id = cng.context_id
JOIN nodes n ON nn.node2_id = n.id JOIN contexts n ON nn.context_id = n.id
WHERE nn.node1_id = ? -- CorpusId WHERE nn.node_id = ? -- CorpusId
AND n.typename = ? -- toDBid AND n.typename = ? -- toDBid
AND cng.ngrams_type = ? -- NgramsTypeId AND cng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0 AND nn.category > 0
...@@ -345,14 +343,14 @@ queryNgramsOnlyByNodeUser = [sql| ...@@ -345,14 +343,14 @@ queryNgramsOnlyByNodeUser = [sql|
|] |]
selectNgramsOnlyByNodeUser' :: HasDBid NodeType selectNgramsOnlyByContextUser' :: HasDBid NodeType
=> CorpusId => CorpusId
-> [ListId] -> [ListId]
-> NgramsType -> NgramsType
-> [Text] -> [Text]
-> Cmd err [(Text, Int)] -> Cmd err [(Text, Int)]
selectNgramsOnlyByNodeUser' cId ls nt tms = selectNgramsOnlyByContextUser' cId ls nt tms =
runPGSQuery queryNgramsOnlyByNodeUser runPGSQuery queryNgramsOnlyByContextUser
( Values fields (DPS.Only <$> tms) ( Values fields (DPS.Only <$> tms)
, Values [QualifiedIdentifier Nothing "int4"] , Values [QualifiedIdentifier Nothing "int4"]
(DPS.Only <$> (map (\(NodeId n) -> n) ls)) (DPS.Only <$> (map (\(NodeId n) -> n) ls))
...@@ -363,8 +361,8 @@ selectNgramsOnlyByNodeUser' cId ls nt tms = ...@@ -363,8 +361,8 @@ selectNgramsOnlyByNodeUser' cId ls nt tms =
where where
fields = [QualifiedIdentifier Nothing "text"] fields = [QualifiedIdentifier Nothing "text"]
queryNgramsOnlyByNodeUser' :: DPS.Query queryNgramsOnlyByContextUser' :: DPS.Query
queryNgramsOnlyByNodeUser' = [sql| queryNgramsOnlyByContextUser' = [sql|
WITH input_rows(terms) AS (?), WITH input_rows(terms) AS (?),
input_list(id) AS (?) input_list(id) AS (?)
SELECT ng.terms, cng.weight FROM context_node_ngrams cng SELECT ng.terms, cng.weight FROM context_node_ngrams cng
...@@ -372,7 +370,7 @@ queryNgramsOnlyByNodeUser' = [sql| ...@@ -372,7 +370,7 @@ queryNgramsOnlyByNodeUser' = [sql|
JOIN input_rows ir ON ir.terms = ng.terms JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_list il ON il.id = cng.node_id JOIN input_list il ON il.id = cng.node_id
WHERE cng.context_id = ? -- CorpusId WHERE cng.context_id = ? -- CorpusId
AND cng.ngrams_type = ? -- NgramsTypeId AND cng.ngrams_type = ? -- NgramsTypeId
-- AND nn.category > 0 -- AND nn.category > 0
GROUP BY ng.terms, cng.weight GROUP BY ng.terms, cng.weight
|] |]
...@@ -422,22 +420,22 @@ queryNgramsOnlyByDocUser = [sql| ...@@ -422,22 +420,22 @@ queryNgramsOnlyByDocUser = [sql|
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | TODO filter by language, database, any social field -- | TODO filter by language, database, any social field
getNodesByNgramsMaster :: HasDBid NodeType getContextsByNgramsMaster :: HasDBid NodeType
=> UserCorpusId -> MasterCorpusId -> Cmd err (HashMap Text (Set NodeId)) => UserCorpusId -> MasterCorpusId -> Cmd err (HashMap Text (Set NodeId))
getNodesByNgramsMaster ucId mcId = unionsWith (<>) getContextsByNgramsMaster ucId mcId = unionsWith (<>)
. map (HM.fromListWith (<>) . map (\(n,t) -> (t, Set.singleton n))) . map (HM.fromListWith (<>) . map (\(n,t) -> (t, Set.singleton n)))
-- . takeWhile (not . List.null) -- . takeWhile (not . List.null)
-- . takeWhile (\l -> List.length l > 3) -- . takeWhile (\l -> List.length l > 3)
<$> mapM (selectNgramsByNodeMaster 1000 ucId mcId) [0,500..10000] <$> mapM (selectNgramsByContextMaster 1000 ucId mcId) [0,500..10000]
selectNgramsByNodeMaster :: HasDBid NodeType selectNgramsByContextMaster :: HasDBid NodeType
=> Int => Int
-> UserCorpusId -> UserCorpusId
-> MasterCorpusId -> MasterCorpusId
-> Int -> Int
-> Cmd err [(NodeId, Text)] -> Cmd err [(NodeId, Text)]
selectNgramsByNodeMaster n ucId mcId p = runPGSQuery selectNgramsByContextMaster n ucId mcId p = runPGSQuery
queryNgramsByNodeMaster' queryNgramsByContextMaster'
( ucId ( ucId
, ngramsTypeId NgramsTerms , ngramsTypeId NgramsTerms
, toDBid NodeDocument , toDBid NodeDocument
...@@ -451,15 +449,15 @@ selectNgramsByNodeMaster n ucId mcId p = runPGSQuery ...@@ -451,15 +449,15 @@ selectNgramsByNodeMaster n ucId mcId p = runPGSQuery
) )
-- | TODO fix context_node_ngrams relation -- | TODO fix context_node_ngrams relation
queryNgramsByNodeMaster' :: DPS.Query queryNgramsByContextMaster' :: DPS.Query
queryNgramsByNodeMaster' = [sql| queryNgramsByContextMaster' = [sql|
WITH nodesByNgramsUser AS ( WITH contextsByNgramsUser AS (
SELECT n.id, ng.terms FROM nodes n SELECT n.id, ng.terms FROM contexts n
JOIN nodes_nodes nn ON n.id = nn.node2_id JOIN nodes_contexts nn ON n.id = nn.context_id
JOIN context_node_ngrams cng ON cng.node_id = n.id JOIN context_node_ngrams cng ON cng.context_id = n.id
JOIN ngrams ng ON cng.ngrams_id = ng.id JOIN ngrams ng ON cng.ngrams_id = ng.id
WHERE nn.node1_id = ? -- UserCorpusId WHERE nn.node_id = ? -- UserCorpusId
-- AND n.typename = ? -- toDBid -- AND n.typename = ? -- toDBid
AND cng.ngrams_type = ? -- NgramsTypeId AND cng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0 AND nn.category > 0
...@@ -469,10 +467,10 @@ queryNgramsByNodeMaster' = [sql| ...@@ -469,10 +467,10 @@ queryNgramsByNodeMaster' = [sql|
), ),
nodesByNgramsMaster AS ( contextsByNgramsMaster AS (
SELECT n.id, ng.terms FROM nodes n TABLESAMPLE SYSTEM_ROWS(?) SELECT n.id, ng.terms FROM contexts n TABLESAMPLE SYSTEM_ROWS(?)
JOIN context_node_ngrams cng ON n.id = cng.node_id JOIN context_node_ngrams cng ON n.id = cng.context_id
JOIN ngrams ng ON ng.id = cng.ngrams_id JOIN ngrams ng ON ng.id = cng.ngrams_id
WHERE n.parent_id = ? -- Master Corpus toDBid WHERE n.parent_id = ? -- Master Corpus toDBid
...@@ -482,5 +480,5 @@ queryNgramsByNodeMaster' = [sql| ...@@ -482,5 +480,5 @@ queryNgramsByNodeMaster' = [sql|
) )
SELECT m.id, m.terms FROM nodesByNgramsMaster m SELECT m.id, m.terms FROM nodesByNgramsMaster m
RIGHT JOIN nodesByNgramsUser u ON u.id = m.id RIGHT JOIN contextsByNgramsUser u ON u.id = m.id
|] |]
...@@ -21,10 +21,10 @@ import qualified Data.HashMap.Strict as HM ...@@ -21,10 +21,10 @@ import qualified Data.HashMap.Strict as HM
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Gargantext.Core import Gargantext.Core
import Gargantext.Core.Text.Metrics.TFICF import Gargantext.Core.Text.Metrics.TFICF
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsUser, getOccByNgramsOnlyFast, getOccByNgramsOnlyFast_withSample) import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsUser, getOccByNgramsOnlyFast, getOccByNgramsOnlyFast_withSample)
import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId) import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Prelude (Cmd) import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.NodeNode (selectCountDocs) import Gargantext.Database.Query.Table.NodeContext (selectCountDocs)
import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Prelude import Gargantext.Prelude
...@@ -38,7 +38,7 @@ getTficf :: HasDBid NodeType ...@@ -38,7 +38,7 @@ getTficf :: HasDBid NodeType
getTficf cId mId nt = do getTficf cId mId nt = do
mapTextDoubleLocal <- HM.filter (> 1) mapTextDoubleLocal <- HM.filter (> 1)
<$> HM.map (fromIntegral . Set.size) <$> HM.map (fromIntegral . Set.size)
<$> getNodesByNgramsUser cId nt <$> getContextsByNgramsUser cId nt
mapTextDoubleGlobal <- HM.map fromIntegral mapTextDoubleGlobal <- HM.map fromIntegral
<$> getOccByNgramsOnlyFast mId nt (HM.keys mapTextDoubleLocal) <$> getOccByNgramsOnlyFast mId nt (HM.keys mapTextDoubleLocal)
...@@ -62,7 +62,7 @@ getTficf_withSample :: HasDBid NodeType ...@@ -62,7 +62,7 @@ getTficf_withSample :: HasDBid NodeType
getTficf_withSample cId mId nt = do getTficf_withSample cId mId nt = do
mapTextDoubleLocal <- HM.filter (> 1) mapTextDoubleLocal <- HM.filter (> 1)
<$> HM.map (fromIntegral . Set.size) <$> HM.map (fromIntegral . Set.size)
<$> getNodesByNgramsUser cId nt <$> getContextsByNgramsUser cId nt
countLocal <- selectCountDocs cId countLocal <- selectCountDocs cId
let countGlobal = countLocal * 10 let countGlobal = countLocal * 10
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment