Commit 4e076676 authored by Alexandre Delanoë

[Metrics] optim

parent b449027c
Pipeline #4653 failed with stage
in 9 minutes and 42 seconds
...@@ -130,7 +130,7 @@ import GHC.Generics (Generic) ...@@ -130,7 +130,7 @@ import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
-- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..)) -- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
import Gargantext.Database.Config (userMaster) import Gargantext.Database.Config (userMaster)
import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlyFast) import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
import Gargantext.Database.Schema.Ngrams (NgramsType) import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Database.Types.Node (NodeType(..)) import Gargantext.Database.Types.Node (NodeType(..))
import Gargantext.Database.Utils (fromField', HasConnection) import Gargantext.Database.Utils (fromField', HasConnection)
...@@ -1019,7 +1019,8 @@ getTableNgrams _nType nId tabType listId limit_ offset ...@@ -1019,7 +1019,8 @@ getTableNgrams _nType nId tabType listId limit_ offset
setScores True table = do setScores True table = do
let ngrams_terms = (table ^.. each . ne_ngrams) let ngrams_terms = (table ^.. each . ne_ngrams)
t1 <- getTime' t1 <- getTime'
occurrences <- getOccByNgramsOnlyFast nId occurrences <- getOccByNgramsOnlyFast' nId
listId
ngramsType ngramsType
ngrams_terms ngrams_terms
t2 <- getTime' t2 <- getTime'
......
...@@ -19,6 +19,7 @@ Ngrams by node enable contextual metrics. ...@@ -19,6 +19,7 @@ Ngrams by node enable contextual metrics.
module Gargantext.Database.Metrics.NgramsByNode module Gargantext.Database.Metrics.NgramsByNode
where where
import Debug.Trace (trace)
import Data.Map.Strict (Map, fromListWith, elems, toList, fromList) import Data.Map.Strict (Map, fromListWith, elems, toList, fromList)
import Data.Map.Strict.Patch (PatchMap, Replace, diff) import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Data.Set (Set) import Data.Set (Set)
...@@ -162,6 +163,46 @@ getOccByNgramsOnlyFast :: CorpusId ...@@ -162,6 +163,46 @@ getOccByNgramsOnlyFast :: CorpusId
getOccByNgramsOnlyFast cId nt ngs = getOccByNgramsOnlyFast cId nt ngs =
fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs
-- | Occurrence scores of the given terms, read from @node_node_ngrams@.
--
-- Runs a single SQL query restricted to rows whose @node1_id@ is the corpus
-- id and whose @node2_id@ is the list id, rounds every returned weight to an
-- 'Int' and sums the rounded weights per term.
--
-- The 'NgramsType' argument is accepted but not used: the matching SQL
-- filter is commented out inside the query.
--
-- NOTE(review): the 'trace' call emits the (corpus id, list id) pair as a
-- debugging side effect; it looks like leftover instrumentation — confirm
-- before shipping.
getOccByNgramsOnlyFast' :: CorpusId
                        -> ListId
                        -> NgramsType
                        -> [Text]
                        -> Cmd err (Map Text Int)
getOccByNgramsOnlyFast' cId lId _nt tms =
  trace (show (cId, lId)) $ sumRounded <$> weightedTerms
  where
    -- Round each weight, then add up the weights that share a term.
    sumRounded :: [(Text, Double)] -> Map Text Int
    sumRounded rows =
      fromListWith (+) [ (term, round weight) | (term, weight) <- rows ]

    -- Positional parameters: the terms as a one-column @text@ VALUES list,
    -- then the corpus id, then the list id.
    weightedTerms :: Cmd err [(Text, Double)]
    weightedTerms = runPGSQuery weightQuery
                      ( Values [QualifiedIdentifier Nothing "text"] (DPS.Only <$> tms)
                      , cId
                      , lId
                      )

    weightQuery :: DPS.Query
    weightQuery = [sql|
WITH input_rows(terms) AS (?)
SELECT ng.terms, nng.weight FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
WHERE nng.node1_id = ? -- CorpusId
AND nng.node2_id = ?
-- AND nng.ngrams_type = ? -- NgramsTypeId
-- AND nn.category > 0
GROUP BY ng.terms, nng.weight
|]
-- just slower than getOccByNgramsOnlyFast -- just slower than getOccByNgramsOnlyFast
getOccByNgramsOnlySlow :: NodeType getOccByNgramsOnlySlow :: NodeType
-> CorpusId -> CorpusId
...@@ -279,6 +320,38 @@ queryNgramsOnlyByNodeUser = [sql| ...@@ -279,6 +320,38 @@ queryNgramsOnlyByNodeUser = [sql|
|] |]
-- | Run @queryNgramsOnlyByNodeUser@ with five positional parameters, in
-- order: the terms (as a one-column @text@ VALUES list), the list ids (as a
-- one-column @int4@ VALUES list), the corpus id, the type id of
-- 'NodeDocument' and the type id of the given 'NgramsType'.
--
-- NOTE(review): this executes the unprimed @queryNgramsOnlyByNodeUser@, not
-- the primed @queryNgramsOnlyByNodeUser'@ declared in this module (which has
-- only four placeholders) — confirm which query is intended.
selectNgramsOnlyByNodeUser' :: CorpusId -> [ListId] -> NgramsType -> [Text]
                            -> Cmd err [(Text, Int)]
selectNgramsOnlyByNodeUser' cId ls nt tms =
  runPGSQuery queryNgramsOnlyByNodeUser queryParams
  where
    queryParams =
      ( Values [QualifiedIdentifier Nothing "text"] (map DPS.Only tms)
      -- Unwrap each list id to the raw integer it carries.
      , Values [QualifiedIdentifier Nothing "int4"] [ DPS.Only n | NodeId n <- ls ]
      , cId
      , nodeTypeId NodeDocument
      , ngramsTypeId nt
      )
-- | SQL selecting @(terms, weight)@ pairs from @node_node_ngrams@ joined
-- with @ngrams@.
--
-- Expects four positional parameters, in order: the rows for @input_rows@
-- (terms to keep), the rows for @input_list@ (ids matched against
-- @node2_id@), the value compared with @node1_id@, and the value compared
-- with @ngrams_type@.
--
-- NOTE(review): no caller of this query is visible in this view;
-- @selectNgramsOnlyByNodeUser'@ runs the unprimed query instead — confirm
-- whether this definition is meant to be used there.
queryNgramsOnlyByNodeUser' :: DPS.Query
queryNgramsOnlyByNodeUser' = [sql|
WITH input_rows(terms) AS (?),
input_list(id) AS (?)
SELECT ng.terms, nng.weight FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_list il ON il.id = nng.node2_id
WHERE nng.node1_id = ? -- CorpusId
AND nng.ngrams_type = ? -- NgramsTypeId
-- AND nn.category > 0
GROUP BY ng.terms, nng.weight
|]
getNgramsByDocOnlyUser :: NodeId -> [ListId] -> NgramsType -> [Text] getNgramsByDocOnlyUser :: NodeId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text (Set NodeId)) -> Cmd err (Map Text (Set NodeId))
getNgramsByDocOnlyUser cId ls nt ngs = getNgramsByDocOnlyUser cId ls nt ngs =
......
...@@ -37,7 +37,7 @@ insertOccsUpdates cId lId = runPGSQuery query (cId, lId, nodeTypeId NodeList, no ...@@ -37,7 +37,7 @@ insertOccsUpdates cId lId = runPGSQuery query (cId, lId, nodeTypeId NodeList, no
query :: DPS.Query query :: DPS.Query
query = [sql| query = [sql|
INSERT INTO node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight) INSERT INTO node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
SELECT nn.node1_id, lists.id, nnn.ngrams_id, count(*), 1 -- type of score SELECT nn.node1_id, lists.id, nnn.ngrams_id, 1, count(*) as c -- type of score
FROM node_node_ngrams nnn FROM node_node_ngrams nnn
INNER JOIN nodes_nodes nn ON nn.node2_id = nnn.node2_id INNER JOIN nodes_nodes nn ON nn.node2_id = nnn.node2_id
INNER JOIN nodes docs ON docs.id = nnn.node2_id INNER JOIN nodes docs ON docs.id = nnn.node2_id
...@@ -49,7 +49,9 @@ insertOccsUpdates cId lId = runPGSQuery query (cId, lId, nodeTypeId NodeList, no ...@@ -49,7 +49,9 @@ insertOccsUpdates cId lId = runPGSQuery query (cId, lId, nodeTypeId NodeList, no
AND docs.typename = ? AND docs.typename = ?
GROUP BY nn.node1_id, lists.id, nnn.ngrams_id GROUP BY nn.node1_id, lists.id, nnn.ngrams_id
ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type) ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
DO UPDATE SET weight = excluded.weight +1; -- TOCHECK DO UPDATE SET weight = 3 -- c -- excluded.weight
RETURNING 1
-- TOCHECK
|] |]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment