Commit 4e076676 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[Metrics] optim

parent b449027c
......@@ -130,7 +130,7 @@ import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
-- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
import Gargantext.Database.Config (userMaster)
import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlyFast)
import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Database.Types.Node (NodeType(..))
import Gargantext.Database.Utils (fromField', HasConnection)
......@@ -1019,7 +1019,8 @@ getTableNgrams _nType nId tabType listId limit_ offset
setScores True table = do
let ngrams_terms = (table ^.. each . ne_ngrams)
t1 <- getTime'
occurrences <- getOccByNgramsOnlyFast nId
occurrences <- getOccByNgramsOnlyFast' nId
listId
ngramsType
ngrams_terms
t2 <- getTime'
......
......@@ -19,6 +19,7 @@ Ngrams by node enable contextual metrics.
module Gargantext.Database.Metrics.NgramsByNode
where
import Debug.Trace (trace)
import Data.Map.Strict (Map, fromListWith, elems, toList, fromList)
import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Data.Set (Set)
......@@ -162,6 +163,46 @@ getOccByNgramsOnlyFast :: CorpusId
getOccByNgramsOnlyFast cId nt ngs =
fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs
getOccByNgramsOnlyFast' :: CorpusId
-> ListId
-> NgramsType
-> [Text]
-> Cmd err (Map Text Int)
getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $
fromListWith (+) <$> map (second round) <$> run cId lId nt tms
where
fields = [QualifiedIdentifier Nothing "text"]
run :: CorpusId
-> ListId
-> NgramsType
-> [Text]
-> Cmd err [(Text, Double)]
run cId' lId' _nt' tms' = runPGSQuery query
( Values fields (DPS.Only <$> tms')
, cId'
, lId'
-- , ngramsTypeId nt'
)
query :: DPS.Query
query = [sql|
WITH input_rows(terms) AS (?)
SELECT ng.terms, nng.weight FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
WHERE nng.node1_id = ? -- CorpusId
AND nng.node2_id = ?
-- AND nng.ngrams_type = ? -- NgramsTypeId
-- AND nn.category > 0
GROUP BY ng.terms, nng.weight
|]
-- just slower than getOccByNgramsOnlyFast
getOccByNgramsOnlySlow :: NodeType
-> CorpusId
......@@ -279,6 +320,38 @@ queryNgramsOnlyByNodeUser = [sql|
|]
selectNgramsOnlyByNodeUser' :: CorpusId -> [ListId] -> NgramsType -> [Text]
-> Cmd err [(Text, Int)]
selectNgramsOnlyByNodeUser' cId ls nt tms =
runPGSQuery queryNgramsOnlyByNodeUser
( Values fields (DPS.Only <$> tms)
, Values [QualifiedIdentifier Nothing "int4"] (DPS.Only <$> (map (\(NodeId n) -> n) ls))
, cId
, nodeTypeId NodeDocument
, ngramsTypeId nt
)
where
fields = [QualifiedIdentifier Nothing "text"]
queryNgramsOnlyByNodeUser' :: DPS.Query
queryNgramsOnlyByNodeUser' = [sql|
WITH input_rows(terms) AS (?),
input_list(id) AS (?)
SELECT ng.terms, nng.weight FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_list il ON il.id = nng.node2_id
WHERE nng.node1_id = ? -- CorpusId
AND nng.ngrams_type = ? -- NgramsTypeId
-- AND nn.category > 0
GROUP BY ng.terms, nng.weight
|]
getNgramsByDocOnlyUser :: NodeId -> [ListId] -> NgramsType -> [Text]
-> Cmd err (Map Text (Set NodeId))
getNgramsByDocOnlyUser cId ls nt ngs =
......
......@@ -37,7 +37,7 @@ insertOccsUpdates cId lId = runPGSQuery query (cId, lId, nodeTypeId NodeList, no
query :: DPS.Query
query = [sql|
INSERT INTO node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
SELECT nn.node1_id, lists.id, nnn.ngrams_id, count(*), 1 -- type of score
SELECT nn.node1_id, lists.id, nnn.ngrams_id, 1, count(*) as c -- type of score
FROM node_node_ngrams nnn
INNER JOIN nodes_nodes nn ON nn.node2_id = nnn.node2_id
INNER JOIN nodes docs ON docs.id = nnn.node2_id
......@@ -49,7 +49,9 @@ insertOccsUpdates cId lId = runPGSQuery query (cId, lId, nodeTypeId NodeList, no
AND docs.typename = ?
GROUP BY nn.node1_id, lists.id, nnn.ngrams_id
ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
DO UPDATE SET weight = excluded.weight +1; -- TOCHECK
DO UPDATE SET weight = 3 -- c -- excluded.weight
RETURNING 1
-- TOCHECK
|]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment