Commit dcb737c1 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[OPTIM] Ngrams Table query

parent de1fddb3
......@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.6.7
version: 0.0.6.7
synopsis: Search, map, share
description: Please see README.md
category: Data
......
......@@ -105,7 +105,7 @@ import Gargantext.Core.Mail.Types (HasMail)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
import Gargantext.API.Ngrams.Tools
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast')
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Node (NodeType(..))
import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
......@@ -581,10 +581,9 @@ getTableNgrams _nType nId tabType listId limit_ offset
let ngrams_terms = table ^.. each . ne_ngrams
-- printDebug "ngrams_terms" ngrams_terms
t1 <- getTime
occurrences <- getOccByNgramsOnlyFast' nId
occurrences <- getOccByNgramsOnlyFast nId
listId
ngramsType
ngrams_terms
--printDebug "occurrences" occurrences
t2 <- getTime
liftBase $ hprint stderr
......@@ -644,19 +643,15 @@ scoresRecomputeTableNgrams nId tabType listId = do
setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
setScores table = do
let ngrams_terms = table ^.. each . ne_ngrams
occurrences <- getOccByNgramsOnlyFast' nId
occurrences <- getOccByNgramsOnlyFast nId
listId
ngramsType
ngrams_terms
let
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
pure $ table & each %~ setOcc
-- APIs
-- TODO: find a better place for the code above, All APIs stay here
......
......@@ -20,7 +20,6 @@ module Gargantext.Database.Action.Metrics.NgramsByContext
--import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Data.Maybe (catMaybes)
import Data.Set (Set)
import Data.Text (Text)
import Data.Tuple.Extra (first, second, swap)
......@@ -31,8 +30,7 @@ import Gargantext.Core
import Gargantext.Data.HashMap.Strict.Utils as HM
import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId, MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId)
import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Database.Query.Table.Ngrams (selectNgramsId)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..), NgramsId)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM
import qualified Data.Map as Map
......@@ -107,47 +105,47 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs =
HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByContextUser_withSample cId int nt ngs
getOccByNgramsOnlyFast' :: CorpusId
getOccByNgramsOnlyFast :: CorpusId
-> ListId
-> NgramsType
-> [NgramsTerm]
-> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlyFast' cId lId nt tms = do -- trace (show (cId, lId)) $
mapNgramsIds <- selectNgramsId $ map unNgramsTerm tms
HM.fromListWith (+) <$> catMaybes
<$> map (\(nId, s) -> (,) <$> (NgramsTerm <$> (Map.lookup nId mapNgramsIds)) <*> (Just $ round s) )
<$> run cId lId nt (Map.keys mapNgramsIds)
getOccByNgramsOnlyFast cId lId nt = do
HM.fromList <$> map (\(t,n) -> (NgramsTerm t, round n)) <$> run cId lId nt
where
run :: CorpusId
-> ListId
-> NgramsType
-> [NgramsId]
-> Cmd err [(NgramsId, Double)]
run cId' lId' nt' tms' = runPGSQuery query
( Values fields ((DPS.Only) <$> tms')
, cId'
-> Cmd err [(Text, Double)]
run cId' lId' nt' = runPGSQuery query
( cId'
, lId'
, ngramsTypeId nt'
)
fields = [QualifiedIdentifier Nothing "int4"]
query :: DPS.Query
query = [sql|
WITH input_ngrams(id) AS (?)
SELECT ng.terms
-- , ng.id
, round(nng.weight)
-- , ns.version
-- , nng.ngrams_type
-- , ns.ngrams_type_id
FROM ngrams ng
JOIN node_stories ns ON ng.id = ns.ngrams_id
JOIN node_node_ngrams nng ON ns.node_id = nng.node2_id
WHERE nng.node1_id = ?
AND nng.node2_id = ?
AND nng.ngrams_type = ?
AND nng.ngrams_id = ng.id
AND nng.ngrams_type = ns.ngrams_type_id
ORDER BY ng.id ASC;
|]
SELECT ngi.id, nng.weight FROM nodes_contexts nc
JOIN node_node_ngrams nng ON nng.node1_id = nc.node_id
JOIN input_ngrams ngi ON nng.ngrams_id = ngi.id
WHERE nng.node1_id = ?
AND nng.node2_id = ?
AND nng.ngrams_type = ?
AND nc.category > 0
GROUP BY ngi.id, nng.weight
|]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment