Commit dcb737c1 authored by Alexandre Delanoë

[OPTIM] Ngrams Table query

parent de1fddb3
...@@ -5,7 +5,7 @@ cabal-version: 1.12 ...@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
name: gargantext name: gargantext
version: 0.0.6.7 version: 0.0.6.7
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
......
...@@ -105,7 +105,7 @@ import Gargantext.Core.Mail.Types (HasMail) ...@@ -105,7 +105,7 @@ import Gargantext.Core.Mail.Types (HasMail)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError) import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
import Gargantext.API.Ngrams.Tools import Gargantext.API.Ngrams.Tools
import Gargantext.Database.Action.Flow.Types import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast') import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
import Gargantext.Database.Admin.Config (userMaster) import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Node (NodeType(..)) import Gargantext.Database.Admin.Types.Node (NodeType(..))
import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig) import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
...@@ -581,10 +581,9 @@ getTableNgrams _nType nId tabType listId limit_ offset ...@@ -581,10 +581,9 @@ getTableNgrams _nType nId tabType listId limit_ offset
let ngrams_terms = table ^.. each . ne_ngrams let ngrams_terms = table ^.. each . ne_ngrams
-- printDebug "ngrams_terms" ngrams_terms -- printDebug "ngrams_terms" ngrams_terms
t1 <- getTime t1 <- getTime
occurrences <- getOccByNgramsOnlyFast' nId occurrences <- getOccByNgramsOnlyFast nId
listId listId
ngramsType ngramsType
ngrams_terms
--printDebug "occurrences" occurrences --printDebug "occurrences" occurrences
t2 <- getTime t2 <- getTime
liftBase $ hprint stderr liftBase $ hprint stderr
...@@ -644,19 +643,15 @@ scoresRecomputeTableNgrams nId tabType listId = do ...@@ -644,19 +643,15 @@ scoresRecomputeTableNgrams nId tabType listId = do
setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
setScores table = do setScores table = do
let ngrams_terms = table ^.. each . ne_ngrams occurrences <- getOccByNgramsOnlyFast nId
occurrences <- getOccByNgramsOnlyFast' nId
listId listId
ngramsType ngramsType
ngrams_terms
let let
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
pure $ table & each %~ setOcc pure $ table & each %~ setOcc
-- APIs -- APIs
-- TODO: find a better place for the code above, All APIs stay here -- TODO: find a better place for the code above, All APIs stay here
......
...@@ -20,7 +20,6 @@ module Gargantext.Database.Action.Metrics.NgramsByContext ...@@ -20,7 +20,6 @@ module Gargantext.Database.Action.Metrics.NgramsByContext
--import Data.Map.Strict.Patch (PatchMap, Replace, diff) --import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.Map (Map) import Data.Map (Map)
import Data.Maybe (catMaybes)
import Data.Set (Set) import Data.Set (Set)
import Data.Text (Text) import Data.Text (Text)
import Data.Tuple.Extra (first, second, swap) import Data.Tuple.Extra (first, second, swap)
...@@ -31,8 +30,7 @@ import Gargantext.Core ...@@ -31,8 +30,7 @@ import Gargantext.Core
import Gargantext.Data.HashMap.Strict.Utils as HM import Gargantext.Data.HashMap.Strict.Utils as HM
import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId, MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId) import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId, MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId)
import Gargantext.Database.Prelude (Cmd, runPGSQuery) import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Database.Query.Table.Ngrams (selectNgramsId) import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..), NgramsId)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM import qualified Data.HashMap.Strict as HM
import qualified Data.Map as Map import qualified Data.Map as Map
...@@ -107,47 +105,47 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs = ...@@ -107,47 +105,47 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs =
HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByContextUser_withSample cId int nt ngs HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByContextUser_withSample cId int nt ngs
getOccByNgramsOnlyFast' :: CorpusId getOccByNgramsOnlyFast :: CorpusId
-> ListId -> ListId
-> NgramsType -> NgramsType
-> [NgramsTerm]
-> Cmd err (HashMap NgramsTerm Int) -> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlyFast' cId lId nt tms = do -- trace (show (cId, lId)) $ getOccByNgramsOnlyFast cId lId nt = do
mapNgramsIds <- selectNgramsId $ map unNgramsTerm tms HM.fromList <$> map (\(t,n) -> (NgramsTerm t, round n)) <$> run cId lId nt
HM.fromListWith (+) <$> catMaybes
<$> map (\(nId, s) -> (,) <$> (NgramsTerm <$> (Map.lookup nId mapNgramsIds)) <*> (Just $ round s) )
<$> run cId lId nt (Map.keys mapNgramsIds)
where where
run :: CorpusId run :: CorpusId
-> ListId -> ListId
-> NgramsType -> NgramsType
-> [NgramsId] -> Cmd err [(Text, Double)]
-> Cmd err [(NgramsId, Double)] run cId' lId' nt' = runPGSQuery query
run cId' lId' nt' tms' = runPGSQuery query ( cId'
( Values fields ((DPS.Only) <$> tms')
, cId'
, lId' , lId'
, ngramsTypeId nt' , ngramsTypeId nt'
) )
fields = [QualifiedIdentifier Nothing "int4"]
query :: DPS.Query query :: DPS.Query
query = [sql| query = [sql|
WITH input_ngrams(id) AS (?) SELECT ng.terms
-- , ng.id
, round(nng.weight)
-- , ns.version
-- , nng.ngrams_type
-- , ns.ngrams_type_id
FROM ngrams ng
JOIN node_stories ns ON ng.id = ns.ngrams_id
JOIN node_node_ngrams nng ON ns.node_id = nng.node2_id
WHERE nng.node1_id = ?
AND nng.node2_id = ?
AND nng.ngrams_type = ?
AND nng.ngrams_id = ng.id
AND nng.ngrams_type = ns.ngrams_type_id
ORDER BY ng.id ASC;
|]
SELECT ngi.id, nng.weight FROM nodes_contexts nc
JOIN node_node_ngrams nng ON nng.node1_id = nc.node_id
JOIN input_ngrams ngi ON nng.ngrams_id = ngi.id
WHERE nng.node1_id = ?
AND nng.node2_id = ?
AND nng.ngrams_type = ?
AND nc.category > 0
GROUP BY ngi.id, nng.weight
|]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment