Commit dcb737c1 authored by Alexandre Delanoë

[OPTIM] Ngrams Table query

parent de1fddb3
...@@ -105,7 +105,7 @@ import Gargantext.Core.Mail.Types (HasMail) ...@@ -105,7 +105,7 @@ import Gargantext.Core.Mail.Types (HasMail)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError) import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, Limit, Offset, TODO, assertValid, HasInvalidError)
import Gargantext.API.Ngrams.Tools import Gargantext.API.Ngrams.Tools
import Gargantext.Database.Action.Flow.Types import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast') import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
import Gargantext.Database.Admin.Config (userMaster) import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Node (NodeType(..)) import Gargantext.Database.Admin.Types.Node (NodeType(..))
import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig) import Gargantext.Database.Prelude (HasConnectionPool(..), HasConfig)
...@@ -581,10 +581,9 @@ getTableNgrams _nType nId tabType listId limit_ offset ...@@ -581,10 +581,9 @@ getTableNgrams _nType nId tabType listId limit_ offset
let ngrams_terms = table ^.. each . ne_ngrams let ngrams_terms = table ^.. each . ne_ngrams
-- printDebug "ngrams_terms" ngrams_terms -- printDebug "ngrams_terms" ngrams_terms
t1 <- getTime t1 <- getTime
occurrences <- getOccByNgramsOnlyFast' nId occurrences <- getOccByNgramsOnlyFast nId
listId listId
ngramsType ngramsType
ngrams_terms
--printDebug "occurrences" occurrences --printDebug "occurrences" occurrences
t2 <- getTime t2 <- getTime
liftBase $ hprint stderr liftBase $ hprint stderr
...@@ -644,19 +643,15 @@ scoresRecomputeTableNgrams nId tabType listId = do ...@@ -644,19 +643,15 @@ scoresRecomputeTableNgrams nId tabType listId = do
setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t setScores :: forall t. Each t t NgramsElement NgramsElement => t -> m t
setScores table = do setScores table = do
let ngrams_terms = table ^.. each . ne_ngrams occurrences <- getOccByNgramsOnlyFast nId
occurrences <- getOccByNgramsOnlyFast' nId
listId listId
ngramsType ngramsType
ngrams_terms
let let
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
pure $ table & each %~ setOcc pure $ table & each %~ setOcc
-- APIs -- APIs
-- TODO: find a better place for the code above, All APIs stay here -- TODO: find a better place for the code above, All APIs stay here
......
...@@ -20,7 +20,6 @@ module Gargantext.Database.Action.Metrics.NgramsByContext ...@@ -20,7 +20,6 @@ module Gargantext.Database.Action.Metrics.NgramsByContext
--import Data.Map.Strict.Patch (PatchMap, Replace, diff) --import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.Map (Map) import Data.Map (Map)
import Data.Maybe (catMaybes)
import Data.Set (Set) import Data.Set (Set)
import Data.Text (Text) import Data.Text (Text)
import Data.Tuple.Extra (first, second, swap) import Data.Tuple.Extra (first, second, swap)
...@@ -31,8 +30,7 @@ import Gargantext.Core ...@@ -31,8 +30,7 @@ import Gargantext.Core
import Gargantext.Data.HashMap.Strict.Utils as HM import Gargantext.Data.HashMap.Strict.Utils as HM
import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId, MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId) import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId, MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId)
import Gargantext.Database.Prelude (Cmd, runPGSQuery) import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Database.Query.Table.Ngrams (selectNgramsId) import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..), NgramsId)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM import qualified Data.HashMap.Strict as HM
import qualified Data.Map as Map import qualified Data.Map as Map
...@@ -107,50 +105,50 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs = ...@@ -107,50 +105,50 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs =
HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByContextUser_withSample cId int nt ngs HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByContextUser_withSample cId int nt ngs
getOccByNgramsOnlyFast' :: CorpusId getOccByNgramsOnlyFast :: CorpusId
-> ListId -> ListId
-> NgramsType -> NgramsType
-> [NgramsTerm]
-> Cmd err (HashMap NgramsTerm Int) -> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlyFast' cId lId nt tms = do -- trace (show (cId, lId)) $ getOccByNgramsOnlyFast cId lId nt = do
mapNgramsIds <- selectNgramsId $ map unNgramsTerm tms HM.fromList <$> map (\(t,n) -> (NgramsTerm t, round n)) <$> run cId lId nt
HM.fromListWith (+) <$> catMaybes
<$> map (\(nId, s) -> (,) <$> (NgramsTerm <$> (Map.lookup nId mapNgramsIds)) <*> (Just $ round s) )
<$> run cId lId nt (Map.keys mapNgramsIds)
where where
run :: CorpusId run :: CorpusId
-> ListId -> ListId
-> NgramsType -> NgramsType
-> [NgramsId] -> Cmd err [(Text, Double)]
-> Cmd err [(NgramsId, Double)] run cId' lId' nt' = runPGSQuery query
run cId' lId' nt' tms' = runPGSQuery query ( cId'
( Values fields ((DPS.Only) <$> tms')
, cId'
, lId' , lId'
, ngramsTypeId nt' , ngramsTypeId nt'
) )
fields = [QualifiedIdentifier Nothing "int4"]
query :: DPS.Query query :: DPS.Query
query = [sql| query = [sql|
WITH input_ngrams(id) AS (?) SELECT ng.terms
-- , ng.id
SELECT ngi.id, nng.weight FROM nodes_contexts nc , round(nng.weight)
JOIN node_node_ngrams nng ON nng.node1_id = nc.node_id -- , ns.version
JOIN input_ngrams ngi ON nng.ngrams_id = ngi.id -- , nng.ngrams_type
-- , ns.ngrams_type_id
FROM ngrams ng
JOIN node_stories ns ON ng.id = ns.ngrams_id
JOIN node_node_ngrams nng ON ns.node_id = nng.node2_id
WHERE nng.node1_id = ? WHERE nng.node1_id = ?
AND nng.node2_id = ? AND nng.node2_id = ?
AND nng.ngrams_type = ? AND nng.ngrams_type = ?
AND nc.category > 0 AND nng.ngrams_id = ng.id
GROUP BY ngi.id, nng.weight AND nng.ngrams_type = ns.ngrams_type_id
ORDER BY ng.id ASC;
|] |]
selectNgramsOccurrencesOnlyByContextUser_withSample :: HasDBid NodeType selectNgramsOccurrencesOnlyByContextUser_withSample :: HasDBid NodeType
=> CorpusId => CorpusId
-> Int -> Int
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment