Commit ba15b251 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[ngrams] send array of context ids, instead of occurrences int

parent ff6d1d32
Pipeline #3382 canceled with stage
......@@ -83,7 +83,7 @@ module Gargantext.API.Ngrams
where
import Control.Concurrent
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex, over)
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
import Control.Monad.Reader
import Data.Aeson hiding ((.=))
import Data.Either (Either(..))
......@@ -553,8 +553,8 @@ getTableNgrams _nType nId tabType listId limit_ offset
sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences
sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to length)
sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to length)
---------------------------------------
-- | Filter the given `tableMap` with the search criteria.
......@@ -584,15 +584,16 @@ getTableNgrams _nType nId tabType listId limit_ offset
let scoresNeeded = needsScores orderBy
t1 <- getTime
tableMap2 <- getNgramsTable' nId listId ngramsType :: m (Versioned (Map NgramsTerm NgramsElement))
tableMap <- getNgramsTable' nId listId ngramsType :: m (Versioned (Map NgramsTerm NgramsElement))
let fltr = tableMap2 & v_data %~ NgramsTable . filteredNodes :: Versioned NgramsTable
let fltr = tableMap & v_data %~ NgramsTable . filteredNodes :: Versioned NgramsTable
let fltrCount = length $ fltr ^. v_data . _NgramsTable
t2 <- getTime
let tableMap3 = over (v_data . _NgramsTable) ((withInners (tableMap2 ^. v_data)) . sortAndPaginate) fltr
let tableMapSorted = over (v_data . _NgramsTable) ((withInners (tableMap ^. v_data)) . sortAndPaginate) fltr
t3 <- getTime
--printDebug "[getTableNgrams] tableMapSorted" tableMapSorted
liftBase $ do
hprint stderr
("getTableNgrams total=" % hasTime
......@@ -603,8 +604,8 @@ getTableNgrams _nType nId tabType listId limit_ offset
% "\n"
) t0 t3 t0 t1 t1 t2 t2 t3
-- printDebug "[getTableNgrams] tableMap3" $ show tableMap3
pure $ toVersionedWithCount fltrCount tableMap3
-- printDebug "[getTableNgrams] tableMapSorted" $ show tableMapSorted
pure $ toVersionedWithCount fltrCount tableMapSorted
-- | Helper function to get the ngrams table with scores.
......@@ -619,9 +620,9 @@ getNgramsTable' :: forall env err m.
-> TableNgrams.NgramsType
-> m (Versioned (Map.Map NgramsTerm NgramsElement))
getNgramsTable' nId listId ngramsType = do
tableMap1 <- getNgramsTableMap listId ngramsType
tableMap1 & v_data %%~ (setNgramsTableScores nId listId ngramsType)
. Map.mapWithKey ngramsElementFromRepo
tableMap <- getNgramsTableMap listId ngramsType
tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
. Map.mapWithKey ngramsElementFromRepo
-- | Helper function to set scores on an `NgramsTable`.
setNgramsTableScores :: forall env err m t.
......@@ -636,10 +637,10 @@ setNgramsTableScores :: forall env err m t.
-> TableNgrams.NgramsType
-> t
-> m t
setNgramsTableScores nId listId ngramsType table = do
setNgramsTableScores nId listId ngramsType table = do
t1 <- getTime
occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
printDebug "[setNgramsTableScores] occurrences" occurrences
--printDebug "[setNgramsTableScores] occurrences" occurrences
t2 <- getTime
liftBase $ do
let ngrams_terms = table ^.. each . ne_ngrams
......@@ -648,7 +649,9 @@ setNgramsTableScores nId listId ngramsType table = do
("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
(length ngrams_terms) t1 t2
let
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
setOcc ne = ne & ne_occurrences .~ msumOf (at (ne ^. ne_ngrams) . _Just) occurrences
--printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
pure $ table & each %~ setOcc
......
......@@ -34,6 +34,7 @@ import GHC.Generics (Generic)
import Gargantext.Core.Text (size)
import Gargantext.Core.Types (ListType(..), ListId, NodeId, TODO)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Admin.Types.Node (ContextId)
import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig, CmdM')
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (IsHashable(..))
......@@ -170,7 +171,7 @@ data NgramsElement =
NgramsElement { _ne_ngrams :: NgramsTerm
, _ne_size :: Int
, _ne_list :: ListType
, _ne_occurrences :: Int
, _ne_occurrences :: [ContextId]
, _ne_root :: Maybe NgramsTerm
, _ne_parent :: Maybe NgramsTerm
, _ne_children :: MSet NgramsTerm
......@@ -186,7 +187,7 @@ mkNgramsElement :: NgramsTerm
-> MSet NgramsTerm
-> NgramsElement
mkNgramsElement ngrams list rp children =
NgramsElement ngrams (size (unNgramsTerm ngrams)) list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children
NgramsElement ngrams (size (unNgramsTerm ngrams)) list [] (_rp_root <$> rp) (_rp_parent <$> rp) children
newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
newNgramsElement mayList ngrams =
......@@ -571,7 +572,7 @@ ngramsElementFromRepo
, _ne_parent = p
, _ne_children = c
, _ne_ngrams = ngrams
, _ne_occurrences = 0 -- panic $ "API.Ngrams.Types._ne_occurrences"
, _ne_occurrences = [] -- panic $ "API.Ngrams.Types._ne_occurrences"
{-
-- Here we could use 0 if we want to avoid any `panic`.
-- It will not happen using getTableNgrams if
......
......@@ -36,6 +36,7 @@ import qualified Data.HashMap.Strict as HM
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Database.PostgreSQL.Simple as DPS
import qualified Database.PostgreSQL.Simple.Types as DPST
-- | fst is size of Supra Corpus
-- snd is Texts and size of Occurrences (different docs)
......@@ -108,15 +109,16 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs =
getOccByNgramsOnlyFast :: CorpusId
-> ListId
-> NgramsType
-> Cmd err (HashMap NgramsTerm Int)
-> Cmd err (HashMap NgramsTerm [ContextId])
getOccByNgramsOnlyFast cId lId nt = do
HM.fromList <$> map (\(t,n) -> (NgramsTerm t, round n)) <$> run cId lId nt
--HM.fromList <$> map (\(t,n) -> (NgramsTerm t, round n)) <$> run cId lId nt
HM.fromList <$> map (\(t, ns) -> (NgramsTerm t, NodeId <$> DPST.fromPGArray ns)) <$> run cId lId nt
where
run :: CorpusId
-> ListId
-> NgramsType
-> Cmd err [(Text, Double)]
-> Cmd err [(Text, DPST.PGArray Int)]
run cId' lId' nt' = runPGSQuery query
( cId'
, lId'
......@@ -127,7 +129,8 @@ getOccByNgramsOnlyFast cId lId nt = do
query = [sql|
SELECT ng.terms
-- , ng.id
, round(nng.weight)
--, round(nng.weight)
, ARRAY(SELECT DISTINCT context_node_ngrams.context_id FROM context_node_ngrams WHERE ng.id = context_id) AS context_ids
-- , ns.version
-- , nng.ngrams_type
-- , ns.ngrams_type_id
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment