Commit ba15b251 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[ngrams] send array of context ids, instead of occurrences int

parent ff6d1d32
Pipeline #3382 canceled with stage
...@@ -83,7 +83,7 @@ module Gargantext.API.Ngrams ...@@ -83,7 +83,7 @@ module Gargantext.API.Ngrams
where where
import Control.Concurrent import Control.Concurrent
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), sumOf, at, _Just, Each(..), (%%~), mapped, ifolded, withIndex, over) import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
import Control.Monad.Reader import Control.Monad.Reader
import Data.Aeson hiding ((.=)) import Data.Aeson hiding ((.=))
import Data.Either (Either(..)) import Data.Either (Either(..))
...@@ -553,8 +553,8 @@ getTableNgrams _nType nId tabType listId limit_ offset ...@@ -553,8 +553,8 @@ getTableNgrams _nType nId tabType listId limit_ offset
sortOnOrder Nothing = sortOnOrder (Just ScoreDesc) sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
sortOnOrder (Just ScoreAsc) = List.sortOn $ view ne_occurrences sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to length)
sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to length)
--------------------------------------- ---------------------------------------
-- | Filter the given `tableMap` with the search criteria. -- | Filter the given `tableMap` with the search criteria.
...@@ -584,15 +584,16 @@ getTableNgrams _nType nId tabType listId limit_ offset ...@@ -584,15 +584,16 @@ getTableNgrams _nType nId tabType listId limit_ offset
let scoresNeeded = needsScores orderBy let scoresNeeded = needsScores orderBy
t1 <- getTime t1 <- getTime
tableMap2 <- getNgramsTable' nId listId ngramsType :: m (Versioned (Map NgramsTerm NgramsElement)) tableMap <- getNgramsTable' nId listId ngramsType :: m (Versioned (Map NgramsTerm NgramsElement))
let fltr = tableMap2 & v_data %~ NgramsTable . filteredNodes :: Versioned NgramsTable let fltr = tableMap & v_data %~ NgramsTable . filteredNodes :: Versioned NgramsTable
let fltrCount = length $ fltr ^. v_data . _NgramsTable let fltrCount = length $ fltr ^. v_data . _NgramsTable
t2 <- getTime t2 <- getTime
let tableMap3 = over (v_data . _NgramsTable) ((withInners (tableMap2 ^. v_data)) . sortAndPaginate) fltr let tableMapSorted = over (v_data . _NgramsTable) ((withInners (tableMap ^. v_data)) . sortAndPaginate) fltr
t3 <- getTime t3 <- getTime
--printDebug "[getTableNgrams] tableMapSorted" tableMapSorted
liftBase $ do liftBase $ do
hprint stderr hprint stderr
("getTableNgrams total=" % hasTime ("getTableNgrams total=" % hasTime
...@@ -603,8 +604,8 @@ getTableNgrams _nType nId tabType listId limit_ offset ...@@ -603,8 +604,8 @@ getTableNgrams _nType nId tabType listId limit_ offset
% "\n" % "\n"
) t0 t3 t0 t1 t1 t2 t2 t3 ) t0 t3 t0 t1 t1 t2 t2 t3
-- printDebug "[getTableNgrams] tableMap3" $ show tableMap3 -- printDebug "[getTableNgrams] tableMapSorted" $ show tableMapSorted
pure $ toVersionedWithCount fltrCount tableMap3 pure $ toVersionedWithCount fltrCount tableMapSorted
-- | Helper function to get the ngrams table with scores. -- | Helper function to get the ngrams table with scores.
...@@ -619,9 +620,9 @@ getNgramsTable' :: forall env err m. ...@@ -619,9 +620,9 @@ getNgramsTable' :: forall env err m.
-> TableNgrams.NgramsType -> TableNgrams.NgramsType
-> m (Versioned (Map.Map NgramsTerm NgramsElement)) -> m (Versioned (Map.Map NgramsTerm NgramsElement))
getNgramsTable' nId listId ngramsType = do getNgramsTable' nId listId ngramsType = do
tableMap1 <- getNgramsTableMap listId ngramsType tableMap <- getNgramsTableMap listId ngramsType
tableMap1 & v_data %%~ (setNgramsTableScores nId listId ngramsType) tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType)
. Map.mapWithKey ngramsElementFromRepo . Map.mapWithKey ngramsElementFromRepo
-- | Helper function to set scores on an `NgramsTable`. -- | Helper function to set scores on an `NgramsTable`.
setNgramsTableScores :: forall env err m t. setNgramsTableScores :: forall env err m t.
...@@ -636,10 +637,10 @@ setNgramsTableScores :: forall env err m t. ...@@ -636,10 +637,10 @@ setNgramsTableScores :: forall env err m t.
-> TableNgrams.NgramsType -> TableNgrams.NgramsType
-> t -> t
-> m t -> m t
setNgramsTableScores nId listId ngramsType table = do setNgramsTableScores nId listId ngramsType table = do
t1 <- getTime t1 <- getTime
occurrences <- getOccByNgramsOnlyFast nId listId ngramsType occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
printDebug "[setNgramsTableScores] occurrences" occurrences --printDebug "[setNgramsTableScores] occurrences" occurrences
t2 <- getTime t2 <- getTime
liftBase $ do liftBase $ do
let ngrams_terms = table ^.. each . ne_ngrams let ngrams_terms = table ^.. each . ne_ngrams
...@@ -648,7 +649,9 @@ setNgramsTableScores nId listId ngramsType table = do ...@@ -648,7 +649,9 @@ setNgramsTableScores nId listId ngramsType table = do
("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n") ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
(length ngrams_terms) t1 t2 (length ngrams_terms) t1 t2
let let
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences setOcc ne = ne & ne_occurrences .~ msumOf (at (ne ^. ne_ngrams) . _Just) occurrences
--printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
pure $ table & each %~ setOcc pure $ table & each %~ setOcc
......
...@@ -34,6 +34,7 @@ import GHC.Generics (Generic) ...@@ -34,6 +34,7 @@ import GHC.Generics (Generic)
import Gargantext.Core.Text (size) import Gargantext.Core.Text (size)
import Gargantext.Core.Types (ListType(..), ListId, NodeId, TODO) import Gargantext.Core.Types (ListType(..), ListId, NodeId, TODO)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Admin.Types.Node (ContextId)
import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig, CmdM') import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig, CmdM')
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (IsHashable(..)) import Gargantext.Prelude.Crypto.Hash (IsHashable(..))
...@@ -170,7 +171,7 @@ data NgramsElement = ...@@ -170,7 +171,7 @@ data NgramsElement =
NgramsElement { _ne_ngrams :: NgramsTerm NgramsElement { _ne_ngrams :: NgramsTerm
, _ne_size :: Int , _ne_size :: Int
, _ne_list :: ListType , _ne_list :: ListType
, _ne_occurrences :: Int , _ne_occurrences :: [ContextId]
, _ne_root :: Maybe NgramsTerm , _ne_root :: Maybe NgramsTerm
, _ne_parent :: Maybe NgramsTerm , _ne_parent :: Maybe NgramsTerm
, _ne_children :: MSet NgramsTerm , _ne_children :: MSet NgramsTerm
...@@ -186,7 +187,7 @@ mkNgramsElement :: NgramsTerm ...@@ -186,7 +187,7 @@ mkNgramsElement :: NgramsTerm
-> MSet NgramsTerm -> MSet NgramsTerm
-> NgramsElement -> NgramsElement
mkNgramsElement ngrams list rp children = mkNgramsElement ngrams list rp children =
NgramsElement ngrams (size (unNgramsTerm ngrams)) list 1 (_rp_root <$> rp) (_rp_parent <$> rp) children NgramsElement ngrams (size (unNgramsTerm ngrams)) list [] (_rp_root <$> rp) (_rp_parent <$> rp) children
newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
newNgramsElement mayList ngrams = newNgramsElement mayList ngrams =
...@@ -571,7 +572,7 @@ ngramsElementFromRepo ...@@ -571,7 +572,7 @@ ngramsElementFromRepo
, _ne_parent = p , _ne_parent = p
, _ne_children = c , _ne_children = c
, _ne_ngrams = ngrams , _ne_ngrams = ngrams
, _ne_occurrences = 0 -- panic $ "API.Ngrams.Types._ne_occurrences" , _ne_occurrences = [] -- panic $ "API.Ngrams.Types._ne_occurrences"
{- {-
-- Here we could use 0 if we want to avoid any `panic`. -- Here we could use 0 if we want to avoid any `panic`.
-- It will not happen using getTableNgrams if -- It will not happen using getTableNgrams if
......
...@@ -36,6 +36,7 @@ import qualified Data.HashMap.Strict as HM ...@@ -36,6 +36,7 @@ import qualified Data.HashMap.Strict as HM
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Database.PostgreSQL.Simple as DPS import qualified Database.PostgreSQL.Simple as DPS
import qualified Database.PostgreSQL.Simple.Types as DPST
-- | fst is size of Supra Corpus -- | fst is size of Supra Corpus
-- snd is Texts and size of Occurrences (different docs) -- snd is Texts and size of Occurrences (different docs)
...@@ -108,15 +109,16 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs = ...@@ -108,15 +109,16 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs =
getOccByNgramsOnlyFast :: CorpusId getOccByNgramsOnlyFast :: CorpusId
-> ListId -> ListId
-> NgramsType -> NgramsType
-> Cmd err (HashMap NgramsTerm Int) -> Cmd err (HashMap NgramsTerm [ContextId])
getOccByNgramsOnlyFast cId lId nt = do getOccByNgramsOnlyFast cId lId nt = do
HM.fromList <$> map (\(t,n) -> (NgramsTerm t, round n)) <$> run cId lId nt --HM.fromList <$> map (\(t,n) -> (NgramsTerm t, round n)) <$> run cId lId nt
HM.fromList <$> map (\(t, ns) -> (NgramsTerm t, NodeId <$> DPST.fromPGArray ns)) <$> run cId lId nt
where where
run :: CorpusId run :: CorpusId
-> ListId -> ListId
-> NgramsType -> NgramsType
-> Cmd err [(Text, Double)] -> Cmd err [(Text, DPST.PGArray Int)]
run cId' lId' nt' = runPGSQuery query run cId' lId' nt' = runPGSQuery query
( cId' ( cId'
, lId' , lId'
...@@ -127,7 +129,8 @@ getOccByNgramsOnlyFast cId lId nt = do ...@@ -127,7 +129,8 @@ getOccByNgramsOnlyFast cId lId nt = do
query = [sql| query = [sql|
SELECT ng.terms SELECT ng.terms
-- , ng.id -- , ng.id
, round(nng.weight) --, round(nng.weight)
, ARRAY(SELECT DISTINCT context_node_ngrams.context_id FROM context_node_ngrams WHERE ng.id = context_id) AS context_ids
-- , ns.version -- , ns.version
-- , nng.ngrams_type -- , nng.ngrams_type
-- , ns.ngrams_type_id -- , ns.ngrams_type_id
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment