getOccByNgramsOnly: For now it calls the fast and the slow and compares

parent 1c37c774
......@@ -72,7 +72,7 @@ import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix)
-- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
--import Gargantext.Database.Config (userMaster)
import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnly)
import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlySafe)
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Database.Utils (fromField', HasConnection)
--import Gargantext.Database.Lists (listsWith)
......@@ -936,7 +936,7 @@ getTableNgrams cId tabType listId limit_ moffset
-- getNgramsTableMap ({-lists <>-} listIds) ngramsType
table <- getNgramsTableMap listId ngramsType & mapped . v_data %~ finalize
occurrences <- getOccByNgramsOnly cId ngramsType (table ^.. v_data . _NgramsTable . each . ne_ngrams)
occurrences <- getOccByNgramsOnlySafe cId ngramsType (table ^.. v_data . _NgramsTable . each . ne_ngrams)
let
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
......
......@@ -20,6 +20,7 @@ module Gargantext.Database.Metrics.NgramsByNode
where
import Data.Map.Strict (Map, fromListWith, elems, toList, fromList)
import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Data.Set (Set)
import Data.Text (Text)
import Data.Tuple.Extra (second, swap)
......@@ -135,17 +136,27 @@ queryNgramsByNodeUser = [sql|
|]
------------------------------------------------------------------------
-- TODO add groups
getOccByNgramsOnly :: CorpusId -> NgramsType -> [Text]
-> Cmd err (Map Text Int)
getOccByNgramsOnly cId nt ngs =
getOccByNgramsOnlyFast :: CorpusId -> NgramsType -> [Text]
-> Cmd err (Map Text Int)
getOccByNgramsOnlyFast cId nt ngs =
fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs
-- just slower than getOccByNgramsOnly
getOccByNgramsOnly' :: CorpusId -> NgramsType -> [Text]
-> Cmd err (Map Text Int)
getOccByNgramsOnly' cId nt ngs =
-- just slower than getOccByNgramsOnlyFast
getOccByNgramsOnlySlow :: CorpusId -> NgramsType -> [Text]
-> Cmd err (Map Text Int)
getOccByNgramsOnlySlow cId nt ngs =
Map.map Set.size <$> getNodesByNgramsOnlyUser cId nt ngs
getOccByNgramsOnlySafe :: CorpusId -> NgramsType -> [Text]
-> Cmd err (Map Text Int)
getOccByNgramsOnlySafe cId nt ngs = do
printDebug "getOccByNgramsOnlySafe" (cId, nt, length ngs)
fast <- getOccByNgramsOnlyFast cId nt ngs
slow <- getOccByNgramsOnlySlow cId nt ngs
when (fast /= slow) $
printDebug "getOccByNgramsOnlySafe: difference" (diff slow fast :: PatchMap Text (Replace (Maybe Int)))
pure slow
selectNgramsOccurrencesOnlyByNodeUser :: CorpusId -> NgramsType -> [Text]
-> Cmd err [(Text, Int)]
selectNgramsOccurrencesOnlyByNodeUser cId nt tms =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment