getOccByNgramsOnly: for now, call both the fast and the slow implementations and compare their results

parent 1c37c774
...@@ -72,7 +72,7 @@ import GHC.Generics (Generic) ...@@ -72,7 +72,7 @@ import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
-- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..)) -- import Gargantext.Database.Schema.Ngrams (NgramsTypeId, ngramsTypeId, NgramsTableData(..))
--import Gargantext.Database.Config (userMaster) --import Gargantext.Database.Config (userMaster)
import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnly) import Gargantext.Database.Metrics.NgramsByNode (getOccByNgramsOnlySafe)
import Gargantext.Database.Schema.Ngrams (NgramsType) import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Database.Utils (fromField', HasConnection) import Gargantext.Database.Utils (fromField', HasConnection)
--import Gargantext.Database.Lists (listsWith) --import Gargantext.Database.Lists (listsWith)
...@@ -936,7 +936,7 @@ getTableNgrams cId tabType listId limit_ moffset ...@@ -936,7 +936,7 @@ getTableNgrams cId tabType listId limit_ moffset
-- getNgramsTableMap ({-lists <>-} listIds) ngramsType -- getNgramsTableMap ({-lists <>-} listIds) ngramsType
table <- getNgramsTableMap listId ngramsType & mapped . v_data %~ finalize table <- getNgramsTableMap listId ngramsType & mapped . v_data %~ finalize
occurrences <- getOccByNgramsOnly cId ngramsType (table ^.. v_data . _NgramsTable . each . ne_ngrams) occurrences <- getOccByNgramsOnlySafe cId ngramsType (table ^.. v_data . _NgramsTable . each . ne_ngrams)
let let
setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences setOcc ne = ne & ne_occurrences .~ sumOf (at (ne ^. ne_ngrams) . _Just) occurrences
......
...@@ -20,6 +20,7 @@ module Gargantext.Database.Metrics.NgramsByNode ...@@ -20,6 +20,7 @@ module Gargantext.Database.Metrics.NgramsByNode
where where
import Data.Map.Strict (Map, fromListWith, elems, toList, fromList) import Data.Map.Strict (Map, fromListWith, elems, toList, fromList)
import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Data.Set (Set) import Data.Set (Set)
import Data.Text (Text) import Data.Text (Text)
import Data.Tuple.Extra (second, swap) import Data.Tuple.Extra (second, swap)
...@@ -135,17 +136,27 @@ queryNgramsByNodeUser = [sql| ...@@ -135,17 +136,27 @@ queryNgramsByNodeUser = [sql|
|] |]
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- TODO add groups -- TODO add groups
-- | Builds a 'Map' from the @(Text, Int)@ pairs returned by
-- 'selectNgramsOccurrencesOnlyByNodeUser'; @fromListWith (+)@ adds together
-- the 'Int's of pairs that share the same 'Text' key.
-- NOTE(review): every line below carries both sides of the diff: first the
-- pre-commit name @getOccByNgramsOnly@, then the post-commit name
-- @getOccByNgramsOnlyFast@. Apart from the rename the two sides are the same.
getOccByNgramsOnly :: CorpusId -> NgramsType -> [Text] getOccByNgramsOnlyFast :: CorpusId -> NgramsType -> [Text]
-> Cmd err (Map Text Int) -> Cmd err (Map Text Int)
getOccByNgramsOnly cId nt ngs = getOccByNgramsOnlyFast cId nt ngs =
fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs
-- just slower than getOccByNgramsOnly -- just slower than getOccByNgramsOnlyFast
-- | Takes the map returned by 'getNodesByNgramsOnlyUser' and replaces every
-- value with its 'Set.size', yielding a @Map Text Int@.
-- NOTE(review): presumably that map associates each ngram with the set of
-- nodes it occurs in; confirm against 'getNodesByNgramsOnlyUser'.
-- NOTE(review): every line below carries both sides of the diff: first the
-- pre-commit name @getOccByNgramsOnly'@, then the post-commit name
-- @getOccByNgramsOnlySlow@. Apart from the rename the two sides are the same.
getOccByNgramsOnly' :: CorpusId -> NgramsType -> [Text] getOccByNgramsOnlySlow :: CorpusId -> NgramsType -> [Text]
-> Cmd err (Map Text Int) -> Cmd err (Map Text Int)
getOccByNgramsOnly' cId nt ngs = getOccByNgramsOnlySlow cId nt ngs =
Map.map Set.size <$> getNodesByNgramsOnlyUser cId nt ngs Map.map Set.size <$> getNodesByNgramsOnlyUser cId nt ngs
-- | Cross-checking variant of the occurrence count.
--
-- Logs the call (corpus id, ngrams type and number of requested ngrams), then
-- runs 'getOccByNgramsOnlyFast' followed by 'getOccByNgramsOnlySlow' on the
-- same arguments. If the two resulting maps are not equal, the value of
-- @diff slow fast@ is printed with 'printDebug'. The map produced by the slow
-- implementation is always the one returned.
getOccByNgramsOnlySafe :: CorpusId -> NgramsType -> [Text]
                       -> Cmd err (Map Text Int)
getOccByNgramsOnlySafe cId nt ngs = do
  printDebug "getOccByNgramsOnlySafe" (cId, nt, length ngs)
  viaFast <- getOccByNgramsOnlyFast cId nt ngs
  viaSlow <- getOccByNgramsOnlySlow cId nt ngs
  let -- Only forced when the two results disagree (lazy binding).
      mismatch :: PatchMap Text (Replace (Maybe Int))
      mismatch = diff viaSlow viaFast
  when (viaFast /= viaSlow)
    (printDebug "getOccByNgramsOnlySafe: difference" mismatch)
  pure viaSlow
selectNgramsOccurrencesOnlyByNodeUser :: CorpusId -> NgramsType -> [Text] selectNgramsOccurrencesOnlyByNodeUser :: CorpusId -> NgramsType -> [Text]
-> Cmd err [(Text, Int)] -> Cmd err [(Text, Int)]
selectNgramsOccurrencesOnlyByNodeUser cId nt tms = selectNgramsOccurrencesOnlyByNodeUser cId nt tms =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment