Verified Commit d9294f34 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into dev-threaded-flow-with-new-ngrams-extraction

parents 5556c1a1 706734c5
Pipeline #7958 passed with stages
in 56 minutes and 59 seconds
## Version 0.0.7.5.3
* [BACK][FIX][Resolve "[Server slowness] With the dev branch on the dev instance, we're experiencing a real slowness" (JobInfo changes)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/merge_requests/429)
* [BACK][OPTIM][Concurrent queries in NLP](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/merge_requests/451)
* [FRONT][FIX][[graph] restore proportional labels and make edges transparent](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/merge_requests/522)
* [BACK][FIX][Resolve "On IMT Instance : Error message or crashes when changing the status of terms in large batch"](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/merge_requests/445)
* [FRONT][FIX][graph: rework edge renderer based on rectangle renderer](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/merge_requests/523)
* [BACK][FIX][Break loops in Ngrams graphs](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/merge_requests/453)
## Version 0.0.7.5.2
* [BACK/FRONT][FIX][[query] a prefixed query like ~prefix should transform to 'prefix', not '"prefix'](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/merge_requests/443)
......
......@@ -5,7 +5,7 @@ cabal-version: 3.4
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.7.5.2
version: 0.0.7.5.3
synopsis: Search, map, share
description: Please see README.md
category: Data
......
......@@ -449,8 +449,8 @@ matchingNode listType minSize maxSize searchFn (Node inputNode children) =
-- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree.
-- /IMPORTANT/: This functions returns an error in case we found a loop.
buildForest :: Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement)
buildForest = fmap (map (fmap snd)) . NodeStory.buildForest
buildForest :: OnLoopDetectedStrategy -> Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement)
buildForest onLoopStrategy = fmap (map (fmap snd)) . NodeStory.buildForest onLoopStrategy
-- | Folds an Ngrams forest back to a table map.
-- This function doesn't aggregate information, but merely just recostructs the original
......@@ -482,7 +482,7 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
-> Either BuildForestError (VersionedWithCount NgramsTable)
searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
let tableMap = versionedTableMap ^. v_data
in case keepRoots <$> buildForest tableMap of
in case keepRoots <$> buildForest (BreakLoop LBA_just_do_it) tableMap of
Left err -> Left err
Right fs ->
let forestRoots = filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery $ fs
......
......@@ -111,6 +111,9 @@ mSetFromSet = MSet . Map.fromSet (const ())
mSetFromList :: Ord a => [a] -> MSet a
mSetFromList = MSet . Map.fromList . map (\x -> (x, ()))
mSetDifference :: Ord a => MSet a -> MSet a -> MSet a
mSetDifference (MSet m1) (MSet m2) = MSet (m1 `Map.difference` m2)
-- mSetToSet :: Ord a => MSet a -> Set a
-- mSetToSet (MSet a) = Set.fromList ( Map.keys a)
mSetToSet :: Ord a => MSet a -> Set a
......
This diff is collapsed.
......@@ -42,6 +42,7 @@ module Gargantext.Core.NodeStory.Types
, ArchiveState
, ArchiveStateSet
, ArchiveStateList
, allVisitedNgramsTerms
-- * Errors
, HasNodeStoryError(..)
......@@ -197,7 +198,7 @@ data BuildForestError
= -- We found a loop, something that shouldn't normally happen if the calling
-- code is correct by construction, but if that does happen, the value will
-- contain the full path to the cycle.
BFE_loop_detected !(Set VisitedNode)
BFE_loop_detected !(Set VisitedNode)
deriving (Show, Eq)
instance ToHumanFriendlyError BuildForestError where
......@@ -217,6 +218,9 @@ data VisitedNode =
VN { _vn_position :: !Int, _vn_term :: !NgramsTerm }
deriving (Show)
allVisitedNgramsTerms :: Set VisitedNode -> Set NgramsTerm
allVisitedNgramsTerms = Set.map _vn_term
-- /NOTA BENE/: It's important to use this custom instance for the loop detector
-- to work correctly. If we stop comparing on the terms the loop detector .. will loop.
instance Eq VisitedNode where
......
......@@ -132,7 +132,8 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs =
HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByContextUser_withSample cId int nt ngs
-- Returns occurrences of ngrams in given corpus/list (for each ngram, a list of contexts is returned)
-- | Returns occurrences of ngrams in given corpus/list (for each
-- ngram, a list of contexts is returned)
getOccByNgramsOnlyFast :: CorpusId
-> ListId
-> NgramsType
......@@ -154,34 +155,39 @@ getOccByNgramsOnlyFast cId lId nt = do
query :: DPS.Query
query = [sql|
WITH cnnv AS
( SELECT DISTINCT context_node_ngrams.context_id,
context_node_ngrams.ngrams_id,
nodes_contexts.node_id,
nodes_contexts.category
FROM nodes_contexts
JOIN context_node_ngrams ON context_node_ngrams.context_id = nodes_contexts.context_id
WITH nc AS (
SELECT DISTINCT context_id
FROM nodes_contexts
WHERE node_id = ?
AND category > 0
),
cnnv AS
( SELECT DISTINCT context_id,
ngrams_id
FROM context_node_ngrams
WHERE context_id IN (SELECT context_id FROM nc)
),
node_context_ids AS
(SELECT context_id, ngrams_id, terms
FROM cnnv
JOIN ngrams ON cnnv.ngrams_id = ngrams.id
WHERE node_id = ? AND cnnv.category > 0
),
ncids_agg AS
(SELECT ngrams_id, terms, array_agg(DISTINCT context_id) AS agg
FROM node_context_ids
GROUP BY (ngrams_id, terms)),
( SELECT array_agg(DISTINCT context_id) AS agg,
ngrams_id,
terms
FROM cnnv
JOIN ngrams
ON cnnv.ngrams_id = ngrams.id
GROUP BY (ngrams_id, terms)
),
ns AS
(SELECT ngrams_id, terms
FROM node_stories
JOIN ngrams ON ngrams_id = ngrams.id
WHERE node_id = ? AND ngrams_type_id = ?
JOIN ngrams
ON ngrams_id = ngrams.id
WHERE node_id = ? AND ngrams_type_id = ?
)
SELECT ns.terms, CASE WHEN agg IS NULL THEN '{}' ELSE agg END
FROM ns
LEFT JOIN ncids_agg ON ns.ngrams_id = ncids_agg.ngrams_id
LEFT JOIN ncids_agg
ON ns.ngrams_id = ncids_agg.ngrams_id
|]
-- query = [sql|
-- WITH node_context_ids AS
......
......@@ -611,8 +611,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
liftIO $ exportedNgrams `shouldBe` exportedNgrams2
-- We test that if we try to import terms which, when merged with the existing,
-- would cause a loop, GGTX is capable of rejecting the request.
it "refuses to import terms which will lead to a loop" $ \(SpecContext testEnv port app _) -> do
-- would cause a loop but GGTX is capable of breaking them, serving the request.
it "allows importing terms which will lead to a loop (because it can break them)" $ \(SpecContext testEnv port app _) -> do
cId <- newCorpusForUser testEnv "alice"
let log_cfg = (test_config testEnv) ^. gc_logging
withApplication app $ do
......@@ -685,14 +685,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
}
ji <- checkEither $ liftIO $ runClientM (add_form_to_list token listId params) clientEnv
ji' <- pollUntilWorkFinished log_cfg port ji
-- Unfortunately we don't have a better way then to match on the stringified exception, sigh.
case _scst_events ji' of
Just [ScraperEvent{..}]
| Just msg <- _scev_message
-> liftIO $ msg `shouldSatisfy` \txt -> "Loop detected in terms: foo -> bar -> foo" `T.isInfixOf` txt
, "Loop detected in terms: foo -> bar -> foo" `T.isInfixOf` msg
-> fail (T.unpack msg)
| otherwise
-> fail "No suitable message in ScraperEvent."
_ -> fail "Expected job to fail, but it didn't"
-> pure () -- no loop!
_ -> pure () -- no loop!
createDocsList :: FilePath
-> TestEnv
......
......@@ -13,6 +13,7 @@ Portability : POSIX
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module Test.Instances where
......@@ -52,6 +53,7 @@ import Gargantext.API.Search.Types (SearchQuery(..), SearchResult(..), SearchRes
import Gargantext.API.Table.Types (TableQuery(..))
import Gargantext.API.Viz.Types (PhyloData)
import Gargantext.Core (Lang)
import Gargantext.Core.NodeStory hiding (buildForest)
import Gargantext.Core.Notifications.CentralExchange.Types qualified as CET
import Gargantext.Core.Notifications.Dispatcher.Types qualified as DET
import Gargantext.Core.Text.Corpus.Query (RawQuery(..))
......@@ -771,9 +773,12 @@ genCorpusWithMatchingElement = do
el <- over ne_children (breakLoop trm) . makeItRoot <$> (resize depth arbitrary)
pure (trm, el { _ne_ngrams = trm })
-- Let's build the map first, so that duplicates will be overwritten.
fullMap <- (Map.fromList <$> vectorOf depth mkEntry) `suchThat` (\x -> isRight (buildForest x)) -- exclude loops
let (hd NE.:| _) = NE.fromList $ Map.elems fullMap
pure $ AcyclicTableMap fullMap hd
fullMapE <- buildForest (BreakLoop LBA_just_do_it) . Map.fromList <$> vectorOf depth mkEntry
case fullMapE of
Left e -> panicTrace (show e)
Right (destroyForest -> fullMap) -> do
let (hd NE.:| _) = NE.fromList $ map snd fullMap
pure $ AcyclicTableMap (Map.fromList fullMap) hd
where
breakLoop :: NgramsTerm -> MSet NgramsTerm -> MSet NgramsTerm
breakLoop t = mSetFromSet . Set.delete t . mSetToSet
......
......@@ -235,7 +235,7 @@ testFlat05 = do
testForestSearchProp :: Property
testForestSearchProp = forAll arbitrary $ \(AcyclicTableMap ngramsTable el) -> do
case searchTableNgrams (Versioned 0 ngramsTable) (searchQuery el) of
Left (BFE_loop_detected err) -> fail (T.unpack $ renderLoop err)
Left (BFE_loop_detected err) -> fail (T.unpack $ renderLoop err)
Right res -> res ^. vc_data `shouldSatisfy` (any (containsTerm (_ne_ngrams el)) . getNgramsTable)
where
searchQuery term = NgramsSearchQuery {
......
......@@ -11,12 +11,15 @@ import Data.Char (isSpace)
import Data.Char qualified as Char
import Data.Map.Strict (Map)
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text qualified as T
import Data.Tree
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.API.Ngrams.Types qualified as NT
import Gargantext.Core
import Gargantext.Core.NodeStory (LoopBreakAlgorithm(..), OnLoopDetectedStrategy(..))
import Gargantext.Core.NodeStory.Types (VisitedNode(..))
import Gargantext.Core.Text.Terms.Mono (isSep)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Types
......@@ -109,6 +112,8 @@ tests = describe "Ngrams" $ do
it "building a complex deep tree works" testBuildNgramsTree_03
it "pruning a simple tree works" testPruningNgramsForest_01
it "pruning a complex tree works" testPruningNgramsForest_02
it "fails on loops if asked to do nothing" testLoopBreaker_01
it "breaks a simple loop (just do it algorithm)" testLoopBreaker_02
prop "destroyForest . buildForest === id" buildDestroyForestRoundtrips
describe "hierarchical grouping" $ do
it "filterNgramsNodes with empty query is identity" testFilterNgramsNodesEmptyQuery
......@@ -140,7 +145,12 @@ instance Show ASCIIForest where
show (ASCIIForest x) = x
buildForestOrFail :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForestOrFail mp = case buildForest mp of
buildForestOrFail mp = case buildForest FailOnLoop mp of
Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop)
Right x -> x
buildForestOrBreakLoop :: Map NgramsTerm NgramsElement -> Forest NgramsElement
buildForestOrBreakLoop mp = case buildForest (BreakLoop LBA_just_do_it) mp of
Left (BFE_loop_detected treeLoop) -> error (T.unpack $ renderLoop treeLoop)
Right x -> x
......@@ -292,3 +302,21 @@ testPruningNgramsForest_02 =
`- ford
|]
testLoopBreaker_01 :: Property
testLoopBreaker_01 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_children .~ mSetFromList ["foo"])
]
in (buildForest FailOnLoop t1) === Left (BFE_loop_detected $ Set.fromList [ VN 0 "foo", VN 1 "bar"])
testLoopBreaker_02 :: Property
testLoopBreaker_02 =
let t1 = Map.fromList [ ( "foo", mkMapTerm "foo" & ne_children .~ mSetFromList ["bar"])
, ( "bar", mkMapTerm "bar" & ne_children .~ mSetFromList ["foo"])
]
in (pruneForest $ buildForestOrBreakLoop t1) `compareForestVisually` [r|
bar
|
`- foo
|]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment