Commit 781b74dc authored by Fabien Maniere's avatar Fabien Maniere

Revert "Merge branch 'adinapoli/issue-504' into 'dev'"

This reverts merge request !439
parent ca7f0f26
Pipeline #7922 passed with stages
in 49 minutes and 8 seconds
...@@ -17,12 +17,14 @@ add get ...@@ -17,12 +17,14 @@ add get
{-# LANGUAGE BangPatterns #-} {-# LANGUAGE BangPatterns #-}
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE IncoherentInstances #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Ngrams module Gargantext.API.Ngrams
( (
...@@ -112,7 +114,7 @@ import Gargantext.API.Ngrams.Types ...@@ -112,7 +114,7 @@ import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory hiding (buildForest) import Gargantext.Core.NodeStory hiding (buildForest)
import Gargantext.Core.NodeStory qualified as NodeStory import Gargantext.Core.NodeStory qualified as NodeStory
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType) import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, TODO, assertValid, HasValidationError) import Gargantext.Core.Types (ListType(..), NodeId, ListId, TODO, assertValid, ContextId, HasValidationError)
import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..)) import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast) import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
import Gargantext.Database.Prelude import Gargantext.Database.Prelude
...@@ -456,16 +458,14 @@ buildForest = fmap (map (fmap snd)) . NodeStory.buildForest ...@@ -456,16 +458,14 @@ buildForest = fmap (map (fmap snd)) . NodeStory.buildForest
-- This function doesn't aggregate information, but merely just recostructs the original -- This function doesn't aggregate information, but merely just recostructs the original
-- map without loss of information. To perform operations on the forest, use the appropriate -- map without loss of information. To perform operations on the forest, use the appropriate
-- functions. -- functions.
-- /NOTA BENE:/ We return a list and not a Map because we might have sorted the forest, and destroyForest :: Forest NgramsElement -> Map NgramsTerm NgramsElement
-- converting into a map would trash the carefully-constructed sorting. destroyForest f = Map.fromList . map (foldTree destroyTree) $ f
destroyForest :: Forest NgramsElement -> [(NgramsTerm, NgramsElement)]
destroyForest f = concatMap (map (\el -> (_ne_ngrams el, el)) . flatten) $ f
where where
-- _destroyTree :: NgramsElement -> [(NgramsTerm, [NgramsElement])] -> (NgramsTerm, [NgramsElement]) destroyTree :: NgramsElement -> [(NgramsTerm, NgramsElement)] -> (NgramsTerm, NgramsElement)
-- _destroyTree rootEl childrenEl = (_ne_ngrams rootEl, childrenEl) destroyTree rootEl childrenEl = (_ne_ngrams rootEl, squashElements rootEl childrenEl)
-- _squashElements :: NgramsElement -> [(NgramsTerm, NgramsElement)] -> NgramsElement squashElements :: NgramsElement -> [(NgramsTerm, NgramsElement)] -> NgramsElement
-- _squashElements r _ = r squashElements r _ = r
-- | TODO Errors management -- | TODO Errors management
-- TODO: polymorphic for Annuaire or Corpus or ... -- TODO: polymorphic for Annuaire or Corpus or ...
...@@ -482,63 +482,58 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement) ...@@ -482,63 +482,58 @@ searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
-> Either BuildForestError (VersionedWithCount NgramsTable) -> Either BuildForestError (VersionedWithCount NgramsTable)
searchTableNgrams versionedTableMap NgramsSearchQuery{..} = searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
let tableMap = versionedTableMap ^. v_data let tableMap = versionedTableMap ^. v_data
in case keepRoots <$> buildForest tableMap of in case buildForest tableMap of
Left err -> Left err Left err -> Left err
Right fs -> Right fs ->
let forestRoots = filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery $ fs let forestRoots = Set.fromList
. Map.elems
. destroyForest
. filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery
$ fs
tableMapSorted = versionedTableMap tableMapSorted = versionedTableMap
& v_data .~ (NgramsTable . map snd & v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ forestRoots)
. destroyForest
. sortAndPaginateForest _nsq_offset _nsq_limit _nsq_orderBy in Right $ toVersionedWithCount (Set.size forestRoots) tableMapSorted
. withInnersForest
$ forestRoots
)
in Right $ toVersionedWithCount (length forestRoots) tableMapSorted
keepRoots :: Forest NgramsElement -> Forest NgramsElement
keepRoots = filter (\(Node r _) -> isNothing (_ne_root r) || isNothing (_ne_parent r))
-- | For each input root, extends its occurrence count with
-- the information found in the subforest.
withInnersForest :: Forest NgramsElement -> Forest NgramsElement
withInnersForest = map sumSubitemsOccurrences
where where
sumSubitemsOccurrences :: Tree NgramsElement -> Tree NgramsElement
sumSubitemsOccurrences (Node root children) = -- Sorts the input 'NgramsElement' list.
let children' = withInnersForest children -- /IMPORTANT/: As we might be sorting ngrams in all sorts of language,
root' = root { _ne_occurrences = (_ne_occurrences root) <> foldMap (_ne_occurrences . rootLabel) children' } -- some of them might include letters with accents and other unicode symbols,
in Node root' children' -- but we need to filter those /diacritics/ out so that the sorting would
-- happen in the way users would expect. See ticket #331.
sortAndPaginateForest :: Maybe Offset sortOnOrder :: Maybe OrderBy -> ([NgramsElement] -> [NgramsElement])
-> Limit sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
-> Maybe OrderBy sortOnOrder (Just TermAsc) = List.sortBy ngramTermsAscSorter
-> Forest NgramsElement sortOnOrder (Just TermDesc) = List.sortBy ngramTermsDescSorter
-> Forest NgramsElement sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to Set.size)
sortAndPaginateForest mb_offset limit orderBy xs = sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to Set.size)
let offset' = getOffset $ maybe 0 identity mb_offset
in take (getLimit limit) ngramTermsAscSorter = on unicodeDUCETSorter (unNgramsTerm . view ne_ngrams)
. drop offset' ngramTermsDescSorter = on (\n1 n2 -> unicodeDUCETSorter n2 n1) (unNgramsTerm . view ne_ngrams)
. sortOnOrderForest orderBy
$ xs -- | For each input root, extends its occurrence count with
-- the information found in the subitems.
-- Sorts the input 'NgramsElement' list. withInners :: Map NgramsTerm NgramsElement -> Set NgramsElement -> Set NgramsElement
-- /IMPORTANT/: As we might be sorting ngrams in all sorts of language, withInners tblMap roots = Set.map addSubitemsOccurrences roots
-- some of them might include letters with accents and other unicode symbols, where
-- but we need to filter those /diacritics/ out so that the sorting would addSubitemsOccurrences :: NgramsElement -> NgramsElement
-- happen in the way users would expect. See ticket #331. addSubitemsOccurrences e =
sortOnOrderForest :: Maybe OrderBy -> (Forest NgramsElement -> Forest NgramsElement) e { _ne_occurrences = foldl' alterOccurrences (e ^. ne_occurrences) (e ^. ne_children) }
sortOnOrderForest Nothing = sortOnOrderForest (Just ScoreDesc)
sortOnOrderForest (Just TermAsc) = List.sortBy (\(Node t1 _) (Node t2 _) -> ngramTermsAscSorter t1 t2) alterOccurrences :: Set ContextId -> NgramsTerm -> Set ContextId
sortOnOrderForest (Just TermDesc) = List.sortBy (\(Node t1 _) (Node t2 _) -> ngramTermsDescSorter t1 t2) alterOccurrences occs t = case Map.lookup t tblMap of
sortOnOrderForest (Just ScoreAsc) = List.sortOn $ \(Node root _) -> root ^. (ne_occurrences . to Set.size) Nothing -> occs
sortOnOrderForest (Just ScoreDesc) = List.sortOn $ Down . (\(Node root _) -> root ^. (ne_occurrences . to Set.size)) Just e' -> occs <> e' ^. ne_occurrences
ngramTermsAscSorter :: NgramsElement -> NgramsElement -> Ordering -- | Paginate the results
ngramTermsAscSorter = on unicodeDUCETSorter (unNgramsTerm . view ne_ngrams) sortAndPaginate :: Set NgramsElement -> [NgramsElement]
sortAndPaginate xs =
ngramTermsDescSorter :: NgramsElement -> NgramsElement -> Ordering let offset' = getOffset $ maybe 0 identity _nsq_offset
ngramTermsDescSorter = on (\n1 n2 -> unicodeDUCETSorter n2 n1) (unNgramsTerm . view ne_ngrams) in take (getLimit _nsq_limit)
. drop offset'
. sortOnOrder _nsq_orderBy
. Set.toList
$ xs
-- | This function allows sorting two texts via their unicode sorting -- | This function allows sorting two texts via their unicode sorting
-- (as opposed as the standard lexicographical sorting) by relying on -- (as opposed as the standard lexicographical sorting) by relying on
......
...@@ -214,16 +214,9 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do ...@@ -214,16 +214,9 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
eRes <- runClientM (get_table_ngrams token cId APINgrams.Terms listId 50 Nothing (Just MapTerm) Nothing Nothing Nothing Nothing) clientEnv eRes <- runClientM (get_table_ngrams token cId APINgrams.Terms listId 50 Nothing (Just MapTerm) Nothing Nothing Nothing Nothing) clientEnv
eRes `shouldSatisfy` isRight eRes `shouldSatisfy` isRight
let (Right res) = eRes let (Right res) = eRes
-- /NOTA BENE/ The count is 1 because the count applies to roots only.
Just res `shouldBe` JSON.decode [json| {"version":5 Just res `shouldBe` JSON.decode [json| {"version":5
,"count":1 ,"count":3
,"data":[ ,"data":[
{"ngrams":"overdrives"
,"size":1
,"list":"MapTerm"
,"occurrences":[]
,"children":["guitar pedals"]
},
{"ngrams":"guitar pedals" {"ngrams":"guitar pedals"
,"size":1 ,"size":1
,"list":"MapTerm" ,"list":"MapTerm"
...@@ -232,6 +225,12 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do ...@@ -232,6 +225,12 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
,"occurrences":[] ,"occurrences":[]
,"children":["tube screamers"] ,"children":["tube screamers"]
}, },
{"ngrams":"overdrives"
,"size":1
,"list":"MapTerm"
,"occurrences":[]
,"children":["guitar pedals"]
},
{"ngrams":"tube screamers" {"ngrams":"tube screamers"
,"size":1 ,"size":1
,"list":"MapTerm" ,"list":"MapTerm"
...@@ -310,16 +309,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do ...@@ -310,16 +309,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
-- check that new term is parent of old one -- check that new term is parent of old one
checkNgrams getNgrams [json| {"version": 2 checkNgrams getNgrams [json| {"version": 2
,"count":1 ,"count":2
,"data":[ ,"data":[
{"ngrams":"new abelian group"
,"size":1
,"list":"MapTerm"
,"root":null
,"parent":null
,"occurrences":[]
,"children":["abelian group"]
},
{"ngrams":"abelian group" {"ngrams":"abelian group"
,"size":2 ,"size":2
,"list":"MapTerm" ,"list":"MapTerm"
...@@ -327,6 +318,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do ...@@ -327,6 +318,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
,"parent": "new abelian group" ,"parent": "new abelian group"
,"occurrences":[] ,"occurrences":[]
,"children":[] ,"children":[]
},
{"ngrams":"new abelian group"
,"size":1
,"list":"MapTerm"
,"root":null
,"parent":null
,"occurrences":[]
,"children":["abelian group"]
} }
] ]
} }
...@@ -342,16 +341,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do ...@@ -342,16 +341,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
-- In essence, this JSON needs to be exactly the same as the previous one, -- In essence, this JSON needs to be exactly the same as the previous one,
-- i.e. important doesn't change the topology. -- i.e. important doesn't change the topology.
checkNgrams getNgrams [json| {"version": 2 checkNgrams getNgrams [json| {"version": 2
,"count":1 ,"count":2
,"data":[ ,"data":[
{"ngrams":"new abelian group"
,"size":1
,"list":"MapTerm"
,"root":null
,"parent":null
,"occurrences":[]
,"children":["abelian group"]
},
{"ngrams":"abelian group" {"ngrams":"abelian group"
,"size":2 ,"size":2
,"list":"MapTerm" ,"list":"MapTerm"
...@@ -359,6 +350,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do ...@@ -359,6 +350,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ beforeAllWith dbEnvSetup $ do
,"parent": "new abelian group" ,"parent": "new abelian group"
,"occurrences":[] ,"occurrences":[]
,"children":[] ,"children":[]
},
{"ngrams":"new abelian group"
,"size":1
,"list":"MapTerm"
,"root":null
,"parent":null
,"occurrences":[]
,"children":["abelian group"]
} }
] ]
} }
......
...@@ -768,7 +768,7 @@ genCorpusWithMatchingElement = do ...@@ -768,7 +768,7 @@ genCorpusWithMatchingElement = do
depth <- choose (1, 5) depth <- choose (1, 5)
let mkEntry = do let mkEntry = do
trm <- arbitrary trm <- arbitrary
el <- over ne_children (breakLoop trm) . makeItRoot <$> (resize depth arbitrary) el <- over ne_children (breakLoop trm) <$> (resize depth arbitrary)
pure (trm, el { _ne_ngrams = trm }) pure (trm, el { _ne_ngrams = trm })
-- Let's build the map first, so that duplicates will be overwritten. -- Let's build the map first, so that duplicates will be overwritten.
fullMap <- (Map.fromList <$> vectorOf depth mkEntry) `suchThat` (\x -> isRight (buildForest x)) -- exclude loops fullMap <- (Map.fromList <$> vectorOf depth mkEntry) `suchThat` (\x -> isRight (buildForest x)) -- exclude loops
...@@ -778,10 +778,6 @@ genCorpusWithMatchingElement = do ...@@ -778,10 +778,6 @@ genCorpusWithMatchingElement = do
breakLoop :: NgramsTerm -> MSet NgramsTerm -> MSet NgramsTerm breakLoop :: NgramsTerm -> MSet NgramsTerm -> MSet NgramsTerm
breakLoop t = mSetFromSet . Set.delete t . mSetToSet breakLoop t = mSetFromSet . Set.delete t . mSetToSet
makeItRoot :: NgramsElement -> NgramsElement
makeItRoot ne = ne & ne_root .~ Nothing
& ne_parent .~ Nothing
instance Arbitrary AcyclicTableMap where instance Arbitrary AcyclicTableMap where
arbitrary = genCorpusWithMatchingElement arbitrary = genCorpusWithMatchingElement
shrink = shrinkTree shrink = shrinkTree
......
...@@ -236,7 +236,7 @@ testForestSearchProp :: Property ...@@ -236,7 +236,7 @@ testForestSearchProp :: Property
testForestSearchProp = forAll arbitrary $ \(AcyclicTableMap ngramsTable el) -> do testForestSearchProp = forAll arbitrary $ \(AcyclicTableMap ngramsTable el) -> do
case searchTableNgrams (Versioned 0 ngramsTable) (searchQuery el) of case searchTableNgrams (Versioned 0 ngramsTable) (searchQuery el) of
Left (BFE_loop_detected err) -> fail (T.unpack $ renderLoop err) Left (BFE_loop_detected err) -> fail (T.unpack $ renderLoop err)
Right res -> res ^. vc_data `shouldSatisfy` (any (containsTerm (_ne_ngrams el)) . getNgramsTable) Right res -> res ^. vc_data `shouldSatisfy` (elem (_ne_ngrams el) . map _ne_ngrams . getNgramsTable)
where where
searchQuery term = NgramsSearchQuery { searchQuery term = NgramsSearchQuery {
_nsq_limit = Limit 5 _nsq_limit = Limit 5
...@@ -255,9 +255,7 @@ testSearchNestedTerms :: Assertion ...@@ -255,9 +255,7 @@ testSearchNestedTerms :: Assertion
testSearchNestedTerms = do testSearchNestedTerms = do
case searchTableNgrams (Versioned 0 hierarchicalTableMap) searchQuery of case searchTableNgrams (Versioned 0 hierarchicalTableMap) searchQuery of
Left (BFE_loop_detected err) -> fail (T.unpack $ renderLoop err) Left (BFE_loop_detected err) -> fail (T.unpack $ renderLoop err)
Right res -> Right res -> res ^. vc_data `shouldSatisfy` (elem "ford" . map _ne_ngrams . getNgramsTable)
-- it should appear at the top level or as one of the children.
res ^. vc_data `shouldSatisfy` (any (containsTerm "ford") . getNgramsTable)
where where
searchQuery = NgramsSearchQuery { searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 5 _nsq_limit = Limit 5
...@@ -269,11 +267,6 @@ testSearchNestedTerms = do ...@@ -269,11 +267,6 @@ testSearchNestedTerms = do
, _nsq_searchQuery = mockQueryFn (Just "ford") , _nsq_searchQuery = mockQueryFn (Just "ford")
} }
-- | Returns True if the input 'NgramsElement' contains (either in the root or in the children)
-- the input term.
containsTerm :: NgramsTerm -> NgramsElement -> Bool
containsTerm t (NgramsElement{..}) = _ne_ngrams == t || any ((==) t) (mSetToList _ne_children)
-- Pagination tests -- Pagination tests
test_pagination_allTerms :: Assertion test_pagination_allTerms :: Assertion
...@@ -397,7 +390,7 @@ test_paginationQuantum = do ...@@ -397,7 +390,7 @@ test_paginationQuantum = do
Left err -> fail (show err) Left err -> fail (show err)
Right res -> do Right res -> do
let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res
countRoots elems @?= 10 length elems @?= 10
forM_ elems $ \term -> forM_ elems $ \term ->
assertBool ("found " <> show (_ne_list term) <> " in: " <> show elems) (_ne_list term == MapTerm) assertBool ("found " <> show (_ne_list term) <> " in: " <> show elems) (_ne_list term == MapTerm)
where where
...@@ -411,20 +404,13 @@ test_paginationQuantum = do ...@@ -411,20 +404,13 @@ test_paginationQuantum = do
, _nsq_searchQuery = mockQueryFn Nothing , _nsq_searchQuery = mockQueryFn Nothing
} }
countRoots :: [NgramsElement] -> Int
countRoots [] = 0
countRoots (x:xs) =
if isNothing (_ne_root x) || isNothing (_ne_parent x)
then 1 + countRoots xs
else countRoots xs
test_paginationQuantum_02 :: Assertion test_paginationQuantum_02 :: Assertion
test_paginationQuantum_02 = do test_paginationQuantum_02 = do
case searchTableNgrams quantumComputingCorpus searchQuery of case searchTableNgrams quantumComputingCorpus searchQuery of
Left err -> fail (show err) Left err -> fail (show err)
Right res -> do Right res -> do
let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res
assertBool ("found only " <> show (length elems) <> " in: " <> show elems) (countRoots elems == 10) assertBool ("found only " <> show (length elems) <> " in: " <> show elems) (length elems == 10)
where where
searchQuery = NgramsSearchQuery { searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 10 _nsq_limit = Limit 10
......
...@@ -269,7 +269,7 @@ testBuildNgramsTree_03 = ...@@ -269,7 +269,7 @@ testBuildNgramsTree_03 =
-- /PRECONDITION/: The '_ne_ngrams' field always matches the 'NgramsTerm', key of the map. -- /PRECONDITION/: The '_ne_ngrams' field always matches the 'NgramsTerm', key of the map.
buildDestroyForestRoundtrips :: AcyclicTableMap -> Property buildDestroyForestRoundtrips :: AcyclicTableMap -> Property
buildDestroyForestRoundtrips (AcyclicTableMap mp _) = buildDestroyForestRoundtrips (AcyclicTableMap mp _) =
(Map.fromList . destroyForest . buildForestOrFail $ mp) === mp (destroyForest . buildForestOrFail $ mp) === mp
testPruningNgramsForest_01 :: Property testPruningNgramsForest_01 :: Property
testPruningNgramsForest_01 = testPruningNgramsForest_01 =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment