Commit 04e23602 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Fix subtle bug in searchTableNgrams

This commit fixes a bug (or feature? unclear at this stage) of the
`rootOf` function inside `searchTableNgrams`, which could, in some
occasion, return a root which has a different `ListType`, which
in turn throws off pagination.
parent 75488abc
Pipeline #3984 failed with stage
in 26 minutes and 52 seconds
...@@ -25,6 +25,7 @@ data-files: ...@@ -25,6 +25,7 @@ data-files:
ekg-assets/bootstrap-1.4.0.min.css ekg-assets/bootstrap-1.4.0.min.css
ekg-assets/chart_line_add.png ekg-assets/chart_line_add.png
ekg-assets/cross.png ekg-assets/cross.png
test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json
library library
exposed-modules: exposed-modules:
...@@ -883,7 +884,9 @@ test-suite garg-test ...@@ -883,7 +884,9 @@ test-suite garg-test
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends: build-depends:
QuickCheck QuickCheck
, aeson
, base , base
, bytestring
, containers , containers
, duckling , duckling
, extra , extra
......
...@@ -43,6 +43,7 @@ data-files: ...@@ -43,6 +43,7 @@ data-files:
- ekg-assets/bootstrap-1.4.0.min.css - ekg-assets/bootstrap-1.4.0.min.css
- ekg-assets/chart_line_add.png - ekg-assets/chart_line_add.png
- ekg-assets/cross.png - ekg-assets/cross.png
- test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json
library: library:
source-dirs: src source-dirs: src
ghc-options: ghc-options:
...@@ -503,7 +504,9 @@ tests: ...@@ -503,7 +504,9 @@ tests:
- -rtsopts - -rtsopts
- -with-rtsopts=-N - -with-rtsopts=-N
dependencies: dependencies:
- aeson
- base - base
- bytestring
- containers - containers
- gargantext - gargantext
- gargantext-prelude - gargantext-prelude
......
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
module Ngrams.Query where module Ngrams.Query where
import Control.Monad
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.API.Ngrams import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Data.Coerce
import Data.Monoid import Data.Monoid
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
...@@ -43,14 +46,21 @@ unitTests = testGroup "Query tests" ...@@ -43,14 +46,21 @@ unitTests = testGroup "Query tests"
[ -- Sorting [ -- Sorting
testCase "Simple query mockFlatCorpus" testFlat01 testCase "Simple query mockFlatCorpus" testFlat01
, testCase "Simple query (desc sorting)" testFlat02 , testCase "Simple query (desc sorting)" testFlat02
-- Filtering -- -- Filtering
, testCase "Simple query (listType = MapTerm)" testFlat03 , testCase "Simple query (listType = MapTerm)" testFlat03
, testCase "Simple query (listType = StopTerm)" testFlat04 , testCase "Simple query (listType = StopTerm)" testFlat04
-- Full text search -- -- Full text search
, testCase "Simple query (search with match)" testFlat05 , testCase "Simple query (search with match)" testFlat05
-- Pagination -- -- Pagination
, testCase "Simple pagination on all terms" test_pagination_allTerms
, testCase "Simple pagination on MapTerm" test_pagination01 , testCase "Simple pagination on MapTerm" test_pagination01
, testCase "Simple pagination on MapTerm (limit < total terms)" test_pagination02 , testCase "Simple pagination on MapTerm (limit < total terms)" test_pagination02
, testCase "Simple pagination on MapTerm (offset works)" test_pagination02_offset
, testCase "Simple pagination on ListTerm (limit < total terms)" test_pagination03
, testCase "Simple pagination on ListTerm (offset works)" test_pagination03_offset
, testCase "Simple pagination on CandidateTerm (limit < total terms)" test_pagination04
, testCase "paginating QuantumComputing corpus works (MapTerms)" test_paginationQuantum
, testCase "paginating QuantumComputing corpus works (CandidateTerm)" test_paginationQuantum_02
] ]
-- Let's test that if we request elements sorted in -- Let's test that if we request elements sorted in
...@@ -139,6 +149,29 @@ testFlat05 = do ...@@ -139,6 +149,29 @@ testFlat05 = do
-- Pagination tests -- Pagination tests
test_pagination_allTerms :: Assertion
test_pagination_allTerms = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 10 ( NgramsTable [ haskellElem
, sideEffectsElem
, concHaskellElem
, implementationElem
, ooElem
, languagesElem
, javaElem
, termsElem
] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 8
, _nsq_offset = Nothing
, _nsq_listType = Nothing
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Nothing
, _nsq_searchQuery = mockQueryFn Nothing
}
-- In this test, I'm asking for 5 /map terms/, and as the -- In this test, I'm asking for 5 /map terms/, and as the
-- corpus has only 2, that's what I should get back. -- corpus has only 2, that's what I should get back.
test_pagination01 :: Assertion test_pagination01 :: Assertion
...@@ -170,3 +203,97 @@ test_pagination02 = do ...@@ -170,3 +203,97 @@ test_pagination02 = do
, _nsq_orderBy = Just ScoreDesc , _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing , _nsq_searchQuery = mockQueryFn Nothing
} }
test_pagination02_offset :: Assertion
test_pagination02_offset = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [termsElem, proofElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 2
, _nsq_offset = Just (Offset 2)
, _nsq_listType = Just MapTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_pagination03 :: Assertion
test_pagination03 = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [sideEffectsElem, ooElem, javaElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 3
, _nsq_offset = Nothing
, _nsq_listType = Just StopTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_pagination03_offset :: Assertion
test_pagination03_offset = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [javaElem, pascalElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 2
, _nsq_offset = Just (Offset 2)
, _nsq_listType = Just StopTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_pagination04 :: Assertion
test_pagination04 = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 2 ( NgramsTable [haskellElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 1
, _nsq_offset = Nothing
, _nsq_listType = Just CandidateTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_paginationQuantum :: Assertion
test_paginationQuantum = do
let res = searchTableNgrams quantumComputingCorpus searchQuery
let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res
length elems @?= 10
forM_ elems $ \term ->
assertBool ("found " <> show (_ne_list term) <> " in: " <> show elems) (_ne_list term == MapTerm)
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 10
, _nsq_offset = Nothing
, _nsq_listType = Just MapTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Nothing
, _nsq_searchQuery = mockQueryFn Nothing
}
test_paginationQuantum_02 :: Assertion
test_paginationQuantum_02 = do
let res = searchTableNgrams quantumComputingCorpus searchQuery
let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res
assertBool ("found only " <> show (length elems) <> " in: " <> show elems) (length elems == 10)
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 10
, _nsq_offset = Nothing
, _nsq_listType = Just CandidateTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Nothing
, _nsq_searchQuery = mockQueryFn Nothing
}
{-# LANGUAGE ScopedTypeVariables #-}
module Ngrams.Query.PaginationCorpus where module Ngrams.Query.PaginationCorpus where
import Prelude
import Data.Aeson
import Data.Map.Strict (Map) import Data.Map.Strict (Map)
import Gargantext.API.Ngrams import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main import Gargantext.Core.Types.Main
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Prelude import System.IO.Unsafe
import qualified Data.ByteString as B
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import Paths_gargantext
implementationElem :: NgramsElement implementationElem :: NgramsElement
implementationElem = NgramsElement { implementationElem = NgramsElement {
...@@ -76,6 +83,28 @@ ooElem = NgramsElement { ...@@ -76,6 +83,28 @@ ooElem = NgramsElement {
, _ne_children = mSetFromList [ "null pointer exception" ] , _ne_children = mSetFromList [ "null pointer exception" ]
} }
javaElem :: NgramsElement
javaElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "java"}
, _ne_size = 1
, _ne_list = StopTerm
, _ne_occurrences = Set.fromList [ NodeId 1, NodeId 2, NodeId 3 ]
, _ne_root = Nothing
, _ne_parent = Nothing
, _ne_children = mSetFromList [ "JVM" ]
}
pascalElem :: NgramsElement
pascalElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "pascal"}
, _ne_size = 1
, _ne_list = StopTerm
, _ne_occurrences = Set.fromList [ NodeId 1, NodeId 2 ]
, _ne_root = Nothing
, _ne_parent = Nothing
, _ne_children = mSetFromList [ "turbo", "borland" ]
}
haskellElem :: NgramsElement haskellElem :: NgramsElement
haskellElem = NgramsElement { haskellElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "haskell"} _ne_ngrams = NgramsTerm {unNgramsTerm = "haskell"}
...@@ -113,8 +142,20 @@ paginationCorpus = Versioned 0 $ Map.fromList [ ...@@ -113,8 +142,20 @@ paginationCorpus = Versioned 0 $ Map.fromList [
-- Stop terms -- Stop terms
, ("side effects", sideEffectsElem) , ("side effects", sideEffectsElem)
, ("object oriented", ooElem) , ("object oriented", ooElem)
, ("java", javaElem)
, ("pascal", pascalElem)
-- Candidate terms -- Candidate terms
, ("haskell", haskellElem) , ("haskell", haskellElem)
, ("concurrent haskell", concHaskellElem) , ("concurrent haskell", concHaskellElem)
] ]
quantumComputingCorpus :: Versioned (Map NgramsTerm NgramsElement)
quantumComputingCorpus = unsafePerformIO $ do
pth <- getDataFileName "test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json"
jsonBlob <- B.readFile pth
case eitherDecodeStrict' jsonBlob of
Left err -> error err
Right (Versioned ver (mp :: Map NgramsTerm NgramsRepoElement)) ->
pure $ Versioned ver (Map.mapWithKey (\k -> ngramsElementFromRepo k) mp)
{-# NOINLINE quantumComputingCorpus #-}
...@@ -84,7 +84,7 @@ module Gargantext.API.Ngrams ...@@ -84,7 +84,7 @@ module Gargantext.API.Ngrams
where where
import Control.Concurrent import Control.Concurrent
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex) import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
import Control.Monad.Reader import Control.Monad.Reader
import Data.Foldable import Data.Foldable
import Data.Map.Strict (Map) import Data.Map.Strict (Map)
...@@ -521,50 +521,66 @@ dumpJsonTableMap fpath nodeId ngramsType = do ...@@ -521,50 +521,66 @@ dumpJsonTableMap fpath nodeId ngramsType = do
-- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut). -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
-- TODO: should take only one ListId -- TODO: should take only one ListId
-- | /pure/ function to query a 'Map NgramsTerm NgramsElement', -- | /pure/ function to query a 'Map NgramsTerm NgramsElement', according to a
-- according to a search function. Returns a /versioned/ 'NgramsTable' -- search function. Returns a /versioned/ 'NgramsTable' which is paginated and
-- which is paginated and sorted according to the input -- sorted according to the input 'NgramsSearchQuery', together with the
-- 'NgramsSearchQuery', together with the occurrences of the -- occurrences of the elements.
-- elements.
searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement) searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
-> NgramsSearchQuery -> NgramsSearchQuery
-- ^ The search query on the retrieved data -- ^ The search query on the retrieved data
-> VersionedWithCount NgramsTable -> VersionedWithCount NgramsTable
searchTableNgrams versionedTableMap NgramsSearchQuery{..} = searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
-- lIds <- selectNodesWithUsername NodeList userMaster let tableMap = versionedTableMap ^. v_data
let filteredData = filterNodes tableMap
offset' = getOffset $ maybe 0 identity _nsq_offset tableMapSorted = versionedTableMap
listType' = maybe (const True) (==) _nsq_listType & v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ filteredData)
minSize' = maybe (const True) (<=) (getMinSize <$> _nsq_minSize)
maxSize' = maybe (const True) (>=) (getMaxSize <$> _nsq_maxSize) in toVersionedWithCount (Set.size filteredData) tableMapSorted
where
rootOf tblMap ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
(tblMap ^. at r)
)
(ne ^. ne_root)
selected_node n = minSize' s
&& maxSize' s
&& _nsq_searchQuery (n ^. ne_ngrams)
&& listType' (n ^. ne_list)
where
s = n ^. ne_size
--------------------------------------- -- | Returns the \"root\" of the 'NgramsElement', or it falls back to the input
-- 'NgramsElement' itself, if no root can be found.
-- /CAREFUL/: The root we select might /not/ have the same 'listType' we are
-- filtering for, in which case we have to change its type to match, if needed.
rootOf :: Map NgramsTerm NgramsElement -> NgramsElement -> NgramsElement
rootOf tblMap ne = case ne ^. ne_root of
Nothing -> ne
Just rootKey
| Just r <- tblMap ^. at rootKey
-- NOTE(adinapoli) It's unclear what is the correct behaviour here: should
-- we override the type or we filter out the node altogether?
-> over ne_list (\oldList -> fromMaybe oldList _nsq_listType) r
| otherwise
-> ne
-- | Returns 'True' if the input 'NgramsElement' satisfies the search criteria
-- mandated by 'NgramsSearchQuery'.
matchingNode :: NgramsElement -> Bool
matchingNode inputNode =
let nodeSize = inputNode ^. ne_size
matchesListType = maybe (const True) (==) _nsq_listType
respectsMinSize = maybe (const True) (<=) (getMinSize <$> _nsq_minSize)
respectsMaxSize = maybe (const True) (>=) (getMaxSize <$> _nsq_maxSize)
in respectsMinSize nodeSize
&& respectsMaxSize nodeSize
&& _nsq_searchQuery (inputNode ^. ne_ngrams)
&& matchesListType (inputNode ^. ne_list)
sortOnOrder :: Maybe OrderBy -> ([NgramsElement] -> [NgramsElement])
sortOnOrder Nothing = sortOnOrder (Just ScoreDesc) sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to Set.size) sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to Set.size)
sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to Set.size) sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to Set.size)
--------------------------------------- -- | Filters the given `tableMap` with the search criteria. It returns
-- | Filter the given `tableMap` with the search criteria. -- a set of 'NgramsElement' all matching the input 'NGramsSearchQuery'.
filteredNodes :: Map NgramsTerm NgramsElement -> Set NgramsElement filterNodes :: Map NgramsTerm NgramsElement -> Set NgramsElement
filteredNodes tblMap = roots filterNodes tblMap = Set.map (rootOf tblMap) selectedNodes
where where
list = Set.fromList $ Map.elems tblMap allNodes = Set.fromList $ Map.elems tblMap
selected_nodes = list & Set.filter selected_node selectedNodes = Set.filter matchingNode allNodes
roots = Set.map (rootOf tblMap) selected_nodes
-- | For each input root, extends its occurrence count with -- | For each input root, extends its occurrence count with
-- the information found in the subitems. -- the information found in the subitems.
...@@ -582,22 +598,14 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} = ...@@ -582,22 +598,14 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
-- | Paginate the results -- | Paginate the results
sortAndPaginate :: Set NgramsElement -> [NgramsElement] sortAndPaginate :: Set NgramsElement -> [NgramsElement]
sortAndPaginate = take (getLimit _nsq_limit) sortAndPaginate xs =
. drop offset' let offset' = getOffset $ maybe 0 identity _nsq_offset
. sortOnOrder _nsq_orderBy in take (getLimit _nsq_limit)
. Set.toList . drop offset'
. sortOnOrder _nsq_orderBy
--------------------------------------- . Set.toList
$ xs
tableMap = versionedTableMap ^. v_data
filteredData = filteredNodes tableMap
fltrCount = Set.size filteredData
tableMapSorted = versionedTableMap
& v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ filteredData)
in toVersionedWithCount fltrCount tableMapSorted
getTableNgrams :: forall env err m. getTableNgrams :: forall env err m.
(HasNodeStory env err m, HasNodeError err, CmdCommon env) (HasNodeStory env err m, HasNodeError err, CmdCommon env)
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment