Commit 04e23602 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Fix subtle bug in searchTableNgrams

This commit fixes a bug (or feature? unclear at this stage) of the
`rootOf` function inside `searchTableNgrams`, which could, in some
occasion, return a root which has a different `ListType`, which
in turn throws off pagination.
parent 75488abc
Pipeline #3984 failed with stage
in 26 minutes and 52 seconds
......@@ -25,6 +25,7 @@ data-files:
ekg-assets/bootstrap-1.4.0.min.css
ekg-assets/chart_line_add.png
ekg-assets/cross.png
test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json
library
exposed-modules:
......@@ -883,7 +884,9 @@ test-suite garg-test
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends:
QuickCheck
, aeson
, base
, bytestring
, containers
, duckling
, extra
......
......@@ -43,6 +43,7 @@ data-files:
- ekg-assets/bootstrap-1.4.0.min.css
- ekg-assets/chart_line_add.png
- ekg-assets/cross.png
- test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json
library:
source-dirs: src
ghc-options:
......@@ -503,7 +504,9 @@ tests:
- -rtsopts
- -with-rtsopts=-N
dependencies:
- aeson
- base
- bytestring
- containers
- gargantext
- gargantext-prelude
......
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
module Ngrams.Query where
import Control.Monad
import Gargantext.Prelude
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Data.Coerce
import Data.Monoid
import qualified Data.Text as T
import qualified Data.Map.Strict as Map
......@@ -43,14 +46,21 @@ unitTests = testGroup "Query tests"
[ -- Sorting
testCase "Simple query mockFlatCorpus" testFlat01
, testCase "Simple query (desc sorting)" testFlat02
-- Filtering
-- -- Filtering
, testCase "Simple query (listType = MapTerm)" testFlat03
, testCase "Simple query (listType = StopTerm)" testFlat04
-- Full text search
-- -- Full text search
, testCase "Simple query (search with match)" testFlat05
-- Pagination
-- -- Pagination
, testCase "Simple pagination on all terms" test_pagination_allTerms
, testCase "Simple pagination on MapTerm" test_pagination01
, testCase "Simple pagination on MapTerm (limit < total terms)" test_pagination02
, testCase "Simple pagination on MapTerm (limit < total terms)" test_pagination02
, testCase "Simple pagination on MapTerm (offset works)" test_pagination02_offset
, testCase "Simple pagination on ListTerm (limit < total terms)" test_pagination03
, testCase "Simple pagination on ListTerm (offset works)" test_pagination03_offset
, testCase "Simple pagination on CandidateTerm (limit < total terms)" test_pagination04
, testCase "paginating QuantumComputing corpus works (MapTerms)" test_paginationQuantum
, testCase "paginating QuantumComputing corpus works (CandidateTerm)" test_paginationQuantum_02
]
-- Let's test that if we request elements sorted in
......@@ -139,6 +149,29 @@ testFlat05 = do
-- Pagination tests
test_pagination_allTerms :: Assertion
test_pagination_allTerms = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 10 ( NgramsTable [ haskellElem
, sideEffectsElem
, concHaskellElem
, implementationElem
, ooElem
, languagesElem
, javaElem
, termsElem
] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 8
, _nsq_offset = Nothing
, _nsq_listType = Nothing
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Nothing
, _nsq_searchQuery = mockQueryFn Nothing
}
-- In this test, I'm asking for 5 /map terms/, and as the
-- corpus has only 2, that's what I should get back.
test_pagination01 :: Assertion
......@@ -170,3 +203,97 @@ test_pagination02 = do
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_pagination02_offset :: Assertion
test_pagination02_offset = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [termsElem, proofElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 2
, _nsq_offset = Just (Offset 2)
, _nsq_listType = Just MapTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_pagination03 :: Assertion
test_pagination03 = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [sideEffectsElem, ooElem, javaElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 3
, _nsq_offset = Nothing
, _nsq_listType = Just StopTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_pagination03_offset :: Assertion
test_pagination03_offset = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [javaElem, pascalElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 2
, _nsq_offset = Just (Offset 2)
, _nsq_listType = Just StopTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_pagination04 :: Assertion
test_pagination04 = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 2 ( NgramsTable [haskellElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 1
, _nsq_offset = Nothing
, _nsq_listType = Just CandidateTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_paginationQuantum :: Assertion
test_paginationQuantum = do
let res = searchTableNgrams quantumComputingCorpus searchQuery
let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res
length elems @?= 10
forM_ elems $ \term ->
assertBool ("found " <> show (_ne_list term) <> " in: " <> show elems) (_ne_list term == MapTerm)
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 10
, _nsq_offset = Nothing
, _nsq_listType = Just MapTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Nothing
, _nsq_searchQuery = mockQueryFn Nothing
}
test_paginationQuantum_02 :: Assertion
test_paginationQuantum_02 = do
let res = searchTableNgrams quantumComputingCorpus searchQuery
let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res
assertBool ("found only " <> show (length elems) <> " in: " <> show elems) (length elems == 10)
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 10
, _nsq_offset = Nothing
, _nsq_listType = Just CandidateTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Nothing
, _nsq_searchQuery = mockQueryFn Nothing
}
{-# LANGUAGE ScopedTypeVariables #-}
module Ngrams.Query.PaginationCorpus where
import Prelude
import Data.Aeson
import Data.Map.Strict (Map)
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main
import Gargantext.Database.Admin.Types.Node
import Gargantext.Prelude
import System.IO.Unsafe
import qualified Data.ByteString as B
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import Paths_gargantext
implementationElem :: NgramsElement
implementationElem = NgramsElement {
......@@ -76,6 +83,28 @@ ooElem = NgramsElement {
, _ne_children = mSetFromList [ "null pointer exception" ]
}
javaElem :: NgramsElement
javaElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "java"}
, _ne_size = 1
, _ne_list = StopTerm
, _ne_occurrences = Set.fromList [ NodeId 1, NodeId 2, NodeId 3 ]
, _ne_root = Nothing
, _ne_parent = Nothing
, _ne_children = mSetFromList [ "JVM" ]
}
pascalElem :: NgramsElement
pascalElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "pascal"}
, _ne_size = 1
, _ne_list = StopTerm
, _ne_occurrences = Set.fromList [ NodeId 1, NodeId 2 ]
, _ne_root = Nothing
, _ne_parent = Nothing
, _ne_children = mSetFromList [ "turbo", "borland" ]
}
haskellElem :: NgramsElement
haskellElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "haskell"}
......@@ -113,8 +142,20 @@ paginationCorpus = Versioned 0 $ Map.fromList [
-- Stop terms
, ("side effects", sideEffectsElem)
, ("object oriented", ooElem)
, ("java", javaElem)
, ("pascal", pascalElem)
-- Candidate terms
, ("haskell", haskellElem)
, ("concurrent haskell", concHaskellElem)
]
quantumComputingCorpus :: Versioned (Map NgramsTerm NgramsElement)
quantumComputingCorpus = unsafePerformIO $ do
pth <- getDataFileName "test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json"
jsonBlob <- B.readFile pth
case eitherDecodeStrict' jsonBlob of
Left err -> error err
Right (Versioned ver (mp :: Map NgramsTerm NgramsRepoElement)) ->
pure $ Versioned ver (Map.mapWithKey (\k -> ngramsElementFromRepo k) mp)
{-# NOINLINE quantumComputingCorpus #-}
......@@ -84,7 +84,7 @@ module Gargantext.API.Ngrams
where
import Control.Concurrent
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex)
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
import Control.Monad.Reader
import Data.Foldable
import Data.Map.Strict (Map)
......@@ -521,50 +521,66 @@ dumpJsonTableMap fpath nodeId ngramsType = do
-- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
-- TODO: should take only one ListId
-- | /pure/ function to query a 'Map NgramsTerm NgramsElement',
-- according to a search function. Returns a /versioned/ 'NgramsTable'
-- which is paginated and sorted according to the input
-- 'NgramsSearchQuery', together with the occurrences of the
-- elements.
-- | /pure/ function to query a 'Map NgramsTerm NgramsElement', according to a
-- search function. Returns a /versioned/ 'NgramsTable' which is paginated and
-- sorted according to the input 'NgramsSearchQuery', together with the
-- occurrences of the elements.
searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
-> NgramsSearchQuery
-- ^ The search query on the retrieved data
-> VersionedWithCount NgramsTable
searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
-- lIds <- selectNodesWithUsername NodeList userMaster
let
offset' = getOffset $ maybe 0 identity _nsq_offset
listType' = maybe (const True) (==) _nsq_listType
minSize' = maybe (const True) (<=) (getMinSize <$> _nsq_minSize)
maxSize' = maybe (const True) (>=) (getMaxSize <$> _nsq_maxSize)
rootOf tblMap ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
(tblMap ^. at r)
)
(ne ^. ne_root)
selected_node n = minSize' s
&& maxSize' s
&& _nsq_searchQuery (n ^. ne_ngrams)
&& listType' (n ^. ne_list)
where
s = n ^. ne_size
let tableMap = versionedTableMap ^. v_data
filteredData = filterNodes tableMap
tableMapSorted = versionedTableMap
& v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ filteredData)
in toVersionedWithCount (Set.size filteredData) tableMapSorted
where
---------------------------------------
-- | Returns the \"root\" of the 'NgramsElement', or it falls back to the input
-- 'NgramsElement' itself, if no root can be found.
-- /CAREFUL/: The root we select might /not/ have the same 'listType' we are
-- filtering for, in which case we have to change its type to match, if needed.
rootOf :: Map NgramsTerm NgramsElement -> NgramsElement -> NgramsElement
rootOf tblMap ne = case ne ^. ne_root of
Nothing -> ne
Just rootKey
| Just r <- tblMap ^. at rootKey
-- NOTE(adinapoli) It's unclear what is the correct behaviour here: should
-- we override the type or we filter out the node altogether?
-> over ne_list (\oldList -> fromMaybe oldList _nsq_listType) r
| otherwise
-> ne
-- | Returns 'True' if the input 'NgramsElement' satisfies the search criteria
-- mandated by 'NgramsSearchQuery'.
matchingNode :: NgramsElement -> Bool
matchingNode inputNode =
let nodeSize = inputNode ^. ne_size
matchesListType = maybe (const True) (==) _nsq_listType
respectsMinSize = maybe (const True) (<=) (getMinSize <$> _nsq_minSize)
respectsMaxSize = maybe (const True) (>=) (getMaxSize <$> _nsq_maxSize)
in respectsMinSize nodeSize
&& respectsMaxSize nodeSize
&& _nsq_searchQuery (inputNode ^. ne_ngrams)
&& matchesListType (inputNode ^. ne_list)
sortOnOrder :: Maybe OrderBy -> ([NgramsElement] -> [NgramsElement])
sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to Set.size)
sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to Set.size)
---------------------------------------
-- | Filter the given `tableMap` with the search criteria.
filteredNodes :: Map NgramsTerm NgramsElement -> Set NgramsElement
filteredNodes tblMap = roots
-- | Filters the given `tableMap` with the search criteria. It returns
-- a set of 'NgramsElement' all matching the input 'NGramsSearchQuery'.
filterNodes :: Map NgramsTerm NgramsElement -> Set NgramsElement
filterNodes tblMap = Set.map (rootOf tblMap) selectedNodes
where
list = Set.fromList $ Map.elems tblMap
selected_nodes = list & Set.filter selected_node
roots = Set.map (rootOf tblMap) selected_nodes
allNodes = Set.fromList $ Map.elems tblMap
selectedNodes = Set.filter matchingNode allNodes
-- | For each input root, extends its occurrence count with
-- the information found in the subitems.
......@@ -582,22 +598,14 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
-- | Paginate the results
sortAndPaginate :: Set NgramsElement -> [NgramsElement]
sortAndPaginate = take (getLimit _nsq_limit)
. drop offset'
. sortOnOrder _nsq_orderBy
. Set.toList
---------------------------------------
tableMap = versionedTableMap ^. v_data
filteredData = filteredNodes tableMap
fltrCount = Set.size filteredData
tableMapSorted = versionedTableMap
& v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ filteredData)
sortAndPaginate xs =
let offset' = getOffset $ maybe 0 identity _nsq_offset
in take (getLimit _nsq_limit)
. drop offset'
. sortOnOrder _nsq_orderBy
. Set.toList
$ xs
in toVersionedWithCount fltrCount tableMapSorted
getTableNgrams :: forall env err m.
(HasNodeStory env err m, HasNodeError err, CmdCommon env)
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment