Commit 04e23602 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Fix subtle bug in searchTableNgrams

This commit fixes a bug (or feature? unclear at this stage) of the
`rootOf` function inside `searchTableNgrams`, which could, in some
occasion, return a root which has a different `ListType`, which
in turn throws off pagination.
parent 75488abc
Pipeline #3984 failed with stage
in 26 minutes and 52 seconds
......@@ -25,6 +25,7 @@ data-files:
ekg-assets/bootstrap-1.4.0.min.css
ekg-assets/chart_line_add.png
ekg-assets/cross.png
test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json
library
exposed-modules:
......@@ -883,7 +884,9 @@ test-suite garg-test
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends:
QuickCheck
, aeson
, base
, bytestring
, containers
, duckling
, extra
......
......@@ -43,6 +43,7 @@ data-files:
- ekg-assets/bootstrap-1.4.0.min.css
- ekg-assets/chart_line_add.png
- ekg-assets/cross.png
- test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json
library:
source-dirs: src
ghc-options:
......@@ -503,7 +504,9 @@ tests:
- -rtsopts
- -with-rtsopts=-N
dependencies:
- aeson
- base
- bytestring
- containers
- gargantext
- gargantext-prelude
......
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
module Ngrams.Query where
import Control.Monad
import Gargantext.Prelude
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Data.Coerce
import Data.Monoid
import qualified Data.Text as T
import qualified Data.Map.Strict as Map
......@@ -43,14 +46,21 @@ unitTests = testGroup "Query tests"
[ -- Sorting
testCase "Simple query mockFlatCorpus" testFlat01
, testCase "Simple query (desc sorting)" testFlat02
-- Filtering
-- -- Filtering
, testCase "Simple query (listType = MapTerm)" testFlat03
, testCase "Simple query (listType = StopTerm)" testFlat04
-- Full text search
-- -- Full text search
, testCase "Simple query (search with match)" testFlat05
-- Pagination
-- -- Pagination
, testCase "Simple pagination on all terms" test_pagination_allTerms
, testCase "Simple pagination on MapTerm" test_pagination01
, testCase "Simple pagination on MapTerm (limit < total terms)" test_pagination02
, testCase "Simple pagination on MapTerm (limit < total terms)" test_pagination02
, testCase "Simple pagination on MapTerm (offset works)" test_pagination02_offset
, testCase "Simple pagination on ListTerm (limit < total terms)" test_pagination03
, testCase "Simple pagination on ListTerm (offset works)" test_pagination03_offset
, testCase "Simple pagination on CandidateTerm (limit < total terms)" test_pagination04
, testCase "paginating QuantumComputing corpus works (MapTerms)" test_paginationQuantum
, testCase "paginating QuantumComputing corpus works (CandidateTerm)" test_paginationQuantum_02
]
-- Let's test that if we request elements sorted in
......@@ -139,6 +149,29 @@ testFlat05 = do
-- Pagination tests
test_pagination_allTerms :: Assertion
test_pagination_allTerms = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 10 ( NgramsTable [ haskellElem
, sideEffectsElem
, concHaskellElem
, implementationElem
, ooElem
, languagesElem
, javaElem
, termsElem
] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 8
, _nsq_offset = Nothing
, _nsq_listType = Nothing
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Nothing
, _nsq_searchQuery = mockQueryFn Nothing
}
-- In this test, I'm asking for 5 /map terms/, and as the
-- corpus has only 2, that's what I should get back.
test_pagination01 :: Assertion
......@@ -170,3 +203,97 @@ test_pagination02 = do
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_pagination02_offset :: Assertion
test_pagination02_offset = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [termsElem, proofElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 2
, _nsq_offset = Just (Offset 2)
, _nsq_listType = Just MapTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_pagination03 :: Assertion
test_pagination03 = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [sideEffectsElem, ooElem, javaElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 3
, _nsq_offset = Nothing
, _nsq_listType = Just StopTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_pagination03_offset :: Assertion
test_pagination03_offset = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [javaElem, pascalElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 2
, _nsq_offset = Just (Offset 2)
, _nsq_listType = Just StopTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_pagination04 :: Assertion
test_pagination04 = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 2 ( NgramsTable [haskellElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 1
, _nsq_offset = Nothing
, _nsq_listType = Just CandidateTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_paginationQuantum :: Assertion
test_paginationQuantum = do
let res = searchTableNgrams quantumComputingCorpus searchQuery
let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res
length elems @?= 10
forM_ elems $ \term ->
assertBool ("found " <> show (_ne_list term) <> " in: " <> show elems) (_ne_list term == MapTerm)
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 10
, _nsq_offset = Nothing
, _nsq_listType = Just MapTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Nothing
, _nsq_searchQuery = mockQueryFn Nothing
}
test_paginationQuantum_02 :: Assertion
test_paginationQuantum_02 = do
let res = searchTableNgrams quantumComputingCorpus searchQuery
let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res
assertBool ("found only " <> show (length elems) <> " in: " <> show elems) (length elems == 10)
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 10
, _nsq_offset = Nothing
, _nsq_listType = Just CandidateTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Nothing
, _nsq_searchQuery = mockQueryFn Nothing
}
{-# LANGUAGE ScopedTypeVariables #-}
module Ngrams.Query.PaginationCorpus where
import Prelude
import Data.Aeson
import Data.Map.Strict (Map)
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main
import Gargantext.Database.Admin.Types.Node
import Gargantext.Prelude
import System.IO.Unsafe
import qualified Data.ByteString as B
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import Paths_gargantext
implementationElem :: NgramsElement
implementationElem = NgramsElement {
......@@ -76,6 +83,28 @@ ooElem = NgramsElement {
, _ne_children = mSetFromList [ "null pointer exception" ]
}
javaElem :: NgramsElement
javaElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "java"}
, _ne_size = 1
, _ne_list = StopTerm
, _ne_occurrences = Set.fromList [ NodeId 1, NodeId 2, NodeId 3 ]
, _ne_root = Nothing
, _ne_parent = Nothing
, _ne_children = mSetFromList [ "JVM" ]
}
pascalElem :: NgramsElement
pascalElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "pascal"}
, _ne_size = 1
, _ne_list = StopTerm
, _ne_occurrences = Set.fromList [ NodeId 1, NodeId 2 ]
, _ne_root = Nothing
, _ne_parent = Nothing
, _ne_children = mSetFromList [ "turbo", "borland" ]
}
haskellElem :: NgramsElement
haskellElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "haskell"}
......@@ -113,8 +142,20 @@ paginationCorpus = Versioned 0 $ Map.fromList [
-- Stop terms
, ("side effects", sideEffectsElem)
, ("object oriented", ooElem)
, ("java", javaElem)
, ("pascal", pascalElem)
-- Candidate terms
, ("haskell", haskellElem)
, ("concurrent haskell", concHaskellElem)
]
quantumComputingCorpus :: Versioned (Map NgramsTerm NgramsElement)
quantumComputingCorpus = unsafePerformIO $ do
pth <- getDataFileName "test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json"
jsonBlob <- B.readFile pth
case eitherDecodeStrict' jsonBlob of
Left err -> error err
Right (Versioned ver (mp :: Map NgramsTerm NgramsRepoElement)) ->
pure $ Versioned ver (Map.mapWithKey (\k -> ngramsElementFromRepo k) mp)
{-# NOINLINE quantumComputingCorpus #-}
......@@ -84,7 +84,7 @@ module Gargantext.API.Ngrams
where
import Control.Concurrent
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex)
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
import Control.Monad.Reader
import Data.Foldable
import Data.Map.Strict (Map)
......@@ -521,50 +521,66 @@ dumpJsonTableMap fpath nodeId ngramsType = do
-- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
-- TODO: should take only one ListId
-- | /pure/ function to query a 'Map NgramsTerm NgramsElement',
-- according to a search function. Returns a /versioned/ 'NgramsTable'
-- which is paginated and sorted according to the input
-- 'NgramsSearchQuery', together with the occurrences of the
-- elements.
-- | /pure/ function to query a 'Map NgramsTerm NgramsElement', according to a
-- search function. Returns a /versioned/ 'NgramsTable' which is paginated and
-- sorted according to the input 'NgramsSearchQuery', together with the
-- occurrences of the elements.
searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
-> NgramsSearchQuery
-- ^ The search query on the retrieved data
-> VersionedWithCount NgramsTable
searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
-- lIds <- selectNodesWithUsername NodeList userMaster
let
offset' = getOffset $ maybe 0 identity _nsq_offset
listType' = maybe (const True) (==) _nsq_listType
minSize' = maybe (const True) (<=) (getMinSize <$> _nsq_minSize)
maxSize' = maybe (const True) (>=) (getMaxSize <$> _nsq_maxSize)
rootOf tblMap ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
(tblMap ^. at r)
)
(ne ^. ne_root)
selected_node n = minSize' s
&& maxSize' s
&& _nsq_searchQuery (n ^. ne_ngrams)
&& listType' (n ^. ne_list)
where
s = n ^. ne_size
let tableMap = versionedTableMap ^. v_data
filteredData = filterNodes tableMap
tableMapSorted = versionedTableMap
& v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ filteredData)
in toVersionedWithCount (Set.size filteredData) tableMapSorted
where
---------------------------------------
-- | Returns the \"root\" of the 'NgramsElement', or it falls back to the input
-- 'NgramsElement' itself, if no root can be found.
-- /CAREFUL/: The root we select might /not/ have the same 'listType' we are
-- filtering for, in which case we have to change its type to match, if needed.
rootOf :: Map NgramsTerm NgramsElement -> NgramsElement -> NgramsElement
rootOf tblMap ne = case ne ^. ne_root of
Nothing -> ne
Just rootKey
| Just r <- tblMap ^. at rootKey
-- NOTE(adinapoli) It's unclear what is the correct behaviour here: should
-- we override the type or we filter out the node altogether?
-> over ne_list (\oldList -> fromMaybe oldList _nsq_listType) r
| otherwise
-> ne
-- | Returns 'True' if the input 'NgramsElement' satisfies the search criteria
-- mandated by 'NgramsSearchQuery'.
matchingNode :: NgramsElement -> Bool
matchingNode inputNode =
let nodeSize = inputNode ^. ne_size
matchesListType = maybe (const True) (==) _nsq_listType
respectsMinSize = maybe (const True) (<=) (getMinSize <$> _nsq_minSize)
respectsMaxSize = maybe (const True) (>=) (getMaxSize <$> _nsq_maxSize)
in respectsMinSize nodeSize
&& respectsMaxSize nodeSize
&& _nsq_searchQuery (inputNode ^. ne_ngrams)
&& matchesListType (inputNode ^. ne_list)
sortOnOrder :: Maybe OrderBy -> ([NgramsElement] -> [NgramsElement])
sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to Set.size)
sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to Set.size)
---------------------------------------
-- | Filter the given `tableMap` with the search criteria.
filteredNodes :: Map NgramsTerm NgramsElement -> Set NgramsElement
filteredNodes tblMap = roots
-- | Filters the given `tableMap` with the search criteria. It returns
-- a set of 'NgramsElement' all matching the input 'NGramsSearchQuery'.
filterNodes :: Map NgramsTerm NgramsElement -> Set NgramsElement
filterNodes tblMap = Set.map (rootOf tblMap) selectedNodes
where
list = Set.fromList $ Map.elems tblMap
selected_nodes = list & Set.filter selected_node
roots = Set.map (rootOf tblMap) selected_nodes
allNodes = Set.fromList $ Map.elems tblMap
selectedNodes = Set.filter matchingNode allNodes
-- | For each input root, extends its occurrence count with
-- the information found in the subitems.
......@@ -582,22 +598,14 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
-- | Paginate the results
sortAndPaginate :: Set NgramsElement -> [NgramsElement]
sortAndPaginate = take (getLimit _nsq_limit)
. drop offset'
. sortOnOrder _nsq_orderBy
. Set.toList
---------------------------------------
tableMap = versionedTableMap ^. v_data
filteredData = filteredNodes tableMap
fltrCount = Set.size filteredData
tableMapSorted = versionedTableMap
& v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ filteredData)
sortAndPaginate xs =
let offset' = getOffset $ maybe 0 identity _nsq_offset
in take (getLimit _nsq_limit)
. drop offset'
. sortOnOrder _nsq_orderBy
. Set.toList
$ xs
in toVersionedWithCount fltrCount tableMapSorted
getTableNgrams :: forall env err m.
(HasNodeStory env err m, HasNodeError err, CmdCommon env)
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment