Verified Commit b19412f7 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 162-dev-haskell-9.2

parents ef12efb6 147dcb9d
## Version 0.0.6.9.9.4.2
* [BACK][FIX][[Node terms] Random slowness on loading a page list of terms (#199)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/199)
* [FRONT][CLEAN] Removing API that do not fully work (yet)
* [FRONT][FIX] Chat Link
## Version 0.0.6.9.9.4.1
......
......@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.6.9.9.4.1
version: 0.0.6.9.9.4.2
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -25,6 +25,7 @@ data-files:
ekg-assets/bootstrap-1.4.0.min.css
ekg-assets/chart_line_add.png
ekg-assets/cross.png
test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json
library
exposed-modules:
......@@ -848,6 +849,8 @@ test-suite garg-test
Ngrams.Lang.Occurrences
Ngrams.Metrics
Ngrams.NLP
Ngrams.Query
Ngrams.Query.PaginationCorpus
Parsers.Date
Parsers.Types
Parsers.WOS
......@@ -881,7 +884,10 @@ test-suite garg-test
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends:
QuickCheck
, aeson
, base
, bytestring
, containers
, duckling
, extra
, gargantext
......@@ -889,6 +895,8 @@ test-suite garg-test
, hspec
, parsec
, quickcheck-instances
, tasty
, tasty-hunit
, text
, time
, unordered-containers
......
......@@ -6,7 +6,7 @@ name: gargantext
# | | | +----- Layers * : New versions with API additions
# | | | | +--- Layers * : New versions without API breaking changes
# | | | | |
version: '0.0.6.9.9.4.1'
version: '0.0.6.9.9.4.2'
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -43,6 +43,7 @@ data-files:
- ekg-assets/bootstrap-1.4.0.min.css
- ekg-assets/chart_line_add.png
- ekg-assets/cross.png
- test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json
library:
source-dirs: src
ghc-options:
......@@ -503,7 +504,10 @@ tests:
- -rtsopts
- -with-rtsopts=-N
dependencies:
- aeson
- base
- bytestring
- containers
- gargantext
- gargantext-prelude
- hspec
......@@ -512,6 +516,8 @@ tests:
- time
- parsec
- duckling
- tasty
- tasty-hunit
- text
- unordered-containers
jobqueue-test:
......
......@@ -9,5 +9,5 @@ LOGFILE=$FOLDER"/"$FILE
mkdir -p $FOLDER
env LANG=en_US.UTF-8 ~/.local/bin/gargantext-server --ini gargantext.ini --run Dev +RTS > $LOGFILE 2>&1 & tail -F $LOGFILE # -p
env LANG=en_US.UTF-8 ~/.local/bin/gargantext-server --ini gargantext.ini --run Prod +RTS > $LOGFILE 2>&1 & tail -F $LOGFILE # -p
#env LANG=en_US.UTF-8 stack --docker exec gargantext-server -- --ini gargantext.ini --run Dev +RTS > $LOGFILE 2>&1 & tail -F $LOGFILE # -p
......@@ -10,14 +10,12 @@ Portability : POSIX
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import qualified Core.Utils as Utils
--import qualified Ngrams.Lang.Fr as Fr
--import qualified Ngrams.Lang as Lang
import qualified Ngrams.Lang.Occurrences as Occ
import qualified Ngrams.NLP as NLP
import qualified Ngrams.Metrics as Metrics
import qualified Ngrams.Query as NgramsQuery
import qualified Parsers.Date as PD
-- import qualified Graph.Distance as GD
import qualified Graph.Clustering as Graph
......@@ -35,3 +33,4 @@ main = do
-- GD.test
Crypto.test
NLP.main
NgramsQuery.main
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
module Ngrams.Query where
import Control.Monad
import Gargantext.Prelude
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Data.Coerce
import Data.Monoid
import qualified Data.Text as T
import qualified Data.Map.Strict as Map
import Data.Map.Strict (Map)
import Gargantext.Core.Types.Query
import Gargantext.Core.Types.Main
import Ngrams.Query.PaginationCorpus
import Test.Tasty
import Test.Tasty.HUnit
main :: IO ()
main = defaultMain tests
tests :: TestTree
tests = testGroup "Ngrams" [unitTests]
curryElem :: NgramsElement
curryElem = mkNgramsElement "curry" MapTerm Nothing mempty
elbaElem :: NgramsElement
elbaElem = mkNgramsElement "elba" MapTerm Nothing mempty
mockFlatCorpus :: Versioned (Map NgramsTerm NgramsElement)
mockFlatCorpus = Versioned 0 $ Map.fromList [
( "haskell", curryElem)
, ( "idris", elbaElem)
]
mockQueryFn :: Maybe T.Text -> NgramsTerm -> Bool
mockQueryFn searchQuery (NgramsTerm nt) =
maybe (const True) T.isInfixOf (T.toLower <$> searchQuery) (T.toLower nt)
unitTests :: TestTree
unitTests = testGroup "Query tests"
[ -- Sorting
testCase "Simple query mockFlatCorpus" testFlat01
, testCase "Simple query (desc sorting)" testFlat02
-- -- Filtering
, testCase "Simple query (listType = MapTerm)" testFlat03
, testCase "Simple query (listType = StopTerm)" testFlat04
-- -- Full text search
, testCase "Simple query (search with match)" testFlat05
-- -- Pagination
, testCase "Simple pagination on all terms" test_pagination_allTerms
, testCase "Simple pagination on MapTerm" test_pagination01
, testCase "Simple pagination on MapTerm (limit < total terms)" test_pagination02
, testCase "Simple pagination on MapTerm (offset works)" test_pagination02_offset
, testCase "Simple pagination on ListTerm (limit < total terms)" test_pagination03
, testCase "Simple pagination on ListTerm (offset works)" test_pagination03_offset
, testCase "Simple pagination on CandidateTerm (limit < total terms)" test_pagination04
, testCase "paginating QuantumComputing corpus works (MapTerms)" test_paginationQuantum
, testCase "paginating QuantumComputing corpus works (CandidateTerm)" test_paginationQuantum_02
]
-- Let's test that if we request elements sorted in
-- /ascending/ order, we get them.
testFlat01 :: Assertion
testFlat01 = do
let res = searchTableNgrams mockFlatCorpus searchQuery
res @?= VersionedWithCount 0 2 ( NgramsTable [curryElem, elbaElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 5
, _nsq_offset = Nothing
, _nsq_listType = Nothing
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just TermAsc
, _nsq_searchQuery = mockQueryFn Nothing
}
-- Let's test that if we request elements sorted in
-- /descending/ order, we get them.
testFlat02 :: Assertion
testFlat02 = do
let res = searchTableNgrams mockFlatCorpus searchQuery
res @?= VersionedWithCount 0 2 ( NgramsTable [elbaElem, curryElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 5
, _nsq_offset = Nothing
, _nsq_listType = Nothing
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just TermDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
testFlat03 :: Assertion
testFlat03 = do
let res = searchTableNgrams mockFlatCorpus searchQuery
res @?= VersionedWithCount 0 2 ( NgramsTable [elbaElem, curryElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 5
, _nsq_offset = Nothing
, _nsq_listType = Just MapTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just TermDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
-- Here we are searching for all the stop terms, but
-- due to the fact we don't have any inside 'mockFlatCorpus',
-- we should get no results.
testFlat04 :: Assertion
testFlat04 = do
let res = searchTableNgrams mockFlatCorpus searchQuery
res @?= VersionedWithCount 0 0 ( NgramsTable [] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 5
, _nsq_offset = Nothing
, _nsq_listType = Just StopTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just TermDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
-- For this test, we run a full text search on the word
-- \"curry\", and we expect back a result.
testFlat05 :: Assertion
testFlat05 = do
let res = searchTableNgrams mockFlatCorpus searchQuery
res @?= VersionedWithCount 0 1 ( NgramsTable [curryElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 5
, _nsq_offset = Nothing
, _nsq_listType = Nothing
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just TermDesc
, _nsq_searchQuery = mockQueryFn (Just "curry")
}
-- Pagination tests
test_pagination_allTerms :: Assertion
test_pagination_allTerms = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 10 ( NgramsTable [ haskellElem
, sideEffectsElem
, concHaskellElem
, implementationElem
, ooElem
, languagesElem
, javaElem
, termsElem
] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 8
, _nsq_offset = Nothing
, _nsq_listType = Nothing
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Nothing
, _nsq_searchQuery = mockQueryFn Nothing
}
-- In this test, I'm asking for 5 /map terms/, and as the
-- corpus has only 2, that's what I should get back.
test_pagination01 :: Assertion
test_pagination01 = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [implementationElem, languagesElem, termsElem, proofElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 5
, _nsq_offset = Nothing
, _nsq_listType = Just MapTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_pagination02 :: Assertion
test_pagination02 = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [implementationElem, languagesElem, termsElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 3
, _nsq_offset = Nothing
, _nsq_listType = Just MapTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_pagination02_offset :: Assertion
test_pagination02_offset = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [termsElem, proofElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 2
, _nsq_offset = Just (Offset 2)
, _nsq_listType = Just MapTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_pagination03 :: Assertion
test_pagination03 = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [sideEffectsElem, ooElem, javaElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 3
, _nsq_offset = Nothing
, _nsq_listType = Just StopTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_pagination03_offset :: Assertion
test_pagination03_offset = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 4 ( NgramsTable [javaElem, pascalElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 2
, _nsq_offset = Just (Offset 2)
, _nsq_listType = Just StopTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_pagination04 :: Assertion
test_pagination04 = do
let res = searchTableNgrams paginationCorpus searchQuery
res @?= VersionedWithCount 0 2 ( NgramsTable [haskellElem] )
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 1
, _nsq_offset = Nothing
, _nsq_listType = Just CandidateTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Just ScoreDesc
, _nsq_searchQuery = mockQueryFn Nothing
}
test_paginationQuantum :: Assertion
test_paginationQuantum = do
let res = searchTableNgrams quantumComputingCorpus searchQuery
let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res
length elems @?= 10
forM_ elems $ \term ->
assertBool ("found " <> show (_ne_list term) <> " in: " <> show elems) (_ne_list term == MapTerm)
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 10
, _nsq_offset = Nothing
, _nsq_listType = Just MapTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Nothing
, _nsq_searchQuery = mockQueryFn Nothing
}
test_paginationQuantum_02 :: Assertion
test_paginationQuantum_02 = do
let res = searchTableNgrams quantumComputingCorpus searchQuery
let elems = coerce @NgramsTable @[NgramsElement] $ _vc_data res
assertBool ("found only " <> show (length elems) <> " in: " <> show elems) (length elems == 10)
where
searchQuery = NgramsSearchQuery {
_nsq_limit = Limit 10
, _nsq_offset = Nothing
, _nsq_listType = Just CandidateTerm
, _nsq_minSize = Nothing
, _nsq_maxSize = Nothing
, _nsq_orderBy = Nothing
, _nsq_searchQuery = mockQueryFn Nothing
}
{-# LANGUAGE ScopedTypeVariables #-}
module Ngrams.Query.PaginationCorpus where
import Prelude
import Data.Aeson
import Data.Map.Strict (Map)
import Gargantext.API.Ngrams
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main
import Gargantext.Database.Admin.Types.Node
import System.IO.Unsafe
import qualified Data.ByteString as B
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import Paths_gargantext
implementationElem :: NgramsElement
implementationElem = NgramsElement {
_ne_ngrams = "implementation"
, _ne_size = 1
, _ne_list = MapTerm
, _ne_occurrences = Set.fromList [ NodeId 1, NodeId 2, NodeId 3, NodeId 4, NodeId 5 ]
, _ne_root = Nothing
, _ne_parent = Nothing
, _ne_children = mSetFromList [ "code", "functions", "language", "programs" ]
}
languagesElem :: NgramsElement
languagesElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "languages"}
, _ne_size = 1
, _ne_list = MapTerm
, _ne_occurrences = Set.fromList [ NodeId 1, NodeId 2 , NodeId 3 , NodeId 4 ]
, _ne_root = Nothing
, _ne_parent = Nothing
, _ne_children = mSetFromList [ "approach", "use" ]
}
termsElem :: NgramsElement
termsElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "terms"}
, _ne_size = 1
, _ne_list = MapTerm
, _ne_occurrences = Set.fromList [ NodeId 1, NodeId 2 , NodeId 3 ]
, _ne_root = Nothing
, _ne_parent = Nothing
, _ne_children = mSetFromList [ "algorithm", "evaluation", "monad", "programmers" ]
}
proofElem :: NgramsElement
proofElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "proof"}
, _ne_size = 1
, _ne_list = MapTerm
, _ne_occurrences = Set.fromList [ NodeId 1, NodeId 2 ]
, _ne_root = Nothing
, _ne_parent = Nothing
, _ne_children = mSetFromList [ "proofs" ]
}
sideEffectsElem :: NgramsElement
sideEffectsElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "side effects"}
, _ne_size = 1
, _ne_list = StopTerm
, _ne_occurrences = Set.fromList [ NodeId 1, NodeId 2, NodeId 3, NodeId 4, NodeId 5, NodeId 6 ]
, _ne_root = Nothing
, _ne_parent = Nothing
, _ne_children = mSetFromList [ ]
}
ooElem :: NgramsElement
ooElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "object oriented"}
, _ne_size = 1
, _ne_list = StopTerm
, _ne_occurrences = Set.fromList [ NodeId 1, NodeId 2, NodeId 3, NodeId 4, NodeId 5 ]
, _ne_root = Nothing
, _ne_parent = Nothing
, _ne_children = mSetFromList [ "null pointer exception" ]
}
javaElem :: NgramsElement
javaElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "java"}
, _ne_size = 1
, _ne_list = StopTerm
, _ne_occurrences = Set.fromList [ NodeId 1, NodeId 2, NodeId 3 ]
, _ne_root = Nothing
, _ne_parent = Nothing
, _ne_children = mSetFromList [ "JVM" ]
}
pascalElem :: NgramsElement
pascalElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "pascal"}
, _ne_size = 1
, _ne_list = StopTerm
, _ne_occurrences = Set.fromList [ NodeId 1, NodeId 2 ]
, _ne_root = Nothing
, _ne_parent = Nothing
, _ne_children = mSetFromList [ "turbo", "borland" ]
}
haskellElem :: NgramsElement
haskellElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "haskell"}
, _ne_size = 1
, _ne_list = CandidateTerm
, _ne_occurrences = Set.fromList [ NodeId 1, NodeId 2, NodeId 3, NodeId 4, NodeId 5, NodeId 6, NodeId 7, NodeId 8 ]
, _ne_root = Nothing
, _ne_parent = Nothing
, _ne_children = mSetFromList [ ]
}
concHaskellElem :: NgramsElement
concHaskellElem = NgramsElement {
_ne_ngrams = NgramsTerm {unNgramsTerm = "concurrent haskell"}
, _ne_size = 1
, _ne_list = CandidateTerm
, _ne_occurrences = Set.fromList [ NodeId 1, NodeId 2, NodeId 3, NodeId 4, NodeId 5 ]
, _ne_root = Nothing
, _ne_parent = Nothing
, _ne_children = mSetFromList [ "Simon Marlow" ]
}
-- | A big (for the sake of the tests anyway) corpus which has
-- * 4 @MapTerm@s
-- * 4 @StopTerm@s
-- * 2 @CandidateTerm@s
paginationCorpus :: Versioned (Map NgramsTerm NgramsElement)
paginationCorpus = Versioned 0 $ Map.fromList [
-- Map terms
( "implementation", implementationElem)
, ( "languages", languagesElem)
, ( "terms", termsElem)
, ("proof", proofElem)
-- Stop terms
, ("side effects", sideEffectsElem)
, ("object oriented", ooElem)
, ("java", javaElem)
, ("pascal", pascalElem)
-- Candidate terms
, ("haskell", haskellElem)
, ("concurrent haskell", concHaskellElem)
]
quantumComputingCorpus :: Versioned (Map NgramsTerm NgramsElement)
quantumComputingCorpus = unsafePerformIO $ do
pth <- getDataFileName "test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json"
jsonBlob <- B.readFile pth
case eitherDecodeStrict' jsonBlob of
Left err -> error err
Right (Versioned ver (mp :: Map NgramsTerm NgramsRepoElement)) ->
pure $ Versioned ver (Map.mapWithKey (\k -> ngramsElementFromRepo k) mp)
{-# NOINLINE quantumComputingCorpus #-}
......@@ -28,6 +28,7 @@ module Gargantext.API.Ngrams
, TableNgramsApiGet
, TableNgramsApiPut
, searchTableNgrams
, getTableNgrams
, getTableNgramsCorpus
, setListNgrams
......@@ -83,10 +84,8 @@ module Gargantext.API.Ngrams
where
import Control.Concurrent
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex)
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over)
import Control.Monad.Reader
import Data.Aeson hiding ((.=))
import Data.Either (Either(..))
import Data.Foldable
import Data.Map.Strict (Map)
import Data.Maybe (fromMaybe)
......@@ -94,11 +93,9 @@ import Data.Monoid
import Data.Ord (Down(..))
import Data.Patch.Class (Action(act), Transformable(..), ours)
import Data.Set (Set)
import Data.Swagger hiding (version, patch)
import Data.Text (Text, isInfixOf, toLower, unpack, pack)
import Data.Text (Text, isInfixOf, toLower, unpack)
import Data.Text.Lazy.IO as DTL
import Formatting (hprint, int, (%))
import GHC.Generics (Generic)
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
......@@ -106,7 +103,7 @@ import Gargantext.API.Ngrams.Types
import Gargantext.API.Prelude
import Gargantext.Core.NodeStory
import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, TODO, assertValid, HasInvalidError, ContextId)
import Gargantext.Core.Types.Query (Limit(..), Offset(..))
import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
import Gargantext.API.Ngrams.Tools
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
......@@ -124,8 +121,6 @@ import Prelude (error)
import Servant hiding (Patch)
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import System.IO (stderr)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.Aeson.Text as DAT
import qualified Data.List as List
import qualified Data.Map.Strict as Map
......@@ -521,114 +516,108 @@ dumpJsonTableMap fpath nodeId ngramsType = do
pure ()
type MinSize = Int
type MaxSize = Int
-- | TODO Errors management
-- TODO: polymorphic for Annuaire or Corpus or ...
-- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
-- TODO: should take only one ListId
-- | /pure/ function to query a 'Map NgramsTerm NgramsElement', according to a
-- search function. Returns a /versioned/ 'NgramsTable' which is paginated and
-- sorted according to the input 'NgramsSearchQuery', together with the
-- occurrences of the elements.
searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
-> NgramsSearchQuery
-- ^ The search query on the retrieved data
-> VersionedWithCount NgramsTable
searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
let tableMap = versionedTableMap ^. v_data
filteredData = filterNodes tableMap
tableMapSorted = versionedTableMap
& v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ filteredData)
getTableNgrams :: forall env err m.
(HasNodeStory env err m, HasNodeError err, CmdCommon env)
=> NodeType -> NodeId -> TabType
-> ListId -> Limit -> Maybe Offset
-> Maybe ListType
-> Maybe MinSize -> Maybe MaxSize
-> Maybe OrderBy
-> (NgramsTerm -> Bool)
-> m (VersionedWithCount NgramsTable)
getTableNgrams _nType nId tabType listId limit_ offset
listType minSize maxSize orderBy searchQuery = do
t0 <- getTime
-- lIds <- selectNodesWithUsername NodeList userMaster
let
ngramsType = ngramsTypeFromTabType tabType
offset' = getOffset $ maybe 0 identity offset
listType' = maybe (const True) (==) listType
minSize' = maybe (const True) (<=) minSize
maxSize' = maybe (const True) (>=) maxSize
rootOf tableMap ne = maybe ne (\r -> fromMaybe (panic "getTableNgrams: invalid root")
(tableMap ^. at r)
)
(ne ^. ne_root)
selected_node n = minSize' s
&& maxSize' s
&& searchQuery (n ^. ne_ngrams)
&& listType' (n ^. ne_list)
where
s = n ^. ne_size
in toVersionedWithCount (Set.size filteredData) tableMapSorted
where
---------------------------------------
-- | Returns the \"root\" of the 'NgramsElement', or it falls back to the input
-- 'NgramsElement' itself, if no root can be found.
-- /CAREFUL/: The root we select might /not/ have the same 'listType' we are
-- filtering for, in which case we have to change its type to match, if needed.
rootOf :: Map NgramsTerm NgramsElement -> NgramsElement -> NgramsElement
rootOf tblMap ne = case ne ^. ne_root of
Nothing -> ne
Just rootKey
| Just r <- tblMap ^. at rootKey
-- NOTE(adinapoli) It's unclear what is the correct behaviour here: should
-- we override the type or we filter out the node altogether?
-> over ne_list (\oldList -> fromMaybe oldList _nsq_listType) r
| otherwise
-> ne
-- | Returns 'True' if the input 'NgramsElement' satisfies the search criteria
-- mandated by 'NgramsSearchQuery'.
matchingNode :: NgramsElement -> Bool
matchingNode inputNode =
let nodeSize = inputNode ^. ne_size
matchesListType = maybe (const True) (==) _nsq_listType
respectsMinSize = maybe (const True) (<=) (getMinSize <$> _nsq_minSize)
respectsMaxSize = maybe (const True) (>=) (getMaxSize <$> _nsq_maxSize)
in respectsMinSize nodeSize
&& respectsMaxSize nodeSize
&& _nsq_searchQuery (inputNode ^. ne_ngrams)
&& matchesListType (inputNode ^. ne_list)
sortOnOrder :: Maybe OrderBy -> ([NgramsElement] -> [NgramsElement])
sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
sortOnOrder (Just TermAsc) = List.sortOn $ view ne_ngrams
sortOnOrder (Just TermDesc) = List.sortOn $ Down . view ne_ngrams
sortOnOrder (Just ScoreAsc) = List.sortOn $ view (ne_occurrences . to Set.size)
sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view (ne_occurrences . to Set.size)
---------------------------------------
-- | Filter the given `tableMap` with the search criteria.
filteredNodes :: Map NgramsTerm NgramsElement -> Set NgramsElement
filteredNodes tableMap = roots
-- | Filters the given `tableMap` with the search criteria. It returns
-- a set of 'NgramsElement' all matching the input 'NGramsSearchQuery'.
filterNodes :: Map NgramsTerm NgramsElement -> Set NgramsElement
filterNodes tblMap = Set.map (rootOf tblMap) selectedNodes
where
list = Set.fromList $ Map.elems tableMap
selected_nodes = list & Set.filter selected_node
roots = Set.map (rootOf tableMap) selected_nodes
allNodes = Set.fromList $ Map.elems tblMap
selectedNodes = Set.filter matchingNode allNodes
-- | For each input root, extends its occurrence count with
-- the information found in the subitems.
withInners :: Map NgramsTerm NgramsElement -> Set NgramsElement -> Set NgramsElement
withInners tableMap roots = Set.map addSubitemsOccurrences roots
withInners tblMap roots = Set.map addSubitemsOccurrences roots
where
addSubitemsOccurrences :: NgramsElement -> NgramsElement
addSubitemsOccurrences e =
e { _ne_occurrences = foldl' alterOccurrences (e ^. ne_occurrences) (e ^. ne_children) }
alterOccurrences :: Set ContextId -> NgramsTerm -> Set ContextId
alterOccurrences occs t = case Map.lookup t tableMap of
alterOccurrences occs t = case Map.lookup t tblMap of
Nothing -> occs
Just e' -> occs <> e' ^. ne_occurrences
-- | Paginate the results
sortAndPaginate :: Set NgramsElement -> [NgramsElement]
sortAndPaginate = take (getLimit limit_)
. drop offset'
. sortOnOrder orderBy
. Set.toList
---------------------------------------
let scoresNeeded = needsScores orderBy
t1 <- getTime
versionedTableMap <- getNgramsTable' nId listId ngramsType :: m (Versioned (Map NgramsTerm NgramsElement))
let tableMap = versionedTableMap ^. v_data
let filteredData = filteredNodes tableMap
sortAndPaginate xs =
let offset' = getOffset $ maybe 0 identity _nsq_offset
in take (getLimit _nsq_limit)
. drop offset'
. sortOnOrder _nsq_orderBy
. Set.toList
$ xs
let fltrCount = Set.size filteredData
t2 <- getTime
let tableMapSorted = versionedTableMap
& v_data .~ (NgramsTable . sortAndPaginate . withInners tableMap $ filteredData)
t3 <- getTime
--printDebug "[getTableNgrams] tableMapSorted" tableMapSorted
liftBase $ do
hprint stderr
("getTableNgrams total=" % hasTime
% " map1=" % hasTime
% " map2=" % hasTime
% " map3=" % hasTime
% " sql=" % (if scoresNeeded then "map2" else "map3")
% "\n"
) t0 t3 t0 t1 t1 t2 t2 t3
-- printDebug "[getTableNgrams] tableMapSorted" $ show tableMapSorted
pure $ toVersionedWithCount fltrCount tableMapSorted
getTableNgrams :: forall env err m.
(HasNodeStory env err m, HasNodeError err, CmdCommon env)
=> NodeId
-> ListId
-> TabType
-> NgramsSearchQuery
-> m (VersionedWithCount NgramsTable)
getTableNgrams nodeId listId tabType searchQuery = do
let ngramsType = ngramsTypeFromTabType tabType
versionedInput <- getNgramsTable' nodeId listId ngramsType
pure $ searchTableNgrams versionedInput searchQuery
-- | Helper function to get the ngrams table with scores.
......@@ -694,28 +683,6 @@ scoresRecomputeTableNgrams nId tabType listId = do
-- TODO: find a better place for the code above, All APIs stay here
data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
deriving (Generic, Enum, Bounded, Read, Show)
instance FromHttpApiData OrderBy
where
parseUrlPiece "TermAsc" = pure TermAsc
parseUrlPiece "TermDesc" = pure TermDesc
parseUrlPiece "ScoreAsc" = pure ScoreAsc
parseUrlPiece "ScoreDesc" = pure ScoreDesc
parseUrlPiece _ = Left "Unexpected value of OrderBy"
instance ToHttpApiData OrderBy where
toUrlPiece = pack . show
instance ToParamSchema OrderBy
instance FromJSON OrderBy
instance ToJSON OrderBy
instance ToSchema OrderBy
instance Arbitrary OrderBy
where
arbitrary = elements [minBound..maxBound]
needsScores :: Maybe OrderBy -> Bool
needsScores (Just ScoreAsc) = True
needsScores (Just ScoreDesc) = True
......@@ -773,9 +740,19 @@ getTableNgramsCorpus :: (HasNodeStory env err m, HasNodeError err, CmdCommon env
-> Maybe Text -- full text search
-> m (VersionedWithCount NgramsTable)
getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
getTableNgrams NodeCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
getTableNgrams nId listId tabType searchQuery
where
searchQuery (NgramsTerm nt) = maybe (const True) isInfixOf (toLower <$> mt) (toLower nt)
searchQueryFn (NgramsTerm nt) = maybe (const True) isInfixOf (toLower <$> mt) (toLower nt)
searchQuery = NgramsSearchQuery {
_nsq_limit = limit_
, _nsq_offset = offset
, _nsq_listType = listType
, _nsq_minSize = minSize
, _nsq_maxSize = maxSize
, _nsq_orderBy = orderBy
, _nsq_searchQuery = searchQueryFn
}
......@@ -807,9 +784,17 @@ getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orde
ns <- selectNodesWithUsername NodeList userMaster
let ngramsType = ngramsTypeFromTabType tabType
ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
let searchQuery (NgramsTerm nt) = flip Set.member (Set.fromList ngs) nt
getTableNgrams NodeDocument dId tabType listId limit_ offset listType minSize maxSize orderBy searchQuery
let searchQueryFn (NgramsTerm nt) = flip Set.member (Set.fromList ngs) nt
searchQuery = NgramsSearchQuery {
_nsq_limit = limit_
, _nsq_offset = offset
, _nsq_listType = listType
, _nsq_minSize = minSize
, _nsq_maxSize = maxSize
, _nsq_orderBy = orderBy
, _nsq_searchQuery = searchQueryFn
}
getTableNgrams dId listId tabType searchQuery
apiNgramsTableCorpus :: NodeId -> ServerT TableNgramsApi (GargM Env GargError)
......
......@@ -41,6 +41,7 @@ import Database.PostgreSQL.Simple.ToField (ToField, toJSONField, toField)
import GHC.Generics (Generic)
import Gargantext.Core.Text (size)
import Gargantext.Core.Types (ListType(..), ListId, NodeId, TODO)
import Gargantext.Core.Types.Query (Limit, Offset, MaxSize, MinSize)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixUntagged, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Admin.Types.Node (ContextId)
import Gargantext.Database.Prelude (fromField', HasConnectionPool, HasConfig, CmdM')
......@@ -268,6 +269,43 @@ instance Arbitrary NgramsTable where
instance ToSchema NgramsTable
------------------------------------------------------------------------
-- Searching in a Ngram Table
data OrderBy = TermAsc | TermDesc | ScoreAsc | ScoreDesc
deriving (Generic, Enum, Bounded, Read, Show)
instance FromHttpApiData OrderBy
where
parseUrlPiece "TermAsc" = pure TermAsc
parseUrlPiece "TermDesc" = pure TermDesc
parseUrlPiece "ScoreAsc" = pure ScoreAsc
parseUrlPiece "ScoreDesc" = pure ScoreDesc
parseUrlPiece _ = Left "Unexpected value of OrderBy"
instance ToHttpApiData OrderBy where
toUrlPiece = pack . show
instance ToParamSchema OrderBy
instance FromJSON OrderBy
instance ToJSON OrderBy
instance ToSchema OrderBy
instance Arbitrary OrderBy
where
arbitrary = elements [minBound..maxBound]
-- | A query on a 'NgramsTable'.
data NgramsSearchQuery = NgramsSearchQuery
{ _nsq_limit :: !Limit
, _nsq_offset :: !(Maybe Offset)
, _nsq_listType :: !(Maybe ListType)
, _nsq_minSize :: !(Maybe MinSize)
, _nsq_maxSize :: !(Maybe MaxSize)
, _nsq_orderBy :: !(Maybe OrderBy)
, _nsq_searchQuery :: !(NgramsTerm -> Bool)
}
------------------------------------------------------------------------
type NgramsTableMap = Map NgramsTerm NgramsRepoElement
------------------------------------------------------------------------
......
......@@ -28,3 +28,12 @@ newtype Offset = Offset { getOffset :: Int }
, Servant.FromHttpApiData, Servant.ToHttpApiData
, Swagger.ToParamSchema, Swagger.ToSchema)
type IsTrash = Bool
newtype MinSize = MinSize { getMinSize :: Int }
deriving newtype ( Show, Eq, Num
, Servant.FromHttpApiData, Servant.ToHttpApiData
, Swagger.ToParamSchema, Swagger.ToSchema)
newtype MaxSize = MaxSize { getMaxSize :: Int }
deriving newtype ( Show, Eq, Num
, Servant.FromHttpApiData, Servant.ToHttpApiData
, Swagger.ToParamSchema, Swagger.ToSchema)
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment