Verified Commit 7d3afcff authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 169-dev-revert-fix

parents 1d7b37fe b71d4a5f
Pipeline #5747 passed with stages
in 105 minutes and 9 seconds
use_nix use_nix
#use_flake
export LANG=C.UTF-8 export LANG=C.UTF-8
## Version 0.0.6.9.9.9.6.6 [RELEASE CANDIDATE 007]
* [BACK][FIX][[Terms] Importing JSON or CSV seems to add new terms to the old ones, rather than overwriting and replacing them all (#313)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/313)
* [BACK][FIX][Coherent Stemming interface (#324)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/324)
* [FRONT][RELATED][[Node phylo] Phylomemy displays terms with broken accented words (#632)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/632)
## Version 0.0.6.9.9.9.6.5.1 [RELEASE CANDIDATE 007] ## Version 0.0.6.9.9.9.6.5.1 [RELEASE CANDIDATE 007]
* [FRONT][FIX][Machting Documents are not displayed anymore in graph (#636)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/636) * [FRONT][FIX][Machting Documents are not displayed anymore in graph (#636)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/636)
......
...@@ -18,7 +18,7 @@ fi ...@@ -18,7 +18,7 @@ fi
# with the `sha256sum` result calculated on the `cabal.project` and # with the `sha256sum` result calculated on the `cabal.project` and
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI # `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# cache can kick in. # cache can kick in.
expected_cabal_project_hash="c9fe39301e8b60bfd183e60e7e25a14cd1c9c66d8739bf9041ca3f4db89db7c6" expected_cabal_project_hash="1cbb47fd3f929a01b3b968cc2e148dcbf5ef4e662e14ed9832d32471a68f6766"
expected_cabal_project_freeze_hash="2c8960ffcf1b94aa11a3543e3b5facd2db5af19569fecaec4bc0ab4c1edd22a5" expected_cabal_project_freeze_hash="2c8960ffcf1b94aa11a3543e3b5facd2db5af19569fecaec4bc0ab4c1edd22a5"
cabal --store-dir=$STORE_DIR v2-build --dry-run cabal --store-dir=$STORE_DIR v2-build --dry-run
......
...@@ -121,7 +121,7 @@ source-repository-package ...@@ -121,7 +121,7 @@ source-repository-package
source-repository-package source-repository-package
type: git type: git
location: https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git location: https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git
tag: cd179f6dda15d77a085c0176284c921b7bc50c46 tag: ceb8f2cebd4890b6d9d151ab01ee14e925bc0499
source-repository-package source-repository-package
type: git type: git
......
...@@ -5,7 +5,7 @@ cabal-version: 3.4 ...@@ -5,7 +5,7 @@ cabal-version: 3.4
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
name: gargantext name: gargantext
version: 0.0.6.9.9.9.6.5.1 version: 0.0.6.9.9.9.6.6
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
...@@ -158,8 +158,10 @@ library ...@@ -158,8 +158,10 @@ library
Gargantext.Core.Text.Terms Gargantext.Core.Text.Terms
Gargantext.Core.Text.Terms.Eleve Gargantext.Core.Text.Terms.Eleve
Gargantext.Core.Text.Terms.Mono Gargantext.Core.Text.Terms.Mono
Gargantext.Core.Text.Terms.Mono.Stem.En Gargantext.Core.Text.Terms.Mono.Stem
Gargantext.Core.Text.Terms.Mono.Stem.Lancaster Gargantext.Core.Text.Terms.Mono.Stem.Internal.GargPorter
Gargantext.Core.Text.Terms.Mono.Stem.Internal.Lancaster
Gargantext.Core.Text.Terms.Mono.Stem.Internal.Porter
Gargantext.Core.Text.Terms.Multi Gargantext.Core.Text.Terms.Multi
Gargantext.Core.Text.Terms.Multi.Lang.En Gargantext.Core.Text.Terms.Multi.Lang.En
Gargantext.Core.Text.Terms.Multi.Lang.Fr Gargantext.Core.Text.Terms.Multi.Lang.Fr
...@@ -327,7 +329,6 @@ library ...@@ -327,7 +329,6 @@ library
Gargantext.Core.Text.Samples.FR Gargantext.Core.Text.Samples.FR
Gargantext.Core.Text.Samples.PL Gargantext.Core.Text.Samples.PL
Gargantext.Core.Text.Samples.ZH Gargantext.Core.Text.Samples.ZH
Gargantext.Core.Text.Terms.Mono.Stem
Gargantext.Core.Text.Terms.Mono.Token Gargantext.Core.Text.Terms.Mono.Token
Gargantext.Core.Text.Terms.Mono.Token.En Gargantext.Core.Text.Terms.Mono.Token.En
Gargantext.Core.Text.Terms.Multi.Group Gargantext.Core.Text.Terms.Multi.Group
...@@ -840,7 +841,9 @@ test-suite garg-test-tasty ...@@ -840,7 +841,9 @@ test-suite garg-test-tasty
type: exitcode-stdio-1.0 type: exitcode-stdio-1.0
main-is: drivers/tasty/Main.hs main-is: drivers/tasty/Main.hs
other-modules: other-modules:
Test.API.Routes
Test.API.Setup Test.API.Setup
Test.Core.Similarity
Test.Core.Text Test.Core.Text
Test.Core.Text.Corpus.Query Test.Core.Text.Corpus.Query
Test.Core.Text.Examples Test.Core.Text.Examples
...@@ -868,6 +871,7 @@ test-suite garg-test-tasty ...@@ -868,6 +871,7 @@ test-suite garg-test-tasty
Test.Parsers.Date Test.Parsers.Date
Test.Parsers.Types Test.Parsers.Types
Test.Parsers.WOS Test.Parsers.WOS
Test.Types
Test.Utils Test.Utils
Test.Utils.Crypto Test.Utils.Crypto
Test.Utils.Jobs Test.Utils.Jobs
...@@ -920,6 +924,7 @@ test-suite garg-test-tasty ...@@ -920,6 +924,7 @@ test-suite garg-test-tasty
, servant-auth , servant-auth
, servant-auth-client , servant-auth-client
, servant-client , servant-client
, servant-client-core
, servant-job , servant-job
, servant-server , servant-server
, shelly >= 1.9 && < 2 , shelly >= 1.9 && < 2
...@@ -952,6 +957,7 @@ test-suite garg-test-hspec ...@@ -952,6 +957,7 @@ test-suite garg-test-hspec
Test.API.Errors Test.API.Errors
Test.API.GraphQL Test.API.GraphQL
Test.API.Private Test.API.Private
Test.API.Routes
Test.API.Setup Test.API.Setup
Test.API.UpdateList Test.API.UpdateList
Test.Database.Operations Test.Database.Operations
...@@ -960,6 +966,7 @@ test-suite garg-test-hspec ...@@ -960,6 +966,7 @@ test-suite garg-test-hspec
Test.Database.Setup Test.Database.Setup
Test.Database.Types Test.Database.Types
Test.Utils Test.Utils
Test.Types
Paths_gargantext Paths_gargantext
hs-source-dirs: hs-source-dirs:
test test
...@@ -1007,6 +1014,7 @@ test-suite garg-test-hspec ...@@ -1007,6 +1014,7 @@ test-suite garg-test-hspec
, servant-auth , servant-auth
, servant-auth-client , servant-auth-client
, servant-client , servant-client
, servant-client-core
, servant-job , servant-job
, servant-server , servant-server
, shelly >= 1.9 && < 2 , shelly >= 1.9 && < 2
......
...@@ -12,20 +12,23 @@ Portability : POSIX ...@@ -12,20 +12,23 @@ Portability : POSIX
-- Use only for dev/repl -- Use only for dev/repl
module Gargantext.API.Dev where module Gargantext.API.Dev where
import Control.Lens (view)
import Control.Monad (fail) import Control.Monad (fail)
import Gargantext.API.Admin.EnvTypes import Data.Pool (withResource)
import Gargantext.API.Admin.Settings import Database.PostgreSQL.Simple qualified as PGS
import Gargantext.API.Errors.Types import Gargantext.API.Admin.EnvTypes ( DevEnv(..), Mode(Dev) )
import Gargantext.API.Prelude import Gargantext.API.Admin.Settings ( devJwkFile, devSettings, newPool )
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Prelude ( GargM )
import Gargantext.Core.NLP (nlpServerMap) import Gargantext.Core.NLP (nlpServerMap)
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory (fromDBNodeStoryEnv)
import Gargantext.Database.Prelude (Cmd', Cmd'', databaseParameters, runCmd) import Gargantext.Database.Prelude (Cmd', Cmd'', connPool, databaseParameters, runCmd)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Config (readConfig) import Gargantext.Prelude.Config (readConfig)
import Gargantext.Prelude.Mail qualified as Mail import Gargantext.Prelude.Mail qualified as Mail
import Gargantext.Prelude.NLP qualified as NLP import Gargantext.Prelude.NLP qualified as NLP
import Gargantext.System.Logging import Gargantext.System.Logging ( withLoggerHoisted )
import Servant import Servant ( ServerError )
type IniPath = FilePath type IniPath = FilePath
------------------------------------------------------------------- -------------------------------------------------------------------
...@@ -67,7 +70,7 @@ runCmdReplServantErr = runCmdRepl ...@@ -67,7 +70,7 @@ runCmdReplServantErr = runCmdRepl
-- using HasConnectionPool and HasRepoVar. -- using HasConnectionPool and HasRepoVar.
runCmdDev :: Show err => DevEnv -> Cmd'' DevEnv err a -> IO a runCmdDev :: Show err => DevEnv -> Cmd'' DevEnv err a -> IO a
runCmdDev env f = runCmdDev env f =
(either (fail . show) pure =<< runCmd env f) either (fail . show) pure =<< runCmd env f
runCmdGargDev :: DevEnv -> GargM DevEnv BackendInternalError a -> IO a runCmdGargDev :: DevEnv -> GargM DevEnv BackendInternalError a -> IO a
runCmdGargDev env cmd = runCmdGargDev env cmd =
...@@ -81,3 +84,9 @@ runCmdDevServantErr = runCmdDev ...@@ -81,3 +84,9 @@ runCmdDevServantErr = runCmdDev
runCmdReplEasy :: Cmd'' DevEnv BackendInternalError a -> IO a runCmdReplEasy :: Cmd'' DevEnv BackendInternalError a -> IO a
runCmdReplEasy f = withDevEnv "gargantext.ini" $ \env -> runCmdDev env f runCmdReplEasy f = withDevEnv "gargantext.ini" $ \env -> runCmdDev env f
-- | Execute a function that takes PSQL.Connection from the DB pool as
-- first parameter.
-- e.g.: runCmdReplEasyDB $ \c -> getNodeStory' c
runCmdReplEasyDB :: (PGS.Connection -> IO a) -> IO a
runCmdReplEasyDB f = runCmdReplEasy $ view connPool >>= (\p -> liftBase $ withResource p f)
...@@ -25,6 +25,7 @@ add get ...@@ -25,6 +25,7 @@ add get
{-# LANGUAGE IncoherentInstances #-} {-# LANGUAGE IncoherentInstances #-}
module Gargantext.API.Ngrams module Gargantext.API.Ngrams
( TableNgramsApi ( TableNgramsApi
, TableNgramsApiGet , TableNgramsApiGet
...@@ -86,38 +87,35 @@ module Gargantext.API.Ngrams ...@@ -86,38 +87,35 @@ module Gargantext.API.Ngrams
) )
where where
import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, _Just, Each(..), (%%~), mapped, ifolded, to, withIndex, over) import Control.Lens ((.~), view, (^.), (^..), (+~), (%~), (.~), msumOf, at, ix, _Just, Each(..), (%%~), ifolded, to, withIndex, over)
import Control.Monad.Reader
import Data.Aeson.Text qualified as DAT import Data.Aeson.Text qualified as DAT
import Data.Foldable
import Data.List qualified as List import Data.List qualified as List
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM import Data.Map.Strict.Patch qualified as PM
import Data.Monoid
import Data.Patch.Class (Action(act), Transformable(..), ours) import Data.Patch.Class (Action(act), Transformable(..), ours)
import Data.Set qualified as Set import Data.Set qualified as Set
import Data.Text (isInfixOf, toLower, unpack) import Data.Text (isInfixOf, toLower, unpack)
import Data.Text.Lazy.IO as DTL import Data.Text.Lazy.IO as DTL ( writeFile )
import Formatting (hprint, int, (%)) import Formatting (hprint, int, (%))
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..)) import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs) import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings) import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Metrics qualified as Metrics import Gargantext.API.Metrics qualified as Metrics
import Gargantext.API.Ngrams.Tools (getNodeStory) import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.API.Prelude import Gargantext.API.Prelude ( GargM )
import Gargantext.Core.NodeStory (ArchiveList, HasNodeStory, HasNodeArchiveStoryImmediateSaver(..), HasNodeStoryImmediateSaver(..), NgramsStatePatch', a_history, a_state, a_version, currentVersion) import Gargantext.Core.NodeStory (ArchiveList, HasNodeStory, HasNodeArchiveStoryImmediateSaver(..), HasNodeStoryImmediateSaver(..), NgramsStatePatch', a_history, a_state, a_version, currentVersion)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, TODO, assertValid, HasValidationError, ContextId) import Gargantext.Core.Types (ListType(..), NodeId, ListId, DocId, TODO, assertValid, HasValidationError, ContextId)
import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..)) import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast) import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
import Gargantext.Database.Admin.Config (userMaster) import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Node (NodeType(..)) import Gargantext.Database.Admin.Types.Node (NodeType(..))
import Gargantext.Database.Query.Table.Ngrams hiding (NgramsType(..), ngramsType, ngrams_terms) import Gargantext.Database.Query.Table.Ngrams ( text2ngrams, Ngrams, insertNgrams, selectNgramsByDoc )
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams import Gargantext.Database.Schema.Ngrams qualified as TableNgrams
import Gargantext.Database.Query.Table.Node (getNode) import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError) import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select import Gargantext.Database.Query.Table.Node.Select ( selectNodesWithUsername )
import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id) import Gargantext.Database.Schema.Node (node_id, node_parent_id, node_user_id)
import Gargantext.Prelude hiding (log, to, toLower, (%), isInfixOf) import Gargantext.Prelude hiding (log, to, toLower, (%), isInfixOf)
import Gargantext.Prelude.Clock (hasTime, getTime) import Gargantext.Prelude.Clock (hasTime, getTime)
...@@ -370,7 +368,7 @@ tableNgramsPull listId ngramsType p_version = do ...@@ -370,7 +368,7 @@ tableNgramsPull listId ngramsType p_version = do
let let
-- a = r ^. unNodeStory . at listId . non initArchive -- a = r ^. unNodeStory . at listId . non initArchive
q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history) q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
q_table = q ^. _PatchMap . at ngramsType . _Just q_table = q ^. _PatchMap . ix ngramsType
pure (Versioned (a ^. a_version) q_table) pure (Versioned (a ^. a_version) q_table)
...@@ -404,7 +402,7 @@ tableNgramsPut tabType listId (Versioned p_version p_table) ...@@ -404,7 +402,7 @@ tableNgramsPut tabType listId (Versioned p_version p_table)
assertValid p_validity assertValid p_validity
ret <- commitStatePatch listId (Versioned p_version p) ret <- commitStatePatch listId (Versioned p_version p)
<&> v_data %~ (view (_PatchMap . at ngramsType . _Just)) <&> v_data %~ view (_PatchMap . ix ngramsType)
pure ret pure ret
...@@ -474,7 +472,7 @@ tableNgramsPostChartsAsync utn jobHandle = do ...@@ -474,7 +472,7 @@ tableNgramsPostChartsAsync utn jobHandle = do
_ <- Metrics.updateTree cId (Just listId) tabType MapTerm _ <- Metrics.updateTree cId (Just listId) tabType MapTerm
-} -}
markComplete jobHandle markComplete jobHandle
_ -> do _otherTabType -> do
-- printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType -- printDebug "[tableNgramsPostChartsAsync] no update for tabType = " tabType
markStarted 1 jobHandle markStarted 1 jobHandle
markFailed Nothing jobHandle markFailed Nothing jobHandle
...@@ -494,7 +492,7 @@ getNgramsTableMap :: HasNodeStory env err m ...@@ -494,7 +492,7 @@ getNgramsTableMap :: HasNodeStory env err m
getNgramsTableMap nodeId ngramsType = do getNgramsTableMap nodeId ngramsType = do
a <- getNodeStory nodeId a <- getNodeStory nodeId
pure $ Versioned (a ^. a_version) pure $ Versioned (a ^. a_version)
(a ^. a_state . at ngramsType . _Just) (a ^. a_state . ix ngramsType)
dumpJsonTableMap :: HasNodeStory env err m dumpJsonTableMap :: HasNodeStory env err m
...@@ -551,8 +549,8 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} = ...@@ -551,8 +549,8 @@ searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
matchingNode inputNode = matchingNode inputNode =
let nodeSize = inputNode ^. ne_size let nodeSize = inputNode ^. ne_size
matchesListType = maybe (const True) (==) _nsq_listType matchesListType = maybe (const True) (==) _nsq_listType
respectsMinSize = maybe (const True) (<=) (getMinSize <$> _nsq_minSize) respectsMinSize = maybe (const True) ((<=) . getMinSize) _nsq_minSize
respectsMaxSize = maybe (const True) (>=) (getMaxSize <$> _nsq_maxSize) respectsMaxSize = maybe (const True) ((>=) . getMaxSize) _nsq_maxSize
in respectsMinSize nodeSize in respectsMinSize nodeSize
&& respectsMaxSize nodeSize && respectsMaxSize nodeSize
...@@ -623,7 +621,7 @@ getNgramsTable' :: forall env err m. ...@@ -623,7 +621,7 @@ getNgramsTable' :: forall env err m.
-> m (Versioned (Map.Map NgramsTerm NgramsElement)) -> m (Versioned (Map.Map NgramsTerm NgramsElement))
getNgramsTable' nId listId ngramsType = do getNgramsTable' nId listId ngramsType = do
tableMap <- getNgramsTableMap listId ngramsType tableMap <- getNgramsTableMap listId ngramsType
tableMap & v_data %%~ (setNgramsTableScores nId listId ngramsType) tableMap & v_data %%~ setNgramsTableScores nId listId ngramsType
. Map.mapWithKey ngramsElementFromRepo . Map.mapWithKey ngramsElementFromRepo
-- | Helper function to set scores on an `NgramsTable`. -- | Helper function to set scores on an `NgramsTable`.
...@@ -648,7 +646,7 @@ setNgramsTableScores nId listId ngramsType table = do ...@@ -648,7 +646,7 @@ setNgramsTableScores nId listId ngramsType table = do
("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n") ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
(length ngrams_terms) t1 t2 (length ngrams_terms) t1 t2
let let
setOcc ne = ne & ne_occurrences .~ Set.fromList (msumOf (at (ne ^. ne_ngrams) . _Just) occurrences) setOcc ne = ne & ne_occurrences .~ Set.fromList (msumOf (ix (ne ^. ne_ngrams)) occurrences)
--printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc --printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
...@@ -734,7 +732,7 @@ getTableNgramsCorpus :: ( HasNodeStory env err m ...@@ -734,7 +732,7 @@ getTableNgramsCorpus :: ( HasNodeStory env err m
getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt = getTableNgramsCorpus nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
getTableNgrams nId listId tabType searchQuery getTableNgrams nId listId tabType searchQuery
where where
searchQueryFn (NgramsTerm nt) = maybe (const True) isInfixOf (toLower <$> mt) (toLower nt) searchQueryFn (NgramsTerm nt) = maybe (const True) (isInfixOf . toLower) mt (toLower nt)
searchQuery = NgramsSearchQuery { searchQuery = NgramsSearchQuery {
_nsq_limit = limit_ _nsq_limit = limit_
, _nsq_offset = offset , _nsq_offset = offset
...@@ -778,7 +776,7 @@ getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orde ...@@ -778,7 +776,7 @@ getTableNgramsDoc dId tabType listId limit_ offset listType minSize maxSize orde
ns <- selectNodesWithUsername NodeList userMaster ns <- selectNodesWithUsername NodeList userMaster
let ngramsType = ngramsTypeFromTabType tabType let ngramsType = ngramsTypeFromTabType tabType
ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType ngs <- selectNgramsByDoc (ns <> [listId]) dId ngramsType
let searchQueryFn (NgramsTerm nt) = flip Set.member (Set.fromList ngs) nt let searchQueryFn (NgramsTerm nt) = Set.member nt (Set.fromList ngs)
searchQuery = NgramsSearchQuery { searchQuery = NgramsSearchQuery {
_nsq_limit = limit_ _nsq_limit = limit_
, _nsq_offset = offset , _nsq_offset = offset
...@@ -828,4 +826,4 @@ listNgramsChangedSince listId ngramsType version ...@@ -828,4 +826,4 @@ listNgramsChangedSince listId ngramsType version
| version < 0 = | version < 0 =
Versioned <$> currentVersion listId <*> pure True Versioned <$> currentVersion listId <*> pure True
| otherwise = | otherwise =
tableNgramsPull listId ngramsType version & mapped . v_data %~ (== mempty) tableNgramsPull listId ngramsType version <&> v_data %~ (== mempty)
...@@ -152,7 +152,7 @@ postAsyncJSON l ngramsList jobHandle = do ...@@ -152,7 +152,7 @@ postAsyncJSON l ngramsList jobHandle = do
setList :: HasNodeStory env err m => m () setList :: HasNodeStory env err m => m ()
setList = do setList = do
-- TODO check with Version for optim -- TODO check with Version for optim
mapM_ (\(nt, Versioned _v ns) -> (setListNgrams l nt ns)) $ toList ngramsList mapM_ (\(nt, Versioned _v ns) -> setListNgrams l nt ns) $ toList ngramsList
-- TODO reindex -- TODO reindex
......
...@@ -11,22 +11,23 @@ Portability : POSIX ...@@ -11,22 +11,23 @@ Portability : POSIX
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
{-# HLINT ignore "Use infix" #-}
module Gargantext.API.Ngrams.Tools module Gargantext.API.Ngrams.Tools
where where
-- import Gargantext.Core.NodeStoryFile qualified as NSF -- import Gargantext.Core.NodeStoryFile qualified as NSF
import Control.Lens (_Just, (^.), at, view, At, Index, IxValue) import Control.Lens (_Just, (^.), at, ix, view, At, Index, IxValue)
import Control.Monad.Reader
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HM import Data.HashMap.Strict qualified as HM
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Set qualified as Set import Data.Set qualified as Set
import Data.Validity
-- import GHC.Conc (TVar, readTVar) -- import GHC.Conc (TVar, readTVar)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory
import Gargantext.Core.Types (ListType(..), NodeId, ListId) import Gargantext.Core.Types.Main ( ListType(..) )
import Gargantext.Database.Admin.Types.Node ( NodeId, ListId )
import Gargantext.Database.Schema.Ngrams (NgramsType) import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude import Gargantext.Prelude
...@@ -93,7 +94,7 @@ listNgramsFromRepo nodeIds ngramsType repo = ...@@ -93,7 +94,7 @@ listNgramsFromRepo nodeIds ngramsType repo =
^. unNodeStory ^. unNodeStory
. at nodeId . _Just . at nodeId . _Just
. a_state . a_state
. at ngramsType . _Just . ix ngramsType
| nodeId <- nodeIds | nodeId <- nodeIds
] ]
...@@ -153,7 +154,7 @@ filterListWithRoot :: [ListType] ...@@ -153,7 +154,7 @@ filterListWithRoot :: [ListType]
filterListWithRoot lt m = snd <$> HM.filter isMapTerm m filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
where where
isMapTerm (l, maybeRoot) = case maybeRoot of isMapTerm (l, maybeRoot) = case maybeRoot of
Nothing -> elem l lt Nothing -> l `elem` lt
Just r -> case HM.lookup r m of Just r -> case HM.lookup r m of
Nothing -> panicTrace $ "[Garg.API.Ngrams.Tools] filterListWithRoot, unknown key: " <> unNgramsTerm r Nothing -> panicTrace $ "[Garg.API.Ngrams.Tools] filterListWithRoot, unknown key: " <> unNgramsTerm r
Just (l',_) -> elem l' lt Just (l',_) -> elem l' lt
...@@ -175,7 +176,7 @@ groupNodesByNgrams syn occs = HM.fromListWith (<>) occs' ...@@ -175,7 +176,7 @@ groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
Nothing -> (t, ns) Nothing -> (t, ns)
Just r' -> (r',ns) Just r' -> (r',ns)
data Diagonal = Diagonal Bool newtype Diagonal = Diagonal Bool
getCoocByNgrams :: Diagonal getCoocByNgrams :: Diagonal
-> HashMap NgramsTerm (Set NodeId) -> HashMap NgramsTerm (Set NodeId)
......
...@@ -25,9 +25,10 @@ import Data.Aeson.TH (deriveJSON) ...@@ -25,9 +25,10 @@ import Data.Aeson.TH (deriveJSON)
import Data.Csv (defaultEncodeOptions, encodeByNameWith, header, namedRecord, EncodeOptions(..), NamedRecord, Quoting(QuoteNone)) import Data.Csv (defaultEncodeOptions, encodeByNameWith, header, namedRecord, EncodeOptions(..), NamedRecord, Quoting(QuoteNone))
import Data.Csv qualified as Csv import Data.Csv qualified as Csv
import Data.HashMap.Strict.InsOrd qualified as InsOrdHashMap import Data.HashMap.Strict.InsOrd qualified as InsOrdHashMap
import Data.List qualified as List
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM import Data.Map.Strict.Patch qualified as PM
import Data.Patch.Class (Replace, replace, Action(act), Group, Applicable(..), Composable(..), Transformable(..), PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, MaybePatch(Mod), unMod, old, new) import Data.Patch.Class (Replace(Keep), replace, Action(act), Group, Applicable(..), Composable(..), Transformable(..), PairPatch(..), Patched, ConflictResolution, ConflictResolutionReplace, MaybePatch(Mod), unMod, old, new)
import Data.Set qualified as Set import Data.Set qualified as Set
import Data.String (IsString(..)) import Data.String (IsString(..))
import Data.Swagger ( NamedSchema(NamedSchema), declareSchemaRef, genericDeclareNamedSchema, SwaggerType(SwaggerObject), ToParamSchema, ToSchema(..), HasProperties(properties), HasRequired(required), HasType(type_) ) import Data.Swagger ( NamedSchema(NamedSchema), declareSchemaRef, genericDeclareNamedSchema, SwaggerType(SwaggerObject), ToParamSchema, ToSchema(..), HasProperties(properties), HasRequired(required), HasType(type_) )
...@@ -848,3 +849,13 @@ instance Arbitrary NgramsRepoElement where ...@@ -848,3 +849,13 @@ instance Arbitrary NgramsRepoElement where
arbitrary = elements $ map ngramsElementToRepo ns arbitrary = elements $ map ngramsElementToRepo ns
where where
NgramsTable ns = mockTable NgramsTable ns = mockTable
toNgramsPatch :: [NgramsTerm] -> NgramsPatch
toNgramsPatch children = NgramsPatch children' Keep
where
children' :: PatchMSet NgramsTerm
children' = PatchMSet
$ fst
$ PM.fromList
$ List.zip children (List.cycle [addPatch])
...@@ -134,7 +134,7 @@ type NodeAPI a = PolicyChecked (NodeNodeAPI a) ...@@ -134,7 +134,7 @@ type NodeAPI a = PolicyChecked (NodeNodeAPI a)
:<|> "category" :> CatApi :<|> "category" :> CatApi
:<|> "score" :> ScoreApi :<|> "score" :> ScoreApi
:<|> "search" :> (Search.API Search.SearchResult) :<|> "search" :> Search.API Search.SearchResult
:<|> "share" :> Share.API :<|> "share" :> Share.API
-- Pairing utilities -- Pairing utilities
......
...@@ -12,9 +12,7 @@ Portability : POSIX ...@@ -12,9 +12,7 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
...@@ -22,35 +20,34 @@ Portability : POSIX ...@@ -22,35 +20,34 @@ Portability : POSIX
module Gargantext.API.Node.Contact module Gargantext.API.Node.Contact
where where
import Conduit import Conduit ( yield )
import Data.Aeson import Data.Aeson
import Data.Either (Either(Right)) import Data.Either (Either(Right))
import Data.Maybe (Maybe(..)) import Data.Maybe (Maybe(..))
import Data.Swagger import Data.Swagger ( ToSchema )
import Data.Text (Text) import Data.Text (Text)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Servant import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser(AuthenticatedUser) )
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Gargantext.API.Admin.Auth.Types
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..)) import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs) import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings) import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Node import Gargantext.API.Node ( nodeNodeAPI, NodeNodeAPI )
import Gargantext.API.Prelude (GargM, simuLogs) import Gargantext.API.Prelude (GargM, simuLogs)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Terms (TermType(..)) import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..)) import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow (flow) import Gargantext.Database.Action.Flow (flow)
import Gargantext.Database.Action.Flow.Types (FlowCmdM) import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataAnnuaire(..), HyperdataContact) import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact, hyperdataContact )
import Gargantext.Database.Admin.Types.Hyperdata.Contact (hyperdataContact) import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataAnnuaire(..) )
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node ( CorpusId, NodeId )
import Gargantext.Prelude (($), {-printDebug,-}) import Gargantext.Prelude (($), {-printDebug,-})
import Gargantext.Utils.Aeson qualified as GUA
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..)) import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import qualified Gargantext.Utils.Aeson as GUA import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary ( Arbitrary(arbitrary) )
------------------------------------------------------------------------ ------------------------------------------------------------------------
type API = "contact" :> Summary "Contact endpoint" type API = "contact" :> Summary "Contact endpoint"
......
...@@ -52,7 +52,6 @@ import Network.HTTP.Client.TLS ...@@ -52,7 +52,6 @@ import Network.HTTP.Client.TLS
import Prelude qualified import Prelude qualified
langToSearx :: Lang -> Text langToSearx :: Lang -> Text
langToSearx All = "en-US"
langToSearx x = Text.toLower acronym <> "-" <> acronym langToSearx x = Text.toLower acronym <> "-" <> acronym
where where
acronym = show x acronym = show x
......
...@@ -16,36 +16,36 @@ Portability : POSIX ...@@ -16,36 +16,36 @@ Portability : POSIX
module Gargantext.API.Node.FrameCalcUpload where module Gargantext.API.Node.FrameCalcUpload where
import Control.Lens ((^.)) import Control.Lens ((^.))
import Data.Aeson import Data.Aeson ( FromJSON, ToJSON )
import Data.ByteString.Lazy qualified as BSL import Data.ByteString.Lazy qualified as BSL
import Data.ByteString.UTF8 qualified as BSU8 import Data.ByteString.UTF8 qualified as BSU8
import Data.Swagger import Data.Swagger ( ToSchema )
import Data.Text qualified as T import Data.Text qualified as T
import Network.HTTP.Client (newManager, httpLbs, parseRequest, responseBody) import Gargantext.API.Admin.Auth.Types ( auth_node_id, AuthenticatedUser )
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant
import Web.FormUrlEncoded (FromForm)
import Gargantext.API.Admin.Auth.Types
import Gargantext.API.Admin.EnvTypes (GargJob(..), Env) import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs) import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Errors.Types import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Node.Corpus.New (addToCorpusWithForm) import Gargantext.API.Node.Corpus.New (addToCorpusWithForm)
import Gargantext.API.Node.Corpus.New.Types (FileFormat(..), FileType(..)) import Gargantext.API.Node.Corpus.New.Types (FileFormat(..), FileType(..))
import Gargantext.API.Node.Types (NewWithForm(..)) import Gargantext.API.Node.Types (NewWithForm(..))
import Gargantext.API.Prelude import Gargantext.API.Prelude ( GargM )
import Gargantext.Core (Lang) import Gargantext.Core (Lang)
import Gargantext.Core.NodeStory (HasNodeArchiveStoryImmediateSaver) import Gargantext.Core.NodeStory.Types ( HasNodeArchiveStoryImmediateSaver )
import Gargantext.Core.Text.List.Social (FlowSocialListWith(..)) import Gargantext.Core.Text.List.Social (FlowSocialListWith(..))
import Gargantext.Core.Types.Individu (User(..)) import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow.Types import Gargantext.Database.Action.Flow.Types ( FlowCmdM )
import Gargantext.Database.Admin.Types.Hyperdata.Frame import Gargantext.Database.Admin.Types.Hyperdata.Frame ( HyperdataFrame(..) )
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node ( NodeId, NodeType(NodeCorpus) )
import Gargantext.Database.Prelude (HasConfig) import Gargantext.Database.Prelude (HasConfig)
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType, getNodeWith) import Gargantext.Database.Query.Table.Node (getClosestParentIdByType, getNodeWith)
import Gargantext.Database.Schema.Node (node_hyperdata) import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..)) import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Network.HTTP.Client (newManager, httpLbs, parseRequest, responseBody)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant ( type (:>), JSON, Summary, HasServer(ServerT) )
import Web.FormUrlEncoded (FromForm)
data FrameCalcUpload = FrameCalcUpload { data FrameCalcUpload = FrameCalcUpload {
_wf_lang :: !(Maybe Lang) _wf_lang :: !(Maybe Lang)
......
...@@ -46,8 +46,7 @@ import Prelude (userError) ...@@ -46,8 +46,7 @@ import Prelude (userError)
-- NOTE: Use international country codes -- NOTE: Use international country codes
-- https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes -- https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
-- TODO This should be deprecated in favor of iso-639 library -- TODO This should be deprecated in favor of iso-639 library
data Lang = All data Lang = DE
| DE
| EL | EL
| EN | EN
| ES | ES
...@@ -58,7 +57,7 @@ data Lang = All ...@@ -58,7 +57,7 @@ data Lang = All
| RU | RU
| UK | UK
| ZH | ZH
deriving (Show, Eq, Ord, Enum, Bounded, Generic, GQLType) deriving (Read, Show, Eq, Ord, Enum, Bounded, Generic, GQLType)
-- | Defaults to 'EN' in all those places where a language is mandatory, -- | Defaults to 'EN' in all those places where a language is mandatory,
-- but an optional one has been passed. -- but an optional one has been passed.
...@@ -75,41 +74,30 @@ instance ToSchema Lang where ...@@ -75,41 +74,30 @@ instance ToSchema Lang where
declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
instance FromHttpApiData Lang instance FromHttpApiData Lang
where where
-- parseUrlPiece "All" = pure All -- parseUrlPiece is exactly the 'read' instance,
parseUrlPiece "DE" = pure DE -- if we are disciplined. Either way, this needs to
parseUrlPiece "EL" = pure EL -- be tested.
parseUrlPiece "EN" = pure EN parseUrlPiece fragment = case readMaybe fragment of
parseUrlPiece "ES" = pure ES Nothing -> Left $ "Unexpected value of Lang: " <> fragment
parseUrlPiece "FR" = pure FR Just lang -> Right lang
parseUrlPiece "IT" = pure IT
parseUrlPiece "PL" = pure PL
parseUrlPiece "PT" = pure PT
parseUrlPiece "RU" = pure RU
parseUrlPiece "UK" = pure UK
parseUrlPiece "ZH" = pure ZH
parseUrlPiece _ = Left "Unexpected value of Lang"
instance ToHttpApiData Lang where instance ToHttpApiData Lang where
toUrlPiece = pack . show toUrlPiece = pack . show
instance Hashable Lang instance Hashable Lang
instance Arbitrary Lang where instance Arbitrary Lang where
arbitrary = arbitraryBoundedEnum arbitrary = arbitraryBoundedEnum
toISO639 :: Lang -> Maybe ISO639.ISO639_1 toISO639 :: Lang -> ISO639.ISO639_1
toISO639 DE = Just ISO639.DE toISO639 DE = ISO639.DE
toISO639 EL = Just ISO639.EL toISO639 EL = ISO639.EL
toISO639 EN = Just ISO639.EN toISO639 EN = ISO639.EN
toISO639 ES = Just ISO639.ES toISO639 ES = ISO639.ES
toISO639 FR = Just ISO639.FR toISO639 FR = ISO639.FR
toISO639 IT = Just ISO639.IT toISO639 IT = ISO639.IT
toISO639 PL = Just ISO639.PL toISO639 PL = ISO639.PL
toISO639 PT = Just ISO639.PT toISO639 PT = ISO639.PT
toISO639 RU = Just ISO639.RU toISO639 RU = ISO639.RU
toISO639 UK = Just ISO639.UK toISO639 UK = ISO639.UK
toISO639 ZH = Just ISO639.ZH toISO639 ZH = ISO639.ZH
toISO639 All = Nothing
toISO639EN :: Lang -> ISO639.ISO639_1
toISO639EN l = fromMaybe ISO639.EN $ toISO639 l
iso639ToText :: ISO639.ISO639_1 -> Text iso639ToText :: ISO639.ISO639_1 -> Text
iso639ToText la = pack [a, b] iso639ToText la = pack [a, b]
...@@ -117,19 +105,18 @@ iso639ToText la = pack [a, b] ...@@ -117,19 +105,18 @@ iso639ToText la = pack [a, b]
(a, b) = ISO639.toChars la (a, b) = ISO639.toChars la
-- | https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes -- | https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
toISO639Lang :: Lang -> Maybe Text toISO639Lang :: Lang -> Text
toISO639Lang All = Nothing toISO639Lang DE = "de"
toISO639Lang DE = Just "de" toISO639Lang EL = "el"
toISO639Lang EL = Just "el" toISO639Lang EN = "en"
toISO639Lang EN = Just "en" toISO639Lang ES = "es"
toISO639Lang ES = Just "es" toISO639Lang FR = "fr"
toISO639Lang FR = Just "fr" toISO639Lang IT = "it"
toISO639Lang IT = Just "it" toISO639Lang PL = "pl"
toISO639Lang PL = Just "pl" toISO639Lang PT = "pt"
toISO639Lang PT = Just "pt" toISO639Lang RU = "ru"
toISO639Lang RU = Just "ru" toISO639Lang UK = "uk"
toISO639Lang UK = Just "uk" toISO639Lang ZH = "zh"
toISO639Lang ZH = Just "zh"
allLangs :: [Lang] allLangs :: [Lang]
allLangs = [minBound .. maxBound] allLangs = [minBound .. maxBound]
...@@ -145,7 +132,6 @@ class HasDBid a where ...@@ -145,7 +132,6 @@ class HasDBid a where
-- once we add a new 'Lang'. -- once we add a new 'Lang'.
langIds :: Bimap Lang Int langIds :: Bimap Lang Int
langIds = Bimap.fromList $ allLangs <&> \lid -> case lid of langIds = Bimap.fromList $ allLangs <&> \lid -> case lid of
All -> (lid, 0)
DE -> (lid, 276) DE -> (lid, 276)
EL -> (lid, 300) EL -> (lid, 300)
EN -> (lid, 2) EN -> (lid, 2)
......
...@@ -43,7 +43,6 @@ TODO: ...@@ -43,7 +43,6 @@ TODO:
{-# LANGUAGE Arrows #-} {-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.NodeStory module Gargantext.Core.NodeStory
( module Gargantext.Core.NodeStory.Types ( module Gargantext.Core.NodeStory.Types
...@@ -54,17 +53,18 @@ module Gargantext.Core.NodeStory ...@@ -54,17 +53,18 @@ module Gargantext.Core.NodeStory
, fromDBNodeStoryEnv , fromDBNodeStoryEnv
, upsertNodeStories , upsertNodeStories
-- , getNodeStory -- , getNodeStory
, getNodeStory'
, nodeStoriesQuery , nodeStoriesQuery
, currentVersion , currentVersion
, archiveStateFromList , archiveStateFromList
, archiveStateToList , archiveStateToList
, fixNodeStoryVersions ) , fixNodeStoryVersions
, fixChildrenDuplicatedAsParents
, getParentsChildren )
where where
import Control.Lens ((^.), (.~), (%~), non, _Just, at, view) import Control.Lens ((^.), (.~), (%~), non, _Just, at, over, view)
import Control.Monad.Except
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Monoid
import Data.Pool (Pool, withResource) import Data.Pool (Pool, withResource)
import Data.Set qualified as Set import Data.Set qualified as Set
import Database.PostgreSQL.Simple qualified as PGS import Database.PostgreSQL.Simple qualified as PGS
...@@ -73,12 +73,12 @@ import Database.PostgreSQL.Simple.ToField qualified as PGS ...@@ -73,12 +73,12 @@ import Database.PostgreSQL.Simple.ToField qualified as PGS
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory.DB import Gargantext.Core.NodeStory.DB
import Gargantext.Core.NodeStory.Types import Gargantext.Core.NodeStory.Types
import Gargantext.Core.Types (ListId, NodeId(..)) import Gargantext.Database.Admin.Types.Node ( ListId, NodeId(..) )
import Gargantext.Database.Admin.Config () import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Prelude (HasConnectionPool(..)) import Gargantext.Database.Prelude (HasConnectionPool(..))
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Prelude hiding (to) import Gargantext.Prelude hiding (to)
import Gargantext.Prelude.Database import Gargantext.Prelude.Database ( runPGSAdvisoryXactLock, runPGSExecute, runPGSQuery )
getNodeStory' :: PGS.Connection -> NodeId -> IO ArchiveList getNodeStory' :: PGS.Connection -> NodeId -> IO ArchiveList
...@@ -105,7 +105,7 @@ getNodeStory' c nId = do ...@@ -105,7 +105,7 @@ getNodeStory' c nId = do
pure () pure ()
-} -}
pure $ foldl combine initArchive dbData pure $ foldl' combine initArchive dbData
where where
-- NOTE (<>) for Archive doesn't concatenate states, so we have to use `combine` -- NOTE (<>) for Archive doesn't concatenate states, so we have to use `combine`
combine a1 a2 = a1 & a_state %~ combineState (a2 ^. a_state) combine a1 a2 = a1 & a_state %~ combineState (a2 ^. a_state)
...@@ -221,15 +221,12 @@ nodeStoryInc c ns@(NodeStory nls) nId = do ...@@ -221,15 +221,12 @@ nodeStoryInc c ns@(NodeStory nls) nId = do
-- `nre_parent` and `nre_children`. We want to make sure that all -- `nre_parent` and `nre_children`. We want to make sure that all
-- children entries (i.e. ones that have `nre_parent`) have the same -- children entries (i.e. ones that have `nre_parent`) have the same
-- `list` as their parent entry. -- `list` as their parent entry.
fixChildrenInNgramsStatePatch :: NgramsState' -> NgramsState' fixChildrenInNgrams :: NgramsState' -> NgramsState'
fixChildrenInNgramsStatePatch ns = archiveStateFromList $ nsParents <> nsChildrenFixed fixChildrenInNgrams ns = archiveStateFromList $ nsParents <> nsChildrenFixed
where where
nls = archiveStateToList ns (nsParents, nsChildren) = getParentsChildren ns
nsParents = filter (\(_nt, _t, nre) -> isNothing $ nre ^. nre_parent) nls
parentNtMap = Map.fromList $ (\(_nt, t, nre) -> (t, nre ^. nre_list)) <$> nsParents parentNtMap = Map.fromList $ (\(_nt, t, nre) -> (t, nre ^. nre_list)) <$> nsParents
nsChildren = filter (\(_nt, _t, nre) -> isJust $ nre ^. nre_parent) nls
nsChildrenFixed = (\(nt, t, nre) -> nsChildrenFixed = (\(nt, t, nre) ->
( nt ( nt
, t , t
...@@ -241,15 +238,12 @@ fixChildrenInNgramsStatePatch ns = archiveStateFromList $ nsParents <> nsChildre ...@@ -241,15 +238,12 @@ fixChildrenInNgramsStatePatch ns = archiveStateFromList $ nsParents <> nsChildre
-- | Sometimes, when we upload a new list, a child can be left without -- | Sometimes, when we upload a new list, a child can be left without
-- a parent. Find such ngrams and set their 'root' and 'parent' to -- a parent. Find such ngrams and set their 'root' and 'parent' to
-- 'Nothing'. -- 'Nothing'.
fixChildrenWithNoParentStatePatch :: NgramsState' -> NgramsState' fixChildrenWithNoParent :: NgramsState' -> NgramsState'
fixChildrenWithNoParentStatePatch ns = archiveStateFromList $ nsParents <> nsChildrenFixed fixChildrenWithNoParent ns = archiveStateFromList $ nsParents <> nsChildrenFixed
where where
nls = archiveStateToList ns (nsParents, nsChildren) = getParentsChildren ns
nsParents = filter (\(_nt, _t, nre) -> isNothing $ nre ^. nre_parent) nls
parentNtMap = Map.fromList $ (\(_nt, t, nre) -> (t, nre ^. nre_children & mSetToSet)) <$> nsParents parentNtMap = Map.fromList $ (\(_nt, t, nre) -> (t, nre ^. nre_children & mSetToSet)) <$> nsParents
nsChildren = filter (\(_nt, _t, nre) -> isJust $ nre ^. nre_parent) nls
nsChildrenFixFunc (nt, t, nre) = nsChildrenFixFunc (nt, t, nre) =
( nt ( nt
, t , t
...@@ -263,6 +257,30 @@ fixChildrenWithNoParentStatePatch ns = archiveStateFromList $ nsParents <> nsChi ...@@ -263,6 +257,30 @@ fixChildrenWithNoParentStatePatch ns = archiveStateFromList $ nsParents <> nsChi
nsChildrenFixed = nsChildrenFixFunc <$> nsChildren nsChildrenFixed = nsChildrenFixFunc <$> nsChildren
-- | Sometimes children can also become parents (e.g. #313). Find such
-- | children and remove them from the list.
fixChildrenDuplicatedAsParents :: NgramsState' -> NgramsState'
fixChildrenDuplicatedAsParents ns = archiveStateFromList $ nsChildren <> nsParentsFixed
where
(nsParents, nsChildren) = getParentsChildren ns
parentNtMap = Map.fromList $ (\(_nt, t, nre) -> (t, nre ^. nre_children & mSetToSet)) <$> nsParents
parentsSet = Set.fromList $ Map.keys parentNtMap
nsParentsFixed = (\(nt, t, nre) ->
( nt
, t
, over nre_children
(\c -> mSetFromSet $ Set.difference (mSetToSet c) parentsSet)
nre ) ) <$> nsParents
getParentsChildren :: NgramsState' -> (ArchiveStateList, ArchiveStateList)
getParentsChildren ns = (nsParents, nsChildren)
where
nls = archiveStateToList ns
nsParents = filter (\(_nt, _t, nre) -> isNothing $ nre ^. nre_parent) nls
nsChildren = filter (\(_nt, _t, nre) -> isJust $ nre ^. nre_parent) nls
------------------------------------ ------------------------------------
fromDBNodeStoryEnv :: Pool PGS.Connection -> IO NodeStoryEnv fromDBNodeStoryEnv :: Pool PGS.Connection -> IO NodeStoryEnv
...@@ -280,8 +298,15 @@ fromDBNodeStoryEnv pool = do ...@@ -280,8 +298,15 @@ fromDBNodeStoryEnv pool = do
withResource pool $ \c -> do withResource pool $ \c -> do
--printDebug "[mkNodeStorySaver] will call writeNodeStories, ns" ns --printDebug "[mkNodeStorySaver] will call writeNodeStories, ns" ns
-- writeNodeStories c $ fixChildrenWithNoParent $ fixChildrenTermTypes ns -- writeNodeStories c $ fixChildrenWithNoParent $ fixChildrenTermTypes ns
-- |NOTE Fixing a_state is kinda a hack. We shouldn't land
-- |with bad state in the first place.
upsertNodeStories c nId $ upsertNodeStories c nId $
a & a_state %~ (fixChildrenInNgramsStatePatch . fixChildrenWithNoParentStatePatch) a & a_state %~ (
fixChildrenDuplicatedAsParents
. fixChildrenInNgrams
. fixChildrenWithNoParent
)
let archive_saver_immediate nId a = withResource pool $ \c -> do let archive_saver_immediate nId a = withResource pool $ \c -> do
insertNodeArchiveHistory c nId (a ^. a_version) $ reverse $ a ^. a_history insertNodeArchiveHistory c nId (a ^. a_version) $ reverse $ a ^. a_history
pure $ a & a_history .~ [] pure $ a & a_history .~ []
...@@ -295,7 +320,7 @@ fromDBNodeStoryEnv pool = do ...@@ -295,7 +320,7 @@ fromDBNodeStoryEnv pool = do
, _nse_getter = \nId -> withResource pool $ \c -> , _nse_getter = \nId -> withResource pool $ \c ->
getNodeStory' c nId getNodeStory' c nId
, _nse_getter_multi = \nIds -> withResource pool $ \c -> , _nse_getter_multi = \nIds -> withResource pool $ \c ->
foldM (\m nId -> nodeStoryInc c m nId) (NodeStory Map.empty) nIds foldM (nodeStoryInc c) (NodeStory Map.empty) nIds
} }
currentVersion :: (HasNodeStory env err m) => ListId -> m Version currentVersion :: (HasNodeStory env err m) => ListId -> m Version
......
...@@ -47,23 +47,19 @@ module Gargantext.Core.NodeStory.Types ...@@ -47,23 +47,19 @@ module Gargantext.Core.NodeStory.Types
, ArchiveStateList ) , ArchiveStateList )
where where
import Codec.Serialise.Class import Codec.Serialise.Class ( Serialise )
import Control.Lens (makeLenses, Getter, (^.)) import Control.Lens (makeLenses, Getter, (^.))
import Control.Monad.Except
import Control.Monad.Reader
import Data.Aeson hiding ((.=), decode) import Data.Aeson hiding ((.=), decode)
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Monoid
import Data.Profunctor.Product.TH (makeAdaptorAndInstance) import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Data.Semigroup
import Data.Set qualified as Set import Data.Set qualified as Set
import Database.PostgreSQL.Simple.FromField (FromField(fromField), fromJSONField) import Database.PostgreSQL.Simple.FromField (FromField(fromField), fromJSONField)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types (NodeId(..)) import Gargantext.Database.Admin.Types.Node ( NodeId(..) )
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Admin.Config () import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Prelude (DbCmd') import Gargantext.Database.Prelude (DbCmd')
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams import Gargantext.Database.Schema.Ngrams qualified as TableNgrams
import Gargantext.Database.Query.Table.Node.Error (HasNodeError()) import Gargantext.Database.Query.Table.Node.Error (HasNodeError())
import Gargantext.Prelude hiding (to) import Gargantext.Prelude hiding (to)
import Opaleye (DefaultFromField(..), SqlJsonb, fromPGSFromField) import Opaleye (DefaultFromField(..), SqlJsonb, fromPGSFromField)
......
...@@ -23,7 +23,7 @@ import Control.Monad.Except ...@@ -23,7 +23,7 @@ import Control.Monad.Except
import Data.Text qualified as T import Data.Text qualified as T
import EPO.API.Client.Types qualified as EPO import EPO.API.Client.Types qualified as EPO
import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs) import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
import Gargantext.Core (Lang(..), toISO639, toISO639EN) import Gargantext.Core (Lang(..), toISO639)
import Gargantext.Core.Text.Corpus.API.Arxiv qualified as Arxiv import Gargantext.Core.Text.Corpus.API.Arxiv qualified as Arxiv
import Gargantext.Core.Text.Corpus.API.EPO qualified as EPO import Gargantext.Core.Text.Corpus.API.EPO qualified as EPO
import Gargantext.Core.Text.Corpus.API.Hal qualified as HAL import Gargantext.Core.Text.Corpus.API.Hal qualified as HAL
...@@ -47,6 +47,9 @@ data GetCorpusError ...@@ -47,6 +47,9 @@ data GetCorpusError
-- | Get External API metadata main function -- | Get External API metadata main function
get :: ExternalAPIs get :: ExternalAPIs
-> Lang -> Lang
-- ^ A user-selected language in which documents needs to be retrieved.
-- If the provider doesn't support the search filtered by language, or if the language
-- is not important, the frontend will simply send 'EN' to the backend.
-> Corpus.RawQuery -> Corpus.RawQuery
-> Maybe PUBMED.APIKey -> Maybe PUBMED.APIKey
-> Maybe EPO.AuthKey -> Maybe EPO.AuthKey
...@@ -54,26 +57,26 @@ get :: ExternalAPIs ...@@ -54,26 +57,26 @@ get :: ExternalAPIs
-> Maybe Corpus.Limit -> Maybe Corpus.Limit
-- -> IO [HyperdataDocument] -- -> IO [HyperdataDocument]
-> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ())) -> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get externalAPI la q mPubmedAPIKey epoAuthKey epoAPIUrl limit = do get externalAPI lang q mPubmedAPIKey epoAuthKey epoAPIUrl limit = do
-- For PUBMED, HAL, IsTex, Isidore and OpenAlex, we want to send the query as-it. -- For PUBMED, HAL, IsTex, Isidore and OpenAlex, we want to send the query as-it.
-- For Arxiv we parse the query into a structured boolean query we submit over. -- For Arxiv we parse the query into a structured boolean query we submit over.
case externalAPI of case externalAPI of
PubMed -> PubMed ->
first ExternalAPIError <$> PUBMED.get (fromMaybe "" mPubmedAPIKey) q limit first ExternalAPIError <$> PUBMED.get (fromMaybe "" mPubmedAPIKey) q limit
OpenAlex -> OpenAlex ->
first ExternalAPIError <$> OpenAlex.get (fromMaybe "" Nothing {- email -}) q (toISO639 la) limit first ExternalAPIError <$> OpenAlex.get (fromMaybe "" Nothing {- email -}) q (Just $ toISO639 lang) limit
Arxiv -> runExceptT $ do Arxiv -> runExceptT $ do
corpusQuery <- ExceptT (pure parse_query) corpusQuery <- ExceptT (pure parse_query)
ExceptT $ fmap Right (Arxiv.get la corpusQuery limit) ExceptT $ fmap Right (Arxiv.get lang corpusQuery limit)
HAL -> HAL ->
first ExternalAPIError <$> HAL.getC (toISO639 la) (Corpus.getRawQuery q) (Corpus.getLimit <$> limit) first ExternalAPIError <$> HAL.getC (Just $ toISO639 lang) (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
IsTex -> do IsTex -> do
docs <- ISTEX.get la (Corpus.getRawQuery q) (Corpus.getLimit <$> limit) docs <- ISTEX.get lang (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs) pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
Isidore -> do Isidore -> do
docs <- ISIDORE.get la (Corpus.getLimit <$> limit) (Just $ Corpus.getRawQuery q) Nothing docs <- ISIDORE.get lang (Corpus.getLimit <$> limit) (Just $ Corpus.getRawQuery q) Nothing
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs) pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
EPO -> do EPO -> do
first ExternalAPIError <$> EPO.get epoAuthKey epoAPIUrl q (toISO639EN la) limit first ExternalAPIError <$> EPO.get epoAuthKey epoAPIUrl q (toISO639 lang) limit
where where
parse_query = first (InvalidInputQuery q . T.pack) $ Corpus.parseQuery q parse_query = first (InvalidInputQuery q . T.pack) $ Corpus.parseQuery q
...@@ -19,13 +19,11 @@ module Gargantext.Core.Text.Corpus.API.Arxiv ...@@ -19,13 +19,11 @@ module Gargantext.Core.Text.Corpus.API.Arxiv
) where ) where
import Arxiv qualified as Arxiv import Arxiv qualified as Arxiv
import Conduit ( ConduitT, (.|), mapC, takeC ) import Conduit
import Data.Text (unpack)
import Data.Text qualified as Text import Data.Text qualified as Text
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Query as Corpus import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude hiding (get) import Gargantext.Prelude hiding (get)
import Network.Api.Arxiv qualified as Ax import Network.Api.Arxiv qualified as Ax
...@@ -40,9 +38,12 @@ convertQuery q = mkQuery (interpretQuery q transformAST) ...@@ -40,9 +38,12 @@ convertQuery q = mkQuery (interpretQuery q transformAST)
, Ax.qStart = 0 , Ax.qStart = 0
, Ax.qItems = Arxiv.batchSize } , Ax.qItems = Arxiv.batchSize }
mergeTerms :: [QueryTerm] -> Maybe Ax.Expression
mergeTerms trms = Just $ Ax.Exp $ Ax.Abs [Text.unpack $ Text.unwords $ map renderQueryTerm trms]
-- Converts a 'BoolExpr' with 'Term's on the leaves into an Arxiv's expression. -- Converts a 'BoolExpr' with 'Term's on the leaves into an Arxiv's expression.
-- It yields 'Nothing' if the AST cannot be converted into a meaningful expression. -- It yields 'Nothing' if the AST cannot be converted into a meaningful expression.
transformAST :: BoolExpr Term -> Maybe Ax.Expression transformAST :: BoolExpr [QueryTerm] -> Maybe Ax.Expression
transformAST ast = case ast of transformAST ast = case ast of
BAnd sub (BConst (Negative term)) BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated. -- The second term become positive, so that it can be translated.
...@@ -64,11 +65,17 @@ convertQuery q = mkQuery (interpretQuery q transformAST) ...@@ -64,11 +65,17 @@ convertQuery q = mkQuery (interpretQuery q transformAST)
-- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone. -- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
BFalse BFalse
-> Nothing -> Nothing
BConst (Positive (Term term)) -- TODO(adinapoli) Apparently there is some fuzzy search going on under the hood
-> Just $ Ax.Exp $ Ax.Abs [unpack term] -- by Arxiv (see for example https://stackoverflow.com/questions/69003677/arxiv-api-problem-with-searching-for-two-keywords)
-- so it should be sufficient to search for the stemmed term. However, for simplicity and
-- backward compat, at the moment we don't stem.
BConst (Positive terms)
-> mergeTerms terms
-- We can handle negatives via `ANDNOT` with itself. -- We can handle negatives via `ANDNOT` with itself.
BConst (Negative (Term term)) -- TODO(adinapoli) Ditto as per the 'Positive' case (re partial matches)
-> Just $ Ax.AndNot (Ax.Exp $ Ax.Abs [unpack term]) (Ax.Exp $ Ax.Abs [unpack term]) BConst (Negative terms)
-> let term = Text.unpack $ Text.unwords (map renderQueryTerm terms)
in Just $ Ax.AndNot (Ax.Exp $ Ax.Abs [term]) (Ax.Exp $ Ax.Abs [term])
-- | TODO put default pubmed query in gargantext.ini -- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs -- by default: 10K docs
......
...@@ -11,7 +11,12 @@ Portability : POSIX ...@@ -11,7 +11,12 @@ Portability : POSIX
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.Core.Text.Corpus.API.Isidore where module Gargantext.Core.Text.Corpus.API.Isidore (
get
-- * Internals (possibly unused?)
, isidore2csvFile
) where
import Data.Text qualified as Text import Data.Text qualified as Text
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
...@@ -26,10 +31,12 @@ import Isidore.Client ...@@ -26,10 +31,12 @@ import Isidore.Client
import Servant.Client ( ClientError(DecodeFailure) ) import Servant.Client ( ClientError(DecodeFailure) )
-- | TODO work with the ServantErr -- | TODO work with the ServantErr
get :: Lang -> Maybe Isidore.Limit get :: Lang
-> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery -> Maybe Isidore.Limit
-> Maybe Isidore.TextQuery
-> Maybe Isidore.AuthorQuery
-> IO [HyperdataDocument] -> IO [HyperdataDocument]
get la l q a = do get lang l q a = do
let let
printErr (DecodeFailure e _) = panicTrace e printErr (DecodeFailure e _) = panicTrace e
printErr e = panicTrace (show e) printErr e = panicTrace (show e)
...@@ -40,18 +47,18 @@ get la l q a = do ...@@ -40,18 +47,18 @@ get la l q a = do
iDocs <- either printErr _content <$> Isidore.get l q a iDocs <- either printErr _content <$> Isidore.get l q a
hDocs <- mapM (isidoreToDoc la) (toIsidoreDocs iDocs) hDocs <- mapM (isidoreToDoc lang) (toIsidoreDocs iDocs)
pure hDocs pure hDocs
isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit
-> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery -> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
-> IO () -> IO ()
isidore2csvFile fp la li tq aq = do isidore2csvFile fp lang li tq aq = do
hdocs <- get la li tq aq hdocs <- get lang li tq aq
writeDocs2Csv fp hdocs writeDocs2Csv fp hdocs
isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
isidoreToDoc l (IsidoreDoc t a d u s as) = do isidoreToDoc lang (IsidoreDoc t a d u s as) = do
let let
author :: Author -> Text author :: Author -> Text
author (Author fn ln) = _name fn <> ", " <> _name ln author (Author fn ln) = _name fn <> ", " <> _name ln
...@@ -86,5 +93,5 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do ...@@ -86,5 +93,5 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
, _hd_publication_hour = Nothing , _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing , _hd_publication_minute = Nothing
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (Text.pack . show) l , _hd_language_iso2 = Just . Text.pack . show $ lang
} }
{-# LANGUAGE LambdaCase #-}
{-| {-|
Module : Gargantext.Core.Text.Corpus.API.Istex Module : Gargantext.Core.Text.Corpus.API.Istex
Description : Pubmed API connection Description : Pubmed API connection
...@@ -11,6 +13,7 @@ Portability : POSIX ...@@ -11,6 +13,7 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API.Istex module Gargantext.Core.Text.Corpus.API.Istex
( get )
where where
import Data.List qualified as List import Data.List qualified as List
...@@ -18,7 +21,7 @@ import Data.Text qualified as Text ...@@ -18,7 +21,7 @@ import Data.Text qualified as Text
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.JSON.Istex (toDoc) import Gargantext.Core.Text.Corpus.Parsers.JSON.Istex (toDoc)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude import Gargantext.Prelude hiding (get)
import ISTEX qualified as ISTEX import ISTEX qualified as ISTEX
import ISTEX.Client qualified as ISTEX import ISTEX.Client qualified as ISTEX
...@@ -42,10 +45,7 @@ get la query' maxResults = do ...@@ -42,10 +45,7 @@ get la query' maxResults = do
-- In that case we need to enrich his query with 2 parameters -- In that case we need to enrich his query with 2 parameters
-- First expected language: user has to define it in GTXT -- First expected language: user has to define it in GTXT
-- Second : query in abstract -- Second : query in abstract
True -> ("language:"<> lang la) <> " AND abstract:"<>query' True -> ("language:"<> toISTEXLanguageCode la) <> " AND abstract:"<>query'
where
lang FR = "fre"
lang _ = "eng"
False -> query' False -> query'
-- Complex queries of IsTex needs parameters using ":" so we leave the query as it is -- Complex queries of IsTex needs parameters using ":" so we leave the query as it is
...@@ -70,4 +70,18 @@ toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument] ...@@ -70,4 +70,18 @@ toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument]
toDoc' la docs' = mapM (toDoc la) (ISTEX._documents_hits docs') toDoc' la docs' = mapM (toDoc la) (ISTEX._documents_hits docs')
--printDebug "ISTEX" (ISTEX._documents_total docs') --printDebug "ISTEX" (ISTEX._documents_total docs')
-- | Returns the properly-rendered language code according to
-- https://doc.istex.fr/tdm/annexes/codes-langues.html
toISTEXLanguageCode :: Lang -> Text.Text
toISTEXLanguageCode = \case
DE -> "ger"
EL -> "gre"
EN -> "eng"
ES -> "spa"
FR -> "fre"
IT -> "ita"
PL -> "pol"
PT -> "por"
RU -> "Rus"
UK -> "ukr"
ZH -> "chi"
...@@ -10,6 +10,7 @@ Portability : POSIX ...@@ -10,6 +10,7 @@ Portability : POSIX
-} -}
{-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE LambdaCase #-}
module Gargantext.Core.Text.Corpus.API.Pubmed module Gargantext.Core.Text.Corpus.API.Pubmed
( get ( get
...@@ -25,8 +26,7 @@ import Data.Text qualified as Text ...@@ -25,8 +26,7 @@ import Data.Text qualified as Text
import Data.Text.Encoding qualified as TE import Data.Text.Encoding qualified as TE
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Query as Corpus import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude hiding (get) import Gargantext.Prelude hiding (get)
import Network.HTTP.Types.URI (EscapeItem(..), renderQueryPartialEscape) import Network.HTTP.Types.URI (EscapeItem(..), renderQueryPartialEscape)
import PUBMED qualified as PubMed import PUBMED qualified as PubMed
...@@ -60,7 +60,11 @@ getESearch (ESearch items) = ...@@ -60,7 +60,11 @@ getESearch (ESearch items) =
convertQuery :: Corpus.Query -> ESearch convertQuery :: Corpus.Query -> ESearch
convertQuery q = ESearch (interpretQuery q transformAST) convertQuery q = ESearch (interpretQuery q transformAST)
where where
transformAST :: BoolExpr Term -> [EscapeItem]
mergeTerms :: [QueryTerm] -> [EscapeItem]
mergeTerms trms = [QE $ TE.encodeUtf8 (Text.unwords $ map renderQueryTerm trms)]
transformAST :: BoolExpr [QueryTerm] -> [EscapeItem]
transformAST ast = case ast of transformAST ast = case ast of
BAnd sub (BConst (Negative term)) BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated. -- The second term become positive, so that it can be translated.
...@@ -81,11 +85,12 @@ convertQuery q = ESearch (interpretQuery q transformAST) ...@@ -81,11 +85,12 @@ convertQuery q = ESearch (interpretQuery q transformAST)
-- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone. -- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
BFalse BFalse
-> mempty -> mempty
BConst (Positive (Term term)) BConst (Positive terms)
-> [QE (TE.encodeUtf8 term)] -> mergeTerms terms
-- TODO(adinapoli) Support partial match queries
-- We can handle negatives via `ANDNOT` with itself. -- We can handle negatives via `ANDNOT` with itself.
BConst (Negative (Term term)) BConst (Negative terms)
-> [QN "NOT+", QE (TE.encodeUtf8 term)] -> [QN "NOT+"] <> mergeTerms terms
get :: Text get :: Text
-> Corpus.RawQuery -> Corpus.RawQuery
......
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DerivingStrategies #-} {-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Text.Corpus.Query ( module Gargantext.Core.Text.Corpus.Query (
Query -- * opaque Query -- * opaque
, RawQuery(..) , RawQuery(..)
, Limit(..) , Limit(..)
, QueryTerm(..)
, getQuery , getQuery
, parseQuery , parseQuery
, mapQuery , mapQuery
, renderQuery , renderQuery
, renderQueryTerm
, interpretQuery , interpretQuery
, ExternalAPIs(..) , ExternalAPIs(..)
, module BoolExpr , module BoolExpr
...@@ -49,13 +52,32 @@ newtype Limit = Limit { getLimit :: Int } ...@@ -49,13 +52,32 @@ newtype Limit = Limit { getLimit :: Int }
, Aeson.FromJSON, Aeson.ToJSON , Aeson.FromJSON, Aeson.ToJSON
, Swagger.ToParamSchema, Swagger.ToSchema) , Swagger.ToParamSchema, Swagger.ToSchema)
-- | A /query/ term, i.e. a node of the query expression tree which can be
-- either a Gargantext 'Term' (i.e. just a textual value) or something else,
-- like a partial match (i.e. the user is asking to perform a search that would
-- match only a suffix of a word).
data QueryTerm
= QT_exact_match Term
| QT_partial_match Term
deriving (Show, Eq, Ord)
instance IsString QueryTerm where
fromString input = case P.runParser queryTermToken () "Corpus.Query.fromString" input of
Left _ -> QT_exact_match (Term $ T.pack input)
Right [qt] -> qt
Right _ -> QT_exact_match (Term $ T.pack input)
renderQueryTerm :: QueryTerm -> T.Text
renderQueryTerm (QT_exact_match (Term t)) = t
renderQueryTerm (QT_partial_match (Term t)) = t
-- | An opaque wrapper around a 'Query' type which can be parsed from a boolean -- | An opaque wrapper around a 'Query' type which can be parsed from a boolean
-- expression like (a AND b) OR c, and which can be interpreted in many ways -- expression like (a AND b) OR c, and which can be interpreted in many ways
-- according to the particular service we are targeting. -- according to the particular service we are targeting.
newtype Query = Query { getQuery :: (BoolExpr.CNF Term) } newtype Query = Query { getQuery :: (BoolExpr.CNF [QueryTerm]) }
deriving Show deriving Show
interpretQuery :: Query -> (BoolExpr.BoolExpr Term -> ast) -> ast interpretQuery :: Query -> (BoolExpr.BoolExpr [QueryTerm] -> ast) -> ast
interpretQuery (Query q) transform = transform . simplify . BoolExpr.fromCNF $ q interpretQuery (Query q) transform = transform . simplify . BoolExpr.fromCNF $ q
simplify :: BoolExpr.BoolExpr a -> BoolExpr.BoolExpr a simplify :: BoolExpr.BoolExpr a -> BoolExpr.BoolExpr a
...@@ -78,22 +100,36 @@ simplify expr = case expr of ...@@ -78,22 +100,36 @@ simplify expr = case expr of
BFalse -> BFalse BFalse -> BFalse
BConst signed -> BConst signed BConst signed -> BConst signed
unsafeMkQuery :: BoolExpr.BoolExpr Term -> Query unsafeMkQuery :: BoolExpr.BoolExpr [QueryTerm] -> Query
unsafeMkQuery = Query . BoolExpr.boolTreeToCNF unsafeMkQuery = Query . BoolExpr.boolTreeToCNF
termToken :: CharParser st Term queryTermToken :: CharParser st [QueryTerm]
termToken = Term <$> (try (T.pack <$> BoolExpr.identifier) <|> (between dubQuote dubQuote multipleTerms)) queryTermToken = do
map mkQueryTerm <$> termToken
where
mkQueryTerm :: Term -> QueryTerm
mkQueryTerm (Term (T.unpack -> t)) =
case t of
'"' : '~' : rest
-> QT_partial_match (Term $ T.pack $ '"' : rest)
'~' : rest
-> QT_partial_match (Term $ T.pack $ '"' : rest)
_
-> QT_exact_match (Term $ T.pack t)
termToken :: CharParser st [Term]
termToken = (try ((:[]) . Term . T.pack <$> BoolExpr.identifier) <|> (between dubQuote dubQuote multipleTerms))
where where
dubQuote = BoolExpr.symbol "\"" dubQuote = BoolExpr.symbol "\""
multipleTerms = T.intercalate " " . map T.pack <$> sepBy BoolExpr.identifier BoolExpr.whiteSpace multipleTerms = map (Term . T.pack) <$> sepBy BoolExpr.identifier BoolExpr.whiteSpace
-- | Parses an input 'Text' into a 'Query', reporting an error if it fails. -- | Parses an input 'Text' into a 'Query', reporting an error if it fails.
parseQuery :: RawQuery -> Either String Query parseQuery :: RawQuery -> Either String Query
parseQuery (RawQuery txt) = bimap show (Query . BoolExpr.boolTreeToCNF) $ parseQuery (RawQuery txt) = bimap show (Query . BoolExpr.boolTreeToCNF) $
P.runParser (BoolExpr.parseBoolExpr termToken) () "Corpus.Query" (T.unpack txt) P.runParser (BoolExpr.parseBoolExpr queryTermToken) () "Corpus.Query" (T.unpack txt)
renderQuery :: Query -> RawQuery renderQuery :: Query -> RawQuery
renderQuery (Query cnf) = RawQuery . T.pack $ BoolExpr.boolExprPrinter (showsPrec 0) (BoolExpr.fromCNF cnf) "" renderQuery (Query cnf) = RawQuery . T.pack $ BoolExpr.boolExprPrinter (showsPrec 0) (BoolExpr.fromCNF cnf) ""
mapQuery :: (Term -> Term) -> Query -> Query mapQuery :: (QueryTerm -> QueryTerm) -> Query -> Query
mapQuery f = Query . fmap f . getQuery mapQuery f = Query . fmap (map f) . getQuery
...@@ -24,15 +24,13 @@ import Data.HashSet (HashSet) ...@@ -24,15 +24,13 @@ import Data.HashSet (HashSet)
import Data.HashSet qualified as Set import Data.HashSet qualified as Set
import Data.List qualified as List import Data.List qualified as List
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PatchMap
import Data.Patch.Class qualified as Patch (Replace(..))
import Data.Text qualified as Text import Data.Text qualified as Text
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Core (Lang(..), Form, Lem, NLPServerConfig) import Gargantext.Core (Lang(..), Form, Lem, NLPServerConfig)
import Gargantext.Core.Text.List.Group.Prelude import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Social.Patch import Gargantext.Core.Text.List.Social.Patch
import Gargantext.Core.Text.List.Social.Prelude import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.Terms.Mono.Stem (stem) import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Prelude import Gargantext.Prelude
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -73,7 +71,7 @@ groupWith GroupIdentity t = identity t ...@@ -73,7 +71,7 @@ groupWith GroupIdentity t = identity t
groupWith (GroupParams { unGroupParams_lang = l }) t = groupWith (GroupParams { unGroupParams_lang = l }) t =
NgramsTerm NgramsTerm
$ Text.intercalate " " $ Text.intercalate " "
$ map (stem l) $ map (stem l PorterAlgorithm)
-- . take n -- . take n
$ List.sort $ List.sort
-- \$ Set.toList -- \$ Set.toList
...@@ -120,14 +118,5 @@ patch s = case Set.size s > 1 of ...@@ -120,14 +118,5 @@ patch s = case Set.size s > 1 of
let children = List.tail ngrams let children = List.tail ngrams
pure (parent, toNgramsPatch children) pure (parent, toNgramsPatch children)
toNgramsPatch :: [NgramsTerm] -> NgramsPatch
toNgramsPatch children = NgramsPatch children' Patch.Keep
where
children' :: PatchMSet NgramsTerm
children' = PatchMSet
$ fst
$ PatchMap.fromList
$ List.zip children (List.cycle [addPatch])
-- | Instances -- | Instances
makeLenses ''GroupParams makeLenses ''GroupParams
...@@ -57,7 +57,7 @@ docSearchConfig = ...@@ -57,7 +57,7 @@ docSearchConfig =
normaliseQueryToken :: Text -> DocField -> Text normaliseQueryToken :: Text -> DocField -> Text
normaliseQueryToken tok = normaliseQueryToken tok =
let tokStem = ST.stem ST.EN let tokStem = ST.stem ST.EN ST.PorterAlgorithm
in \field -> case field of in \field -> case field of
TitleField -> tokStem tok TitleField -> tokStem tok
AbstractField -> tokStem tok AbstractField -> tokStem tok
......
...@@ -49,7 +49,7 @@ import Gargantext.Core ...@@ -49,7 +49,7 @@ import Gargantext.Core
import Gargantext.Core.Text (sentences, HasText(..)) import Gargantext.Core.Text (sentences, HasText(..))
import Gargantext.Core.Text.Terms.Eleve (mainEleveWith, Tries, Token, buildTries, toToken) import Gargantext.Core.Text.Terms.Eleve (mainEleveWith, Tries, Token, buildTries, toToken)
import Gargantext.Core.Text.Terms.Mono (monoTerms) import Gargantext.Core.Text.Terms.Mono (monoTerms)
import Gargantext.Core.Text.Terms.Mono.Stem (stem) import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Core.Text.Terms.Mono.Token.En (tokenize) import Gargantext.Core.Text.Terms.Mono.Token.En (tokenize)
import Gargantext.Core.Text.Terms.Multi (multiterms) import Gargantext.Core.Text.Terms.Multi (multiterms)
import Gargantext.Core.Types import Gargantext.Core.Types
...@@ -211,7 +211,7 @@ uniText = map (List.filter (not . isPunctuation)) ...@@ -211,7 +211,7 @@ uniText = map (List.filter (not . isPunctuation))
text2term :: Lang -> [Text] -> Terms text2term :: Lang -> [Text] -> Terms
text2term _ [] = Terms [] Set.empty text2term _ [] = Terms [] Set.empty
text2term lang txt = Terms txt (Set.fromList $ map (stem lang) txt) text2term lang txt = Terms txt (Set.fromList $ map (stem lang PorterAlgorithm) txt)
isPunctuation :: Text -> Bool isPunctuation :: Text -> Bool
isPunctuation x = List.elem x $ (Text.pack . pure) isPunctuation x = List.elem x $ (Text.pack . pure)
......
...@@ -19,7 +19,7 @@ import Data.List qualified as L ...@@ -19,7 +19,7 @@ import Data.List qualified as L
import Data.Set qualified as S import Data.Set qualified as S
import Data.Text qualified as T import Data.Text qualified as T
import Gargantext.Core import Gargantext.Core
import Gargantext.Core.Text.Terms.Mono.Stem (stem) import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Prelude hiding (words) import Gargantext.Prelude hiding (words)
import Prelude (String) import Prelude (String)
...@@ -43,7 +43,7 @@ monoTexts = L.concat . monoTextsBySentence ...@@ -43,7 +43,7 @@ monoTexts = L.concat . monoTextsBySentence
-- | TODO use text2term only -- | TODO use text2term only
monoText2term :: Lang -> Text -> Terms monoText2term :: Lang -> Text -> Terms
monoText2term lang txt = Terms [txt] (S.singleton $ stem lang txt) monoText2term lang txt = Terms [txt] (S.singleton $ stem lang PorterAlgorithm txt)
monoTextsBySentence :: Text -> [[Text]] monoTextsBySentence :: Text -> [[Text]]
monoTextsBySentence = map T.words monoTextsBySentence = map T.words
......
{-| {-|
Module : Gargantext.Core.Text.Ngrams.Stem Module : Gargantext.Core.Text.Terms.Mono.Stem
Description : Description : Stemming of mono (i.e. single word) terms.
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
...@@ -14,45 +14,67 @@ not be identical to the morphological root of the word; it is usually ...@@ -14,45 +14,67 @@ not be identical to the morphological root of the word; it is usually
sufficient that related words map to the same stem, even if this stem is sufficient that related words map to the same stem, even if this stem is
not in itself a valid root. not in itself a valid root.
Source : https://en.wikipedia.org/wiki/Stemming Source : https://en.wikipedia.org/wiki/Stemming
A stemmer for English, for example, should identify the string "cats"
(and possibly "catlike", "catty" etc.) as based on the root "cat", and
"stems", "stemmer", "stemming", "stemmed" as based on "stem". A stemming
algorithm reduces the words "fishing", "fished", and "fisher" to the
root word, "fish". On the other hand, "argue", "argued", "argues",
"arguing", and "argus" reduce to the stem "argu" (illustrating the
case where the stem is not itself a word or root) but "argument" and
"arguments" reduce to the stem "argument".
-} -}
module Gargantext.Core.Text.Terms.Mono.Stem (stem, Lang(..)) module Gargantext.Core.Text.Terms.Mono.Stem (
where
import Data.Text qualified as DT
import Gargantext.Core (Lang(..))
import Gargantext.Prelude
import NLP.Stemmer qualified as N
-- (stem, Stemmer(..))
--import Language.Aspell (check, suggest, spellChecker, spellCheckerWithOptions)
--import Language.Aspell.Options (ACOption(..))
-- * Types
StemmingAlgorithm(..),
-- | Stemmer -- * Universal stemming function
stem,
-- A stemmer for English, for example, should identify the string "cats" -- * Handy re-exports
-- (and possibly "catlike", "catty" etc.) as based on the root "cat". Lang(..)
-- and
-- "stems", "stemmer", "stemming", "stemmed" as based on "stem". A stemming
-- algorithm reduces the words "fishing", "fished", and "fisher" to the
-- root word, "fish". On the other hand, "argue", "argued", "argues",
-- "arguing", and "argus" reduce to the stem "argu" (illustrating the
-- case where the stem is not itself a word or root) but "argument" and
-- "arguments" reduce to the stem "argument".
stem :: Lang -> Text -> Text
stem lang = DT.pack . N.stem lang' . DT.unpack
where
lang' = case lang of
EN -> N.English
FR -> N.French
_ -> panicTrace $ DT.pack "not implemented yet"
) where
import Gargantext.Core.Text.Terms.Mono.Stem.Internal.Porter qualified as Porter
import Gargantext.Core.Text.Terms.Mono.Stem.Internal.Lancaster qualified as Lancaster
import Gargantext.Core.Text.Terms.Mono.Stem.Internal.GargPorter qualified as GargPorter
import Gargantext.Core (Lang(..))
import Gargantext.Prelude
-- | A stemming algorithm. There are different stemming algorithm,
-- each with different tradeoffs, strengths and weaknesses. Typically
-- one uses one or the other based on the given task at hand.
data StemmingAlgorithm
= -- | The porter algorithm is the classic stemming algorithm, possibly
-- one of the most widely used.
PorterAlgorithm
-- | Slight variation of the porter algorithm; it's more aggressive with
-- stemming, which might or might not be what you want. It also makes some
-- subtle chances to the stem; for example, the stemming of \"dancer\" using
-- Porter is simply \"dancer\" (i.e. it cannot be further stemmed). Using
-- Lancaster we would get \"dant\", which is not a prefix of the initial word anymore.
| LancasterAlgorithm
-- | A variation of the Porter algorithm tailored for Gargantext.
| GargPorterAlgorithm
deriving (Show, Eq, Ord)
-- | Stems the input 'Text' based on the input 'Lang' and using the
-- given 'StemmingAlgorithm'.
stem :: Lang -> StemmingAlgorithm -> Text -> Text
stem lang algo unstemmed = case algo of
PorterAlgorithm
-> Porter.stem lang unstemmed
LancasterAlgorithm
| EN <- lang
-> Lancaster.stem unstemmed
| otherwise
-> unstemmed -- Lancaster doesn't support any other language which is not english.
GargPorterAlgorithm
| EN <- lang
-> GargPorter.stem unstemmed
| otherwise
-> unstemmed -- Our garg porter doesn't support other languages other than english.
...@@ -16,7 +16,7 @@ Adapted from: ...@@ -16,7 +16,7 @@ Adapted from:
-} -}
module Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt) module Gargantext.Core.Text.Terms.Mono.Stem.Internal.GargPorter (stem)
where where
import Control.Monad import Control.Monad
...@@ -194,8 +194,8 @@ step5 = step5b . step5a ...@@ -194,8 +194,8 @@ step5 = step5b . step5a
allSteps :: [Char] -> [Char] allSteps :: [Char] -> [Char]
allSteps = step5 . step4 . step3 . step2 . step1 allSteps = step5 . step4 . step3 . step2 . step1
stemIt :: Text -> Text stem :: Text -> Text
stemIt s = pack (stem' $ unpack s) stem s = pack (stem' $ unpack s)
stem' :: [Char] -> [Char] stem' :: [Char] -> [Char]
stem' s | length s < 3 = s stem' s | length s < 3 = s
......
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Core.Text.Terms.Mono.Stem.Lancaster module Gargantext.Core.Text.Terms.Mono.Stem.Internal.Lancaster
( stemIt ( stem
) where ) where
import Prelude import Prelude
...@@ -84,8 +84,8 @@ vowelsSet :: String ...@@ -84,8 +84,8 @@ vowelsSet :: String
vowelsSet = "aeiouy" vowelsSet = "aeiouy"
{-# INLINE vowelsSet #-} {-# INLINE vowelsSet #-}
stemIt :: Text -> Text stem :: Text -> Text
stemIt inputText = lancasterStemmer inputText rulesPaper stem inputText = lancasterStemmer inputText rulesPaper
-- Lancaster Stemmer -- Lancaster Stemmer
lancasterStemmer :: Text -> RuleCollection -> Text lancasterStemmer :: Text -> RuleCollection -> Text
...@@ -113,8 +113,8 @@ applyRules value isIntact rules = ...@@ -113,8 +113,8 @@ applyRules value isIntact rules =
then Nothing then Nothing
else case T.stripSuffix m val of else case T.stripSuffix m val of
Nothing -> Nothing Nothing -> Nothing
Just stem -> Just stm ->
let next = stem `T.append` r let next = stm `T.append` r
in if not (acceptable next) in if not (acceptable next)
then Nothing then Nothing
else if t == cont || t == contint else if t == cont || t == contint
......
{-# LANGUAGE LambdaCase #-}
module Gargantext.Core.Text.Terms.Mono.Stem.Internal.Porter
( stem ) where
import Prelude
import Data.Text qualified as T
import Gargantext.Core (Lang(..))
import NLP.Stemmer qualified as N
fromGargLang :: Lang -> N.Stemmer
fromGargLang = \case
DE -> N.German
EL -> N.Porter -- no greek specialised algo, defaults to 'Porter'
EN -> N.English
ES -> N.Spanish
FR -> N.French
IT -> N.Italian
PL -> N.Porter -- no Polish specialised algo, defaults to 'Porter'
PT -> N.Portuguese
RU -> N.Russian
UK -> N.Porter -- no Ukraine specialised algo, defaults to 'Porter'
ZH -> N.Porter -- no chinese specialised algo, defaults to 'Porter'
stem :: Lang -> T.Text -> T.Text
stem lang = T.pack . N.stem (fromGargLang lang) . T.unpack
...@@ -76,7 +76,7 @@ import Gargantext.Core.Text.List (buildNgramsLists) ...@@ -76,7 +76,7 @@ import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..)) import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
import Gargantext.Core.Text.List.Social (FlowSocialListWith(..)) import Gargantext.Core.Text.List.Social (FlowSocialListWith(..))
import Gargantext.Core.Text.Terms import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt) import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Core.Types (HasValidationError, TermsCount) import Gargantext.Core.Types (HasValidationError, TermsCount)
import Gargantext.Core.Types.Individu (User(..)) import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main import Gargantext.Core.Types.Main
...@@ -138,12 +138,12 @@ getDataText (ExternalOrigin api) la q mPubmedAPIKey mAuthKey li = do ...@@ -138,12 +138,12 @@ getDataText (ExternalOrigin api) la q mPubmedAPIKey mAuthKey li = do
cfg <- view hasConfig cfg <- view hasConfig
eRes <- liftBase $ API.get api (_tt_lang la) q mPubmedAPIKey mAuthKey (_gc_epo_api_url cfg) li eRes <- liftBase $ API.get api (_tt_lang la) q mPubmedAPIKey mAuthKey (_gc_epo_api_url cfg) li
pure $ DataNew <$> eRes pure $ DataNew <$> eRes
getDataText (InternalOrigin _) _la q _ _ _li = do getDataText (InternalOrigin _) la q _ _ _li = do
(_masterUserId, _masterRootId, cId) <- getOrMk_RootWithCorpus (_masterUserId, _masterRootId, cId) <- getOrMk_RootWithCorpus
(UserName userMaster) (UserName userMaster)
(Left "") (Left "")
(Nothing :: Maybe HyperdataCorpus) (Nothing :: Maybe HyperdataCorpus)
ids <- map fst <$> searchDocInDatabase cId (stemIt $ API.getRawQuery q) ids <- map fst <$> searchDocInDatabase cId (stem (_tt_lang la) GargPorterAlgorithm $ API.getRawQuery q)
pure $ Right $ DataOld ids pure $ Right $ DataOld ids
getDataText_Debug :: (HasNodeError err) getDataText_Debug :: (HasNodeError err)
......
...@@ -33,7 +33,7 @@ import Data.Text qualified as T ...@@ -33,7 +33,7 @@ import Data.Text qualified as T
import Data.Time (UTCTime) import Data.Time (UTCTime)
import Gargantext.Core import Gargantext.Core
import Gargantext.Core.Text.Corpus.Query qualified as API import Gargantext.Core.Text.Corpus.Query qualified as API
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt) import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Core.Types.Query (IsTrash, Limit, Offset) import Gargantext.Core.Types.Query (IsTrash, Limit, Offset)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), HyperdataContact(..))
...@@ -61,7 +61,41 @@ import Opaleye qualified as O hiding (Order) ...@@ -61,7 +61,41 @@ import Opaleye qualified as O hiding (Order)
queryToTsSearch :: API.Query -> Field SqlTSQuery queryToTsSearch :: API.Query -> Field SqlTSQuery
queryToTsSearch q = sqlToTSQuery $ T.unpack $ (API.interpretQuery q transformAST) queryToTsSearch q = sqlToTSQuery $ T.unpack $ (API.interpretQuery q transformAST)
where where
transformAST :: BoolExpr Term -> T.Text
-- It's important to understand how things work under the hood: When we perform
-- a search, we do it on a /ts vector/ in Postgres, which is already stemmed in
-- lexemes. For example, this:
--
-- SELECT to_tsvector('Effects on postpartum on vitamins and minerals in women');
--
-- yields:
--
-- 'effect':1 'miner':7 'postpartum':3 'vitamin':5 'women':9
--
-- As you can see, minimum processing has happened: plurals have been stripped and
-- what it looks like the Porter stemming has been applied (we get 'miner' instead
-- of the original /mineral/, for example.
--
-- Therefore, in case of exact match searches, we need to perform stemming /regardless/,
-- and this stemming should ideally match the one performed by Postgres.
--
-- Now, if the user is doing a partial match search (like \"~postpartum\" for example)
-- then we need to stem /AND/ use the \":*\" operator to perform a
-- sort of fuzzy search. Compare the followings:
--
-- SELECT to_tsvector('Effects on postpartum on vitamins and minerals in women') @@ to_tsquery('postpartum');
-- SELECT to_tsvector('Effects on postpartum on vitamins and minerals in women') @@ to_tsquery('postpart');
-- SELECT to_tsvector('Effects on postpartum on vitamins and minerals in women') @@ to_tsquery('postpart:*');
--
-- The first will match, the second won't, the third will.
renderQueryTerms :: [API.QueryTerm] -> T.Text
renderQueryTerms trms = T.intercalate " & " $ trms <&> \case
API.QT_exact_match (Term term)
-> stem EN GargPorterAlgorithm term
API.QT_partial_match (Term term)
-> stem EN GargPorterAlgorithm term <> ":*"
transformAST :: BoolExpr [API.QueryTerm] -> T.Text
transformAST ast = case ast of transformAST ast = case ast of
BAnd sub1 sub2 BAnd sub1 sub2
-> " (" <> transformAST sub1 <> " & " <> transformAST sub2 <> ") " -> " (" <> transformAST sub1 <> " & " <> transformAST sub2 <> ") "
...@@ -77,11 +111,11 @@ queryToTsSearch q = sqlToTSQuery $ T.unpack $ (API.interpretQuery q transformAST ...@@ -77,11 +111,11 @@ queryToTsSearch q = sqlToTSQuery $ T.unpack $ (API.interpretQuery q transformAST
-- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone. -- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
BFalse BFalse
-> T.empty -> T.empty
BConst (Positive (Term term)) BConst (Positive queryTerms)
-> T.intercalate " & " $ T.words term -> renderQueryTerms queryTerms
-- We can handle negatives via `ANDNOT` with itself. -- We can handle negatives via `ANDNOT` with itself.
BConst (Negative (Term term)) BConst (Negative queryTerms)
-> "!" <> term -> "!" <> renderQueryTerms queryTerms
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -181,7 +215,7 @@ searchInCorpus :: HasDBid NodeType ...@@ -181,7 +215,7 @@ searchInCorpus :: HasDBid NodeType
searchInCorpus cId t q o l order = runOpaQuery searchInCorpus cId t q o l order = runOpaQuery
$ filterWith o l order $ filterWith o l order
$ queryInCorpus cId t $ queryInCorpus cId t
$ API.mapQuery (Term . stemIt . getTerm) q $ q
searchCountInCorpus :: HasDBid NodeType searchCountInCorpus :: HasDBid NodeType
=> CorpusId => CorpusId
...@@ -190,7 +224,7 @@ searchCountInCorpus :: HasDBid NodeType ...@@ -190,7 +224,7 @@ searchCountInCorpus :: HasDBid NodeType
-> DBCmd err Int -> DBCmd err Int
searchCountInCorpus cId t q = runCountOpaQuery searchCountInCorpus cId t q = runCountOpaQuery
$ queryInCorpus cId t $ queryInCorpus cId t
$ API.mapQuery (Term . stemIt . getTerm) q $ q
queryInCorpus :: HasDBid NodeType queryInCorpus :: HasDBid NodeType
=> CorpusId => CorpusId
...@@ -233,7 +267,7 @@ searchInCorpusWithContacts cId aId q o l _order = ...@@ -233,7 +267,7 @@ searchInCorpusWithContacts cId aId q o l _order =
$ offset' o $ offset' o
$ orderBy (desc _fp_score) $ orderBy (desc _fp_score)
$ selectGroup cId aId $ selectGroup cId aId
$ API.mapQuery (Term . stemIt . getTerm) q $ q
selectGroup :: HasDBid NodeType selectGroup :: HasDBid NodeType
=> CorpusId => CorpusId
......
...@@ -19,7 +19,7 @@ import Data.String (IsString(..)) ...@@ -19,7 +19,7 @@ import Data.String (IsString(..))
import Database.PostgreSQL.Simple (Query) import Database.PostgreSQL.Simple (Query)
import Database.PostgreSQL.Simple.ToField import Database.PostgreSQL.Simple.ToField
import Gargantext.Core import Gargantext.Core
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt) import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Core.Types.Query (Limit, Offset) import Gargantext.Core.Types.Query (Limit, Offset)
import Gargantext.Database.Prelude (DBCmd, runPGSQuery) import Gargantext.Database.Prelude (DBCmd, runPGSQuery)
...@@ -30,7 +30,7 @@ newtype TSQuery = UnsafeTSQuery [Text] ...@@ -30,7 +30,7 @@ newtype TSQuery = UnsafeTSQuery [Text]
-- | TODO [""] -> panic "error" -- | TODO [""] -> panic "error"
toTSQuery :: [Text] -> TSQuery toTSQuery :: [Text] -> TSQuery
toTSQuery txt = UnsafeTSQuery $ map stemIt txt toTSQuery txt = UnsafeTSQuery $ map (stem EN GargPorterAlgorithm) txt
instance IsString TSQuery instance IsString TSQuery
......
...@@ -20,19 +20,19 @@ module Gargantext.Utils.Jobs ( ...@@ -20,19 +20,19 @@ module Gargantext.Utils.Jobs (
, MonadJobStatus(..) , MonadJobStatus(..)
) where ) where
import Control.Monad.Except import Control.Monad.Except ( runExceptT )
import Control.Monad.Reader import Control.Monad.Reader ( MonadReader(ask), ReaderT(runReaderT) )
import Data.Aeson (ToJSON) import Data.Aeson (ToJSON)
import Prelude import Prelude
import System.Directory (doesFileExist) import System.Directory (doesFileExist)
import Text.Read (readMaybe) import Text.Read (readMaybe)
import qualified Data.Text as T import qualified Data.Text as T
import Gargantext.API.Admin.EnvTypes import Gargantext.API.Admin.EnvTypes ( mkJobHandle, Env, GargJob(..) )
import Gargantext.API.Errors.Types import Gargantext.API.Errors.Types ( BackendInternalError(InternalJobError) )
import Gargantext.API.Prelude import Gargantext.API.Prelude ( GargM )
import qualified Gargantext.Utils.Jobs.Internal as Internal import qualified Gargantext.Utils.Jobs.Internal as Internal
import Gargantext.Utils.Jobs.Monad import Gargantext.Utils.Jobs.Monad ( JobError, MonadJobStatus(..) )
import Gargantext.System.Logging import Gargantext.System.Logging
import qualified Servant.Job.Async as SJ import qualified Servant.Job.Async as SJ
...@@ -49,7 +49,7 @@ serveJobsAPI ...@@ -49,7 +49,7 @@ serveJobsAPI
, ToJSON (JobEventType m) , ToJSON (JobEventType m)
, ToJSON (JobOutputType m) , ToJSON (JobOutputType m)
, MonadJobStatus m , MonadJobStatus m
, m ~ (GargM Env BackendInternalError) , m ~ GargM Env BackendInternalError
, JobEventType m ~ JobOutputType m , JobEventType m ~ JobOutputType m
, MonadLogger m , MonadLogger m
) )
......
...@@ -33,7 +33,6 @@ data JSSpell = JSPOS Lang | JSLemma Lang ...@@ -33,7 +33,6 @@ data JSSpell = JSPOS Lang | JSLemma Lang
deriving (Show) deriving (Show)
instance ToJSON JSSpell where instance ToJSON JSSpell where
toJSON (JSPOS All) = "pos"
toJSON (JSPOS DE) = "de.pos" toJSON (JSPOS DE) = "de.pos"
toJSON (JSPOS EL) = "el.pos" toJSON (JSPOS EL) = "el.pos"
toJSON (JSPOS EN) = "en.pos" toJSON (JSPOS EN) = "en.pos"
...@@ -46,7 +45,6 @@ instance ToJSON JSSpell where ...@@ -46,7 +45,6 @@ instance ToJSON JSSpell where
toJSON (JSPOS UK) = "uk.pos" toJSON (JSPOS UK) = "uk.pos"
toJSON (JSPOS ZH) = "zh.pos" toJSON (JSPOS ZH) = "zh.pos"
toJSON (JSLemma All) = "lemma"
toJSON (JSLemma DE) = "de.lemma" toJSON (JSLemma DE) = "de.lemma"
toJSON (JSLemma EL) = "el.lemma" toJSON (JSLemma EL) = "el.lemma"
toJSON (JSLemma EN) = "en.lemma" toJSON (JSLemma EN) = "en.lemma"
...@@ -71,7 +69,6 @@ instance FromJSON JSSpell where ...@@ -71,7 +69,6 @@ instance FromJSON JSSpell where
parseJSON (String "ru.pos") = pure $ JSPOS RU parseJSON (String "ru.pos") = pure $ JSPOS RU
parseJSON (String "uk.pos") = pure $ JSPOS UK parseJSON (String "uk.pos") = pure $ JSPOS UK
parseJSON (String "zh.pos") = pure $ JSPOS ZH parseJSON (String "zh.pos") = pure $ JSPOS ZH
parseJSON (String "pos") = pure $ JSPOS All
parseJSON (String "de.lemma") = pure $ JSLemma DE parseJSON (String "de.lemma") = pure $ JSLemma DE
parseJSON (String "en.lemma") = pure $ JSLemma EN parseJSON (String "en.lemma") = pure $ JSLemma EN
...@@ -84,7 +81,6 @@ instance FromJSON JSSpell where ...@@ -84,7 +81,6 @@ instance FromJSON JSSpell where
parseJSON (String "ru.lemma") = pure $ JSLemma RU parseJSON (String "ru.lemma") = pure $ JSLemma RU
parseJSON (String "uk.lemma") = pure $ JSLemma UK parseJSON (String "uk.lemma") = pure $ JSLemma UK
parseJSON (String "zh.lemma") = pure $ JSLemma ZH parseJSON (String "zh.lemma") = pure $ JSLemma ZH
parseJSON (String "lemma") = pure $ JSLemma All
parseJSON s = parseJSON s =
prependFailure "parsing spell failed, " prependFailure "parsing spell failed, "
(typeMismatch "Spell" s) (typeMismatch "Spell" s)
......
...@@ -147,7 +147,7 @@ ...@@ -147,7 +147,7 @@
git: "https://gitlab.iscpif.fr/gargantext/crawlers/istex.git" git: "https://gitlab.iscpif.fr/gargantext/crawlers/istex.git"
subdirs: subdirs:
- . - .
- commit: cd179f6dda15d77a085c0176284c921b7bc50c46 - commit: ceb8f2cebd4890b6d9d151ab01ee14e925bc0499
git: "https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git" git: "https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git"
subdirs: subdirs:
- . - .
...@@ -323,7 +323,7 @@ flags: ...@@ -323,7 +323,7 @@ flags:
"full-text-search": "full-text-search":
"build-search-demo": false "build-search-demo": false
gargantext: gargantext:
"disable-db-obfuscation-executable": true "disable-db-obfuscation-executable": false
"no-phylo-debug-logs": false "no-phylo-debug-logs": false
"test-crypto": false "test-crypto": false
"generic-deriving": "generic-deriving":
......
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
{-# OPTIONS_GHC -Wno-orphans #-} {-# OPTIONS_GHC -Wno-orphans #-}
{-# LANGUAGE BangPatterns #-}
module Test.API.Authentication ( module Test.API.Authentication (
tests tests
...@@ -21,13 +21,11 @@ import Network.HTTP.Client hiding (Proxy) ...@@ -21,13 +21,11 @@ import Network.HTTP.Client hiding (Proxy)
import Prelude qualified import Prelude qualified
import Servant.Auth.Client () import Servant.Auth.Client ()
import Servant.Client import Servant.Client
import Test.API.Routes (auth_api)
import Test.API.Setup (withTestDBAndPort, setupEnvironment) import Test.API.Setup (withTestDBAndPort, setupEnvironment)
import Test.Database.Types import Test.Database.Types
import Test.Hspec import Test.Hspec
auth_api :: AuthRequest -> ClientM AuthResponse
auth_api = client (Proxy :: Proxy (MkGargAPI (GargAPIVersion AuthAPI)))
cannedToken :: T.Text cannedToken :: T.Text
cannedToken = "eyJhbGciOiJIUzUxMiJ9.eyJkYXQiOnsiaWQiOjF9fQ.t49zZSqkPAulEkYEh4pW17H2uwrkyPTdZKwHyG3KUJ0hzU2UUoPBNj8vdv087RCVBJ4tXgxNbP4j0RBv3gxdqg" cannedToken = "eyJhbGciOiJIUzUxMiJ9.eyJkYXQiOnsiaWQiOjF9fQ.t49zZSqkPAulEkYEh4pW17H2uwrkyPTdZKwHyG3KUJ0hzU2UUoPBNj8vdv087RCVBJ4tXgxNbP4j0RBv3gxdqg"
...@@ -66,7 +64,7 @@ tests = sequential $ aroundAll withTestDBAndPort $ do ...@@ -66,7 +64,7 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
, _authRes_user_id = fromMaybe (UnsafeMkUserId 1) $ listToMaybe $ result0 ^.. _Right . authRes_user_id , _authRes_user_id = fromMaybe (UnsafeMkUserId 1) $ listToMaybe $ result0 ^.. _Right . authRes_user_id
} }
result `shouldBe` (Right expected) result `shouldBe` Right expected
it "denies login for user 'alice' if password is invalid" $ \((_testEnv, port), _) -> do it "denies login for user 'alice' if password is invalid" $ \((_testEnv, port), _) -> do
let authPayload = AuthRequest "alice" (GargPassword "wrong") let authPayload = AuthRequest "alice" (GargPassword "wrong")
......
...@@ -9,13 +9,14 @@ import Network.HTTP.Types ...@@ -9,13 +9,14 @@ import Network.HTTP.Types
import Network.Wai.Test import Network.Wai.Test
import Servant import Servant
import Servant.Auth.Client () import Servant.Auth.Client ()
import Servant.Auth.Client qualified as SA
import Servant.Client import Servant.Client
import Test.API.Private (protected, withValidLogin, protectedNewError) import Test.API.Routes (mkUrl)
import Test.API.Setup (withTestDBAndPort, setupEnvironment, mkUrl, createAliceAndBob) import Test.API.Setup (withTestDBAndPort, setupEnvironment, createAliceAndBob)
import Test.Hspec import Test.Hspec
import Test.Hspec.Wai.Internal (withApplication) import Test.Hspec.Wai.Internal (withApplication)
import Test.Utils (protected, withValidLogin, protectedNewError)
import Text.RawString.QQ (r) import Text.RawString.QQ (r)
import qualified Servant.Auth.Client as SA
tests :: Spec tests :: Spec
tests = sequential $ aroundAll withTestDBAndPort $ do tests = sequential $ aroundAll withTestDBAndPort $ do
......
...@@ -10,12 +10,11 @@ module Test.API.GraphQL ( ...@@ -10,12 +10,11 @@ module Test.API.GraphQL (
import Gargantext.Core.Types.Individu import Gargantext.Core.Types.Individu
import Prelude import Prelude
import Servant.Auth.Client () import Servant.Auth.Client ()
import Test.API.Private (withValidLogin, protected, protectedNewError)
import Test.API.Setup (withTestDBAndPort, setupEnvironment, createAliceAndBob) import Test.API.Setup (withTestDBAndPort, setupEnvironment, createAliceAndBob)
import Test.Hspec import Test.Hspec
import Test.Hspec.Wai.Internal (withApplication) import Test.Hspec.Wai.Internal (withApplication)
import Test.Hspec.Wai.JSON (json) import Test.Hspec.Wai.JSON (json)
import Test.Utils import Test.Utils (protected, protectedNewError, shouldRespondWithFragment, shouldRespondWithFragmentCustomStatus, withValidLogin)
import Text.RawString.QQ (r) import Text.RawString.QQ (r)
tests :: Spec tests :: Spec
......
...@@ -5,117 +5,24 @@ ...@@ -5,117 +5,24 @@
module Test.API.Private ( module Test.API.Private (
tests tests
-- * Utility functions
, withValidLogin
, getJSON
, protected
, protectedJSON
, postJSONUrlEncoded
, protectedNewError
, protectedWith
) where ) where
import Data.Aeson qualified as JSON
import Data.ByteString.Lazy qualified as L
import Data.ByteString.Lazy.Char8 qualified as C8L
import Data.CaseInsensitive qualified as CI
import Data.Map.Strict qualified as Map
import Data.Text.Encoding qualified as TE import Data.Text.Encoding qualified as TE
import Gargantext.API.Admin.Auth.Types
import Gargantext.API.Routes import Gargantext.API.Routes
import Gargantext.Core.Types.Individu import Gargantext.Core.Types.Individu
import Gargantext.Prelude hiding (get) import Gargantext.Prelude hiding (get)
import Network.HTTP.Client hiding (Proxy) import Network.HTTP.Client hiding (Proxy)
import Network.HTTP.Types
import Network.Wai.Handler.Warp qualified as Wai
import Network.Wai.Test (SResponse (..))
import Prelude qualified
import Servant import Servant
import Servant.Auth.Client () import Servant.Auth.Client ()
import Servant.Auth.Client qualified as SA import Servant.Auth.Client qualified as SA
import Servant.Client import Servant.Client
import Test.API.Authentication (auth_api) import Test.API.Routes (mkUrl)
import Test.API.Setup (withTestDBAndPort, setupEnvironment, mkUrl, createAliceAndBob) import Test.API.Setup (withTestDBAndPort, setupEnvironment, createAliceAndBob)
import Test.Hspec import Test.Hspec
import Test.Hspec.Wai hiding (pendingWith) import Test.Hspec.Wai hiding (pendingWith)
import Test.Hspec.Wai.Internal (withApplication) import Test.Hspec.Wai.Internal (withApplication)
import Test.Hspec.Wai.JSON (json) import Test.Hspec.Wai.JSON (json)
import Test.Utils (shouldRespondWithFragment) import Test.Utils (protected, shouldRespondWithFragment, withValidLogin)
-- | Issue a request with a valid 'Authorization: Bearer' inside.
protected :: HasCallStack
=> Token
-> Method
-> ByteString
-> L.ByteString
-> WaiSession () SResponse
protected tkn mth url = protectedWith mempty tkn mth url
protectedJSON :: forall a. (JSON.FromJSON a, Typeable a, HasCallStack)
=> Token
-> Method
-> ByteString
-> JSON.Value
-> WaiSession () a
protectedJSON tkn mth url = protectedJSONWith mempty tkn mth url
protectedJSONWith :: forall a. (JSON.FromJSON a, Typeable a, HasCallStack)
=> [Network.HTTP.Types.Header]
-> Token
-> Method
-> ByteString
-> JSON.Value
-> WaiSession () a
protectedJSONWith hdrs tkn mth url jsonV = do
SResponse{..} <- protectedWith hdrs tkn mth url (JSON.encode jsonV)
case JSON.eitherDecode simpleBody of
Left err -> Prelude.fail $ "protectedJSON failed when parsing " <> show (typeRep $ Proxy @a) <> ": " <> err
Right x -> pure x
protectedWith :: HasCallStack
=> [Network.HTTP.Types.Header]
-> Token
-> Method -> ByteString -> L.ByteString -> WaiSession () SResponse
protectedWith extraHeaders tkn mth url payload =
-- Using a map means that if any of the extra headers contains a clashing header name,
-- the extra headers will take precedence.
let defaultHeaders = [ (hAccept, "application/json;charset=utf-8")
, (hContentType, "application/json")
, (hAuthorization, "Bearer " <> TE.encodeUtf8 tkn)
]
hdrs = Map.toList $ Map.fromList $ defaultHeaders <> extraHeaders
in request mth url hdrs payload
protectedNewError :: HasCallStack => Token -> Method -> ByteString -> L.ByteString -> WaiSession () SResponse
protectedNewError tkn mth url = protectedWith newErrorFormat tkn mth url
where
newErrorFormat = [(CI.mk "X-Garg-Error-Scheme", "new")]
getJSON :: Token -> ByteString -> WaiSession () SResponse
getJSON tkn url = protectedWith mempty tkn "GET" url ""
postJSONUrlEncoded :: forall a. (JSON.FromJSON a, Typeable a, HasCallStack)
=> Token
-> ByteString
-> L.ByteString
-> WaiSession () a
postJSONUrlEncoded tkn url queryPaths = do
SResponse{..} <- protectedWith [(hContentType, "application/x-www-form-urlencoded")] tkn "POST" url queryPaths
case JSON.eitherDecode simpleBody of
Left err -> Prelude.fail $ "postJSONUrlEncoded failed when parsing " <> show (typeRep $ Proxy @a) <> ": " <> err <> "\nPayload was: " <> (C8L.unpack simpleBody)
Right x -> pure x
withValidLogin :: (MonadFail m, MonadIO m) => Wai.Port -> Username -> GargPassword -> (Token -> m a) -> m a
withValidLogin port ur pwd act = do
baseUrl <- liftIO $ parseBaseUrl "http://localhost"
manager <- liftIO $ newManager defaultManagerSettings
let clientEnv = mkClientEnv manager (baseUrl { baseUrlPort = port })
let authPayload = AuthRequest ur pwd
result <- liftIO $ runClientM (auth_api authPayload) clientEnv
case result of
Left err -> liftIO $ throwIO $ Prelude.userError (show err)
Right res -> act $ _authRes_token res
tests :: Spec tests :: Spec
......
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeOperators #-}
module Test.API.Routes where
import Fmt (Builder, (+|), (|+))
import Gargantext.API.Admin.Auth.Types (AuthRequest, AuthResponse)
import Gargantext.API.Ngrams (TableNgramsApiGet, TableNgramsApiPut)
import Gargantext.API.Ngrams.Types ( NgramsTable, NgramsTablePatch, OrderBy, TabType, Versioned, VersionedWithCount )
import Gargantext.API.Routes (AuthAPI, GargAPIVersion, MkGargAPI)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Types.Main (ListType)
import Gargantext.Core.Types.Query (Limit, MaxSize, MinSize, Offset)
import Gargantext.Prelude
import Network.Wai.Handler.Warp (Port)
import Servant ((:>), Capture)
import Servant.Client (ClientM, client)
-- This is for requests made by http.client directly to hand-crafted URLs
curApi :: Builder
curApi = "v1.0"
mkUrl :: Port -> Builder -> ByteString
mkUrl _port urlPiece =
"/api/" +| curApi |+ urlPiece
-- This is for Servant.Client requests
auth_api :: AuthRequest -> ClientM AuthResponse
auth_api = client (Proxy :: Proxy (MkGargAPI (GargAPIVersion AuthAPI)))
-- | Shortcut for TableNgramsApiGet full path
type APITableNgramsGet = MkGargAPI (GargAPIVersion ( "node"
:> Capture "node_id" NodeId
:> "ngrams"
:> TableNgramsApiGet ) )
table_ngrams_get_api :: NodeId
-> TabType
-> NodeId
-> Limit
-> Maybe Offset
-> Maybe ListType
-> Maybe MinSize
-> Maybe MaxSize
-> Maybe OrderBy
-> Maybe Text
-> ClientM (VersionedWithCount NgramsTable)
table_ngrams_get_api = client (Proxy :: Proxy APITableNgramsGet)
type APITableNgramsPut = MkGargAPI (GargAPIVersion ( "node"
:> Capture "node_id" NodeId
:> "ngrams"
:> TableNgramsApiPut ) )
table_ngrams_put_api :: NodeId
-> TabType
-> NodeId
-> Versioned NgramsTablePatch
-> ClientM (Versioned NgramsTablePatch)
table_ngrams_put_api = client (Proxy :: Proxy APITableNgramsPut)
...@@ -5,8 +5,6 @@ module Test.API.Setup where ...@@ -5,8 +5,6 @@ module Test.API.Setup where
import Control.Lens import Control.Lens
import Control.Monad.Reader import Control.Monad.Reader
import Data.ByteString (ByteString)
import Fmt (Builder, (+|), (|+))
import Gargantext.API (makeApp) import Gargantext.API (makeApp)
import Gargantext.API.Admin.EnvTypes (Mode(Mock), Env (..)) import Gargantext.API.Admin.EnvTypes (Mode(Mock), Env (..))
import Gargantext.API.Admin.Settings import Gargantext.API.Admin.Settings
...@@ -40,7 +38,6 @@ import qualified Gargantext.Utils.Jobs.Monad as Jobs ...@@ -40,7 +38,6 @@ import qualified Gargantext.Utils.Jobs.Monad as Jobs
import qualified Gargantext.Utils.Jobs.Queue as Jobs import qualified Gargantext.Utils.Jobs.Queue as Jobs
import qualified Gargantext.Utils.Jobs.Settings as Jobs import qualified Gargantext.Utils.Jobs.Settings as Jobs
import qualified Network.Wai.Handler.Warp as Warp import qualified Network.Wai.Handler.Warp as Warp
import qualified Network.Wai.Handler.Warp as Wai
import qualified Servant.Job.Async as ServantAsync import qualified Servant.Job.Async as ServantAsync
...@@ -117,10 +114,3 @@ createAliceAndBob testEnv = do ...@@ -117,10 +114,3 @@ createAliceAndBob testEnv = do
void $ new_user nur1 void $ new_user nur1
void $ new_user nur2 void $ new_user nur2
curApi :: Builder
curApi = "v1.0"
mkUrl :: Wai.Port -> Builder -> ByteString
mkUrl _port urlPiece =
"/api/" +| curApi |+ urlPiece
This diff is collapsed.
...@@ -81,7 +81,7 @@ tests = withResource pubmedSettings (const (pure ())) $ \getPubmedKey -> ...@@ -81,7 +81,7 @@ tests = withResource pubmedSettings (const (pure ())) $ \getPubmedKey ->
-- | Checks that the 'RawQuery' can be translated into the expected 'BoolExpr' form, -- | Checks that the 'RawQuery' can be translated into the expected 'BoolExpr' form,
-- by also checking that both renders back to the initial 'RawQuery'. -- by also checking that both renders back to the initial 'RawQuery'.
translatesInto :: RawQuery -> BoolExpr Term -> Property translatesInto :: RawQuery -> BoolExpr [QueryTerm] -> Property
(translatesInto) raw boolExpr = (translatesInto) raw boolExpr =
let parsed = parseQuery raw let parsed = parseQuery raw
expected = Right (unsafeMkQuery boolExpr) expected = Right (unsafeMkQuery boolExpr)
...@@ -89,70 +89,69 @@ translatesInto :: RawQuery -> BoolExpr Term -> Property ...@@ -89,70 +89,69 @@ translatesInto :: RawQuery -> BoolExpr Term -> Property
(renderQuery <$> parsed) === (renderQuery <$> expected) (renderQuery <$> parsed) === (renderQuery <$> expected)
testParse01 :: Property testParse01 :: Property
testParse01 = "A OR B" `translatesInto` (BConst (Positive "A") `BOr` BConst (Positive "B")) testParse01 = "A OR B" `translatesInto` (BConst (Positive ["A"]) `BOr` BConst (Positive ["B"]))
testParse02 :: Property testParse02 :: Property
testParse02 = "A AND B" `translatesInto` (BConst (Positive "A") `BAnd` BConst (Positive "B")) testParse02 = "A AND B" `translatesInto` (BConst (Positive ["A"]) `BAnd` BConst (Positive ["B"]))
testParse03 :: Property testParse03 :: Property
testParse03 = "-A" `translatesInto` (BConst (Negative "A")) testParse03 = "-A" `translatesInto` (BConst (Negative ["A"]))
testParse03_01 :: Property testParse03_01 :: Property
testParse03_01 = "NOT A" `translatesInto` (BConst (Negative "A")) testParse03_01 = "NOT A" `translatesInto` (BConst (Negative ["A"]))
testParse04 :: Property testParse04 :: Property
testParse04 = "A -B" `translatesInto` (BConst (Positive "A") `BAnd` BConst (Negative "B")) testParse04 = "A -B" `translatesInto` (BConst (Positive ["A"]) `BAnd` BConst (Negative ["B"]))
-- Both 'A -B' and 'A AND -B' desugars into the same form. -- Both 'A -B' and 'A AND -B' desugars into the same form.
testParse04_01 :: Property testParse04_01 :: Property
testParse04_01 = "A AND -B" `translatesInto` (BConst (Positive "A") `BAnd` BConst (Negative "B")) testParse04_01 = "A AND -B" `translatesInto` (BConst (Positive ["A"]) `BAnd` BConst (Negative ["B"]))
testParse05 :: Property testParse05 :: Property
testParse05 = "A AND B -C" `translatesInto` ((BConst (Positive "A") `BAnd` BConst (Positive "B")) `BAnd` BConst (Negative "C")) testParse05 = "A AND B -C" `translatesInto` ((BConst (Positive ["A"]) `BAnd` BConst (Positive ["B"])) `BAnd` BConst (Negative ["C"]))
testParse05_01 :: Property testParse05_01 :: Property
testParse05_01 = testParse05_01 =
"A AND (B -C)" `translatesInto` (BConst (Positive "A") `BAnd` (BConst (Positive "B") `BAnd` BConst (Negative "C"))) "A AND (B -C)" `translatesInto` (BConst (Positive ["A"]) `BAnd` (BConst (Positive ["B"]) `BAnd` BConst (Negative ["C"])))
testParse06 :: Property testParse06 :: Property
testParse06 = testParse06 =
translatesInto "(A OR B OR NOT C) AND (D OR E OR F) -(G OR H OR I)" translatesInto "(A OR B OR NOT C) AND (D OR E OR F) -(G OR H OR I)"
( (
( (
((BConst (Positive "A") `BOr` (BConst (Positive "B"))) `BOr` (BConst (Negative "C"))) ((BConst (Positive ["A"]) `BOr` (BConst (Positive ["B"]))) `BOr` (BConst (Negative ["C"])))
`BAnd` `BAnd`
((BConst (Positive "D") `BOr` (BConst (Positive "E"))) `BOr` (BConst (Positive "F"))) ((BConst (Positive ["D"]) `BOr` (BConst (Positive ["E"]))) `BOr` (BConst (Positive ["F"])))
) )
`BAnd` BNot ( `BAnd` BNot (
((BConst (Positive "G") `BOr` (BConst (Positive "H"))) `BOr` (BConst (Positive "I"))) ((BConst (Positive ["G"]) `BOr` (BConst (Positive ["H"]))) `BOr` (BConst (Positive ["I"])))
) )
) )
testParse07 :: Property testParse07 :: Property
testParse07 = testParse07 =
translatesInto "\"Haskell\" AND \"Agda\"" translatesInto "\"Haskell\" AND \"Agda\""
((BConst (Positive "Haskell") `BAnd` (BConst (Positive "Agda")))) ((BConst (Positive ["Haskell"]) `BAnd` (BConst (Positive ["Agda"]))))
testParse07_01 :: Property testParse07_01 :: Property
testParse07_01 = testParse07_01 =
translatesInto "Haskell AND Agda" translatesInto "Haskell AND Agda"
((BConst (Positive "Haskell") `BAnd` (BConst (Positive "Agda")))) ((BConst (Positive ["Haskell"]) `BAnd` (BConst (Positive ["Agda"]))))
testParse07_02 :: Property testParse07_02 :: Property
testParse07_02 = testParse07_02 =
translatesInto "Raphael" translatesInto "Raphael"
((BConst (Positive "Raphael"))) ((BConst (Positive ["Raphael"])))
testParse07_03 :: Property testParse07_03 :: Property
testParse07_03 = testParse07_03 =
translatesInto "Niki" ((BConst (Positive "Niki"))) .&&. translatesInto "Niki" ((BConst (Positive ["Niki"]))) .&&.
translatesInto "Ajeje" ((BConst (Positive "Ajeje"))) .&&. translatesInto "Ajeje" ((BConst (Positive ["Ajeje"]))) .&&.
translatesInto "Orf" ((BConst (Positive "Orf"))) translatesInto "Orf" ((BConst (Positive ["Orf"])))
testWordsIntoConst :: Assertion testWordsIntoConst :: Assertion
testWordsIntoConst = testWordsIntoConst =
let (expected :: BoolExpr Term) = let (expected :: BoolExpr [QueryTerm]) = fromCNF (boolTreeToCNF @[QueryTerm] $ (BAnd (BOr (BConst (Positive [QT_exact_match "The",QT_exact_match "Art",QT_exact_match "of",QT_exact_match "Computer",QT_exact_match "Programming"])) BFalse) (BAnd (BOr (BConst (Positive [QT_exact_match "Conceptual",QT_exact_match "Mathematics"])) BFalse) BTrue)))
fromCNF (boolTreeToCNF @Term $ (BConst (Positive "The Art of Computer Programming") `BAnd` (BConst (Positive "Conceptual Mathematics"))))
in case parseQuery "\"The Art of Computer Programming\" AND \"Conceptual Mathematics\"" of in case parseQuery "\"The Art of Computer Programming\" AND \"Conceptual Mathematics\"" of
Left err Left err
-> assertBool err False -> assertBool err False
...@@ -168,43 +167,43 @@ withValidQuery rawQuery onValidParse = do ...@@ -168,43 +167,43 @@ withValidQuery rawQuery onValidParse = do
testArxiv01_01 :: Assertion testArxiv01_01 :: Assertion
testArxiv01_01 = withValidQuery "A AND B" $ \q -> testArxiv01_01 = withValidQuery "A AND B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"])))) (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv01_02 :: Assertion testArxiv01_02 :: Assertion
testArxiv01_02 = withValidQuery "\"Haskell\" AND \"Agda\"" $ \q -> testArxiv01_02 = withValidQuery "\"Haskell\" AND \"Agda\"" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["Haskell"]) ((Arxiv.Exp $ Arxiv.Abs ["Agda"])))) (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["Haskell"]) ((Arxiv.Exp $ Arxiv.Abs ["Agda"]))))
testArxiv02 :: Assertion testArxiv02 :: Assertion
testArxiv02 = withValidQuery "A OR B" $ \q -> testArxiv02 = withValidQuery "A OR B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"])))) (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv03_01 :: Assertion testArxiv03_01 :: Assertion
testArxiv03_01 = withValidQuery "A AND NOT B" $ \q -> testArxiv03_01 = withValidQuery "A AND NOT B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"])))) (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv03_02 :: Assertion testArxiv03_02 :: Assertion
testArxiv03_02 = withValidQuery "A AND -B" $ \q -> testArxiv03_02 = withValidQuery "A AND -B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"])))) (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
-- Double negation get turned into positive. -- Double negation get turned into positive.
testArxiv04_01 :: Assertion testArxiv04_01 :: Assertion
testArxiv04_01 = withValidQuery "A AND NOT (NOT B)" $ \q -> testArxiv04_01 = withValidQuery "A AND NOT (NOT B)" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"])))) (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv04_02 :: Assertion testArxiv04_02 :: Assertion
testArxiv04_02 = withValidQuery "A AND NOT (NOT (NOT B))" $ \q -> testArxiv04_02 = withValidQuery "A AND NOT (NOT (NOT B))" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"])))) (Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv05 :: Assertion testArxiv05 :: Assertion
testArxiv05 = withValidQuery "A OR NOT B" $ \q -> testArxiv05 = withValidQuery "A OR NOT B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just ( (Arxiv.qExp (Arxiv.convertQuery q) == Just (
Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"]) Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"])
(Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["B"]) (Arxiv.Exp $ Arxiv.Abs ["B"])) (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["B"]) (Arxiv.Exp $ Arxiv.Abs ["B"]))
...@@ -213,7 +212,7 @@ testArxiv05 = withValidQuery "A OR NOT B" $ \q -> ...@@ -213,7 +212,7 @@ testArxiv05 = withValidQuery "A OR NOT B" $ \q ->
testArxiv06 :: Assertion testArxiv06 :: Assertion
testArxiv06 = withValidQuery "-A" $ \q -> testArxiv06 = withValidQuery "-A" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just ( (Arxiv.qExp (Arxiv.convertQuery q) == Just (
Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) (Arxiv.Exp $ Arxiv.Abs ["A"]) Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) (Arxiv.Exp $ Arxiv.Abs ["A"])
) )
...@@ -225,7 +224,7 @@ testArxiv06 = withValidQuery "-A" $ \q -> ...@@ -225,7 +224,7 @@ testArxiv06 = withValidQuery "-A" $ \q ->
testPubMed01 :: Assertion testPubMed01 :: Assertion
testPubMed01 = withValidQuery "A" $ \q -> testPubMed01 = withValidQuery "A" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q)) assertBool ("Query not converted into expression: " <> show @(BoolExpr [QueryTerm]) (fromCNF $ getQuery q))
(Pubmed.getESearch (Pubmed.convertQuery q) == "A") (Pubmed.getESearch (Pubmed.convertQuery q) == "A")
testPubMed02_01 :: Assertion testPubMed02_01 :: Assertion
......
...@@ -34,7 +34,7 @@ import Gargantext.Database.Schema.Node (NodePoly(..)) ...@@ -34,7 +34,7 @@ import Gargantext.Database.Schema.Node (NodePoly(..))
import Test.Database.Types import Test.Database.Types
import Test.Hspec.Expectations import Test.Hspec.Expectations
import Test.Tasty.HUnit import Test.Tasty.HUnit
import Gargantext.Core.Text.Terms.Mono.Stem.En import Gargantext.Core.Text.Terms.Mono.Stem
import Gargantext.Database.Admin.Config (userMaster) import Gargantext.Database.Admin.Config (userMaster)
import qualified Data.Text as T import qualified Data.Text as T
import qualified Gargantext.Core.Text.Corpus.Query as API import qualified Gargantext.Core.Text.Corpus.Query as API
...@@ -134,8 +134,15 @@ corpusAddDocuments env = do ...@@ -134,8 +134,15 @@ corpusAddDocuments env = do
stemmingTest :: TestEnv -> Assertion stemmingTest :: TestEnv -> Assertion
stemmingTest _env = do stemmingTest _env = do
stemIt "Ajeje" `shouldBe` "Ajeje" stem EN GargPorterAlgorithm "Ajeje" `shouldBe` "Ajeje"
stemIt "PyPlasm:" `shouldBe` "PyPlasm:" stem EN GargPorterAlgorithm "PyPlasm:" `shouldBe` "PyPlasm:"
-- This test outlines the main differences between Porter and Lancaster.
stem EN GargPorterAlgorithm "dancer" `shouldBe` "dancer"
stem EN LancasterAlgorithm "dancer" `shouldBe` "dant"
stem EN GargPorterAlgorithm "postpartum" `shouldBe` "postpartum"
stem EN LancasterAlgorithm "postpartum" `shouldBe` "postpart"
stem IT PorterAlgorithm "catechizzare" `shouldBe` "catechizz"
stem IT LancasterAlgorithm "catechizzare" `shouldBe` "catechizzare" -- lancaster doesn't support Italian
mkQ :: T.Text -> API.Query mkQ :: T.Text -> API.Query
mkQ txt = either (\e -> error $ "(query) = " <> T.unpack txt <> ": " <> e) id . API.parseQuery . API.RawQuery $ txt mkQ txt = either (\e -> error $ "(query) = " <> T.unpack txt <> ": " <> e) id . API.parseQuery . API.RawQuery $ txt
......
...@@ -13,7 +13,7 @@ Portability : POSIX ...@@ -13,7 +13,7 @@ Portability : POSIX
module Test.Database.Operations.NodeStory where module Test.Database.Operations.NodeStory where
import Control.Lens ((^.), (.~), _2) import Control.Lens ((^.), (.~), (?~), _2)
import Control.Monad.Reader import Control.Monad.Reader
import Data.Map.Strict qualified as Map import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM import Data.Map.Strict.Patch qualified as PM
...@@ -23,7 +23,7 @@ import Database.PostgreSQL.Simple.SqlQQ ...@@ -23,7 +23,7 @@ import Database.PostgreSQL.Simple.SqlQQ
import Gargantext.API.Ngrams (commitStatePatch, mSetFromList, setListNgrams, saveNodeStory) import Gargantext.API.Ngrams (commitStatePatch, mSetFromList, setListNgrams, saveNodeStory)
import Gargantext.API.Ngrams.Types (MSet(..), NgramsPatch(..), NgramsRepoElement(..), NgramsTerm(..), Versioned(..), mkNgramsTablePatch, nre_children, nre_list, nre_parent, nre_root) import Gargantext.API.Ngrams.Types (MSet(..), NgramsPatch(..), NgramsRepoElement(..), NgramsTerm(..), Versioned(..), mkNgramsTablePatch, nre_children, nre_list, nre_parent, nre_root)
import Gargantext.API.Ngrams.Tools (getNodeStory) import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.Core.NodeStory hiding (runPGSQuery) import Gargantext.Core.NodeStory
import Gargantext.Core.Types.Individu import Gargantext.Core.Types.Individu
import Gargantext.Core.Types (ListType(..), ListId, NodeId, UserId) import Gargantext.Core.Types (ListType(..), ListId, NodeId, UserId)
import Gargantext.Database.Action.User (getUserId) import Gargantext.Database.Action.User (getUserId)
...@@ -73,15 +73,15 @@ simpleParentTerm' = fst simpleTerm ...@@ -73,15 +73,15 @@ simpleParentTerm' = fst simpleTerm
simpleParentTerm :: (NgramsTerm, NgramsRepoElement) simpleParentTerm :: (NgramsTerm, NgramsRepoElement)
simpleParentTerm = ( simpleParentTerm' simpleParentTerm = ( simpleParentTerm'
, simpleTerm ^. _2 , simpleTerm ^. _2
& nre_children .~ (mSetFromList [simpleChildTerm']) ) & nre_children .~ mSetFromList [simpleChildTerm'] )
simpleChildTerm' :: NgramsTerm simpleChildTerm' :: NgramsTerm
simpleChildTerm' = NgramsTerm "world" simpleChildTerm' = NgramsTerm "world"
simpleChildTerm :: (NgramsTerm, NgramsRepoElement) simpleChildTerm :: (NgramsTerm, NgramsRepoElement)
simpleChildTerm = ( simpleChildTerm' simpleChildTerm = ( simpleChildTerm'
, simpleTerm ^. _2 , simpleTerm ^. _2
& nre_parent .~ Just simpleParentTerm' & nre_parent ?~ simpleParentTerm'
& nre_root .~ Just simpleParentTerm' ) & nre_root ?~ simpleParentTerm' )
-- tests start here -- tests start here
......
...@@ -5,7 +5,7 @@ import Prelude ...@@ -5,7 +5,7 @@ import Prelude
import Data.ByteString.Char8 qualified as C8 import Data.ByteString.Char8 qualified as C8
import Data.Text qualified as T import Data.Text qualified as T
import Gargantext.Core.Text.Terms.Mono.Stem.Lancaster (stemIt) import Gargantext.Core.Text.Terms.Mono.Stem.Internal.Lancaster (stem)
import Gargantext.Prelude (toS) import Gargantext.Prelude (toS)
import Test.Tasty import Test.Tasty
import Test.Tasty.Golden (goldenVsStringDiff) import Test.Tasty.Golden (goldenVsStringDiff)
...@@ -128,4 +128,4 @@ testWords = [ ...@@ -128,4 +128,4 @@ testWords = [
] ]
mkTestVector :: IO BL.ByteString mkTestVector :: IO BL.ByteString
mkTestVector = pure $ toS $ C8.unlines (map (\(indx, w) -> (C8.pack $ show indx) <> "," <> TE.encodeUtf8 (stemIt w)) testWords) mkTestVector = pure $ toS $ C8.unlines (map (\(indx, w) -> (C8.pack $ show indx) <> "," <> TE.encodeUtf8 (stem w)) testWords)
module Test.Types where
import Data.Aeson ((.:), (.:?), (.=), FromJSON(..), ToJSON(..), object, withObject)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..))
import Gargantext.Prelude
data JobPollHandle = JobPollHandle {
_jph_id :: !Text
, _jph_log :: [JobLog]
, _jph_status :: !Text
, _jph_error :: !(Maybe Text)
} deriving Show
instance FromJSON JobPollHandle where
parseJSON = withObject "JobPollHandle" $ \o -> do
_jph_id <- o .: "id"
_jph_log <- o .: "log"
_jph_status <- o .: "status"
_jph_error <- o .:? "error"
pure JobPollHandle{..}
instance ToJSON JobPollHandle where
toJSON JobPollHandle{..} = object [
"id" .= toJSON _jph_id
, "log" .= toJSON _jph_log
, "status" .= toJSON _jph_status
, "error" .= toJSON _jph_error
]
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
module Test.Utils where module Test.Utils where
import Control.Exception import Control.Exception ()
import Control.Monad import Control.Lens ((^.))
import Data.Aeson import Control.Monad ()
import Data.Aeson qualified as JSON import Data.Aeson qualified as JSON
import Data.Aeson.KeyMap qualified as KM import Data.Aeson.KeyMap qualified as KM
import Data.ByteString.Char8 qualified as B import Data.ByteString.Char8 qualified as B
import Data.Char (isSpace) import Data.ByteString.Lazy qualified as L
import Network.HTTP.Types import Data.CaseInsensitive qualified as CI
import Network.Wai.Test import Data.Map.Strict qualified as Map
import Prelude import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Data.Text.Lazy qualified as TL
import Data.Text.Lazy.Encoding qualified as TLE
import Fmt (Builder)
import Gargantext.API.Admin.Auth.Types (AuthRequest(..), Token, authRes_token)
import Gargantext.Core.Types.Individu (Username, GargPassword)
import Gargantext.Prelude
import Network.HTTP.Client (defaultManagerSettings, newManager)
import Network.HTTP.Types (Header, Method, status200)
import Network.HTTP.Types.Header (hAccept, hAuthorization, hContentType)
import Network.Wai.Handler.Warp (Port)
import Network.Wai.Test (SResponse(..))
import Prelude qualified
import Servant.Client (ClientEnv, baseUrlPort, defaultMakeClientRequest, makeClientRequest, mkClientEnv, parseBaseUrl, runClientM)
import Servant.Client.Core.Request (addHeader)
import Test.API.Routes (auth_api, mkUrl)
import Test.Hspec.Expectations import Test.Hspec.Expectations
import Test.Hspec.Wai import Test.Hspec.Wai (MatchBody(..), WaiExpectation, WaiSession, request)
import Test.Hspec.Wai.JSON (FromValue(..)) import Test.Hspec.Wai.JSON (FromValue(..))
import Test.Hspec.Wai.Matcher import Test.Hspec.Wai.Matcher (MatchHeader(..), ResponseMatcher(..), bodyEquals, formatHeader, match)
import Test.Tasty.HUnit import Test.Tasty.HUnit (Assertion)
import Test.Types
-- | Marks the input 'Assertion' as pending, by ignoring any exception -- | Marks the input 'Assertion' as pending, by ignoring any exception
-- thrown by it. -- thrown by it.
pending :: String -> Assertion -> Assertion pending :: Prelude.String -> Assertion -> Assertion
pending reason act = act `catch` (\(e :: SomeException) -> do pending reason act = act `catch` (\(e :: SomeException) -> do
putStrLn $ "PENDING: " <> reason putStrLn $ "PENDING: " <> reason
putStrLn (displayException e)) putStrLn (displayException e))
...@@ -56,9 +77,9 @@ instance FromValue JsonFragmentResponseMatcher where ...@@ -56,9 +77,9 @@ instance FromValue JsonFragmentResponseMatcher where
fromValue = JsonFragmentResponseMatcher . ResponseMatcher 200 [matchHeader] . containsJSON fromValue = JsonFragmentResponseMatcher . ResponseMatcher 200 [matchHeader] . containsJSON
where where
matchHeader = MatchHeader $ \headers _body -> matchHeader = MatchHeader $ \headers _body ->
case lookup "Content-Type" headers of case Prelude.lookup "Content-Type" headers of
Just h | isJSON h -> Nothing Just h | isJSON h -> Nothing
_ -> Just $ unlines [ _ -> Just $ Prelude.unlines [
"missing header:" "missing header:"
, formatHeader ("Content-Type", "application/json") , formatHeader ("Content-Type", "application/json")
] ]
...@@ -70,7 +91,7 @@ instance FromValue JsonFragmentResponseMatcher where ...@@ -70,7 +91,7 @@ instance FromValue JsonFragmentResponseMatcher where
breakAt c = fmap (B.drop 1) . B.break (== c) breakAt c = fmap (B.drop 1) . B.break (== c)
strip = B.reverse . B.dropWhile isSpace . B.reverse . B.dropWhile isSpace strip = B.reverse . B.dropWhile isSpace . B.reverse . B.dropWhile isSpace
shouldRespondWithJSON :: (FromJSON a, ToJSON a, HasCallStack) shouldRespondWithJSON :: (JSON.FromJSON a, JSON.ToJSON a, HasCallStack)
=> WaiSession st a => WaiSession st a
-> JsonFragmentResponseMatcher -> JsonFragmentResponseMatcher
-> WaiExpectation st -> WaiExpectation st
...@@ -78,14 +99,127 @@ shouldRespondWithJSON action matcher = do ...@@ -78,14 +99,127 @@ shouldRespondWithJSON action matcher = do
r <- action r <- action
forM_ (match (SResponse status200 mempty (JSON.encode r)) (getJsonMatcher matcher)) (liftIO . expectationFailure) forM_ (match (SResponse status200 mempty (JSON.encode r)) (getJsonMatcher matcher)) (liftIO . expectationFailure)
containsJSON :: Value -> MatchBody containsJSON :: JSON.Value -> MatchBody
containsJSON expected = MatchBody matcher containsJSON expected = MatchBody matcher
where where
matcher headers actualBody = case decode actualBody of matcher headers actualBody = case JSON.decode actualBody of
Just actual | expected `isSubsetOf` actual -> Nothing Just actual | expected `isSubsetOf` actual -> Nothing
_ -> let MatchBody m = bodyEquals (encode expected) in m headers actualBody _ -> let MatchBody m = bodyEquals (JSON.encode expected) in m headers actualBody
isSubsetOf :: Value -> Value -> Bool isSubsetOf :: JSON.Value -> JSON.Value -> Bool
isSubsetOf (Object sub) (Object sup) = isSubsetOf (JSON.Object sub) (JSON.Object sup) =
all (\(key, value) -> KM.lookup key sup == Just value) (KM.toList sub) all (\(key, value) -> KM.lookup key sup == Just value) (KM.toList sub)
isSubsetOf x y = x == y isSubsetOf x y = x == y
authenticatedServantClient :: Int -> T.Text -> IO ClientEnv
authenticatedServantClient port token = do
baseUrl <- parseBaseUrl "http://localhost"
manager <- newManager defaultManagerSettings
let requestAddToken url req =
defaultMakeClientRequest url $ addHeader hAuthorization ("Bearer " <> token)
$ addHeader hContentType (T.pack "application/json") req
pure $ (mkClientEnv manager (baseUrl { baseUrlPort = port })) { makeClientRequest = requestAddToken }
-- | Issue a request with a valid 'Authorization: Bearer' inside.
protected :: HasCallStack
=> Token
-> Method
-> ByteString
-> L.ByteString
-> WaiSession () SResponse
protected tkn mth url = protectedWith mempty tkn mth url
protectedJSON :: forall a. (JSON.FromJSON a, Typeable a, HasCallStack)
=> Token
-> Method
-> ByteString
-> JSON.Value
-> WaiSession () a
protectedJSON tkn mth url = protectedJSONWith mempty tkn mth url
protectedJSONWith :: forall a. (JSON.FromJSON a, Typeable a, HasCallStack)
=> [Header]
-> Token
-> Method
-> ByteString
-> JSON.Value
-> WaiSession () a
protectedJSONWith hdrs tkn mth url jsonV = do
SResponse{..} <- protectedWith hdrs tkn mth url (JSON.encode jsonV)
case JSON.eitherDecode simpleBody of
Left err -> Prelude.fail $ "protectedJSON failed when parsing " <> show (typeRep $ Proxy @a) <> ": " <> err
Right x -> pure x
protectedWith :: HasCallStack
=> [Header]
-> Token
-> Method -> ByteString -> L.ByteString -> WaiSession () SResponse
protectedWith extraHeaders tkn mth url payload =
-- Using a map means that if any of the extra headers contains a clashing header name,
-- the extra headers will take precedence.
let defaultHeaders = [ (hAccept, "application/json;charset=utf-8")
, (hContentType, "application/json")
, (hAuthorization, "Bearer " <> TE.encodeUtf8 tkn)
]
hdrs = Map.toList $ Map.fromList $ defaultHeaders <> extraHeaders
in request mth url hdrs payload
protectedNewError :: HasCallStack => Token -> Method -> ByteString -> L.ByteString -> WaiSession () SResponse
protectedNewError tkn mth url = protectedWith newErrorFormat tkn mth url
where
newErrorFormat = [(CI.mk "X-Garg-Error-Scheme", "new")]
getJSON :: Token -> ByteString -> WaiSession () SResponse
getJSON tkn url = protectedWith mempty tkn "GET" url ""
postJSONUrlEncoded :: forall a. (JSON.FromJSON a, Typeable a, HasCallStack)
=> Token
-> ByteString
-> L.ByteString
-> WaiSession () a
postJSONUrlEncoded tkn url queryPaths = do
SResponse{..} <- protectedWith [(hContentType, "application/x-www-form-urlencoded")] tkn "POST" url queryPaths
case JSON.eitherDecode simpleBody of
Left err -> Prelude.fail $ "postJSONUrlEncoded failed when parsing " <> show (typeRep $ Proxy @a) <> ": " <> err <> "\nPayload was: " <> (T.unpack . TL.toStrict . TLE.decodeUtf8 $ simpleBody)
Right x -> pure x
withValidLogin :: (MonadFail m, MonadIO m) => Port -> Username -> GargPassword -> (Token -> m a) -> m a
withValidLogin port ur pwd act = do
baseUrl <- liftIO $ parseBaseUrl "http://localhost"
manager <- liftIO $ newManager defaultManagerSettings
let clientEnv = mkClientEnv manager (baseUrl { baseUrlPort = port })
let authPayload = AuthRequest ur pwd
result <- liftIO $ runClientM (auth_api authPayload) clientEnv
case result of
Left err -> liftIO $ throwIO $ Prelude.userError (show err)
Right res -> act $ res ^. authRes_token
-- | Poll the given job URL every second until it finishes.
-- Retries up to 60 times (i.e. for 1 minute, before giving up)
pollUntilFinished :: HasCallStack
=> Token
-> Port
-> (JobPollHandle -> Builder)
-> JobPollHandle
-> WaiSession () JobPollHandle
pollUntilFinished tkn port mkUrlPiece = go 60
where
go :: Int -> JobPollHandle -> WaiSession () JobPollHandle
go 0 h = panicTrace $ "pollUntilFinished exhausted attempts. Last found JobPollHandle: " <> TE.decodeUtf8 (L.toStrict $ JSON.encode h)
go n h = case _jph_status h == "IsPending" || _jph_status h == "IsRunning" of
True -> do
liftIO $ threadDelay 1_000_000
h' <- protectedJSON tkn "GET" (mkUrl port $ mkUrlPiece h) ""
go (n-1) h'
False
| _jph_status h == "IsFailure"
-> panicTrace $ "JobPollHandle contains a failure: " <> TE.decodeUtf8 (L.toStrict $ JSON.encode h)
| otherwise
-> pure h
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment