Commit 5e837ed9 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge remote-tracking branch 'origin/adinapoli/issue-182' into dev-merge

parents 84c04281 72a0c10f
...@@ -53,6 +53,9 @@ library ...@@ -53,6 +53,9 @@ library
Gargantext.Core.Text Gargantext.Core.Text
Gargantext.Core.Text.Context Gargantext.Core.Text.Context
Gargantext.Core.Text.Corpus.API Gargantext.Core.Text.Corpus.API
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.Pubmed
Gargantext.Core.Text.Corpus.Query
Gargantext.Core.Text.Corpus.Parsers Gargantext.Core.Text.Corpus.Parsers
Gargantext.Core.Text.Corpus.Parsers.CSV Gargantext.Core.Text.Corpus.Parsers.CSV
Gargantext.Core.Text.Corpus.Parsers.Date.Parsec Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
...@@ -185,11 +188,9 @@ library ...@@ -185,11 +188,9 @@ library
Gargantext.Core.NodeStoryFile Gargantext.Core.NodeStoryFile
Gargantext.Core.Statistics Gargantext.Core.Statistics
Gargantext.Core.Text.Convert Gargantext.Core.Text.Convert
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.Hal Gargantext.Core.Text.Corpus.API.Hal
Gargantext.Core.Text.Corpus.API.Isidore Gargantext.Core.Text.Corpus.API.Isidore
Gargantext.Core.Text.Corpus.API.Istex Gargantext.Core.Text.Corpus.API.Istex
Gargantext.Core.Text.Corpus.API.Pubmed
Gargantext.Core.Text.Corpus.Parsers.Book Gargantext.Core.Text.Corpus.Parsers.Book
Gargantext.Core.Text.Corpus.Parsers.Date Gargantext.Core.Text.Corpus.Parsers.Date
Gargantext.Core.Text.Corpus.Parsers.Date.Attoparsec Gargantext.Core.Text.Corpus.Parsers.Date.Attoparsec
...@@ -383,6 +384,7 @@ library ...@@ -383,6 +384,7 @@ library
, blaze-html , blaze-html
, blaze-markup , blaze-markup
, blaze-svg , blaze-svg
, boolexpr
, bytestring , bytestring
, case-insensitive , case-insensitive
, cassava , cassava
...@@ -847,6 +849,7 @@ test-suite garg-test ...@@ -847,6 +849,7 @@ test-suite garg-test
main-is: Main.hs main-is: Main.hs
other-modules: other-modules:
Core.Text Core.Text
Core.Text.Corpus.Query
Core.Text.Examples Core.Text.Examples
Core.Text.Flow Core.Text.Flow
Core.Utils Core.Utils
...@@ -864,6 +867,7 @@ test-suite garg-test ...@@ -864,6 +867,7 @@ test-suite garg-test
Parsers.Types Parsers.Types
Parsers.WOS Parsers.WOS
Utils.Crypto Utils.Crypto
Utils.Jobs
Paths_gargantext Paths_gargantext
hs-source-dirs: hs-source-dirs:
src-test src-test
...@@ -894,61 +898,33 @@ test-suite garg-test ...@@ -894,61 +898,33 @@ test-suite garg-test
build-depends: build-depends:
QuickCheck QuickCheck
, aeson , aeson
, async
, base , base
, boolexpr
, bytestring , bytestring
, conduit
, containers , containers
, crawlerArxiv
, duckling , duckling
, extra , extra
, gargantext , gargantext
, gargantext-prelude , gargantext-prelude
, hspec , hspec
, http-client
, http-client-tls
, mtl
, parsec , parsec
, patches-class , patches-class
, patches-map , patches-map
, quickcheck-instances , quickcheck-instances
, servant-job
, stm
, tasty , tasty
, tasty-hspec
, tasty-hunit , tasty-hunit
, tasty-quickcheck
, text , text
, time , time
, unordered-containers , unordered-containers
, validity , validity
default-language: Haskell2010 default-language: Haskell2010
test-suite jobqueue-test
type: exitcode-stdio-1.0
main-is: Main.hs
other-modules:
Paths_gargantext
hs-source-dirs:
tests/queue
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends:
aeson
, async
, base
, containers
, extra
, gargantext
, hspec
, http-client
, http-client-tls
, mtl
, servant-job
, stm
, text
, time
default-language: Haskell2010
...@@ -81,6 +81,9 @@ library: ...@@ -81,6 +81,9 @@ library:
- Gargantext.Core.Text - Gargantext.Core.Text
- Gargantext.Core.Text.Context - Gargantext.Core.Text.Context
- Gargantext.Core.Text.Corpus.API - Gargantext.Core.Text.Corpus.API
- Gargantext.Core.Text.Corpus.API.Arxiv
- Gargantext.Core.Text.Corpus.API.Pubmed
- Gargantext.Core.Text.Corpus.Query
- Gargantext.Core.Text.Corpus.Parsers - Gargantext.Core.Text.Corpus.Parsers
- Gargantext.Core.Text.Corpus.Parsers.CSV - Gargantext.Core.Text.Corpus.Parsers.CSV
- Gargantext.Core.Text.Corpus.Parsers.Date.Parsec - Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
...@@ -166,6 +169,7 @@ library: ...@@ -166,6 +169,7 @@ library:
- blaze-html - blaze-html
- blaze-markup - blaze-markup
- blaze-svg - blaze-svg
- boolexpr
- bytestring - bytestring
- case-insensitive - case-insensitive
- cassava - cassava
...@@ -492,7 +496,6 @@ executables: ...@@ -492,7 +496,6 @@ executables:
- aeson - aeson
- serialise - serialise
tests: tests:
garg-test: garg-test:
main: Main.hs main: Main.hs
...@@ -512,45 +515,37 @@ tests: ...@@ -512,45 +515,37 @@ tests:
- -rtsopts - -rtsopts
- -with-rtsopts=-N - -with-rtsopts=-N
dependencies: dependencies:
- QuickCheck
- aeson - aeson
- async
- base - base
- boolexpr
- bytestring - bytestring
- conduit
- containers - containers
- crawlerArxiv
- duckling
- gargantext - gargantext
- gargantext-prelude - gargantext-prelude
- hspec - hspec
- QuickCheck - http-client
- quickcheck-instances - http-client-tls
- time - mtl
- parsec - parsec
- patches-class - patches-class
- patches-map - patches-map
- duckling - duckling
- quickcheck-instances
- servant-job
- stm
- tasty - tasty
- tasty-hspec
- tasty-hunit - tasty-hunit
- tasty-quickcheck
- text - text
- time
- unordered-containers - unordered-containers
- validity - validity
jobqueue-test:
main: Main.hs
source-dirs: tests/queue
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
dependencies:
- aeson
- async
- base
- containers
- gargantext
- mtl
- hspec
- http-client
- http-client-tls
- servant-job
- stm
- time
# garg-doctest: # garg-doctest:
# main: Main.hs # main: Main.hs
# source-dirs: src-doctest # source-dirs: src-doctest
......
This diff is collapsed.
...@@ -18,8 +18,8 @@ import Gargantext.Prelude ...@@ -18,8 +18,8 @@ import Gargantext.Prelude
import Gargantext.Core.Utils import Gargantext.Core.Utils
-- | Core.Utils tests -- | Core.Utils tests
test :: IO () test :: Spec
test = hspec $ do test = do
describe "check if groupWithCounts works" $ do describe "check if groupWithCounts works" $ do
it "simple integer array" $ do it "simple integer array" $ do
(groupWithCounts [1, 2, 3, 1, 2, 3]) `shouldBe` [(1, 2), (2, 2), (3, 2)] (groupWithCounts [1, 2, 3, 1, 2, 3]) `shouldBe` [(1, 2), (2, 2), (3, 2)]
......
...@@ -30,8 +30,8 @@ myCooc = HashMap.fromList [((NgramsTerm {unNgramsTerm = "gev au"},NgramsTerm {un ...@@ -30,8 +30,8 @@ myCooc = HashMap.fromList [((NgramsTerm {unNgramsTerm = "gev au"},NgramsTerm {un
test :: IO () test :: Spec
test = hspec $ do test = do
describe "Cross" $ do describe "Cross" $ do
let let
(distanceMap,_,_) = doSimilarityMap Conditional 0 Weak myCooc (distanceMap,_,_) = doSimilarityMap Conditional 0 Weak myCooc
......
...@@ -11,6 +11,7 @@ Portability : POSIX ...@@ -11,6 +11,7 @@ Portability : POSIX
import Gargantext.Prelude import Gargantext.Prelude
import qualified Core.Text.Corpus.Query as CorpusQuery
import qualified Core.Utils as Utils import qualified Core.Utils as Utils
--import qualified Ngrams.Lang.Fr as Fr --import qualified Ngrams.Lang.Fr as Fr
--import qualified Ngrams.Lang as Lang --import qualified Ngrams.Lang as Lang
...@@ -20,17 +21,32 @@ import qualified Parsers.Date as PD ...@@ -20,17 +21,32 @@ import qualified Parsers.Date as PD
-- import qualified Graph.Distance as GD -- import qualified Graph.Distance as GD
import qualified Graph.Clustering as Graph import qualified Graph.Clustering as Graph
import qualified Utils.Crypto as Crypto import qualified Utils.Crypto as Crypto
import qualified Utils.Jobs as Jobs
import Test.Tasty
import Test.Tasty.Hspec
main :: IO () main :: IO ()
main = do main = do
Utils.test utilSpec <- testSpec "Utils" Utils.test
clusteringSpec <- testSpec "Graph Clustering" Graph.test
dateParserSpec <- testSpec "Date Parsing" PD.testFromRFC3339
cryptoSpec <- testSpec "Crypto" Crypto.test
nlpSpec <- testSpec "NLP" NLP.test
jobsSpec <- testSpec "Jobs" Jobs.test
defaultMain $ testGroup "Gargantext"
[ utilSpec
, clusteringSpec
, dateParserSpec
, cryptoSpec
, nlpSpec
, jobsSpec
, NgramsQuery.tests
, CorpusQuery.tests
]
-- Occ.parsersTest -- Occ.parsersTest
-- Lang.ngramsExtractionTest FR -- Lang.ngramsExtractionTest FR
-- Lang.ngramsExtractionTest EN -- Lang.ngramsExtractionTest EN
-- Metrics.main -- Metrics.main
Graph.test
PD.testFromRFC3339
-- GD.test -- GD.test
Crypto.test
NLP.main
NgramsQuery.main
...@@ -19,8 +19,8 @@ import Gargantext.Prelude ...@@ -19,8 +19,8 @@ import Gargantext.Prelude
import Gargantext.Core.Text.Terms.Multi import Gargantext.Core.Text.Terms.Multi
main :: IO () test :: Spec
main = hspec $ do test = do
describe "Text that should be cleaned before sending it to NLP tools as micro-services." $ do describe "Text that should be cleaned before sending it to NLP tools as micro-services." $ do
let text = "This is a url http://cnrs.gargantext.org to be remove and another one www.gargantext.org and digits 343242-2332 to be remove and some to keep: 232 231 33." :: Text let text = "This is a url http://cnrs.gargantext.org to be remove and another one www.gargantext.org and digits 343242-2332 to be remove and some to keep: 232 231 33." :: Text
let result = "This is a url to be remove and another one and digits to be remove and some to keep: 232 231 33." let result = "This is a url to be remove and another one and digits to be remove and some to keep: 232 231 33."
......
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeApplications #-}
module Ngrams.Query where module Ngrams.Query (tests) where
import Control.Monad import Control.Monad
import Data.Coerce import Data.Coerce
...@@ -21,9 +21,6 @@ import Test.Tasty ...@@ -21,9 +21,6 @@ import Test.Tasty
import Test.Tasty.HUnit import Test.Tasty.HUnit
main :: IO ()
main = defaultMain tests
tests :: TestTree tests :: TestTree
tests = testGroup "Ngrams" [unitTests] tests = testGroup "Ngrams" [unitTests]
......
...@@ -36,8 +36,8 @@ fromRFC3339Inv :: Either ParseError ZonedTime -> Text ...@@ -36,8 +36,8 @@ fromRFC3339Inv :: Either ParseError ZonedTime -> Text
fromRFC3339Inv (Right z) = toRFC3339 z fromRFC3339Inv (Right z) = toRFC3339 z
fromRFC3339Inv (Left pe) = panic . pack $ show pe fromRFC3339Inv (Left pe) = panic . pack $ show pe
testFromRFC3339 :: IO () testFromRFC3339 :: Spec
testFromRFC3339 = hspec $ do testFromRFC3339 = do
describe "Test fromRFC3339: " $ do describe "Test fromRFC3339: " $ do
it "is the inverse of Duckling's toRFC3339" $ property $ it "is the inverse of Duckling's toRFC3339" $ property $
((==) <*> (fromRFC3339 . fromRFC3339Inv)) . Right . looseZonedTimePrecision ((==) <*> (fromRFC3339 . fromRFC3339Inv)) . Right . looseZonedTimePrecision
......
...@@ -16,11 +16,10 @@ import Test.Hspec ...@@ -16,11 +16,10 @@ import Test.Hspec
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash import Gargantext.Prelude.Crypto.Hash
import Gargantext.Prelude.Utils
-- | Crypto Hash tests -- | Crypto Hash tests
test :: IO () test :: Spec
test = hspec $ do test = do
describe "Hash String with frontend works" $ do describe "Hash String with frontend works" $ do
let text = "To hash with backend" :: Text let text = "To hash with backend" :: Text
let hashed = "8a69a94d164279af2b7d1443ce08da6184b3d7e815406076e148159c284b53c3" :: Hash let hashed = "8a69a94d164279af2b7d1443ce08da6184b3d7e815406076e148159c284b53c3" :: Hash
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
{-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-} {-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE NumericUnderscores #-} {-# LANGUAGE NumericUnderscores #-}
module Main where module Utils.Jobs (test) where
import Control.Concurrent import Control.Concurrent
import qualified Control.Concurrent.Async as Async import qualified Control.Concurrent.Async as Async
...@@ -17,7 +17,6 @@ import Data.Either ...@@ -17,7 +17,6 @@ import Data.Either
import Data.List import Data.List
import Data.Sequence (Seq, (|>), fromList) import Data.Sequence (Seq, (|>), fromList)
import Data.Time import Data.Time
import GHC.Stack
import Prelude import Prelude
import System.IO.Unsafe import System.IO.Unsafe
import Network.HTTP.Client.TLS (newTlsManager) import Network.HTTP.Client.TLS (newTlsManager)
...@@ -54,16 +53,6 @@ addJobToSchedule jobt mvar = do ...@@ -54,16 +53,6 @@ addJobToSchedule jobt mvar = do
data Counts = Counts { countAs :: Int, countBs :: Int } data Counts = Counts { countAs :: Int, countBs :: Int }
deriving (Eq, Show) deriving (Eq, Show)
inc, dec :: JobT -> Counts -> Counts
inc A cs = cs { countAs = countAs cs + 1 }
inc B cs = cs { countBs = countBs cs + 1 }
inc C cs = cs
inc D cs = cs
dec A cs = cs { countAs = countAs cs - 1 }
dec B cs = cs { countBs = countBs cs - 1 }
dec C cs = cs
dec D cs = cs
jobDuration, initialDelay :: Int jobDuration, initialDelay :: Int
jobDuration = 100000 jobDuration = 100000
initialDelay = 20000 initialDelay = 20000
...@@ -194,9 +183,6 @@ testTlsManager :: Manager ...@@ -194,9 +183,6 @@ testTlsManager :: Manager
testTlsManager = unsafePerformIO newTlsManager testTlsManager = unsafePerformIO newTlsManager
{-# NOINLINE testTlsManager #-} {-# NOINLINE testTlsManager #-}
shouldBeE :: (MonadIO m, HasCallStack, Show a, Eq a) => a -> a -> m ()
shouldBeE a b = liftIO (shouldBe a b)
withJob :: Env withJob :: Env
-> (JobHandle MyDummyMonad -> () -> MyDummyMonad ()) -> (JobHandle MyDummyMonad -> () -> MyDummyMonad ())
-> IO (SJ.JobStatus 'SJ.Safe JobLog) -> IO (SJ.JobStatus 'SJ.Safe JobLog)
...@@ -362,8 +348,8 @@ testMarkProgress = do ...@@ -362,8 +348,8 @@ testMarkProgress = do
] ]
} }
main :: IO () test :: Spec
main = hspec $ do test = do
describe "job queue" $ do describe "job queue" $ do
it "respects max runners limit" $ it "respects max runners limit" $
testMaxRunners testMaxRunners
......
{-# OPTIONS_GHC -fno-warn-orphans #-} {-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
module Gargantext.API.Admin.Orchestrator.Types module Gargantext.API.Admin.Orchestrator.Types
where where
import Control.Lens hiding (elements) import Control.Lens hiding (elements)
import Control.Monad.Reader (MonadReader)
import Data.Aeson import Data.Aeson
import Data.Morpheus.Types import Data.Morpheus.Types
( GQLType ( GQLType
...@@ -24,9 +23,7 @@ import Test.QuickCheck.Arbitrary ...@@ -24,9 +23,7 @@ import Test.QuickCheck.Arbitrary
import qualified Gargantext.API.GraphQL.Utils as GQLU import qualified Gargantext.API.GraphQL.Utils as GQLU
import Gargantext.Core.Types (TODO(..)) import Gargantext.Core.Types (TODO(..))
import Gargantext.Database.Prelude (HasConfig(..))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Config (gc_pubmed_api_key)
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance Arbitrary a => Arbitrary (JobStatus 'Safe a) where instance Arbitrary a => Arbitrary (JobStatus 'Safe a) where
...@@ -37,39 +34,24 @@ instance Arbitrary a => Arbitrary (JobOutput a) where ...@@ -37,39 +34,24 @@ instance Arbitrary a => Arbitrary (JobOutput a) where
-- | Main Types -- | Main Types
-- TODO IsidoreAuth -- TODO IsidoreAuth
data ExternalAPIs = All data ExternalAPIs = PubMed
| PubMed { mAPIKey :: Maybe Text }
| Arxiv | Arxiv
| HAL | HAL
| IsTex | IsTex
| Isidore | Isidore
deriving (Show, Eq, Generic) deriving (Show, Eq, Generic, Enum, Bounded)
-- | Main Instances -- | Main Instances
instance FromJSON ExternalAPIs instance FromJSON ExternalAPIs
instance ToJSON ExternalAPIs instance ToJSON ExternalAPIs
externalAPIs :: ( MonadReader env m externalAPIs :: [ExternalAPIs]
, HasConfig env) => m [ExternalAPIs] externalAPIs = [minBound .. maxBound]
externalAPIs = do
pubmed_api_key <- view $ hasConfig . gc_pubmed_api_key
pure [ All
, PubMed { mAPIKey = Just pubmed_api_key }
, Arxiv
, HAL
, IsTex
, Isidore ]
instance Arbitrary ExternalAPIs instance Arbitrary ExternalAPIs
where where
arbitrary = elements [ All arbitrary = arbitraryBoundedEnum
, PubMed { mAPIKey = Nothing }
, Arxiv
, HAL
, IsTex
, Isidore ]
instance ToSchema ExternalAPIs where instance ToSchema ExternalAPIs where
declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
......
...@@ -54,17 +54,18 @@ import Gargantext.Database.Action.Mail (sendMail) ...@@ -54,17 +54,18 @@ import Gargantext.Database.Action.Mail (sendMail)
import Gargantext.Database.Action.Node (mkNodeWithParent) import Gargantext.Database.Action.Node (mkNodeWithParent)
import Gargantext.Database.Action.User (getUserId) import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Types.Hyperdata import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..), UserId) import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..))
import Gargantext.Database.Prelude (hasConfig) import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Node (getNodeWith, updateCorpusPubmedAPIKey) import Gargantext.Database.Query.Table.Node (getNodeWith, updateCorpusPubmedAPIKey)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata) import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Node (node_hyperdata) import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Config (gc_max_docs_parsers) import Gargantext.Prelude.Config (gc_max_docs_parsers, gc_pubmed_api_key)
import Gargantext.Utils.Jobs (JobHandle, MonadJobStatus(..)) import Gargantext.Utils.Jobs (JobHandle, MonadJobStatus(..))
import qualified Gargantext.Core.Text.Corpus.API as API import qualified Gargantext.Core.Text.Corpus.API as API
import qualified Gargantext.Core.Text.Corpus.Parsers as Parser (FileType(..), parseFormatC) import qualified Gargantext.Core.Text.Corpus.Parsers as Parser (FileType(..), parseFormatC)
import qualified Gargantext.Database.GargDB as GargDB import qualified Gargantext.Database.GargDB as GargDB
------------------------------------------------------------------------ ------------------------------------------------------------------------
{- {-
data Query = Query { query_query :: Text data Query = Query { query_query :: Text
...@@ -130,16 +131,13 @@ deriveJSON (unPrefix "") 'ApiInfo ...@@ -130,16 +131,13 @@ deriveJSON (unPrefix "") 'ApiInfo
instance ToSchema ApiInfo instance ToSchema ApiInfo
info :: FlowCmdM env err m => UserId -> m ApiInfo info :: ApiInfo
info _u = do info = ApiInfo API.externalAPIs
ext <- API.externalAPIs
pure $ ApiInfo ext
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
data WithQuery = WithQuery data WithQuery = WithQuery
{ _wq_query :: !Text { _wq_query :: !API.RawQuery
, _wq_databases :: !Database , _wq_databases :: !Database
, _wq_datafield :: !(Maybe Datafield) , _wq_datafield :: !(Maybe Datafield)
, _wq_lang :: !Lang , _wq_lang :: !Lang
...@@ -185,7 +183,7 @@ addToCorpusWithQuery :: (FlowCmdM env err m, MonadJobStatus m) ...@@ -185,7 +183,7 @@ addToCorpusWithQuery :: (FlowCmdM env err m, MonadJobStatus m)
=> User => User
-> CorpusId -> CorpusId
-> WithQuery -> WithQuery
-> Maybe Integer -> Maybe API.Limit
-> JobHandle m -> JobHandle m
-> m () -> m ()
addToCorpusWithQuery user cid (WithQuery { _wq_query = q addToCorpusWithQuery user cid (WithQuery { _wq_query = q
...@@ -210,7 +208,8 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q ...@@ -210,7 +208,8 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
_ -> do _ -> do
case datafield of case datafield of
Just (External (PubMed { _api_key })) -> do Just (External PubMed) -> do
_api_key <- view $ hasConfig . gc_pubmed_api_key
printDebug "[addToCorpusWithQuery] pubmed api key" _api_key printDebug "[addToCorpusWithQuery] pubmed api key" _api_key
_ <- updateCorpusPubmedAPIKey cid _api_key _ <- updateCorpusPubmedAPIKey cid _api_key
pure () pure ()
...@@ -222,7 +221,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q ...@@ -222,7 +221,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
-- if cid is corpus -> add to corpus -- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private -- if cid is root -> create corpus in Private
-- printDebug "[G.A.N.C.New] getDataText with query" q -- printDebug "[G.A.N.C.New] getDataText with query" q
db <- database2origin dbs let db = database2origin dbs
eTxt <- getDataText db (Multi l) q maybeLimit eTxt <- getDataText db (Multi l) q maybeLimit
-- printDebug "[G.A.N.C.New] lTxts" lTxts -- printDebug "[G.A.N.C.New] lTxts" lTxts
......
...@@ -150,7 +150,7 @@ insertSearxResponse user cId listId l (Right (SearxResponse { _srs_results })) = ...@@ -150,7 +150,7 @@ insertSearxResponse user cId listId l (Right (SearxResponse { _srs_results })) =
triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m, MonadJobStatus m) triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m, MonadJobStatus m)
=> User => User
-> CorpusId -> CorpusId
-> API.Query -> API.RawQuery
-> Lang -> Lang
-> JobHandle m -> JobHandle m
-> m () -> m ()
...@@ -183,7 +183,7 @@ triggerSearxSearch user cId q l jobHandle = do ...@@ -183,7 +183,7 @@ triggerSearxSearch user cId q l jobHandle = do
res <- liftBase $ fetchSearxPage $ FetchSearxParams { _fsp_language = l res <- liftBase $ fetchSearxPage $ FetchSearxParams { _fsp_language = l
, _fsp_manager = manager , _fsp_manager = manager
, _fsp_pageno = page , _fsp_pageno = page
, _fsp_query = q , _fsp_query = API.getRawQuery q
, _fsp_url = surl } , _fsp_url = surl }
insertSearxResponse user cId listId l res insertSearxResponse user cId listId l res
......
...@@ -3,44 +3,37 @@ ...@@ -3,44 +3,37 @@
module Gargantext.API.Node.Corpus.Types where module Gargantext.API.Node.Corpus.Types where
import Control.Lens hiding (elements, Empty) import Control.Lens hiding (elements, Empty)
import Control.Monad.Reader (MonadReader)
import Data.Aeson import Data.Aeson
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Monoid (mempty) import Data.Monoid (mempty)
import Data.Swagger import Data.Swagger
import GHC.Generics (Generic) import GHC.Generics (Generic)
import qualified PUBMED.Types as PUBMED
import Gargantext.Prelude import Gargantext.Prelude
import qualified Gargantext.API.Admin.Orchestrator.Types as T import qualified Gargantext.API.Admin.Orchestrator.Types as Types
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow (DataOrigin(..)) import Gargantext.Database.Action.Flow (DataOrigin(..))
import Gargantext.Database.Prelude (HasConfig(..))
data Database = Empty data Database = Empty
| PubMed { _api_key :: Maybe PUBMED.APIKey } | PubMed
| Arxiv | Arxiv
| HAL | HAL
| IsTex | IsTex
| Isidore | Isidore
deriving (Eq, Show, Generic) deriving (Eq, Show, Generic, Enum, Bounded)
deriveJSON (unPrefix "") ''Database deriveJSON (unPrefix "") ''Database
instance ToSchema Database where instance ToSchema Database where
declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
database2origin :: ( MonadReader env m database2origin :: Database -> DataOrigin
, HasConfig env ) => Database -> m DataOrigin database2origin Empty = InternalOrigin Types.IsTex
database2origin Empty = pure $ InternalOrigin T.IsTex database2origin PubMed = ExternalOrigin Types.PubMed
database2origin (PubMed { _api_key }) = do database2origin Arxiv = ExternalOrigin Types.Arxiv
-- pubmed_api_key <- view $ hasConfig . gc_pubmed_api_key database2origin HAL = ExternalOrigin Types.HAL
database2origin IsTex = ExternalOrigin Types.IsTex
pure $ ExternalOrigin $ T.PubMed { mAPIKey = _api_key } database2origin Isidore = ExternalOrigin Types.Isidore
database2origin Arxiv = pure $ ExternalOrigin T.Arxiv
database2origin HAL = pure $ ExternalOrigin T.HAL
database2origin IsTex = pure $ ExternalOrigin T.IsTex
database2origin Isidore = pure $ ExternalOrigin T.Isidore
------------------------------------------------------------------------ ------------------------------------------------------------------------
data Datafield = Gargantext data Datafield = Gargantext
......
...@@ -290,7 +290,7 @@ addCorpusWithQuery :: User -> ServerT New.AddWithQuery (GargM Env GargError) ...@@ -290,7 +290,7 @@ addCorpusWithQuery :: User -> ServerT New.AddWithQuery (GargM Env GargError)
addCorpusWithQuery user cid = addCorpusWithQuery user cid =
serveJobsAPI AddCorpusQueryJob $ \jHandle q -> do serveJobsAPI AddCorpusQueryJob $ \jHandle q -> do
limit <- view $ hasConfig . gc_max_docs_scrapers limit <- view $ hasConfig . gc_max_docs_scrapers
New.addToCorpusWithQuery user cid q (Just limit) jHandle New.addToCorpusWithQuery user cid q (Just $ fromIntegral limit) jHandle
{- let log' x = do {- let log' x = do
printDebug "addToCorpusWithQuery" x printDebug "addToCorpusWithQuery" x
liftBase $ log x liftBase $ log x
......
...@@ -11,46 +11,58 @@ Portability : POSIX ...@@ -11,46 +11,58 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API module Gargantext.Core.Text.Corpus.API
( ExternalAPIs(..) ( ExternalAPIs(..)
, Query , Corpus.RawQuery(..)
, Limit , Corpus.Limit(..)
, GetCorpusError(..)
, get , get
, externalAPIs , externalAPIs
) where ) where
import Conduit import Conduit
import Control.Lens ((^.))
import Data.Bifunctor
import Data.Either (Either(..)) import Data.Either (Either(..))
import Data.Maybe import Data.Maybe
import qualified Data.Text as T
import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs) import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Config (GargConfig, gc_pubmed_api_key)
import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
import qualified Gargantext.Core.Text.Corpus.Query as Corpus
import Servant.Client (ClientError) import Servant.Client (ClientError)
data GetCorpusError
= -- | We couldn't parse the user input query into something meaningful.
InvalidInputQuery !Corpus.RawQuery !T.Text
-- | The external service returned an error.
| ExternalAPIError !ClientError
deriving (Show, Eq)
-- | Get External API metadata main function -- | Get External API metadata main function
get :: ExternalAPIs get :: GargConfig
-> ExternalAPIs
-> Lang -> Lang
-> Query -> Corpus.RawQuery
-> Maybe Limit -> Maybe Corpus.Limit
-- -> IO [HyperdataDocument] -- -> IO [HyperdataDocument]
-> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ())) -> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get PubMed { mAPIKey = mAPIKey } _la q limit = PUBMED.get mAPIKey q limit get cfg externalAPI la q limit = do
--docs <- PUBMED.get q default_limit -- EN only by default case Corpus.parseQuery q of
--pure (Just $ fromIntegral $ length docs, yieldMany docs) Left err -> pure $ Left $ InvalidInputQuery q (T.pack err)
get Arxiv la q limit = Arxiv.get la q (fromIntegral <$> limit) Right corpusQuery -> case externalAPI of
get HAL la q limit = HAL.getC la q limit PubMed -> first ExternalAPIError <$>
get IsTex la q limit = do PUBMED.get (cfg ^. gc_pubmed_api_key) corpusQuery limit
docs <- ISTEX.get la q limit --docs <- PUBMED.get q default_limit -- EN only by default
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs) --pure (Just $ fromIntegral $ length docs, yieldMany docs)
get Isidore la q limit = do Arxiv -> Right <$> Arxiv.get la corpusQuery limit
docs <- ISIDORE.get la (fromIntegral <$> limit) (Just q) Nothing HAL -> first ExternalAPIError <$> HAL.getC la (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs) IsTex -> do docs <- ISTEX.get la (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
get externalApi _ _ _ = panic $ "[G.C.T.Corpus.API] This options are note taken into account: " <> (cs $ show externalApi) pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
Isidore -> do docs <- ISIDORE.get la (Corpus.getLimit <$> limit) (Just $ Corpus.getRawQuery q) Nothing
-- | Some Sugar for the documentation pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
type Query = PUBMED.Query
type Limit = PUBMED.Limit
...@@ -10,34 +10,81 @@ Portability : POSIX ...@@ -10,34 +10,81 @@ Portability : POSIX
-} -}
{-# OPTIONS_GHC -fno-warn-orphans -fno-warn-unused-top-binds #-} {-# OPTIONS_GHC -fno-warn-orphans -fno-warn-unused-top-binds #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Text.Corpus.API.Arxiv module Gargantext.Core.Text.Corpus.API.Arxiv
where ( get
-- * Internals for testing
, convertQuery
) where
import Conduit import Conduit
import Data.Either (Either(..))
import Data.Maybe import Data.Maybe
import Data.Text (Text) import Data.Text (Text, unpack)
import qualified Data.Text as Text import qualified Data.Text as Text
import Servant.Client (ClientError)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..))
import qualified Arxiv as Arxiv import qualified Arxiv as Arxiv
import qualified Network.Api.Arxiv as Ax import qualified Network.Api.Arxiv as Ax
type Query = Text -- | Converts a Gargantext's generic boolean query into an Arxiv Query.
type Limit = Arxiv.Limit convertQuery :: Corpus.Query -> Ax.Query
convertQuery q = mkQuery (interpretQuery q transformAST)
where
mkQuery :: Maybe Ax.Expression -> Ax.Query
mkQuery mb_exp = Ax.Query { Ax.qExp = mb_exp
, Ax.qIds = []
, Ax.qStart = 0
, Ax.qItems = Arxiv.batchSize }
-- Converts a 'BoolExpr' with 'Term's on the leaves into an Arxiv's expression.
-- It yields 'Nothing' if the AST cannot be converted into a meaningful expression.
transformAST :: BoolExpr Term -> Maybe Ax.Expression
transformAST ast = case ast of
BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated.
-> Ax.AndNot <$> (transformAST sub) <*> transformAST (BConst (Positive term))
BAnd term1 (BNot term2)
-> Ax.AndNot <$> transformAST term1 <*> transformAST term2
BAnd sub1 sub2
-> Ax.And <$> transformAST sub1 <*> transformAST sub2
BOr sub1 sub2
-> Ax.Or <$> transformAST sub1 <*> transformAST sub2
BNot (BConst (Negative term))
-> transformAST (BConst (Positive term)) -- double negation
-- We can handle negatives via `ANDNOT` with itself.
BNot sub
-> Ax.AndNot <$> transformAST sub <*> transformAST sub
-- BTrue cannot happen is the query parser doesn't support parsing 'TRUE' alone.
BTrue
-> Nothing
-- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
BFalse
-> Nothing
BConst (Positive (Term term))
-> Just $ Ax.Exp $ Ax.Abs [unpack term]
-- We can handle negatives via `ANDNOT` with itself.
BConst (Negative (Term term))
-> Just $ Ax.AndNot (Ax.Exp $ Ax.Abs [unpack term]) (Ax.Exp $ Ax.Abs [unpack term])
-- | TODO put default pubmed query in gargantext.ini -- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs -- by default: 10K docs
get :: Lang -> Query -> Maybe Limit -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ())) get :: Lang
get la q _l = do -> Corpus.Query
(cnt, resC) <- Arxiv.apiSimpleC Nothing [Text.unpack q] -> Maybe Corpus.Limit
pure $ Right (Just $ fromIntegral cnt, resC .| mapC (toDoc la)) -> IO (Maybe Integer, ConduitT () HyperdataDocument IO ())
get la (convertQuery -> query) (fmap getLimit -> limit) = do
(cnt, resC) <- case limit of
Nothing -> Arxiv.searchAxv' query
(Just l) -> do (cnt, res) <- Arxiv.searchAxv' query
pure (cnt, res .| takeC l)
pure $ (Just $ fromIntegral cnt, resC .| mapC (toDoc la))
toDoc :: Lang -> Arxiv.Result -> HyperdataDocument toDoc :: Lang -> Arxiv.Result -> HyperdataDocument
toDoc l (Arxiv.Result { abstract toDoc l (Arxiv.Result { abstract
......
...@@ -27,14 +27,14 @@ import qualified HAL as HAL ...@@ -27,14 +27,14 @@ import qualified HAL as HAL
import qualified HAL.Client as HAL import qualified HAL.Client as HAL
import qualified HAL.Doc.Corpus as HAL import qualified HAL.Doc.Corpus as HAL
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument] get :: Lang -> Text -> Maybe Int -> IO [HyperdataDocument]
get la q ml = do get la q ml = do
eDocs <- HAL.getMetadataWith q (Just 0) ml eDocs <- HAL.getMetadataWith q (Just 0) (fromIntegral <$> ml)
either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) eDocs either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) eDocs
getC :: Lang -> Text -> Maybe Integer -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ())) getC :: Lang -> Text -> Maybe Int -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
getC la q ml = do getC la q ml = do
eRes <- HAL.getMetadataWithC q (Just 0) ml eRes <- HAL.getMetadataWithC q (Just 0) (fromIntegral <$> ml)
pure $ (\(len, docsC) -> (len, docsC .| mapMC (toDoc' la))) <$> eRes pure $ (\(len, docsC) -> (len, docsC .| mapMC (toDoc' la))) <$> eRes
-- case eRes of -- case eRes of
-- Left err -> panic $ pack $ show err -- Left err -> panic $ pack $ show err
......
...@@ -29,7 +29,7 @@ import qualified ISTEX as ISTEX ...@@ -29,7 +29,7 @@ import qualified ISTEX as ISTEX
import qualified ISTEX.Client as ISTEX import qualified ISTEX.Client as ISTEX
type Query = Text type Query = Text
type MaxResults = Maybe Integer type MaxResults = Maybe Int
get :: Lang -> Query -> MaxResults -> IO [HyperdataDocument] get :: Lang -> Query -> MaxResults -> IO [HyperdataDocument]
get la query' maxResults = do get la query' maxResults = do
...@@ -57,7 +57,7 @@ get la query' maxResults = do ...@@ -57,7 +57,7 @@ get la query' maxResults = do
-- Complex queries of IsTex needs parameters using ":" so we leave the query as it is -- Complex queries of IsTex needs parameters using ":" so we leave the query as it is
-- in that case we suppose user is knowing what s.he is doing -- in that case we suppose user is knowing what s.he is doing
eDocs <- ISTEX.getMetadataWith query (fromIntegral <$> maxResults) eDocs <- ISTEX.getMetadataWith query maxResults
-- printDebug "[Istex.get] will print length" (0 :: Int) -- printDebug "[Istex.get] will print length" (0 :: Int)
case eDocs of case eDocs of
Left _ -> pure () Left _ -> pure ()
......
...@@ -9,20 +9,33 @@ Portability : POSIX ...@@ -9,20 +9,33 @@ Portability : POSIX
-} -}
{-# LANGUAGE DerivingStrategies #-}
module Gargantext.Core.Text.Corpus.API.Pubmed module Gargantext.Core.Text.Corpus.API.Pubmed
( get
-- * Internals for testing
, ESearch(..)
, convertQuery
, getESearch
)
where where
import Conduit import Conduit
import Control.Monad.Reader (runReaderT) import Control.Monad.Reader (runReaderT)
import Data.Either (Either) import Data.Either (Either)
import Data.Maybe import Data.Maybe
import Data.Semigroup
import Data.Monoid
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.Text.Encoding as TE
import Network.HTTP.Types.URI (EscapeItem(..), renderQueryPartialEscape)
import Servant.Client (ClientError) import Servant.Client (ClientError)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import qualified PUBMED as PubMed import qualified PUBMED as PubMed
...@@ -30,24 +43,72 @@ import qualified PUBMED.Parser as PubMedDoc ...@@ -30,24 +43,72 @@ import qualified PUBMED.Parser as PubMedDoc
import PUBMED.Types (Config(..)) import PUBMED.Types (Config(..))
type Query = Text -- | A pubmed query.
type Limit = Integer -- See: https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
newtype ESearch = ESearch { _ESearch :: [EscapeItem] }
deriving stock (Show, Eq)
deriving newtype (Semigroup, Monoid)
-- | Returns an /url encoded/ query ready to be sent to pubmed.
getESearch :: ESearch -> Text
getESearch (ESearch items) =
Text.replace "term=" "" . TE.decodeUtf8 . renderQueryPartialEscape False $ [
("term", items)
]
convertQuery :: Corpus.Query -> ESearch
convertQuery q = ESearch (interpretQuery q transformAST)
where
transformAST :: BoolExpr Term -> [EscapeItem]
transformAST ast = case ast of
BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated.
-> (transformAST sub) <> [QN "+AND+NOT+"] <> transformAST (BConst (Positive term))
BAnd term1 (BNot term2)
-> transformAST term1 <> [QN "+AND+NOT+"] <> transformAST term2
BAnd sub1 sub2
-> transformAST sub1 <> [QN "+AND+"] <> transformAST sub2
BOr sub1 sub2
-> transformAST sub1 <> [QN "+OR+"] <> transformAST sub2
BNot (BConst (Negative term))
-> transformAST (BConst (Positive term)) -- double negation
BNot sub
-> [QN "NOT+"] <> transformAST sub
-- BTrue cannot happen is the query parser doesn't support parsing 'TRUE' alone.
BTrue
-> mempty
-- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
BFalse
-> mempty
BConst (Positive (Term term))
-> [QE (TE.encodeUtf8 term)]
-- We can handle negatives via `ANDNOT` with itself.
BConst (Negative (Term term))
-> [QN "NOT+", QE (TE.encodeUtf8 term)]
-- | TODO put default pubmed query in gargantext.ini -- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs -- by default: 10K docs
get :: Maybe Text get :: Text
-> Query -> Corpus.Query
-> Maybe Limit -> Maybe Limit
-> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ())) -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get mAPIKey q l = do get apiKey q l = do
eRes <- runReaderT PubMed.getMetadataWithC (Config { apiKey = mAPIKey -- The documentation for PUBMED says:
, query = q -- Values for query keys may also be provided in term if they are preceeded by a
-- '#' (%23 in the URL). While only one query_key parameter can be provided to ESearch,
-- any number of query keys can be combined in term. Also, if query keys are provided in term,
-- they can be combined with OR or NOT in addition to AND.
-- Example:
-- esearch.fcgi?db=pubmed&term=%231+AND+asthma&WebEnv=<webenv string>&usehistory=y
--
-- Therefore, we can pretty-print our 'Query' back into something that PubMed could understand.
eRes <- runReaderT PubMed.getMetadataWithC (Config { apiKey = Just apiKey
, query = getESearch $ convertQuery q
, perPage = Just 200 , perPage = Just 200
, mWebEnv = Nothing }) , mWebEnv = Nothing })
let takeLimit = case l of let takeLimit = case l of
Nothing -> mapC identity Nothing -> mapC identity
Just l' -> takeC $ fromIntegral l' Just l' -> takeC $ getLimit l'
pure $ (\(len, docsC) -> (len, docsC .| takeLimit .| mapC (toDoc EN))) <$> eRes pure $ (\(len, docsC) -> (len, docsC .| takeLimit .| mapC (toDoc EN))) <$> eRes
--either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN)) --either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
-- <$> PubMed.getMetadataWithC q l -- <$> PubMed.getMetadataWithC q l
......
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DerivingStrategies #-}
module Gargantext.Core.Text.Corpus.Query (
Query -- * opaque
, RawQuery(..)
, Limit(..)
, getQuery
, parseQuery
, renderQuery
, interpretQuery
, ExternalAPIs(..)
, module BoolExpr
-- * Useful for testing
, unsafeMkQuery
) where
import Data.Bifunctor
import Data.String
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.Core.Types
import Prelude
import Text.ParserCombinators.Parsec
import qualified Data.Aeson as Aeson
import Data.BoolExpr as BoolExpr
import Data.BoolExpr.Parser as BoolExpr
import Data.BoolExpr.Printer as BoolExpr
import qualified Data.Swagger as Swagger
import qualified Data.Text as T
import qualified Servant.API as Servant
import qualified Text.Parsec as P
-- | A raw query, as typed by the user from the frontend.
newtype RawQuery = RawQuery { getRawQuery :: T.Text }
deriving newtype ( Show, Eq, IsString
, Servant.FromHttpApiData, Servant.ToHttpApiData
, Aeson.FromJSON, Aeson.ToJSON
, Swagger.ToParamSchema, Swagger.ToSchema)
-- | A limit to the number of results we want to retrieve.
newtype Limit = Limit { getLimit :: Int }
deriving newtype ( Show, Eq, Num
, Servant.FromHttpApiData, Servant.ToHttpApiData
, Aeson.FromJSON, Aeson.ToJSON
, Swagger.ToParamSchema, Swagger.ToSchema)
-- | An opaque wrapper around a 'Query' type which can be parsed from a boolean
-- expression like (a AND b) OR c, and which can be interpreted in many ways
-- according to the particular service we are targeting.
newtype Query = Query { getQuery :: (BoolExpr.CNF Term) }
deriving Show
interpretQuery :: Query -> (BoolExpr.BoolExpr Term -> ast) -> ast
interpretQuery (Query q) transform = transform . simplify . BoolExpr.fromCNF $ q
simplify :: BoolExpr.BoolExpr a -> BoolExpr.BoolExpr a
simplify expr = case expr of
BAnd sub BTrue -> simplify sub
BAnd BTrue sub -> simplify sub
BAnd BFalse _ -> BFalse
BAnd _ BFalse -> BFalse
BAnd sub1 sub2 -> BAnd (simplify sub1) (simplify sub2)
BOr _ BTrue -> BTrue
BOr BTrue _ -> BTrue
BOr sub BFalse -> simplify sub
BOr BFalse sub -> simplify sub
BOr sub1 sub2 -> BOr (simplify sub1) (simplify sub2)
BNot BTrue -> BFalse
BNot BFalse -> BTrue
BNot (BNot sub) -> simplify sub
BNot sub -> BNot (simplify sub)
BTrue -> BTrue
BFalse -> BFalse
BConst signed -> BConst signed
unsafeMkQuery :: BoolExpr.BoolExpr Term -> Query
unsafeMkQuery = Query . BoolExpr.boolTreeToCNF
termToken :: CharParser st Term
termToken = Term <$> (try (T.pack <$> BoolExpr.identifier) <|> (between dubQuote dubQuote multipleTerms))
where
dubQuote = BoolExpr.symbol "\""
multipleTerms = T.intercalate " " . map T.pack <$> sepBy BoolExpr.identifier BoolExpr.whiteSpace
-- | Parses an input 'Text' into a 'Query', reporting an error if it fails.
parseQuery :: RawQuery -> Either String Query
parseQuery (RawQuery txt) = bimap show (Query . BoolExpr.boolTreeToCNF) $
P.runParser (BoolExpr.parseBoolExpr termToken) () "Corpus.Query" (T.unpack txt)
renderQuery :: Query -> RawQuery
renderQuery (Query cnf) = RawQuery . T.pack $ BoolExpr.boolExprPrinter (showsPrec 0) (BoolExpr.fromCNF cnf) ""
...@@ -13,11 +13,12 @@ commentary with @some markup@. ...@@ -13,11 +13,12 @@ commentary with @some markup@.
------------------------------------------------------------------------ ------------------------------------------------------------------------
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE DerivingStrategies #-}
module Gargantext.Core.Types ( module Gargantext.Core.Types.Main module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
, module Gargantext.Database.Admin.Types.Node , module Gargantext.Database.Admin.Types.Node
, DebugMode(..), withDebugMode , DebugMode(..), withDebugMode
, Term, Terms(..), TermsCount, TermsWithCount , Term(..), Terms(..), TermsCount, TermsWithCount
, TokenTag(..), POS(..), NER(..) , TokenTag(..), POS(..), NER(..)
, Label, Stems , Label, Stems
, HasInvalidError(..), assertValid , HasInvalidError(..), assertValid
...@@ -38,6 +39,7 @@ import Data.Maybe ...@@ -38,6 +39,7 @@ import Data.Maybe
import Data.Monoid import Data.Monoid
import Data.Semigroup import Data.Semigroup
import Data.Set (Set, empty) import Data.Set (Set, empty)
import Data.String
import Data.Swagger (ToParamSchema) import Data.Swagger (ToParamSchema)
import Data.Swagger (ToSchema(..)) import Data.Swagger (ToSchema(..))
import Data.Text (Text, unpack) import Data.Text (Text, unpack)
...@@ -63,7 +65,10 @@ data Ordering = Down | Up ...@@ -63,7 +65,10 @@ data Ordering = Down | Up
------------------------------------------------------------------------ ------------------------------------------------------------------------
type Name = Text type Name = Text
type Term = Text
newtype Term = Term { getTerm :: Text }
deriving newtype (Eq, Ord, IsString, Show)
type Stems = Set Text type Stems = Set Text
type Label = [Text] type Label = [Text]
......
...@@ -50,7 +50,6 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list) ...@@ -50,7 +50,6 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
import Conduit import Conduit
import Control.Lens ((^.), view, _Just, makeLenses, over, traverse) import Control.Lens ((^.), view, _Just, makeLenses, over, traverse)
import Control.Monad.Reader (MonadReader)
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Conduit.Internal (zipSources) import Data.Conduit.Internal (zipSources)
import qualified Data.Conduit.List as CList import qualified Data.Conduit.List as CList
...@@ -65,7 +64,6 @@ import Data.Swagger ...@@ -65,7 +64,6 @@ import Data.Swagger
import qualified Data.Text as T import qualified Data.Text as T
import Data.Tuple.Extra (first, second) import Data.Tuple.Extra (first, second)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Servant.Client (ClientError)
import System.FilePath (FilePath) import System.FilePath (FilePath)
import qualified Data.HashMap.Strict as HashMap import qualified Data.HashMap.Strict as HashMap
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
...@@ -133,13 +131,8 @@ deriveJSON (unPrefix "_do_") ''DataOrigin ...@@ -133,13 +131,8 @@ deriveJSON (unPrefix "_do_") ''DataOrigin
instance ToSchema DataOrigin where instance ToSchema DataOrigin where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_do_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_do_")
allDataOrigins :: ( MonadReader env m allDataOrigins :: [DataOrigin]
, HasConfig env) => m [DataOrigin] allDataOrigins = map InternalOrigin API.externalAPIs <> map ExternalOrigin API.externalAPIs
allDataOrigins = do
ext <- API.externalAPIs
pure $ map InternalOrigin ext
<> map ExternalOrigin ext
--------------- ---------------
data DataText = DataOld ![NodeId] data DataText = DataOld ![NodeId]
...@@ -157,11 +150,12 @@ printDataText (DataNew (maybeInt, conduitData)) = do ...@@ -157,11 +150,12 @@ printDataText (DataNew (maybeInt, conduitData)) = do
getDataText :: FlowCmdM env err m getDataText :: FlowCmdM env err m
=> DataOrigin => DataOrigin
-> TermType Lang -> TermType Lang
-> API.Query -> API.RawQuery
-> Maybe API.Limit -> Maybe API.Limit
-> m (Either ClientError DataText) -> m (Either API.GetCorpusError DataText)
getDataText (ExternalOrigin api) la q li = liftBase $ do getDataText (ExternalOrigin api) la q li = do
eRes <- API.get api (_tt_lang la) q li cfg <- view $ hasConfig
eRes <- liftBase $ API.get cfg api (_tt_lang la) q li
pure $ DataNew <$> eRes pure $ DataNew <$> eRes
getDataText (InternalOrigin _) _la q _li = do getDataText (InternalOrigin _) _la q _li = do
...@@ -169,13 +163,13 @@ getDataText (InternalOrigin _) _la q _li = do ...@@ -169,13 +163,13 @@ getDataText (InternalOrigin _) _la q _li = do
(UserName userMaster) (UserName userMaster)
(Left "") (Left "")
(Nothing :: Maybe HyperdataCorpus) (Nothing :: Maybe HyperdataCorpus)
ids <- map fst <$> searchDocInDatabase cId (stemIt q) ids <- map fst <$> searchDocInDatabase cId (stemIt $ API.getRawQuery q)
pure $ Right $ DataOld ids pure $ Right $ DataOld ids
getDataText_Debug :: FlowCmdM env err m getDataText_Debug :: FlowCmdM env err m
=> DataOrigin => DataOrigin
-> TermType Lang -> TermType Lang
-> API.Query -> API.RawQuery
-> Maybe API.Limit -> Maybe API.Limit
-> m () -> m ()
getDataText_Debug a l q li = do getDataText_Debug a l q li = do
......
...@@ -342,8 +342,8 @@ getCorpusPubmedAPIKey cId = do ...@@ -342,8 +342,8 @@ getCorpusPubmedAPIKey cId = do
|] |]
params = PGS.Only cId params = PGS.Only cId
updateCorpusPubmedAPIKey :: NodeId -> Maybe PUBMED.APIKey -> Cmd err Int64 updateCorpusPubmedAPIKey :: NodeId -> PUBMED.APIKey -> Cmd err Int64
updateCorpusPubmedAPIKey cId mAPIKey = updateCorpusPubmedAPIKey cId apiKey =
execPGSQuery query params execPGSQuery query params
where where
query :: PGS.Query query :: PGS.Query
...@@ -352,7 +352,7 @@ updateCorpusPubmedAPIKey cId mAPIKey = ...@@ -352,7 +352,7 @@ updateCorpusPubmedAPIKey cId mAPIKey =
SET hyperdata = hyperdata || ? SET hyperdata = hyperdata || ?
WHERE id = ? WHERE id = ?
|] |]
params = (encode $ object [ "pubmed_api_key" .= mAPIKey ], cId) params = (encode $ object [ "pubmed_api_key" .= apiKey ], cId)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- TODO -- TODO
-- currently this function removes the child relation -- currently this function removes the child relation
......
...@@ -37,6 +37,7 @@ extra-deps: ...@@ -37,6 +37,7 @@ extra-deps:
- HSvm-0.1.1.3.22 - HSvm-0.1.1.3.22
- hsparql-0.3.8 - hsparql-0.3.8
- ghc-clippy-plugin-0.0.0.1 - ghc-clippy-plugin-0.0.0.1
- boolexpr-0.2
#- git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git #- git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
# commit: 791c2a7046a3760f8ae5fabdbd708f61caa63741 # commit: 791c2a7046a3760f8ae5fabdbd708f61caa63741
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment