Verified Commit 1af015a5 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 551-dev-graphql-contexts-ngrams

parents 1c01593e 6e57d503
Pipeline #4197 passed with stages
in 123 minutes and 53 seconds
## Version 0.0.6.9.9.6.3
* [BACK][FIX][Boolean Query Parser (#182)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/182)
* [BACK][FEAT] Gitlab Issue Parser, Welcome Christian
## Version 0.0.6.9.9.6.2
* [BACK][FIX][Ngrams Status change (#217)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/217)
......
......@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.6.9.9.6.2
version: 0.0.6.9.9.6.3
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -53,6 +53,9 @@ library
Gargantext.Core.Text
Gargantext.Core.Text.Context
Gargantext.Core.Text.Corpus.API
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.Pubmed
Gargantext.Core.Text.Corpus.Query
Gargantext.Core.Text.Corpus.Parsers
Gargantext.Core.Text.Corpus.Parsers.CSV
Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
......@@ -185,15 +188,14 @@ library
Gargantext.Core.NodeStoryFile
Gargantext.Core.Statistics
Gargantext.Core.Text.Convert
Gargantext.Core.Text.Corpus.API.Arxiv
Gargantext.Core.Text.Corpus.API.Hal
Gargantext.Core.Text.Corpus.API.Isidore
Gargantext.Core.Text.Corpus.API.Istex
Gargantext.Core.Text.Corpus.API.Pubmed
Gargantext.Core.Text.Corpus.Parsers.Book
Gargantext.Core.Text.Corpus.Parsers.Date
Gargantext.Core.Text.Corpus.Parsers.Date.Attoparsec
Gargantext.Core.Text.Corpus.Parsers.FrameWrite
Gargantext.Core.Text.Corpus.Parsers.Gitlab
Gargantext.Core.Text.Corpus.Parsers.GrandDebat
Gargantext.Core.Text.Corpus.Parsers.Iramuteq
Gargantext.Core.Text.Corpus.Parsers.Isidore
......@@ -382,6 +384,7 @@ library
, blaze-html
, blaze-markup
, blaze-svg
, boolexpr
, bytestring
, case-insensitive
, cassava
......@@ -846,6 +849,7 @@ test-suite garg-test
main-is: Main.hs
other-modules:
Core.Text
Core.Text.Corpus.Query
Core.Text.Examples
Core.Text.Flow
Core.Utils
......@@ -863,6 +867,7 @@ test-suite garg-test
Parsers.Types
Parsers.WOS
Utils.Crypto
Utils.Jobs
Paths_gargantext
hs-source-dirs:
src-test
......@@ -893,61 +898,33 @@ test-suite garg-test
build-depends:
QuickCheck
, aeson
, async
, base
, boolexpr
, bytestring
, conduit
, containers
, crawlerArxiv
, duckling
, extra
, gargantext
, gargantext-prelude
, hspec
, http-client
, http-client-tls
, mtl
, parsec
, patches-class
, patches-map
, quickcheck-instances
, servant-job
, stm
, tasty
, tasty-hspec
, tasty-hunit
, tasty-quickcheck
, text
, time
, unordered-containers
, validity
default-language: Haskell2010
test-suite jobqueue-test
type: exitcode-stdio-1.0
main-is: Main.hs
other-modules:
Paths_gargantext
hs-source-dirs:
tests/queue
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N
build-depends:
aeson
, async
, base
, containers
, extra
, gargantext
, hspec
, http-client
, http-client-tls
, mtl
, servant-job
, stm
, text
, time
default-language: Haskell2010
......@@ -6,7 +6,7 @@ name: gargantext
# | | | +----- Layers * : New versions with API additions
# | | | | +--- Layers * : New versions without API breaking changes
# | | | | |
version: '0.0.6.9.9.6.2'
version: '0.0.6.9.9.6.3'
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -81,6 +81,9 @@ library:
- Gargantext.Core.Text
- Gargantext.Core.Text.Context
- Gargantext.Core.Text.Corpus.API
- Gargantext.Core.Text.Corpus.API.Arxiv
- Gargantext.Core.Text.Corpus.API.Pubmed
- Gargantext.Core.Text.Corpus.Query
- Gargantext.Core.Text.Corpus.Parsers
- Gargantext.Core.Text.Corpus.Parsers.CSV
- Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
......@@ -166,6 +169,7 @@ library:
- blaze-html
- blaze-markup
- blaze-svg
- boolexpr
- bytestring
- case-insensitive
- cassava
......@@ -492,7 +496,6 @@ executables:
- aeson
- serialise
tests:
garg-test:
main: Main.hs
......@@ -512,45 +515,37 @@ tests:
- -rtsopts
- -with-rtsopts=-N
dependencies:
- QuickCheck
- aeson
- async
- base
- boolexpr
- bytestring
- conduit
- containers
- crawlerArxiv
- duckling
- gargantext
- gargantext-prelude
- hspec
- QuickCheck
- quickcheck-instances
- time
- http-client
- http-client-tls
- mtl
- parsec
- patches-class
- patches-map
- duckling
- quickcheck-instances
- servant-job
- stm
- tasty
- tasty-hspec
- tasty-hunit
- tasty-quickcheck
- text
- time
- unordered-containers
- validity
jobqueue-test:
main: Main.hs
source-dirs: tests/queue
ghc-options:
- -threaded
- -rtsopts
- -with-rtsopts=-N
dependencies:
- aeson
- async
- base
- containers
- gargantext
- mtl
- hspec
- http-client
- http-client-tls
- servant-job
- stm
- time
# garg-doctest:
# main: Main.hs
# source-dirs: src-doctest
......
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
module Core.Text.Corpus.Query (tests) where
import Data.BoolExpr
import Data.Conduit
import Data.String
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Query
import Gargantext.Core.Types
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Prelude
import System.Environment
import qualified Data.Text as T
import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
import qualified Gargantext.Core.Text.Corpus.API.Pubmed as Pubmed
import qualified Network.Api.Arxiv as Arxiv
import Test.Tasty
import Test.Tasty.HUnit
import Test.Tasty.QuickCheck hiding (Positive, Negative)
newtype PubmedApiKey
= PubmedApiKey { _PubmedApiKey :: T.Text }
deriving stock (Show, Eq)
deriving newtype IsString
pubmedSettings :: IO (Maybe PubmedApiKey)
pubmedSettings = fmap fromString <$> lookupEnv "PUBMED_API_KEY"
tests :: TestTree
tests = withResource pubmedSettings (const (pure ())) $ \getPubmedKey ->
testGroup "Boolean Query Engine" [
testProperty "Parses 'A OR B'" testParse01
, testProperty "Parses 'A AND B'" testParse02
, testProperty "Parses '-A'" testParse03
, testProperty "Parses 'NOT A'" testParse03_01
, testProperty "Parses 'A -B'" testParse04
, testProperty "Parses 'A NOT -B'" testParse04_01
, testProperty "Parses 'A AND B -C' (left associative)" testParse05
, testProperty "Parses 'A AND (B -C)' (right associative)" testParse05_01
, testProperty "Parses (A OR B OR NOT C) AND (D OR E OR F) -(G OR H OR I)" testParse06
, testCase "Parses words into a single constant" testWordsIntoConst
, testGroup "Arxiv expression converter" [
testCase "It supports 'A AND B'" testArxiv01_01
, testCase "It supports '\"Haskell\" AND \"Agda\"'" testArxiv01_02
, testCase "It supports 'A OR B'" testArxiv02
, testCase "It supports 'A AND NOT B'" testArxiv03_01
, testCase "It supports 'A AND -B'" testArxiv03_02
, testCase "It supports 'A AND -B'" testArxiv03_02
, testCase "It supports 'A AND NOT (NOT B)'" testArxiv04_01
, testCase "It supports 'A AND NOT (NOT (NOT B))'" testArxiv04_02
, testCase "It supports 'A OR NOT B'" testArxiv05
, testCase "It supports '-A'" testArxiv06
]
, testGroup "PUBMED expression converter" [
testCase "It supports 'A'" testPubMed01
, testCase "It supports '-A'" testPubMed02_01
, testCase "It supports 'NOT A'" testPubMed02_02
, testCase "It supports 'NOT (NOT A)'" testPubMed02_03
, testCase "It supports '\"Haskell\" AND \"Idris\"'" testPubMed03
, testCase "It supports 'A OR B'" testPubMed04
]
, testGroup "PUBMED real queries (skipped if PUBMED_API_KEY env var not set)" [
testCase "It searches for \"Covid\"" (testPubMedCovid_01 getPubmedKey)
, testCase "It searches for \"Covid\" AND \"Alzheimer\"" (testPubMedCovid_02 getPubmedKey)
]
-- We skip the Arxiv tests if the PUBMED_API_KEY is not set just for conveniency, to have
-- only a single flow-control mechanism.
, testGroup "ARXIV real queries (skipped if PUBMED_API_KEY env var not set)" [
testCase "It searches for \"Haskell\"" (testArxivRealWorld_01 getPubmedKey)
, testCase "It searches for \"Haskell\" AND \"Agda\"" (testArxivRealWorld_02 getPubmedKey)
]
]
-- | Checks that the 'RawQuery' can be translated into the expected 'BoolExpr' form,
-- by also checking that both renders back to the initial 'RawQuery'.
translatesInto :: RawQuery -> BoolExpr Term -> Property
(translatesInto) raw boolExpr =
let parsed = parseQuery raw
expected = Right (unsafeMkQuery boolExpr)
in counterexample (show parsed <> " != " <> show expected) $
(renderQuery <$> parsed) === (renderQuery <$> expected)
testParse01 :: Property
testParse01 = "A OR B" `translatesInto` (BConst (Positive "A") `BOr` BConst (Positive "B"))
testParse02 :: Property
testParse02 = "A AND B" `translatesInto` (BConst (Positive "A") `BAnd` BConst (Positive "B"))
testParse03 :: Property
testParse03 = "-A" `translatesInto` (BConst (Negative "A"))
testParse03_01 :: Property
testParse03_01 = "NOT A" `translatesInto` (BConst (Negative "A"))
testParse04 :: Property
testParse04 = "A -B" `translatesInto` (BConst (Positive "A") `BAnd` BConst (Negative "B"))
-- Both 'A -B' and 'A AND -B' desugars into the same form.
testParse04_01 :: Property
testParse04_01 = "A AND -B" `translatesInto` (BConst (Positive "A") `BAnd` BConst (Negative "B"))
testParse05 :: Property
testParse05 = "A AND B -C" `translatesInto` ((BConst (Positive "A") `BAnd` BConst (Positive "B")) `BAnd` BConst (Negative "C"))
testParse05_01 :: Property
testParse05_01 =
"A AND (B -C)" `translatesInto` (BConst (Positive "A") `BAnd` (BConst (Positive "B") `BAnd` BConst (Negative "C")))
testParse06 :: Property
testParse06 =
translatesInto "(A OR B OR NOT C) AND (D OR E OR F) -(G OR H OR I)"
(
(
((BConst (Positive "A") `BOr` (BConst (Positive "B"))) `BOr` (BConst (Negative "C")))
`BAnd`
((BConst (Positive "D") `BOr` (BConst (Positive "E"))) `BOr` (BConst (Positive "F")))
)
`BAnd` BNot (
((BConst (Positive "G") `BOr` (BConst (Positive "H"))) `BOr` (BConst (Positive "I")))
)
)
testWordsIntoConst :: Assertion
testWordsIntoConst =
let (expected :: BoolExpr Term) =
fromCNF (boolTreeToCNF @Term $ (BConst (Positive "The Art of Computer Programming") `BAnd` (BConst (Positive "Conceptual Mathematics"))))
in case parseQuery "\"The Art of Computer Programming\" AND \"Conceptual Mathematics\"" of
Left err
-> assertBool err False
Right x
-> fromCNF (getQuery x) @?= expected
withValidQuery :: RawQuery -> (Query -> Assertion) -> Assertion
withValidQuery rawQuery onValidParse = do
case parseQuery rawQuery of
Left err -> assertBool err False
Right x -> onValidParse x
testArxiv01_01 :: Assertion
testArxiv01_01 = withValidQuery "A AND B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv01_02 :: Assertion
testArxiv01_02 = withValidQuery "\"Haskell\" AND \"Agda\"" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["Haskell"]) ((Arxiv.Exp $ Arxiv.Abs ["Agda"]))))
testArxiv02 :: Assertion
testArxiv02 = withValidQuery "A OR B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv03_01 :: Assertion
testArxiv03_01 = withValidQuery "A AND NOT B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv03_02 :: Assertion
testArxiv03_02 = withValidQuery "A AND -B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
-- Double negation get turned into positive.
testArxiv04_01 :: Assertion
testArxiv04_01 = withValidQuery "A AND NOT (NOT B)" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.And (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv04_02 :: Assertion
testArxiv04_02 = withValidQuery "A AND NOT (NOT (NOT B))" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) ((Arxiv.Exp $ Arxiv.Abs ["B"]))))
testArxiv05 :: Assertion
testArxiv05 = withValidQuery "A OR NOT B" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (
Arxiv.Or (Arxiv.Exp $ Arxiv.Abs ["A"])
(Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["B"]) (Arxiv.Exp $ Arxiv.Abs ["B"]))
)
)
testArxiv06 :: Assertion
testArxiv06 = withValidQuery "-A" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Arxiv.qExp (Arxiv.convertQuery q) == Just (
Arxiv.AndNot (Arxiv.Exp $ Arxiv.Abs ["A"]) (Arxiv.Exp $ Arxiv.Abs ["A"])
)
)
--
-- PUBMED tests
--
testPubMed01 :: Assertion
testPubMed01 = withValidQuery "A" $ \q ->
assertBool ("Query not converted into expression: " <> show @(BoolExpr Term) (fromCNF $ getQuery q))
(Pubmed.getESearch (Pubmed.convertQuery q) == "A")
testPubMed02_01 :: Assertion
testPubMed02_01 = withValidQuery "-A" $ \q -> Pubmed.getESearch (Pubmed.convertQuery q) @?= "NOT+A"
testPubMed02_02 :: Assertion
testPubMed02_02 = withValidQuery "NOT A" $ \q -> Pubmed.getESearch (Pubmed.convertQuery q) @?= "NOT+A"
testPubMed02_03 :: Assertion
testPubMed02_03 = withValidQuery "NOT (NOT A)" $ \q -> Pubmed.getESearch (Pubmed.convertQuery q) @?= "A"
testPubMed03 :: Assertion
testPubMed03 = withValidQuery "\"Haskell\" AND \"Idris\"" $ \q ->
Pubmed.getESearch (Pubmed.convertQuery q) @?= "Haskell+AND+Idris"
testPubMed04 :: Assertion
testPubMed04 = withValidQuery "A OR B" $ \q ->
Pubmed.getESearch (Pubmed.convertQuery q) @?= "A+OR+B"
testPubMedCovid_01 :: IO (Maybe PubmedApiKey) -> Assertion
testPubMedCovid_01 getPubmedKey = do
mb_key <- getPubmedKey
case mb_key of
Nothing -> pure ()
Just k -> withValidQuery "\"Covid\"" $ \query -> do
res <- Pubmed.get (_PubmedApiKey k) query (Just 1)
case res of
Left err -> fail (show err)
Right (_, cnd) -> do
hyperDocs <- sourceToList cnd
case hyperDocs of
[] -> fail "No documents found."
(x:_) -> _hd_title x @?= Just "Being a Hospice Nurse in Times of the COVID-19 Pandemic: A Phenomenological Study of Providing End-of-Life Care."
testPubMedCovid_02 :: IO (Maybe PubmedApiKey) -> Assertion
testPubMedCovid_02 getPubmedKey = do
mb_key <- getPubmedKey
case mb_key of
Nothing -> pure ()
Just k -> withValidQuery "\"Covid\" AND \"Alzheimer\"" $ \query -> do
res <- Pubmed.get (_PubmedApiKey k) query (Just 1)
case res of
Left err -> fail (show err)
Right (_, cnd) -> do
hyperDocs <- sourceToList cnd
case hyperDocs of
[] -> fail "No documents found."
(x:_) -> _hd_title x @?= Just "Neurodegenerative and Neurodevelopmental Diseases and the Gut-Brain Axis: The Potential of Therapeutic Targeting of the Microbiome."
testArxivRealWorld_01 :: IO (Maybe PubmedApiKey) -> Assertion
testArxivRealWorld_01 getPubmedKey = do
mb_key <- getPubmedKey
case mb_key of
Nothing -> pure ()
Just _ -> withValidQuery "\"Haskell\"" $ \query -> do
(_, cnd) <- Arxiv.get EN query (Just 1)
hyperDocs <- sourceToList cnd
case hyperDocs of
[] -> fail "No documents found."
(x:_) -> _hd_title x @?= Just "Haskell for OCaml programmers"
testArxivRealWorld_02 :: IO (Maybe PubmedApiKey) -> Assertion
testArxivRealWorld_02 getPubmedKey = do
mb_key <- getPubmedKey
case mb_key of
Nothing -> pure ()
Just _ -> withValidQuery "\"Haskell\" AND \"Agda\"" $ \query -> do
(_, cnd) <- Arxiv.get EN query (Just 1)
hyperDocs <- sourceToList cnd
case hyperDocs of
[] -> fail "No documents found."
(x:_) -> _hd_title x @?= Just "Toward Hole-Driven Development with Liquid Haskell"
......@@ -18,8 +18,8 @@ import Gargantext.Prelude
import Gargantext.Core.Utils
-- | Core.Utils tests
test :: IO ()
test = hspec $ do
test :: Spec
test = do
describe "check if groupWithCounts works" $ do
it "simple integer array" $ do
(groupWithCounts [1, 2, 3, 1, 2, 3]) `shouldBe` [(1, 2), (2, 2), (3, 2)]
......
......@@ -30,8 +30,8 @@ myCooc = HashMap.fromList [((NgramsTerm {unNgramsTerm = "gev au"},NgramsTerm {un
test :: IO ()
test = hspec $ do
test :: Spec
test = do
describe "Cross" $ do
let
(distanceMap,_,_) = doSimilarityMap Conditional 0 Weak myCooc
......
......@@ -11,6 +11,7 @@ Portability : POSIX
import Gargantext.Prelude
import qualified Core.Text.Corpus.Query as CorpusQuery
import qualified Core.Utils as Utils
--import qualified Ngrams.Lang.Fr as Fr
--import qualified Ngrams.Lang as Lang
......@@ -20,17 +21,32 @@ import qualified Parsers.Date as PD
-- import qualified Graph.Distance as GD
import qualified Graph.Clustering as Graph
import qualified Utils.Crypto as Crypto
import qualified Utils.Jobs as Jobs
import Test.Tasty
import Test.Tasty.Hspec
main :: IO ()
main = do
Utils.test
utilSpec <- testSpec "Utils" Utils.test
clusteringSpec <- testSpec "Graph Clustering" Graph.test
dateParserSpec <- testSpec "Date Parsing" PD.testFromRFC3339
cryptoSpec <- testSpec "Crypto" Crypto.test
nlpSpec <- testSpec "NLP" NLP.test
jobsSpec <- testSpec "Jobs" Jobs.test
defaultMain $ testGroup "Gargantext"
[ utilSpec
, clusteringSpec
, dateParserSpec
, cryptoSpec
, nlpSpec
, jobsSpec
, NgramsQuery.tests
, CorpusQuery.tests
]
-- Occ.parsersTest
-- Lang.ngramsExtractionTest FR
-- Lang.ngramsExtractionTest EN
-- Metrics.main
Graph.test
PD.testFromRFC3339
-- GD.test
Crypto.test
NLP.main
NgramsQuery.main
......@@ -19,8 +19,8 @@ import Gargantext.Prelude
import Gargantext.Core.Text.Terms.Multi
main :: IO ()
main = hspec $ do
test :: Spec
test = do
describe "Text that should be cleaned before sending it to NLP tools as micro-services." $ do
let text = "This is a url http://cnrs.gargantext.org to be remove and another one www.gargantext.org and digits 343242-2332 to be remove and some to keep: 232 231 33." :: Text
let result = "This is a url to be remove and another one and digits to be remove and some to keep: 232 231 33."
......
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
module Ngrams.Query where
module Ngrams.Query (tests) where
import Control.Monad
import Data.Coerce
......@@ -21,9 +21,6 @@ import Test.Tasty
import Test.Tasty.HUnit
main :: IO ()
main = defaultMain tests
tests :: TestTree
tests = testGroup "Ngrams" [unitTests]
......
......@@ -36,8 +36,8 @@ fromRFC3339Inv :: Either ParseError ZonedTime -> Text
fromRFC3339Inv (Right z) = toRFC3339 z
fromRFC3339Inv (Left pe) = panic . pack $ show pe
testFromRFC3339 :: IO ()
testFromRFC3339 = hspec $ do
testFromRFC3339 :: Spec
testFromRFC3339 = do
describe "Test fromRFC3339: " $ do
it "is the inverse of Duckling's toRFC3339" $ property $
((==) <*> (fromRFC3339 . fromRFC3339Inv)) . Right . looseZonedTimePrecision
......
......@@ -16,11 +16,10 @@ import Test.Hspec
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash
import Gargantext.Prelude.Utils
-- | Crypto Hash tests
test :: IO ()
test = hspec $ do
test :: Spec
test = do
describe "Hash String with frontend works" $ do
let text = "To hash with backend" :: Text
let hashed = "8a69a94d164279af2b7d1443ce08da6184b3d7e815406076e148159c284b53c3" :: Hash
......
......@@ -3,7 +3,7 @@
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE NumericUnderscores #-}
module Main where
module Utils.Jobs (test) where
import Control.Concurrent
import qualified Control.Concurrent.Async as Async
......@@ -17,7 +17,6 @@ import Data.Either
import Data.List
import Data.Sequence (Seq, (|>), fromList)
import Data.Time
import GHC.Stack
import Prelude
import System.IO.Unsafe
import Network.HTTP.Client.TLS (newTlsManager)
......@@ -54,16 +53,6 @@ addJobToSchedule jobt mvar = do
data Counts = Counts { countAs :: Int, countBs :: Int }
deriving (Eq, Show)
inc, dec :: JobT -> Counts -> Counts
inc A cs = cs { countAs = countAs cs + 1 }
inc B cs = cs { countBs = countBs cs + 1 }
inc C cs = cs
inc D cs = cs
dec A cs = cs { countAs = countAs cs - 1 }
dec B cs = cs { countBs = countBs cs - 1 }
dec C cs = cs
dec D cs = cs
jobDuration, initialDelay :: Int
jobDuration = 100000
initialDelay = 20000
......@@ -194,9 +183,6 @@ testTlsManager :: Manager
testTlsManager = unsafePerformIO newTlsManager
{-# NOINLINE testTlsManager #-}
shouldBeE :: (MonadIO m, HasCallStack, Show a, Eq a) => a -> a -> m ()
shouldBeE a b = liftIO (shouldBe a b)
withJob :: Env
-> (JobHandle MyDummyMonad -> () -> MyDummyMonad ())
-> IO (SJ.JobStatus 'SJ.Safe JobLog)
......@@ -362,8 +348,8 @@ testMarkProgress = do
]
}
main :: IO ()
main = hspec $ do
test :: Spec
test = do
describe "job queue" $ do
it "respects max runners limit" $
testMaxRunners
......
......@@ -6,7 +6,6 @@ module Gargantext.API.Admin.Orchestrator.Types
where
import Control.Lens hiding (elements)
import Control.Monad.Reader (MonadReader)
import Data.Aeson
import Data.Morpheus.Types
( GQLType
......@@ -24,9 +23,7 @@ import Test.QuickCheck.Arbitrary
import qualified Gargantext.API.GraphQL.Utils as GQLU
import Gargantext.Core.Types (TODO(..))
import Gargantext.Database.Prelude (HasConfig(..))
import Gargantext.Prelude
import Gargantext.Prelude.Config (gc_pubmed_api_key)
------------------------------------------------------------------------
instance Arbitrary a => Arbitrary (JobStatus 'Safe a) where
......@@ -37,39 +34,24 @@ instance Arbitrary a => Arbitrary (JobOutput a) where
-- | Main Types
-- TODO IsidoreAuth
data ExternalAPIs = All
| PubMed { mAPIKey :: Maybe Text }
data ExternalAPIs = PubMed
| Arxiv
| HAL
| IsTex
| Isidore
deriving (Show, Eq, Generic)
deriving (Show, Eq, Generic, Enum, Bounded)
-- | Main Instances
instance FromJSON ExternalAPIs
instance ToJSON ExternalAPIs
externalAPIs :: ( MonadReader env m
, HasConfig env) => m [ExternalAPIs]
externalAPIs = do
pubmed_api_key <- view $ hasConfig . gc_pubmed_api_key
pure [ All
, PubMed { mAPIKey = Just pubmed_api_key }
, Arxiv
, HAL
, IsTex
, Isidore ]
externalAPIs :: [ExternalAPIs]
externalAPIs = [minBound .. maxBound]
instance Arbitrary ExternalAPIs
where
arbitrary = elements [ All
, PubMed { mAPIKey = Nothing }
, Arxiv
, HAL
, IsTex
, Isidore ]
arbitrary = arbitraryBoundedEnum
instance ToSchema ExternalAPIs where
declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
......
......@@ -54,17 +54,18 @@ import Gargantext.Database.Action.Mail (sendMail)
import Gargantext.Database.Action.Node (mkNodeWithParent)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..), UserId)
import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..))
import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Node (getNodeWith, updateCorpusPubmedAPIKey)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
import Gargantext.Prelude.Config (gc_max_docs_parsers)
import Gargantext.Prelude.Config (gc_max_docs_parsers, gc_pubmed_api_key)
import Gargantext.Utils.Jobs (JobHandle, MonadJobStatus(..))
import qualified Gargantext.Core.Text.Corpus.API as API
import qualified Gargantext.Core.Text.Corpus.Parsers as Parser (FileType(..), parseFormatC)
import qualified Gargantext.Database.GargDB as GargDB
------------------------------------------------------------------------
{-
data Query = Query { query_query :: Text
......@@ -130,16 +131,13 @@ deriveJSON (unPrefix "") 'ApiInfo
instance ToSchema ApiInfo
info :: FlowCmdM env err m => UserId -> m ApiInfo
info _u = do
ext <- API.externalAPIs
pure $ ApiInfo ext
info :: ApiInfo
info = ApiInfo API.externalAPIs
------------------------------------------------------------------------
------------------------------------------------------------------------
data WithQuery = WithQuery
{ _wq_query :: !Text
{ _wq_query :: !API.RawQuery
, _wq_databases :: !Database
, _wq_datafield :: !(Maybe Datafield)
, _wq_lang :: !Lang
......@@ -185,7 +183,7 @@ addToCorpusWithQuery :: (FlowCmdM env err m, MonadJobStatus m)
=> User
-> CorpusId
-> WithQuery
-> Maybe Integer
-> Maybe API.Limit
-> JobHandle m
-> m ()
addToCorpusWithQuery user cid (WithQuery { _wq_query = q
......@@ -210,7 +208,8 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
_ -> do
case datafield of
Just (External (PubMed { _api_key })) -> do
Just (External PubMed) -> do
_api_key <- view $ hasConfig . gc_pubmed_api_key
printDebug "[addToCorpusWithQuery] pubmed api key" _api_key
_ <- updateCorpusPubmedAPIKey cid _api_key
pure ()
......@@ -222,7 +221,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
-- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private
-- printDebug "[G.A.N.C.New] getDataText with query" q
db <- database2origin dbs
let db = database2origin dbs
eTxt <- getDataText db (Multi l) q maybeLimit
-- printDebug "[G.A.N.C.New] lTxts" lTxts
......
......@@ -150,7 +150,7 @@ insertSearxResponse user cId listId l (Right (SearxResponse { _srs_results })) =
triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m, MonadJobStatus m)
=> User
-> CorpusId
-> API.Query
-> API.RawQuery
-> Lang
-> JobHandle m
-> m ()
......@@ -183,7 +183,7 @@ triggerSearxSearch user cId q l jobHandle = do
res <- liftBase $ fetchSearxPage $ FetchSearxParams { _fsp_language = l
, _fsp_manager = manager
, _fsp_pageno = page
, _fsp_query = q
, _fsp_query = API.getRawQuery q
, _fsp_url = surl }
insertSearxResponse user cId listId l res
......
......@@ -3,44 +3,37 @@
module Gargantext.API.Node.Corpus.Types where
import Control.Lens hiding (elements, Empty)
import Control.Monad.Reader (MonadReader)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Monoid (mempty)
import Data.Swagger
import GHC.Generics (Generic)
import qualified PUBMED.Types as PUBMED
import Gargantext.Prelude
import qualified Gargantext.API.Admin.Orchestrator.Types as T
import qualified Gargantext.API.Admin.Orchestrator.Types as Types
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow (DataOrigin(..))
import Gargantext.Database.Prelude (HasConfig(..))
data Database = Empty
| PubMed { _api_key :: Maybe PUBMED.APIKey }
| PubMed
| Arxiv
| HAL
| IsTex
| Isidore
deriving (Eq, Show, Generic)
deriving (Eq, Show, Generic, Enum, Bounded)
deriveJSON (unPrefix "") ''Database
instance ToSchema Database where
declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
database2origin :: ( MonadReader env m
, HasConfig env ) => Database -> m DataOrigin
database2origin Empty = pure $ InternalOrigin T.IsTex
database2origin (PubMed { _api_key }) = do
-- pubmed_api_key <- view $ hasConfig . gc_pubmed_api_key
pure $ ExternalOrigin $ T.PubMed { mAPIKey = _api_key }
database2origin Arxiv = pure $ ExternalOrigin T.Arxiv
database2origin HAL = pure $ ExternalOrigin T.HAL
database2origin IsTex = pure $ ExternalOrigin T.IsTex
database2origin Isidore = pure $ ExternalOrigin T.Isidore
database2origin :: Database -> DataOrigin
database2origin Empty = InternalOrigin Types.IsTex
database2origin PubMed = ExternalOrigin Types.PubMed
database2origin Arxiv = ExternalOrigin Types.Arxiv
database2origin HAL = ExternalOrigin Types.HAL
database2origin IsTex = ExternalOrigin Types.IsTex
database2origin Isidore = ExternalOrigin Types.Isidore
------------------------------------------------------------------------
data Datafield = Gargantext
......
......@@ -290,7 +290,7 @@ addCorpusWithQuery :: User -> ServerT New.AddWithQuery (GargM Env GargError)
addCorpusWithQuery user cid =
serveJobsAPI AddCorpusQueryJob $ \jHandle q -> do
limit <- view $ hasConfig . gc_max_docs_scrapers
New.addToCorpusWithQuery user cid q (Just limit) jHandle
New.addToCorpusWithQuery user cid q (Just $ fromIntegral limit) jHandle
{- let log' x = do
printDebug "addToCorpusWithQuery" x
liftBase $ log x
......
......@@ -11,46 +11,58 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API
( ExternalAPIs(..)
, Query
, Limit
, Corpus.RawQuery(..)
, Corpus.Limit(..)
, GetCorpusError(..)
, get
, externalAPIs
) where
import Conduit
import Control.Lens ((^.))
import Data.Bifunctor
import Data.Either (Either(..))
import Data.Maybe
import qualified Data.Text as T
import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude
import Gargantext.Prelude.Config (GargConfig, gc_pubmed_api_key)
import qualified Gargantext.Core.Text.Corpus.API.Arxiv as Arxiv
import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
import qualified Gargantext.Core.Text.Corpus.API.Isidore as ISIDORE
import qualified Gargantext.Core.Text.Corpus.API.Istex as ISTEX
import qualified Gargantext.Core.Text.Corpus.API.Pubmed as PUBMED
import qualified Gargantext.Core.Text.Corpus.Query as Corpus
import Servant.Client (ClientError)
data GetCorpusError
= -- | We couldn't parse the user input query into something meaningful.
InvalidInputQuery !Corpus.RawQuery !T.Text
-- | The external service returned an error.
| ExternalAPIError !ClientError
deriving (Show, Eq)
-- | Get External API metadata main function
get :: ExternalAPIs
get :: GargConfig
-> ExternalAPIs
-> Lang
-> Query
-> Maybe Limit
-> Corpus.RawQuery
-> Maybe Corpus.Limit
-- -> IO [HyperdataDocument]
-> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get PubMed { mAPIKey = mAPIKey } _la q limit = PUBMED.get mAPIKey q limit
-> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get cfg externalAPI la q limit = do
case Corpus.parseQuery q of
Left err -> pure $ Left $ InvalidInputQuery q (T.pack err)
Right corpusQuery -> case externalAPI of
PubMed -> first ExternalAPIError <$>
PUBMED.get (cfg ^. gc_pubmed_api_key) corpusQuery limit
--docs <- PUBMED.get q default_limit -- EN only by default
--pure (Just $ fromIntegral $ length docs, yieldMany docs)
get Arxiv la q limit = Arxiv.get la q (fromIntegral <$> limit)
get HAL la q limit = HAL.getC la q limit
get IsTex la q limit = do
docs <- ISTEX.get la q limit
Arxiv -> Right <$> Arxiv.get la corpusQuery limit
HAL -> first ExternalAPIError <$> HAL.getC la (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
IsTex -> do docs <- ISTEX.get la (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
get Isidore la q limit = do
docs <- ISIDORE.get la (fromIntegral <$> limit) (Just q) Nothing
Isidore -> do docs <- ISIDORE.get la (Corpus.getLimit <$> limit) (Just $ Corpus.getRawQuery q) Nothing
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
get externalApi _ _ _ = panic $ "[G.C.T.Corpus.API] This options are note taken into account: " <> (cs $ show externalApi)
-- | Some Sugar for the documentation
type Query = PUBMED.Query
type Limit = PUBMED.Limit
......@@ -10,34 +10,81 @@ Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans -fno-warn-unused-top-binds #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Text.Corpus.API.Arxiv
where
( get
-- * Internals for testing
, convertQuery
) where
import Conduit
import Data.Either (Either(..))
import Data.Maybe
import Data.Text (Text)
import Data.Text (Text, unpack)
import qualified Data.Text as Text
import Servant.Client (ClientError)
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..))
import qualified Arxiv as Arxiv
import qualified Network.Api.Arxiv as Ax
type Query = Text
type Limit = Arxiv.Limit
-- | Converts a Gargantext's generic boolean query into an Arxiv Query.
convertQuery :: Corpus.Query -> Ax.Query
convertQuery q = mkQuery (interpretQuery q transformAST)
where
mkQuery :: Maybe Ax.Expression -> Ax.Query
mkQuery mb_exp = Ax.Query { Ax.qExp = mb_exp
, Ax.qIds = []
, Ax.qStart = 0
, Ax.qItems = Arxiv.batchSize }
-- Converts a 'BoolExpr' with 'Term's on the leaves into an Arxiv's expression.
-- It yields 'Nothing' if the AST cannot be converted into a meaningful expression.
transformAST :: BoolExpr Term -> Maybe Ax.Expression
transformAST ast = case ast of
BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated.
-> Ax.AndNot <$> (transformAST sub) <*> transformAST (BConst (Positive term))
BAnd term1 (BNot term2)
-> Ax.AndNot <$> transformAST term1 <*> transformAST term2
BAnd sub1 sub2
-> Ax.And <$> transformAST sub1 <*> transformAST sub2
BOr sub1 sub2
-> Ax.Or <$> transformAST sub1 <*> transformAST sub2
BNot (BConst (Negative term))
-> transformAST (BConst (Positive term)) -- double negation
-- We can handle negatives via `ANDNOT` with itself.
BNot sub
-> Ax.AndNot <$> transformAST sub <*> transformAST sub
-- BTrue cannot happen is the query parser doesn't support parsing 'TRUE' alone.
BTrue
-> Nothing
-- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
BFalse
-> Nothing
BConst (Positive (Term term))
-> Just $ Ax.Exp $ Ax.Abs [unpack term]
-- We can handle negatives via `ANDNOT` with itself.
BConst (Negative (Term term))
-> Just $ Ax.AndNot (Ax.Exp $ Ax.Abs [unpack term]) (Ax.Exp $ Ax.Abs [unpack term])
-- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs
get :: Lang -> Query -> Maybe Limit -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get la q _l = do
(cnt, resC) <- Arxiv.apiSimpleC Nothing [Text.unpack q]
pure $ Right (Just $ fromIntegral cnt, resC .| mapC (toDoc la))
get :: Lang
-> Corpus.Query
-> Maybe Corpus.Limit
-> IO (Maybe Integer, ConduitT () HyperdataDocument IO ())
get la (convertQuery -> query) (fmap getLimit -> limit) = do
(cnt, resC) <- case limit of
Nothing -> Arxiv.searchAxv' query
(Just l) -> do (cnt, res) <- Arxiv.searchAxv' query
pure (cnt, res .| takeC l)
pure $ (Just $ fromIntegral cnt, resC .| mapC (toDoc la))
toDoc :: Lang -> Arxiv.Result -> HyperdataDocument
toDoc l (Arxiv.Result { abstract
......
......@@ -27,14 +27,14 @@ import qualified HAL as HAL
import qualified HAL.Client as HAL
import qualified HAL.Doc.Corpus as HAL
get :: Lang -> Text -> Maybe Integer -> IO [HyperdataDocument]
get :: Lang -> Text -> Maybe Int -> IO [HyperdataDocument]
get la q ml = do
eDocs <- HAL.getMetadataWith q (Just 0) ml
eDocs <- HAL.getMetadataWith q (Just 0) (fromIntegral <$> ml)
either (panic . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) eDocs
getC :: Lang -> Text -> Maybe Integer -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
getC :: Lang -> Text -> Maybe Int -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
getC la q ml = do
eRes <- HAL.getMetadataWithC q (Just 0) ml
eRes <- HAL.getMetadataWithC q (Just 0) (fromIntegral <$> ml)
pure $ (\(len, docsC) -> (len, docsC .| mapMC (toDoc' la))) <$> eRes
-- case eRes of
-- Left err -> panic $ pack $ show err
......
......@@ -29,7 +29,7 @@ import qualified ISTEX as ISTEX
import qualified ISTEX.Client as ISTEX
type Query = Text
type MaxResults = Maybe Integer
type MaxResults = Maybe Int
get :: Lang -> Query -> MaxResults -> IO [HyperdataDocument]
get la query' maxResults = do
......@@ -57,7 +57,7 @@ get la query' maxResults = do
-- Complex queries of IsTex needs parameters using ":" so we leave the query as it is
-- in that case we suppose user is knowing what s.he is doing
eDocs <- ISTEX.getMetadataWith query (fromIntegral <$> maxResults)
eDocs <- ISTEX.getMetadataWith query maxResults
-- printDebug "[Istex.get] will print length" (0 :: Int)
case eDocs of
Left _ -> pure ()
......
......@@ -9,20 +9,33 @@ Portability : POSIX
-}
{-# LANGUAGE DerivingStrategies #-}
module Gargantext.Core.Text.Corpus.API.Pubmed
( get
-- * Internals for testing
, ESearch(..)
, convertQuery
, getESearch
)
where
import Conduit
import Control.Monad.Reader (runReaderT)
import Data.Either (Either)
import Data.Maybe
import Data.Semigroup
import Data.Monoid
import Data.Text (Text)
import qualified Data.Text as Text
import qualified Data.Text.Encoding as TE
import Network.HTTP.Types.URI (EscapeItem(..), renderQueryPartialEscape)
import Servant.Client (ClientError)
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import qualified PUBMED as PubMed
......@@ -30,24 +43,72 @@ import qualified PUBMED.Parser as PubMedDoc
import PUBMED.Types (Config(..))
type Query = Text
type Limit = Integer
-- | A pubmed query.
-- See: https://www.ncbi.nlm.nih.gov/books/NBK25499/#chapter4.ESearch
newtype ESearch = ESearch { _ESearch :: [EscapeItem] }
deriving stock (Show, Eq)
deriving newtype (Semigroup, Monoid)
-- | Returns an /url encoded/ query ready to be sent to pubmed.
getESearch :: ESearch -> Text
getESearch (ESearch items) =
Text.replace "term=" "" . TE.decodeUtf8 . renderQueryPartialEscape False $ [
("term", items)
]
convertQuery :: Corpus.Query -> ESearch
convertQuery q = ESearch (interpretQuery q transformAST)
where
transformAST :: BoolExpr Term -> [EscapeItem]
transformAST ast = case ast of
BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated.
-> (transformAST sub) <> [QN "+AND+NOT+"] <> transformAST (BConst (Positive term))
BAnd term1 (BNot term2)
-> transformAST term1 <> [QN "+AND+NOT+"] <> transformAST term2
BAnd sub1 sub2
-> transformAST sub1 <> [QN "+AND+"] <> transformAST sub2
BOr sub1 sub2
-> transformAST sub1 <> [QN "+OR+"] <> transformAST sub2
BNot (BConst (Negative term))
-> transformAST (BConst (Positive term)) -- double negation
BNot sub
-> [QN "NOT+"] <> transformAST sub
-- BTrue cannot happen is the query parser doesn't support parsing 'TRUE' alone.
BTrue
-> mempty
-- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
BFalse
-> mempty
BConst (Positive (Term term))
-> [QE (TE.encodeUtf8 term)]
-- We can handle negatives via `ANDNOT` with itself.
BConst (Negative (Term term))
-> [QN "NOT+", QE (TE.encodeUtf8 term)]
-- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs
get :: Maybe Text
-> Query
get :: Text
-> Corpus.Query
-> Maybe Limit
-> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get mAPIKey q l = do
eRes <- runReaderT PubMed.getMetadataWithC (Config { apiKey = mAPIKey
, query = q
get apiKey q l = do
-- The documentation for PUBMED says:
-- Values for query keys may also be provided in term if they are preceeded by a
-- '#' (%23 in the URL). While only one query_key parameter can be provided to ESearch,
-- any number of query keys can be combined in term. Also, if query keys are provided in term,
-- they can be combined with OR or NOT in addition to AND.
-- Example:
-- esearch.fcgi?db=pubmed&term=%231+AND+asthma&WebEnv=<webenv string>&usehistory=y
--
-- Therefore, we can pretty-print our 'Query' back into something that PubMed could understand.
eRes <- runReaderT PubMed.getMetadataWithC (Config { apiKey = Just apiKey
, query = getESearch $ convertQuery q
, perPage = Just 200
, mWebEnv = Nothing })
let takeLimit = case l of
Nothing -> mapC identity
Just l' -> takeC $ fromIntegral l'
Just l' -> takeC $ getLimit l'
pure $ (\(len, docsC) -> (len, docsC .| takeLimit .| mapC (toDoc EN))) <$> eRes
--either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
-- <$> PubMed.getMetadataWithC q l
......
module Gargantext.Core.Text.Corpus.Parsers.Gitlab (
Issue(..), gitlabIssue2hyperdataDocument, readFile_Issues, readFile_IssuesAsDocs
) where
import Data.Aeson
import Data.Time
import qualified Data.Text as DT
import qualified Data.ByteString.Lazy as DBL
import System.FilePath (FilePath)
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
data Issue = Issue { _issue_id :: !Int
, _issue_title :: !DT.Text
, _issue_content :: !DT.Text
, _issue_created :: !LocalTime
, _issue_closed :: !(Maybe UTCTime)
}
deriving (Show)
instance FromJSON Issue where
parseJSON = withObject "Issue" $ \v -> Issue
<$> v .: "c0" -- id
<*> v .: "c1" -- title
<*> v .: "c2" -- content
<*> v .: "c3" -- creation time
<*> v .:? "c4" -- close time
gitlabIssue2hyperdataDocument :: Issue -> HyperdataDocument
gitlabIssue2hyperdataDocument issue = HyperdataDocument
{ _hd_bdd = Nothing
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just (_issue_title issue)
, _hd_authors = Nothing
, _hd_institutes = Nothing
, _hd_source = Nothing
, _hd_abstract = Just (_issue_content issue)
, _hd_publication_date = Just $ DT.pack $ show date
, _hd_publication_year = Just $ fromIntegral year
, _hd_publication_month = Just month
, _hd_publication_day = Just day
, _hd_publication_hour = Just (todHour tod)
, _hd_publication_minute = Just (todMin tod)
, _hd_publication_second = Just (round $ todSec tod)
, _hd_language_iso2 = Just $ (DT.pack . show) lang
}
where lang = EN
date = _issue_created issue
(year, month, day) = toGregorian $ localDay date
tod = localTimeOfDay date
readFile_Issues :: FilePath -> IO [Issue]
readFile_Issues fp = do
raw <- DBL.readFile fp
let mayIssues = decode raw
case mayIssues of
Just is -> pure is
Nothing -> pure []
readFile_IssuesAsDocs :: FilePath -> IO [HyperdataDocument]
readFile_IssuesAsDocs = fmap (fmap gitlabIssue2hyperdataDocument) . readFile_Issues
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DerivingStrategies #-}
module Gargantext.Core.Text.Corpus.Query (
Query -- * opaque
, RawQuery(..)
, Limit(..)
, getQuery
, parseQuery
, renderQuery
, interpretQuery
, ExternalAPIs(..)
, module BoolExpr
-- * Useful for testing
, unsafeMkQuery
) where
import Data.Bifunctor
import Data.String
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.Core.Types
import Prelude
import Text.ParserCombinators.Parsec
import qualified Data.Aeson as Aeson
import Data.BoolExpr as BoolExpr
import Data.BoolExpr.Parser as BoolExpr
import Data.BoolExpr.Printer as BoolExpr
import qualified Data.Swagger as Swagger
import qualified Data.Text as T
import qualified Servant.API as Servant
import qualified Text.Parsec as P
-- | A raw query, as typed by the user from the frontend.
newtype RawQuery = RawQuery { getRawQuery :: T.Text }
deriving newtype ( Show, Eq, IsString
, Servant.FromHttpApiData, Servant.ToHttpApiData
, Aeson.FromJSON, Aeson.ToJSON
, Swagger.ToParamSchema, Swagger.ToSchema)
-- | A limit to the number of results we want to retrieve.
newtype Limit = Limit { getLimit :: Int }
deriving newtype ( Show, Eq, Num
, Servant.FromHttpApiData, Servant.ToHttpApiData
, Aeson.FromJSON, Aeson.ToJSON
, Swagger.ToParamSchema, Swagger.ToSchema)
-- | An opaque wrapper around a 'Query' type which can be parsed from a boolean
-- expression like (a AND b) OR c, and which can be interpreted in many ways
-- according to the particular service we are targeting.
newtype Query = Query { getQuery :: (BoolExpr.CNF Term) }
deriving Show
interpretQuery :: Query -> (BoolExpr.BoolExpr Term -> ast) -> ast
interpretQuery (Query q) transform = transform . simplify . BoolExpr.fromCNF $ q
simplify :: BoolExpr.BoolExpr a -> BoolExpr.BoolExpr a
simplify expr = case expr of
BAnd sub BTrue -> simplify sub
BAnd BTrue sub -> simplify sub
BAnd BFalse _ -> BFalse
BAnd _ BFalse -> BFalse
BAnd sub1 sub2 -> BAnd (simplify sub1) (simplify sub2)
BOr _ BTrue -> BTrue
BOr BTrue _ -> BTrue
BOr sub BFalse -> simplify sub
BOr BFalse sub -> simplify sub
BOr sub1 sub2 -> BOr (simplify sub1) (simplify sub2)
BNot BTrue -> BFalse
BNot BFalse -> BTrue
BNot (BNot sub) -> simplify sub
BNot sub -> BNot (simplify sub)
BTrue -> BTrue
BFalse -> BFalse
BConst signed -> BConst signed
unsafeMkQuery :: BoolExpr.BoolExpr Term -> Query
unsafeMkQuery = Query . BoolExpr.boolTreeToCNF
termToken :: CharParser st Term
termToken = Term <$> (try (T.pack <$> BoolExpr.identifier) <|> (between dubQuote dubQuote multipleTerms))
where
dubQuote = BoolExpr.symbol "\""
multipleTerms = T.intercalate " " . map T.pack <$> sepBy BoolExpr.identifier BoolExpr.whiteSpace
-- | Parses an input 'Text' into a 'Query', reporting an error if it fails.
parseQuery :: RawQuery -> Either String Query
parseQuery (RawQuery txt) = bimap show (Query . BoolExpr.boolTreeToCNF) $
P.runParser (BoolExpr.parseBoolExpr termToken) () "Corpus.Query" (T.unpack txt)
renderQuery :: Query -> RawQuery
renderQuery (Query cnf) = RawQuery . T.pack $ BoolExpr.boolExprPrinter (showsPrec 0) (BoolExpr.fromCNF cnf) ""
......@@ -13,11 +13,12 @@ commentary with @some markup@.
------------------------------------------------------------------------
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE DerivingStrategies #-}
module Gargantext.Core.Types ( module Gargantext.Core.Types.Main
, module Gargantext.Database.Admin.Types.Node
, DebugMode(..), withDebugMode
, Term, Terms(..), TermsCount, TermsWithCount
, Term(..), Terms(..), TermsCount, TermsWithCount
, TokenTag(..), POS(..), NER(..)
, Label, Stems
, HasInvalidError(..), assertValid
......@@ -38,6 +39,7 @@ import Data.Maybe
import Data.Monoid
import Data.Semigroup
import Data.Set (Set, empty)
import Data.String
import Data.Swagger (ToParamSchema)
import Data.Swagger (ToSchema(..))
import Data.Text (Text, unpack)
......@@ -63,7 +65,10 @@ data Ordering = Down | Up
------------------------------------------------------------------------
type Name = Text
type Term = Text
newtype Term = Term { getTerm :: Text }
deriving newtype (Eq, Ord, IsString, Show)
type Stems = Set Text
type Label = [Text]
......
......@@ -50,7 +50,6 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
import Conduit
import Control.Lens ((^.), view, _Just, makeLenses, over, traverse)
import Control.Monad.Reader (MonadReader)
import Data.Aeson.TH (deriveJSON)
import Data.Conduit.Internal (zipSources)
import qualified Data.Conduit.List as CList
......@@ -65,7 +64,6 @@ import Data.Swagger
import qualified Data.Text as T
import Data.Tuple.Extra (first, second)
import GHC.Generics (Generic)
import Servant.Client (ClientError)
import System.FilePath (FilePath)
import qualified Data.HashMap.Strict as HashMap
import qualified Gargantext.Data.HashMap.Strict.Utils as HashMap
......@@ -133,13 +131,8 @@ deriveJSON (unPrefix "_do_") ''DataOrigin
instance ToSchema DataOrigin where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_do_")
allDataOrigins :: ( MonadReader env m
, HasConfig env) => m [DataOrigin]
allDataOrigins = do
ext <- API.externalAPIs
pure $ map InternalOrigin ext
<> map ExternalOrigin ext
allDataOrigins :: [DataOrigin]
allDataOrigins = map InternalOrigin API.externalAPIs <> map ExternalOrigin API.externalAPIs
---------------
data DataText = DataOld ![NodeId]
......@@ -157,11 +150,12 @@ printDataText (DataNew (maybeInt, conduitData)) = do
getDataText :: FlowCmdM env err m
=> DataOrigin
-> TermType Lang
-> API.Query
-> API.RawQuery
-> Maybe API.Limit
-> m (Either ClientError DataText)
getDataText (ExternalOrigin api) la q li = liftBase $ do
eRes <- API.get api (_tt_lang la) q li
-> m (Either API.GetCorpusError DataText)
getDataText (ExternalOrigin api) la q li = do
cfg <- view $ hasConfig
eRes <- liftBase $ API.get cfg api (_tt_lang la) q li
pure $ DataNew <$> eRes
getDataText (InternalOrigin _) _la q _li = do
......@@ -169,13 +163,13 @@ getDataText (InternalOrigin _) _la q _li = do
(UserName userMaster)
(Left "")
(Nothing :: Maybe HyperdataCorpus)
ids <- map fst <$> searchDocInDatabase cId (stemIt q)
ids <- map fst <$> searchDocInDatabase cId (stemIt $ API.getRawQuery q)
pure $ Right $ DataOld ids
getDataText_Debug :: FlowCmdM env err m
=> DataOrigin
-> TermType Lang
-> API.Query
-> API.RawQuery
-> Maybe API.Limit
-> m ()
getDataText_Debug a l q li = do
......
......@@ -342,8 +342,8 @@ getCorpusPubmedAPIKey cId = do
|]
params = PGS.Only cId
updateCorpusPubmedAPIKey :: NodeId -> Maybe PUBMED.APIKey -> Cmd err Int64
updateCorpusPubmedAPIKey cId mAPIKey =
updateCorpusPubmedAPIKey :: NodeId -> PUBMED.APIKey -> Cmd err Int64
updateCorpusPubmedAPIKey cId apiKey =
execPGSQuery query params
where
query :: PGS.Query
......@@ -352,7 +352,7 @@ updateCorpusPubmedAPIKey cId mAPIKey =
SET hyperdata = hyperdata || ?
WHERE id = ?
|]
params = (encode $ object [ "pubmed_api_key" .= mAPIKey ], cId)
params = (encode $ object [ "pubmed_api_key" .= apiKey ], cId)
------------------------------------------------------------------------
-- TODO
-- currently this function removes the child relation
......
......@@ -37,6 +37,7 @@ extra-deps:
- HSvm-0.1.1.3.22
- hsparql-0.3.8
- ghc-clippy-plugin-0.0.0.1
- boolexpr-0.2
#- git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
# commit: 791c2a7046a3760f8ae5fabdbd708f61caa63741
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment