Commit 772987e7 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Initial support for document search by author

parent ba6ff613
# Optimising CI speed by using tips from https://blog.nimbleways.com/let-s-make-faster-gitlab-ci-cd-pipelines/ # Optimising CI speed by using tips from https://blog.nimbleways.com/let-s-make-faster-gitlab-ci-cd-pipelines/
image: adinapoli/gargantext:v2.2 image: adinapoli/gargantext:v2.2.2
variables: variables:
STACK_ROOT: "${CI_PROJECT_DIR}/.stack-root" STACK_ROOT: "${CI_PROJECT_DIR}/.stack-root"
...@@ -78,8 +78,9 @@ test: ...@@ -78,8 +78,9 @@ test:
chown -R test:test /root/.cache/cabal/logs/ chown -R test:test /root/.cache/cabal/logs/
chown -R test:test /root/.cache/cabal/packages/hackage.haskell.org/ chown -R test:test /root/.cache/cabal/packages/hackage.haskell.org/
cd /builds/gargantext/haskell-gargantext/devops/coreNLP; ./build.sh ls /builds/gargantext/devops/coreNLP/stanford-corenlp-current
cd /builds/gargantext/haskell-gargantext cp -R /builds/gargantext/devops/coreNLP/stanford-corenlp-current /build/gargantext/haskell-gargantext/devops/coreNLP/
chown -R test:test /build/gargantext/haskell-gargantext/devops/coreNLP/stanford-corenlp-current
nix-shell --run "chown -R test:test /root/.config/ && su -m test -c \"export PATH=$PATH:$TEST_NIX_PATH && cd /builds/gargantext/haskell-gargantext; $CABAL --store-dir=$CABAL_STORE_DIR v2-test --test-show-details=streaming --flags test-crypto --ghc-options='-O0 -fclear-plugins'\"" nix-shell --run "chown -R test:test /root/.config/ && su -m test -c \"export PATH=$PATH:$TEST_NIX_PATH && cd /builds/gargantext/haskell-gargantext; $CABAL --store-dir=$CABAL_STORE_DIR v2-test --test-show-details=streaming --flags test-crypto --ghc-options='-O0 -fclear-plugins'\""
chown -R root:root dist-newstyle/ chown -R root:root dist-newstyle/
......
...@@ -25,7 +25,7 @@ import Gargantext.Database.Admin.Config (userMaster, corpusMasterName) ...@@ -25,7 +25,7 @@ import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
import Gargantext.Database.Admin.Trigger.Init (initFirstTriggers, initLastTriggers) import Gargantext.Database.Admin.Trigger.Init (initFirstTriggers, initLastTriggers)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus) import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd, ) import Gargantext.Database.Prelude (Cmd, DBCmd)
import Gargantext.Database.Query.Table.Node (getOrMkList) import Gargantext.Database.Query.Table.Node (getOrMkList)
import Gargantext.Database.Query.Table.User (insertNewUsers, ) import Gargantext.Database.Query.Table.User (insertNewUsers, )
import Gargantext.Prelude import Gargantext.Prelude
......
...@@ -11,7 +11,7 @@ STORE_DIR="${1:-$DEFAULT_STORE}" ...@@ -11,7 +11,7 @@ STORE_DIR="${1:-$DEFAULT_STORE}"
# `expected_cabal_project_freeze_hash` with the # `expected_cabal_project_freeze_hash` with the
# `sha256sum` result calculated on the `cabal.project` and `cabal.project.freeze`. # `sha256sum` result calculated on the `cabal.project` and `cabal.project.freeze`.
# This ensures the files stay deterministic so that CI cache can kick in. # This ensures the files stay deterministic so that CI cache can kick in.
expected_cabal_project_hash="eb12c232115b3fffa1f81add7c83d921e5899c7712eddee6100ff8df7305088e" expected_cabal_project_hash="7b82fda55b0051a14b461ce3939e934da47e417794de69cb70973702c43e337e"
expected_cabal_project_freeze_hash="b7acfd12c970323ffe2c6684a13130db09d8ec9fa5676a976afed329f1ef3436" expected_cabal_project_freeze_hash="b7acfd12c970323ffe2c6684a13130db09d8ec9fa5676a976afed329f1ef3436"
cabal --store-dir=$STORE_DIR v2-update 'hackage.haskell.org,2023-06-24T21:28:46Z' cabal --store-dir=$STORE_DIR v2-update 'hackage.haskell.org,2023-06-24T21:28:46Z'
......
...@@ -7,6 +7,11 @@ with-compiler: ghc-8.10.7 ...@@ -7,6 +7,11 @@ with-compiler: ghc-8.10.7
packages: packages:
./ ./
source-repository-package
type: git
location: https://github.com/adinapoli/haskell-opaleye.git
tag: e9a29582ac66198dd2c2fdc3f8c8a4b1e6fbe004
source-repository-package source-repository-package
type: git type: git
location: https://github.com/alpmestan/accelerate.git location: https://github.com/alpmestan/accelerate.git
...@@ -56,11 +61,6 @@ source-repository-package ...@@ -56,11 +61,6 @@ source-repository-package
location: https://github.com/delanoe/patches-map location: https://github.com/delanoe/patches-map
tag: 76cae88f367976ff091e661ee69a5c3126b94694 tag: 76cae88f367976ff091e661ee69a5c3126b94694
source-repository-package
type: git
location: https://github.com/garganscript/haskell-opaleye.git
tag: a5693a2010e6d13f51cdc576fa1dc9985e79ee0e
source-repository-package source-repository-package
type: git type: git
location: https://gitlab.iscpif.fr/amestanogullari/accelerate-utility.git location: https://gitlab.iscpif.fr/amestanogullari/accelerate-utility.git
......
FROM ubuntu:jammy FROM ubuntu:jammy
## NOTA BENE: In order for this to be built successfully, you have to run ./devops/coreNLP/build.sh first.
ARG DEBIAN_FRONTEND=noninteractive ARG DEBIAN_FRONTEND=noninteractive
ARG GHC=8.10.7 ARG GHC=8.10.7
ARG STACK=2.7.3 ARG STACK=2.7.3
ARG CABAL=3.10.1.0 ARG CABAL=3.10.1.0
ARG CORENLP=4.5.4
ARG CORE ARG CORE
COPY ./shell.nix /builds/gargantext/shell.nix COPY ./shell.nix /builds/gargantext/shell.nix
COPY ./nix/pkgs.nix /builds/gargantext/nix/pkgs.nix COPY ./nix/pkgs.nix /builds/gargantext/nix/pkgs.nix
...@@ -15,8 +18,7 @@ COPY ./nix/overlays/Cabal-syntax-3.10.1.0.nix /builds/gargantext/nix/ov ...@@ -15,8 +18,7 @@ COPY ./nix/overlays/Cabal-syntax-3.10.1.0.nix /builds/gargantext/nix/ov
COPY ./nix/overlays/directory-1.3.7.0.nix /builds/gargantext/nix/overlays/directory-1.3.7.0.nix COPY ./nix/overlays/directory-1.3.7.0.nix /builds/gargantext/nix/overlays/directory-1.3.7.0.nix
COPY ./nix/overlays/hackage-security-0.6.2.3.nix /builds/gargantext/nix/overlays/hackage-security-0.6.2.3.nix COPY ./nix/overlays/hackage-security-0.6.2.3.nix /builds/gargantext/nix/overlays/hackage-security-0.6.2.3.nix
COPY ./nix/overlays/process-1.6.15.0.nix /builds/gargantext/nix/overlays/process-1.6.15.0.nix COPY ./nix/overlays/process-1.6.15.0.nix /builds/gargantext/nix/overlays/process-1.6.15.0.nix
COPY ./devops/coreNLP/build.sh /builds/gargantext/devops/coreNLP/build.sh COPY ./devops/coreNLP/stanford-corenlp-${CORENLP}/ /builds/gargantext/devops/coreNLP/stanford-corenlp-current/
COPY ./devops/coreNLP/startServer.sh /builds/gargantext/devops/coreNLP/startServer.sh
ENV TZ=Europe/Rome ENV TZ=Europe/Rome
RUN apt-get update && \ RUN apt-get update && \
...@@ -57,8 +59,6 @@ RUN gpg --batch --keyserver keys.openpgp.org --recv-keys 7D1E8AFD1D4A16D71FA ...@@ -57,8 +59,6 @@ RUN gpg --batch --keyserver keys.openpgp.org --recv-keys 7D1E8AFD1D4A16D71FA
gpg --batch --keyserver keyserver.ubuntu.com --recv-keys FE5AB6C91FEA597C3B31180B73EDE9E8CFBAEF01 gpg --batch --keyserver keyserver.ubuntu.com --recv-keys FE5AB6C91FEA597C3B31180B73EDE9E8CFBAEF01
SHELL ["/bin/bash", "-o", "pipefail", "-c"] SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN cd /builds/gargantext/devops/coreNLP; ./build.sh; rm -rf *.zip
RUN set -o pipefail && \ RUN set -o pipefail && \
bash <(curl -L https://releases.nixos.org/nix/nix-2.15.0/install) --no-daemon && \ bash <(curl -L https://releases.nixos.org/nix/nix-2.15.0/install) --no-daemon && \
locale-gen en_US.UTF-8 && chown root -R /nix locale-gen en_US.UTF-8 && chown root -R /nix
......
...@@ -28,13 +28,15 @@ import Gargantext.Core.Types.Search ...@@ -28,13 +28,15 @@ import Gargantext.Core.Types.Search
import Gargantext.Core.Utils.Prefix (unPrefixSwagger) import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Database.Action.Flow.Pairing (isPairedWith) import Gargantext.Database.Action.Flow.Pairing (isPairedWith)
import Gargantext.Database.Action.Search import Gargantext.Database.Action.Search
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node hiding (DEBUG)
import Gargantext.Database.Query.Facet import Gargantext.Database.Query.Facet
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.System.Logging
import Gargantext.Utils.Aeson (defaultTaggedObject) import Gargantext.Utils.Aeson (defaultTaggedObject)
import Servant import Servant
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary import Test.QuickCheck.Arbitrary
import qualified Data.Text as T
----------------------------------------------------------------------- -----------------------------------------------------------------------
-- TODO-ACCESS: CanSearch? or is it part of CanGetNode -- TODO-ACCESS: CanSearch? or is it part of CanGetNode
...@@ -48,7 +50,8 @@ type API results = Summary "Search endpoint" ...@@ -48,7 +50,8 @@ type API results = Summary "Search endpoint"
----------------------------------------------------------------------- -----------------------------------------------------------------------
-- | Api search function -- | Api search function
api :: NodeId -> GargServer (API SearchResult) api :: NodeId -> GargServer (API SearchResult)
api nId (SearchQuery q SearchDoc) o l order = api nId (SearchQuery q SearchDoc) o l order = do
$(logLocM) DEBUG $ T.pack "New search started with query = " <> T.pack (show q)
SearchResult <$> SearchResultDoc SearchResult <$> SearchResultDoc
<$> map (toRow nId) <$> map (toRow nId)
<$> searchInCorpus nId False q o l order <$> searchInCorpus nId False q o l order
......
...@@ -61,7 +61,7 @@ searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t) ...@@ -61,7 +61,7 @@ searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
queryDocInDatabase :: ParentId -> Text -> O.Select (Column SqlInt4, Column SqlJsonb) queryDocInDatabase :: ParentId -> Text -> O.Select (Column SqlInt4, Column SqlJsonb)
queryDocInDatabase _p q = proc () -> do queryDocInDatabase _p q = proc () -> do
row <- queryNodeSearchTable -< () row <- queryNodeSearchTable -< ()
restrict -< (_ns_search row) @@ (sqlTSQuery (unpack q)) restrict -< (_ns_search row) @@ (sqlToTSQuery (unpack q))
restrict -< (_ns_typename row) .== (sqlInt4 $ toDBid NodeDocument) restrict -< (_ns_typename row) .== (sqlInt4 $ toDBid NodeDocument)
returnA -< (_ns_id row, _ns_hyperdata row) returnA -< (_ns_id row, _ns_hyperdata row)
...@@ -175,7 +175,7 @@ queryInCorpus cId t q = proc () -> do ...@@ -175,7 +175,7 @@ queryInCorpus cId t q = proc () -> do
else matchMaybe (view nc_category <$> nc) $ \case else matchMaybe (view nc_category <$> nc) $ \case
Nothing -> toFields False Nothing -> toFields False
Just c' -> c' .>= sqlInt4 1 Just c' -> c' .>= sqlInt4 1
restrict -< (c ^. cs_search) @@ sqlTSQuery (unpack q) restrict -< (c ^. cs_search) @@ sqlToTSQuery (unpack q)
restrict -< (c ^. cs_typename ) .== sqlInt4 (toDBid NodeDocument) restrict -< (c ^. cs_typename ) .== sqlInt4 (toDBid NodeDocument)
returnA -< FacetDoc { facetDoc_id = c^.cs_id returnA -< FacetDoc { facetDoc_id = c^.cs_id
, facetDoc_created = c^.cs_date , facetDoc_created = c^.cs_date
...@@ -231,7 +231,7 @@ selectContactViaDoc cId aId query = proc () -> do ...@@ -231,7 +231,7 @@ selectContactViaDoc cId aId query = proc () -> do
(contact, annuaire, _, corpus, doc) <- queryContactViaDoc -< () (contact, annuaire, _, corpus, doc) <- queryContactViaDoc -< ()
restrict -< matchMaybe (view cs_search <$> doc) $ \case restrict -< matchMaybe (view cs_search <$> doc) $ \case
Nothing -> toFields False Nothing -> toFields False
Just s -> s @@ sqlTSQuery (unpack query) Just s -> s @@ sqlToTSQuery (unpack query)
restrict -< (view cs_typename <$> doc) .=== justFields (sqlInt4 (toDBid NodeDocument)) restrict -< (view cs_typename <$> doc) .=== justFields (sqlInt4 (toDBid NodeDocument))
restrict -< (view nc_node_id <$> corpus) .=== justFields (pgNodeId cId) restrict -< (view nc_node_id <$> corpus) .=== justFields (pgNodeId cId)
restrict -< (view nc_node_id <$> annuaire) .=== justFields (pgNodeId aId) restrict -< (view nc_node_id <$> annuaire) .=== justFields (pgNodeId aId)
......
...@@ -38,10 +38,10 @@ triggerSearchUpdate = execPGSQuery query ( toDBid NodeDocument ...@@ -38,10 +38,10 @@ triggerSearchUpdate = execPGSQuery query ( toDBid NodeDocument
RETURNS trigger AS $$ RETURNS trigger AS $$
begin begin
IF new.typename = ? AND new.hyperdata @> '{"language_iso2":"EN"}' THEN IF new.typename = ? AND new.hyperdata @> '{"language_iso2":"EN"}' THEN
new.search := to_tsvector( 'english' , (new.hyperdata ->> 'title') || ' ' || (new.hyperdata ->> 'abstract')); new.search := to_tsvector( 'english' , new.hyperdata::jsonb );
ELSIF new.typename = ? AND new.hyperdata @> '{"language_iso2":"FR"}' THEN ELSIF new.typename = ? AND new.hyperdata @> '{"language_iso2":"FR"}' THEN
new.search := to_tsvector( 'french' , (new.hyperdata ->> 'title') || ' ' || (new.hyperdata ->> 'abstract')); new.search := to_tsvector( 'english' , new.hyperdata::jsonb );
ELSIF new.typename = ? THEN ELSIF new.typename = ? THEN
new.search := to_tsvector( 'french' , (new.hyperdata ->> 'prenom') new.search := to_tsvector( 'french' , (new.hyperdata ->> 'prenom')
...@@ -49,7 +49,7 @@ triggerSearchUpdate = execPGSQuery query ( toDBid NodeDocument ...@@ -49,7 +49,7 @@ triggerSearchUpdate = execPGSQuery query ( toDBid NodeDocument
|| ' ' || (new.hyperdata ->> 'fonction') || ' ' || (new.hyperdata ->> 'fonction')
); );
ELSE ELSE
new.search := to_tsvector( 'english' , (new.hyperdata ->> 'title') || ' ' || (new.hyperdata ->> 'abstract')); new.search := to_tsvector( 'english' , new.hyperdata::jsonb );
END IF; END IF;
return new; return new;
end end
......
...@@ -44,8 +44,8 @@ extra-deps: ...@@ -44,8 +44,8 @@ extra-deps:
- git: https://github.com/alpmestan/ekg-json.git - git: https://github.com/alpmestan/ekg-json.git
commit: fd7e5d7325939103cd87d0dc592faf644160341c commit: fd7e5d7325939103cd87d0dc592faf644160341c
# Databases libs # Databases libs
- git: https://github.com/garganscript/haskell-opaleye.git - git: https://github.com/adinapoli/haskell-opaleye.git
commit: a5693a2010e6d13f51cdc576fa1dc9985e79ee0e commit: e9a29582ac66198dd2c2fdc3f8c8a4b1e6fbe004
- git: https://github.com/robstewart57/rdf4h.git - git: https://github.com/robstewart57/rdf4h.git
commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4 commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4
# External Data API connectors # External Data API connectors
......
...@@ -127,6 +127,7 @@ tests = sequential $ aroundAll withTestDB $ describe "Database" $ do ...@@ -127,6 +127,7 @@ tests = sequential $ aroundAll withTestDB $ describe "Database" $ do
describe "Corpus search" $ do describe "Corpus search" $ do
it "Can stem query terms" stemmingTest it "Can stem query terms" stemmingTest
it "Can perform a simple search inside documents" corpusSearch01 it "Can perform a simple search inside documents" corpusSearch01
it "Can perform search by author in documents" corpusSearch02
data ExpectedActual a = data ExpectedActual a =
Expected a Expected a
......
...@@ -27,7 +27,7 @@ import Gargantext.Database.Admin.Config (userMaster) ...@@ -27,7 +27,7 @@ import Gargantext.Database.Admin.Config (userMaster)
exampleDocument_01 :: HyperdataDocument exampleDocument_01 :: HyperdataDocument
exampleDocument_01 = either error id $ parseEither parseJSON $ [aesonQQ| exampleDocument_01 = either error id $ parseEither parseJSON $ [aesonQQ|
{ "doi":"sdfds" { "doi":"01"
, "publication_day":6 , "publication_day":6
, "language_iso2":"EN" , "language_iso2":"EN"
, "publication_minute":0 , "publication_minute":0
...@@ -48,7 +48,7 @@ exampleDocument_01 = either error id $ parseEither parseJSON $ [aesonQQ| ...@@ -48,7 +48,7 @@ exampleDocument_01 = either error id $ parseEither parseJSON $ [aesonQQ|
exampleDocument_02 :: HyperdataDocument exampleDocument_02 :: HyperdataDocument
exampleDocument_02 = either error id $ parseEither parseJSON $ [aesonQQ| exampleDocument_02 = either error id $ parseEither parseJSON $ [aesonQQ|
{ "doi":"sdfds" { "doi":"02"
, "publication_day":6 , "publication_day":6
, "language_iso2":"EN" , "language_iso2":"EN"
, "publication_minute":0 , "publication_minute":0
...@@ -67,6 +67,24 @@ exampleDocument_02 = either error id $ parseEither parseJSON $ [aesonQQ| ...@@ -67,6 +67,24 @@ exampleDocument_02 = either error id $ parseEither parseJSON $ [aesonQQ|
} }
|] |]
exampleDocument_03 :: HyperdataDocument
exampleDocument_03 = either error id $ parseEither parseJSON $ [aesonQQ|
{
"bdd": "Arxiv"
, "doi": ""
, "url": "http://arxiv.org/pdf/1405.3072v2"
, "title": "Haskell for OCaml programmers"
, "source": ""
, "uniqId": "1405.3072v2"
, "authors": "Raphael Poss"
, "abstract": " This introduction to Haskell is written to optimize learning by programmers who already know OCaml. "
, "institutes": ""
, "language_iso2": "EN"
, "publication_date": "2014-05-13T09:10:32Z"
, "publication_year": 2014
}
|]
nlpServerConfig :: NLPServerConfig nlpServerConfig :: NLPServerConfig
nlpServerConfig = nlpServerConfig =
let uri = parseURI "http://localhost:9000" let uri = parseURI "http://localhost:9000"
...@@ -85,8 +103,8 @@ corpusAddDocuments env = do ...@@ -85,8 +103,8 @@ corpusAddDocuments env = do
(Just $ _node_hyperdata $ corpus) (Just $ _node_hyperdata $ corpus)
(Multi EN) (Multi EN)
corpusId corpusId
[exampleDocument_01, exampleDocument_02] [exampleDocument_01, exampleDocument_02, exampleDocument_03]
liftIO $ length ids `shouldBe` 2 liftIO $ length ids `shouldBe` 3
stemmingTest :: TestEnv -> Assertion stemmingTest :: TestEnv -> Assertion
stemmingTest _env = do stemmingTest _env = do
...@@ -97,7 +115,7 @@ corpusSearch01 :: TestEnv -> Assertion ...@@ -97,7 +115,7 @@ corpusSearch01 :: TestEnv -> Assertion
corpusSearch01 env = do corpusSearch01 env = do
flip runReaderT env $ runTestMonad $ do flip runReaderT env $ runTestMonad $ do
parentId <- getRootId (UserName "gargantua") parentId <- getRootId (UserName userMaster)
[corpus] <- getCorporaWithParentId parentId [corpus] <- getCorporaWithParentId parentId
results1 <- searchInCorpus (_node_id corpus) False ["mineral"] Nothing Nothing Nothing results1 <- searchInCorpus (_node_id corpus) False ["mineral"] Nothing Nothing Nothing
...@@ -105,3 +123,16 @@ corpusSearch01 env = do ...@@ -105,3 +123,16 @@ corpusSearch01 env = do
liftIO $ length results1 `shouldBe` 1 liftIO $ length results1 `shouldBe` 1
liftIO $ length results2 `shouldBe` 1 liftIO $ length results2 `shouldBe` 1
-- | Check that we support more complex queries
corpusSearch02 :: TestEnv -> Assertion
corpusSearch02 env = do
flip runReaderT env $ runTestMonad $ do
parentId <- getRootId (UserName userMaster)
[corpus] <- getCorporaWithParentId parentId
results1 <- searchInCorpus (_node_id corpus) False ["Raphael"] Nothing Nothing Nothing
liftIO $ do
length results1 `shouldBe` 1
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
module Database.Operations.Types where module Database.Operations.Types where
import Control.Concurrent.Async
import Control.Exception import Control.Exception
import Control.Lens import Control.Lens
import Control.Monad.Except import Control.Monad.Except
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment