Commit 772987e7 authored by Alfredo Di Napoli

Initial support for document search by author

parent ba6ff613
# Optimising CI speed by using tips from https://blog.nimbleways.com/let-s-make-faster-gitlab-ci-cd-pipelines/
image: adinapoli/gargantext:v2.2
image: adinapoli/gargantext:v2.2.2
variables:
STACK_ROOT: "${CI_PROJECT_DIR}/.stack-root"
......@@ -78,8 +78,9 @@ test:
chown -R test:test /root/.cache/cabal/logs/
chown -R test:test /root/.cache/cabal/packages/hackage.haskell.org/
cd /builds/gargantext/haskell-gargantext/devops/coreNLP; ./build.sh
cd /builds/gargantext/haskell-gargantext
ls /builds/gargantext/devops/coreNLP/stanford-corenlp-current
# Copy the CoreNLP distribution into the project checkout. The checkout lives under
# /builds (plural), the same directory the test command below changes into.
cp -R /builds/gargantext/devops/coreNLP/stanford-corenlp-current /builds/gargantext/haskell-gargantext/devops/coreNLP/
chown -R test:test /builds/gargantext/haskell-gargantext/devops/coreNLP/stanford-corenlp-current
nix-shell --run "chown -R test:test /root/.config/ && su -m test -c \"export PATH=$PATH:$TEST_NIX_PATH && cd /builds/gargantext/haskell-gargantext; $CABAL --store-dir=$CABAL_STORE_DIR v2-test --test-show-details=streaming --flags test-crypto --ghc-options='-O0 -fclear-plugins'\""
chown -R root:root dist-newstyle/
......
......@@ -25,7 +25,7 @@ import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
import Gargantext.Database.Admin.Trigger.Init (initFirstTriggers, initLastTriggers)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd, )
import Gargantext.Database.Prelude (Cmd, DBCmd)
import Gargantext.Database.Query.Table.Node (getOrMkList)
import Gargantext.Database.Query.Table.User (insertNewUsers, )
import Gargantext.Prelude
......
......@@ -11,7 +11,7 @@ STORE_DIR="${1:-$DEFAULT_STORE}"
# `expected_cabal_project_freeze_hash` with the
# `sha256sum` result calculated on the `cabal.project` and `cabal.project.freeze`.
# This ensures the files stay deterministic so that CI cache can kick in.
expected_cabal_project_hash="eb12c232115b3fffa1f81add7c83d921e5899c7712eddee6100ff8df7305088e"
expected_cabal_project_hash="7b82fda55b0051a14b461ce3939e934da47e417794de69cb70973702c43e337e"
expected_cabal_project_freeze_hash="b7acfd12c970323ffe2c6684a13130db09d8ec9fa5676a976afed329f1ef3436"
cabal --store-dir=$STORE_DIR v2-update 'hackage.haskell.org,2023-06-24T21:28:46Z'
......
......@@ -7,6 +7,11 @@ with-compiler: ghc-8.10.7
packages:
./
source-repository-package
type: git
location: https://github.com/adinapoli/haskell-opaleye.git
tag: e9a29582ac66198dd2c2fdc3f8c8a4b1e6fbe004
source-repository-package
type: git
location: https://github.com/alpmestan/accelerate.git
......@@ -56,11 +61,6 @@ source-repository-package
location: https://github.com/delanoe/patches-map
tag: 76cae88f367976ff091e661ee69a5c3126b94694
source-repository-package
type: git
location: https://github.com/garganscript/haskell-opaleye.git
tag: a5693a2010e6d13f51cdc576fa1dc9985e79ee0e
source-repository-package
type: git
location: https://gitlab.iscpif.fr/amestanogullari/accelerate-utility.git
......
FROM ubuntu:jammy
## NOTA BENE: In order for this to be built successfully, you have to run ./devops/coreNLP/build.sh first.
ARG DEBIAN_FRONTEND=noninteractive
ARG GHC=8.10.7
ARG STACK=2.7.3
ARG CABAL=3.10.1.0
ARG CORENLP=4.5.4
ARG CORE
COPY ./shell.nix /builds/gargantext/shell.nix
COPY ./nix/pkgs.nix /builds/gargantext/nix/pkgs.nix
......@@ -15,8 +18,7 @@ COPY ./nix/overlays/Cabal-syntax-3.10.1.0.nix /builds/gargantext/nix/ov
COPY ./nix/overlays/directory-1.3.7.0.nix /builds/gargantext/nix/overlays/directory-1.3.7.0.nix
COPY ./nix/overlays/hackage-security-0.6.2.3.nix /builds/gargantext/nix/overlays/hackage-security-0.6.2.3.nix
COPY ./nix/overlays/process-1.6.15.0.nix /builds/gargantext/nix/overlays/process-1.6.15.0.nix
COPY ./devops/coreNLP/build.sh /builds/gargantext/devops/coreNLP/build.sh
COPY ./devops/coreNLP/startServer.sh /builds/gargantext/devops/coreNLP/startServer.sh
COPY ./devops/coreNLP/stanford-corenlp-${CORENLP}/ /builds/gargantext/devops/coreNLP/stanford-corenlp-current/
ENV TZ=Europe/Rome
RUN apt-get update && \
......@@ -57,8 +59,6 @@ RUN gpg --batch --keyserver keys.openpgp.org --recv-keys 7D1E8AFD1D4A16D71FA
gpg --batch --keyserver keyserver.ubuntu.com --recv-keys FE5AB6C91FEA597C3B31180B73EDE9E8CFBAEF01
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN cd /builds/gargantext/devops/coreNLP; ./build.sh; rm -rf *.zip
RUN set -o pipefail && \
bash <(curl -L https://releases.nixos.org/nix/nix-2.15.0/install) --no-daemon && \
locale-gen en_US.UTF-8 && chown root -R /nix
......
......@@ -28,13 +28,15 @@ import Gargantext.Core.Types.Search
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Database.Action.Flow.Pairing (isPairedWith)
import Gargantext.Database.Action.Search
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Node hiding (DEBUG)
import Gargantext.Database.Query.Facet
import Gargantext.Prelude
import Gargantext.System.Logging
import Gargantext.Utils.Aeson (defaultTaggedObject)
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import qualified Data.Text as T
-----------------------------------------------------------------------
-- TODO-ACCESS: CanSearch? or is it part of CanGetNode
......@@ -48,7 +50,8 @@ type API results = Summary "Search endpoint"
-----------------------------------------------------------------------
-- | Api search function
api :: NodeId -> GargServer (API SearchResult)
api nId (SearchQuery q SearchDoc) o l order =
api nId (SearchQuery q SearchDoc) o l order = do
$(logLocM) DEBUG $ T.pack "New search started with query = " <> T.pack (show q)
SearchResult <$> SearchResultDoc
<$> map (toRow nId)
<$> searchInCorpus nId False q o l order
......
......@@ -61,7 +61,7 @@ searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
queryDocInDatabase :: ParentId -> Text -> O.Select (Column SqlInt4, Column SqlJsonb)
queryDocInDatabase _p q = proc () -> do
row <- queryNodeSearchTable -< ()
restrict -< (_ns_search row) @@ (sqlTSQuery (unpack q))
restrict -< (_ns_search row) @@ (sqlToTSQuery (unpack q))
restrict -< (_ns_typename row) .== (sqlInt4 $ toDBid NodeDocument)
returnA -< (_ns_id row, _ns_hyperdata row)
......@@ -175,7 +175,7 @@ queryInCorpus cId t q = proc () -> do
else matchMaybe (view nc_category <$> nc) $ \case
Nothing -> toFields False
Just c' -> c' .>= sqlInt4 1
restrict -< (c ^. cs_search) @@ sqlTSQuery (unpack q)
restrict -< (c ^. cs_search) @@ sqlToTSQuery (unpack q)
restrict -< (c ^. cs_typename ) .== sqlInt4 (toDBid NodeDocument)
returnA -< FacetDoc { facetDoc_id = c^.cs_id
, facetDoc_created = c^.cs_date
......@@ -231,7 +231,7 @@ selectContactViaDoc cId aId query = proc () -> do
(contact, annuaire, _, corpus, doc) <- queryContactViaDoc -< ()
restrict -< matchMaybe (view cs_search <$> doc) $ \case
Nothing -> toFields False
Just s -> s @@ sqlTSQuery (unpack query)
Just s -> s @@ sqlToTSQuery (unpack query)
restrict -< (view cs_typename <$> doc) .=== justFields (sqlInt4 (toDBid NodeDocument))
restrict -< (view nc_node_id <$> corpus) .=== justFields (pgNodeId cId)
restrict -< (view nc_node_id <$> annuaire) .=== justFields (pgNodeId aId)
......
......@@ -38,10 +38,10 @@ triggerSearchUpdate = execPGSQuery query ( toDBid NodeDocument
RETURNS trigger AS $$
begin
IF new.typename = ? AND new.hyperdata @> '{"language_iso2":"EN"}' THEN
new.search := to_tsvector( 'english' , (new.hyperdata ->> 'title') || ' ' || (new.hyperdata ->> 'abstract'));
new.search := to_tsvector( 'english' , new.hyperdata::jsonb );
ELSIF new.typename = ? AND new.hyperdata @> '{"language_iso2":"FR"}' THEN
new.search := to_tsvector( 'french' , (new.hyperdata ->> 'title') || ' ' || (new.hyperdata ->> 'abstract'));
new.search := to_tsvector( 'french' , new.hyperdata::jsonb );
ELSIF new.typename = ? THEN
new.search := to_tsvector( 'french' , (new.hyperdata ->> 'prenom')
......@@ -49,7 +49,7 @@ triggerSearchUpdate = execPGSQuery query ( toDBid NodeDocument
|| ' ' || (new.hyperdata ->> 'fonction')
);
ELSE
new.search := to_tsvector( 'english' , (new.hyperdata ->> 'title') || ' ' || (new.hyperdata ->> 'abstract'));
new.search := to_tsvector( 'english' , new.hyperdata::jsonb );
END IF;
return new;
end
......
......@@ -44,8 +44,8 @@ extra-deps:
- git: https://github.com/alpmestan/ekg-json.git
commit: fd7e5d7325939103cd87d0dc592faf644160341c
# Databases libs
- git: https://github.com/garganscript/haskell-opaleye.git
commit: a5693a2010e6d13f51cdc576fa1dc9985e79ee0e
- git: https://github.com/adinapoli/haskell-opaleye.git
commit: e9a29582ac66198dd2c2fdc3f8c8a4b1e6fbe004
- git: https://github.com/robstewart57/rdf4h.git
commit: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4
# External Data API connectors
......
......@@ -127,6 +127,7 @@ tests = sequential $ aroundAll withTestDB $ describe "Database" $ do
describe "Corpus search" $ do
it "Can stem query terms" stemmingTest
it "Can perform a simple search inside documents" corpusSearch01
it "Can perform search by author in documents" corpusSearch02
data ExpectedActual a =
Expected a
......
......@@ -27,7 +27,7 @@ import Gargantext.Database.Admin.Config (userMaster)
exampleDocument_01 :: HyperdataDocument
exampleDocument_01 = either error id $ parseEither parseJSON $ [aesonQQ|
{ "doi":"sdfds"
{ "doi":"01"
, "publication_day":6
, "language_iso2":"EN"
, "publication_minute":0
......@@ -48,7 +48,7 @@ exampleDocument_01 = either error id $ parseEither parseJSON $ [aesonQQ|
exampleDocument_02 :: HyperdataDocument
exampleDocument_02 = either error id $ parseEither parseJSON $ [aesonQQ|
{ "doi":"sdfds"
{ "doi":"02"
, "publication_day":6
, "language_iso2":"EN"
, "publication_minute":0
......@@ -67,6 +67,24 @@ exampleDocument_02 = either error id $ parseEither parseJSON $ [aesonQQ|
}
|]
-- | Sample Arxiv document used as a fixture for the corpus tests.
--
-- The JSON literal is decoded with 'parseJSON'; a decoding failure is turned
-- into a call to 'error'. The document is the only fixture visible here that
-- sets an @authors@ field ("Raphael Poss"); 'corpusSearch02' searches the
-- corpus for the term "Raphael", which in this document appears only in
-- that field.
exampleDocument_03 :: HyperdataDocument
exampleDocument_03 = either error id $ parseEither parseJSON $ [aesonQQ|
{
"bdd": "Arxiv"
, "doi": ""
, "url": "http://arxiv.org/pdf/1405.3072v2"
, "title": "Haskell for OCaml programmers"
, "source": ""
, "uniqId": "1405.3072v2"
, "authors": "Raphael Poss"
, "abstract": " This introduction to Haskell is written to optimize learning by programmers who already know OCaml. "
, "institutes": ""
, "language_iso2": "EN"
, "publication_date": "2014-05-13T09:10:32Z"
, "publication_year": 2014
}
|]
nlpServerConfig :: NLPServerConfig
nlpServerConfig =
let uri = parseURI "http://localhost:9000"
......@@ -85,8 +103,8 @@ corpusAddDocuments env = do
(Just $ _node_hyperdata $ corpus)
(Multi EN)
corpusId
[exampleDocument_01, exampleDocument_02]
liftIO $ length ids `shouldBe` 2
[exampleDocument_01, exampleDocument_02, exampleDocument_03]
liftIO $ length ids `shouldBe` 3
stemmingTest :: TestEnv -> Assertion
stemmingTest _env = do
......@@ -97,7 +115,7 @@ corpusSearch01 :: TestEnv -> Assertion
corpusSearch01 env = do
flip runReaderT env $ runTestMonad $ do
parentId <- getRootId (UserName "gargantua")
parentId <- getRootId (UserName userMaster)
[corpus] <- getCorporaWithParentId parentId
results1 <- searchInCorpus (_node_id corpus) False ["mineral"] Nothing Nothing Nothing
......@@ -105,3 +123,16 @@ corpusSearch01 env = do
liftIO $ length results1 `shouldBe` 1
liftIO $ length results2 `shouldBe` 1
-- | Search the single corpus under the master user's root node for the term
-- "Raphael" and expect exactly one matching document.
--
-- "Raphael" is the first name in the @authors@ field of 'exampleDocument_03',
-- so this exercises matching on document metadata beyond title and abstract.
corpusSearch02 :: TestEnv -> Assertion
corpusSearch02 env =
  runReaderT (runTestMonad searchByAuthor) env
  where
    searchByAuthor = do
      rootId <- getRootId (UserName userMaster)
      -- The fixture setup is expected to have created exactly one corpus;
      -- any other shape fails the pattern match and therefore the test.
      [theCorpus] <- getCorporaWithParentId rootId
      hits <- searchInCorpus (_node_id theCorpus) False ["Raphael"] Nothing Nothing Nothing
      liftIO $ length hits `shouldBe` 1
......@@ -4,7 +4,6 @@
module Database.Operations.Types where
import Control.Concurrent.Async
import Control.Exception
import Control.Lens
import Control.Monad.Except
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment