Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
772987e7
Commit
772987e7
authored
Sep 04, 2023
by
Alfredo Di Napoli
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Initial support for document search by author
parent
ba6ff613
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
65 additions
and
30 deletions
+65
-30
.gitlab-ci.yml
.gitlab-ci.yml
+4
-3
Main.hs
bin/gargantext-init/Main.hs
+1
-1
update-cabal-project
bin/update-cabal-project
+1
-1
cabal.project
cabal.project
+5
-5
Dockerfile
devops/docker/Dockerfile
+4
-4
Search.hs
src/Gargantext/API/Search.hs
+5
-2
Search.hs
src/Gargantext/Database/Action/Search.hs
+3
-3
Contexts.hs
src/Gargantext/Database/Admin/Trigger/Contexts.hs
+3
-3
stack.yaml
stack.yaml
+2
-2
Operations.hs
test/Database/Operations.hs
+1
-0
DocumentSearch.hs
test/Database/Operations/DocumentSearch.hs
+36
-5
Types.hs
test/Database/Operations/Types.hs
+0
-1
No files found.
.gitlab-ci.yml
View file @
772987e7
# Optimising CI speed by using tips from https://blog.nimbleways.com/let-s-make-faster-gitlab-ci-cd-pipelines/
image
:
adinapoli/gargantext:v2.2
image
:
adinapoli/gargantext:v2.2
.2
variables
:
STACK_ROOT
:
"
${CI_PROJECT_DIR}/.stack-root"
...
...
@@ -78,8 +78,9 @@ test:
chown -R test:test /root/.cache/cabal/logs/
chown -R test:test /root/.cache/cabal/packages/hackage.haskell.org/
cd /builds/gargantext/haskell-gargantext/devops/coreNLP; ./build.sh
cd /builds/gargantext/haskell-gargantext
ls /builds/gargantext/devops/coreNLP/stanford-corenlp-current
cp -R /builds/gargantext/devops/coreNLP/stanford-corenlp-current /build/gargantext/haskell-gargantext/devops/coreNLP/
chown -R test:test /build/gargantext/haskell-gargantext/devops/coreNLP/stanford-corenlp-current
nix-shell --run "chown -R test:test /root/.config/ && su -m test -c \"export PATH=$PATH:$TEST_NIX_PATH && cd /builds/gargantext/haskell-gargantext; $CABAL --store-dir=$CABAL_STORE_DIR v2-test --test-show-details=streaming --flags test-crypto --ghc-options='-O0 -fclear-plugins'\""
chown -R root:root dist-newstyle/
...
...
bin/gargantext-init/Main.hs
View file @
772987e7
...
...
@@ -25,7 +25,7 @@ import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
import
Gargantext.Database.Admin.Trigger.Init
(
initFirstTriggers
,
initLastTriggers
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataCorpus
)
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Prelude
(
Cmd
,
)
import
Gargantext.Database.Prelude
(
Cmd
,
DBCmd
)
import
Gargantext.Database.Query.Table.Node
(
getOrMkList
)
import
Gargantext.Database.Query.Table.User
(
insertNewUsers
,
)
import
Gargantext.Prelude
...
...
bin/update-cabal-project
View file @
772987e7
...
...
@@ -11,7 +11,7 @@ STORE_DIR="${1:-$DEFAULT_STORE}"
# `expected_cabal_project_freeze_hash` with the
# `sha256sum` result calculated on the `cabal.project` and `cabal.project.freeze`.
# This ensures the files stay deterministic so that CI cache can kick in.
expected_cabal_project_hash
=
"
eb12c232115b3fffa1f81add7c83d921e5899c7712eddee6100ff8df7305088
e"
expected_cabal_project_hash
=
"
7b82fda55b0051a14b461ce3939e934da47e417794de69cb70973702c43e337
e"
expected_cabal_project_freeze_hash
=
"b7acfd12c970323ffe2c6684a13130db09d8ec9fa5676a976afed329f1ef3436"
cabal
--store-dir
=
$STORE_DIR
v2-update
'hackage.haskell.org,2023-06-24T21:28:46Z'
...
...
cabal.project
View file @
772987e7
...
...
@@ -7,6 +7,11 @@ with-compiler: ghc-8.10.7
packages
:
./
source
-
repository
-
package
type
:
git
location
:
https
://
github
.
com
/
adinapoli
/
haskell
-
opaleye
.
git
tag
:
e9a29582ac66198dd2c2fdc3f8c8a4b1e6fbe004
source
-
repository
-
package
type
:
git
location
:
https
://
github
.
com
/
alpmestan
/
accelerate
.
git
...
...
@@ -56,11 +61,6 @@ source-repository-package
location
:
https
://
github
.
com
/
delanoe
/
patches
-
map
tag
:
76
cae88f367976ff091e661ee69a5c3126b94694
source
-
repository
-
package
type
:
git
location
:
https
://
github
.
com
/
garganscript
/
haskell
-
opaleye
.
git
tag
:
a5693a2010e6d13f51cdc576fa1dc9985e79ee0e
source
-
repository
-
package
type
:
git
location
:
https
://
gitlab
.
iscpif
.
fr
/
amestanogullari
/
accelerate
-
utility
.
git
...
...
devops/docker/Dockerfile
View file @
772987e7
FROM
ubuntu:jammy
## NOTA BENE: In order for this to be built successfully, you have to run ./devops/coreNLP/build.sh first.
ARG
DEBIAN_FRONTEND=noninteractive
ARG
GHC=8.10.7
ARG
STACK=2.7.3
ARG
CABAL=3.10.1.0
ARG
CORENLP=4.5.4
ARG
CORE
COPY
./shell.nix /builds/gargantext/shell.nix
COPY
./nix/pkgs.nix /builds/gargantext/nix/pkgs.nix
...
...
@@ -15,8 +18,7 @@ COPY ./nix/overlays/Cabal-syntax-3.10.1.0.nix /builds/gargantext/nix/ov
COPY
./nix/overlays/directory-1.3.7.0.nix /builds/gargantext/nix/overlays/directory-1.3.7.0.nix
COPY
./nix/overlays/hackage-security-0.6.2.3.nix /builds/gargantext/nix/overlays/hackage-security-0.6.2.3.nix
COPY
./nix/overlays/process-1.6.15.0.nix /builds/gargantext/nix/overlays/process-1.6.15.0.nix
COPY
./devops/coreNLP/build.sh /builds/gargantext/devops/coreNLP/build.sh
COPY
./devops/coreNLP/startServer.sh /builds/gargantext/devops/coreNLP/startServer.sh
COPY
./devops/coreNLP/stanford-corenlp-${CORENLP}/ /builds/gargantext/devops/coreNLP/stanford-corenlp-current/
ENV
TZ=Europe/Rome
RUN
apt-get update
&&
\
...
...
@@ -57,8 +59,6 @@ RUN gpg --batch --keyserver keys.openpgp.org --recv-keys 7D1E8AFD1D4A16D71FA
gpg
--batch
--keyserver
keyserver.ubuntu.com
--recv-keys
FE5AB6C91FEA597C3B31180B73EDE9E8CFBAEF01
SHELL
["/bin/bash", "-o", "pipefail", "-c"]
RUN
cd
/builds/gargantext/devops/coreNLP
;
./build.sh
;
rm
-rf
*
.zip
RUN
set
-o
pipefail
&&
\
bash <
(
curl
-L
https://releases.nixos.org/nix/nix-2.15.0/install
)
--no-daemon
&&
\
locale-gen en_US.UTF-8
&&
chown
root
-R
/nix
...
...
src/Gargantext/API/Search.hs
View file @
772987e7
...
...
@@ -28,13 +28,15 @@ import Gargantext.Core.Types.Search
import
Gargantext.Core.Utils.Prefix
(
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow.Pairing
(
isPairedWith
)
import
Gargantext.Database.Action.Search
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Types.Node
hiding
(
DEBUG
)
import
Gargantext.Database.Query.Facet
import
Gargantext.Prelude
import
Gargantext.System.Logging
import
Gargantext.Utils.Aeson
(
defaultTaggedObject
)
import
Servant
import
Test.QuickCheck
(
elements
)
import
Test.QuickCheck.Arbitrary
import
qualified
Data.Text
as
T
-----------------------------------------------------------------------
-- TODO-ACCESS: CanSearch? or is it part of CanGetNode
...
...
@@ -48,7 +50,8 @@ type API results = Summary "Search endpoint"
-----------------------------------------------------------------------
-- | Api search function
api
::
NodeId
->
GargServer
(
API
SearchResult
)
api
nId
(
SearchQuery
q
SearchDoc
)
o
l
order
=
api
nId
(
SearchQuery
q
SearchDoc
)
o
l
order
=
do
$
(
logLocM
)
DEBUG
$
T
.
pack
"New search started with query = "
<>
T
.
pack
(
show
q
)
SearchResult
<$>
SearchResultDoc
<$>
map
(
toRow
nId
)
<$>
searchInCorpus
nId
False
q
o
l
order
...
...
src/Gargantext/Database/Action/Search.hs
View file @
772987e7
...
...
@@ -61,7 +61,7 @@ searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
queryDocInDatabase
::
ParentId
->
Text
->
O
.
Select
(
Column
SqlInt4
,
Column
SqlJsonb
)
queryDocInDatabase
_p
q
=
proc
()
->
do
row
<-
queryNodeSearchTable
-<
()
restrict
-<
(
_ns_search
row
)
@@
(
sqlTSQuery
(
unpack
q
))
restrict
-<
(
_ns_search
row
)
@@
(
sqlT
oT
SQuery
(
unpack
q
))
restrict
-<
(
_ns_typename
row
)
.==
(
sqlInt4
$
toDBid
NodeDocument
)
returnA
-<
(
_ns_id
row
,
_ns_hyperdata
row
)
...
...
@@ -175,7 +175,7 @@ queryInCorpus cId t q = proc () -> do
else
matchMaybe
(
view
nc_category
<$>
nc
)
$
\
case
Nothing
->
toFields
False
Just
c'
->
c'
.>=
sqlInt4
1
restrict
-<
(
c
^.
cs_search
)
@@
sqlTSQuery
(
unpack
q
)
restrict
-<
(
c
^.
cs_search
)
@@
sqlT
oT
SQuery
(
unpack
q
)
restrict
-<
(
c
^.
cs_typename
)
.==
sqlInt4
(
toDBid
NodeDocument
)
returnA
-<
FacetDoc
{
facetDoc_id
=
c
^.
cs_id
,
facetDoc_created
=
c
^.
cs_date
...
...
@@ -231,7 +231,7 @@ selectContactViaDoc cId aId query = proc () -> do
(
contact
,
annuaire
,
_
,
corpus
,
doc
)
<-
queryContactViaDoc
-<
()
restrict
-<
matchMaybe
(
view
cs_search
<$>
doc
)
$
\
case
Nothing
->
toFields
False
Just
s
->
s
@@
sqlTSQuery
(
unpack
query
)
Just
s
->
s
@@
sqlT
oT
SQuery
(
unpack
query
)
restrict
-<
(
view
cs_typename
<$>
doc
)
.===
justFields
(
sqlInt4
(
toDBid
NodeDocument
))
restrict
-<
(
view
nc_node_id
<$>
corpus
)
.===
justFields
(
pgNodeId
cId
)
restrict
-<
(
view
nc_node_id
<$>
annuaire
)
.===
justFields
(
pgNodeId
aId
)
...
...
src/Gargantext/Database/Admin/Trigger/Contexts.hs
View file @
772987e7
...
...
@@ -38,10 +38,10 @@ triggerSearchUpdate = execPGSQuery query ( toDBid NodeDocument
RETURNS trigger AS $$
begin
IF new.typename = ? AND new.hyperdata @> '{"language_iso2":"EN"}' THEN
new.search := to_tsvector( 'english' ,
(new.hyperdata ->> 'title') || ' ' || (new.hyperdata ->> 'abstract')
);
new.search := to_tsvector( 'english' ,
new.hyperdata::jsonb
);
ELSIF new.typename = ? AND new.hyperdata @> '{"language_iso2":"FR"}' THEN
new.search := to_tsvector( '
french' , (new.hyperdata ->> 'title') || ' ' || (new.hyperdata ->> 'abstract')
);
new.search := to_tsvector( '
english' , new.hyperdata::jsonb
);
ELSIF new.typename = ? THEN
new.search := to_tsvector( 'french' , (new.hyperdata ->> 'prenom')
...
...
@@ -49,7 +49,7 @@ triggerSearchUpdate = execPGSQuery query ( toDBid NodeDocument
|| ' ' || (new.hyperdata ->> 'fonction')
);
ELSE
new.search := to_tsvector( 'english' ,
(new.hyperdata ->> 'title') || ' ' || (new.hyperdata ->> 'abstract')
);
new.search := to_tsvector( 'english' ,
new.hyperdata::jsonb
);
END IF;
return new;
end
...
...
stack.yaml
View file @
772987e7
...
...
@@ -44,8 +44,8 @@ extra-deps:
-
git
:
https://github.com/alpmestan/ekg-json.git
commit
:
fd7e5d7325939103cd87d0dc592faf644160341c
# Databases libs
-
git
:
https://github.com/
garganscript
/haskell-opaleye.git
commit
:
a5693a2010e6d13f51cdc576fa1dc9985e79ee0e
-
git
:
https://github.com/
adinapoli
/haskell-opaleye.git
commit
:
e9a29582ac66198dd2c2fdc3f8c8a4b1e6fbe004
-
git
:
https://github.com/robstewart57/rdf4h.git
commit
:
4fd2edf30c141600ffad6d730cc4c1c08a6dbce4
# External Data API connectors
...
...
test/Database/Operations.hs
View file @
772987e7
...
...
@@ -127,6 +127,7 @@ tests = sequential $ aroundAll withTestDB $ describe "Database" $ do
describe
"Corpus search"
$
do
it
"Can stem query terms"
stemmingTest
it
"Can perform a simple search inside documents"
corpusSearch01
it
"Can perform search by author in documents"
corpusSearch02
data
ExpectedActual
a
=
Expected
a
...
...
test/Database/Operations/DocumentSearch.hs
View file @
772987e7
...
...
@@ -27,7 +27,7 @@ import Gargantext.Database.Admin.Config (userMaster)
exampleDocument_01
::
HyperdataDocument
exampleDocument_01
=
either
error
id
$
parseEither
parseJSON
$
[
aesonQQ
|
{ "doi":"
sdfds
"
{ "doi":"
01
"
, "publication_day":6
, "language_iso2":"EN"
, "publication_minute":0
...
...
@@ -48,7 +48,7 @@ exampleDocument_01 = either error id $ parseEither parseJSON $ [aesonQQ|
exampleDocument_02
::
HyperdataDocument
exampleDocument_02
=
either
error
id
$
parseEither
parseJSON
$
[
aesonQQ
|
{ "doi":"
sdfds
"
{ "doi":"
02
"
, "publication_day":6
, "language_iso2":"EN"
, "publication_minute":0
...
...
@@ -67,6 +67,24 @@ exampleDocument_02 = either error id $ parseEither parseJSON $ [aesonQQ|
}
|]
-- | Test fixture: an Arxiv record for \"Haskell for OCaml programmers\"
-- whose @authors@ field is \"Raphael Poss\".
--
-- The JSON literal is decoded with 'parseJSON'; if decoding fails the
-- parser's message is raised via 'error', so a malformed fixture aborts
-- the test run instead of yielding a partially-filled document.
exampleDocument_03 :: HyperdataDocument
exampleDocument_03 = either error id (parseEither parseJSON rawDocument)
  where
    -- Raw JSON value for the fixture, kept separate from the decoding step.
    rawDocument = [aesonQQ|
      { "bdd": "Arxiv"
      , "doi": ""
      , "url": "http://arxiv.org/pdf/1405.3072v2"
      , "title": "Haskell for OCaml programmers"
      , "source": ""
      , "uniqId": "1405.3072v2"
      , "authors": "Raphael Poss"
      , "abstract": " This introduction to Haskell is written to optimize learning by programmers who already know OCaml. "
      , "institutes": ""
      , "language_iso2": "EN"
      , "publication_date": "2014-05-13T09:10:32Z"
      , "publication_year": 2014
      }
    |]
nlpServerConfig
::
NLPServerConfig
nlpServerConfig
=
let
uri
=
parseURI
"http://localhost:9000"
...
...
@@ -85,8 +103,8 @@ corpusAddDocuments env = do
(
Just
$
_node_hyperdata
$
corpus
)
(
Multi
EN
)
corpusId
[
exampleDocument_01
,
exampleDocument_02
]
liftIO
$
length
ids
`
shouldBe
`
2
[
exampleDocument_01
,
exampleDocument_02
,
exampleDocument_03
]
liftIO
$
length
ids
`
shouldBe
`
3
stemmingTest
::
TestEnv
->
Assertion
stemmingTest
_env
=
do
...
...
@@ -97,7 +115,7 @@ corpusSearch01 :: TestEnv -> Assertion
corpusSearch01
env
=
do
flip
runReaderT
env
$
runTestMonad
$
do
parentId
<-
getRootId
(
UserName
"gargantua"
)
parentId
<-
getRootId
(
UserName
userMaster
)
[
corpus
]
<-
getCorporaWithParentId
parentId
results1
<-
searchInCorpus
(
_node_id
corpus
)
False
[
"mineral"
]
Nothing
Nothing
Nothing
...
...
@@ -105,3 +123,16 @@ corpusSearch01 env = do
liftIO
$
length
results1
`
shouldBe
`
1
liftIO
$
length
results2
`
shouldBe
`
1
-- | Check that a corpus search can match on a term that is not part of a
-- document's title or abstract: the query @\"Raphael\"@ must return exactly
-- one hit.
--
-- NOTE(review): the hit is presumably 'exampleDocument_03', whose @authors@
-- field is \"Raphael Poss\" -- confirm against the documents inserted by the
-- earlier corpus-population step.
corpusSearch02 :: TestEnv -> Assertion
corpusSearch02 env =
  flip runReaderT env . runTestMonad $ do
    rootId <- getRootId (UserName userMaster)
    -- The master user is expected to own exactly one corpus at this point;
    -- any other shape fails the pattern match and therefore the test.
    [corpus] <- getCorporaWithParentId rootId
    hits <- searchInCorpus (_node_id corpus) False ["Raphael"] Nothing Nothing Nothing
    liftIO (length hits `shouldBe` 1)
test/Database/Operations/Types.hs
View file @
772987e7
...
...
@@ -4,7 +4,6 @@
module
Database.Operations.Types
where
import
Control.Concurrent.Async
import
Control.Exception
import
Control.Lens
import
Control.Monad.Except
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment