Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
ee1b45b3
Commit
ee1b45b3
authored
Sep 23, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] Backend NLP French tested
parent
91a63511
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
39 additions
and
34 deletions
+39
-34
backup
bin/backup
+1
-1
removeUnusedDocuments.sql
devops/postgres/tools/removeUnusedDocuments.sql
+8
-7
Core.hs
src/Gargantext/Core.hs
+4
-2
NodeStory.hs
src/Gargantext/Core/NodeStory.hs
+10
-6
Multi.hs
src/Gargantext/Core/Text/Terms/Multi.hs
+8
-8
PosTagging.hs
src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
+4
-6
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+4
-4
No files found.
bin/backup
View file @
ee1b45b3
...
...
@@ -3,7 +3,7 @@
# 0 3 * * * pg_dump --dbname=$MYDB | gzip > ~/backup/db/$(date +%Y-%m-%d).psql.gz
if
[[
$1
==
""
||
$2
==
""
]]
then
echo
"USAGE : ./
psql
gargantext.ini backup_directory"
then
echo
"USAGE : ./
bin/backup
gargantext.ini backup_directory"
else
INIFILE
=
$1
...
...
devops/postgres/tools/removeUnusedDocuments.sql
View file @
ee1b45b3
WITH
repeated
AS
(
select
nn
.
node2_id
AS
id
,
count
(
*
)
AS
c
FROM
nodes_nodes
nn
GROUP
BY
nn
.
node2_id
(
select
nn
.
context_id
AS
id
,
count
(
*
)
AS
c
FROM
nodes_contexts
nn
GROUP
BY
nn
.
context_id
)
DELETE
FROM
nodes
n
DELETE
FROM
contexts
c
USING
repeated
r
WHERE
n
.
id
=
r
.
id
AND
r
.
c
<
=
1
AND
n
.
typename
=
4
c
.
id
=
r
.
id
AND
r
.
c
=
1
AND
c
.
typename
=
4
;
src/Gargantext/Core.hs
View file @
ee1b45b3
...
...
@@ -77,15 +77,17 @@ instance HasDBid Lang where
type
Form
=
Text
type
Lem
=
Text
------------------------------------------------------------------------
data
PosTagAlgo
=
CoreNLP
|
JohnSnowServer
data
PosTagAlgo
=
CoreNLP
|
JohnSnowServer
|
Spacy
deriving
(
Show
,
Read
,
Eq
,
Ord
,
Generic
)
instance
Hashable
PosTagAlgo
instance
HasDBid
PosTagAlgo
where
toDBid
CoreNLP
=
1
toDBid
CoreNLP
=
1
toDBid
JohnSnowServer
=
2
toDBid
Spacy
=
3
fromDBid
1
=
CoreNLP
fromDBid
2
=
JohnSnowServer
fromDBid
3
=
Spacy
fromDBid
_
=
panic
"HasDBid posTagAlgo : Not implemented"
src/Gargantext/Core/NodeStory.hs
View file @
ee1b45b3
...
...
@@ -254,9 +254,7 @@ makeLenses ''NodeStoryEnv
makeLenses
''
N
odeStory
makeLenses
''
A
rchive
-----------------------------------------
----------------------------------------------------------------------
data
NodeStoryPoly
nid
v
ngtid
ngid
nre
=
NodeStoryDB
{
node_id
::
nid
,
version
::
v
...
...
@@ -614,14 +612,20 @@ readNodeStoryEnv pool = do
pure
$
NodeStoryEnv
{
_nse_var
=
mvar
,
_nse_saver
=
saver
,
_nse_saver_immediate
=
saver_immediate
,
_nse_getter
=
nodeStoryVar
pool
(
Just
mvar
)
}
,
_nse_getter
=
nodeStoryVar
pool
(
Just
mvar
)
}
nodeStoryVar
::
Pool
PGS
.
Connection
->
Maybe
(
MVar
NodeListStory
)
->
[
NodeId
]
->
IO
(
MVar
NodeListStory
)
nodeStoryVar
::
Pool
PGS
.
Connection
->
Maybe
(
MVar
NodeListStory
)
->
[
NodeId
]
->
IO
(
MVar
NodeListStory
)
nodeStoryVar
pool
Nothing
nIds
=
do
state
<-
withResource
pool
$
\
c
->
nodeStoryIncs
c
Nothing
nIds
newMVar
state
nodeStoryVar
pool
(
Just
mv
)
nIds
=
do
_
<-
withResource
pool
$
\
c
->
modifyMVar_
mv
$
\
nsl
->
(
nodeStoryIncs
c
(
Just
nsl
)
nIds
)
_
<-
withResource
pool
$
\
c
->
modifyMVar_
mv
$
\
nsl
->
(
nodeStoryIncs
c
(
Just
nsl
)
nIds
)
pure
mv
-- Debounce is useful since it could delay the saving to some later
...
...
src/Gargantext/Core/Text/Terms/Multi.hs
View file @
ee1b45b3
...
...
@@ -29,6 +29,7 @@ import qualified Gargantext.Core.Text.Terms.Multi.Lang.Fr as Fr
import
Gargantext.Core.Text.Terms.Multi.RAKE
(
multiterms_rake
)
-- import qualified Gargantext.Utils.JohnSnowNLP as JohnSnow
import
qualified
Gargantext.Utils.SpacyNLP
as
SpacyNLP
...
...
@@ -36,15 +37,14 @@ import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
type
NLP_API
=
Lang
->
Text
->
IO
PosSentences
-------------------------------------------------------------------
-- To be removed
multiterms
::
Lang
->
Text
->
IO
[
Terms
]
multiterms
=
multiterms'
tokenTag2terms
multiterms'
::
(
TokenTag
->
a
)
->
Lang
->
Text
->
IO
[
a
]
multiterms'
f
lang
txt
=
concat
<$>
map
(
map
f
)
<$>
map
(
filter
(
\
t
->
_my_token_pos
t
==
Just
NP
))
<$>
tokenTags
lang
txt
where
multiterms'
::
(
TokenTag
->
a
)
->
Lang
->
Text
->
IO
[
a
]
multiterms'
f
lang
txt
=
concat
<$>
map
(
map
f
)
<$>
map
(
filter
(
\
t
->
_my_token_pos
t
==
Just
NP
))
<$>
tokenTags
lang
txt
-------------------------------------------------------------------
tokenTag2terms
::
TokenTag
->
Terms
...
...
@@ -53,7 +53,7 @@ tokenTag2terms (TokenTag ws t _ _) = Terms ws t
tokenTags
::
Lang
->
Text
->
IO
[[
TokenTag
]]
tokenTags
EN
txt
=
tokenTagsWith
EN
txt
corenlp
tokenTags
FR
txt
=
tokenTagsWith
FR
txt
SpacyNLP
.
nlp
tokenTags
_
_
=
panic
"[G.C.T.T.Multi] NLP API not implemented yet"
tokenTags
l
_
=
panic
$
"[G.C.T.T.Multi] Lang NLP API not implemented yet "
<>
(
cs
$
show
l
)
tokenTagsWith
::
Lang
->
Text
->
NLP_API
->
IO
[[
TokenTag
]]
tokenTagsWith
lang
txt
nlp
=
map
(
groupTokens
lang
)
...
...
src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
View file @
ee1b45b3
...
...
@@ -35,6 +35,10 @@ import Gargantext.Core.Types
import
Gargantext.Prelude
import
Network.HTTP.Simple
-- import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
------------------------------------------------------------------------
------------------------------------------------------------------------
tokens2tokensTags
::
[
Token
]
->
[
TokenTag
]
...
...
@@ -73,18 +77,12 @@ corenlp' :: ( FromJSON a
corenlp'
lang
txt
=
do
let
properties
=
case
lang
of
EN
->
"{
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
-- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
FR
->
"{
\"
annotators
\"
:
\"
tokenize,ssplit,pos,lemma,ner
\"
,
\"
parse.model
\"
:
\"
edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz
\"
,
\"
pos.model
\"
:
\"
edu/stanford/nlp/models/pos-tagger/french/french.tagger
\"
,
\"
tokenize.language
\"
:
\"
fr
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
_
->
panic
$
pack
"not implemented yet"
url
<-
parseRequest
$
"POST http://localhost:9000/?properties="
<>
properties
let
request
=
setRequestBodyLBS
(
cs
txt
)
url
httpJSON
request
corenlpRaw
::
Lang
->
Text
->
IO
Value
corenlpRaw
lang
txt
=
do
response
<-
corenlp'
lang
txt
pure
(
getResponseBody
response
)
corenlp
::
Lang
->
Text
->
IO
PosSentences
corenlp
lang
txt
=
do
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
ee1b45b3
...
...
@@ -318,9 +318,9 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
<-
getOrMk_RootWithCorpus
(
UserName
userMaster
)
(
Left
""
)
ctype
--let gp = (GroupParams l 2 3 (StopSize 3))
-- Here the PosTagAlgo should be chosen according the Lang
let
gp
=
GroupWithPosTag
l
CoreNLP
HashMap
.
empty
ngs
<-
buildNgramsLists
user
userCorpusId
masterCorpusId
mfslw
gp
-- Here the PosTagAlgo should be chosen according t
o t
he Lang
let
gp
=
GroupWithPosTag
l
CoreNLP
HashMap
.
empty
ngs
<-
buildNgramsLists
user
userCorpusId
masterCorpusId
mfslw
gp
-- printDebug "flowCorpusUser:ngs" ngs
...
...
@@ -329,8 +329,8 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
-- _ <- insertOccsUpdates userCorpusId mastListId
-- printDebug "userListId" userListId
-- User Graph Flow
_
<-
insertDefaultNodeIfNotExists
NodeDashboard
userCorpusId
userId
_
<-
insertDefaultNodeIfNotExists
NodeGraph
userCorpusId
userId
_
<-
insertDefaultNodeIfNotExists
NodeDashboard
userCorpusId
userId
--
_
<-
mkPhylo
userCorpusId
userId
-- Annuaire Flow
-- _ <- mkAnnuaire rootUserId userId
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment