Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
152
Issues
152
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
a43c33d0
Commit
a43c33d0
authored
May 04, 2022
by
qlobbe
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev-phylo' of
https://gitlab.iscpif.fr/gargantext/haskell-gargantext
into dev-phylo
parents
c77a8b8d
8873a848
Pipeline
#2789
failed with stage
in 99 minutes and 1 second
Changes
29
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
29 changed files
with
354 additions
and
162 deletions
+354
-162
CHANGELOG.md
CHANGELOG.md
+13
-0
Auth.hs
bin/gargantext-client/Auth.hs
+2
-0
Core.hs
bin/gargantext-client/Core.hs
+2
-0
Main.hs
bin/gargantext-client/Main.hs
+3
-3
Options.hs
bin/gargantext-client/Options.hs
+1
-0
Script.hs
bin/gargantext-client/Script.hs
+4
-4
Tracking.hs
bin/gargantext-client/Tracking.hs
+1
-0
Main.hs
bin/gargantext-import/Main.hs
+7
-3
create
devops/postgres/create
+6
-5
package.yaml
package.yaml
+38
-1
List.hs
src/Gargantext/API/Ngrams/List.hs
+52
-35
Contact.hs
src/Gargantext/API/Node/Contact.hs
+2
-1
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+41
-25
DocumentUpload.hs
src/Gargantext/API/Node/DocumentUpload.hs
+16
-4
DocumentsFromWriteNodes.hs
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
+2
-1
Update.hs
src/Gargantext/API/Node/Update.hs
+4
-3
API.hs
src/Gargantext/Core/Text/Corpus/API.hs
+15
-9
Hal.hs
src/Gargantext/Core/Text/Corpus/API/Hal.hs
+13
-2
Pubmed.hs
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
+13
-6
Main.hs
src/Gargantext/Core/Types/Main.hs
+6
-2
Phylo.hs
src/Gargantext/Core/Viz/Phylo.hs
+8
-8
API.hs
src/Gargantext/Core/Viz/Phylo/API.hs
+13
-7
Tools.hs
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
+0
-1
PhyloExport.hs
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
+1
-0
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+60
-35
New.hs
src/Gargantext/Database/Action/User/New.hs
+14
-2
Prelude.hs
src/Gargantext/Database/Prelude.hs
+7
-3
Node.hs
src/Gargantext/Database/Query/Table/Node.hs
+8
-0
stack.yaml
stack.yaml
+2
-2
No files found.
CHANGELOG.md
View file @
a43c33d0
## Version 0.0.5.6.7
*
[
BACK
]
fix limit with MAX_DOCS_SCRAPERS
*
[
FEAT
]
Users Password Sugar function : in repl, runCmdReplEasy $ updateUsersPassword
[
"user@mail.com"
]
## Version 0.0.5.6.6
*
[
BACK
]
CSV List post and reindex after (for both CSV and JSON)
## Version 0.0.5.6.5
*
[
BACK
]
HAL parser with Conduit
## Version 0.0.5.6.4
*
[
FRONT
]
Forest Tooltip + Async progress bar fix
## Version 0.0.5.6.3
*
[
BACK
][
EXPORT
][
GEXF
]
node size
...
...
bin/gargantext-client/Auth.hs
View file @
a43c33d0
module
Auth
where
import
Prelude
import
Data.Maybe
import
Core
import
Options
...
...
bin/gargantext-client/Core.hs
View file @
a43c33d0
module
Core
(
problem
,
whenVerbose
)
where
import
Prelude
import
Options
import
Options.Generic
...
...
bin/gargantext-client/Main.hs
View file @
a43c33d0
...
...
@@ -2,11 +2,11 @@ module Main where
import
Control.Monad
import
Network.HTTP.Client
import
Options.Generic
import
Servant.Client
import
Options
import
Options.Generic
import
Prelude
import
Script
(
script
)
import
Servant.Client
main
::
IO
()
main
=
do
...
...
bin/gargantext-client/Options.hs
View file @
a43c33d0
{-# LANGUAGE TypeOperators #-}
module
Options
where
import
Prelude
import
Options.Generic
-- | Some general options to be specified on the command line.
...
...
bin/gargantext-client/Script.hs
View file @
a43c33d0
module
Script
(
script
)
where
import
Control.Monad.IO.Class
import
Gargantext.API.Client
import
Servant.Client
import
Auth
import
Control.Monad.IO.Class
import
Core
import
Gargantext.API.Client
import
Options
import
Prelude
import
Servant.Client
import
Tracking
-- | An example script. Tweak, rebuild and re-run the executable to see the
...
...
bin/gargantext-client/Tracking.hs
View file @
a43c33d0
...
...
@@ -8,6 +8,7 @@ module Tracking
import
Core
import
Options
import
Prelude
import
Control.Monad.IO.Class
import
Data.List
(
intersperse
)
...
...
bin/gargantext-import/Main.hs
View file @
a43c33d0
...
...
@@ -17,10 +17,11 @@ module Main where
import
Control.Exception
(
finally
)
import
Data.Either
import
Data.Maybe
(
Maybe
(
..
))
import
Data.Text
(
Text
)
import
Prelude
(
read
)
import
System.Environment
(
getArgs
)
import
qualified
Data.Text
as
Text
import
Text.Read
(
readMaybe
)
import
Gargantext.API.Dev
(
withDevEnv
,
runCmdDev
)
import
Gargantext.API.Admin.EnvTypes
(
DevEnv
(
..
))
...
...
@@ -46,11 +47,14 @@ main = do
--tt = (Unsupervised EN 6 0 Nothing)
tt
=
(
Multi
EN
)
format
=
CsvGargV3
-- CsvHal --WOS
limit'
=
case
(
readMaybe
limit
::
Maybe
Int
)
of
Nothing
->
panic
$
"Cannot read limit: "
<>
(
Text
.
pack
limit
)
Just
l
->
l
corpus
::
forall
m
.
FlowCmdM
DevEnv
GargError
m
=>
m
CorpusId
corpus
=
flowCorpusFile
(
UserName
$
cs
user
)
(
Left
(
cs
name
::
Text
))
(
read
limit
::
Int
)
tt
format
corpusPath
Nothing
(
\
_
->
pure
()
)
corpus
=
flowCorpusFile
(
UserName
$
cs
user
)
(
Left
(
cs
name
::
Text
))
limit'
tt
format
corpusPath
Nothing
(
\
_
->
pure
()
)
corpusCsvHal
::
forall
m
.
FlowCmdM
DevEnv
GargError
m
=>
m
CorpusId
corpusCsvHal
=
flowCorpusFile
(
UserName
$
cs
user
)
(
Left
(
cs
name
::
Text
))
(
read
limit
::
Int
)
tt
CsvHal
corpusPath
Nothing
(
\
_
->
pure
()
)
corpusCsvHal
=
flowCorpusFile
(
UserName
$
cs
user
)
(
Left
(
cs
name
::
Text
))
limit'
tt
CsvHal
corpusPath
Nothing
(
\
_
->
pure
()
)
annuaire
::
forall
m
.
FlowCmdM
DevEnv
GargError
m
=>
m
CorpusId
annuaire
=
flowAnnuaire
(
UserName
$
cs
user
)
(
Left
"Annuaire"
)
(
Multi
EN
)
corpusPath
(
\
_
->
pure
()
)
...
...
devops/postgres/create
View file @
a43c33d0
...
...
@@ -16,16 +16,17 @@ HOST=$(getter "DB_HOST")
PORT
=
$(
getter
"DB_PORT"
)
#psql -c "CREATE USER \"${USER}\""
#psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'"
psql
-c
"CREATE USER
\"
${
USER
}
\"
"
psql
-c
"ALTER USER
\"
${
USER
}
\"
with PASSWORD '
${
PW
}
'"
psql
-c
"DROP DATABASE IF EXISTS
\"
${
NAME
}
\"
"
createdb
"
${
NAME
}
"
psql
"
${
NAME
}
"
< extensions.sql
#psql "${NAME}" < schema.sql
# if new
#psql "${NAME}" < schema.sql
#../../bin/psql ../../gargantext.ini < gargandb.dump
psql
${
NAME
}
<
$2
psql
-c
"ALTER DATABASE
\"
${
NAME
}
\"
OWNER to
\"
${
USER
}
\"
"
...
...
package.yaml
View file @
a43c33d0
name
:
gargantext
version
:
'
0.0.5.6.
3
'
version
:
'
0.0.5.6.
7
'
synopsis
:
Search, map, share
description
:
Please see README.md
category
:
Data
...
...
@@ -57,6 +57,7 @@ library:
-
Gargantext.API.Ngrams.Prelude
-
Gargantext.API.Admin.Settings
-
Gargantext.API.Admin.EnvTypes
-
Gargantext.API.Admin.Auth.Types
-
Gargantext.API.Admin.Types
-
Gargantext.API.Prelude
-
Gargantext.API.Client
...
...
@@ -327,6 +328,42 @@ executables:
-
unordered-containers
-
full-text-search
gargantext-client
:
main
:
Main.hs
source-dirs
:
bin/gargantext-client
ghc-options
:
-
-Wall
-
-threaded
-
-rtsopts
-
-with-rtsopts=-N
-
-O2
-
-Wmissing-signatures
default-extensions
:
-
DataKinds
-
DeriveGeneric
-
FlexibleContexts
-
FlexibleInstances
-
GeneralizedNewtypeDeriving
-
MultiParamTypeClasses
-
NamedFieldPuns
-
NoImplicitPrelude
-
OverloadedStrings
-
RankNTypes
-
RecordWildCards
dependencies
:
-
base
-
extra
-
servant
-
text
-
optparse-generic
-
exceptions
-
servant-client
-
servant-auth-client
-
gargantext
-
ekg-json
-
http-client
gargantext-phylo
:
main
:
Main.hs
source-dirs
:
bin/gargantext-phylo
...
...
src/Gargantext/API/Ngrams/List.hs
View file @
a43c33d0
...
...
@@ -20,16 +20,16 @@ import Data.Aeson
import
Data.Either
(
Either
(
..
))
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
Map
,
toList
)
import
Data.Maybe
(
catMaybes
)
import
Data.Maybe
(
catMaybes
,
fromMaybe
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
,
concat
,
pack
)
import
Data.Text
(
Text
,
concat
,
pack
,
splitOn
)
import
Data.Vector
(
Vector
)
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Ngrams
(
setListNgrams
)
import
Gargantext.API.Ngrams.List.Types
import
Gargantext.API.Ngrams.Prelude
(
getNgramsList
)
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Ngrams.Prelude
(
getNgramsList
)
import
Gargantext.API.Ngrams.List.Types
import
Gargantext.API.Prelude
(
GargServer
)
import
Gargantext.Core.NodeStory
import
Gargantext.Core.Text.Terms
(
ExtractedNgrams
(
..
))
...
...
@@ -40,9 +40,11 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getOccByNgramsOnlyFast'
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.Node
(
getNode
)
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocNodes
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.Context
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.Node
(
_node_parent_id
)
import
Gargantext.Database.Types
(
Indexed
(
..
))
import
Gargantext.Prelude
import
Network.HTTP.Media
((
//
),
(
/:
))
...
...
@@ -53,22 +55,12 @@ import qualified Data.Csv as Csv
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Text
as
Text
import
qualified
Data.Vector
as
Vec
import
qualified
Prelude
as
Prelude
import
qualified
Protolude
as
P
------------------------------------------------------------------------
-- | TODO refactor
{-
type API = Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
-- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
:<|> PostAPI
:<|> CSVPostAPI
api :: ListId -> GargServer API
api l = get l :<|> postAsync l :<|> csvPostAsync l
-}
----------------------
type
GETAPI
=
Summary
"Get List"
:>
"lists"
:>
Capture
"listId"
ListId
...
...
@@ -122,11 +114,11 @@ get lId = do
------------------------------------------------------------------------
-- TODO : purge list
-- TODO talk
po
st
::
FlowCmdM
env
err
m
setLi
st
::
FlowCmdM
env
err
m
=>
ListId
->
NgramsList
->
m
Bool
po
st
l
m
=
do
setLi
st
l
m
=
do
-- TODO check with Version for optim
printDebug
"New list as file"
l
_
<-
mapM
(
\
(
nt
,
Versioned
_v
ns
)
->
setListNgrams
l
nt
ns
)
$
toList
m
...
...
@@ -149,13 +141,13 @@ reIndexWith cId lId nt lts = do
<$>
map
(
\
(
k
,
vs
)
->
k
:
vs
)
<$>
HashMap
.
toList
<$>
getTermsWith
identity
[
lId
]
nt
lts
printDebug
"ts"
ts
--
printDebug "ts" ts
-- Taking the ngrams with 0 occurrences only (orphans)
occs
<-
getOccByNgramsOnlyFast'
cId
lId
nt
ts
printDebug
"occs"
occs
--
printDebug "occs" occs
let
orphans
=
List
.
concat
$
map
(
\
t
->
case
HashMap
.
lookup
t
occs
of
...
...
@@ -163,11 +155,11 @@ reIndexWith cId lId nt lts = do
Just
n
->
if
n
<=
1
then
[
t
]
else
[ ]
)
ts
printDebug
"orphans"
orphans
--
printDebug "orphans" orphans
-- Get all documents of the corpus
docs
<-
selectDocNodes
cId
printDebug
"docs length"
(
List
.
length
docs
)
--
printDebug "docs length" (List.length docs)
-- Checking Text documents where orphans match
-- TODO Tests here
...
...
@@ -184,7 +176,7 @@ reIndexWith cId lId nt lts = do
(
List
.
cycle
[
Map
.
fromList
$
[(
nt
,
Map
.
singleton
(
doc
^.
context_id
)
1
)]])
)
docs
printDebug
"ngramsByDoc"
ngramsByDoc
--
printDebug "ngramsByDoc" ngramsByDoc
-- Saving the indexation in database
_
<-
mapM
(
saveDocNgramsWith
lId
)
ngramsByDoc
...
...
@@ -210,7 +202,7 @@ postAsync lId =
JobFunction
(
\
f
log'
->
let
log''
x
=
do
printDebug
"postAsync ListId"
x
--
printDebug "postAsync ListId" x
liftBase
$
log'
x
in
postAsync'
lId
f
log''
)
...
...
@@ -223,20 +215,32 @@ postAsync' l (WithFile _ m _) logStatus = do
logStatus
JobLog
{
_scst_succeeded
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
1
,
_scst_remaining
=
Just
2
,
_scst_events
=
Just
[]
}
printDebug
"New list as file"
l
_
<-
po
st
l
m
_
<-
setLi
st
l
m
-- printDebug "Done" r
pure
JobLog
{
_scst_succeeded
=
Just
1
logStatus
JobLog
{
_scst_succeeded
=
Just
1
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
1
,
_scst_events
=
Just
[]
}
corpus_node
<-
getNode
l
-- (Proxy :: Proxy HyperdataList)
let
corpus_id
=
fromMaybe
(
panic
""
)
(
_node_parent_id
corpus_node
)
_
<-
reIndexWith
corpus_id
l
NgramsTerms
(
Set
.
fromList
[
MapTerm
,
CandidateTerm
])
pure
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
------------------------------------------------------------------------
------------------------------------------------------------------------
type
CSVPostAPI
=
Summary
"Update List (legacy v3 CSV)"
:>
"csv"
:>
"add"
...
...
@@ -257,12 +261,22 @@ readCsvText t = case eDec of
parseCsvData
::
[(
Text
,
Text
,
Text
)]
->
Map
NgramsTerm
NgramsRepoElement
parseCsvData
lst
=
Map
.
fromList
$
conv
<$>
lst
where
conv
(
_status
,
label
,
_
forms
)
=
conv
(
status
,
label
,
forms
)
=
(
NgramsTerm
label
,
NgramsRepoElement
{
_nre_size
=
1
,
_nre_list
=
CandidateTerm
,
_nre_list
=
case
status
==
"map"
of
True
->
MapTerm
False
->
case
status
==
"main"
of
True
->
CandidateTerm
False
->
StopTerm
,
_nre_root
=
Nothing
,
_nre_parent
=
Nothing
,
_nre_children
=
MSet
Map
.
empty
})
,
_nre_children
=
MSet
$
Map
.
fromList
$
map
(
\
form
->
(
NgramsTerm
form
,
()
))
$
filter
(
/=
""
)
$
splitOn
"|&|"
forms
}
)
csvPost
::
FlowCmdM
env
err
m
=>
ListId
...
...
@@ -277,11 +291,14 @@ csvPost l m = do
--printDebug "[csvPost] lst" lst
printDebug
"[csvPost] p"
p
_
<-
setListNgrams
l
NgramsTerms
p
pure
True
------------------------------------------------------------------------
printDebug
"ReIndexing List"
l
corpus_node
<-
getNode
l
-- (Proxy :: Proxy HyperdataList)
let
corpus_id
=
fromMaybe
(
panic
""
)
(
_node_parent_id
corpus_node
)
_
<-
reIndexWith
corpus_id
l
NgramsTerms
(
Set
.
fromList
[
MapTerm
,
CandidateTerm
])
pure
True
------------------------------------------------------------------------
csvPostAsync
::
GargServer
CSVAPI
csvPostAsync
lId
=
serveJobsAPI
$
...
...
src/Gargantext/API/Node/Contact.hs
View file @
a43c33d0
...
...
@@ -22,6 +22,7 @@ Portability : POSIX
module
Gargantext.API.Node.Contact
where
import
Conduit
import
Data.Aeson
import
Data.Either
(
Either
(
Right
))
import
Data.Maybe
(
Maybe
(
..
))
...
...
@@ -93,7 +94,7 @@ addContact u nId (AddContactParams fn ln) logStatus = do
,
_scst_remaining
=
Just
1
,
_scst_events
=
Just
[]
}
_
<-
flow
(
Nothing
::
Maybe
HyperdataAnnuaire
)
u
(
Right
[
nId
])
(
Multi
EN
)
Nothing
[[
hyperdataContact
fn
ln
]]
logStatus
_
<-
flow
(
Nothing
::
Maybe
HyperdataAnnuaire
)
u
(
Right
[
nId
])
(
Multi
EN
)
Nothing
(
Just
1
,
yield
$
hyperdataContact
fn
ln
)
logStatus
pure
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
a43c33d0
...
...
@@ -18,6 +18,8 @@ New corpus means either:
module
Gargantext.API.Node.Corpus.New
where
import
Conduit
import
Control.Lens
hiding
(
elements
,
Empty
)
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
...
...
@@ -39,7 +41,7 @@ import Gargantext.Prelude
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
,
ScraperEvent
(
..
),
scst_events
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Job
(
jobLogSuccess
,
jobLogFailTotal
,
jobLogFailTotalWithMessage
)
import
Gargantext.API.Job
(
addEvent
,
jobLogSuccess
,
jobLogFailTotal
,
jobLogFailTotalWithMessage
)
import
Gargantext.API.Node.Corpus.New.File
import
Gargantext.API.Node.Corpus.Searx
import
Gargantext.API.Node.Corpus.Types
...
...
@@ -213,24 +215,37 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
-- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private
txts
<-
mapM
(
\
db
->
getDataText
db
(
Multi
l
)
q
maybeLimit
)
[
database2origin
dbs
]
logStatus
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
$
1
+
length
txts
,
_scst_events
=
Just
[]
}
cids
<-
mapM
(
\
txt
->
flowDataText
user
txt
(
Multi
l
)
cid
Nothing
logStatus
)
txts
printDebug
"corpus id"
cids
printDebug
"sending email"
(
"xxxxxxxxxxxxxxxxxxxxx"
::
Text
)
sendMail
user
-- TODO ...
pure
JobLog
{
_scst_succeeded
=
Just
3
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
eTxts
<-
mapM
(
\
db
->
getDataText
db
(
Multi
l
)
q
maybeLimit
)
[
database2origin
dbs
]
let
lTxts
=
lefts
eTxts
case
lTxts
of
[]
->
do
let
txts
=
rights
eTxts
-- TODO Sum lenghts of each txt elements
logStatus
$
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
$
1
+
length
txts
,
_scst_events
=
Just
[]
}
cids
<-
mapM
(
\
txt
->
do
flowDataText
user
txt
(
Multi
l
)
cid
Nothing
logStatus
)
txts
printDebug
"corpus id"
cids
printDebug
"sending email"
(
"xxxxxxxxxxxxxxxxxxxxx"
::
Text
)
sendMail
user
-- TODO ...
pure
JobLog
{
_scst_succeeded
=
Just
3
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
(
err
:
_
)
->
do
pure
$
addEvent
"ERROR"
(
T
.
pack
$
show
err
)
$
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
1
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
type
AddWithForm
=
Summary
"Add with FormUrlEncoded to corpus endpoint"
...
...
@@ -268,15 +283,16 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
_
->
cs
d
eDocs
<-
liftBase
$
parse
data
'
case
eDocs
of
Right
docs
'
->
do
Right
docs
->
do
-- TODO Add progress (jobStatus) update for docs - this is a
-- long action
limit'
<-
view
$
hasConfig
.
gc_max_docs_parsers
let
limit
=
fromIntegral
limit'
if
length
docs
'
>
limit
then
do
printDebug
"[addToCorpusWithForm] number of docs exceeds the limit"
(
show
$
length
docs
'
)
if
length
docs
>
limit
then
do
printDebug
"[addToCorpusWithForm] number of docs exceeds the limit"
(
show
$
length
docs
)
let
panicMsg'
=
[
"[addToCorpusWithForm] number of docs ("
,
show
$
length
docs
'
,
show
$
length
docs
,
") exceeds the MAX_DOCS_PARSERS limit ("
,
show
limit
,
")"
]
...
...
@@ -285,7 +301,6 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
panic
panicMsg
else
pure
()
let
docs
=
splitEvery
500
$
take
limit
docs'
printDebug
"Parsing corpus finished : "
cid
logStatus
jobLog2
...
...
@@ -296,7 +311,8 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
(
Right
[
cid
])
(
Multi
$
fromMaybe
EN
l
)
Nothing
(
map
(
map
toHyperdataDocument
)
docs
)
(
Just
$
fromIntegral
$
length
docs
,
yieldMany
docs
.|
mapC
toHyperdataDocument
)
--(map (map toHyperdataDocument) docs)
logStatus
printDebug
"Extraction finished : "
cid
...
...
src/Gargantext/API/Node/DocumentUpload.hs
View file @
a43c33d0
...
...
@@ -69,21 +69,32 @@ api :: UserId -> NodeId -> GargServer API
api
uId
nId
=
serveJobsAPI
$
JobFunction
(
\
q
log'
->
do
documentUpload
uId
nId
q
(
liftBase
.
log'
)
documentUpload
Async
uId
nId
q
(
liftBase
.
log'
)
)
documentUpload
::
(
FlowCmdM
env
err
m
)
documentUpload
Async
::
(
FlowCmdM
env
err
m
)
=>
UserId
->
NodeId
->
DocumentUpload
->
(
JobLog
->
m
()
)
->
m
JobLog
documentUpload
_uId
nId
doc
logStatus
=
do
documentUpload
Async
_uId
nId
doc
logStatus
=
do
let
jl
=
JobLog
{
_scst_succeeded
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
1
,
_scst_events
=
Just
[]
}
logStatus
jl
docIds
<-
documentUpload
nId
doc
printDebug
"documentUploadAsync"
docIds
pure
$
jobLogSuccess
jl
documentUpload
::
(
FlowCmdM
env
err
m
)
=>
NodeId
->
DocumentUpload
->
m
[
DocId
]
documentUpload
nId
doc
=
do
mcId
<-
getClosestParentIdByType'
nId
NodeCorpus
let
cId
=
case
mcId
of
Just
c
->
c
...
...
@@ -116,5 +127,6 @@ documentUpload _uId nId doc logStatus = do
docIds
<-
insertMasterDocs
(
Nothing
::
Maybe
HyperdataCorpus
)
(
Multi
EN
)
[
hd
]
_
<-
Doc
.
add
cId
docIds
pure
docIds
pure
$
jobLogSuccess
jl
src/Gargantext/API/Node/DocumentsFromWriteNodes.hs
View file @
a43c33d0
...
...
@@ -16,6 +16,7 @@ Portability : POSIX
module
Gargantext.API.Node.DocumentsFromWriteNodes
where
import
Conduit
import
Control.Lens
((
^.
))
import
Data.Aeson
import
Data.Either
(
Either
(
..
),
rights
)
...
...
@@ -100,7 +101,7 @@ documentsFromWriteNodes uId nId _p logStatus = do
let
parsedE
=
(
\
(
node
,
contents
)
->
hyperdataDocumentFromFrameWrite
(
node
^.
node_hyperdata
,
contents
))
<$>
frameWritesWithContents
let
parsed
=
rights
parsedE
_
<-
flowDataText
(
RootId
(
NodeId
uId
))
(
DataNew
[
parsed
]
)
(
Multi
EN
)
cId
Nothing
logStatus
_
<-
flowDataText
(
RootId
(
NodeId
uId
))
(
DataNew
(
Just
$
fromIntegral
$
length
parsed
,
yieldMany
parsed
)
)
(
Multi
EN
)
cId
Nothing
logStatus
pure
$
jobLogSuccess
jobLog
------------------------------------------------------------------------
...
...
src/Gargantext/API/Node/Update.hs
View file @
a43c33d0
...
...
@@ -36,7 +36,8 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import
Gargantext.Database.Action.Metrics
(
updateNgramsOccurrences
,
updateContextScore
)
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.Node
(
defaultList
,
getNode
,
insertNodes
,
node
)
import
Gargantext.Database.Query.Table.Node
(
defaultList
,
getNode
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
NgramsTerms
))
import
Gargantext.Database.Schema.Node
(
node_parent_id
)
import
Gargantext.Prelude
(
Bool
(
..
),
Ord
,
Eq
,
(
<$>
),
(
$
),
liftBase
,
(
.
),
printDebug
,
pure
,
show
,
cs
,
(
<>
),
panic
,
(
<*>
))
...
...
@@ -191,7 +192,7 @@ updateNode _uId lId (UpdateNodeParamsList _mode) logStatus = do
,
_scst_events
=
Just
[]
}
updateNode
userId
phyloId
(
UpdateNodePhylo
config
)
logStatus
=
do
updateNode
_
userId
phyloId
(
UpdateNodePhylo
config
)
logStatus
=
do
logStatus
JobLog
{
_scst_succeeded
=
Just
1
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
2
...
...
@@ -210,7 +211,7 @@ updateNode userId phyloId (UpdateNodePhylo config) logStatus = do
,
_scst_events
=
Just
[]
}
_
phyloId
<-
insertNodes
[
node
NodePhylo
"Phylo"
(
HyperdataPhylo
Nothing
(
Just
phy
))
(
Just
corpusId
)
userId
]
_
<-
updateHyperdata
phyloId
(
HyperdataPhylo
Nothing
(
Just
phy
))
pure
JobLog
{
_scst_succeeded
=
Just
3
,
_scst_failed
=
Just
0
...
...
src/Gargantext/Core/Text/Corpus/API.hs
View file @
a43c33d0
...
...
@@ -18,6 +18,8 @@ module Gargantext.Core.Text.Corpus.API
)
where
import
Conduit
import
Data.Either
(
Either
(
..
))
import
Data.Maybe
import
Gargantext.API.Admin.Orchestrator.Types
(
ExternalAPIs
(
..
),
externalAPIs
)
import
Gargantext.Core
(
Lang
(
..
))
...
...
@@ -27,21 +29,25 @@ import qualified Gargantext.Core.Text.Corpus.API.Hal as HAL
import
qualified
Gargantext.Core.Text.Corpus.API.Isidore
as
ISIDORE
import
qualified
Gargantext.Core.Text.Corpus.API.Istex
as
ISTEX
import
qualified
Gargantext.Core.Text.Corpus.API.Pubmed
as
PUBMED
-- | TODO put in gargantext.init
default_limit
::
Maybe
Integer
default_limit
=
Just
10000
import
Servant.Client
(
ClientError
)
-- | Get External API metadata main function
get
::
ExternalAPIs
->
Lang
->
Query
->
Maybe
Limit
->
IO
[
HyperdataDocument
]
get
PubMed
_la
q
_l
=
PUBMED
.
get
q
default_limit
-- EN only by default
get
HAL
la
q
_l
=
HAL
.
get
la
q
default_limit
get
IsTex
la
q
_l
=
ISTEX
.
get
la
q
default_limit
get
Isidore
la
q
_l
=
ISIDORE
.
get
la
(
fromIntegral
<$>
default_limit
)
(
Just
q
)
Nothing
-- -> IO [HyperdataDocument]
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
get
PubMed
_la
q
limit
=
PUBMED
.
get
q
limit
--docs <- PUBMED.get q default_limit -- EN only by default
--pure (Just $ fromIntegral $ length docs, yieldMany docs)
get
HAL
la
q
limit
=
HAL
.
getC
la
q
limit
get
IsTex
la
q
limit
=
do
docs
<-
ISTEX
.
get
la
q
limit
pure
$
Right
(
Just
$
fromIntegral
$
length
docs
,
yieldMany
docs
)
get
Isidore
la
q
limit
=
do
docs
<-
ISIDORE
.
get
la
(
fromIntegral
<$>
limit
)
(
Just
q
)
Nothing
pure
$
Right
(
Just
$
fromIntegral
$
length
docs
,
yieldMany
docs
)
get
_
_
_
_
=
undefined
-- | Some Sugar for the documentation
...
...
src/Gargantext/Core/Text/Corpus/API/Hal.hs
View file @
a43c33d0
...
...
@@ -12,8 +12,11 @@ Portability : POSIX
module
Gargantext.Core.Text.Corpus.API.Hal
where
import
Conduit
import
Data.Either
import
Data.Maybe
import
Data.Text
(
Text
,
pack
,
intercalate
)
import
Servant.Client
(
ClientError
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
...
...
@@ -25,8 +28,16 @@ import qualified HAL.Doc.Corpus as HAL
get
::
Lang
->
Text
->
Maybe
Integer
->
IO
[
HyperdataDocument
]
get
la
q
ml
=
do
docs
<-
HAL
.
getMetadataWith
q
(
Just
0
)
(
fromIntegral
<$>
ml
)
either
(
panic
.
pack
.
show
)
(
\
d
->
mapM
(
toDoc'
la
)
$
HAL
.
_docs
d
)
docs
eDocs
<-
HAL
.
getMetadataWith
q
(
Just
0
)
ml
either
(
panic
.
pack
.
show
)
(
\
d
->
mapM
(
toDoc'
la
)
$
HAL
.
_docs
d
)
eDocs
getC
::
Lang
->
Text
->
Maybe
Integer
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
getC
la
q
ml
=
do
eRes
<-
HAL
.
getMetadataWithC
q
(
Just
0
)
ml
pure
$
(
\
(
len
,
docsC
)
->
(
len
,
docsC
.|
mapMC
(
toDoc'
la
)))
<$>
eRes
-- case eRes of
-- Left err -> panic $ pack $ show err
-- Right (len, docsC) -> pure (len, docsC .| mapMC (toDoc' la))
toDoc'
::
Lang
->
HAL
.
Corpus
->
IO
HyperdataDocument
toDoc'
la
(
HAL
.
Corpus
i
t
ab
d
s
aus
affs
struct_id
)
=
do
...
...
src/Gargantext/Core/Text/Corpus/API/Pubmed.hs
View file @
a43c33d0
...
...
@@ -13,9 +13,12 @@ Portability : POSIX
module
Gargantext.Core.Text.Corpus.API.Pubmed
where
import
Conduit
import
Data.Either
(
Either
)
import
Data.Maybe
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
Text
import
Servant.Client
(
ClientError
)
import
Gargantext.Prelude
import
Gargantext.Core
(
Lang
(
..
))
...
...
@@ -31,17 +34,21 @@ type Limit = PubMed.Limit
-- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs
get
::
Query
->
Maybe
Limit
->
IO
[
HyperdataDocument
]
get
q
l
=
either
(
\
e
->
panic
$
"CRAWL: PubMed"
<>
e
)
(
map
(
toDoc
EN
))
<$>
PubMed
.
getMetadataWith
q
l
get
::
Query
->
Maybe
Limit
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
))
get
q
l
=
do
eRes
<-
PubMed
.
getMetadataWithC
q
l
pure
$
(
\
(
len
,
docsC
)
->
(
len
,
docsC
.|
mapC
(
toDoc
EN
)))
<$>
eRes
--either (\e -> panic $ "CRAWL: PubMed" <> e) (map (toDoc EN))
-- <$> PubMed.getMetadataWithC q l
toDoc
::
Lang
->
PubMedDoc
.
PubMed
->
HyperdataDocument
toDoc
l
(
PubMedDoc
.
PubMed
(
PubMedDoc
.
PubMedArticle
t
j
as
aus
)
(
PubMedDoc
.
PubMedDate
a
y
m
d
)
toDoc
l
(
PubMedDoc
.
PubMed
{
pubmed_id
,
pubmed_article
=
PubMedDoc
.
PubMedArticle
t
j
as
aus
,
pubmed_date
=
PubMedDoc
.
PubMedDate
a
y
m
d
}
)
=
HyperdataDocument
{
_hd_bdd
=
Just
"PubMed"
,
_hd_doi
=
Nothing
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Nothing
,
_hd_uniqId
=
Just
$
Text
.
pack
$
show
pubmed_id
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
t
...
...
src/Gargantext/Core/Types/Main.hs
View file @
a43c33d0
...
...
@@ -34,7 +34,7 @@ import Gargantext.Prelude
import
Servant.API
(
FromHttpApiData
(
..
),
ToHttpApiData
(
..
))
import
Test.QuickCheck
(
elements
)
import
Test.QuickCheck.Arbitrary
(
Arbitrary
,
arbitrary
)
import
Text.Read
(
read
)
import
Text.Read
(
read
Maybe
)
type
CorpusName
=
Text
------------------------------------------------------------------------
...
...
@@ -74,7 +74,11 @@ instance Semigroup ListType
instance
FromHttpApiData
ListType
where
parseUrlPiece
=
Right
.
read
.
unpack
parseUrlPiece
s
=
Right
s'
where
s'
=
case
(
readMaybe
$
unpack
s
)
of
Nothing
->
panic
$
"Cannot read url piece: "
<>
s
Just
s''
->
s''
instance
ToHttpApiData
ListType
where
toUrlPiece
=
pack
.
show
...
...
src/Gargantext/Core/Viz/Phylo.hs
View file @
a43c33d0
...
...
@@ -203,23 +203,23 @@ data PhyloConfig =
------------------------------------------------------------------------
data
PhyloSubConfig
=
PhyloSubConfig
{
_sc_phyloProximity
::
Proximity
,
_sc_phyloSynchrony
::
Synchrony
,
_sc_phyloQuality
::
Quality
PhyloSubConfig
{
_sc_phyloProximity
::
Double
,
_sc_phyloSynchrony
::
Double
,
_sc_phyloQuality
::
Double
,
_sc_timeUnit
::
TimeUnit
,
_sc_clique
::
Clique
,
_sc_exportFilter
::
[
Filter
]
,
_sc_exportFilter
::
Double
}
deriving
(
Show
,
Generic
,
Eq
)
subConfig2config
::
PhyloSubConfig
->
PhyloConfig
subConfig2config
subConfig
=
defaultConfig
{
phyloProximity
=
_sc_phyloProximity
subConfig
,
phyloSynchrony
=
_sc_phyloSynchrony
subConfig
,
phyloQuality
=
_sc_phyloQuality
subConfig
subConfig2config
subConfig
=
defaultConfig
{
phyloProximity
=
WeightedLogJaccard
$
_sc_phyloProximity
subConfig
,
phyloSynchrony
=
ByProximityThreshold
(
_sc_phyloSynchrony
subConfig
)
0
AllBranches
MergeAllGroups
,
phyloQuality
=
Quality
(
_sc_phyloQuality
subConfig
)
1
,
timeUnit
=
_sc_timeUnit
subConfig
,
clique
=
_sc_clique
subConfig
,
exportFilter
=
_sc_exportFilter
subConfig
,
exportFilter
=
[
ByBranchSize
$
_sc_exportFilter
subConfig
]
}
------------------------------------------------------------------------
...
...
src/Gargantext/Core/Viz/Phylo/API.hs
View file @
a43c33d0
...
...
@@ -30,7 +30,8 @@ import Gargantext.Core.Viz.Phylo.Example (phyloExample)
import
Gargantext.Core.Viz.Phylo.Legacy.LegacyMain
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
-- (PhyloId, ListId, CorpusId, UserId, NodeId(..))
import
Gargantext.Database.Query.Table.Node
(
insertNodes
,
node
)
import
Gargantext.Database.Query.Table.Node
(
getClosestParentIdByType
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Prelude
import
Network.HTTP.Media
((
//
),
(
/:
))
import
Servant
...
...
@@ -90,7 +91,10 @@ type GetPhylo = QueryParam "listId" ListId
-- Fix Filter parameters
-- TODO fix parameters to default config that should be in Node
getPhylo
::
PhyloId
->
GargServer
GetPhylo
getPhylo
phyloId
_lId
_level
_minSizeBranch
=
getPhyloDataJson
phyloId
getPhylo
phyloId
_lId
_level
_minSizeBranch
=
do
theData
<-
getPhyloDataJson
phyloId
-- printDebug "getPhylo" theData
pure
theData
getPhyloDataJson
::
PhyloId
->
GargNoServer
Value
getPhyloDataJson
phyloId
=
do
...
...
@@ -118,17 +122,19 @@ type PostPhylo = QueryParam "listId" ListId
-- :> ReqBody '[JSON] PhyloQueryBuild
:>
(
Post
'[
J
SON
]
NodeId
)
postPhylo
::
Corpus
Id
->
UserId
->
GargServer
PostPhylo
postPhylo
corpusId
userId
_lId
=
do
postPhylo
::
Phylo
Id
->
UserId
->
GargServer
PostPhylo
postPhylo
phyloId
_
userId
_lId
=
do
-- TODO get Reader settings
-- s <- ask
-- let
-- _vrs = Just ("1" :: Text)
-- _sft = Just (Software "Gargantext" "4")
-- _prm = initPhyloParam vrs sft (Just q)
phy
<-
flowPhyloAPI
defaultConfig
corpusId
-- params
phyloId
<-
insertNodes
[
node
NodePhylo
"Phylo"
(
HyperdataPhylo
Nothing
(
Just
phy
))
(
Just
corpusId
)
userId
]
pure
$
NodeId
(
fromIntegral
phyloId
)
corpusId
<-
getClosestParentIdByType
phyloId
NodeCorpus
phy
<-
flowPhyloAPI
defaultConfig
(
fromMaybe
(
panic
"[G.C.V.P.API] no corpus ID found"
)
corpusId
)
-- params
-- phyloId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just corpusId) userId]
_
<-
updateHyperdata
phyloId
(
HyperdataPhylo
Nothing
(
Just
phy
))
pure
phyloId
------------------------------------------------------------------------
-- | DELETE Phylo == delete a node
...
...
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
View file @
a43c33d0
...
...
@@ -9,7 +9,6 @@ Portability : POSIX
-}
module
Gargantext.Core.Viz.Phylo.API.Tools
where
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
View file @
a43c33d0
...
...
@@ -209,6 +209,7 @@ exportToDot phylo export =
graphAttrs
(
[
Label
(
toDotLabel
$
(
phyloName
$
getConfig
phylo
))]
<>
[
FontSize
30
,
LabelLoc
VTop
,
NodeSep
1
,
RankSep
[
1
],
Rank
SameRank
,
Splines
SplineEdges
,
Overlap
ScaleOverlaps
,
Ratio
FillRatio
-- , Ratio AutoRatio
,
Style
[
SItem
Filled
[]
],
Color
[
toWColor
White
]]
{-- home made attributes -}
<>
[(
toAttr
(
fromStrict
"phyloFoundations"
)
$
pack
$
show
(
length
$
Vector
.
toList
$
getRoots
phylo
))
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
a43c33d0
...
...
@@ -46,8 +46,10 @@ module Gargantext.Database.Action.Flow -- (flowDatabase, ngrams2list)
)
where
import
Conduit
import
Control.Lens
((
^.
),
view
,
_Just
,
makeLenses
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Conduit.Internal
(
zipSources
)
import
Data.Either
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Hashable
(
Hashable
)
...
...
@@ -60,6 +62,7 @@ import qualified Data.Text as T
import
Data.Traversable
(
traverse
)
import
Data.Tuple.Extra
(
first
,
second
)
import
GHC.Generics
(
Generic
)
import
Servant.Client
(
ClientError
)
import
System.FilePath
(
FilePath
)
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Gargantext.Data.HashMap.Strict.Utils
as
HashMap
...
...
@@ -103,6 +106,7 @@ import Gargantext.Prelude
import
Gargantext.Prelude.Crypto.Hash
(
Hash
)
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
qualified
Gargantext.Database.Query.Table.Node.Document.Add
as
Doc
(
add
)
import
qualified
Prelude
as
Prelude
------------------------------------------------------------------------
-- Imports for upgrade function
...
...
@@ -127,7 +131,8 @@ allDataOrigins = map InternalOrigin API.externalAPIs
---------------
data
DataText
=
DataOld
!
[
NodeId
]
|
DataNew
!
[[
HyperdataDocument
]]
|
DataNew
!
(
Maybe
Integer
,
ConduitT
()
HyperdataDocument
IO
()
)
-- | DataNew ![[HyperdataDocument]]
-- TODO use the split parameter in config file
getDataText
::
FlowCmdM
env
err
m
...
...
@@ -135,10 +140,10 @@ getDataText :: FlowCmdM env err m
->
TermType
Lang
->
API
.
Query
->
Maybe
API
.
Limit
->
m
DataText
getDataText
(
ExternalOrigin
api
)
la
q
li
=
liftBase
$
DataNew
<$>
splitEvery
500
<$>
API
.
get
api
(
_tt_lang
la
)
q
li
->
m
(
Either
ClientError
DataText
)
getDataText
(
ExternalOrigin
api
)
la
q
li
=
liftBase
$
do
eRes
<-
API
.
get
api
(
_tt_lang
la
)
q
li
pure
$
DataNew
<$>
eRes
getDataText
(
InternalOrigin
_
)
_la
q
_li
=
do
(
_masterUserId
,
_masterRootId
,
cId
)
<-
getOrMk_RootWithCorpus
...
...
@@ -146,10 +151,11 @@ getDataText (InternalOrigin _) _la q _li = do
(
Left
""
)
(
Nothing
::
Maybe
HyperdataCorpus
)
ids
<-
map
fst
<$>
searchDocInDatabase
cId
(
stemIt
q
)
pure
$
DataOld
ids
pure
$
Right
$
DataOld
ids
-------------------------------------------------------------------------------
flowDataText
::
(
FlowCmdM
env
err
m
flowDataText
::
forall
env
err
m
.
(
FlowCmdM
env
err
m
)
=>
User
->
DataText
...
...
@@ -161,7 +167,8 @@ flowDataText :: ( FlowCmdM env err m
flowDataText
u
(
DataOld
ids
)
tt
cid
mfslw
_
=
flowCorpusUser
(
_tt_lang
tt
)
u
(
Right
[
cid
])
corpusType
ids
mfslw
where
corpusType
=
(
Nothing
::
Maybe
HyperdataCorpus
)
flowDataText
u
(
DataNew
txt
)
tt
cid
mfslw
logStatus
=
flowCorpus
u
(
Right
[
cid
])
tt
mfslw
txt
logStatus
flowDataText
u
(
DataNew
(
mLen
,
txtC
))
tt
cid
mfslw
logStatus
=
flowCorpus
u
(
Right
[
cid
])
tt
mfslw
(
mLen
,
(
transPipe
liftBase
txtC
))
logStatus
------------------------------------------------------------------------
-- TODO use proxy
...
...
@@ -173,8 +180,9 @@ flowAnnuaire :: (FlowCmdM env err m)
->
(
JobLog
->
m
()
)
->
m
AnnuaireId
flowAnnuaire
u
n
l
filePath
logStatus
=
do
docs
<-
liftBase
$
((
splitEvery
500
<$>
readFile_Annuaire
filePath
)
::
IO
[[
HyperdataContact
]])
flow
(
Nothing
::
Maybe
HyperdataAnnuaire
)
u
n
l
Nothing
docs
logStatus
-- TODO Conduit for file
docs
<-
liftBase
$
((
readFile_Annuaire
filePath
)
::
IO
[
HyperdataContact
])
flow
(
Nothing
::
Maybe
HyperdataAnnuaire
)
u
n
l
Nothing
(
Just
$
fromIntegral
$
length
docs
,
yieldMany
docs
)
logStatus
------------------------------------------------------------------------
flowCorpusFile
::
(
FlowCmdM
env
err
m
)
...
...
@@ -185,12 +193,13 @@ flowCorpusFile :: (FlowCmdM env err m)
->
Maybe
FlowSocialListWith
->
(
JobLog
->
m
()
)
->
m
CorpusId
flowCorpusFile
u
n
l
la
ff
fp
mfslw
logStatus
=
do
flowCorpusFile
u
n
_
l
la
ff
fp
mfslw
logStatus
=
do
eParsed
<-
liftBase
$
parseFile
ff
fp
case
eParsed
of
Right
parsed
->
do
let
docs
=
splitEvery
500
$
take
l
parsed
flowCorpus
u
n
la
mfslw
(
map
(
map
toHyperdataDocument
)
docs
)
logStatus
flowCorpus
u
n
la
mfslw
(
Just
$
fromIntegral
$
length
parsed
,
yieldMany
parsed
.|
mapC
toHyperdataDocument
)
logStatus
--let docs = splitEvery 500 $ take l parsed
--flowCorpus u n la mfslw (yieldMany $ map (map toHyperdataDocument) docs) logStatus
Left
e
->
panic
$
"Error: "
<>
(
T
.
pack
e
)
------------------------------------------------------------------------
...
...
@@ -201,13 +210,14 @@ flowCorpus :: (FlowCmdM env err m, FlowCorpus a)
->
Either
CorpusName
[
CorpusId
]
->
TermType
Lang
->
Maybe
FlowSocialListWith
->
[[
a
]]
->
(
Maybe
Integer
,
ConduitT
()
a
m
()
)
->
(
JobLog
->
m
()
)
->
m
CorpusId
flowCorpus
=
flow
(
Nothing
::
Maybe
HyperdataCorpus
)
flow
::
(
FlowCmdM
env
err
m
flow
::
forall
env
err
m
a
c
.
(
FlowCmdM
env
err
m
,
FlowCorpus
a
,
MkCorpus
c
)
...
...
@@ -216,23 +226,40 @@ flow :: ( FlowCmdM env err m
->
Either
CorpusName
[
CorpusId
]
->
TermType
Lang
->
Maybe
FlowSocialListWith
->
[[
a
]]
->
(
Maybe
Integer
,
ConduitT
()
a
m
()
)
->
(
JobLog
->
m
()
)
->
m
CorpusId
flow
c
u
cn
la
mfslw
docs
logStatus
=
do
flow
c
u
cn
la
mfslw
(
mLength
,
docsC
)
logStatus
=
do
-- TODO if public insertMasterDocs else insertUserDocs
ids
<-
traverse
(
\
(
idx
,
doc
)
->
do
id
<-
insertMasterDocs
c
la
doc
logStatus
JobLog
{
_scst_succeeded
=
Just
$
1
+
idx
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
$
length
docs
-
idx
,
_scst_events
=
Just
[]
}
pure
id
)
(
zip
[
1
..
]
docs
)
flowCorpusUser
(
la
^.
tt_lang
)
u
cn
c
(
concat
ids
)
mfslw
ids
<-
runConduit
$
zipSources
(
yieldMany
[
1
..
])
docsC
.|
mapMC
insertDoc
.|
sinkList
-- ids <- traverse (\(idx, doc) -> do
-- id <- insertMasterDocs c la doc
-- logStatus JobLog { _scst_succeeded = Just $ 1 + idx
-- , _scst_failed = Just 0
-- , _scst_remaining = Just $ length docs - idx
-- , _scst_events = Just []
-- }
-- pure id
-- ) (zip [1..] docs)
flowCorpusUser
(
la
^.
tt_lang
)
u
cn
c
ids
mfslw
where
insertDoc
::
(
Integer
,
a
)
->
m
NodeId
insertDoc
(
idx
,
doc
)
=
do
id
<-
insertMasterDocs
c
la
[
doc
]
case
mLength
of
Nothing
->
pure
()
Just
len
->
do
logStatus
JobLog
{
_scst_succeeded
=
Just
$
fromIntegral
$
1
+
idx
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
$
fromIntegral
$
len
-
idx
,
_scst_events
=
Just
[]
}
pure
$
Prelude
.
head
id
------------------------------------------------------------------------
...
...
@@ -250,7 +277,7 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
-- User Flow
(
userId
,
_rootId
,
userCorpusId
)
<-
getOrMk_RootWithCorpus
user
corpusName
ctype
-- NodeTexts is first
_tId
<-
insertDefaultNode
NodeTexts
userCorpusId
userId
_tId
<-
insertDefaultNode
IfNotExists
NodeTexts
userCorpusId
userId
-- printDebug "NodeTexts: " tId
-- NodeList is second
...
...
@@ -276,8 +303,8 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
-- _ <- insertOccsUpdates userCorpusId mastListId
-- printDebug "userListId" userListId
-- User Graph Flow
_
<-
insertDefaultNode
NodeDashboard
userCorpusId
userId
_
<-
insertDefaultNode
NodeGraph
userCorpusId
userId
_
<-
insertDefaultNode
IfNotExists
NodeDashboard
userCorpusId
userId
_
<-
insertDefaultNode
IfNotExists
NodeGraph
userCorpusId
userId
--
_
<-
mkPhylo
userCorpusId
userId
-- Annuaire Flow
-- _ <- mkAnnuaire rootUserId userId
...
...
@@ -322,7 +349,7 @@ saveDocNgramsWith :: ( FlowCmdM env err m)
->
m
()
saveDocNgramsWith
lId
mapNgramsDocs'
=
do
terms2id
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
printDebug
"terms2id"
terms2id
--
printDebug "terms2id" terms2id
let
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
...
...
@@ -331,7 +358,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
$
map
(
first
_ngramsTerms
.
second
Map
.
keys
)
$
HashMap
.
toList
mapNgramsDocs
printDebug
"saveDocNgramsWith"
mapCgramsId
--
printDebug "saveDocNgramsWith" mapCgramsId
-- insertDocNgrams
_return
<-
insertContextNodeNgrams2
$
catMaybes
[
ContextNodeNgrams2
<$>
Just
nId
...
...
@@ -504,5 +531,3 @@ extractInsert docs = do
documentsWithId
_
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
pure
()
src/Gargantext/Database/Action/User/New.hs
View file @
a43c33d0
...
...
@@ -27,7 +27,7 @@ import Gargantext.Database.Query.Table.User
import
Gargantext.Prelude
import
Gargantext.Prelude.Crypto.Pass.User
(
gargPass
)
import
Gargantext.Prelude.Mail.Types
(
MailConfig
)
------------------------------------------------------------------------
------------------------------------------------------------------------
newUsers
::
(
CmdM
env
err
m
,
MonadRandom
m
,
HasNodeError
err
,
HasMail
env
)
=>
[
EmailAddress
]
->
m
Int64
...
...
@@ -35,6 +35,18 @@ newUsers us = do
us'
<-
mapM
newUserQuick
us
config
<-
view
$
mailSettings
newUsers'
config
us'
------------------------------------------------------------------------
updateUsersPassword
::
(
CmdM
env
err
m
,
MonadRandom
m
,
HasNodeError
err
,
HasMail
env
)
=>
[
EmailAddress
]
->
m
Int64
updateUsersPassword
us
=
do
us'
<-
mapM
newUserQuick
us
config
<-
view
$
mailSettings
_
<-
mapM
(
\
u
->
updateUser
(
SendEmail
True
)
config
u
)
us'
pure
1
------------------------------------------------------------------------
------------------------------------------------------------------------
newUserQuick
::
(
MonadRandom
m
)
=>
Text
->
m
(
NewUser
GargPassword
)
...
...
@@ -44,6 +56,7 @@ newUserQuick n = do
Just
(
u'
,
_m
)
->
u'
Nothing
->
panic
"[G.D.A.U.N.newUserQuick]: Email invalid"
pure
(
NewUser
u
n
(
GargPassword
pass
))
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | guessUserName
...
...
@@ -68,7 +81,6 @@ newUsers' cfg us = do
printDebug
"newUsers'"
us
pure
r
------------------------------------------------------------------------
updateUser
::
HasNodeError
err
=>
SendEmail
->
MailConfig
->
NewUser
GargPassword
->
Cmd
err
Int64
updateUser
(
SendEmail
send
)
cfg
u
=
do
...
...
src/Gargantext/Database/Prelude.hs
View file @
a43c33d0
...
...
@@ -24,7 +24,7 @@ import Data.ByteString.Char8 (hPutStrLn)
import
Data.Either.Extra
(
Either
)
import
Data.Pool
(
Pool
,
withResource
)
import
Data.Profunctor.Product.Default
(
Default
)
import
Data.Text
(
unpack
,
Text
)
import
Data.Text
(
pack
,
unpack
,
Text
)
import
Data.Word
(
Word16
)
import
Database.PostgreSQL.Simple
(
Connection
,
connect
)
import
Database.PostgreSQL.Simple.FromField
(
Conversion
,
ResultError
(
ConversionFailed
),
fromField
,
returnError
)
...
...
@@ -36,7 +36,7 @@ import Opaleye (Unpackspec, showSql, FromFields, Select, runSelect, SqlJsonb, De
import
Opaleye.Aggregate
(
countRows
)
import
System.IO
(
FilePath
)
import
System.IO
(
stderr
)
import
Text.Read
(
read
)
import
Text.Read
(
read
Maybe
)
import
qualified
Data.ByteString
as
DB
import
qualified
Data.List
as
DL
import
qualified
Database.PostgreSQL.Simple
as
PGS
...
...
@@ -176,9 +176,13 @@ databaseParameters :: FilePath -> IO PGS.ConnectInfo
databaseParameters
fp
=
do
ini
<-
readIniFile'
fp
let
val'
key
=
unpack
$
val
ini
"database"
key
let
dbPortRaw
=
val'
"DB_PORT"
let
dbPort
=
case
(
readMaybe
dbPortRaw
::
Maybe
Word16
)
of
Nothing
->
panic
$
"DB_PORT incorrect: "
<>
(
pack
dbPortRaw
)
Just
d
->
d
pure
$
PGS
.
ConnectInfo
{
PGS
.
connectHost
=
val'
"DB_HOST"
,
PGS
.
connectPort
=
read
(
val'
"DB_PORT"
)
::
Word16
,
PGS
.
connectPort
=
dbPort
,
PGS
.
connectUser
=
val'
"DB_USER"
,
PGS
.
connectPassword
=
val'
"DB_PASS"
,
PGS
.
connectDatabase
=
val'
"DB_NAME"
...
...
src/Gargantext/Database/Query/Table/Node.hs
View file @
a43c33d0
...
...
@@ -255,6 +255,14 @@ insertDefaultNode :: HasDBid NodeType
=>
NodeType
->
ParentId
->
UserId
->
Cmd
err
[
NodeId
]
insertDefaultNode
nt
p
u
=
insertNode
nt
Nothing
Nothing
p
u
insertDefaultNodeIfNotExists
::
HasDBid
NodeType
=>
NodeType
->
ParentId
->
UserId
->
Cmd
err
[
NodeId
]
insertDefaultNodeIfNotExists
nt
p
u
=
do
children
<-
getChildrenByType
p
nt
case
children
of
[]
->
insertDefaultNode
nt
p
u
xs
->
pure
xs
insertNode
::
HasDBid
NodeType
=>
NodeType
->
Maybe
Name
->
Maybe
DefaultHyperdata
->
ParentId
->
UserId
->
Cmd
err
[
NodeId
]
insertNode
nt
n
h
p
u
=
insertNodesR
[
nodeW
nt
n
h
p
u
]
...
...
stack.yaml
View file @
a43c33d0
...
...
@@ -69,11 +69,11 @@ extra-deps:
# External Data API connectors
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit
:
9cdba6423decad5acfacb0f274212fd8723ce73
4
commit
:
12cb16c391577bff4295e3dd1b126281d78037b
4
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit
:
daeae80365250c4bd539f0a65e271f9aa37f731f
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
commit
:
020f5f9b308f5c23c925aedf5fb11f8b4728fb19
commit
:
3bf77f28d3dc71d2e8349cbf422a34cf4c23cd11
-
git
:
https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit
:
3db385e767d2100d8abe900833c6e7de3ac55e1b
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment