Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
46d254d9
Commit
46d254d9
authored
Dec 09, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev-merge' into dev
parents
0322ffc8
bdd5deec
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
143 additions
and
28 deletions
+143
-28
.gitlab-ci.yml
.gitlab-ci.yml
+1
-1
package.yaml
package.yaml
+1
-0
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+1
-1
Searx.hs
src/Gargantext/API/Node/Corpus/Searx.hs
+135
-24
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+1
-1
Insert.hs
src/Gargantext/Database/Query/Table/Node/Document/Insert.hs
+4
-1
No files found.
.gitlab-ci.yml
View file @
46d254d9
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
# https://vadosware.io/post/zero-to-continuous-integrated-testing-a-haskell-project-with-gitlab/
# https://vadosware.io/post/zero-to-continuous-integrated-testing-a-haskell-project-with-gitlab/
#
#
#
#
image
:
cgenie/stack-build:lts-1
7.13
-garg
image
:
cgenie/stack-build:lts-1
8.18
-garg
variables
:
variables
:
STACK_ROOT
:
"
${CI_PROJECT_DIR}/.stack-root"
STACK_ROOT
:
"
${CI_PROJECT_DIR}/.stack-root"
...
...
package.yaml
View file @
46d254d9
...
@@ -246,6 +246,7 @@ library:
...
@@ -246,6 +246,7 @@ library:
-
timezone-series
-
timezone-series
-
transformers
-
transformers
-
transformers-base
-
transformers-base
-
tuple
-
unordered-containers
-
unordered-containers
-
utf8-string
-
utf8-string
-
uuid
-
uuid
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
46d254d9
...
@@ -200,7 +200,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
...
@@ -200,7 +200,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
Just
Web
->
do
Just
Web
->
do
printDebug
"[addToCorpusWithQuery] processing web request"
datafield
printDebug
"[addToCorpusWithQuery] processing web request"
datafield
_
<-
triggerSearxSearch
cid
q
l
_
<-
triggerSearxSearch
user
cid
q
l
logStatus
pure
JobLog
{
_scst_succeeded
=
Just
3
pure
JobLog
{
_scst_succeeded
=
Just
3
,
_scst_failed
=
Just
0
,
_scst_failed
=
Just
0
...
...
src/Gargantext/API/Node/Corpus/Searx.hs
View file @
46d254d9
...
@@ -7,32 +7,60 @@ module Gargantext.API.Node.Corpus.Searx where
...
@@ -7,32 +7,60 @@ module Gargantext.API.Node.Corpus.Searx where
import
Control.Lens
(
view
)
import
Control.Lens
(
view
)
import
qualified
Data.Aeson
as
Aeson
import
qualified
Data.Aeson
as
Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Either
(
Either
(
..
))
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.Text
as
T
import
qualified
Data.Text
as
T
import
Data.Time.Calendar
(
Day
,
toGregorian
)
import
Data.Time.Format
(
defaultTimeLocale
,
formatTime
,
parseTimeM
)
import
Data.Tuple.Select
(
sel1
,
sel2
,
sel3
)
import
GHC.Generics
(
Generic
)
import
GHC.Generics
(
Generic
)
import
Network.HTTP.Client
import
Network.HTTP.Client
import
Network.HTTP.Client.TLS
import
Network.HTTP.Client.TLS
import
qualified
Prelude
as
Prelude
import
qualified
Prelude
as
Prelude
import
Protolude
(
encodeUtf8
,
Text
,
Either
)
import
Protolude
(
catMaybes
,
encodeUtf8
,
rightToMaybe
,
Text
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Prelude.Config
import
Gargantext.Prelude.Config
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
))
--import Gargantext.API.Admin.Types (HasSettings)
import
Gargantext.API.Job
(
jobLogSuccess
)
import
Gargantext.Core
(
Lang
(
..
),
PosTagAlgo
(
..
))
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
Gargantext.Core.Text.List
(
buildNgramsLists
)
import
Gargantext.Core.Text.List.Group.WithStem
(
{-StopSize(..),-}
GroupParams
(
..
))
import
Gargantext.Core.Text.Terms
(
TermType
(
..
))
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Database.Action.Flow
(
insertMasterDocs
)
--, DataText(..))
import
Gargantext.Database.Action.Flow.List
(
flowList_DbRepo
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
)
import
Gargantext.Database.Action.User
(
getUserId
)
import
Gargantext.Database.Admin.Config
(
userMaster
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataCorpus
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
ListId
)
import
Gargantext.Database.Prelude
(
hasConfig
)
import
Gargantext.Database.Prelude
(
hasConfig
)
import
Gargantext.Database.Query.Table.Node
(
defaultListMaybe
,
getOrMkList
)
import
Gargantext.Database.Query.Tree.Root
(
getOrMk_RootWithCorpus
)
import
qualified
Gargantext.Database.Query.Table.Node.Document.Add
as
Doc
(
add
)
langToSearx
::
Lang
->
Text
langToSearx
EN
=
"en-US"
langToSearx
FR
=
"fr-FR"
langToSearx
All
=
"en-US"
data
SearxResult
=
SearxResult
data
SearxResult
=
SearxResult
{
_sr_url
::
Text
{
_sr_url
::
Text
,
_sr_title
::
Text
,
_sr_title
::
Text
,
_sr_content
::
Maybe
Text
,
_sr_content
::
Maybe
Text
,
_sr_engine
::
Text
,
_sr_engine
::
Text
,
_sr_score
::
Double
,
_sr_score
::
Double
,
_sr_category
::
Text
,
_sr_category
::
Text
,
_sr_pretty_url
::
Text
}
,
_sr_pretty_url
::
Text
,
_sr_publishedDate
::
Text
-- "Nov 19, 2021"
,
_sr_pubdate
::
Text
-- "2021-11-19 02:12:00+0000"
}
deriving
(
Show
,
Eq
,
Generic
)
deriving
(
Show
,
Eq
,
Generic
)
-- , _sr_parsed_url
-- , _sr_parsed_url
-- , _sr_engines
-- , _sr_engines
...
@@ -76,34 +104,117 @@ fetchSearxPage (FetchSearxParams { _fsp_language
...
@@ -76,34 +104,117 @@ fetchSearxPage (FetchSearxParams { _fsp_language
,
(
"categories"
,
"news"
)
-- https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/70#note_3976
,
(
"categories"
,
"news"
)
-- https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/70#note_3976
,
(
"pageno"
,
encodeUtf8
$
T
.
pack
$
show
_fsp_pageno
)
,
(
"pageno"
,
encodeUtf8
$
T
.
pack
$
show
_fsp_pageno
)
--, ("time_range", "None")
--, ("time_range", "None")
,
(
"language"
,
encodeUtf8
$
T
.
pack
$
show
_fsp_language
)
,
(
"language"
,
encodeUtf8
$
langToSearx
_fsp_language
)
,
(
"format"
,
"json"
)
,
(
"format"
,
"json"
)
]
req
]
req
res
<-
httpLbs
request
_fsp_manager
res
<-
httpLbs
request
_fsp_manager
let
dec
=
Aeson
.
eitherDecode
$
responseBody
res
::
(
Either
Prelude
.
String
SearxResponse
)
let
dec
=
Aeson
.
eitherDecode
$
responseBody
res
::
(
Either
Prelude
.
String
SearxResponse
)
pure
dec
pure
dec
insertSearxResponse
::
(
MonadBase
IO
m
,
FlowCmdM
env
err
m
)
=>
User
->
CorpusId
->
ListId
->
Lang
->
Either
Prelude
.
String
SearxResponse
->
m
()
insertSearxResponse
_
_
_
_
(
Left
_
)
=
pure
()
insertSearxResponse
user
cId
listId
l
(
Right
(
SearxResponse
{
_srs_results
}))
=
do
let
docs
=
hyperdataDocumentFromSearxResult
l
<$>
_srs_results
--printDebug "[triggerSearxSearch] docs" docs
-- docs :: [Either Text HyperdataDocument]
let
docs'
=
catMaybes
$
rightToMaybe
<$>
docs
Prelude
.
mapM_
(
\
(
HyperdataDocument
{
_hd_title
,
_hd_publication_year
,
_hd_publication_date
})
->
do
printDebug
"[triggerSearxSearch] doc time"
$
"[title] "
<>
(
show
_hd_title
)
<>
" :: [publication_year] "
<>
(
show
_hd_publication_year
)
<>
" :: [publication_date] "
<>
(
show
_hd_publication_date
)
)
docs'
--
_
<-
flowDataText
user
(
DataNew
[
docs'
])
(
Multi
l
)
cId
Nothing
logStatus
let
mCorpus
=
Nothing
::
Maybe
HyperdataCorpus
ids
<-
insertMasterDocs
mCorpus
(
Multi
l
)
docs'
_
<-
Doc
.
add
cId
ids
(
_masterUserId
,
_masterRootId
,
masterCorpusId
)
<-
getOrMk_RootWithCorpus
(
UserName
userMaster
)
(
Left
""
)
mCorpus
let
gp
=
GroupWithPosTag
l
CoreNLP
HashMap
.
empty
ngs
<-
buildNgramsLists
user
cId
masterCorpusId
Nothing
gp
_userListId
<-
flowList_DbRepo
listId
ngs
pure
()
-- TODO Make an async task out of this?
triggerSearxSearch
::
(
MonadBase
IO
m
,
FlowCmdM
env
err
m
)
triggerSearxSearch
::
(
MonadBase
IO
m
,
FlowCmdM
env
err
m
)
=>
CorpusId
=>
User
->
CorpusId
->
API
.
Query
->
API
.
Query
->
Lang
->
Lang
->
m
()
->
(
JobLog
->
m
()
)
->
m
JobLog
triggerSearxSearch
cid
q
l
=
do
triggerSearxSearch
user
cId
q
l
logStatus
=
do
printDebug
"[triggerSearxSearch] cid"
cid
let
numPages
=
100
let
jobLog
=
JobLog
{
_scst_succeeded
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
numPages
,
_scst_events
=
Just
[]
}
logStatus
jobLog
printDebug
"[triggerSearxSearch] cId"
cId
printDebug
"[triggerSearxSearch] q"
q
printDebug
"[triggerSearxSearch] q"
q
printDebug
"[triggerSearxSearch] l"
l
printDebug
"[triggerSearxSearch] l"
l
cfg
<-
view
hasConfig
cfg
<-
view
hasConfig
uId
<-
getUserId
user
let
surl
=
_gc_frame_searx_url
cfg
let
surl
=
_gc_frame_searx_url
cfg
printDebug
"[triggerSearxSearch] surl"
surl
printDebug
"[triggerSearxSearch] surl"
surl
mListId
<-
defaultListMaybe
cId
listId
<-
case
mListId
of
Nothing
->
do
listId
<-
getOrMkList
cId
uId
pure
listId
Just
listId
->
pure
listId
printDebug
"[triggerSearxSearch] listId"
listId
manager
<-
liftBase
$
newManager
tlsManagerSettings
manager
<-
liftBase
$
newManager
tlsManagerSettings
res
<-
liftBase
$
fetchSearxPage
$
FetchSearxParams
{
_fsp_language
=
l
_
<-
mapM
(
\
page
->
do
,
_fsp_manager
=
manager
res
<-
liftBase
$
fetchSearxPage
$
FetchSearxParams
{
_fsp_language
=
l
,
_fsp_pageno
=
1
,
_fsp_manager
=
manager
,
_fsp_query
=
q
,
_fsp_pageno
=
page
,
_fsp_url
=
surl
}
,
_fsp_query
=
q
,
_fsp_url
=
surl
}
printDebug
"[triggerSearxSearch] res"
res
insertSearxResponse
user
cId
listId
l
res
logStatus
$
JobLog
{
_scst_succeeded
=
Just
page
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
(
numPages
-
page
)
,
_scst_events
=
Just
[]
}
)
[
1
..
numPages
]
--printDebug "[triggerSearxSearch] res" res
pure
$
jobLogSuccess
jobLog
hyperdataDocumentFromSearxResult
::
Lang
->
SearxResult
->
Either
T
.
Text
HyperdataDocument
hyperdataDocumentFromSearxResult
l
(
SearxResult
{
_sr_content
,
_sr_engine
,
_sr_pubdate
,
_sr_title
})
=
do
let
mDate
=
parseTimeM
False
defaultTimeLocale
"%Y-%m-%d %H:%M:%S+0000"
(
T
.
unpack
_sr_pubdate
)
::
Maybe
Day
let
mGregorian
=
toGregorian
<$>
mDate
Right
HyperdataDocument
{
_hd_bdd
=
Just
"Searx"
,
_hd_doi
=
Nothing
,
_hd_url
=
Nothing
,
_hd_uniqId
=
Nothing
,
_hd_uniqIdBdd
=
Nothing
,
_hd_page
=
Nothing
,
_hd_title
=
Just
_sr_title
,
_hd_authors
=
Nothing
,
_hd_institutes
=
Nothing
,
_hd_source
=
Just
_sr_engine
,
_hd_abstract
=
_sr_content
,
_hd_publication_date
=
T
.
pack
<$>
formatTime
defaultTimeLocale
"%Y-%m-%dT%H:%M:%S"
<$>
mDate
,
_hd_publication_year
=
fromIntegral
<$>
sel1
<$>
mGregorian
,
_hd_publication_month
=
sel2
<$>
mGregorian
,
_hd_publication_day
=
sel3
<$>
mGregorian
,
_hd_publication_hour
=
Nothing
,
_hd_publication_minute
=
Nothing
,
_hd_publication_second
=
Nothing
,
_hd_language_iso2
=
Just
$
T
.
pack
$
show
l
}
pure
()
src/Gargantext/Database/Action/Flow.hs
View file @
46d254d9
...
@@ -71,9 +71,9 @@ import Gargantext.Core.Ext.IMT (toSchoolName)
...
@@ -71,9 +71,9 @@ import Gargantext.Core.Ext.IMT (toSchoolName)
import
Gargantext.Core.Ext.IMTUser
(
readFile_Annuaire
)
import
Gargantext.Core.Ext.IMTUser
(
readFile_Annuaire
)
import
Gargantext.Core.Flow.Types
import
Gargantext.Core.Flow.Types
import
Gargantext.Core.Text
import
Gargantext.Core.Text
import
Gargantext.Core.Text.List.Group.WithStem
(
{-StopSize(..),-}
GroupParams
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers
(
parseFile
,
FileFormat
)
import
Gargantext.Core.Text.Corpus.Parsers
(
parseFile
,
FileFormat
)
import
Gargantext.Core.Text.List
(
buildNgramsLists
)
import
Gargantext.Core.Text.List
(
buildNgramsLists
)
import
Gargantext.Core.Text.List.Group.WithStem
(
{-StopSize(..),-}
GroupParams
(
..
))
import
Gargantext.Core.Text.List.Social
(
FlowSocialListWith
)
import
Gargantext.Core.Text.List.Social
(
FlowSocialListWith
)
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Terms.Mono.Stem.En
(
stemIt
)
import
Gargantext.Core.Text.Terms.Mono.Stem.En
(
stemIt
)
...
...
src/Gargantext/Database/Query/Table/Node/Document/Insert.hs
View file @
46d254d9
...
@@ -279,7 +279,10 @@ instance ToNode HyperdataDocument where
...
@@ -279,7 +279,10 @@ instance ToNode HyperdataDocument where
where
where
n
=
maybe
"No Title"
(
DT
.
take
255
)
(
_hd_title
h
)
n
=
maybe
"No Title"
(
DT
.
take
255
)
(
_hd_title
h
)
date
=
jour
y
m
d
date
=
jour
y
m
d
y
=
maybe
0
fromIntegral
$
_hd_publication_year
h
-- NOTE: There is no year '0' in postgres, there is year 1 AD and beofre that year 1 BC:
-- select '0001-01-01'::date, '0001-01-01'::date - '1 day'::interval;
-- 0001-01-01 0001-12-31 00:00:00 BC
y
=
maybe
1
fromIntegral
$
_hd_publication_year
h
m
=
fromMaybe
1
$
_hd_publication_month
h
m
=
fromMaybe
1
$
_hd_publication_month
h
d
=
fromMaybe
1
$
_hd_publication_day
h
d
=
fromMaybe
1
$
_hd_publication_day
h
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment