Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
157
Issues
157
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
91e68737
Commit
91e68737
authored
Dec 08, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[searx] custom flow for searx docs insert
parent
0e384787
Pipeline
#2236
failed with stage
in 10 minutes and 22 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
63 additions
and
40 deletions
+63
-40
Searx.hs
src/Gargantext/API/Node/Corpus/Searx.hs
+59
-39
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+1
-1
Insert.hs
src/Gargantext/Database/Query/Table/Node/Document/Insert.hs
+3
-0
No files found.
src/Gargantext/API/Node/Corpus/Searx.hs
View file @
91e68737
...
...
@@ -8,6 +8,7 @@ import Control.Lens (view)
import
qualified
Data.Aeson
as
Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Either
(
Either
(
..
))
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.Text
as
T
import
Data.Time.Calendar
(
Day
,
toGregorian
)
import
Data.Time.Format
(
defaultTimeLocale
,
formatTime
,
parseTimeM
)
...
...
@@ -23,20 +24,26 @@ import Gargantext.Prelude.Config
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
))
--import Gargantext.API.Admin.Types (HasSettings)
import
Gargantext.API.Job
(
jobLogSuccess
,
jobLogFailTotalWithMessage
)
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.API.Job
(
jobLogSuccess
)
import
Gargantext.Core
(
Lang
(
..
)
,
PosTagAlgo
(
..
)
)
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
Gargantext.Core.Text.List
(
buildNgramsLists
)
import
Gargantext.Core.Text.List.Group.WithStem
(
{-StopSize(..),-}
GroupParams
(
..
))
import
Gargantext.Core.Text.Terms
(
TermType
(
..
))
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Database.Action.Flow
(
flowDataText
,
DataText
(
..
))
import
Gargantext.Database.Action.Flow
(
insertMasterDocs
)
--, DataText(..))
import
Gargantext.Database.Action.Flow.List
(
flowList_DbRepo
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Admin.Config
()
import
Gargantext.Database.Action.User
(
getUserId
)
import
Gargantext.Database.Admin.Config
(
userMaster
)
import
Gargantext.Database.Admin.Types.Hyperdata.Corpus
(
HyperdataCorpus
)
import
Gargantext.Database.Admin.Types.Hyperdata.Document
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
)
import
Gargantext.Database.Prelude
(
hasConfig
)
import
Gargantext.Database.Query.Table.Node
(
defaultListMaybe
)
import
Gargantext.Database.Query.Table.Node
(
defaultListMaybe
,
getOrMkList
)
import
Gargantext.Database.Query.Tree.Root
(
getOrMk_RootWithCorpus
)
import
qualified
Gargantext.Database.Query.Table.Node.Document.Add
as
Doc
(
add
)
langToSearx
::
Lang
->
Text
langToSearx
EN
=
"en-US"
...
...
@@ -124,43 +131,56 @@ triggerSearxSearch user cId q l logStatus = do
printDebug
"[triggerSearxSearch] q"
q
printDebug
"[triggerSearxSearch] l"
l
cfg
<-
view
hasConfig
uId
<-
getUserId
user
let
surl
=
_gc_frame_searx_url
cfg
printDebug
"[triggerSearxSearch] surl"
surl
mListId
<-
defaultListMaybe
cId
case
mListId
of
listId
<-
case
mListId
of
Nothing
->
do
let
failedJobLog
=
jobLogFailTotalWithMessage
"[triggerSearxSearch] no list id"
jobLog
logStatus
failedJobLog
pure
failedJobLog
Just
listId
->
do
printDebug
"[triggerSearxSearch] listId"
listId
manager
<-
liftBase
$
newManager
tlsManagerSettings
res
<-
liftBase
$
fetchSearxPage
$
FetchSearxParams
{
_fsp_language
=
l
,
_fsp_manager
=
manager
,
_fsp_pageno
=
1
,
_fsp_query
=
q
,
_fsp_url
=
surl
}
--printDebug "[triggerSearxSearch] res" res
case
res
of
Left
_
->
pure
()
Right
(
SearxResponse
{
_srs_results
})
->
do
let
docs
=
hyperdataDocumentFromSearxResult
<$>
_srs_results
--printDebug "[triggerSearxSearch] docs" docs
-- docs :: [Either Text HyperdataDocument]
let
docs'
=
catMaybes
$
rightToMaybe
<$>
docs
Prelude
.
mapM_
(
\
(
HyperdataDocument
{
_hd_title
,
_hd_publication_year
,
_hd_publication_date
})
->
do
printDebug
"[triggerSearxSearch] doc time"
$
"[title] "
<>
(
show
_hd_title
)
<>
" :: [publication_year] "
<>
(
show
_hd_publication_year
)
<>
" :: [publication_date] "
<>
(
show
_hd_publication_date
)
)
docs'
_
<-
flowDataText
user
(
DataNew
[
docs'
])
(
Multi
EN
)
cId
Nothing
logStatus
pure
()
pure
$
jobLogSuccess
jobLog
--let failedJobLog = jobLogFailTotalWithMessage "[triggerSearxSearch] no list id" jobLog
--logStatus failedJobLog
--pure failedJobLog
listId
<-
getOrMkList
cId
uId
pure
listId
Just
listId
->
pure
listId
printDebug
"[triggerSearxSearch] listId"
listId
manager
<-
liftBase
$
newManager
tlsManagerSettings
res
<-
liftBase
$
fetchSearxPage
$
FetchSearxParams
{
_fsp_language
=
l
,
_fsp_manager
=
manager
,
_fsp_pageno
=
1
,
_fsp_query
=
q
,
_fsp_url
=
surl
}
--printDebug "[triggerSearxSearch] res" res
case
res
of
Left
_
->
pure
()
Right
(
SearxResponse
{
_srs_results
})
->
do
let
docs
=
hyperdataDocumentFromSearxResult
<$>
_srs_results
--printDebug "[triggerSearxSearch] docs" docs
-- docs :: [Either Text HyperdataDocument]
let
docs'
=
catMaybes
$
rightToMaybe
<$>
docs
Prelude
.
mapM_
(
\
(
HyperdataDocument
{
_hd_title
,
_hd_publication_year
,
_hd_publication_date
})
->
do
printDebug
"[triggerSearxSearch] doc time"
$
"[title] "
<>
(
show
_hd_title
)
<>
" :: [publication_year] "
<>
(
show
_hd_publication_year
)
<>
" :: [publication_date] "
<>
(
show
_hd_publication_date
)
)
docs'
--
_
<-
flowDataText
user
(
DataNew
[
docs'
])
(
Multi
EN
)
cId
Nothing
logStatus
let
mCorpus
=
Nothing
::
Maybe
HyperdataCorpus
ids
<-
insertMasterDocs
mCorpus
(
Multi
EN
)
docs'
_
<-
Doc
.
add
cId
ids
(
_masterUserId
,
_masterRootId
,
masterCorpusId
)
<-
getOrMk_RootWithCorpus
(
UserName
userMaster
)
(
Left
""
)
mCorpus
let
gp
=
GroupWithPosTag
l
CoreNLP
HashMap
.
empty
ngs
<-
buildNgramsLists
user
cId
masterCorpusId
Nothing
gp
_userListId
<-
flowList_DbRepo
listId
ngs
pure
()
pure
$
jobLogSuccess
jobLog
hyperdataDocumentFromSearxResult
::
SearxResult
->
Either
T
.
Text
HyperdataDocument
hyperdataDocumentFromSearxResult
(
SearxResult
{
_sr_content
,
_sr_engine
,
_sr_pubdate
,
_sr_title
})
=
do
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
91e68737
...
...
@@ -71,9 +71,9 @@ import Gargantext.Core.Ext.IMT (toSchoolName)
import
Gargantext.Core.Ext.IMTUser
(
readFile_Annuaire
)
import
Gargantext.Core.Flow.Types
import
Gargantext.Core.Text
import
Gargantext.Core.Text.List.Group.WithStem
(
{-StopSize(..),-}
GroupParams
(
..
))
import
Gargantext.Core.Text.Corpus.Parsers
(
parseFile
,
FileFormat
)
import
Gargantext.Core.Text.List
(
buildNgramsLists
)
import
Gargantext.Core.Text.List.Group.WithStem
(
{-StopSize(..),-}
GroupParams
(
..
))
import
Gargantext.Core.Text.List.Social
(
FlowSocialListWith
)
import
Gargantext.Core.Text.Terms
import
Gargantext.Core.Text.Terms.Mono.Stem.En
(
stemIt
)
...
...
src/Gargantext/Database/Query/Table/Node/Document/Insert.hs
View file @
91e68737
...
...
@@ -279,6 +279,9 @@ instance ToNode HyperdataDocument where
where
n
=
maybe
"No Title"
(
DT
.
take
255
)
(
_hd_title
h
)
date
=
jour
y
m
d
-- NOTE: There is no year '0' in postgres, there is year 1 AD and beofre that year 1 BC:
-- select '0001-01-01'::date, '0001-01-01'::date - '1 day'::interval;
-- 0001-01-01 0001-12-31 00:00:00 BC
y
=
maybe
1
fromIntegral
$
_hd_publication_year
h
m
=
fromMaybe
1
$
_hd_publication_month
h
d
=
fromMaybe
1
$
_hd_publication_day
h
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment