Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
22e2da48
Commit
22e2da48
authored
Jul 28, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[searx] first draft of searx parsing, updated stack to lts 18.4
parent
f8ba5ba1
Pipeline
#1681
passed with stage
in 54 minutes and 35 seconds
Changes
9
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
491 additions
and
55 deletions
+491
-55
search-api.org
docs/search-api.org
+288
-0
package.yaml
package.yaml
+1
-0
Settings.hs
src/Gargantext/API/Admin/Settings.hs
+1
-1
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+48
-49
Searx.hs
src/Gargantext/API/Node/Corpus/Searx.hs
+82
-0
Types.hs
src/Gargantext/API/Node/Corpus/Types.hs
+67
-0
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+1
-1
Utils.hs
src/Gargantext/Data/HashMap/Strict/Utils.hs
+0
-3
stack.yaml
stack.yaml
+3
-1
No files found.
docs/search-api.org
0 → 100644
View file @
22e2da48
This diff is collapsed.
Click to expand it.
package.yaml
View file @
22e2da48
...
...
@@ -188,6 +188,7 @@ library:
-
random
-
rdf4h
-
regex-compat
-
regex-tdfa
-
resource-pool
-
resourcet
-
safe
...
...
src/Gargantext/API/Admin/Settings.hs
View file @
22e2da48
...
...
@@ -166,7 +166,7 @@ newEnv port file = do
when
(
port
/=
settings'
^.
appPort
)
$
panic
"TODO: conflicting settings of port"
config'
<-
readConfig
file
config'
<-
readConfig
file
self_url
<-
parseBaseUrl
$
"http://0.0.0.0:"
<>
show
port
dbParam
<-
databaseParameters
file
pool
<-
newPool
dbParam
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
22e2da48
...
...
@@ -36,25 +36,26 @@ import Test.QuickCheck.Arbitrary
import
Gargantext.Prelude
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
qualified
Gargantext.API.Admin.Orchestrator.Types
as
T
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Node.Corpus.New.File
import
Gargantext.API.Node.Corpus.Searx
import
Gargantext.API.Node.Corpus.Types
import
Gargantext.API.Node.Types
import
Gargantext.Core
(
Lang
(
..
)
{-, allLangs-}
)
import
Gargantext.Database.Action.Mail
(
sendMail
)
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
qualified
Gargantext.Core.Text.Corpus.Parsers
as
Parser
(
FileFormat
(
..
),
parseFormat
)
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow
(
FlowCmdM
,
flowCorpus
,
getDataText
,
flowDataText
,
TermType
(
..
)
,
DataOrigin
(
..
)
{-, allDataOrigins-}
)
import
Gargantext.Database.Action.
User
(
getUserId
)
import
Gargantext.Database.Action.Flow
(
FlowCmdM
,
flowCorpus
,
getDataText
,
flowDataText
,
TermType
(
..
)
{-, allDataOrigins-}
)
import
Gargantext.Database.Action.
Mail
(
sendMail
)
import
Gargantext.Database.Action.Node
(
mkNodeWithParent
)
import
Gargantext.Database.Action.User
(
getUserId
)
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
NodeType
(
..
),
UserId
)
import
Gargantext.Database.Query.Table.Node
(
getNodeWith
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
qualified
Gargantext.Database.GargDB
as
GargDB
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
qualified
Gargantext.Core.Text.Corpus.Parsers
as
Parser
(
FileFormat
(
..
),
parseFormat
)
------------------------------------------------------------------------
{-
...
...
@@ -125,28 +126,11 @@ info :: FlowCmdM env err m => UserId -> m ApiInfo
info
_u
=
pure
$
ApiInfo
API
.
externalAPIs
------------------------------------------------------------------------
data
Database
=
Empty
|
PubMed
|
HAL
|
IsTex
|
Isidore
deriving
(
Eq
,
Show
,
Generic
)
deriveJSON
(
unPrefix
""
)
''
D
atabase
instance
ToSchema
Database
database2origin
::
Database
->
DataOrigin
database2origin
Empty
=
InternalOrigin
T
.
IsTex
database2origin
PubMed
=
ExternalOrigin
T
.
PubMed
database2origin
HAL
=
ExternalOrigin
T
.
HAL
database2origin
IsTex
=
ExternalOrigin
T
.
IsTex
database2origin
Isidore
=
ExternalOrigin
T
.
Isidore
------------------------------------------------------------------------
data
WithQuery
=
WithQuery
{
_wq_query
::
!
Text
,
_wq_databases
::
!
Database
,
_wq_datafield
::
!
Datafield
,
_wq_lang
::
!
Lang
,
_wq_node_id
::
!
Int
}
...
...
@@ -190,36 +174,51 @@ addToCorpusWithQuery :: FlowCmdM env err m
->
Maybe
Integer
->
(
JobLog
->
m
()
)
->
m
JobLog
addToCorpusWithQuery
user
cid
(
WithQuery
q
dbs
l
_nid
)
maybeLimit
logStatus
=
do
addToCorpusWithQuery
user
cid
(
WithQuery
q
dbs
datafield
l
_nid
)
maybeLimit
logStatus
=
do
-- TODO ...
logStatus
JobLog
{
_scst_succeeded
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
5
,
_scst_events
=
Just
[]
}
printDebug
"addToCorpusWithQuery"
(
cid
,
dbs
)
-- TODO add cid
-- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private
txts
<-
mapM
(
\
db
->
getDataText
db
(
Multi
l
)
q
maybeLimit
)
[
database2origin
dbs
]
logStatus
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
1
,
_scst_events
=
Just
[]
}
cids
<-
mapM
(
\
txt
->
flowDataText
user
txt
(
Multi
l
)
cid
)
txts
printDebug
"corpus id"
cids
printDebug
"sending email"
(
"xxxxxxxxxxxxxxxxxxxxx"
::
Text
)
sendMail
user
-- TODO ...
pure
JobLog
{
_scst_succeeded
=
Just
3
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_remaining
=
Just
3
,
_scst_events
=
Just
[]
}
printDebug
"[addToCorpusWithQuery] (cid, dbs)"
(
cid
,
dbs
)
printDebug
"[addToCorpusWithQuery] datafield"
datafield
case
datafield
of
Web
->
do
printDebug
"[addToCorpusWithQuery] processing web request"
datafield
_
<-
triggerSearxSearch
cid
q
l
pure
JobLog
{
_scst_succeeded
=
Just
3
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
_
->
do
-- TODO add cid
-- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private
txts
<-
mapM
(
\
db
->
getDataText
db
(
Multi
l
)
q
maybeLimit
)
[
database2origin
dbs
]
logStatus
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
1
,
_scst_events
=
Just
[]
}
cids
<-
mapM
(
\
txt
->
flowDataText
user
txt
(
Multi
l
)
cid
)
txts
printDebug
"corpus id"
cids
printDebug
"sending email"
(
"xxxxxxxxxxxxxxxxxxxxx"
::
Text
)
sendMail
user
-- TODO ...
pure
JobLog
{
_scst_succeeded
=
Just
3
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
type
AddWithForm
=
Summary
"Add with FormUrlEncoded to corpus endpoint"
...
...
src/Gargantext/API/Node/Corpus/Searx.hs
0 → 100644
View file @
22e2da48
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.API.Node.Corpus.Searx
where
import
Control.Lens
(
view
)
import
qualified
Data.Aeson
as
Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
qualified
Data.Text
as
T
import
GHC.Generics
(
Generic
)
import
Network.HTTP.Client
import
Network.HTTP.Client.TLS
import
Protolude
(
encodeUtf8
,
Text
)
import
Gargantext.Prelude
import
Gargantext.Prelude.Config
import
Gargantext.Core
(
Lang
(
..
))
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Database.Action.Flow
(
FlowCmdM
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
)
import
Gargantext.Database.Prelude
(
hasConfig
)
data
SearxResult
=
SearxResult
{
_sr_url
::
Text
,
_sr_title
::
Text
,
_sr_content
::
Text
,
_sr_engine
::
Text
,
_sr_score
::
Double
,
_sr_category
::
Text
,
_sr_pretty_url
::
Text
}
deriving
(
Show
,
Eq
,
Generic
)
-- , _sr_parsed_url
-- , _sr_engines
-- , _sr_positions
$
(
deriveJSON
(
unPrefix
"_sr_"
)
''
S
earxResult
)
data
SearxResponse
=
SearxResponse
{
_srs_query
::
Text
,
_srs_number_of_results
::
Int
,
_srs_results
::
[
SearxResult
]
}
deriving
(
Show
,
Eq
,
Generic
)
-- , _srs_answers
-- , _srs_corrections
-- , _srs_infoboxes
-- , _srs_suggestions :: [Text]
-- , _srs_unresponsive_engines :: [Text] }
$
(
deriveJSON
(
unPrefix
"_srs_"
)
''
S
earxResponse
)
triggerSearxSearch
::
(
MonadBase
IO
m
,
FlowCmdM
env
err
m
)
=>
CorpusId
->
API
.
Query
->
Lang
->
m
()
triggerSearxSearch
cid
q
l
=
do
printDebug
"[triggerSearxSearch] cid"
cid
printDebug
"[triggerSearxSearch] q"
q
printDebug
"[triggerSearxSearch] l"
l
cfg
<-
view
hasConfig
let
surl
=
_gc_frame_searx_url
cfg
printDebug
"[triggerSearxSearch] surl"
surl
res
<-
liftBase
$
do
manager
<-
newManager
tlsManagerSettings
req
<-
parseRequest
$
T
.
unpack
surl
let
request
=
urlEncodedBody
[
(
"category_general"
,
"1"
)
,
(
"q"
,
encodeUtf8
q
)
,
(
"pageno"
,
"1"
)
,
(
"time_range"
,
"None"
)
,
(
"language"
,
"en-US"
)
-- TODO
,
(
"format"
,
"json"
)]
req
httpLbs
request
manager
let
dec
=
Aeson
.
decode
$
responseBody
res
::
(
Maybe
SearxResponse
)
printDebug
"[triggerSearxSearch] dec"
dec
pure
()
src/Gargantext/API/Node/Corpus/Types.hs
0 → 100644
View file @
22e2da48
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.API.Node.Corpus.Types
where
import
Control.Lens
hiding
(
elements
,
Empty
)
import
Control.Monad.Fail
(
fail
)
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Monoid
(
mempty
)
import
Data.Swagger
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
T
import
GHC.Generics
(
Generic
)
import
Text.Regex.TDFA
((
=~
))
import
Protolude
((
++
))
import
Gargantext.Prelude
import
qualified
Gargantext.API.Admin.Orchestrator.Types
as
T
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Database.Action.Flow
(
DataOrigin
(
..
))
data
Database
=
Empty
|
PubMed
|
HAL
|
IsTex
|
Isidore
deriving
(
Eq
,
Show
,
Generic
)
deriveJSON
(
unPrefix
""
)
''
D
atabase
instance
ToSchema
Database
database2origin
::
Database
->
DataOrigin
database2origin
Empty
=
InternalOrigin
T
.
IsTex
database2origin
PubMed
=
ExternalOrigin
T
.
PubMed
database2origin
HAL
=
ExternalOrigin
T
.
HAL
database2origin
IsTex
=
ExternalOrigin
T
.
IsTex
database2origin
Isidore
=
ExternalOrigin
T
.
Isidore
------------------------------------------------------------------------
data
Datafield
=
Gargantext
|
External
(
Maybe
Database
)
|
Web
|
Files
deriving
(
Eq
,
Show
,
Generic
)
instance
FromJSON
Datafield
where
parseJSON
=
withText
"Datafield"
$
\
text
->
case
text
of
"Gargantext"
->
pure
Gargantext
"Web"
->
pure
Web
"Files"
->
pure
Files
v
->
let
(
preExternal
,
_
,
postExternal
)
=
v
=~
(
"External "
::
Text
)
::
(
Text
,
Text
,
Text
)
in
if
preExternal
==
""
then
do
db
<-
parseJSON
$
String
postExternal
pure
$
External
db
else
fail
$
"Cannot match patterh 'External <db>' for string "
++
(
T
.
unpack
v
)
instance
ToJSON
Datafield
where
toJSON
(
External
db
)
=
toJSON
$
"External "
++
(
show
db
)
toJSON
s
=
toJSON
$
show
s
instance
ToSchema
Datafield
where
declareNamedSchema
_
=
do
return
$
NamedSchema
(
Just
"Datafield"
)
$
mempty
&
type_
?~
SwaggerObject
src/Gargantext/Core/Text/Terms.hs
View file @
22e2da48
...
...
@@ -69,7 +69,7 @@ data TermType lang
,
_tt_ngramsSize
::
!
Int
,
_tt_model
::
!
(
Maybe
(
Tries
Token
()
))
}
deriving
Generic
deriving
(
Generic
)
makeLenses
''
T
ermType
--group :: [Text] -> [Text]
...
...
src/Gargantext/Data/HashMap/Strict/Utils.hs
View file @
22e2da48
...
...
@@ -23,9 +23,6 @@ partitionWithKey :: (Ord a, Hashable k) => (k -> a -> Bool) -> HashMap k a -> (H
partitionWithKey
p
m
=
(
HashMap
.
filterWithKey
p
m
,
HashMap
.
filterWithKey
(
\
k
->
not
.
p
k
)
m
)
mapKeys
::
(
Ord
k2
,
Hashable
k2
)
=>
(
k1
->
k2
)
->
HashMap
k1
a
->
HashMap
k2
a
mapKeys
f
=
HashMap
.
fromList
.
HashMap
.
foldrWithKey
(
\
k
x
xs
->
(
f
k
,
x
)
:
xs
)
[]
------------------------------------------------------------------------
-- getKeyWithMaxValue :: Hashable k => HashMap k a -> Maybe k
getKeysOrderedByValueMaxFirst
::
(
Ord
k
,
Hashable
k
,
Ord
a
)
=>
HashMap
k
a
->
[
k
]
...
...
stack.yaml
View file @
22e2da48
resolver
:
url
:
https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/1
7/10
.yaml
url
:
https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/1
8/4
.yaml
flags
:
{}
extra-package-dbs
:
[]
packages
:
...
...
@@ -94,6 +94,7 @@ extra-deps:
-
json-stream-0.4.2.4@sha256:8b7f17d54a6e1e6311756270f8bcf51e91bab4300945400de66118470dcf51b9,4716
-
located-base-0.1.1.1@sha256:7c6395f2b6fbf2d5f76c3514f774423838c0ea94e1c6a5530dd3c94b30c9d1c8,1904
-
logging-effect-1.3.12@sha256:72d168dd09887649ba9501627219b6027cbec2d5541931555b7885b133785ce3,1679
-
MissingH-1.4.3.0@sha256:32f9892ec98cd21df4f4d3ed8d95a3831ae74287ea0641d6f09b2dc6ef061d39,4859
-
monoid-extras-0.5.1@sha256:438dbfd7b4dce47d8f0ca577f56caf94bd1e21391afa545cad09fe7cf2e5793d,2333
-
rake-0.0.1@sha256:3380f6567fb17505d1095b7f32222c0b631fa04126ad39726c84262da99c08b3,2025
-
servant-cassava-0.10.1@sha256:07e7b6ca67cf57dcb4a0041a399a25d058844505837c6479e01d62be59d01fdf,1665
...
...
@@ -103,3 +104,4 @@ extra-deps:
-
stemmer-0.5.2@sha256:823aec56249ec2619f60a2c0d1384b732894dbbbe642856d337ebfe9629a0efd,4082
-
xmlbf-0.6.1@sha256:57867fcb39e0514d17b3328ff5de8d241a18482fc89bb742d9ed820a6a2a5187,1540
-
xmlbf-xeno-0.2@sha256:39f70fced6052524c290cf595f114661c721452e65fc3e0953a44e7682a6a6b0,950
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment