Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
22e2da48
Commit
22e2da48
authored
Jul 28, 2021
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[searx] first draft of searx parsing, updated stack to lts 18.4
parent
f8ba5ba1
Changes
9
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
491 additions
and
55 deletions
+491
-55
search-api.org
docs/search-api.org
+288
-0
package.yaml
package.yaml
+1
-0
Settings.hs
src/Gargantext/API/Admin/Settings.hs
+1
-1
New.hs
src/Gargantext/API/Node/Corpus/New.hs
+48
-49
Searx.hs
src/Gargantext/API/Node/Corpus/Searx.hs
+82
-0
Types.hs
src/Gargantext/API/Node/Corpus/Types.hs
+67
-0
Terms.hs
src/Gargantext/Core/Text/Terms.hs
+1
-1
Utils.hs
src/Gargantext/Data/HashMap/Strict/Utils.hs
+0
-3
stack.yaml
stack.yaml
+3
-1
No files found.
docs/search-api.org
0 → 100644
View file @
22e2da48
This diff is collapsed.
Click to expand it.
package.yaml
View file @
22e2da48
...
@@ -188,6 +188,7 @@ library:
...
@@ -188,6 +188,7 @@ library:
-
random
-
random
-
rdf4h
-
rdf4h
-
regex-compat
-
regex-compat
-
regex-tdfa
-
resource-pool
-
resource-pool
-
resourcet
-
resourcet
-
safe
-
safe
...
...
src/Gargantext/API/Admin/Settings.hs
View file @
22e2da48
...
@@ -166,7 +166,7 @@ newEnv port file = do
...
@@ -166,7 +166,7 @@ newEnv port file = do
when
(
port
/=
settings'
^.
appPort
)
$
when
(
port
/=
settings'
^.
appPort
)
$
panic
"TODO: conflicting settings of port"
panic
"TODO: conflicting settings of port"
config'
<-
readConfig
file
config'
<-
readConfig
file
self_url
<-
parseBaseUrl
$
"http://0.0.0.0:"
<>
show
port
self_url
<-
parseBaseUrl
$
"http://0.0.0.0:"
<>
show
port
dbParam
<-
databaseParameters
file
dbParam
<-
databaseParameters
file
pool
<-
newPool
dbParam
pool
<-
newPool
dbParam
...
...
src/Gargantext/API/Node/Corpus/New.hs
View file @
22e2da48
...
@@ -36,25 +36,26 @@ import Test.QuickCheck.Arbitrary
...
@@ -36,25 +36,26 @@ import Test.QuickCheck.Arbitrary
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
Gargantext.API.Admin.Orchestrator.Types
(
JobLog
(
..
),
AsyncJobs
)
import
qualified
Gargantext.API.Admin.Orchestrator.Types
as
T
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Admin.Types
(
HasSettings
)
import
Gargantext.API.Node.Corpus.New.File
import
Gargantext.API.Node.Corpus.New.File
import
Gargantext.API.Node.Corpus.Searx
import
Gargantext.API.Node.Corpus.Types
import
Gargantext.API.Node.Types
import
Gargantext.API.Node.Types
import
Gargantext.Core
(
Lang
(
..
)
{-, allLangs-}
)
import
Gargantext.Core
(
Lang
(
..
)
{-, allLangs-}
)
import
Gargantext.Database.Action.Mail
(
sendMail
)
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
qualified
Gargantext.Core.Text.Corpus.Parsers
as
Parser
(
FileFormat
(
..
),
parseFormat
)
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixSwagger
)
import
Gargantext.Database.Action.Flow
(
FlowCmdM
,
flowCorpus
,
getDataText
,
flowDataText
,
TermType
(
..
)
,
DataOrigin
(
..
)
{-, allDataOrigins-}
)
import
Gargantext.Database.Action.Flow
(
FlowCmdM
,
flowCorpus
,
getDataText
,
flowDataText
,
TermType
(
..
)
{-, allDataOrigins-}
)
import
Gargantext.Database.Action.
User
(
getUserId
)
import
Gargantext.Database.Action.
Mail
(
sendMail
)
import
Gargantext.Database.Action.Node
(
mkNodeWithParent
)
import
Gargantext.Database.Action.Node
(
mkNodeWithParent
)
import
Gargantext.Database.Action.User
(
getUserId
)
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
NodeType
(
..
),
UserId
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
,
NodeType
(
..
),
UserId
)
import
Gargantext.Database.Query.Table.Node
(
getNodeWith
)
import
Gargantext.Database.Query.Table.Node
(
getNodeWith
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Query.Table.Node.UpdateOpaleye
(
updateHyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
Gargantext.Database.Schema.Node
(
node_hyperdata
)
import
qualified
Gargantext.Database.GargDB
as
GargDB
import
qualified
Gargantext.Database.GargDB
as
GargDB
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
qualified
Gargantext.Core.Text.Corpus.Parsers
as
Parser
(
FileFormat
(
..
),
parseFormat
)
------------------------------------------------------------------------
------------------------------------------------------------------------
{-
{-
...
@@ -125,28 +126,11 @@ info :: FlowCmdM env err m => UserId -> m ApiInfo
...
@@ -125,28 +126,11 @@ info :: FlowCmdM env err m => UserId -> m ApiInfo
info
_u
=
pure
$
ApiInfo
API
.
externalAPIs
info
_u
=
pure
$
ApiInfo
API
.
externalAPIs
------------------------------------------------------------------------
------------------------------------------------------------------------
data
Database
=
Empty
|
PubMed
|
HAL
|
IsTex
|
Isidore
deriving
(
Eq
,
Show
,
Generic
)
deriveJSON
(
unPrefix
""
)
''
D
atabase
instance
ToSchema
Database
database2origin
::
Database
->
DataOrigin
database2origin
Empty
=
InternalOrigin
T
.
IsTex
database2origin
PubMed
=
ExternalOrigin
T
.
PubMed
database2origin
HAL
=
ExternalOrigin
T
.
HAL
database2origin
IsTex
=
ExternalOrigin
T
.
IsTex
database2origin
Isidore
=
ExternalOrigin
T
.
Isidore
------------------------------------------------------------------------
------------------------------------------------------------------------
data
WithQuery
=
WithQuery
data
WithQuery
=
WithQuery
{
_wq_query
::
!
Text
{
_wq_query
::
!
Text
,
_wq_databases
::
!
Database
,
_wq_databases
::
!
Database
,
_wq_datafield
::
!
Datafield
,
_wq_lang
::
!
Lang
,
_wq_lang
::
!
Lang
,
_wq_node_id
::
!
Int
,
_wq_node_id
::
!
Int
}
}
...
@@ -190,36 +174,51 @@ addToCorpusWithQuery :: FlowCmdM env err m
...
@@ -190,36 +174,51 @@ addToCorpusWithQuery :: FlowCmdM env err m
->
Maybe
Integer
->
Maybe
Integer
->
(
JobLog
->
m
()
)
->
(
JobLog
->
m
()
)
->
m
JobLog
->
m
JobLog
addToCorpusWithQuery
user
cid
(
WithQuery
q
dbs
l
_nid
)
maybeLimit
logStatus
=
do
addToCorpusWithQuery
user
cid
(
WithQuery
q
dbs
datafield
l
_nid
)
maybeLimit
logStatus
=
do
-- TODO ...
-- TODO ...
logStatus
JobLog
{
_scst_succeeded
=
Just
0
logStatus
JobLog
{
_scst_succeeded
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
5
,
_scst_remaining
=
Just
3
,
_scst_events
=
Just
[]
}
printDebug
"addToCorpusWithQuery"
(
cid
,
dbs
)
-- TODO add cid
-- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private
txts
<-
mapM
(
\
db
->
getDataText
db
(
Multi
l
)
q
maybeLimit
)
[
database2origin
dbs
]
logStatus
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
1
,
_scst_events
=
Just
[]
}
cids
<-
mapM
(
\
txt
->
flowDataText
user
txt
(
Multi
l
)
cid
)
txts
printDebug
"corpus id"
cids
printDebug
"sending email"
(
"xxxxxxxxxxxxxxxxxxxxx"
::
Text
)
sendMail
user
-- TODO ...
pure
JobLog
{
_scst_succeeded
=
Just
3
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
,
_scst_events
=
Just
[]
}
}
printDebug
"[addToCorpusWithQuery] (cid, dbs)"
(
cid
,
dbs
)
printDebug
"[addToCorpusWithQuery] datafield"
datafield
case
datafield
of
Web
->
do
printDebug
"[addToCorpusWithQuery] processing web request"
datafield
_
<-
triggerSearxSearch
cid
q
l
pure
JobLog
{
_scst_succeeded
=
Just
3
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
_
->
do
-- TODO add cid
-- TODO if cid is folder -> create Corpus
-- if cid is corpus -> add to corpus
-- if cid is root -> create corpus in Private
txts
<-
mapM
(
\
db
->
getDataText
db
(
Multi
l
)
q
maybeLimit
)
[
database2origin
dbs
]
logStatus
JobLog
{
_scst_succeeded
=
Just
2
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
1
,
_scst_events
=
Just
[]
}
cids
<-
mapM
(
\
txt
->
flowDataText
user
txt
(
Multi
l
)
cid
)
txts
printDebug
"corpus id"
cids
printDebug
"sending email"
(
"xxxxxxxxxxxxxxxxxxxxx"
::
Text
)
sendMail
user
-- TODO ...
pure
JobLog
{
_scst_succeeded
=
Just
3
,
_scst_failed
=
Just
0
,
_scst_remaining
=
Just
0
,
_scst_events
=
Just
[]
}
type
AddWithForm
=
Summary
"Add with FormUrlEncoded to corpus endpoint"
type
AddWithForm
=
Summary
"Add with FormUrlEncoded to corpus endpoint"
...
...
src/Gargantext/API/Node/Corpus/Searx.hs
0 → 100644
View file @
22e2da48
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.API.Node.Corpus.Searx
where
import
Control.Lens
(
view
)
import
qualified
Data.Aeson
as
Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
qualified
Data.Text
as
T
import
GHC.Generics
(
Generic
)
import
Network.HTTP.Client
import
Network.HTTP.Client.TLS
import
Protolude
(
encodeUtf8
,
Text
)
import
Gargantext.Prelude
import
Gargantext.Prelude.Config
import
Gargantext.Core
(
Lang
(
..
))
import
qualified
Gargantext.Core.Text.Corpus.API
as
API
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Database.Action.Flow
(
FlowCmdM
)
import
Gargantext.Database.Admin.Types.Node
(
CorpusId
)
import
Gargantext.Database.Prelude
(
hasConfig
)
data
SearxResult
=
SearxResult
{
_sr_url
::
Text
,
_sr_title
::
Text
,
_sr_content
::
Text
,
_sr_engine
::
Text
,
_sr_score
::
Double
,
_sr_category
::
Text
,
_sr_pretty_url
::
Text
}
deriving
(
Show
,
Eq
,
Generic
)
-- , _sr_parsed_url
-- , _sr_engines
-- , _sr_positions
$
(
deriveJSON
(
unPrefix
"_sr_"
)
''
S
earxResult
)
data
SearxResponse
=
SearxResponse
{
_srs_query
::
Text
,
_srs_number_of_results
::
Int
,
_srs_results
::
[
SearxResult
]
}
deriving
(
Show
,
Eq
,
Generic
)
-- , _srs_answers
-- , _srs_corrections
-- , _srs_infoboxes
-- , _srs_suggestions :: [Text]
-- , _srs_unresponsive_engines :: [Text] }
$
(
deriveJSON
(
unPrefix
"_srs_"
)
''
S
earxResponse
)
triggerSearxSearch
::
(
MonadBase
IO
m
,
FlowCmdM
env
err
m
)
=>
CorpusId
->
API
.
Query
->
Lang
->
m
()
triggerSearxSearch
cid
q
l
=
do
printDebug
"[triggerSearxSearch] cid"
cid
printDebug
"[triggerSearxSearch] q"
q
printDebug
"[triggerSearxSearch] l"
l
cfg
<-
view
hasConfig
let
surl
=
_gc_frame_searx_url
cfg
printDebug
"[triggerSearxSearch] surl"
surl
res
<-
liftBase
$
do
manager
<-
newManager
tlsManagerSettings
req
<-
parseRequest
$
T
.
unpack
surl
let
request
=
urlEncodedBody
[
(
"category_general"
,
"1"
)
,
(
"q"
,
encodeUtf8
q
)
,
(
"pageno"
,
"1"
)
,
(
"time_range"
,
"None"
)
,
(
"language"
,
"en-US"
)
-- TODO
,
(
"format"
,
"json"
)]
req
httpLbs
request
manager
let
dec
=
Aeson
.
decode
$
responseBody
res
::
(
Maybe
SearxResponse
)
printDebug
"[triggerSearxSearch] dec"
dec
pure
()
src/Gargantext/API/Node/Corpus/Types.hs
0 → 100644
View file @
22e2da48
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.API.Node.Corpus.Types
where
import
Control.Lens
hiding
(
elements
,
Empty
)
import
Control.Monad.Fail
(
fail
)
import
Data.Aeson
import
Data.Aeson.TH
(
deriveJSON
)
import
Data.Monoid
(
mempty
)
import
Data.Swagger
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
T
import
GHC.Generics
(
Generic
)
import
Text.Regex.TDFA
((
=~
))
import
Protolude
((
++
))
import
Gargantext.Prelude
import
qualified
Gargantext.API.Admin.Orchestrator.Types
as
T
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Database.Action.Flow
(
DataOrigin
(
..
))
data
Database
=
Empty
|
PubMed
|
HAL
|
IsTex
|
Isidore
deriving
(
Eq
,
Show
,
Generic
)
deriveJSON
(
unPrefix
""
)
''
D
atabase
instance
ToSchema
Database
database2origin
::
Database
->
DataOrigin
database2origin
Empty
=
InternalOrigin
T
.
IsTex
database2origin
PubMed
=
ExternalOrigin
T
.
PubMed
database2origin
HAL
=
ExternalOrigin
T
.
HAL
database2origin
IsTex
=
ExternalOrigin
T
.
IsTex
database2origin
Isidore
=
ExternalOrigin
T
.
Isidore
------------------------------------------------------------------------
data
Datafield
=
Gargantext
|
External
(
Maybe
Database
)
|
Web
|
Files
deriving
(
Eq
,
Show
,
Generic
)
instance
FromJSON
Datafield
where
parseJSON
=
withText
"Datafield"
$
\
text
->
case
text
of
"Gargantext"
->
pure
Gargantext
"Web"
->
pure
Web
"Files"
->
pure
Files
v
->
let
(
preExternal
,
_
,
postExternal
)
=
v
=~
(
"External "
::
Text
)
::
(
Text
,
Text
,
Text
)
in
if
preExternal
==
""
then
do
db
<-
parseJSON
$
String
postExternal
pure
$
External
db
else
fail
$
"Cannot match patterh 'External <db>' for string "
++
(
T
.
unpack
v
)
instance
ToJSON
Datafield
where
toJSON
(
External
db
)
=
toJSON
$
"External "
++
(
show
db
)
toJSON
s
=
toJSON
$
show
s
instance
ToSchema
Datafield
where
declareNamedSchema
_
=
do
return
$
NamedSchema
(
Just
"Datafield"
)
$
mempty
&
type_
?~
SwaggerObject
src/Gargantext/Core/Text/Terms.hs
View file @
22e2da48
...
@@ -69,7 +69,7 @@ data TermType lang
...
@@ -69,7 +69,7 @@ data TermType lang
,
_tt_ngramsSize
::
!
Int
,
_tt_ngramsSize
::
!
Int
,
_tt_model
::
!
(
Maybe
(
Tries
Token
()
))
,
_tt_model
::
!
(
Maybe
(
Tries
Token
()
))
}
}
deriving
Generic
deriving
(
Generic
)
makeLenses
''
T
ermType
makeLenses
''
T
ermType
--group :: [Text] -> [Text]
--group :: [Text] -> [Text]
...
...
src/Gargantext/Data/HashMap/Strict/Utils.hs
View file @
22e2da48
...
@@ -23,9 +23,6 @@ partitionWithKey :: (Ord a, Hashable k) => (k -> a -> Bool) -> HashMap k a -> (H
...
@@ -23,9 +23,6 @@ partitionWithKey :: (Ord a, Hashable k) => (k -> a -> Bool) -> HashMap k a -> (H
partitionWithKey
p
m
=
(
HashMap
.
filterWithKey
p
m
,
HashMap
.
filterWithKey
(
\
k
->
not
.
p
k
)
m
)
partitionWithKey
p
m
=
(
HashMap
.
filterWithKey
p
m
,
HashMap
.
filterWithKey
(
\
k
->
not
.
p
k
)
m
)
mapKeys
::
(
Ord
k2
,
Hashable
k2
)
=>
(
k1
->
k2
)
->
HashMap
k1
a
->
HashMap
k2
a
mapKeys
f
=
HashMap
.
fromList
.
HashMap
.
foldrWithKey
(
\
k
x
xs
->
(
f
k
,
x
)
:
xs
)
[]
------------------------------------------------------------------------
------------------------------------------------------------------------
-- getKeyWithMaxValue :: Hashable k => HashMap k a -> Maybe k
-- getKeyWithMaxValue :: Hashable k => HashMap k a -> Maybe k
getKeysOrderedByValueMaxFirst
::
(
Ord
k
,
Hashable
k
,
Ord
a
)
=>
HashMap
k
a
->
[
k
]
getKeysOrderedByValueMaxFirst
::
(
Ord
k
,
Hashable
k
,
Ord
a
)
=>
HashMap
k
a
->
[
k
]
...
...
stack.yaml
View file @
22e2da48
resolver
:
resolver
:
url
:
https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/1
7/10
.yaml
url
:
https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/1
8/4
.yaml
flags
:
{}
flags
:
{}
extra-package-dbs
:
[]
extra-package-dbs
:
[]
packages
:
packages
:
...
@@ -94,6 +94,7 @@ extra-deps:
...
@@ -94,6 +94,7 @@ extra-deps:
-
json-stream-0.4.2.4@sha256:8b7f17d54a6e1e6311756270f8bcf51e91bab4300945400de66118470dcf51b9,4716
-
json-stream-0.4.2.4@sha256:8b7f17d54a6e1e6311756270f8bcf51e91bab4300945400de66118470dcf51b9,4716
-
located-base-0.1.1.1@sha256:7c6395f2b6fbf2d5f76c3514f774423838c0ea94e1c6a5530dd3c94b30c9d1c8,1904
-
located-base-0.1.1.1@sha256:7c6395f2b6fbf2d5f76c3514f774423838c0ea94e1c6a5530dd3c94b30c9d1c8,1904
-
logging-effect-1.3.12@sha256:72d168dd09887649ba9501627219b6027cbec2d5541931555b7885b133785ce3,1679
-
logging-effect-1.3.12@sha256:72d168dd09887649ba9501627219b6027cbec2d5541931555b7885b133785ce3,1679
-
MissingH-1.4.3.0@sha256:32f9892ec98cd21df4f4d3ed8d95a3831ae74287ea0641d6f09b2dc6ef061d39,4859
-
monoid-extras-0.5.1@sha256:438dbfd7b4dce47d8f0ca577f56caf94bd1e21391afa545cad09fe7cf2e5793d,2333
-
monoid-extras-0.5.1@sha256:438dbfd7b4dce47d8f0ca577f56caf94bd1e21391afa545cad09fe7cf2e5793d,2333
-
rake-0.0.1@sha256:3380f6567fb17505d1095b7f32222c0b631fa04126ad39726c84262da99c08b3,2025
-
rake-0.0.1@sha256:3380f6567fb17505d1095b7f32222c0b631fa04126ad39726c84262da99c08b3,2025
-
servant-cassava-0.10.1@sha256:07e7b6ca67cf57dcb4a0041a399a25d058844505837c6479e01d62be59d01fdf,1665
-
servant-cassava-0.10.1@sha256:07e7b6ca67cf57dcb4a0041a399a25d058844505837c6479e01d62be59d01fdf,1665
...
@@ -103,3 +104,4 @@ extra-deps:
...
@@ -103,3 +104,4 @@ extra-deps:
-
stemmer-0.5.2@sha256:823aec56249ec2619f60a2c0d1384b732894dbbbe642856d337ebfe9629a0efd,4082
-
stemmer-0.5.2@sha256:823aec56249ec2619f60a2c0d1384b732894dbbbe642856d337ebfe9629a0efd,4082
-
xmlbf-0.6.1@sha256:57867fcb39e0514d17b3328ff5de8d241a18482fc89bb742d9ed820a6a2a5187,1540
-
xmlbf-0.6.1@sha256:57867fcb39e0514d17b3328ff5de8d241a18482fc89bb742d9ed820a6a2a5187,1540
-
xmlbf-xeno-0.2@sha256:39f70fced6052524c290cf595f114661c721452e65fc3e0953a44e7682a6a6b0,950
-
xmlbf-xeno-0.2@sha256:39f70fced6052524c290cf595f114661c721452e65fc3e0953a44e7682a6a6b0,950
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment