Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
hal
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
hal
Commits
fe4e032c
Commit
fe4e032c
authored
Jun 13, 2023
by
Alfredo Di Napoli
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Initial cleanup
parent
7f209ce7
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
74 additions
and
27 deletions
+74
-27
HAL.hs
src/HAL.hs
+69
-24
Client.hs
src/HAL/Client.hs
+5
-3
No files found.
src/HAL.hs
View file @
fe4e032c
module
HAL
where
{-# LANGUAGE BangPatterns #-}
import
Conduit
import
Control.Monad
import
Data.Default
(
def
)
import
Data.Maybe
(
fromMaybe
)
import
Data.Either
(
fromRight
)
...
...
@@ -15,50 +18,92 @@ import HAL.Client
import
HAL.Doc.Corpus
import
HAL.Doc.Struct
import
Prelude
import
Servant.API
import
Data.Aeson
batchSize
::
Int
batchSize
=
1000
getMetadataWith
::
Text
->
Maybe
Int
->
Maybe
Integer
->
IO
(
Either
ClientError
(
Response
Corpus
))
getMetadataWith
q
start
rows
=
do
manager'
<-
newManager
tlsManagerSettings
runHalAPIClient
$
search
(
Just
requestedFields
)
[
q
]
Nothing
start
rows
getMetadataWithC
::
Text
->
Maybe
Int
->
Maybe
Integer
->
IO
(
Either
ClientError
(
Maybe
Integer
,
ConduitT
()
Corpus
IO
()
))
getMetadataWithC
q
start
rows
=
do
data
HalCrawlerOptions
=
HalCrawlerOptions
{
-- | If 'True', enable the debug logs to stdout.
_hco_debugLogs
::
!
Bool
,
_hco_batchSize
::
!
Int
}
defaultHalOptions
::
HalCrawlerOptions
defaultHalOptions
=
HalCrawlerOptions
{
_hco_debugLogs
=
False
,
_hco_batchSize
=
1000
}
getMetadataWithOptionsC
::
HalCrawlerOptions
-- ^ The options for the crawler
->
Text
-- ^ The textual query
->
Maybe
Int
-- ^ An optional offset
->
Maybe
Int
-- ^ An optional limit for the search, it influences the number of
-- rows returned.
->
IO
(
Either
ClientError
(
Maybe
Int
,
ConduitT
()
Corpus
IO
()
))
getMetadataWithOptionsC
opts
@
HalCrawlerOptions
{
..
}
q
mb_offset
mb_limit
=
do
manager'
<-
newManager
tlsManagerSettings
-- First, estimate the total number of documents
eCount
<-
countResults
q
pure
$
get'
q
start
rows
<$>
eCount
pure
$
get'
<$>
eCount
where
get'
::
Text
->
Maybe
Int
->
Maybe
Integer
->
Integer
->
(
Maybe
Integer
,
ConduitT
()
Corpus
IO
()
)
get'
q
start
rows
numFound
=
get'
::
Int
->
(
Maybe
Int
,
ConduitT
()
Corpus
IO
()
)
get'
numFound
=
(
Just
numResults
,
yieldMany
[
0
..
]
.|
takeC
(
fromInteger
numPages
)
.|
concatMapMC
(
getPage
q
start'
))
.|
takeC
numPages
.|
concatMapMC
(
getPage
q
offset
))
where
start'
=
fromMaybe
0
start
rows'
=
min
numFound
$
fromMaybe
numFound
rows
numResults
=
rows'
-
(
fromIntegral
start'
)
numPages
=
numResults
`
div
`
(
fromIntegral
batchSize
)
+
1
offset
=
fromMaybe
0
mb_offset
limit
=
min
numFound
$
fromMaybe
numFound
mb_limit
numResults
=
limit
-
offset
numPages
=
numResults
`
div
`
_hco_batchSize
+
1
debugLog
::
String
->
IO
()
debugLog
msg
=
when
_hco_debugLogs
$
putStrLn
msg
getPage
::
Text
->
Int
->
Int
->
IO
[
Corpus
]
getPage
q
start
pageNum
=
do
let
offset
=
start
+
pageNum
*
batchSize
putStrLn
$
"[getMetadataWithC] getPage: "
<>
show
offset
eRes
<-
runHalAPIClient
$
search
(
Just
requestedFields
)
[
q
]
Nothing
(
Just
offset
)
(
Just
$
fromIntegral
batchSize
)
let
offset
=
start
+
pageNum
*
_hco_
batchSize
debugLog
$
"[getMetadataWithC] getPage: "
<>
show
offset
eRes
<-
runHalAPIClient
$
search
(
Just
requestedFields
)
[
q
]
Nothing
(
Just
offset
)
(
Just
_hco_
batchSize
)
pure
$
case
eRes
of
Left
_
->
[]
Right
(
Response
{
_docs
})
->
_docs
printDoc
::
Corpus
->
IO
Corpus
printDoc
c
@
(
Corpus
{
_corpus_docid
,
_corpus_title
})
=
do
print
$
show
_corpus_title
debugLog
$
show
_corpus_title
pure
c
countResults
::
Text
->
IO
(
Either
ClientError
Integer
)
getMetadataWith
::
Text
-- ^ The query, as a text.
->
Maybe
Int
-- ^ The offset, it influences where the search starts.
->
Maybe
Int
-- ^ The offset, it influences how many rows are returned.
->
IO
(
Either
ClientError
(
Response
Corpus
))
getMetadataWith
q
start
rows
=
do
manager'
<-
newManager
tlsManagerSettings
runHalAPIClient
$
search
(
Just
requestedFields
)
[
q
]
Nothing
start
rows
getMetadataWithC
::
Text
-- ^ The textual query
->
Maybe
Int
-- ^ An optional offset for the search, it influences where to start.
->
Maybe
Int
-- ^ An optional limit for the search, it influences how many rows are
-- returned.
->
IO
(
Either
ClientError
(
Maybe
Int
,
ConduitT
()
Corpus
IO
()
))
getMetadataWithC
q
start
=
getMetadataWithOptionsC
defaultHalOptions
q
start
countResults
::
Text
->
IO
(
Either
ClientError
Int
)
countResults
q
=
do
manager'
<-
newManager
tlsManagerSettings
-- First, estimate the total number of documents
...
...
src/HAL/Client.hs
View file @
fe4e032c
...
...
@@ -11,6 +11,7 @@ import Data.Text
import
Data.Map
import
Data.Aeson
import
Data.Aeson.Types
(
typeMismatch
)
import
qualified
Data.ByteString.Lazy
as
BSL
import
qualified
Codec.Binary.UTF8.String
as
UTF
...
...
@@ -30,7 +31,7 @@ type Search doc = "search"
-- permit to start at the x result
:>
QueryParam
"start"
Int
-- use rows to make the request only return the x number of result
:>
QueryParam
"rows"
Int
eger
:>
QueryParam
"rows"
Int
:>
Get
'[
J
SON
]
(
Response
doc
)
type
Structure
doc
=
"ref"
:>
"structure"
...
...
@@ -55,7 +56,7 @@ desc = Just . Desc
-- Response type
data
Response
doc
=
Response
{
_numFound
::
Int
eger
{
_numFound
::
Int
,
_start
::
Int
,
_docs
::
[
doc
]
}
deriving
(
Show
,
Generic
)
...
...
@@ -66,6 +67,7 @@ instance FromJSON doc => FromJSON (Response doc) where
((
o
.:
"response"
)
>>=
(
.:
"numFound"
))
<*>
((
o
.:
"response"
)
>>=
(
.:
"start"
))
<*>
((
o
.:
"response"
)
>>=
(
.:
"docs"
))
parseJSON
ty
=
typeMismatch
"Hal Response"
ty
halAPI
::
Proxy
(
HALAPI
doc
)
halAPI
=
Proxy
...
...
@@ -76,7 +78,7 @@ search :: (FromJSON doc, ToHttpApiData doc) =>
->
[
Text
]
-- fq
->
Maybe
SortField
-- sort
->
Maybe
Int
-- start
->
Maybe
Int
eger
-- rows
->
Maybe
Int
-- rows
->
ClientM
(
Response
doc
)
structure
::
(
FromJSON
doc
,
ToHttpApiData
doc
)
=>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment