Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
A
arxiv-api
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
crawlers
arxiv-api
Commits
2d7e5753
Verified
Commit
2d7e5753
authored
Feb 16, 2023
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[conduit] fixes to the searchAxv function to properly parse results
parent
a2d78abe
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
23 additions
and
31 deletions
+23
-31
Main.hs
app/Main.hs
+0
-3
Arxiv.hs
src/Arxiv.hs
+13
-17
Arxiv.hs
src/Network/Api/Arxiv.hs
+10
-11
No files found.
app/Main.hs
View file @
2d7e5753
...
...
@@ -11,6 +11,3 @@ main = do
--let resC = searchAxv $ simpleQuery ["besov fluid"]
--runConduitRes $ resC .| outSnk
src/Arxiv.hs
View file @
2d7e5753
...
...
@@ -16,8 +16,8 @@ import Network.HTTP.Types.Status
import
Text.HTML.TagSoup
import
Text.Read
(
readMaybe
)
import
qualified
Conduit
as
C
import
qualified
Data.ByteString
as
B
hiding
(
unpack
)
import
qualified
Data.ByteString.Char8
as
B
(
unpack
)
import
qualified
Data.ByteString
as
B
hiding
(
unpack
)
import
qualified
Data.ByteString.Char8
as
B
(
unpack
)
import
qualified
Data.Conduit
as
C
import
qualified
Data.Conduit.List
as
CL
import
qualified
Network.Api.Arxiv
as
Ax
...
...
@@ -27,7 +27,7 @@ type Limit = Int
type
Soup
=
Tag
String
batchSize
::
Int
batchSize
=
30
00
batchSize
=
4
00
------------------------------------------------------------
...
...
@@ -59,7 +59,7 @@ simpleQuery xs = Ax.Query { Ax.qExp = Just $ Exp $ Abs xs
,
Ax
.
qItems
=
batchSize
}
complexQuery
::
Ax
.
Query
complexQuery
=
complexQuery
=
let
au
=
Exp
$
Au
[
"Aaronson"
]
t1
=
Exp
$
Ti
[
"quantum"
]
t2
=
Exp
$
Ti
[
"complexity"
]
...
...
@@ -88,27 +88,23 @@ searchAxvBody q =
st
->
error
$
"Error:"
++
show
st
searchAxv'
::
(
Monad
m
,
C
.
MonadIO
m
)
=>
Ax
.
Query
->
IO
(
Int
,
C
.
ConduitT
()
Result
m
()
)
searchAxv'
q
=
searchAxv'
q
=
let
s
=
Ax
.
mkQuery
q
in
do
rsp
<-
HT
.
httpBS
=<<
liftIO
(
parseRequest
s
)
case
getResponseStatus
rsp
of
(
Status
200
_
)
->
case
getSoup
(
getResponseBody
rsp
)
of
[]
->
pure
(
0
,
C
.
yieldMany
[]
)
[]
->
pure
(
0
,
C
.
yieldMany
[]
)
soups
@
(
s
:
_
)
->
pure
(
Ax
.
totalResults
soups
,
results
q
soups
)
st
->
error
$
"Error:"
++
show
st
searchAxv
::
(
Monad
m
,
C
.
MonadIO
m
)
=>
Ax
.
Query
->
C
.
ConduitT
()
Result
m
()
searchAxv
q
=
searchAxv
q
=
let
s
=
Ax
.
mkQuery
q
in
do
rsp
<-
HT
.
httpBS
=<<
liftIO
(
parseRequest
s
)
case
getResponseStatus
rsp
of
(
Status
200
_
)
->
let
body
=
getResponseBody
rsp
in
-- If no mo results, break, otherwise this keeps looping for next page forever
case
Ax
.
getEntry
(
getSoup
body
)
of
(
[]
,
_
)
->
C
.
yieldMany
[]
_
->
getSoupC
body
>>=
results
q
(
Status
200
_
)
->
case
getSoup
(
getResponseBody
rsp
)
of
[]
->
C
.
yieldMany
[]
soups
@
(
s
:
_
)
->
results
q
soups
st
->
error
$
"Error:"
++
show
st
----------------------------------------------------------------------
...
...
@@ -136,8 +132,9 @@ toSoup = C.awaitForever (C.yield . parseTags . B.unpack)
----------------------------------------------------------------------
results
::
(
Monad
m
,
C
.
MonadIO
m
)
=>
Ax
.
Query
->
[
Soup
]
->
C
.
ConduitT
()
Result
m
()
results
q
sp
=
Ax
.
forEachEntryM
sp
(
C
.
yield
.
mkResult
)
>>
searchAxv
(
Ax
.
nextPage
q
)
results
q
sp
=
do
Ax
.
forEachEntryM
sp
(
C
.
yield
.
mkResult
)
searchAxv
(
Ax
.
nextPage
q
)
----------------------------------------------------------------------
-- Get data and format
...
...
@@ -182,4 +179,3 @@ mkResult sp = let abstract = Ax.getSummary sp & clean'
----------------------------------------------------------------------
outSnk
::
(
Monad
m
,
C
.
MonadIO
m
)
=>
C
.
ConduitT
Result
C
.
Void
m
()
outSnk
=
C
.
awaitForever
(
liftIO
.
putStrLn
.
show
)
src/Network/Api/Arxiv.hs
View file @
2d7e5753
...
...
@@ -69,7 +69,7 @@ where
import
qualified
Data.List.Split
as
S
import
Control.Applicative
((
<$>
))
import
Control.Monad
(
void
)
------------------------------------------------------------------------
-- import Debug.Trace (trace)
------------------------------------------------------------------------
...
...
@@ -88,7 +88,7 @@ where
-}
{- $RequestOv
Requests are URL parameters,
either \"search_query\" or \"id_list\".
This module provides functions
...
...
@@ -96,13 +96,13 @@ where
to create the full request string
and to navigate through a multi-page request
with a maximum number of items per page.
For details of the Arxiv request format,
please refer to the Arxiv documentation.
-}
{- $ResponseOv
Response processing expects [Tag String] as input (see TagSoup).
The result produced by your http library
(such as http-conduit) must be converted to [Tag String]
...
...
@@ -142,7 +142,7 @@ where
>
> main :: IO ()
> main = withSocketsDo (execQuery makeQuery)
>
>
> makeQuery :: Ax.Query
> makeQuery =
> let au = Exp $ Au ["Aaronson"]
...
...
@@ -175,7 +175,7 @@ where
> ----------------------------------------------------------------------
> -- Consume page by page
> ----------------------------------------------------------------------
> getSoup :: MonadResource m =>
> getSoup :: MonadResource m =>
> B.ByteString -> C.ConduitT () String m [Soup]
> getSoup b = concat <$> (C.yield b .| toSoup .| CL.consume)
>
...
...
@@ -195,7 +195,7 @@ where
> then C.yield ("EOT: " ++ show (Ax.totalResults sp) ++ " results")
> else Ax.forEachEntryM_ sp (C.yield . mkResult)
> >> searchAxv (Ax.nextPage q)
>
>
> ----------------------------------------------------------------------
> -- Get data and format
> ----------------------------------------------------------------------
...
...
@@ -519,7 +519,7 @@ where
-- to first item and results per page:
--
-- * 'Int': Start index for this page
--
--
-- * 'Int': Number of results per page.
-------------------------------------------------------------------------
itemControl
::
Int
->
Int
->
String
...
...
@@ -621,7 +621,7 @@ where
forEachEntryM
::
Monad
m
=>
[
Tag
String
]
->
([
Tag
String
]
->
m
r
)
->
m
[
r
]
forEachEntryM
=
forEachM
"entry"
------------------------------------------------------------------------
-- | Variant of 'forEachEntryM' for actions
-- that do not return a result.
...
...
@@ -975,7 +975,7 @@ where
i
<-
fieldId
ts
<-
terms
return
(
Exp
$
i
ts
)
------------------------------------------------------------------------
-- The field ids
------------------------------------------------------------------------
...
...
@@ -1070,4 +1070,3 @@ where
op
=
try
(
void
(
string
"ANDNOT"
)
>>
return
AndNot
)
<|>
try
(
void
(
string
"OR"
)
>>
return
Or
)
<|>
(
void
(
string
"AND"
)
>>
return
And
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment