Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
A
arxiv-api
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
crawlers
arxiv-api
Commits
2d7e5753
Verified
Commit
2d7e5753
authored
Feb 16, 2023
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[conduit] fixes to the searchAxv function to properly parse results
parent
a2d78abe
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
23 additions
and
31 deletions
+23
-31
Main.hs
app/Main.hs
+0
-3
Arxiv.hs
src/Arxiv.hs
+13
-17
Arxiv.hs
src/Network/Api/Arxiv.hs
+10
-11
No files found.
app/Main.hs
View file @
2d7e5753
...
@@ -11,6 +11,3 @@ main = do
...
@@ -11,6 +11,3 @@ main = do
--let resC = searchAxv $ simpleQuery ["besov fluid"]
--let resC = searchAxv $ simpleQuery ["besov fluid"]
--runConduitRes $ resC .| outSnk
--runConduitRes $ resC .| outSnk
src/Arxiv.hs
View file @
2d7e5753
...
@@ -16,8 +16,8 @@ import Network.HTTP.Types.Status
...
@@ -16,8 +16,8 @@ import Network.HTTP.Types.Status
import
Text.HTML.TagSoup
import
Text.HTML.TagSoup
import
Text.Read
(
readMaybe
)
import
Text.Read
(
readMaybe
)
import
qualified
Conduit
as
C
import
qualified
Conduit
as
C
import
qualified
Data.ByteString
as
B
hiding
(
unpack
)
import
qualified
Data.ByteString
as
B
hiding
(
unpack
)
import
qualified
Data.ByteString.Char8
as
B
(
unpack
)
import
qualified
Data.ByteString.Char8
as
B
(
unpack
)
import
qualified
Data.Conduit
as
C
import
qualified
Data.Conduit
as
C
import
qualified
Data.Conduit.List
as
CL
import
qualified
Data.Conduit.List
as
CL
import
qualified
Network.Api.Arxiv
as
Ax
import
qualified
Network.Api.Arxiv
as
Ax
...
@@ -27,7 +27,7 @@ type Limit = Int
...
@@ -27,7 +27,7 @@ type Limit = Int
type
Soup
=
Tag
String
type
Soup
=
Tag
String
batchSize
::
Int
batchSize
::
Int
batchSize
=
30
00
batchSize
=
4
00
------------------------------------------------------------
------------------------------------------------------------
...
@@ -59,7 +59,7 @@ simpleQuery xs = Ax.Query { Ax.qExp = Just $ Exp $ Abs xs
...
@@ -59,7 +59,7 @@ simpleQuery xs = Ax.Query { Ax.qExp = Just $ Exp $ Abs xs
,
Ax
.
qItems
=
batchSize
}
,
Ax
.
qItems
=
batchSize
}
complexQuery
::
Ax
.
Query
complexQuery
::
Ax
.
Query
complexQuery
=
complexQuery
=
let
au
=
Exp
$
Au
[
"Aaronson"
]
let
au
=
Exp
$
Au
[
"Aaronson"
]
t1
=
Exp
$
Ti
[
"quantum"
]
t1
=
Exp
$
Ti
[
"quantum"
]
t2
=
Exp
$
Ti
[
"complexity"
]
t2
=
Exp
$
Ti
[
"complexity"
]
...
@@ -88,27 +88,23 @@ searchAxvBody q =
...
@@ -88,27 +88,23 @@ searchAxvBody q =
st
->
error
$
"Error:"
++
show
st
st
->
error
$
"Error:"
++
show
st
searchAxv'
::
(
Monad
m
,
C
.
MonadIO
m
)
=>
Ax
.
Query
->
IO
(
Int
,
C
.
ConduitT
()
Result
m
()
)
searchAxv'
::
(
Monad
m
,
C
.
MonadIO
m
)
=>
Ax
.
Query
->
IO
(
Int
,
C
.
ConduitT
()
Result
m
()
)
searchAxv'
q
=
searchAxv'
q
=
let
s
=
Ax
.
mkQuery
q
let
s
=
Ax
.
mkQuery
q
in
do
rsp
<-
HT
.
httpBS
=<<
liftIO
(
parseRequest
s
)
in
do
rsp
<-
HT
.
httpBS
=<<
liftIO
(
parseRequest
s
)
case
getResponseStatus
rsp
of
case
getResponseStatus
rsp
of
(
Status
200
_
)
->
case
getSoup
(
getResponseBody
rsp
)
of
(
Status
200
_
)
->
case
getSoup
(
getResponseBody
rsp
)
of
[]
->
pure
(
0
,
C
.
yieldMany
[]
)
[]
->
pure
(
0
,
C
.
yieldMany
[]
)
soups
@
(
s
:
_
)
->
pure
(
Ax
.
totalResults
soups
,
results
q
soups
)
soups
@
(
s
:
_
)
->
pure
(
Ax
.
totalResults
soups
,
results
q
soups
)
st
->
error
$
"Error:"
++
show
st
st
->
error
$
"Error:"
++
show
st
searchAxv
::
(
Monad
m
,
C
.
MonadIO
m
)
=>
Ax
.
Query
->
C
.
ConduitT
()
Result
m
()
searchAxv
::
(
Monad
m
,
C
.
MonadIO
m
)
=>
Ax
.
Query
->
C
.
ConduitT
()
Result
m
()
searchAxv
q
=
searchAxv
q
=
let
s
=
Ax
.
mkQuery
q
let
s
=
Ax
.
mkQuery
q
in
do
rsp
<-
HT
.
httpBS
=<<
liftIO
(
parseRequest
s
)
in
do
rsp
<-
HT
.
httpBS
=<<
liftIO
(
parseRequest
s
)
case
getResponseStatus
rsp
of
case
getResponseStatus
rsp
of
(
Status
200
_
)
->
(
Status
200
_
)
->
case
getSoup
(
getResponseBody
rsp
)
of
let
body
=
getResponseBody
rsp
[]
->
C
.
yieldMany
[]
in
soups
@
(
s
:
_
)
->
results
q
soups
-- If no mo results, break, otherwise this keeps looping for next page forever
case
Ax
.
getEntry
(
getSoup
body
)
of
(
[]
,
_
)
->
C
.
yieldMany
[]
_
->
getSoupC
body
>>=
results
q
st
->
error
$
"Error:"
++
show
st
st
->
error
$
"Error:"
++
show
st
----------------------------------------------------------------------
----------------------------------------------------------------------
...
@@ -136,8 +132,9 @@ toSoup = C.awaitForever (C.yield . parseTags . B.unpack)
...
@@ -136,8 +132,9 @@ toSoup = C.awaitForever (C.yield . parseTags . B.unpack)
----------------------------------------------------------------------
----------------------------------------------------------------------
results
::
(
Monad
m
,
C
.
MonadIO
m
)
=>
results
::
(
Monad
m
,
C
.
MonadIO
m
)
=>
Ax
.
Query
->
[
Soup
]
->
C
.
ConduitT
()
Result
m
()
Ax
.
Query
->
[
Soup
]
->
C
.
ConduitT
()
Result
m
()
results
q
sp
=
Ax
.
forEachEntryM
sp
(
C
.
yield
.
mkResult
)
results
q
sp
=
do
>>
searchAxv
(
Ax
.
nextPage
q
)
Ax
.
forEachEntryM
sp
(
C
.
yield
.
mkResult
)
searchAxv
(
Ax
.
nextPage
q
)
----------------------------------------------------------------------
----------------------------------------------------------------------
-- Get data and format
-- Get data and format
...
@@ -182,4 +179,3 @@ mkResult sp = let abstract = Ax.getSummary sp & clean'
...
@@ -182,4 +179,3 @@ mkResult sp = let abstract = Ax.getSummary sp & clean'
----------------------------------------------------------------------
----------------------------------------------------------------------
outSnk
::
(
Monad
m
,
C
.
MonadIO
m
)
=>
C
.
ConduitT
Result
C
.
Void
m
()
outSnk
::
(
Monad
m
,
C
.
MonadIO
m
)
=>
C
.
ConduitT
Result
C
.
Void
m
()
outSnk
=
C
.
awaitForever
(
liftIO
.
putStrLn
.
show
)
outSnk
=
C
.
awaitForever
(
liftIO
.
putStrLn
.
show
)
src/Network/Api/Arxiv.hs
View file @
2d7e5753
...
@@ -69,7 +69,7 @@ where
...
@@ -69,7 +69,7 @@ where
import
qualified
Data.List.Split
as
S
import
qualified
Data.List.Split
as
S
import
Control.Applicative
((
<$>
))
import
Control.Applicative
((
<$>
))
import
Control.Monad
(
void
)
import
Control.Monad
(
void
)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- import Debug.Trace (trace)
-- import Debug.Trace (trace)
------------------------------------------------------------------------
------------------------------------------------------------------------
...
@@ -88,7 +88,7 @@ where
...
@@ -88,7 +88,7 @@ where
-}
-}
{- $RequestOv
{- $RequestOv
Requests are URL parameters,
Requests are URL parameters,
either \"search_query\" or \"id_list\".
either \"search_query\" or \"id_list\".
This module provides functions
This module provides functions
...
@@ -96,13 +96,13 @@ where
...
@@ -96,13 +96,13 @@ where
to create the full request string
to create the full request string
and to navigate through a multi-page request
and to navigate through a multi-page request
with a maximum number of items per page.
with a maximum number of items per page.
For details of the Arxiv request format,
For details of the Arxiv request format,
please refer to the Arxiv documentation.
please refer to the Arxiv documentation.
-}
-}
{- $ResponseOv
{- $ResponseOv
Response processing expects [Tag String] as input (see TagSoup).
Response processing expects [Tag String] as input (see TagSoup).
The result produced by your http library
The result produced by your http library
(such as http-conduit) must be converted to [Tag String]
(such as http-conduit) must be converted to [Tag String]
...
@@ -142,7 +142,7 @@ where
...
@@ -142,7 +142,7 @@ where
>
>
> main :: IO ()
> main :: IO ()
> main = withSocketsDo (execQuery makeQuery)
> main = withSocketsDo (execQuery makeQuery)
>
>
> makeQuery :: Ax.Query
> makeQuery :: Ax.Query
> makeQuery =
> makeQuery =
> let au = Exp $ Au ["Aaronson"]
> let au = Exp $ Au ["Aaronson"]
...
@@ -175,7 +175,7 @@ where
...
@@ -175,7 +175,7 @@ where
> ----------------------------------------------------------------------
> ----------------------------------------------------------------------
> -- Consume page by page
> -- Consume page by page
> ----------------------------------------------------------------------
> ----------------------------------------------------------------------
> getSoup :: MonadResource m =>
> getSoup :: MonadResource m =>
> B.ByteString -> C.ConduitT () String m [Soup]
> B.ByteString -> C.ConduitT () String m [Soup]
> getSoup b = concat <$> (C.yield b .| toSoup .| CL.consume)
> getSoup b = concat <$> (C.yield b .| toSoup .| CL.consume)
>
>
...
@@ -195,7 +195,7 @@ where
...
@@ -195,7 +195,7 @@ where
> then C.yield ("EOT: " ++ show (Ax.totalResults sp) ++ " results")
> then C.yield ("EOT: " ++ show (Ax.totalResults sp) ++ " results")
> else Ax.forEachEntryM_ sp (C.yield . mkResult)
> else Ax.forEachEntryM_ sp (C.yield . mkResult)
> >> searchAxv (Ax.nextPage q)
> >> searchAxv (Ax.nextPage q)
>
>
> ----------------------------------------------------------------------
> ----------------------------------------------------------------------
> -- Get data and format
> -- Get data and format
> ----------------------------------------------------------------------
> ----------------------------------------------------------------------
...
@@ -519,7 +519,7 @@ where
...
@@ -519,7 +519,7 @@ where
-- to first item and results per page:
-- to first item and results per page:
--
--
-- * 'Int': Start index for this page
-- * 'Int': Start index for this page
--
--
-- * 'Int': Number of results per page.
-- * 'Int': Number of results per page.
-------------------------------------------------------------------------
-------------------------------------------------------------------------
itemControl
::
Int
->
Int
->
String
itemControl
::
Int
->
Int
->
String
...
@@ -621,7 +621,7 @@ where
...
@@ -621,7 +621,7 @@ where
forEachEntryM
::
Monad
m
=>
forEachEntryM
::
Monad
m
=>
[
Tag
String
]
->
([
Tag
String
]
->
m
r
)
->
m
[
r
]
[
Tag
String
]
->
([
Tag
String
]
->
m
r
)
->
m
[
r
]
forEachEntryM
=
forEachM
"entry"
forEachEntryM
=
forEachM
"entry"
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Variant of 'forEachEntryM' for actions
-- | Variant of 'forEachEntryM' for actions
-- that do not return a result.
-- that do not return a result.
...
@@ -975,7 +975,7 @@ where
...
@@ -975,7 +975,7 @@ where
i
<-
fieldId
i
<-
fieldId
ts
<-
terms
ts
<-
terms
return
(
Exp
$
i
ts
)
return
(
Exp
$
i
ts
)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- The field ids
-- The field ids
------------------------------------------------------------------------
------------------------------------------------------------------------
...
@@ -1070,4 +1070,3 @@ where
...
@@ -1070,4 +1070,3 @@ where
op
=
try
(
void
(
string
"ANDNOT"
)
>>
return
AndNot
)
op
=
try
(
void
(
string
"ANDNOT"
)
>>
return
AndNot
)
<|>
try
(
void
(
string
"OR"
)
>>
return
Or
)
<|>
try
(
void
(
string
"OR"
)
>>
return
Or
)
<|>
(
void
(
string
"AND"
)
>>
return
And
)
<|>
(
void
(
string
"AND"
)
>>
return
And
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment