[conduit] fixes to the searchAxv function to properly parse results

parent a2d78abe
......@@ -11,6 +11,3 @@ main = do
--let resC = searchAxv $ simpleQuery ["besov fluid"]
--runConduitRes $ resC .| outSnk
......@@ -16,8 +16,8 @@ import Network.HTTP.Types.Status
import Text.HTML.TagSoup
import Text.Read (readMaybe)
import qualified Conduit as C
import qualified Data.ByteString as B hiding (unpack)
import qualified Data.ByteString.Char8 as B (unpack)
import qualified Data.ByteString as B hiding (unpack)
import qualified Data.ByteString.Char8 as B (unpack)
import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL
import qualified Network.Api.Arxiv as Ax
......@@ -27,7 +27,7 @@ type Limit = Int
type Soup = Tag String
batchSize :: Int
batchSize = 3000
batchSize = 400
------------------------------------------------------------
......@@ -59,7 +59,7 @@ simpleQuery xs = Ax.Query { Ax.qExp = Just $ Exp $ Abs xs
, Ax.qItems = batchSize }
complexQuery :: Ax.Query
complexQuery =
complexQuery =
let au = Exp $ Au ["Aaronson"]
t1 = Exp $ Ti ["quantum"]
t2 = Exp $ Ti ["complexity"]
......@@ -88,27 +88,23 @@ searchAxvBody q =
st -> error $ "Error:" ++ show st
searchAxv' :: (Monad m, C.MonadIO m) => Ax.Query -> IO (Int, C.ConduitT () Result m ())
searchAxv' q =
searchAxv' q =
let s = Ax.mkQuery q
in do rsp <- HT.httpBS =<< liftIO (parseRequest s)
case getResponseStatus rsp of
(Status 200 _) -> case getSoup (getResponseBody rsp) of
[] -> pure (0, C.yieldMany [])
[] -> pure (0, C.yieldMany [])
soups@(s:_) -> pure (Ax.totalResults soups, results q soups)
st -> error $ "Error:" ++ show st
searchAxv :: (Monad m, C.MonadIO m) => Ax.Query -> C.ConduitT () Result m ()
searchAxv q =
searchAxv q =
let s = Ax.mkQuery q
in do rsp <- HT.httpBS =<< liftIO (parseRequest s)
case getResponseStatus rsp of
(Status 200 _) ->
let body = getResponseBody rsp
in
-- If no mo results, break, otherwise this keeps looping for next page forever
case Ax.getEntry (getSoup body) of
([], _) -> C.yieldMany []
_ -> getSoupC body >>= results q
(Status 200 _) -> case getSoup (getResponseBody rsp) of
[] -> C.yieldMany []
soups@(s:_) -> results q soups
st -> error $ "Error:" ++ show st
----------------------------------------------------------------------
......@@ -136,8 +132,9 @@ toSoup = C.awaitForever (C.yield . parseTags . B.unpack)
----------------------------------------------------------------------
results :: (Monad m, C.MonadIO m) =>
Ax.Query -> [Soup] -> C.ConduitT () Result m ()
results q sp = Ax.forEachEntryM sp (C.yield . mkResult)
>> searchAxv (Ax.nextPage q)
results q sp = do
Ax.forEachEntryM sp (C.yield . mkResult)
searchAxv (Ax.nextPage q)
----------------------------------------------------------------------
-- Get data and format
......@@ -182,4 +179,3 @@ mkResult sp = let abstract = Ax.getSummary sp & clean'
----------------------------------------------------------------------
outSnk :: (Monad m, C.MonadIO m) => C.ConduitT Result C.Void m ()
outSnk = C.awaitForever (liftIO . putStrLn . show)
......@@ -69,7 +69,7 @@ where
import qualified Data.List.Split as S
import Control.Applicative ((<$>))
import Control.Monad (void)
------------------------------------------------------------------------
-- import Debug.Trace (trace)
------------------------------------------------------------------------
......@@ -88,7 +88,7 @@ where
-}
{- $RequestOv
Requests are URL parameters,
either \"search_query\" or \"id_list\".
This module provides functions
......@@ -96,13 +96,13 @@ where
to create the full request string
and to navigate through a multi-page request
with a maximum number of items per page.
For details of the Arxiv request format,
please refer to the Arxiv documentation.
-}
{- $ResponseOv
Response processing expects [Tag String] as input (see TagSoup).
The result produced by your http library
(such as http-conduit) must be converted to [Tag String]
......@@ -142,7 +142,7 @@ where
>
> main :: IO ()
> main = withSocketsDo (execQuery makeQuery)
>
>
> makeQuery :: Ax.Query
> makeQuery =
> let au = Exp $ Au ["Aaronson"]
......@@ -175,7 +175,7 @@ where
> ----------------------------------------------------------------------
> -- Consume page by page
> ----------------------------------------------------------------------
> getSoup :: MonadResource m =>
> getSoup :: MonadResource m =>
> B.ByteString -> C.ConduitT () String m [Soup]
> getSoup b = concat <$> (C.yield b .| toSoup .| CL.consume)
>
......@@ -195,7 +195,7 @@ where
> then C.yield ("EOT: " ++ show (Ax.totalResults sp) ++ " results")
> else Ax.forEachEntryM_ sp (C.yield . mkResult)
> >> searchAxv (Ax.nextPage q)
>
>
> ----------------------------------------------------------------------
> -- Get data and format
> ----------------------------------------------------------------------
......@@ -519,7 +519,7 @@ where
-- to first item and results per page:
--
-- * 'Int': Start index for this page
--
--
-- * 'Int': Number of results per page.
-------------------------------------------------------------------------
itemControl :: Int -> Int -> String
......@@ -621,7 +621,7 @@ where
forEachEntryM :: Monad m =>
[Tag String] -> ([Tag String] -> m r) -> m [r]
forEachEntryM = forEachM "entry"
------------------------------------------------------------------------
-- | Variant of 'forEachEntryM' for actions
-- that do not return a result.
......@@ -975,7 +975,7 @@ where
i <- fieldId
ts <- terms
return (Exp $ i ts)
------------------------------------------------------------------------
-- The field ids
------------------------------------------------------------------------
......@@ -1070,4 +1070,3 @@ where
op = try (void (string "ANDNOT") >> return AndNot)
<|> try (void (string "OR") >> return Or)
<|> (void (string "AND") >> return And)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment