[conduit] fixes to the searchAxv function to properly parse results

parent a2d78abe
...@@ -11,6 +11,3 @@ main = do ...@@ -11,6 +11,3 @@ main = do
--let resC = searchAxv $ simpleQuery ["besov fluid"] --let resC = searchAxv $ simpleQuery ["besov fluid"]
--runConduitRes $ resC .| outSnk --runConduitRes $ resC .| outSnk
...@@ -16,8 +16,8 @@ import Network.HTTP.Types.Status ...@@ -16,8 +16,8 @@ import Network.HTTP.Types.Status
import Text.HTML.TagSoup import Text.HTML.TagSoup
import Text.Read (readMaybe) import Text.Read (readMaybe)
import qualified Conduit as C import qualified Conduit as C
import qualified Data.ByteString as B hiding (unpack) import qualified Data.ByteString as B hiding (unpack)
import qualified Data.ByteString.Char8 as B (unpack) import qualified Data.ByteString.Char8 as B (unpack)
import qualified Data.Conduit as C import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL import qualified Data.Conduit.List as CL
import qualified Network.Api.Arxiv as Ax import qualified Network.Api.Arxiv as Ax
...@@ -27,7 +27,7 @@ type Limit = Int ...@@ -27,7 +27,7 @@ type Limit = Int
type Soup = Tag String type Soup = Tag String
batchSize :: Int batchSize :: Int
batchSize = 3000 batchSize = 400
------------------------------------------------------------ ------------------------------------------------------------
...@@ -59,7 +59,7 @@ simpleQuery xs = Ax.Query { Ax.qExp = Just $ Exp $ Abs xs ...@@ -59,7 +59,7 @@ simpleQuery xs = Ax.Query { Ax.qExp = Just $ Exp $ Abs xs
, Ax.qItems = batchSize } , Ax.qItems = batchSize }
complexQuery :: Ax.Query complexQuery :: Ax.Query
complexQuery = complexQuery =
let au = Exp $ Au ["Aaronson"] let au = Exp $ Au ["Aaronson"]
t1 = Exp $ Ti ["quantum"] t1 = Exp $ Ti ["quantum"]
t2 = Exp $ Ti ["complexity"] t2 = Exp $ Ti ["complexity"]
...@@ -88,27 +88,23 @@ searchAxvBody q = ...@@ -88,27 +88,23 @@ searchAxvBody q =
st -> error $ "Error:" ++ show st st -> error $ "Error:" ++ show st
searchAxv' :: (Monad m, C.MonadIO m) => Ax.Query -> IO (Int, C.ConduitT () Result m ()) searchAxv' :: (Monad m, C.MonadIO m) => Ax.Query -> IO (Int, C.ConduitT () Result m ())
searchAxv' q = searchAxv' q =
let s = Ax.mkQuery q let s = Ax.mkQuery q
in do rsp <- HT.httpBS =<< liftIO (parseRequest s) in do rsp <- HT.httpBS =<< liftIO (parseRequest s)
case getResponseStatus rsp of case getResponseStatus rsp of
(Status 200 _) -> case getSoup (getResponseBody rsp) of (Status 200 _) -> case getSoup (getResponseBody rsp) of
[] -> pure (0, C.yieldMany []) [] -> pure (0, C.yieldMany [])
soups@(s:_) -> pure (Ax.totalResults soups, results q soups) soups@(s:_) -> pure (Ax.totalResults soups, results q soups)
st -> error $ "Error:" ++ show st st -> error $ "Error:" ++ show st
searchAxv :: (Monad m, C.MonadIO m) => Ax.Query -> C.ConduitT () Result m () searchAxv :: (Monad m, C.MonadIO m) => Ax.Query -> C.ConduitT () Result m ()
searchAxv q = searchAxv q =
let s = Ax.mkQuery q let s = Ax.mkQuery q
in do rsp <- HT.httpBS =<< liftIO (parseRequest s) in do rsp <- HT.httpBS =<< liftIO (parseRequest s)
case getResponseStatus rsp of case getResponseStatus rsp of
(Status 200 _) -> (Status 200 _) -> case getSoup (getResponseBody rsp) of
let body = getResponseBody rsp [] -> C.yieldMany []
in soups@(s:_) -> results q soups
-- If no mo results, break, otherwise this keeps looping for next page forever
case Ax.getEntry (getSoup body) of
([], _) -> C.yieldMany []
_ -> getSoupC body >>= results q
st -> error $ "Error:" ++ show st st -> error $ "Error:" ++ show st
---------------------------------------------------------------------- ----------------------------------------------------------------------
...@@ -136,8 +132,9 @@ toSoup = C.awaitForever (C.yield . parseTags . B.unpack) ...@@ -136,8 +132,9 @@ toSoup = C.awaitForever (C.yield . parseTags . B.unpack)
---------------------------------------------------------------------- ----------------------------------------------------------------------
results :: (Monad m, C.MonadIO m) => results :: (Monad m, C.MonadIO m) =>
Ax.Query -> [Soup] -> C.ConduitT () Result m () Ax.Query -> [Soup] -> C.ConduitT () Result m ()
results q sp = Ax.forEachEntryM sp (C.yield . mkResult) results q sp = do
>> searchAxv (Ax.nextPage q) Ax.forEachEntryM sp (C.yield . mkResult)
searchAxv (Ax.nextPage q)
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- Get data and format -- Get data and format
...@@ -182,4 +179,3 @@ mkResult sp = let abstract = Ax.getSummary sp & clean' ...@@ -182,4 +179,3 @@ mkResult sp = let abstract = Ax.getSummary sp & clean'
---------------------------------------------------------------------- ----------------------------------------------------------------------
outSnk :: (Monad m, C.MonadIO m) => C.ConduitT Result C.Void m () outSnk :: (Monad m, C.MonadIO m) => C.ConduitT Result C.Void m ()
outSnk = C.awaitForever (liftIO . putStrLn . show) outSnk = C.awaitForever (liftIO . putStrLn . show)
...@@ -69,7 +69,7 @@ where ...@@ -69,7 +69,7 @@ where
import qualified Data.List.Split as S import qualified Data.List.Split as S
import Control.Applicative ((<$>)) import Control.Applicative ((<$>))
import Control.Monad (void) import Control.Monad (void)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- import Debug.Trace (trace) -- import Debug.Trace (trace)
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -88,7 +88,7 @@ where ...@@ -88,7 +88,7 @@ where
-} -}
{- $RequestOv {- $RequestOv
Requests are URL parameters, Requests are URL parameters,
either \"search_query\" or \"id_list\". either \"search_query\" or \"id_list\".
This module provides functions This module provides functions
...@@ -96,13 +96,13 @@ where ...@@ -96,13 +96,13 @@ where
to create the full request string to create the full request string
and to navigate through a multi-page request and to navigate through a multi-page request
with a maximum number of items per page. with a maximum number of items per page.
For details of the Arxiv request format, For details of the Arxiv request format,
please refer to the Arxiv documentation. please refer to the Arxiv documentation.
-} -}
{- $ResponseOv {- $ResponseOv
Response processing expects [Tag String] as input (see TagSoup). Response processing expects [Tag String] as input (see TagSoup).
The result produced by your http library The result produced by your http library
(such as http-conduit) must be converted to [Tag String] (such as http-conduit) must be converted to [Tag String]
...@@ -142,7 +142,7 @@ where ...@@ -142,7 +142,7 @@ where
> >
> main :: IO () > main :: IO ()
> main = withSocketsDo (execQuery makeQuery) > main = withSocketsDo (execQuery makeQuery)
> >
> makeQuery :: Ax.Query > makeQuery :: Ax.Query
> makeQuery = > makeQuery =
> let au = Exp $ Au ["Aaronson"] > let au = Exp $ Au ["Aaronson"]
...@@ -175,7 +175,7 @@ where ...@@ -175,7 +175,7 @@ where
> ---------------------------------------------------------------------- > ----------------------------------------------------------------------
> -- Consume page by page > -- Consume page by page
> ---------------------------------------------------------------------- > ----------------------------------------------------------------------
> getSoup :: MonadResource m => > getSoup :: MonadResource m =>
> B.ByteString -> C.ConduitT () String m [Soup] > B.ByteString -> C.ConduitT () String m [Soup]
> getSoup b = concat <$> (C.yield b .| toSoup .| CL.consume) > getSoup b = concat <$> (C.yield b .| toSoup .| CL.consume)
> >
...@@ -195,7 +195,7 @@ where ...@@ -195,7 +195,7 @@ where
> then C.yield ("EOT: " ++ show (Ax.totalResults sp) ++ " results") > then C.yield ("EOT: " ++ show (Ax.totalResults sp) ++ " results")
> else Ax.forEachEntryM_ sp (C.yield . mkResult) > else Ax.forEachEntryM_ sp (C.yield . mkResult)
> >> searchAxv (Ax.nextPage q) > >> searchAxv (Ax.nextPage q)
> >
> ---------------------------------------------------------------------- > ----------------------------------------------------------------------
> -- Get data and format > -- Get data and format
> ---------------------------------------------------------------------- > ----------------------------------------------------------------------
...@@ -519,7 +519,7 @@ where ...@@ -519,7 +519,7 @@ where
-- to first item and results per page: -- to first item and results per page:
-- --
-- * 'Int': Start index for this page -- * 'Int': Start index for this page
-- --
-- * 'Int': Number of results per page. -- * 'Int': Number of results per page.
------------------------------------------------------------------------- -------------------------------------------------------------------------
itemControl :: Int -> Int -> String itemControl :: Int -> Int -> String
...@@ -621,7 +621,7 @@ where ...@@ -621,7 +621,7 @@ where
forEachEntryM :: Monad m => forEachEntryM :: Monad m =>
[Tag String] -> ([Tag String] -> m r) -> m [r] [Tag String] -> ([Tag String] -> m r) -> m [r]
forEachEntryM = forEachM "entry" forEachEntryM = forEachM "entry"
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Variant of 'forEachEntryM' for actions -- | Variant of 'forEachEntryM' for actions
-- that do not return a result. -- that do not return a result.
...@@ -975,7 +975,7 @@ where ...@@ -975,7 +975,7 @@ where
i <- fieldId i <- fieldId
ts <- terms ts <- terms
return (Exp $ i ts) return (Exp $ i ts)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- The field ids -- The field ids
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -1070,4 +1070,3 @@ where ...@@ -1070,4 +1070,3 @@ where
op = try (void (string "ANDNOT") >> return AndNot) op = try (void (string "ANDNOT") >> return AndNot)
<|> try (void (string "OR") >> return Or) <|> try (void (string "OR") >> return Or)
<|> (void (string "AND") >> return And) <|> (void (string "AND") >> return And)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment