Commit 59c82079 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[WIP] first function to get Some [Result]

parent 91bce820
module Main where module Main where
import qualified Network.Api.Arxiv as Ax
import Network.Api.Arxiv (Expression(..),
Field(..), (/*/), (/+/),(/-/))
import Network.Socket (withSocketsDo)
import Network.HTTP.Simple as HT
import Network.HTTP.Conduit (parseRequest)
import Network.HTTP.Types.Status
import Data.List (intercalate)
import qualified Data.ByteString as B hiding (unpack)
import qualified Data.ByteString.Char8 as B (unpack)
import Data.Conduit ((.|))
import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL
import Data.Function ((&))
import Text.HTML.TagSoup
import Control.Monad.Trans (liftIO)
import Control.Monad.Trans.Resource (MonadResource)
import Control.Applicative ((<$>))
main :: IO () main :: IO ()
main = undefined main = withSocketsDo (execQuery makeQuery)
makeQuery :: Ax.Query
makeQuery =
let au = Exp $ Au ["Aaronson"]
t1 = Exp $ Ti ["quantum"]
t2 = Exp $ Ti ["complexity"]
x = au /*/ (t1 /+/ t2)
in Ax.Query {
Ax.qExp = Just x,
Ax.qIds = [],
Ax.qStart = 0,
Ax.qItems = 25}
type Soup = Tag String
execQuery :: Ax.Query -> IO ()
execQuery q = C.runConduitRes (searchAxv q .| outSnk)
----------------------------------------------------------------------
-- Execute query and start a source
----------------------------------------------------------------------
searchAxv :: MonadResource m => Ax.Query -> C.ConduitT () String m ()
searchAxv q =
let s = Ax.mkQuery q
in do rsp <- HT.httpBS =<< liftIO (parseRequest s)
case getResponseStatus rsp of
(Status 200 _) -> getSoup (getResponseBody rsp)
>>= results q
st -> error $ "Error:" ++ show st
----------------------------------------------------------------------
-- Consume page by page
----------------------------------------------------------------------
getSoup :: MonadResource m =>
B.ByteString -> C.ConduitT () String m [Soup]
getSoup b = concat <$> (C.yield b .| toSoup .| CL.consume)
----------------------------------------------------------------------
-- Receive a ByteString and yield Soup
----------------------------------------------------------------------
toSoup :: MonadResource m => C.ConduitT B.ByteString [Soup] m ()
toSoup = C.awaitForever (C.yield . parseTags . B.unpack)
----------------------------------------------------------------------
-- Yield all entries and fetch next page
----------------------------------------------------------------------
results :: MonadResource m =>
Ax.Query -> [Soup] -> C.ConduitT () String m ()
results q sp =
if Ax.exhausted sp
then C.yield ("EOT: " ++ show (Ax.totalResults sp) ++ " results")
else Ax.forEachEntryM_ sp (C.yield . mkResult)
>> searchAxv (Ax.nextPage q)
----------------------------------------------------------------------
-- Get data and format
----------------------------------------------------------------------
mkResult :: [Soup] -> String
mkResult sp = let aus = Ax.getAuthorNames sp
y = Ax.getYear sp
tmp = Ax.getTitle sp & clean ['\n', '\r', '\t']
ti = if null tmp then "No title" else tmp
in intercalate ", " aus ++ " (" ++ y ++ "): " ++ ti
where clean _ [] = []
clean d (c:cs) | c `elem` d = clean d cs
| otherwise = c:clean d cs
----------------------------------------------------------------------
-- Sink results
----------------------------------------------------------------------
outSnk :: MonadResource m => C.ConduitT String C.Void m ()
outSnk = C.awaitForever (liftIO . putStrLn)
cabal-version: 1.12 cabal-version: 1.12
-- This file has been generated from package.yaml by hpack version 0.33.0. -- This file has been generated from package.yaml by hpack version 0.34.4.
-- --
-- see: https://github.com/sol/hpack -- see: https://github.com/sol/hpack
-- --
-- hash: 4117de9e11172de2f2e05b5376a7917e5f32b501a9e1d8834ce2c0ff93e586d3 -- hash: 53def8dbc0673724afe7e2e7c3bc9611a27b13ca3997bcdceebf998e5c04082d
name: arxiv-api name: arxiv-api
version: 0.1.0.0 version: 0.1.0.0
description: Please see the README on GitHub at <https://github.com/githubuser/arxiv#readme> description: Please see the README on GitHub at <https://github.com/githubuser/arxiv#readme>
homepage: https://github.com/#readme homepage: https://github.com/delanoe/arxiv-api#readme
bug-reports: https://github.com//issues bug-reports: https://github.com/delanoe/arxiv-api/issues
author: Alexandre Delanoë author: Alexandre Delanoë
maintainer: alexandre+dev@delanoe.org maintainer: alexandre+dev@delanoe.org
copyright: 2021 CNRS/A. Delanoë copyright: 2021 CNRS/A. Delanoë
...@@ -23,11 +23,11 @@ extra-source-files: ...@@ -23,11 +23,11 @@ extra-source-files:
source-repository head source-repository head
type: git type: git
location: https://github.com/ location: https://github.com/delanoe/arxiv-api
library library
exposed-modules: exposed-modules:
Lib Arxiv
other-modules: other-modules:
Paths_arxiv_api Paths_arxiv_api
hs-source-dirs: hs-source-dirs:
......
name: arxiv-api name: arxiv-api
version: 0.1.0.0 version: 0.1.0.0
github: "" github: "delanoe/arxiv-api"
license: GPLv3 license: GPLv3
author: "Alexandre Delanoë" author: "Alexandre Delanoë"
maintainer: "alexandre+dev@delanoe.org" maintainer: "alexandre+dev@delanoe.org"
......
module Lib where module Arxiv where
import Data.Text (Text)
import qualified Network.Api.Arxiv as Ax import qualified Network.Api.Arxiv as Ax
import Network.Api.Arxiv (Expression(..), import Network.Api.Arxiv (Expression(..),
Field(..), (/*/), (/+/)) Field(..), (/*/), (/+/),(/-/))
import Network.Socket (withSocketsDo) import Network.Socket (withSocketsDo)
import Network.HTTP.Simple as HT import Network.HTTP.Simple as HT
import Network.HTTP.Conduit (parseRequest) import Network.HTTP.Conduit (parseRequest)
...@@ -21,59 +20,47 @@ import Control.Monad.Trans.Resource (MonadResource) ...@@ -21,59 +20,47 @@ import Control.Monad.Trans.Resource (MonadResource)
import Control.Applicative ((<$>)) import Control.Applicative ((<$>))
data ArxivDocument = main' :: IO [Result]
ArxivDocument { title :: Text main' = withSocketsDo (execQuery' makeQuery)
, date_update :: Text
, date_published :: Text
, date_year :: Text
, summary :: Text
, comment :: Text
, journal :: Text
, doi :: Text
-- , links :: [Link]
, pdfLink :: Maybe Text
}
search :: IO ()
search = withSocketsDo (execQuery makeQuery)
makeQuery :: Ax.Query makeQuery :: Ax.Query
makeQuery = makeQuery =
let au = Exp $ Au ["Aaronson"] let au = Exp $ Au ["Aaronson"]
t1 = Exp $ Ti ["quantum"] t1 = Exp $ Ti ["quantum"]
t2 = Exp $ Ti ["complexity"] t2 = Exp $ Ti ["complexity"]
x = au /*/ (t1 /+/ t2) x = au /*/ (t1 /+/ t2)
x' = Exp (Abs ["clustering"]) /*/ Exp (Abs ["louvain"])
in Ax.Query { in Ax.Query {
Ax.qExp = Just x', Ax.qExp = Just $ Exp $ All ["nuclear fusion"],
Ax.qIds = [], Ax.qIds = [],
Ax.qStart = 0, Ax.qStart = 500,
Ax.qItems = 25} Ax.qItems = 25}
type Soup = Tag String type Soup = Tag String
execQuery :: Ax.Query -> IO () execQuery' :: Ax.Query -> IO [Result]
execQuery q = C.runConduitRes (searchAxv q .| outSnk) execQuery' q = C.runConduitRes (searchAxv' q .| CL.consume) -- .| outSnk')
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- Execute query and start a source -- Execute query and start a source
---------------------------------------------------------------------- ----------------------------------------------------------------------
searchAxv :: MonadResource m => Ax.Query -> C.ConduitT () String m () searchAxv' :: MonadResource m => Ax.Query -> C.ConduitT () Result m ()
searchAxv q = searchAxv' q =
let s = Ax.mkQuery q let s = Ax.mkQuery q
in do rsp <- HT.httpBS =<< liftIO (parseRequest s) in do rsp <- HT.httpBS =<< liftIO (parseRequest s)
case getResponseStatus rsp of case getResponseStatus rsp of
(Status 200 _) -> getSoup (getResponseBody rsp) (Status 200 _) -> getSoup' (getResponseBody rsp) >>= results' q
>>= results q
st -> error $ "Error:" ++ show st st -> error $ "Error:" ++ show st
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- Consume page by page -- Consume page by page
---------------------------------------------------------------------- ----------------------------------------------------------------------
getSoup :: MonadResource m => getSoup' :: MonadResource m =>
B.ByteString -> C.ConduitT () String m [Soup] B.ByteString -> C.ConduitT () Result m [Soup]
getSoup b = concat <$> (C.yield b .| toSoup .| CL.consume) getSoup' b = concat <$> (C.yield b .| toSoup .| CL.consume)
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- Receive a ByteString and yield Soup -- Receive a ByteString and yield Soup
...@@ -84,30 +71,39 @@ toSoup = C.awaitForever (C.yield . parseTags . B.unpack) ...@@ -84,30 +71,39 @@ toSoup = C.awaitForever (C.yield . parseTags . B.unpack)
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- Yield all entries and fetch next page -- Yield all entries and fetch next page
---------------------------------------------------------------------- ----------------------------------------------------------------------
results :: MonadResource m => results' :: MonadResource m =>
Ax.Query -> [Soup] -> C.ConduitT () String m () Ax.Query -> [Soup] -> C.ConduitT () Result m ()
results q sp = results' q sp =
if Ax.exhausted sp Ax.forEachEntryM sp (C.yield . mkResult')
then C.yield ("EOT: " ++ show (Ax.totalResults sp) ++ " results") >> searchAxv' (Ax.nextPage q)
else Ax.forEachEntryM_ sp (C.yield . mkResult)
>> searchAxv (Ax.nextPage q)
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- Get data and format -- Get data and format
---------------------------------------------------------------------- ----------------------------------------------------------------------
mkResult :: [Soup] -> String data Result = Result { authorNames :: String
mkResult sp = let aus = Ax.getAuthorNames sp , year :: String
y = Ax.getYear sp , title :: String
tmp = Ax.getTitle sp & clean ['\n', '\r', '\t'] }
ti = if null tmp then "No title" else tmp
in intercalate ", " aus ++ " (" ++ y ++ "): " ++ ti mkResult' :: [Soup] -> Result
mkResult' sp = let aus = Ax.getAuthorNames sp
y = Ax.getYear sp
tmp = Ax.getTitle sp & clean ['\n', '\r', '\t']
ti = if null tmp then "No title" else tmp
in Result (intercalate " " aus) y ti
where clean _ [] = [] where clean _ [] = []
clean d (c:cs) | c `elem` d = clean d cs clean d (c:cs) | c `elem` d = clean d cs
| otherwise = c:clean d cs | otherwise = c:clean d cs
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- Sink results -- Sink results
---------------------------------------------------------------------- ----------------------------------------------------------------------
outSnk :: MonadResource m => C.ConduitT String C.Void m () outSnk :: MonadResource m => C.ConduitT String C.Void m ()
outSnk = C.awaitForever (liftIO . putStrLn) outSnk = C.awaitForever (liftIO . putStrLn)
{-
outSnk' :: MonadResource m => C.ConduitT Result Result m () -- [Result]
outSnk' = C.awaitForever (CL.consume) -- pure
-}
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
# resolver: ./custom-snapshot.yaml # resolver: ./custom-snapshot.yaml
# resolver: https://example.com/snapshots/2018-01-01.yaml # resolver: https://example.com/snapshots/2018-01-01.yaml
resolver: resolver:
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/17/2.yaml url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/18.yaml
# User packages to be built. # User packages to be built.
# Various formats can be used as shown in the example below. # Various formats can be used as shown in the example below.
...@@ -41,8 +41,7 @@ packages: ...@@ -41,8 +41,7 @@ packages:
# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a # commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# #
extra-deps: extra-deps:
- arxiv-0.0.2@sha256:2e9299c132af5838a756c3e4eaf879e0a0cfa9a2c7e2925ad2936df7d37bc993,1587 - arxiv-0.0.3@sha256:02de1114091d11f1f3ab401d104d125ad4301260806feb7f63b3dcefc7db88cf,1588
# Override default flag values for local packages and extra-deps # Override default flag values for local packages and extra-deps
# flags: {} # flags: {}
......
...@@ -5,16 +5,16 @@ ...@@ -5,16 +5,16 @@
packages: packages:
- completed: - completed:
hackage: arxiv-0.0.2@sha256:2e9299c132af5838a756c3e4eaf879e0a0cfa9a2c7e2925ad2936df7d37bc993,1587 hackage: arxiv-0.0.3@sha256:02de1114091d11f1f3ab401d104d125ad4301260806feb7f63b3dcefc7db88cf,1588
pantry-tree: pantry-tree:
size: 280 size: 283
sha256: 0a55ee0f4cb4337e0c8eea362b8895fe647493c53db76ed4d160589b79592fb4 sha256: 97318cdbfc5426addee56911001caa7948c5379556e873c32f50e37d6c1f970c
original: original:
hackage: arxiv-0.0.2@sha256:2e9299c132af5838a756c3e4eaf879e0a0cfa9a2c7e2925ad2936df7d37bc993,1587 hackage: arxiv-0.0.3@sha256:02de1114091d11f1f3ab401d104d125ad4301260806feb7f63b3dcefc7db88cf,1588
snapshots: snapshots:
- completed: - completed:
size: 563099 size: 586296
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/17/2.yaml url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/18.yaml
sha256: 92b1a17e31d0a978fca4bf270305d4d1dd8092271bf60eafbc9349c890854026 sha256: 63539429076b7ebbab6daa7656cfb079393bf644971156dc349d7c0453694ac2
original: original:
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/17/2.yaml url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/18.yaml
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment