Commit 59c82079 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[WIP] first function to get Some [Result]

parent 91bce820
module Main where
import qualified Network.Api.Arxiv as Ax
import Network.Api.Arxiv (Expression(..),
Field(..), (/*/), (/+/),(/-/))
import Network.Socket (withSocketsDo)
import Network.HTTP.Simple as HT
import Network.HTTP.Conduit (parseRequest)
import Network.HTTP.Types.Status
import Data.List (intercalate)
import qualified Data.ByteString as B hiding (unpack)
import qualified Data.ByteString.Char8 as B (unpack)
import Data.Conduit ((.|))
import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL
import Data.Function ((&))
import Text.HTML.TagSoup
import Control.Monad.Trans (liftIO)
import Control.Monad.Trans.Resource (MonadResource)
import Control.Applicative ((<$>))
main :: IO ()
main = undefined
main = withSocketsDo (execQuery makeQuery)
makeQuery :: Ax.Query
makeQuery =
let au = Exp $ Au ["Aaronson"]
t1 = Exp $ Ti ["quantum"]
t2 = Exp $ Ti ["complexity"]
x = au /*/ (t1 /+/ t2)
in Ax.Query {
Ax.qExp = Just x,
Ax.qIds = [],
Ax.qStart = 0,
Ax.qItems = 25}
type Soup = Tag String
execQuery :: Ax.Query -> IO ()
execQuery q = C.runConduitRes (searchAxv q .| outSnk)
----------------------------------------------------------------------
-- Execute query and start a source
----------------------------------------------------------------------
searchAxv :: MonadResource m => Ax.Query -> C.ConduitT () String m ()
searchAxv q =
let s = Ax.mkQuery q
in do rsp <- HT.httpBS =<< liftIO (parseRequest s)
case getResponseStatus rsp of
(Status 200 _) -> getSoup (getResponseBody rsp)
>>= results q
st -> error $ "Error:" ++ show st
----------------------------------------------------------------------
-- Consume page by page
----------------------------------------------------------------------
getSoup :: MonadResource m =>
B.ByteString -> C.ConduitT () String m [Soup]
getSoup b = concat <$> (C.yield b .| toSoup .| CL.consume)
----------------------------------------------------------------------
-- Receive a ByteString and yield Soup
----------------------------------------------------------------------
toSoup :: MonadResource m => C.ConduitT B.ByteString [Soup] m ()
toSoup = C.awaitForever (C.yield . parseTags . B.unpack)
----------------------------------------------------------------------
-- Yield all entries and fetch next page
----------------------------------------------------------------------
results :: MonadResource m =>
Ax.Query -> [Soup] -> C.ConduitT () String m ()
results q sp =
if Ax.exhausted sp
then C.yield ("EOT: " ++ show (Ax.totalResults sp) ++ " results")
else Ax.forEachEntryM_ sp (C.yield . mkResult)
>> searchAxv (Ax.nextPage q)
----------------------------------------------------------------------
-- Get data and format
----------------------------------------------------------------------
mkResult :: [Soup] -> String
mkResult sp = let aus = Ax.getAuthorNames sp
y = Ax.getYear sp
tmp = Ax.getTitle sp & clean ['\n', '\r', '\t']
ti = if null tmp then "No title" else tmp
in intercalate ", " aus ++ " (" ++ y ++ "): " ++ ti
where clean _ [] = []
clean d (c:cs) | c `elem` d = clean d cs
| otherwise = c:clean d cs
----------------------------------------------------------------------
-- Sink results
----------------------------------------------------------------------
outSnk :: MonadResource m => C.ConduitT String C.Void m ()
outSnk = C.awaitForever (liftIO . putStrLn)
cabal-version: 1.12
-- This file has been generated from package.yaml by hpack version 0.33.0.
-- This file has been generated from package.yaml by hpack version 0.34.4.
--
-- see: https://github.com/sol/hpack
--
-- hash: 4117de9e11172de2f2e05b5376a7917e5f32b501a9e1d8834ce2c0ff93e586d3
-- hash: 53def8dbc0673724afe7e2e7c3bc9611a27b13ca3997bcdceebf998e5c04082d
name: arxiv-api
version: 0.1.0.0
description: Please see the README on GitHub at <https://github.com/githubuser/arxiv#readme>
homepage: https://github.com/#readme
bug-reports: https://github.com//issues
homepage: https://github.com/delanoe/arxiv-api#readme
bug-reports: https://github.com/delanoe/arxiv-api/issues
author: Alexandre Delanoë
maintainer: alexandre+dev@delanoe.org
copyright: 2021 CNRS/A. Delanoë
......@@ -23,11 +23,11 @@ extra-source-files:
source-repository head
type: git
location: https://github.com/
location: https://github.com/delanoe/arxiv-api
library
exposed-modules:
Lib
Arxiv
other-modules:
Paths_arxiv_api
hs-source-dirs:
......
name: arxiv-api
version: 0.1.0.0
github: ""
github: "delanoe/arxiv-api"
license: GPLv3
author: "Alexandre Delanoë"
maintainer: "alexandre+dev@delanoe.org"
......
module Lib where
module Arxiv where
import Data.Text (Text)
import qualified Network.Api.Arxiv as Ax
import Network.Api.Arxiv (Expression(..),
Field(..), (/*/), (/+/))
Field(..), (/*/), (/+/),(/-/))
import Network.Socket (withSocketsDo)
import Network.HTTP.Simple as HT
import Network.HTTP.Conduit (parseRequest)
......@@ -21,59 +20,47 @@ import Control.Monad.Trans.Resource (MonadResource)
import Control.Applicative ((<$>))
data ArxivDocument =
ArxivDocument { title :: Text
, date_update :: Text
, date_published :: Text
, date_year :: Text
, summary :: Text
, comment :: Text
, journal :: Text
, doi :: Text
-- , links :: [Link]
, pdfLink :: Maybe Text
}
main' :: IO [Result]
main' = withSocketsDo (execQuery' makeQuery)
search :: IO ()
search = withSocketsDo (execQuery makeQuery)
makeQuery :: Ax.Query
makeQuery =
let au = Exp $ Au ["Aaronson"]
t1 = Exp $ Ti ["quantum"]
t2 = Exp $ Ti ["complexity"]
x = au /*/ (t1 /+/ t2)
x' = Exp (Abs ["clustering"]) /*/ Exp (Abs ["louvain"])
in Ax.Query {
Ax.qExp = Just x',
Ax.qExp = Just $ Exp $ All ["nuclear fusion"],
Ax.qIds = [],
Ax.qStart = 0,
Ax.qStart = 500,
Ax.qItems = 25}
type Soup = Tag String
execQuery :: Ax.Query -> IO ()
execQuery q = C.runConduitRes (searchAxv q .| outSnk)
execQuery' :: Ax.Query -> IO [Result]
execQuery' q = C.runConduitRes (searchAxv' q .| CL.consume) -- .| outSnk')
----------------------------------------------------------------------
-- Execute query and start a source
----------------------------------------------------------------------
searchAxv :: MonadResource m => Ax.Query -> C.ConduitT () String m ()
searchAxv q =
searchAxv' :: MonadResource m => Ax.Query -> C.ConduitT () Result m ()
searchAxv' q =
let s = Ax.mkQuery q
in do rsp <- HT.httpBS =<< liftIO (parseRequest s)
case getResponseStatus rsp of
(Status 200 _) -> getSoup (getResponseBody rsp)
>>= results q
(Status 200 _) -> getSoup' (getResponseBody rsp) >>= results' q
st -> error $ "Error:" ++ show st
----------------------------------------------------------------------
-- Consume page by page
----------------------------------------------------------------------
getSoup :: MonadResource m =>
B.ByteString -> C.ConduitT () String m [Soup]
getSoup b = concat <$> (C.yield b .| toSoup .| CL.consume)
getSoup' :: MonadResource m =>
B.ByteString -> C.ConduitT () Result m [Soup]
getSoup' b = concat <$> (C.yield b .| toSoup .| CL.consume)
----------------------------------------------------------------------
-- Receive a ByteString and yield Soup
......@@ -84,30 +71,39 @@ toSoup = C.awaitForever (C.yield . parseTags . B.unpack)
----------------------------------------------------------------------
-- Yield all entries and fetch next page
----------------------------------------------------------------------
results :: MonadResource m =>
Ax.Query -> [Soup] -> C.ConduitT () String m ()
results q sp =
if Ax.exhausted sp
then C.yield ("EOT: " ++ show (Ax.totalResults sp) ++ " results")
else Ax.forEachEntryM_ sp (C.yield . mkResult)
>> searchAxv (Ax.nextPage q)
results' :: MonadResource m =>
Ax.Query -> [Soup] -> C.ConduitT () Result m ()
results' q sp =
Ax.forEachEntryM sp (C.yield . mkResult')
>> searchAxv' (Ax.nextPage q)
----------------------------------------------------------------------
-- Get data and format
----------------------------------------------------------------------
mkResult :: [Soup] -> String
mkResult sp = let aus = Ax.getAuthorNames sp
y = Ax.getYear sp
tmp = Ax.getTitle sp & clean ['\n', '\r', '\t']
ti = if null tmp then "No title" else tmp
in intercalate ", " aus ++ " (" ++ y ++ "): " ++ ti
data Result = Result { authorNames :: String
, year :: String
, title :: String
}
mkResult' :: [Soup] -> Result
mkResult' sp = let aus = Ax.getAuthorNames sp
y = Ax.getYear sp
tmp = Ax.getTitle sp & clean ['\n', '\r', '\t']
ti = if null tmp then "No title" else tmp
in Result (intercalate " " aus) y ti
where clean _ [] = []
clean d (c:cs) | c `elem` d = clean d cs
| otherwise = c:clean d cs
----------------------------------------------------------------------
-- Sink results
----------------------------------------------------------------------
outSnk :: MonadResource m => C.ConduitT String C.Void m ()
outSnk = C.awaitForever (liftIO . putStrLn)
{-
outSnk' :: MonadResource m => C.ConduitT Result Result m () -- [Result]
outSnk' = C.awaitForever (CL.consume) -- pure
-}
......@@ -18,7 +18,7 @@
# resolver: ./custom-snapshot.yaml
# resolver: https://example.com/snapshots/2018-01-01.yaml
resolver:
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/17/2.yaml
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/18.yaml
# User packages to be built.
# Various formats can be used as shown in the example below.
......@@ -41,8 +41,7 @@ packages:
# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
#
extra-deps:
- arxiv-0.0.2@sha256:2e9299c132af5838a756c3e4eaf879e0a0cfa9a2c7e2925ad2936df7d37bc993,1587
- arxiv-0.0.3@sha256:02de1114091d11f1f3ab401d104d125ad4301260806feb7f63b3dcefc7db88cf,1588
# Override default flag values for local packages and extra-deps
# flags: {}
......
......@@ -5,16 +5,16 @@
packages:
- completed:
hackage: arxiv-0.0.2@sha256:2e9299c132af5838a756c3e4eaf879e0a0cfa9a2c7e2925ad2936df7d37bc993,1587
hackage: arxiv-0.0.3@sha256:02de1114091d11f1f3ab401d104d125ad4301260806feb7f63b3dcefc7db88cf,1588
pantry-tree:
size: 280
sha256: 0a55ee0f4cb4337e0c8eea362b8895fe647493c53db76ed4d160589b79592fb4
size: 283
sha256: 97318cdbfc5426addee56911001caa7948c5379556e873c32f50e37d6c1f970c
original:
hackage: arxiv-0.0.2@sha256:2e9299c132af5838a756c3e4eaf879e0a0cfa9a2c7e2925ad2936df7d37bc993,1587
hackage: arxiv-0.0.3@sha256:02de1114091d11f1f3ab401d104d125ad4301260806feb7f63b3dcefc7db88cf,1588
snapshots:
- completed:
size: 563099
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/17/2.yaml
sha256: 92b1a17e31d0a978fca4bf270305d4d1dd8092271bf60eafbc9349c890854026
size: 586296
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/18.yaml
sha256: 63539429076b7ebbab6daa7656cfb079393bf644971156dc349d7c0453694ac2
original:
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/17/2.yaml
url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/18.yaml
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment