Commit f64cd01e authored by Alexandre Delanoë's avatar Alexandre Delanoë

[CLEAN] api useable for GarganText core parsers

parent b635d31b
module Main where
import Arxiv (main')
import Arxiv
main :: IO ()
main = undefined
......
module Arxiv where
module Arxiv
where
import Control.Applicative ((<$>))
import Control.Monad.Trans (liftIO)
......@@ -6,66 +7,72 @@ import Control.Monad.Trans.Resource (MonadResource)
import Data.Conduit ((.|))
import Data.Function ((&))
import Data.List (intercalate)
import Debug.Trace (trace)
import Network.Api.Arxiv (Expression(..), Field(..), (/*/), (/+/),(/-/))
import Network.HTTP.Conduit (parseRequest)
import Network.HTTP.Simple as HT
import Network.HTTP.Types.Status
import Network.Socket (withSocketsDo)
import Text.HTML.TagSoup
import Debug.Trace (trace)
import qualified Data.ByteString as B hiding (unpack)
import qualified Data.ByteString.Char8 as B (unpack)
import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL
import qualified Network.Api.Arxiv as Ax
main'' :: IO ()
main'' = withSocketsDo (execQuery makeQuery)
main' :: IO [Result]
main' = withSocketsDo (execQuery' makeQuery)
------------------------------------------------------------
type Limit = Int
------------------------------------------------------------
-- | Simple Query search for terms in All fields
apiSimple :: Maybe Limit -> [String] -> IO [Result]
apiSimple limit query = withSocketsDo (execQuery limit $ simpleQuery query)
-- | Complex Query search for terms in specific fields to be defined by Ax.Query
apiComplex :: Maybe Limit -> Ax.Query -> IO [Result]
apiComplex limit query = withSocketsDo (execQuery limit query)
------------------------------------------------------------
simpleQuery :: [String] -> Ax.Query
simpleQuery xs = Ax.Query (Just $ Exp $ All xs) [] 0 25
makeQuery :: Ax.Query
makeQuery =
complexQuery :: Ax.Query
complexQuery =
let au = Exp $ Au ["Aaronson"]
t1 = Exp $ Ti ["quantum"]
t2 = Exp $ Ti ["complexity"]
x = au /*/ (t1 /+/ t2)
in Ax.Query {
Ax.qExp = Just $ Exp $ All ["nuclear fusion"],
Ax.qIds = [],
Ax.qStart = 0,
Ax.qItems = 25}
in Ax.Query { Ax.qExp = Just $ Exp $ All ["nuclear fusion", "plasma"]
, Ax.qIds = []
, Ax.qStart = 0
, Ax.qItems = 25
}
type Soup = Tag String
type Soup = Tag String
execQuery :: Ax.Query -> IO ()
execQuery q = C.runConduitRes (searchAxv' q .| outSnk)
execQuery' :: Ax.Query -> IO [Result]
execQuery' q = C.runConduitRes (searchAxv' q .| CL.take 10) -- .| outSnk')
execQuery :: Maybe Limit -> Ax.Query -> IO [Result]
execQuery Nothing q = C.runConduitRes (searchAxv q .| CL.consume)
execQuery (Just l) q = C.runConduitRes (searchAxv q .| CL.take l)
-- execQuery q = C.runConduitRes (searchAxv' q .| outSnk)
----------------------------------------------------------------------
-- Execute query and start a source
----------------------------------------------------------------------
searchAxv' :: MonadResource m => Ax.Query -> C.ConduitT () Result m ()
searchAxv' q =
searchAxv :: MonadResource m => Ax.Query -> C.ConduitT () Result m ()
searchAxv q =
let s = Ax.mkQuery q
in do rsp <- HT.httpBS =<< liftIO (parseRequest s)
case getResponseStatus rsp of
(Status 200 _) -> getSoup' (getResponseBody rsp) >>= results' q
(Status 200 _) -> getSoup (getResponseBody rsp) >>= results q
st -> error $ "Error:" ++ show st
----------------------------------------------------------------------
-- Consume page by page
----------------------------------------------------------------------
getSoup' :: MonadResource m =>
getSoup :: MonadResource m =>
B.ByteString -> C.ConduitT () Result m [Soup]
getSoup' b = concat <$> (C.yield b .| toSoup .| CL.consume)
getSoup b = concat <$> (C.yield b .| toSoup .| CL.consume)
----------------------------------------------------------------------
......@@ -77,39 +84,53 @@ toSoup = C.awaitForever (C.yield . parseTags . B.unpack)
----------------------------------------------------------------------
-- Yield all entries and fetch next page
----------------------------------------------------------------------
results' :: MonadResource m =>
results :: MonadResource m =>
Ax.Query -> [Soup] -> C.ConduitT () Result m ()
results' q sp =
Ax.forEachEntryM sp (C.yield . mkResult')
>> searchAxv' (Ax.nextPage q)
results q sp =
Ax.forEachEntryM sp (C.yield . mkResult)
>> searchAxv (Ax.nextPage q)
----------------------------------------------------------------------
-- Get data and format
----------------------------------------------------------------------
data Result = Result { authorNames :: String
, year :: String
, title :: String
data Result = Result { doi :: String
, url :: String
, primaryCategory :: Maybe Ax.Category
, categories :: [Ax.Category]
, journal :: String
, authors :: [Ax.Author]
, publication_date :: String
, year :: String
, title :: String
, abstract :: String
} deriving (Show)
mkResult' :: [Soup] -> Result
mkResult' sp = let aus = Ax.getAuthorNames sp
y = Ax.getYear sp
tmp = Ax.getTitle sp & clean ['\n', '\r', '\t']
ti = if null tmp then "No title" else tmp
in trace ti $ Result (intercalate " " aus) y ti
mkResult :: [Soup] -> Result
mkResult sp = let doi' = Ax.getDoi sp
url' = Ax.getPdf sp
primaryCategory' = Ax.getPrimaryCategory sp
categories' = Ax.getCategories sp
journal' = Ax.getJournal sp
authors' = Ax.getAuthors sp
publication_date'= Ax.getPublished sp
year' = Ax.getYear sp
title_tmp = Ax.getTitle sp & clean ['\n', '\r', '\t']
title' = if null title_tmp then "No title" else title_tmp
abstract' = Ax.getSummary sp
in ( Result doi' url'
primaryCategory' categories'
journal' authors'
publication_date' year'
title' abstract'
)
where clean _ [] = []
clean d (c:cs) | c `elem` d = clean d cs
clean d (c:cs) | c `elem` d = " " <> clean d cs
| otherwise = c:clean d cs
----------------------------------------------------------------------
-- Sink results
-- Sink results To print
----------------------------------------------------------------------
outSnk :: MonadResource m => C.ConduitT Result C.Void m ()
outSnk = C.awaitForever (liftIO . putStrLn . show)
{-
outSnk' :: MonadResource m => C.ConduitT Result Result m () -- [Result]
outSnk' = C.awaitForever (CL.consume) -- pure
-}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment