Commit f64cd01e authored by Alexandre Delanoë's avatar Alexandre Delanoë

[CLEAN] api useable for GarganText core parsers

parent b635d31b
module Main where module Main where
import Arxiv (main') import Arxiv
main :: IO () main :: IO ()
main = undefined main = undefined
......
module Arxiv where module Arxiv
where
import Control.Applicative ((<$>)) import Control.Applicative ((<$>))
import Control.Monad.Trans (liftIO) import Control.Monad.Trans (liftIO)
...@@ -6,66 +7,72 @@ import Control.Monad.Trans.Resource (MonadResource) ...@@ -6,66 +7,72 @@ import Control.Monad.Trans.Resource (MonadResource)
import Data.Conduit ((.|)) import Data.Conduit ((.|))
import Data.Function ((&)) import Data.Function ((&))
import Data.List (intercalate) import Data.List (intercalate)
import Debug.Trace (trace)
import Network.Api.Arxiv (Expression(..), Field(..), (/*/), (/+/),(/-/)) import Network.Api.Arxiv (Expression(..), Field(..), (/*/), (/+/),(/-/))
import Network.HTTP.Conduit (parseRequest) import Network.HTTP.Conduit (parseRequest)
import Network.HTTP.Simple as HT import Network.HTTP.Simple as HT
import Network.HTTP.Types.Status import Network.HTTP.Types.Status
import Network.Socket (withSocketsDo) import Network.Socket (withSocketsDo)
import Text.HTML.TagSoup import Text.HTML.TagSoup
import Debug.Trace (trace)
import qualified Data.ByteString as B hiding (unpack) import qualified Data.ByteString as B hiding (unpack)
import qualified Data.ByteString.Char8 as B (unpack) import qualified Data.ByteString.Char8 as B (unpack)
import qualified Data.Conduit as C import qualified Data.Conduit as C
import qualified Data.Conduit.List as CL import qualified Data.Conduit.List as CL
import qualified Network.Api.Arxiv as Ax import qualified Network.Api.Arxiv as Ax
main'' :: IO () ------------------------------------------------------------
main'' = withSocketsDo (execQuery makeQuery) type Limit = Int
main' :: IO [Result] ------------------------------------------------------------
main' = withSocketsDo (execQuery' makeQuery) -- | Simple Query search for terms in All fields
apiSimple :: Maybe Limit -> [String] -> IO [Result]
apiSimple limit query = withSocketsDo (execQuery limit $ simpleQuery query)
-- | Complex Query search for terms in specific fields to be defined by Ax.Query
apiComplex :: Maybe Limit -> Ax.Query -> IO [Result]
apiComplex limit query = withSocketsDo (execQuery limit query)
------------------------------------------------------------
simpleQuery :: [String] -> Ax.Query
simpleQuery xs = Ax.Query (Just $ Exp $ All xs) [] 0 25
makeQuery :: Ax.Query complexQuery :: Ax.Query
makeQuery = complexQuery =
let au = Exp $ Au ["Aaronson"] let au = Exp $ Au ["Aaronson"]
t1 = Exp $ Ti ["quantum"] t1 = Exp $ Ti ["quantum"]
t2 = Exp $ Ti ["complexity"] t2 = Exp $ Ti ["complexity"]
x = au /*/ (t1 /+/ t2) x = au /*/ (t1 /+/ t2)
in Ax.Query { in Ax.Query { Ax.qExp = Just $ Exp $ All ["nuclear fusion", "plasma"]
Ax.qExp = Just $ Exp $ All ["nuclear fusion"], , Ax.qIds = []
Ax.qIds = [], , Ax.qStart = 0
Ax.qStart = 0, , Ax.qItems = 25
Ax.qItems = 25} }
type Soup = Tag String
type Soup = Tag String execQuery :: Maybe Limit -> Ax.Query -> IO [Result]
execQuery Nothing q = C.runConduitRes (searchAxv q .| CL.consume)
execQuery :: Ax.Query -> IO () execQuery (Just l) q = C.runConduitRes (searchAxv q .| CL.take l)
execQuery q = C.runConduitRes (searchAxv' q .| outSnk) -- execQuery q = C.runConduitRes (searchAxv' q .| outSnk)
execQuery' :: Ax.Query -> IO [Result]
execQuery' q = C.runConduitRes (searchAxv' q .| CL.take 10) -- .| outSnk')
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- Execute query and start a source -- Execute query and start a source
---------------------------------------------------------------------- ----------------------------------------------------------------------
searchAxv' :: MonadResource m => Ax.Query -> C.ConduitT () Result m () searchAxv :: MonadResource m => Ax.Query -> C.ConduitT () Result m ()
searchAxv' q = searchAxv q =
let s = Ax.mkQuery q let s = Ax.mkQuery q
in do rsp <- HT.httpBS =<< liftIO (parseRequest s) in do rsp <- HT.httpBS =<< liftIO (parseRequest s)
case getResponseStatus rsp of case getResponseStatus rsp of
(Status 200 _) -> getSoup' (getResponseBody rsp) >>= results' q (Status 200 _) -> getSoup (getResponseBody rsp) >>= results q
st -> error $ "Error:" ++ show st st -> error $ "Error:" ++ show st
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- Consume page by page -- Consume page by page
---------------------------------------------------------------------- ----------------------------------------------------------------------
getSoup' :: MonadResource m => getSoup :: MonadResource m =>
B.ByteString -> C.ConduitT () Result m [Soup] B.ByteString -> C.ConduitT () Result m [Soup]
getSoup' b = concat <$> (C.yield b .| toSoup .| CL.consume) getSoup b = concat <$> (C.yield b .| toSoup .| CL.consume)
---------------------------------------------------------------------- ----------------------------------------------------------------------
...@@ -77,39 +84,53 @@ toSoup = C.awaitForever (C.yield . parseTags . B.unpack) ...@@ -77,39 +84,53 @@ toSoup = C.awaitForever (C.yield . parseTags . B.unpack)
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- Yield all entries and fetch next page -- Yield all entries and fetch next page
---------------------------------------------------------------------- ----------------------------------------------------------------------
results' :: MonadResource m => results :: MonadResource m =>
Ax.Query -> [Soup] -> C.ConduitT () Result m () Ax.Query -> [Soup] -> C.ConduitT () Result m ()
results' q sp = results q sp =
Ax.forEachEntryM sp (C.yield . mkResult') Ax.forEachEntryM sp (C.yield . mkResult)
>> searchAxv' (Ax.nextPage q) >> searchAxv (Ax.nextPage q)
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- Get data and format -- Get data and format
---------------------------------------------------------------------- ----------------------------------------------------------------------
data Result = Result { authorNames :: String data Result = Result { doi :: String
, year :: String , url :: String
, title :: String , primaryCategory :: Maybe Ax.Category
, categories :: [Ax.Category]
, journal :: String
, authors :: [Ax.Author]
, publication_date :: String
, year :: String
, title :: String
, abstract :: String
} deriving (Show) } deriving (Show)
mkResult' :: [Soup] -> Result mkResult :: [Soup] -> Result
mkResult' sp = let aus = Ax.getAuthorNames sp mkResult sp = let doi' = Ax.getDoi sp
y = Ax.getYear sp url' = Ax.getPdf sp
tmp = Ax.getTitle sp & clean ['\n', '\r', '\t'] primaryCategory' = Ax.getPrimaryCategory sp
ti = if null tmp then "No title" else tmp categories' = Ax.getCategories sp
in trace ti $ Result (intercalate " " aus) y ti journal' = Ax.getJournal sp
authors' = Ax.getAuthors sp
publication_date'= Ax.getPublished sp
year' = Ax.getYear sp
title_tmp = Ax.getTitle sp & clean ['\n', '\r', '\t']
title' = if null title_tmp then "No title" else title_tmp
abstract' = Ax.getSummary sp
in ( Result doi' url'
primaryCategory' categories'
journal' authors'
publication_date' year'
title' abstract'
)
where clean _ [] = [] where clean _ [] = []
clean d (c:cs) | c `elem` d = clean d cs clean d (c:cs) | c `elem` d = " " <> clean d cs
| otherwise = c:clean d cs | otherwise = c:clean d cs
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- Sink results -- Sink results To print
---------------------------------------------------------------------- ----------------------------------------------------------------------
outSnk :: MonadResource m => C.ConduitT Result C.Void m () outSnk :: MonadResource m => C.ConduitT Result C.Void m ()
outSnk = C.awaitForever (liftIO . putStrLn . show) outSnk = C.awaitForever (liftIO . putStrLn . show)
{-
outSnk' :: MonadResource m => C.ConduitT Result Result m () -- [Result]
outSnk' = C.awaitForever (CL.consume) -- pure
-}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment