Commit 84b68adb authored by Przemyslaw Kaminski

bring back Arxiv module, better Result type

parent a7a556b6
module Main where
import Arxiv.Wrapper
import Arxiv
import Conduit
main :: IO ()
......
......@@ -25,11 +25,13 @@ source-repository head
library
exposed-modules:
Arxiv.Wrapper
Arxiv
other-modules:
Paths_crawlerArxiv
hs-source-dirs:
src
default-extensions:
RecordWildCards
build-depends:
arxiv
, base >=4.7 && <5
......@@ -50,6 +52,8 @@ executable arxiv-exe
Paths_crawlerArxiv
hs-source-dirs:
app
default-extensions:
RecordWildCards
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends:
arxiv
......@@ -73,6 +77,8 @@ test-suite arxiv-test
Paths_crawlerArxiv
hs-source-dirs:
test
default-extensions:
RecordWildCards
ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends:
arxiv
......
......@@ -22,6 +22,9 @@ extra-source-files:
# common to point users to the README.md file.
description: Please see the README on GitHub at <https://github.com/githubuser/arxiv#readme>
default-extensions:
- RecordWildCards
dependencies:
- arxiv
- base >= 4.7 && < 5
......
module Arxiv.Wrapper
module Arxiv
where
import Control.Applicative ((<$>))
......@@ -14,6 +14,7 @@ import Network.HTTP.Conduit (parseRequest)
import Network.HTTP.Simple as HT
import Network.HTTP.Types.Status
import Text.HTML.TagSoup
import Text.Read (readMaybe)
import qualified Conduit as C
import qualified Data.ByteString as B hiding (unpack)
import qualified Data.ByteString.Char8 as B (unpack)
......@@ -129,38 +130,32 @@ results q sp = Ax.forEachEntryM sp (C.yield . mkResult)
----------------------------------------------------------------------
-- Get data and format
----------------------------------------------------------------------
data Result = Result { doi :: String
, url :: String
, primaryCategory :: Maybe Ax.Category
data Result = Result { abstract :: String
, authors :: [Ax.Author]
, categories :: [Ax.Category]
, doi :: String
, journal :: String
, authors :: [Ax.Author]
, primaryCategory :: Maybe Ax.Category
, publication_date :: String
, year :: String
, title :: String
, abstract :: String
, total :: Int
, url :: String
, year :: Maybe Int
} deriving (Show)
mkResult :: [Soup] -> Result
mkResult sp = let doi' = Ax.getDoi sp
url' = Ax.getPdf sp
primaryCategory' = Ax.getPrimaryCategory sp
categories' = Ax.getCategories sp
journal' = Ax.getJournal sp
authors' = Ax.getAuthors sp
publication_date'= Ax.getPublished sp
year' = Ax.getYear sp
title' = Ax.getTitle sp & clean'
abstract' = Ax.getSummary sp & clean'
total' = Ax.totalResults sp
in ( Result doi' url'
primaryCategory' categories'
journal' authors'
publication_date' year'
title' abstract'
total'
)
mkResult sp = let abstract = Ax.getSummary sp & clean'
authors = Ax.getAuthors sp
categories = Ax.getCategories sp
doi = Ax.getDoi sp
journal = Ax.getJournal sp
primaryCategory = Ax.getPrimaryCategory sp
publication_date = Ax.getPublished sp
title = Ax.getTitle sp & clean'
total = Ax.totalResults sp
url = Ax.getPdf sp
year = readMaybe $ Ax.getYear sp
in ( Result { .. } )
where clean' x = let x' = clean ['\n', '\r', '\t'] x
in if null x' then "Not found" else x'
clean _ [] = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment