Commit 84b68adb authored by Przemyslaw Kaminski

bring back Arxiv module, better Result type

parent a7a556b6
module Main where module Main where
import Arxiv.Wrapper import Arxiv
import Conduit import Conduit
main :: IO () main :: IO ()
......
...@@ -25,11 +25,13 @@ source-repository head ...@@ -25,11 +25,13 @@ source-repository head
library library
exposed-modules: exposed-modules:
Arxiv.Wrapper Arxiv
other-modules: other-modules:
Paths_crawlerArxiv Paths_crawlerArxiv
hs-source-dirs: hs-source-dirs:
src src
default-extensions:
RecordWildCards
build-depends: build-depends:
arxiv arxiv
, base >=4.7 && <5 , base >=4.7 && <5
...@@ -50,6 +52,8 @@ executable arxiv-exe ...@@ -50,6 +52,8 @@ executable arxiv-exe
Paths_crawlerArxiv Paths_crawlerArxiv
hs-source-dirs: hs-source-dirs:
app app
default-extensions:
RecordWildCards
ghc-options: -threaded -rtsopts -with-rtsopts=-N ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends: build-depends:
arxiv arxiv
...@@ -73,6 +77,8 @@ test-suite arxiv-test ...@@ -73,6 +77,8 @@ test-suite arxiv-test
Paths_crawlerArxiv Paths_crawlerArxiv
hs-source-dirs: hs-source-dirs:
test test
default-extensions:
RecordWildCards
ghc-options: -threaded -rtsopts -with-rtsopts=-N ghc-options: -threaded -rtsopts -with-rtsopts=-N
build-depends: build-depends:
arxiv arxiv
......
...@@ -22,6 +22,9 @@ extra-source-files: ...@@ -22,6 +22,9 @@ extra-source-files:
# common to point users to the README.md file. # common to point users to the README.md file.
description: Please see the README on GitHub at <https://github.com/githubuser/arxiv#readme> description: Please see the README on GitHub at <https://github.com/githubuser/arxiv#readme>
default-extensions:
- RecordWildCards
dependencies: dependencies:
- arxiv - arxiv
- base >= 4.7 && < 5 - base >= 4.7 && < 5
......
module Arxiv.Wrapper module Arxiv
where where
import Control.Applicative ((<$>)) import Control.Applicative ((<$>))
...@@ -14,6 +14,7 @@ import Network.HTTP.Conduit (parseRequest) ...@@ -14,6 +14,7 @@ import Network.HTTP.Conduit (parseRequest)
import Network.HTTP.Simple as HT import Network.HTTP.Simple as HT
import Network.HTTP.Types.Status import Network.HTTP.Types.Status
import Text.HTML.TagSoup import Text.HTML.TagSoup
import Text.Read (readMaybe)
import qualified Conduit as C import qualified Conduit as C
import qualified Data.ByteString as B hiding (unpack) import qualified Data.ByteString as B hiding (unpack)
import qualified Data.ByteString.Char8 as B (unpack) import qualified Data.ByteString.Char8 as B (unpack)
...@@ -129,38 +130,32 @@ results q sp = Ax.forEachEntryM sp (C.yield . mkResult) ...@@ -129,38 +130,32 @@ results q sp = Ax.forEachEntryM sp (C.yield . mkResult)
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- Get data and format -- Get data and format
---------------------------------------------------------------------- ----------------------------------------------------------------------
data Result = Result { doi :: String data Result = Result { abstract :: String
, url :: String , authors :: [Ax.Author]
, primaryCategory :: Maybe Ax.Category
, categories :: [Ax.Category] , categories :: [Ax.Category]
, doi :: String
, journal :: String , journal :: String
, authors :: [Ax.Author] , primaryCategory :: Maybe Ax.Category
, publication_date :: String , publication_date :: String
, year :: String
, title :: String , title :: String
, abstract :: String
, total :: Int , total :: Int
, url :: String
, year :: Maybe Int
} deriving (Show) } deriving (Show)
mkResult :: [Soup] -> Result mkResult :: [Soup] -> Result
mkResult sp = let doi' = Ax.getDoi sp mkResult sp = let abstract = Ax.getSummary sp & clean'
url' = Ax.getPdf sp authors = Ax.getAuthors sp
primaryCategory' = Ax.getPrimaryCategory sp categories = Ax.getCategories sp
categories' = Ax.getCategories sp doi = Ax.getDoi sp
journal' = Ax.getJournal sp journal = Ax.getJournal sp
authors' = Ax.getAuthors sp primaryCategory = Ax.getPrimaryCategory sp
publication_date'= Ax.getPublished sp publication_date = Ax.getPublished sp
year' = Ax.getYear sp title = Ax.getTitle sp & clean'
title' = Ax.getTitle sp & clean' total = Ax.totalResults sp
abstract' = Ax.getSummary sp & clean' url = Ax.getPdf sp
total' = Ax.totalResults sp year = readMaybe $ Ax.getYear sp
in ( Result doi' url' in ( Result { .. } )
primaryCategory' categories'
journal' authors'
publication_date' year'
title' abstract'
total'
)
where clean' x = let x' = clean ['\n', '\r', '\t'] x where clean' x = let x' = clean ['\n', '\r', '\t'] x
in if null x' then "Not found" else x' in if null x' then "Not found" else x'
clean _ [] = [] clean _ [] = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment