Conduit implemented for works, fixes for types

parent 5aac73a7
module Main where
import Conduit
import Options.Applicative.Simple
import Protolude
import qualified OpenAlex as OA
import qualified OpenAlex.Types as OA
main :: IO ()
main = do
......@@ -20,13 +22,16 @@ main = do
"Fetch OpenAlex works (https://docs.openalex.org/api-entities/works/work-object)"
(const fetchWorks)
(pure ())
addCommand "works-c"
"Fetch OpenAlex works (https://docs.openalex.org/api-entities/works/work-object) with conduit"
(const fetchWorksC)
(pure ())
runCmd ()
fetchConcepts :: () -> IO ()
fetchConcepts _ = do
-- ec <- OA.fetchConcepts (Just 1) (Just 1) Nothing
ec <- OA.fetchConcepts (Just 1) (Just 1) (Just "*")
case ec of
Left err -> putText $ "error: " <> show err
......@@ -35,9 +40,22 @@ fetchConcepts _ = do
fetchWorks :: () -> IO ()
fetchWorks _ = do
-- ec <- OA.fetchConcepts (Just 1) (Just 1) Nothing
ew <- OA.fetchWorks (Just 1) (Just 1) (Just "*")
case ew of
Left err -> putText $ "error: " <> show err
Right w -> do
putText $ show w
fetchWorksC :: () -> IO ()
fetchWorksC _ = do
eWorksC <- OA.fetchWorksC Nothing
case eWorksC of
Left err -> putText $ "error: " <> show err
Right (Just count, c) -> do
putText $ "Count: " <> show count
_ <- runConduit $ c
.| takeC 1000
.| mapM_C (\(OA.Work { .. }) -> do
liftIO $ putText $ show id
)
pure ()
......@@ -81,6 +81,7 @@ executable openalex-main
-- LANGUAGE extensions used by modules in this package.
-- other-extensions:
build-depends: base ^>=4.14.3.0 && < 5
, conduit >= 1.3.5 && < 1.4
, optparse-simple >= 0.1.1.4 && < 0.2
, protolude >= 0.3.3 && < 0.4
......
......@@ -16,20 +16,18 @@ module OpenAlex
-- , fetchConcepts'
, fetchConcepts
, fetchWorks
, fetchWorksC
)
where
-- import Data.Aeson
-- import qualified Data.ByteString.Lazy as BSL
import Conduit
import qualified Data.Text as T
-- import qualified Data.Text.Encoding as TE
-- import Network.HTTP.Client (httpLbs, newManager, parseRequest, requestHeaders, responseBody)
import Network.HTTP.Client (newManager, requestHeaders)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Protolude
import Protolude hiding (yield)
import OpenAlex.Client
import OpenAlex.ServantClientLogging
import OpenAlex.Types (ListOf(..), Page, PerPage, Cursor, Concept, Work)
import OpenAlex.Types (ListOf(..), Meta(..), Page, PerPage, Cursor, Concept, Work)
import Servant.Client (BaseUrl(..), ClientEnv(..), ClientError, Scheme(Https), defaultMakeClientRequest, mkClientEnv, runClientM)
defaultClientEnv :: IO ClientEnv
......@@ -53,18 +51,26 @@ fetchWorks mPage mPerPage mCursor = do
env <- defaultClientEnv
runClientM (works mPage mPerPage mCursor) env
-- fetchConcepts' :: IO (Either Text (ListOf Concept))
-- fetchConcepts' = do
-- manager <- newManager tlsManagerSettings
-- req' <- parseRequest "https://api.openalex.org/concepts?page=1&per_page=10"
-- -- let setHeaders r =
-- -- filter (\(h, _) -> h /= "Accept") $
-- -- requestHeaders r -- <> [("Content-Type", "application/json")]
-- let setHeaders = const [("User-Agent", "v.http")]
-- let req = req' { requestHeaders = setHeaders req' }
-- putText $ show req
-- resBS <- httpLbs req manager
-- putText $ TE.decodeUtf8 $ BSL.toStrict $ responseBody resBS
-- pure $ case eitherDecode (responseBody resBS) of
-- Left err -> Left $ T.pack err
-- Right r -> Right $ r
fetchWorksC :: Maybe Cursor -> IO (Either ClientError (Maybe Integer, ConduitT () Work IO ()))
fetchWorksC Nothing = do
fetchWorksC (Just "*")
fetchWorksC mCursor = do
env <- defaultClientEnv
-- NOTE: per-page max is 200
eRes <- runClientM (works (Just 1) (Just 1) Nothing) env
case eRes of
Left err -> pure $ Left err
Right ListOf { meta = Meta { count }} -> do
pure $ Right ( Just $ fromIntegral count
, producer env mCursor )
where
producer :: ClientEnv -> Maybe Cursor -> ConduitT () Work IO ()
producer env mCursor' = do
eRes <- liftIO $ runClientM (works Nothing (Just 20) mCursor') env
liftIO $ putText $ "Conduit fetching page with cursor " <> show mCursor'
case eRes of
Left err -> panic $ "error: " <> show err
Right (ListOf { results, meta = meta@(Meta { next_cursor }) }) -> do
liftIO $ putText $ "Meta: " <> show meta
yieldMany results
producer env next_cursor
......@@ -194,11 +194,11 @@ instance FromJSON SummaryStats where
-- | https://docs.openalex.org/api-entities/works/work-object
data Work = Work
{ abstract_inverted_index :: Map Text [Int] -- TODO
{ abstract_inverted_index :: Maybe (Map Text [Int]) -- TODO
, authorships :: [Authorship]
, apc_list :: APCList
, apc_paid :: APCPaid
, best_oa_location :: Location
, apc_list :: Maybe APCList
, apc_paid :: Maybe APCPaid
, best_oa_location :: Maybe Location
, biblio :: Biblio
, cited_by_api_url :: Text
, cited_by_count :: Count
......@@ -207,25 +207,25 @@ data Work = Work
, corresponding_institution_ids :: [OpenAlexID]
, counts_by_year :: [CountByYear]
, created_date :: CreatedDate
, display_name :: Text
, doi :: DOI
, display_name :: Maybe Text
, doi :: Maybe DOI
, grants :: [Grant]
, id :: OpenAlexID
, ids :: Map Text ExternalID -- TODO ExternalDB
, is_paratext :: Bool
, is_retracted :: Bool
, language :: Text
, language :: Maybe Text
, locations :: [Location]
, locations_count :: Count
, mesh :: [MeSH]
, ngrams_url :: URL
, open_access :: OpenAccess
, primary_location :: Location
, primary_location :: Maybe Location
, publication_date :: CreatedDate
, publication_year :: Year
, referenced_works :: [OpenAlexID]
, related_works :: [OpenAlexID]
, title :: Text
, title :: Maybe Text
, type_ :: Text
, updated_date :: UpdatedDate
, is_oa :: Maybe Bool
......@@ -248,25 +248,25 @@ instance FromJSON Work where
corresponding_institution_ids <- v .: "corresponding_institution_ids"
counts_by_year <- v .: "counts_by_year"
created_date <- v .: "created_date"
display_name <- v .: "display_name"
doi <- v .: "doi"
display_name <- v .:? "display_name"
doi <- v .:? "doi"
grants <- v .: "grants"
id <- v .: "id"
ids <- v .: "ids"
is_paratext <- v .: "is_paratext"
is_retracted <- v .: "is_retracted"
language <- v .: "language"
language <- v .:? "language"
locations <- v .: "locations"
locations_count <- v .: "locations_count"
mesh <- v .: "mesh"
ngrams_url <- v .: "ngrams_url"
open_access <- v .: "open_access"
primary_location <- v .: "primary_location"
primary_location <- v .:? "primary_location"
publication_date <- v .: "publication_date"
publication_year <- v .: "publication_year"
referenced_works <- v .: "referenced_works"
related_works <- v .: "related_works"
title <- v .: "title"
title <- v .:? "title"
type_ <- v .: "type"
updated_date <- v .: "updated_date"
is_oa <- v .:? "is_oa"
......@@ -299,10 +299,10 @@ data Authorship = Authorship
} deriving (Generic, Show, FromJSON)
data Biblio = Biblio
{ volume :: Text
, issue :: Text
, first_page :: Text
, last_page :: Text
{ volume :: Maybe Text
, issue :: Maybe Text
, first_page :: Maybe Text
, last_page :: Maybe Text
} deriving (Generic, Show, FromJSON)
data DehydratedAuthor = DehydratedAuthor
......@@ -312,33 +312,33 @@ data DehydratedAuthor = DehydratedAuthor
} deriving (Generic, Show, FromJSON)
data DehydratedInstitution = DehydratedInstitution
{ id :: OpenAlexID
{ id :: Maybe OpenAlexID
, display_name :: Text
, ror :: Text
, country_code :: Text
, type_ :: Text
, ror :: Maybe Text
, country_code :: Maybe Text
, type_ :: Maybe Text
} deriving (Generic, Show)
instance FromJSON DehydratedInstitution where
parseJSON (Object v) = do
id <- v .: "id"
id <- v .:? "id"
display_name <- v .: "display_name"
ror <- v .: "ror"
country_code <- v .: "country_code"
type_ <- v .: "type"
ror <- v .:? "ror"
country_code <- v .:? "country_code"
type_ <- v .:? "type"
pure $ DehydratedInstitution { .. }
parseJSON _ = fail "Don't know how to parse a dehydrated institution from a non-object"
data Grant = Grant
{ funder :: OpenAlexID
, funder_display_name :: Text
, award_id :: Text
, award_id :: Maybe Text
} deriving (Generic, Show, FromJSON)
data Location = Location
{ is_oa :: Bool
, landing_page_url :: URL
, license :: Text
, source :: DehydratedSource
, landing_page_url :: Maybe URL
, license :: Maybe Text
, source :: Maybe DehydratedSource
, pdf_url :: Maybe URL
, version :: Maybe Text
} deriving (Generic, Show, FromJSON)
......@@ -348,7 +348,7 @@ data MeSH = MeSH
{ descriptor_ui :: Text
, descriptor_name :: Text
, qualifier_ui :: Text
, qualifier_name :: Text
, qualifier_name :: Maybe Text
, is_major_topic :: Bool
} deriving (Generic, Show, FromJSON)
......@@ -357,30 +357,32 @@ data OpenAccess = OpenAccess
{ any_repository_has_fulltext :: Bool
, is_oa :: Bool
, oa_status :: OAStatus
, oa_url :: URL
, oa_url :: Maybe URL
} deriving (Generic, Show, FromJSON)
-- | https://docs.openalex.org/api-entities/sources/source-object#the-dehydratedsource-object
data DehydratedSource = DehydratedSource
{ display_name :: Text
, host_organization :: Text
, host_organization :: Maybe Text
, host_organization_lineage :: [OpenAlexID]
, host_organization_name :: Text
, host_organization_name :: Maybe Text
, id :: OpenAlexID
, is_in_doaj :: Bool
, issn :: [ISSN]
, issn_l :: ISSNL
, issn_l :: Maybe ISSNL
, type_ ::Text
} deriving (Generic, Show)
instance FromJSON DehydratedSource where
parseJSON = withObject "DehydratedSource" $ \v -> do
display_name <- v .: "display_name"
host_organization <- v .: "host_organization"
host_organization_lineage <- v .: "host_organization_lineage"
host_organization_name <- v .: "host_organization_name"
host_organization <- v .:? "host_organization"
host_organization_lineage' <- v .:? "host_organization_lineage"
let host_organization_lineage = fromMaybe [] host_organization_lineage'
host_organization_name <- v .:? "host_organization_name"
id <- v .: "id"
is_in_doaj <- v .: "is_in_doaj"
issn <- v .: "issn"
issn_l <- v .: "issn_l"
issn' <- v .:? "issn"
let issn = fromMaybe [] issn'
issn_l <- v .:? "issn_l"
type_ <- v .: "type"
pure $ DehydratedSource { .. }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment