Conduit implemented for works, fixes for types

parent 5aac73a7
module Main where module Main where
import Conduit
import Options.Applicative.Simple import Options.Applicative.Simple
import Protolude import Protolude
import qualified OpenAlex as OA import qualified OpenAlex as OA
import qualified OpenAlex.Types as OA
main :: IO () main :: IO ()
main = do main = do
...@@ -20,13 +22,16 @@ main = do ...@@ -20,13 +22,16 @@ main = do
"Fetch OpenAlex works (https://docs.openalex.org/api-entities/works/work-object)" "Fetch OpenAlex works (https://docs.openalex.org/api-entities/works/work-object)"
(const fetchWorks) (const fetchWorks)
(pure ()) (pure ())
addCommand "works-c"
"Fetch OpenAlex works (https://docs.openalex.org/api-entities/works/work-object) with conduit"
(const fetchWorksC)
(pure ())
runCmd () runCmd ()
fetchConcepts :: () -> IO () fetchConcepts :: () -> IO ()
fetchConcepts _ = do fetchConcepts _ = do
-- ec <- OA.fetchConcepts (Just 1) (Just 1) Nothing
ec <- OA.fetchConcepts (Just 1) (Just 1) (Just "*") ec <- OA.fetchConcepts (Just 1) (Just 1) (Just "*")
case ec of case ec of
Left err -> putText $ "error: " <> show err Left err -> putText $ "error: " <> show err
...@@ -35,9 +40,22 @@ fetchConcepts _ = do ...@@ -35,9 +40,22 @@ fetchConcepts _ = do
fetchWorks :: () -> IO () fetchWorks :: () -> IO ()
fetchWorks _ = do fetchWorks _ = do
-- ec <- OA.fetchConcepts (Just 1) (Just 1) Nothing
ew <- OA.fetchWorks (Just 1) (Just 1) (Just "*") ew <- OA.fetchWorks (Just 1) (Just 1) (Just "*")
case ew of case ew of
Left err -> putText $ "error: " <> show err Left err -> putText $ "error: " <> show err
Right w -> do Right w -> do
putText $ show w putText $ show w
fetchWorksC :: () -> IO ()
fetchWorksC _ = do
eWorksC <- OA.fetchWorksC Nothing
case eWorksC of
Left err -> putText $ "error: " <> show err
Right (Just count, c) -> do
putText $ "Count: " <> show count
_ <- runConduit $ c
.| takeC 1000
.| mapM_C (\(OA.Work { .. }) -> do
liftIO $ putText $ show id
)
pure ()
...@@ -81,6 +81,7 @@ executable openalex-main ...@@ -81,6 +81,7 @@ executable openalex-main
-- LANGUAGE extensions used by modules in this package. -- LANGUAGE extensions used by modules in this package.
-- other-extensions: -- other-extensions:
build-depends: base ^>=4.14.3.0 && < 5 build-depends: base ^>=4.14.3.0 && < 5
, conduit >= 1.3.5 && < 1.4
, optparse-simple >= 0.1.1.4 && < 0.2 , optparse-simple >= 0.1.1.4 && < 0.2
, protolude >= 0.3.3 && < 0.4 , protolude >= 0.3.3 && < 0.4
......
...@@ -16,20 +16,18 @@ module OpenAlex ...@@ -16,20 +16,18 @@ module OpenAlex
-- , fetchConcepts' -- , fetchConcepts'
, fetchConcepts , fetchConcepts
, fetchWorks , fetchWorks
, fetchWorksC
) )
where where
-- import Data.Aeson import Conduit
-- import qualified Data.ByteString.Lazy as BSL
import qualified Data.Text as T import qualified Data.Text as T
-- import qualified Data.Text.Encoding as TE
-- import Network.HTTP.Client (httpLbs, newManager, parseRequest, requestHeaders, responseBody)
import Network.HTTP.Client (newManager, requestHeaders) import Network.HTTP.Client (newManager, requestHeaders)
import Network.HTTP.Client.TLS (tlsManagerSettings) import Network.HTTP.Client.TLS (tlsManagerSettings)
import Protolude import Protolude hiding (yield)
import OpenAlex.Client import OpenAlex.Client
import OpenAlex.ServantClientLogging import OpenAlex.ServantClientLogging
import OpenAlex.Types (ListOf(..), Page, PerPage, Cursor, Concept, Work) import OpenAlex.Types (ListOf(..), Meta(..), Page, PerPage, Cursor, Concept, Work)
import Servant.Client (BaseUrl(..), ClientEnv(..), ClientError, Scheme(Https), defaultMakeClientRequest, mkClientEnv, runClientM) import Servant.Client (BaseUrl(..), ClientEnv(..), ClientError, Scheme(Https), defaultMakeClientRequest, mkClientEnv, runClientM)
defaultClientEnv :: IO ClientEnv defaultClientEnv :: IO ClientEnv
...@@ -53,18 +51,26 @@ fetchWorks mPage mPerPage mCursor = do ...@@ -53,18 +51,26 @@ fetchWorks mPage mPerPage mCursor = do
env <- defaultClientEnv env <- defaultClientEnv
runClientM (works mPage mPerPage mCursor) env runClientM (works mPage mPerPage mCursor) env
-- fetchConcepts' :: IO (Either Text (ListOf Concept)) fetchWorksC :: Maybe Cursor -> IO (Either ClientError (Maybe Integer, ConduitT () Work IO ()))
-- fetchConcepts' = do fetchWorksC Nothing = do
-- manager <- newManager tlsManagerSettings fetchWorksC (Just "*")
-- req' <- parseRequest "https://api.openalex.org/concepts?page=1&per_page=10" fetchWorksC mCursor = do
-- -- let setHeaders r = env <- defaultClientEnv
-- -- filter (\(h, _) -> h /= "Accept") $ -- NOTE: per-page max is 200
-- -- requestHeaders r -- <> [("Content-Type", "application/json")] eRes <- runClientM (works (Just 1) (Just 1) Nothing) env
-- let setHeaders = const [("User-Agent", "v.http")] case eRes of
-- let req = req' { requestHeaders = setHeaders req' } Left err -> pure $ Left err
-- putText $ show req Right ListOf { meta = Meta { count }} -> do
-- resBS <- httpLbs req manager pure $ Right ( Just $ fromIntegral count
-- putText $ TE.decodeUtf8 $ BSL.toStrict $ responseBody resBS , producer env mCursor )
-- pure $ case eitherDecode (responseBody resBS) of where
-- Left err -> Left $ T.pack err producer :: ClientEnv -> Maybe Cursor -> ConduitT () Work IO ()
-- Right r -> Right $ r producer env mCursor' = do
eRes <- liftIO $ runClientM (works Nothing (Just 20) mCursor') env
liftIO $ putText $ "Conduit fetching page with cursor " <> show mCursor'
case eRes of
Left err -> panic $ "error: " <> show err
Right (ListOf { results, meta = meta@(Meta { next_cursor }) }) -> do
liftIO $ putText $ "Meta: " <> show meta
yieldMany results
producer env next_cursor
...@@ -194,11 +194,11 @@ instance FromJSON SummaryStats where ...@@ -194,11 +194,11 @@ instance FromJSON SummaryStats where
-- | https://docs.openalex.org/api-entities/works/work-object -- | https://docs.openalex.org/api-entities/works/work-object
data Work = Work data Work = Work
{ abstract_inverted_index :: Map Text [Int] -- TODO { abstract_inverted_index :: Maybe (Map Text [Int]) -- TODO
, authorships :: [Authorship] , authorships :: [Authorship]
, apc_list :: APCList , apc_list :: Maybe APCList
, apc_paid :: APCPaid , apc_paid :: Maybe APCPaid
, best_oa_location :: Location , best_oa_location :: Maybe Location
, biblio :: Biblio , biblio :: Biblio
, cited_by_api_url :: Text , cited_by_api_url :: Text
, cited_by_count :: Count , cited_by_count :: Count
...@@ -207,25 +207,25 @@ data Work = Work ...@@ -207,25 +207,25 @@ data Work = Work
, corresponding_institution_ids :: [OpenAlexID] , corresponding_institution_ids :: [OpenAlexID]
, counts_by_year :: [CountByYear] , counts_by_year :: [CountByYear]
, created_date :: CreatedDate , created_date :: CreatedDate
, display_name :: Text , display_name :: Maybe Text
, doi :: DOI , doi :: Maybe DOI
, grants :: [Grant] , grants :: [Grant]
, id :: OpenAlexID , id :: OpenAlexID
, ids :: Map Text ExternalID -- TODO ExternalDB , ids :: Map Text ExternalID -- TODO ExternalDB
, is_paratext :: Bool , is_paratext :: Bool
, is_retracted :: Bool , is_retracted :: Bool
, language :: Text , language :: Maybe Text
, locations :: [Location] , locations :: [Location]
, locations_count :: Count , locations_count :: Count
, mesh :: [MeSH] , mesh :: [MeSH]
, ngrams_url :: URL , ngrams_url :: URL
, open_access :: OpenAccess , open_access :: OpenAccess
, primary_location :: Location , primary_location :: Maybe Location
, publication_date :: CreatedDate , publication_date :: CreatedDate
, publication_year :: Year , publication_year :: Year
, referenced_works :: [OpenAlexID] , referenced_works :: [OpenAlexID]
, related_works :: [OpenAlexID] , related_works :: [OpenAlexID]
, title :: Text , title :: Maybe Text
, type_ :: Text , type_ :: Text
, updated_date :: UpdatedDate , updated_date :: UpdatedDate
, is_oa :: Maybe Bool , is_oa :: Maybe Bool
...@@ -248,25 +248,25 @@ instance FromJSON Work where ...@@ -248,25 +248,25 @@ instance FromJSON Work where
corresponding_institution_ids <- v .: "corresponding_institution_ids" corresponding_institution_ids <- v .: "corresponding_institution_ids"
counts_by_year <- v .: "counts_by_year" counts_by_year <- v .: "counts_by_year"
created_date <- v .: "created_date" created_date <- v .: "created_date"
display_name <- v .: "display_name" display_name <- v .:? "display_name"
doi <- v .: "doi" doi <- v .:? "doi"
grants <- v .: "grants" grants <- v .: "grants"
id <- v .: "id" id <- v .: "id"
ids <- v .: "ids" ids <- v .: "ids"
is_paratext <- v .: "is_paratext" is_paratext <- v .: "is_paratext"
is_retracted <- v .: "is_retracted" is_retracted <- v .: "is_retracted"
language <- v .: "language" language <- v .:? "language"
locations <- v .: "locations" locations <- v .: "locations"
locations_count <- v .: "locations_count" locations_count <- v .: "locations_count"
mesh <- v .: "mesh" mesh <- v .: "mesh"
ngrams_url <- v .: "ngrams_url" ngrams_url <- v .: "ngrams_url"
open_access <- v .: "open_access" open_access <- v .: "open_access"
primary_location <- v .: "primary_location" primary_location <- v .:? "primary_location"
publication_date <- v .: "publication_date" publication_date <- v .: "publication_date"
publication_year <- v .: "publication_year" publication_year <- v .: "publication_year"
referenced_works <- v .: "referenced_works" referenced_works <- v .: "referenced_works"
related_works <- v .: "related_works" related_works <- v .: "related_works"
title <- v .: "title" title <- v .:? "title"
type_ <- v .: "type" type_ <- v .: "type"
updated_date <- v .: "updated_date" updated_date <- v .: "updated_date"
is_oa <- v .:? "is_oa" is_oa <- v .:? "is_oa"
...@@ -299,10 +299,10 @@ data Authorship = Authorship ...@@ -299,10 +299,10 @@ data Authorship = Authorship
} deriving (Generic, Show, FromJSON) } deriving (Generic, Show, FromJSON)
data Biblio = Biblio data Biblio = Biblio
{ volume :: Text { volume :: Maybe Text
, issue :: Text , issue :: Maybe Text
, first_page :: Text , first_page :: Maybe Text
, last_page :: Text , last_page :: Maybe Text
} deriving (Generic, Show, FromJSON) } deriving (Generic, Show, FromJSON)
data DehydratedAuthor = DehydratedAuthor data DehydratedAuthor = DehydratedAuthor
...@@ -312,33 +312,33 @@ data DehydratedAuthor = DehydratedAuthor ...@@ -312,33 +312,33 @@ data DehydratedAuthor = DehydratedAuthor
} deriving (Generic, Show, FromJSON) } deriving (Generic, Show, FromJSON)
data DehydratedInstitution = DehydratedInstitution data DehydratedInstitution = DehydratedInstitution
{ id :: OpenAlexID { id :: Maybe OpenAlexID
, display_name :: Text , display_name :: Text
, ror :: Text , ror :: Maybe Text
, country_code :: Text , country_code :: Maybe Text
, type_ :: Text , type_ :: Maybe Text
} deriving (Generic, Show) } deriving (Generic, Show)
instance FromJSON DehydratedInstitution where instance FromJSON DehydratedInstitution where
parseJSON (Object v) = do parseJSON (Object v) = do
id <- v .: "id" id <- v .:? "id"
display_name <- v .: "display_name" display_name <- v .: "display_name"
ror <- v .: "ror" ror <- v .:? "ror"
country_code <- v .: "country_code" country_code <- v .:? "country_code"
type_ <- v .: "type" type_ <- v .:? "type"
pure $ DehydratedInstitution { .. } pure $ DehydratedInstitution { .. }
parseJSON _ = fail "Don't know how to parse a dehydrated institution from a non-object" parseJSON _ = fail "Don't know how to parse a dehydrated institution from a non-object"
data Grant = Grant data Grant = Grant
{ funder :: OpenAlexID { funder :: OpenAlexID
, funder_display_name :: Text , funder_display_name :: Text
, award_id :: Text , award_id :: Maybe Text
} deriving (Generic, Show, FromJSON) } deriving (Generic, Show, FromJSON)
data Location = Location data Location = Location
{ is_oa :: Bool { is_oa :: Bool
, landing_page_url :: URL , landing_page_url :: Maybe URL
, license :: Text , license :: Maybe Text
, source :: DehydratedSource , source :: Maybe DehydratedSource
, pdf_url :: Maybe URL , pdf_url :: Maybe URL
, version :: Maybe Text , version :: Maybe Text
} deriving (Generic, Show, FromJSON) } deriving (Generic, Show, FromJSON)
...@@ -348,7 +348,7 @@ data MeSH = MeSH ...@@ -348,7 +348,7 @@ data MeSH = MeSH
{ descriptor_ui :: Text { descriptor_ui :: Text
, descriptor_name :: Text , descriptor_name :: Text
, qualifier_ui :: Text , qualifier_ui :: Text
, qualifier_name :: Text , qualifier_name :: Maybe Text
, is_major_topic :: Bool , is_major_topic :: Bool
} deriving (Generic, Show, FromJSON) } deriving (Generic, Show, FromJSON)
...@@ -357,30 +357,32 @@ data OpenAccess = OpenAccess ...@@ -357,30 +357,32 @@ data OpenAccess = OpenAccess
{ any_repository_has_fulltext :: Bool { any_repository_has_fulltext :: Bool
, is_oa :: Bool , is_oa :: Bool
, oa_status :: OAStatus , oa_status :: OAStatus
, oa_url :: URL , oa_url :: Maybe URL
} deriving (Generic, Show, FromJSON) } deriving (Generic, Show, FromJSON)
-- | https://docs.openalex.org/api-entities/sources/source-object#the-dehydratedsource-object -- | https://docs.openalex.org/api-entities/sources/source-object#the-dehydratedsource-object
data DehydratedSource = DehydratedSource data DehydratedSource = DehydratedSource
{ display_name :: Text { display_name :: Text
, host_organization :: Text , host_organization :: Maybe Text
, host_organization_lineage :: [OpenAlexID] , host_organization_lineage :: [OpenAlexID]
, host_organization_name :: Text , host_organization_name :: Maybe Text
, id :: OpenAlexID , id :: OpenAlexID
, is_in_doaj :: Bool , is_in_doaj :: Bool
, issn :: [ISSN] , issn :: [ISSN]
, issn_l :: ISSNL , issn_l :: Maybe ISSNL
, type_ ::Text , type_ ::Text
} deriving (Generic, Show) } deriving (Generic, Show)
instance FromJSON DehydratedSource where instance FromJSON DehydratedSource where
parseJSON = withObject "DehydratedSource" $ \v -> do parseJSON = withObject "DehydratedSource" $ \v -> do
display_name <- v .: "display_name" display_name <- v .: "display_name"
host_organization <- v .: "host_organization" host_organization <- v .:? "host_organization"
host_organization_lineage <- v .: "host_organization_lineage" host_organization_lineage' <- v .:? "host_organization_lineage"
host_organization_name <- v .: "host_organization_name" let host_organization_lineage = fromMaybe [] host_organization_lineage'
host_organization_name <- v .:? "host_organization_name"
id <- v .: "id" id <- v .: "id"
is_in_doaj <- v .: "is_in_doaj" is_in_doaj <- v .: "is_in_doaj"
issn <- v .: "issn" issn' <- v .:? "issn"
issn_l <- v .: "issn_l" let issn = fromMaybe [] issn'
issn_l <- v .:? "issn_l"
type_ <- v .: "type" type_ <- v .: "type"
pure $ DehydratedSource { .. } pure $ DehydratedSource { .. }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment