Commit c86412b5 authored by Przemyslaw Kaminski

Merge branch '6-dev-remove-unnecessary-fields' into 'main'

Resolve "Review our API implementation and leave only what is needed for us"

See merge request !9
parents d2949cde ed5c8cc2
# Revision history for openalex # Revision history for openalex
## 0.1.0.0 -- YYYY-mm-dd ## 0.2.0.0 -- 2025-02-24
* First version. Released on an unsuspecting world. ### Changed
- Removed many fields that we don't use in gargantext. This makes
the library less suitable for general use, but stabilizes it
for us (the API could throw random errors at times, because some
field turned out to be missing).
## 0.1.0.0 -- 2025-02-23
- First version. Released on an unsuspecting world.
...@@ -47,7 +47,7 @@ main = do ...@@ -47,7 +47,7 @@ main = do
strOption (long "output") strOption (long "output")
(opts, runCmd) <- (opts, runCmd) <-
simpleOptions "0.1.0.0" simpleOptions "0.2.0.0"
"OpenAlex" "OpenAlex"
"OpenAlex command line" "OpenAlex command line"
(pure ()) $ do (pure ()) $ do
......
cabal-version: 2.4 cabal-version: 2.4
name: openalex name: openalex
version: 0.1.0.0 version: 0.2.0.0
-- A short (one-line) description of the package. -- A short (one-line) description of the package.
-- synopsis: An importer for OpenAlex database. -- synopsis: An importer for OpenAlex database.
......
...@@ -239,84 +239,84 @@ data Work = Work ...@@ -239,84 +239,84 @@ data Work = Work
{ abstract_inverted_index :: Maybe (Map Text [Int]) -- TODO { abstract_inverted_index :: Maybe (Map Text [Int]) -- TODO
, abstract_reconstructed :: Text , abstract_reconstructed :: Text
, authorships :: [Authorship] , authorships :: [Authorship]
, apc_list :: Maybe APCList -- , apc_list :: Maybe APCList
, apc_paid :: Maybe APCPaid -- , apc_paid :: Maybe APCPaid
, best_oa_location :: Maybe Location -- , best_oa_location :: Maybe Location
, biblio :: Biblio , biblio :: Biblio
, cited_by_api_url :: Text -- , cited_by_api_url :: Text
, cited_by_count :: Count -- , cited_by_count :: Count
, concepts :: [DehydratedConcept] -- , concepts :: [DehydratedConcept]
, corresponding_author_ids :: [OpenAlexID] -- , corresponding_author_ids :: [OpenAlexID]
, corresponding_institution_ids :: [OpenAlexID] -- , corresponding_institution_ids :: [OpenAlexID]
, counts_by_year :: [CountByYear] -- , counts_by_year :: [CountByYear]
, created_date :: CreatedDate -- , created_date :: CreatedDate
, display_name :: Maybe Text -- , display_name :: Maybe Text
, doi :: Maybe DOI , doi :: Maybe DOI
, grants :: [Grant] -- , grants :: [Grant]
, id :: OpenAlexID -- , id :: OpenAlexID
, ids :: Map Text ExternalID -- TODO ExternalDB -- , ids :: Map Text ExternalID -- TODO ExternalDB
, is_paratext :: Bool -- , is_paratext :: Bool
, is_retracted :: Bool -- , is_retracted :: Bool
, language :: Maybe Text , language :: Maybe Text
, locations :: [Location] -- , locations :: [Location]
, locations_count :: Count -- , locations_count :: Count
, mesh :: [MeSH] -- , mesh :: [MeSH]
, ngrams_url :: Maybe URL -- , ngrams_url :: Maybe URL
, open_access :: OpenAccess -- , open_access :: OpenAccess
, primary_location :: Maybe Location , primary_location :: Maybe Location
, publication_date :: CreatedDate , publication_date :: CreatedDate
, publication_year :: Year , publication_year :: Year
, referenced_works :: [OpenAlexID] -- , referenced_works :: [OpenAlexID]
, related_works :: [OpenAlexID] -- , related_works :: [OpenAlexID]
, title :: Maybe Text , title :: Maybe Text
, type_ :: Text -- , type_ :: Text
, updated_date :: UpdatedDate -- , updated_date :: UpdatedDate
, is_oa :: Maybe Bool -- , is_oa :: Maybe Bool
, license :: Maybe Text -- , license :: Maybe Text
, url :: Maybe URL , url :: Maybe URL
, version :: Maybe Text -- , version :: Maybe Text
} deriving (Generic, Show) } deriving (Generic, Show)
instance FromJSON Work where instance FromJSON Work where
parseJSON = withObject "Work" $ \v -> do parseJSON = withObject "Work" $ \v -> do
abstract_inverted_index <- v .: "abstract_inverted_index" abstract_inverted_index <- v .: "abstract_inverted_index"
let abstract_reconstructed = reconstructAbstract abstract_inverted_index let abstract_reconstructed = reconstructAbstract abstract_inverted_index
authorships <- v .: "authorships" authorships <- v .: "authorships"
apc_list <- v .: "apc_list" -- apc_list <- v .: "apc_list"
apc_paid <- v .: "apc_paid" -- apc_paid <- v .: "apc_paid"
best_oa_location <- v .: "best_oa_location" -- best_oa_location <- v .: "best_oa_location"
biblio <- v .: "biblio" biblio <- v .: "biblio"
cited_by_api_url <- v .: "cited_by_api_url" -- cited_by_api_url <- v .: "cited_by_api_url"
cited_by_count <- v .: "cited_by_count" -- cited_by_count <- v .: "cited_by_count"
concepts <- v .: "concepts" -- concepts <- v .: "concepts"
corresponding_author_ids <- v .: "corresponding_author_ids" -- corresponding_author_ids <- v .: "corresponding_author_ids"
corresponding_institution_ids <- v .: "corresponding_institution_ids" -- corresponding_institution_ids <- v .: "corresponding_institution_ids"
counts_by_year <- v .: "counts_by_year" -- counts_by_year <- v .: "counts_by_year"
created_date <- v .: "created_date" -- created_date <- v .: "created_date"
display_name <- v .:? "display_name" -- display_name <- v .:? "display_name"
doi <- v .:? "doi" doi <- v .:? "doi"
grants <- v .: "grants" -- grants <- v .: "grants"
id <- v .: "id" -- id <- v .: "id"
ids <- v .: "ids" -- ids <- v .: "ids"
is_paratext <- v .: "is_paratext" -- is_paratext <- v .: "is_paratext"
is_retracted <- v .: "is_retracted" -- is_retracted <- v .: "is_retracted"
language <- v .:? "language" language <- v .:? "language"
locations <- v .: "locations" -- locations <- v .: "locations"
locations_count <- v .: "locations_count" -- locations_count <- v .: "locations_count"
mesh <- v .: "mesh" -- mesh <- v .: "mesh"
ngrams_url <- v .:? "ngrams_url" -- ngrams_url <- v .:? "ngrams_url"
open_access <- v .: "open_access" -- open_access <- v .: "open_access"
primary_location <- v .:? "primary_location" primary_location <- v .:? "primary_location"
publication_date <- v .: "publication_date" publication_date <- v .: "publication_date"
publication_year <- v .: "publication_year" publication_year <- v .: "publication_year"
referenced_works <- v .: "referenced_works" -- referenced_works <- v .: "referenced_works"
related_works <- v .: "related_works" -- related_works <- v .: "related_works"
title <- v .:? "title" title <- v .:? "title"
type_ <- v .: "type" -- type_ <- v .: "type"
updated_date <- v .: "updated_date" -- updated_date <- v .: "updated_date"
is_oa <- v .:? "is_oa" -- is_oa <- v .:? "is_oa"
license <- v .:? "license" -- license <- v .:? "license"
url <- v .:? "url" url <- v .:? "url"
version <- v .:? "version" -- version <- v .:? "version"
pure $ Work { .. } pure $ Work { .. }
-- | Publication Day,Publication Month,Publication Year,Authors,Title,Source,Abstract -- | Publication Day,Publication Month,Publication Year,Authors,Title,Source,Abstract
instance Csv.DefaultOrdered Work where instance Csv.DefaultOrdered Work where
...@@ -348,116 +348,116 @@ instance Csv.ToNamedRecord Work where ...@@ -348,116 +348,116 @@ instance Csv.ToNamedRecord Work where
Nothing -> "" Nothing -> ""
Just (DehydratedSource { display_name = dn }) -> dn Just (DehydratedSource { display_name = dn }) -> dn
data APCList = APCList -- data APCList = APCList
{ value :: Maybe Int -- { value :: Maybe Int
, currency :: Text -- , currency :: Text
, provenance :: Maybe Text -- , provenance :: Maybe Text
, value_usd :: Maybe Int -- , value_usd :: Maybe Int
} deriving (Generic, Show, FromJSON) -- } deriving (Generic, Show, FromJSON)
data APCPaid = APCPaid -- data APCPaid = APCPaid
{ value :: Int -- { value :: Int
, currency :: Text -- , currency :: Text
, provenance :: Maybe Text -- , provenance :: Maybe Text
, value_usd :: Maybe Int -- , value_usd :: Maybe Int
} deriving (Generic, Show, FromJSON) -- } deriving (Generic, Show, FromJSON)
-- | https://docs.openalex.org/api-entities/works/work-object/authorship-object -- | https://docs.openalex.org/api-entities/works/work-object/authorship-object
data Authorship = Authorship data Authorship = Authorship
{ author :: DehydratedAuthor { author :: DehydratedAuthor
, author_position :: Text -- , author_position :: Text
, institutions :: [DehydratedInstitution] , institutions :: [DehydratedInstitution]
, is_corresponding :: Maybe Bool -- , is_corresponding :: Maybe Bool
, raw_affiliation_string :: Maybe Text -- , raw_affiliation_string :: Maybe Text
} deriving (Generic, Show, FromJSON) } deriving (Generic, Show, FromJSON)
authorshipAuthorName :: Authorship -> Maybe Text authorshipAuthorName :: Authorship -> Maybe Text
authorshipAuthorName (Authorship { author = DehydratedAuthor { display_name } }) = display_name authorshipAuthorName (Authorship { author = DehydratedAuthor { display_name } }) = display_name
data Biblio = Biblio data Biblio = Biblio
{ volume :: Maybe Text { -- volume :: Maybe Text
, issue :: Maybe Text -- , issue :: Maybe Text
, first_page :: Maybe Text first_page :: Maybe Text
, last_page :: Maybe Text -- , last_page :: Maybe Text
} deriving (Generic, Show, FromJSON) } deriving (Generic, Show, FromJSON)
data DehydratedAuthor = DehydratedAuthor data DehydratedAuthor = DehydratedAuthor
{ id :: OpenAlexID { -- id :: OpenAlexID
, display_name :: Maybe Text display_name :: Maybe Text
, orcid :: Maybe URL -- , orcid :: Maybe URL
} deriving (Generic, Show, FromJSON) } deriving (Generic, Show, FromJSON)
data DehydratedInstitution = DehydratedInstitution data DehydratedInstitution = DehydratedInstitution
{ id :: Maybe OpenAlexID { -- id :: Maybe OpenAlexID
, display_name :: Text display_name :: Text
, ror :: Maybe Text -- , ror :: Maybe Text
, country_code :: Maybe Text -- , country_code :: Maybe Text
, type_ :: Maybe Text -- , type_ :: Maybe Text
} deriving (Generic, Show) } deriving (Generic, Show)
instance FromJSON DehydratedInstitution where instance FromJSON DehydratedInstitution where
parseJSON (Object v) = do parseJSON (Object v) = do
id <- v .:? "id" -- id <- v .:? "id"
display_name <- v .: "display_name" display_name <- v .: "display_name"
ror <- v .:? "ror" -- ror <- v .:? "ror"
country_code <- v .:? "country_code" -- country_code <- v .:? "country_code"
type_ <- v .:? "type" -- type_ <- v .:? "type"
pure $ DehydratedInstitution { .. } pure $ DehydratedInstitution { .. }
parseJSON _ = fail "Don't know how to parse a dehydrated institution from a non-object" parseJSON _ = fail "Don't know how to parse a dehydrated institution from a non-object"
data Grant = Grant -- data Grant = Grant
{ funder :: OpenAlexID -- { funder :: OpenAlexID
, funder_display_name :: Text -- , funder_display_name :: Text
, award_id :: Maybe Text -- , award_id :: Maybe Text
} deriving (Generic, Show, FromJSON) -- } deriving (Generic, Show, FromJSON)
data Location = Location data Location = Location
{ is_oa :: Bool { -- is_oa :: Bool
, landing_page_url :: Maybe URL -- , landing_page_url :: Maybe URL
, license :: Maybe Text -- , license :: Maybe Text
, source :: Maybe DehydratedSource source :: Maybe DehydratedSource
, pdf_url :: Maybe URL -- , pdf_url :: Maybe URL
, version :: Maybe Text -- , version :: Maybe Text
} deriving (Generic, Show, FromJSON) } deriving (Generic, Show, FromJSON)
-- | PubMed only, https://docs.openalex.org/api-entities/works/work-object#mesh -- | PubMed only, https://docs.openalex.org/api-entities/works/work-object#mesh
data MeSH = MeSH -- data MeSH = MeSH
{ descriptor_ui :: Text -- { descriptor_ui :: Text
, descriptor_name :: Text -- , descriptor_name :: Text
, qualifier_ui :: Text -- , qualifier_ui :: Text
, qualifier_name :: Maybe Text -- , qualifier_name :: Maybe Text
, is_major_topic :: Bool -- , is_major_topic :: Bool
} deriving (Generic, Show, FromJSON) -- } deriving (Generic, Show, FromJSON)
-- | https://docs.openalex.org/api-entities/works/work-object#the-openaccess-object -- | https://docs.openalex.org/api-entities/works/work-object#the-openaccess-object
data OpenAccess = OpenAccess -- data OpenAccess = OpenAccess
{ any_repository_has_fulltext :: Bool -- { any_repository_has_fulltext :: Bool
, is_oa :: Bool -- , is_oa :: Bool
, oa_status :: OAStatus -- , oa_status :: OAStatus
, oa_url :: Maybe URL -- , oa_url :: Maybe URL
} deriving (Generic, Show, FromJSON) -- } deriving (Generic, Show, FromJSON)
-- | https://docs.openalex.org/api-entities/sources/source-object#the-dehydratedsource-object -- | https://docs.openalex.org/api-entities/sources/source-object#the-dehydratedsource-object
data DehydratedSource = DehydratedSource data DehydratedSource = DehydratedSource
{ display_name :: Text { display_name :: Text
, host_organization :: Maybe Text -- , host_organization :: Maybe Text
, host_organization_lineage :: [OpenAlexID] -- , host_organization_lineage :: [OpenAlexID]
, host_organization_name :: Maybe Text -- , host_organization_name :: Maybe Text
, id :: OpenAlexID -- , id :: OpenAlexID
, is_in_doaj :: Bool -- , is_in_doaj :: Bool
, issn :: [ISSN] -- , issn :: [ISSN]
, issn_l :: Maybe ISSNL -- , issn_l :: Maybe ISSNL
, type_ ::Text -- , type_ ::Text
} deriving (Generic, Show) } deriving (Generic, Show)
instance FromJSON DehydratedSource where instance FromJSON DehydratedSource where
parseJSON = withObject "DehydratedSource" $ \v -> do parseJSON = withObject "DehydratedSource" $ \v -> do
display_name <- v .: "display_name" display_name <- v .: "display_name"
host_organization <- v .:? "host_organization" -- host_organization <- v .:? "host_organization"
host_organization_lineage' <- v .:? "host_organization_lineage" -- host_organization_lineage' <- v .:? "host_organization_lineage"
let host_organization_lineage = fromMaybe [] host_organization_lineage' -- let host_organization_lineage = fromMaybe [] host_organization_lineage'
host_organization_name <- v .:? "host_organization_name" -- host_organization_name <- v .:? "host_organization_name"
id <- v .: "id" -- id <- v .: "id"
is_in_doaj <- v .: "is_in_doaj" -- is_in_doaj <- v .: "is_in_doaj"
issn' <- v .:? "issn" -- issn' <- v .:? "issn"
let issn = fromMaybe [] issn' -- let issn = fromMaybe [] issn'
issn_l <- v .:? "issn_l" -- issn_l <- v .:? "issn_l"
type_ <- v .: "type" -- type_ <- v .: "type"
pure $ DehydratedSource { .. } pure $ DehydratedSource { .. }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment