Commit c86412b5 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch '6-dev-remove-unnecessary-fields' into 'main'

Resolve "Review our API implementation and leave only what is needed for us"

See merge request !9
parents d2949cde ed5c8cc2
# Revision history for openalex
## 0.1.0.0 -- YYYY-mm-dd
## 0.2.0.0 -- 2025-02-24
* First version. Released on an unsuspecting world.
### Changed
- Removed many fields that we don't use in gargantext. This makes
the library less suitable for general use, but stabilizes it
for us (parsing an API response could fail unpredictably whenever
some field turned out to be missing).
## 0.1.0.0 -- 2025-02-23
- First version. Released on an unsuspecting world.
......@@ -47,7 +47,7 @@ main = do
strOption (long "output")
(opts, runCmd) <-
simpleOptions "0.1.0.0"
simpleOptions "0.2.0.0"
"OpenAlex"
"OpenAlex command line"
(pure ()) $ do
......
cabal-version: 2.4
name: openalex
version: 0.1.0.0
version: 0.2.0.0
-- A short (one-line) description of the package.
-- synopsis: An importer for OpenAlex database.
......
......@@ -239,84 +239,84 @@ data Work = Work
{ abstract_inverted_index :: Maybe (Map Text [Int]) -- TODO
, abstract_reconstructed :: Text
, authorships :: [Authorship]
, apc_list :: Maybe APCList
, apc_paid :: Maybe APCPaid
, best_oa_location :: Maybe Location
-- , apc_list :: Maybe APCList
-- , apc_paid :: Maybe APCPaid
-- , best_oa_location :: Maybe Location
, biblio :: Biblio
, cited_by_api_url :: Text
, cited_by_count :: Count
, concepts :: [DehydratedConcept]
, corresponding_author_ids :: [OpenAlexID]
, corresponding_institution_ids :: [OpenAlexID]
, counts_by_year :: [CountByYear]
, created_date :: CreatedDate
, display_name :: Maybe Text
-- , cited_by_api_url :: Text
-- , cited_by_count :: Count
-- , concepts :: [DehydratedConcept]
-- , corresponding_author_ids :: [OpenAlexID]
-- , corresponding_institution_ids :: [OpenAlexID]
-- , counts_by_year :: [CountByYear]
-- , created_date :: CreatedDate
-- , display_name :: Maybe Text
, doi :: Maybe DOI
, grants :: [Grant]
, id :: OpenAlexID
, ids :: Map Text ExternalID -- TODO ExternalDB
, is_paratext :: Bool
, is_retracted :: Bool
-- , grants :: [Grant]
-- , id :: OpenAlexID
-- , ids :: Map Text ExternalID -- TODO ExternalDB
-- , is_paratext :: Bool
-- , is_retracted :: Bool
, language :: Maybe Text
, locations :: [Location]
, locations_count :: Count
, mesh :: [MeSH]
, ngrams_url :: Maybe URL
, open_access :: OpenAccess
-- , locations :: [Location]
-- , locations_count :: Count
-- , mesh :: [MeSH]
-- , ngrams_url :: Maybe URL
-- , open_access :: OpenAccess
, primary_location :: Maybe Location
, publication_date :: CreatedDate
, publication_year :: Year
, referenced_works :: [OpenAlexID]
, related_works :: [OpenAlexID]
-- , referenced_works :: [OpenAlexID]
-- , related_works :: [OpenAlexID]
, title :: Maybe Text
, type_ :: Text
, updated_date :: UpdatedDate
, is_oa :: Maybe Bool
, license :: Maybe Text
-- , type_ :: Text
-- , updated_date :: UpdatedDate
-- , is_oa :: Maybe Bool
-- , license :: Maybe Text
, url :: Maybe URL
, version :: Maybe Text
-- , version :: Maybe Text
} deriving (Generic, Show)
-- | Decode a 'Work' from a JSON object.
--
-- Keys read with '.:' must be present in the object, otherwise the parser
-- fails; keys read with '.:?' may be absent. The record is assembled at the
-- end with a record wildcard, so every local binding below must carry the
-- name of the 'Work' field it fills.
--
-- NOTE(review): several bindings are immediately followed by an identical,
-- commented-out copy. This looks like the old and new sides of a diff shown
-- together -- confirm which side is current before compiling.
instance FromJSON Work where
parseJSON = withObject "Work" $ \v -> do
-- NOTE(review): the field is a 'Maybe', but '.:' still requires the key to
-- exist; '.:?' would also accept a missing key -- confirm which is intended.
abstract_inverted_index <- v .: "abstract_inverted_index"
-- Not read from the JSON: derived from the inverted index by
-- 'reconstructAbstract' (defined elsewhere).
let abstract_reconstructed = reconstructAbstract abstract_inverted_index
authorships <- v .: "authorships"
apc_list <- v .: "apc_list"
apc_paid <- v .: "apc_paid"
best_oa_location <- v .: "best_oa_location"
-- apc_list <- v .: "apc_list"
-- apc_paid <- v .: "apc_paid"
-- best_oa_location <- v .: "best_oa_location"
biblio <- v .: "biblio"
cited_by_api_url <- v .: "cited_by_api_url"
cited_by_count <- v .: "cited_by_count"
concepts <- v .: "concepts"
corresponding_author_ids <- v .: "corresponding_author_ids"
corresponding_institution_ids <- v .: "corresponding_institution_ids"
counts_by_year <- v .: "counts_by_year"
created_date <- v .: "created_date"
display_name <- v .:? "display_name"
-- cited_by_api_url <- v .: "cited_by_api_url"
-- cited_by_count <- v .: "cited_by_count"
-- concepts <- v .: "concepts"
-- corresponding_author_ids <- v .: "corresponding_author_ids"
-- corresponding_institution_ids <- v .: "corresponding_institution_ids"
-- counts_by_year <- v .: "counts_by_year"
-- created_date <- v .: "created_date"
-- display_name <- v .:? "display_name"
doi <- v .:? "doi"
grants <- v .: "grants"
id <- v .: "id"
ids <- v .: "ids"
is_paratext <- v .: "is_paratext"
is_retracted <- v .: "is_retracted"
-- grants <- v .: "grants"
-- id <- v .: "id"
-- ids <- v .: "ids"
-- is_paratext <- v .: "is_paratext"
-- is_retracted <- v .: "is_retracted"
language <- v .:? "language"
locations <- v .: "locations"
locations_count <- v .: "locations_count"
mesh <- v .: "mesh"
ngrams_url <- v .:? "ngrams_url"
open_access <- v .: "open_access"
-- locations <- v .: "locations"
-- locations_count <- v .: "locations_count"
-- mesh <- v .: "mesh"
-- ngrams_url <- v .:? "ngrams_url"
-- open_access <- v .: "open_access"
primary_location <- v .:? "primary_location"
publication_date <- v .: "publication_date"
publication_year <- v .: "publication_year"
referenced_works <- v .: "referenced_works"
related_works <- v .: "related_works"
-- referenced_works <- v .: "referenced_works"
-- related_works <- v .: "related_works"
title <- v .:? "title"
-- The JSON key is "type"; the record field is spelled @type_@ because
-- @type@ is a reserved word in Haskell.
type_ <- v .: "type"
updated_date <- v .: "updated_date"
is_oa <- v .:? "is_oa"
license <- v .:? "license"
-- type_ <- v .: "type"
-- updated_date <- v .: "updated_date"
-- is_oa <- v .:? "is_oa"
-- license <- v .:? "license"
url <- v .:? "url"
version <- v .:? "version"
-- version <- v .:? "version"
-- Record wildcard: each field is taken from the same-named binding above.
pure $ Work { .. }
-- | Publication Day,Publication Month,Publication Year,Authors,Title,Source,Abstract
instance Csv.DefaultOrdered Work where
......@@ -348,116 +348,116 @@ instance Csv.ToNamedRecord Work where
Nothing -> ""
Just (DehydratedSource { display_name = dn }) -> dn
-- | Type of the @apc_list@ field of 'Work'.
--
-- 'FromJSON' is derived rather than hand-written.
--
-- NOTE(review): the live declaration is followed by a commented-out copy of
-- itself (apparently the other side of a diff) -- confirm which one applies.
data APCList = APCList
{ value :: Maybe Int
, currency :: Text
, provenance :: Maybe Text
, value_usd :: Maybe Int
} deriving (Generic, Show, FromJSON)
-- data APCList = APCList
-- { value :: Maybe Int
-- , currency :: Text
-- , provenance :: Maybe Text
-- , value_usd :: Maybe Int
-- } deriving (Generic, Show, FromJSON)
-- | Type of the @apc_paid@ field of 'Work'.
--
-- Same fields as 'APCList', except that @value@ is a plain 'Int' here instead
-- of a 'Maybe Int'. 'FromJSON' is derived rather than hand-written.
--
-- NOTE(review): the live declaration is followed by a commented-out copy of
-- itself (apparently the other side of a diff) -- confirm which one applies.
data APCPaid = APCPaid
{ value :: Int
, currency :: Text
, provenance :: Maybe Text
, value_usd :: Maybe Int
} deriving (Generic, Show, FromJSON)
-- data APCPaid = APCPaid
-- { value :: Int
-- , currency :: Text
-- , provenance :: Maybe Text
-- , value_usd :: Maybe Int
-- } deriving (Generic, Show, FromJSON)
-- | One element of the @authorships@ list of a 'Work'.
--
-- See https://docs.openalex.org/api-entities/works/work-object/authorship-object
--
-- 'FromJSON' is derived rather than hand-written.
--
-- NOTE(review): some fields appear both live and commented out (apparently
-- the two sides of a diff) -- confirm which set of fields is current.
data Authorship = Authorship
{ author :: DehydratedAuthor
, author_position :: Text
-- , author_position :: Text
, institutions :: [DehydratedInstitution]
, is_corresponding :: Maybe Bool
, raw_affiliation_string :: Maybe Text
-- , is_corresponding :: Maybe Bool
-- , raw_affiliation_string :: Maybe Text
} deriving (Generic, Show, FromJSON)
-- | Display name of the author attached to an authorship entry, if one is
-- recorded.
--
-- The name is extracted by pattern matching on the nested 'DehydratedAuthor'
-- record; naming the constructor in the pattern is what selects the right
-- @display_name@ field among the several records that declare one.
authorshipAuthorName :: Authorship -> Maybe Text
authorshipAuthorName authorship =
  case authorship of
    Authorship { author = DehydratedAuthor { display_name = authorName } } ->
      authorName
-- | Type of the @biblio@ field of 'Work'.
--
-- 'FromJSON' is derived rather than hand-written.
--
-- NOTE(review): the body contains two record-opening lines: a full field list
-- followed by one in which only @first_page@ is live. This looks like the old
-- and new sides of a diff shown together -- confirm which one is current.
data Biblio = Biblio
{ volume :: Maybe Text
, issue :: Maybe Text
, first_page :: Maybe Text
, last_page :: Maybe Text
{ -- volume :: Maybe Text
-- , issue :: Maybe Text
first_page :: Maybe Text
-- , last_page :: Maybe Text
} deriving (Generic, Show, FromJSON)
-- | Type of the @author@ field of 'Authorship'.
--
-- 'FromJSON' is derived rather than hand-written.
--
-- NOTE(review): the body contains two record-opening lines: a full field list
-- followed by one in which only @display_name@ is live. This looks like the
-- old and new sides of a diff shown together -- confirm which one is current.
data DehydratedAuthor = DehydratedAuthor
{ id :: OpenAlexID
, display_name :: Maybe Text
, orcid :: Maybe URL
{ -- id :: OpenAlexID
display_name :: Maybe Text
-- , orcid :: Maybe URL
} deriving (Generic, Show, FromJSON)
-- | Element type of the @institutions@ list of 'Authorship'.
--
-- Only 'Generic' and 'Show' are derived; this type has a hand-written
-- 'FromJSON' instance.
--
-- NOTE(review): the body contains two record-opening lines: a full field list
-- followed by one in which only @display_name@ is live. This looks like the
-- old and new sides of a diff shown together -- confirm which one is current.
data DehydratedInstitution = DehydratedInstitution
{ id :: Maybe OpenAlexID
, display_name :: Text
, ror :: Maybe Text
, country_code :: Maybe Text
, type_ :: Maybe Text
{ -- id :: Maybe OpenAlexID
display_name :: Text
-- , ror :: Maybe Text
-- , country_code :: Maybe Text
-- , type_ :: Maybe Text
} deriving (Generic, Show)
-- | Decode a 'DehydratedInstitution' from a JSON object.
--
-- @display_name@ is read with '.:' and must be present; the remaining keys
-- are read with '.:?' and may be absent. Any JSON value that is not an object
-- is rejected with an explicit failure message.
--
-- NOTE(review): the optional bindings appear both live and commented out
-- (apparently the two sides of a diff) -- confirm which set is current.
instance FromJSON DehydratedInstitution where
parseJSON (Object v) = do
id <- v .:? "id"
-- id <- v .:? "id"
display_name <- v .: "display_name"
ror <- v .:? "ror"
country_code <- v .:? "country_code"
-- The JSON key is "type"; the record field is spelled @type_@.
type_ <- v .:? "type"
-- ror <- v .:? "ror"
-- country_code <- v .:? "country_code"
-- type_ <- v .:? "type"
-- Record wildcard: each field is taken from the same-named binding above.
pure $ DehydratedInstitution { .. }
-- Fallback equation for every non-object JSON value.
parseJSON _ = fail "Don't know how to parse a dehydrated institution from a non-object"
-- | Element type of the @grants@ list of 'Work'.
--
-- 'FromJSON' is derived rather than hand-written.
--
-- NOTE(review): the live declaration is followed by a commented-out copy of
-- itself (apparently the other side of a diff) -- confirm which one applies.
data Grant = Grant
{ funder :: OpenAlexID
, funder_display_name :: Text
, award_id :: Maybe Text
} deriving (Generic, Show, FromJSON)
-- data Grant = Grant
-- { funder :: OpenAlexID
-- , funder_display_name :: Text
-- , award_id :: Maybe Text
-- } deriving (Generic, Show, FromJSON)
-- | Location record used by 'Work' for @primary_location@,
-- @best_oa_location@ and the elements of @locations@.
--
-- 'FromJSON' is derived rather than hand-written.
--
-- NOTE(review): the body contains two record-opening lines: a full field list
-- followed by one in which only @source@ is live. This looks like the old and
-- new sides of a diff shown together -- confirm which one is current.
data Location = Location
{ is_oa :: Bool
, landing_page_url :: Maybe URL
, license :: Maybe Text
, source :: Maybe DehydratedSource
, pdf_url :: Maybe URL
, version :: Maybe Text
{ -- is_oa :: Bool
-- , landing_page_url :: Maybe URL
-- , license :: Maybe Text
source :: Maybe DehydratedSource
-- , pdf_url :: Maybe URL
-- , version :: Maybe Text
} deriving (Generic, Show, FromJSON)
-- | Element type of the @mesh@ list of 'Work'.
--
-- PubMed only, https://docs.openalex.org/api-entities/works/work-object#mesh
--
-- 'FromJSON' is derived rather than hand-written.
--
-- NOTE(review): the live declaration is followed by a commented-out copy of
-- itself (apparently the other side of a diff) -- confirm which one applies.
data MeSH = MeSH
{ descriptor_ui :: Text
, descriptor_name :: Text
, qualifier_ui :: Text
, qualifier_name :: Maybe Text
, is_major_topic :: Bool
} deriving (Generic, Show, FromJSON)
-- data MeSH = MeSH
-- { descriptor_ui :: Text
-- , descriptor_name :: Text
-- , qualifier_ui :: Text
-- , qualifier_name :: Maybe Text
-- , is_major_topic :: Bool
-- } deriving (Generic, Show, FromJSON)
-- | Type of the @open_access@ field of 'Work'.
--
-- See https://docs.openalex.org/api-entities/works/work-object#the-openaccess-object
--
-- 'FromJSON' is derived rather than hand-written.
--
-- NOTE(review): the live declaration is followed by a commented-out copy of
-- itself (apparently the other side of a diff) -- confirm which one applies.
data OpenAccess = OpenAccess
{ any_repository_has_fulltext :: Bool
, is_oa :: Bool
, oa_status :: OAStatus
, oa_url :: Maybe URL
} deriving (Generic, Show, FromJSON)
-- data OpenAccess = OpenAccess
-- { any_repository_has_fulltext :: Bool
-- , is_oa :: Bool
-- , oa_status :: OAStatus
-- , oa_url :: Maybe URL
-- } deriving (Generic, Show, FromJSON)
-- | Type of the @source@ field of 'Location'.
--
-- See https://docs.openalex.org/api-entities/sources/source-object#the-dehydratedsource-object
--
-- Only 'Generic' and 'Show' are derived; this type has a hand-written
-- 'FromJSON' instance.
--
-- NOTE(review): every field after @display_name@ appears both live and
-- commented out (apparently the two sides of a diff) -- confirm which set of
-- fields is current.
data DehydratedSource = DehydratedSource
{ display_name :: Text
, host_organization :: Maybe Text
, host_organization_lineage :: [OpenAlexID]
, host_organization_name :: Maybe Text
, id :: OpenAlexID
, is_in_doaj :: Bool
, issn :: [ISSN]
, issn_l :: Maybe ISSNL
, type_ ::Text
-- , host_organization :: Maybe Text
-- , host_organization_lineage :: [OpenAlexID]
-- , host_organization_name :: Maybe Text
-- , id :: OpenAlexID
-- , is_in_doaj :: Bool
-- , issn :: [ISSN]
-- , issn_l :: Maybe ISSNL
-- , type_ ::Text
} deriving (Generic, Show)
-- | Decode a 'DehydratedSource' from a JSON object.
--
-- Keys read with '.:' must be present; keys read with '.:?' may be absent.
-- The two list-valued fields fall back to an empty list when their key yields
-- no value.
--
-- NOTE(review): every binding after @display_name@ appears both live and
-- commented out (apparently the two sides of a diff) -- confirm which set is
-- current.
instance FromJSON DehydratedSource where
parseJSON = withObject "DehydratedSource" $ \v -> do
display_name <- v .: "display_name"
host_organization <- v .:? "host_organization"
-- Read as optional, then default to [] so the field is a plain list.
host_organization_lineage' <- v .:? "host_organization_lineage"
let host_organization_lineage = fromMaybe [] host_organization_lineage'
host_organization_name <- v .:? "host_organization_name"
id <- v .: "id"
is_in_doaj <- v .: "is_in_doaj"
-- Same optional-then-default pattern as the lineage list above.
issn' <- v .:? "issn"
let issn = fromMaybe [] issn'
issn_l <- v .:? "issn_l"
-- The JSON key is "type"; the record field is spelled @type_@.
type_ <- v .: "type"
-- host_organization <- v .:? "host_organization"
-- host_organization_lineage' <- v .:? "host_organization_lineage"
-- let host_organization_lineage = fromMaybe [] host_organization_lineage'
-- host_organization_name <- v .:? "host_organization_name"
-- id <- v .: "id"
-- is_in_doaj <- v .: "is_in_doaj"
-- issn' <- v .:? "issn"
-- let issn = fromMaybe [] issn'
-- issn_l <- v .:? "issn_l"
-- type_ <- v .: "type"
-- Record wildcard: each field is taken from the same-named binding above.
pure $ DehydratedSource { .. }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment