Commit e72bb63a authored by mzheng

adding institutes field when importing Documents using TSV

parent c262ec0e
Pipeline #6496 failed with stages in 36 minutes and 44 seconds
......@@ -57,6 +57,7 @@ instance DefaultOrdered Document where
, "Publication Year"
, "Authors"
, "Title"
, "Institutes"
, "Source"
, "Abstract"]
instance ToNamedRecord Document where
......@@ -67,6 +68,7 @@ instance ToNamedRecord Document where
, "Publication Year" .= _hd_publication_year _node_hyperdata
, "Authors" .= _hd_authors _node_hyperdata
, "Title" .= _hd_title _node_hyperdata
, "Institutes" .= (TE.encodeUtf8 <$> _hd_institutes _node_hyperdata)
, "Source" .= (TE.encodeUtf8 <$> _hd_source _node_hyperdata)
, "Abstract" .= (TE.encodeUtf8 <$> _hd_abstract _node_hyperdata) ]
......
......@@ -54,7 +54,8 @@ patent2tsvDoc (Patent { .. }) =
, tsv_publication_month = Just $ Defaults.month
, tsv_publication_day = Just $ Defaults.day
, tsv_abstract = _patent_abstract
, tsv_authors = "Authors" }
, tsv_authors = "Authors"
, tsv_institutes = Nothing }
......
......@@ -105,7 +105,8 @@ fromDocs = V.map fromDocs'
, tsv_publication_month = Just d_publication_month
, tsv_publication_day = Just d_publication_day
, tsv_abstract = d_abstract
, tsv_authors = d_authors }
, tsv_authors = d_authors
, tsv_institutes = Nothing }
---------------------------------------------------------------
-- | Split a document in its context
......@@ -169,6 +170,7 @@ data TsvDoc = TsvDoc
, tsv_publication_day :: !(Maybe Int)
, tsv_abstract :: !Text
, tsv_authors :: !Text
, tsv_institutes :: !(Maybe Text)
}
deriving (Show)
......@@ -181,6 +183,7 @@ instance FromNamedRecord TsvDoc where
tsv_publication_day <- r .: "publication_day" <|> r .: "Publication Day"
tsv_abstract <- r .: "abstract" <|> r .: "Abstract"
tsv_authors <- r .: "authors" <|> r .: "Authors"
-- Accept the lowercase header written by the ToNamedRecord TsvDoc instance as
-- well as the capitalised spelling, like the other fields do. The column is
-- optional: when neither header is present, fall back to Nothing instead of
-- failing the whole record.
tsv_institutes <- r .: "institutes" <|> r .: "Institutes" <|> pure Nothing
pure $ TsvDoc { .. }
instance ToNamedRecord TsvDoc where
......@@ -192,6 +195,7 @@ instance ToNamedRecord TsvDoc where
, "publication_day" .= tsv_publication_day
, "abstract" .= tsv_abstract
, "authors" .= tsv_authors
, "institutes" .= tsv_institutes
]
hyperdataDocument2tsvDoc :: HyperdataDocument -> TsvDoc
......@@ -201,7 +205,8 @@ hyperdataDocument2tsvDoc h = TsvDoc { tsv_title = m $ _hd_title h
, tsv_publication_month = Just $ mI $ _hd_publication_month h
, tsv_publication_day = Just $ mI $ _hd_publication_day h
, tsv_abstract = m $ _hd_abstract h
, tsv_authors = m $ _hd_authors h }
, tsv_authors = m $ _hd_authors h
, tsv_institutes = _hd_institutes h}
where
m = maybe "" identity
......@@ -548,7 +553,7 @@ tsv2doc (TsvDoc { .. })
, _hd_page = Nothing
, _hd_title = Just tsv_title
, _hd_authors = Just tsv_authors
, _hd_institutes = Nothing
, _hd_institutes = tsv_institutes
, _hd_source = Just tsv_source
, _hd_abstract = Just tsv_abstract
, _hd_publication_date = Just $ pack . show $ jour (fromIntegral pubYear)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment