Commit a8fa4d43 authored by Przemyslaw Kaminski

[pubmed] use new Taggy parser in pubmed

This fixes an issue that occurred when an article title contained XML markup (like <b>, etc.).
parent df905a78
......@@ -58,22 +58,21 @@ toDoc l (PubMedDoc.PubMed (PubMedDoc.PubMedArticle t j as aus)
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (Text.pack . show) l }
where
authors :: Maybe [PubMedDoc.Author] -> Maybe Text
authors aus' = case aus' of
Nothing -> Nothing
Just au -> Just $ (Text.intercalate ", ")
$ catMaybes
$ map (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
authors :: [PubMedDoc.Author] -> Maybe Text
authors [] = Nothing
authors au = Just $ (Text.intercalate ", ")
$ catMaybes
$ map (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
institutes :: Maybe [PubMedDoc.Author] -> Maybe Text
institutes aus' = case aus' of
Nothing -> Nothing
Just au -> Just $ (Text.intercalate ", ")
$ (map (Text.replace ", " " - "))
$ catMaybes
$ map PubMedDoc.affiliation au
institutes :: [PubMedDoc.Author] -> Maybe Text
institutes [] = Nothing
institutes au = Just $ (Text.intercalate ", ")
$ (map (Text.replace ", " " - "))
$ catMaybes
$ map PubMedDoc.affiliation au
abstract :: Maybe [Text] -> Maybe Text
abstract as' = fmap (Text.intercalate ", ") as'
abstract :: [Text] -> Maybe Text
abstract [] = Nothing
abstract as' = Just $ Text.intercalate ", " as'
......@@ -69,7 +69,7 @@ extra-deps:
# External Data API connectors
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: a9d8e08a7ef82f90e29dfaced4071704a3163394
commit: 9cdba6423decad5acfacb0f274212fd8723ce734
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: daeae80365250c4bd539f0a65e271f9aa37f731f
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment