Commit a8fa4d43 authored by Przemyslaw Kaminski

[pubmed] use new Taggy parser in pubmed

This fixes an issue that occurred when an article title contained XML markup (such as <b>).
parent df905a78
...@@ -58,22 +58,21 @@ toDoc l (PubMedDoc.PubMed (PubMedDoc.PubMedArticle t j as aus) ...@@ -58,22 +58,21 @@ toDoc l (PubMedDoc.PubMed (PubMedDoc.PubMedArticle t j as aus)
, _hd_publication_second = Nothing , _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (Text.pack . show) l } , _hd_language_iso2 = Just $ (Text.pack . show) l }
where where
authors :: Maybe [PubMedDoc.Author] -> Maybe Text authors :: [PubMedDoc.Author] -> Maybe Text
authors aus' = case aus' of authors [] = Nothing
Nothing -> Nothing authors au = Just $ (Text.intercalate ", ")
Just au -> Just $ (Text.intercalate ", ") $ catMaybes
$ catMaybes $ map (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
$ map (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
institutes :: Maybe [PubMedDoc.Author] -> Maybe Text institutes :: [PubMedDoc.Author] -> Maybe Text
institutes aus' = case aus' of institutes [] = Nothing
Nothing -> Nothing institutes au = Just $ (Text.intercalate ", ")
Just au -> Just $ (Text.intercalate ", ") $ (map (Text.replace ", " " - "))
$ (map (Text.replace ", " " - ")) $ catMaybes
$ catMaybes $ map PubMedDoc.affiliation au
$ map PubMedDoc.affiliation au
abstract :: Maybe [Text] -> Maybe Text abstract :: [Text] -> Maybe Text
abstract as' = fmap (Text.intercalate ", ") as' abstract [] = Nothing
abstract as' = Just $ Text.intercalate ", " as'
...@@ -69,7 +69,7 @@ extra-deps: ...@@ -69,7 +69,7 @@ extra-deps:
# External Data API connectors # External Data API connectors
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: a9d8e08a7ef82f90e29dfaced4071704a3163394 commit: 9cdba6423decad5acfacb0f274212fd8723ce734
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: daeae80365250c4bd539f0a65e271f9aa37f731f commit: daeae80365250c4bd539f0a65e271f9aa37f731f
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment