Commit c9ce9c6f authored by Alexandre Delanoë's avatar Alexandre Delanoë

[EXT/API] Isidore.

parent 846d21f5
......@@ -22,7 +22,7 @@ please follow the types.
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers (FileFormat(..), clean, parseFile)
module Gargantext.Text.Parsers (FileFormat(..), clean, parseFile, cleanText)
where
--import Data.ByteString (ByteString)
......@@ -164,9 +164,14 @@ openZip fp = do
bs <- mapConcurrently (\s -> withArchive fp (getEntry s)) entries
pure bs
cleanText :: Text -> Text
cleanText = cs . clean . cs
clean :: DB.ByteString -> DB.ByteString
clean txt = DBC.map clean' txt
where
clean' '’' = '\''
clean' '\r' = ' '
clean' '\t' = ' '
clean' ';' = '.'
clean' c = c
{-|
Module : Gargantext.Text.Parsers.IsidoreApi
Description : To query French Humanities publication database from its API
Copyright : (c) CNRS, 2019-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers.IsidoreApi where
import System.FilePath (FilePath())
import Data.Text (Text)
import Gargantext.Core (Lang(..))
import Gargantext.Database.Types.Node (HyperdataDocument(..))
import Gargantext.Prelude
import Isidore.Client
import Servant.Client
import qualified Data.Text as Text
import qualified Gargantext.Text.Parsers.Date as Date
import qualified Isidore as Isidore
import Gargantext.Text.Parsers.CSV (writeDocs2Csv)
import Gargantext.Text.Parsers (cleanText)
-- | TODO work with the ServantErr
get :: Lang -> Maybe Isidore.Limit
-> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
-> IO [HyperdataDocument]
get la l q a = do
let
printErr (DecodeFailure e _) = panic e
printErr e = panic (cs $ show e)
iDocs <- either printErr (_docs) <$> Isidore.get l q a
hDocs <- mapM (\d -> isidoreToDoc la d) iDocs
pure hDocs
isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit
-> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
-> IO ()
isidore2csvFile fp la li tq aq = do
hdocs <- get la li tq aq
writeDocs2Csv fp hdocs
isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
isidoreToDoc l (IsidoreDoc t a d u s as) = do
let
author :: Author -> Text
author (Author fn ln) = (_name fn) <> ", " <> (_name ln)
author (Authors aus) = Text.intercalate ". " $ map author aus
creator2text :: Creator -> Text
creator2text (Creator au) = author au
creator2text (Creators aus') = Text.intercalate ". " $ map author aus'
langText :: LangText -> Text
langText (LangText _l t1) = t1
langText (OnlyText t2 ) = t2
langText (ArrayText ts) = Text.intercalate " " $ map langText ts
(utcTime, (pub_year, pub_month, pub_day)) <- Date.split l (maybe (Just "2019") (Just) d)
pure $ HyperdataDocument (Just "IsidoreApi")
Nothing
u
Nothing
Nothing
Nothing
(Just $ cleanText $ langText t)
Nothing
(creator2text <$> as)
(_sourceName <$> s)
(cleanText <$> langText <$> a)
(fmap (Text.pack . show) utcTime)
(pub_year)
(pub_month)
(pub_day)
Nothing
Nothing
Nothing
(Just $ (Text.pack . show) l)
......@@ -33,7 +33,7 @@ extra-deps:
- git: https://github.com/delanoe/hsparql.git
commit: 308c74b71a1abb0a91546fa57d353131248e3a7f
- git: https://gitlab.iscpif.fr/gargantext/crawlers/isidore.git
commit: 884906b4617955223ad83615ab04fae78d9476db
commit: 069118b29198ee1044a685d8c08dcfb242d601e3
- KMP-0.1.0.2
- accelerate-1.2.0.0
- aeson-lens-0.5.0.0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment