Commit 32f6c049 authored by Alexandre Delanoë

[PARSERS] RIS (WIP).

parent 2584e64c
...@@ -55,6 +55,7 @@ import Gargantext.Core (Lang(..)) ...@@ -55,6 +55,7 @@ import Gargantext.Core (Lang(..))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Database.Types.Node (HyperdataDocument(..)) import Gargantext.Database.Types.Node (HyperdataDocument(..))
import Gargantext.Text.Parsers.WOS (wosParser) import Gargantext.Text.Parsers.WOS (wosParser)
import Gargantext.Text.Parsers.RIS (risParser)
import Gargantext.Text.Parsers.Date (parseDate) import Gargantext.Text.Parsers.Date (parseDate)
import Gargantext.Text.Parsers.CSV (parseHal) import Gargantext.Text.Parsers.CSV (parseHal)
import Gargantext.Text.Terms.Stop (detectLang) import Gargantext.Text.Terms.Stop (detectLang)
...@@ -71,7 +72,7 @@ type ParseError = String ...@@ -71,7 +72,7 @@ type ParseError = String
-- | According to the format of Input file, -- | According to the format of Input file,
-- different parser are available. -- different parser are available.
data FileFormat = WOS | CsvHalFormat -- | CsvGargV3 data FileFormat = WOS | RIS | CsvHalFormat -- | CsvGargV3
deriving (Show) deriving (Show)
-- Implemented (ISI Format) -- Implemented (ISI Format)
...@@ -87,7 +88,7 @@ data FileFormat = WOS | CsvHalFormat -- | CsvGargV3 ...@@ -87,7 +88,7 @@ data FileFormat = WOS | CsvHalFormat -- | CsvGargV3
-- | Parse file into documents -- | Parse file into documents
-- TODO manage errors here -- TODO manage errors here
parseDocs :: FileFormat -> FilePath -> IO [HyperdataDocument] parseDocs :: FileFormat -> FilePath -> IO [HyperdataDocument]
parseDocs WOS path = join $ mapM (toDoc WOS) <$> snd <$> parse WOS path parseDocs ff path = join $ mapM (toDoc ff) <$> snd <$> parse ff path
parseDocs CsvHalFormat p = parseHal p parseDocs CsvHalFormat p = parseHal p
type Year = Int type Year = Int
...@@ -106,7 +107,8 @@ parseDate' l (Just txt) = do ...@@ -106,7 +107,8 @@ parseDate' l (Just txt) = do
toDoc :: FileFormat -> [(Text, Text)] -> IO HyperdataDocument toDoc :: FileFormat -> [(Text, Text)] -> IO HyperdataDocument
toDoc WOS d = do -- TODO use language for RIS
toDoc ff d = do
let abstract = lookup "abstract" d let abstract = lookup "abstract" d
let lang = maybe EN identity (join $ detectLang <$> (fmap (DT.take 50) abstract)) let lang = maybe EN identity (join $ detectLang <$> (fmap (DT.take 50) abstract))
...@@ -114,7 +116,7 @@ toDoc WOS d = do ...@@ -114,7 +116,7 @@ toDoc WOS d = do
(utcTime, (pub_year, pub_month, pub_day)) <- parseDate' lang dateToParse (utcTime, (pub_year, pub_month, pub_day)) <- parseDate' lang dateToParse
pure $ HyperdataDocument (Just $ DT.pack $ show WOS) pure $ HyperdataDocument (Just $ DT.pack $ show ff)
(lookup "doi" d) (lookup "doi" d)
(lookup "URL" d) (lookup "URL" d)
Nothing Nothing
...@@ -152,7 +154,7 @@ parse format path = do ...@@ -152,7 +154,7 @@ parse format path = do
-- TODO withParser :: FileFormat -> Parser [Document] -- TODO withParser :: FileFormat -> Parser [Document]
withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]] withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]]
withParser WOS = wosParser withParser WOS = wosParser
--withParser DOC = docParser withParser RIS = risParser
--withParser ODT = odtParser --withParser ODT = odtParser
--withParser XML = xmlParser --withParser XML = xmlParser
withParser _ = panic "[ERROR] Parser not implemented yet" withParser _ = panic "[ERROR] Parser not implemented yet"
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment