Commit c41cdec8 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[PARSERS] RIS PRESSE fix bug of \r

parent c45062cb
......@@ -23,6 +23,7 @@ import Gargantext.Prelude
import Gargantext.Text.Parsers.CSV (writeDocs2Csv)
import Gargantext.Text.Parsers (parseDocs, FileFormat(..))
risPress2csv :: FilePath -> IO ()
risPress2csv f = parseDocs RisPresse (f <> ".ris")
>>= \hs -> writeDocs2Csv (f <> ".csv") hs
......
......@@ -22,12 +22,13 @@ please follow the types.
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers (parse, FileFormat(..), clean, parseDocs, risPress2csv)
module Gargantext.Text.Parsers (parse, FileFormat(..), clean, parseDocs)
where
import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries)
import Control.Concurrent.Async as CCA (mapConcurrently)
import Control.Monad (join)
import qualified Data.ByteString.Char8 as DBC
import Data.Attoparsec.ByteString (parseOnly, Parser)
import Data.Either(Either(..))
import Data.Either.Extra (partitionEithers)
......@@ -149,7 +150,7 @@ parse' :: FileFormat -> FilePath
parse' format path = do
files <- case takeExtension path of
".zip" -> openZip path
_ -> pure <$> DB.readFile path
_ -> pure <$> clean <$> DB.readFile path
partitionEithers <$> mapConcurrently (runParser format) files
......@@ -174,13 +175,11 @@ openZip fp = do
bs <- mapConcurrently (\s -> withArchive fp (getEntry s)) entries
pure bs
clean :: Text -> Text
clean txt = DT.map clean' txt
clean :: DB.ByteString -> DB.ByteString
clean txt = DBC.map clean' txt
where
clean' '’' = '\''
clean' '\r' = ' '
clean' c = c
risPress2csv f = parseDocs RisPresse (f <> ".ris") >>= \hs -> writeDocs2Csv (f <> ".csv") hs
......@@ -66,10 +66,10 @@ import qualified Text.ParserCombinators.Parsec (parse)
parseDate :: Lang -> Text -> IO UTCTime
parseDate lang s = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0" lang s
type DateFormat = Text
type DateNull = Text
type DateFormat = Text
type DateDefault = Text
parseDate' :: DateFormat -> DateNull -> Lang -> Text -> IO UTCTime
parseDate' :: DateFormat -> DateDefault -> Lang -> Text -> IO UTCTime
parseDate' format def lang s = do
dateStr' <- parseDateRaw lang s
let dateStr = unpack $ maybe def identity
......
......@@ -63,7 +63,7 @@ lines :: Parser [ByteString]
lines = many line
where
line :: Parser ByteString
line = "\n\n" *> takeTill isEndOfLine
line = "\n " *> takeTill isEndOfLine
-------------------------------------------------------------
withField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
......
......@@ -54,7 +54,7 @@ parseDate = do
presseLang :: ByteString -> [(ByteString, ByteString)]
presseLang "Français" = [("language", "FR")]
presseLang "English" = [("language", "EN")]
presseLang _ = undefined
presseLang x = [("language", x)]
presseFields :: ByteString -> ByteString
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment