Commit c41cdec8 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[PARSERS] RIS PRESSE fix bug of \r

parent c45062cb
...@@ -23,6 +23,7 @@ import Gargantext.Prelude ...@@ -23,6 +23,7 @@ import Gargantext.Prelude
import Gargantext.Text.Parsers.CSV (writeDocs2Csv) import Gargantext.Text.Parsers.CSV (writeDocs2Csv)
import Gargantext.Text.Parsers (parseDocs, FileFormat(..)) import Gargantext.Text.Parsers (parseDocs, FileFormat(..))
risPress2csv :: FilePath -> IO () risPress2csv :: FilePath -> IO ()
risPress2csv f = parseDocs RisPresse (f <> ".ris") risPress2csv f = parseDocs RisPresse (f <> ".ris")
>>= \hs -> writeDocs2Csv (f <> ".csv") hs >>= \hs -> writeDocs2Csv (f <> ".csv") hs
......
...@@ -22,12 +22,13 @@ please follow the types. ...@@ -22,12 +22,13 @@ please follow the types.
{-# LANGUAGE PackageImports #-} {-# LANGUAGE PackageImports #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers (parse, FileFormat(..), clean, parseDocs, risPress2csv) module Gargantext.Text.Parsers (parse, FileFormat(..), clean, parseDocs)
where where
import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries) import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries)
import Control.Concurrent.Async as CCA (mapConcurrently) import Control.Concurrent.Async as CCA (mapConcurrently)
import Control.Monad (join) import Control.Monad (join)
import qualified Data.ByteString.Char8 as DBC
import Data.Attoparsec.ByteString (parseOnly, Parser) import Data.Attoparsec.ByteString (parseOnly, Parser)
import Data.Either(Either(..)) import Data.Either(Either(..))
import Data.Either.Extra (partitionEithers) import Data.Either.Extra (partitionEithers)
...@@ -149,7 +150,7 @@ parse' :: FileFormat -> FilePath ...@@ -149,7 +150,7 @@ parse' :: FileFormat -> FilePath
parse' format path = do parse' format path = do
files <- case takeExtension path of files <- case takeExtension path of
".zip" -> openZip path ".zip" -> openZip path
_ -> pure <$> DB.readFile path _ -> pure <$> clean <$> DB.readFile path
partitionEithers <$> mapConcurrently (runParser format) files partitionEithers <$> mapConcurrently (runParser format) files
...@@ -174,13 +175,11 @@ openZip fp = do ...@@ -174,13 +175,11 @@ openZip fp = do
bs <- mapConcurrently (\s -> withArchive fp (getEntry s)) entries bs <- mapConcurrently (\s -> withArchive fp (getEntry s)) entries
pure bs pure bs
clean :: Text -> Text clean :: DB.ByteString -> DB.ByteString
clean txt = DT.map clean' txt clean txt = DBC.map clean' txt
where where
clean' '’' = '\'' clean' '’' = '\''
clean' '\r' = ' '
clean' c = c clean' c = c
risPress2csv f = parseDocs RisPresse (f <> ".ris") >>= \hs -> writeDocs2Csv (f <> ".csv") hs
...@@ -67,9 +67,9 @@ parseDate :: Lang -> Text -> IO UTCTime ...@@ -67,9 +67,9 @@ parseDate :: Lang -> Text -> IO UTCTime
parseDate lang s = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0" lang s parseDate lang s = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0" lang s
type DateFormat = Text type DateFormat = Text
type DateNull = Text type DateDefault = Text
parseDate' :: DateFormat -> DateNull -> Lang -> Text -> IO UTCTime parseDate' :: DateFormat -> DateDefault -> Lang -> Text -> IO UTCTime
parseDate' format def lang s = do parseDate' format def lang s = do
dateStr' <- parseDateRaw lang s dateStr' <- parseDateRaw lang s
let dateStr = unpack $ maybe def identity let dateStr = unpack $ maybe def identity
......
...@@ -63,7 +63,7 @@ lines :: Parser [ByteString] ...@@ -63,7 +63,7 @@ lines :: Parser [ByteString]
lines = many line lines = many line
where where
line :: Parser ByteString line :: Parser ByteString
line = "\n\n" *> takeTill isEndOfLine line = "\n " *> takeTill isEndOfLine
------------------------------------------------------------- -------------------------------------------------------------
withField :: ByteString -> (ByteString -> [(ByteString, ByteString)]) withField :: ByteString -> (ByteString -> [(ByteString, ByteString)])
......
...@@ -54,7 +54,7 @@ parseDate = do ...@@ -54,7 +54,7 @@ parseDate = do
presseLang :: ByteString -> [(ByteString, ByteString)] presseLang :: ByteString -> [(ByteString, ByteString)]
presseLang "Français" = [("language", "FR")] presseLang "Français" = [("language", "FR")]
presseLang "English" = [("language", "EN")] presseLang "English" = [("language", "EN")]
presseLang _ = undefined presseLang x = [("language", x)]
presseFields :: ByteString -> ByteString presseFields :: ByteString -> ByteString
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment