Commit 68365f40 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[PARSERS] refactor.

parent d2029ea7
...@@ -24,14 +24,13 @@ import qualified Data.Vector as V ...@@ -24,14 +24,13 @@ import qualified Data.Vector as V
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Search import Gargantext.Text.Search
import Gargantext.Text.Parsers.CSV import qualified Gargantext.Text.Parsers.CSV as CSV
------------------------------------------------------------------------ ------------------------------------------------------------------------
type Query = [S.Term] type Query = [S.Term]
filterDocs :: [DocId] -> Vector Doc -> Vector Doc filterDocs :: [DocId] -> Vector CSV.Doc -> Vector CSV.Doc
filterDocs docIds = V.filter (\doc -> S.member (d_docId doc) $ S.fromList docIds ) filterDocs docIds = V.filter (\doc -> S.member (CSV.d_docId doc) $ S.fromList docIds )
main :: IO () main :: IO ()
...@@ -41,17 +40,17 @@ main = do ...@@ -41,17 +40,17 @@ main = do
--let q = ["water", "scarcity", "morocco", "shortage","flood"] --let q = ["water", "scarcity", "morocco", "shortage","flood"]
let q = ["gratuit", "gratuité", "culture", "culturel"] let q = ["gratuit", "gratuité", "culture", "culturel"]
(h,csvDocs) <- readCsv rPath (h,csvDocs) <- CSV.readFile rPath
putStrLn $ "Number of documents before:" <> show (V.length csvDocs) putStrLn $ "Number of documents before:" <> show (V.length csvDocs)
putStrLn $ "Mean size of docs:" <> show ( docsSize csvDocs) putStrLn $ "Mean size of docs:" <> show ( CSV.docsSize csvDocs)
let docs = toDocs csvDocs let docs = CSV.toDocs csvDocs
let engine = insertDocs docs initialDocSearchEngine let engine = insertDocs docs initialDocSearchEngine
let docIds = S.query engine (map pack q) let docIds = S.query engine (map pack q)
let docs' = fromDocs $ filterDocs docIds (V.fromList docs) let docs' = CSV.fromDocs $ filterDocs docIds (V.fromList docs)
putStrLn $ "Number of documents after:" <> show (V.length docs') putStrLn $ "Number of documents after:" <> show (V.length docs')
putStrLn $ "Mean size of docs:" <> show (docsSize docs') putStrLn $ "Mean size of docs:" <> show (CSV.docsSize docs')
writeCsv wPath (h, docs') CSV.writeFile wPath (h, docs')
...@@ -38,7 +38,7 @@ import Servant.Swagger (HasSwagger(toSwagger)) ...@@ -38,7 +38,7 @@ import Servant.Swagger (HasSwagger(toSwagger))
import Control.Monad import Control.Monad
import Control.Monad.IO.Class import Control.Monad.IO.Class
import Gargantext.API.Types import Gargantext.API.Types
import Servant.CSV.Cassava (CSV'(..)) --import Servant.CSV.Cassava (CSV'(..))
--import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) --import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
--import Data.Swagger --import Data.Swagger
--import Gargantext.API.Ngrams (TODO) --import Gargantext.API.Ngrams (TODO)
...@@ -102,3 +102,4 @@ upload multipartData = do ...@@ -102,3 +102,4 @@ upload multipartData = do
pure $ Text.concat $ map cs is pure $ Text.concat $ map cs is
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
...@@ -101,7 +101,7 @@ mapIdSchool :: Map Text Text ...@@ -101,7 +101,7 @@ mapIdSchool :: Map Text Text
mapIdSchool = M.fromList $ Gargantext.Prelude.map (\(School n _ i) -> (i,n)) schools mapIdSchool = M.fromList $ Gargantext.Prelude.map (\(School n _ i) -> (i,n)) schools
hal_data :: IO (DV.Vector CsvHal) hal_data :: IO (DV.Vector CsvHal)
hal_data = snd <$> CSV.readHal "doc/corpus_imt/Gargantext_Corpus.csv" hal_data = snd <$> CSV.readCsvHal "doc/corpus_imt/Gargantext_Corpus.csv"
names :: S.Set Text names :: S.Set Text
names = S.fromList $ Gargantext.Prelude.map (\s -> school_id s) schools names = S.fromList $ Gargantext.Prelude.map (\s -> school_id s) schools
......
...@@ -22,9 +22,10 @@ please follow the types. ...@@ -22,9 +22,10 @@ please follow the types.
{-# LANGUAGE PackageImports #-} {-# LANGUAGE PackageImports #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers (parse, FileFormat(..), clean, parseFile) module Gargantext.Text.Parsers (FileFormat(..), clean, parseFile)
where where
--import Data.ByteString (ByteString)
import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries) import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries)
import Control.Concurrent.Async as CCA (mapConcurrently) import Control.Concurrent.Async as CCA (mapConcurrently)
import Control.Monad (join) import Control.Monad (join)
...@@ -39,20 +40,18 @@ import Data.String (String()) ...@@ -39,20 +40,18 @@ import Data.String (String())
import Data.String() import Data.String()
import Data.Text (Text) import Data.Text (Text)
import Data.Text.Encoding (decodeUtf8) import Data.Text.Encoding (decodeUtf8)
import Data.Time (UTCTime(..))
import Data.Tuple.Extra (both, first, second) import Data.Tuple.Extra (both, first, second)
import System.FilePath (FilePath(), takeExtension) import System.FilePath (FilePath(), takeExtension)
import qualified Data.ByteString as DB import qualified Data.ByteString as DB
import qualified Data.Map as DM import qualified Data.Map as DM
import qualified Data.Text as DT import qualified Data.Text as DT
import qualified Data.Time as DT
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Database.Types.Node (HyperdataDocument(..)) import Gargantext.Database.Types.Node (HyperdataDocument(..))
import qualified Gargantext.Text.Parsers.WOS as WOS import qualified Gargantext.Text.Parsers.WOS as WOS
import qualified Gargantext.Text.Parsers.RIS as RIS import qualified Gargantext.Text.Parsers.RIS as RIS
import Gargantext.Text.Parsers.RIS.Presse (presseEnrich) import Gargantext.Text.Parsers.RIS.Presse (presseEnrich)
import Gargantext.Text.Parsers.Date (parseDate) import qualified Gargantext.Text.Parsers.Date as Date
import Gargantext.Text.Parsers.CSV (parseHal) import Gargantext.Text.Parsers.CSV (parseHal)
import Gargantext.Text.Terms.Stop (detectLang) import Gargantext.Text.Terms.Stop (detectLang)
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -77,33 +76,21 @@ data FileFormat = WOS | RIS | RisPresse ...@@ -77,33 +76,21 @@ data FileFormat = WOS | RIS | RisPresse
-- | ODT -- Not Implemented / import Pandoc -- | ODT -- Not Implemented / import Pandoc
-- | PDF -- Not Implemented / pdftotext and import Pandoc ? -- | PDF -- Not Implemented / pdftotext and import Pandoc ?
-- | XML -- Not Implemented / see : -- | XML -- Not Implemented / see :
-- -- > http://chrisdone.com/posts/fast-haskell-c-parsing-xml
-- TODO: to debug maybe add the filepath in error message
{-
parseFormat :: FileFormat -> ByteString -> [HyperdataDocument]
parseFormat = undefined
-}
-- | Parse file into documents -- | Parse file into documents
-- TODO manage errors here -- TODO manage errors here
-- TODO: to debug maybe add the filepath in error message
parseFile :: FileFormat -> FilePath -> IO [HyperdataDocument] parseFile :: FileFormat -> FilePath -> IO [HyperdataDocument]
parseFile CsvHalFormat p = parseHal p parseFile CsvHalFormat p = parseHal p
parseFile RisPresse p = join $ mapM (toDoc RIS) <$> snd <$> enrichWith presseEnrich <$> parse' RIS p parseFile RisPresse p = join $ mapM (toDoc RIS) <$> snd <$> enrichWith RisPresse <$> readFileWith RIS p
parseFile WOS p = join $ mapM (toDoc WOS) <$> snd <$> enrichWith (map (first WOS.keys)) <$> parse' WOS p parseFile WOS p = join $ mapM (toDoc WOS) <$> snd <$> enrichWith WOS <$> readFileWith WOS p
parseFile ff p = join $ mapM (toDoc ff) <$> snd <$> parse ff p parseFile ff p = join $ mapM (toDoc ff) <$> snd <$> enrichWith ff <$> readFileWith ff p
type Year = Int
type Month = Int
type Day = Int
-- | Parse date to Ints
-- TODO add hours, minutes and seconds
parseDate' :: Lang -> Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
parseDate' _ Nothing = pure (Nothing, (Nothing, Nothing, Nothing))
parseDate' l (Just txt) = do
utcTime <- parseDate l txt
let (UTCTime day _) = utcTime
let (y,m,d) = DT.toGregorian day
pure (Just utcTime, (Just (fromIntegral y), Just m,Just d))
toDoc :: FileFormat -> [(Text, Text)] -> IO HyperdataDocument toDoc :: FileFormat -> [(Text, Text)] -> IO HyperdataDocument
-- TODO use language for RIS -- TODO use language for RIS
...@@ -113,7 +100,7 @@ toDoc ff d = do ...@@ -113,7 +100,7 @@ toDoc ff d = do
let dateToParse = DT.replace "-" " " <$> lookup "PY" d <> Just " " <> lookup "publication_date" d let dateToParse = DT.replace "-" " " <$> lookup "PY" d <> Just " " <> lookup "publication_date" d
(utcTime, (pub_year, pub_month, pub_day)) <- parseDate' lang dateToParse (utcTime, (pub_year, pub_month, pub_day)) <- Date.split lang dateToParse
pure $ HyperdataDocument (Just $ DT.pack $ show ff) pure $ HyperdataDocument (Just $ DT.pack $ show ff)
(lookup "doi" d) (lookup "doi" d)
...@@ -135,26 +122,28 @@ toDoc ff d = do ...@@ -135,26 +122,28 @@ toDoc ff d = do
Nothing Nothing
(Just $ (DT.pack . show) lang) (Just $ (DT.pack . show) lang)
parse :: FileFormat -> FilePath -> IO ([ParseError], [[(Text, Text)]]) enrichWith :: FileFormat
parse ff fp = enrichWith identity <$> parse' ff fp -> (a, [[[(DB.ByteString, DB.ByteString)]]]) -> (a, [[(Text, Text)]])
enrichWith RisPresse = enrichWith' presseEnrich
enrichWith WOS = enrichWith' (map (first WOS.keys))
enrichWith _ = enrichWith' identity
enrichWith :: enrichWith' :: ([(DB.ByteString, DB.ByteString)] -> [(DB.ByteString, DB.ByteString)])
([(DB.ByteString, DB.ByteString)] -> [(DB.ByteString, DB.ByteString)]) -> (a, [[[(DB.ByteString, DB.ByteString)]]]) -> (a, [[(Text, Text)]])
-> (a, [[[(DB.ByteString, DB.ByteString)]]]) -> (a, [[(Text, Text)]]) enrichWith' f = second (map both' . map f . concat)
enrichWith f = second (map both' . map f . concat)
where where
both' = map (both decodeUtf8) both' = map (both decodeUtf8)
parse' :: FileFormat -> FilePath readFileWith :: FileFormat -> FilePath
-> IO ([ParseError], [[[(DB.ByteString, DB.ByteString)]]]) -> IO ([ParseError], [[[(DB.ByteString, DB.ByteString)]]])
parse' format path = do readFileWith format path = do
files <- case takeExtension path of files <- case takeExtension path of
".zip" -> openZip path ".zip" -> openZip path
_ -> pure <$> clean <$> DB.readFile path _ -> pure <$> clean <$> DB.readFile path
partitionEithers <$> mapConcurrently (runParser format) files partitionEithers <$> mapConcurrently (runParser format) files
-- | withParser: -- | withParser:
-- According to the format of the text, choose the right parser. -- According to the format of the text, choose the right parser.
-- TODO withParser :: FileFormat -> Parser [Document] -- TODO withParser :: FileFormat -> Parser [Document]
...@@ -181,4 +170,3 @@ clean txt = DBC.map clean' txt ...@@ -181,4 +170,3 @@ clean txt = DBC.map clean' txt
clean' '’' = '\'' clean' '’' = '\''
clean' '\r' = ' ' clean' '\r' = ' '
clean' c = c clean' c = c
...@@ -17,24 +17,23 @@ CSV parser for Gargantext corpus files. ...@@ -17,24 +17,23 @@ CSV parser for Gargantext corpus files.
module Gargantext.Text.Parsers.CSV where module Gargantext.Text.Parsers.CSV where
import GHC.Real (round)
import GHC.IO (FilePath)
import Control.Applicative import Control.Applicative
import Data.Char (ord) import Data.Char (ord)
import Data.Csv import Data.Csv
import Data.Either (Either(Left, Right)) import Data.Either (Either(Left, Right))
import Data.Text (Text, pack, length, intercalate) import Data.Text (Text, pack, length, intercalate)
import qualified Data.ByteString.Lazy as BL
import Data.Time.Segment (jour) import Data.Time.Segment (jour)
import Data.Vector (Vector) import Data.Vector (Vector)
import qualified Data.Vector as V import GHC.IO (FilePath)
import GHC.Real (round)
import GHC.Word (Word8)
import Gargantext.Database.Types.Node -- (HyperdataDocument(..)) import Gargantext.Database.Types.Node -- (HyperdataDocument(..))
import Gargantext.Prelude hiding (length)
import Gargantext.Text import Gargantext.Text
import Gargantext.Text.Context import Gargantext.Text.Context
import Gargantext.Prelude hiding (length) import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString as BS
import qualified Data.Vector as V
--------------------------------------------------------------- ---------------------------------------------------------------
headerCsvGargV3 :: Header headerCsvGargV3 :: Header
...@@ -192,40 +191,65 @@ hyperdataDocument2csvDoc h = CsvDoc (m $ _hyperdataDocument_title h) ...@@ -192,40 +191,65 @@ hyperdataDocument2csvDoc h = CsvDoc (m $ _hyperdataDocument_title h)
csvDecodeOptions :: DecodeOptions csvDecodeOptions :: DecodeOptions
csvDecodeOptions = (defaultDecodeOptions csvDecodeOptions = defaultDecodeOptions {decDelimiter = delimiter}
{decDelimiter = fromIntegral $ ord '\t'}
)
csvEncodeOptions :: EncodeOptions csvEncodeOptions :: EncodeOptions
csvEncodeOptions = ( defaultEncodeOptions csvEncodeOptions = defaultEncodeOptions {encDelimiter = delimiter}
{encDelimiter = fromIntegral $ ord '\t'}
)
delimiter :: Word8
delimiter = fromIntegral $ ord '\t'
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
readCsvOn :: [CsvDoc -> Text] -> FilePath -> IO [Text] readCsvOn :: [CsvDoc -> Text] -> FilePath -> IO [Text]
readCsvOn fields fp = V.toList <$> V.map (\l -> intercalate (pack " ") $ map (\field -> field l) fields) readCsvOn fields fp = V.toList <$> V.map (\l -> intercalate (pack " ") $ map (\field -> field l) fields)
<$> snd <$> snd
<$> readCsv fp <$> readFile fp
------------------------------------------------------------------------ ------------------------------------------------------------------------
readCsv :: FilePath -> IO (Header, Vector CsvDoc)
readCsv fp = do readFileLazy :: (FromNamedRecord a) => a -> FilePath -> IO (Header, Vector a)
csvData <- BL.readFile fp readFileLazy f = fmap (readByteStringLazy f) . BL.readFile
case decodeByNameWith csvDecodeOptions csvData of
readFileStrict :: (FromNamedRecord a) => a -> FilePath -> IO (Header, Vector a)
readFileStrict f = fmap (readByteStringStrict f) . BS.readFile
readByteStringLazy :: (FromNamedRecord a) => a -> BL.ByteString -> (Header, Vector a)
readByteStringLazy f bs = case decodeByNameWith csvDecodeOptions bs of
Left e -> panic (pack e)
Right csvDocs -> csvDocs
readByteStringStrict :: (FromNamedRecord a) => a -> BS.ByteString -> (Header, Vector a)
readByteStringStrict ff = (readByteStringLazy ff) . BL.fromStrict
------------------------------------------------------------------------
-- | TODO use readFileLazy
readFile :: FilePath -> IO (Header, Vector CsvDoc)
readFile = fmap readCsvLazyBS . BL.readFile
-- | TODO use readByteStringLazy
readCsvLazyBS :: BL.ByteString -> (Header, Vector CsvDoc)
readCsvLazyBS bs = case decodeByNameWith csvDecodeOptions bs of
Left e -> panic (pack e) Left e -> panic (pack e)
Right csvDocs -> pure csvDocs Right csvDocs -> csvDocs
------------------------------------------------------------------------
-- | TODO use readFileLazy
readCsvHal :: FilePath -> IO (Header, Vector CsvHal)
readCsvHal = fmap readCsvHalLazyBS . BL.readFile
readHal :: FilePath -> IO (Header, Vector CsvHal) -- | TODO use readByteStringLazy
readHal fp = do readCsvHalLazyBS :: BL.ByteString -> (Header, Vector CsvHal)
csvData <- BL.readFile fp readCsvHalLazyBS bs = case decodeByNameWith csvDecodeOptions bs of
case decodeByNameWith csvDecodeOptions csvData of
Left e -> panic (pack e) Left e -> panic (pack e)
Right csvDocs -> pure csvDocs Right csvDocs -> csvDocs
readCsvHalBSStrict :: BS.ByteString -> (Header, Vector CsvHal)
readCsvHalBSStrict = readCsvHalLazyBS . BL.fromStrict
------------------------------------------------------------------------ ------------------------------------------------------------------------
writeCsv :: FilePath -> (Header, Vector CsvDoc) -> IO () writeFile :: FilePath -> (Header, Vector CsvDoc) -> IO ()
writeCsv fp (h, vs) = BL.writeFile fp $ writeFile fp (h, vs) = BL.writeFile fp $
encodeByNameWith csvEncodeOptions h (V.toList vs) encodeByNameWith csvEncodeOptions h (V.toList vs)
writeDocs2Csv :: FilePath -> [HyperdataDocument] -> IO () writeDocs2Csv :: FilePath -> [HyperdataDocument] -> IO ()
...@@ -342,6 +366,6 @@ csvHal2doc (CsvHal title source ...@@ -342,6 +366,6 @@ csvHal2doc (CsvHal title source
------------------------------------------------------------------------ ------------------------------------------------------------------------
parseHal :: FilePath -> IO [HyperdataDocument] parseHal :: FilePath -> IO [HyperdataDocument]
parseHal fp = map csvHal2doc <$> V.toList <$> snd <$> readHal fp parseHal fp = map csvHal2doc <$> V.toList <$> snd <$> readCsvHal fp
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -18,12 +18,12 @@ DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00" ...@@ -18,12 +18,12 @@ DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Parsers.Date (parseDate, parseDateRaw) where module Gargantext.Text.Parsers.Date (parse, parseRaw, split) where
import Data.HashMap.Strict as HM hiding (map) import Data.HashMap.Strict as HM hiding (map)
import Data.Text (Text, unpack, splitOn, pack) import Data.Text (Text, unpack, splitOn, pack)
import Data.Time (parseTimeOrError, defaultTimeLocale) import Data.Time (parseTimeOrError, defaultTimeLocale, toGregorian)
import Data.Time.Clock (UTCTime, getCurrentTime) import Data.Time.Clock (UTCTime(..), getCurrentTime)
import Data.Time.LocalTime (utc) import Data.Time.LocalTime (utc)
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime) import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
import Duckling.Api (analyze) import Duckling.Api (analyze)
...@@ -38,21 +38,41 @@ import qualified Data.HashSet as HashSet ...@@ -38,21 +38,41 @@ import qualified Data.HashSet as HashSet
import qualified Duckling.Core as DC import qualified Duckling.Core as DC
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Parse date to Ints
-- TODO add hours, minutes and seconds
split :: Lang -> Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
split _ Nothing = pure (Nothing, (Nothing, Nothing, Nothing))
split l (Just txt) = do
utcTime <- parse l txt
let (y, m, d) = split' utcTime
pure (Just utcTime, (Just y, Just m,Just d))
split' :: UTCTime -> (Year, Month, Day)
split' utcTime = (fromIntegral y, m, d)
where
(UTCTime day _) = utcTime
(y,m,d) = toGregorian day
type Year = Int
type Month = Int
type Day = Int
------------------------------------------------------------------------
-- | Date Parser -- | Date Parser
-- Parses dates mentions in full text given the language. -- Parses dates mentions in full text given the language.
-- >>> parseDate FR (pack "10 avril 1979 à 19H") -- >>> parseDate FR (pack "10 avril 1979 à 19H")
-- 1979-04-10 19:00:00 UTC -- 1979-04-10 19:00:00 UTC
-- >>> parseDate EN (pack "April 10 1979") -- >>> parseDate EN (pack "April 10 1979")
-- 1979-04-10 00:00:00 UTC -- 1979-04-10 00:00:00 UTC
parseDate :: Lang -> Text -> IO UTCTime parse :: Lang -> Text -> IO UTCTime
parseDate lang s = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0" lang s parse lang s = parseDate' "%Y-%m-%dT%T" "0-0-0T0:0:0" lang s
type DateFormat = Text type DateFormat = Text
type DateDefault = Text type DateDefault = Text
parseDate' :: DateFormat -> DateDefault -> Lang -> Text -> IO UTCTime parseDate' :: DateFormat -> DateDefault -> Lang -> Text -> IO UTCTime
parseDate' format def lang s = do parseDate' format def lang s = do
dateStr' <- parseDateRaw lang s dateStr' <- parseRaw lang s
let dateStr = unpack $ maybe def identity let dateStr = unpack $ maybe def identity
$ head $ splitOn "." dateStr' $ head $ splitOn "." dateStr'
pure $ parseTimeOrError True defaultTimeLocale (unpack format) dateStr pure $ parseTimeOrError True defaultTimeLocale (unpack format) dateStr
...@@ -70,19 +90,19 @@ parserLang EN = DC.EN ...@@ -70,19 +90,19 @@ parserLang EN = DC.EN
-- IO can be avoided here: -- IO can be avoided here:
-- currentContext :: Lang -> IO Context -- currentContext :: Lang -> IO Context
-- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime -- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
-- parseDateRaw :: Context -> Text -> SomeErrorHandling Text -- parseRaw :: Context -> Text -> SomeErrorHandling Text
-- TODO error handling -- TODO error handling
parseDateRaw :: Lang -> Text -> IO (Text) parseRaw :: Lang -> Text -> IO (Text)
parseDateRaw lang text = do parseRaw lang text = do
maybeJson <- map jsonValue <$> parseDateWithDuckling lang text maybeJson <- map jsonValue <$> parseDateWithDuckling lang text
case headMay maybeJson of case headMay maybeJson of
Just (Json.Object object) -> case HM.lookup "value" object of Just (Json.Object object) -> case HM.lookup "value" object of
Just (Json.String date) -> pure date Just (Json.String date) -> pure date
Just _ -> panic "ParseDateRaw ERROR: should be a json String" Just _ -> panic "ParseRaw ERROR: should be a json String"
Nothing -> panic $ "ParseDateRaw ERROR: no date found" <> (pack . show) lang <> " " <> text Nothing -> panic $ "ParseRaw ERROR: no date found" <> (pack . show) lang <> " " <> text
_ -> panic $ "ParseDateRaw ERROR: type error" <> (pack . show) lang <> " " <> text _ -> panic $ "ParseRaw ERROR: type error" <> (pack . show) lang <> " " <> text
-- | Current Time in DucklingTime format -- | Current Time in DucklingTime format
......
...@@ -28,7 +28,7 @@ import Data.Text (Text, unpack) ...@@ -28,7 +28,7 @@ import Data.Text (Text, unpack)
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude import Gargantext.Prelude
import System.IO (FilePath) import System.IO (FilePath)
import Gargantext.Text.Parsers.CSV (CsvDoc(..), writeCsv, headerCsvGargV3) import Gargantext.Text.Parsers.CSV (CsvDoc(..), writeFile, headerCsvGargV3)
import Data.Vector (fromList) import Data.Vector (fromList)
data Patent = Patent { _patent_title :: Text data Patent = Patent { _patent_title :: Text
...@@ -48,7 +48,7 @@ type FilePathOut = FilePath ...@@ -48,7 +48,7 @@ type FilePathOut = FilePath
json2csv :: FilePathIn -> FilePathOut -> IO () json2csv :: FilePathIn -> FilePathOut -> IO ()
json2csv fin fout = do json2csv fin fout = do
patents <- maybe (panic "json2csv error") identity <$> readPatents fin patents <- maybe (panic "json2csv error") identity <$> readPatents fin
writeCsv fout (headerCsvGargV3, fromList $ map patent2csvDoc patents) writeFile fout (headerCsvGargV3, fromList $ map patent2csvDoc patents)
patent2csvDoc :: Patent -> CsvDoc patent2csvDoc :: Patent -> CsvDoc
patent2csvDoc (Patent title abstract year _) = patent2csvDoc (Patent title abstract year _) =
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment