Commit 7997ab36 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[CSV Parser] Parser for Gargantext (legacy) CSV files.

parent 02e3e8d2
......@@ -33,6 +33,7 @@ library:
- Gargantext.Text.Ngrams.PosTagging.CoreNLP
- Gargantext.Text.Ngrams.PosTagging.Parser
- Gargantext.Text.Ngrams.Token.Text
- Gargantext.Text.Parsers.CSV
- Gargantext.Text.Parsers.Date
- Gargantext.Database
- Gargantext.API
......@@ -47,6 +48,7 @@ library:
- base16-bytestring
- bytestring
- case-insensitive
- cassava
- conduit
- conduit-extra
- containers
......
......@@ -75,18 +75,6 @@ pr = reverse
map2 :: (t -> b) -> [[t]] -> [[b]]
map2 fun = map (map fun)
pz :: [a] -> [b] -> [(a, b)]
pz = zip
pd :: Int -> [a] -> [a]
pd = drop
ptk :: Int -> [a] -> [a]
ptk = take
pzw :: (a -> b -> c) -> [a] -> [b] -> [c]
pzw = zipWith
-- Exponential Average
eavg :: [Double] -> Double
eavg (x:xs) = a*x + (1-a)*(eavg xs)
......
{-|
Module : Gargantext.Text.Parsers.CSV
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveGeneric #-}
module Gargantext.Text.Parsers.CSV where
import GHC.Generics (Generic)
import GHC.IO (FilePath)
import Data.Either (Either(Left, Right))
import Data.Text (Text)
import Control.Applicative
import qualified Data.ByteString.Lazy as BL
import Data.Csv
import qualified Data.Vector as V
import Data.Text (pack)
import Data.Char (ord)
import Gargantext.Prelude
data CsvDoc = CsvDoc
{ title :: !Text
, source :: !Text
, publication_year :: !Int
, publication_month :: !Int
, publication_day :: !Int
, abstract :: !Text
, authors :: !Text
}
deriving (Show, Generic)
instance FromNamedRecord CsvDoc where
parseNamedRecord r = CsvDoc <$> r .: "title"
<*> r .: "source"
<*> r .: "publication_year"
<*> r .: "publication_month"
<*> r .: "publication_day"
<*> r .: "abstract"
<*> r .: "authors"
instance ToNamedRecord CsvDoc where
toNamedRecord (CsvDoc t s py pm pd abst aut) =
namedRecord [ "title" .= t
, "source" .= s
, "publication_year" .= py
, "publication_month" .= pm
, "publication_day" .= pd
, "abstract" .= abst
, "authors" .= aut
]
csvDecodeOptions :: DecodeOptions
csvDecodeOptions = (defaultDecodeOptions {decDelimiter = fromIntegral $ ord '\t'} )
csvEncodeOptions :: EncodeOptions
csvEncodeOptions = (defaultEncodeOptions {encDelimiter = fromIntegral $ ord '\t'} )
readCsv :: FilePath -> IO (Header, V.Vector CsvDoc)
readCsv fp = do
csvData <- BL.readFile fp
case decodeByNameWith csvDecodeOptions csvData of
Left e -> panic (pack e)
Right csvDocs -> pure csvDocs
writeCsv :: FilePath -> (Header, V.Vector CsvDoc) -> IO ()
writeCsv fp (h, vs) = BL.writeFile fp $
encodeByNameWith csvEncodeOptions h (V.toList vs)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment