[CSV Parser] Parser for Gargantext (legacy) CSV files.

7997ab36 · Alexandre Delanoë · 02e3e8d2 · 7997ab36 · 7997ab36 · 7997ab36
Commit 7997ab36 authored May 14, 2018 by Alexandre Delanoë
Showing with 84 additions and 12 deletions

package.yaml package.yaml +2 -0

Prelude.hs src/Gargantext/Prelude.hs +0 -12

CSV.hs src/Gargantext/Text/Parsers/CSV.hs +82 -0

CSV_hs src/Gargantext/Text/Parsers/CSV_hs +0 -0

No files found.
--- a/package.yaml
+++ b/package.yaml
@@ -33,6 +33,7 @@ library:
  - Gargantext.Text.Ngrams.PosTagging.CoreNLP
  - Gargantext.Text.Ngrams.PosTagging.Parser
  - Gargantext.Text.Ngrams.Token.Text
+  - Gargantext.Text.Parsers.CSV
  - Gargantext.Text.Parsers.Date
  - Gargantext.Database
  - Gargantext.API
@@ -47,6 +48,7 @@ library:
  - base16-bytestring
  - bytestring
  - case-insensitive
+  - cassava
  - conduit
  - conduit-extra
  - containers

--- a/src/Gargantext/Prelude.hs
+++ b/src/Gargantext/Prelude.hs
@@ -75,18 +75,6 @@ pr = reverse
 map2 :: (t -> b) -> [[t]] -> [[b]]
 map2 fun = map (map fun)
-pz :: [a] -> [b] -> [(a, b)]
-pz  = zip
-pd :: Int -> [a] -> [a]
-pd  = drop
-ptk :: Int -> [a] -> [a]
-ptk = take
-pzw :: (a -> b -> c) -> [a] -> [b] -> [c]
-pzw = zipWith
 -- Exponential Average
 eavg :: [Double] -> Double
 eavg (x:xs) = a*x + (1-a)*(eavg xs)

--- a/src/Gargantext/Text/Parsers/CSV.hs
+++ b/src/Gargantext/Text/Parsers/CSV.hs
+{-|
+Module      : Gargantext.Text.Parsers.CSV
+Description : 
+Copyright   : (c) CNRS, 2017-Present
+License     : AGPL + CECILL v3
+Maintainer  : team@gargantext.org
+Stability   : experimental
+Portability : POSIX
+Here is a longer description of this module, containing some
+commentary with @some markup@.
+-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE DeriveGeneric      #-}
+module Gargantext.Text.Parsers.CSV where
+import GHC.Generics (Generic)
+import GHC.IO (FilePath)
+import Data.Either (Either(Left, Right))
+import Data.Text (Text)
+import Control.Applicative
+import qualified Data.ByteString.Lazy as BL
+import Data.Csv
+import qualified Data.Vector as V
+import Data.Text (pack)
+import Data.Char (ord)
+import Gargantext.Prelude
+data CsvDoc = CsvDoc
+    { title  :: !Text
+    , source :: !Text
+    , publication_year  :: !Int
+    , publication_month :: !Int
+    , publication_day   :: !Int
+    , abstract          :: !Text
+    , authors           :: !Text
+    }
+    deriving (Show, Generic)
+instance FromNamedRecord CsvDoc where
+  parseNamedRecord r = CsvDoc <$> r .: "title"
+                              <*> r .: "source"
+                              <*> r .: "publication_year"
+                              <*> r .: "publication_month"
+                              <*> r .: "publication_day"
+                              <*> r .: "abstract"
+                              <*> r .: "authors"
+instance ToNamedRecord CsvDoc where
+  toNamedRecord (CsvDoc t s py pm pd abst aut) = 
+    namedRecord [ "title"  .= t
+                , "source" .= s
+                , "publication_year"  .= py
+                , "publication_month" .= pm
+                , "publication_day"   .= pd
+                , "abstract"          .= abst
+                , "authors"           .= aut
+                ]
+csvDecodeOptions :: DecodeOptions
+csvDecodeOptions = (defaultDecodeOptions {decDelimiter = fromIntegral $ ord '\t'} )
+csvEncodeOptions :: EncodeOptions
+csvEncodeOptions = (defaultEncodeOptions {encDelimiter = fromIntegral $ ord '\t'} )
+readCsv :: FilePath -> IO (Header, V.Vector CsvDoc)
+readCsv fp = do
+    csvData <- BL.readFile fp
+    case decodeByNameWith csvDecodeOptions csvData of
+      Left e    -> panic (pack e)
+      Right csvDocs -> pure csvDocs
+writeCsv :: FilePath -> (Header, V.Vector CsvDoc) -> IO ()
+writeCsv fp (h, vs) = BL.writeFile fp $
+                      encodeByNameWith csvEncodeOptions h (V.toList vs)
--- a/src/Gargantext/Text/Parsers/CSV_hs
+++ b/src/Gargantext/Text/Parsers/CSV_hs