Commit 3cabc48d authored by Alexandre Delanoë

[UPLOAD] adding CSV Garg v3 parser.

parent 2044f66d
......@@ -43,7 +43,7 @@ import Gargantext.Database.Types.Node (CorpusId)
import Gargantext.Database.Types.Node (ToHyperdataDocument(..))
import Gargantext.Database.Types.Node (UserId)
import Gargantext.Prelude
import Gargantext.Text.Corpus.Parsers.CSV (parseHal')
import Gargantext.Text.Corpus.Parsers.CSV (parseCsv')--parseHal')--, parseCsv')
import Gargantext.Text.Terms (TermType(..))
import Servant
import Servant.API.Flatten (Flat)
......@@ -221,11 +221,17 @@ addToCorpusWithForm :: FlowCmdM env err m
-> WithForm
-> (ScraperStatus -> m ())
-> m ScraperStatus
addToCorpusWithForm cid (WithForm _ft d) logStatus = do
addToCorpusWithForm cid (WithForm ft d) logStatus = do
let docs = splitEvery 500
let
parse = case ft of
Just CSV_HAL -> parseHal'
Just CSV -> parseCsv'
_ -> parseHal'
docs = splitEvery 500
$ take 1000000
$ parseHal' (cs d)
$ parseCsv' (cs d)
logStatus ScraperStatus { _scst_succeeded = Just 1
, _scst_failed = Just 0
......
......@@ -48,7 +48,7 @@ import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
-------------------------------------------------------------
type Hash = Text
data FileType = CSV | PresseRIS
-- | File types known to this module. The 'FromHttpApiData' instance decodes
-- them from the URL pieces @"CSV"@, @"CSV_HAL"@ and @"PresseRis"@.
data FileType = CSV | CSV_HAL | PresseRIS
deriving (Eq, Show, Generic)
instance ToSchema FileType
......@@ -65,6 +65,7 @@ instance ToParamSchema (MultipartData Mem) where
-- | Decode a 'FileType' from its URL-piece spelling.
--
-- The recognised spellings are @"CSV"@, @"CSV_HAL"@ and @"PresseRis"@.
-- Any other input is rejected with 'Left' instead of being silently read
-- as 'CSV', so a misspelled or unsupported type is reported to the caller
-- rather than handed to the wrong parser.
instance FromHttpApiData FileType
  where
    parseUrlPiece "CSV"       = pure CSV
    parseUrlPiece "CSV_HAL"   = pure CSV_HAL
    parseUrlPiece "PresseRis" = pure PresseRIS
    parseUrlPiece _           = Left "Unknown FileType: expected CSV, CSV_HAL or PresseRis"
......
......@@ -396,3 +396,6 @@ parseHal' = V.toList . V.map csvHal2doc . snd . readCsvHalLazyBS
-- | Load the file at @fp@ with 'readFile', take the second component of its
-- result and map 'csv2doc' over that vector, returning the documents as a
-- list.
parseCsv :: FilePath -> IO [HyperdataDocument]
parseCsv fp = fmap (V.toList . V.map csv2doc . snd) (readFile fp)
-- | Pure variant of 'parseCsv' working on an in-memory lazy 'BL.ByteString':
-- decode it with 'readCsvLazyBS', take the second component of the result
-- and convert every element with 'csv2doc'.
parseCsv' :: BL.ByteString -> [HyperdataDocument]
parseCsv' = V.toList . V.map csv2doc . snd . readCsvLazyBS
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment