Commit 3cabc48d authored by Alexandre Delanoë

[UPLOAD] adding CSV Garg v3 parser.

parent 2044f66d
...@@ -43,7 +43,7 @@ import Gargantext.Database.Types.Node (CorpusId) ...@@ -43,7 +43,7 @@ import Gargantext.Database.Types.Node (CorpusId)
import Gargantext.Database.Types.Node (ToHyperdataDocument(..)) import Gargantext.Database.Types.Node (ToHyperdataDocument(..))
import Gargantext.Database.Types.Node (UserId) import Gargantext.Database.Types.Node (UserId)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Corpus.Parsers.CSV (parseHal') import Gargantext.Text.Corpus.Parsers.CSV (parseCsv')--parseHal')--, parseCsv')
import Gargantext.Text.Terms (TermType(..)) import Gargantext.Text.Terms (TermType(..))
import Servant import Servant
import Servant.API.Flatten (Flat) import Servant.API.Flatten (Flat)
...@@ -221,11 +221,17 @@ addToCorpusWithForm :: FlowCmdM env err m ...@@ -221,11 +221,17 @@ addToCorpusWithForm :: FlowCmdM env err m
-> WithForm -> WithForm
-> (ScraperStatus -> m ()) -> (ScraperStatus -> m ())
-> m ScraperStatus -> m ScraperStatus
addToCorpusWithForm cid (WithForm _ft d) logStatus = do addToCorpusWithForm cid (WithForm ft d) logStatus = do
let docs = splitEvery 500 let
parse = case ft of
Just CSV_HAL -> parseHal'
Just CSV -> parseCsv'
_ -> parseHal'
docs = splitEvery 500
$ take 1000000 $ take 1000000
$ parseHal' (cs d) $ parseCsv' (cs d)
logStatus ScraperStatus { _scst_succeeded = Just 1 logStatus ScraperStatus { _scst_succeeded = Just 1
, _scst_failed = Just 0 , _scst_failed = Just 0
......
...@@ -48,7 +48,7 @@ import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) ...@@ -48,7 +48,7 @@ import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
------------------------------------------------------------- -------------------------------------------------------------
type Hash = Text type Hash = Text
data FileType = CSV | PresseRIS data FileType = CSV | CSV_HAL | PresseRIS
deriving (Eq, Show, Generic) deriving (Eq, Show, Generic)
instance ToSchema FileType instance ToSchema FileType
...@@ -65,6 +65,7 @@ instance ToParamSchema (MultipartData Mem) where ...@@ -65,6 +65,7 @@ instance ToParamSchema (MultipartData Mem) where
instance FromHttpApiData FileType instance FromHttpApiData FileType
where where
parseUrlPiece "CSV" = pure CSV parseUrlPiece "CSV" = pure CSV
parseUrlPiece "CSV_HAL" = pure CSV_HAL
parseUrlPiece "PresseRis" = pure PresseRIS parseUrlPiece "PresseRis" = pure PresseRIS
parseUrlPiece _ = pure CSV -- TODO error here parseUrlPiece _ = pure CSV -- TODO error here
......
...@@ -396,3 +396,6 @@ parseHal' = V.toList . V.map csvHal2doc . snd . readCsvHalLazyBS ...@@ -396,3 +396,6 @@ parseHal' = V.toList . V.map csvHal2doc . snd . readCsvHalLazyBS
parseCsv :: FilePath -> IO [HyperdataDocument] parseCsv :: FilePath -> IO [HyperdataDocument]
parseCsv fp = V.toList <$> V.map csv2doc <$> snd <$> readFile fp parseCsv fp = V.toList <$> V.map csv2doc <$> snd <$> readFile fp
-- | Pure counterpart of 'parseCsv': reads the given lazy 'BL.ByteString'
-- with 'readCsvLazyBS', keeps the second component of its result and
-- converts every row to a 'HyperdataDocument' through 'csv2doc'.
parseCsv' :: BL.ByteString -> [HyperdataDocument]
parseCsv' = V.toList . V.map csv2doc . snd . readCsvLazyBS
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment