Commit 4b09273b authored by Przemyslaw Kaminski

[upload zip] some more work on zipfile parsing

parent 12c2beae
Pipeline #1963 failed in 9 minutes and 56 seconds
...@@ -264,7 +264,7 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do ...@@ -264,7 +264,7 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
-- TODO granularity of the logStatus -- TODO granularity of the logStatus
let data' = case ft of let data' = case ft of
ZIP -> case BSB64.decode $ TE.encodeUtf8 d of ZIP -> case BSB64.decode $ TE.encodeUtf8 d of
Left err -> panic $ T.pack "[addToCorpusWithForm] error decoding base64" err Left err -> panic $ T.pack "[addToCorpusWithForm] error decoding base64: " <> T.pack err
Right decoded -> decoded Right decoded -> decoded
_ -> cs d _ -> cs d
eDocs <- liftBase $ parse data' eDocs <- liftBase $ parse data'
......
...@@ -25,7 +25,8 @@ module Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), clean, parseFile, cl ...@@ -25,7 +25,8 @@ module Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), clean, parseFile, cl
import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries) import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries)
import Control.Concurrent.Async as CCA (mapConcurrently) import Control.Concurrent.Async as CCA (mapConcurrently)
import Control.Monad (join) import Control.Monad (join, sequence)
import Control.Monad.IO.Class (liftIO)
import Data.Attoparsec.ByteString (parseOnly, Parser) import Data.Attoparsec.ByteString (parseOnly, Parser)
import Data.Either(Either(..)) import Data.Either(Either(..))
import Data.Either.Extra (partitionEithers) import Data.Either.Extra (partitionEithers)
...@@ -100,7 +101,10 @@ parseFormat ZIP bs = do ...@@ -100,7 +101,10 @@ parseFormat ZIP bs = do
path <- emptySystemTempFile "parsed.zip" path <- emptySystemTempFile "parsed.zip"
DB.writeFile path bs DB.writeFile path bs
parsedZip <- withArchive path $ do parsedZip <- withArchive path $ do
DM.keys <$> getEntries files <- DM.keys <$> getEntries
filesContents <- mapM getEntry files
ddocs <- liftIO $ mapM (parseFormat CsvGargV3) filesContents
pure $ sequence ddocs
pure $ Left $ "Not implemented for ZIP, parsedZip" <> show parsedZip pure $ Left $ "Not implemented for ZIP, parsedZip" <> show parsedZip
parseFormat _ _ = undefined parseFormat _ _ = undefined
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment