Commit 5e8bc9f2 authored by Przemyslaw Kaminski

[upload zip] this works now

parent 4b09273b
Pipeline #1964 failed with stage
in 9 minutes and 57 seconds
......@@ -28,8 +28,6 @@ import Data.Swagger
import Data.Text (Text)
import qualified Data.Text as T
import GHC.Generics (Generic)
import qualified Prelude as Prelude
import Protolude (readFile)
import Servant
import Servant.Job.Utils (jsonOptions)
-- import Servant.Multipart
......@@ -270,6 +268,8 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
eDocs <- liftBase $ parse data'
case eDocs of
Right docs' -> do
-- TODO Add progress (jobStatus) update for docs - this is a
-- long action
let docs = splitEvery 500 $ take 1000000 docs'
printDebug "Parsing corpus finished : " cid
......@@ -303,11 +303,6 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
jobLog3 = jobLogSuccess jobLog2
jobLogE = jobLogFailTotal jobLog
-- | Read the file at @fp@ and hand its contents (converted with 'cs') to
-- 'Parser.parseFormat' using the 'Parser.CsvGargV3' format.
--
-- The result is exactly what 'Parser.parseFormat' yields: a 'Prelude.String'
-- on the 'Left' (presumably a parse error message -- confirm against
-- 'Parser.parseFormat') or the parsed 'HyperdataDocument's on the 'Right'.
parseCsvGargV3Path :: [Char] -> IO (Either Prelude.String [HyperdataDocument])
parseCsvGargV3Path fp =
  readFile fp >>= Parser.parseFormat Parser.CsvGargV3 . cs
{-
addToCorpusWithFile :: FlowCmdM env err m
=> CorpusId
......
......@@ -100,12 +100,11 @@ parseFormat WOS bs = do
parseFormat ZIP bs = do
path <- emptySystemTempFile "parsed.zip"
DB.writeFile path bs
parsedZip <- withArchive path $ do
withArchive path $ do
files <- DM.keys <$> getEntries
filesContents <- mapM getEntry files
ddocs <- liftIO $ mapM (parseFormat CsvGargV3) filesContents
pure $ sequence ddocs
pure $ Left $ "Not implemented for ZIP, parsedZip" <> show parsedZip
pure $ concat <$> sequence ddocs
parseFormat _ _ = undefined
-- | Parse file into documents
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment