Commit d1ffbb9b authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[conduit] some work towards flow migration to conduit

Doesn't compile yet.

This is somewhere in Flow.hs in types for ConduitT () a m ()
where m monad is IO.
parent c8d0773c
Pipeline #2552 failed with stage
in 25 minutes and 53 seconds
......@@ -42,7 +42,7 @@ import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
import Gargantext.Prelude
import GHC.Generics (Generic)
import Network.HTTP.Media ((//), (/:))
import qualified Prelude as Prelude
import qualified Prelude
import Servant
( (:<|>) (..)
, (:>)
......
......@@ -22,7 +22,7 @@ import Gargantext.Database.Prelude (HasConnectionPool, HasConfig)
import qualified Gargantext.Database.Schema.Node as N
import Gargantext.Prelude
import GHC.Generics (Generic)
import qualified Prelude as Prelude
import qualified Prelude
import Text.Read (readEither)
data Node = Node
......
......@@ -58,7 +58,7 @@ import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Text as Text
import qualified Data.Vector as Vec
import qualified Prelude as Prelude
import qualified Prelude
import qualified Protolude as P
------------------------------------------------------------------------
type GETAPI = Summary "Get List"
......
......@@ -14,7 +14,7 @@ import Web.FormUrlEncoded (FromForm, ToForm)
import Protolude
import Gargantext.API.Ngrams.Types (NgramsList)
import Gargantext.API.Node.Corpus.New.File (FileType(..))
import Gargantext.API.Node.Corpus.New.Types (FileType(..))
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
------------------------------------------------------------------------
......
......@@ -25,7 +25,7 @@ import Servant.Job.Types
import Servant.Job.Utils (jsonOptions)
import Web.FormUrlEncoded (FromForm)
import qualified Gargantext.API.Node.Corpus.New.File as NewFile
import qualified Gargantext.API.Node.Corpus.New.Types as NewTypes
import Gargantext.API.Admin.Orchestrator.Types hiding (AsyncJobs)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
......@@ -40,7 +40,7 @@ type Api = Summary "New Annuaire endpoint"
------------------------------------------------------------------------
------------------------------------------------------------------------
data AnnuaireWithForm = AnnuaireWithForm
{ _wf_filetype :: !NewFile.FileType
{ _wf_filetype :: !NewTypes.FileType
, _wf_data :: !Text
, _wf_lang :: !(Maybe Lang)
} deriving (Eq, Show, Generic)
......
......@@ -43,14 +43,14 @@ import Gargantext.Prelude
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs, ScraperEvent(..), scst_events)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Job (addEvent, jobLogSuccess, jobLogFailTotal, jobLogFailTotalWithMessage)
import Gargantext.API.Node.Corpus.New.File
import Gargantext.API.Node.Corpus.New.Types
import Gargantext.API.Node.Corpus.Searx
import Gargantext.API.Node.Corpus.Types
import Gargantext.API.Node.Types
import Gargantext.Core (Lang(..){-, allLangs-})
import Gargantext.Core.Text.List.Social (FlowSocialListWith(..))
import qualified Gargantext.Core.Text.Corpus.API as API
import qualified Gargantext.Core.Text.Corpus.Parsers as Parser (FileFormat(..), parseFormat)
import qualified Gargantext.Core.Text.Corpus.Parsers as Parser (FileFormat(..), FileType(..), parseFormatC)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Action.Flow (flowCorpus, getDataText, flowDataText, TermType(..){-, allDataOrigins-})
......@@ -264,9 +264,10 @@ addToCorpusWithForm :: (FlowCmdM env err m)
-> (JobLog -> m ())
-> JobLog
-> m JobLog
addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
addToCorpusWithForm user cid (NewWithForm ft ff d l _n) logStatus jobLog = do
printDebug "[addToCorpusWithForm] Parsing corpus: " cid
printDebug "[addToCorpusWithForm] fileType" ft
printDebug "[addToCorpusWithForm] fileFormat" ff
logStatus jobLog
limit' <- view $ hasConfig . gc_max_docs_parsers
let limit = fromIntegral limit'
......@@ -276,33 +277,33 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
CSV -> Parser.parseFormatC Parser.CsvGargV3
WOS -> Parser.parseFormatC Parser.WOS
PresseRIS -> Parser.parseFormatC Parser.RisPresse
ZIP -> Parser.parseFormatC Parser.ZIP
-- TODO granularity of the logStatus
let data' = case ft of
ZIP -> case BSB64.decode $ TE.encodeUtf8 d of
let data' = case ff of
Plain -> cs d
ZIP -> case BSB64.decode $ TE.encodeUtf8 d of
Left err -> panic $ T.pack "[addToCorpusWithForm] error decoding base64: " <> T.pack err
Right decoded -> decoded
_ -> cs d
eDocsC <- liftBase $ parseC data'
eDocsC <- liftBase $ parseC ff data'
case eDocsC of
Right docsC -> do
-- TODO Add progress (jobStatus) update for docs - this is a
-- long action
let docsC' = zipSources (yieldMany [1..]) docsC
.| mapMC \(idx, doc) -> do
if idx > limit then do
printDebug "[addToCorpusWithForm] number of docs exceeds the limit" (show limit)
let panicMsg' = [ "[addToCorpusWithForm] number of docs "
, "exceeds the MAX_DOCS_PARSERS limit ("
, show limit
, ")" ]
let panicMsg = T.concat $ T.pack <$> panicMsg'
logStatus $ jobLogFailTotalWithMessage panicMsg jobLog
panic panicMsg
else
pure doc
.| mapMC (\(idx, doc) ->
if idx > limit then do
--printDebug "[addToCorpusWithForm] number of docs exceeds the limit" (show limit)
let panicMsg' = [ "[addToCorpusWithForm] number of docs "
, "exceeds the MAX_DOCS_PARSERS limit ("
, show limit
, ")" ]
let panicMsg = T.concat $ T.pack <$> panicMsg'
--logStatus $ jobLogFailTotalWithMessage panicMsg jobLog
panic panicMsg
else
pure doc)
.| mapC toHyperdataDocument
--printDebug "Parsing corpus finished : " cid
--logStatus jobLog2
......@@ -313,16 +314,18 @@ addToCorpusWithForm user cid (NewWithForm ft d l _n) logStatus jobLog = do
(Right [cid])
(Multi $ fromMaybe EN l)
Nothing
(Just $ fromIntegral $ length docs, docsC' .| mapC toHyperdataDocument)
--(Just $ fromIntegral $ length docs, docsC')
(Just 0, docsC') -- TODO fix number of docs
--(map (map toHyperdataDocument) docs)
logStatus
(logStatus)
printDebug "Extraction finished : " cid
printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text)
sendMail user
-- TODO uncomment this
--sendMail user
logStatus jobLog3
pure $ jobLog3
pure jobLog3
Left e -> do
printDebug "[addToCorpusWithForm] parse error" e
......
......@@ -20,19 +20,16 @@ module Gargantext.API.Node.Corpus.New.File
import Control.Lens ((.~), (?~))
import Control.Monad (forM)
import Data.Aeson
import Data.Maybe
import Data.Monoid (mempty)
import Data.Swagger
import Data.Text (Text())
import GHC.Generics (Generic)
import Servant
import Servant.Multipart
import Servant.Swagger.Internal
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Gargantext.API.Node.Corpus.New.Types
import Gargantext.Core.Types (TODO)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude -- (Cmd, CmdM)
......@@ -41,38 +38,9 @@ import Gargantext.Prelude.Crypto.Hash (hash)
-------------------------------------------------------------
type Hash = Text
data FileType = CSV
| CSV_HAL
| PresseRIS
| WOS
| ZIP
deriving (Eq, Show, Generic)
instance ToSchema FileType
instance Arbitrary FileType where arbitrary = elements [CSV, PresseRIS]
instance ToParamSchema FileType
instance FromJSON FileType
instance ToJSON FileType
instance ToParamSchema (MultipartData Mem) where toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance FromHttpApiData FileType
where
parseUrlPiece "CSV" = pure CSV
parseUrlPiece "CSV_HAL" = pure CSV_HAL
parseUrlPiece "PresseRis" = pure PresseRIS
parseUrlPiece "ZIP" = pure ZIP
parseUrlPiece "WOS" = pure WOS
parseUrlPiece _ = pure CSV -- TODO error here
instance ToHttpApiData FileType where
toUrlPiece t = case t of
CSV -> "CSV"
CSV_HAL -> "CSV_HAL"
PresseRIS -> "PresseRis"
ZIP -> "ZIP"
WOS -> "WOS"
instance (ToParamSchema a, HasSwagger sub) =>
HasSwagger (MultipartForm tag a :> sub) where
-- TODO
......@@ -89,6 +57,7 @@ instance (ToParamSchema a, HasSwagger sub) =>
type WithUpload' = Summary "Upload file(s) to a corpus"
:> QueryParam "fileType" FileType
:> QueryParam "fileFormat" FileFormat
:> MultipartForm Mem (MultipartData Mem)
:> Post '[JSON] [Hash]
......@@ -96,11 +65,14 @@ type WithUpload' = Summary "Upload file(s) to a corpus"
--postUpload :: NodeId -> GargServer UploadAPI
postUpload :: NodeId
-> Maybe FileType
-> Maybe FileFormat
-> MultipartData Mem
-> Cmd err [Hash]
postUpload _ Nothing _ = panic "fileType is a required parameter"
postUpload _ (Just fileType) multipartData = do
postUpload _ Nothing _ _ = panic "fileType is a required parameter"
postUpload _ _ Nothing _ = panic "fileFormat is a required parameter"
postUpload _ (Just fileType) (Just fileFormat) multipartData = do
printDebug "File Type: " fileType
printDebug "File format: " fileFormat
is <- liftBase $ do
printDebug "Inputs:" ()
forM (inputs multipartData) $ \input -> do
......
module Gargantext.API.Node.Corpus.New.Types where
import Data.Aeson
import Data.Swagger
import Data.Text (pack)
import GHC.Generics (Generic)
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Gargantext.Prelude
data FileType = CSV
| CSV_HAL
| PresseRIS
| WOS
deriving (Eq, Show, Generic)
instance ToSchema FileType
instance Arbitrary FileType where arbitrary = elements [CSV, PresseRIS]
instance ToParamSchema FileType
instance FromJSON FileType
instance ToJSON FileType
instance FromHttpApiData FileType where
parseUrlPiece "CSV" = pure CSV
parseUrlPiece "CSV_HAL" = pure CSV_HAL
parseUrlPiece "PresseRis" = pure PresseRIS
parseUrlPiece "WOS" = pure WOS
parseUrlPiece _ = pure CSV -- TODO error here
instance ToHttpApiData FileType where
toUrlPiece = pack . show
data FileFormat = Plain | ZIP
deriving (Eq, Show, Generic)
instance ToSchema FileFormat
instance Arbitrary FileFormat where arbitrary = elements [ Plain, ZIP ]
instance ToParamSchema FileFormat
instance FromJSON FileFormat
instance ToJSON FileFormat
instance FromHttpApiData FileFormat where
parseUrlPiece "Plain" = pure Plain
parseUrlPiece "ZIP" = pure ZIP
parseUrlPiece _ = pure Plain -- TODO error here
instance ToHttpApiData FileFormat where
toUrlPiece = pack . show
......@@ -17,7 +17,7 @@ import GHC.Generics (Generic)
import Network.HTTP.Client
import Network.HTTP.Client.TLS
import qualified Prelude as Prelude
import qualified Prelude
import Protolude (catMaybes, encodeUtf8, rightToMaybe, Text)
import Gargantext.Prelude
import Gargantext.Prelude.Config
......
......@@ -19,14 +19,15 @@ import Gargantext.Core (Lang(..){-, allLangs-})
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Prelude
import qualified Gargantext.Database.GargDB as GargDB
import Gargantext.API.Node.Corpus.New.File (FileType)
import Gargantext.API.Node.Corpus.New.Types (FileType, FileFormat)
-------------------------------------------------------
data NewWithForm = NewWithForm
{ _wf_filetype :: !FileType
, _wf_data :: !Text -- NOTE for binary files, this represents base-64 data
, _wf_lang :: !(Maybe Lang)
, _wf_name :: !Text
{ _wf_filetype :: !FileType
, _wf_fileformat :: !FileFormat
, _wf_data :: !Text -- NOTE for binary files, this represents base-64 data
, _wf_lang :: !(Maybe Lang)
, _wf_name :: !Text
} deriving (Eq, Show, Generic)
makeLenses ''NewWithForm
......
......@@ -19,7 +19,7 @@ import qualified Data.Set as S
import qualified Data.List as DL
import qualified Data.Vector as DV
import qualified Data.Map as M
import qualified Prelude as Prelude
import qualified Prelude
import Gargantext.Prelude
......
......@@ -22,12 +22,12 @@ import System.FilePath (FilePath()) -- , takeExtension)
import Gargantext.Prelude
import Gargantext.Core.Text.Corpus.Parsers.CSV (writeDocs2Csv)
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat(..))
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat(..), FileType(..))
risPress2csvWrite :: FilePath -> IO ()
risPress2csvWrite f = do
eContents <- parseFile RisPresse (f <> ".ris")
eContents <- parseFile RisPresse Plain (f <> ".ris")
case eContents of
Right contents -> writeDocs2Csv (f <> ".csv") contents
Left e -> panic $ "Error: " <> (T.pack e)
......
......@@ -20,15 +20,15 @@ please follow the types.
{-# LANGUAGE PackageImports #-}
module Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), clean, parseFile, cleanText, parseFormat)
module Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), clean, parseFile, cleanText, parseFormatC)
where
import "zip" Codec.Archive.Zip (withArchive, getEntry, getEntries)
import Conduit
import Control.Concurrent.Async as CCA (mapConcurrently)
import Control.Monad.Identity (runIdentity)
import Data.Attoparsec.ByteString (parseOnly, Parser)
import Control.Monad.Trans.Control (MonadBaseControl)
import Control.Monad (join)
import Data.Attoparsec.ByteString (parseOnly, Parser)
import Data.Either(Either(..))
import Data.Either.Extra (partitionEithers)
import Data.List (concat, lookup)
......@@ -44,13 +44,14 @@ import qualified Data.ByteString.Char8 as DBC
import qualified Data.ByteString.Lazy as DBL
import qualified Data.Map as DM
import qualified Data.Text as DT
import qualified Prelude as Prelude
import qualified Prelude
import System.IO.Temp (emptySystemTempFile)
import Gargantext.API.Node.Corpus.New.Types (FileFormat(..))
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude
import Gargantext.Core.Text.Corpus.Parsers.CSV (parseHal, parseHal', parseCsv, parseCsv', parseCsvC)
import Gargantext.Core.Text.Corpus.Parsers.CSV (parseHal, parseCsv, parseCsvC)
import Gargantext.Core.Text.Corpus.Parsers.RIS.Presse (presseEnrich)
-- import Gargantext.Core.Text.Learn (detectLangDefault)
import qualified Gargantext.Core.Text.Corpus.Parsers.Date as Date
......@@ -69,9 +70,8 @@ type ParseError = String
-- | According to the format of Input file,
-- different parser are available.
data FileFormat = WOS | RIS | RisPresse
data FileType = WOS | RIS | RisPresse
| CsvGargV3 | CsvHal
| ZIP
deriving (Show)
-- Implemented (ISI Format)
......@@ -80,71 +80,72 @@ data FileFormat = WOS | RIS | RisPresse
-- | PDF -- Not Implemented / pdftotext and import Pandoc ?
-- | XML -- Not Implemented / see :
parseFormatC :: FileFormat -> DB.ByteString -> IO (Either Prelude.String (ConduitT () HyperdataDocument IO ()))
parseFormatC CsvGargV3 bs = pure $ transPipe (pure . runIdentity) <$> (parseCsvC $ DBL.fromStrict bs)
parseFormatC CsvHal bs = pure $ transPipe (pure . runIdentity) <$> (parseCsvC $ DBL.fromStrict bs)
parseFormatC RisPresse bs = do
docs <- snd
<$> enrichWith RisPresse
$ partitionEithers
$ [runParser' RisPresse bs]
pure $ (\docs' -> yieldMany docs' .| mapMC (toDoc RIS)) <$> docs
parseFormatC WOS bs = do
docs <- snd
<$> enrichWith WOS
$ partitionEithers
$ [runParser' WOS bs]
pure $ (\docs' -> yieldMany docs' .| mapMC (toDoc WOS)) <$> docs
parseFormatC ZIP bs = do
path <- emptySystemTempFile "parsed-zip"
DB.writeFile path bs
parsedZip <- withArchive path $ do
DM.keys <$> getEntries
pure $ Left $ "Not implemented for ZIP, parsedZip" <> show parsedZip
parseFormatC _ _ = undefined
parseFormat :: FileFormat -> DB.ByteString -> IO (Either Prelude.String [HyperdataDocument])
parseFormat CsvGargV3 bs = pure $ parseCsv' $ DBL.fromStrict bs
parseFormat CsvHal bs = pure $ parseHal' $ DBL.fromStrict bs
parseFormat RisPresse bs = do
docs <- mapM (toDoc RIS)
<$> snd
<$> enrichWith RisPresse
$ partitionEithers
$ [runParser' RisPresse bs]
pure $ Right docs
parseFormat WOS bs = do
docs <- mapM (toDoc WOS)
<$> snd
<$> enrichWith WOS
$ partitionEithers
$ [runParser' WOS bs]
pure $ Right docs
parseFormat ZIP bs = do
path <- emptySystemTempFile "parsed-zip"
DB.writeFile path bs
parsedZip <- withArchive path $ do
parseFormatC :: MonadBaseControl IO m => FileType -> FileFormat -> DB.ByteString -> m (Either Prelude.String (ConduitT () HyperdataDocument IO ()))
parseFormatC CsvGargV3 Plain bs = pure $ transPipe (pure . runIdentity) <$> (parseCsvC $ DBL.fromStrict bs)
parseFormatC CsvHal Plain bs = pure $ transPipe (pure . runIdentity) <$> (parseCsvC $ DBL.fromStrict bs)
parseFormatC RisPresse Plain bs = do
--docs <- enrichWith RisPresse
let eDocs = runParser' RisPresse bs
pure $ (\docs -> yieldMany docs
.| mapC presseEnrich
.| mapC (map $ both decodeUtf8)
.| mapMC (toDoc RIS)) <$> eDocs
parseFormatC WOS Plain bs = do
let eDocs = runParser' WOS bs
pure $ (\docs -> yieldMany docs
.| mapC (map $ first WOS.keys)
.| mapC (map $ both decodeUtf8)
.| mapMC (toDoc WOS)) <$> eDocs
parseFormatC _ft ZIP bs = do
path <- liftBase $ emptySystemTempFile "parsed-zip"
liftBase $ DB.writeFile path bs
parsedZip <- liftBase $ withArchive path $ do
DM.keys <$> getEntries
pure $ Left $ "Not implemented for ZIP, parsedZip" <> show parsedZip
parseFormat _ _ = undefined
parseFormatC _ _ _ = undefined
-- parseFormat :: FileType -> DB.ByteString -> IO (Either Prelude.String [HyperdataDocument])
-- parseFormat CsvGargV3 bs = pure $ parseCsv' $ DBL.fromStrict bs
-- parseFormat CsvHal bs = pure $ parseHal' $ DBL.fromStrict bs
-- parseFormat RisPresse bs = do
-- docs <- mapM (toDoc RIS)
-- <$> snd
-- <$> enrichWith RisPresse
-- $ partitionEithers
-- $ [runParser' RisPresse bs]
-- pure $ Right docs
-- parseFormat WOS bs = do
-- docs <- mapM (toDoc WOS)
-- <$> snd
-- <$> enrichWith WOS
-- $ partitionEithers
-- $ [runParser' WOS bs]
-- pure $ Right docs
-- parseFormat ZIP bs = do
-- path <- emptySystemTempFile "parsed-zip"
-- DB.writeFile path bs
-- parsedZip <- withArchive path $ do
-- DM.keys <$> getEntries
-- pure $ Left $ "Not implemented for ZIP, parsedZip" <> show parsedZip
-- parseFormat _ _ = undefined
-- | Parse file into documents
-- TODO manage errors here
-- TODO: to debug maybe add the filepath in error message
parseFile :: FileFormat -> FilePath -> IO (Either Prelude.String [HyperdataDocument])
parseFile CsvHal p = parseHal p
parseFile CsvGargV3 p = parseCsv p
parseFile RisPresse p = do
parseFile :: FileType -> FileFormat -> FilePath -> IO (Either Prelude.String [HyperdataDocument])
parseFile CsvHal Plain p = parseHal p
parseFile CsvGargV3 Plain p = parseCsv p
parseFile RisPresse Plain p = do
docs <- join $ mapM (toDoc RIS) <$> snd <$> enrichWith RisPresse <$> readFileWith RIS p
pure $ Right docs
parseFile WOS p = do
parseFile WOS Plain p = do
docs <- join $ mapM (toDoc WOS) <$> snd <$> enrichWith WOS <$> readFileWith WOS p
pure $ Right docs
parseFile ff p = do
parseFile ff _ p = do
docs <- join $ mapM (toDoc ff) <$> snd <$> enrichWith ff <$> readFileWith ff p
pure $ Right docs
toDoc :: FileFormat -> [(Text, Text)] -> IO HyperdataDocument
toDoc :: FileType -> [(Text, Text)] -> IO HyperdataDocument
-- TODO use language for RIS
toDoc ff d = do
-- let abstract = lookup "abstract" d
......@@ -174,7 +175,7 @@ toDoc ff d = do
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (DT.pack . show) lang }
enrichWith :: FileFormat
enrichWith :: FileType
-> (a, [[[(DB.ByteString, DB.ByteString)]]]) -> (a, [[(Text, Text)]])
enrichWith RisPresse = enrichWith' presseEnrich
enrichWith WOS = enrichWith' (map (first WOS.keys))
......@@ -189,7 +190,7 @@ enrichWith' f = second (map both' . map f . concat)
readFileWith :: FileFormat -> FilePath
readFileWith :: FileType -> FilePath
-> IO ([ParseError], [[[(DB.ByteString, DB.ByteString)]]])
readFileWith format path = do
files <- case takeExtension path of
......@@ -200,19 +201,19 @@ readFileWith format path = do
-- | withParser:
-- According to the format of the text, choose the right parser.
-- TODO withParser :: FileFormat -> Parser [Document]
withParser :: FileFormat -> Parser [[(DB.ByteString, DB.ByteString)]]
-- TODO withParser :: FileType -> Parser [Document]
withParser :: FileType -> Parser [[(DB.ByteString, DB.ByteString)]]
withParser WOS = WOS.parser
withParser RIS = RIS.parser
--withParser ODT = odtParser
--withParser XML = xmlParser
withParser _ = panic "[ERROR] Parser not implemented yet"
runParser :: FileFormat -> DB.ByteString
runParser :: FileType -> DB.ByteString
-> IO (Either String [[(DB.ByteString, DB.ByteString)]])
runParser format text = pure $ runParser' format text
runParser' :: FileFormat -> DB.ByteString
runParser' :: FileType -> DB.ByteString
-> (Either String [[(DB.ByteString, DB.ByteString)]])
runParser' format text = parseOnly (withParser format) text
......
......@@ -29,7 +29,7 @@ import Data.Vector (Vector)
import GHC.IO (FilePath)
import GHC.Word (Word8)
import qualified Prelude as Prelude
import qualified Prelude
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude hiding (length)
......
......@@ -38,7 +38,7 @@ import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Query.Tree
import Gargantext.Database.Schema.Ngrams
import Gargantext.Prelude
import qualified Prelude as Prelude
import qualified Prelude
------------------------------------------------------------------------
------------------------------------------------------------------------
......
......@@ -42,7 +42,7 @@ import Gargantext.Database.Schema.Context
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import Prelude as Prelude
import Prelude
import System.Process as Shell
import qualified Data.ByteString.Lazy as Lazy
import qualified Data.List as List
......
......@@ -74,7 +74,7 @@ import Gargantext.Core.Ext.IMT (toSchoolName)
import Gargantext.Core.Ext.IMTUser (readFile_Annuaire)
import Gargantext.Core.Flow.Types
import Gargantext.Core.Text
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat, FileType)
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
import Gargantext.Core.Text.List.Social (FlowSocialListWith)
......@@ -106,7 +106,7 @@ import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (Hash)
import qualified Gargantext.Core.Text.Corpus.API as API
import qualified Gargantext.Database.Query.Table.Node.Document.Add as Doc (add)
import qualified Prelude as Prelude
import qualified Prelude
------------------------------------------------------------------------
-- Imports for upgrade function
......@@ -189,18 +189,21 @@ flowCorpusFile :: (FlowCmdM env err m)
=> User
-> Either CorpusName [CorpusId]
-> Limit -- Limit the number of docs (for dev purpose)
-> TermType Lang -> FileFormat -> FilePath
-> TermType Lang
-> FileType
-> FileFormat
-> FilePath
-> Maybe FlowSocialListWith
-> (JobLog -> m ())
-> m CorpusId
flowCorpusFile u n _l la ff fp mfslw logStatus = do
eParsed <- liftBase $ parseFile ff fp
flowCorpusFile u n _l la ft ff fp mfslw logStatus = do
eParsed <- liftBase $ parseFile ft ff fp
case eParsed of
Right parsed -> do
flowCorpus u n la mfslw (Just $ fromIntegral $ length parsed, yieldMany parsed .| mapC toHyperdataDocument) logStatus
--let docs = splitEvery 500 $ take l parsed
--flowCorpus u n la mfslw (yieldMany $ map (map toHyperdataDocument) docs) logStatus
Left e -> panic $ "Error: " <> (T.pack e)
Left e -> panic $ "Error: " <> T.pack e
------------------------------------------------------------------------
-- | TODO improve the needed type to create/update a corpus
......
......@@ -3,7 +3,7 @@ module Gargantext.Utils.Servant where
import qualified Data.ByteString.Lazy.Char8 as BSC
import Data.Csv (encodeDefaultOrderedByName, DefaultOrdered, ToNamedRecord)
import Network.HTTP.Media ((//), (/:))
import qualified Prelude as Prelude
import qualified Prelude
import Protolude
import Protolude.Partial (read)
import Servant
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment