Commit e72fb3c7 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'dev' into dev-db-schema

parents 4da8baa0 253a938c
...@@ -186,6 +186,7 @@ library: ...@@ -186,6 +186,7 @@ library:
- servant-auth-swagger - servant-auth-swagger
- servant-blaze - servant-blaze
- servant-client - servant-client
- servant-flatten
- servant-job - servant-job
- servant-mock - servant-mock
- servant-multipart - servant-multipart
......
...@@ -395,9 +395,9 @@ addWithFile cid i f = ...@@ -395,9 +395,9 @@ addWithFile cid i f =
JobFunction (\_i log -> New.addToCorpusWithFile cid i f (liftIO . log)) JobFunction (\_i log -> New.addToCorpusWithFile cid i f (liftIO . log))
addWithForm :: GargServer New.AddWithForm addWithForm :: GargServer New.AddWithForm
addWithForm cid f = addWithForm cid =
serveJobsAPI $ serveJobsAPI $
JobFunction (\_i log -> New.addToCorpusWithForm cid f (liftIO . log)) JobFunction (\i log -> New.addToCorpusWithForm cid i (liftIO . log))
serverStatic :: Server (Get '[HTML] Html) serverStatic :: Server (Get '[HTML] Html)
serverStatic = $(do serverStatic = $(do
......
...@@ -24,33 +24,37 @@ New corpus means either: ...@@ -24,33 +24,37 @@ New corpus means either:
module Gargantext.API.Corpus.New module Gargantext.API.Corpus.New
where where
import Web.FormUrlEncoded (FromForm) --import Gargantext.Text.Corpus.Parsers (parseFile, FileFormat(..))
import Data.Either import Control.Lens hiding (elements)
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
import Data.Aeson.TH (deriveJSON)
import Data.Aeson import Data.Aeson
import Servant.Job.Utils (jsonOptions) import Data.Aeson.TH (deriveJSON)
import Control.Lens hiding (elements) import Data.Either
import Servant.Multipart
import Data.Swagger import Data.Swagger
import Data.Text (Text) import Data.Text (Text)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Servant.Job.Types import Gargantext.API.Corpus.New.File
import Gargantext.API.Orchestrator.Types
import Gargantext.Core (Lang(..))
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Flow (FlowCmdM, flowCorpus)
import Gargantext.Database.Flow (flowCorpusSearchInDatabase) import Gargantext.Database.Flow (flowCorpusSearchInDatabase)
import Gargantext.Database.Types.Node (CorpusId) import Gargantext.Database.Types.Node (CorpusId)
import Gargantext.Text.Terms (TermType(..)) import Gargantext.Database.Types.Node (ToHyperdataDocument(..))
import Gargantext.Database.Types.Node (UserId)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.API.Orchestrator.Types import Gargantext.Text.Corpus.Parsers.CSV (parseHal')
import Gargantext.Text.Terms (TermType(..))
import Servant import Servant
-- import Servant.Job.Server import Servant.API.Flatten (Flat)
import Servant.Job.Core
import Servant.Job.Types
import Servant.Job.Utils (jsonOptions)
import Servant.Multipart
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary import Test.QuickCheck.Arbitrary
import Gargantext.Core (Lang(..)) import Web.FormUrlEncoded (FromForm)
import Gargantext.Database.Flow (FlowCmdM, flowCorpus)
import qualified Gargantext.Text.Corpus.API as API import qualified Gargantext.Text.Corpus.API as API
import Gargantext.Database.Types.Node (UserId)
import Gargantext.API.Corpus.New.File
data Query = Query { query_query :: Text data Query = Query { query_query :: Text
, query_corpus_id :: Int , query_corpus_id :: Int
...@@ -104,41 +108,6 @@ instance ToSchema ApiInfo ...@@ -104,41 +108,6 @@ instance ToSchema ApiInfo
info :: FlowCmdM env err m => UserId -> m ApiInfo info :: FlowCmdM env err m => UserId -> m ApiInfo
info _u = pure $ ApiInfo API.externalAPIs info _u = pure $ ApiInfo API.externalAPIs
{-
-- Proposal to replace the Query type which seems to generically named.
data ScraperInput = ScraperInput
{ _scin_query :: !Text
, _scin_corpus_id :: !Int
, _scin_databases :: [API.ExternalAPIs]
}
deriving (Eq, Show, Generic)
makeLenses ''ScraperInput
deriveJSON (unPrefix "_scin_") 'ScraperInput
data ScraperEvent = ScraperEvent
{ _scev_message :: !(Maybe Text)
, _scev_level :: !(Maybe Text)
, _scev_date :: !(Maybe Text)
}
deriving Generic
deriveJSON (unPrefix "_scev_") 'ScraperEvent
data ScraperStatus = ScraperStatus
{ _scst_succeeded :: !(Maybe Int)
, _scst_failed :: !(Maybe Int)
, _scst_remaining :: !(Maybe Int)
, _scst_events :: !(Maybe [ScraperEvent])
}
deriving Generic
deriveJSON (unPrefix "_scst_") 'ScraperStatus
-}
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
data WithQuery = WithQuery data WithQuery = WithQuery
...@@ -148,11 +117,11 @@ data WithQuery = WithQuery ...@@ -148,11 +117,11 @@ data WithQuery = WithQuery
deriving Generic deriving Generic
makeLenses ''WithQuery makeLenses ''WithQuery
instance FromJSON WithQuery where instance FromJSON WithQuery where
parseJSON = genericParseJSON $ jsonOptions "_wq_" parseJSON = genericParseJSON $ jsonOptions "_wq_"
instance ToSchema WithQuery where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wq_")
instance ToSchema WithQuery
------------------------------------------------------- -------------------------------------------------------
data WithForm = WithForm data WithForm = WithForm
{ _wf_filetype :: !FileType { _wf_filetype :: !FileType
...@@ -160,27 +129,25 @@ data WithForm = WithForm ...@@ -160,27 +129,25 @@ data WithForm = WithForm
} deriving (Eq, Show, Generic) } deriving (Eq, Show, Generic)
makeLenses ''WithForm makeLenses ''WithForm
instance FromForm WithForm
instance FromJSON WithForm where instance FromJSON WithForm where
parseJSON = genericParseJSON $ jsonOptions "_wf_" parseJSON = genericParseJSON $ jsonOptions "_wf_"
instance ToSchema WithForm instance ToSchema WithForm where
instance FromForm WithForm declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")
------------------------------------------------------------------------ ------------------------------------------------------------------------
type type AsyncJobs event ctI input output =
AddAPI withInput = AsyncJobsAPI ScraperStatus withInput ScraperStatus Flat (AsyncJobsAPI' 'Unsafe 'Safe ctI '[JSON] Maybe event input output)
------------------------------------------------------------------------ ------------------------------------------------------------------------
type AddWithQuery = Summary "Add with Query to corpus endpoint"
type AddWithQuery = Summary "Add to corpus endpoint"
:> "corpus" :> "corpus"
:> Capture "corpus_id" CorpusId :> Capture "corpus_id" CorpusId
:> "add" :> "add"
:> "query" :> "query"
:> "async" :> "async"
:> AddAPI WithQuery :> AsyncJobsAPI ScraperStatus WithQuery ScraperStatus
type AddWithFile = Summary "Add to corpus endpoint" type AddWithFile = Summary "Add with MultipartData to corpus endpoint"
:> "corpus" :> "corpus"
:> Capture "corpus_id" CorpusId :> Capture "corpus_id" CorpusId
:> "add" :> "add"
...@@ -188,16 +155,15 @@ type AddWithFile = Summary "Add to corpus endpoint" ...@@ -188,16 +155,15 @@ type AddWithFile = Summary "Add to corpus endpoint"
:> MultipartForm Mem (MultipartData Mem) :> MultipartForm Mem (MultipartData Mem)
:> QueryParam "fileType" FileType :> QueryParam "fileType" FileType
:> "async" :> "async"
:> AddAPI () :> AsyncJobs ScraperStatus '[JSON] () ScraperStatus
type AddWithForm = Summary "Add to corpus endpoint" type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
:> "corpus" :> "corpus"
:> Capture "corpus_id" CorpusId :> Capture "corpus_id" CorpusId
:> "add" :> "add"
:> "form" :> "form"
:> ReqBody '[FormUrlEncoded] WithForm
:> "async" :> "async"
:> AddAPI () :> AsyncJobs ScraperStatus '[FormUrlEncoded] WithForm ScraperStatus
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- TODO WithQuery also has a corpus id -- TODO WithQuery also has a corpus id
...@@ -206,7 +172,7 @@ addToCorpusJobFunction :: FlowCmdM env err m ...@@ -206,7 +172,7 @@ addToCorpusJobFunction :: FlowCmdM env err m
-> WithQuery -> WithQuery
-> (ScraperStatus -> m ()) -> (ScraperStatus -> m ())
-> m ScraperStatus -> m ScraperStatus
addToCorpusJobFunction _cid _input logStatus = do addToCorpusJobFunction _cid (WithQuery _q _dbs) logStatus = do
-- TODO ... -- TODO ...
logStatus ScraperStatus { _scst_succeeded = Just 10 logStatus ScraperStatus { _scst_succeeded = Just 10
, _scst_failed = Just 2 , _scst_failed = Just 2
...@@ -246,17 +212,23 @@ addToCorpusWithForm :: FlowCmdM env err m ...@@ -246,17 +212,23 @@ addToCorpusWithForm :: FlowCmdM env err m
-> WithForm -> WithForm
-> (ScraperStatus -> m ()) -> (ScraperStatus -> m ())
-> m ScraperStatus -> m ScraperStatus
addToCorpusWithForm _cid (WithForm ft d) logStatus = do addToCorpusWithForm cid (WithForm _ft d) logStatus = do
logStatus ScraperStatus { _scst_succeeded = Just 10
, _scst_failed = Just 2 let docs = splitEvery 500
, _scst_remaining = Just 138 $ take 10000
$ parseHal' (cs d)
logStatus ScraperStatus { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just [] , _scst_events = Just []
} }
_ <- putStrLn $ show ft
_ <- putStrLn $ show d
pure ScraperStatus { _scst_succeeded = Just 137 cid' <- flowCorpus "user1" (Right [cid]) (Multi EN) (map (map toHyperdataDocument) docs)
, _scst_failed = Just 13 printDebug "cid'" cid'
pure ScraperStatus { _scst_succeeded = Just 2
, _scst_failed = Just 0
, _scst_remaining = Just 0 , _scst_remaining = Just 0
, _scst_events = Just [] , _scst_events = Just []
} }
......
...@@ -387,6 +387,10 @@ csv2doc (CsvDoc title source ...@@ -387,6 +387,10 @@ csv2doc (CsvDoc title source
------------------------------------------------------------------------ ------------------------------------------------------------------------
parseHal :: FilePath -> IO [HyperdataDocument] parseHal :: FilePath -> IO [HyperdataDocument]
parseHal fp = V.toList <$> V.map csvHal2doc <$> snd <$> readCsvHal fp parseHal fp = V.toList <$> V.map csvHal2doc <$> snd <$> readCsvHal fp
parseHal' :: BL.ByteString -> [HyperdataDocument]
parseHal' = V.toList . V.map csvHal2doc . snd . readCsvHalLazyBS
------------------------------------------------------------------------ ------------------------------------------------------------------------
parseCsv :: FilePath -> IO [HyperdataDocument] parseCsv :: FilePath -> IO [HyperdataDocument]
......
...@@ -3,6 +3,7 @@ flags: {} ...@@ -3,6 +3,7 @@ flags: {}
extra-package-dbs: [] extra-package-dbs: []
packages: packages:
- . - .
#- 'deps/servant-job'
docker: docker:
enable: false enable: false
...@@ -35,7 +36,8 @@ extra-deps: ...@@ -35,7 +36,8 @@ extra-deps:
# #
- git: https://gitlab.iscpif.fr/gargantext/patches-class - git: https://gitlab.iscpif.fr/gargantext/patches-class
commit: 746b4ce0af8f9e600d555ad7e5b2973a940cdad9 commit: 746b4ce0af8f9e600d555ad7e5b2973a940cdad9
#- git: https://github.com/delanoe/servant-job.git #- git: https://github.com/delanoe/servant-job.git
#commit: 7a7b7100e6d132adb4c11b25b2128e6309690ac0
- git: https://github.com/np/servant-job.git - git: https://github.com/np/servant-job.git
commit: 4016c76398a56e1a352a45b3ee9d698dd0dd2597 commit: 4016c76398a56e1a352a45b3ee9d698dd0dd2597
- git: https://gitlab.iscpif.fr/gargantext/clustering-louvain.git - git: https://gitlab.iscpif.fr/gargantext/clustering-louvain.git
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment