Commit b13dfc93 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[list] CSV parsing draft

parent 63f099cb
Pipeline #1679 passed with stage
in 49 minutes and 2 seconds
...@@ -9,6 +9,7 @@ Portability : POSIX ...@@ -9,6 +9,7 @@ Portability : POSIX
-} -}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
...@@ -17,33 +18,36 @@ module Gargantext.API.Ngrams.List ...@@ -17,33 +18,36 @@ module Gargantext.API.Ngrams.List
import Control.Lens hiding (elements, Indexed) import Control.Lens hiding (elements, Indexed)
import Data.Aeson import Data.Aeson
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Csv as Csv
import Data.Either (Either(..))
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.Map (toList, fromList) import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import Data.Map (Map, toList, fromList)
import qualified Data.Map as Map
import Data.Maybe (catMaybes) import Data.Maybe (catMaybes)
import Data.Set (Set) import Data.Set (Set)
import Data.Swagger (ToSchema, declareNamedSchema, genericDeclareNamedSchema)
import Data.Text (Text, concat, pack) import Data.Text (Text, concat, pack)
import GHC.Generics (Generic) import qualified Data.Text as Text
import Data.Vector (Vector)
import qualified Data.Vector as Vec
import Network.HTTP.Media ((//), (/:)) import Network.HTTP.Media ((//), (/:))
import qualified Prelude as Prelude
import Servant import Servant
import Servant.Job.Async import Servant.Job.Async
import Servant.Job.Utils (jsonOptions)
import Web.FormUrlEncoded (FromForm) import qualified Protolude as P
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Text as Text
import Gargantext.API.Admin.Orchestrator.Types import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Ngrams (getNgramsTableMap, setListNgrams) import Gargantext.API.Ngrams (getNgramsTableMap, setListNgrams)
import Gargantext.API.Ngrams.List.Types
import Gargantext.API.Ngrams.Tools (getTermsWith) import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.API.Node.Corpus.New.File (FileType(..))
import Gargantext.API.Prelude (GargServer) import Gargantext.API.Prelude (GargServer)
import Gargantext.Core.Text.Terms (ExtractedNgrams(..)) import Gargantext.Core.Text.Terms (ExtractedNgrams(..))
import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText) import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText)
import Gargantext.Core.Types.Main (ListType(..)) import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Database.Action.Flow (saveDocNgramsWith) import Gargantext.Database.Action.Flow (saveDocNgramsWith)
import Gargantext.Database.Action.Flow.Types (FlowCmdM) import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast') import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
...@@ -55,20 +59,6 @@ import Gargantext.Database.Schema.Node ...@@ -55,20 +59,6 @@ import Gargantext.Database.Schema.Node
import Gargantext.Database.Types (Indexed(..)) import Gargantext.Database.Types (Indexed(..))
import Gargantext.Prelude import Gargantext.Prelude
------------------------------------------------------------------------
type API = Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
-- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
:<|> PostAPI
:<|> CSVPostAPI
api :: ListId -> GargServer API
api l = get l :<|> postAsync l :<|> csvPostAsync l
data HTML
instance Accept HTML where
contentType _ = "text" // "html" /: ("charset", "utf-8")
instance ToJSON a => MimeRender HTML a where
mimeRender _ = encode
------------------------------------------------------------------------ ------------------------------------------------------------------------
get :: RepoCmdM env err m => get :: RepoCmdM env err m =>
...@@ -101,15 +91,6 @@ post l m = do ...@@ -101,15 +91,6 @@ post l m = do
-- TODO reindex -- TODO reindex
pure True pure True
------------------------------------------------------------------------
csvPost :: FlowCmdM env err m
=> ListId
-> NgramsList
-> m Bool
csvPost l m = do
printDebug "[csvPost] l" l
printDebug "[csvPost] m" m
pure True
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
-- | Re-index documents of a corpus with new ngrams (called orphans here) -- | Re-index documents of a corpus with new ngrams (called orphans here)
...@@ -207,27 +188,64 @@ postAsync' l (WithFile _ m _) logStatus = do ...@@ -207,27 +188,64 @@ postAsync' l (WithFile _ m _) logStatus = do
, _scst_events = Just [] , _scst_events = Just []
} }
------------------------------------------------------------------------ ------------------------------------------------------------------------
readCsvText :: Text -> [(Text, Text, Text)]
readCsvText t = case eDec of
Left _ -> []
Right dec -> Vec.toList dec
where
lt = BSL.fromStrict $ P.encodeUtf8 t
eDec = Csv.decodeWith
(Csv.defaultDecodeOptions { Csv.decDelimiter = fromIntegral (P.ord '\t') })
Csv.HasHeader lt :: Either Prelude.String (Vector (Text, Text, Text))
parseCsvData :: [(Text, Text, Text)] -> Map NgramsTerm NgramsRepoElement
parseCsvData lst = Map.fromList $ conv <$> lst
where
conv (_status, label, _forms) =
(NgramsTerm label, NgramsRepoElement { _nre_size = 1
, _nre_list = CandidateTerm
, _nre_root = Nothing
, _nre_parent = Nothing
, _nre_children = MSet Map.empty })
csvPost :: FlowCmdM env err m
=> ListId
-> Text
-> m Bool
csvPost l m = do
printDebug "[csvPost] l" l
-- printDebug "[csvPost] m" m
-- status label forms
let lst = readCsvText m
let p = parseCsvData lst
--printDebug "[csvPost] lst" lst
--printDebug "[csvPost] p" p
_ <- setListNgrams l NgramsTerms p
pure True
------------------------------------------------------------------------
type CSVPostAPI = Summary "Update List (legacy v3 CSV)" type CSVPostAPI = Summary "Update List (legacy v3 CSV)"
:> "csv" :> "csv"
:> "add" :> "add"
:> "form" :> "form"
:> "async" :> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] WithFile JobLog :> AsyncJobs JobLog '[FormUrlEncoded] WithTextFile JobLog
csvPostAsync :: ListId -> GargServer PostAPI csvPostAsync :: ListId -> GargServer CSVPostAPI
csvPostAsync lId = csvPostAsync lId =
serveJobsAPI $ serveJobsAPI $
JobFunction $ \f@(WithFile ft _ n) log' -> do JobFunction $ \f@(WithTextFile ft _ n) log' -> do
printDebug "[csvPostAsync] filetype" ft let log'' x = do
printDebug "[csvPostAsync] name" n printDebug "[csvPostAsync] filetype" ft
csvPostAsync' lId f (liftBase . log') printDebug "[csvPostAsync] name" n
liftBase $ log' x
csvPostAsync' lId f log''
csvPostAsync' :: FlowCmdM env err m csvPostAsync' :: FlowCmdM env err m
=> ListId => ListId
-> WithFile -> WithTextFile
-> (JobLog -> m ()) -> (JobLog -> m ())
-> m JobLog -> m JobLog
csvPostAsync' l (WithFile _ m _) logStatus = do csvPostAsync' l (WithTextFile _ m _) logStatus = do
logStatus JobLog { _scst_succeeded = Just 0 logStatus JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0 , _scst_failed = Just 0
, _scst_remaining = Just 1 , _scst_remaining = Just 1
...@@ -240,19 +258,18 @@ csvPostAsync' l (WithFile _ m _) logStatus = do ...@@ -240,19 +258,18 @@ csvPostAsync' l (WithFile _ m _) logStatus = do
, _scst_remaining = Just 0 , _scst_remaining = Just 0
, _scst_events = Just [] , _scst_events = Just []
} }
------------------------------------------------------------------------ ------------------------------------------------------------------------
type API = Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
-- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
:<|> PostAPI
:<|> CSVPostAPI
data WithFile = WithFile api :: ListId -> GargServer API
{ _wf_filetype :: !FileType api l = get l :<|> postAsync l :<|> csvPostAsync l
, _wf_data :: !NgramsList
, _wf_name :: !Text data HTML
} deriving (Eq, Show, Generic) instance Accept HTML where
contentType _ = "text" // "html" /: ("charset", "utf-8")
makeLenses ''WithFile instance ToJSON a => MimeRender HTML a where
instance FromForm WithFile mimeRender _ = encode
instance FromJSON WithFile where
parseJSON = genericParseJSON $ jsonOptions "_wf_"
instance ToJSON WithFile where
toJSON = genericToJSON $ jsonOptions "_wf_"
instance ToSchema WithFile where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")
module Gargantext.API.Ngrams.List.Types where
--{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
--import Control.Lens hiding (elements, Indexed)
import Data.Aeson
import Data.Swagger (ToSchema, declareNamedSchema, genericDeclareNamedSchema)
import Data.Text
import Servant.Job.Utils (jsonOptions)
import Web.FormUrlEncoded (FromForm)
import Protolude
import Gargantext.API.Ngrams.Types (NgramsList)
import Gargantext.API.Node.Corpus.New.File (FileType(..))
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
------------------------------------------------------------------------
data WithFile = WithFile
{ _wf_filetype :: !FileType
, _wf_data :: !NgramsList
, _wf_name :: !Text
} deriving (Eq, Show, Generic)
--makeLenses ''WithFile
instance FromForm WithFile
instance FromJSON WithFile where
parseJSON = genericParseJSON $ jsonOptions "_wf_"
instance ToJSON WithFile where
toJSON = genericToJSON $ jsonOptions "_wf_"
instance ToSchema WithFile where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")
------------------------------------------------------------------------
data WithTextFile = WithTextFile
{ _wtf_filetype :: !FileType
, _wtf_data :: !Text
, _wtf_name :: !Text
} deriving (Eq, Show, Generic)
--makeLenses ''WithTextFile
instance FromForm WithTextFile
instance FromJSON WithTextFile where
parseJSON = genericParseJSON $ jsonOptions "_wtf_"
instance ToJSON WithTextFile where
toJSON = genericToJSON $ jsonOptions "_wtf_"
instance ToSchema WithTextFile where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wtf_")
...@@ -22,6 +22,9 @@ nix: ...@@ -22,6 +22,9 @@ nix:
allow-newer: true allow-newer: true
#ghc-options:
# "$everything": -haddock
extra-deps: extra-deps:
- git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git - git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
commit: 3e32ec3aca71eb326805355d3a99b9288dc342ee commit: 3e32ec3aca71eb326805355d3a99b9288dc342ee
...@@ -103,3 +106,6 @@ extra-deps: ...@@ -103,3 +106,6 @@ extra-deps:
- stemmer-0.5.2@sha256:823aec56249ec2619f60a2c0d1384b732894dbbbe642856d337ebfe9629a0efd,4082 - stemmer-0.5.2@sha256:823aec56249ec2619f60a2c0d1384b732894dbbbe642856d337ebfe9629a0efd,4082
- xmlbf-0.6.1@sha256:57867fcb39e0514d17b3328ff5de8d241a18482fc89bb742d9ed820a6a2a5187,1540 - xmlbf-0.6.1@sha256:57867fcb39e0514d17b3328ff5de8d241a18482fc89bb742d9ed820a6a2a5187,1540
- xmlbf-xeno-0.2@sha256:39f70fced6052524c290cf595f114661c721452e65fc3e0953a44e7682a6a6b0,950 - xmlbf-xeno-0.2@sha256:39f70fced6052524c290cf595f114661c721452e65fc3e0953a44e7682a6a6b0,950
# need Vector.uncons
- vector-0.12.3.0@sha256:0ae2c1ba86f0077910be242ec6802cc3d7725fe7b2bea6987201aa3737b239b5,7953
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment