Commit b13dfc93 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[list] CSV parsing draft

parent 63f099cb
Pipeline #1679 passed with stage
in 49 minutes and 2 seconds
......@@ -9,6 +9,7 @@ Portability : POSIX
-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
......@@ -17,33 +18,36 @@ module Gargantext.API.Ngrams.List
import Control.Lens hiding (elements, Indexed)
import Data.Aeson
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Csv as Csv
import Data.Either (Either(..))
import Data.HashMap.Strict (HashMap)
import Data.Map (toList, fromList)
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import Data.Map (Map, toList, fromList)
import qualified Data.Map as Map
import Data.Maybe (catMaybes)
import Data.Set (Set)
import Data.Swagger (ToSchema, declareNamedSchema, genericDeclareNamedSchema)
import Data.Text (Text, concat, pack)
import GHC.Generics (Generic)
import qualified Data.Text as Text
import Data.Vector (Vector)
import qualified Data.Vector as Vec
import Network.HTTP.Media ((//), (/:))
import qualified Prelude as Prelude
import Servant
import Servant.Job.Async
import Servant.Job.Utils (jsonOptions)
import Web.FormUrlEncoded (FromForm)
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Data.Text as Text
import qualified Protolude as P
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Ngrams (getNgramsTableMap, setListNgrams)
import Gargantext.API.Ngrams.List.Types
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Node.Corpus.New.File (FileType(..))
import Gargantext.API.Prelude (GargServer)
import Gargantext.Core.Text.Terms (ExtractedNgrams(..))
import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Database.Action.Flow (saveDocNgramsWith)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByNode (getOccByNgramsOnlyFast')
......@@ -55,20 +59,6 @@ import Gargantext.Database.Schema.Node
import Gargantext.Database.Types (Indexed(..))
import Gargantext.Prelude
------------------------------------------------------------------------
type API = Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
-- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
:<|> PostAPI
:<|> CSVPostAPI
api :: ListId -> GargServer API
api l = get l :<|> postAsync l :<|> csvPostAsync l
data HTML
instance Accept HTML where
contentType _ = "text" // "html" /: ("charset", "utf-8")
instance ToJSON a => MimeRender HTML a where
mimeRender _ = encode
------------------------------------------------------------------------
get :: RepoCmdM env err m =>
......@@ -101,15 +91,6 @@ post l m = do
-- TODO reindex
pure True
------------------------------------------------------------------------
csvPost :: FlowCmdM env err m
=> ListId
-> NgramsList
-> m Bool
csvPost l m = do
printDebug "[csvPost] l" l
printDebug "[csvPost] m" m
pure True
-----------------------------------------------------------------------------
-- | Re-index documents of a corpus with new ngrams (called orphans here)
......@@ -207,27 +188,64 @@ postAsync' l (WithFile _ m _) logStatus = do
, _scst_events = Just []
}
------------------------------------------------------------------------
readCsvText :: Text -> [(Text, Text, Text)]
readCsvText t = case eDec of
Left _ -> []
Right dec -> Vec.toList dec
where
lt = BSL.fromStrict $ P.encodeUtf8 t
eDec = Csv.decodeWith
(Csv.defaultDecodeOptions { Csv.decDelimiter = fromIntegral (P.ord '\t') })
Csv.HasHeader lt :: Either Prelude.String (Vector (Text, Text, Text))
parseCsvData :: [(Text, Text, Text)] -> Map NgramsTerm NgramsRepoElement
parseCsvData lst = Map.fromList $ conv <$> lst
where
conv (_status, label, _forms) =
(NgramsTerm label, NgramsRepoElement { _nre_size = 1
, _nre_list = CandidateTerm
, _nre_root = Nothing
, _nre_parent = Nothing
, _nre_children = MSet Map.empty })
csvPost :: FlowCmdM env err m
=> ListId
-> Text
-> m Bool
csvPost l m = do
printDebug "[csvPost] l" l
-- printDebug "[csvPost] m" m
-- status label forms
let lst = readCsvText m
let p = parseCsvData lst
--printDebug "[csvPost] lst" lst
--printDebug "[csvPost] p" p
_ <- setListNgrams l NgramsTerms p
pure True
------------------------------------------------------------------------
type CSVPostAPI = Summary "Update List (legacy v3 CSV)"
:> "csv"
:> "add"
:> "form"
:> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] WithFile JobLog
:> AsyncJobs JobLog '[FormUrlEncoded] WithTextFile JobLog
csvPostAsync :: ListId -> GargServer PostAPI
csvPostAsync :: ListId -> GargServer CSVPostAPI
csvPostAsync lId =
serveJobsAPI $
JobFunction $ \f@(WithFile ft _ n) log' -> do
printDebug "[csvPostAsync] filetype" ft
printDebug "[csvPostAsync] name" n
csvPostAsync' lId f (liftBase . log')
JobFunction $ \f@(WithTextFile ft _ n) log' -> do
let log'' x = do
printDebug "[csvPostAsync] filetype" ft
printDebug "[csvPostAsync] name" n
liftBase $ log' x
csvPostAsync' lId f log''
csvPostAsync' :: FlowCmdM env err m
=> ListId
-> WithFile
-> WithTextFile
-> (JobLog -> m ())
-> m JobLog
csvPostAsync' l (WithFile _ m _) logStatus = do
csvPostAsync' l (WithTextFile _ m _) logStatus = do
logStatus JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0
, _scst_remaining = Just 1
......@@ -240,19 +258,18 @@ csvPostAsync' l (WithFile _ m _) logStatus = do
, _scst_remaining = Just 0
, _scst_events = Just []
}
------------------------------------------------------------------------
type API = Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
-- :<|> ReqBody '[JSON] NgramsList :> Post '[JSON] Bool
:<|> PostAPI
:<|> CSVPostAPI
data WithFile = WithFile
{ _wf_filetype :: !FileType
, _wf_data :: !NgramsList
, _wf_name :: !Text
} deriving (Eq, Show, Generic)
makeLenses ''WithFile
instance FromForm WithFile
instance FromJSON WithFile where
parseJSON = genericParseJSON $ jsonOptions "_wf_"
instance ToJSON WithFile where
toJSON = genericToJSON $ jsonOptions "_wf_"
instance ToSchema WithFile where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")
api :: ListId -> GargServer API
api l = get l :<|> postAsync l :<|> csvPostAsync l
data HTML
instance Accept HTML where
contentType _ = "text" // "html" /: ("charset", "utf-8")
instance ToJSON a => MimeRender HTML a where
mimeRender _ = encode
module Gargantext.API.Ngrams.List.Types where
--{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
--import Control.Lens hiding (elements, Indexed)
import Data.Aeson
import Data.Swagger (ToSchema, declareNamedSchema, genericDeclareNamedSchema)
import Data.Text
import Servant.Job.Utils (jsonOptions)
import Web.FormUrlEncoded (FromForm)
import Protolude
import Gargantext.API.Ngrams.Types (NgramsList)
import Gargantext.API.Node.Corpus.New.File (FileType(..))
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
------------------------------------------------------------------------
data WithFile = WithFile
{ _wf_filetype :: !FileType
, _wf_data :: !NgramsList
, _wf_name :: !Text
} deriving (Eq, Show, Generic)
--makeLenses ''WithFile
instance FromForm WithFile
instance FromJSON WithFile where
parseJSON = genericParseJSON $ jsonOptions "_wf_"
instance ToJSON WithFile where
toJSON = genericToJSON $ jsonOptions "_wf_"
instance ToSchema WithFile where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wf_")
------------------------------------------------------------------------
data WithTextFile = WithTextFile
{ _wtf_filetype :: !FileType
, _wtf_data :: !Text
, _wtf_name :: !Text
} deriving (Eq, Show, Generic)
--makeLenses ''WithTextFile
instance FromForm WithTextFile
instance FromJSON WithTextFile where
parseJSON = genericParseJSON $ jsonOptions "_wtf_"
instance ToJSON WithTextFile where
toJSON = genericToJSON $ jsonOptions "_wtf_"
instance ToSchema WithTextFile where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wtf_")
......@@ -22,6 +22,9 @@ nix:
allow-newer: true
#ghc-options:
# "$everything": -haddock
extra-deps:
- git: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude.git
commit: 3e32ec3aca71eb326805355d3a99b9288dc342ee
......@@ -103,3 +106,6 @@ extra-deps:
- stemmer-0.5.2@sha256:823aec56249ec2619f60a2c0d1384b732894dbbbe642856d337ebfe9629a0efd,4082
- xmlbf-0.6.1@sha256:57867fcb39e0514d17b3328ff5de8d241a18482fc89bb742d9ed820a6a2a5187,1540
- xmlbf-xeno-0.2@sha256:39f70fced6052524c290cf595f114661c721452e65fc3e0953a44e7682a6a6b0,950
# need Vector.uncons
- vector-0.12.3.0@sha256:0ae2c1ba86f0077910be242ec6802cc3d7725fe7b2bea6987201aa3737b239b5,7953
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment