Commit be745492 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Unify CSV & JSON form upload handlers

This commit modifies the backend code to convert the input CSV into an
`NgramsList`, so that we can reuse the JSON API for the upload.
parent dc703771
Pipeline #5382 passed with stages
in 88 minutes and 33 seconds
...@@ -60,6 +60,7 @@ library ...@@ -60,6 +60,7 @@ library
Gargantext.API.Errors.Types.Backend Gargantext.API.Errors.Types.Backend
Gargantext.API.HashedResponse Gargantext.API.HashedResponse
Gargantext.API.Ngrams Gargantext.API.Ngrams
Gargantext.API.Ngrams.List
Gargantext.API.Ngrams.List.Types Gargantext.API.Ngrams.List.Types
Gargantext.API.Ngrams.Prelude Gargantext.API.Ngrams.Prelude
Gargantext.API.Ngrams.Tools Gargantext.API.Ngrams.Tools
...@@ -188,7 +189,6 @@ library ...@@ -188,7 +189,6 @@ library
Gargantext.API.Job Gargantext.API.Job
Gargantext.API.Members Gargantext.API.Members
Gargantext.API.Metrics Gargantext.API.Metrics
Gargantext.API.Ngrams.List
Gargantext.API.Ngrams.NgramsTree Gargantext.API.Ngrams.NgramsTree
Gargantext.API.Node.Contact Gargantext.API.Node.Contact
Gargantext.API.Node.Corpus.Annuaire Gargantext.API.Node.Corpus.Annuaire
......
...@@ -12,6 +12,7 @@ Portability : POSIX ...@@ -12,6 +12,7 @@ Portability : POSIX
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-} {-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.API.Ngrams.List module Gargantext.API.Ngrams.List
where where
...@@ -33,7 +34,7 @@ import Gargantext.API.Ngrams (setListNgrams) ...@@ -33,7 +34,7 @@ import Gargantext.API.Ngrams (setListNgrams)
import Gargantext.API.Ngrams.List.Types import Gargantext.API.Ngrams.List.Types
import Gargantext.API.Ngrams.Prelude (getNgramsList) import Gargantext.API.Ngrams.Prelude (getNgramsList)
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.API.Prelude (GargServer, GargM) import Gargantext.API.Prelude (GargServer, GargM, serverError)
import Gargantext.API.Types import Gargantext.API.Types
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory
import Gargantext.Core.Types.Main (ListType(..)) import Gargantext.Core.Types.Main (ListType(..))
...@@ -111,20 +112,19 @@ getCsv lId = do ...@@ -111,20 +112,19 @@ getCsv lId = do
jsonPostAsync :: ServerT JSONAPI (GargM Env BackendInternalError) jsonPostAsync :: ServerT JSONAPI (GargM Env BackendInternalError)
jsonPostAsync lId = jsonPostAsync lId =
serveJobsAPI UpdateNgramsListJobJSON $ \jHandle f -> serveJobsAPI UpdateNgramsListJobJSON $ \jHandle f ->
postAsyncJSON lId f jHandle postAsyncJSON lId (_wjf_data f) jHandle
------------------------------------------------------------------------ ------------------------------------------------------------------------
postAsyncJSON :: (FlowCmdM env err m, MonadJobStatus m) postAsyncJSON :: (FlowCmdM env err m, MonadJobStatus m)
=> ListId => ListId
-> WithJsonFile -> NgramsList
-> JobHandle m -> JobHandle m
-> m () -> m ()
postAsyncJSON l (WithJsonFile m _) jobHandle = do postAsyncJSON l ngramsList jobHandle = do
markStarted 2 jobHandle markStarted 2 jobHandle
-- printDebug "New list as file" l
setList setList
-- printDebug "Done" r
markProgress 1 jobHandle markProgress 1 jobHandle
...@@ -138,7 +138,7 @@ postAsyncJSON l (WithJsonFile m _) jobHandle = do ...@@ -138,7 +138,7 @@ postAsyncJSON l (WithJsonFile m _) jobHandle = do
setList :: HasNodeStory env err m => m () setList :: HasNodeStory env err m => m ()
setList = do setList = do
-- TODO check with Version for optim -- TODO check with Version for optim
mapM_ (\(nt, Versioned _v ns) -> (setListNgrams l nt ns)) $ toList m mapM_ (\(nt, Versioned _v ns) -> (setListNgrams l nt ns)) $ toList ngramsList
-- TODO reindex -- TODO reindex
...@@ -163,44 +163,38 @@ csvApi = csvPostAsync ...@@ -163,44 +163,38 @@ csvApi = csvPostAsync
csvPostAsync :: ServerT CSVAPI (GargM Env BackendInternalError) csvPostAsync :: ServerT CSVAPI (GargM Env BackendInternalError)
csvPostAsync lId = csvPostAsync lId =
serveJobsAPI UpdateNgramsListJobCSV $ \jHandle f -> do serveJobsAPI UpdateNgramsListJobCSV $ \jHandle f -> do
postAsyncCSV lId f jHandle case ngramsListFromCSVData (_wtf_data f) of
Left err -> serverError $ err500 { errReasonPhrase = err }
postAsyncCSV :: (FlowCmdM env err m, MonadJobStatus m) Right ngramsList -> postAsyncJSON lId ngramsList jHandle
=> ListId
-> WithTextFile -- | Tries converting a text file into an 'NgramList', so that we can reuse the
-> JobHandle m -- existing JSON endpoint for the CSV upload.
-> m () ngramsListFromCSVData :: Text -> Either Prelude.String NgramsList
postAsyncCSV l (WithTextFile _filetype csvData _name) jHandle = do ngramsListFromCSVData csvData = case decodeCsv of
markStarted 2 jHandle -- /NOTE/ The legacy CSV data only supports terms in imports and exports, so this is
-- all we care about.
let eLst = readCsvText csvData Left err -> Left $ "Invalid CSV found in ngramsListFromCSVData: " <> err
case eLst of Right terms -> pure $ Map.fromList [ (NgramsTerms, Versioned 0 $ mconcat . Vec.toList $ terms) ]
Left err -> markFailed (Just err) jHandle
Right lst -> do
let p = parseCsvData lst
_ <- setListNgrams l NgramsTerms p
markProgress 1 jHandle
corpus_node <- getNode l -- (Proxy :: Proxy HyperdataList)
let corpus_id = fromMaybe (panic "") (_node_parent_id corpus_node)
_ <- reIndexWith corpus_id l NgramsTerms (Set.fromList [MapTerm, CandidateTerm])
markComplete jHandle
------------------------------------------------------------------------
readCsvText :: Text -> Either Text [(Text, Text, Text)]
readCsvText t = case eDec of
Left err -> Left $ pack err
Right dec -> Right $ Vec.toList dec
where where
lt = BSL.fromStrict $ P.encodeUtf8 t binaryData = BSL.fromStrict $ P.encodeUtf8 csvData
eDec = Csv.decodeWith
(Csv.defaultDecodeOptions { Csv.decDelimiter = fromIntegral (P.ord '\t') }) decodeCsv :: Either Prelude.String (Vector NgramsTableMap)
Csv.HasHeader lt :: Either Prelude.String (Vector (Text, Text, Text)) decodeCsv = Csv.decodeWithP csvToNgramsTableMap
(Csv.defaultDecodeOptions { Csv.decDelimiter = fromIntegral (P.ord '\t') })
Csv.HasHeader
binaryData
-- | Converts a plain CSV 'Record' into an NgramsTableMap
csvToNgramsTableMap :: Csv.Record -> Csv.Parser NgramsTableMap
csvToNgramsTableMap record = case Vec.toList record of
(map P.decodeUtf8 -> [status, label, forms])
-> pure $ conv status label forms
_ -> Prelude.fail "csvToNgramsTableMap failed"
parseCsvData :: [(Text, Text, Text)] -> Map NgramsTerm NgramsRepoElement
parseCsvData lst = Map.fromList $ conv <$> lst
where where
conv (status, label, forms) = conv :: Text -> Text -> Text -> NgramsTableMap
(NgramsTerm label, NgramsRepoElement { _nre_size = 1 conv status label forms = Map.singleton (NgramsTerm label)
$ NgramsRepoElement { _nre_size = 1
, _nre_list = case status == "map" of , _nre_list = case status == "map" of
True -> MapTerm True -> MapTerm
False -> case status == "main" of False -> case status == "main" of
...@@ -214,7 +208,6 @@ parseCsvData lst = Map.fromList $ conv <$> lst ...@@ -214,7 +208,6 @@ parseCsvData lst = Map.fromList $ conv <$> lst
$ filter (\w -> w /= "" && w /= label) $ filter (\w -> w /= "" && w /= label)
$ splitOn "|&|" forms $ splitOn "|&|" forms
} }
)
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -236,4 +229,3 @@ toIndexedNgrams m t = Indexed <$> i <*> n ...@@ -236,4 +229,3 @@ toIndexedNgrams m t = Indexed <$> i <*> n
where where
i = HashMap.lookup t m i = HashMap.lookup t m
n = Just (text2ngrams t) n = Just (text2ngrams t)
status label forms status label forms
map abelian group map abelian group
stop brazorf
...@@ -32,4 +32,4 @@ tests = sequential $ aroundAll withTestDBAndPort $ do ...@@ -32,4 +32,4 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
withValidLogin port "alice" (GargPassword "alice") $ \token -> do withValidLogin port "alice" (GargPassword "alice") $ \token -> do
protected token "POST" "/gql" [r| { protected token "POST" "/gql" [r| {
"query": "{ user_infos(user_id: 2) { ui_id, ui_email } }" "query": "{ user_infos(user_id: 2) { ui_id, ui_email } }"
} |] `shouldRespondWith'` [jsonFragment| {"data":{"user_infos":[{"ui_id":2,"ui_email":"alice@gargan.text"}]}} |] } |] `shouldRespondWithFragment` [jsonFragment| {"data":{"user_infos":[{"ui_id":2,"ui_email":"alice@gargan.text"}]}} |]
...@@ -38,7 +38,7 @@ import Test.API.Setup (withTestDBAndPort, setupEnvironment, mkUrl, createAliceAn ...@@ -38,7 +38,7 @@ import Test.API.Setup (withTestDBAndPort, setupEnvironment, mkUrl, createAliceAn
import Test.Hspec import Test.Hspec
import Test.Hspec.Wai hiding (pendingWith) import Test.Hspec.Wai hiding (pendingWith)
import Test.Hspec.Wai.Internal (withApplication) import Test.Hspec.Wai.Internal (withApplication)
import Test.Utils (jsonFragment, shouldRespondWith') import Test.Utils (jsonFragment, shouldRespondWithFragment)
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
import qualified Data.ByteString.Lazy.Char8 as C8L import qualified Data.ByteString.Lazy.Char8 as C8L
...@@ -165,7 +165,7 @@ tests = sequential $ aroundAll withTestDBAndPort $ do ...@@ -165,7 +165,7 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
withApplication app $ do withApplication app $ do
withValidLogin port "alice" (GargPassword "alice") $ \token -> do withValidLogin port "alice" (GargPassword "alice") $ \token -> do
protected token "GET" (mkUrl port "/node/8") "" protected token "GET" (mkUrl port "/node/8") ""
`shouldRespondWith'` [jsonFragment| {"id":8,"user_id":2,"name":"alice" } |] `shouldRespondWithFragment` [jsonFragment| {"id":8,"user_id":2,"name":"alice" } |]
it "forbids 'alice' to see others node private info" $ \((_testEnv, port), app) -> do it "forbids 'alice' to see others node private info" $ \((_testEnv, port), app) -> do
withApplication app $ do withApplication app $ do
...@@ -181,7 +181,7 @@ tests = sequential $ aroundAll withTestDBAndPort $ do ...@@ -181,7 +181,7 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
withApplication app $ do withApplication app $ do
withValidLogin port "alice" (GargPassword "alice") $ \token -> do withValidLogin port "alice" (GargPassword "alice") $ \token -> do
protected token "GET" (mkUrl port "/tree/8") "" protected token "GET" (mkUrl port "/tree/8") ""
`shouldRespondWith'` [jsonFragment| { "node": {"id":8, "name":"alice", "type": "NodeUser" } } |] `shouldRespondWithFragment` [jsonFragment| { "node": {"id":8, "name":"alice", "type": "NodeUser" } } |]
it "forbids 'alice' to see others node private info" $ \((_testEnv, port), app) -> do it "forbids 'alice' to see others node private info" $ \((_testEnv, port), app) -> do
withApplication app $ do withApplication app $ do
......
...@@ -19,12 +19,15 @@ import Data.Text.IO qualified as TIO ...@@ -19,12 +19,15 @@ import Data.Text.IO qualified as TIO
import Fmt import Fmt
import Gargantext.API.Admin.Auth.Types (Token) import Gargantext.API.Admin.Auth.Types (Token)
import Gargantext.API.Admin.Orchestrator.Types import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Ngrams.List ( ngramsListFromCSVData )
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types
import Gargantext.Core.Types.Individu import Gargantext.Core.Types.Individu
import Gargantext.Database.Action.User import Gargantext.Database.Action.User
import Gargantext.Database.Admin.Types.Hyperdata.Corpus import Gargantext.Database.Admin.Types.Hyperdata.Corpus
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Tree.Root import Gargantext.Database.Query.Tree.Root
import Gargantext.Database.Schema.Ngrams
import Gargantext.Prelude hiding (get) import Gargantext.Prelude hiding (get)
import Network.Wai.Handler.Warp qualified as Wai import Network.Wai.Handler.Warp qualified as Wai
import Paths_gargantext (getDataFileName) import Paths_gargantext (getDataFileName)
...@@ -34,9 +37,10 @@ import Test.API.Setup (withTestDBAndPort, setupEnvironment, mkUrl, createAliceAn ...@@ -34,9 +37,10 @@ import Test.API.Setup (withTestDBAndPort, setupEnvironment, mkUrl, createAliceAn
import Test.Database.Types import Test.Database.Types
import Test.Hspec import Test.Hspec
import Test.Hspec.Wai.Internal (withApplication, WaiSession) import Test.Hspec.Wai.Internal (withApplication, WaiSession)
import Test.Utils (shouldRespondWith')
import Web.FormUrlEncoded
import Test.Hspec.Wai.JSON (json) import Test.Hspec.Wai.JSON (json)
import Test.Hspec.Wai (shouldRespondWith)
import Web.FormUrlEncoded
import qualified Data.Map.Strict as Map
data JobPollHandle = JobPollHandle { data JobPollHandle = JobPollHandle {
_jph_id :: !Text _jph_id :: !Text
...@@ -121,7 +125,7 @@ tests = sequential $ aroundAll withTestDBAndPort $ do ...@@ -121,7 +125,7 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
-- Now check that we can retrieve the ngrams -- Now check that we can retrieve the ngrams
let getUrl = "/node/" +| listId |+ "/ngrams?ngramsType=Terms&listType=MapTerm&list="+| listId |+"&limit=50" let getUrl = "/node/" +| listId |+ "/ngrams?ngramsType=Terms&listType=MapTerm&list="+| listId |+"&limit=50"
getJSON token (mkUrl port getUrl) getJSON token (mkUrl port getUrl)
`shouldRespondWith'` [json| { "version": 0, `shouldRespondWith` [json| { "version": 0,
"count": 1, "count": 1,
"data": [ "data": [
{ {
...@@ -136,6 +140,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ do ...@@ -136,6 +140,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
describe "POST /api/v1.0/lists/:id/csv/add/form/async (CSV)" $ do describe "POST /api/v1.0/lists/:id/csv/add/form/async (CSV)" $ do
it "parses CSV via ngramsListFromCSVData" $ \((_testEnv, _port), _app) -> do
simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/simple.csv")
ngramsListFromCSVData simpleNgrams `shouldBe`
Right (Map.fromList [ (NgramsTerms, Versioned 0 $ Map.fromList [
(NgramsTerm "abelian group", NgramsRepoElement 1 MapTerm Nothing Nothing (MSet mempty))
, (NgramsTerm "brazorf", NgramsRepoElement 1 StopTerm Nothing Nothing (MSet mempty))
])])
it "allows uploading a CSV ngrams file" $ \((testEnv, port), app) -> do it "allows uploading a CSV ngrams file" $ \((testEnv, port), app) -> do
cId <- newCorpusForUser testEnv "alice" cId <- newCorpusForUser testEnv "alice"
withApplication app $ do withApplication app $ do
...@@ -154,15 +166,26 @@ tests = sequential $ aroundAll withTestDBAndPort $ do ...@@ -154,15 +166,26 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
liftIO (_jph_status j' `shouldBe` "IsFinished") liftIO (_jph_status j' `shouldBe` "IsFinished")
-- Now check that we can retrieve the ngrams -- Now check that we can retrieve the ngrams
let getUrl = "/node/" +| listId |+ "/ngrams?ngramsType=Terms&listType=MapTerm&list="+| listId |+"&limit=50" let getTermsUrl = "/node/" +| listId |+ "/ngrams?ngramsType=Terms&listType=MapTerm&list="+| listId |+"&limit=50"
getJSON token (mkUrl port getUrl) getJSON token (mkUrl port getTermsUrl)
`shouldRespondWith'` [json| {"version":0 `shouldRespondWith` [json| {"version":0
,"count":1 ,"count":1
,"data":[ ,"data":[
{"ngrams":"abelian group" {"ngrams":"abelian group"
,"size":1 ,"size":1
,"list":"MapTerm" ,"list":"MapTerm"
,"occurrences":[],"children":[]} ,"occurrences":[],"children":[]}
] ]
} |] } |]
let getStopUrl = "/node/" +| listId |+ "/ngrams?ngramsType=Terms&listType=StopTerm&list="+| listId |+"&limit=50"
getJSON token (mkUrl port getStopUrl)
`shouldRespondWith` [json| {"version":0
,"count":1
,"data":[
{"ngrams":"brazorf"
,"size":1
,"list":"StopTerm"
,"occurrences":[],"children":[]}
]
} |]
...@@ -41,11 +41,14 @@ jsonFragment = QuasiQuoter { ...@@ -41,11 +41,14 @@ jsonFragment = QuasiQuoter {
newtype JsonFragmentResponseMatcher = JsonFragmentResponseMatcher { getJsonMatcher :: ResponseMatcher } newtype JsonFragmentResponseMatcher = JsonFragmentResponseMatcher { getJsonMatcher :: ResponseMatcher }
shouldRespondWith' :: HasCallStack -- | Succeeds if the full body matches the input /fragment/. Careful in using this
=> WaiSession st SResponse -- combinator, as it won't check that the full body matches the input, but rather
-> JsonFragmentResponseMatcher -- that the body contains the input fragment, which might lead to confusion.
-> WaiExpectation st shouldRespondWithFragment :: HasCallStack
shouldRespondWith' action matcher = do => WaiSession st SResponse
-> JsonFragmentResponseMatcher
-> WaiExpectation st
shouldRespondWithFragment action matcher = do
r <- action r <- action
forM_ (match r (getJsonMatcher matcher)) (liftIO . expectationFailure) forM_ (match r (getJsonMatcher matcher)) (liftIO . expectationFailure)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment