Commit be745492 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Unify CSV & JSON form upload handlers

This commit modifies the backend code to convert the input CSV into an
`NgramsList`, so that we can reuse the JSON API for the upload.
parent dc703771
Pipeline #5382 passed with stages
in 88 minutes and 33 seconds
......@@ -60,6 +60,7 @@ library
Gargantext.API.Errors.Types.Backend
Gargantext.API.HashedResponse
Gargantext.API.Ngrams
Gargantext.API.Ngrams.List
Gargantext.API.Ngrams.List.Types
Gargantext.API.Ngrams.Prelude
Gargantext.API.Ngrams.Tools
......@@ -188,7 +189,6 @@ library
Gargantext.API.Job
Gargantext.API.Members
Gargantext.API.Metrics
Gargantext.API.Ngrams.List
Gargantext.API.Ngrams.NgramsTree
Gargantext.API.Node.Contact
Gargantext.API.Node.Corpus.Annuaire
......
......@@ -12,6 +12,7 @@ Portability : POSIX
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.API.Ngrams.List
where
......@@ -33,7 +34,7 @@ import Gargantext.API.Ngrams (setListNgrams)
import Gargantext.API.Ngrams.List.Types
import Gargantext.API.Ngrams.Prelude (getNgramsList)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Prelude (GargServer, GargM)
import Gargantext.API.Prelude (GargServer, GargM, serverError)
import Gargantext.API.Types
import Gargantext.Core.NodeStory
import Gargantext.Core.Types.Main (ListType(..))
......@@ -111,20 +112,19 @@ getCsv lId = do
jsonPostAsync :: ServerT JSONAPI (GargM Env BackendInternalError)
jsonPostAsync lId =
serveJobsAPI UpdateNgramsListJobJSON $ \jHandle f ->
postAsyncJSON lId f jHandle
postAsyncJSON lId (_wjf_data f) jHandle
------------------------------------------------------------------------
postAsyncJSON :: (FlowCmdM env err m, MonadJobStatus m)
=> ListId
-> WithJsonFile
-> NgramsList
-> JobHandle m
-> m ()
postAsyncJSON l (WithJsonFile m _) jobHandle = do
postAsyncJSON l ngramsList jobHandle = do
markStarted 2 jobHandle
-- printDebug "New list as file" l
setList
-- printDebug "Done" r
markProgress 1 jobHandle
......@@ -138,7 +138,7 @@ postAsyncJSON l (WithJsonFile m _) jobHandle = do
setList :: HasNodeStory env err m => m ()
setList = do
-- TODO check with Version for optim
mapM_ (\(nt, Versioned _v ns) -> (setListNgrams l nt ns)) $ toList m
mapM_ (\(nt, Versioned _v ns) -> (setListNgrams l nt ns)) $ toList ngramsList
-- TODO reindex
......@@ -163,44 +163,38 @@ csvApi = csvPostAsync
csvPostAsync :: ServerT CSVAPI (GargM Env BackendInternalError)
csvPostAsync lId =
serveJobsAPI UpdateNgramsListJobCSV $ \jHandle f -> do
postAsyncCSV lId f jHandle
postAsyncCSV :: (FlowCmdM env err m, MonadJobStatus m)
=> ListId
-> WithTextFile
-> JobHandle m
-> m ()
postAsyncCSV l (WithTextFile _filetype csvData _name) jHandle = do
markStarted 2 jHandle
let eLst = readCsvText csvData
case eLst of
Left err -> markFailed (Just err) jHandle
Right lst -> do
let p = parseCsvData lst
_ <- setListNgrams l NgramsTerms p
markProgress 1 jHandle
corpus_node <- getNode l -- (Proxy :: Proxy HyperdataList)
let corpus_id = fromMaybe (panic "") (_node_parent_id corpus_node)
_ <- reIndexWith corpus_id l NgramsTerms (Set.fromList [MapTerm, CandidateTerm])
markComplete jHandle
------------------------------------------------------------------------
readCsvText :: Text -> Either Text [(Text, Text, Text)]
readCsvText t = case eDec of
Left err -> Left $ pack err
Right dec -> Right $ Vec.toList dec
case ngramsListFromCSVData (_wtf_data f) of
Left err -> serverError $ err500 { errReasonPhrase = err }
Right ngramsList -> postAsyncJSON lId ngramsList jHandle
-- | Tries converting a text file into an 'NgramList', so that we can reuse the
-- existing JSON endpoint for the CSV upload.
ngramsListFromCSVData :: Text -> Either Prelude.String NgramsList
ngramsListFromCSVData csvData = case decodeCsv of
-- /NOTE/ The legacy CSV data only supports terms in imports and exports, so this is
-- all we care about.
Left err -> Left $ "Invalid CSV found in ngramsListFromCSVData: " <> err
Right terms -> pure $ Map.fromList [ (NgramsTerms, Versioned 0 $ mconcat . Vec.toList $ terms) ]
where
lt = BSL.fromStrict $ P.encodeUtf8 t
eDec = Csv.decodeWith
(Csv.defaultDecodeOptions { Csv.decDelimiter = fromIntegral (P.ord '\t') })
Csv.HasHeader lt :: Either Prelude.String (Vector (Text, Text, Text))
binaryData = BSL.fromStrict $ P.encodeUtf8 csvData
decodeCsv :: Either Prelude.String (Vector NgramsTableMap)
decodeCsv = Csv.decodeWithP csvToNgramsTableMap
(Csv.defaultDecodeOptions { Csv.decDelimiter = fromIntegral (P.ord '\t') })
Csv.HasHeader
binaryData
-- | Converts a plain CSV 'Record' into an NgramsTableMap
csvToNgramsTableMap :: Csv.Record -> Csv.Parser NgramsTableMap
csvToNgramsTableMap record = case Vec.toList record of
(map P.decodeUtf8 -> [status, label, forms])
-> pure $ conv status label forms
_ -> Prelude.fail "csvToNgramsTableMap failed"
parseCsvData :: [(Text, Text, Text)] -> Map NgramsTerm NgramsRepoElement
parseCsvData lst = Map.fromList $ conv <$> lst
where
conv (status, label, forms) =
(NgramsTerm label, NgramsRepoElement { _nre_size = 1
conv :: Text -> Text -> Text -> NgramsTableMap
conv status label forms = Map.singleton (NgramsTerm label)
$ NgramsRepoElement { _nre_size = 1
, _nre_list = case status == "map" of
True -> MapTerm
False -> case status == "main" of
......@@ -214,7 +208,6 @@ parseCsvData lst = Map.fromList $ conv <$> lst
$ filter (\w -> w /= "" && w /= label)
$ splitOn "|&|" forms
}
)
------------------------------------------------------------------------
......@@ -236,4 +229,3 @@ toIndexedNgrams m t = Indexed <$> i <*> n
where
i = HashMap.lookup t m
n = Just (text2ngrams t)
status label forms
map abelian group
stop brazorf
......@@ -32,4 +32,4 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
withValidLogin port "alice" (GargPassword "alice") $ \token -> do
protected token "POST" "/gql" [r| {
"query": "{ user_infos(user_id: 2) { ui_id, ui_email } }"
} |] `shouldRespondWith'` [jsonFragment| {"data":{"user_infos":[{"ui_id":2,"ui_email":"alice@gargan.text"}]}} |]
} |] `shouldRespondWithFragment` [jsonFragment| {"data":{"user_infos":[{"ui_id":2,"ui_email":"alice@gargan.text"}]}} |]
......@@ -38,7 +38,7 @@ import Test.API.Setup (withTestDBAndPort, setupEnvironment, mkUrl, createAliceAn
import Test.Hspec
import Test.Hspec.Wai hiding (pendingWith)
import Test.Hspec.Wai.Internal (withApplication)
import Test.Utils (jsonFragment, shouldRespondWith')
import Test.Utils (jsonFragment, shouldRespondWithFragment)
import qualified Data.Map.Strict as Map
import qualified Data.ByteString.Lazy.Char8 as C8L
......@@ -165,7 +165,7 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
withApplication app $ do
withValidLogin port "alice" (GargPassword "alice") $ \token -> do
protected token "GET" (mkUrl port "/node/8") ""
`shouldRespondWith'` [jsonFragment| {"id":8,"user_id":2,"name":"alice" } |]
`shouldRespondWithFragment` [jsonFragment| {"id":8,"user_id":2,"name":"alice" } |]
it "forbids 'alice' to see others node private info" $ \((_testEnv, port), app) -> do
withApplication app $ do
......@@ -181,7 +181,7 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
withApplication app $ do
withValidLogin port "alice" (GargPassword "alice") $ \token -> do
protected token "GET" (mkUrl port "/tree/8") ""
`shouldRespondWith'` [jsonFragment| { "node": {"id":8, "name":"alice", "type": "NodeUser" } } |]
`shouldRespondWithFragment` [jsonFragment| { "node": {"id":8, "name":"alice", "type": "NodeUser" } } |]
it "forbids 'alice' to see others node private info" $ \((_testEnv, port), app) -> do
withApplication app $ do
......
......@@ -19,12 +19,15 @@ import Data.Text.IO qualified as TIO
import Fmt
import Gargantext.API.Admin.Auth.Types (Token)
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Ngrams.List ( ngramsListFromCSVData )
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types
import Gargantext.Core.Types.Individu
import Gargantext.Database.Action.User
import Gargantext.Database.Admin.Types.Hyperdata.Corpus
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Tree.Root
import Gargantext.Database.Schema.Ngrams
import Gargantext.Prelude hiding (get)
import Network.Wai.Handler.Warp qualified as Wai
import Paths_gargantext (getDataFileName)
......@@ -34,9 +37,10 @@ import Test.API.Setup (withTestDBAndPort, setupEnvironment, mkUrl, createAliceAn
import Test.Database.Types
import Test.Hspec
import Test.Hspec.Wai.Internal (withApplication, WaiSession)
import Test.Utils (shouldRespondWith')
import Web.FormUrlEncoded
import Test.Hspec.Wai.JSON (json)
import Test.Hspec.Wai (shouldRespondWith)
import Web.FormUrlEncoded
import qualified Data.Map.Strict as Map
data JobPollHandle = JobPollHandle {
_jph_id :: !Text
......@@ -121,7 +125,7 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
-- Now check that we can retrieve the ngrams
let getUrl = "/node/" +| listId |+ "/ngrams?ngramsType=Terms&listType=MapTerm&list="+| listId |+"&limit=50"
getJSON token (mkUrl port getUrl)
`shouldRespondWith'` [json| { "version": 0,
`shouldRespondWith` [json| { "version": 0,
"count": 1,
"data": [
{
......@@ -136,6 +140,14 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
describe "POST /api/v1.0/lists/:id/csv/add/form/async (CSV)" $ do
it "parses CSV via ngramsListFromCSVData" $ \((_testEnv, _port), _app) -> do
simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/simple.csv")
ngramsListFromCSVData simpleNgrams `shouldBe`
Right (Map.fromList [ (NgramsTerms, Versioned 0 $ Map.fromList [
(NgramsTerm "abelian group", NgramsRepoElement 1 MapTerm Nothing Nothing (MSet mempty))
, (NgramsTerm "brazorf", NgramsRepoElement 1 StopTerm Nothing Nothing (MSet mempty))
])])
it "allows uploading a CSV ngrams file" $ \((testEnv, port), app) -> do
cId <- newCorpusForUser testEnv "alice"
withApplication app $ do
......@@ -154,15 +166,26 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
liftIO (_jph_status j' `shouldBe` "IsFinished")
-- Now check that we can retrieve the ngrams
let getUrl = "/node/" +| listId |+ "/ngrams?ngramsType=Terms&listType=MapTerm&list="+| listId |+"&limit=50"
getJSON token (mkUrl port getUrl)
`shouldRespondWith'` [json| {"version":0
,"count":1
,"data":[
{"ngrams":"abelian group"
,"size":1
,"list":"MapTerm"
,"occurrences":[],"children":[]}
]
} |]
let getTermsUrl = "/node/" +| listId |+ "/ngrams?ngramsType=Terms&listType=MapTerm&list="+| listId |+"&limit=50"
getJSON token (mkUrl port getTermsUrl)
`shouldRespondWith` [json| {"version":0
,"count":1
,"data":[
{"ngrams":"abelian group"
,"size":1
,"list":"MapTerm"
,"occurrences":[],"children":[]}
]
} |]
let getStopUrl = "/node/" +| listId |+ "/ngrams?ngramsType=Terms&listType=StopTerm&list="+| listId |+"&limit=50"
getJSON token (mkUrl port getStopUrl)
`shouldRespondWith` [json| {"version":0
,"count":1
,"data":[
{"ngrams":"brazorf"
,"size":1
,"list":"StopTerm"
,"occurrences":[],"children":[]}
]
} |]
......@@ -41,11 +41,14 @@ jsonFragment = QuasiQuoter {
newtype JsonFragmentResponseMatcher = JsonFragmentResponseMatcher { getJsonMatcher :: ResponseMatcher }
shouldRespondWith' :: HasCallStack
=> WaiSession st SResponse
-> JsonFragmentResponseMatcher
-> WaiExpectation st
shouldRespondWith' action matcher = do
-- | Succeeds if the full body matches the input /fragment/. Careful in using this
-- combinator, as it won't check that the full body matches the input, but rather
-- that the body contains the input fragment, which might lead to confusion.
shouldRespondWithFragment :: HasCallStack
=> WaiSession st SResponse
-> JsonFragmentResponseMatcher
-> WaiExpectation st
shouldRespondWithFragment action matcher = do
r <- action
forM_ (match r (getJsonMatcher matcher)) (liftIO . expectationFailure)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment