Commit dc703771 authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Upload CSV test

it also brings the CSV and JSON APIs closer in look.
Temporarily disable the `importCsvFile` function.
parent eedac139
...@@ -29,6 +29,7 @@ data-files: ...@@ -29,6 +29,7 @@ data-files:
ekg-assets/cross.png ekg-assets/cross.png
test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json
test-data/ngrams/simple.json test-data/ngrams/simple.json
test-data/ngrams/simple.csv
test-data/phylo/bpa_phylo_test.json test-data/phylo/bpa_phylo_test.json
test-data/phylo/open_science.json test-data/phylo/open_science.json
test-data/test_config.ini test-data/test_config.ini
......
...@@ -65,6 +65,10 @@ type GETAPI = Summary "Get List" ...@@ -65,6 +65,10 @@ type GETAPI = Summary "Get List"
getApi :: GargServer GETAPI getApi :: GargServer GETAPI
getApi = getJson :<|> getCsv getApi = getJson :<|> getCsv
--
-- JSON API
--
---------------------- ----------------------
type JSONAPI = Summary "Update List" type JSONAPI = Summary "Update List"
:> "lists" :> "lists"
...@@ -77,22 +81,10 @@ type JSONAPI = Summary "Update List" ...@@ -77,22 +81,10 @@ type JSONAPI = Summary "Update List"
jsonApi :: ServerT JSONAPI (GargM Env BackendInternalError) jsonApi :: ServerT JSONAPI (GargM Env BackendInternalError)
jsonApi = jsonPostAsync jsonApi = jsonPostAsync
----------------------
type CSVAPI = Summary "Update List (legacy v3 CSV)"
:> "lists"
:> Capture "listId" ListId
:> "csv"
:> "add"
:> "form"
:> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] WithTextFile JobLog
csvApi :: ServerT CSVAPI (GargM Env BackendInternalError)
csvApi = csvPostAsync
------------------------------------------------------------------------ ------------------------------------------------------------------------
getJson :: HasNodeStory env err m => getJson :: HasNodeStory env err m
ListId -> m (Headers '[Header "Content-Disposition" Text] NgramsList) => ListId
-> m (Headers '[Header "Content-Disposition" Text] NgramsList)
getJson lId = do getJson lId = do
lst <- getNgramsList lId lst <- getNgramsList lId
pure $ addHeader (concat [ "attachment; filename=GarganText_NgramsList-" pure $ addHeader (concat [ "attachment; filename=GarganText_NgramsList-"
...@@ -101,8 +93,9 @@ getJson lId = do ...@@ -101,8 +93,9 @@ getJson lId = do
] ]
) lst ) lst
getCsv :: HasNodeStory env err m => getCsv :: HasNodeStory env err m
ListId -> m (Headers '[Header "Content-Disposition" Text] NgramsTableMap) => ListId
-> m (Headers '[Header "Content-Disposition" Text] NgramsTableMap)
getCsv lId = do getCsv lId = do
lst <- getNgramsList lId lst <- getNgramsList lId
pure $ case Map.lookup TableNgrams.NgramsTerms lst of pure $ case Map.lookup TableNgrams.NgramsTerms lst of
...@@ -114,48 +107,23 @@ getCsv lId = do ...@@ -114,48 +107,23 @@ getCsv lId = do
] ]
) _v_data ) _v_data
------------------------------------------------------------------------
-- TODO : purge list
-- TODO talk
setList :: HasNodeStory env err m
=> ListId
-> NgramsList
-> m Bool
setList l m = do
-- TODO check with Version for optim
-- printDebug "New list as file" l
_ <- mapM (\(nt, Versioned _v ns) -> (setListNgrams l nt ns)) $ toList m
-- v <- getNodeStoryVar [l]
-- liftBase $ do
-- ns <- atomically $ readTVar v
-- printDebug "[setList] node story: " ns
-- TODO reindex
pure True
------------------------------------------------------------------------
toIndexedNgrams :: HashMap Text NgramsId -> Text -> Maybe (Indexed Int Ngrams)
toIndexedNgrams m t = Indexed <$> i <*> n
where
i = HashMap.lookup t m
n = Just (text2ngrams t)
------------------------------------------------------------------------ ------------------------------------------------------------------------
jsonPostAsync :: ServerT JSONAPI (GargM Env BackendInternalError) jsonPostAsync :: ServerT JSONAPI (GargM Env BackendInternalError)
jsonPostAsync lId = jsonPostAsync lId =
serveJobsAPI UpdateNgramsListJobJSON $ \jHandle f -> serveJobsAPI UpdateNgramsListJobJSON $ \jHandle f ->
postAsync' lId f jHandle postAsyncJSON lId f jHandle
postAsync' :: (FlowCmdM env err m, MonadJobStatus m) ------------------------------------------------------------------------
postAsyncJSON :: (FlowCmdM env err m, MonadJobStatus m)
=> ListId => ListId
-> WithJsonFile -> WithJsonFile
-> JobHandle m -> JobHandle m
-> m () -> m ()
postAsync' l (WithJsonFile m _) jobHandle = do postAsyncJSON l (WithJsonFile m _) jobHandle = do
markStarted 2 jobHandle markStarted 2 jobHandle
-- printDebug "New list as file" l -- printDebug "New list as file" l
_ <- setList l m setList
-- printDebug "Done" r -- printDebug "Done" r
markProgress 1 jobHandle markProgress 1 jobHandle
...@@ -166,8 +134,58 @@ postAsync' l (WithJsonFile m _) jobHandle = do ...@@ -166,8 +134,58 @@ postAsync' l (WithJsonFile m _) jobHandle = do
markComplete jobHandle markComplete jobHandle
where
setList :: HasNodeStory env err m => m ()
setList = do
-- TODO check with Version for optim
mapM_ (\(nt, Versioned _v ns) -> (setListNgrams l nt ns)) $ toList m
-- TODO reindex
--
-- CSV API
--
----------------------
type CSVAPI = Summary "Update List (legacy v3 CSV)"
:> "lists"
:> Capture "listId" ListId
:> "csv"
:> "add"
:> "form"
:> "async"
:> AsyncJobs JobLog '[FormUrlEncoded] WithTextFile JobLog
csvApi :: ServerT CSVAPI (GargM Env BackendInternalError)
csvApi = csvPostAsync
------------------------------------------------------------------------ ------------------------------------------------------------------------
csvPostAsync :: ServerT CSVAPI (GargM Env BackendInternalError)
csvPostAsync lId =
serveJobsAPI UpdateNgramsListJobCSV $ \jHandle f -> do
postAsyncCSV lId f jHandle
postAsyncCSV :: (FlowCmdM env err m, MonadJobStatus m)
=> ListId
-> WithTextFile
-> JobHandle m
-> m ()
postAsyncCSV l (WithTextFile _filetype csvData _name) jHandle = do
markStarted 2 jHandle
let eLst = readCsvText csvData
case eLst of
Left err -> markFailed (Just err) jHandle
Right lst -> do
let p = parseCsvData lst
_ <- setListNgrams l NgramsTerms p
markProgress 1 jHandle
corpus_node <- getNode l -- (Proxy :: Proxy HyperdataList)
let corpus_id = fromMaybe (panic "") (_node_parent_id corpus_node)
_ <- reIndexWith corpus_id l NgramsTerms (Set.fromList [MapTerm, CandidateTerm])
markComplete jHandle
------------------------------------------------------------------------
readCsvText :: Text -> Either Text [(Text, Text, Text)] readCsvText :: Text -> Either Text [(Text, Text, Text)]
readCsvText t = case eDec of readCsvText t = case eDec of
Left err -> Left $ pack err Left err -> Left $ pack err
...@@ -198,47 +216,24 @@ parseCsvData lst = Map.fromList $ conv <$> lst ...@@ -198,47 +216,24 @@ parseCsvData lst = Map.fromList $ conv <$> lst
} }
) )
csvPost :: HasNodeStory env err m ------------------------------------------------------------------------
=> ListId
-> Text
-> m (Either Text ())
csvPost l m = do
-- printDebug "[csvPost] l" l
-- printDebug "[csvPost] m" m
-- status label forms
let eLst = readCsvText m
case eLst of
Left err -> pure $ Left err
Right lst -> do
let p = parseCsvData lst
--printDebug "[csvPost] lst" lst
-- printDebug "[csvPost] p" p
_ <- setListNgrams l NgramsTerms p
-- printDebug "ReIndexing List" l
corpus_node <- getNode l -- (Proxy :: Proxy HyperdataList)
let corpus_id = fromMaybe (panic "") (_node_parent_id corpus_node)
_ <- reIndexWith corpus_id l NgramsTerms (Set.fromList [MapTerm, CandidateTerm])
pure $ Right ()
------------------------------------------------------------------------ -- | This is for debugging the CSV parser in the REPL
csvPostAsync :: ServerT CSVAPI (GargM Env BackendInternalError) --importCsvFile :: (HasNodeStory env err m)
csvPostAsync lId = -- => ListId -> P.FilePath -> m ()
serveJobsAPI UpdateNgramsListJobCSV $ \jHandle f -> do --importCsvFile lId fp = do
markStarted 1 jHandle -- contents <- liftBase $ P.readFile fp
ePost <- csvPost lId (_wtf_data f) -- postAsyncCSV lId (WithTextFile mempty contents mempty) noJobHandle
case ePost of
Left err -> markFailed (Just err) jHandle
Right () -> markComplete jHandle
getLatestJobStatus jHandle >>= printDebug "[csvPostAsync] job ended with joblog: " --
-- Utils
--
------------------------------------------------------------------------ ------------------------------------------------------------------------
toIndexedNgrams :: HashMap Text NgramsId -> Text -> Maybe (Indexed Int Ngrams)
toIndexedNgrams m t = Indexed <$> i <*> n
where
i = HashMap.lookup t m
n = Just (text2ngrams t)
-- | This is for debugging the CSV parser in the REPL
importCsvFile :: (HasNodeStory env err m)
=> ListId -> P.FilePath -> m (Either Text ())
importCsvFile lId fp = do
contents <- liftBase $ P.readFile fp
csvPost lId contents
status label forms
map abelian group
...@@ -43,10 +43,15 @@ import qualified Data.Map.Strict as Map ...@@ -43,10 +43,15 @@ import qualified Data.Map.Strict as Map
import qualified Data.ByteString.Lazy.Char8 as C8L import qualified Data.ByteString.Lazy.Char8 as C8L
-- | Issue a request with a valid 'Authorization: Bearer' inside. -- | Issue a request with a valid 'Authorization: Bearer' inside.
protected :: Token -> Method -> ByteString -> L.ByteString -> WaiSession () SResponse protected :: HasCallStack
=> Token
-> Method
-> ByteString
-> L.ByteString
-> WaiSession () SResponse
protected tkn mth url = protectedWith mempty tkn mth url protected tkn mth url = protectedWith mempty tkn mth url
protectedJSON :: forall a. (JSON.FromJSON a, Typeable a) protectedJSON :: forall a. (JSON.FromJSON a, Typeable a, HasCallStack)
=> Token => Token
-> Method -> Method
-> ByteString -> ByteString
...@@ -54,7 +59,7 @@ protectedJSON :: forall a. (JSON.FromJSON a, Typeable a) ...@@ -54,7 +59,7 @@ protectedJSON :: forall a. (JSON.FromJSON a, Typeable a)
-> WaiSession () a -> WaiSession () a
protectedJSON tkn mth url = protectedJSONWith mempty tkn mth url protectedJSON tkn mth url = protectedJSONWith mempty tkn mth url
protectedJSONWith :: forall a. (JSON.FromJSON a, Typeable a) protectedJSONWith :: forall a. (JSON.FromJSON a, Typeable a, HasCallStack)
=> [Network.HTTP.Types.Header] => [Network.HTTP.Types.Header]
-> Token -> Token
-> Method -> Method
...@@ -67,7 +72,8 @@ protectedJSONWith hdrs tkn mth url jsonV = do ...@@ -67,7 +72,8 @@ protectedJSONWith hdrs tkn mth url jsonV = do
Left err -> Prelude.fail $ "protectedJSON failed when parsing " <> show (typeRep $ Proxy @a) <> ": " <> err Left err -> Prelude.fail $ "protectedJSON failed when parsing " <> show (typeRep $ Proxy @a) <> ": " <> err
Right x -> pure x Right x -> pure x
protectedWith :: [Network.HTTP.Types.Header] protectedWith :: HasCallStack
=> [Network.HTTP.Types.Header]
-> Token -> Token
-> Method -> ByteString -> L.ByteString -> WaiSession () SResponse -> Method -> ByteString -> L.ByteString -> WaiSession () SResponse
protectedWith extraHeaders tkn mth url payload = protectedWith extraHeaders tkn mth url payload =
...@@ -80,7 +86,7 @@ protectedWith extraHeaders tkn mth url payload = ...@@ -80,7 +86,7 @@ protectedWith extraHeaders tkn mth url payload =
hdrs = Map.toList $ Map.fromList $ defaultHeaders <> extraHeaders hdrs = Map.toList $ Map.fromList $ defaultHeaders <> extraHeaders
in request mth url hdrs payload in request mth url hdrs payload
protectedNewError :: Token -> Method -> ByteString -> L.ByteString -> WaiSession () SResponse protectedNewError :: HasCallStack => Token -> Method -> ByteString -> L.ByteString -> WaiSession () SResponse
protectedNewError tkn mth url = protectedWith newErrorFormat tkn mth url protectedNewError tkn mth url = protectedWith newErrorFormat tkn mth url
where where
newErrorFormat = [(CI.mk "X-Garg-Error-Scheme", "new")] newErrorFormat = [(CI.mk "X-Garg-Error-Scheme", "new")]
...@@ -88,7 +94,7 @@ protectedNewError tkn mth url = protectedWith newErrorFormat tkn mth url ...@@ -88,7 +94,7 @@ protectedNewError tkn mth url = protectedWith newErrorFormat tkn mth url
getJSON :: Token -> ByteString -> WaiSession () SResponse getJSON :: Token -> ByteString -> WaiSession () SResponse
getJSON tkn url = protectedWith mempty tkn "GET" url "" getJSON tkn url = protectedWith mempty tkn "GET" url ""
postJSONUrlEncoded :: forall a. (JSON.FromJSON a, Typeable a) postJSONUrlEncoded :: forall a. (JSON.FromJSON a, Typeable a, HasCallStack)
=> Token => Token
-> ByteString -> ByteString
-> L.ByteString -> L.ByteString
......
...@@ -71,7 +71,12 @@ newCorpusForUser env uname = flip runReaderT env $ runTestMonad $ do ...@@ -71,7 +71,12 @@ newCorpusForUser env uname = flip runReaderT env $ runTestMonad $ do
-- | Poll the given URL every second until it finishes. -- | Poll the given URL every second until it finishes.
-- Retries up to 60 times (i.e. for 1 minute, before giving up) -- Retries up to 60 times (i.e. for 1 minute, before giving up)
pollUntilFinished :: Token -> Wai.Port -> (JobPollHandle -> Builder) -> JobPollHandle -> WaiSession () JobPollHandle pollUntilFinished :: HasCallStack
=> Token
-> Wai.Port
-> (JobPollHandle -> Builder)
-> JobPollHandle
-> WaiSession () JobPollHandle
pollUntilFinished tkn port mkUrlPiece = go 60 pollUntilFinished tkn port mkUrlPiece = go 60
where where
go :: Int -> JobPollHandle -> WaiSession () JobPollHandle go :: Int -> JobPollHandle -> WaiSession () JobPollHandle
...@@ -100,15 +105,15 @@ tests = sequential $ aroundAll withTestDBAndPort $ do ...@@ -100,15 +105,15 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
cId <- newCorpusForUser testEnv "alice" cId <- newCorpusForUser testEnv "alice"
withApplication app $ do withApplication app $ do
withValidLogin port "alice" (GargPassword "alice") $ \token -> do withValidLogin port "alice" (GargPassword "alice") $ \token -> do
([listId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> (fromString $ show $ _NodeId cId))) [aesonQQ|{"pn_typename":"NodeList","pn_name":"Testing"}|] ([listId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build cId)) [aesonQQ|{"pn_typename":"NodeList","pn_name":"Testing"}|]
-- Upload the JSON doc -- Upload the JSON doc
simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/simple.json") simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/simple.json")
let jsonFileFormData = [ (T.pack "_wjf_data", simpleNgrams) let jsonFileFormData = [ (T.pack "_wjf_data", simpleNgrams)
, ("_wjf_filetype", "JSON") , ("_wjf_filetype", "JSON")
, ("_wjf_name", "simple_ngrams.json") , ("_wjf_name", "simple_ngrams.json")
] ]
let url = "/lists/" <> (fromString $ show $ _NodeId listId) <> "/add/form/async" let url = "/lists/" +|listId|+ "/add/form/async"
let mkPollUrl j = "/corpus/" <> (fromString $ show $ _NodeId listId) <> "/add/form/async/" +|_jph_id j|+ "/poll?limit=1" let mkPollUrl j = "/corpus/" +|listId|+ "/add/form/async/" +|_jph_id j|+ "/poll?limit=1"
(j :: JobPollHandle) <- postJSONUrlEncoded token (mkUrl port url) (urlEncodeFormStable $ toForm jsonFileFormData) (j :: JobPollHandle) <- postJSONUrlEncoded token (mkUrl port url) (urlEncodeFormStable $ toForm jsonFileFormData)
j' <- pollUntilFinished token port mkPollUrl j j' <- pollUntilFinished token port mkPollUrl j
liftIO (_jph_status j' `shouldBe` "IsFinished") liftIO (_jph_status j' `shouldBe` "IsFinished")
...@@ -129,3 +134,35 @@ tests = sequential $ aroundAll withTestDBAndPort $ do ...@@ -129,3 +134,35 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
] ]
} |] } |]
describe "POST /api/v1.0/lists/:id/csv/add/form/async (CSV)" $ do
it "allows uploading a CSV ngrams file" $ \((testEnv, port), app) -> do
cId <- newCorpusForUser testEnv "alice"
withApplication app $ do
withValidLogin port "alice" (GargPassword "alice") $ \token -> do
([listId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build cId)) [aesonQQ|{"pn_typename":"NodeList","pn_name":"Testing"}|]
-- Upload the CSV doc
simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/ngrams/simple.csv")
let tsvFileFormData = [ (T.pack "_wtf_data", simpleNgrams)
, ("_wtf_filetype", "CSV")
, ("_wtf_name", "simple.csv")
]
let url = "/lists/" <> (fromString $ show $ _NodeId listId) <> "/csv/add/form/async"
let mkPollUrl j = "/corpus/" <> (fromString $ show $ _NodeId listId) <> "/add/form/async/" +|_jph_id j|+ "/poll?limit=1"
(j :: JobPollHandle) <- postJSONUrlEncoded token (mkUrl port url) (urlEncodeFormStable $ toForm tsvFileFormData)
j' <- pollUntilFinished token port mkPollUrl j
liftIO (_jph_status j' `shouldBe` "IsFinished")
-- Now check that we can retrieve the ngrams
let getUrl = "/node/" +| listId |+ "/ngrams?ngramsType=Terms&listType=MapTerm&list="+| listId |+"&limit=50"
getJSON token (mkUrl port getUrl)
`shouldRespondWith'` [json| {"version":0
,"count":1
,"data":[
{"ngrams":"abelian group"
,"size":1
,"list":"MapTerm"
,"occurrences":[],"children":[]}
]
} |]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment