Commit 8b76856a authored by Alfredo Di Napoli's avatar Alfredo Di Napoli

Reproduce TSV parsing issue for #380

parent 5225daf6
......@@ -56,6 +56,7 @@ data-files:
test-data/test_config.toml
test-data/issue-381/Termes_A_Ajouter_T4SC_Intellixir.tsv
test-data/issue-381/Termes_A_Ajouter_T4SC_Intellixir12.csv
test-data/issue-380/corpus.tsv
.clippy.dhall
-- common options
......
......@@ -68,3 +68,9 @@ cradle:
- path: "./test"
component: "gargantext:test:garg-test-hspec"
- path: "./bench/Main.hs"
component: "gargantext:bench:garg-bench"
- path: "./bench/Paths_gargantext.hs"
component: "gargantext:bench:garg-bench"
Title Authors Source Abstract Publication Year Publication Month Publication Day
#Ecuador | 🚨Se insta antisananews mastodon.social #Ecuador | 🚨Se instaló la audiencia de juicio contra el exalcalde de #Quito, Jorge Yunda, y 13 personas más, procesadas por #peculado en la compra de 100.000 pruebas para detectar #COVID19 y que presuntamente abrían causado un perjuicio al Estado por 2’235.491,16 dólares.#Comparta #Suscríbase↩👍Sígame enRadio Antisana Media Online: https://antisananews.blogspot.com/TikTok: https://www.tiktok.com/@antisanamediaonline?lang=esTelegram: https://t.me/AntisanaMediaOnlineVK: https://vk.com/antisanamultimediosX: https://twitter.com/AntisanaNews 2024 02 06
#NorthCarolina bann MatthewChat@mstdn.social toot.io #NorthCarolina banned immunocompromized people from wearing masks in public. This does NOT apply to the #KKK, as their is a specific exemption for them. #covid #masking 2024 05 16
'The coronation of t BenHigbie@mastodon.social fosstodon.org 'The coronation of the Serbian Tsar Stefan Dušan as East Roman Emperor' from 'The Slav Epic' by Alphonse Mucha (1926) #art #arts #artist #artists #artlover #artlovers #arthistory #illustration #painting #paintings #inspiration #artmuseum #museum #artmuseums #museums #artnet 2024 05 05
3/5 Krankenstand 202 ToveHarris mastodon.social 3/5 Krankenstand 2022 und 2023 komme Verlust von 350.000 Beschäftigten gleich. Arbeitsausfall werde derzeit durch Überstunden + erhöhte Produktivität aufgefangen. Ohne diese Leistungen der Beschäftigten gäbe es eine Lücke von 700.000 Beschäftigten.Und #CovidIsNotOver #LongCOVID #COVID #COVID19 #Coronahttps://www.vfa.de/de/wirtschaft-politik/macroscope/macroscope-hoher-krankenstand-drueckt-deutschland-in-die-rezession 2024 01 27
@ABScientist @Hidde justafrog@mstdn.social mastodon.social @ABScientist @Hidde @roelgrif How about this one?https://nos.nl/artikel/2457983-viroloog-koopmans-coronagolf-in-china-nu-niet-heel-zorgelijk-voor-nederland 2023 12 28
@cassandra17lina In gemswinc counter.social @cassandra17lina In a way, Covid was a gift to introverts 2024 01 24
@erictopol This is t SpookieRobieTheCat@mastodon.social toot.io @erictopol This is the #CovidBrain that Trump and MAGA suffer from. All those #Antivaxx proponents will suffer the consequences too. And I'm ok with it. Be anti-science, be wilfully ignorant and live a life in agony, that's their choice. I shouldn't have to pay a dime for their stupidity. 2024 01 09
A bunch of maskless crowgirl@hachyderm.io toot.io "A bunch of maskless #Covid "experts" like Gregg Gonsalves are now trying to sound credible by warning about H5N1.I think H5N1 is a serious problem that requires airborne mitigations and decontaminating the food supply.And I also don't dine in restaurants. Gregg on the other hand is proud of his masklessness.Be warned. Don't let these professional Covid minimizers get away with this crap with #H5N1." 2024 06 03
......@@ -10,24 +10,59 @@ module Test.API.Private.List (
import Data.Aeson.QQ
import Data.Text.IO qualified as TIO
import Fmt
import Gargantext.API.Ngrams.List.Types
import Gargantext.API.Ngrams.Types qualified as APINgrams
import Gargantext.API.Node.Corpus.New.Types qualified as FType
import Gargantext.Core.Config
import Gargantext.Core.Types
import Gargantext.Core.Types.Individu
import Gargantext.Core.Worker.Types
import Gargantext.Prelude
import Paths_gargantext
import Prelude
import Servant.Client.Streaming
import Test.API.Prelude (newCorpusForUser, checkEither)
import Test.API.Routes
import Test.API.Setup
import Test.API.UpdateList qualified as UpdateList
import Test.Database.Types
import Test.Hspec (Spec, it, aroundAll, describe, sequential)
import Test.Hspec.Expectations
import Test.Hspec.Wai.Internal (withApplication)
import Test.Utils
import Fmt
importTermsTSV :: SpecContext () -> String -> IO (JobInfo, CorpusId, ListId)
importTermsTSV (SpecContext testEnv port app _) name = do
cId <- liftIO $ newCorpusForUser testEnv "alice"
let log_cfg = test_config testEnv ^. gc_logging
withApplication app $ do
withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
([listId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build cId)) [aesonQQ|{"pn_typename":"NodeList","pn_name":"Testing"}|]
-- Upload the CSV doc
simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName name)
let params = WithTextFile { _wtf_filetype = FType.TSV
, _wtf_data = simpleNgrams
, _wtf_name = "simple.tsv" }
pendingJob <- checkEither $ liftIO $ runClientM (add_tsv_to_list token listId params) clientEnv
jobInfo <- pollUntilWorkFinished log_cfg port pendingJob
pure (jobInfo, cId, listId)
importCorpusTSV :: SpecContext () -> String -> IO (JobInfo, CorpusId, ListId)
importCorpusTSV (SpecContext testEnv port app _) name = do
cId <- liftIO $ newCorpusForUser testEnv "alice"
let log_cfg = test_config testEnv ^. gc_logging
withApplication app $ do
withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
([listId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build cId)) [aesonQQ|{"pn_typename":"NodeList","pn_name":"Testing"}|]
-- Upload the CSV doc
simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName name)
let params = UpdateList.mkNewWithForm simpleNgrams "simple.tsv"
pendingJob <- checkEither $ liftIO $ runClientM (importCorpus token listId params) clientEnv
jobInfo <- pollUntilWorkFinished log_cfg port pendingJob
pure (jobInfo, cId, listId)
tests :: Spec
tests = sequential $ aroundAll withTestDBAndPort $ do
......@@ -39,20 +74,20 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
describe "Importing terms as TSV" $ do
it "should work for TSV with a missing 'forms' column" $ \(SpecContext testEnv port app _) -> do
cId <- newCorpusForUser testEnv "alice"
let log_cfg = test_config testEnv ^. gc_logging
it "should work for TSV with a missing 'forms' column" $ \ctx@(SpecContext _ port app _) -> do
(_, cId, listId) <- importTermsTSV ctx "test-data/issue-381/Termes_A_Ajouter_T4SC_Intellixir.tsv"
withApplication app $ do
withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
([listId] :: [NodeId]) <- protectedJSON token "POST" (mkUrl port ("/node/" <> build cId)) [aesonQQ|{"pn_typename":"NodeList","pn_name":"Testing"}|]
-- Upload the CSV doc
simpleNgrams <- liftIO (TIO.readFile =<< getDataFileName "test-data/issue-381/Termes_A_Ajouter_T4SC_Intellixir.tsv")
let params = WithTextFile { _wtf_filetype = FType.TSV
, _wtf_data = simpleNgrams
, _wtf_name = "simple.tsv" }
pendingJob <- checkEither $ liftIO $ runClientM (add_tsv_to_list token listId params) clientEnv
_ <- pollUntilWorkFinished log_cfg port pendingJob
-- Now check that we can retrieve the ngrams, and the ngrams list is not empty!
liftIO $ do
eRes <- checkEither $ runClientM (get_table_ngrams token cId APINgrams.Terms listId 50 Nothing (Just MapTerm) Nothing Nothing Nothing Nothing) clientEnv
let (APINgrams.NgramsTable terms) = APINgrams._vc_data eRes
length terms `shouldSatisfy` (>= 1)
it "should handle dirty TSV as per issue #380" $ \ctx@(SpecContext _testEnv port app _) -> do
(_, cId, listId) <- importCorpusTSV ctx "test-data/issue-380/corpus.tsv"
withApplication app $ do
withValidLogin port "alice" (GargPassword "alice") $ \clientEnv token -> do
-- Now check that we can retrieve the ngrams, and the ngrams list is not empty!
liftIO $ do
eRes <- checkEither $ runClientM (get_table_ngrams token cId APINgrams.Terms listId 50 Nothing (Just MapTerm) Nothing Nothing Nothing Nothing) clientEnv
......
......@@ -29,6 +29,7 @@ module Test.API.Routes (
, add_form_to_list
, add_tsv_to_list
, addTeamMember
, importCorpus
) where
import Data.Text.Encoding qualified as TE
......@@ -63,6 +64,8 @@ import Network.Wai.Handler.Warp (Port)
import Servant.Auth.Client qualified as S
import Servant.Client.Streaming
import Servant.Conduit ()
import Gargantext.API.Routes.Named.Corpus (addWithFormEp)
import Gargantext.API.Node.Types (NewWithForm)
-- This is for requests made by http.client directly to hand-crafted URLs.
......@@ -358,3 +361,20 @@ addTeamMember (toServantToken -> token) nodeId params = fmap UnsafeMkNodeId $
& shareAPI
& shareNodeEp
& ($ params)
importCorpus :: Token -> CorpusId -> NewWithForm -> ClientM JobInfo
importCorpus (toServantToken -> token) corpusId params =
clientRoutes & apiWithCustomErrorScheme
& ($ GES_new)
& backendAPI
& backendAPI'
& mkBackEndAPI
& gargAPIVersion
& gargPrivateAPI
& mkPrivateAPI
& ($ token)
& addWithFormAPI
& addWithFormEp
& ($ corpusId)
& workerAPIPost
& (\submitForm -> submitForm params)
......@@ -23,6 +23,7 @@ module Test.API.UpdateList (
, JobPollHandle(..)
, updateNode
, createDocsList
, mkNewWithForm
) where
import Control.Lens (mapped, over)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment