Commit 43901901 authored by Alfredo Di Napoli

Fix bug in GargPorter implementation

parent d44831d0
Pipeline #6830 failed with stages
in 45 minutes and 57 seconds
......@@ -122,9 +122,14 @@ afterStep1b word = fromMaybe word result
-- Step 1b: run 'beforeStep1b' on the word. A 'Left' result is returned
-- unchanged; a 'Right' result is handed on to 'afterStep1b'.
step1b :: [Char] -> [Char]
step1b word =
  case beforeStep1b word of
    Left finished -> finished
    Right pending -> afterStep1b pending
-- Issue #415: According to the Porter stemming rules, a trailing `y` is replaced with `i` only when
-- the letter right before it is not a vowel (so `cry` becomes `cri`, while `soy` stays `soy`).
-- Step 1c: rewrite a trailing "y" as "i".
--
-- The rewrite is attempted only when the word has more than two characters,
-- ends in 'y', and 'isConsonant' holds for the position right before that
-- final 'y'. Every other word is returned untouched. Even when the guard
-- holds, 'replaceEnd' may yield 'Nothing', in which case the word is also
-- returned as it was.
step1c :: [Char] -> [Char]
step1c word
  -- 'List.last' is only reached after the length check (&& short-circuits),
  -- so it is never applied to an empty word.
  | len > 2 && List.last word == 'y' && isConsonant word (len - 2)
  = fromMaybe word $ replaceEnd containsVowel word "y" "i"
  | otherwise
  = word
  where
    -- Computed once and shared by the length check and the index passed to
    -- 'isConsonant'.
    len = List.length word
-- Step 1 as a whole: apply 'step1a', then 'step1b', then 'step1c', in that
-- order, to the given word.
step1 :: [Char] -> [Char]
step1 word = step1c (step1b (step1a word))
......
......@@ -52,6 +52,7 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
let corpusId = _sctx_data
withApplication _sctx_app $ do
withValidLogin _sctx_port "alice" (GargPassword "alice") $ \clientEnv token -> do
liftIO $ do
(HashedResponse _ tr1)
<- checkEither $ runClientM (get_table token
......@@ -63,8 +64,8 @@ tests = sequential $ aroundAll withTestDBAndPort $ do
(Just $ RawQuery "soy")
Nothing
) clientEnv
length (tr_docs tr1) `shouldBe` 4
length (tr_docs tr1) `shouldBe` 3
createSoySauceCorpus :: SpecContext () -> IO (SpecContext CorpusId)
createSoySauceCorpus ctx@SpecContext{..} = do
......
......@@ -135,6 +135,8 @@ stemmingTest :: TestEnv -> Assertion
stemmingTest _env = do
stem EN GargPorterAlgorithm "Ajeje" `shouldBe` "Ajeje"
stem EN GargPorterAlgorithm "PyPlasm:" `shouldBe` "PyPlasm:"
stem EN GargPorterAlgorithm "soy" `shouldBe` "soy"
stem EN GargPorterAlgorithm "cry" `shouldBe` "cri"
-- This test outlines the main differences between Porter and Lancaster.
stem EN GargPorterAlgorithm "dancer" `shouldBe` "dancer"
stem EN LancasterAlgorithm "dancer" `shouldBe` "dant"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment