ElEve more tests

parent badd865e
...@@ -37,6 +37,7 @@ Notes for current implementation: ...@@ -37,6 +37,7 @@ Notes for current implementation:
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Text.Eleve where module Gargantext.Text.Eleve where
...@@ -68,8 +69,8 @@ type Entropy e = ...@@ -68,8 +69,8 @@ type Entropy e =
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Example and tests for development -- | Example and tests for development
data I e = I data I e = I
{ _info_entropy :: e { _info_entropy :: e
, _info_norm_entropy :: e , _info_autonomy :: e
} }
instance Show e => Show (I e) where instance Show e => Show (I e) where
...@@ -321,24 +322,14 @@ mainEleve n input = map unToken . split identity (t :: Trie Token Double) <$> in ...@@ -321,24 +322,14 @@ mainEleve n input = map unToken . split identity (t :: Trie Token Double) <$> in
where where
inp = toToken (n - 1) <$> input inp = toToken (n - 1) <$> input
t = buildTrie $ L.concat $ chunkAlong n 1 <$> inp t = buildTrie $ L.concat $ chunkAlong n 1 <$> inp
-- NP: here we use the entropy to split
-- instead we should use either:
-- info_norm_entropy or info_norm_entropy'
-- However they should first be fixed.
-} -}
testEleve :: Bool -> Int -> [Text] -> IO Bool sim :: Entropy e => e -> e -> Bool
testEleve debug n output = do sim x y = x == y || (P.isNaN x && P.isNaN y)
testEleve :: e ~ Double => Bool -> Int -> [Text] -> [(Text, Int, e, e, e, e, e)] -> IO Bool
testEleve debug n output checks = do
let let
out = T.words <$> output
expected = fmap (T.splitOn "-") <$> out
input = (T.splitOn "-" =<<) <$> out
inp = toToken (n - 1) <$> input
t = buildTrie $ L.concat $ chunkAlong n 1 <$> inp
-- nt = normalizeEntropy identity setNormEntropy (fwd :: Trie Token Double)
-- nt = normalizeEntropy' info_entropy (\f -> info_norm_entropy' %~ f) nt
nt = normalizeEntropy identity setNormEntropy
(t :: Trie Token Double)
{- {-
pss = [ (ps, findTrie ps fwd ^? _Just . node_entropy) -- . info_entropy) pss = [ (ps, findTrie ps fwd ^? _Just . node_entropy) -- . info_entropy)
| ps <- L.nub $ [ c | ps <- L.nub $ [ c
...@@ -350,20 +341,42 @@ testEleve debug n output = do ...@@ -350,20 +341,42 @@ testEleve debug n output = do
-} -}
--res = map unToken . split identity fwd <$> inp --res = map unToken . split identity fwd <$> inp
--res = map unToken . split info_norm_entropy' nt' <$> inp --res = map unToken . split info_norm_entropy' nt' <$> inp
res = map unToken . split info_norm_entropy nt <$> inp res = map unToken . split info_autonomy nt <$> inp
when debug $ do when debug $ do
P.putStrLn (show input) P.putStrLn (show input)
-- mapM_ (P.putStrLn . show) pss -- mapM_ (P.putStrLn . show) pss
P.putStrLn "" P.putStrLn ""
printTrie nt -- printTrie nt
{-
printTrie (_fwd nt) printTrie (_fwd nt)
printTrie (_bwd nt) printTrie (_bwd nt)
-}
P.putStrLn $ show res P.putStrLn $ show res
forM_ checks checker
pure $ expected == res pure $ expected == res
where where
out = T.words <$> output
expected = fmap (T.splitOn "-") <$> out
input = (T.splitOn "-" =<<) <$> out
inp = toToken (n - 1) <$> input
t = buildTrie $ L.concat $ chunkAlong n 1 <$> inp
-- nt = normalizeEntropy identity setNormEntropy (fwd :: Trie Token Double)
-- nt = normalizeEntropy' info_entropy (\f -> info_norm_entropy' %~ f) nt
nt = normalizeEntropy identity setNormEntropy t
check f msg x y =
if f x y
then P.putStrLn $ " PASS " <> msg <> " " <> show x <> " ~= " <> show y
else P.putStrLn $ " FAIL " <> msg <> " " <> show x <> " /= " <> show y
checker (ngram, count, entropy, _ev, autonomy, bwd_entropy, fwd_entropy) = do
let t' = findTrie (NonTerminal <$> T.words ngram) nt
P.putStrLn $ " " <> T.unpack ngram <> ":"
check (==) "count" count (_node_count (_fwd t'))
check sim "entropy" entropy (nodeEntropy info_entropy t')
check sim "autonomy" autonomy (nodeEntropy info_autonomy t')
check sim "fwd_entropy" fwd_entropy (nodeEntropy info_entropy (_fwd t'))
check sim "bwd_entropy" bwd_entropy (nodeEntropy info_entropy (_bwd t'))
printTrie = printTrie =
P.putStrLn . Tree.drawTree P.putStrLn . Tree.drawTree
. fmap show . fmap show
...@@ -384,17 +397,44 @@ example6 = ["le-petit chat" ...@@ -384,17 +397,44 @@ example6 = ["le-petit chat"
,"le gros rat" ,"le gros rat"
] ]
checks0, checks2 :: [(Text, Int, Double, Double, Double, Double, Double)]
checks0 =
[("New York", 3, 1.584962500721156, 1.584962500721156, 1.414213562373095, nan, 1.584962500721156)
,("York is", 1, 0.0, nan, nan, nan, 0.0)
,("is New", 1, 0.0, nan, nan, nan, 0.0)
,("New York", 3, 1.584962500721156, 1.584962500721156, 1.414213562373095, nan, 1.584962500721156)
,("York and", 1, 0.0, nan, nan, nan, 0.0)
,("and New", 1, 0.0, nan, nan, nan, 0.0)
,("New York", 3, 1.584962500721156, 1.584962500721156, 1.414213562373095, nan, 1.584962500721156)
]
checks2 =
[("to be", 3, 1.2516291673878228, 1.2516291673878228, 1.5535694744293167, nan, 0.9182958340544896)
,("be or", 2, 0.5, nan, nan, nan, 1.0)
,("or not", 1, 0.0, nan, nan, nan, 0.0)
,("not to", 1, 0.0, nan, nan, nan, 0.0)
,("to be", 3, 1.2516291673878228, 1.2516291673878228, 1.5535694744293167, nan, 0.9182958340544896)
,("be or", 2, 0.5, nan, nan, nan, 1.0)
,("or NOT", 1, 0.0, nan, nan, nan, 0.0)
,("NOT to", 1, 0.0, nan, nan, nan, 0.0)
,("to be", 3, 1.2516291673878228, 1.2516291673878228, 1.5535694744293167, nan, 0.9182958340544896)
,("be and", 1, 0.0, nan, nan, nan, 0.0)
]
runTests :: IO () runTests :: IO ()
runTests = runTests =
forM_ forM_
[("example0", 2, example0) [("example0", 2, example0, checks0)
,("example1", 2, example1) ,("example1", 2, example1, [])
,("example2", 3, example2) ,("example2", 3, example2, checks2)
,("example3", 2, example3) ,("example3", 2, example3, [])
,("example4", 4, example4) ,("example4", 4, example4, [])
,("example5", 5, example5) ,("example5", 5, example5, [])
] ]
(\(name, n, ex) -> do (\(name, n, ex, checks) -> do
b <- testEleve False n ex P.putStrLn $ name <> " " <> show n
P.putStrLn $ name <> " " <> show n <> " " <> if b then "PASS" else "FAIL" b <- testEleve False n ex checks
P.putStrLn $ " splitting: " <> if b then "PASS" else "FAIL"
) )
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment