[NGRAMS] fix entropy.

7835e973 · Alexandre Delanoë · fed5f0b8 · 7835e973
Commit 7835e973 authored Jun 07, 2019 by Alexandre Delanoë
Hide whitespace changes
Inline Side-by-side

Showing with 12 additions and 9 deletions

Eleve.hs src/Gargantext/Text/Eleve.hs +12 -9

No files found.
--- a/src/Gargantext/Text/Eleve.hs
+++ b/src/Gargantext/Text/Eleve.hs
@@ -99,6 +99,7 @@ parseToken "<start>" = Terminal Start
 parseToken "<stop>"  = Terminal Stop
 parseToken t         = NonTerminal t
 toToken :: [Text] -> [Token]
 toToken xs = Terminal Start : (NonTerminal <$> xs) <> [Terminal Stop]
@@ -204,7 +205,6 @@ nodeChildren (Leaf _)      = Map.empty
 -}
-data Ward = ForWard | BackWard
 class IsTrie trie where
  buildTrie   :: Entropy e => (Int -> [[Text]] -> [[Token]]) -> Int -> [[Text]] -> trie Token e
@@ -277,11 +277,6 @@ data Tries k e = Tries
  , _bwd :: Trie k e
  }
-toToken' :: Int -> [[Text]] -> [[Token]]
-toToken' n input = L.concat $ (filter (/= [Terminal Stop]) . chunkAlongEleve (n + 2)) <$> toToken <$> input
 instance IsTrie Tries where
  buildTrie to n tts = Tries { _fwd = buildTrie to n tts
                             , _bwd = buildTrie to n (map reverse $ tts)
@@ -358,6 +353,9 @@ sim x y = x == y || (P.isNaN x && P.isNaN y)
 chunkAlongEleve :: Int -> [a] -> [[a]]
 chunkAlongEleve n xs = L.take n <$> L.tails xs
+toToken' :: Int -> [[Text]] -> [[Token]]
+toToken' n input = L.concat $ (filter (/= [Terminal Stop]) . chunkAlongEleve (n + 2)) <$> toToken <$> input
 testEleve :: e ~ Double => Bool -> Int -> [Text] -> [(Text, Int, e, e, e, e, e)] -> IO Bool
 testEleve debug n output checks = do
  let
@@ -408,11 +406,16 @@ testEleve debug n output checks = do
        else P.putStrLn $ "    FAIL " <> msg <> " ref=" <> show ref <> " my=" <> show my
    checker (ngram, count, entropy, _ev, autonomy, bwd_entropy, fwd_entropy) = do
-      let ns = parseToken <$> T.words ngram
+      let ns  = parseToken <$> T.words ngram
          t' = findTrie ns nt
+          nsb = parseToken <$> (reverse $ T.words ngram)
+          tb' = findTrie nsb nt
      P.putStrLn $ "  " <> T.unpack ngram <> ":"
      check (==) "count"       count       (_node_count (_fwd t'))
-      check sim  "entropy"     entropy     (nodeEntropy info_entropy t')
+      check sim  "entropy"     entropy     (mean [(nodeEntropy info_entropy (_fwd t')), (nodeEntropy info_entropy (_bwd tb'))])
+      -- (nodeEntropy info_entropy t')
      check sim  "autonomy"    autonomy    (nodeEntropy info_autonomy t')
      check sim  "fwd_entropy" fwd_entropy (nodeEntropy info_entropy (_fwd t'))
      check sim  "bwd_entropy" bwd_entropy (nodeEntropy info_entropy (_bwd t'))
@@ -464,7 +467,7 @@ checks0 =
  ,("York and New", 1, 0.0, nan, nan, nan, 0.0)
  ,("and New York", 1, 0.0, nan, nan, nan, 0.0)
  ,("New York <stop>", 1, nan, nan, nan, nan, nan)
-}
+--}
  ]