Commit fed5f0b8 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX][Ngrams] order of tokens.

parent 71dce4c8
......@@ -204,8 +204,10 @@ nodeChildren (Leaf _) = Map.empty
-}
data Ward = ForWard | BackWard
class IsTrie trie where
buildTrie :: Entropy e => [[Token]] -> trie Token e
buildTrie :: Entropy e => (Int -> [[Text]] -> [[Token]]) -> Int -> [[Text]] -> trie Token e
nodeEntropy :: Entropy e => Getting e i e -> trie k i -> e
nodeChild :: Ord k => k -> trie k e -> trie k e
findTrie :: Ord k => [k] -> trie k e -> trie k e
......@@ -218,7 +220,7 @@ class IsTrie trie where
--nodeAutonomy inE t ks = nodeEntropy inE $ findTrie ks t
instance IsTrie Trie where
buildTrie = entropyTrie isTerminal . insertTries
buildTrie to n ts = entropyTrie isTerminal $ insertTries $ to n ts
nodeEntropy inE (Node _ e _) = e ^. inE
nodeEntropy _ (Leaf _) = -- trace "nodeEntropy of Leaf" $
......@@ -275,9 +277,14 @@ data Tries k e = Tries
, _bwd :: Trie k e
}
toToken' :: Int -> [[Text]] -> [[Token]]
toToken' n input = L.concat $ (filter (/= [Terminal Stop]) . chunkAlongEleve (n + 2)) <$> toToken <$> input
instance IsTrie Tries where
buildTrie tts = Tries { _fwd = buildTrie tts
, _bwd = buildTrie (reverse <$> tts)
buildTrie to n tts = Tries { _fwd = buildTrie to n tts
, _bwd = buildTrie to n (map reverse $ tts)
}
nodeEntropy inE (Tries fwd bwd) =
......@@ -390,7 +397,7 @@ testEleve debug n output checks = do
expected = fmap (T.splitOn "-") <$> out
input = (T.splitOn "-" =<<) <$> out
inp = toToken <$> input
t = buildTrie $ L.concat $ (filter (/= [Terminal Stop]) . chunkAlongEleve (n + 2)) <$> inp
t = buildTrie toToken' n input
-- nt = normalizeEntropy identity set_autonomy (fwd :: Trie Token Double)
-- nt = normalizeEntropy' info_entropy (\f -> info_norm_entropy' %~ f) nt
nt = normalizeEntropy identity set_autonomy t
......@@ -440,6 +447,7 @@ checks0 =
,("and", 1, 0.0, -2.113283334294875, -0.5000000000000002, 0.0, 0.0)
,("<stop>", 0, nan, nan, nan, 0.0, nan)
{-
,("<start> New", 1, nan, nan, nan, nan, 0.0)
,("New York", 3, 1.584962500721156, 1.584962500721156, 1.4142135623730951, nan, 1.584962500721156)
,("York is", 1, 0.0, nan, nan, nan, 0.0)
......@@ -456,6 +464,7 @@ checks0 =
,("York and New", 1, 0.0, nan, nan, nan, 0.0)
,("and New York", 1, 0.0, nan, nan, nan, 0.0)
,("New York <stop>", 1, nan, nan, nan, nan, nan)
-}
]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment