ElEve: reverse, buildTrie, printTrie...

parent f2de3b66
...@@ -106,24 +106,6 @@ parseToken "<start>" = Terminal Start ...@@ -106,24 +106,6 @@ parseToken "<start>" = Terminal Start
parseToken "<stop>" = Terminal Stop parseToken "<stop>" = Terminal Stop
parseToken t = NonTerminal t parseToken t = NonTerminal t
-- |
-- >>> reverseTokens [Terminal Start, NonTerminal "new", NonTerminal "york", Terminal Stop]
-- [Terminal Start,NonTerminal "york",NonTerminal "new",Terminal Stop]
reverseTokens :: [Token] -> [Token]
reverseTokens xs = case lastMay xs of
Nothing -> []
Just (Terminal Stop) -> reverseTokens' xs <> [Terminal Stop]
_ -> reverseTokens' xs
reverseTokens' :: [Token] -> [Token]
reverseTokens' [] = []
reverseTokens' [Terminal Stop] = []
reverseTokens' [x] = [x]
reverseTokens' (x:xs) = case x of
Terminal Start -> [Terminal Start] <> reverseTokens' xs
_ -> reverseTokens' xs <> [x]
toToken :: [Text] -> [Token] toToken :: [Text] -> [Token]
toToken xs = Terminal Start : (NonTerminal <$> xs) <> [Terminal Stop] toToken xs = Terminal Start : (NonTerminal <$> xs) <> [Terminal Stop]
...@@ -213,10 +195,11 @@ nodeChildren (Leaf _) = Map.empty ...@@ -213,10 +195,11 @@ nodeChildren (Leaf _) = Map.empty
class IsTrie trie where class IsTrie trie where
buildTrie :: Entropy e => (Int -> [[Text]] -> [[Token]]) -> Int -> [[Text]] -> trie Token e buildTrie :: Entropy e => [[Token]] -> trie Token e
nodeEntropy :: Entropy e => Getting e i e -> trie k i -> e nodeEntropy :: Entropy e => Getting e i e -> trie k i -> e
nodeChild :: Ord k => k -> trie k e -> trie k e nodeChild :: Ord k => k -> trie k e -> trie k e
findTrie :: Ord k => [k] -> trie k e -> trie k e findTrie :: Ord k => [k] -> trie k e -> trie k e
printTrie :: (Show i, Entropy e) => Getting e i e -> trie Token i -> IO ()
normalizeEntropy :: Entropy e normalizeEntropy :: Entropy e
=> Getting e i e -> ModEntropy i o e => Getting e i e -> ModEntropy i o e
-> trie k i -> trie k o -> trie k i -> trie k o
...@@ -226,7 +209,7 @@ class IsTrie trie where ...@@ -226,7 +209,7 @@ class IsTrie trie where
--nodeAutonomy inE t ks = nodeEntropy inE $ findTrie ks t --nodeAutonomy inE t ks = nodeEntropy inE $ findTrie ks t
instance IsTrie Trie where instance IsTrie Trie where
buildTrie to n ts = entropyTrie isTerminal $ insertTries $ to n ts buildTrie ts = entropyTrie isTerminal $ insertTries ts
nodeEntropy inE (Node _ e _) = e ^. inE nodeEntropy inE (Node _ e _) = e ^. inE
nodeEntropy _ (Leaf _) = nan nodeEntropy _ (Leaf _) = nan
...@@ -236,10 +219,18 @@ instance IsTrie Trie where ...@@ -236,10 +219,18 @@ instance IsTrie Trie where
findTrie ks t = L.foldl (flip nodeChild) t ks findTrie ks t = L.foldl (flip nodeChild) t ks
printTrie inE t = do
P.putStrLn . Tree.drawTree
. fmap show
$ toTree (NonTerminal "") t
P.putStrLn " Levels:"
forM_ (normalizationLevels inE t) $ \level ->
P.putStrLn $ " " <> show level
normalizeEntropy inE modE t = go (modE identity) (normalizationLevels inE t) t normalizeEntropy inE modE t = go (modE identity) (normalizationLevels inE t) t
where where
go _ [] _ = panic "normalizeEntropy' empty levels"
go _ _ (Leaf c) = Leaf c go _ _ (Leaf c) = Leaf c
go _ [] _ = panic "normalizeEntropy' empty levels"
go f ((m, v, _) : ess) (Node c i children) go f ((m, v, _) : ess) (Node c i children)
= Node c (f i) $ go (modE $ normalizeLevel (i ^. inE) m v) ess <$> children = Node c (f i) $ go (modE $ normalizeLevel (i ^. inE) m v) ess <$> children
...@@ -269,7 +260,7 @@ levels = L.takeWhile (not . L.null) . L.iterate (L.concatMap subForest) . pure ...@@ -269,7 +260,7 @@ levels = L.takeWhile (not . L.null) . L.iterate (L.concatMap subForest) . pure
subForest (Node _ _ children) = Map.elems children subForest (Node _ _ children) = Map.elems children
entropyLevels :: Entropy e => Getting e i e -> Trie k i -> [[e]] entropyLevels :: Entropy e => Getting e i e -> Trie k i -> [[e]]
entropyLevels inE = fmap (noNaNs . map (nodeEntropy inE)) . levels entropyLevels inE = fmap (noNaNs . map (nodeEntropy inE)) . L.tail . levels
normalizationLevels :: Entropy e => Getting e i e -> Trie k i -> [(e, e, Int)] normalizationLevels :: Entropy e => Getting e i e -> Trie k i -> [(e, e, Int)]
normalizationLevels inE = fmap f . entropyLevels inE normalizationLevels inE = fmap f . entropyLevels inE
...@@ -288,9 +279,9 @@ makeLenses ''Tries ...@@ -288,9 +279,9 @@ makeLenses ''Tries
instance IsTrie Tries where instance IsTrie Tries where
buildTrie to n tts = Tries { _fwd = buildTrie to n tts buildTrie tts = Tries { _fwd = buildTrie tts
, _bwd = buildTrie to n (map reverse $ tts) , _bwd = buildTrie (reverse <$> tts)
} }
nodeEntropy inE (Tries fwd bwd) = nodeEntropy inE (Tries fwd bwd) =
-- VETODO reverse the query for bwd here -- VETODO reverse the query for bwd here
...@@ -308,6 +299,13 @@ instance IsTrie Tries where ...@@ -308,6 +299,13 @@ instance IsTrie Tries where
normalizeEntropy inE modE = onTries (normalizeEntropy inE modE) normalizeEntropy inE modE = onTries (normalizeEntropy inE modE)
printTrie inE (Tries fwd bwd) = do
P.putStrLn "Forward:"
printTrie inE fwd
P.putStrLn ""
P.putStrLn "Backward:"
printTrie inE bwd
onTries :: (Trie k i -> Trie k o) -> Tries k i -> Tries k o onTries :: (Trie k i -> Trie k o) -> Tries k i -> Tries k o
onTries f (Tries fwd bwd) = Tries (f fwd) (f bwd) onTries f (Tries fwd bwd) = Tries (f fwd) (f bwd)
...@@ -382,14 +380,14 @@ entropy_var'' :: Entropy e => Getting e i e -> Tries Token i -> [Token] -> e ...@@ -382,14 +380,14 @@ entropy_var'' :: Entropy e => Getting e i e -> Tries Token i -> [Token] -> e
entropy_var'' inE tries ng = mean $ noNaNs [fwd, bwd] entropy_var'' inE tries ng = mean $ noNaNs [fwd, bwd]
where where
fwd = (nodeEntropy inE (_fwd $ findTrie ng tries)) fwd = (nodeEntropy inE (_fwd $ findTrie ng tries))
bwd = (nodeEntropy inE (_bwd $ findTrie (reverseTokens ng) tries)) bwd = (nodeEntropy inE (_bwd $ findTrie (reverse ng) tries))
--------------------------------------------- ---------------------------------------------
-- | TODO remove function below after following bug fixed -- | TODO remove function below after following bug fixed
-- | TODO entropy_var' /= entropy_var on "<start> token.." -- | TODO entropy_var' /= entropy_var on "<start> token.."
entropy_var' :: Entropy e => Tries Token (I e) -> [Token] -> e entropy_var' :: Entropy e => Tries Token (I e) -> [Token] -> e
entropy_var' tries ng = (mean $ noNaNs [ (nodeEntropy info_entropy (_fwd $ findTrie ng tries)) entropy_var' tries ng = (mean $ noNaNs [ (nodeEntropy info_entropy (_fwd $ findTrie ng tries))
, (nodeEntropy info_entropy (_bwd $ findTrie (reverseTokens ng) tries)) , (nodeEntropy info_entropy (_bwd $ findTrie (reverse ng) tries))
] ]
) )
...@@ -423,18 +421,10 @@ testEleve debug n output checks = do ...@@ -423,18 +421,10 @@ testEleve debug n output checks = do
P.putStrLn (show input) P.putStrLn (show input)
-- forM_ pss (P.putStrLn . show) -- forM_ pss (P.putStrLn . show)
P.putStrLn "" P.putStrLn ""
P.putStrLn "Forward:" printTrie info_entropy nt
printTrie (_fwd nt)
P.putStrLn ""
P.putStrLn "Backward:"
printTrie (_bwd nt)
P.putStrLn ""
P.putStrLn "Levels:"
forM_ (normalizationLevels identity t'') $ \level ->
P.putStrLn $ " " <> show level
P.putStrLn "" P.putStrLn ""
P.putStrLn "Entropy Var:" P.putStrLn "Entropy Var:"
printTrie t'' printTrie identity t''
P.putStrLn "" P.putStrLn ""
P.putStrLn "Splitting:" P.putStrLn "Splitting:"
P.putStrLn $ show res P.putStrLn $ show res
...@@ -448,7 +438,7 @@ testEleve debug n output checks = do ...@@ -448,7 +438,7 @@ testEleve debug n output checks = do
inp = toToken <$> input inp = toToken <$> input
t :: Tries Token Double t :: Tries Token Double
t = buildTrie toToken' n input t = buildTrie (toToken' n input)
& bwd . node_children . at (Terminal Start) . _Just . node_entropy .~ nan & bwd . node_children . at (Terminal Start) . _Just . node_entropy .~ nan
-- TODO NP: this is a hack to set the bwd entropy of Start at NaN. -- TODO NP: this is a hack to set the bwd entropy of Start at NaN.
...@@ -488,12 +478,6 @@ testEleve debug n output checks = do ...@@ -488,12 +478,6 @@ testEleve debug n output checks = do
check sim "fwd_entropy" fwd_entropy (nodeEntropy identity (_fwd t')) check sim "fwd_entropy" fwd_entropy (nodeEntropy identity (_fwd t'))
check sim "bwd_entropy" bwd_entropy (nodeEntropy identity (_bwd t')) check sim "bwd_entropy" bwd_entropy (nodeEntropy identity (_bwd t'))
printTrie :: Show e => Trie Token e -> IO ()
printTrie =
P.putStrLn . Tree.drawTree
. fmap show
. toTree (NonTerminal "")
-- | TODO real data is a list of tokenized sentences -- | TODO real data is a list of tokenized sentences
example0, example1, example2, example3, example4, example5, example6 :: [Text] example0, example1, example2, example3, example4, example5, example6 :: [Text]
example0 = ["New-York is New-York and New-York"] example0 = ["New-York is New-York and New-York"]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment