Eleve: improve splitting which passes 5/7 tests but still lacks a crucial point

parent c57595c8
...@@ -307,35 +307,37 @@ onTries h (Tries f b) = Tries (h f) (h b) ...@@ -307,35 +307,37 @@ onTries h (Tries f b) = Tries (h f) (h b)
------------------------------------------------------------------------ ------------------------------------------------------------------------
split :: (IsTrie trie, Entropy e) => Lens' i e -> trie Token i -> [Token] -> [[Token]] split :: (IsTrie trie, Entropy e) => Lens' i e -> trie Token i -> [Token] -> [[Token]]
split _ _ [] = [] split _ _ [] = []
split inE t0 (Terminal Start:xs0) = split inE (nodeChild (Terminal Start) t0) xs0 split inE t (Terminal Start:xs) = split inE t xs
split inE t0 (x0:xs0) = go (nodeChild x0 t0) [x0] xs0 split inE t (x0:xs0) = go [x0] xs0
where where
consRev [] xss = xss mayCons [] xss = xss
consRev xs xss = reverse xs : xss mayCons xs xss = xs : xss
go _ pref [] = [reverse pref] go pref [] = [pref]
go _ pref (Terminal Stop:_) = [reverse pref] go pref (Terminal Stop:_) = [pref]
go t pref (Terminal Start:xs) = go t pref xs go _ (Terminal Start:_) = panic "split impossible"
go t pref (x:xs) = go pref (x:xs) =
-- trace (show (if acc then "ACC" else "CUT", (reverse (x : pref), ext), if acc then ">" else "<=", ((reverse pref, et), "+", ([x], ext0)))) $ -- trace (show (if acc then "ACC" else "CUT", (prefx, epxt), if acc then ">" else "<=", ((pref, ept), "+", ([x], ext)))) $
if acc if acc
then go xt (x:pref) xs then go prefx xs
else consRev pref $ go xt0 [x] xs else mayCons pref $ go [x] xs
where where
xt = nodeChild x t prefx = pref <> [x]
xt0 = nodeChild x t0 pt = findTrieR pref t
et = ne 0 t pxt = findTrieR prefx t
xt = findTrieR [x] t
ept = ne pt
-- ^ entropy of the current prefix -- ^ entropy of the current prefix
ext0 = ne 0 xt0 ext = ne xt
-- ^ entropy of [x] -- ^ entropy of [x]
ext = ne 0 xt epxt = ne pxt
-- ^ entropy of the current prefix plus x -- ^ entropy of the current prefix plus x
acc = ext > et + ext0 acc = P.isNaN ept || P.isNaN ext || not (P.isNaN epxt) -- && (epxt > ept + ext)
-- aut(["in","this","paper"]) > aut(["in","this"]) + aut(["paper"]) -- aut(["in","this","paper"]) > aut(["in","this"]) + aut(["paper"])
ne d t = if P.isNaN e then d else e ne = nodeEntropy inE
where e = nodeEntropy inE t
{- {-
split :: Entropy e => Lens' i e -> Tries Token i -> [Token] -> [[Token]] split :: Entropy e => Lens' i e -> Tries Token i -> [Token] -> [[Token]]
...@@ -363,7 +365,7 @@ data Order = Backward | Forward ...@@ -363,7 +365,7 @@ data Order = Backward | Forward
toToken' :: Order -> Int -> [[Text]] -> [[Token]] toToken' :: Order -> Int -> [[Text]] -> [[Token]]
toToken' o n input = L.concat toToken' o n input = L.concat
$ ( filter (/= [Terminal (term o)]) $ ( filter (/= [Terminal (term o)])
. chunkAlongEleve (n + 2) . chunkAlongEleve (n + 1)
. (order o) . (order o)
) )
<$> toToken <$> toToken
...@@ -517,6 +519,7 @@ runTests = ...@@ -517,6 +519,7 @@ runTests =
,("example3", 2, example3, []) ,("example3", 2, example3, [])
,("example4", 4, example4, []) ,("example4", 4, example4, [])
,("example5", 5, example5, []) ,("example5", 5, example5, [])
,("example6", 2, example6, [])
] ]
(\(name, n, ex, checks) -> do (\(name, n, ex, checks) -> do
P.putStrLn $ name <> " " <> show n P.putStrLn $ name <> " " <> show n
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment