Commit 8b76e18c authored by Alexandre Delanoë's avatar Alexandre Delanoë

[Ngrams] Eleve implementation.

parent 61756c68
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-
Implementation of EleVe Python version of papers:
-}
module Gargantext.Text.Eleve where
import Data.Ord (Ord)
import qualified Data.List as List
import Data.Monoid
import Data.Text hiding (map)
import Data.Map (Map)
import qualified Data.Map as Map
import Gargantext.Prelude
-- prop (Noeud c _e f) = c == Map.size f
-- TODO remove Feuille
example :: [[Terminal]]
example = map terminal
$ chunkAlong 3 1
$ words "New York and New York is a big apple"
data Terminal = Debut | Terminal Text | Fin
deriving (Ord, Eq, Show)
isDebutFin :: Terminal -> Bool
isDebutFin x = case x of
Debut -> True
Fin -> True
_ -> False
terminal :: [Text] -> [Terminal]
--terminal xs = [Debut] <> (map Terminal xs) <> [Fin]
terminal xs = (map Terminal xs) <> [Fin]
data Arbre k e = Noeud { _noeud_count :: Double
, _noeud_entropy :: e
, _noeud_fils :: Map k (Arbre k e)
}
| Feuille { _noeud_count :: Double }
deriving (Show)
arbreVide :: Arbre k e
arbreVide = Feuille 0
mkArbre :: Monoid e => Double -> Map Terminal (Arbre Terminal e) -> Arbre Terminal e
mkArbre c fils
| Map.null fils = Feuille c
| otherwise = Noeud c mempty fils
insertArbre :: [Terminal] -> Arbre Terminal () -> Arbre Terminal ()
insertArbre [] n = n
insertArbre (x:xs) (Feuille c) = mkArbre (c+1) (Map.singleton x $ insertArbre xs arbreVide)
insertArbre (x:xs) (Noeud c _e f) = mkArbre (c+1) (case Map.lookup x f of
Nothing -> Map.insert x (insertArbre xs arbreVide) f
Just arbre -> Map.insert x (insertArbre xs arbre ) f
)
insertArbres :: [[Terminal]] -> Arbre Terminal ()
insertArbres = List.foldr insertArbre arbreVide
entropyArbre :: Arbre Terminal () -> Arbre Terminal Double
entropyArbre (Feuille c) = Feuille c
entropyArbre (Noeud c _e fils) = (Noeud c e (map entropyArbre fils))
where
e = sum $ map (\(k, f) -> case isDebutFin k of
True -> (_noeud_count f) / c * log c
False -> - c' * log c'
where
c' = (_noeud_count f) / c
)
$ Map.toList fils
normalizeArbre :: Arbre Terminal Double -> Arbre Terminal Double
normalizeArbre (Feuille c) = Feuille c
normalizeArbre (Noeud c e f) = Noeud c e (Map.map (\n -> normalizeLevel n $ Map.elems f) f)
normalizeLevel :: Arbre Terminal Double -> [Arbre Terminal Double] -> Arbre Terminal Double
normalizeLevel (Feuille c) _ = Feuille c
normalizeLevel (Noeud c e f) ns = Noeud c ( (e-m) / v) f
where
es = map _noeud_entropy ns
m = mean es
v = variance es
buildArbre :: [[Terminal]] -> Arbre Terminal Double
buildArbre = normalizeArbre . entropyArbre . insertArbres
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment