Commit 9ba0327c authored by Alexandre Delanoë

[LiCENCE] added

parent ec5bb213
Pipeline #414 failed with stage
{-# LANGUAGE NoImplicitPrelude #-} {-|
{-# LANGUAGE OverloadedStrings #-} Module : Gargantext.Text.Eleve
{-# LANGUAGE RankNTypes #-} Description : Unsupervised Word segmentation
{-# LANGUAGE TemplateHaskell #-} Copyright : (c) CNRS, 2019-Present
{- License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
# Implementation of Unsupervised Word Segmentation # Implementation of Unsupervised Word Segmentation
References: References:
...@@ -15,10 +19,10 @@ References: ...@@ -15,10 +19,10 @@ References:
, pages 383–387. [PDF](https://www.aclweb.org/anthology/P12-2075) , pages 383–387. [PDF](https://www.aclweb.org/anthology/P12-2075)
Notes for current implementation: Notes for current implementation:
- The node count is correct; TODO add tests to keep track of it - The node count is correct; TODO AD add tests to keep track of it
- NP fix normalization - NP fix normalization
- NP extract longer ngrams (see paper above, viterbi algo can be used) - NP extract longer ngrams (see paper above, viterbi algo can be used)
- TODO TEST: prop (Node c _e f) = c == Map.size f - TODO AD TEST: prop (Node c _e f) = c == Map.size f
- AD: Real ngrams extraction test - AD: Real ngrams extraction test
from Gargantext.Text.Terms import extractTermsUnsupervised from Gargantext.Text.Terms import extractTermsUnsupervised
...@@ -29,6 +33,11 @@ Notes for current implementation: ...@@ -29,6 +33,11 @@ Notes for current implementation:
-} -}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Text.Eleve where module Gargantext.Text.Eleve where
import Debug.Trace (trace) import Debug.Trace (trace)
...@@ -50,6 +59,7 @@ import Data.Tree (Tree) ...@@ -50,6 +59,7 @@ import Data.Tree (Tree)
import qualified Prelude as P (putStrLn, logBase, String) import qualified Prelude as P (putStrLn, logBase, String)
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Example and tests for development
data I e = I data I e = I
{ _info_entropy :: e { _info_entropy :: e
, _info_norm_entropy :: e , _info_norm_entropy :: e
...@@ -185,10 +195,6 @@ nodeChildren :: Trie k e -> Map k (Trie k e) ...@@ -185,10 +195,6 @@ nodeChildren :: Trie k e -> Map k (Trie k e)
nodeChildren (Node _ _ cs) = cs nodeChildren (Node _ _ cs) = cs
nodeChildren (Leaf _) = Map.empty nodeChildren (Leaf _) = Map.empty
-- | Look up the direct child stored under the given key.
--
-- A 'Node' is queried through its children map; a 'Leaf' has no children
-- map at all, so the answer for it is always 'Nothing'.
nodeChild :: Ord k => k -> Trie k e -> Maybe (Trie k e)
nodeChild key trie = case trie of
  Node _ _ children -> Map.lookup key children
  Leaf _            -> Nothing
levels :: Trie k e -> [[Trie k e]] levels :: Trie k e -> [[Trie k e]]
levels = L.takeWhile (not . L.null) . L.iterate (L.concatMap subForest) . pure levels = L.takeWhile (not . L.null) . L.iterate (L.concatMap subForest) . pure
where where
...@@ -243,4 +249,9 @@ split inE t0 = go t0 [] ...@@ -243,4 +249,9 @@ split inE t0 = go t0 []
True -> go xt (x:pref) xs True -> go xt (x:pref) xs
False -> consRev pref $ go xt0 [x] xs False -> consRev pref $ go xt0 [x] xs
-- Fetch the sub-trie stored under key k among the children of a Node;
-- a Leaf carries no children map, so it always yields Nothing.
nodeChild :: Ord k => k -> Trie k e -> Maybe (Trie k e)
nodeChild k (Node _ _ cs) = Map.lookup k cs
nodeChild _ (Leaf _) = Nothing
ne d t = fromMaybe d (nodeEntropy t ^? _Just . inE) ne d t = fromMaybe d (nodeEntropy t ^? _Just . inE)
...@@ -99,5 +99,3 @@ extractTermsUnsupervised n = ...@@ -99,5 +99,3 @@ extractTermsUnsupervised n =
. map tokenize . map tokenize
. sentences . sentences
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment