Commit 01ccce76 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[METRICS] TFICF Types improved to avoid mistakes (need some tests).

parent d8fb64ce
......@@ -31,7 +31,7 @@ import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Utils (Cmd, runPGSQuery)
import Gargantext.Prelude
import Gargantext.Text.Metrics.TFICF -- (tficf)
import Gargantext.Text.Metrics.TFICF
import Gargantext.Text.Terms.Mono.Stem (stem)
import qualified Data.List as List
import qualified Data.Map.Strict as Map
......@@ -76,8 +76,8 @@ type Infra = Context
toTficfData :: Infra -> Supra
-> Map Text (Double, Set Text)
toTficfData (ti, mi) (ts, ms) =
fromList [ (t, ( tficf (TficfInfra n ti)
(TficfSupra (maybe 0 fst $ Map.lookup t ms) ts)
fromList [ (t, ( tficf (TficfInfra (Total ti) (Count n ))
(TficfSupra (Total ts) (Count $ maybe 0 fst $ Map.lookup t ms))
, ns
)
)
......
......@@ -18,44 +18,25 @@ module Gargantext.Text.Metrics.TFICF where
--import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTerms)
data TficfContext n m = TficfLanguage n m
| TficfCorpus n m
| TficfDocument n m
| TficfInfra n m
data TficfContext n m = TficfInfra n m
| TficfSupra n m
deriving (Show)
data Tficf = Tficf
{ tficf_ngramsId :: NgramsId
, tficf_ngramsTerms :: NgramsTerms
, tficf_score :: Double
} deriving (Show)
data Tficf' = Tficf'
{ tficf'_terms :: NgramsTerms
, tficf'_score :: Double
} deriving (Show)
type SupraContext = TficfContext
type InfraContext = TficfContext
data Total = Total {unTotal :: !Double}
data Count = Count {unCount :: !Double}
-- | TFICF is a generalization of TFIDF
-- https://en.wikipedia.org/wiki/Tf%E2%80%93idf
tficf :: InfraContext Double Double -> SupraContext Double Double -> Double
tficf (TficfCorpus c c') (TficfLanguage l l') = tficf' c c' l l'
tficf (TficfDocument d d')(TficfCorpus c c') = tficf' d d' c c'
tficf (TficfInfra d d')(TficfSupra c c') = tficf' d d' c c'
tficf _ _ = panic "Not in definition"
tficf' :: Double -> Double -> Double -> Double -> Double
tficf' c c' l l'
| c <= c' && l < l' = (l/l') / log (c/c')
| otherwise = panic "Frequency impossible"
tficf :: TficfContext Total Count -> TficfContext Total Count -> Double
tficf (TficfInfra (Total it) (Count ic))
(TficfSupra (Total st) (Count sc))
= tficf' it ic st sc
where
tficf' :: Double -> Double -> Double -> Double -> Double
tficf' it' ic' st' sc'
| it' >= ic' && st' >= sc' = (ic'/it') / log (sc'/st')
| otherwise = panic "Frequency impossible"
tficf _ _ = panic "Undefined for these contexts"
tficf_example :: [(Double,Double,Double,Double)]
tficf_example = undefined
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment