Commit 01ccce76 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[METRICS] TFICF Types improved to avoid mistakes (need some tests).

parent d8fb64ce
...@@ -31,7 +31,7 @@ import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId) ...@@ -31,7 +31,7 @@ import Gargantext.Database.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Utils (Cmd, runPGSQuery) import Gargantext.Database.Utils (Cmd, runPGSQuery)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Metrics.TFICF -- (tficf) import Gargantext.Text.Metrics.TFICF
import Gargantext.Text.Terms.Mono.Stem (stem) import Gargantext.Text.Terms.Mono.Stem (stem)
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
...@@ -76,8 +76,8 @@ type Infra = Context ...@@ -76,8 +76,8 @@ type Infra = Context
toTficfData :: Infra -> Supra toTficfData :: Infra -> Supra
-> Map Text (Double, Set Text) -> Map Text (Double, Set Text)
toTficfData (ti, mi) (ts, ms) = toTficfData (ti, mi) (ts, ms) =
fromList [ (t, ( tficf (TficfInfra n ti) fromList [ (t, ( tficf (TficfInfra (Total ti) (Count n ))
(TficfSupra (maybe 0 fst $ Map.lookup t ms) ts) (TficfSupra (Total ts) (Count $ maybe 0 fst $ Map.lookup t ms))
, ns , ns
) )
) )
......
...@@ -18,44 +18,25 @@ module Gargantext.Text.Metrics.TFICF where ...@@ -18,44 +18,25 @@ module Gargantext.Text.Metrics.TFICF where
--import Data.Text (Text) --import Data.Text (Text)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTerms)
data TficfContext n m = TficfLanguage n m data TficfContext n m = TficfInfra n m
| TficfCorpus n m
| TficfDocument n m
| TficfInfra n m
| TficfSupra n m | TficfSupra n m
deriving (Show) deriving (Show)
data Tficf = Tficf data Total = Total {unTotal :: !Double}
{ tficf_ngramsId :: NgramsId data Count = Count {unCount :: !Double}
, tficf_ngramsTerms :: NgramsTerms
, tficf_score :: Double
} deriving (Show)
data Tficf' = Tficf'
{ tficf'_terms :: NgramsTerms
, tficf'_score :: Double
} deriving (Show)
type SupraContext = TficfContext
type InfraContext = TficfContext
-- | TFICF is a generalization of TFIDF -- | TFICF is a generalization of TFIDF
-- https://en.wikipedia.org/wiki/Tf%E2%80%93idf -- https://en.wikipedia.org/wiki/Tf%E2%80%93idf
tficf :: InfraContext Double Double -> SupraContext Double Double -> Double tficf :: TficfContext Total Count -> TficfContext Total Count -> Double
tficf (TficfCorpus c c') (TficfLanguage l l') = tficf' c c' l l' tficf (TficfInfra (Total it) (Count ic))
tficf (TficfDocument d d')(TficfCorpus c c') = tficf' d d' c c' (TficfSupra (Total st) (Count sc))
tficf (TficfInfra d d')(TficfSupra c c') = tficf' d d' c c' = tficf' it ic st sc
tficf _ _ = panic "Not in definition" where
tficf' :: Double -> Double -> Double -> Double -> Double
tficf' :: Double -> Double -> Double -> Double -> Double tficf' it' ic' st' sc'
tficf' c c' l l' | it' >= ic' && st' >= sc' = (ic'/it') / log (sc'/st')
| c <= c' && l < l' = (l/l') / log (c/c') | otherwise = panic "Frequency impossible"
| otherwise = panic "Frequency impossible" tficf _ _ = panic "Undefined for these contexts"
tficf_example :: [(Double,Double,Double,Double)]
tficf_example = undefined
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment