Commit 543bad9d authored by Alexandre Delanoë

[METRICS] TFICF right order of parameters and clean.

parent 01ccce76
......@@ -54,9 +54,7 @@ ngramsGroup l n = Text.intercalate " "
-- | Flatten the map's values into a list ordered by their 'Double' score
-- (the first component of each pair); the 'Text' keys of the map are dropped.
--
-- NOTE(review): two bodies are visible because this is a rendered diff. The
-- three-line body (sort, then reverse) looks like the pre-commit version and
-- yields descending scores; the one-line body looks like the post-commit
-- version and yields ascending scores. Callers that relied on the descending
-- order should be checked.
--
-- Assumes 'elems' is the Map values accessor and 'List' is Data.List; the
-- imports are not visible here, so confirm.
sortTficf :: (Map Text (Double, Set Text))
-> [(Double, Set Text)]
sortTficf = List.reverse
. List.sortOn fst
. elems
sortTficf = List.sortOn fst . elems
getTficf' :: UserCorpusId -> MasterCorpusId -> (Text -> Text)
......@@ -76,8 +74,8 @@ type Infra = Context
toTficfData :: Infra -> Supra
-> Map Text (Double, Set Text)
toTficfData (ti, mi) (ts, ms) =
fromList [ (t, ( tficf (TficfInfra (Total ti) (Count n ))
(TficfSupra (Total ts) (Count $ maybe 0 fst $ Map.lookup t ms))
fromList [ (t, ( tficf (TficfInfra (Count n )(Total ti))
(TficfSupra (Count $ maybe 0 fst $ Map.lookup t ms)(Total ts))
, ns
)
)
......
......@@ -9,16 +9,18 @@ Portability : POSIX
Definition of TFICF : Term Frequency - Inverse of Context Frequency
TFICF is a generalization of [TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf).
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Metrics.TFICF where
--import Data.Text (Text)
import Gargantext.Prelude
-- | A TFICF score; a plain alias for 'Double'.
type TFICF = Double
-- | A pair of values of types @n@ and @m@, tagged with the context it belongs to.
-- 'TficfInfra' and 'TficfSupra' carry the same two fields; only the tag differs.
-- NOTE(review): presumably "infra" is the narrower context and "supra" the
-- wider one that contains it; confirm against the callers.
data TficfContext n m = TficfInfra n m
| TficfSupra n m
deriving (Show)
......@@ -26,17 +28,13 @@ data TficfContext n m = TficfInfra n m
-- | Wrapper around a strict 'Double'; 'unTotal' unwraps it.
-- NOTE(review): presumably the total number of occurrences in a context; confirm.
data Total = Total {unTotal :: !Double}
-- | Wrapper around a strict 'Double'; 'unCount' unwraps it.
-- NOTE(review): presumably the number of occurrences of a single term; confirm.
data Count = Count {unCount :: !Double}
-- | TFICF is a generalization of TFIDF
-- https://en.wikipedia.org/wiki/Tf%E2%80%93idf
--
-- NOTE(review): two definitions of 'tficf' are visible because this is a
-- rendered diff. The first (fields in Total, Count order, with a local
-- helper) looks like the pre-commit version; the second (fields in Count,
-- Total order) looks like the post-commit version. Both evaluate the same
-- expression under the same guard.
--
-- Given an infra pair (count @ic@, total @it@) and a supra pair (count @sc@,
-- total @st@), the result is @(ic / it) / log (sc / st)@.
-- Calls 'panic' with "Frequency impossible" when either count exceeds its total.
--
-- NOTE(review): the guard guarantees @sc <= st@, so for positive totals
-- @sc / st <= 1@ and its logarithm is @<= 0@ (assuming 'log' is the standard
-- Floating logarithm). The score is therefore never positive, and the divisor
-- is exactly 0 when @sc == st@. Confirm this is the intended formula.
tficf :: TficfContext Total Count -> TficfContext Total Count -> Double
tficf (TficfInfra (Total it) (Count ic))
(TficfSupra (Total st) (Count sc))
= tficf' it ic st sc
where
-- Local helper holding the guard and the formula on the unwrapped Doubles.
tficf' :: Double -> Double -> Double -> Double -> Double
tficf' it' ic' st' sc'
| it' >= ic' && st' >= sc' = (ic'/it') / log (sc'/st')
| otherwise = panic "Frequency impossible"
-- Second definition: same computation, with each context carrying the count
-- first and the total second, and the guard inlined.
tficf :: TficfContext Count Total
-> TficfContext Count Total
-> TFICF
tficf (TficfInfra (Count ic) (Total it) )
(TficfSupra (Count sc) (Total st) )
| it >= ic && st >= sc = (ic/it) / log (sc/st)
| otherwise = panic "Frequency impossible"
-- Any constructor combination other than Infra followed by Supra is rejected.
tficf _ _ = panic "Undefined for these contexts"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment