Commit 543bad9d authored by Alexandre Delanoë's avatar Alexandre Delanoë

[METRICS] TFICF right order of parameters and clean.

parent 01ccce76
...@@ -54,9 +54,7 @@ ngramsGroup l n = Text.intercalate " " ...@@ -54,9 +54,7 @@ ngramsGroup l n = Text.intercalate " "
sortTficf :: (Map Text (Double, Set Text)) sortTficf :: (Map Text (Double, Set Text))
-> [(Double, Set Text)] -> [(Double, Set Text)]
sortTficf = List.reverse sortTficf = List.sortOn fst . elems
. List.sortOn fst
. elems
getTficf' :: UserCorpusId -> MasterCorpusId -> (Text -> Text) getTficf' :: UserCorpusId -> MasterCorpusId -> (Text -> Text)
...@@ -76,8 +74,8 @@ type Infra = Context ...@@ -76,8 +74,8 @@ type Infra = Context
toTficfData :: Infra -> Supra toTficfData :: Infra -> Supra
-> Map Text (Double, Set Text) -> Map Text (Double, Set Text)
toTficfData (ti, mi) (ts, ms) = toTficfData (ti, mi) (ts, ms) =
fromList [ (t, ( tficf (TficfInfra (Total ti) (Count n )) fromList [ (t, ( tficf (TficfInfra (Count n )(Total ti))
(TficfSupra (Total ts) (Count $ maybe 0 fst $ Map.lookup t ms)) (TficfSupra (Count $ maybe 0 fst $ Map.lookup t ms)(Total ts))
, ns , ns
) )
) )
......
...@@ -9,16 +9,18 @@ Portability : POSIX ...@@ -9,16 +9,18 @@ Portability : POSIX
Definition of TFICF : Term Frequency - Inverse of Context Frequency Definition of TFICF : Term Frequency - Inverse of Context Frequency
TFICF is a generalization of [TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf).
-} -}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Metrics.TFICF where module Gargantext.Text.Metrics.TFICF where
--import Data.Text (Text)
import Gargantext.Prelude import Gargantext.Prelude
type TFICF = Double
data TficfContext n m = TficfInfra n m data TficfContext n m = TficfInfra n m
| TficfSupra n m | TficfSupra n m
deriving (Show) deriving (Show)
...@@ -26,17 +28,13 @@ data TficfContext n m = TficfInfra n m ...@@ -26,17 +28,13 @@ data TficfContext n m = TficfInfra n m
data Total = Total {unTotal :: !Double} data Total = Total {unTotal :: !Double}
data Count = Count {unCount :: !Double} data Count = Count {unCount :: !Double}
-- | TFICF is a generalization of TFIDF tficf :: TficfContext Count Total
-- https://en.wikipedia.org/wiki/Tf%E2%80%93idf -> TficfContext Count Total
tficf :: TficfContext Total Count -> TficfContext Total Count -> Double -> TFICF
tficf (TficfInfra (Total it) (Count ic)) tficf (TficfInfra (Count ic) (Total it) )
(TficfSupra (Total st) (Count sc)) (TficfSupra (Count sc) (Total st) )
= tficf' it ic st sc | it >= ic && st >= sc = (ic/it) / log (sc/st)
where | otherwise = panic "Frequency impossible"
tficf' :: Double -> Double -> Double -> Double -> Double
tficf' it' ic' st' sc'
| it' >= ic' && st' >= sc' = (ic'/it') / log (sc'/st')
| otherwise = panic "Frequency impossible"
tficf _ _ = panic "Undefined for these contexts" tficf _ _ = panic "Undefined for these contexts"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment