TFICF.hs 1.87 KB
Newer Older
1
{-|
2
Module      : Gargantext.Core.Text.Metrics.TFICF
3 4 5 6 7 8 9
Description : TFICF Ngrams tools
Copyright   : (c) CNRS, 2017
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

10
Definition of TFICF : Term Frequency - Inverse of Context Frequency
11

12 13
TFICF is a generalization of [TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf).

14 15 16
-}


17
module Gargantext.Core.Text.Metrics.TFICF ( TFICF
18 19 20 21
                                     , TficfContext(..)
                                     , Total(..)
                                     , Count(..)
                                     , tficf
Alexandre Delanoë's avatar
Alexandre Delanoë committed
22
                                     , sortTficf
23 24
                                     )
  where
25

Alexandre Delanoë's avatar
Alexandre Delanoë committed
26
import Data.Map.Strict (Map, toList)
27
import Data.Text (Text)
Alexandre Delanoë's avatar
Alexandre Delanoë committed
28
import Gargantext.Core.Types (Ordering(..))
Alexandre Delanoë's avatar
Alexandre Delanoë committed
29
import Gargantext.Prelude
Alexandre Delanoë's avatar
Alexandre Delanoë committed
30
import qualified Data.List as List
Alexandre Delanoë's avatar
Alexandre Delanoë committed
31
import qualified Data.Ord as DO (Down(..))
32

33
-- | Module tag embedded in the panic messages raised by this module, so that
-- a failure can be traced back to its origin.
path :: Text
path = "[G.T.Metrics.TFICF]"
35

36
-- | A TFICF score (Term Frequency - Inverse of Context Frequency),
-- represented as a plain 'Double'.
type TFICF = Double
37

38 39 40
-- | A pair of measures (@n@, @m@) tagged with the kind of context they were
-- taken from. 'tficf' instantiates @n@ with 'Count' and @m@ with 'Total'.
data TficfContext n m
  = TficfInfra n m
    -- ^ Measures for the inner context; 'tficf' requires its total to be
    -- no larger than the supra total.
  | TficfSupra n m
    -- ^ Measures for the enclosing context.
  deriving (Show)
41

42 43
-- | Wrapper for the "total" measure of a context (strict 'Double').
data Total = Total
  { unTotal :: !Double
  }

-- | Wrapper for the "count" measure of a context (strict 'Double').
data Count = Count
  { unCount :: !Double
  }
44

45 46 47 48 49
-- | Compute the TFICF score from the measures of an inner ('TficfInfra')
-- context and of its enclosing ('TficfSupra') context.
--
-- Writing @ic@ / @it@ for the infra count / total and @sc@ / @st@ for the
-- supra count / total, the result is @(it / ic) * log (st / sc)@.
--
-- Panics (via 'panic') in two situations:
--
-- * @Frequency impossible@: a count exceeds its own total, or the infra
--   total exceeds the supra total;
-- * @Undefined for these contexts@: the arguments are not given as
--   (infra, supra), in that order.
--
-- NOTE(review): the guard does not reject zero counts, so @ic == 0@ or
-- @sc == 0@ goes through the 'Double' division and yields Infinity/NaN
-- instead of a panic -- confirm whether callers rely on this.
tficf :: TficfContext Count Total
      -> TficfContext Count Total
      -> TFICF
tficf (TficfInfra (Count ic) (Total it) )
      (TficfSupra (Count sc) (Total st) )
            | it >= ic && st >= sc && it <= st = (it/ic) * log (st/sc)
            | otherwise                        = panic
                                               $ "[ERR]"
                                               <> path
                                               <> " Frequency impossible"
-- Any other constructor combination (e.g. supra first, or twice the same
-- kind) has no defined score. The leading space keeps the message format
-- consistent with the one above.
tficf _ _ = panic $ "[ERR]" <> path <> " Undefined for these contexts"
56 57


Alexandre Delanoë's avatar
Alexandre Delanoë committed
58
-- | Flatten a map of scores into a list of @(term, score)@ pairs sorted by
-- score: descending for 'Down', ascending for 'Up'.
sortTficf :: Ordering
          -> Map Text Double
          -> [(Text, Double)]
sortTficf order scores = case order of
  Down -> List.sortOn (DO.Down . snd) pairs
  Up   -> List.sortOn snd pairs
  where
    -- Pairs in the key order produced by 'toList'; the sort then reorders
    -- them by their score component.
    pairs = toList scores
Alexandre Delanoë's avatar
Alexandre Delanoë committed
63