{-|
Module      : Gargantext.Core.Text.Metrics.TFICF
Description : TFICF Ngrams tools
Copyright   : (c) CNRS, 2017
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Definition of TFICF : Term Frequency - Inverse of Context Frequency

TFICF is a generalization of [TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf).

-}


module Gargantext.Core.Text.Metrics.TFICF
 ( TFICF
 , TficfContext(..)
 , Total(..)
 , Count(..)
 , tficf
 , sortTficf
 )
where

import Data.List qualified as List
import Data.Map.Strict (toList)
import Data.Ord qualified as DO (Down(..))
import Gargantext.Core.Types (Ordering(..))
import Gargantext.Prelude hiding (Down, Ordering, toList)

-- | Short tag identifying this module; it is spliced into the panic
-- messages raised by 'tficf'.
path :: Text
path = "[G.T.Metrics.TFICF]"

-- | A TFICF (Term Frequency - Inverse of Context Frequency) score, as
-- returned by 'tficf'.
type TFICF = Double

-- | A pair of measures (@n@, @m@) taken in one of the two contexts that
-- 'tficf' compares.
--
-- Reference:
-- https://www.researchgate.net/publication/221226686_TF-ICF_A_New_Term_Weighting_Scheme_for_Clustering_Dynamic_Data_Streams
--
-- 'TficfSupra' n m:
--
--  * m is the total number of documents in the corpus
--
--  * n is the number of documents in which the given term occurred more
--    than once
--
-- 'TficfInfra' n m:
--
--  * NOTE(review): left undocumented by the original author. 'tficf'
--    requires the infra total not to exceed the supra total, so this is
--    presumably the same pair measured on a narrower context - confirm.
data TficfContext n m = TficfInfra n m
                      | TficfSupra n m
  deriving (Show)

-- | Marks a 'Double' as the total (second field, @m@) of a 'TficfContext'
-- as consumed by 'tficf'.
newtype Total = Total { unTotal :: Double }
-- | Marks a 'Double' as the count (first field, @n@) of a 'TficfContext'
-- as consumed by 'tficf'.
newtype Count = Count { unCount :: Double }

-- | Compute the TFICF score of a term from its measures in an infra
-- context (first argument) and a supra context (second argument).
--
-- The first argument must be built with 'TficfInfra' and the second with
-- 'TficfSupra'; any other combination of constructors panics.
--
-- Writing @ic@, @it@ for the infra count and total, and @sc@, @st@ for the
-- supra count and total, the result is
--
-- > (it / ic) * log (st / sc)
--
-- provided that each count does not exceed its total and that the infra
-- total does not exceed the supra total. When that check fails the
-- function panics with a message listing the four values.
--
-- NOTE(review): the first factor is total-over-count, whereas a term
-- frequency is usually count-over-total - confirm this is intended.
--
-- NOTE(review): the guard does not reject a zero @ic@ or @sc@, so the
-- divisions are performed on a zero denominator in that case.
tficf :: TficfContext Count Total
      -> TficfContext Count Total
      -> TFICF
tficf (TficfInfra (Count ic) (Total it))
      (TficfSupra (Count sc) (Total st))
  | it >= ic && st >= sc && it <= st = (it / ic) * log (st / sc)
  | otherwise =
      panicTrace $
        "[ERR]"
          <> path
          <> " Frequency impossible: "
          <> "ic = " <> show ic
          <> ", it = " <> show it
          <> ", sc = " <> show sc
          <> ", st = " <> show st
-- A leading space separates the module tag from the message, matching the
-- " Frequency impossible: " message above.
tficf _ _ = panicTrace $ "[ERR]" <> path <> " Undefined for these contexts"


-- | Flatten a map of scores into a list of (term, score) pairs sorted by
-- score: increasing for 'Up', decreasing for 'Down'.
--
-- The sort is applied to the key-ordered list produced by 'toList'.
sortTficf :: Ordering
          -> Map Text Double
          -> [(Text, Double)]
sortTficf order =
  case order of
    Up   -> ascending
    Down -> descending
  where
    -- Smallest score first.
    ascending scores  = List.sortOn snd (toList scores)
    -- Largest score first: wrapping the score in 'DO.Down' reverses the
    -- comparison.
    descending scores = List.sortOn (DO.Down . snd) (toList scores)

