1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
{-|
Module : Gargantext.Text.Metrics.TFICF
Description : TFICF Ngrams tools
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Definition of TFICF : Term Frequency - Inverse of Context Frequency
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Metrics.TFICF where
--import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTerms)
data TficfContext n m = TficfLanguage n m | TficfCorpus n m | TficfDocument n m
deriving (Show)
data Tficf = Tficf { tficf_ngramsId :: NgramsId
, tficf_ngramsTerms :: NgramsTerms
, tficf_score :: Double
} deriving (Show)
type SupraContext = TficfContext
type InfraContext = TficfContext
-- | TFICF is a generalization of TFIDF
-- https://en.wikipedia.org/wiki/Tf%E2%80%93idf
tficf :: InfraContext Double Double -> SupraContext Double Double -> Double
tficf (TficfCorpus c c') (TficfLanguage l l') = tficf' c c' l l'
tficf (TficfDocument d d')(TficfCorpus c c') = tficf' d d' c c'
tficf _ _ = panic "Not in definition"
tficf' :: Double -> Double -> Double -> Double -> Double
tficf' c c' l l'
| c <= c' && l < l' = (l/l') / log (c/c')
| otherwise = panic "Frequency impossible"
tficf_example :: [(Double,Double,Double,Double)]
tficf_example = undefined