Commit bf03165a authored by Alexandre Delanoë's avatar Alexandre Delanoë

[Scores] Documentation.

parent 457bf1f2
...@@ -6,7 +6,6 @@ License : AGPL + CECILL v3 ...@@ -6,7 +6,6 @@ License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
-} -}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-} {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
...@@ -20,7 +19,6 @@ import Data.Text.IO (readFile) ...@@ -20,7 +19,6 @@ import Data.Text.IO (readFile)
import Control.Arrow ((***)) import Control.Arrow ((***))
import Data.Map.Strict (Map) import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M import qualified Data.Map.Strict as M
import qualified Data.Set as S
import qualified Data.List as L import qualified Data.List as L
import Data.Tuple.Extra (both) import Data.Tuple.Extra (both)
---------------------------------------------- ----------------------------------------------
...@@ -31,32 +29,22 @@ import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, coo ...@@ -31,32 +29,22 @@ import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, coo
import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional) import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional)
import Gargantext.Viz.Graph.Index (Index) import Gargantext.Viz.Graph.Index (Index)
import Gargantext.Text.Metrics.Count (cooc, removeApax) import Gargantext.Text.Metrics.Count (cooc, removeApax)
import Gargantext.Text.Metrics (incExcSpeGen) import Gargantext.Text.Metrics
import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms) import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences)) import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain) import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
-- ord relevance: top n plus inclus
-- échantillonnage de généricity
--
--filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
--filterCooc m =
---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
----(ti, fi) = createIndices m
-- . fromIndex fi $ filterMat $ cooc2mat ti m
import Data.Array.Accelerate (Matrix)
filterMat :: Matrix Int -> [(Index, Index)] {-
filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take nSg speGen') ____ _ _
where / ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
(incExc', speGen') = both ( map fst . L.sortOn snd . M.toList . mat2map) (conditional' m) | | _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
n = nIe + nSg | |_| | (_| | | | (_| | (_| | | | | || __/> <| |_
nIe = 30 \____|\__,_|_| \__, |\__,_|_| |_|\__\___/_/\_\\__|
nSg = 70 |___/
-}
pipeline path = do pipeline path = do
-- Text <- IO Text <- FilePath -- Text <- IO Text <- FilePath
...@@ -69,7 +57,8 @@ pipeline path = do ...@@ -69,7 +57,8 @@ pipeline path = do
let myCooc = removeApax $ cooc myterms let myCooc = removeApax $ cooc myterms
--let (ti, fi) = createIndices myCooc --let (ti, fi) = createIndices myCooc
pure $ incExcSpeGen myCooc pure True
--pure $ incExcSpeGen myCooc
-- Cooc -> Matrix -- Cooc -> Matrix
-- -- filter by spec/gen (dynamic programming) -- -- filter by spec/gen (dynamic programming)
...@@ -81,4 +70,3 @@ pipeline path = do ...@@ -81,4 +70,3 @@ pipeline path = do
-- pure partitions -- pure partitions
---- | Building : -> Graph -> JSON ---- | Building : -> Graph -> JSON
...@@ -8,6 +8,12 @@ Stability : experimental ...@@ -8,6 +8,12 @@ Stability : experimental
Portability : POSIX Portability : POSIX
Mainly reexport functions in @Data.Text.Metrics@ Mainly reexport functions in @Data.Text.Metrics@
TODO
noApax :: Ord a => Map a Occ -> Map a Occ
noApax m = M.filter (>1) m
-} -}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
...@@ -21,6 +27,7 @@ import Data.Map (Map) ...@@ -21,6 +27,7 @@ import Data.Map (Map)
import qualified Data.List as L import qualified Data.List as L
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Text as T import qualified Data.Text as T
import Data.Tuple.Extra (both) import Data.Tuple.Extra (both)
--import GHC.Real (Ratio) --import GHC.Real (Ratio)
...@@ -39,8 +46,36 @@ import Gargantext.Text.Context (splitBy, SplitContext(Sentences)) ...@@ -39,8 +46,36 @@ import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Viz.Graph.Distances.Matrice import Gargantext.Viz.Graph.Distances.Matrice
import Gargantext.Viz.Graph.Index import Gargantext.Viz.Graph.Index
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m -- ord relevance: top n plus inclus
-- échantillonnage de généricity
--
--filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
--filterCooc m =
---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
----(ti, fi) = createIndices m
-- . fromIndex fi $ filterMat $ cooc2mat ti m
import Data.Array.Accelerate (Matrix)
-- | Select a fixed-size sample of matrix cells, returned as (row, column)
-- index pairs.
--
-- The pair produced by @conditional' m@ is processed component-wise: each
-- component is turned into an association list through @mat2map@, sorted by
-- ascending value, and reduced to its keys. The first @nIe@ keys of the first
-- component and the first @nSg@ keys of the second are concatenated and
-- de-duplicated through a 'S.Set'.
--
-- NOTE(review): the result comes out of 'S.toList', so it is ordered by
-- index pair, not by score.
-- NOTE(review): 'L.sortOn' sorts ascending, so the lowest-valued cells are
-- kept -- confirm this matches the intended "top n" selection.
filterMat :: Matrix Int -> [(Index, Index)]
-- The set holds at most nIe + nSg elements, so @S.take n@ never drops any.
filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take nSg speGen')
where
-- Keys of each component of @conditional' m@, ordered by ascending value.
-- Presumably these are the inclusion/exclusion and specificity/genericity
-- matrices -- verify against the definition of @conditional'@.
(incExc', speGen') = both ( map fst . L.sortOn snd . M.toList . mat2map) (conditional' m)
-- Upper bound on the number of cells returned.
n = nIe + nSg
-- Number of cells taken from the first component.
nIe = 30
-- Number of cells taken from the second component.
nSg = 70
-- | Compute both score vectors of 'incExcSpeGen' for a cooccurrence map and
-- return each one as a list of (term, score) pairs sorted by descending score.
--
-- The map is first indexed with 'createIndices' and converted to a matrix
-- with 'cooc2mat'; the two resulting vectors are then labelled and ordered by
-- the local helper @ordonne@.
incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)
where
-- @ti@ is handed to 'cooc2mat'; @fi@ supplies the labels used in @ordonne@.
(ti,fi) = createIndices m
-- Pair every score with a label taken from the values of @fi@ (in ascending
-- key order), then sort by score and reverse to get highest scores first.
-- NOTE(review): the zip is purely positional -- it assumes the values of @fi@
-- in key order line up with the positions of the score vector; confirm.
ordonne x = L.reverse $ L.sortOn snd $ zip (map snd $ M.toList fi) (toList x)
metrics_text :: Text metrics_text :: Text
...@@ -54,7 +89,7 @@ metrics_sentences :: [Text] ...@@ -54,7 +89,7 @@ metrics_sentences :: [Text]
metrics_sentences = [ "There is a table with a glass of wine and a spoon." metrics_sentences = [ "There is a table with a glass of wine and a spoon."
, "I can see the glass on the table." , "I can see the glass on the table."
, "There was only a spoon on that table." , "There was only a spoon on that table."
, "The glass just fall from the table, pouring wine elsewhere." , "The glass just fall from the table, pouring wine everywhere."
, "I wish the glass did not contain wine." , "I wish the glass did not contain wine."
] ]
...@@ -89,23 +124,16 @@ metrics_occ = occurrences <$> L.concat <$> metrics_terms ...@@ -89,23 +124,16 @@ metrics_occ = occurrences <$> L.concat <$> metrics_terms
-} -}
metrics_cooc = cooc <$> metrics_terms metrics_cooc = cooc <$> metrics_terms
metrics_cooc_mat = do metrics_cooc_mat = do
m <- metrics_cooc m <- metrics_cooc
let (ti,_) = createIndices m let (ti,_) = createIndices m
let mat_cooc = cooc2mat ti m let mat_cooc = cooc2mat ti m
pure ( ti pure ( ti
, mat_cooc , mat_cooc
, incExcSpeGen_proba mat_cooc , incExcSpeGen_proba mat_cooc
, incExcSpeGen' mat_cooc , incExcSpeGen mat_cooc
) )
metrics_incExcSpeGen = incExcSpeGen_sorted <$> metrics_cooc
metrics_incExcSpeGen = incExcSpeGen <$> metrics_cooc
incExcSpeGen :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
incExcSpeGen m = both (\x -> L.reverse $ L.sortOn snd $ zip (map snd $ M.toList fi) (toList x) )
(incExcSpeGen' $ cooc2mat ti m )
where
(ti,fi) = createIndices m
...@@ -153,25 +153,17 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m) ...@@ -153,25 +153,17 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
----------------------------------------------------------------------- -----------------------------------------------------------------------
----------------------------------------------------------------------- -----------------------------------------------------------------------
-- | Conditional Distance
{- {-
Metric Specificity and genericity: select terms Metric Specificity and genericity: select terms
N termes let N termes
Ni : occ de i
Ni : occ de i Nij : cooc i et j
Probability to get i given j : P(i|j)=Nij/Nj
Nij : cooc i et j Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of i
P(i|j)=Nij/Nj Probability to get i given j
Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
Spec(i) : 1/(N-1)*Sum( j!=i, P(j|i)) : Specificity of j
Inclusion (i) = Gen(i)+Spec(i) Inclusion (i) = Gen(i)+Spec(i)
Genericity score = Gen(i)- Spec(i) Genericity score = Gen(i)- Spec(i)
...@@ -193,17 +185,19 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m) ...@@ -193,17 +185,19 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
where where
run' fun mat = run $ fun $ map fromIntegral $ use mat run' fun mat = run $ fun $ map fromIntegral $ use mat
-- | Inclusion (i) = Gen(i)+Spec(i)
inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double) inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
inclusionExclusion mat = zipWith (+) (pV mat) (pH mat) inclusionExclusion mat = zipWith (+) (pV mat) (pH mat)
-- --
-- | Genericity score = Gen(i)- Spec(i)
specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double) specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
specificityGenericity mat = zipWith (-) (pV mat) (pH mat) specificityGenericity mat = zipWith (-) (pV mat) (pH mat)
-- TODO find a better term -- | Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
pV :: Acc (Matrix Double) -> Acc (Vector Double) pV :: Acc (Matrix Double) -> Acc (Vector Double)
pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
-- TODO find a better term -- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of i
pH :: Acc (Matrix Double) -> Acc (Vector Double) pH :: Acc (Matrix Double) -> Acc (Vector Double)
pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
...@@ -211,25 +205,24 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m) ...@@ -211,25 +205,24 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
cardN = constant (P.fromIntegral (dim m) :: Double) cardN = constant (P.fromIntegral (dim m) :: Double)
-- | P(i|j) = Nij /N(jj) Probability to get i given j
---- | P(i|j) = N(ij) / N(jj)
p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e) p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
p_ij m = zipWith (/) m (n_jj m) p_ij m = zipWith (/) m (n_jj m)
where where
n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e) n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
n_jj m = backpermute (shape m) n_jj m = backpermute (shape m)
(lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int)) (lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
-> (Z :. j :. j) -> (Z :. j :. j)
) )
) m ) m
-- | P(j|i) = N(ij) / N(ii) -- | P(j|i) = Nij /N(ii) Probability to get j given i
-- to test -- to test
p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e) p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
p_ji = transpose . p_ij p_ji = transpose . p_ij
-- | step to check the result
-- | Step to check the result in visual/qualitative tests
incExcSpeGen_proba :: Matrix Int -> Matrix Double incExcSpeGen_proba :: Matrix Int -> Matrix Double
incExcSpeGen_proba m = run' pro m incExcSpeGen_proba m = run' pro m
where where
...@@ -237,7 +230,6 @@ incExcSpeGen_proba m = run' pro m ...@@ -237,7 +230,6 @@ incExcSpeGen_proba m = run' pro m
pro mat = p_ji mat pro mat = p_ji mat
{- {-
-- | Hypothesis to test maybe later (or not) -- | Hypothesis to test maybe later (or not)
-- TODO ask accelerate for instances to ease such writings: -- TODO ask accelerate for instances to ease such writings:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment