Commit bf03165a authored by Alexandre Delanoë's avatar Alexandre Delanoë

[Scores] Documentation.

parent 457bf1f2
......@@ -6,7 +6,6 @@ License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
......@@ -20,7 +19,6 @@ import Data.Text.IO (readFile)
import Control.Arrow ((***))
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import qualified Data.Set as S
import qualified Data.List as L
import Data.Tuple.Extra (both)
----------------------------------------------
......@@ -31,32 +29,22 @@ import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, coo
import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional)
import Gargantext.Viz.Graph.Index (Index)
import Gargantext.Text.Metrics.Count (cooc, removeApax)
import Gargantext.Text.Metrics (incExcSpeGen)
import Gargantext.Text.Metrics
import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
-- ord relevance: top n most included
-- sampling by genericity
--
--filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
--filterCooc m =
---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
----(ti, fi) = createIndices m
-- . fromIndex fi $ filterMat $ cooc2mat ti m
import Data.Array.Accelerate (Matrix)
-- | Select a bounded set of matrix coordinates from the two results of
-- @conditional'@.
--
-- Each result is converted to a map with 'mat2map' and its coordinates are
-- ordered by ascending value. The first 'nIe' coordinates of the first result
-- and the first 'nSg' coordinates of the second one are merged into a set
-- (duplicates collapse), capped at @nIe + nSg@ elements, and returned in
-- ascending coordinate order.
--
-- NOTE(review): the local names suggest the first result is an
-- inclusion/exclusion score and the second a specificity/genericity score;
-- confirm against @conditional'@.
filterMat :: Matrix Int -> [(Index, Index)]
filterMat m = S.toList . S.take n . S.fromList $ selectedIncExc <> selectedSpeGen
  where
    (incExc', speGen') = both coordsByAscendingValue (conditional' m)
    -- Coordinates of a matrix, ordered by their associated value.
    coordsByAscendingValue = map fst . L.sortOn snd . M.toList . mat2map
    selectedIncExc = L.take nIe incExc'
    selectedSpeGen = L.take nSg speGen'
    n   = nIe + nSg
    nIe = 30
    nSg = 70
{-
____ _ _
/ ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
| | _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
| |_| | (_| | | | (_| | (_| | | | | || __/> <| |_
\____|\__,_|_| \__, |\__,_|_| |_|\__\___/_/\_\\__|
|___/
-}
pipeline path = do
-- Text <- IO Text <- FilePath
......@@ -69,7 +57,8 @@ pipeline path = do
let myCooc = removeApax $ cooc myterms
--let (ti, fi) = createIndices myCooc
pure $ incExcSpeGen myCooc
pure True
--pure $ incExcSpeGen myCooc
-- Cooc -> Matrix
-- -- filter by spec/gen (dynamic programming)
......@@ -81,4 +70,3 @@ pipeline path = do
-- pure partitions
---- | Building : -> Graph -> JSON
......@@ -8,6 +8,12 @@ Stability : experimental
Portability : POSIX
Mainly reexport functions in @Data.Text.Metrics@
TODO
noApax :: Ord a => Map a Occ -> Map a Occ
noApax m = M.filter (>1) m
-}
{-# LANGUAGE NoImplicitPrelude #-}
......@@ -21,6 +27,7 @@ import Data.Map (Map)
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Text as T
import Data.Tuple.Extra (both)
--import GHC.Real (Ratio)
......@@ -39,8 +46,36 @@ import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Viz.Graph.Distances.Matrice
import Gargantext.Viz.Graph.Index
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m
-- ord relevance: top n most included
-- sampling by genericity
--
--filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
--filterCooc m =
---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
----(ti, fi) = createIndices m
-- . fromIndex fi $ filterMat $ cooc2mat ti m
import Data.Array.Accelerate (Matrix)
-- | Select a bounded set of matrix coordinates from the two results of
-- @conditional'@.
--
-- Each result is converted to a map with 'mat2map' and its coordinates are
-- ordered by ascending value. The first 'nIe' coordinates of the first result
-- and the first 'nSg' coordinates of the second one are merged into a set
-- (duplicates collapse), capped at @nIe + nSg@ elements, and returned in
-- ascending coordinate order.
--
-- NOTE(review): the local names suggest the first result is an
-- inclusion/exclusion score and the second a specificity/genericity score;
-- confirm against @conditional'@.
filterMat :: Matrix Int -> [(Index, Index)]
filterMat m = S.toList . S.take n . S.fromList $ selectedIncExc <> selectedSpeGen
  where
    (incExc', speGen') = both coordsByAscendingValue (conditional' m)
    -- Coordinates of a matrix, ordered by their associated value.
    coordsByAscendingValue = map fst . L.sortOn snd . M.toList . mat2map
    selectedIncExc = L.take nIe incExc'
    selectedSpeGen = L.take nSg speGen'
    n   = nIe + nSg
    nIe = 30
    nSg = 70
-- | Run 'incExcSpeGen' on the matrix built from a co-occurrence map and
-- return both score vectors as lists sorted from the highest score to the
-- lowest.
--
-- The map is indexed with 'createIndices' and turned into a matrix with
-- 'cooc2mat'. Every score is paired, positionally, with the values of the
-- second index map taken in ascending key order.
--
-- NOTE(review): presumably @fi@ maps matrix indices back to terms; confirm
-- against 'createIndices'.
incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
incExcSpeGen_sorted m = both highestFirst scoreVectors
  where
    (ti, fi)     = createIndices m
    scoreVectors = incExcSpeGen (cooc2mat ti m)
    labels       = M.elems fi
    -- Ascending sort on the score, then reversed to get descending order.
    highestFirst vec = L.reverse (L.sortOn snd (zip labels (toList vec)))
metrics_text :: Text
......@@ -54,7 +89,7 @@ metrics_sentences :: [Text]
-- | Small fixed list of English example sentences.
metrics_sentences = [ "There is a table with a glass of wine and a spoon."
, "I can see the glass on the table."
, "There was only a spoon on that table."
, "The glass just fall from the table, pouring wine elsewhere."
, "The glass just fall from the table, pouring wine everywhere."
, "I wish the glass did not contain wine."
]
......@@ -89,23 +124,16 @@ metrics_occ = occurrences <$> L.concat <$> metrics_terms
-}
-- | 'cooc' mapped over the result of 'metrics_terms'.
metrics_cooc = cooc <$> metrics_terms
-- | Build the co-occurrence matrix for 'metrics_cooc' and return it together
-- with the values computed from it.
--
-- The resulting tuple holds, in order: the index @ti@ given by
-- 'createIndices', the matrix produced by 'cooc2mat', then the results of
-- 'incExcSpeGen_proba', @incExcSpeGen'@, 'incExcSpeGen_proba' (again) and
-- 'incExcSpeGen' applied to that matrix.
metrics_cooc_mat = do
m <- metrics_cooc
-- Only the first component returned by 'createIndices' is used here.
let (ti,_) = createIndices m
let mat_cooc = cooc2mat ti m
pure ( ti
, mat_cooc
, incExcSpeGen_proba mat_cooc
, incExcSpeGen' mat_cooc
, incExcSpeGen_proba mat_cooc
, incExcSpeGen mat_cooc
)
-- | 'incExcSpeGen' mapped over the co-occurrences produced by 'metrics_cooc'.
metrics_incExcSpeGen = incExcSpeGen <$> metrics_cooc
-- | Run @incExcSpeGen'@ on the matrix built from a co-occurrence map and
-- return both score vectors as lists sorted from the highest score to the
-- lowest.
--
-- The map is indexed with 'createIndices' and turned into a matrix with
-- 'cooc2mat'. Every score is paired, positionally, with the values of the
-- second index map taken in ascending key order.
--
-- NOTE(review): presumably @fi@ maps matrix indices back to terms; confirm
-- against 'createIndices'.
incExcSpeGen :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
incExcSpeGen m = both descendingByScore (incExcSpeGen' (cooc2mat ti m))
  where
    (ti, fi) = createIndices m
    labels   = M.elems fi
    -- Ascending sort on the score, then reversed to get descending order.
    descendingByScore scores = L.reverse . L.sortOn snd $ zip labels (toList scores)
-- | 'incExcSpeGen_sorted' mapped over the co-occurrences produced by
-- 'metrics_cooc'.
metrics_incExcSpeGen = incExcSpeGen_sorted <$> metrics_cooc
......@@ -153,25 +153,17 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
-----------------------------------------------------------------------
-----------------------------------------------------------------------
-- | Conditional Distance
{-
Metric Specificity and genericity: select terms
N : number of terms
Ni : number of occurrences of i
Nij : number of co-occurrences of i and j
P(i|j)=Nij/Nj Probability to get i given j
Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of i
let N be the number of terms
Ni : number of occurrences of i
Nij : number of co-occurrences of i and j
Probability to get i given j : P(i|j)=Nij/Nj
Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of i
Inclusion (i) = Gen(i)+Spec(i)
Genericity score = Gen(i)- Spec(i)
......@@ -193,17 +185,19 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
where
run' fun mat = run $ fun $ map fromIntegral $ use mat
-- | Inclusion (i) = Gen(i)+Spec(i)
inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
inclusionExclusion mat = zipWith (+) (pV mat) (pH mat)
--
-- | Genericity score = Gen(i)- Spec(i)
specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
specificityGenericity mat = zipWith (-) (pV mat) (pH mat)
-- TODO find a better term
-- | Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
pV :: Acc (Matrix Double) -> Acc (Vector Double)
pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
-- TODO find a better term
-- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of i
pH :: Acc (Matrix Double) -> Acc (Vector Double)
pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
......@@ -211,25 +205,24 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
cardN = constant (P.fromIntegral (dim m) :: Double)
---- | P(i|j) = N(ij) / N(jj)
-- | P(i|j) = Nij /N(jj) Probability to get i given j
-- Element-wise division of @m@ by a matrix of the same shape whose cell
-- (i, j) holds the diagonal value (j, j) of @m@.
p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
p_ij m = zipWith (/) m (n_jj m)
where
-- Keeps the shape of the input; every cell (i, j) is read from the
-- diagonal cell (j, j), so the row index is ignored.
n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
n_jj m = backpermute (shape m)
(lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
(lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
-> (Z :. j :. j)
)
) m
-- | P(j|i) = N(ij) / N(ii)
-- | P(j|i) = Nij /N(ii) Probability to get j given i
-- to test
-- Transpose of the matrix computed by 'p_ij'.
p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
p_ji mat = transpose (p_ij mat)
-- | step to check the result
-- | Step to check the result in visual/qualitative tests
incExcSpeGen_proba :: Matrix Int -> Matrix Double
incExcSpeGen_proba m = run' pro m
where
......@@ -237,7 +230,6 @@ incExcSpeGen_proba m = run' pro m
pro mat = p_ji mat
{-
-- | Hypothesis to test maybe later (or not)
-- TODO ask accelerate for instances to ease such writings:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment