Commit d7f68b04 authored by Alexandre Delanoë

[FEAT] specificity / genericity and inclusion / exclusion metric.

parent 42ab55b9
......@@ -46,15 +46,10 @@ data Terms = Terms { _terms_label :: Label
-- | A 'Terms' value is shown as its label only; the second field is ignored.
instance Show Terms where
  show (Terms label _) = show label
-- class Inclusion where include
--instance Eq Terms where
-- (==) (Terms _ s1) (Terms _ s2) = s1 `S.isSubsetOf` s2
-- || s2 `S.isSubsetOf` s1
-- | Two 'Terms' are equal when their second fields are equal; the label
-- (first field) does not take part in the comparison.
instance Eq Terms where
  Terms _ lhs == Terms _ rhs = lhs == rhs
------------------------------------------------------------------------
-- | Kind of tag.
-- NOTE(review): presumably 'POS' is part-of-speech and 'NER' is named-entity
-- recognition — confirm against the tagger that produces these values.
data Tag
  = POS
  | NER
  deriving (Show, Eq)
......
......@@ -28,7 +28,7 @@ import Gargantext.Core (Lang(FR))
import Gargantext.Prelude
import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map)
import Gargantext.Viz.Graph.Distances.Matrice (incExcSpeGen, conditional)
import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional)
import Gargantext.Viz.Graph.Index (Index)
import Gargantext.Text.Metrics.Count (cooc, removeApax)
import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
......@@ -48,7 +48,7 @@ import Data.Array.Accelerate (Matrix)
-- | Select a bounded list of index pairs from the matrix @m@.
--
-- @m@ is turned into a pair of score matrices. Each one is converted with
-- 'mat2map', listed with @M.toList@, sorted in ascending order of its value
-- (@L.sortOn snd@) and reduced to its keys. The first 'nIe' keys of the first
-- ranking and the first 'nSg' keys of the second are concatenated, passed
-- through @S.fromList@ / @S.take n@ / @S.toList@ and returned.
-- NOTE(review): presumably @S@ is Data.Set, so duplicates are dropped and the
-- result follows set order rather than score order — confirm.
filterMat :: Matrix Int -> [(Index, Index)]
filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take nSg speGen')
where
-- NOTE(review): this binding is listed twice, once built from 'incExcSpeGen'
-- and once from @conditional'@; presumably these are the two sides of the
-- change shown here — confirm which one is current.
-- 'both' presumably applies the same function to each component of the pair
-- — TODO confirm.
(incExc', speGen') = both ( map fst . L.sortOn snd . M.toList . mat2map) (incExcSpeGen m)
(incExc', speGen') = both ( map fst . L.sortOn snd . M.toList . mat2map) (conditional' m)
-- Upper bound on the number of pairs kept: both quotas added together.
n = nIe + nSg
-- Number of keys taken from the first ranking (incExc').
nIe = 30
-- Number of keys taken from the second ranking (speGen').
nSg = 70
......
......@@ -34,7 +34,7 @@ import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
metrics_text :: Text
metrics_text = T.concat ["A table is an object."
metrics_text = T.intercalate " " ["A table is an object."
,"A glas is an object too."
,"Using a glas to dring is a function."
,"Using a spoon to eat is a function."
......@@ -85,3 +85,6 @@ metrics_cooc' = (mapM (terms Multi EN) $ splitBy (Sentences 0) "The table object
......@@ -95,6 +95,7 @@ miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
where
miniMax' = (the $ minimum $ maximum m)
-- | Conditional distance (basic version)
conditional :: Matrix Int -> Matrix Double
conditional m = run (miniMax $ proba r $ map fromIntegral $ use m)
where
......@@ -102,22 +103,9 @@ conditional m = run (miniMax $ proba r $ map fromIntegral $ use m)
r = rank' m
{-
Metric Specificity and genericity: select terms
Compute genericity/specificity:
P(j|i) = N(ij) / N(ii)
P(i|j) = N(ij) / N(jj)
Gen(i) = Mean{j} P(j_k|i)
Spec(i) = Mean{j} P(i|j_k)
Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
Spec-clusion(i) = (Spec(i) - Gen(i)) / 2
-}
incExcSpeGen :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
incExcSpeGen m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
-- | Conditional distance (advanced version)
conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
where
ie :: Matrix' Double -> Matrix' Double
......@@ -136,11 +124,9 @@ incExcSpeGen m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegr
ys :: Acc (Matrix Double) -> Acc (Matrix Double)
ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
-- filter with threshold
-----------------------------------------------------------------------
-- | Distributional Distance
distributional :: Matrix Int -> Matrix Double
distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
where
......@@ -165,3 +151,35 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
-- | Convert an integer matrix into a matrix of 'Double': the input is lifted
-- with 'use', every cell goes through 'fromIntegral', and the result is
-- evaluated with 'run'.
int2double :: Matrix Int -> Matrix Double
int2double m = run asDoubles
  where
    asDoubles = map fromIntegral (use m)
{-
Metric Specificity and genericity: select terms
Compute genericity/specificity:
P(j|i) = N(ij) / N(ii)
P(i|j) = N(ij) / N(jj)
Gen(i) = Mean{j} P(j_k|i)
Spec(i) = Mean{j} P(i|j_k)
Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
Spec-clusion(i) = (Spec(i) - Gen(i)) / 2
-}
-- | Per-term scores derived from the matrix @m@, returned as a pair
-- @(inclusion/exclusion, specificity/genericity)@.
--
-- The matrix is converted to 'Double' and passed through @proba (rank' m)@.
-- The result is reduced with 'sum' once as is ('pV') and once after
-- 'transpose' ('pH'). The two vectors are then combined as:
--
--   * first component  (inclusion/exclusion):    @pV + pH@
--   * second component (specificity/genericity): @pV - pH@
--
-- This pairing follows the formulas documented for the metric, where the
-- "-clusion" score is built from @Spec + Gen@ and the specificity score from
-- @Spec - Gen@.
--
-- NOTE(review): the documented formulas use means and a final division by 2;
-- plain sums are used here, so the values differ from them by a scale factor
-- — confirm whether normalisation is wanted.
incExcSpeGen :: Matrix Int -> (Vector Double, Vector Double)
incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
  where
    -- Lift the integer matrix with 'use', convert it to Double, apply the
    -- given metric and evaluate it with 'run'.
    run' fun mat = run $ fun $ map fromIntegral $ use mat

    -- 'sum' of the probability matrix.
    pV :: Matrix' Double -> Acc (Vector Double)
    pV mat = sum $ proba (rank' m) mat

    -- 'sum' of the transposed probability matrix.
    pH :: Matrix' Double -> Acc (Vector Double)
    pH mat = sum $ transpose $ proba (rank' m) mat

    -- Inclusion/exclusion: element-wise sum of both reductions.
    inclusionExclusion :: Matrix' Double -> Acc (Vector Double)
    inclusionExclusion mat = zipWith (+) (pV mat) (pH mat)

    -- Specificity/genericity: element-wise difference of both reductions.
    specificityGenericity :: Matrix' Double -> Acc (Vector Double)
    specificityGenericity mat = zipWith (-) (pV mat) (pH mat)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment