Commit be6b4469 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[SCORE] spegen quality tested, need to add test in comments.

parent c50be323
......@@ -31,6 +31,7 @@ import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, coo
import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional)
import Gargantext.Viz.Graph.Index (Index)
import Gargantext.Text.Metrics.Count (cooc, removeApax)
import Gargantext.Text.Metrics (incExcSpeGen)
import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
......@@ -43,8 +44,8 @@ import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
-- . fromIndex fi $ filterMat $ cooc2mat ti m
import Data.Array.Accelerate (Matrix)
filterMat :: Matrix Int -> [(Index, Index)]
filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take nSg speGen')
where
......@@ -57,15 +58,15 @@ filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take
pipeline path = do
-- Text <- IO Text <- FilePath
text <- readFile path
let contexts = splitBy (Sentences 3) text
let contexts = splitBy (Sentences 5) text
myterms <- extractTerms Multi FR contexts
-- TODO filter (\t -> not . elem t stopList) myterms
-- TODO groupBy (Stem | GroupList)
let myCooc = removeApax $ cooc myterms
let (ti, fi) = createIndices myCooc
pure ti
--let (ti, fi) = createIndices myCooc
pure $ incExcSpeGen myCooc
-- Cooc -> Matrix
-- -- filter by spec/gen (dynmaic programming)
......
......@@ -13,57 +13,56 @@ Mainly reexport functions in @Data.Text.Metrics@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Metrics where
module Gargantext.Text.Metrics
where
import Data.Text (Text, pack)
import qualified Data.Text as T
import Data.List (concat)
import Data.Map (Map)
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import Data.Tuple.Extra (both)
--import GHC.Real (Ratio)
--import qualified Data.Text.Metrics as DTM
import Data.Array.Accelerate (toList)
import Gargantext.Prelude
import Gargantext.Text.Metrics.Count (occurrences, cooc)
import Gargantext.Text.Terms (TermType(Multi), terms)
import Gargantext.Text.Terms (TermType(MonoMulti), terms)
import Gargantext.Core (Lang(EN))
import Gargantext.Core.Types (Terms(..))
import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Viz.Graph.Distances.Matrice
import Gargantext.Viz.Graph.Index
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m
metrics_text :: Text
metrics_text = T.intercalate " " ["A table is an object."
,"A glas is an object too."
,"Using a glas to dring is a function."
,"Using a spoon to eat is a function."
,"The spoon is an object to eat."
]
metrics_text = T.intercalate " " metrics_sentences
metrics_sentences' :: [Text]
metrics_sentences' = splitBy (Sentences 0) metrics_text
-- | Sentences
metrics_sentences :: [Text]
metrics_sentences = ["A table is an object."
,"A glas is an object too."
,"The glas and the spoon are on the table."
,"The spoon is an object to eat."
,"The spoon is on the table and the plate and the glas."]
metrics_sentences = [ "There is a table with a glass of wine and a spoon."
, "I can see the glass on the table."
, "There was a spoon on that table."
, "The glass just fall from the table, pouring wine elsewhere."
, "I wish the glass did not contain wine."
]
metrics_sentences_Test = metrics_sentences == metrics_sentences'
-- | Terms reordered to visually check occurrences
metrics_terms :: [[Text]]
metrics_terms = undefined
metrics_terms' :: IO [[Terms]]
metrics_terms' = mapM (terms Multi EN) $ splitBy (Sentences 0) metrics_text
--metrics_terms_Test = metrics_terms == ((map _terms_label) <$> metrics_terms')
metrics_terms :: IO [[Terms]]
metrics_terms = mapM (terms MonoMulti EN) $ splitBy (Sentences 0) metrics_text
-- | Occurrences
{-
......@@ -72,7 +71,7 @@ fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
, (fromList ["glas"] ,fromList [(["glas"] , 2 )])
, (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
-}
metrics_occ = occurrences <$> concat <$> (mapM (terms Multi EN) $ splitBy (Sentences 0) metrics_text)
metrics_occ = occurrences <$> L.concat <$> metrics_terms
{-
-- fromList [((["glas"],["object"]),6)
......@@ -80,12 +79,24 @@ metrics_occ = occurrences <$> concat <$> (mapM (terms Multi EN) $ splitBy (Sente
,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
-}
metrics_cooc = cooc <$> (mapM (terms Multi EN) $ splitBy (Sentences 0) metrics_text)
metrics_cooc' = (mapM (terms Multi EN) $ splitBy (Sentences 0) "The table object. The table object.")
metrics_cooc = cooc <$> metrics_terms
metrics_cooc_mat = do
m <- metrics_cooc
let (ti,_) = createIndices m
let mat = cooc2mat ti m
pure ( ti
, mat
, incExcSpeGen_proba mat
, incExcSpeGen' mat
)
metrics_incExcSpeGen = incExcSpeGen <$> metrics_cooc
incExcSpeGen :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
incExcSpeGen m = both (\x -> L.reverse $ L.sortOn snd $ zip (map snd $ M.toList fi) (toList x) )
(incExcSpeGen' $ cooc2mat ti m )
where
(ti,fi) = createIndices m
......@@ -108,7 +108,7 @@ coocOn f as = foldl' (\a b -> DMS.unionWith (+) a b) empty $ map (coocOn' f) as
xs = [ ((x, y), 1)
| x <- ts'
, y <- ts'
, x < y
-- , x /= y
]
......
......@@ -28,9 +28,9 @@ group :: [TokenTag] -> [TokenTag]
group [] = []
group ntags = group2 NP NP
$ group2 NP VB
$ group2 NP IN
-- $ group2 NP IN
$ group2 IN DT
$ group2 VB NP
-- $ group2 VB NP
$ group2 JJ NP
$ group2 JJ JJ
$ group2 JJ CC
......
......@@ -81,8 +81,13 @@ proba :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
proba r mat = zipWith (/) mat (mkSum r mat)
mkSum :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
mkSum r mat = replicate (constant (Z :. (r :: Int) :. All))
$ fold (+) 0 mat
mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
divByDiag :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
divByDiag r mat = zipWith (/) mat (replicate (constant (Z :. (r :: Int) :. All)) $ diag mat)
diag :: forall e. Elt e => Acc (Matrix e) -> Acc (Vector e)
diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) (m :: Acc (Array DIM2 e))
type Matrix' a = Acc (Matrix a)
......@@ -90,7 +95,7 @@ type InclusionExclusion = Double
type SpecificityGenericity = Double
miniMax :: Matrix' Double -> Matrix' Double
miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
where
miniMax' = (the $ minimum $ maximum m)
......@@ -152,7 +157,7 @@ int2double :: Matrix Int -> Matrix Double
int2double m = run (map fromIntegral $ use m)
{-
Metric Specificity and genericty: select terms
Metric Specificity and genericity: select terms
Compute genericity/specificity:
P(j|i) = N(ij) / N(ii)
P(i|j) = N(ij) / N(jj)
......@@ -160,26 +165,37 @@ Metric Specificity and genericty: select terms
Gen(i) = Mean{j} P(j_k|i)
Spec(i) = Mean{j} P(i|j_k)
Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
Spec-clusion(i) = (Spec(i) - Gen(i)) / 2
Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
-}
incExcSpeGen :: Matrix Int -> (Vector Double, Vector Double)
incExcSpeGen m = (run' ie m, run' sg m)
incExcSpeGen' :: Matrix Int -> (Vector Double, Vector Double)
incExcSpeGen' m = (run' ie m, run' sg m)
where
run' fun mat = run $ fun $ map fromIntegral $ use mat
pV :: Matrix' Double -> Acc (Vector Double)
pV mat = sum $ proba (rank' m) mat
pH :: Matrix' Double -> Acc (Vector Double)
pH mat = sum $ transpose $ proba (rank' m) mat
ie :: Matrix' Double -> Acc (Vector Double)
ie :: Acc (Matrix Double) -> Acc (Vector Double)
ie mat = zipWith (-) (pV mat) (pH mat)
sg :: Matrix' Double -> Acc (Vector Double)
--
sg :: Acc (Matrix Double) -> Acc (Vector Double)
sg mat = zipWith (+) (pV mat) (pH mat)
n :: Exp Double
n = constant (P.fromIntegral (rank' m - 1) :: Double)
pV :: Acc (Matrix Double) -> Acc (Vector Double)
pV mat = map (\x -> (x-1)/n) $ sum $ divByDiag (rank' m) mat
pH :: Acc (Matrix Double) -> Acc (Vector Double)
pH mat = map (\x -> (x-1)/n) $ sum $ transpose $ divByDiag (rank' m) mat
incExcSpeGen_proba :: Matrix Int -> Matrix Double
incExcSpeGen_proba m = run' pro m
where
run' fun mat = run $ fun $ map fromIntegral $ use mat
pro mat = divByDiag (rank' m) mat
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment