Commit be6b4469 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[SCORE] spegen quality tested, need to add test in comments.

parent c50be323
...@@ -31,6 +31,7 @@ import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, coo ...@@ -31,6 +31,7 @@ import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, coo
import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional) import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional)
import Gargantext.Viz.Graph.Index (Index) import Gargantext.Viz.Graph.Index (Index)
import Gargantext.Text.Metrics.Count (cooc, removeApax) import Gargantext.Text.Metrics.Count (cooc, removeApax)
import Gargantext.Text.Metrics (incExcSpeGen)
import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms) import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences)) import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
...@@ -43,8 +44,8 @@ import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain) ...@@ -43,8 +44,8 @@ import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
-- . fromIndex fi $ filterMat $ cooc2mat ti m -- . fromIndex fi $ filterMat $ cooc2mat ti m
import Data.Array.Accelerate (Matrix) import Data.Array.Accelerate (Matrix)
filterMat :: Matrix Int -> [(Index, Index)] filterMat :: Matrix Int -> [(Index, Index)]
filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take nSg speGen') filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take nSg speGen')
where where
...@@ -57,15 +58,15 @@ filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take ...@@ -57,15 +58,15 @@ filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take
pipeline path = do pipeline path = do
-- Text <- IO Text <- FilePath -- Text <- IO Text <- FilePath
text <- readFile path text <- readFile path
let contexts = splitBy (Sentences 3) text let contexts = splitBy (Sentences 5) text
myterms <- extractTerms Multi FR contexts myterms <- extractTerms Multi FR contexts
-- TODO filter (\t -> not . elem t stopList) myterms -- TODO filter (\t -> not . elem t stopList) myterms
-- TODO groupBy (Stem | GroupList) -- TODO groupBy (Stem | GroupList)
let myCooc = removeApax $ cooc myterms let myCooc = removeApax $ cooc myterms
let (ti, fi) = createIndices myCooc --let (ti, fi) = createIndices myCooc
pure ti pure $ incExcSpeGen myCooc
-- Cooc -> Matrix -- Cooc -> Matrix
-- -- filter by spec/gen (dynmaic programming) -- -- filter by spec/gen (dynmaic programming)
......
...@@ -13,57 +13,56 @@ Mainly reexport functions in @Data.Text.Metrics@ ...@@ -13,57 +13,56 @@ Mainly reexport functions in @Data.Text.Metrics@
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Metrics where module Gargantext.Text.Metrics
where
import Data.Text (Text, pack) import Data.Text (Text, pack)
import qualified Data.Text as T import Data.Map (Map)
import Data.List (concat)
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as T
import Data.Tuple.Extra (both)
--import GHC.Real (Ratio) --import GHC.Real (Ratio)
--import qualified Data.Text.Metrics as DTM --import qualified Data.Text.Metrics as DTM
import Data.Array.Accelerate (toList)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Text.Metrics.Count (occurrences, cooc) import Gargantext.Text.Metrics.Count (occurrences, cooc)
import Gargantext.Text.Terms (TermType(Multi), terms) import Gargantext.Text.Terms (TermType(MonoMulti), terms)
import Gargantext.Core (Lang(EN)) import Gargantext.Core (Lang(EN))
import Gargantext.Core.Types (Terms(..)) import Gargantext.Core.Types (Terms(..))
import Gargantext.Text.Context (splitBy, SplitContext(Sentences)) import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Viz.Graph.Distances.Matrice
import Gargantext.Viz.Graph.Index
--noApax :: Ord a => Map a Occ -> Map a Occ --noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m --noApax m = M.filter (>1) m
metrics_text :: Text metrics_text :: Text
metrics_text = T.intercalate " " ["A table is an object." metrics_text = T.intercalate " " metrics_sentences
,"A glas is an object too."
,"Using a glas to dring is a function."
,"Using a spoon to eat is a function."
,"The spoon is an object to eat."
]
metrics_sentences' :: [Text] metrics_sentences' :: [Text]
metrics_sentences' = splitBy (Sentences 0) metrics_text metrics_sentences' = splitBy (Sentences 0) metrics_text
-- | Sentences -- | Sentences
metrics_sentences :: [Text] metrics_sentences :: [Text]
metrics_sentences = ["A table is an object." metrics_sentences = [ "There is a table with a glass of wine and a spoon."
,"A glas is an object too." , "I can see the glass on the table."
,"The glas and the spoon are on the table." , "There was a spoon on that table."
,"The spoon is an object to eat." , "The glass just fall from the table, pouring wine elsewhere."
,"The spoon is on the table and the plate and the glas."] , "I wish the glass did not contain wine."
]
metrics_sentences_Test = metrics_sentences == metrics_sentences' metrics_sentences_Test = metrics_sentences == metrics_sentences'
-- | Terms reordered to visually check occurrences -- | Terms reordered to visually check occurrences
metrics_terms :: [[Text]] metrics_terms :: IO [[Terms]]
metrics_terms = undefined metrics_terms = mapM (terms MonoMulti EN) $ splitBy (Sentences 0) metrics_text
metrics_terms' :: IO [[Terms]]
metrics_terms' = mapM (terms Multi EN) $ splitBy (Sentences 0) metrics_text
--metrics_terms_Test = metrics_terms == ((map _terms_label) <$> metrics_terms')
-- | Occurrences -- | Occurrences
{- {-
...@@ -72,7 +71,7 @@ fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])] ...@@ -72,7 +71,7 @@ fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
, (fromList ["glas"] ,fromList [(["glas"] , 2 )]) , (fromList ["glas"] ,fromList [(["glas"] , 2 )])
, (fromList ["spoon"] ,fromList [(["spoon"] , 2 )]) , (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
-} -}
metrics_occ = occurrences <$> concat <$> (mapM (terms Multi EN) $ splitBy (Sentences 0) metrics_text) metrics_occ = occurrences <$> L.concat <$> metrics_terms
{- {-
-- fromList [((["glas"],["object"]),6) -- fromList [((["glas"],["object"]),6)
...@@ -80,12 +79,24 @@ metrics_occ = occurrences <$> concat <$> (mapM (terms Multi EN) $ splitBy (Sente ...@@ -80,12 +79,24 @@ metrics_occ = occurrences <$> concat <$> (mapM (terms Multi EN) $ splitBy (Sente
,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)] ,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
-} -}
metrics_cooc = cooc <$> (mapM (terms Multi EN) $ splitBy (Sentences 0) metrics_text) metrics_cooc = cooc <$> metrics_terms
metrics_cooc_mat = do
metrics_cooc' = (mapM (terms Multi EN) $ splitBy (Sentences 0) "The table object. The table object.") m <- metrics_cooc
let (ti,_) = createIndices m
let mat = cooc2mat ti m
pure ( ti
, mat
, incExcSpeGen_proba mat
, incExcSpeGen' mat
)
metrics_incExcSpeGen = incExcSpeGen <$> metrics_cooc
incExcSpeGen :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
incExcSpeGen m = both (\x -> L.reverse $ L.sortOn snd $ zip (map snd $ M.toList fi) (toList x) )
(incExcSpeGen' $ cooc2mat ti m )
where
(ti,fi) = createIndices m
...@@ -108,7 +108,7 @@ coocOn f as = foldl' (\a b -> DMS.unionWith (+) a b) empty $ map (coocOn' f) as ...@@ -108,7 +108,7 @@ coocOn f as = foldl' (\a b -> DMS.unionWith (+) a b) empty $ map (coocOn' f) as
xs = [ ((x, y), 1) xs = [ ((x, y), 1)
| x <- ts' | x <- ts'
, y <- ts' , y <- ts'
, x < y -- , x /= y
] ]
......
...@@ -28,9 +28,9 @@ group :: [TokenTag] -> [TokenTag] ...@@ -28,9 +28,9 @@ group :: [TokenTag] -> [TokenTag]
group [] = [] group [] = []
group ntags = group2 NP NP group ntags = group2 NP NP
$ group2 NP VB $ group2 NP VB
$ group2 NP IN -- $ group2 NP IN
$ group2 IN DT $ group2 IN DT
$ group2 VB NP -- $ group2 VB NP
$ group2 JJ NP $ group2 JJ NP
$ group2 JJ JJ $ group2 JJ JJ
$ group2 JJ CC $ group2 JJ CC
......
...@@ -81,8 +81,13 @@ proba :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double) ...@@ -81,8 +81,13 @@ proba :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
proba r mat = zipWith (/) mat (mkSum r mat) proba r mat = zipWith (/) mat (mkSum r mat)
mkSum :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double) mkSum :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
$ fold (+) 0 mat
divByDiag :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
divByDiag r mat = zipWith (/) mat (replicate (constant (Z :. (r :: Int) :. All)) $ diag mat)
diag :: forall e. Elt e => Acc (Matrix e) -> Acc (Vector e)
diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) (m :: Acc (Array DIM2 e))
type Matrix' a = Acc (Matrix a) type Matrix' a = Acc (Matrix a)
...@@ -90,7 +95,7 @@ type InclusionExclusion = Double ...@@ -90,7 +95,7 @@ type InclusionExclusion = Double
type SpecificityGenericity = Double type SpecificityGenericity = Double
miniMax :: Matrix' Double -> Matrix' Double miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
where where
miniMax' = (the $ minimum $ maximum m) miniMax' = (the $ minimum $ maximum m)
...@@ -152,7 +157,7 @@ int2double :: Matrix Int -> Matrix Double ...@@ -152,7 +157,7 @@ int2double :: Matrix Int -> Matrix Double
int2double m = run (map fromIntegral $ use m) int2double m = run (map fromIntegral $ use m)
{- {-
Metric Specificity and genericty: select terms Metric Specificity and genericity: select terms
Compute genericity/specificity: Compute genericity/specificity:
P(j|i) = N(ij) / N(ii) P(j|i) = N(ij) / N(ii)
P(i|j) = N(ij) / N(jj) P(i|j) = N(ij) / N(jj)
...@@ -160,26 +165,37 @@ Metric Specificity and genericty: select terms ...@@ -160,26 +165,37 @@ Metric Specificity and genericty: select terms
Gen(i) = Mean{j} P(j_k|i) Gen(i) = Mean{j} P(j_k|i)
Spec(i) = Mean{j} P(i|j_k) Spec(i) = Mean{j} P(i|j_k)
Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
Spec-clusion(i) = (Spec(i) - Gen(i)) / 2 Spec-clusion(i) = (Spec(i) - Gen(i)) / 2
Gen-clusion(i) = (Spec(i) + Gen(i)) / 2
-} -}
incExcSpeGen :: Matrix Int -> (Vector Double, Vector Double)
incExcSpeGen m = (run' ie m, run' sg m) incExcSpeGen' :: Matrix Int -> (Vector Double, Vector Double)
incExcSpeGen' m = (run' ie m, run' sg m)
where where
run' fun mat = run $ fun $ map fromIntegral $ use mat run' fun mat = run $ fun $ map fromIntegral $ use mat
pV :: Matrix' Double -> Acc (Vector Double) ie :: Acc (Matrix Double) -> Acc (Vector Double)
pV mat = sum $ proba (rank' m) mat
pH :: Matrix' Double -> Acc (Vector Double)
pH mat = sum $ transpose $ proba (rank' m) mat
ie :: Matrix' Double -> Acc (Vector Double)
ie mat = zipWith (-) (pV mat) (pH mat) ie mat = zipWith (-) (pV mat) (pH mat)
--
sg :: Matrix' Double -> Acc (Vector Double) sg :: Acc (Matrix Double) -> Acc (Vector Double)
sg mat = zipWith (+) (pV mat) (pH mat) sg mat = zipWith (+) (pV mat) (pH mat)
n :: Exp Double
n = constant (P.fromIntegral (rank' m - 1) :: Double)
pV :: Acc (Matrix Double) -> Acc (Vector Double)
pV mat = map (\x -> (x-1)/n) $ sum $ divByDiag (rank' m) mat
pH :: Acc (Matrix Double) -> Acc (Vector Double)
pH mat = map (\x -> (x-1)/n) $ sum $ transpose $ divByDiag (rank' m) mat
incExcSpeGen_proba :: Matrix Int -> Matrix Double
incExcSpeGen_proba m = run' pro m
where
run' fun mat = run $ fun $ map fromIntegral $ use mat
pro mat = divByDiag (rank' m) mat
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment