Commit d710b723 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'pipeline'

parents 7e930b57 b5124f1e
......@@ -2,3 +2,8 @@
*.swp
*.cabal
*purescript-gargantext
doc
bin
clustering-louvain
profiling
servant-job
......@@ -35,7 +35,6 @@ library:
dependencies:
- QuickCheck
- accelerate
- accelerate-io
- aeson
- aeson-lens
- aeson-pretty
......@@ -46,6 +45,7 @@ library:
- bytestring
- case-insensitive
- cassava
- clustering-louvain
- conduit
- conduit-extra
- containers
......
......@@ -46,15 +46,10 @@ data Terms = Terms { _terms_label :: Label
instance Show Terms where
show (Terms l s) = show l
-- class Inclusion where include
--instance Eq Terms where
-- (==) (Terms _ s1) (Terms _ s2) = s1 `S.isSubsetOf` s2
-- || s2 `S.isSubsetOf` s1
instance Eq Terms where
(==) (Terms _ s1) (Terms _ s2) = s1 == s2
------------------------------------------------------------------------
data Tag = POS | NER
deriving (Show, Eq)
......
......@@ -6,7 +6,6 @@ License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
......@@ -17,27 +16,57 @@ module Gargantext.Pipeline
import Data.Text.IO (readFile)
import Control.Arrow ((***))
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import qualified Data.List as L
import Data.Tuple.Extra (both)
----------------------------------------------
----------------------------------------------
import Gargantext.Core
import Gargantext.Core (Lang(FR))
import Gargantext.Prelude
import Gargantext.Viz.Graph.Index (score)
import Gargantext.Viz.Graph.Distances.Matrice (distributional)
import Gargantext.Text.Metrics.Occurrences
import Gargantext.Text.Terms
import Gargantext.Text.Context
import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map)
import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional)
import Gargantext.Viz.Graph.Index (Index)
import Gargantext.Text.Metrics.Count (cooc, removeApax)
import Gargantext.Text.Metrics
import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
{-
____ _ _
/ ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
| | _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
| |_| | (_| | | | (_| | (_| | | | | || __/> <| |_
\____|\__,_|_| \__, |\__,_|_| |_|\__\___/_/\_\\__|
|___/
-}
pipeline path = do
-- Text <- IO Text <- FilePath
text <- readFile path
let contexts = splitBy (Sentences 3) text
text <- readFile path
let contexts = splitBy (Sentences 5) text
myterms <- extractTerms Multi FR contexts
-- TODO filter (\t -> not . elem t stopList) myterms
-- TODO groupBy (Stem | GroupList)
let myCooc = removeApax $ cooc myterms
--let (ti, fi) = createIndices myCooc
pure True
--pure $ incExcSpeGen myCooc
-- Cooc -> Matrix
pure $ score distributional myCooc
-- Matrix -> Clustering -> Graph -> JSON
-- -- filter by spec/gen (dynmaic programming)
-- let theScores = M.filter (>0) $ score conditional myCoocFiltered
----
------ -- Matrix -> Clustering
------ pure $ bestpartition False $ map2graph $ toIndex ti theScores
-- partitions <- cLouvain theScores
-- pure partitions
---- | Building : -> Graph -> JSON
......@@ -237,5 +237,3 @@ unMaybe = map fromJust . L.filter isJust
-- maximumWith
maximumWith f = L.maximumBy (\x y -> compare (f x) (f y))
......@@ -27,7 +27,7 @@ import NLP.FullStop (segment)
-----------------------------------------------------------------
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Text.Metrics.Occurrences (Occ, occurrences, cooc)
import Gargantext.Text.Metrics.Count (Occ, occurrences, cooc)
import Gargantext.Prelude hiding (filter)
-----------------------------------------------------------------
......
......@@ -8,18 +8,132 @@ Stability : experimental
Portability : POSIX
Mainly reexport functions in @Data.Text.Metrics@
TODO
noApax :: Ord a => Map a Occ -> Map a Occ
noApax m = M.filter (>1) m
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Metrics where
module Gargantext.Text.Metrics
where
--import Data.Text (Text)
import Data.Text (Text, pack)
import Data.Map (Map)
import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Text as T
import Data.Tuple.Extra (both)
--import GHC.Real (Ratio)
--import qualified Data.Text.Metrics as DTM
--
--import Gargantext.Prelude
--
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m
import Data.Array.Accelerate (toList)
import Gargantext.Prelude
import Gargantext.Text.Metrics.Count (occurrences, cooc)
import Gargantext.Text.Terms (TermType(MonoMulti), terms)
import Gargantext.Core (Lang(EN))
import Gargantext.Core.Types (Terms(..))
import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Viz.Graph.Distances.Matrice
import Gargantext.Viz.Graph.Index
-- ord relevance: top n plus inclus
-- échantillonnage de généricity
--
--filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
--filterCooc m =
---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
----(ti, fi) = createIndices m
-- . fromIndex fi $ filterMat $ cooc2mat ti m
import Data.Array.Accelerate (Matrix)
filterMat :: Matrix Int -> [(Index, Index)]
filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take nSg speGen')
where
(incExc', speGen') = both ( map fst . L.sortOn snd . M.toList . mat2map) (conditional' m)
n = nIe + nSg
nIe = 30
nSg = 70
incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)
where
(ti,fi) = createIndices m
ordonne x = L.reverse $ L.sortOn snd $ zip (map snd $ M.toList fi) (toList x)
metrics_text :: Text
metrics_text = T.intercalate " " metrics_sentences
metrics_sentences' :: [Text]
metrics_sentences' = splitBy (Sentences 0) metrics_text
-- | Sentences
metrics_sentences :: [Text]
metrics_sentences = [ "There is a table with a glass of wine and a spoon."
, "I can see the glass on the table."
, "There was only a spoon on that table."
, "The glass just fall from the table, pouring wine everywhere."
, "I wish the glass did not contain wine."
]
metrics_sentences_Test = metrics_sentences == metrics_sentences'
-- | Terms reordered to visually check occurrences
-- >>>
{- [ [["table"],["glass"],["wine"],["spoon"]]
, [["glass"],["table"]]
, [["spoon"],["table"]]
, [["glass"],["table"],["wine"]]
, [["glass"],["wine"]]
]
-}
metrics_terms :: IO [[Terms]]
metrics_terms = mapM (terms MonoMulti EN) $ splitBy (Sentences 0) metrics_text
-- | Occurrences
{-
fromList [ (fromList ["table"] ,fromList [(["table"] , 3 )])]
, (fromList ["object"],fromList [(["object"], 3 )])
, (fromList ["glas"] ,fromList [(["glas"] , 2 )])
, (fromList ["spoon"] ,fromList [(["spoon"] , 2 )])
-}
metrics_occ = occurrences <$> L.concat <$> metrics_terms
{-
-- fromList [((["glas"],["object"]),6)
,((["glas"],["spoon"]),4)
,((["glas"],["table"]),6),((["object"],["spoon"]),6),((["object"],["table"]),9),((["spoon"],["table"]),6)]
-}
metrics_cooc = cooc <$> metrics_terms
metrics_cooc_mat = do
m <- metrics_cooc
let (ti,_) = createIndices m
let mat_cooc = cooc2mat ti m
pure ( ti
, mat_cooc
, incExcSpeGen_proba mat_cooc
, incExcSpeGen mat_cooc
)
metrics_incExcSpeGen = incExcSpeGen_sorted <$> metrics_cooc
{-|
Module : Gargantext.Text.Metrics.Occurrences
Module : Gargantext.Text.Metrics.Count
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
......@@ -25,7 +25,7 @@ Source : https://en.wikipedia.org/wiki/Type%E2%80%93token_distinction#Occurrence
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Metrics.Occurrences
module Gargantext.Text.Metrics.Count
where
......@@ -71,7 +71,6 @@ type Grouped = Stems
--λ: cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon blues lagoon", "red lagoon red lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)]
----
-}
type Occs = Int
......@@ -81,10 +80,16 @@ removeApax :: Map (Label, Label) Int -> Map (Label, Label) Int
removeApax = DMS.filter (> 1)
cooc :: [[Terms]] -> Map (Label, Label) Int
cooc tss =
mapKeys (delta $ labelPolicy terms_occs) $ cooc' (map (Set.fromList . map _terms_stem) tss)
cooc tss = coocOnWithLabel _terms_stem (labelPolicy terms_occs) tss
where
terms_occs = occurrencesOn _terms_stem (List.concat tss)
coocOnWithLabel :: (Ord label, Ord b) => (a -> b) -> (b -> label)
-> [[a]] -> Map (label, label) Coocs
coocOnWithLabel on policy tss =
mapKeys (delta policy) $ coocOn on tss
where
terms_occs = occurrences (List.concat tss)
delta f = f *** f
......@@ -93,26 +98,29 @@ labelPolicy m g = case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList
Just label -> label
Nothing -> panic $ "Label of Grouped not found: " <> (pack $ show g)
cooc' :: Ord b => [Set b] -> Map (b, b) Coocs
cooc' tss = foldl' (\m (xy,c) -> insertWith ((+)) xy c m) empty xs
coocOn :: Ord b => (a -> b) -> [[a]] -> Map (b, b) Coocs
coocOn f as = foldl' (\a b -> DMS.unionWith (+) a b) empty $ map (coocOn' f) as
where
xs = [ ((x, y), 1)
| xs <- tss
, ys <- tss
, x <- Set.toList xs
, y <- Set.toList ys
, x < y
]
coocOn' :: Ord b => (a -> b) -> [a] -> Map (b, b) Coocs
coocOn' f ts = foldl' (\m (xy,c) -> insertWith ((+)) xy c m) empty xs
where
ts' = List.nub $ map f ts
xs = [ ((x, y), 1)
| x <- ts'
, y <- ts'
-- , x /= y
]
-- | Compute the grouped occurrences (occ)
occurrences :: [Terms] -> Map Grouped (Map Terms Int)
occurrences = occurrences' _terms_stem
occurrences = occurrencesOn _terms_stem
occurrences' :: (Ord a, Ord b) => (a -> b) -> [a] -> Map b (Map a Int)
occurrences' f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a 1) m) empty
occurrencesOn :: (Ord a, Ord b) => (a -> b) -> [a] -> Map b (Map a Int)
occurrencesOn f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a 1) m) empty
-- TODO add groups and filter stops
sumOcc :: Ord a => [Occ a] -> Occ a
sumOcc xs = foldl' (unionWith (+)) empty xs
......
......@@ -14,42 +14,52 @@ Domain Specific Language to manage Frequent Item Set (FIS)
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Metrics.FrequentItemSet
( Fis, Size
( Fis, Size(..)
, occ_hlcm, cooc_hlcm
, all, between
, fisWithSize
, fisWith
, fisWithSizePoly
, fisWithSizePoly2
, module HLCM
)
where
import Data.List (tail, filter)
import Data.Either
import Prelude (Functor(..)) -- TODO
import qualified Data.Map.Strict as Map
import Data.Map.Strict (Map)
import qualified Data.Set as Set
import Data.Set (Set)
import qualified Data.Vector as V
import Data.Vector (Vector)
import Data.List (filter, concat)
import Data.Maybe (catMaybes)
import HLCM
import Gargantext.Prelude
type Size = Either Int (Int, Int)
--data Size = Point | Segment
data Size = Point Int | Segment Int Int
------------------------------------------------------------------------
-- | Occurrence is Frequent Item Set of size 1
occ_hlcm :: Frequency -> [[Item]] -> [Fis]
occ_hlcm f is = fisWithSize (Left 1) f is
occ_hlcm = fisWithSize (Point 1)
-- | Cooccurrence is Frequent Item Set of size 2
cooc_hlcm :: Frequency -> [[Item]] -> [Fis]
cooc_hlcm f is = fisWithSize (Left 2) f is
cooc_hlcm = fisWithSize (Point 2)
all :: Frequency -> [[Item]] -> [Fis]
all f is = fisWith Nothing f is
all = fisWith Nothing
------------------------------------------------------------------------
between :: (Int, Int) -> Frequency -> [[Item]] -> [Fis]
between (x,y) f is = fisWithSize (Right (x,y)) f is
between (x,y) = fisWithSize (Segment x y)
--maximum :: Int -> Frequency -> [[Item]] -> [Fis]
--maximum m f is = between (0,m) f is
--maximum m = between (0,m)
------------------------------------------------------------------------
......@@ -62,31 +72,48 @@ data Fis' a = Fis' { _fisCount :: Int
, _fisItemSet :: [a]
} deriving (Show)
instance Functor Fis' where
fmap f (Fis' c is) = Fis' c (fmap f is)
-- | Sugar from items to FIS
items2fis :: [Item] -> Maybe Fis
items2fis is = case head is of
Nothing -> Nothing
Just h -> Just (Fis' h (tail is))
items2fis [] = Nothing
items2fis (i:is) = Just $ Fis' i is
------------------------------------------------------------------------
------------------------------------------------------------------------
fisWithSize :: Size -> Frequency -> [[Item]] -> [Fis]
fisWithSize n f is = case n of
Left n' -> fisWith (Just (\x -> length x == (n'+1) )) f is
Right (a,b) -> fisWith (Just (\x -> cond1 a x && cond2 b x)) f is
Point n' -> fisWith (Just (\x -> length x == (n'+1) )) f is
Segment a b -> fisWith (Just (\x -> cond a (length x) b)) f is
where
cond1 a' x = length x >= a'
cond2 b' x = length x <= b'
cond a' x b' = a' <= x && x <= b'
fisWith :: Maybe ([Item] -> Bool) -> Frequency -> [[Item]] -> [Fis]
fisWith s f is = unMaybe $ map items2fis $ filter' $ runLCMmatrix is f
fisWith s f is = catMaybes $ map items2fis $ filter' $ runLCMmatrix is f
where
filter' = case s of
Nothing -> identity
Just fun -> filter fun
-- Here the sole purpose to take the keys as a Set is tell we do not want
-- duplicates.
fisWithSizePoly :: Ord a => Size -> Frequency -> Set a -> [[a]] -> [Fis' a]
fisWithSizePoly n f ks = map (fmap fromItem) . fisWithSize n f . map (map toItem)
where
ksv = V.fromList $ Set.toList ks
ksm = Map.fromList . flip zip [0..] $ V.toList ksv
toItem = (ksm Map.!)
fromItem = (ksv V.!)
fisWithSizePoly2 :: Ord a => Size -> Frequency -> [[a]] -> [Fis' a]
fisWithSizePoly2 n f is = fisWithSizePoly n f ks is
where
ks = Set.fromList $ concat is
------------------------------------------------------------------------
------------------------------------------------------------------------
......
......@@ -42,16 +42,23 @@ import Gargantext.Core.Types
import Gargantext.Text.Terms.Multi (multiterms)
import Gargantext.Text.Terms.Mono (monoterms')
data TermType = Mono | Multi
data TermType = Mono | Multi | MonoMulti
-- remove Stop Words
-- map (filter (\t -> not . elem t)) $
------------------------------------------------------------------------
-- | Sugar to extract terms from text (hiddeng mapM from end user).
extractTerms :: Traversable t => TermType -> Lang -> t Text -> IO (t [Terms])
extractTerms termType lang = mapM (terms termType lang)
------------------------------------------------------------------------
-- | Terms from Text
-- Mono : mono terms
-- Multi : multi terms
-- MonoMulti : mono and multi
-- TODO : multi terms should exclude mono (intersection is not empty yet)
terms :: TermType -> Lang -> Text -> IO [Terms]
terms Mono lang txt = pure $ monoterms' lang txt
terms Multi lang txt = multiterms lang txt
terms Mono lang txt = pure $ monoterms' lang txt
terms Multi lang txt = multiterms lang txt
terms MonoMulti lang txt = terms Multi lang txt
------------------------------------------------------------------------
......@@ -18,23 +18,27 @@ module Gargantext.Text.Terms.Multi (multiterms)
import Data.Text hiding (map, group, filter, concat)
import Data.List (concat)
import qualified Data.Set as S
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Core.Types
import Gargantext.Text.Terms.Multi.PosTagging
import Gargantext.Text.Terms.Mono.Stem (stem)
import qualified Gargantext.Text.Terms.Multi.Lang.En as En
import qualified Gargantext.Text.Terms.Multi.Lang.Fr as Fr
multiterms :: Lang -> Text -> IO [Terms]
multiterms lang txt = concat
<$> map (map tokenTag2terms)
<$> map (map (tokenTag2terms lang))
<$> map (filter (\t -> _my_token_pos t == Just NP))
<$> tokenTags lang txt
tokenTag2terms :: TokenTag -> Terms
tokenTag2terms (TokenTag w t _ _) = Terms w t
tokenTag2terms :: Lang -> TokenTag -> Terms
tokenTag2terms lang (TokenTag w t _ _) = Terms w t'
where
t' = S.fromList $ map (stem lang) $ S.toList t
tokenTags :: Lang -> Text -> IO [[TokenTag]]
tokenTags lang s = map (group lang) <$> tokenTags' lang s
......
......@@ -28,9 +28,9 @@ group :: [TokenTag] -> [TokenTag]
group [] = []
group ntags = group2 NP NP
$ group2 NP VB
$ group2 NP IN
-- $ group2 NP IN
$ group2 IN DT
$ group2 VB NP
-- $ group2 VB NP
$ group2 JJ NP
$ group2 JJ JJ
$ group2 JJ CC
......
......@@ -27,8 +27,8 @@ group :: [TokenTag] -> [TokenTag]
group [] = []
group ntags = group2 NP NP
$ group2 NP VB
$ group2 NP IN
$ group2 IN DT
-- $ group2 NP IN
-- $ group2 IN DT
$ group2 VB NP
$ group2 JJ NP
$ group2 NP JJ
......
......@@ -96,10 +96,10 @@ data Properties = Properties { _propertiesAnnotators :: Text
$(deriveJSON (unPrefix "_properties") ''Properties)
data Sentences = Sentences { _sentences :: [Sentence]}
data PosSentences = PosSentences { _sentences :: [Sentence]}
deriving (Show, Generic)
$(deriveJSON (unPrefix "_") ''Sentences)
$(deriveJSON (unPrefix "_") ''PosSentences)
-- request =
......@@ -134,7 +134,7 @@ corenlpRaw lang txt = do
pure (getResponseBody response)
corenlp :: Lang -> Text -> IO Sentences
corenlp :: Lang -> Text -> IO PosSentences
corenlp lang txt = do
response <- corenlp' lang txt
pure (getResponseBody response)
......
......@@ -60,8 +60,3 @@ $(deriveJSON (unPrefix "g_") ''Graph)
......@@ -28,10 +28,12 @@ Implementation use Accelerate library :
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.Viz.Graph.Distances.Matrice
where
......@@ -46,7 +48,7 @@ import Data.Maybe (Maybe(Just))
import qualified Gargantext.Prelude as P
import qualified Data.Array.Accelerate.Array.Representation as Repr
import Gargantext.Text.Metrics.Occurrences
import Gargantext.Text.Metrics.Count
-----------------------------------------------------------------------
......@@ -67,33 +69,48 @@ myMat n = matrix n [1..]
rank :: (Matrix a) -> Int
rank m = arrayRank $ arrayShape m
rank' :: (Matrix a) -> Int
rank' m = n
-----------------------------------------------------------------------
-- | Dimension of a square Matrix
-- How to force use with SquareMatrix ?
type Dim = Int
dim :: (Matrix a) -> Dim
dim m = n
where
Z :. _ :. n = arrayShape m
-- == indexTail (arrayShape m)
-----------------------------------------------------------------------
-- | Conditional Distance
proba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
proba r mat = zipWith (/) mat (mkSum r mat)
type Rank = Int
mkSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
proba :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
proba r mat = zipWith (/) mat (mkSum r mat)
-- divByDiag
divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
where
diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
-----------------------------------------------------------------------
mkSum :: Rank -> Acc (Matrix Double) -> Acc (Matrix Double)
mkSum r mat = replicate (constant (Z :. (r :: Int) :. All))
$ fold (+) 0 mat
miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
where
miniMax' = (the $ minimum $ maximum m)
-- | Conditional distance (basic version)
conditional :: Matrix Int -> Matrix Double
conditional m = run (miniMax $ proba (dim m) $ map fromIntegral $ use m)
type Matrix' a = Acc (Matrix a)
type InclusionExclusion = Double
type SpecificityGenericity = Double
conditional :: Matrix Double -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
conditional m = (run $ ie (use m), run $ sg (use m))
-- | Conditional distance (advanced version)
conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
where
ie :: Matrix' Double -> Matrix' Double
ie :: Acc (Matrix Double) -> Acc (Matrix Double)
ie mat = map (\x -> x / (2*n-1)) $ zipWith (+) (xs mat) (ys mat)
sg :: Acc (Matrix Double) -> Acc (Matrix Double)
sg mat = map (\x -> x / (2*n-1)) $ zipWith (-) (xs mat) (ys mat)
......@@ -101,27 +118,21 @@ conditional m = (run $ ie (use m), run $ sg (use m))
n :: Exp Double
n = P.fromIntegral r
r :: Rank
r = rank' m
r :: Dim
r = dim m
xs :: Matrix' Double -> Matrix' Double
xs :: Acc (Matrix Double) -> Acc (Matrix Double)
xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
ys :: Acc (Matrix Double) -> Acc (Matrix Double)
ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
-- filter with threshold
-----------------------------------------------------------------------
-- | Distributional Distance
distributional :: Matrix Int -> Matrix Double
distributional m = run $ filter $ ri (map fromIntegral $ use m)
distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
where
n = rank' m
miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
where
miniMax' = (the $ minimum $ maximum m)
n = dim m
filter m = zipWith (\a b -> max a b) m (transpose m)
......@@ -139,6 +150,98 @@ distributional m = run $ filter $ ri (map fromIntegral $ use m)
cross mat = zipWith (-) (mkSum n mat) (mat)
int2double :: Matrix Int -> Matrix Double
int2double m = run (map fromIntegral $ use m)
-----------------------------------------------------------------------
-----------------------------------------------------------------------
{-
Metric Specificity and genericity: select terms
let N termes
Ni : occ de i
Nij : cooc i et j
Probability to get i given j : P(i|j)=Nij/Nj
Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
Inclusion (i) = Gen(i)+Spec(i)
Genericity score = Gen(i)- Spec(i)
References:
* Science mapping with asymmetrical paradigmatic proximity Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276, arXiv:0803.2315 [cs.OH]
-}
type InclusionExclusion = Double
type SpecificityGenericity = Double
data SquareMatrix = SymetricMatrix | NonSymetricMatrix
type SymetricMatrix = Matrix
type NonSymetricMatrix = Matrix
incExcSpeGen :: Matrix Int -> (Vector InclusionExclusion, Vector SpecificityGenericity)
incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
where
run' fun mat = run $ fun $ map fromIntegral $ use mat
-- | Inclusion (i) = Gen(i)+Spec(i)
inclusionExclusion :: Acc (Matrix Double) -> Acc (Vector Double)
inclusionExclusion mat = zipWith (+) (pV mat) (pH mat)
--
-- | Genericity score = Gen(i)- Spec(i)
specificityGenericity :: Acc (Matrix Double) -> Acc (Vector Double)
specificityGenericity mat = zipWith (-) (pV mat) (pH mat)
-- | Gen(i) : 1/(N-1)*Sum(j!=i, P(i|j)) : Genericity of i
pV :: Acc (Matrix Double) -> Acc (Vector Double)
pV mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ij mat
-- | Spec(i) : 1/(N-1)*Sum(j!=i, P(j|i)) : Specificity of j
pH :: Acc (Matrix Double) -> Acc (Vector Double)
pH mat = map (\x -> (x-1)/(cardN-1)) $ sum $ p_ji mat
cardN :: Exp Double
cardN = constant (P.fromIntegral (dim m) :: Double)
-- | P(i|j) = Nij /N(jj) Probability to get i given j
p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
p_ij m = zipWith (/) m (n_jj m)
where
n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
n_jj m = backpermute (shape m)
(lift1 ( \(Z :. (_ :: Exp Int) :. (j:: Exp Int))
-> (Z :. j :. j)
)
) m
-- | P(j|i) = Nij /N(ii) Probability to get i given j
-- to test
p_ji :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
p_ji = transpose . p_ij
-- | Step to ckeck the result in visual/qualitative tests
incExcSpeGen_proba :: Matrix Int -> Matrix Double
incExcSpeGen_proba m = run' pro m
where
run' fun mat = run $ fun $ map fromIntegral $ use mat
pro mat = p_ji mat
{-
-- | Hypothesis to test maybe later (or not)
-- TODO ask accelerate for instances to ease such writtings:
p_ :: (Elt e, P.Fractional (Exp e)) => Acc (Array DIM2 e) -> Acc (Array DIM2 e)
p_ m = zipWith (/) m (n_ m)
where
n_ :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
n_ m = backpermute (shape m)
(lift1 ( \(Z :. (i :: Exp Int) :. (j:: Exp Int))
-> (ifThenElse (i < j) (lift (Z :. j :. j)) (lift (Z :. i :. i)) :: Exp DIM2)
)
) m
-}
......@@ -39,6 +39,8 @@ import qualified Data.Set as S
import Data.Map (Map)
import qualified Data.Map.Strict as M
import Data.Vector (Vector)
import Gargantext.Prelude
type Index = Int
......@@ -50,7 +52,7 @@ score :: (Ord t) => (A.Matrix Int -> A.Matrix Double)
-> Map (t, t) Double
score f m = fromIndex fromI . mat2map . f $ cooc2mat toI m
where
(toI, fromI) = createIndexes m
(toI, fromI) = createIndices m
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
......@@ -79,21 +81,30 @@ toIndex ni ns = indexConversion ni ns
fromIndex :: Ord t => Map Index t -> Map (Index, Index) a -> Map (t,t) a
fromIndex ni ns = indexConversion ni ns
---------------------------------------------------------------------------------
indexConversion :: (Ord b, Ord k) => Map k b -> Map (k,k) a -> Map (b, b) a
indexConversion index ms = M.fromList $ map (\((k1,k2),c) -> ( ((M.!) index k1, (M.!) index k2), c)) (M.toList ms)
---------------------------------------------------------------------------------
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
createIndexes :: Ord t => Map (t, t) b -> (Map t Index, Map Index t)
createIndexes = set2indexes . cooc2set
-- TODO
fromIndex' :: Ord t => Vector t -> Map (Index, Index) a -> Map (t,t) a
fromIndex' vi ns = undefined
-- TODO
createIndices' :: Ord t => Map (t, t) b -> (Map t Index, Vector t)
createIndices' = undefined
createIndices :: Ord t => Map (t, t) b -> (Map t Index, Map Index t)
createIndices = set2indices . map2set
where
cooc2set :: Ord t => Map (t, t) a -> Set t
cooc2set cs' = foldl' (\s ((t1,t2),_) -> insert [t1,t2] s ) S.empty (M.toList cs')
map2set :: Ord t => Map (t, t) a -> Set t
map2set cs' = foldl' (\s ((t1,t2),_) -> insert [t1,t2] s ) S.empty (M.toList cs')
where
insert as s = foldl' (\s' t -> S.insert t s') s as
set2indexes :: Ord t => Set t -> (Map t Index, Map Index t)
set2indexes s = (M.fromList toIndex', M.fromList fromIndex')
set2indices :: Ord t => Set t -> (Map t Index, Map Index t)
set2indices s = (M.fromList toIndex', M.fromList fromIndex')
where
fromIndex' = zip [0..] xs
toIndex' = zip xs [0..]
......
......@@ -14,7 +14,7 @@ extra-deps:
- git: https://github.com/delanoe/servant-static-th.git
commit: fff77e79fe94d563ab5cae2609b78c17b5c1f434
- accelerate-1.2.0.0
- accelerate-io-1.2.0.0
- hashtables-1.2.3.0 # needed by accelerate-1.2.0.0
- aeson-1.2.4.0
- aeson-lens-0.5.0.0
- duckling-0.1.3.0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment