Commit 72a1cfff authored by Alexandre Delanoë's avatar Alexandre Delanoë

[TestFlow] seems good, need to add tests on it and fix distributional distance.

parent bb989318
......@@ -50,8 +50,12 @@ library:
- Gargantext.Text.Parsers.WOS
- Gargantext.Text.Search
- Gargantext.Text.Terms
- Gargantext.Text.Terms.Mono
- Gargantext.Text.Terms.Multi.Lang.En
- Gargantext.Text.Terms.Multi.Lang.Fr
- Gargantext.Text.Terms.WithList
- Gargantext.TextFlow
- Gargantext.Viz.Graph
- Gargantext.Viz.Graph.Distances.Matrice
- Gargantext.Viz.Graph.Index
dependencies:
......
{-|PI/Application.hs
API/Count.hs
API/FrontEnd.hs
API/Node.hs
API/Auth.hs
API.hs
Database/NodeNodeNgram.hs
Database/User.hs
Database/Queries.hs
{-|
Module : Gargantext.API.Settings
Description : Settings of the API (Server and Client)
Copyright : (c) CNRS, 2017-Present
......
{-|
Module : Gargantext.Database
Description : Main commands of BASHQL a Domain Specific Language to deal with Gargantext Database.
Description : BASHQL to deal with Gargantext Database.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
* BASHQL is a Domain Specific Language to deal with the Database
* BASHQL = functional (Bash * SQL)
* Which language to chose when working with a database ? To make it
......@@ -42,6 +44,7 @@ write Haskell bash translations.
- FileSystem is now a NodeSystem where each File is a Node in a Directed Graph (DG).
* References
[0] MIT Press has published "Category theory for the sciences". The book
can also be purchased on Amazon. Here are reviews by the MAA, by the
AMS, and by SIAM.
......@@ -64,6 +67,7 @@ module Gargantext.Database ( module Gargantext.Database.Utils
, del , del'
, tree, tree'
, postCorpus, postAnnuaire
, Connection
)
where
......
......@@ -59,6 +59,9 @@ import qualified Data.Profunctor.Product as PP
-- | Types for Node Database Management
data PGTSVector
------------------------------------------------------------------------
type CorpusId = Int
------------------------------------------------------------------------
instance FromField HyperdataCorpus where
fromField = fromField'
......
......@@ -11,9 +11,9 @@ This file is intended for these purposes:
- documentation for teaching and research
- learn basics of Haskell which is a scientific programming language
- behavioral tests (that should be completed with uni-tests and scale-tests
- behavioral tests (that should be completed with uni-tests and scale-tests)
This documents defines basic of Text definitions according to Gargantext..
This document defines basic of Text definitions according to Gargantext..
- What is a term ?
- What is a sentence ?
......@@ -104,7 +104,18 @@ ex_occ = occurrences <$> L.concat <$> ex_terms
ex_cooc :: IO (Map (Label, Label) Int)
ex_cooc = cooc <$> ex_terms
-- | Tests
-- | Tests the specificity and genericity
--
-- >>> ex_cooc_mat
-- (fromList [(["glass"],0),(["spoon"],1),(["table"],2),(["wine"],3)],Matrix (Z :. 4 :. 4)
-- [ 4, 0, 0, 0,
-- 1, 2, 0, 0,
-- 3, 2, 4, 0,
-- 3, 1, 2, 3],Matrix (Z :. 4 :. 4)
-- [ 1.0, 0.25, 0.75, 0.75,
-- 0.0, 1.0, 1.0, 0.5,
-- 0.0, 0.0, 1.0, 0.5,
-- 0.0, 0.0, 0.0, 1.0],(Vector (Z :. 4) [0.5833333333333334,0.5833333333333334,0.75,0.5833333333333334],Vector (Z :. 4) [-0.5833333333333334,-0.4166666666666667,0.41666666666666674,0.5833333333333334]))
ex_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector InclusionExclusion, DAA.Vector SpecificityGenericity))
ex_cooc_mat = do
m <- ex_cooc
......
......@@ -36,7 +36,7 @@ import qualified Data.Array.Accelerate as DAA
import GHC.Real (round)
--import Debug.Trace
import Debug.Trace (trace)
data MapListSize = MapListSize Int
data InclusionSize = InclusionSize Int
......@@ -51,19 +51,19 @@ data FilterConfig = FilterConfig { fc_mapListSize :: MapListSize
, fc_defaultValue :: DefaultValue
}
filterCooc :: Ord t => FilterConfig -> Map (t, t) Int -> Map (t, t) Int
filterCooc :: (Show t, Ord t) => FilterConfig -> Map (t, t) Int -> Map (t, t) Int
filterCooc fc cc = (filterCooc' fc) ts cc
where
ts = map _scored_terms $ takeSome fc $ coocScored cc
filterCooc' :: Ord t => FilterConfig -> [t] -> Map (t, t) Int -> Map (t, t) Int
filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m = -- trace ("coocScored " <> show (length ts)) $
filterCooc' :: (Show t, Ord t) => FilterConfig -> [t] -> Map (t, t) Int -> Map (t, t) Int
filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m = trace ("coocScored " <> show ts) $
foldl' (\m' k -> M.insert k (maybe dv identity $ M.lookup k m) m')
M.empty selection
where
selection = [(x,y) | x <- ts
, y <- ts
-- , x >= y
-- , x >= y
]
......@@ -75,7 +75,7 @@ filterCooc' (FilterConfig _ _ _ _ (DefaultValue dv)) ts m = -- trace ("coocScore
takeSome :: Ord t => FilterConfig -> [Scored t] -> [Scored t]
takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Clusters _) _) scores = L.take l
$ takeSample n m
$ L.take l' $ sortWith (Down . _scored_incExc) scores
$ L.take l' $ reverse $ sortWith (Down . _scored_incExc) scores
-- splitKmeans k scores
where
-- TODO: benchmark with accelerate-example kmeans version
......@@ -95,10 +95,10 @@ takeSome (FilterConfig (MapListSize l) (InclusionSize l') (SampleBins s) (Cluste
$ sortWith (Down . _scored_speGen) xs
data Scored t = Scored { _scored_terms :: !t
, _scored_incExc :: !InclusionExclusion
, _scored_speGen :: !SpecificityGenericity
} deriving (Show)
data Scored ts = Scored { _scored_terms :: !ts
, _scored_incExc :: !InclusionExclusion
, _scored_speGen :: !SpecificityGenericity
} deriving (Show)
-- TODO in the textflow we end up needing these indices, it might be better
-- to compute them earlier and pass them around.
......
......@@ -17,7 +17,6 @@ module Gargantext.Text.Terms.Mono.Token.En
( EitherList(..)
, Tokenizer
, tokenize
, run
, defaultTokenizer
, whitespace
, uris
......@@ -64,8 +63,6 @@ import Gargantext.Prelude
-- ,"Hyphen-words"
-- ,"Yes/No questions"
-- ]
---
type Tokenizer = Text -> EitherList Text Text
-- | The EitherList is a newtype-wrapped list of Eithers.
......
......@@ -21,15 +21,22 @@ import GHC.IO (FilePath)
import qualified Data.Text as T
import Data.Text.IO (readFile)
import Data.Maybe (catMaybes)
import qualified Data.Set as DS
import qualified Data.Array.Accelerate as A
import qualified Data.Map.Strict as M
----------------------------------------------
import Gargantext.Database (Connection)
import Gargantext.Database.Node
import Gargantext.Core.Types.Node
import Gargantext.Core (Lang)
import Gargantext.Prelude
import Gargantext.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map)
import Gargantext.Viz.Graph.Distances.Matrice (distributional)
import Gargantext.Viz.Graph.Distances.Matrice (distributional, measureConditional)
import Gargantext.Viz.Graph (Graph(..), data2graph)
import Gargantext.Text.Metrics.Count (cooc)
import Gargantext.Text.Metrics (filterCooc, FilterConfig(..), Clusters(..), SampleBins(..), DefaultValue(..), MapListSize(..), InclusionSize(..))
......@@ -38,7 +45,7 @@ import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
import Gargantext.Text.Parsers.CSV
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain, l_community_id)
{-
____ _ _
......@@ -50,11 +57,27 @@ import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
-}
contextText :: [T.Text]
contextText = ["The dog is an animal."
,"The bird is an animal."
,"The bird is an animal."
,"The bird and the dog are an animal."
,"The table is an object."
,"The pen is an object."
,"This object is a pen or a table?"
,"The girl has a human body."
,"The girl has a human body."
,"The boy has a human body."
,"The boy has a human body."
]
data TextFlow = CSV FilePath
| FullText FilePath
| Contexts [T.Text]
| SQL Int
| Database T.Text
| DB Connection CorpusId
| Query T.Text
-- ExtDatabase Query
-- IntDatabase NodeId
......@@ -64,6 +87,7 @@ textFlow termType workType = do
FullText path -> splitBy (Sentences 5) <$> readFile path
CSV path -> readCsvOn [csv_title, csv_abstract] path
Contexts ctxt -> pure ctxt
SQL con corpusId -> catMaybes <$> map (\n -> hyperdataDocumentV3_title (node_hyperdata n) <> hyperdataDocumentV3_abstract (node_hyperdata n))<$> getDocumentsV3WithParentId con corpusId
_ -> undefined
textFlow' termType contexts
......@@ -78,53 +102,61 @@ textFlow' termType contexts = do
-- TermsType = Mono | Multi | MonoMulti
-- myterms # filter (\t -> not . elem t stopList)
-- # groupBy (Stem|GroupList|Ontology)
printDebug "terms" myterms
printDebug "myterms" (sum $ map length myterms)
-- Bulding the map list
-- compute copresences of terms, i.e. cooccurrences of terms in same context of text
-- Cooc = Map (Term, Term) Int
let myCooc1 = cooc myterms
printDebug "myCooc1" (M.size myCooc1)
printDebug "myCooc1 size" (M.size myCooc1)
-- Remove Apax: appears one time only => lighting the matrix
let myCooc2 = M.filter (>1) myCooc1
printDebug "myCooc2" (M.size myCooc2)
let myCooc2 = M.filter (>0) myCooc1
printDebug "myCooc2 size" (M.size myCooc2)
printDebug "myCooc2" myCooc2
-- Filtering terms with inclusion/Exclusion and Specificity/Genericity scores
let myCooc3 = filterCooc ( FilterConfig (MapListSize 100 )
(InclusionSize 900 )
let myCooc3 = filterCooc ( FilterConfig (MapListSize 350 )
(InclusionSize 500 )
(SampleBins 10 )
(Clusters 3 )
(DefaultValue 0 )
) myCooc2
printDebug "myCooc3" $ M.size myCooc3
-- putStrLn $ show myCooc3
printDebug "myCooc3 size" $ M.size myCooc3
printDebug "myCooc3" myCooc3
-- Cooc -> Matrix
let (ti, _) = createIndices myCooc3
printDebug "ti" $ M.size ti
printDebug "ti size" $ M.size ti
printDebug "ti" ti
let myCooc4 = toIndex ti myCooc3
printDebug "myCooc4" $ M.size myCooc4
printDebug "myCooc4 size" $ M.size myCooc4
printDebug "myCooc4" myCooc4
let matCooc = map2mat (0) (M.size ti) myCooc4
printDebug "matCooc shape" $ A.arrayShape matCooc
printDebug "matCooc" matCooc
-- Matrix -> Clustering
--let distanceMat = conditional' matCooc
let distanceMat = distributional matCooc
printDebug "distanceMat" $ A.arrayShape distanceMat
let distanceMat = measureConditional matCooc
--let distanceMat = distributional matCooc
printDebug "distanceMat shape" $ A.arrayShape distanceMat
printDebug "distanceMat" distanceMat
--
let distanceMap = mat2map distanceMat
printDebug "distanceMap" $ M.size distanceMap
--let distanceMap = M.filter (>0) $ mat2map distanceMat
let distanceMap = M.map (\n -> 1) $ M.filter (>0) $ mat2map distanceMat
printDebug "distanceMap size" $ M.size distanceMap
printDebug "distanceMap" distanceMap
-- let distance = fromIndex fi distanceMap
-- printDebug "distance" $ M.size distance
partitions <- cLouvain distanceMap
-- Building : -> Graph -> JSON
printDebug "partitions" $ length partitions
printDebug "partitions" $ DS.size $ DS.fromList $ map (l_community_id) partitions
--printDebug "partitions" partitions
pure $ data2graph (M.toList ti) myCooc4 distanceMap partitions
......
......@@ -7,15 +7,12 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
This module aims at implementig distances of terms context by context is
the same referential of corpus.
2 main measures are actually implemented in order to compute the proximity of two terms.
- Conditional measure is an absolute measure which reflects interactions of 2 terms in the corpus.
- Distributional measure is a relative measure which depends on the selected list, it represents structural equivalence.
Implementation use Accelerate library which enables GPU and CPU computation:
Motivation and definition of the @Conditional@ distance.
Implementation use Accelerate library :
* Manuel M. T. Chakravarty, Gabriele Keller, Sean Lee, Trevor L. McDonell, and Vinod Grover.
[Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.
......@@ -50,21 +47,27 @@ import qualified Gargantext.Prelude as P
-----------------------------------------------------------------------
-- | Test perf.
distriTest :: Matrix Double
distriTest = distributional $ myMat 100
-----------------------------------------------------------------------
-- | Define a vector
--
-- >>> vector 3
-- Vector (Z :. 3) [0,1,2]
vector :: Int -> (Array (Z :. Int) Int)
vector n = fromList (Z :. n) [0..n]
-- | Define a matrix
--
-- >>> matrix 3 ([1..] :: [Double])
-- Matrix (Z :. 3 :. 3)
-- [ 1.0, 2.0, 3.0,
-- 4.0, 5.0, 6.0,
-- 7.0, 8.0, 9.0]
matrix :: Elt c => Int -> [c] -> Matrix c
matrix n l = fromList (Z :. n :. n) l
myMat :: Int -> Matrix Int
myMat n = matrix n [1..]
-- | Two ways to get the rank (as documentation)
--
-- >>> rank (matrix 3 ([1..] :: [Int]))
-- 2
rank :: (Matrix a) -> Int
rank m = arrayRank $ arrayShape m
......@@ -73,6 +76,10 @@ rank m = arrayRank $ arrayShape m
-- How to force use with SquareMatrix ?
type Dim = Int
-- | Get Dimension of a square Matrix
--
-- >>> dim (matrix 3 ([1..] :: [Int]))
-- 3
dim :: Matrix a -> Dim
dim m = n
where
......@@ -80,34 +87,100 @@ dim m = n
-- indexTail (arrayShape m)
-----------------------------------------------------------------------
proba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
proba r mat = zipWith (/) mat (mkSum r mat)
mkSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
mkSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum mat
-- | Sum of a Matrix by Column
--
-- >>> run $ matSum 3 (use $ matrix 3 [1..])
-- Matrix (Z :. 3 :. 3)
-- [ 12.0, 15.0, 18.0,
-- 12.0, 15.0, 18.0,
-- 12.0, 15.0, 18.0]
matSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
matSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat
-- | divByDiag
-- | Proba computes de probability matrix: all cells divided by thee sum of its column
-- if you need get the probability on the lines, just transpose it
--
-- >>> run $ matProba 3 (use $ matrix 3 [1..])
-- Matrix (Z :. 3 :. 3)
-- [ 8.333333333333333e-2, 0.13333333333333333, 0.16666666666666666,
-- 0.3333333333333333, 0.3333333333333333, 0.3333333333333333,
-- 0.5833333333333334, 0.5333333333333333, 0.5]
matProba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
matProba r mat = zipWith (/) mat (matSum r mat)
-- | Diagonal of the matrix
--
-- >>> run $ diag (use $ matrix 3 ([1..] :: [Int]))
-- Vector (Z :. 3) [1,5,9]
diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
-- | Divide by the Diagonal of the matrix
--
-- >>> run $ divByDiag 3 (use $ matrix 3 ([1..] :: [Double]))
-- Matrix (Z :. 3 :. 3)
-- [ 1.0, 0.4, 0.3333333333333333,
-- 4.0, 1.0, 0.6666666666666666,
-- 7.0, 1.6, 1.0]
divByDiag :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
divByDiag d mat = zipWith (/) mat (replicate (constant (Z :. (d :: Int) :. All)) $ diag mat)
where
diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
diag m = backpermute (indexTail (shape m)) (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int)))) m
-----------------------------------------------------------------------
miniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
miniMax m = map (\x -> ifThenElse (x > miniMax') x 0) m
-----------------------------------------------------------------------
-- | Filters the matrix with the minimum of maximums
--
-- >>> run $ matMiniMax $ use $ matrix 3 [1..]
-- Matrix (Z :. 3 :. 3)
-- [ 0.0, 4.0, 7.0,
-- 0.0, 5.0, 8.0,
-- 0.0, 6.0, 9.0]
matMiniMax :: Acc (Matrix Double) -> Acc (Matrix Double)
matMiniMax m = map (\x -> ifThenElse (x > miniMax') x 0) (transpose m)
where
miniMax' = (the $ minimum $ maximum m)
-- | Filters the matrix with a constant
--
-- >>> run $ matFilter 5 $ use $ matrix 3 [1..]
-- Matrix (Z :. 3 :. 3)
-- [ 0.0, 0.0, 7.0,
-- 0.0, 0.0, 8.0,
-- 0.0, 6.0, 9.0]
matFilter :: Double -> Acc (Matrix Double) -> Acc (Matrix Double)
matFilter t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m)
-----------------------------------------------------------------------
-- * Measures of proximity
-----------------------------------------------------------------------
-- ** Conditional distance
-- *** Conditional distance (basic)
-- | Conditional distance (basic version)
conditional :: Matrix Int -> Matrix Double
conditional m = run (miniMax $ proba (dim m) $ map fromIntegral $ use m)
--
-- 2 main measures are actually implemented in order to compute the
-- proximity of two terms: conditional and distributional
--
-- Conditional measure is an absolute measure which reflects
-- interactions of 2 terms in the corpus.
measureConditional :: Matrix Int -> Matrix Double
--measureConditional m = run (matMiniMax $ matProba (dim m) $ map fromIntegral $ use m)
measureConditional m = run (matProba (dim m) $ map fromIntegral $ use m)
-- *** Conditional distance (advanced)
-- | Conditional distance (advanced version)
-- The conditional measure \[P_c\] of 2 terms @i@ and @j@, also called "confidence"
-- , is the maximum probability between @i@ and @j@. If \[n_i\] (resp.
-- \[n_j\]) is the number of occurrences of @i@ (resp. @j@) in the corpus and _[n_{ij}\] the number of its occurrences we get:
--
-- The conditional measure P(i|j) of 2 terms @i@ and @j@, also called
-- "confidence" , is the maximum probability between @i@ and @j@ to see
-- @i@ in the same context of @j@ knowing @j@.
--
-- If N(i) (resp. N(j)) is the number of occurrences of @i@ (resp. @j@)
-- in the corpus and _[n_{ij}\] the number of its occurrences we get:
--
-- \[P_c=max(\frac{n_i}{n_{ij}},\frac{n_j}{n_{ij}} )\]
conditional' :: Matrix Int -> (Matrix InclusionExclusion, Matrix SpecificityGenericity)
conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegral $ use m)
......@@ -124,21 +197,26 @@ conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegr
r = dim m
xs :: Acc (Matrix Double) -> Acc (Matrix Double)
xs mat = zipWith (-) (proba r mat) (mkSum r $ proba r mat)
xs mat = zipWith (-) (matSum r $ matProba r mat) (matProba r mat)
ys :: Acc (Matrix Double) -> Acc (Matrix Double)
ys mat = zipWith (-) (proba r mat) (mkSum r $ transpose $ proba r mat)
ys mat = zipWith (-) (matSum r $ transpose $ matProba r mat) (matProba r mat)
-----------------------------------------------------------------------
-- ** Distributional Distance
-- | Distributional Distance
-- The distributional measure \[P_c\] of @i@ and @j@ terms is:
-- \[ S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik}, MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}}^{}}
-- \]
-- | Distributional Distance Measure
--
-- Distributional measure is a relative measure which depends on the
-- selected list, it represents structural equivalence.
--
-- The distributional measure \[P_c\] of @i@ and @j@ terms is: \[
-- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
-- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}}^{}} \]
--
-- Mutual information
-- \[S{MI}({i},{j}) = \log(\frac{C{ij}}{E{ij}})\]
--
-- Number of cooccurrences of @i@ and @j@ in the same context of text
-- Number of cooccurrences of @i@ and @j@ in the same context of text
-- \[C{ij}\]
--
-- The expected value of the cooccurrences
......@@ -147,7 +225,7 @@ conditional' m = (run $ ie $ map fromIntegral $ use m, run $ sg $ map fromIntegr
-- Total cooccurrences of @i@ term
-- \[N_{i} = \sum_{i}^{} S_{i}\]
distributional :: Matrix Int -> Matrix Double
distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
distributional m = run $ matMiniMax $ ri (map fromIntegral $ use m)
where
n = dim m
......@@ -155,8 +233,8 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
ri mat = zipWith (/) mat1 mat2
where
mat1 = mkSum n $ zipWith min (mi mat) (mi $ transpose mat)
mat2 = mkSum n mat
mat1 = matSum n $ zipWith min (mi mat) (mi $ transpose mat)
mat2 = matSum n mat
mi m' = zipWith (\a b -> max (log $ a/b) 0) m'
$ zipWith (/) (crossProduct m') (total m')
......@@ -164,10 +242,12 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
total m'' = replicate (constant (Z :. n :. n)) $ fold (+) 0 $ fold (+) 0 m''
crossProduct m''' = zipWith (*) (cross m''' ) (cross (transpose m'''))
cross mat = zipWith (-) (mkSum n mat) (mat)
cross mat = zipWith (-) (matSum n mat) (mat)
-----------------------------------------------------------------------
-----------------------------------------------------------------------
-- * Specificity and Genericity
{- | Metric Specificity and genericity: select terms
- let N termes and occurrences of i \[N{i}\]
......@@ -181,7 +261,10 @@ distributional m = run $ miniMax $ ri (map fromIntegral $ use m)
- \[Inclusion (i) = Gen(i) + Spec(i)\)
- \[GenericityScore = Gen(i)- Spec(i)\]
- References: Science mapping with asymmetrical paradigmatic proximity Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276, arXiv:0803.2315 [cs.OH]
- References: Science mapping with asymmetrical paradigmatic proximity
Jean-Philippe Cointet (CREA, TSV), David Chavalarias (CREA) (Submitted
on 15 Mar 2008), Networks and Heterogeneous Media 3, 2 (2008) 267 - 276,
arXiv:0803.2315 [cs.OH]
-}
type InclusionExclusion = Double
type SpecificityGenericity = Double
......@@ -217,7 +300,8 @@ incExcSpeGen m = (run' inclusionExclusion m, run' specificityGenericity m)
-- | P(i|j) = Nij /N(jj) Probability to get i given j
p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
--p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (SymetricMatrix e) -> Acc (Matrix e)
p_ij :: (Elt e, P.Fractional (Exp e)) => Acc (Matrix e) -> Acc (Matrix e)
p_ij m = zipWith (/) m (n_jj m)
where
n_jj :: Elt e => Acc (SymetricMatrix e) -> Acc (Matrix e)
......@@ -255,4 +339,10 @@ p_ m = zipWith (/) m (n_ m)
) m
-}
-- * For Tests (to be removed)
-- | Test perfermance with this matrix
-- TODO : add this in a benchmark folder
distriTest :: Matrix Double
distriTest = distributional $ matrix 100 [1..]
-----------------------------------------------------------------------
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment