Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
195
Issues
195
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
a7462f0c
Commit
a7462f0c
authored
Oct 27, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[Org] Core.Methods.Distances
parent
e5871a7d
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
338 additions
and
6 deletions
+338
-6
package.yaml
package.yaml
+0
-1
Distances.hs
src/Gargantext/Core/Methods/Distances.hs
+2
-1
Conditional.hs
...rgantext/Core/Methods/Distances/Accelerate/Conditional.hs
+87
-0
Distributional.hs
...ntext/Core/Methods/Distances/Accelerate/Distributional.hs
+112
-0
SpeGen.hs
src/Gargantext/Core/Methods/Distances/Accelerate/SpeGen.hs
+133
-0
Distributional.hs
src/Gargantext/Core/Methods/Distances/Distributional.hs
+2
-2
Examples.hs
src/Gargantext/Core/Text/Examples.hs
+1
-1
Metrics.hs
src/Gargantext/Core/Text/Metrics.hs
+1
-1
No files found.
package.yaml
View file @
a7462f0c
...
@@ -66,7 +66,6 @@ library:
...
@@ -66,7 +66,6 @@ library:
-
Gargantext.Prelude
-
Gargantext.Prelude
-
Gargantext.Prelude.Crypto.Pass.User
-
Gargantext.Prelude.Crypto.Pass.User
-
Gargantext.Prelude.Utils
-
Gargantext.Prelude.Utils
-
Gargantext.Core.Methods.Distances.Matrice
-
Gargantext.Core.Text
-
Gargantext.Core.Text
-
Gargantext.Core.Text.Context
-
Gargantext.Core.Text.Context
-
Gargantext.Core.Text.Corpus.Parsers
-
Gargantext.Core.Text.Corpus.Parsers
...
...
src/Gargantext/Core/Methods/Distances.hs
View file @
a7462f0c
...
@@ -20,7 +20,8 @@ import Data.Swagger
...
@@ -20,7 +20,8 @@ import Data.Swagger
import
GHC.Generics
(
Generic
)
import
GHC.Generics
(
Generic
)
import
Gargantext.Prelude
(
Ord
,
Eq
,
Int
,
Double
)
import
Gargantext.Prelude
(
Ord
,
Eq
,
Int
,
Double
)
import
Gargantext.Prelude
(
Show
)
import
Gargantext.Prelude
(
Show
)
import
Gargantext.Core.Methods.Distances.Matrice
(
measureConditional
,
distributional
)
import
Gargantext.Core.Methods.Distances.Accelerate.Conditional
(
measureConditional
)
import
Gargantext.Core.Methods.Distances.Accelerate.Distributional
(
distributional
)
import
Prelude
(
Enum
,
Bounded
,
minBound
,
maxBound
)
import
Prelude
(
Enum
,
Bounded
,
minBound
,
maxBound
)
import
Test.QuickCheck
(
elements
)
import
Test.QuickCheck
(
elements
)
import
Test.QuickCheck.Arbitrary
import
Test.QuickCheck.Arbitrary
...
...
src/Gargantext/Core/Methods/Distances/Accelerate/Conditional.hs
0 → 100644
View file @
a7462f0c
{-|
Module : Gargantext.Core.Methods.Distances.Accelerate.Conditional
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
This module aims at implementing distances between terms, context by
context, in the same referential of corpus.
Implementation uses the Accelerate library, which enables GPU and CPU computation
(see Gargantext.Core.Methods.Graph.Accelerate)
-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module
Gargantext.Core.Methods.Distances.Accelerate.Conditional
where
-- import qualified Data.Foldable as P (foldl1)
-- import Debug.Trace (trace)
import
Data.Array.Accelerate
import
Data.Array.Accelerate.Interpreter
(
run
)
import
Gargantext.Core.Methods.Matrix.Accelerate.Utils
import
Gargantext.Core.Methods.Distances.Accelerate.SpeGen
import
qualified
Gargantext.Prelude
as
P
-- * Metrics of proximity
-----------------------------------------------------------------------
-- ** Conditional distance
-- *** Conditional distance (basic)
-- | Conditional distance (basic version)
--
-- 2 main metrics are actually implemented in order to compute the
-- proximity of two terms: conditional and distributional
--
-- Conditional metric is an absolute metric which reflects
-- interactions of 2 terms in the corpus.
measureConditional :: Matrix Int -> Matrix Double
-- The input is lifted into Accelerate with 'use', its entries are converted
-- with 'fromIntegral', 'matProba' is applied with the matrix dimension and
-- the result is evaluated with the interpreter's 'run'.
-- An earlier variant, left disabled, additionally applied matMiniMax:
--   measureConditional m = run (matMiniMax $ matProba (dim m) $ map fromIntegral $ use m)
measureConditional m =
  run (matProba (dim m) (map fromIntegral (use m)))
-- *** Conditional distance (advanced)
-- | Conditional distance (advanced version)
--
-- The conditional metric P(i|j) of 2 terms @i@ and @j@, also called
-- "confidence" , is the maximum probability between @i@ and @j@ to see
-- @i@ in the same context of @j@ knowing @j@.
--
-- If N(i) (resp. N(j)) is the number of occurrences of @i@ (resp. @j@)
-- in the corpus and \[n_{ij}\] the number of its occurrences we get:
--
-- \[P_c=max(\frac{n_i}{n_{ij}},\frac{n_j}{n_{ij}} )\]
conditional' :: Matrix Int
             -> (Matrix GenericityInclusion, Matrix SpecificityExclusion)
-- Both components are evaluated with the interpreter's 'run' from the same
-- lifted input: the first is built from the sum of 'xs' and 'ys', the
-- second from their difference, each divided by (2n - 1).
conditional' m = (run (ie counts), run (sg counts))
  where
    -- Input matrix lifted into Accelerate and converted to Double.
    counts :: Acc (Matrix Double)
    counts = map fromIntegral (use m)

    -- Dimension of the input matrix, as returned by 'dim'.
    nDim :: Dim
    nDim = dim m

    -- Common denominator 2n - 1, where n is the matrix dimension.
    denom :: Exp Double
    denom = 2 * P.fromIntegral nDim - 1

    -- Element-wise (xs + ys) / (2n - 1).
    ie :: Acc (Matrix Double) -> Acc (Matrix Double)
    ie mat = map (\v -> v / denom) (zipWith (+) (xs mat) (ys mat))

    -- Element-wise (xs - ys) / (2n - 1).
    sg :: Acc (Matrix Double) -> Acc (Matrix Double)
    sg mat = map (\v -> v / denom) (zipWith (-) (xs mat) (ys mat))

    -- 'matProba' specialised to the dimension of the input.
    proba :: Acc (Matrix Double) -> Acc (Matrix Double)
    proba = matProba nDim

    -- 'matSumCol' of the probability matrix, minus the probability matrix.
    xs :: Acc (Matrix Double) -> Acc (Matrix Double)
    xs mat = zipWith (-) (matSumCol nDim p) p
      where
        p = proba mat

    -- Same as 'xs', but 'matSumCol' is taken over the transposed
    -- probability matrix.
    ys :: Acc (Matrix Double) -> Acc (Matrix Double)
    ys mat = zipWith (-) (matSumCol nDim (transpose p)) p
      where
        p = proba mat
src/Gargantext/Core/Methods/Distances/Accelerate/Distributional.hs
0 → 100644
View file @
a7462f0c
{-|
Module : Gargantext.Core.Methods.Distances.Accelerate.Distributional
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
This module aims at implementing distances between terms, context by
context, in the same referential of corpus.
Implementation uses the Accelerate library, which enables GPU and CPU computation
(see Gargantext.Core.Methods.Graph.Accelerate)
-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
module
Gargantext.Core.Methods.Distances.Accelerate.Distributional
where
-- import qualified Data.Foldable as P (foldl1)
-- import Debug.Trace (trace)
import
Data.Array.Accelerate
import
Data.Array.Accelerate.Interpreter
(
run
)
import
Gargantext.Core.Methods.Matrix.Accelerate.Utils
import
qualified
Gargantext.Prelude
as
P
-- * Metrics of proximity
-----------------------------------------------------------------------
-- ** Distributional Distance
-- | Distributional Distance metric
--
-- Distributional metric is a relative metric which depends on the
-- selected list, it represents structural equivalence of mutual information.
--
-- The distributional metric P(c) of @i@ and @j@ terms is: \[
-- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
-- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}>0}^{}} \]
--
-- Mutual information
-- \[S_{MI}({i},{j}) = \log(\frac{C_{ij}}{E_{ij}})\]
--
-- Number of cooccurrences of @i@ and @j@ in the same context of text
-- \[C{ij}\]
--
-- The expected value of the cooccurrences @i@ and @j@ (given a map list of size @m@)
-- \[E_{ij}^{m} = \frac {S_{i} S_{j}} {N_{m}}\]
--
-- Total cooccurrences of term @i@ given a map list of size @m@
-- \[S_{i} = \sum_{j, j \neq i}^{m} S_{ij}\]
--
-- Total cooccurrences of terms given a map list of size @m@
-- \[N_{m} = \sum_{i}^{m} \sum_{j, j \neq i}^{m} S_{ij}\]
--
distributional :: Matrix Int -> Matrix Double
-- The input is lifted with 'use', pushed through 'pipeline' and evaluated
-- with the interpreter's 'run'. (A final matMiniMax step on the whole
-- result was considered and left disabled.)
distributional m = run (pipeline (use m))
  where
    -- Stages, read bottom to top: convert Int to Double, compute 's_mi',
    -- apply @filter' 0@ then @filterWith 0 100@, compute 'rIJ' and finally
    -- apply 'diagNull'.
    pipeline :: Acc (Matrix Int) -> Acc (Matrix Double)
    pipeline = diagNull n
             . rIJ n
             . filterWith 0 100
             . filter' 0
             . s_mi
             . map fromIntegral

    -- Dimension of the input matrix, as returned by 'dim'.
    n :: Dim
    n = dim m

    -- Currently unused draft (hence the underscore); the division by the
    -- total (zipWith (/) mat1 mat2) is not applied.
    _ri :: Acc (Matrix Double) -> Acc (Matrix Double)
    _ri mat = matSumCol n (zipWith min (_myMin mat) (_myMin mirrored))
      where
        mirrored = filterWith 0 100 (diagNull n (transpose mat))
        _mat2    = total mat

    -- 'minimum' of the matrix, replicated along a new outer axis of
    -- length n. Only used by '_ri'.
    _myMin :: Acc (Matrix Double) -> Acc (Matrix Double)
    _myMin mat = replicate (constant (Z :. n :. All)) (minimum mat)

    -- TODO fix NaN
    -- Quali TEST: OK
    -- Element-wise log (x / y), where x comes from @diagNull n m'@ and
    -- y from @crossProduct n m'@ divided by @total m'@.
    s_mi :: Acc (Matrix Double) -> Acc (Matrix Double)
    s_mi m' = zipWith logRatio (diagNull n m') expected
      where
        expected :: Acc (Matrix Double)
        expected = zipWith (/) (crossProduct n m') (total m')

        logRatio :: Exp Double -> Exp Double -> Exp Double
        logRatio observed e = log (observed / e)

    -- Sum of all entries of the matrix, replicated into an n x n matrix.
    total :: Acc (Matrix Double) -> Acc (Matrix Double)
    total mat = replicate (constant (Z :. n :. n)) (sum (sum mat))
-- | Applies 'divide' to @sumRowMin dimension mat@ and
-- @sumColMin dimension mat@, then passes the result to 'matMiniMax'.
rIJ :: (Elt a, Ord a, P.Fractional (Exp a), P.Num a)
    => Dim -> Acc (Matrix a) -> Acc (Matrix a)
rIJ dimension mat =
  matMiniMax (divide (sumRowMin dimension mat) (sumColMin dimension mat))
-- * For Tests (to be removed)
-- | Test performance with this matrix
-- TODO : add this in a benchmark folder
distriTest :: Int -> Matrix Double
-- Runs 'distributional' on the matrix produced by 'theMatrix' for the
-- given argument.
distriTest = distributional . theMatrix
src/Gargantext/Core/Methods/Distances/
Matrice
.hs
→
src/Gargantext/Core/Methods/Distances/
Accelerate/SpeGen
.hs
View file @
a7462f0c
{-|
{-|
Module : Gargantext.Core.Methods.Distances.
Matrice
Module : Gargantext.Core.Methods.Distances.
Accelerate.SpeGen
Description :
Description :
Copyright : (c) CNRS, 2017-Present
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
License : AGPL + CECILL v3
...
@@ -20,7 +20,7 @@ See Gargantext.Core.Methods.Graph.Accelerate)
...
@@ -20,7 +20,7 @@ See Gargantext.Core.Methods.Graph.Accelerate)
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE ViewPatterns #-}
module
Gargantext.Core.Methods.Distances.
Matrice
module
Gargantext.Core.Methods.Distances.
Accelerate.SpeGen
where
where
-- import qualified Data.Foldable as P (foldl1)
-- import qualified Data.Foldable as P (foldl1)
...
@@ -31,130 +31,6 @@ import Gargantext.Core.Methods.Matrix.Accelerate.Utils
...
@@ -31,130 +31,6 @@ import Gargantext.Core.Methods.Matrix.Accelerate.Utils
import
qualified
Gargantext.Prelude
as
P
import
qualified
Gargantext.Prelude
as
P
-- * Metrics of proximity
-----------------------------------------------------------------------
-- ** Conditional distance
-- *** Conditional distance (basic)
-- | Conditional distance (basic version)
--
-- 2 main metrics are actually implemented in order to compute the
-- proximity of two terms: conditional and distributional
--
-- Conditional metric is an absolute metric which reflects
-- interactions of 2 terms in the corpus.
measureConditional
::
Matrix
Int
->
Matrix
Double
--measureConditional m = run (matMiniMax $ matProba (dim m) $ map fromIntegral $ use m)
measureConditional
m
=
run
$
matProba
(
dim
m
)
$
map
fromIntegral
$
use
m
-- *** Conditional distance (advanced)
-- | Conditional distance (advanced version)
--
-- The conditional metric P(i|j) of 2 terms @i@ and @j@, also called
-- "confidence" , is the maximum probability between @i@ and @j@ to see
-- @i@ in the same context of @j@ knowing @j@.
--
-- If N(i) (resp. N(j)) is the number of occurrences of @i@ (resp. @j@)
-- in the corpus and _[n_{ij}\] the number of its occurrences we get:
--
-- \[P_c=max(\frac{n_i}{n_{ij}},\frac{n_j}{n_{ij}} )\]
conditional'
::
Matrix
Int
->
(
Matrix
GenericityInclusion
,
Matrix
SpecificityExclusion
)
conditional'
m
=
(
run
$
ie
$
map
fromIntegral
$
use
m
,
run
$
sg
$
map
fromIntegral
$
use
m
)
where
ie
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
ie
mat
=
map
(
\
x
->
x
/
(
2
*
n
-
1
))
$
zipWith
(
+
)
(
xs
mat
)
(
ys
mat
)
sg
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
sg
mat
=
map
(
\
x
->
x
/
(
2
*
n
-
1
))
$
zipWith
(
-
)
(
xs
mat
)
(
ys
mat
)
n
::
Exp
Double
n
=
P
.
fromIntegral
r
r
::
Dim
r
=
dim
m
xs
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
xs
mat
=
zipWith
(
-
)
(
matSumCol
r
$
matProba
r
mat
)
(
matProba
r
mat
)
ys
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
ys
mat
=
zipWith
(
-
)
(
matSumCol
r
$
transpose
$
matProba
r
mat
)
(
matProba
r
mat
)
-----------------------------------------------------------------------
-- ** Distributional Distance
-- | Distributional Distance metric
--
-- Distributional metric is a relative metric which depends on the
-- selected list, it represents structural equivalence of mutual information.
--
-- The distributional metric P(c) of @i@ and @j@ terms is: \[
-- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
-- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}>0}^{}} \]
--
-- Mutual information
-- \[S_{MI}({i},{j}) = \log(\frac{C{ij}}{E{ij}})\]
--
-- Number of cooccurrences of @i@ and @j@ in the same context of text
-- \[C{ij}\]
--
-- The expected value of the cooccurrences @i@ and @j@ (given a map list of size @n@)
-- \[E_{ij}^{m} = \frac {S_{i} S_{j}} {N_{m}}\]
--
-- Total cooccurrences of term @i@ given a map list of size @m@
-- \[S_{i} = \sum_{j, j \neq i}^{m} S_{ij}\]
--
-- Total cooccurrences of terms given a map list of size @m@
-- \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
--
distributional
::
Matrix
Int
->
Matrix
Double
distributional
m
=
-- run {- $ matMiniMax -}
run
$
diagNull
n
$
rIJ
n
$
filterWith
0
100
$
filter'
0
$
s_mi
$
map
fromIntegral
{- from Int to Double -}
$
use
m
{- push matrix in Accelerate type -}
where
_ri
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
_ri
mat
=
mat1
-- zipWith (/) mat1 mat2
where
mat1
=
matSumCol
n
$
zipWith
min
(
_myMin
mat
)
(
_myMin
$
filterWith
0
100
$
diagNull
n
$
transpose
mat
)
_mat2
=
total
mat
_myMin
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
_myMin
=
replicate
(
constant
(
Z
:.
n
:.
All
))
.
minimum
-- TODO fix NaN
-- Quali TEST: OK
s_mi
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
s_mi
m'
=
zipWith
(
\
x
y
->
log
(
x
/
y
))
(
diagNull
n
m'
)
$
zipWith
(
/
)
(
crossProduct
n
m'
)
(
total
m'
)
-- crossProduct n m'
total
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
total
=
replicate
(
constant
(
Z
:.
n
:.
n
))
.
sum
.
sum
n
::
Dim
n
=
dim
m
rIJ
::
(
Elt
a
,
Ord
a
,
P
.
Fractional
(
Exp
a
),
P
.
Num
a
)
=>
Dim
->
Acc
(
Matrix
a
)
->
Acc
(
Matrix
a
)
rIJ
n
m
=
matMiniMax
$
divide
a
b
where
a
=
sumRowMin
n
m
b
=
sumColMin
n
m
-----------------------------------------------------------------------
-----------------------------------------------------------------------
-----------------------------------------------------------------------
-----------------------------------------------------------------------
-- * Specificity and Genericity
-- * Specificity and Genericity
...
@@ -255,23 +131,3 @@ p_ m = zipWith (/) m (n_ m)
...
@@ -255,23 +131,3 @@ p_ m = zipWith (/) m (n_ m)
) m
) m
-}
-}
-- * For Tests (to be removed)
-- | Test perfermance with this matrix
-- TODO : add this in a benchmark folder
distriTest
::
Int
->
Matrix
Double
distriTest
n
=
distributional
(
theMatrix
n
)
{-
theResult :: Int -> Matrix Double
theResult n | (P.==) n 2 = let r = 1.6094379124341003 in [ 0, r, r, 0]
| P.otherwise = [ 1, 1 ]
-}
-- | Builds a vector from the list with 'vector' (its length is the length
-- of the list), lifts it with 'use', and replicates it along a new outer
-- axis of the requested length.
colMatrix :: Elt e
          => Int -> [e] -> Acc (Array ((Z :. Int) :. Int) e)
colMatrix copies values =
  replicate (constant (Z :. copies :. All))
            (use (vector (P.length values) values))
src/Gargantext/Core/Methods/Distances/Distributional.hs
View file @
a7462f0c
{-|
{-|
Module : Gargantext.Core.Methods.Distances
Module : Gargantext.Core.Methods.Distances
.Distributional
Description :
Description :
Copyright : (c) CNRS, 2017-Present
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Maintainer : team@gargantext.org
...
...
src/Gargantext/Core/Text/Examples.hs
View file @
a7462f0c
...
@@ -32,7 +32,7 @@ import Data.Ord (Down(..))
...
@@ -32,7 +32,7 @@ import Data.Ord (Down(..))
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
Data.Tuple.Extra
(
both
)
import
Data.Tuple.Extra
(
both
)
import
Gargantext.Core
(
Lang
(
EN
))
import
Gargantext.Core
(
Lang
(
EN
))
import
Gargantext.Core.Methods.Distances.
Matrice
import
Gargantext.Core.Methods.Distances.
Accelerate.SpeGen
import
Gargantext.Core.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
import
Gargantext.Core.Text.Context
(
splitBy
,
SplitContext
(
Sentences
))
import
Gargantext.Core.Text.Metrics.Count
(
Grouped
)
import
Gargantext.Core.Text.Metrics.Count
(
Grouped
)
import
Gargantext.Core.Text.Metrics.Count
(
occurrences
,
cooc
)
import
Gargantext.Core.Text.Metrics.Count
(
occurrences
,
cooc
)
...
...
src/Gargantext/Core/Text/Metrics.hs
View file @
a7462f0c
...
@@ -20,7 +20,7 @@ module Gargantext.Core.Text.Metrics
...
@@ -20,7 +20,7 @@ module Gargantext.Core.Text.Metrics
--import Math.KMeans (kmeans, euclidSq, elements)
--import Math.KMeans (kmeans, euclidSq, elements)
import
Data.Map
(
Map
)
import
Data.Map
(
Map
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core.Methods.Distances.
Matrice
import
Gargantext.Core.Methods.Distances.
Accelerate.SpeGen
import
Gargantext.Core.Viz.Graph.Index
import
Gargantext.Core.Viz.Graph.Index
import
Gargantext.Core.Statistics
(
pcaReduceTo
,
Dimension
(
..
))
import
Gargantext.Core.Statistics
(
pcaReduceTo
,
Dimension
(
..
))
import
qualified
Data.Array.Accelerate
as
DAA
import
qualified
Data.Array.Accelerate
as
DAA
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment