Julien Moutinho / haskell-gargantext > Commits

Commit 19071e4b
Authored Jul 02, 2020 by Przemyslaw Kaminski
Merge branch 'dev' into dev-doc-table-optimization
Parents: 4caaf612, 63e3a6fd
Showing 27 changed files with 336 additions and 202 deletions (+336 / -202)
  package.yaml                                            +61   -38
  src-doctest/Main.hs                                      +1    -0
  src-test/Main.hs                                         +7    -5
  src-test/Ngrams/Lang.hs                                  +2    -1
  src-test/Ngrams/Lang/En.hs                               +6    -2
  src-test/Ngrams/Lang/Fr.hs                               +5    -2
  src-test/Ngrams/Lang/Occurrences.hs                      +2    -1
  src-test/Ngrams/Metrics.hs                               +5    -1
  src-test/Parsers/Date.hs                                 +1    -1
  src/Gargantext/API/Admin/Orchestrator/Types.hs           +1    -1
  src/Gargantext/API/Node/Update.hs                        +1    -1
  src/Gargantext/Text/Terms.hs                             +1    -1
  src/Gargantext/Text/Terms/Eleve.hs                       +1    -1
  src/Gargantext/Viz/AdaptativePhylo.hs                    +3    -1
  src/Gargantext/Viz/Graph/API.hs                          +2    -1
  src/Gargantext/Viz/Graph/Distances.hs                    +1    -0
  src/Gargantext/Viz/Graph/Distances/Distributional.hs     +2    -6
  src/Gargantext/Viz/Graph/Distances/Matrice.hs            +137  -36
  src/Gargantext/Viz/Graph/Tools.hs                        +1    -0
  src/Gargantext/Viz/Phylo/LevelMaker.hs                   +4    -4
  src/Gargantext/Viz/Phylo/LinkMaker.hs                    +4    -6
  src/Gargantext/Viz/Phylo/Metrics.hs                      +3    -9
  src/Gargantext/Viz/Phylo/PhyloExport.hs                  +18   -20
  src/Gargantext/Viz/Phylo/PhyloMaker.hs                   +14   -14
  src/Gargantext/Viz/Phylo/SynchronicClustering.hs         +15   -15
  src/Gargantext/Viz/Phylo/TemporalMatching.hs             +34   -34
  stack.yaml                                               +4    -1
package.yaml

@@ -6,15 +6,15 @@ category: Data
 author: Gargantext Team
 maintainer: team@gargantext.org
- copyright: ! 'Copyright: (c) 2017-2018: see git logs and README'
- license: BSD3
+ copyright: ! 'Copyright: (c) 2017-Present: see git logs and README'
+ license: AGPL-3
 homepage: https://gargantext.org
 ghc-options: -Wall
 extra-libraries:
   - gfortran
 dependencies:
   - extra
   - text
 default-extensions:
   - DataKinds
   - DeriveGeneric

@@ -59,6 +59,7 @@ library:
     - Gargantext.Text
     - Gargantext.Text.Context
     - Gargantext.Text.Corpus.Parsers
+     - Gargantext.Text.Corpus.Parsers.Date.Parsec
     - Gargantext.Text.Corpus.API
     - Gargantext.Text.Corpus.Parsers.CSV
     - Gargantext.Text.Examples

@@ -364,36 +365,58 @@ executables:
 tests:
   # garg-test:
   #   main: Main.hs
   #   source-dirs: src-test
   #   ghc-options:
   #   - -threaded
   #   - -rtsopts
   #   - -with-rtsopts=-N
   #   dependencies:
   #   - base
   #   - gargantext
   #   - hspec
   #   - QuickCheck
   #   - quickcheck-instances
   #   - time
   #   - parsec
   #   - duckling
   #   - text
   garg-doctest:
     main: Main.hs
     source-dirs: src-doctest
     ghc-options:
     - -O2
     - -Wcompat
     - -Wmissing-signatures
     - -rtsopts
     - -threaded
     - -with-rtsopts=-N
     dependencies:
     - doctest
     - Glob
     - QuickCheck
     - base
     - gargantext
   garg-test:
     main: Main.hs
     source-dirs: src-test
     default-extensions:
     - DataKinds
     - DeriveGeneric
     - FlexibleContexts
     - FlexibleInstances
     - GeneralizedNewtypeDeriving
     - MultiParamTypeClasses
     - NoImplicitPrelude
     - OverloadedStrings
     - RankNTypes
     ghc-options:
     - -threaded
     - -rtsopts
     - -with-rtsopts=-N
     dependencies:
     - base
     - gargantext
     - hspec
     - QuickCheck
     - quickcheck-instances
     - time
     - parsec
     - duckling
     - text
   # garg-doctest:
   #   main: Main.hs
   #   source-dirs: src-doctest
   #   ghc-options:
   #   - -O2
   #   - -Wcompat
   #   - -Wmissing-signatures
   #   - -rtsopts
   #   - -threaded
   #   - -with-rtsopts=-N
   #   dependencies:
   #   - doctest
   #   - Glob
   #   - QuickCheck
   #   - base
   #   - gargantext
   #   default-extensions:
   #   - DataKinds
   #   - DeriveGeneric
   #   - FlexibleContexts
   #   - FlexibleInstances
   #   - GeneralizedNewtypeDeriving
   #   - MultiParamTypeClasses
   #   - NoImplicitPrelude
   #   - OverloadedStrings
   #   - RankNTypes
   #
src-doctest/Main.hs

import System.FilePath.Glob
import Test.DocTest
import Gargantext.Prelude

main :: IO ()
main = glob "src/Gargantext/" >>= doctest
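This driver follows the common Glob + doctest pattern: gather Haskell sources, then hand them to the doctest runner, which executes the -- >>> examples embedded in Haddock comments (such as the ones touched in Matrice.hs below). A minimal self-contained sketch of that pattern; the glob pattern and the extension flag here are illustrative, not taken from this repository:

import System.FilePath.Glob (glob)
import Test.DocTest (doctest)

main :: IO ()
main = do
  -- collect every Haskell source file under src/
  sources <- glob "src/**/*.hs"
  -- run the "-- >>>" examples found in Haddock comments
  doctest ("-XOverloadedStrings" : sources)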
src-test/Main.hs

@@ -12,15 +12,17 @@ Portability : POSIX
 import Gargantext.Prelude
 import Gargantext.Core (Lang(..))
 --import qualified Ngrams.Lang.Fr as Fr
- import qualified Ngrams.Lang as Lang
+ --import qualified Ngrams.Lang as Lang
 import qualified Ngrams.Lang.Occurrences as Occ
 import qualified Ngrams.Metrics as Metrics
 import qualified Parsers.Date as PD
+ import qualified Graph.Distance as GD

 main :: IO ()
 main = do
-   Occ.parsersTest
-   Lang.ngramsExtractionTest FR
-   Lang.ngramsExtractionTest EN
-   Metrics.main
+   -- Occ.parsersTest
+   -- Lang.ngramsExtractionTest FR
+   -- Lang.ngramsExtractionTest EN
+   -- Metrics.main
   PD.testFromRFC3339
+   GD.test
src-test/Ngrams/Lang.hs

@@ -15,6 +15,7 @@ commentary with @some markup@.
 module Ngrams.Lang where

+ {-
 import Gargantext.Prelude (IO())
 import Gargantext.Core (Lang(..))

@@ -24,4 +25,4 @@ import qualified Ngrams.Lang.En as En
 ngramsExtractionTest :: Lang -> IO ()
 ngramsExtractionTest FR = Fr.ngramsExtractionTest
 ngramsExtractionTest EN = En.ngramsExtractionTest
+ -}
src-test/Ngrams/Lang/En.hs

@@ -15,6 +15,7 @@ commentary with @some markup@.
 module Ngrams.Lang.En where

+ {-
 import Data.List ((!!))
 import Data.Text (Text)

@@ -22,8 +23,11 @@ import Test.Hspec
 import Gargantext.Prelude
 import Gargantext.Core (Lang(..))
- import Gargantext.Text.Ngrams.PosTagging.Parser (extractNgrams, selectNgrams)
+ -- TODO this import is not used anymore
+ import Gargantext.Text.Ngrams.PosTagging.Parser (extractNgrams, selectNgrams)
+ -- use instead
+ -- import Gargantext.Text.Terms (extractNgramsT)

 ngramsExtractionTest :: IO ()
 ngramsExtractionTest = hspec $ do

@@ -43,4 +47,4 @@ ngramsExtractionTest = hspec $ do
       t2 <- map (selectNgrams EN) <$> extractNgrams EN t
       t2 `shouldBe` [[("Donald Trump","NNP","PERSON"),("president of the United-States of America","NN","LOCATION")]]
+ -}
src-test/Ngrams/Lang/Fr.hs

@@ -15,12 +15,15 @@ commentary with @some markup@.
 module Ngrams.Lang.Fr where

+ {-
 import Test.Hspec
 import Gargantext.Prelude
 import Gargantext.Core (Lang(..))
+ -- TODO this import is not used anymore
 import Gargantext.Text.Ngrams.PosTagging.Parser (extractNgrams, selectNgrams)
+ -- use instead

 ngramsExtractionTest :: IO ()
 ngramsExtractionTest = hspec $ do
   describe "Behavioral tests: ngrams extraction in French Language" $ do

@@ -61,4 +64,4 @@ ngramsExtractionTest = hspec $ do
       let textFr1 = "L'heure d'arrivée des coureurs dépend de la météo du jour."
       testFr1 <- map (selectNgrams FR) <$> (extractNgrams FR) textFr1
       testFr1 `shouldBe` [[("heure d' arrivée des coureurs","NC","O"),("météo du jour","NC","O")]]
+ -}
src-test/Ngrams/Lang/Occurrences.hs

@@ -15,6 +15,7 @@ commentary with @some markup@.
 module Ngrams.Lang.Occurrences where

+ {-
 import Test.Hspec
 import Data.Either (Either(Right))

@@ -59,4 +60,4 @@ parsersTest = hspec $ do
     -- describe "Parser for nodes" $ do
     --     it "returns the result of one parsing after space" $ do
     --         occOfCorpus 249509 "sciences" `shouldReturn` 7
+ -}
src-test/Ngrams/Metrics.hs

@@ -15,8 +15,10 @@ commentary with @some markup@.
 {-# LANGUAGE CPP #-}

- module Ngrams.Metrics (main) where
+ --module Ngrams.Metrics (main) where
+ module Ngrams.Metrics where

+ {-
 import Data.Text (Text)
 import qualified Data.Text as T
 import Data.Ratio

@@ -139,3 +141,5 @@ testPair :: (Eq a, Show a)
           -> SpecWith ()
 testPair f a b r = it ("‘" <> T.unpack a <> "’ and ‘" <> T.unpack b <> "’") $
   f a b `shouldBe` r
+ -}
src-test/Parsers/Date.hs

@@ -28,7 +28,7 @@ import Duckling.Time.Types (toRFC3339)
 -----------------------------------------------------------
 import Gargantext.Prelude
- import Gargantext.Text.Parsers.Date (fromRFC3339)
+ import Gargantext.Text.Corpus.Parsers.Date.Parsec (fromRFC3339)
 import Parsers.Types
 -----------------------------------------------------------
src/Gargantext/API/Admin/Orchestrator/Types.hs

@@ -27,12 +27,12 @@ instance Arbitrary a => Arbitrary (JobOutput a) where
   arbitrary = JobOutput <$> arbitrary

 -- | Main Types
+ -- TODO IsidoreAuth
 data ExternalAPIs = All
                   | PubMed
                   | HAL
                   | IsTex
                   | Isidore
-   -- | IsidoreAuth
   deriving (Show, Eq, Enum, Bounded, Generic)
src/Gargantext/API/Node/Update.hs

@@ -81,7 +81,7 @@ updateNode uId nId (UpdateNodeParamsGraph metric) logStatus = do
   logStatus JobLog { _scst_succeeded = Just 1
                    , _scst_failed    = Just 0
-                    , _scst_remaining = Just 2
+                    , _scst_remaining = Just 1
                    , _scst_events    = Just []
                    }
src/Gargantext/Text/Terms.hs

@@ -175,6 +175,6 @@ newTries n t = buildTries n (fmap toToken $ uniText t)
 uniText :: Text -> [[Text]]
 uniText = map (List.filter (not . isPunctuation))
         . map tokenize
-         . sentences -- | TODO get sentences according to lang
+         . sentences -- TODO get sentences according to lang
         . Text.toLower
src/Gargantext/Text/Terms/Eleve.hs

@@ -73,12 +73,12 @@ subst (src, dst) x | sim src x = dst
                    | otherwise  = x
 ------------------------------------------------------------------------

- -- | TODO: Show Instance only used for debugging
 type Entropy e =
   ( Fractional e
   , Floating e
   , P.RealFloat e
   , Show e
+   -- ^ TODO: only used for debugging
   )
 ------------------------------------------------------------------------
 -- | Example and tests for development
src/Gargantext/Viz/AdaptativePhylo.hs

@@ -65,11 +65,13 @@ data SeaElevation =
 data Proximity =
       WeightedLogJaccard
-       { _wlj_sensibility     :: Double
+       { _wlj_sensibility     :: Double
+       {-
        -- , _wlj_thresholdInit :: Double
        -- , _wlj_thresholdStep :: Double
        -- | max height for sea level in temporal matching
        -- , _wlj_elevation     :: Double
+       -}
       }
     | Hamming
     deriving (Show, Generic, Eq)
src/Gargantext/Viz/Graph/API.hs

@@ -147,8 +147,9 @@ computeGraph cId d nt repo = do
   lIds <- selectNodesWithUsername NodeList userMaster
   let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo

+   -- TODO split diagonal
   myCooc <- Map.filter (>1)
-          <$> getCoocByNgrams (Diagonal True)
+          <$> getCoocByNgrams (Diagonal False)
         <$> groupNodesByNgrams ngs
         <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
src/Gargantext/Viz/Graph/Distances.hs

@@ -27,6 +27,7 @@ import Test.QuickCheck.Arbitrary
 ------------------------------------------------------------------------
 data Distance = Conditional | Distributional
+   deriving (Show)

 measure :: Distance -> Matrix Int -> Matrix Double
 measure Conditional = measureConditional
src/Gargantext/Viz/Graph/Distances/Distributional.hs

@@ -18,18 +18,15 @@ module Gargantext.Viz.Graph.Distances.Distributional
   where

 import Data.Matrix hiding (identity)
 import qualified Data.Map as M

 import Data.Vector (Vector)
 import qualified Data.Vector as V

 import Gargantext.Prelude
 import Gargantext.Viz.Graph.Utils

- distributional :: (Floating a, Ord a) => Matrix a -> [((Int, Int), a)]
- distributional m = filter (\((x,y), d) -> foldl' (&&) True (conditions x y d)) distriList
+ distributional' :: (Floating a, Ord a) => Matrix a -> [((Int, Int), a)]
+ distributional' m = filter (\((x,y), d) -> foldl' (&&) True (conditions x y d)) distriList
   where
     conditions x y d = [ (x /= y)
                        , (d > miniMax')

@@ -51,7 +48,6 @@ ri m = matrix c r doRi
           $ V.zip (ax Col x y mi') (ax Row x y mi')
     (c,r) = (nOf Col m, nOf Row m)

 mi :: (Ord a, Floating a) => Matrix a -> Matrix a
 mi m = matrix c r createMat
   where
src/Gargantext/Viz/Graph/Distances/Matrice.hs

@@ -17,14 +17,6 @@ Implementation use Accelerate library which enables GPU and CPU computation:
   [Accelerating Haskell Array Codes with Multicore GPUs][CKLM+11].
   In _DAMP '11: Declarative Aspects of Multicore Programming_, ACM, 2011.

- * Trevor L. McDonell, Manuel M. T. Chakravarty, Gabriele Keller, and Ben Lippmeier.
-   [Optimising Purely Functional GPU Programs][MCKL13].
-   In _ICFP '13: The 18th ACM SIGPLAN International Conference on Functional Programming_, ACM, 2013.
- * Robert Clifton-Everest, Trevor L. McDonell, Manuel M. T. Chakravarty, and Gabriele Keller.
-   [Embedding Foreign Code][CMCK14].
-   In _PADL '14: The 16th International Symposium on Practical Aspects of Declarative Languages_, Springer-Verlag, LNCS, 2014.
- * Trevor L. McDonell, Manuel M. T. Chakravarty, Vinod Grover, and Ryan R. Newton.
-   [Type-safe Runtime Code Generation: Accelerate to LLVM][MCGN15].
-   In _Haskell '15: The 8th ACM SIGPLAN Symposium on Haskell_, ACM, 2015.

@@ -34,13 +26,14 @@ Implementation use Accelerate library which enables GPU and CPU computation:
 {-# LANGUAGE TypeFamilies #-}
 {-# LANGUAGE TypeOperators #-}
 {-# LANGUAGE ScopedTypeVariables #-}
+ {-# LANGUAGE ViewPatterns #-}

 module Gargantext.Viz.Graph.Distances.Matrice
   where

+ import Debug.Trace (trace)
 import Data.Array.Accelerate
 import Data.Array.Accelerate.Interpreter (run)
 import qualified Gargantext.Prelude as P
@@ -49,8 +42,8 @@ import qualified Gargantext.Prelude as P
 -- | Define a vector
 --
 -- >>> vector 3
 -- Vector (Z :. 3) [0,1,2]
- vector :: Int -> (Array (Z :. Int) Int)
- vector n = fromList (Z :. n) [0..n]
+ vector :: Elt c => Int -> [c] -> (Array (Z :. Int) c)
+ vector n l = fromList (Z :. n) l

 -- | Define a matrix
 --
@@ -85,16 +78,26 @@ dim m = n
 --  indexTail (arrayShape m)

 -----------------------------------------------------------------------
+ -- TODO move to Utils
+ runExp :: Elt e => Exp e -> e
+ runExp e = indexArray (run (unit e)) Z
+ -----------------------------------------------------------------------

 -- | Sum of a Matrix by Column
 --
- -- >>> run $ matSum 3 (use $ matrix 3 [1..])
+ -- >>> run $ matSumCol 3 (use $ matrix 3 [1..])
 -- Matrix (Z :. 3 :. 3)
 --   [ 12.0, 15.0, 18.0,
 --     12.0, 15.0, 18.0,
 --     12.0, 15.0, 18.0]
- matSum :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
- matSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat
+ matSumCol :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
+ matSumCol r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose mat
+
+ matSumCol' :: Matrix Double -> Matrix Double
+ matSumCol' m = run $ matSumCol n m'
+   where
+     n  = dim m
+     m' = use m

 -- | Proba computes de probability matrix: all cells divided by thee sum of its column
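To make the column-sum convention concrete: matSumCol builds, for an n x n matrix, a matrix whose every row is the vector of column sums, and matProba (below) then divides each cell by the sum of its column, so each column of the result sums to 1, as the doctests show. A plain-list sketch of the same semantics, with [[Double]] standing in for Matrix Double — an illustration only, not part of the module:

-- Sum of each column of a non-empty row-major matrix.
colSums :: [[Double]] -> [Double]
colSums = foldr1 (zipWith (+))

-- Every cell divided by the sum of its column (what matProba computes).
colProba :: [[Double]] -> [[Double]]
colProba m = [ zipWith (/) row (colSums m) | row <- m ]

-- colProba [[1,2],[3,4]] == [[0.25, 0.333...], [0.75, 0.666...]]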
@@ -106,14 +109,16 @@ matSum r mat = replicate (constant (Z :. (r :: Int) :. All)) $ sum $ transpose m
 --     0.3333333333333333, 0.3333333333333333, 0.3333333333333333,
 --     0.5833333333333334, 0.5333333333333333, 0.5]
 matProba :: Dim -> Acc (Matrix Double) -> Acc (Matrix Double)
- matProba r mat = zipWith (/) mat (matSum r mat)
+ matProba r mat = zipWith (/) mat (matSumCol r mat)

 -- | Diagonal of the matrix
 --
 -- >>> run $ diag (use $ matrix 3 ([1..] :: [Int]))
 -- Vector (Z :. 3) [1,5,9]
 diag :: Elt e => Acc (Matrix e) -> Acc (Vector e)
 diag m = backpermute (indexTail (shape m))
                      (lift1 (\(Z :. x) -> (Z :. x :. (x :: Exp Int))))
                      m

 -- | Divide by the Diagonal of the matrix
 --

@@ -145,8 +150,8 @@ matMiniMax m = map (\x -> ifThenElse (x > miniMax') x 0) (transpose m)
 --   [ 0.0, 0.0, 7.0,
 --     0.0, 0.0, 8.0,
 --     0.0, 6.0, 9.0]
- matFilter :: Double -> Acc (Matrix Double) -> Acc (Matrix Double)
- matFilter t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m)
+ filter' :: Double -> Acc (Matrix Double) -> Acc (Matrix Double)
+ filter' t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m)

 -----------------------------------------------------------------------
 -- * Measures of proximity
@@ -164,7 +169,9 @@ matFilter t m = map (\x -> ifThenElse (x > (constant t)) x 0) (transpose m)
 -- interactions of 2 terms in the corpus.
 measureConditional :: Matrix Int -> Matrix Double
- measureConditional m = run (matProba (dim m) $ map fromIntegral $ use m)
+ --measureConditional m = run (matMiniMax $ matProba (dim m) $ map fromIntegral $ use m)
+ measureConditional m = run $ matProba (dim m)
+                            $ map fromIntegral
+                            $ use m

 -- *** Conditional distance (advanced)

@@ -196,9 +203,9 @@ conditional' m = ( run $ ie $ map fromIntegral $ use m
     r = dim m

     xs :: Acc (Matrix Double) -> Acc (Matrix Double)
-     xs mat = zipWith (-) (matSum r $ matProba r mat) (matProba r mat)
+     xs mat = zipWith (-) (matSumCol r $ matProba r mat) (matProba r mat)
     ys :: Acc (Matrix Double) -> Acc (Matrix Double)
-     ys mat = zipWith (-) (matSum r $ transpose $ matProba r mat) (matProba r mat)
+     ys mat = zipWith (-) (matSumCol r $ transpose $ matProba r mat) (matProba r mat)

 -----------------------------------------------------------------------
 -- ** Distributional Distance
@@ -206,11 +213,11 @@ conditional' m = ( run $ ie $ map fromIntegral $ use m
 -- | Distributional Distance Measure
 --
 -- Distributional measure is a relative measure which depends on the
- -- selected list, it represents structural equivalence.
+ -- selected list, it represents structural equivalence of mutual information.
 --
 -- The distributional measure P(c) of @i@ and @j@ terms is: \[
 -- S_{MI} = \frac {\sum_{k \neq i,j ; MI_{ik} >0}^{} \min(MI_{ik},
- -- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik}}^{}} \]
+ -- MI_{jk})}{\sum_{k \neq i,j ; MI_{ik} >0}^{}} \]
 --
 -- Mutual information
 -- \[S_{MI}({i},{j}) = \log(\frac{C{ij}}{E{ij}})\]
-- \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
--
distributional
::
Matrix
Int
->
Matrix
Double
distributional
m
=
run
$
matMiniMax
$
ri
(
map
fromIntegral
$
use
m
)
distributional
m
=
run
{- -- $ matMiniMax
-- $ ri
-- $ myMin
-}
$
filter'
0
$
s_mi
$
map
fromIntegral
{- from Int to Double -}
$
use
m
{- push matrix in Accelerate type -}
where
-- filter m = zipWith (\a b -> max a b) m (transpose m)
ri
mat
=
zipWith
(
/
)
mat1
mat2
{-
ri :: Acc (Matrix Double) -> Acc (Matrix Double)
ri mat = mat1 -- zipWith (/) mat1 mat2
where
mat1
=
matSum
n
$
zipWith
min
(
s_mi
mat
)
(
s_mi
$
transpose
mat
)
mat2
=
matSum
n
mat
s_mi
m'
=
zipWith
(
\
a
b
->
log
(
a
/
b
))
m'
$
zipWith
(
/
)
(
crossProduct
m'
)
(
total
m'
)
mat1 = matSumCol n $ zipWith min' (myMin mat) (myMin $ transpose mat)
mat2 = total mat
myMin :: Acc (Matrix Double) -> Acc (Matrix Double)
myMin = replicate (constant (Z :. n :. All)) . minimum
-}
-- TODO fix NaN
-- Quali TEST: OK
s_mi
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
s_mi
m'
=
zipWith
(
\
x
y
->
log
(
x
/
y
))
(
diagNull
n
m'
)
$
zipWith
(
/
)
(
crossProduct
n
m'
)
(
total
m'
)
-- crossProduct n m'
total
m''
=
replicate
(
constant
(
Z
:.
n
:.
n
))
$
fold
(
+
)
0
$
fold
(
+
)
0
m''
total
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
total
=
replicate
(
constant
(
Z
:.
n
:.
n
))
.
sum
.
sum
n
::
Dim
n
=
dim
m
crossProduct
m'''
=
zipWith
(
*
)
(
cross
m'''
)
(
cross
(
transpose
m'''
))
cross
mat
=
zipWith
(
-
)
(
matSum
n
mat
)
(
mat
)
-- run $ (identityMatrix (DAA.constant (10::Int)) :: DAA.Acc (DAA.Matrix Int)) Matrix (Z :. 10 :. 10)
identityMatrix
::
Num
a
=>
Exp
Int
->
Acc
(
Matrix
a
)
identityMatrix
n
=
let
zeros
=
fill
(
index2
n
n
)
0
ones
=
fill
(
index1
n
)
1
in
permute
const
zeros
(
\
(
unindex1
->
i
)
->
index2
i
i
)
ones
eyeMatrix
::
Num
a
=>
Dim
->
Acc
(
Matrix
a
)
->
Acc
(
Matrix
a
)
eyeMatrix
n'
_m
=
let
ones
=
fill
(
index2
n
n
)
1
zeros
=
fill
(
index1
n
)
0
n
=
constant
n'
in
permute
const
ones
(
\
(
unindex1
->
i
)
->
index2
i
i
)
zeros
selfMatrix
::
Num
a
=>
Dim
->
Acc
(
Matrix
a
)
->
Acc
(
Matrix
a
)
selfMatrix
n'
_m
=
let
zeros
=
fill
(
index2
n
n
)
0
ones
=
fill
(
index2
n
n
)
1
n
=
constant
n'
in
permute
const
ones
(
lift1
(
\
(
Z
:.
(
i
::
Exp
Int
)
:.
(
_j
::
Exp
Int
))
->
-- ifThenElse (i /= j)
-- (Z :. i :. j)
(
Z
:.
i
:.
i
)
))
zeros
selfMatrix'
::
(
Elt
a
,
P
.
Num
(
Exp
a
))
=>
Array
DIM2
a
->
Matrix
a
selfMatrix'
m'
=
run
$
selfMatrix
n
m
where
n
=
dim
m'
m
=
use
m'
-------------------------------------------------
diagNull
::
Num
a
=>
Dim
->
Acc
(
Matrix
a
)
->
Acc
(
Matrix
a
)
diagNull
n
m
=
zipWith
(
*
)
m
eye
where
eye
=
eyeMatrix
n
m
-------------------------------------------------
crossProduct
::
Dim
->
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
crossProduct
n
m
=
trace
(
P
.
show
(
run
m'
,
run
m''
))
$
zipWith
(
*
)
m'
m''
where
m'
=
cross
n
m
m''
=
cross
n
(
transpose
m
)
crossT
::
Matrix
Double
->
Matrix
Double
crossT
=
run
.
transpose
.
use
crossProduct'
::
Matrix
Double
->
Matrix
Double
crossProduct'
m
=
run
$
crossProduct
n
m'
where
n
=
dim
m
m'
=
use
m
runWith
::
(
Arrays
c
,
Elt
a1
)
=>
(
Dim
->
Acc
(
Matrix
a1
)
->
a2
->
Acc
c
)
->
Matrix
a1
->
a2
->
c
runWith
f
m
=
run
.
f
(
dim
m
)
(
use
m
)
-- | cross
cross
::
Dim
->
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
cross
n
mat
=
diagNull
n
(
matSumCol
n
$
diagNull
n
mat
)
cross'
::
Matrix
Double
->
Matrix
Double
cross'
mat
=
run
$
cross
n
mat'
where
mat'
=
use
mat
n
=
dim
mat
-----------------------------------------------------------------------
-----------------------------------------------------------------------
...
...
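A quick way to exercise the reworked measures from a scratch module, assuming the Matrice module is importable as above and using only the plain Data.Array.Accelerate constructors (the co-occurrence values are invented):

import qualified Data.Array.Accelerate as A
import Gargantext.Viz.Graph.Distances.Matrice (distributional, measureConditional)

main :: IO ()
main = do
  -- a small symmetric co-occurrence matrix, row-major
  let cooc = A.fromList (A.Z A.:. 3 A.:. 3) [0,2,1, 2,0,3, 1,3,0] :: A.Matrix Int
  print (measureConditional cooc)  -- column-normalised probabilities
  print (distributional cooc)      -- mutual-information based similarities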
src/Gargantext/Viz/Graph/Tools.hs

@@ -56,6 +56,7 @@ cooc2graph :: Distance
            -> (Map (Text, Text) Int)
            -> IO Graph
 cooc2graph distance threshold myCooc = do
+   printDebug "cooc2graph" distance
   let
     (ti, _) = createIndices myCooc
     myCooc' = toIndex ti myCooc
src/Gargantext/Viz/Phylo/LevelMaker.hs

@@ -207,7 +207,7 @@ toNthLevel lvlMax prox clus p
   | otherwise = toNthLevel lvlMax prox clus
               $ traceBranches (lvl + 1)
               $ setPhyloBranches (lvl + 1)
-               -- $ transposePeriodLinks (lvl + 1)
+               -- \$ transposePeriodLinks (lvl + 1)
               $ traceTranspose (lvl + 1) Descendant
               $ transposeLinks (lvl + 1) Descendant
               $ traceTranspose (lvl + 1) Ascendant

@@ -230,15 +230,15 @@ toNthLevel lvlMax prox clus p
 toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
 toPhylo1 clus prox d p = case clus of
   Fis (FisParams k s t) -> traceBranches 1
-     -- $ reLinkPhyloBranches 1
-     -- $ traceBranches 1
+     -- \$ reLinkPhyloBranches 1
+     -- \$ traceBranches 1
       $ setPhyloBranches 1
       $ traceTempoMatching Descendant 1
       $ interTempoMatching Descendant 1 prox
       $ traceTempoMatching Ascendant 1
       $ interTempoMatching Ascendant 1 prox
       $ tracePhyloN 1
-     -- $ setLevelLinks (0,1)
+     -- \$ setLevelLinks (0,1)
       $ addPhyloLevel 1 (getPhyloFis phyloFis)
       $ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis
     where
src/Gargantext/Viz/Phylo/LinkMaker.hs

@@ -126,15 +126,15 @@ phyloGroupMatching :: [PhyloPeriodId] -> PhyloGroup -> Phylo -> [Pointer]
 phyloGroupMatching periods g p = case pointers of
     Nothing  -> []
     Just pts -> head' "phyloGroupMatching"
-                  -- | Keep only the best set of pointers grouped by proximity
+                  -- Keep only the best set of pointers grouped by proximity
                   $ groupBy (\pt pt' -> snd pt == snd pt')
                   $ reverse $ sortOn snd pts
-                -- | Find the first time frame where at leats one pointer satisfies the proximity threshold
+                -- Find the first time frame where at leats one pointer satisfies the proximity threshold
     where
         --------------------------------------
         pointers :: Maybe [Pointer]
         pointers = find (not . null)
-                  -- | For each time frame, process the Proximity on relevant pairs of targeted groups
+                  -- For each time frame, process the Proximity on relevant pairs of targeted groups
                   $ scanl (\acc frame ->
                             let pairs = makePairs frame g p
                             in  acc ++ ( filter (\(_,proxi) -> filterProximity proxi (getPhyloProximity p))

@@ -145,7 +145,7 @@ phyloGroupMatching periods g p = case pointers of
                                     if (t == t')
                                       then [(getGroupId t,proxi)]
                                       else [(getGroupId t,proxi),(getGroupId t',proxi)] ) pairs ) ) []
-                  -- | [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
+                  -- [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
                   $ inits periods
         --------------------------------------

@@ -218,8 +218,6 @@ interTempoMatching fil lvl _ p = updateGroups fil lvl (Map.fromList pointers) p
 ------------------------------------------------------------------------
 -- | Make links from Period to Period after level 1
 listToTuple :: (a -> b) -> [a] -> [(b,a)]
 listToTuple f l = map (\x -> (f x, x)) l
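The [[1900],[1900,1901],[1900,1901,1902],...] comment above refers to the growing time frames produced by inits periods; scanl accumulates candidate pointers frame by frame and find picks the first non-empty result. The frame construction on its own, as a tiny sketch (years are illustrative):

import Data.List (inits)

-- Growing prefixes of the period list, oldest first.
timeFrames :: [Int] -> [[Int]]
timeFrames = drop 1 . inits   -- drop the leading empty frame

-- timeFrames [1900,1901,1902] == [[1900],[1900,1901],[1900,1901,1902]]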
src/Gargantext/Viz/Phylo/Metrics.hs

@@ -90,13 +90,13 @@ findDynamics n pv pn m =
         bid = fromJust $ (pn ^. pn_bid)
         end = last' "dynamics" (sort $ map snd $ elems m)
     in  if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
-             -- | emergence
+             -- emergence
             then 2
         else if ((fst prd) == (fst $ m ! n))
-             -- | recombination
+             -- recombination
             then 0
         else if (not $ sharedWithParents (fst prd) bid n pv)
-             -- | decrease
+             -- decrease
             then 1
         else 3

@@ -175,9 +175,3 @@ hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (s
     inter :: Map (Int, Int) Double
     inter = intersection f1 f2
 --------------------------------------
src/Gargantext/Viz/Phylo/PhyloExport.hs

@@ -171,12 +171,12 @@ exportToDot phylo export =
              <> "##########################") $
     digraph ((Str . fromStrict) $ (phyloName $ getConfig phylo)) $ do

-         -- | 1) init the dot graph
+         {- 1) init the dot graph -}
         graphAttrs ( [ Label (toDotLabel $ (phyloName $ getConfig phylo))]
                   <> [ FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank
                      , Splines SplineEdges, Overlap ScaleOverlaps, Ratio FillRatio
                      , Style [SItem Filled []], Color [toWColor White]]
-                   -- | home made attributes
+                   {-- home made attributes -}
                   <> [(toAttr (fromStrict "phyloFoundations") $ pack $ show (length $ Vector.toList $ getRoots phylo))
                      ,(toAttr (fromStrict "phyloTerms") $ pack $ show (length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) $ export ^. export_groups))
                      ,(toAttr (fromStrict "phyloDocs") $ pack $ show (sum $ elems $ phylo ^. phylo_timeDocs))

@@ -185,36 +185,36 @@ exportToDot phylo export =
                      ,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
                      ])
+         {-
         -- toAttr (fromStrict k) $ (pack . unwords) $ map show v
-         -- | 2) create a layer for the branches labels
+         -- 2) create a layer for the branches labels -}
         subgraph (Str "Branches peaks") $ do

             graphAttrs [Rank SameRank]
-             -- | 3) group the branches by hierarchy
+             {-
+             -- 3) group the branches by hierarchy
             -- mapM (\branches ->
             --     subgraph (Str "Branches clade") $ do
             --         graphAttrs [Rank SameRank]
-             --         -- | 4) create a node for each branch
+             --         -- 4) create a node for each branch
             --         mapM branchToDotNode branches
             -- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
+             -}
             mapM (\b -> branchToDotNode b (fromJust $ elemIndex b (export ^. export_branches))) $ export ^. export_branches

-         -- | 5) create a layer for each period
+         {-- 5) create a layer for each period -}
         _ <- mapM (\period ->
                 subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
                     graphAttrs [Rank SameRank]
                     periodToDotNode period

-                     -- | 6) create a node for each group
+                     {-- 6) create a node for each group -}
                     mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches)))
                          (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
             ) $ getPeriodIds phylo

-         -- | 7) create the edges between a branch and its first groups
+         {-- 7) create the edges between a branch and its first groups -}
         _ <- mapM (\(bId,groups) ->
                 mapM (\g -> toDotEdge (branchIdToDotId bId) (groupIdToDotId $ getGroupId g) "" BranchToGroup) groups
             )

@@ -224,31 +224,29 @@
                 $ sortOn (fst . _phylo_groupPeriod) groups)
             $ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups

-         -- | 8) create the edges between the groups
+         {- 8) create the edges between the groups -}
         _ <- mapM (\((k,k'),_) ->
                 toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
             ) $ (toList . mergePointers) $ export ^. export_groups

-         -- | 7) create the edges between the periods
+         {- 7) create the edges between the periods -}
         _ <- mapM (\(prd,prd') ->
                 toDotEdge (periodIdToDotId prd) (periodIdToDotId prd') "" PeriodToPeriod
             ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ getPeriodIds phylo

-         -- | 8) create the edges between the branches
+         {- 8) create the edges between the branches
         -- _ <- mapM (\(bId,bId') ->
         --         toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
         --         (Text.pack $ show(branchIdsToProximity bId bId'
         --            (getThresholdInit $ phyloProximity $ getConfig phylo)
         --            (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
         --     ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
+         -}

         graphAttrs [Rank SameRank]

 ----------------
 -- | Filter | --
 ----------------

@@ -439,13 +437,13 @@ toDynamics n parents g m =
     let prd = g ^. phylo_groupPeriod
         end = last' "dynamics" (sort $ map snd $ elems m)
     in  if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
-             -- | decrease
+             {- decrease -}
             then 2
         else if ((fst prd) == (fst $ m ! n))
-             -- | recombination
+             {- recombination -}
             then 0
         else if isNew
-             -- | emergence
+             {- emergence -}
             then 1
         else 3
     where
src/Gargantext/Viz/Phylo/PhyloMaker.hs

@@ -115,7 +115,7 @@ cliqueToGroup fis pId lvl idx fdt coocs =
                    (fis ^. phyloClique_support)
                    ngrams
                    (ngramsToCooc ngrams coocs)
-                    (1,[0]) -- | branchid (lvl,[path in the branching tree])
+                    (1,[0]) -- branchid (lvl,[path in the branching tree])
                    (fromList [("breaks",[0]),("seaLevels",[0])])
                    [] [] [] []

@@ -142,24 +142,24 @@ toPhylo1 docs phyloBase = case (getSeaElevation phyloBase) of
 ---------------------------

- -- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
+ -- To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
 filterClique :: Bool -> Int -> (Int -> [PhyloClique] -> [PhyloClique])
              -> Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
 filterClique keep thr f m = case keep of
   False -> map (\l -> f thr l) m
   True  -> map (\l -> keepFilled (f) thr l) m

- -- | To filter Fis with small Support
+ -- To filter Fis with small Support
 filterCliqueBySupport :: Int -> [PhyloClique] -> [PhyloClique]
 filterCliqueBySupport thr l = filter (\clq -> (clq ^. phyloClique_support) >= thr) l

- -- | To filter Fis with small Clique size
+ -- To filter Fis with small Clique size
 filterCliqueBySize :: Int -> [PhyloClique] -> [PhyloClique]
 filterCliqueBySize thr l = filter (\clq -> (size $ clq ^. phyloClique_nodes) >= thr) l

- -- | To filter nested Fis
+ -- To filter nested Fis
 filterCliqueByNested :: Map (Date, Date) [PhyloClique] -> Map (Date, Date) [PhyloClique]
 filterCliqueByNested m =
   let clq = map (\l ->

@@ -173,16 +173,16 @@ filterCliqueByNested m =
   in fromList $ zip (keys m) clq'

- -- | To transform a time map of docs innto a time map of Fis with some filters
+ -- To transform a time map of docs innto a time map of Fis with some filters
 toPhyloClique :: Phylo -> Map (Date,Date) [Document] -> Map (Date,Date) [PhyloClique]
 toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
     Fis s s' -> -- traceFis "Filtered Fis"
                 filterCliqueByNested
-               -- $ traceFis "Filtered by clique size"
+               {- \$ traceFis "Filtered by clique size" -}
               $ filterClique True s' (filterCliqueBySize)
-               -- $ traceFis "Filtered by support"
+               {- \$ traceFis "Filtered by support" -}
               $ filterClique True s (filterCliqueBySupport)
-               -- $ traceFis "Unfiltered Fis"
+               {- \$ traceFis "Unfiltered Fis" -}
               phyloClique
     MaxClique _ -> undefined
   where

@@ -204,7 +204,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
     --------------------

- -- | To transform the docs into a time map of coocurency matrix
+ -- To transform the docs into a time map of coocurency matrix
 docsToTimeScaleCooc :: [Document] -> Vector Ngrams -> Map Date Cooc
 docsToTimeScaleCooc docs fdt =
   let mCooc = fromListWith sumCooc

@@ -221,7 +221,7 @@ docsToTimeScaleCooc docs fdt =
 -- | to Phylo Base | --
 -----------------------

- -- | To group a list of Documents by fixed periods
+ -- To group a list of Documents by fixed periods
 groupDocsByPeriod' :: (NFData doc, Ord date, Enum date)
                    => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
 groupDocsByPeriod' f pds docs =
   let docs' = groupBy (\d d' -> f d == f d') $ sortOn f docs

@@ -237,7 +237,7 @@ groupDocsByPeriod' f pds docs =
- -- | To group a list of Documents by fixed periods
+ -- To group a list of Documents by fixed periods
 groupDocsByPeriod :: (NFData doc, Ord date, Enum date)
                   => (doc -> date) -> [(date, date)] -> [doc] -> Map (date, date) [doc]
 groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
 groupDocsByPeriod f pds es =

@@ -265,7 +265,7 @@ docsToTermFreq docs fdt =
   in  map (/sumFreqs) freqs

- -- | To count the number of docs by unit of time
+ -- To count the number of docs by unit of time
 docsToTimeScaleNb :: [Document] -> Map Date Double
 docsToTimeScaleNb docs =
   let docs' = fromListWith (+) $ map (\d -> (date d,1)) docs

@@ -279,7 +279,7 @@ initPhyloLevels lvlMax pId =
   fromList $ map (\lvl -> ((pId,lvl),PhyloLevel pId lvl empty)) [1..lvlMax]

- -- | To init the basic elements of a Phylo
+ -- To init the basic elements of a Phylo
 toPhyloBase :: [Document] -> TermList -> Config -> Phylo
 toPhyloBase docs lst conf =
   let foundations = PhyloFoundations (Vector.fromList $ nub $ concat $ map text docs) lst
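filterClique's keep flag defers to keepFilled, which — as it is used here — appears to apply the thresholded filter but keep a period's original cliques when filtering would leave it empty. A hedged sketch of that behaviour (keepFilled's actual definition lives elsewhere in the code base and may differ):

-- Apply a thresholded filter, but fall back to the unfiltered list when the
-- filter would empty it, so the period stays non-empty.
keepFilledSketch :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
keepFilledSketch f thr l =
  let l' = f thr l
  in  if null l' then l else l'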
src/Gargantext/Viz/Phylo/SynchronicClustering.hs

@@ -36,13 +36,13 @@ import qualified Data.Set as Set
 mergeBranchIds :: [[Int]] -> [Int]
 mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
   where
-     -- | 2) find the most Up Left ids in the hierarchy of similarity
+     -- 2) find the most Up Left ids in the hierarchy of similarity
     -- mostUpLeft :: [[Int]] -> [[Int]]
     -- mostUpLeft ids' =
     --  let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
     --      inf = (fst . minimum) groupIds
     --  in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-     -- | 1) find the most frequent ids
+     -- 1) find the most frequent ids
     mostFreq' :: [[Int]] -> [[Int]]
     mostFreq' ids' =
       let groupIds = (map (\gIds -> (length gIds, head' "gIds" gIds)) . group . sort) ids'

@@ -58,12 +58,12 @@ mergeMeta bId groups =
 groupsToBranches' :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
 groupsToBranches' groups =
-     -- | run the related component algorithm
+     -- run the related component algorithm
     let egos  = map (\g -> [getGroupId g]
                         ++ (map fst $ g ^. phylo_groupPeriodParents)
                         ++ (map fst $ g ^. phylo_groupPeriodChilds)) $ elems groups
         graph = relatedComponents egos
-     -- | update each group's branch id
+     -- update each group's branch id
     in map (\ids ->
         let groups' = elems $ restrictKeys groups (Set.fromList ids)
             bId     = mergeBranchIds $ map (\g -> snd $ g ^. phylo_groupBranchId) groups'

@@ -103,26 +103,26 @@ toNextLevel' phylo groups =
       newGroups = concat $ groupsToBranches'
                 $ fromList $ map (\g -> (getGroupId g, g))
                 $ foldlWithKey (\acc id groups' ->
-                     -- | 4) create the parent group
+                     -- 4) create the parent group
                     let parent = mergeGroups (elems $ restrictKeys (phylo ^. phylo_timeCooc) $ periodsToYears [(fst . fst) id]) id oldGroups groups'
                     in  acc ++ [parent]) []
-                 -- | 3) group the current groups by parentId
+                 -- 3) group the current groups by parentId
                 $ fromListWith (++) $ map (\g -> (getLevelParentId g, [g])) groups

       newPeriods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod, [g])) newGroups
   in  traceSynchronyEnd
     $ over (phylo_periods . traverse . phylo_periodLevels . traverse
-            -- | 6) update each period at curLvl + 1
+            -- 6) update each period at curLvl + 1
           . filtered (\phyloLvl -> phyloLvl ^. phylo_levelLevel == (curLvl + 1)))
-            -- | 7) by adding the parents
+            -- 7) by adding the parents
           (\phyloLvl -> if member (phyloLvl ^. phylo_levelPeriod) newPeriods
                         then phyloLvl & phylo_levelGroups .~ fromList (map (\g -> (getGroupId g, g)) $ newPeriods ! (phyloLvl ^. phylo_levelPeriod))
                         else phyloLvl)
-     -- | 2) add the curLvl + 1 phyloLevel to the phylo
+     -- 2) add the curLvl + 1 phyloLevel to the phylo
     $ addPhyloLevel (curLvl + 1)
-     -- | 1) update the current groups (with level parent pointers) in the phylo
+     -- 1) update the current groups (with level parent pointers) in the phylo
     $ updatePhyloGroups curLvl (fromList $ map (\g -> (getGroupId g, g)) groups) phylo

@@ -187,19 +187,19 @@ toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1),
 reduceGroups :: Proximity -> Synchrony -> Map Date Double -> Map Date Cooc -> [PhyloGroup] -> [PhyloGroup]
 reduceGroups prox sync docs diagos branch =
-     -- | 1) reduce a branch as a set of periods & groups
+     -- 1) reduce a branch as a set of periods & groups
     let periods = fromListWith (++) $ map (\g -> (g ^. phylo_groupPeriod,[g])) branch
     in  (concat . concat . elems)
       $ mapWithKey (\prd groups ->
-           -- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
+           -- 2) for each period, transform the groups as a proximity graph filtered by a threshold
           let diago = reduceDiagos $ filterDiago diagos [prd]
              edges = groupsToEdges prox sync ((sum . elems) $ restrictKeys docs $ periodsToYears [prd]) diago groups
           in  map (\comp ->
-                  -- | 4) add to each groups their futur level parent group
+                  -- 4) add to each groups their futur level parent group
                  let parentId = toParentId (head' "parentId" comp)
                  in  map (\g -> g & phylo_groupLevelParents %~ (++ [(parentId,1)]) ) comp )
-             -- | 3) reduce the graph a a set of related components
+             -- 3) reduce the graph a a set of related components
            $ toRelatedComponents groups edges) periods

@@ -251,4 +251,4 @@ synchronicClustering phylo =
 --                   <> "\n"
 --            ) "" edges
 --      ) "" $ elems $ groupByField _phylo_groupPeriod branch)
- -- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo
\ No newline at end of file
+ -- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo
src/Gargantext/Viz/Phylo/TemporalMatching.hs

@@ -117,7 +117,7 @@ makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs d
       if (null periods)
         then []
         else removeOldPointers oldPointers fil thr prox lastPrd
-            -- | at least on of the pair candidates should be from the last added period
+            {- at least on of the pair candidates should be from the last added period -}
           $ filter (\((id,_),(id',_)) -> ((fst . fst) id == lastPrd) || ((fst . fst) id' == lastPrd))
           $ listToKeys
           $ filter (\(id,ngrams) ->

@@ -143,36 +143,36 @@ phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map
                    -> Double -> [Pointer] -> (PhyloGroupId,[Int]) -> [Pointer]
 phyloGroupMatching candidates fil proxi docs diagos thr oldPointers (id,ngrams) =
       if (null $ filterPointers proxi thr oldPointers)
-         -- | let's find new pointers
+         {- let's find new pointers -}
        then if null nextPointers
          then []
         else head' "phyloGroupMatching"
-             -- | Keep only the best set of pointers grouped by proximity
+             {- Keep only the best set of pointers grouped by proximity -}
            $ groupBy (\pt pt' -> snd pt == snd pt')
           $ reverse $ sortOn snd $ head' "pointers" nextPointers
-            -- | Find the first time frame where at leats one pointer satisfies the proximity threshold
+            {- Find the first time frame where at leats one pointer satisfies the proximity threshold -}
       else oldPointers
   where
     nextPointers :: [[Pointer]]
     nextPointers = take 1
                  $ dropWhile (null)
-                  -- | for each time frame, process the proximity on relevant pairs of targeted groups
+                  {- for each time frame, process the proximity on relevant pairs of targeted groups -}
                  $ scanl (\acc groups ->
                             let periods = nub $ map (fst . fst . fst) $ concat groups
                                 nbdocs  = sum $ elems $ (filterDocs docs ([(fst . fst) id] ++ periods))
                                 diago   = reduceDiagos $ filterDiago diagos ([(fst . fst) id] ++ periods)
-                                          -- | important resize nbdocs et diago dans le make pairs
+                                          {- important resize nbdocs et diago dans le make pairs -}
                                 pairs   = makePairs' (id,ngrams) (concat groups) periods oldPointers fil thr proxi docs diagos
                             in  acc ++ ( filterPointers proxi thr
                                        $ concat
                                        $ map (\(c,c') ->
-                                                 -- | process the proximity between the current group and a pair of candidates
+                                                 {- process the proximity between the current group and a pair of candidates -}
                                                 let proximity = toProximity nbdocs diago proxi ngrams (snd c) (snd c')
                                                 in if (c == c')
                                                   then [(fst c,proximity)]
                                                   else [(fst c,proximity),(fst c',proximity)] ) pairs )) []
-                  $ inits candidates -- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
+                  $ inits candidates {- groups from [[1900],[1900,1901],[1900,1901,1902],...] -}

 filterDocs :: Map Date Double -> [PhyloPeriodId] -> Map Date Double

@@ -205,19 +205,19 @@ matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date
 matchGroupsToGroups frame periods proximity thr docs coocs groups =
   let groups' = groupByField _phylo_groupPeriod groups
   in  foldl' (\acc prd ->
-         let -- | 1) find the parents/childs matching periods
+         let -- 1) find the parents/childs matching periods
             periodsPar = getNextPeriods ToParents frame prd periods
            periodsChi = getNextPeriods ToChilds  frame prd periods
-             -- | 2) find the parents/childs matching candidates
+             -- 2) find the parents/childs matching candidates
            candidatesPar = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsPar
            candidatesChi = map (\prd' -> map (\g -> (getGroupId g, g ^. phylo_groupNgrams)) $ findWithDefault [] prd' groups') periodsChi
-             -- | 3) find the parents/child number of docs by years
+             -- 3) find the parents/child number of docs by years
            docsPar = filterDocs docs ([prd] ++ periodsPar)
            docsChi = filterDocs docs ([prd] ++ periodsChi)
-             -- | 4) find the parents/child diago by years
+             -- 4) find the parents/child diago by years
            diagoPar = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
            diagoChi = filterDiago (map coocToDiago coocs) ([prd] ++ periodsPar)
-             -- | 5) match in parallel all the groups (egos) to their possible candidates
+             -- 5) match in parallel all the groups (egos) to their possible candidates
            egos = map (\ego ->
                    let pointersPar = phyloGroupMatching (getCandidates ego candidatesPar) ToParents proximity docsPar diagoPar
                                      thr (getPeriodPointers ToParents ego) (getGroupId ego, ego ^. phylo_groupNgrams)

@@ -272,19 +272,19 @@ toPhyloQuality' beta freq branches =
 groupsToBranches :: Map PhyloGroupId PhyloGroup -> [[PhyloGroup]]
 groupsToBranches groups =
-     -- | run the related component algorithm
+     -- run the related component algorithm
     let egos = groupBy (\gs gs' -> (fst $ fst $ head' "egos" gs) == (fst $ fst $ head' "egos" gs'))
              $ sortOn  (\gs -> fst $ fst $ head' "egos" gs)
             $ map (\group -> [getGroupId group]
                           ++ (map fst $ group ^. phylo_groupPeriodParents)
                           ++ (map fst $ group ^. phylo_groupPeriodChilds)) $ elems groups
-         -- | first find the related components by inside each ego's period
-         -- | a supprimer
+         -- first find the related components by inside each ego's period
+         -- a supprimer
        graph' = map relatedComponents egos
-         -- | then run it for the all the periods
+         -- then run it for the all the periods
        graph  = zip [1..]
             $ relatedComponents $ concat (graph' `using` parList rdeepseq)
-     -- | update each group's branch id
+     -- update each group's branch id
    in map (\(bId,ids) ->
        let groups' = map (\group -> group & phylo_groupBranchId %~ (\(lvl,lst) -> (lvl,lst ++ [bId])))
                    $ elems $ restrictKeys groups (Set.fromList ids)

@@ -300,14 +300,14 @@ updateThr thr branches = map (\b -> map (\g ->
   g & phylo_groupMeta .~ (singleton "seaLevels" (((g ^. phylo_groupMeta) ! "seaLevels") ++ [thr]))) b) branches

- -- | Sequentially break each branch of a phylo where
+ -- Sequentially break each branch of a phylo where
 --   done = all the allready broken branches
 --   ego  = the current branch we want to break
 --   rest = the branches we still have to break
 breakBranches :: Proximity -> Double -> Map Int Double -> Int -> Double -> Double -> Double
               -> Int -> Map Date Double -> Map Date Cooc -> [PhyloPeriodId]
               -> [([PhyloGroup],Bool)] -> ([PhyloGroup],Bool) -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
 breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods done ego rest =
-   -- | 1) keep or not the new division of ego
+   -- 1) keep or not the new division of ego
   let done' = done ++ (if snd ego
                         then (if ((null (fst ego')) || (quality > quality'))

@@ -325,7 +325,7 @@ breakBranches proximity beta frequency minBranch thr depth elevation frame docs
                                ((map (\e -> (e,True)) (fst ego')) ++ (map (\e -> (e,False)) (snd ego'))))
                        else [ego])
   in
-     -- | 2) if there is no more branches in rest then return else continue
+     -- 2) if there is no more branches in rest then return else continue
     if null rest
       then done'
       else breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods

@@ -352,11 +352,11 @@
 seaLevelMatching :: Proximity -> Double -> Int -> Map Int Double -> Double -> Double -> Double -> Double
                  -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
                  -> [([PhyloGroup],Bool)] -> [([PhyloGroup],Bool)]
 seaLevelMatching proximity beta minBranch frequency thr step depth elevation frame periods docs coocs branches =
-   -- | if there is no branch to break or if seaLvl level > 1 then end
+   -- if there is no branch to break or if seaLvl level > 1 then end
   if (thr >= 1) || ((not . or) $ map snd branches)
     then branches
     else
-       -- | break all the possible branches at the current seaLvl level
+       -- break all the possible branches at the current seaLvl level
       let branches'  = breakBranches proximity beta frequency minBranch thr depth elevation frame docs coocs periods
                                      [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
          frequency' = reduceFrequency frequency (map fst branches')

@@ -368,7 +368,7 @@ constanteTemporalMatching start step phylo = updatePhyloGroups 1
     (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches) phylo
   where
-     -- | 2) process the temporal matching by elevating seaLvl level
+     -- 2) process the temporal matching by elevating seaLvl level
     branches :: [[PhyloGroup]]
     branches = map fst
             $ seaLevelMatching (phyloProximity $ getConfig phylo)

@@ -383,8 +383,8 @@ constanteTemporalMatching start step phylo = updatePhyloGroups 1
                                (phylo ^. phylo_timeDocs)
                                (phylo ^. phylo_timeCooc)
                               groups
-     -- | 1) for each group process an initial temporal Matching
-     -- | here we suppose that all the groups of level 1 are part of the same big branch
+     -- 1) for each group process an initial temporal Matching
+     -- here we suppose that all the groups of level 1 are part of the same big branch
     groups :: [([PhyloGroup],Bool)]
     groups = map (\b -> (b,(length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo)))
           $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))

@@ -441,7 +441,7 @@ adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,Ph
                         -> [PhyloPeriodId] -> [([PhyloGroup],(Bool,[Double]))] -> ([PhyloGroup],(Bool,[Double]))
                         -> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
 adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods done ego rest =
-   -- | 1) keep or not the new division of ego
+   -- 1) keep or not the new division of ego
   let done' = done ++ (if (fst . snd) ego
                         then (if ((null (fst ego')) || (quality > quality'))
                                then

@@ -451,13 +451,13 @@ adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency min
                                 ++ (map (\e -> (e,(False, ((snd . snd) ego)))) (snd ego'))))
                        else [(concat $ thrToMeta thr $ [fst ego], snd ego)])
   in
-     -- | uncomment let .. in for debugging
+     -- uncomment let .. in for debugging
     -- let part1 = partition (snd) done'
     --     part2 = partition (snd) rest
     --  in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
     --          <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
     --           ) $
-     -- | 2) if there is no more branches in rest then return else continue
+     -- 2) if there is no more branches in rest then return else continue
     if null rest
       then done'
      else adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods

@@ -489,11 +489,11 @@ adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId
                            -> Int -> [PhyloPeriodId] -> Map Date Double -> Map Date Cooc
                            -> [([PhyloGroup],(Bool,[Double]))] -> [([PhyloGroup],(Bool,[Double]))]
 adaptativeSeaLevelMatching proxiConf depth elevation groupsProxi beta minBranch frequency frame periods docs coocs branches =
-   -- | if there is no branch to break or if seaLvl level >= depth then end
+   -- if there is no branch to break or if seaLvl level >= depth then end
   if (Map.null groupsProxi) || (depth <= 0) || ((not . or) $ map (fst . snd) branches)
     then branches
     else
-       -- | break all the possible branches at the current seaLvl level
+       -- break all the possible branches at the current seaLvl level
       let branches'  = adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency minBranch frame docs coocs periods
                                                [] (head' "seaLevelMatching" branches) (tail' "seaLevelMatching" branches)
          frequency' = reduceFrequency frequency (map fst branches')

@@ -511,7 +511,7 @@ adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
     (fromList $ map (\g -> (getGroupId g,g)) $ traceMatchEnd $ concat branches) phylo
   where
-     -- | 2) process the temporal matching by elevating seaLvl level
+     -- 2) process the temporal matching by elevating seaLvl level
     branches :: [[PhyloGroup]]
    branches = map fst
            $ adaptativeSeaLevelMatching (phyloProximity $ getConfig phylo)

@@ -526,8 +526,8 @@ adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
                                          (phylo ^. phylo_timeDocs)
                                          (phylo ^. phylo_timeCooc)
                                         groups
-     -- | 1) for each group process an initial temporal Matching
-     -- | here we suppose that all the groups of level 1 are part of the same big branch
+     -- 1) for each group process an initial temporal Matching
+     -- here we suppose that all the groups of level 1 are part of the same big branch
     groups :: [([PhyloGroup],(Bool,[Double]))]
    groups = map (\b -> (b,((length $ nub $ map _phylo_groupPeriod b) >= (_qua_minBranch $ phyloQuality $ getConfig phylo),[thr])))
          $ groupsToBranches $ fromList $ map (\g -> (getGroupId g, g))
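The "keep only the best set of pointers grouped by proximity" step used throughout phyloGroupMatching is a sort-descending / group-equal-scores / take-first-group idiom. Isolated as a sketch, with a plain (id, score) pair standing in for Pointer:

import Data.List (groupBy, sortOn)
import Data.Ord (Down(..))

-- Keep the pointers that share the best (highest) proximity score.
bestPointers :: [(Int, Double)] -> [(Int, Double)]
bestPointers pts =
  case groupBy (\a b -> snd a == snd b) (sortOn (Down . snd) pts) of
    []         -> []
    (best : _) -> best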
stack.yaml

@@ -58,7 +58,6 @@ extra-deps:
     commit: 308c74b71a1abb0a91546fa57d353131248e3a7f
 - Unique-0.4.7.6@sha256:a1ff411f4d68c756e01e8d532fbe8e57f1ac77f2cc0ee8a999770be2bca185c5,2723
 - KMP-0.1.0.2
- - accelerate-1.2.0.1
 - aeson-lens-0.5.0.0
 - deepseq-th-0.1.0.4
 - duckling-0.1.3.0

@@ -84,3 +83,7 @@ extra-deps:
 - password-2.0.1.1
 - base64-0.4.2@sha256:e9523e18bdadc3cab9dc32dfe3ac09c718fe792076326d6d353437b8b255cb5b,2888
 - ghc-byteorder-4.11.0.0.10@sha256:5ee4a907279bfec27b0f9de7b8fba4cecfd34395a0235a7784494de70ad4e98f,1535
+ # Matrix Computation
+ - accelerate-1.2.0.1