Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
150
Issues
150
List
Board
Labels
Milestones
Merge Requests
4
Merge Requests
4
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
eea8b990
Commit
eea8b990
authored
Jun 28, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
some refactoring & adding generality, specifivity, etc for the ngrams
parent
8bdafc9f
Pipeline
#500
canceled with stage
Changes
15
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
160 additions
and
820 deletions
+160
-820
Main.hs
bin/gargantext-phylo/Main.hs
+5
-9
Phylo.hs
src/Gargantext/Viz/Phylo.hs
+1
-0
API.hs
src/Gargantext/Viz/Phylo/API.hs
+2
-1
Cluster.hs
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
+0
-123
Cooc.hs
src/Gargantext/Viz/Phylo/Aggregates/Cooc.hs
+0
-134
Document.hs
src/Gargantext/Viz/Phylo/Aggregates/Document.hs
+0
-64
Fis.hs
src/Gargantext/Viz/Phylo/Aggregates/Fis.hs
+0
-122
BranchMaker.hs
src/Gargantext/Viz/Phylo/BranchMaker.hs
+4
-3
Example.hs
src/Gargantext/Viz/Phylo/Example.hs
+23
-36
LevelMaker.hs
src/Gargantext/Viz/Phylo/LevelMaker.hs
+103
-164
LinkMaker.hs
src/Gargantext/Viz/Phylo/LinkMaker.hs
+2
-2
Clustering.hs
src/Gargantext/Viz/Phylo/Metrics/Clustering.hs
+0
-50
Proximity.hs
src/Gargantext/Viz/Phylo/Metrics/Proximity.hs
+0
-104
Tools.hs
src/Gargantext/Viz/Phylo/Tools.hs
+7
-1
Taggers.hs
src/Gargantext/Viz/Phylo/View/Taggers.hs
+13
-7
No files found.
bin/gargantext-phylo/Main.hs
View file @
eea8b990
...
...
@@ -17,7 +17,7 @@ Phylo binaries
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeOperators
#-}
{-# LANGUAGE Strict #-}
module
Main
where
...
...
@@ -25,7 +25,7 @@ module Main where
import
System.Directory
(
doesFileExist
)
import
Data.Aeson
import
Data.Text
(
Text
,
unwords
)
import
Data.Text
(
Text
,
unwords
,
unlines
)
import
Data.List
((
++
))
import
GHC.Generics
import
GHC.IO
(
FilePath
)
...
...
@@ -198,17 +198,13 @@ main = do
corpus
<-
parse
(
corpusType
conf
)
(
limit
conf
)
(
corpusPath
conf
)
termList
putStrLn
$
(
"
\n
"
<>
show
(
length
corpus
)
<>
" parsed docs"
)
let
roots
=
DL
.
nub
$
DL
.
concat
$
map
text
corpus
putStrLn
$
(
"
\n
"
<>
show
(
length
roots
)
<>
" parsed foundation roots"
)
putStrLn
$
(
"
\n
"
<>
show
(
length
corpus
)
<>
" parsed docs"
)
fis
<-
parseFis
(
fisPath
conf
)
(
phyloName
conf
)
(
timeGrain
conf
)
(
timeStep
conf
)
(
fisSupport
conf
)
(
fisClique
conf
)
putStrLn
$
(
"
\n
"
<>
show
(
length
fis
)
<>
" parsed fis"
)
let
mFis
=
DM
.
fromListWith
(
++
)
$
DL
.
sortOn
(
fst
.
fst
)
$
map
(
\
f
->
(
getFisPeriod
f
,[
f
]))
fis
let
fis'
=
DM
.
fromListWith
(
++
)
$
DL
.
sortOn
(
fst
.
fst
)
$
map
(
\
f
->
(
getFisPeriod
f
,[
f
]))
fis
let
query
=
PhyloQueryBuild
(
phyloName
conf
)
""
(
timeGrain
conf
)
(
timeStep
conf
)
(
Fis
$
FisParams
True
(
fisSupport
conf
)
(
fisClique
conf
))
[]
[]
(
WeightedLogJaccard
$
WLJParams
(
timeTh
conf
)
(
timeSens
conf
))
(
timeFrame
conf
)
(
timeFrameTh
conf
)
...
...
@@ -217,7 +213,7 @@ main = do
let
queryView
=
PhyloQueryView
(
viewLevel
conf
)
Merge
False
1
[
BranchAge
]
[
SizeBranch
$
SBParams
(
minSizeBranch
conf
)]
[
BranchPeakFreq
,
GroupLabelCooc
]
(
Just
(
ByBranchAge
,
Asc
))
Json
Flat
True
let
phylo
=
toPhylo
query
corpus
roots
termList
mFis
let
phylo
=
toPhylo
query
corpus
termList
fis'
writeFis
(
fisPath
conf
)
(
phyloName
conf
)
(
timeGrain
conf
)
(
timeStep
conf
)
(
fisSupport
conf
)
(
fisClique
conf
)
(
getPhyloFis
phylo
)
...
...
src/Gargantext/Viz/Phylo.hs
View file @
eea8b990
...
...
@@ -153,6 +153,7 @@ data PhyloGroup =
PhyloGroup
{
_phylo_groupId
::
PhyloGroupId
,
_phylo_groupLabel
::
Text
,
_phylo_groupNgrams
::
[
Int
]
,
_phylo_groupNgramsMeta
::
Map
Text
[
Double
]
,
_phylo_groupMeta
::
Map
Text
Double
,
_phylo_groupBranchId
::
Maybe
PhyloBranchId
,
_phylo_groupCooc
::
Map
(
Int
,
Int
)
Double
...
...
src/Gargantext/Viz/Phylo/API.hs
View file @
eea8b990
...
...
@@ -32,6 +32,7 @@ import Gargantext.API.Types
import
Gargantext.Database.Types.Node
(
PhyloId
,
ListId
,
CorpusId
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Aggregates
import
Gargantext.Viz.Phylo.Example
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.View.ViewMaker
...
...
@@ -105,7 +106,7 @@ postPhylo _n _lId q = do
vrs
=
Just
(
"1"
::
Text
)
sft
=
Just
(
Software
"Gargantext"
"4"
)
prm
=
initPhyloParam
vrs
sft
(
Just
q
)
pure
(
toPhyloBase
q
prm
corpus
actants
termList
empty
)
pure
(
toPhyloBase
q
prm
(
parseDocs
(
initFoundationsRoots
actants
)
corpus
)
termList
empty
)
------------------------------------------------------------------------
...
...
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
deleted
100644 → 0
View file @
8bdafc9f
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Viz.Phylo.Aggregates.Cluster
where
import
Control.Parallel.Strategies
import
Data.List
(
null
,
concat
,
sort
,
intersect
,(
++
))
import
Data.Map
(
Map
)
import
Data.Tuple
(
fst
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.Metrics.Proximity
import
Gargantext.Viz.Phylo.Metrics.Clustering
import
Gargantext.Viz.Phylo.LinkMaker
import
qualified
Data.Map
as
Map
import
qualified
Data.Vector.Storable
as
VS
import
Debug.Trace
(
trace
)
import
Numeric.Statistics
(
percentile
)
-- | Optimisation to filter only relevant candidates
getCandidates
::
[
PhyloGroup
]
->
[(
PhyloGroup
,
PhyloGroup
)]
getCandidates
gs
=
filter
(
\
(
g
,
g'
)
->
(
not
.
null
)
$
intersect
(
getGroupNgrams
g
)
(
getGroupNgrams
g'
))
$
filter
(
\
(
g
,
g'
)
->
g
/=
g'
)
$
listToDirectedCombi
gs
-- | To transform a Graph into Clusters
graphToClusters
::
Cluster
->
GroupGraph
->
[
PhyloCluster
]
graphToClusters
clust
(
nodes
,
edges
)
=
case
clust
of
Louvain
(
LouvainParams
_
)
->
undefined
RelatedComponents
(
RCParams
_
)
->
relatedComp
$
((
map
(
\
((
g
,
g'
),
_
)
->
[
g
,
g'
])
edges
)
++
(
map
(
\
g
->
[
g
])
nodes
))
_
->
panic
"[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph
::
Double
->
Proximity
->
[
PhyloGroup
]
->
([
GroupNode
],[
GroupEdge
])
groupsToGraph
nbDocs
prox
gs
=
case
prox
of
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
let
candidates
=
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard
sens
nbDocs
(
getGroupCooc
x
)
(
getGroupCooc
y
)
(
getGroupNgrams
x
)
(
getGroupNgrams
y
)))
$
getCandidates
gs
candidates'
=
candidates
`
using
`
parList
rdeepseq
in
candidates'
)
Hamming
(
HammingParams
_
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
hamming
(
getGroupCooc
x
)
(
getGroupCooc
y
)))
$
getCandidates
gs
)
_
->
undefined
-- | To filter a Graph of Proximity using a given threshold
filterGraph
::
Proximity
->
([
GroupNode
],[
GroupEdge
])
->
([
GroupNode
],[
GroupEdge
])
filterGraph
prox
(
ns
,
es
)
=
case
prox
of
WeightedLogJaccard
(
WLJParams
thr
_
)
->
(
ns
,
filter
(
\
(
_
,
v
)
->
v
>=
thr
)
es
)
Hamming
(
HammingParams
thr
)
->
(
ns
,
filter
(
\
(
_
,
v
)
->
v
<=
thr
)
es
)
_
->
undefined
-- | To clusterise a Phylo
phyloToClusters
::
Level
->
Cluster
->
Phylo
->
Map
(
Date
,
Date
)
[
PhyloCluster
]
phyloToClusters
lvl
clus
p
=
Map
.
fromList
$
zip
periods
$
map
(
\
g
->
if
null
(
fst
g
)
then
[]
else
graphToClusters
clus
g
)
graphs'
where
--------------------------------------
graphs'
::
[([
GroupNode
],[
GroupEdge
])]
graphs'
=
traceGraphFiltered
lvl
$
map
(
\
g
->
filterGraph
prox
g
)
graphs
--------------------------------------
graphs
::
[([
GroupNode
],[
GroupEdge
])]
graphs
=
traceGraph
lvl
(
getThreshold
prox
)
$
let
gs
=
map
(
\
prd
->
groupsToGraph
(
periodsToNbDocs
[
prd
]
p
)
prox
(
getGroupsWithFilters
lvl
prd
p
))
periods
gs'
=
gs
`
using
`
parList
rdeepseq
in
gs'
--------------------------------------
prox
::
Proximity
prox
=
getProximity
clus
--------------------------------------
periods
::
[
PhyloPeriodId
]
periods
=
getPhyloPeriods
p
--------------------------------------
----------------
-- | Tracer | --
----------------
traceGraph
::
Level
->
Double
->
[([
GroupNode
],[
GroupEdge
])]
->
[([
GroupNode
],[
GroupEdge
])]
traceGraph
lvl
thr
g
=
trace
(
"----
\n
Unfiltered clustering in Phylo"
<>
show
(
lvl
)
<>
" :
\n
"
<>
"count : "
<>
show
(
length
lst
)
<>
" potential edges ("
<>
show
(
length
$
filter
(
>=
thr
)
lst
)
<>
" >= "
<>
show
(
thr
)
<>
")
\n
"
<>
"similarity : "
<>
show
(
percentile
25
(
VS
.
fromList
lst
))
<>
" (25%) "
<>
show
(
percentile
50
(
VS
.
fromList
lst
))
<>
" (50%) "
<>
show
(
percentile
75
(
VS
.
fromList
lst
))
<>
" (75%) "
<>
show
(
percentile
90
(
VS
.
fromList
lst
))
<>
" (90%)
\n
"
)
g
where
lst
=
sort
$
map
snd
$
concat
$
map
snd
g
traceGraphFiltered
::
Level
->
[([
GroupNode
],[
GroupEdge
])]
->
[([
GroupNode
],[
GroupEdge
])]
traceGraphFiltered
lvl
g
=
trace
(
"----
\n
Clustering in Phylo"
<>
show
(
lvl
)
<>
" :
\n
"
<>
"count : "
<>
show
(
length
lst
)
<>
" edges
\n
"
<>
"similarity : "
<>
show
(
percentile
25
(
VS
.
fromList
lst
))
<>
" (25%) "
<>
show
(
percentile
50
(
VS
.
fromList
lst
))
<>
" (50%) "
<>
show
(
percentile
75
(
VS
.
fromList
lst
))
<>
" (75%) "
<>
show
(
percentile
90
(
VS
.
fromList
lst
))
<>
" (90%)
\n
"
)
g
where
lst
=
sort
$
map
snd
$
concat
$
map
snd
g
src/Gargantext/Viz/Phylo/Aggregates/Cooc.hs
deleted
100644 → 0
View file @
8bdafc9f
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Viz.Phylo.Aggregates.Cooc
where
import
Data.List
(
union
,
concat
,
nub
,
sort
,
sortOn
)
import
Data.Map
(
Map
,
elems
,
adjust
,
filterWithKey
,
fromListWith
,
fromList
,
restrictKeys
)
import
Data.Set
(
Set
)
import
Data.Vector
(
Vector
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
-- import Debug.Trace (trace)
-- | To transform the Fis into a full coocurency Matrix in a Phylo
fisToCooc
::
Map
(
Date
,
Date
)
[
PhyloFis
]
->
Phylo
->
Map
(
Int
,
Int
)
Double
fisToCooc
m
p
=
map
(
/
docs
)
$
foldl
(
\
mem
x
->
adjust
(
+
1
)
x
mem
)
cooc
$
concat
$
map
(
\
x
->
listToDirectedCombiWith
(
\
y
->
getIdxInRoots
y
p
)
$
(
Set
.
toList
.
getClique
)
x
)
$
(
concat
.
elems
)
m
where
--------------------------------------
fisNgrams
::
[
Ngrams
]
fisNgrams
=
foldl
(
\
mem
x
->
union
mem
$
(
Set
.
toList
.
getClique
)
x
)
[]
$
(
concat
.
elems
)
m
--------------------------------------
docs
::
Double
docs
=
fromIntegral
$
foldl
(
\
mem
x
->
mem
+
(
getSupport
x
))
0
$
(
concat
.
elems
)
m
--------------------------------------
cooc
::
Map
(
Int
,
Int
)
(
Double
)
cooc
=
Map
.
fromList
$
map
(
\
x
->
(
x
,
0
))
(
listToDirectedCombiWith
(
\
y
->
getIdxInRoots
y
p
)
fisNgrams
)
--------------------------------------
-- | To transform a tuple of group's information into a coocurency Matrix
toCooc
::
[([
Int
],
Double
)]
->
Map
(
Int
,
Int
)
Double
toCooc
l
=
map
(
/
docs
)
$
foldl
(
\
mem
x
->
adjust
(
+
1
)
x
mem
)
cooc
$
concat
$
map
(
\
x
->
listToFullCombi
$
fst
x
)
l
where
--------------------------------------
idx
::
[
Int
]
idx
=
nub
$
concat
$
map
fst
l
--------------------------------------
docs
::
Double
docs
=
sum
$
map
snd
l
--------------------------------------
cooc
::
Map
(
Int
,
Int
)
(
Double
)
cooc
=
Map
.
fromList
$
map
(
\
x
->
(
x
,
0
))
$
listToFullCombi
idx
--------------------------------------
-- | To reduce a coocurency Matrix to some keys
getSubCooc
::
[
Int
]
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
getSubCooc
idx
cooc
=
filterWithKey
(
\
k
_
->
(
elem
(
fst
k
)
idx
)
&&
(
elem
(
snd
k
)
idx
))
cooc
-- | To get a coocurency Matrix related to a given list of Periods
getCooc
::
[
PhyloPeriodId
]
->
Phylo
->
Map
(
Int
,
Int
)
Double
getCooc
prds
p
=
toCooc
$
map
(
\
g
->
(
getGroupNgrams
g
,
getGroupMeta
"support"
g
))
gs
where
--------------------------------------
-- | Here we need to go back to the level 1 (aka : the Fis level)
gs
::
[
PhyloGroup
]
gs
=
filter
(
\
g
->
elem
(
getGroupPeriod
g
)
prds
)
$
getGroupsWithLevel
1
p
--------------------------------------
-- | To transform a list of index into a cooc matrix
listToCooc
::
[
Int
]
->
Map
(
Int
,
Int
)
Double
listToCooc
lst
=
fromList
$
map
(
\
combi
->
(
combi
,
1
))
$
listToFullCombi
lst
-- | To transform a list of ngrams into a list of indexes
ngramsToIdx
::
[
Ngrams
]
->
Vector
Ngrams
->
[
Int
]
ngramsToIdx
ns
v
=
sort
$
map
(
\
n
->
getIdxInVector
n
v
)
ns
-- | To build the cooc matrix by years out of the corpus
docsToCooc
::
[
Document
]
->
Vector
Ngrams
->
Map
Date
(
Map
(
Int
,
Int
)
Double
)
docsToCooc
docs
fdt
=
fromListWith
sumCooc
$
map
(
\
(
d
,
l
)
->
(
d
,
listToCooc
l
))
$
map
(
\
doc
->
(
date
doc
,
ngramsToIdx
(
text
doc
)
fdt
))
docs
-- | To sum all the docs produced during a list of years
sumDocsByYears
::
Set
Date
->
Map
Date
Double
->
Double
sumDocsByYears
years
m
=
sum
$
elems
$
restrictKeys
m
years
-- | To get the cooc matrix of a group
groupToCooc
::
PhyloGroup
->
Phylo
->
Map
(
Int
,
Int
)
Double
groupToCooc
g
p
=
getMiniCooc
(
listToFullCombi
$
getGroupNgrams
g
)
(
periodsToYears
[
getGroupPeriod
g
])
(
getPhyloCooc
p
)
-- | To get the union of the cooc matrix of two groups
unionOfCooc
::
PhyloGroup
->
PhyloGroup
->
Phylo
->
Map
(
Int
,
Int
)
Double
unionOfCooc
g
g'
p
=
sumCooc
(
groupToCooc
g
p
)
(
groupToCooc
g'
p
)
-- | To get the nth most occurent elems in a coocurency matrix
getNthMostOcc
::
Int
->
Map
(
Int
,
Int
)
Double
->
[
Int
]
getNthMostOcc
nth
cooc
=
(
nub
.
concat
)
$
map
(
\
((
idx
,
idx'
),
_
)
->
[
idx
,
idx'
])
$
take
nth
$
reverse
$
sortOn
snd
$
Map
.
toList
cooc
-- phyloCooc :: Map (Int, Int) Double
-- phyloCooc = fisToCooc phyloFis phylo1_0_1
src/Gargantext/Viz/Phylo/Aggregates/Document.hs
deleted
100644 → 0
View file @
8bdafc9f
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Viz.Phylo.Aggregates.Document
where
import
Data.Map
(
Map
,
fromListWith
)
import
Data.Text
(
Text
)
import
Data.Tuple
(
fst
)
import
Data.Vector
(
Vector
)
import
Gargantext.Prelude
import
Gargantext.Text.Terms.Mono
(
monoTexts
)
import
Gargantext.Viz.Phylo
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Vector
as
Vector
import
Debug.Trace
(
trace
)
-- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods
::
(
Eq
date
,
Enum
date
)
=>
Grain
->
Step
->
(
date
,
date
)
->
[(
date
,
date
)]
initPeriods
g
s
(
start
,
end
)
=
map
(
\
l
->
(
head'
"Doc"
l
,
last'
"Doc"
l
))
$
chunkAlong
g
s
[
start
..
end
]
-- | To group a list of Documents by fixed periods
groupDocsByPeriod
::
(
Ord
date
,
Enum
date
)
=>
(
doc
->
date
)
->
[(
date
,
date
)]
->
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
groupDocsByPeriod
_
_
[]
=
panic
"[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
groupDocsByPeriod
f
pds
es
=
trace
(
"----
\n
Group docs by periods
\n
"
)
$
Map
.
fromList
$
zip
pds
$
map
(
inPeriode
f
es
)
pds
where
--------------------------------------
inPeriode
::
Ord
b
=>
(
t
->
b
)
->
[
t
]
->
(
b
,
b
)
->
[
t
]
inPeriode
f'
h
(
start
,
end
)
=
fst
$
List
.
partition
(
\
d
->
f'
d
>=
start
&&
f'
d
<=
end
)
h
--------------------------------------
-- | To parse a list of Documents by filtering on a Vector of Ngrams
parseDocs
::
Vector
Ngrams
->
[(
Date
,
Text
)]
->
[
Document
]
parseDocs
roots
c
=
map
(
\
(
d
,
t
)
->
Document
d
(
filter
(
\
x
->
Vector
.
elem
x
roots
)
$
monoTexts
t
))
c
-- | To count the number of documents by year
countDocs
::
[(
Date
,
a
)]
->
Map
Date
Double
countDocs
corpus
=
fromListWith
(
+
)
$
map
(
\
(
d
,
_
)
->
(
d
,
1
))
corpus
src/Gargantext/Viz/Phylo/Aggregates/Fis.hs
deleted
100644 → 0
View file @
8bdafc9f
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Viz.Phylo.Aggregates.Fis
where
import
Control.Lens
hiding
(
makeLenses
,
both
,
Level
)
import
Data.List
(
null
,
concat
,
sort
,(
++
))
import
Data.Map
(
Map
,
elems
,
mapWithKey
,
unionWith
,
fromList
,
keys
)
import
Data.Tuple
(
fst
,
snd
)
import
Data.Set
(
size
)
import
Gargantext.Prelude
import
Gargantext.Text.Metrics.FrequentItemSet
(
fisWithSizePolyMap
,
Size
(
..
))
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Vector.Storable
as
Vector
import
Numeric.Statistics
(
percentile
)
import
Debug.Trace
(
trace
)
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterFis
::
Bool
->
Int
->
(
Int
->
[
PhyloFis
]
->
[
PhyloFis
])
->
Map
(
Date
,
Date
)
[
PhyloFis
]
->
Map
(
Date
,
Date
)
[
PhyloFis
]
filterFis
keep
thr
f
m
=
case
keep
of
False
->
Map
.
map
(
\
l
->
f
thr
l
)
m
True
->
Map
.
map
(
\
l
->
keepFilled
(
f
)
thr
l
)
m
-- | To filter Fis with small Support
filterFisBySupport
::
Int
->
[
PhyloFis
]
->
[
PhyloFis
]
filterFisBySupport
thr
l
=
filter
(
\
fis
->
getSupport
fis
>=
thr
)
l
-- | To filter Fis with small Clique size
filterFisByClique
::
Int
->
[
PhyloFis
]
->
[
PhyloFis
]
filterFisByClique
thr
l
=
filter
(
\
fis
->
(
size
$
getClique
fis
)
>=
thr
)
l
-- | To filter nested Fis
filterFisByNested
::
Map
(
Date
,
Date
)
[
PhyloFis
]
->
Map
(
Date
,
Date
)
[
PhyloFis
]
filterFisByNested
=
map
(
\
l
->
let
cliqueMax
=
filterNestedSets
(
head'
"Fis"
$
map
getClique
l
)
(
map
getClique
l
)
[]
in
filter
(
\
fis
->
elem
(
getClique
fis
)
cliqueMax
)
l
)
docsToFis'
::
Map
(
Date
,
Date
)
[
Document
]
->
Phylo
->
Phylo
docsToFis'
m
p
=
if
(
null
$
getPhyloFis
p
)
then
trace
(
"----
\n
Rebuild the Fis from scratch
\n
"
)
$
p
&
phylo_fis
.~
mapWithKey
(
\
k
docs
->
let
fis
=
Map
.
toList
$
fisWithSizePolyMap
(
Segment
1
20
)
1
(
map
text
docs
)
in
map
(
\
f
->
PhyloFis
(
fst
f
)
(
snd
f
)
k
)
fis
)
m
else
trace
(
"----
\n
Use Fis from an existing file
\n
"
)
$
p
&
phylo_fis
%~
(
unionWith
(
++
)
(
fromList
$
map
(
\
k
->
(
k
,
[]
))
$
keys
m
))
toPhyloFis'
::
Map
(
Date
,
Date
)
[
PhyloFis
]
->
Bool
->
Support
->
Int
->
Map
(
Date
,
Date
)
[
PhyloFis
]
toPhyloFis'
fis
k
s
t
=
traceFis
"----
\n
Filtered Fis by clique size :
\n
"
$
filterFis
k
t
(
filterFisByClique
)
$
traceFis
"----
\n
Filtered Fis by nested :
\n
"
$
filterFisByNested
$
traceFis
"----
\n
Filtered Fis by support :
\n
"
$
filterFis
k
s
(
filterFisBySupport
)
$
traceFis
"----
\n
Unfiltered Fis :
\n
"
fis
-----------------
-- | Tracers | --
-----------------
traceFis
::
[
Char
]
->
Map
(
Date
,
Date
)
[
PhyloFis
]
->
Map
(
Date
,
Date
)
[
PhyloFis
]
traceFis
lbl
m
=
trace
(
lbl
<>
"count : "
<>
show
(
sum
$
map
length
$
elems
m
)
<>
" Fis
\n
"
<>
"support : "
<>
show
(
percentile
25
(
Vector
.
fromList
supps
))
<>
" (25%) "
<>
show
(
percentile
50
(
Vector
.
fromList
supps
))
<>
" (50%) "
<>
show
(
percentile
75
(
Vector
.
fromList
supps
))
<>
" (75%) "
<>
show
(
percentile
90
(
Vector
.
fromList
supps
))
<>
" (90%) "
<>
show
(
percentile
100
(
Vector
.
fromList
supps
))
<>
" (100%)
\n
"
<>
" "
<>
show
(
countSup
1
supps
)
<>
" (>1) "
<>
show
(
countSup
2
supps
)
<>
" (>2) "
<>
show
(
countSup
3
supps
)
<>
" (>3) "
<>
show
(
countSup
4
supps
)
<>
" (>4) "
<>
show
(
countSup
5
supps
)
<>
" (>5) "
<>
show
(
countSup
6
supps
)
<>
" (>6)
\n
"
<>
"clique size : "
<>
show
(
percentile
25
(
Vector
.
fromList
ngrms
))
<>
" (25%) "
<>
show
(
percentile
50
(
Vector
.
fromList
ngrms
))
<>
" (50%) "
<>
show
(
percentile
75
(
Vector
.
fromList
ngrms
))
<>
" (75%) "
<>
show
(
percentile
90
(
Vector
.
fromList
ngrms
))
<>
" (90%) "
<>
show
(
percentile
100
(
Vector
.
fromList
ngrms
))
<>
" (100%)
\n
"
<>
" "
<>
show
(
countSup
1
ngrms
)
<>
" (>1) "
<>
show
(
countSup
2
ngrms
)
<>
" (>2) "
<>
show
(
countSup
3
ngrms
)
<>
" (>3) "
<>
show
(
countSup
4
ngrms
)
<>
" (>4) "
<>
show
(
countSup
5
ngrms
)
<>
" (>5) "
<>
show
(
countSup
6
ngrms
)
<>
" (>6)
\n
"
)
m
where
--------------------------------------
countSup
::
Double
->
[
Double
]
->
Int
countSup
s
l
=
length
$
filter
(
>
s
)
l
--------------------------------------
supps
::
[
Double
]
supps
=
sort
$
map
(
fromIntegral
.
_phyloFis_support
)
$
concat
$
elems
m
--------------------------------------
ngrms
::
[
Double
]
ngrms
=
sort
$
map
(
\
f
->
fromIntegral
$
Set
.
size
$
_phyloFis_clique
f
)
$
concat
$
elems
m
--------------------------------------
\ No newline at end of file
src/Gargantext/Viz/Phylo/BranchMaker.hs
View file @
eea8b990
...
...
@@ -19,13 +19,14 @@ module Gargantext.Viz.Phylo.BranchMaker
import
Control.Parallel.Strategies
import
Control.Lens
hiding
(
both
,
Level
)
import
Data.List
(
concat
,
nub
,(
++
),
tail
,
sortOn
,
take
,
reverse
,
sort
,
null
,
intersect
,
union
,
delete
)
import
Data.List
(
concat
,
nub
,(
++
),
sortOn
,
reverse
,
sort
,
null
,
intersect
,
union
,
delete
)
import
Data.Map
(
Map
,(
!
),
fromListWith
,
elems
)
import
Data.Tuple
(
fst
,
snd
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Metrics.Clustering
import
Gargantext.Viz.Phylo.Aggregates.Cooc
import
Gargantext.Viz.Phylo.Cluster
import
Gargantext.Viz.Phylo.Aggregates
import
Gargantext.Viz.Phylo.Metrics
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.LinkMaker
...
...
src/Gargantext/Viz/Phylo/Example.hs
View file @
eea8b990
...
...
@@ -29,19 +29,16 @@ TODO:
module
Gargantext.Viz.Phylo.Example
where
import
Data.GraphViz.Types.Generalised
(
DotGraph
)
import
Data.Text
(
Text
)
import
Data.List
((
++
)
,
last
)
import
Data.Text
(
Text
,
toLower
)
import
Data.List
((
++
))
import
Data.Map
(
Map
,
empty
)
import
Data.Tuple
(
fst
)
import
Data.Tuple.Extra
import
Data.Vector
(
Vector
)
import
Gargantext.Prelude
import
Gargantext.Text.Context
(
TermList
)
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Aggregates.Cluster
import
Gargantext.Viz.Phylo.Aggregates.Document
import
Gargantext.Viz.Phylo.Aggregates.Cooc
import
Gargantext.Viz.Phylo.Aggregates.Fis
import
Gargantext.Viz.Phylo.Cluster
import
Gargantext.Viz.Phylo.Aggregates
import
Gargantext.Viz.Phylo.BranchMaker
import
Gargantext.Viz.Phylo.LevelMaker
import
Gargantext.Viz.Phylo.LinkMaker
...
...
@@ -78,7 +75,7 @@ queryViewEx = "level=3"
phyloQueryView
::
PhyloQueryView
phyloQueryView
=
PhyloQueryView
2
Merge
False
1
[
BranchAge
]
[]
[
BranchPeakFreq
,
GroupLabelCooc
]
(
Just
(
ByBranchAge
,
Asc
))
Json
Flat
True
phyloQueryView
=
PhyloQueryView
2
Merge
False
2
[
BranchAge
]
[]
[
BranchPeakFreq
,
GroupLabelCooc
]
(
Just
(
ByBranchAge
,
Asc
))
Json
Flat
True
--------------------------------------------------
...
...
@@ -87,7 +84,7 @@ phyloQueryView = PhyloQueryView 2 Merge False 1 [BranchAge] [] [BranchPeakFreq,G
phyloFromQuery
::
Phylo
phyloFromQuery
=
toPhylo
(
queryParser
queryEx
)
corpus
actant
s
termList
empty
phyloFromQuery
=
toPhylo
phyloQueryBuild
doc
s
termList
empty
-- | To do : create a request handler and a query parser
queryParser
::
[
Char
]
->
PhyloQueryBuild
...
...
@@ -105,7 +102,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild
::
PhyloQueryBuild
phyloQueryBuild
=
PhyloQueryBuild
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.6
20
)
5
0.8
0.5
4
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.4
0
)
3
1
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.6
20
)
5
0.8
0.5
4
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.4
0
)
...
...
@@ -155,7 +152,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster
::
Map
(
Date
,
Date
)
[
PhyloCluster
]
phyloCluster
=
phyloToClusters
3
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.05
10
)
phyloBranch1
phyloCluster
=
phyloToClusters
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.05
10
)
phyloBranch1
----------------------------------
...
...
@@ -186,30 +183,27 @@ phylo1_p = interTempoMatching Ascendant 1 defaultWeightedLogJaccard phylo1_0_1
phylo1_0_1
::
Phylo
phylo1_0_1
=
setLevelLinks
(
0
,
1
)
phylo1
_1_0
phylo1_0_1
=
setLevelLinks
(
0
,
1
)
phylo1
phylo1_1_0
::
Phylo
phylo1_1_0
=
setLevelLinks
(
1
,
0
)
phylo1
--
phylo1_1_0 :: Phylo
--
phylo1_1_0 = setLevelLinks (1,0) phylo1
phylo1
::
Phylo
phylo1
=
addPhyloLevel
(
1
)
phyloFis
phylo
phylo1
=
addPhyloLevel
(
1
)
phyloFis
phylo
'
-------------------------------------------------------------------
-- | STEP 5 | -- Create lists of Frequent Items Set and filter them
-------------------------------------------------------------------
phyloFis
::
Map
(
Date
,
Date
)
[
PhyloFis
]
phyloFis
=
filterFis
True
1
(
filterFisByClique
)
$
filterFisByNested
$
filterFis
True
1
(
filterFisBySupport
)
(
getPhyloFis
phylo'
)
phyloFis
=
refineFis
(
getPhyloFis
phylo'
)
True
1
1
phylo'
::
Phylo
phylo'
=
docsToFis
'
phyloDocs
phylo
phylo'
=
docsToFis
phyloDocs
phylo
----------------------------------------
-- | STEP 2 | -- Init a Phylo of level 0
...
...
@@ -221,7 +215,7 @@ phylo = addPhyloLevel 0 phyloDocs phyloBase
phyloDocs
::
Map
(
Date
,
Date
)
[
Document
]
phyloDocs
=
corpusToDocs
corpus
phyloBase
phyloDocs
=
groupDocsByPeriod
date
(
getPhyloPeriods
phyloBase
)
docs
------------------------------------------------------------------------
...
...
@@ -229,32 +223,25 @@ phyloDocs = corpusToDocs corpus phyloBase
------------------------------------------------------------------------
phyloBase
::
Phylo
phyloBase
=
initPhyloBase
periods
(
PhyloFoundations
foundationsRoots
termList
)
nbDocs
cooc
empty
defaultPhyloParam
cooc
::
Map
Date
(
Map
(
Int
,
Int
)
Double
)
cooc
=
docsToCooc
(
parseDocs
foundationsRoots
corpus
)
foundationsRoots
phyloBase
::
Phylo
phyloBase
=
toPhyloBase
phyloQueryBuild
phyloParam
docs
termList
empty
nbDocs
::
Map
Date
Double
nbDocs
=
countDocs
corpus
phyloParam
::
PhyloParam
phyloParam
=
(
initPhyloParam
(
Just
defaultPhyloVersion
)
(
Just
defaultSoftware
)
(
Just
phyloQueryBuild
))
periods
::
[(
Date
,
Date
)]
periods
=
initPeriods
5
3
$
both
fst
(
head'
"Example"
corpus
,
last
corpus
)
docs
::
[
Document
]
docs
=
parseDocs
foundationsRoots
corpus
foundationsRoots
::
Vector
Ngrams
foundationsRoots
=
initFoundationsRoots
actants
foundationsRoots
=
initFoundationsRoots
(
termListToNgrams
termList
)
--------------------------------------------
-- | STEP 0 | -- Let's start with an example
--------------------------------------------
-- this is a comment
termList
::
TermList
termList
=
[]
termList
=
map
(
\
a
->
([
toLower
a
],
[]
))
actants
actants
::
[
Ngrams
]
actants
=
[
"Cleopatre"
,
"Ptolemee"
,
"Ptolemee-XIII"
,
"Ptolemee-XIV"
...
...
src/Gargantext/Viz/Phylo/LevelMaker.hs
View file @
eea8b990
This diff is collapsed.
Click to expand it.
src/Gargantext/Viz/Phylo/LinkMaker.hs
View file @
eea8b990
...
...
@@ -19,13 +19,13 @@ module Gargantext.Viz.Phylo.LinkMaker
import
Control.Parallel.Strategies
import
Control.Lens
hiding
(
both
,
Level
)
import
Data.List
((
++
),
sortOn
,
null
,
tail
,
splitAt
,
elem
,
concat
,
delete
,
intersect
,
nub
,
groupBy
,
union
,
inits
,
scanl
,
find
)
import
Data.List
((
++
),
sortOn
,
null
,
tail
,
splitAt
,
elem
,
concat
,
delete
,
intersect
,
groupBy
,
union
,
inits
,
scanl
,
find
)
import
Data.Tuple.Extra
import
Data.Map
(
Map
,(
!
),
fromListWith
,
elems
,
restrictKeys
,
unionWith
,
member
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.Metrics
.Proximity
import
Gargantext.Viz.Phylo.Metrics
import
qualified
Data.List
as
List
import
qualified
Data.Maybe
as
Maybe
import
qualified
Data.Map
as
Map
...
...
src/Gargantext/Viz/Phylo/Metrics/Clustering.hs
deleted
100644 → 0
View file @
8bdafc9f
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Viz.Phylo.Metrics.Clustering
where
import
Data.Graph.Clustering.Louvain.CplusPlus
import
Data.List
(
concat
,
null
,
nub
,(
++
),
elemIndex
,
groupBy
,(
!!
),
(
\\
),
union
,
intersect
)
import
Data.Map
(
fromList
,
mapKeys
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
-- import Gargantext.Viz.Phylo.Tools
-- import Debug.Trace (trace)
relatedComp
::
Eq
a
=>
[[
a
]]
->
[[
a
]]
relatedComp
graphs
=
foldl'
(
\
mem
groups
->
if
(
null
mem
)
then
mem
++
[
groups
]
else
let
related
=
filter
(
\
groups'
->
(
not
.
null
)
$
intersect
groups
groups'
)
mem
in
if
(
null
related
)
then
mem
++
[
groups
]
else
(
mem
\\
related
)
++
[
union
groups
(
nub
$
concat
related
)]
)
[]
graphs
louvain
::
([
GroupNode
],[
GroupEdge
])
->
IO
[[
PhyloGroup
]]
louvain
(
nodes
,
edges
)
=
map
(
\
community
->
map
(
\
node
->
nodes
!!
(
l_node_id
node
))
community
)
<$>
groupBy
(
\
a
b
->
(
l_community_id
a
)
==
(
l_community_id
b
))
<$>
(
cLouvain
$
mapKeys
(
\
(
x
,
y
)
->
(
idx
x
,
idx
y
))
$
fromList
edges
)
where
--------------------------------------
idx
::
PhyloGroup
->
Int
idx
e
=
case
elemIndex
e
nodes
of
Nothing
->
panic
"[ERR][Gargantext.Viz.Phylo.Metrics.Clustering] a node is missing"
Just
i
->
i
--------------------------------------
src/Gargantext/Viz/Phylo/Metrics/Proximity.hs
deleted
100644 → 0
View file @
8bdafc9f
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Viz.Phylo.Metrics.Proximity
where
import
Data.List
(
null
,
union
,
intersect
)
import
Data.Map
(
Map
,
elems
,
unionWith
,
intersectionWith
,
intersection
,
size
,
filterWithKey
)
import
Gargantext.Prelude
-- import Debug.Trace (trace)
sumInvLog
::
Double
->
[
Double
]
->
Double
sumInvLog
s
l
=
foldl
(
\
mem
x
->
mem
+
(
1
/
log
(
s
+
x
)))
0
l
sumLog
::
Double
->
[
Double
]
->
Double
sumLog
s
l
=
foldl
(
\
mem
x
->
mem
+
log
(
s
+
x
))
0
l
-- -- | To process WeighedLogJaccard distance between to coocurency matrix
-- weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double -> Double
-- weightedLogJaccard sens cooc cooc' nbDocs
-- | null union' = 0
-- | union' == inter' = 1
-- | sens == 0 = (fromIntegral $ length $ keysInter) / (fromIntegral $ length $ keysUnion)
-- | sens > 0 = (sumInvLog sens $ elems wInter) / (sumInvLog sens $ elems wUnion)
-- | otherwise = (sumLog sens $ elems wInter) / (sumLog sens $ elems wUnion)
-- where
-- --------------------------------------
-- keysInter :: [Int]
-- keysInter = nub $ concat $ map (\(x,x') -> [x,x']) $ keys inter'
-- --------------------------------------
-- keysUnion :: [Int]
-- keysUnion = nub $ concat $ map (\(x,x') -> [x,x']) $ keys union'
-- --------------------------------------
-- wInter :: Map (Int,Int) Double
-- wInter = map (/nbDocs) inter'
-- --------------------------------------
-- wUnion :: Map (Int,Int) Double
-- wUnion = map (/nbDocs) union'
-- --------------------------------------
-- inter' :: Map (Int, Int) Double
-- inter' = intersectionWith (+) cooc cooc'
-- --------------------------------------
-- union' :: Map (Int, Int) Double
-- union' = unionWith (+) cooc cooc'
-- --------------------------------------
-- | To compute a jaccard similarity between two lists
jaccard
::
[
Int
]
->
[
Int
]
->
Double
jaccard
inter'
union'
=
((
fromIntegral
.
length
)
$
inter'
)
/
((
fromIntegral
.
length
)
$
union'
)
-- | To get the diagonal of a matrix
toDiago
::
Map
(
Int
,
Int
)
Double
->
[
Double
]
toDiago
cooc
=
elems
$
filterWithKey
(
\
(
x
,
x'
)
_
->
x
==
x'
)
cooc
-- | To process WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard
::
Double
->
Double
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
[
Int
]
->
[
Int
]
->
Double
weightedLogJaccard
sens
nbDocs
cooc
cooc'
ngrams
ngrams'
|
null
gInter
=
0
|
gInter
==
gUnion
=
1
|
sens
==
0
=
jaccard
gInter
gUnion
|
sens
>
0
=
(
sumInvLog
sens
wInter
)
/
(
sumInvLog
sens
wUnion
)
|
otherwise
=
(
sumLog
sens
wInter
)
/
(
sumLog
sens
wUnion
)
where
--------------------------------------
gInter
::
[
Int
]
gInter
=
intersect
ngrams
ngrams'
--------------------------------------
gUnion
::
[
Int
]
gUnion
=
union
ngrams
ngrams'
--------------------------------------
wInter
::
[
Double
]
wInter
=
toDiago
$
map
(
/
nbDocs
)
$
intersectionWith
(
+
)
cooc
cooc'
--------------------------------------
wUnion
::
[
Double
]
wUnion
=
toDiago
$
map
(
/
nbDocs
)
$
unionWith
(
+
)
cooc
cooc'
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields
hamming
::
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Double
hamming
f1
f2
=
fromIntegral
$
max
((
size
inter
)
-
(
size
f1
))
((
size
inter
)
-
(
size
f2
))
where
--------------------------------------
inter
::
Map
(
Int
,
Int
)
Double
inter
=
intersection
f1
f2
--------------------------------------
src/Gargantext/Viz/Phylo/Tools.hs
View file @
eea8b990
...
...
@@ -139,7 +139,7 @@ listToUnDirectedCombiWith f l = [ (f x, f y) | (x:rest) <- tails l, y <- rest ]
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel
::
Vector
Ngrams
->
[
Int
]
->
Text
ngramsToLabel
ngrams
l
=
unwords
$
ngramsToText
ngrams
l
ngramsToLabel
ngrams
l
=
unwords
$
tail'
"ngramsToLabel"
$
concat
$
map
(
\
n
->
[
"|"
,
n
])
$
ngramsToText
ngrams
l
-- | To transform a list of Ngrams Indexes into a list of Text
...
...
@@ -147,6 +147,11 @@ ngramsToText :: Vector Ngrams -> [Int] -> [Text]
ngramsToText
ngrams
l
=
map
(
\
idx
->
ngrams
Vector
.!
idx
)
l
-- | To transform a list of ngrams into a list of indexes
ngramsToIdx
::
[
Ngrams
]
->
Vector
Ngrams
->
[
Int
]
ngramsToIdx
ns
v
=
sort
$
map
(
\
n
->
getIdxInVector
n
v
)
ns
-- | To unify the keys (x,y) that Map 1 share with Map 2 such as: (x,y) <=> (y,x)
unifySharedKeys
::
Eq
a
=>
Ord
a
=>
Map
(
a
,
a
)
b
->
Map
(
a
,
a
)
b
->
Map
(
a
,
a
)
b
unifySharedKeys
m1
m2
=
mapKeys
(
\
(
x
,
y
)
->
if
member
(
y
,
x
)
m2
...
...
@@ -439,6 +444,7 @@ initGroup ngrams lbl idx lvl from' to' p = PhyloGroup
lbl
idxs
(
Map
.
empty
)
(
Map
.
empty
)
Nothing
(
getMiniCooc
(
listToFullCombi
idxs
)
(
periodsToYears
[(
from'
,
to'
)])
(
getPhyloCooc
p
))
[]
[]
[]
[]
...
...
src/Gargantext/Viz/Phylo/View/Taggers.hs
View file @
eea8b990
...
...
@@ -18,17 +18,17 @@ module Gargantext.Viz.Phylo.View.Taggers
where
import
Control.Lens
hiding
(
makeLenses
,
both
,
Level
)
import
Data.List
(
concat
,
nub
,
groupBy
,
sortOn
,
sort
)
import
Data.List
(
concat
,
nub
,
groupBy
,
sortOn
,
sort
,
(
!!
),
take
)
import
Data.Text
(
Text
)
import
Data.Tuple
(
fst
,
snd
)
import
Data.Vector
(
Vector
)
import
Data.Map
(
Map
)
import
Data.Map
(
Map
,
(
!
)
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.BranchMaker
import
qualified
Data.Map
as
Map
--
import Debug.Trace (trace)
import
Debug.Trace
(
trace
)
-- | To get the nth most frequent Ngrams in a list of PhyloGroups
...
...
@@ -82,14 +82,20 @@ branchPeakCooc v nth p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$
getNodesByBranches
v
getNthMostMeta
::
Int
->
Text
->
PhyloGroup
->
[
Int
]
getNthMostMeta
nth
meta
g
=
map
(
\
(
idx
,
_
)
->
(
getGroupNgrams
g
!!
idx
))
$
take
nth
$
sortOn
snd
$
zip
[
0
..
]
$
(
g
^.
phylo_groupNgramsMeta
)
!
meta
-- | To set the label of a PhyloNode as the nth most coocurent terms of its PhyloNodes
nodeLabelCooc
::
PhyloView
->
Int
->
Phylo
->
PhyloView
nodeLabelCooc
v
thr
p
=
over
(
pv_nodes
.
traverse
)
(
\
n
->
let
lbl
=
ngramsToLabel
(
getFoundationsRoots
p
)
$
mostOccNgrams
thr
$
head'
"nodeLabelCooc"
$
getGroupsFromIds
[
getNodeId
n
]
p
in
n
&
pn_label
.~
lbl
)
v
(
\
n
->
let
g
=
head'
"nodeLabelCooc"
$
getGroupsFromIds
[
getNodeId
n
]
p
lbl'
=
ngramsToLabel
(
getFoundationsRoots
p
)
$
getNthMostMeta
thr
"coverage"
g
in
trace
(
show
(
lbl'
))
$
n
&
pn_label
.~
lbl'
)
v
-- | To process a sorted list of Taggers to a PhyloView
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment