Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
148
Issues
148
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
eea8b990
Commit
eea8b990
authored
Jun 28, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
some refactoring & adding generality, specifivity, etc for the ngrams
parent
8bdafc9f
Pipeline
#500
canceled with stage
Changes
15
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
160 additions
and
820 deletions
+160
-820
Main.hs
bin/gargantext-phylo/Main.hs
+5
-9
Phylo.hs
src/Gargantext/Viz/Phylo.hs
+1
-0
API.hs
src/Gargantext/Viz/Phylo/API.hs
+2
-1
Cluster.hs
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
+0
-123
Cooc.hs
src/Gargantext/Viz/Phylo/Aggregates/Cooc.hs
+0
-134
Document.hs
src/Gargantext/Viz/Phylo/Aggregates/Document.hs
+0
-64
Fis.hs
src/Gargantext/Viz/Phylo/Aggregates/Fis.hs
+0
-122
BranchMaker.hs
src/Gargantext/Viz/Phylo/BranchMaker.hs
+4
-3
Example.hs
src/Gargantext/Viz/Phylo/Example.hs
+23
-36
LevelMaker.hs
src/Gargantext/Viz/Phylo/LevelMaker.hs
+103
-164
LinkMaker.hs
src/Gargantext/Viz/Phylo/LinkMaker.hs
+2
-2
Clustering.hs
src/Gargantext/Viz/Phylo/Metrics/Clustering.hs
+0
-50
Proximity.hs
src/Gargantext/Viz/Phylo/Metrics/Proximity.hs
+0
-104
Tools.hs
src/Gargantext/Viz/Phylo/Tools.hs
+7
-1
Taggers.hs
src/Gargantext/Viz/Phylo/View/Taggers.hs
+13
-7
No files found.
bin/gargantext-phylo/Main.hs
View file @
eea8b990
...
...
@@ -17,7 +17,7 @@ Phylo binaries
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE TypeOperators
#-}
{-# LANGUAGE Strict #-}
module
Main
where
...
...
@@ -25,7 +25,7 @@ module Main where
import
System.Directory
(
doesFileExist
)
import
Data.Aeson
import
Data.Text
(
Text
,
unwords
)
import
Data.Text
(
Text
,
unwords
,
unlines
)
import
Data.List
((
++
))
import
GHC.Generics
import
GHC.IO
(
FilePath
)
...
...
@@ -198,17 +198,13 @@ main = do
corpus
<-
parse
(
corpusType
conf
)
(
limit
conf
)
(
corpusPath
conf
)
termList
putStrLn
$
(
"
\n
"
<>
show
(
length
corpus
)
<>
" parsed docs"
)
let
roots
=
DL
.
nub
$
DL
.
concat
$
map
text
corpus
putStrLn
$
(
"
\n
"
<>
show
(
length
roots
)
<>
" parsed foundation roots"
)
putStrLn
$
(
"
\n
"
<>
show
(
length
corpus
)
<>
" parsed docs"
)
fis
<-
parseFis
(
fisPath
conf
)
(
phyloName
conf
)
(
timeGrain
conf
)
(
timeStep
conf
)
(
fisSupport
conf
)
(
fisClique
conf
)
putStrLn
$
(
"
\n
"
<>
show
(
length
fis
)
<>
" parsed fis"
)
let
mFis
=
DM
.
fromListWith
(
++
)
$
DL
.
sortOn
(
fst
.
fst
)
$
map
(
\
f
->
(
getFisPeriod
f
,[
f
]))
fis
let
fis'
=
DM
.
fromListWith
(
++
)
$
DL
.
sortOn
(
fst
.
fst
)
$
map
(
\
f
->
(
getFisPeriod
f
,[
f
]))
fis
let
query
=
PhyloQueryBuild
(
phyloName
conf
)
""
(
timeGrain
conf
)
(
timeStep
conf
)
(
Fis
$
FisParams
True
(
fisSupport
conf
)
(
fisClique
conf
))
[]
[]
(
WeightedLogJaccard
$
WLJParams
(
timeTh
conf
)
(
timeSens
conf
))
(
timeFrame
conf
)
(
timeFrameTh
conf
)
...
...
@@ -217,7 +213,7 @@ main = do
let
queryView
=
PhyloQueryView
(
viewLevel
conf
)
Merge
False
1
[
BranchAge
]
[
SizeBranch
$
SBParams
(
minSizeBranch
conf
)]
[
BranchPeakFreq
,
GroupLabelCooc
]
(
Just
(
ByBranchAge
,
Asc
))
Json
Flat
True
let
phylo
=
toPhylo
query
corpus
roots
termList
mFis
let
phylo
=
toPhylo
query
corpus
termList
fis'
writeFis
(
fisPath
conf
)
(
phyloName
conf
)
(
timeGrain
conf
)
(
timeStep
conf
)
(
fisSupport
conf
)
(
fisClique
conf
)
(
getPhyloFis
phylo
)
...
...
src/Gargantext/Viz/Phylo.hs
View file @
eea8b990
...
...
@@ -153,6 +153,7 @@ data PhyloGroup =
PhyloGroup
{
_phylo_groupId
::
PhyloGroupId
,
_phylo_groupLabel
::
Text
,
_phylo_groupNgrams
::
[
Int
]
,
_phylo_groupNgramsMeta
::
Map
Text
[
Double
]
,
_phylo_groupMeta
::
Map
Text
Double
,
_phylo_groupBranchId
::
Maybe
PhyloBranchId
,
_phylo_groupCooc
::
Map
(
Int
,
Int
)
Double
...
...
src/Gargantext/Viz/Phylo/API.hs
View file @
eea8b990
...
...
@@ -32,6 +32,7 @@ import Gargantext.API.Types
import
Gargantext.Database.Types.Node
(
PhyloId
,
ListId
,
CorpusId
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Aggregates
import
Gargantext.Viz.Phylo.Example
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.View.ViewMaker
...
...
@@ -105,7 +106,7 @@ postPhylo _n _lId q = do
vrs
=
Just
(
"1"
::
Text
)
sft
=
Just
(
Software
"Gargantext"
"4"
)
prm
=
initPhyloParam
vrs
sft
(
Just
q
)
pure
(
toPhyloBase
q
prm
corpus
actants
termList
empty
)
pure
(
toPhyloBase
q
prm
(
parseDocs
(
initFoundationsRoots
actants
)
corpus
)
termList
empty
)
------------------------------------------------------------------------
...
...
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
deleted
100644 → 0
View file @
8bdafc9f
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Viz.Phylo.Aggregates.Cluster
where
import
Control.Parallel.Strategies
import
Data.List
(
null
,
concat
,
sort
,
intersect
,(
++
))
import
Data.Map
(
Map
)
import
Data.Tuple
(
fst
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.Metrics.Proximity
import
Gargantext.Viz.Phylo.Metrics.Clustering
import
Gargantext.Viz.Phylo.LinkMaker
import
qualified
Data.Map
as
Map
import
qualified
Data.Vector.Storable
as
VS
import
Debug.Trace
(
trace
)
import
Numeric.Statistics
(
percentile
)
-- | Optimisation to filter only relevant candidates
getCandidates
::
[
PhyloGroup
]
->
[(
PhyloGroup
,
PhyloGroup
)]
getCandidates
gs
=
filter
(
\
(
g
,
g'
)
->
(
not
.
null
)
$
intersect
(
getGroupNgrams
g
)
(
getGroupNgrams
g'
))
$
filter
(
\
(
g
,
g'
)
->
g
/=
g'
)
$
listToDirectedCombi
gs
-- | To transform a Graph into Clusters
graphToClusters
::
Cluster
->
GroupGraph
->
[
PhyloCluster
]
graphToClusters
clust
(
nodes
,
edges
)
=
case
clust
of
Louvain
(
LouvainParams
_
)
->
undefined
RelatedComponents
(
RCParams
_
)
->
relatedComp
$
((
map
(
\
((
g
,
g'
),
_
)
->
[
g
,
g'
])
edges
)
++
(
map
(
\
g
->
[
g
])
nodes
))
_
->
panic
"[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph
::
Double
->
Proximity
->
[
PhyloGroup
]
->
([
GroupNode
],[
GroupEdge
])
groupsToGraph
nbDocs
prox
gs
=
case
prox
of
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
let
candidates
=
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard
sens
nbDocs
(
getGroupCooc
x
)
(
getGroupCooc
y
)
(
getGroupNgrams
x
)
(
getGroupNgrams
y
)))
$
getCandidates
gs
candidates'
=
candidates
`
using
`
parList
rdeepseq
in
candidates'
)
Hamming
(
HammingParams
_
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
hamming
(
getGroupCooc
x
)
(
getGroupCooc
y
)))
$
getCandidates
gs
)
_
->
undefined
-- | To filter a Graph of Proximity using a given threshold
filterGraph
::
Proximity
->
([
GroupNode
],[
GroupEdge
])
->
([
GroupNode
],[
GroupEdge
])
filterGraph
prox
(
ns
,
es
)
=
case
prox
of
WeightedLogJaccard
(
WLJParams
thr
_
)
->
(
ns
,
filter
(
\
(
_
,
v
)
->
v
>=
thr
)
es
)
Hamming
(
HammingParams
thr
)
->
(
ns
,
filter
(
\
(
_
,
v
)
->
v
<=
thr
)
es
)
_
->
undefined
-- | To clusterise a Phylo
phyloToClusters
::
Level
->
Cluster
->
Phylo
->
Map
(
Date
,
Date
)
[
PhyloCluster
]
phyloToClusters
lvl
clus
p
=
Map
.
fromList
$
zip
periods
$
map
(
\
g
->
if
null
(
fst
g
)
then
[]
else
graphToClusters
clus
g
)
graphs'
where
--------------------------------------
graphs'
::
[([
GroupNode
],[
GroupEdge
])]
graphs'
=
traceGraphFiltered
lvl
$
map
(
\
g
->
filterGraph
prox
g
)
graphs
--------------------------------------
graphs
::
[([
GroupNode
],[
GroupEdge
])]
graphs
=
traceGraph
lvl
(
getThreshold
prox
)
$
let
gs
=
map
(
\
prd
->
groupsToGraph
(
periodsToNbDocs
[
prd
]
p
)
prox
(
getGroupsWithFilters
lvl
prd
p
))
periods
gs'
=
gs
`
using
`
parList
rdeepseq
in
gs'
--------------------------------------
prox
::
Proximity
prox
=
getProximity
clus
--------------------------------------
periods
::
[
PhyloPeriodId
]
periods
=
getPhyloPeriods
p
--------------------------------------
----------------
-- | Tracer | --
----------------
traceGraph
::
Level
->
Double
->
[([
GroupNode
],[
GroupEdge
])]
->
[([
GroupNode
],[
GroupEdge
])]
traceGraph
lvl
thr
g
=
trace
(
"----
\n
Unfiltered clustering in Phylo"
<>
show
(
lvl
)
<>
" :
\n
"
<>
"count : "
<>
show
(
length
lst
)
<>
" potential edges ("
<>
show
(
length
$
filter
(
>=
thr
)
lst
)
<>
" >= "
<>
show
(
thr
)
<>
")
\n
"
<>
"similarity : "
<>
show
(
percentile
25
(
VS
.
fromList
lst
))
<>
" (25%) "
<>
show
(
percentile
50
(
VS
.
fromList
lst
))
<>
" (50%) "
<>
show
(
percentile
75
(
VS
.
fromList
lst
))
<>
" (75%) "
<>
show
(
percentile
90
(
VS
.
fromList
lst
))
<>
" (90%)
\n
"
)
g
where
lst
=
sort
$
map
snd
$
concat
$
map
snd
g
traceGraphFiltered
::
Level
->
[([
GroupNode
],[
GroupEdge
])]
->
[([
GroupNode
],[
GroupEdge
])]
traceGraphFiltered
lvl
g
=
trace
(
"----
\n
Clustering in Phylo"
<>
show
(
lvl
)
<>
" :
\n
"
<>
"count : "
<>
show
(
length
lst
)
<>
" edges
\n
"
<>
"similarity : "
<>
show
(
percentile
25
(
VS
.
fromList
lst
))
<>
" (25%) "
<>
show
(
percentile
50
(
VS
.
fromList
lst
))
<>
" (50%) "
<>
show
(
percentile
75
(
VS
.
fromList
lst
))
<>
" (75%) "
<>
show
(
percentile
90
(
VS
.
fromList
lst
))
<>
" (90%)
\n
"
)
g
where
lst
=
sort
$
map
snd
$
concat
$
map
snd
g
src/Gargantext/Viz/Phylo/Aggregates/Cooc.hs
deleted
100644 → 0
View file @
8bdafc9f
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Viz.Phylo.Aggregates.Cooc
where
import
Data.List
(
union
,
concat
,
nub
,
sort
,
sortOn
)
import
Data.Map
(
Map
,
elems
,
adjust
,
filterWithKey
,
fromListWith
,
fromList
,
restrictKeys
)
import
Data.Set
(
Set
)
import
Data.Vector
(
Vector
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
-- import Debug.Trace (trace)
-- | To transform the Fis into a full coocurency Matrix in a Phylo
fisToCooc
::
Map
(
Date
,
Date
)
[
PhyloFis
]
->
Phylo
->
Map
(
Int
,
Int
)
Double
fisToCooc
m
p
=
map
(
/
docs
)
$
foldl
(
\
mem
x
->
adjust
(
+
1
)
x
mem
)
cooc
$
concat
$
map
(
\
x
->
listToDirectedCombiWith
(
\
y
->
getIdxInRoots
y
p
)
$
(
Set
.
toList
.
getClique
)
x
)
$
(
concat
.
elems
)
m
where
--------------------------------------
fisNgrams
::
[
Ngrams
]
fisNgrams
=
foldl
(
\
mem
x
->
union
mem
$
(
Set
.
toList
.
getClique
)
x
)
[]
$
(
concat
.
elems
)
m
--------------------------------------
docs
::
Double
docs
=
fromIntegral
$
foldl
(
\
mem
x
->
mem
+
(
getSupport
x
))
0
$
(
concat
.
elems
)
m
--------------------------------------
cooc
::
Map
(
Int
,
Int
)
(
Double
)
cooc
=
Map
.
fromList
$
map
(
\
x
->
(
x
,
0
))
(
listToDirectedCombiWith
(
\
y
->
getIdxInRoots
y
p
)
fisNgrams
)
--------------------------------------
-- | To transform a tuple of group's information into a coocurency Matrix
toCooc
::
[([
Int
],
Double
)]
->
Map
(
Int
,
Int
)
Double
toCooc
l
=
map
(
/
docs
)
$
foldl
(
\
mem
x
->
adjust
(
+
1
)
x
mem
)
cooc
$
concat
$
map
(
\
x
->
listToFullCombi
$
fst
x
)
l
where
--------------------------------------
idx
::
[
Int
]
idx
=
nub
$
concat
$
map
fst
l
--------------------------------------
docs
::
Double
docs
=
sum
$
map
snd
l
--------------------------------------
cooc
::
Map
(
Int
,
Int
)
(
Double
)
cooc
=
Map
.
fromList
$
map
(
\
x
->
(
x
,
0
))
$
listToFullCombi
idx
--------------------------------------
-- | To reduce a coocurency Matrix to some keys
getSubCooc
::
[
Int
]
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
getSubCooc
idx
cooc
=
filterWithKey
(
\
k
_
->
(
elem
(
fst
k
)
idx
)
&&
(
elem
(
snd
k
)
idx
))
cooc
-- | To get a coocurency Matrix related to a given list of Periods
getCooc
::
[
PhyloPeriodId
]
->
Phylo
->
Map
(
Int
,
Int
)
Double
getCooc
prds
p
=
toCooc
$
map
(
\
g
->
(
getGroupNgrams
g
,
getGroupMeta
"support"
g
))
gs
where
--------------------------------------
-- | Here we need to go back to the level 1 (aka : the Fis level)
gs
::
[
PhyloGroup
]
gs
=
filter
(
\
g
->
elem
(
getGroupPeriod
g
)
prds
)
$
getGroupsWithLevel
1
p
--------------------------------------
-- | To transform a list of index into a cooc matrix
listToCooc
::
[
Int
]
->
Map
(
Int
,
Int
)
Double
listToCooc
lst
=
fromList
$
map
(
\
combi
->
(
combi
,
1
))
$
listToFullCombi
lst
-- | To transform a list of ngrams into a list of indexes
ngramsToIdx
::
[
Ngrams
]
->
Vector
Ngrams
->
[
Int
]
ngramsToIdx
ns
v
=
sort
$
map
(
\
n
->
getIdxInVector
n
v
)
ns
-- | To build the cooc matrix by years out of the corpus
docsToCooc
::
[
Document
]
->
Vector
Ngrams
->
Map
Date
(
Map
(
Int
,
Int
)
Double
)
docsToCooc
docs
fdt
=
fromListWith
sumCooc
$
map
(
\
(
d
,
l
)
->
(
d
,
listToCooc
l
))
$
map
(
\
doc
->
(
date
doc
,
ngramsToIdx
(
text
doc
)
fdt
))
docs
-- | To sum all the docs produced during a list of years
sumDocsByYears
::
Set
Date
->
Map
Date
Double
->
Double
sumDocsByYears
years
m
=
sum
$
elems
$
restrictKeys
m
years
-- | To get the cooc matrix of a group
groupToCooc
::
PhyloGroup
->
Phylo
->
Map
(
Int
,
Int
)
Double
groupToCooc
g
p
=
getMiniCooc
(
listToFullCombi
$
getGroupNgrams
g
)
(
periodsToYears
[
getGroupPeriod
g
])
(
getPhyloCooc
p
)
-- | To get the union of the cooc matrix of two groups
unionOfCooc
::
PhyloGroup
->
PhyloGroup
->
Phylo
->
Map
(
Int
,
Int
)
Double
unionOfCooc
g
g'
p
=
sumCooc
(
groupToCooc
g
p
)
(
groupToCooc
g'
p
)
-- | To get the nth most occurent elems in a coocurency matrix
getNthMostOcc
::
Int
->
Map
(
Int
,
Int
)
Double
->
[
Int
]
getNthMostOcc
nth
cooc
=
(
nub
.
concat
)
$
map
(
\
((
idx
,
idx'
),
_
)
->
[
idx
,
idx'
])
$
take
nth
$
reverse
$
sortOn
snd
$
Map
.
toList
cooc
-- phyloCooc :: Map (Int, Int) Double
-- phyloCooc = fisToCooc phyloFis phylo1_0_1
src/Gargantext/Viz/Phylo/Aggregates/Document.hs
deleted
100644 → 0
View file @
8bdafc9f
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Viz.Phylo.Aggregates.Document
where
import
Data.Map
(
Map
,
fromListWith
)
import
Data.Text
(
Text
)
import
Data.Tuple
(
fst
)
import
Data.Vector
(
Vector
)
import
Gargantext.Prelude
import
Gargantext.Text.Terms.Mono
(
monoTexts
)
import
Gargantext.Viz.Phylo
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Data.Vector
as
Vector
import
Debug.Trace
(
trace
)
-- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods
::
(
Eq
date
,
Enum
date
)
=>
Grain
->
Step
->
(
date
,
date
)
->
[(
date
,
date
)]
initPeriods
g
s
(
start
,
end
)
=
map
(
\
l
->
(
head'
"Doc"
l
,
last'
"Doc"
l
))
$
chunkAlong
g
s
[
start
..
end
]
-- | To group a list of Documents by fixed periods
groupDocsByPeriod
::
(
Ord
date
,
Enum
date
)
=>
(
doc
->
date
)
->
[(
date
,
date
)]
->
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
groupDocsByPeriod
_
_
[]
=
panic
"[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
groupDocsByPeriod
f
pds
es
=
trace
(
"----
\n
Group docs by periods
\n
"
)
$
Map
.
fromList
$
zip
pds
$
map
(
inPeriode
f
es
)
pds
where
--------------------------------------
inPeriode
::
Ord
b
=>
(
t
->
b
)
->
[
t
]
->
(
b
,
b
)
->
[
t
]
inPeriode
f'
h
(
start
,
end
)
=
fst
$
List
.
partition
(
\
d
->
f'
d
>=
start
&&
f'
d
<=
end
)
h
--------------------------------------
-- | To parse a list of Documents by filtering on a Vector of Ngrams
parseDocs
::
Vector
Ngrams
->
[(
Date
,
Text
)]
->
[
Document
]
parseDocs
roots
c
=
map
(
\
(
d
,
t
)
->
Document
d
(
filter
(
\
x
->
Vector
.
elem
x
roots
)
$
monoTexts
t
))
c
-- | To count the number of documents by year
countDocs
::
[(
Date
,
a
)]
->
Map
Date
Double
countDocs
corpus
=
fromListWith
(
+
)
$
map
(
\
(
d
,
_
)
->
(
d
,
1
))
corpus
src/Gargantext/Viz/Phylo/Aggregates/Fis.hs
deleted
100644 → 0
View file @
8bdafc9f
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Viz.Phylo.Aggregates.Fis
where
import
Control.Lens
hiding
(
makeLenses
,
both
,
Level
)
import
Data.List
(
null
,
concat
,
sort
,(
++
))
import
Data.Map
(
Map
,
elems
,
mapWithKey
,
unionWith
,
fromList
,
keys
)
import
Data.Tuple
(
fst
,
snd
)
import
Data.Set
(
size
)
import
Gargantext.Prelude
import
Gargantext.Text.Metrics.FrequentItemSet
(
fisWithSizePolyMap
,
Size
(
..
))
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Vector.Storable
as
Vector
import
Numeric.Statistics
(
percentile
)
import
Debug.Trace
(
trace
)
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterFis
::
Bool
->
Int
->
(
Int
->
[
PhyloFis
]
->
[
PhyloFis
])
->
Map
(
Date
,
Date
)
[
PhyloFis
]
->
Map
(
Date
,
Date
)
[
PhyloFis
]
filterFis
keep
thr
f
m
=
case
keep
of
False
->
Map
.
map
(
\
l
->
f
thr
l
)
m
True
->
Map
.
map
(
\
l
->
keepFilled
(
f
)
thr
l
)
m
-- | To filter Fis with small Support
filterFisBySupport
::
Int
->
[
PhyloFis
]
->
[
PhyloFis
]
filterFisBySupport
thr
l
=
filter
(
\
fis
->
getSupport
fis
>=
thr
)
l
-- | To filter Fis with small Clique size
filterFisByClique
::
Int
->
[
PhyloFis
]
->
[
PhyloFis
]
filterFisByClique
thr
l
=
filter
(
\
fis
->
(
size
$
getClique
fis
)
>=
thr
)
l
-- | To filter nested Fis
filterFisByNested
::
Map
(
Date
,
Date
)
[
PhyloFis
]
->
Map
(
Date
,
Date
)
[
PhyloFis
]
filterFisByNested
=
map
(
\
l
->
let
cliqueMax
=
filterNestedSets
(
head'
"Fis"
$
map
getClique
l
)
(
map
getClique
l
)
[]
in
filter
(
\
fis
->
elem
(
getClique
fis
)
cliqueMax
)
l
)
docsToFis'
::
Map
(
Date
,
Date
)
[
Document
]
->
Phylo
->
Phylo
docsToFis'
m
p
=
if
(
null
$
getPhyloFis
p
)
then
trace
(
"----
\n
Rebuild the Fis from scratch
\n
"
)
$
p
&
phylo_fis
.~
mapWithKey
(
\
k
docs
->
let
fis
=
Map
.
toList
$
fisWithSizePolyMap
(
Segment
1
20
)
1
(
map
text
docs
)
in
map
(
\
f
->
PhyloFis
(
fst
f
)
(
snd
f
)
k
)
fis
)
m
else
trace
(
"----
\n
Use Fis from an existing file
\n
"
)
$
p
&
phylo_fis
%~
(
unionWith
(
++
)
(
fromList
$
map
(
\
k
->
(
k
,
[]
))
$
keys
m
))
toPhyloFis'
::
Map
(
Date
,
Date
)
[
PhyloFis
]
->
Bool
->
Support
->
Int
->
Map
(
Date
,
Date
)
[
PhyloFis
]
toPhyloFis'
fis
k
s
t
=
traceFis
"----
\n
Filtered Fis by clique size :
\n
"
$
filterFis
k
t
(
filterFisByClique
)
$
traceFis
"----
\n
Filtered Fis by nested :
\n
"
$
filterFisByNested
$
traceFis
"----
\n
Filtered Fis by support :
\n
"
$
filterFis
k
s
(
filterFisBySupport
)
$
traceFis
"----
\n
Unfiltered Fis :
\n
"
fis
-----------------
-- | Tracers | --
-----------------
traceFis
::
[
Char
]
->
Map
(
Date
,
Date
)
[
PhyloFis
]
->
Map
(
Date
,
Date
)
[
PhyloFis
]
traceFis
lbl
m
=
trace
(
lbl
<>
"count : "
<>
show
(
sum
$
map
length
$
elems
m
)
<>
" Fis
\n
"
<>
"support : "
<>
show
(
percentile
25
(
Vector
.
fromList
supps
))
<>
" (25%) "
<>
show
(
percentile
50
(
Vector
.
fromList
supps
))
<>
" (50%) "
<>
show
(
percentile
75
(
Vector
.
fromList
supps
))
<>
" (75%) "
<>
show
(
percentile
90
(
Vector
.
fromList
supps
))
<>
" (90%) "
<>
show
(
percentile
100
(
Vector
.
fromList
supps
))
<>
" (100%)
\n
"
<>
" "
<>
show
(
countSup
1
supps
)
<>
" (>1) "
<>
show
(
countSup
2
supps
)
<>
" (>2) "
<>
show
(
countSup
3
supps
)
<>
" (>3) "
<>
show
(
countSup
4
supps
)
<>
" (>4) "
<>
show
(
countSup
5
supps
)
<>
" (>5) "
<>
show
(
countSup
6
supps
)
<>
" (>6)
\n
"
<>
"clique size : "
<>
show
(
percentile
25
(
Vector
.
fromList
ngrms
))
<>
" (25%) "
<>
show
(
percentile
50
(
Vector
.
fromList
ngrms
))
<>
" (50%) "
<>
show
(
percentile
75
(
Vector
.
fromList
ngrms
))
<>
" (75%) "
<>
show
(
percentile
90
(
Vector
.
fromList
ngrms
))
<>
" (90%) "
<>
show
(
percentile
100
(
Vector
.
fromList
ngrms
))
<>
" (100%)
\n
"
<>
" "
<>
show
(
countSup
1
ngrms
)
<>
" (>1) "
<>
show
(
countSup
2
ngrms
)
<>
" (>2) "
<>
show
(
countSup
3
ngrms
)
<>
" (>3) "
<>
show
(
countSup
4
ngrms
)
<>
" (>4) "
<>
show
(
countSup
5
ngrms
)
<>
" (>5) "
<>
show
(
countSup
6
ngrms
)
<>
" (>6)
\n
"
)
m
where
--------------------------------------
countSup
::
Double
->
[
Double
]
->
Int
countSup
s
l
=
length
$
filter
(
>
s
)
l
--------------------------------------
supps
::
[
Double
]
supps
=
sort
$
map
(
fromIntegral
.
_phyloFis_support
)
$
concat
$
elems
m
--------------------------------------
ngrms
::
[
Double
]
ngrms
=
sort
$
map
(
\
f
->
fromIntegral
$
Set
.
size
$
_phyloFis_clique
f
)
$
concat
$
elems
m
--------------------------------------
\ No newline at end of file
src/Gargantext/Viz/Phylo/BranchMaker.hs
View file @
eea8b990
...
...
@@ -19,13 +19,14 @@ module Gargantext.Viz.Phylo.BranchMaker
import
Control.Parallel.Strategies
import
Control.Lens
hiding
(
both
,
Level
)
import
Data.List
(
concat
,
nub
,(
++
),
tail
,
sortOn
,
take
,
reverse
,
sort
,
null
,
intersect
,
union
,
delete
)
import
Data.List
(
concat
,
nub
,(
++
),
sortOn
,
reverse
,
sort
,
null
,
intersect
,
union
,
delete
)
import
Data.Map
(
Map
,(
!
),
fromListWith
,
elems
)
import
Data.Tuple
(
fst
,
snd
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Metrics.Clustering
import
Gargantext.Viz.Phylo.Aggregates.Cooc
import
Gargantext.Viz.Phylo.Cluster
import
Gargantext.Viz.Phylo.Aggregates
import
Gargantext.Viz.Phylo.Metrics
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.LinkMaker
...
...
src/Gargantext/Viz/Phylo/Example.hs
View file @
eea8b990
...
...
@@ -29,19 +29,16 @@ TODO:
module
Gargantext.Viz.Phylo.Example
where
import
Data.GraphViz.Types.Generalised
(
DotGraph
)
import
Data.Text
(
Text
)
import
Data.List
((
++
)
,
last
)
import
Data.Text
(
Text
,
toLower
)
import
Data.List
((
++
))
import
Data.Map
(
Map
,
empty
)
import
Data.Tuple
(
fst
)
import
Data.Tuple.Extra
import
Data.Vector
(
Vector
)
import
Gargantext.Prelude
import
Gargantext.Text.Context
(
TermList
)
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Aggregates.Cluster
import
Gargantext.Viz.Phylo.Aggregates.Document
import
Gargantext.Viz.Phylo.Aggregates.Cooc
import
Gargantext.Viz.Phylo.Aggregates.Fis
import
Gargantext.Viz.Phylo.Cluster
import
Gargantext.Viz.Phylo.Aggregates
import
Gargantext.Viz.Phylo.BranchMaker
import
Gargantext.Viz.Phylo.LevelMaker
import
Gargantext.Viz.Phylo.LinkMaker
...
...
@@ -78,7 +75,7 @@ queryViewEx = "level=3"
phyloQueryView
::
PhyloQueryView
phyloQueryView
=
PhyloQueryView
2
Merge
False
1
[
BranchAge
]
[]
[
BranchPeakFreq
,
GroupLabelCooc
]
(
Just
(
ByBranchAge
,
Asc
))
Json
Flat
True
phyloQueryView
=
PhyloQueryView
2
Merge
False
2
[
BranchAge
]
[]
[
BranchPeakFreq
,
GroupLabelCooc
]
(
Just
(
ByBranchAge
,
Asc
))
Json
Flat
True
--------------------------------------------------
...
...
@@ -87,7 +84,7 @@ phyloQueryView = PhyloQueryView 2 Merge False 1 [BranchAge] [] [BranchPeakFreq,G
phyloFromQuery
::
Phylo
phyloFromQuery
=
toPhylo
(
queryParser
queryEx
)
corpus
actant
s
termList
empty
phyloFromQuery
=
toPhylo
phyloQueryBuild
doc
s
termList
empty
-- | To do : create a request handler and a query parser
queryParser
::
[
Char
]
->
PhyloQueryBuild
...
...
@@ -105,7 +102,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild
::
PhyloQueryBuild
phyloQueryBuild
=
PhyloQueryBuild
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.6
20
)
5
0.8
0.5
4
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.4
0
)
3
1
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.6
20
)
5
0.8
0.5
4
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.4
0
)
...
...
@@ -155,7 +152,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster
::
Map
(
Date
,
Date
)
[
PhyloCluster
]
phyloCluster
=
phyloToClusters
3
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.05
10
)
phyloBranch1
phyloCluster
=
phyloToClusters
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.05
10
)
phyloBranch1
----------------------------------
...
...
@@ -186,30 +183,27 @@ phylo1_p = interTempoMatching Ascendant 1 defaultWeightedLogJaccard phylo1_0_1
phylo1_0_1
::
Phylo
phylo1_0_1
=
setLevelLinks
(
0
,
1
)
phylo1
_1_0
phylo1_0_1
=
setLevelLinks
(
0
,
1
)
phylo1
phylo1_1_0
::
Phylo
phylo1_1_0
=
setLevelLinks
(
1
,
0
)
phylo1
--
phylo1_1_0 :: Phylo
--
phylo1_1_0 = setLevelLinks (1,0) phylo1
phylo1
::
Phylo
phylo1
=
addPhyloLevel
(
1
)
phyloFis
phylo
phylo1
=
addPhyloLevel
(
1
)
phyloFis
phylo
'
-------------------------------------------------------------------
-- | STEP 5 | -- Create lists of Frequent Items Set and filter them
-------------------------------------------------------------------
phyloFis
::
Map
(
Date
,
Date
)
[
PhyloFis
]
phyloFis
=
filterFis
True
1
(
filterFisByClique
)
$
filterFisByNested
$
filterFis
True
1
(
filterFisBySupport
)
(
getPhyloFis
phylo'
)
phyloFis
=
refineFis
(
getPhyloFis
phylo'
)
True
1
1
phylo'
::
Phylo
phylo'
=
docsToFis
'
phyloDocs
phylo
phylo'
=
docsToFis
phyloDocs
phylo
----------------------------------------
-- | STEP 2 | -- Init a Phylo of level 0
...
...
@@ -221,7 +215,7 @@ phylo = addPhyloLevel 0 phyloDocs phyloBase
phyloDocs
::
Map
(
Date
,
Date
)
[
Document
]
phyloDocs
=
corpusToDocs
corpus
phyloBase
phyloDocs
=
groupDocsByPeriod
date
(
getPhyloPeriods
phyloBase
)
docs
------------------------------------------------------------------------
...
...
@@ -229,32 +223,25 @@ phyloDocs = corpusToDocs corpus phyloBase
------------------------------------------------------------------------
phyloBase
::
Phylo
phyloBase
=
initPhyloBase
periods
(
PhyloFoundations
foundationsRoots
termList
)
nbDocs
cooc
empty
defaultPhyloParam
cooc
::
Map
Date
(
Map
(
Int
,
Int
)
Double
)
cooc
=
docsToCooc
(
parseDocs
foundationsRoots
corpus
)
foundationsRoots
phyloBase
::
Phylo
phyloBase
=
toPhyloBase
phyloQueryBuild
phyloParam
docs
termList
empty
nbDocs
::
Map
Date
Double
nbDocs
=
countDocs
corpus
phyloParam
::
PhyloParam
phyloParam
=
(
initPhyloParam
(
Just
defaultPhyloVersion
)
(
Just
defaultSoftware
)
(
Just
phyloQueryBuild
))
periods
::
[(
Date
,
Date
)]
periods
=
initPeriods
5
3
$
both
fst
(
head'
"Example"
corpus
,
last
corpus
)
docs
::
[
Document
]
docs
=
parseDocs
foundationsRoots
corpus
foundationsRoots
::
Vector
Ngrams
foundationsRoots
=
initFoundationsRoots
actants
foundationsRoots
=
initFoundationsRoots
(
termListToNgrams
termList
)
--------------------------------------------
-- | STEP 0 | -- Let's start with an example
--------------------------------------------
-- this is a comment
termList
::
TermList
termList
=
[]
termList
=
map
(
\
a
->
([
toLower
a
],
[]
))
actants
actants
::
[
Ngrams
]
actants
=
[
"Cleopatre"
,
"Ptolemee"
,
"Ptolemee-XIII"
,
"Ptolemee-XIV"
...
...
src/Gargantext/Viz/Phylo/LevelMaker.hs
View file @
eea8b990
This diff is collapsed.
Click to expand it.
src/Gargantext/Viz/Phylo/LinkMaker.hs
View file @
eea8b990
...
...
@@ -19,13 +19,13 @@ module Gargantext.Viz.Phylo.LinkMaker
import
Control.Parallel.Strategies
import
Control.Lens
hiding
(
both
,
Level
)
import
Data.List
((
++
),
sortOn
,
null
,
tail
,
splitAt
,
elem
,
concat
,
delete
,
intersect
,
nub
,
groupBy
,
union
,
inits
,
scanl
,
find
)
import
Data.List
((
++
),
sortOn
,
null
,
tail
,
splitAt
,
elem
,
concat
,
delete
,
intersect
,
groupBy
,
union
,
inits
,
scanl
,
find
)
import
Data.Tuple.Extra
import
Data.Map
(
Map
,(
!
),
fromListWith
,
elems
,
restrictKeys
,
unionWith
,
member
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.Metrics
.Proximity
import
Gargantext.Viz.Phylo.Metrics
import
qualified
Data.List
as
List
import
qualified
Data.Maybe
as
Maybe
import
qualified
Data.Map
as
Map
...
...
src/Gargantext/Viz/Phylo/Metrics/Clustering.hs
deleted
100644 → 0
View file @
8bdafc9f
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Viz.Phylo.Metrics.Clustering
where
import
Data.Graph.Clustering.Louvain.CplusPlus
import
Data.List
(
concat
,
null
,
nub
,(
++
),
elemIndex
,
groupBy
,(
!!
),
(
\\
),
union
,
intersect
)
import
Data.Map
(
fromList
,
mapKeys
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
-- import Gargantext.Viz.Phylo.Tools
-- import Debug.Trace (trace)
relatedComp
::
Eq
a
=>
[[
a
]]
->
[[
a
]]
relatedComp
graphs
=
foldl'
(
\
mem
groups
->
if
(
null
mem
)
then
mem
++
[
groups
]
else
let
related
=
filter
(
\
groups'
->
(
not
.
null
)
$
intersect
groups
groups'
)
mem
in
if
(
null
related
)
then
mem
++
[
groups
]
else
(
mem
\\
related
)
++
[
union
groups
(
nub
$
concat
related
)]
)
[]
graphs
louvain
::
([
GroupNode
],[
GroupEdge
])
->
IO
[[
PhyloGroup
]]
louvain
(
nodes
,
edges
)
=
map
(
\
community
->
map
(
\
node
->
nodes
!!
(
l_node_id
node
))
community
)
<$>
groupBy
(
\
a
b
->
(
l_community_id
a
)
==
(
l_community_id
b
))
<$>
(
cLouvain
$
mapKeys
(
\
(
x
,
y
)
->
(
idx
x
,
idx
y
))
$
fromList
edges
)
where
--------------------------------------
idx
::
PhyloGroup
->
Int
idx
e
=
case
elemIndex
e
nodes
of
Nothing
->
panic
"[ERR][Gargantext.Viz.Phylo.Metrics.Clustering] a node is missing"
Just
i
->
i
--------------------------------------
src/Gargantext/Viz/Phylo/Metrics/Proximity.hs
deleted
100644 → 0
View file @
8bdafc9f
{-|
Module : Gargantext.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Viz.Phylo.Metrics.Proximity
where
import
Data.List
(
null
,
union
,
intersect
)
import
Data.Map
(
Map
,
elems
,
unionWith
,
intersectionWith
,
intersection
,
size
,
filterWithKey
)
import
Gargantext.Prelude
-- import Debug.Trace (trace)
sumInvLog
::
Double
->
[
Double
]
->
Double
sumInvLog
s
l
=
foldl
(
\
mem
x
->
mem
+
(
1
/
log
(
s
+
x
)))
0
l
sumLog
::
Double
->
[
Double
]
->
Double
sumLog
s
l
=
foldl
(
\
mem
x
->
mem
+
log
(
s
+
x
))
0
l
-- -- | To process WeighedLogJaccard distance between to coocurency matrix
-- weightedLogJaccard :: Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> Double -> Double
-- weightedLogJaccard sens cooc cooc' nbDocs
-- | null union' = 0
-- | union' == inter' = 1
-- | sens == 0 = (fromIntegral $ length $ keysInter) / (fromIntegral $ length $ keysUnion)
-- | sens > 0 = (sumInvLog sens $ elems wInter) / (sumInvLog sens $ elems wUnion)
-- | otherwise = (sumLog sens $ elems wInter) / (sumLog sens $ elems wUnion)
-- where
-- --------------------------------------
-- keysInter :: [Int]
-- keysInter = nub $ concat $ map (\(x,x') -> [x,x']) $ keys inter'
-- --------------------------------------
-- keysUnion :: [Int]
-- keysUnion = nub $ concat $ map (\(x,x') -> [x,x']) $ keys union'
-- --------------------------------------
-- wInter :: Map (Int,Int) Double
-- wInter = map (/nbDocs) inter'
-- --------------------------------------
-- wUnion :: Map (Int,Int) Double
-- wUnion = map (/nbDocs) union'
-- --------------------------------------
-- inter' :: Map (Int, Int) Double
-- inter' = intersectionWith (+) cooc cooc'
-- --------------------------------------
-- union' :: Map (Int, Int) Double
-- union' = unionWith (+) cooc cooc'
-- --------------------------------------
-- | To compute a jaccard similarity between two lists
jaccard
::
[
Int
]
->
[
Int
]
->
Double
jaccard
inter'
union'
=
((
fromIntegral
.
length
)
$
inter'
)
/
((
fromIntegral
.
length
)
$
union'
)
-- | To get the diagonal of a matrix
toDiago
::
Map
(
Int
,
Int
)
Double
->
[
Double
]
toDiago
cooc
=
elems
$
filterWithKey
(
\
(
x
,
x'
)
_
->
x
==
x'
)
cooc
-- | To process WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard
::
Double
->
Double
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
[
Int
]
->
[
Int
]
->
Double
weightedLogJaccard
sens
nbDocs
cooc
cooc'
ngrams
ngrams'
|
null
gInter
=
0
|
gInter
==
gUnion
=
1
|
sens
==
0
=
jaccard
gInter
gUnion
|
sens
>
0
=
(
sumInvLog
sens
wInter
)
/
(
sumInvLog
sens
wUnion
)
|
otherwise
=
(
sumLog
sens
wInter
)
/
(
sumLog
sens
wUnion
)
where
--------------------------------------
gInter
::
[
Int
]
gInter
=
intersect
ngrams
ngrams'
--------------------------------------
gUnion
::
[
Int
]
gUnion
=
union
ngrams
ngrams'
--------------------------------------
wInter
::
[
Double
]
wInter
=
toDiago
$
map
(
/
nbDocs
)
$
intersectionWith
(
+
)
cooc
cooc'
--------------------------------------
wUnion
::
[
Double
]
wUnion
=
toDiago
$
map
(
/
nbDocs
)
$
unionWith
(
+
)
cooc
cooc'
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields
hamming
::
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Double
hamming
f1
f2
=
fromIntegral
$
max
((
size
inter
)
-
(
size
f1
))
((
size
inter
)
-
(
size
f2
))
where
--------------------------------------
inter
::
Map
(
Int
,
Int
)
Double
inter
=
intersection
f1
f2
--------------------------------------
src/Gargantext/Viz/Phylo/Tools.hs
View file @
eea8b990
...
...
@@ -139,7 +139,7 @@ listToUnDirectedCombiWith f l = [ (f x, f y) | (x:rest) <- tails l, y <- rest ]
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel
::
Vector
Ngrams
->
[
Int
]
->
Text
ngramsToLabel
ngrams
l
=
unwords
$
ngramsToText
ngrams
l
ngramsToLabel
ngrams
l
=
unwords
$
tail'
"ngramsToLabel"
$
concat
$
map
(
\
n
->
[
"|"
,
n
])
$
ngramsToText
ngrams
l
-- | To transform a list of Ngrams Indexes into a list of Text
...
...
@@ -147,6 +147,11 @@ ngramsToText :: Vector Ngrams -> [Int] -> [Text]
ngramsToText
ngrams
l
=
map
(
\
idx
->
ngrams
Vector
.!
idx
)
l
-- | To transform a list of ngrams into a list of indexes
ngramsToIdx
::
[
Ngrams
]
->
Vector
Ngrams
->
[
Int
]
ngramsToIdx
ns
v
=
sort
$
map
(
\
n
->
getIdxInVector
n
v
)
ns
-- | To unify the keys (x,y) that Map 1 share with Map 2 such as: (x,y) <=> (y,x)
unifySharedKeys
::
Eq
a
=>
Ord
a
=>
Map
(
a
,
a
)
b
->
Map
(
a
,
a
)
b
->
Map
(
a
,
a
)
b
unifySharedKeys
m1
m2
=
mapKeys
(
\
(
x
,
y
)
->
if
member
(
y
,
x
)
m2
...
...
@@ -439,6 +444,7 @@ initGroup ngrams lbl idx lvl from' to' p = PhyloGroup
lbl
idxs
(
Map
.
empty
)
(
Map
.
empty
)
Nothing
(
getMiniCooc
(
listToFullCombi
idxs
)
(
periodsToYears
[(
from'
,
to'
)])
(
getPhyloCooc
p
))
[]
[]
[]
[]
...
...
src/Gargantext/Viz/Phylo/View/Taggers.hs
View file @
eea8b990
...
...
@@ -18,17 +18,17 @@ module Gargantext.Viz.Phylo.View.Taggers
where
import
Control.Lens
hiding
(
makeLenses
,
both
,
Level
)
import
Data.List
(
concat
,
nub
,
groupBy
,
sortOn
,
sort
)
import
Data.List
(
concat
,
nub
,
groupBy
,
sortOn
,
sort
,
(
!!
),
take
)
import
Data.Text
(
Text
)
import
Data.Tuple
(
fst
,
snd
)
import
Data.Vector
(
Vector
)
import
Data.Map
(
Map
)
import
Data.Map
(
Map
,
(
!
)
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.BranchMaker
import
qualified
Data.Map
as
Map
--
import Debug.Trace (trace)
import
Debug.Trace
(
trace
)
-- | To get the nth most frequent Ngrams in a list of PhyloGroups
...
...
@@ -82,14 +82,20 @@ branchPeakCooc v nth p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$
getNodesByBranches
v
getNthMostMeta
::
Int
->
Text
->
PhyloGroup
->
[
Int
]
getNthMostMeta
nth
meta
g
=
map
(
\
(
idx
,
_
)
->
(
getGroupNgrams
g
!!
idx
))
$
take
nth
$
sortOn
snd
$
zip
[
0
..
]
$
(
g
^.
phylo_groupNgramsMeta
)
!
meta
-- | To set the label of a PhyloNode as the nth most coocurent terms of its PhyloNodes
nodeLabelCooc
::
PhyloView
->
Int
->
Phylo
->
PhyloView
nodeLabelCooc
v
thr
p
=
over
(
pv_nodes
.
traverse
)
(
\
n
->
let
lbl
=
ngramsToLabel
(
getFoundationsRoots
p
)
$
mostOccNgrams
thr
$
head'
"nodeLabelCooc"
$
getGroupsFromIds
[
getNodeId
n
]
p
in
n
&
pn_label
.~
lbl
)
v
(
\
n
->
let
g
=
head'
"nodeLabelCooc"
$
getGroupsFromIds
[
getNodeId
n
]
p
lbl'
=
ngramsToLabel
(
getFoundationsRoots
p
)
$
getNthMostMeta
thr
"coverage"
g
in
trace
(
show
(
lbl'
))
$
n
&
pn_label
.~
lbl'
)
v
-- | To process a sorted list of Taggers to a PhyloView
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment