Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
148
Issues
148
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
b0826576
Commit
b0826576
authored
Sep 25, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add new synchronic clustering
parent
56636731
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
158 additions
and
58 deletions
+158
-58
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+9
-0
package.yaml
package.yaml
+1
-0
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+3
-2
PhyloExport.hs
src/Gargantext/Viz/Phylo/PhyloExport.hs
+5
-1
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+4
-9
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+76
-23
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+60
-23
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
b0826576
...
...
@@ -39,6 +39,7 @@ import Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloMaker
(
toPhylo
)
import
Gargantext.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
)
import
Gargantext.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
-- import Gargantext.Viz.Phylo.SynchronicClustering (synchronicDistance')
import
GHC.IO
(
FilePath
)
import
Prelude
(
Either
(
..
))
...
...
@@ -156,6 +157,14 @@ main = do
let
phylo
=
toPhylo
corpus
mapList
config
-- | probes
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_synchronic_distance_cumu_jaccard.txt")
-- $ synchronicDistance' phylo 1
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_inflexion_points.txt")
-- $ inflexionPoints phylo 1
printIOMsg
"End of reconstruction, start the export"
let
dot
=
toPhyloExport
phylo
...
...
package.yaml
View file @
b0826576
...
...
@@ -73,6 +73,7 @@ library:
-
Gargantext.Viz.Phylo.Tools
-
Gargantext.Viz.Phylo.PhyloTools
-
Gargantext.Viz.Phylo.PhyloExport
-
Gargantext.Viz.Phylo.SynchronicClustering
-
Gargantext.Viz.Phylo.Example
-
Gargantext.Viz.Phylo.LevelMaker
-
Gargantext.Viz.Phylo.View.Export
...
...
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
b0826576
...
...
@@ -71,7 +71,8 @@ data Synchrony =
ByProximityThreshold
{
_bpt_threshold
::
Double
,
_bpt_sensibility
::
Double
}
|
ByProximityDistribution
|
ByProximityDistribution
{
_bpd_sensibility
::
Double
}
deriving
(
Show
,
Generic
,
Eq
)
...
...
@@ -116,7 +117,7 @@ defaultConfig =
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
1
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloSynchrony
=
ByProximity
Threshold
0.4
0
,
phyloSynchrony
=
ByProximity
Distribution
0
,
timeUnit
=
Year
3
1
5
,
contextualUnit
=
Fis
2
4
,
exportLabel
=
[
BranchLabel
MostInclusive
2
,
GroupLabel
MostEmergentInclusive
2
]
...
...
src/Gargantext/Viz/Phylo/PhyloExport.hs
View file @
b0826576
...
...
@@ -162,7 +162,11 @@ exportToDot phylo export =
graphAttrs
(
[
Label
(
toDotLabel
$
(
phyloName
$
getConfig
phylo
))]
<>
[
FontSize
30
,
LabelLoc
VTop
,
NodeSep
1
,
RankSep
[
1
],
Rank
SameRank
,
Splines
SplineEdges
,
Overlap
ScaleOverlaps
,
Ratio
FillRatio
,
Style
[
SItem
Filled
[]
],
Color
[
toWColor
White
]])
,
Style
[
SItem
Filled
[]
],
Color
[
toWColor
White
]
,
(
toAttr
(
fromStrict
"nbDocs"
)
$
pack
$
show
(
sum
$
elems
$
phylo
^.
phylo_timeDocs
))])
-- toAttr (fromStrict k) $ (pack . unwords) $ map show v
-- | 2) create a layer for the branches labels
subgraph
(
Str
"Branches peaks"
)
$
do
...
...
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
b0826576
...
...
@@ -17,7 +17,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloTools
where
import
Data.Vector
(
Vector
,
elemIndex
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
,
tails
,
nubBy
)
import
Data.Set
(
Set
,
size
,
disjoint
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
)
import
Data.String
(
String
)
...
...
@@ -215,6 +215,9 @@ listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
listToMatrix
::
[
Int
]
->
Map
(
Int
,
Int
)
Double
listToMatrix
lst
=
fromList
$
map
(
\
k
->
(
k
,
1
))
$
listToKeys
$
sort
lst
listToSeq
::
Eq
a
=>
[
a
]
->
[(
a
,
a
)]
listToSeq
l
=
nubBy
(
\
x
y
->
fst
x
==
fst
y
)
$
[
(
x
,
y
)
|
(
x
:
rest
)
<-
tails
l
,
y
<-
rest
]
sumCooc
::
Cooc
->
Cooc
->
Cooc
sumCooc
cooc
cooc'
=
unionWith
(
+
)
cooc
cooc'
...
...
@@ -252,14 +255,6 @@ filterProximity proximity thr local =
WeightedLogJaccard
_
_
_
->
local
>=
thr
Hamming
->
undefined
filterPointers
::
Filiation
->
PointerType
->
Proximity
->
Double
->
PhyloGroup
->
PhyloGroup
filterPointers
fil
pty
proximity
thr
group
=
case
pty
of
TemporalPointer
->
case
fil
of
ToChilds
->
group
&
phylo_groupPeriodChilds
%~
(
filter
(
\
(
_
,
w
)
->
filterProximity
proximity
thr
w
))
ToParents
->
group
&
phylo_groupPeriodParents
%~
(
filter
(
\
(
_
,
w
)
->
filterProximity
proximity
thr
w
))
LevelPointer
->
undefined
---------------
-- | Phylo | --
...
...
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
b0826576
...
...
@@ -20,12 +20,14 @@ import Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.TemporalMatching
(
weightedLogJaccard
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
,
sortOn
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
qualified
Data.Map
as
Map
-------------------------
-- | New Level Maker | --
...
...
@@ -92,13 +94,42 @@ toPairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
toPairs
groups
=
filter
(
\
(
g
,
g'
)
->
(
not
.
null
)
$
intersect
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
))
$
listToCombi'
groups
groupsToEdges
::
Proximity
->
Double
->
Double
->
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
prox
thr
sens
docs
groups
=
case
prox
of
WeightedLogJaccard
_
_
_
->
filter
(
\
(
_
,
w
)
->
w
>=
thr
)
$
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard
sens
docs
(
g
^.
phylo_groupCooc
)
(
g'
^.
phylo_groupCooc
)
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
$
toPairs
groups
_
->
undefined
toDiamonds
::
[
PhyloGroup
]
->
[[
PhyloGroup
]]
toDiamonds
groups
=
foldl'
(
\
acc
groups'
->
acc
++
(
elems
$
Map
.
filter
(
\
v
->
length
v
>
1
)
$
fromListWith
(
++
)
$
foldl'
(
\
acc'
g
->
acc'
++
(
map
(
\
(
id
,
_
)
->
(
id
,[
g
])
)
$
g
^.
phylo_groupPeriodChilds
))
[]
groups'
))
[]
$
elems
$
Map
.
filter
(
\
v
->
length
v
>
1
)
$
fromListWith
(
++
)
$
foldl'
(
\
acc
g
->
acc
++
(
map
(
\
(
id
,
_
)
->
(
id
,[
g
])
)
$
g
^.
phylo_groupPeriodParents
)
)
[]
groups
groupsToEdges
::
Proximity
->
Synchrony
->
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
prox
sync
docs
groups
=
case
sync
of
ByProximityThreshold
t
s
->
filter
(
\
(
_
,
w
)
->
w
>=
t
)
$
toEdges
s
$
toPairs
groups
ByProximityDistribution
s
->
let
diamonds
=
sortOn
snd
$
toEdges
s
$
concat
$
map
toPairs
$
toDiamonds
groups
in
take
(
div
(
length
diamonds
)
2
)
diamonds
where
toEdges
::
Double
->
[(
PhyloGroup
,
PhyloGroup
)]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
toEdges
sens
edges
=
case
prox
of
WeightedLogJaccard
_
_
_
->
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard
sens
docs
(
g
^.
phylo_groupCooc
)
(
g'
^.
phylo_groupCooc
)
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
edges
_
->
undefined
toRelatedComponents
::
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
->
[[
PhyloGroup
]]
...
...
@@ -111,32 +142,54 @@ toParentId :: PhyloGroup -> PhyloGroupId
toParentId
child
=
((
child
^.
phylo_groupPeriod
,
child
^.
phylo_groupLevel
+
1
),
child
^.
phylo_groupIndex
)
reduceBranch
::
Proximity
->
Double
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceBranch
prox
thr
sens
docs
branch
=
reduceBranch
::
Proximity
->
Synchrony
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceBranch
prox
sync
docs
branch
=
-- | 1) reduce a branch as a set of periods & groups
let
periods
=
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,[
g
]))
branch
in
(
concat
.
concat
.
elems
)
$
mapWithKey
(
\
prd
groups
->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let
edges
=
groupsToEdges
prox
thr
sens
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
groups
let
edges
=
groupsToEdges
prox
sync
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
groups
in
map
(
\
comp
->
-- | 4) add to each groups their futur level parent group
let
parentId
=
toParentId
(
head'
"parentId"
comp
)
in
map
(
\
g
->
g
&
phylo_groupLevelParents
%~
(
++
[(
parentId
,
1
)])
)
comp
)
-- |3) reduce the graph a a set of related components
$
toRelatedComponents
groups
edges
)
periods
$
toRelatedComponents
groups
edges
)
periods
synchronicClustering
::
Phylo
->
Phylo
synchronicClustering
phylo
=
case
(
phyloSynchrony
$
getConfig
phylo
)
of
ByProximityThreshold
t
s
->
let
prox
=
phyloProximity
$
getConfig
phylo
docs
=
phylo
^.
phylo_timeDocs
branches
=
map
(
\
branch
->
reduceBranch
prox
t
s
docs
branch
)
$
phyloToLastBranches
$
traceSynchronyStart
phylo
branches'
=
branches
`
using
`
parList
rdeepseq
in
toNextLevel
phylo
$
concat
branches'
ByProximityDistribution
->
undefined
\ No newline at end of file
synchronicClustering
phylo
=
let
prox
=
phyloProximity
$
getConfig
phylo
sync
=
phyloSynchrony
$
getConfig
phylo
docs
=
phylo
^.
phylo_timeDocs
branches
=
map
(
\
branch
->
reduceBranch
prox
sync
docs
branch
)
$
phyloToLastBranches
$
traceSynchronyStart
phylo
branches'
=
branches
`
using
`
parList
rdeepseq
in
toNextLevel
phylo
$
concat
branches'
----------------
-- | probes | --
----------------
-- synchronicDistance :: Phylo -> Level -> String
-- synchronicDistance phylo lvl =
-- foldl' (\acc branch ->
-- acc <> (foldl' (\acc' period ->
-- acc' <> let prox = phyloProximity $ getConfig phylo
-- sync = phyloSynchrony $ getConfig phylo
-- docs = _phylo_timeDocs phylo
-- prd = _phylo_groupPeriod $ head' "distance" period
-- edges = groupsToEdges prox 0.1 (_bpt_sensibility sync)
-- ((sum . elems) $ restrictKeys docs $ periodsToYears [_phylo_groupPeriod $ head' "distance" period]) period
-- in foldl' (\mem (_,w) ->
-- mem <> show (prd)
-- <> "\t"
-- <> show (w)
-- <> "\n"
-- ) "" edges
-- ) "" $ elems $ groupByField _phylo_groupPeriod branch)
-- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo
\ No newline at end of file
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
b0826576
...
...
@@ -15,7 +15,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.TemporalMatching
where
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
union
,
elemIndex
,
(
!!
),
dropWhile
)
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
union
,
elemIndex
,
(
!!
),
dropWhile
,
partition
)
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
,
findWithDefault
,
filterWithKey
)
import
Gargantext.Prelude
...
...
@@ -25,6 +25,7 @@ import Gargantext.Viz.Phylo.PhyloTools
import
Prelude
(
logBase
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
-- import Debug.Trace (trace)
import
qualified
Data.Set
as
Set
...
...
@@ -99,42 +100,78 @@ toProximity docs proximity ego target target' =
-- | Find pairs of valuable candidates to be matched
makePairs
::
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs
candidates
periods
=
case
null
periods
of
makePairs
::
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[
PhyloPeriodId
]
->
[
(
PhyloGroup
,
PhyloGroup
)]
makePairs
candidates
periods
periods'
=
case
null
periods
of
True
->
[]
-- | at least on of the pair candidates should be from the last added period
False
->
filter
(
\
(
cdt
,
cdt'
)
->
(
inLastPeriod
cdt
periods
)
||
(
inLastPeriod
cdt'
periods
))
False
->
filter
(
\
(
cdt
,
cdt'
)
->
((
inLastPeriod
cdt
periods
)
||
(
inLastPeriod
cdt'
periods
))
&&
(
not
$
inOldPeriods
cdt
periods'
)
&&
(
not
$
inOldPeriods
cdt'
periods'
))
$
listToKeys
candidates
where
inLastPeriod
::
PhyloGroup
->
[
PhyloPeriodId
]
->
Bool
inLastPeriod
g
prds
=
(
g
^.
phylo_groupPeriod
)
==
(
last'
"makePairs"
prds
)
--------------------------------------
inOldPeriods
::
PhyloGroup
->
[
PhyloPeriodId
]
->
Bool
inOldPeriods
g
prds
=
elem
(
g
^.
phylo_groupPeriod
)
prds
keepOldOnes
::
Filiation
->
Proximity
->
Double
->
PhyloGroup
->
Bool
keepOldOnes
fil
proxi
thr
ego
=
any
(
\
(
_
,
w
)
->
filterProximity
proxi
thr
w
)
$
getPeriodPointers
fil
ego
filterPointers
::
Proximity
->
Double
->
[
Pointer
]
->
[
Pointer
]
filterPointers
proxi
thr
pts
=
filter
(
\
(
_
,
w
)
->
filterProximity
proxi
thr
w
)
pts
findLastPeriod
::
Filiation
->
[
Pointer
]
->
PhyloPeriodId
findLastPeriod
fil
pts
=
case
fil
of
ToParents
->
head'
"findLastPeriod"
$
sortOn
fst
$
map
(
fst
.
fst
.
fst
)
pts
ToChilds
->
head'
"findLastPeriod"
$
reverse
$
sortOn
fst
$
map
(
fst
.
fst
.
fst
)
pts
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
candidates
fil
proxi
docs
thr
ego
=
case
null
(
getPeriodPointers
fil
ego
)
of
False
->
filterPointers
fil
TemporalPointer
proxi
thr
ego
True
->
case
null
pointers
of
True
->
addPointers
ego
fil
TemporalPointer
[]
False
->
addPointers
ego
fil
TemporalPointer
$
head'
"phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
reverse
$
sortOn
snd
$
head'
"pointers"
pointers
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
where
if
keepOldOnes
fil
proxi
thr
ego
-- | keep some of the old pointers
then
addPointers
ego
fil
TemporalPointer
$
filterPointers
proxi
thr
$
getPeriodPointers
fil
ego
else
case
null
pointers
of
-- | let's find new pointers
True
->
addPointers
ego
fil
TemporalPointer
[]
False
->
addPointers
ego
fil
TemporalPointer
$
head'
"phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
reverse
$
sortOn
snd
$
head'
"pointers"
pointers
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
where
--------------------------------------
oldPeriods
::
[
PhyloPeriodId
]
->
[
PhyloPeriodId
]
oldPeriods
periods
=
if
(
null
$
getPeriodPointers
fil
ego
)
then
[]
else
let
period
=
findLastPeriod
fil
$
getPeriodPointers
fil
ego
in
fst
$
partition
(
\
prd
->
case
fil
of
ToChilds
->
prd
<=
period
ToParents
->
prd
>=
period
)
periods
--------------------------------------
pointers
::
[[
Pointer
]]
pointers
=
take
1
$
dropWhile
(
null
)
-- | for each time frame, process the proximity on relevant pairs of targeted groups
$
scanl
(
\
acc
groups
->
let
periods
=
nub
$
concat
$
map
(
\
gs
->
if
null
gs
then
[]
else
[
_phylo_groupPeriod
$
head'
"pointers"
gs
])
groups
pairs
=
makePairs
(
concat
groups
)
periods
in
acc
++
(
filter
(
\
(
_
,
proximity
)
->
filterProximity
proxi
thr
proximity
)
let
periods
=
nub
$
concat
$
map
(
\
gs
->
if
null
gs
then
[]
else
[
_phylo_groupPeriod
$
head'
"pointers"
gs
])
groups
periods'
=
oldPeriods
periods
pairs
=
makePairs
(
concat
groups
)
periods
periods'
in
acc
++
(
filterPointers
proxi
thr
$
concat
$
map
(
\
(
c
,
c'
)
->
-- | process the proximity between the current group and a pair of candidates
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment