Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
b0826576
Commit
b0826576
authored
Sep 25, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add new synchronic clustering
parent
56636731
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
158 additions
and
58 deletions
+158
-58
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+9
-0
package.yaml
package.yaml
+1
-0
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+3
-2
PhyloExport.hs
src/Gargantext/Viz/Phylo/PhyloExport.hs
+5
-1
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+4
-9
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+76
-23
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+60
-23
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
b0826576
...
...
@@ -39,6 +39,7 @@ import Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloMaker
(
toPhylo
)
import
Gargantext.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
)
import
Gargantext.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
-- import Gargantext.Viz.Phylo.SynchronicClustering (synchronicDistance')
import
GHC.IO
(
FilePath
)
import
Prelude
(
Either
(
..
))
...
...
@@ -156,6 +157,14 @@ main = do
let
phylo
=
toPhylo
corpus
mapList
config
-- | probes
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_synchronic_distance_cumu_jaccard.txt")
-- $ synchronicDistance' phylo 1
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_inflexion_points.txt")
-- $ inflexionPoints phylo 1
printIOMsg
"End of reconstruction, start the export"
let
dot
=
toPhyloExport
phylo
...
...
package.yaml
View file @
b0826576
...
...
@@ -73,6 +73,7 @@ library:
-
Gargantext.Viz.Phylo.Tools
-
Gargantext.Viz.Phylo.PhyloTools
-
Gargantext.Viz.Phylo.PhyloExport
-
Gargantext.Viz.Phylo.SynchronicClustering
-
Gargantext.Viz.Phylo.Example
-
Gargantext.Viz.Phylo.LevelMaker
-
Gargantext.Viz.Phylo.View.Export
...
...
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
b0826576
...
...
@@ -71,7 +71,8 @@ data Synchrony =
ByProximityThreshold
{
_bpt_threshold
::
Double
,
_bpt_sensibility
::
Double
}
|
ByProximityDistribution
|
ByProximityDistribution
{
_bpd_sensibility
::
Double
}
deriving
(
Show
,
Generic
,
Eq
)
...
...
@@ -116,7 +117,7 @@ defaultConfig =
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
1
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloSynchrony
=
ByProximity
Threshold
0.4
0
,
phyloSynchrony
=
ByProximity
Distribution
0
,
timeUnit
=
Year
3
1
5
,
contextualUnit
=
Fis
2
4
,
exportLabel
=
[
BranchLabel
MostInclusive
2
,
GroupLabel
MostEmergentInclusive
2
]
...
...
src/Gargantext/Viz/Phylo/PhyloExport.hs
View file @
b0826576
...
...
@@ -162,7 +162,11 @@ exportToDot phylo export =
graphAttrs
(
[
Label
(
toDotLabel
$
(
phyloName
$
getConfig
phylo
))]
<>
[
FontSize
30
,
LabelLoc
VTop
,
NodeSep
1
,
RankSep
[
1
],
Rank
SameRank
,
Splines
SplineEdges
,
Overlap
ScaleOverlaps
,
Ratio
FillRatio
,
Style
[
SItem
Filled
[]
],
Color
[
toWColor
White
]])
,
Style
[
SItem
Filled
[]
],
Color
[
toWColor
White
]
,
(
toAttr
(
fromStrict
"nbDocs"
)
$
pack
$
show
(
sum
$
elems
$
phylo
^.
phylo_timeDocs
))])
-- toAttr (fromStrict k) $ (pack . unwords) $ map show v
-- | 2) create a layer for the branches labels
subgraph
(
Str
"Branches peaks"
)
$
do
...
...
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
b0826576
...
...
@@ -17,7 +17,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloTools
where
import
Data.Vector
(
Vector
,
elemIndex
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
,
tails
,
nubBy
)
import
Data.Set
(
Set
,
size
,
disjoint
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
)
import
Data.String
(
String
)
...
...
@@ -215,6 +215,9 @@ listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
listToMatrix
::
[
Int
]
->
Map
(
Int
,
Int
)
Double
listToMatrix
lst
=
fromList
$
map
(
\
k
->
(
k
,
1
))
$
listToKeys
$
sort
lst
listToSeq
::
Eq
a
=>
[
a
]
->
[(
a
,
a
)]
listToSeq
l
=
nubBy
(
\
x
y
->
fst
x
==
fst
y
)
$
[
(
x
,
y
)
|
(
x
:
rest
)
<-
tails
l
,
y
<-
rest
]
sumCooc
::
Cooc
->
Cooc
->
Cooc
sumCooc
cooc
cooc'
=
unionWith
(
+
)
cooc
cooc'
...
...
@@ -252,14 +255,6 @@ filterProximity proximity thr local =
WeightedLogJaccard
_
_
_
->
local
>=
thr
Hamming
->
undefined
filterPointers
::
Filiation
->
PointerType
->
Proximity
->
Double
->
PhyloGroup
->
PhyloGroup
filterPointers
fil
pty
proximity
thr
group
=
case
pty
of
TemporalPointer
->
case
fil
of
ToChilds
->
group
&
phylo_groupPeriodChilds
%~
(
filter
(
\
(
_
,
w
)
->
filterProximity
proximity
thr
w
))
ToParents
->
group
&
phylo_groupPeriodParents
%~
(
filter
(
\
(
_
,
w
)
->
filterProximity
proximity
thr
w
))
LevelPointer
->
undefined
---------------
-- | Phylo | --
...
...
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
b0826576
...
...
@@ -20,12 +20,14 @@ import Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.TemporalMatching
(
weightedLogJaccard
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
,
sortOn
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
qualified
Data.Map
as
Map
-------------------------
-- | New Level Maker | --
...
...
@@ -92,13 +94,42 @@ toPairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
toPairs
groups
=
filter
(
\
(
g
,
g'
)
->
(
not
.
null
)
$
intersect
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
))
$
listToCombi'
groups
groupsToEdges
::
Proximity
->
Double
->
Double
->
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
prox
thr
sens
docs
groups
=
case
prox
of
WeightedLogJaccard
_
_
_
->
filter
(
\
(
_
,
w
)
->
w
>=
thr
)
$
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard
sens
docs
(
g
^.
phylo_groupCooc
)
(
g'
^.
phylo_groupCooc
)
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
$
toPairs
groups
_
->
undefined
toDiamonds
::
[
PhyloGroup
]
->
[[
PhyloGroup
]]
toDiamonds
groups
=
foldl'
(
\
acc
groups'
->
acc
++
(
elems
$
Map
.
filter
(
\
v
->
length
v
>
1
)
$
fromListWith
(
++
)
$
foldl'
(
\
acc'
g
->
acc'
++
(
map
(
\
(
id
,
_
)
->
(
id
,[
g
])
)
$
g
^.
phylo_groupPeriodChilds
))
[]
groups'
))
[]
$
elems
$
Map
.
filter
(
\
v
->
length
v
>
1
)
$
fromListWith
(
++
)
$
foldl'
(
\
acc
g
->
acc
++
(
map
(
\
(
id
,
_
)
->
(
id
,[
g
])
)
$
g
^.
phylo_groupPeriodParents
)
)
[]
groups
groupsToEdges
::
Proximity
->
Synchrony
->
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
prox
sync
docs
groups
=
case
sync
of
ByProximityThreshold
t
s
->
filter
(
\
(
_
,
w
)
->
w
>=
t
)
$
toEdges
s
$
toPairs
groups
ByProximityDistribution
s
->
let
diamonds
=
sortOn
snd
$
toEdges
s
$
concat
$
map
toPairs
$
toDiamonds
groups
in
take
(
div
(
length
diamonds
)
2
)
diamonds
where
toEdges
::
Double
->
[(
PhyloGroup
,
PhyloGroup
)]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
toEdges
sens
edges
=
case
prox
of
WeightedLogJaccard
_
_
_
->
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard
sens
docs
(
g
^.
phylo_groupCooc
)
(
g'
^.
phylo_groupCooc
)
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
edges
_
->
undefined
toRelatedComponents
::
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
->
[[
PhyloGroup
]]
...
...
@@ -111,32 +142,54 @@ toParentId :: PhyloGroup -> PhyloGroupId
toParentId
child
=
((
child
^.
phylo_groupPeriod
,
child
^.
phylo_groupLevel
+
1
),
child
^.
phylo_groupIndex
)
reduceBranch
::
Proximity
->
Double
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceBranch
prox
thr
sens
docs
branch
=
reduceBranch
::
Proximity
->
Synchrony
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceBranch
prox
sync
docs
branch
=
-- | 1) reduce a branch as a set of periods & groups
let
periods
=
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,[
g
]))
branch
in
(
concat
.
concat
.
elems
)
$
mapWithKey
(
\
prd
groups
->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let
edges
=
groupsToEdges
prox
thr
sens
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
groups
let
edges
=
groupsToEdges
prox
sync
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
groups
in
map
(
\
comp
->
-- | 4) add to each groups their futur level parent group
let
parentId
=
toParentId
(
head'
"parentId"
comp
)
in
map
(
\
g
->
g
&
phylo_groupLevelParents
%~
(
++
[(
parentId
,
1
)])
)
comp
)
-- |3) reduce the graph a a set of related components
$
toRelatedComponents
groups
edges
)
periods
$
toRelatedComponents
groups
edges
)
periods
synchronicClustering
::
Phylo
->
Phylo
synchronicClustering
phylo
=
case
(
phyloSynchrony
$
getConfig
phylo
)
of
ByProximityThreshold
t
s
->
let
prox
=
phyloProximity
$
getConfig
phylo
docs
=
phylo
^.
phylo_timeDocs
branches
=
map
(
\
branch
->
reduceBranch
prox
t
s
docs
branch
)
$
phyloToLastBranches
$
traceSynchronyStart
phylo
branches'
=
branches
`
using
`
parList
rdeepseq
in
toNextLevel
phylo
$
concat
branches'
ByProximityDistribution
->
undefined
\ No newline at end of file
synchronicClustering
phylo
=
let
prox
=
phyloProximity
$
getConfig
phylo
sync
=
phyloSynchrony
$
getConfig
phylo
docs
=
phylo
^.
phylo_timeDocs
branches
=
map
(
\
branch
->
reduceBranch
prox
sync
docs
branch
)
$
phyloToLastBranches
$
traceSynchronyStart
phylo
branches'
=
branches
`
using
`
parList
rdeepseq
in
toNextLevel
phylo
$
concat
branches'
----------------
-- | probes | --
----------------
-- synchronicDistance :: Phylo -> Level -> String
-- synchronicDistance phylo lvl =
-- foldl' (\acc branch ->
-- acc <> (foldl' (\acc' period ->
-- acc' <> let prox = phyloProximity $ getConfig phylo
-- sync = phyloSynchrony $ getConfig phylo
-- docs = _phylo_timeDocs phylo
-- prd = _phylo_groupPeriod $ head' "distance" period
-- edges = groupsToEdges prox 0.1 (_bpt_sensibility sync)
-- ((sum . elems) $ restrictKeys docs $ periodsToYears [_phylo_groupPeriod $ head' "distance" period]) period
-- in foldl' (\mem (_,w) ->
-- mem <> show (prd)
-- <> "\t"
-- <> show (w)
-- <> "\n"
-- ) "" edges
-- ) "" $ elems $ groupByField _phylo_groupPeriod branch)
-- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo
\ No newline at end of file
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
b0826576
...
...
@@ -15,7 +15,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.TemporalMatching
where
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
union
,
elemIndex
,
(
!!
),
dropWhile
)
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
union
,
elemIndex
,
(
!!
),
dropWhile
,
partition
)
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
,
findWithDefault
,
filterWithKey
)
import
Gargantext.Prelude
...
...
@@ -25,6 +25,7 @@ import Gargantext.Viz.Phylo.PhyloTools
import
Prelude
(
logBase
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
-- import Debug.Trace (trace)
import
qualified
Data.Set
as
Set
...
...
@@ -99,42 +100,78 @@ toProximity docs proximity ego target target' =
-- | Find pairs of valuable candidates to be matched
makePairs
::
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs
candidates
periods
=
case
null
periods
of
makePairs
::
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[
PhyloPeriodId
]
->
[
(
PhyloGroup
,
PhyloGroup
)]
makePairs
candidates
periods
periods'
=
case
null
periods
of
True
->
[]
-- | at least on of the pair candidates should be from the last added period
False
->
filter
(
\
(
cdt
,
cdt'
)
->
(
inLastPeriod
cdt
periods
)
||
(
inLastPeriod
cdt'
periods
))
False
->
filter
(
\
(
cdt
,
cdt'
)
->
((
inLastPeriod
cdt
periods
)
||
(
inLastPeriod
cdt'
periods
))
&&
(
not
$
inOldPeriods
cdt
periods'
)
&&
(
not
$
inOldPeriods
cdt'
periods'
))
$
listToKeys
candidates
where
inLastPeriod
::
PhyloGroup
->
[
PhyloPeriodId
]
->
Bool
inLastPeriod
g
prds
=
(
g
^.
phylo_groupPeriod
)
==
(
last'
"makePairs"
prds
)
--------------------------------------
inOldPeriods
::
PhyloGroup
->
[
PhyloPeriodId
]
->
Bool
inOldPeriods
g
prds
=
elem
(
g
^.
phylo_groupPeriod
)
prds
keepOldOnes
::
Filiation
->
Proximity
->
Double
->
PhyloGroup
->
Bool
keepOldOnes
fil
proxi
thr
ego
=
any
(
\
(
_
,
w
)
->
filterProximity
proxi
thr
w
)
$
getPeriodPointers
fil
ego
filterPointers
::
Proximity
->
Double
->
[
Pointer
]
->
[
Pointer
]
filterPointers
proxi
thr
pts
=
filter
(
\
(
_
,
w
)
->
filterProximity
proxi
thr
w
)
pts
findLastPeriod
::
Filiation
->
[
Pointer
]
->
PhyloPeriodId
findLastPeriod
fil
pts
=
case
fil
of
ToParents
->
head'
"findLastPeriod"
$
sortOn
fst
$
map
(
fst
.
fst
.
fst
)
pts
ToChilds
->
head'
"findLastPeriod"
$
reverse
$
sortOn
fst
$
map
(
fst
.
fst
.
fst
)
pts
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
candidates
fil
proxi
docs
thr
ego
=
case
null
(
getPeriodPointers
fil
ego
)
of
False
->
filterPointers
fil
TemporalPointer
proxi
thr
ego
True
->
case
null
pointers
of
True
->
addPointers
ego
fil
TemporalPointer
[]
False
->
addPointers
ego
fil
TemporalPointer
$
head'
"phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
reverse
$
sortOn
snd
$
head'
"pointers"
pointers
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
where
if
keepOldOnes
fil
proxi
thr
ego
-- | keep some of the old pointers
then
addPointers
ego
fil
TemporalPointer
$
filterPointers
proxi
thr
$
getPeriodPointers
fil
ego
else
case
null
pointers
of
-- | let's find new pointers
True
->
addPointers
ego
fil
TemporalPointer
[]
False
->
addPointers
ego
fil
TemporalPointer
$
head'
"phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
reverse
$
sortOn
snd
$
head'
"pointers"
pointers
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
where
--------------------------------------
oldPeriods
::
[
PhyloPeriodId
]
->
[
PhyloPeriodId
]
oldPeriods
periods
=
if
(
null
$
getPeriodPointers
fil
ego
)
then
[]
else
let
period
=
findLastPeriod
fil
$
getPeriodPointers
fil
ego
in
fst
$
partition
(
\
prd
->
case
fil
of
ToChilds
->
prd
<=
period
ToParents
->
prd
>=
period
)
periods
--------------------------------------
pointers
::
[[
Pointer
]]
pointers
=
take
1
$
dropWhile
(
null
)
-- | for each time frame, process the proximity on relevant pairs of targeted groups
$
scanl
(
\
acc
groups
->
let
periods
=
nub
$
concat
$
map
(
\
gs
->
if
null
gs
then
[]
else
[
_phylo_groupPeriod
$
head'
"pointers"
gs
])
groups
pairs
=
makePairs
(
concat
groups
)
periods
in
acc
++
(
filter
(
\
(
_
,
proximity
)
->
filterProximity
proxi
thr
proximity
)
let
periods
=
nub
$
concat
$
map
(
\
gs
->
if
null
gs
then
[]
else
[
_phylo_groupPeriod
$
head'
"pointers"
gs
])
groups
periods'
=
oldPeriods
periods
pairs
=
makePairs
(
concat
groups
)
periods
periods'
in
acc
++
(
filterPointers
proxi
thr
$
concat
$
map
(
\
(
c
,
c'
)
->
-- | process the proximity between the current group and a pair of candidates
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment