Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
10
Merge Requests
10
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
56636731
Commit
56636731
authored
Sep 18, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
working on perf
parent
5a8e884b
Pipeline
#575
failed with stage
Changes
5
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
142 additions
and
96 deletions
+142
-96
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+0
-2
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+3
-2
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+57
-16
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+21
-15
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+61
-61
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
56636731
...
...
@@ -160,8 +160,6 @@ main = do
let
dot
=
toPhyloExport
phylo
printIOMsg
"End of export to dot"
let
output
=
(
outputPath
config
)
<>
(
unpack
$
phyloName
config
)
<>
"_V2.dot"
...
...
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
56636731
...
...
@@ -69,7 +69,8 @@ data Proximity =
data
Synchrony
=
ByProximityThreshold
{
_bpt_threshold
::
Double
}
{
_bpt_threshold
::
Double
,
_bpt_sensibility
::
Double
}
|
ByProximityDistribution
deriving
(
Show
,
Generic
,
Eq
)
...
...
@@ -115,7 +116,7 @@ defaultConfig =
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
1
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloSynchrony
=
ByProximityThreshold
0.
1
,
phyloSynchrony
=
ByProximityThreshold
0.
4
0
,
timeUnit
=
Year
3
1
5
,
contextualUnit
=
Fis
2
4
,
exportLabel
=
[
BranchLabel
MostInclusive
2
,
GroupLabel
MostEmergentInclusive
2
]
...
...
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
56636731
...
...
@@ -17,14 +17,16 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloTools
where
import
Data.Vector
(
Vector
,
elemIndex
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
intersect
,
(
\\
)
)
import
Data.Set
(
Set
,
size
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
)
import
Data.Set
(
Set
,
size
,
disjoint
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unwords
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Text.Printf
import
Debug.Trace
(
trace
)
import
Control.Lens
hiding
(
Level
)
...
...
@@ -57,6 +59,10 @@ printIOComment cmt =
--------------
roundToStr
::
(
PrintfArg
a
,
Floating
a
)
=>
Int
->
a
->
String
roundToStr
=
printf
"%0.*f"
countSup
::
Double
->
[
Double
]
->
Int
countSup
s
l
=
length
$
filter
(
>
s
)
l
...
...
@@ -231,6 +237,30 @@ ngramsToCooc ngrams coocs =
getGroupId
::
PhyloGroup
->
PhyloGroupId
getGroupId
group
=
((
group
^.
phylo_groupPeriod
,
group
^.
phylo_groupLevel
),
group
^.
phylo_groupIndex
)
groupByField
::
Ord
a
=>
(
PhyloGroup
->
a
)
->
[
PhyloGroup
]
->
Map
a
[
PhyloGroup
]
groupByField
toField
groups
=
fromListWith
(
++
)
$
map
(
\
g
->
(
toField
g
,
[
g
]))
groups
getPeriodPointers
::
Filiation
->
PhyloGroup
->
[
Pointer
]
getPeriodPointers
fil
group
=
case
fil
of
ToChilds
->
group
^.
phylo_groupPeriodChilds
ToParents
->
group
^.
phylo_groupPeriodParents
filterProximity
::
Proximity
->
Double
->
Double
->
Bool
filterProximity
proximity
thr
local
=
case
proximity
of
WeightedLogJaccard
_
_
_
->
local
>=
thr
Hamming
->
undefined
filterPointers
::
Filiation
->
PointerType
->
Proximity
->
Double
->
PhyloGroup
->
PhyloGroup
filterPointers
fil
pty
proximity
thr
group
=
case
pty
of
TemporalPointer
->
case
fil
of
ToChilds
->
group
&
phylo_groupPeriodChilds
%~
(
filter
(
\
(
_
,
w
)
->
filterProximity
proximity
thr
w
))
ToParents
->
group
&
phylo_groupPeriodParents
%~
(
filter
(
\
(
_
,
w
)
->
filterProximity
proximity
thr
w
))
LevelPointer
->
undefined
---------------
-- | Phylo | --
---------------
...
...
@@ -315,28 +345,25 @@ traceToPhylo lvl phylo =
-- | Clustering | --
--------------------
relatedComponents
::
Eq
a
=>
[[
a
]]
->
[[
a
]]
relatedComponents
graphs
=
foldl'
(
\
mem
groups
->
if
(
null
mem
)
then
mem
++
[
groups
]
else
let
related
=
filter
(
\
groups'
->
(
not
.
null
)
$
intersect
groups
groups'
)
mem
in
if
(
null
related
)
then
mem
++
[
groups
]
else
(
mem
\\
related
)
++
[
union
groups
(
nub
$
concat
related
)]
)
[]
graphs
relatedComponents
::
Ord
a
=>
[[
a
]]
->
[[
a
]]
relatedComponents
graph
=
foldl'
(
\
acc
groups
->
if
(
null
acc
)
then
acc
++
[
groups
]
else
let
acc'
=
partition
(
\
groups'
->
disjoint
(
Set
.
fromList
groups'
)
(
Set
.
fromList
groups
))
acc
in
(
fst
acc'
)
++
[
nub
$
concat
$
(
snd
acc'
)
++
[
groups
]])
[]
graph
traceSynchronyEnd
::
Phylo
->
Phylo
traceSynchronyEnd
phylo
=
trace
(
"
\n
"
<>
"-- | End
of synchronic clustering for
level "
<>
show
(
getLastLevel
phylo
)
trace
(
"
\n
"
<>
"-- | End
synchronic clustering at
level "
<>
show
(
getLastLevel
phylo
)
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" groups"
<>
" and "
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" branches"
<>
"
\n
"
)
phylo
traceSynchronyStart
::
Phylo
->
Phylo
traceSynchronyStart
phylo
=
trace
(
"
\n
"
<>
"-- | Start
of synchronic clustering for
level "
<>
show
(
getLastLevel
phylo
)
trace
(
"
\n
"
<>
"-- | Start
synchronic clustering at
level "
<>
show
(
getLastLevel
phylo
)
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" groups"
<>
" and "
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" branches"
<>
"
\n
"
)
phylo
...
...
@@ -362,6 +389,15 @@ getThresholdStep proxi = case proxi of
Hamming
->
undefined
traceBranchMatching
::
Proximity
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
traceBranchMatching
proxi
thr
groups
=
case
proxi
of
WeightedLogJaccard
_
i
s
->
trace
(
roundToStr
2
thr
<>
" "
<>
foldl
(
\
acc
_
->
acc
<>
"."
)
"."
[(
10
*
i
),(
10
*
i
+
10
*
s
)
..
(
10
*
thr
)]
<>
" "
<>
show
(
length
groups
)
<>
" groups"
)
groups
Hamming
->
undefined
----------------
-- | Branch | --
----------------
...
...
@@ -420,5 +456,10 @@ traceMatchLimit branches =
traceMatchEnd
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceMatchEnd
groups
=
trace
(
"
\n
"
<>
"-- | End of temporal matching with "
<>
show
(
length
$
nub
$
map
(
\
g
->
g
^.
phylo_groupBranchId
)
groups
)
<>
" branches and "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
\ No newline at end of file
trace
(
"
\n
"
<>
"-- | End temporal matching with "
<>
show
(
length
$
nub
$
map
(
\
g
->
g
^.
phylo_groupBranchId
)
groups
)
<>
" branches and "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
traceTemporalMatching
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceTemporalMatching
groups
=
trace
(
"
\n
"
<>
"-- | Start temporal matching for "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
\ No newline at end of file
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
56636731
...
...
@@ -24,7 +24,7 @@ import Data.List ((++), null, intersect, nub, concat, sort)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
)
import
Control.Lens
hiding
(
Level
)
-- import Debug.Trace (trace
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
-------------------------
...
...
@@ -92,31 +92,34 @@ toPairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
toPairs
groups
=
filter
(
\
(
g
,
g'
)
->
(
not
.
null
)
$
intersect
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
))
$
listToCombi'
groups
groupsToEdges
::
Proximity
->
Double
->
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
prox
thr
docs
groups
=
groupsToEdges
::
Proximity
->
Double
->
Double
->
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
prox
thr
sens
docs
groups
=
case
prox
of
WeightedLogJaccard
sens
_
_
->
filter
(
\
(
_
,
w
)
->
w
>=
thr
)
$
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard
sens
docs
(
g
^.
phylo_groupCooc
)
(
g'
^.
phylo_groupCooc
)
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
$
toPairs
groups
WeightedLogJaccard
_
_
_
->
filter
(
\
(
_
,
w
)
->
w
>=
thr
)
$
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard
sens
docs
(
g
^.
phylo_groupCooc
)
(
g'
^.
phylo_groupCooc
)
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
$
toPairs
groups
_
->
undefined
toRelatedComponents
::
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
->
[[
PhyloGroup
]]
toRelatedComponents
nodes
edges
=
relatedComponents
$
((
map
(
\
((
g
,
g'
),
_
)
->
[
g
,
g'
])
edges
)
++
(
map
(
\
g
->
[
g
])
nodes
))
toRelatedComponents
nodes
edges
=
let
ref
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
nodes
clusters
=
relatedComponents
$
((
map
(
\
((
g
,
g'
),
_
)
->
[
getGroupId
g
,
getGroupId
g'
])
edges
)
++
(
map
(
\
g
->
[
getGroupId
g
])
nodes
))
in
map
(
\
cluster
->
map
(
\
gId
->
ref
!
gId
)
cluster
)
clusters
toParentId
::
PhyloGroup
->
PhyloGroupId
toParentId
child
=
((
child
^.
phylo_groupPeriod
,
child
^.
phylo_groupLevel
+
1
),
child
^.
phylo_groupIndex
)
reduceBranch
::
Proximity
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceBranch
prox
thr
docs
branch
=
reduceBranch
::
Proximity
->
Double
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceBranch
prox
thr
sens
docs
branch
=
-- | 1) reduce a branch as a set of periods & groups
let
periods
=
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,[
g
]))
branch
in
(
concat
.
concat
.
elems
)
$
mapWithKey
(
\
prd
groups
->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let
edges
=
groupsToEdges
prox
thr
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
groups
let
edges
=
groupsToEdges
prox
thr
sens
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
groups
in
map
(
\
comp
->
-- | 4) add to each groups their futur level parent group
let
parentId
=
toParentId
(
head'
"parentId"
comp
)
...
...
@@ -128,9 +131,12 @@ reduceBranch prox thr docs branch =
synchronicClustering
::
Phylo
->
Phylo
synchronicClustering
phylo
=
case
(
phyloSynchrony
$
getConfig
phylo
)
of
ByProximityThreshold
thr
->
toNextLevel
phylo
$
concat
$
map
(
\
branch
->
reduceBranch
(
phyloProximity
$
getConfig
phylo
)
thr
(
phylo
^.
phylo_timeDocs
)
branch
)
$
phyloToLastBranches
$
traceSynchronyStart
phylo
ByProximityThreshold
t
s
->
let
prox
=
phyloProximity
$
getConfig
phylo
docs
=
phylo
^.
phylo_timeDocs
branches
=
map
(
\
branch
->
reduceBranch
prox
t
s
docs
branch
)
$
phyloToLastBranches
$
traceSynchronyStart
phylo
branches'
=
branches
`
using
`
parList
rdeepseq
in
toNextLevel
phylo
$
concat
branches'
ByProximityDistribution
->
undefined
\ No newline at end of file
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
56636731
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment