Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
145
Issues
145
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
56636731
Commit
56636731
authored
Sep 18, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
working on perf
parent
5a8e884b
Pipeline
#575
failed with stage
Changes
5
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
142 additions
and
96 deletions
+142
-96
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+0
-2
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+3
-2
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+57
-16
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+21
-15
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+61
-61
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
56636731
...
...
@@ -160,8 +160,6 @@ main = do
let
dot
=
toPhyloExport
phylo
printIOMsg
"End of export to dot"
let
output
=
(
outputPath
config
)
<>
(
unpack
$
phyloName
config
)
<>
"_V2.dot"
...
...
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
56636731
...
...
@@ -69,7 +69,8 @@ data Proximity =
data
Synchrony
=
ByProximityThreshold
{
_bpt_threshold
::
Double
}
{
_bpt_threshold
::
Double
,
_bpt_sensibility
::
Double
}
|
ByProximityDistribution
deriving
(
Show
,
Generic
,
Eq
)
...
...
@@ -115,7 +116,7 @@ defaultConfig =
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
1
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloSynchrony
=
ByProximityThreshold
0.
1
,
phyloSynchrony
=
ByProximityThreshold
0.
4
0
,
timeUnit
=
Year
3
1
5
,
contextualUnit
=
Fis
2
4
,
exportLabel
=
[
BranchLabel
MostInclusive
2
,
GroupLabel
MostEmergentInclusive
2
]
...
...
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
56636731
...
...
@@ -17,14 +17,16 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloTools
where
import
Data.Vector
(
Vector
,
elemIndex
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
intersect
,
(
\\
)
)
import
Data.Set
(
Set
,
size
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
)
import
Data.Set
(
Set
,
size
,
disjoint
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unwords
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Text.Printf
import
Debug.Trace
(
trace
)
import
Control.Lens
hiding
(
Level
)
...
...
@@ -57,6 +59,10 @@ printIOComment cmt =
--------------
roundToStr
::
(
PrintfArg
a
,
Floating
a
)
=>
Int
->
a
->
String
roundToStr
=
printf
"%0.*f"
countSup
::
Double
->
[
Double
]
->
Int
countSup
s
l
=
length
$
filter
(
>
s
)
l
...
...
@@ -231,6 +237,30 @@ ngramsToCooc ngrams coocs =
getGroupId
::
PhyloGroup
->
PhyloGroupId
getGroupId
group
=
((
group
^.
phylo_groupPeriod
,
group
^.
phylo_groupLevel
),
group
^.
phylo_groupIndex
)
groupByField
::
Ord
a
=>
(
PhyloGroup
->
a
)
->
[
PhyloGroup
]
->
Map
a
[
PhyloGroup
]
groupByField
toField
groups
=
fromListWith
(
++
)
$
map
(
\
g
->
(
toField
g
,
[
g
]))
groups
getPeriodPointers
::
Filiation
->
PhyloGroup
->
[
Pointer
]
getPeriodPointers
fil
group
=
case
fil
of
ToChilds
->
group
^.
phylo_groupPeriodChilds
ToParents
->
group
^.
phylo_groupPeriodParents
filterProximity
::
Proximity
->
Double
->
Double
->
Bool
filterProximity
proximity
thr
local
=
case
proximity
of
WeightedLogJaccard
_
_
_
->
local
>=
thr
Hamming
->
undefined
filterPointers
::
Filiation
->
PointerType
->
Proximity
->
Double
->
PhyloGroup
->
PhyloGroup
filterPointers
fil
pty
proximity
thr
group
=
case
pty
of
TemporalPointer
->
case
fil
of
ToChilds
->
group
&
phylo_groupPeriodChilds
%~
(
filter
(
\
(
_
,
w
)
->
filterProximity
proximity
thr
w
))
ToParents
->
group
&
phylo_groupPeriodParents
%~
(
filter
(
\
(
_
,
w
)
->
filterProximity
proximity
thr
w
))
LevelPointer
->
undefined
---------------
-- | Phylo | --
---------------
...
...
@@ -315,28 +345,25 @@ traceToPhylo lvl phylo =
-- | Clustering | --
--------------------
relatedComponents
::
Eq
a
=>
[[
a
]]
->
[[
a
]]
relatedComponents
graphs
=
foldl'
(
\
mem
groups
->
if
(
null
mem
)
then
mem
++
[
groups
]
else
let
related
=
filter
(
\
groups'
->
(
not
.
null
)
$
intersect
groups
groups'
)
mem
in
if
(
null
related
)
then
mem
++
[
groups
]
else
(
mem
\\
related
)
++
[
union
groups
(
nub
$
concat
related
)]
)
[]
graphs
relatedComponents
::
Ord
a
=>
[[
a
]]
->
[[
a
]]
relatedComponents
graph
=
foldl'
(
\
acc
groups
->
if
(
null
acc
)
then
acc
++
[
groups
]
else
let
acc'
=
partition
(
\
groups'
->
disjoint
(
Set
.
fromList
groups'
)
(
Set
.
fromList
groups
))
acc
in
(
fst
acc'
)
++
[
nub
$
concat
$
(
snd
acc'
)
++
[
groups
]])
[]
graph
traceSynchronyEnd
::
Phylo
->
Phylo
traceSynchronyEnd
phylo
=
trace
(
"
\n
"
<>
"-- | End
of synchronic clustering for
level "
<>
show
(
getLastLevel
phylo
)
trace
(
"
\n
"
<>
"-- | End
synchronic clustering at
level "
<>
show
(
getLastLevel
phylo
)
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" groups"
<>
" and "
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" branches"
<>
"
\n
"
)
phylo
traceSynchronyStart
::
Phylo
->
Phylo
traceSynchronyStart
phylo
=
trace
(
"
\n
"
<>
"-- | Start
of synchronic clustering for
level "
<>
show
(
getLastLevel
phylo
)
trace
(
"
\n
"
<>
"-- | Start
synchronic clustering at
level "
<>
show
(
getLastLevel
phylo
)
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" groups"
<>
" and "
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" branches"
<>
"
\n
"
)
phylo
...
...
@@ -362,6 +389,15 @@ getThresholdStep proxi = case proxi of
Hamming
->
undefined
traceBranchMatching
::
Proximity
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
traceBranchMatching
proxi
thr
groups
=
case
proxi
of
WeightedLogJaccard
_
i
s
->
trace
(
roundToStr
2
thr
<>
" "
<>
foldl
(
\
acc
_
->
acc
<>
"."
)
"."
[(
10
*
i
),(
10
*
i
+
10
*
s
)
..
(
10
*
thr
)]
<>
" "
<>
show
(
length
groups
)
<>
" groups"
)
groups
Hamming
->
undefined
----------------
-- | Branch | --
----------------
...
...
@@ -420,5 +456,10 @@ traceMatchLimit branches =
traceMatchEnd
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceMatchEnd
groups
=
trace
(
"
\n
"
<>
"-- | End of temporal matching with "
<>
show
(
length
$
nub
$
map
(
\
g
->
g
^.
phylo_groupBranchId
)
groups
)
<>
" branches and "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
\ No newline at end of file
trace
(
"
\n
"
<>
"-- | End temporal matching with "
<>
show
(
length
$
nub
$
map
(
\
g
->
g
^.
phylo_groupBranchId
)
groups
)
<>
" branches and "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
traceTemporalMatching
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceTemporalMatching
groups
=
trace
(
"
\n
"
<>
"-- | Start temporal matching for "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
\ No newline at end of file
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
56636731
...
...
@@ -24,7 +24,7 @@ import Data.List ((++), null, intersect, nub, concat, sort)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
)
import
Control.Lens
hiding
(
Level
)
-- import Debug.Trace (trace
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
-------------------------
...
...
@@ -92,31 +92,34 @@ toPairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
toPairs
groups
=
filter
(
\
(
g
,
g'
)
->
(
not
.
null
)
$
intersect
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
))
$
listToCombi'
groups
groupsToEdges
::
Proximity
->
Double
->
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
prox
thr
docs
groups
=
groupsToEdges
::
Proximity
->
Double
->
Double
->
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
prox
thr
sens
docs
groups
=
case
prox
of
WeightedLogJaccard
sens
_
_
->
filter
(
\
(
_
,
w
)
->
w
>=
thr
)
$
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard
sens
docs
(
g
^.
phylo_groupCooc
)
(
g'
^.
phylo_groupCooc
)
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
$
toPairs
groups
WeightedLogJaccard
_
_
_
->
filter
(
\
(
_
,
w
)
->
w
>=
thr
)
$
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard
sens
docs
(
g
^.
phylo_groupCooc
)
(
g'
^.
phylo_groupCooc
)
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
$
toPairs
groups
_
->
undefined
toRelatedComponents
::
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
->
[[
PhyloGroup
]]
toRelatedComponents
nodes
edges
=
relatedComponents
$
((
map
(
\
((
g
,
g'
),
_
)
->
[
g
,
g'
])
edges
)
++
(
map
(
\
g
->
[
g
])
nodes
))
toRelatedComponents
nodes
edges
=
let
ref
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
nodes
clusters
=
relatedComponents
$
((
map
(
\
((
g
,
g'
),
_
)
->
[
getGroupId
g
,
getGroupId
g'
])
edges
)
++
(
map
(
\
g
->
[
getGroupId
g
])
nodes
))
in
map
(
\
cluster
->
map
(
\
gId
->
ref
!
gId
)
cluster
)
clusters
toParentId
::
PhyloGroup
->
PhyloGroupId
toParentId
child
=
((
child
^.
phylo_groupPeriod
,
child
^.
phylo_groupLevel
+
1
),
child
^.
phylo_groupIndex
)
reduceBranch
::
Proximity
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceBranch
prox
thr
docs
branch
=
reduceBranch
::
Proximity
->
Double
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceBranch
prox
thr
sens
docs
branch
=
-- | 1) reduce a branch as a set of periods & groups
let
periods
=
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,[
g
]))
branch
in
(
concat
.
concat
.
elems
)
$
mapWithKey
(
\
prd
groups
->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let
edges
=
groupsToEdges
prox
thr
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
groups
let
edges
=
groupsToEdges
prox
thr
sens
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
groups
in
map
(
\
comp
->
-- | 4) add to each groups their futur level parent group
let
parentId
=
toParentId
(
head'
"parentId"
comp
)
...
...
@@ -128,9 +131,12 @@ reduceBranch prox thr docs branch =
synchronicClustering
::
Phylo
->
Phylo
synchronicClustering
phylo
=
case
(
phyloSynchrony
$
getConfig
phylo
)
of
ByProximityThreshold
thr
->
toNextLevel
phylo
$
concat
$
map
(
\
branch
->
reduceBranch
(
phyloProximity
$
getConfig
phylo
)
thr
(
phylo
^.
phylo_timeDocs
)
branch
)
$
phyloToLastBranches
$
traceSynchronyStart
phylo
ByProximityThreshold
t
s
->
let
prox
=
phyloProximity
$
getConfig
phylo
docs
=
phylo
^.
phylo_timeDocs
branches
=
map
(
\
branch
->
reduceBranch
prox
t
s
docs
branch
)
$
phyloToLastBranches
$
traceSynchronyStart
phylo
branches'
=
branches
`
using
`
parList
rdeepseq
in
toNextLevel
phylo
$
concat
branches'
ByProximityDistribution
->
undefined
\ No newline at end of file
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
56636731
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment