Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
purescript-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
3
Merge Requests
3
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
purescript-gargantext
Commits
56636731
Commit
56636731
authored
Sep 18, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
working on perf
parent
5a8e884b
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
142 additions
and
96 deletions
+142
-96
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+0
-2
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+3
-2
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+57
-16
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+21
-15
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+61
-61
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
56636731
...
@@ -160,8 +160,6 @@ main = do
...
@@ -160,8 +160,6 @@ main = do
let
dot
=
toPhyloExport
phylo
let
dot
=
toPhyloExport
phylo
printIOMsg
"End of export to dot"
let
output
=
(
outputPath
config
)
let
output
=
(
outputPath
config
)
<>
(
unpack
$
phyloName
config
)
<>
(
unpack
$
phyloName
config
)
<>
"_V2.dot"
<>
"_V2.dot"
...
...
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
56636731
...
@@ -69,7 +69,8 @@ data Proximity =
...
@@ -69,7 +69,8 @@ data Proximity =
data
Synchrony
=
data
Synchrony
=
ByProximityThreshold
ByProximityThreshold
{
_bpt_threshold
::
Double
}
{
_bpt_threshold
::
Double
,
_bpt_sensibility
::
Double
}
|
ByProximityDistribution
|
ByProximityDistribution
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
...
@@ -115,7 +116,7 @@ defaultConfig =
...
@@ -115,7 +116,7 @@ defaultConfig =
,
phyloName
=
pack
"Default Phylo"
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
1
,
phyloLevel
=
1
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloSynchrony
=
ByProximityThreshold
0.
1
,
phyloSynchrony
=
ByProximityThreshold
0.
4
0
,
timeUnit
=
Year
3
1
5
,
timeUnit
=
Year
3
1
5
,
contextualUnit
=
Fis
2
4
,
contextualUnit
=
Fis
2
4
,
exportLabel
=
[
BranchLabel
MostInclusive
2
,
GroupLabel
MostEmergentInclusive
2
]
,
exportLabel
=
[
BranchLabel
MostInclusive
2
,
GroupLabel
MostEmergentInclusive
2
]
...
...
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
56636731
...
@@ -17,14 +17,16 @@ Portability : POSIX
...
@@ -17,14 +17,16 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloTools
where
module
Gargantext.Viz.Phylo.PhyloTools
where
import
Data.Vector
(
Vector
,
elemIndex
)
import
Data.Vector
(
Vector
,
elemIndex
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
intersect
,
(
\\
)
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
)
import
Data.Set
(
Set
,
size
)
import
Data.Set
(
Set
,
size
,
disjoint
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
)
import
Data.String
(
String
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unwords
)
import
Data.Text
(
Text
,
unwords
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.AdaptativePhylo
import
Text.Printf
import
Debug.Trace
(
trace
)
import
Debug.Trace
(
trace
)
import
Control.Lens
hiding
(
Level
)
import
Control.Lens
hiding
(
Level
)
...
@@ -57,6 +59,10 @@ printIOComment cmt =
...
@@ -57,6 +59,10 @@ printIOComment cmt =
--------------
--------------
roundToStr
::
(
PrintfArg
a
,
Floating
a
)
=>
Int
->
a
->
String
roundToStr
=
printf
"%0.*f"
countSup
::
Double
->
[
Double
]
->
Int
countSup
::
Double
->
[
Double
]
->
Int
countSup
s
l
=
length
$
filter
(
>
s
)
l
countSup
s
l
=
length
$
filter
(
>
s
)
l
...
@@ -231,6 +237,30 @@ ngramsToCooc ngrams coocs =
...
@@ -231,6 +237,30 @@ ngramsToCooc ngrams coocs =
getGroupId
::
PhyloGroup
->
PhyloGroupId
getGroupId
::
PhyloGroup
->
PhyloGroupId
getGroupId
group
=
((
group
^.
phylo_groupPeriod
,
group
^.
phylo_groupLevel
),
group
^.
phylo_groupIndex
)
getGroupId
group
=
((
group
^.
phylo_groupPeriod
,
group
^.
phylo_groupLevel
),
group
^.
phylo_groupIndex
)
groupByField
::
Ord
a
=>
(
PhyloGroup
->
a
)
->
[
PhyloGroup
]
->
Map
a
[
PhyloGroup
]
groupByField
toField
groups
=
fromListWith
(
++
)
$
map
(
\
g
->
(
toField
g
,
[
g
]))
groups
getPeriodPointers
::
Filiation
->
PhyloGroup
->
[
Pointer
]
getPeriodPointers
fil
group
=
case
fil
of
ToChilds
->
group
^.
phylo_groupPeriodChilds
ToParents
->
group
^.
phylo_groupPeriodParents
filterProximity
::
Proximity
->
Double
->
Double
->
Bool
filterProximity
proximity
thr
local
=
case
proximity
of
WeightedLogJaccard
_
_
_
->
local
>=
thr
Hamming
->
undefined
filterPointers
::
Filiation
->
PointerType
->
Proximity
->
Double
->
PhyloGroup
->
PhyloGroup
filterPointers
fil
pty
proximity
thr
group
=
case
pty
of
TemporalPointer
->
case
fil
of
ToChilds
->
group
&
phylo_groupPeriodChilds
%~
(
filter
(
\
(
_
,
w
)
->
filterProximity
proximity
thr
w
))
ToParents
->
group
&
phylo_groupPeriodParents
%~
(
filter
(
\
(
_
,
w
)
->
filterProximity
proximity
thr
w
))
LevelPointer
->
undefined
---------------
---------------
-- | Phylo | --
-- | Phylo | --
---------------
---------------
...
@@ -315,28 +345,25 @@ traceToPhylo lvl phylo =
...
@@ -315,28 +345,25 @@ traceToPhylo lvl phylo =
-- | Clustering | --
-- | Clustering | --
--------------------
--------------------
relatedComponents
::
Ord
a
=>
[[
a
]]
->
[[
a
]]
relatedComponents
::
Eq
a
=>
[[
a
]]
->
[[
a
]]
relatedComponents
graph
=
foldl'
(
\
acc
groups
->
relatedComponents
graphs
=
foldl'
(
\
mem
groups
->
if
(
null
acc
)
if
(
null
mem
)
then
acc
++
[
groups
]
then
mem
++
[
groups
]
else
else
let
related
=
filter
(
\
groups'
->
(
not
.
null
)
$
intersect
groups
groups'
)
mem
let
acc'
=
partition
(
\
groups'
->
disjoint
(
Set
.
fromList
groups'
)
(
Set
.
fromList
groups
))
acc
in
if
(
null
related
)
in
(
fst
acc'
)
++
[
nub
$
concat
$
(
snd
acc'
)
++
[
groups
]])
[]
graph
then
mem
++
[
groups
]
else
(
mem
\\
related
)
++
[
union
groups
(
nub
$
concat
related
)]
)
[]
graphs
traceSynchronyEnd
::
Phylo
->
Phylo
traceSynchronyEnd
::
Phylo
->
Phylo
traceSynchronyEnd
phylo
=
traceSynchronyEnd
phylo
=
trace
(
"
\n
"
<>
"-- | End
of synchronic clustering for
level "
<>
show
(
getLastLevel
phylo
)
trace
(
"
\n
"
<>
"-- | End
synchronic clustering at
level "
<>
show
(
getLastLevel
phylo
)
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" groups"
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" groups"
<>
" and "
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" branches"
<>
" and "
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" branches"
<>
"
\n
"
)
phylo
<>
"
\n
"
)
phylo
traceSynchronyStart
::
Phylo
->
Phylo
traceSynchronyStart
::
Phylo
->
Phylo
traceSynchronyStart
phylo
=
traceSynchronyStart
phylo
=
trace
(
"
\n
"
<>
"-- | Start
of synchronic clustering for
level "
<>
show
(
getLastLevel
phylo
)
trace
(
"
\n
"
<>
"-- | Start
synchronic clustering at
level "
<>
show
(
getLastLevel
phylo
)
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" groups"
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" groups"
<>
" and "
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" branches"
<>
" and "
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" branches"
<>
"
\n
"
)
phylo
<>
"
\n
"
)
phylo
...
@@ -362,6 +389,15 @@ getThresholdStep proxi = case proxi of
...
@@ -362,6 +389,15 @@ getThresholdStep proxi = case proxi of
Hamming
->
undefined
Hamming
->
undefined
traceBranchMatching
::
Proximity
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
traceBranchMatching
proxi
thr
groups
=
case
proxi
of
WeightedLogJaccard
_
i
s
->
trace
(
roundToStr
2
thr
<>
" "
<>
foldl
(
\
acc
_
->
acc
<>
"."
)
"."
[(
10
*
i
),(
10
*
i
+
10
*
s
)
..
(
10
*
thr
)]
<>
" "
<>
show
(
length
groups
)
<>
" groups"
)
groups
Hamming
->
undefined
----------------
----------------
-- | Branch | --
-- | Branch | --
----------------
----------------
...
@@ -420,5 +456,10 @@ traceMatchLimit branches =
...
@@ -420,5 +456,10 @@ traceMatchLimit branches =
traceMatchEnd
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceMatchEnd
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceMatchEnd
groups
=
traceMatchEnd
groups
=
trace
(
"
\n
"
<>
"-- | End
of
temporal matching with "
<>
show
(
length
$
nub
$
map
(
\
g
->
g
^.
phylo_groupBranchId
)
groups
)
trace
(
"
\n
"
<>
"-- | End temporal matching with "
<>
show
(
length
$
nub
$
map
(
\
g
->
g
^.
phylo_groupBranchId
)
groups
)
<>
" branches and "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
<>
" branches and "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
traceTemporalMatching
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceTemporalMatching
groups
=
trace
(
"
\n
"
<>
"-- | Start temporal matching for "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
\ No newline at end of file
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
56636731
...
@@ -24,7 +24,7 @@ import Data.List ((++), null, intersect, nub, concat, sort)
...
@@ -24,7 +24,7 @@ import Data.List ((++), null, intersect, nub, concat, sort)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
)
import
Control.Lens
hiding
(
Level
)
import
Control.Lens
hiding
(
Level
)
-- import Debug.Trace (trace
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
-------------------------
-------------------------
...
@@ -92,31 +92,34 @@ toPairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
...
@@ -92,31 +92,34 @@ toPairs :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
toPairs
groups
=
filter
(
\
(
g
,
g'
)
->
(
not
.
null
)
$
intersect
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
))
toPairs
groups
=
filter
(
\
(
g
,
g'
)
->
(
not
.
null
)
$
intersect
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
))
$
listToCombi'
groups
$
listToCombi'
groups
groupsToEdges
::
Proximity
->
Double
->
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
::
Proximity
->
Double
->
Double
->
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
prox
thr
docs
groups
=
groupsToEdges
prox
thr
sens
docs
groups
=
case
prox
of
case
prox
of
WeightedLogJaccard
sens
_
_
->
filter
(
\
(
_
,
w
)
->
w
>=
thr
)
WeightedLogJaccard
_
_
_
->
filter
(
\
(
_
,
w
)
->
w
>=
thr
)
$
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard
sens
docs
(
g
^.
phylo_groupCooc
)
(
g'
^.
phylo_groupCooc
)
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
$
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard
sens
docs
(
g
^.
phylo_groupCooc
)
(
g'
^.
phylo_groupCooc
)
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
$
toPairs
groups
$
toPairs
groups
_
->
undefined
_
->
undefined
toRelatedComponents
::
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
->
[[
PhyloGroup
]]
toRelatedComponents
::
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
->
[[
PhyloGroup
]]
toRelatedComponents
nodes
edges
=
relatedComponents
$
((
map
(
\
((
g
,
g'
),
_
)
->
[
g
,
g'
])
edges
)
++
(
map
(
\
g
->
[
g
])
nodes
))
toRelatedComponents
nodes
edges
=
let
ref
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
nodes
clusters
=
relatedComponents
$
((
map
(
\
((
g
,
g'
),
_
)
->
[
getGroupId
g
,
getGroupId
g'
])
edges
)
++
(
map
(
\
g
->
[
getGroupId
g
])
nodes
))
in
map
(
\
cluster
->
map
(
\
gId
->
ref
!
gId
)
cluster
)
clusters
toParentId
::
PhyloGroup
->
PhyloGroupId
toParentId
::
PhyloGroup
->
PhyloGroupId
toParentId
child
=
((
child
^.
phylo_groupPeriod
,
child
^.
phylo_groupLevel
+
1
),
child
^.
phylo_groupIndex
)
toParentId
child
=
((
child
^.
phylo_groupPeriod
,
child
^.
phylo_groupLevel
+
1
),
child
^.
phylo_groupIndex
)
reduceBranch
::
Proximity
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceBranch
::
Proximity
->
Double
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceBranch
prox
thr
docs
branch
=
reduceBranch
prox
thr
sens
docs
branch
=
-- | 1) reduce a branch as a set of periods & groups
-- | 1) reduce a branch as a set of periods & groups
let
periods
=
fromListWith
(
++
)
let
periods
=
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,[
g
]))
branch
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,[
g
]))
branch
in
(
concat
.
concat
.
elems
)
in
(
concat
.
concat
.
elems
)
$
mapWithKey
(
\
prd
groups
->
$
mapWithKey
(
\
prd
groups
->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let
edges
=
groupsToEdges
prox
thr
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
groups
let
edges
=
groupsToEdges
prox
thr
sens
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
groups
in
map
(
\
comp
->
in
map
(
\
comp
->
-- | 4) add to each groups their futur level parent group
-- | 4) add to each groups their futur level parent group
let
parentId
=
toParentId
(
head'
"parentId"
comp
)
let
parentId
=
toParentId
(
head'
"parentId"
comp
)
...
@@ -128,9 +131,12 @@ reduceBranch prox thr docs branch =
...
@@ -128,9 +131,12 @@ reduceBranch prox thr docs branch =
synchronicClustering
::
Phylo
->
Phylo
synchronicClustering
::
Phylo
->
Phylo
synchronicClustering
phylo
=
synchronicClustering
phylo
=
case
(
phyloSynchrony
$
getConfig
phylo
)
of
case
(
phyloSynchrony
$
getConfig
phylo
)
of
ByProximityThreshold
thr
->
toNextLevel
phylo
ByProximityThreshold
t
s
->
$
concat
let
prox
=
phyloProximity
$
getConfig
phylo
$
map
(
\
branch
->
reduceBranch
(
phyloProximity
$
getConfig
phylo
)
thr
(
phylo
^.
phylo_timeDocs
)
branch
)
docs
=
phylo
^.
phylo_timeDocs
branches
=
map
(
\
branch
->
reduceBranch
prox
t
s
docs
branch
)
$
phyloToLastBranches
$
phyloToLastBranches
$
traceSynchronyStart
phylo
$
traceSynchronyStart
phylo
branches'
=
branches
`
using
`
parList
rdeepseq
in
toNextLevel
phylo
$
concat
branches'
ByProximityDistribution
->
undefined
ByProximityDistribution
->
undefined
\ No newline at end of file
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
56636731
...
@@ -15,14 +15,13 @@ Portability : POSIX
...
@@ -15,14 +15,13 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.TemporalMatching
where
module
Gargantext.Viz.Phylo.TemporalMatching
where
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
find
,
groupBy
,
scanl
,
nub
,
union
,
elemIndex
,
(
!!
)
)
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
union
,
elemIndex
,
(
!!
),
dropWhile
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
filterWithKey
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
,
findWithDefault
)
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
,
findWithDefault
,
filterWithKey
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.PhyloTools
import
Debug.Trace
(
trace
)
import
Prelude
(
logBase
)
import
Prelude
(
logBase
)
import
Control.Lens
hiding
(
Level
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
...
@@ -67,10 +66,10 @@ weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
...
@@ -67,10 +66,10 @@ weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
ngramsUnion
=
union
ngrams
ngrams'
ngramsUnion
=
union
ngrams
ngrams'
--------------------------------------
--------------------------------------
coocInter
::
[
Double
]
coocInter
::
[
Double
]
coocInter
=
elems
$
map
(
/
docs
)
$
intersectionWith
(
+
)
cooc
cooc'
coocInter
=
elems
$
map
(
/
docs
)
$
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
$
intersectionWith
(
+
)
cooc
cooc'
--------------------------------------
--------------------------------------
coocUnion
::
[
Double
]
coocUnion
::
[
Double
]
coocUnion
=
elems
$
map
(
/
docs
)
$
unionWith
(
+
)
cooc
cooc'
coocUnion
=
elems
$
map
(
/
docs
)
$
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
$
unionWith
(
+
)
cooc
cooc'
--------------------------------------
--------------------------------------
...
@@ -81,13 +80,6 @@ pickProximity proximity docs cooc cooc' ngrams ngrams' = case proximity of
...
@@ -81,13 +80,6 @@ pickProximity proximity docs cooc cooc' ngrams ngrams' = case proximity of
Hamming
->
undefined
Hamming
->
undefined
filterProximity
::
Proximity
->
Double
->
Double
->
Bool
filterProximity
proximity
thr
local
=
case
proximity
of
WeightedLogJaccard
_
_
_
->
local
>=
thr
Hamming
->
undefined
-- | To process the proximity between a current group and a pair of targets group
-- | To process the proximity between a current group and a pair of targets group
toProximity
::
Map
Date
Double
->
Proximity
->
PhyloGroup
->
PhyloGroup
->
PhyloGroup
->
Double
toProximity
::
Map
Date
Double
->
Proximity
->
PhyloGroup
->
PhyloGroup
->
PhyloGroup
->
Double
toProximity
docs
proximity
ego
target
target'
=
toProximity
docs
proximity
ego
target
target'
=
...
@@ -120,35 +112,43 @@ makePairs candidates periods = case null periods of
...
@@ -120,35 +112,43 @@ makePairs candidates periods = case null periods of
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
candidates
fil
proxi
docs
thr
ego
=
case
pointers
of
phyloGroupMatching
candidates
fil
proxi
docs
thr
ego
=
Nothing
->
addPointers
ego
fil
TemporalPointer
[]
case
null
(
getPeriodPointers
fil
ego
)
of
Just
pts
->
addPointers
ego
fil
TemporalPointer
False
->
filterPointers
fil
TemporalPointer
proxi
thr
ego
True
->
case
null
pointers
of
True
->
addPointers
ego
fil
TemporalPointer
[]
False
->
addPointers
ego
fil
TemporalPointer
$
head'
"phyloGroupMatching"
$
head'
"phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
-- | Keep only the best set of pointers grouped by proximity
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
reverse
$
sortOn
snd
pt
s
$
reverse
$
sortOn
snd
$
head'
"pointers"
pointer
s
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
where
where
pointers
::
Maybe
[
Pointer
]
pointers
::
[[
Pointer
]]
pointers
=
find
(
not
.
null
)
pointers
=
take
1
$
dropWhile
(
null
)
-- | for each time frame, process the proximity on relevant pairs of targeted groups
-- | for each time frame, process the proximity on relevant pairs of targeted groups
$
scanl
(
\
acc
groups
->
$
scanl
(
\
acc
groups
->
let
periods
=
nub
$
map
(
\
g'
->
g'
^.
phylo_groupPeriod
)
$
concat
groups
let
periods
=
nub
$
concat
$
map
(
\
gs
->
if
null
gs
then
[]
else
[
_phylo_groupPeriod
$
head'
"pointers"
gs
])
groups
pairs
=
makePairs
(
concat
groups
)
periods
pairs
=
makePairs
(
concat
groups
)
periods
in
acc
++
(
filter
(
\
(
_
,
proximity
)
->
filterProximity
proxi
thr
proximity
)
in
acc
++
(
filter
(
\
(
_
,
proximity
)
->
filterProximity
proxi
thr
proximity
)
$
concat
$
concat
$
map
(
\
(
c
,
c'
)
->
$
map
(
\
(
c
,
c'
)
->
-- | process the proximity between the current group and a pair of candidates
-- | process the proximity between the current group and a pair of candidates
let
proximity
=
toProximity
(
filterDocs
docs
periods
)
proxi
ego
c
c'
let
proximity
=
toProximity
(
filterDocs
docs
([
ego
^.
phylo_groupPeriod
]
++
periods
)
)
proxi
ego
c
c'
in
if
(
c
==
c'
)
in
if
(
c
==
c'
)
then
[(
getGroupId
c
,
proximity
)]
then
[(
getGroupId
c
,
proximity
)]
else
[(
getGroupId
c
,
proximity
),(
getGroupId
c'
,
proximity
)]
)
pairs
)
else
[(
getGroupId
c
,
proximity
),(
getGroupId
c'
,
proximity
)]
)
pairs
)
)
[]
)
[]
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
$
inits
candidates
$
inits
candidates
--------------------------------------
filterDocs
::
Map
Date
Double
->
[
PhyloPeriodId
]
->
Map
Date
Double
filterDocs
d
pds
=
restrictKeys
d
$
periodsToYears
pds
filterDocs
::
Map
Date
Double
->
[
PhyloPeriodId
]
->
Map
Date
Double
filterDocs
d
pds
=
restrictKeys
d
$
periodsToYears
pds
-----------------------------
-----------------------------
...
@@ -163,32 +163,36 @@ getNextPeriods fil max' pId pIds =
...
@@ -163,32 +163,36 @@ getNextPeriods fil max' pId pIds =
ToParents
->
take
max'
$
(
reverse
.
fst
)
$
splitAt
(
elemIndex'
pId
pIds
)
pIds
ToParents
->
take
max'
$
(
reverse
.
fst
)
$
splitAt
(
elemIndex'
pId
pIds
)
pIds
getCandidates
::
Filiation
->
PhyloGroup
->
[
PhyloPeriodId
]
->
[
PhyloGroup
]
->
[[
PhyloGroup
]]
getCandidates
::
Filiation
->
PhyloGroup
->
[
[
PhyloGroup
]
]
->
[[
PhyloGroup
]]
getCandidates
fil
ego
pIds
targets
=
getCandidates
fil
ego
targets
=
case
fil
of
case
fil
of
ToChilds
->
targets'
ToChilds
->
targets'
ToParents
->
reverse
targets'
ToParents
->
reverse
targets'
where
where
targets'
::
[[
PhyloGroup
]]
targets'
::
[[
PhyloGroup
]]
targets'
=
map
(
\
groups'
->
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
ego
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
))
groups'
)
$
elems
targets'
=
$
filterWithKey
(
\
k
_
->
elem
k
pIds
)
map
(
\
groups'
->
$
fromListWith
(
++
)
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
ego
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)
$
sortOn
(
fst
.
fst
)
)
groups'
)
targets
$
map
(
\
g'
->
(
g'
^.
phylo_groupPeriod
,[
g'
]))
targets
phyloBranchMatching
::
Int
->
[
PhyloPeriodId
]
->
Proximity
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
processMatching
::
Int
->
[
PhyloPeriodId
]
->
Proximity
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
phyloBranchMatching
frame
periods
proximity
thr
docs
branch
=
traceBranchMatching
proximity
thr
processMatching
max'
periods
proximity
thr
docs
groups
=
$
matchByPeriods
ToParents
let
branche
=
map
(
\
group
->
$
groupByField
_phylo_groupPeriod
let
childs
=
getCandidates
ToChilds
group
$
matchByPeriods
ToChilds
(
getNextPeriods
ToChilds
max'
(
group
^.
phylo_groupPeriod
)
periods
)
groups
$
groupByField
_phylo_groupPeriod
branch
parents
=
getCandidates
ToParents
group
where
(
getNextPeriods
ToParents
max'
(
group
^.
phylo_groupPeriod
)
periods
)
groups
--------------------------------------
in
phyloGroupMatching
parents
ToParents
proximity
docs
thr
matchByPeriods
::
Filiation
->
Map
PhyloPeriodId
[
PhyloGroup
]
->
[
PhyloGroup
]
$
phyloGroupMatching
childs
ToChilds
proximity
docs
thr
group
matchByPeriods
fil
branch'
=
foldl'
(
\
acc
prd
->
)
groups
let
periods'
=
getNextPeriods
fil
frame
prd
periods
branche'
=
branche
`
using
`
parList
rdeepseq
candidates
=
map
(
\
prd'
->
findWithDefault
[]
prd'
branch'
)
periods'
in
branche'
docs'
=
filterDocs
docs
([
prd
]
++
periods'
)
egos
=
map
(
\
g
->
phyloGroupMatching
(
getCandidates
fil
g
candidates
)
fil
proximity
docs'
thr
g
)
$
findWithDefault
[]
prd
branch'
egos'
=
egos
`
using
`
parList
rdeepseq
in
acc
++
egos'
)
[]
periods
-----------------------
-----------------------
...
@@ -256,8 +260,7 @@ groupsToBranches groups =
...
@@ -256,8 +260,7 @@ groupsToBranches groups =
-- | update each group's branch id
-- | update each group's branch id
in
map
(
\
(
bId
,
ids
)
->
in
map
(
\
(
bId
,
ids
)
->
map
(
\
group
->
group
&
phylo_groupBranchId
%~
(
\
(
lvl
,
lst
)
->
(
lvl
,
lst
++
[
bId
])))
map
(
\
group
->
group
&
phylo_groupBranchId
%~
(
\
(
lvl
,
lst
)
->
(
lvl
,
lst
++
[
bId
])))
$
elems
$
restrictKeys
groups
(
Set
.
fromList
ids
)
$
elems
$
restrictKeys
groups
(
Set
.
fromList
ids
))
graph
)
graph
recursiveMatching
::
Proximity
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Double
->
[[
PhyloGroup
]]
->
[
PhyloGroup
]
recursiveMatching
::
Proximity
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Double
->
[[
PhyloGroup
]]
->
[
PhyloGroup
]
...
@@ -283,13 +286,10 @@ recursiveMatching proximity thr frame periods docs quality branches =
...
@@ -283,13 +286,10 @@ recursiveMatching proximity thr frame periods docs quality branches =
-- | 1) for each local branch process a temporal matching then find the resulting branches
-- | 1) for each local branch process a temporal matching then find the resulting branches
nextBranches
::
[[[
PhyloGroup
]]]
nextBranches
::
[[[
PhyloGroup
]]]
nextBranches
=
nextBranches
=
-- let next =
let
branches'
=
map
(
\
branch
->
phyloBranchMatching
frame
periods
proximity
thr
docs
branch
)
branches
map
(
\
branch
->
clusters
=
map
(
\
branch
->
groupsToBranches
$
fromList
$
map
(
\
group
->
(
getGroupId
group
,
group
))
branch
)
branches'
let
branch'
=
processMatching
frame
periods
proximity
thr
docs
branch
clusters'
=
clusters
`
using
`
parList
rdeepseq
in
groupsToBranches
$
fromList
$
map
(
\
group
->
(
getGroupId
group
,
group
))
branch'
in
clusters'
)
branches
-- next' = next `using` parList rdeepseq
-- in next
...
@@ -312,10 +312,10 @@ temporalMatching phylo = updatePhyloGroups 1 branches' phylo
...
@@ -312,10 +312,10 @@ temporalMatching phylo = updatePhyloGroups 1 branches' phylo
quality
=
toPhyloQuality
branches
quality
=
toPhyloQuality
branches
-- | 2) group into branches
-- | 2) group into branches
branches
::
[[
PhyloGroup
]]
branches
::
[[
PhyloGroup
]]
branches
=
groupsToBranches
$
fromList
$
map
(
\
group
->
(
getGroupId
group
,
group
))
branches
=
groupsToBranches
$
fromList
$
map
(
\
group
->
(
getGroupId
group
,
group
))
groups'
$
trace
(
"
\n
"
<>
"-- | Init temporal matching for "
<>
show
(
length
$
groups'
)
<>
" groups"
<>
"
\n
"
)
groups'
-- | 1) for each group process an initial temporal Matching
-- | 1) for each group process an initial temporal Matching
groups'
::
[
PhyloGroup
]
groups'
::
[
PhyloGroup
]
groups'
=
p
rocess
Matching
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
groups'
=
p
hyloBranch
Matching
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phyloProximity
$
getConfig
phylo
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
phyloProximity
$
getConfig
phylo
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
getGroupsFromLevel
1
phylo
)
(
phylo
^.
phylo_timeDocs
)
\ No newline at end of file
(
traceTemporalMatching
$
getGroupsFromLevel
1
phylo
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment