Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
199
Issues
199
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
f5393047
Commit
f5393047
authored
Nov 18, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix the weightedlogjaccard
parent
f49465e4
Pipeline
#614
failed with stage
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
163 additions
and
149 deletions
+163
-149
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+2
-2
PhyloExport.hs
src/Gargantext/Viz/Phylo/PhyloExport.hs
+6
-2
PhyloMaker.hs
src/Gargantext/Viz/Phylo/PhyloMaker.hs
+1
-1
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+2
-0
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+14
-23
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+138
-121
No files found.
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
f5393047
...
...
@@ -133,8 +133,8 @@ defaultConfig =
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
2
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloSynchrony
=
ByProximityThreshold
0.
1
0
SiblingBranches
MergeAllGroups
,
phyloQuality
=
Quality
0.1
1
,
phyloSynchrony
=
ByProximityThreshold
0.
2
0
SiblingBranches
MergeAllGroups
,
phyloQuality
=
Quality
10
1
,
timeUnit
=
Year
3
1
5
,
clique
=
Fis
1
5
,
exportLabel
=
[
BranchLabel
MostInclusive
2
,
GroupLabel
MostEmergentInclusive
2
]
...
...
src/Gargantext/Viz/Phylo/PhyloExport.hs
View file @
f5393047
...
...
@@ -171,11 +171,15 @@ exportToDot phylo export =
,
Ratio
FillRatio
,
Style
[
SItem
Filled
[]
],
Color
[
toWColor
White
]]
-- | home made attributes
<>
[(
toAttr
(
fromStrict
"nbDocs"
)
$
pack
$
show
(
sum
$
elems
$
phylo
^.
phylo_timeDocs
))
<>
[(
toAttr
(
fromStrict
"phyloFoundations"
)
$
pack
$
show
(
length
$
Vector
.
toList
$
getRoots
phylo
))
,(
toAttr
(
fromStrict
"phyloTerms"
)
$
pack
$
show
(
length
$
nub
$
concat
$
map
(
\
g
->
g
^.
phylo_groupNgrams
)
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"phyloDocs"
)
$
pack
$
show
(
sum
$
elems
$
phylo
^.
phylo_timeDocs
))
,(
toAttr
(
fromStrict
"phyloBranches"
)
$
pack
$
show
(
length
$
export
^.
export_branches
))
,(
toAttr
(
fromStrict
"phyloGroups"
)
$
pack
$
show
(
length
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"proxiName"
)
$
pack
$
show
(
getProximityName
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"proxiInit"
)
$
pack
$
show
(
getProximityInit
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"proxiStep"
)
$
pack
$
show
(
getProximityStep
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"qua
Factor
"
)
$
pack
$
show
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"qua
Granularity
"
)
$
pack
$
show
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
))
])
...
...
src/Gargantext/Viz/Phylo/PhyloMaker.hs
View file @
f5393047
...
...
@@ -91,7 +91,7 @@ cliqueToGroup fis thr pId lvl idx fdt coocs =
(
fis
^.
phyloClique_support
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
1
,[
0
])
(
1
,[
0
])
-- | branchid (lvl,[path in the branching tree])
(
singleton
"thr"
[
thr
])
[]
[]
[]
[]
...
...
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
f5393047
...
...
@@ -227,6 +227,8 @@ sumCooc cooc cooc' = unionWith (+) cooc cooc'
getTrace
::
Cooc
->
Double
getTrace
cooc
=
sum
$
elems
$
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
cooc
coocToDiago
::
Cooc
->
Cooc
coocToDiago
cooc
=
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
cooc
-- | To build the local cooc matrix of each phylogroup
ngramsToCooc
::
[
Int
]
->
[
Cooc
]
->
Cooc
...
...
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
f5393047
...
...
@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.SynchronicClustering where
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.TemporalMatching
(
weightedLogJaccard
)
import
Gargantext.Viz.Phylo.TemporalMatching
(
weightedLogJaccard
'
,
filterDiago
,
reduceDiagos
)
import
Gargantext.Viz.Phylo.PhyloExport
(
processDynamics
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
,
sortOn
,
init
,
all
,
group
,
maximum
,
groupBy
)
...
...
@@ -56,26 +56,16 @@ mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq) ids
groupsToBranches'
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
groupsToBranches'
groups
=
-- | run the related component algorithm
let
egos
=
groupBy
(
\
gs
gs'
->
(
fst
$
fst
$
head'
"egos"
gs
)
==
(
fst
$
fst
$
head'
"egos"
gs'
))
$
sortOn
(
\
gs
->
fst
$
fst
$
head'
"egos"
gs
)
$
map
(
\
g
->
[
getGroupId
g
]
++
(
map
fst
$
g
^.
phylo_groupPeriodParents
)
++
(
map
fst
$
g
^.
phylo_groupPeriodChilds
)
)
$
elems
groups
-- | first find the related components by inside each ego's period
graph'
=
map
relatedComponents
egos
-- | then run it for the all the periods
graph
=
relatedComponents
$
concat
(
graph'
`
using
`
parList
rdeepseq
)
let
egos
=
map
(
\
g
->
[
getGroupId
g
]
++
(
map
fst
$
g
^.
phylo_groupPeriodParents
)
++
(
map
fst
$
g
^.
phylo_groupPeriodChilds
)
)
$
elems
groups
graph
=
relatedComponents
egos
-- | update each group's branch id
in
map
(
\
ids
->
-- intervenir ici
let
groups'
=
elems
$
restrictKeys
groups
(
Set
.
fromList
ids
)
bId
=
mergeBranchIds
$
map
(
\
g
->
snd
$
g
^.
phylo_groupBranchId
)
groups'
in
map
(
\
g
->
g
&
phylo_groupBranchId
%~
(
\
(
lvl
,
_
)
->
(
lvl
+
1
,
bId
)))
groups'
)
graph
-- toBranchId :: PhyloGroup -> PhyloBranchId
-- toBranchId child = ((child ^. phylo_groupLevel) + 1, snd (child ^. phylo_groupBranchId))
getLastThr
::
[
PhyloGroup
]
->
Double
getLastThr
childs
=
maximum
$
concat
$
map
(
\
g
->
(
g
^.
phylo_groupMeta
)
!
"thr"
)
childs
...
...
@@ -157,8 +147,8 @@ toDiamonds groups = foldl' (\acc groups' ->
$
foldl'
(
\
acc
g
->
acc
++
(
map
(
\
(
id
,
_
)
->
(
id
,[
g
])
)
$
g
^.
phylo_groupPeriodParents
)
)
[]
groups
groupsToEdges
::
Proximity
->
Synchrony
->
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
prox
sync
docs
groups
=
groupsToEdges
::
Proximity
->
Synchrony
->
Double
->
Map
Int
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
prox
sync
nbDocs
diago
groups
=
case
sync
of
ByProximityThreshold
thr
sens
_
strat
->
filter
(
\
(
_
,
w
)
->
w
>=
thr
)
...
...
@@ -174,8 +164,7 @@ groupsToEdges prox sync docs groups =
toEdges
sens
edges
=
case
prox
of
WeightedLogJaccard
_
_
_
->
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard
sens
docs
(
g
^.
phylo_groupCooc
)
(
g'
^.
phylo_groupCooc
)
((
g
,
g'
),
weightedLogJaccard'
sens
nbDocs
diago
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
edges
_
->
undefined
...
...
@@ -191,15 +180,16 @@ toParentId :: PhyloGroup -> PhyloGroupId
toParentId
child
=
((
child
^.
phylo_groupPeriod
,
child
^.
phylo_groupLevel
+
1
),
child
^.
phylo_groupIndex
)
reduceGroups
::
Proximity
->
Synchrony
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceGroups
prox
sync
docs
branch
=
reduceGroups
::
Proximity
->
Synchrony
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceGroups
prox
sync
docs
diagos
branch
=
-- | 1) reduce a branch as a set of periods & groups
let
periods
=
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,[
g
]))
branch
in
(
concat
.
concat
.
elems
)
$
mapWithKey
(
\
prd
groups
->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let
edges
=
groupsToEdges
prox
sync
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
groups
let
diago
=
reduceDiagos
$
filterDiago
diagos
[
prd
]
edges
=
groupsToEdges
prox
sync
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
diago
groups
in
map
(
\
comp
->
-- | 4) add to each groups their futur level parent group
let
parentId
=
toParentId
(
head'
"parentId"
comp
)
...
...
@@ -233,7 +223,8 @@ synchronicClustering phylo =
let
prox
=
phyloProximity
$
getConfig
phylo
sync
=
phyloSynchrony
$
getConfig
phylo
docs
=
phylo
^.
phylo_timeDocs
newBranches
=
map
(
\
branch
->
reduceGroups
prox
sync
docs
branch
)
diagos
=
map
coocToDiago
$
phylo
^.
phylo_timeCooc
newBranches
=
map
(
\
branch
->
reduceGroups
prox
sync
docs
diagos
branch
)
$
map
processDynamics
$
adjustClustering
sync
(
getPhyloThresholdStep
phylo
)
$
phyloToLastBranches
...
...
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
f5393047
...
...
@@ -16,7 +16,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.TemporalMatching
where
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
union
,
dropWhile
,
partition
,
or
)
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
,
findWithDefault
,
keys
,
(
!
),
filterWithKey
,
singleton
)
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
findWithDefault
,
keys
,
(
!
),
singleton
,
empty
,
mapKeys
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
...
...
@@ -35,30 +35,29 @@ import qualified Data.Set as Set
-------------------
-- |
Process the inverse sumLog
sumInvLog
::
Double
->
[
Double
]
->
Double
sumInvLog
s
l
=
foldl
(
\
mem
x
->
mem
+
(
1
/
log
(
s
+
x
)))
0
l
-- |
To compute a jaccard similarity between two lists
jaccard
::
[
Int
]
->
[
Int
]
->
Double
jaccard
inter'
union'
=
((
fromIntegral
.
length
)
$
inter'
)
/
((
fromIntegral
.
length
)
$
union'
)
-- | Process the sumLog
sum
Log
::
Double
->
[
Double
]
->
Double
sum
Log
s
l
=
foldl
(
\
mem
x
->
mem
+
log
(
s
+
x
))
0
l
-- | Process the
inverse
sumLog
sum
InvLog'
::
Double
->
Double
->
[
Double
]
->
Double
sum
InvLog'
s
nb
diago
=
foldl
(
\
mem
occ
->
mem
+
(
1
/
(
log
(
occ
+
s
)
/
log
(
nb
+
s
))))
0
diago
-- |
To compute a jaccard similarity between two lists
jaccard
::
[
Int
]
->
[
Int
]
->
Double
jaccard
inter'
union'
=
((
fromIntegral
.
length
)
$
inter'
)
/
((
fromIntegral
.
length
)
$
union'
)
-- |
Process the sumLog
sumLog'
::
Double
->
Double
->
[
Double
]
->
Double
sumLog'
s
nb
diago
=
foldl
(
\
mem
occ
->
mem
+
(
log
(
occ
+
s
)
/
log
(
nb
+
s
)))
0
diago
-- | To process a WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard
::
Double
->
Double
->
Cooc
->
Cooc
->
[
Int
]
->
[
Int
]
->
Double
weightedLogJaccard
sens
docs
cooc
cooc'
ngrams
ngrams'
weightedLogJaccard'
::
Double
->
Double
->
Map
Int
Double
->
[
Int
]
->
[
Int
]
->
Double
weightedLogJaccard'
sens
nbDocs
diago
ngrams
ngrams'
|
null
ngramsInter
=
0
|
ngramsInter
==
ngramsUnion
=
1
|
sens
==
0
=
jaccard
ngramsInter
ngramsUnion
|
sens
>
0
=
(
sumInvLog
sens
coocInter
)
/
(
sumInvLog
sens
cooc
Union
)
|
otherwise
=
(
sumLog
sens
coocInter
)
/
(
sumLog
sens
coocUnion
)
where
|
sens
>
0
=
(
sumInvLog
'
sens
nbDocs
diagoInter
)
/
(
sumInvLog'
sens
nbDocs
diago
Union
)
|
otherwise
=
(
sumLog
'
sens
nbDocs
diagoInter
)
/
(
sumLog'
sens
nbDocs
diagoUnion
)
where
--------------------------------------
ngramsInter
::
[
Int
]
ngramsInter
=
intersect
ngrams
ngrams'
...
...
@@ -66,85 +65,83 @@ weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
ngramsUnion
::
[
Int
]
ngramsUnion
=
union
ngrams
ngrams'
--------------------------------------
coocInter
::
[
Double
]
coocInter
=
elems
$
map
(
/
docs
)
$
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
$
intersectionWith
(
+
)
cooc
cooc'
-- coocInter = elems $ map (/docs) $ intersectionWith (+) cooc cooc'
--------------------------------------
coocUnion
::
[
Double
]
coocUnion
=
elems
$
map
(
/
docs
)
$
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
$
unionWith
(
+
)
cooc
cooc'
--------------------------------------
-- | To choose a proximity function
pickProximity
::
Proximity
->
Double
->
Cooc
->
Cooc
->
[
Int
]
->
[
Int
]
->
Double
pickProximity
proximity
docs
cooc
cooc'
ngrams
ngrams'
=
case
proximity
of
WeightedLogJaccard
sens
_
_
->
weightedLogJaccard
sens
docs
cooc
cooc'
ngrams
ngrams'
Hamming
->
undefined
diagoInter
::
[
Double
]
diagoInter
=
elems
$
restrictKeys
diago
(
Set
.
fromList
ngramsInter
)
--------------------------------------
diagoUnion
::
[
Double
]
diagoUnion
=
elems
$
restrictKeys
diago
(
Set
.
fromList
ngramsUnion
)
--------------------------------------
-- | To process the proximity between a current group and a pair of targets group
toProximity
::
Map
Date
Double
->
Proximity
->
PhyloGroup
->
PhyloGroup
->
PhyloGroup
->
Double
toProximity
docs
proximity
ego
target
target'
=
let
docs'
=
sum
$
elems
docs
cooc
=
if
target
==
target'
then
(
target
^.
phylo_groupCooc
)
else
sumCooc
(
target
^.
phylo_groupCooc
)
(
target'
^.
phylo_groupCooc
)
ngrams
=
if
target
==
target'
then
(
target
^.
phylo_groupNgrams
)
else
union
(
target
^.
phylo_groupNgrams
)
(
target'
^.
phylo_groupNgrams
)
in
pickProximity
proximity
docs'
(
ego
^.
phylo_groupCooc
)
cooc
(
ego
^.
phylo_groupNgrams
)
ngrams
toProximity
::
Double
->
Map
Int
Double
->
Proximity
->
PhyloGroup
->
PhyloGroup
->
PhyloGroup
->
Double
toProximity
nbDocs
diago
proximity
ego
target
target'
=
case
proximity
of
WeightedLogJaccard
sens
_
_
->
let
targetsNgrams
=
if
target
==
target'
then
(
target
^.
phylo_groupNgrams
)
else
union
(
target
^.
phylo_groupNgrams
)
(
target'
^.
phylo_groupNgrams
)
in
weightedLogJaccard'
sens
nbDocs
diago
(
ego
^.
phylo_groupNgrams
)
targetsNgrams
Hamming
->
undefined
------------------------
-- | Local Matching | --
------------------------
to
LastPeriod
::
Filiation
->
[
PhyloPeriodId
]
->
PhyloPeriodId
to
LastPeriod
fil
periods
=
case
fil
of
ToParents
->
head'
"
to
LastPeriod"
(
sortOn
fst
periods
)
ToChilds
->
last'
"
to
LastPeriod"
(
sortOn
fst
periods
)
toLazyPairs
::
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
PhyloPeriodId
->
[(
PhyloGroup
,
PhyloGroup
)]
->
[(
PhyloGroup
,
PhyloGroup
)]
toLazyPairs
pointers
fil
thr
prox
prd
pairs
=
if
null
pointers
then
pairs
else
let
rest
=
filterPointers
prox
thr
pointe
rs
in
if
null
rest
then
let
prd'
=
toLastPeriod
fil
(
map
(
fst
.
fst
.
fst
)
p
ointers
)
in
if
prd'
==
prd
then
[]
else
filter
(
\
(
g
,
g'
)
->
case
fil
of
ToParents
->
((
fst
$
g
^.
phylo_groupPeriod
)
<
(
fst
prd'
))
||
((
fst
$
g'
^.
phylo_groupPeriod
)
<
(
fst
prd'
))
ToChilds
->
((
fst
$
g
^.
phylo_groupPeriod
)
>
(
fst
prd'
))
||
((
fst
$
g'
^.
phylo_groupPeriod
)
>
(
fst
prd'
)))
pairs
else
[]
find
LastPeriod
::
Filiation
->
[
PhyloPeriodId
]
->
PhyloPeriodId
find
LastPeriod
fil
periods
=
case
fil
of
ToParents
->
head'
"
find
LastPeriod"
(
sortOn
fst
periods
)
ToChilds
->
last'
"
find
LastPeriod"
(
sortOn
fst
periods
)
-- | To filter pairs of candidates related to old pointers periods
removeOldPointers
::
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
PhyloPeriodId
->
[(
PhyloGroup
,
PhyloGroup
)]
->
[(
PhyloGroup
,
PhyloGroup
)]
removeOldPointers
oldPointers
fil
thr
prox
prd
pairs
|
null
oldPointers
=
pai
rs
|
null
(
filterPointers
prox
thr
oldPointers
)
=
let
lastMatchedPrd
=
findLastPeriod
fil
(
map
(
fst
.
fst
.
fst
)
oldP
ointers
)
in
if
lastMatchedPrd
==
prd
then
[]
else
filter
(
\
(
g
,
g'
)
->
case
fil
of
ToParents
->
((
fst
$
g
^.
phylo_groupPeriod
)
<
(
fst
lastMatchedPrd
))
||
((
fst
$
g'
^.
phylo_groupPeriod
)
<
(
fst
lastMatchedPrd
))
ToChilds
->
((
fst
$
g
^.
phylo_groupPeriod
)
>
(
fst
lastMatchedPrd
))
||
((
fst
$
g'
^.
phylo_groupPeriod
)
>
(
fst
lastMatchedPrd
)))
pairs
|
otherwise
=
[]
-- | Find pairs of valuable candidates to be matched
makePairs'
::
PhyloGroup
->
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
Map
Date
Double
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs'
ego
candidates
periods
pointers
fil
thr
prox
doc
s
=
makePairs'
::
PhyloGroup
->
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
Map
Date
Double
->
Map
Date
Cooc
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs'
ego
candidates
periods
oldPointers
fil
thr
prox
docs
diago
s
=
case
null
periods
of
True
->
[]
False
->
toLazyPairs
p
ointers
fil
thr
prox
lastPrd
-- | at least on of the pair candidates should be from the last added period
False
->
removeOldPointers
oldP
ointers
fil
thr
prox
lastPrd
-- | at least on of the pair candidates should be from the last added period
$
filter
(
\
(
g
,
g'
)
->
((
g
^.
phylo_groupPeriod
)
==
lastPrd
)
||
((
g'
^.
phylo_groupPeriod
)
==
lastPrd
))
$
listToKeys
$
filter
(
\
g
->
(
g
^.
phylo_groupPeriod
==
lastPrd
)
||
((
toProximity
docs
prox
ego
ego
g
)
>=
thr
))
candidates
$
filter
(
\
g
->
let
nbDocs
=
sum
$
elems
$
(
filterDocs
docs
([
ego
^.
phylo_groupPeriod
,
g
^.
phylo_groupPeriod
]))
diago
=
reduceDiagos
$
filterDiago
diagos
([
ego
^.
phylo_groupPeriod
,
g
^.
phylo_groupPeriod
])
in
(
g
^.
phylo_groupPeriod
==
lastPrd
)
||
((
toProximity
nbDocs
diago
prox
ego
ego
g
)
>=
thr
))
candidates
where
lastPrd
::
PhyloPeriodId
lastPrd
=
to
LastPeriod
fil
periods
lastPrd
=
find
LastPeriod
fil
periods
filterPointers
::
Proximity
->
Double
->
[
Pointer
]
->
[
Pointer
]
filterPointers
proxi
thr
pts
=
filter
(
\
(
_
,
w
)
->
filterProximity
proxi
thr
w
)
pts
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
candidates
fil
proxi
docs
thr
ego
=
reduceDiagos
::
Map
Date
Cooc
->
Map
Int
Double
reduceDiagos
diagos
=
mapKeys
(
\
(
k
,
_
)
->
k
)
$
foldl
(
\
acc
diago
->
unionWith
(
+
)
acc
diago
)
empty
(
elems
diagos
)
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Map
Date
Cooc
->
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
candidates
fil
proxi
docs
diagos
thr
ego
=
if
(
null
$
filterPointers
proxi
thr
$
getPeriodPointers
fil
ego
)
-- | let's find new pointers
then
if
null
nextPointers
...
...
@@ -155,8 +152,7 @@ phyloGroupMatching candidates fil proxi docs thr ego =
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
reverse
$
sortOn
snd
$
head'
"pointers"
nextPointers
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
else
addPointers
ego
fil
TemporalPointer
$
filterPointers
proxi
thr
$
getPeriodPointers
fil
ego
else
ego
where
nextPointers
::
[[
Pointer
]]
nextPointers
=
take
1
...
...
@@ -164,13 +160,16 @@ phyloGroupMatching candidates fil proxi docs thr ego =
-- | for each time frame, process the proximity on relevant pairs of targeted groups
$
scanl
(
\
acc
groups
->
let
periods
=
nub
$
map
_phylo_groupPeriod
$
concat
groups
docs'
=
(
filterDocs
docs
([
ego
^.
phylo_groupPeriod
]
++
periods
))
pairs
=
makePairs'
ego
(
concat
groups
)
periods
(
getPeriodPointers
fil
ego
)
fil
thr
proxi
docs
nbdocs
=
sum
$
elems
$
(
filterDocs
docs
([
ego
^.
phylo_groupPeriod
]
++
periods
))
diago
=
reduceDiagos
$
filterDiago
diagos
([
ego
^.
phylo_groupPeriod
]
++
periods
)
-- | important resize nbdocs et diago dans le make pairs
pairs
=
makePairs'
ego
(
concat
groups
)
periods
(
getPeriodPointers
fil
ego
)
fil
thr
proxi
docs
diagos
in
acc
++
(
filterPointers
proxi
thr
$
concat
$
map
(
\
(
c
,
c'
)
->
-- | process the proximity between the current group and a pair of candidates
let
proximity
=
toProximity
docs'
proxi
ego
c
c'
let
proximity
=
toProximity
nbdocs
diago
proxi
ego
c
c'
in
if
(
c
==
c'
)
then
[(
getGroupId
c
,
proximity
)]
else
[(
getGroupId
c
,
proximity
),(
getGroupId
c'
,
proximity
)]
)
pairs
))
[]
...
...
@@ -180,6 +179,9 @@ phyloGroupMatching candidates fil proxi docs thr ego =
filterDocs
::
Map
Date
Double
->
[
PhyloPeriodId
]
->
Map
Date
Double
filterDocs
d
pds
=
restrictKeys
d
$
periodsToYears
pds
filterDiago
::
Map
Date
Cooc
->
[
PhyloPeriodId
]
->
Map
Date
Cooc
filterDiago
diago
pds
=
restrictKeys
diago
$
periodsToYears
pds
-----------------------------
-- | Matching Processing | --
...
...
@@ -200,26 +202,29 @@ getCandidates ego targets =
)
groups'
)
targets
phyloBranchMatching
::
Int
->
[
PhyloPeriodId
]
->
Proximity
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
phyloBranchMatching
frame
periods
proximity
thr
docs
branch
=
-- traceBranchMatching proximity thr
matchByPeriods
$
groupByField
_phylo_groupPeriod
branch
where
--------------------------------------
matchByPeriods
::
Map
PhyloPeriodId
[
PhyloGroup
]
->
[
PhyloGroup
]
matchByPeriods
branch'
=
foldl'
(
\
acc
prd
->
let
periodsPar
=
getNextPeriods
ToParents
frame
prd
periods
periodsChi
=
getNextPeriods
ToChilds
frame
prd
periods
candidatesPar
=
map
(
\
prd'
->
findWithDefault
[]
prd'
branch'
)
periodsPar
candidatesChi
=
map
(
\
prd'
->
findWithDefault
[]
prd'
branch'
)
periodsChi
docsPar
=
filterDocs
docs
([
prd
]
++
periodsPar
)
docsChi
=
filterDocs
docs
([
prd
]
++
periodsChi
)
egos
=
map
(
\
ego
->
phyloGroupMatching
(
getCandidates
ego
candidatesPar
)
ToParents
proximity
docsPar
thr
$
phyloGroupMatching
(
getCandidates
ego
candidatesChi
)
ToChilds
proximity
docsChi
thr
ego
)
$
findWithDefault
[]
prd
branch'
egos'
=
egos
`
using
`
parList
rdeepseq
in
acc
++
egos'
)
[]
periods
matchGroupsToGroups
::
Int
->
[
PhyloPeriodId
]
->
Proximity
->
Double
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloGroup
]
->
[
PhyloGroup
]
matchGroupsToGroups
frame
periods
proximity
thr
docs
coocs
groups
=
let
groups'
=
groupByField
_phylo_groupPeriod
groups
in
foldl'
(
\
acc
prd
->
let
-- | 1) find the parents/childs matching periods
periodsPar
=
getNextPeriods
ToParents
frame
prd
periods
periodsChi
=
getNextPeriods
ToChilds
frame
prd
periods
-- | 2) find the parents/childs matching candidates
candidatesPar
=
map
(
\
prd'
->
findWithDefault
[]
prd'
groups'
)
periodsPar
candidatesChi
=
map
(
\
prd'
->
findWithDefault
[]
prd'
groups'
)
periodsChi
-- | 3) find the parents/child number of docs by years
docsPar
=
filterDocs
docs
([
prd
]
++
periodsPar
)
docsChi
=
filterDocs
docs
([
prd
]
++
periodsChi
)
-- | 4) find the parents/child diago by years
diagoPar
=
filterDiago
(
map
coocToDiago
coocs
)
([
prd
]
++
periodsPar
)
diagoChi
=
filterDiago
(
map
coocToDiago
coocs
)
([
prd
]
++
periodsPar
)
-- | 5) match in parallel all the groups (egos) to their possible candidates
egos
=
map
(
\
ego
->
phyloGroupMatching
(
getCandidates
ego
candidatesPar
)
ToParents
proximity
docsPar
diagoPar
thr
$
phyloGroupMatching
(
getCandidates
ego
candidatesChi
)
ToChilds
proximity
docsChi
diagoChi
thr
ego
)
$
findWithDefault
[]
prd
groups'
egos'
=
egos
`
using
`
parList
rdeepseq
in
acc
++
egos'
)
[]
periods
-----------------------
...
...
@@ -270,6 +275,7 @@ groupsToBranches groups =
++
(
map
fst
$
group
^.
phylo_groupPeriodParents
)
++
(
map
fst
$
group
^.
phylo_groupPeriodChilds
)
)
$
elems
groups
-- | first find the related components by inside each ego's period
-- | a supprimer
graph'
=
map
relatedComponents
egos
-- | then run it for the all the periods
graph
=
zip
[
1
..
]
...
...
@@ -288,8 +294,13 @@ reduceFrequency frequency branches =
updateThr
::
Double
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
updateThr
thr
branches
=
map
(
\
b
->
map
(
\
g
->
g
&
phylo_groupMeta
.~
(
singleton
"thr"
[
thr
]))
b
)
branches
seqMatching
::
Proximity
->
Double
->
Map
Int
Double
->
Int
->
Double
->
Int
->
Map
Date
Double
->
[
PhyloPeriodId
]
->
[([
PhyloGroup
],
Bool
)]
->
([
PhyloGroup
],
Bool
)
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
seqMatching
proximity
beta
frequency
minBranch
egoThr
frame
docs
periods
done
ego
rest
=
-- | Sequentially break each branch of a phylo where
-- done = all the allready broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
breakBranches
::
Proximity
->
Double
->
Map
Int
Double
->
Int
->
Double
->
Int
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloPeriodId
]
->
[([
PhyloGroup
],
Bool
)]
->
([
PhyloGroup
],
Bool
)
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
breakBranches
proximity
beta
frequency
minBranch
thr
frame
docs
coocs
periods
done
ego
rest
=
-- | 1) keep or not the new division of ego
let
done'
=
done
++
(
if
snd
ego
then
(
if
((
null
(
fst
ego'
))
||
(
quality
>
quality'
))
...
...
@@ -310,8 +321,8 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg
-- | 2) if there is no more branches in rest then return else continue
if
null
rest
then
done'
else
seqMatching
proximity
beta
frequency
minBranch
egoThr
frame
d
ocs
periods
done'
(
head'
"
seqMatching"
rest
)
(
tail'
"seqMatching
"
rest
)
else
breakBranches
proximity
beta
frequency
minBranch
thr
frame
docs
co
ocs
periods
done'
(
head'
"
breakBranches"
rest
)
(
tail'
"breakBranches
"
rest
)
where
--------------------------------------
quality
::
Double
...
...
@@ -320,11 +331,11 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg
ego'
::
([[
PhyloGroup
]],[[
PhyloGroup
]])
ego'
=
let
branches
=
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
phyloBranchMatching
frame
periods
proximity
egoThr
d
ocs
(
fst
ego
)
$
matchGroupsToGroups
frame
periods
proximity
thr
docs
co
ocs
(
fst
ego
)
branches'
=
branches
`
using
`
parList
rdeepseq
in
partition
(
\
b
->
(
length
$
nub
$
map
_phylo_groupPeriod
b
)
>=
minBranch
)
$
if
(
length
branches'
>
1
)
then
updateThr
egoT
hr
branches'
then
updateThr
t
hr
branches'
else
branches'
--------------------------------------
quality'
::
Double
...
...
@@ -332,15 +343,17 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg
((
map
fst
done
)
++
(
fst
ego'
)
++
(
snd
ego'
)
++
(
map
fst
rest
))
recursiveMatching'
::
Proximity
->
Double
->
Int
->
Map
Int
Double
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
recursiveMatching'
proximity
beta
minBranch
frequency
egoThr
frame
periods
docs
branches
=
if
(
egoThr
>=
1
)
||
((
not
.
or
)
$
map
snd
branches
)
seaLevelMatching
::
Proximity
->
Double
->
Int
->
Map
Int
Double
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Map
Date
Cooc
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
seaLevelMatching
proximity
beta
minBranch
frequency
thr
frame
periods
docs
coocs
branches
=
-- | if there is no branch to break or if sea level > 1 then end
if
(
thr
>=
1
)
||
((
not
.
or
)
$
map
snd
branches
)
then
branches
else
let
branches'
=
seqMatching
proximity
beta
frequency
minBranch
egoThr
frame
docs
periods
[]
(
head'
"recursiveMatching"
branches
)
(
tail'
"recursiveMatching"
branches
)
-- | break all the possible branches at the current sea level
let
branches'
=
breakBranches
proximity
beta
frequency
minBranch
thr
frame
docs
coocs
periods
[]
(
head'
"seaLevelMatching"
branches
)
(
tail'
"seaLevelMatching"
branches
)
frequency'
=
reduceFrequency
frequency
(
map
fst
branches'
)
in
recursiveMatching'
proximity
beta
minBranch
frequency'
(
egoThr
+
(
getThresholdStep
proximity
))
frame
periods
d
ocs
branches'
in
seaLevelMatching
proximity
beta
minBranch
frequency'
(
thr
+
(
getThresholdStep
proximity
))
frame
periods
docs
co
ocs
branches'
temporalMatching
::
Phylo
->
Phylo
...
...
@@ -348,23 +361,27 @@ temporalMatching phylo = updatePhyloGroups 1
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
traceMatchEnd
$
concat
branches
)
phylo
where
-- | 2)
init the recursiveMatching
-- | 2)
process the temporal matching by elevating sea level
branches
::
[[
PhyloGroup
]]
branches
=
map
fst
$
recursiveMatching'
(
phyloProximity
$
getConfig
phylo
)
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
(
phylo
^.
phylo_termFreq
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phylo
^.
phylo_timeDocs
)
groups
$
seaLevelMatching
(
phyloProximity
$
getConfig
phylo
)
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
(
phylo
^.
phylo_termFreq
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
phylo
^.
phylo_timeCooc
)
groups
-- | 1) for each group process an initial temporal Matching
-- | here we suppose that all the groups of level 1 are part of the same big branch
groups
::
[([
PhyloGroup
],
Bool
)]
groups
=
map
(
\
b
->
(
b
,(
length
$
nub
$
map
_phylo_groupPeriod
b
)
>=
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)))
$
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
phyloBranchMatching
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phyloProximity
$
getConfig
phylo
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
traceTemporalMatching
$
getGroupsFromLevel
1
phylo
)
\ No newline at end of file
$
matchGroupsToGroups
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phyloProximity
$
getConfig
phylo
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
phylo
^.
phylo_timeCooc
)
(
traceTemporalMatching
$
getGroupsFromLevel
1
phylo
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment