Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Grégoire Locqueville
haskell-gargantext
Commits
f5393047
Commit
f5393047
authored
Nov 18, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix the weightedlogjaccard
parent
f49465e4
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
163 additions
and
149 deletions
+163
-149
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+2
-2
PhyloExport.hs
src/Gargantext/Viz/Phylo/PhyloExport.hs
+6
-2
PhyloMaker.hs
src/Gargantext/Viz/Phylo/PhyloMaker.hs
+1
-1
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+2
-0
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+14
-23
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+138
-121
No files found.
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
f5393047
...
...
@@ -133,8 +133,8 @@ defaultConfig =
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
2
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloSynchrony
=
ByProximityThreshold
0.
1
0
SiblingBranches
MergeAllGroups
,
phyloQuality
=
Quality
0.1
1
,
phyloSynchrony
=
ByProximityThreshold
0.
2
0
SiblingBranches
MergeAllGroups
,
phyloQuality
=
Quality
10
1
,
timeUnit
=
Year
3
1
5
,
clique
=
Fis
1
5
,
exportLabel
=
[
BranchLabel
MostInclusive
2
,
GroupLabel
MostEmergentInclusive
2
]
...
...
src/Gargantext/Viz/Phylo/PhyloExport.hs
View file @
f5393047
...
...
@@ -171,11 +171,15 @@ exportToDot phylo export =
,
Ratio
FillRatio
,
Style
[
SItem
Filled
[]
],
Color
[
toWColor
White
]]
-- | home made attributes
<>
[(
toAttr
(
fromStrict
"nbDocs"
)
$
pack
$
show
(
sum
$
elems
$
phylo
^.
phylo_timeDocs
))
<>
[(
toAttr
(
fromStrict
"phyloFoundations"
)
$
pack
$
show
(
length
$
Vector
.
toList
$
getRoots
phylo
))
,(
toAttr
(
fromStrict
"phyloTerms"
)
$
pack
$
show
(
length
$
nub
$
concat
$
map
(
\
g
->
g
^.
phylo_groupNgrams
)
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"phyloDocs"
)
$
pack
$
show
(
sum
$
elems
$
phylo
^.
phylo_timeDocs
))
,(
toAttr
(
fromStrict
"phyloBranches"
)
$
pack
$
show
(
length
$
export
^.
export_branches
))
,(
toAttr
(
fromStrict
"phyloGroups"
)
$
pack
$
show
(
length
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"proxiName"
)
$
pack
$
show
(
getProximityName
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"proxiInit"
)
$
pack
$
show
(
getProximityInit
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"proxiStep"
)
$
pack
$
show
(
getProximityStep
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"qua
Factor
"
)
$
pack
$
show
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"qua
Granularity
"
)
$
pack
$
show
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
))
])
...
...
src/Gargantext/Viz/Phylo/PhyloMaker.hs
View file @
f5393047
...
...
@@ -91,7 +91,7 @@ cliqueToGroup fis thr pId lvl idx fdt coocs =
(
fis
^.
phyloClique_support
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
1
,[
0
])
(
1
,[
0
])
-- | branchid (lvl,[path in the branching tree])
(
singleton
"thr"
[
thr
])
[]
[]
[]
[]
...
...
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
f5393047
...
...
@@ -227,6 +227,8 @@ sumCooc cooc cooc' = unionWith (+) cooc cooc'
getTrace
::
Cooc
->
Double
getTrace
cooc
=
sum
$
elems
$
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
cooc
coocToDiago
::
Cooc
->
Cooc
coocToDiago
cooc
=
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
cooc
-- | To build the local cooc matrix of each phylogroup
ngramsToCooc
::
[
Int
]
->
[
Cooc
]
->
Cooc
...
...
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
f5393047
...
...
@@ -18,7 +18,7 @@ module Gargantext.Viz.Phylo.SynchronicClustering where
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.TemporalMatching
(
weightedLogJaccard
)
import
Gargantext.Viz.Phylo.TemporalMatching
(
weightedLogJaccard
'
,
filterDiago
,
reduceDiagos
)
import
Gargantext.Viz.Phylo.PhyloExport
(
processDynamics
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
,
sortOn
,
init
,
all
,
group
,
maximum
,
groupBy
)
...
...
@@ -56,26 +56,16 @@ mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq) ids
groupsToBranches'
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
groupsToBranches'
groups
=
-- | run the related component algorithm
let
egos
=
groupBy
(
\
gs
gs'
->
(
fst
$
fst
$
head'
"egos"
gs
)
==
(
fst
$
fst
$
head'
"egos"
gs'
))
$
sortOn
(
\
gs
->
fst
$
fst
$
head'
"egos"
gs
)
$
map
(
\
g
->
[
getGroupId
g
]
++
(
map
fst
$
g
^.
phylo_groupPeriodParents
)
++
(
map
fst
$
g
^.
phylo_groupPeriodChilds
)
)
$
elems
groups
-- | first find the related components by inside each ego's period
graph'
=
map
relatedComponents
egos
-- | then run it for the all the periods
graph
=
relatedComponents
$
concat
(
graph'
`
using
`
parList
rdeepseq
)
let
egos
=
map
(
\
g
->
[
getGroupId
g
]
++
(
map
fst
$
g
^.
phylo_groupPeriodParents
)
++
(
map
fst
$
g
^.
phylo_groupPeriodChilds
)
)
$
elems
groups
graph
=
relatedComponents
egos
-- | update each group's branch id
in
map
(
\
ids
->
-- intervenir ici
let
groups'
=
elems
$
restrictKeys
groups
(
Set
.
fromList
ids
)
bId
=
mergeBranchIds
$
map
(
\
g
->
snd
$
g
^.
phylo_groupBranchId
)
groups'
in
map
(
\
g
->
g
&
phylo_groupBranchId
%~
(
\
(
lvl
,
_
)
->
(
lvl
+
1
,
bId
)))
groups'
)
graph
-- toBranchId :: PhyloGroup -> PhyloBranchId
-- toBranchId child = ((child ^. phylo_groupLevel) + 1, snd (child ^. phylo_groupBranchId))
getLastThr
::
[
PhyloGroup
]
->
Double
getLastThr
childs
=
maximum
$
concat
$
map
(
\
g
->
(
g
^.
phylo_groupMeta
)
!
"thr"
)
childs
...
...
@@ -157,8 +147,8 @@ toDiamonds groups = foldl' (\acc groups' ->
$
foldl'
(
\
acc
g
->
acc
++
(
map
(
\
(
id
,
_
)
->
(
id
,[
g
])
)
$
g
^.
phylo_groupPeriodParents
)
)
[]
groups
groupsToEdges
::
Proximity
->
Synchrony
->
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
prox
sync
docs
groups
=
groupsToEdges
::
Proximity
->
Synchrony
->
Double
->
Map
Int
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
prox
sync
nbDocs
diago
groups
=
case
sync
of
ByProximityThreshold
thr
sens
_
strat
->
filter
(
\
(
_
,
w
)
->
w
>=
thr
)
...
...
@@ -174,8 +164,7 @@ groupsToEdges prox sync docs groups =
toEdges
sens
edges
=
case
prox
of
WeightedLogJaccard
_
_
_
->
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard
sens
docs
(
g
^.
phylo_groupCooc
)
(
g'
^.
phylo_groupCooc
)
((
g
,
g'
),
weightedLogJaccard'
sens
nbDocs
diago
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
edges
_
->
undefined
...
...
@@ -191,15 +180,16 @@ toParentId :: PhyloGroup -> PhyloGroupId
toParentId
child
=
((
child
^.
phylo_groupPeriod
,
child
^.
phylo_groupLevel
+
1
),
child
^.
phylo_groupIndex
)
reduceGroups
::
Proximity
->
Synchrony
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceGroups
prox
sync
docs
branch
=
reduceGroups
::
Proximity
->
Synchrony
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceGroups
prox
sync
docs
diagos
branch
=
-- | 1) reduce a branch as a set of periods & groups
let
periods
=
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,[
g
]))
branch
in
(
concat
.
concat
.
elems
)
$
mapWithKey
(
\
prd
groups
->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let
edges
=
groupsToEdges
prox
sync
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
groups
let
diago
=
reduceDiagos
$
filterDiago
diagos
[
prd
]
edges
=
groupsToEdges
prox
sync
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
diago
groups
in
map
(
\
comp
->
-- | 4) add to each groups their futur level parent group
let
parentId
=
toParentId
(
head'
"parentId"
comp
)
...
...
@@ -233,7 +223,8 @@ synchronicClustering phylo =
let
prox
=
phyloProximity
$
getConfig
phylo
sync
=
phyloSynchrony
$
getConfig
phylo
docs
=
phylo
^.
phylo_timeDocs
newBranches
=
map
(
\
branch
->
reduceGroups
prox
sync
docs
branch
)
diagos
=
map
coocToDiago
$
phylo
^.
phylo_timeCooc
newBranches
=
map
(
\
branch
->
reduceGroups
prox
sync
docs
diagos
branch
)
$
map
processDynamics
$
adjustClustering
sync
(
getPhyloThresholdStep
phylo
)
$
phyloToLastBranches
...
...
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
f5393047
...
...
@@ -16,7 +16,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.TemporalMatching
where
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
union
,
dropWhile
,
partition
,
or
)
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
,
findWithDefault
,
keys
,
(
!
),
filterWithKey
,
singleton
)
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
findWithDefault
,
keys
,
(
!
),
singleton
,
empty
,
mapKeys
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
...
...
@@ -35,30 +35,29 @@ import qualified Data.Set as Set
-------------------
-- |
Process the inverse sumLog
sumInvLog
::
Double
->
[
Double
]
->
Double
sumInvLog
s
l
=
foldl
(
\
mem
x
->
mem
+
(
1
/
log
(
s
+
x
)))
0
l
-- |
To compute a jaccard similarity between two lists
jaccard
::
[
Int
]
->
[
Int
]
->
Double
jaccard
inter'
union'
=
((
fromIntegral
.
length
)
$
inter'
)
/
((
fromIntegral
.
length
)
$
union'
)
-- | Process the sumLog
sum
Log
::
Double
->
[
Double
]
->
Double
sum
Log
s
l
=
foldl
(
\
mem
x
->
mem
+
log
(
s
+
x
))
0
l
-- | Process the
inverse
sumLog
sum
InvLog'
::
Double
->
Double
->
[
Double
]
->
Double
sum
InvLog'
s
nb
diago
=
foldl
(
\
mem
occ
->
mem
+
(
1
/
(
log
(
occ
+
s
)
/
log
(
nb
+
s
))))
0
diago
-- |
To compute a jaccard similarity between two lists
jaccard
::
[
Int
]
->
[
Int
]
->
Double
jaccard
inter'
union'
=
((
fromIntegral
.
length
)
$
inter'
)
/
((
fromIntegral
.
length
)
$
union'
)
-- |
Process the sumLog
sumLog'
::
Double
->
Double
->
[
Double
]
->
Double
sumLog'
s
nb
diago
=
foldl
(
\
mem
occ
->
mem
+
(
log
(
occ
+
s
)
/
log
(
nb
+
s
)))
0
diago
-- | To process a WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard
::
Double
->
Double
->
Cooc
->
Cooc
->
[
Int
]
->
[
Int
]
->
Double
weightedLogJaccard
sens
docs
cooc
cooc'
ngrams
ngrams'
weightedLogJaccard'
::
Double
->
Double
->
Map
Int
Double
->
[
Int
]
->
[
Int
]
->
Double
weightedLogJaccard'
sens
nbDocs
diago
ngrams
ngrams'
|
null
ngramsInter
=
0
|
ngramsInter
==
ngramsUnion
=
1
|
sens
==
0
=
jaccard
ngramsInter
ngramsUnion
|
sens
>
0
=
(
sumInvLog
sens
coocInter
)
/
(
sumInvLog
sens
cooc
Union
)
|
otherwise
=
(
sumLog
sens
coocInter
)
/
(
sumLog
sens
coocUnion
)
where
|
sens
>
0
=
(
sumInvLog
'
sens
nbDocs
diagoInter
)
/
(
sumInvLog'
sens
nbDocs
diago
Union
)
|
otherwise
=
(
sumLog
'
sens
nbDocs
diagoInter
)
/
(
sumLog'
sens
nbDocs
diagoUnion
)
where
--------------------------------------
ngramsInter
::
[
Int
]
ngramsInter
=
intersect
ngrams
ngrams'
...
...
@@ -66,85 +65,83 @@ weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
ngramsUnion
::
[
Int
]
ngramsUnion
=
union
ngrams
ngrams'
--------------------------------------
coocInter
::
[
Double
]
coocInter
=
elems
$
map
(
/
docs
)
$
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
$
intersectionWith
(
+
)
cooc
cooc'
-- coocInter = elems $ map (/docs) $ intersectionWith (+) cooc cooc'
--------------------------------------
coocUnion
::
[
Double
]
coocUnion
=
elems
$
map
(
/
docs
)
$
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
$
unionWith
(
+
)
cooc
cooc'
--------------------------------------
-- | To choose a proximity function
pickProximity
::
Proximity
->
Double
->
Cooc
->
Cooc
->
[
Int
]
->
[
Int
]
->
Double
pickProximity
proximity
docs
cooc
cooc'
ngrams
ngrams'
=
case
proximity
of
WeightedLogJaccard
sens
_
_
->
weightedLogJaccard
sens
docs
cooc
cooc'
ngrams
ngrams'
Hamming
->
undefined
diagoInter
::
[
Double
]
diagoInter
=
elems
$
restrictKeys
diago
(
Set
.
fromList
ngramsInter
)
--------------------------------------
diagoUnion
::
[
Double
]
diagoUnion
=
elems
$
restrictKeys
diago
(
Set
.
fromList
ngramsUnion
)
--------------------------------------
-- | To process the proximity between a current group and a pair of targets group
toProximity
::
Map
Date
Double
->
Proximity
->
PhyloGroup
->
PhyloGroup
->
PhyloGroup
->
Double
toProximity
docs
proximity
ego
target
target'
=
let
docs'
=
sum
$
elems
docs
cooc
=
if
target
==
target'
then
(
target
^.
phylo_groupCooc
)
else
sumCooc
(
target
^.
phylo_groupCooc
)
(
target'
^.
phylo_groupCooc
)
ngrams
=
if
target
==
target'
then
(
target
^.
phylo_groupNgrams
)
else
union
(
target
^.
phylo_groupNgrams
)
(
target'
^.
phylo_groupNgrams
)
in
pickProximity
proximity
docs'
(
ego
^.
phylo_groupCooc
)
cooc
(
ego
^.
phylo_groupNgrams
)
ngrams
toProximity
::
Double
->
Map
Int
Double
->
Proximity
->
PhyloGroup
->
PhyloGroup
->
PhyloGroup
->
Double
toProximity
nbDocs
diago
proximity
ego
target
target'
=
case
proximity
of
WeightedLogJaccard
sens
_
_
->
let
targetsNgrams
=
if
target
==
target'
then
(
target
^.
phylo_groupNgrams
)
else
union
(
target
^.
phylo_groupNgrams
)
(
target'
^.
phylo_groupNgrams
)
in
weightedLogJaccard'
sens
nbDocs
diago
(
ego
^.
phylo_groupNgrams
)
targetsNgrams
Hamming
->
undefined
------------------------
-- | Local Matching | --
------------------------
to
LastPeriod
::
Filiation
->
[
PhyloPeriodId
]
->
PhyloPeriodId
to
LastPeriod
fil
periods
=
case
fil
of
ToParents
->
head'
"
to
LastPeriod"
(
sortOn
fst
periods
)
ToChilds
->
last'
"
to
LastPeriod"
(
sortOn
fst
periods
)
toLazyPairs
::
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
PhyloPeriodId
->
[(
PhyloGroup
,
PhyloGroup
)]
->
[(
PhyloGroup
,
PhyloGroup
)]
toLazyPairs
pointers
fil
thr
prox
prd
pairs
=
if
null
pointers
then
pairs
else
let
rest
=
filterPointers
prox
thr
pointe
rs
in
if
null
rest
then
let
prd'
=
toLastPeriod
fil
(
map
(
fst
.
fst
.
fst
)
p
ointers
)
in
if
prd'
==
prd
then
[]
else
filter
(
\
(
g
,
g'
)
->
case
fil
of
ToParents
->
((
fst
$
g
^.
phylo_groupPeriod
)
<
(
fst
prd'
))
||
((
fst
$
g'
^.
phylo_groupPeriod
)
<
(
fst
prd'
))
ToChilds
->
((
fst
$
g
^.
phylo_groupPeriod
)
>
(
fst
prd'
))
||
((
fst
$
g'
^.
phylo_groupPeriod
)
>
(
fst
prd'
)))
pairs
else
[]
find
LastPeriod
::
Filiation
->
[
PhyloPeriodId
]
->
PhyloPeriodId
find
LastPeriod
fil
periods
=
case
fil
of
ToParents
->
head'
"
find
LastPeriod"
(
sortOn
fst
periods
)
ToChilds
->
last'
"
find
LastPeriod"
(
sortOn
fst
periods
)
-- | To filter pairs of candidates related to old pointers periods
removeOldPointers
::
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
PhyloPeriodId
->
[(
PhyloGroup
,
PhyloGroup
)]
->
[(
PhyloGroup
,
PhyloGroup
)]
removeOldPointers
oldPointers
fil
thr
prox
prd
pairs
|
null
oldPointers
=
pai
rs
|
null
(
filterPointers
prox
thr
oldPointers
)
=
let
lastMatchedPrd
=
findLastPeriod
fil
(
map
(
fst
.
fst
.
fst
)
oldP
ointers
)
in
if
lastMatchedPrd
==
prd
then
[]
else
filter
(
\
(
g
,
g'
)
->
case
fil
of
ToParents
->
((
fst
$
g
^.
phylo_groupPeriod
)
<
(
fst
lastMatchedPrd
))
||
((
fst
$
g'
^.
phylo_groupPeriod
)
<
(
fst
lastMatchedPrd
))
ToChilds
->
((
fst
$
g
^.
phylo_groupPeriod
)
>
(
fst
lastMatchedPrd
))
||
((
fst
$
g'
^.
phylo_groupPeriod
)
>
(
fst
lastMatchedPrd
)))
pairs
|
otherwise
=
[]
-- | Find pairs of valuable candidates to be matched
makePairs'
::
PhyloGroup
->
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
Map
Date
Double
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs'
ego
candidates
periods
pointers
fil
thr
prox
doc
s
=
makePairs'
::
PhyloGroup
->
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
Map
Date
Double
->
Map
Date
Cooc
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs'
ego
candidates
periods
oldPointers
fil
thr
prox
docs
diago
s
=
case
null
periods
of
True
->
[]
False
->
toLazyPairs
p
ointers
fil
thr
prox
lastPrd
-- | at least on of the pair candidates should be from the last added period
False
->
removeOldPointers
oldP
ointers
fil
thr
prox
lastPrd
-- | at least on of the pair candidates should be from the last added period
$
filter
(
\
(
g
,
g'
)
->
((
g
^.
phylo_groupPeriod
)
==
lastPrd
)
||
((
g'
^.
phylo_groupPeriod
)
==
lastPrd
))
$
listToKeys
$
filter
(
\
g
->
(
g
^.
phylo_groupPeriod
==
lastPrd
)
||
((
toProximity
docs
prox
ego
ego
g
)
>=
thr
))
candidates
$
filter
(
\
g
->
let
nbDocs
=
sum
$
elems
$
(
filterDocs
docs
([
ego
^.
phylo_groupPeriod
,
g
^.
phylo_groupPeriod
]))
diago
=
reduceDiagos
$
filterDiago
diagos
([
ego
^.
phylo_groupPeriod
,
g
^.
phylo_groupPeriod
])
in
(
g
^.
phylo_groupPeriod
==
lastPrd
)
||
((
toProximity
nbDocs
diago
prox
ego
ego
g
)
>=
thr
))
candidates
where
lastPrd
::
PhyloPeriodId
lastPrd
=
to
LastPeriod
fil
periods
lastPrd
=
find
LastPeriod
fil
periods
filterPointers
::
Proximity
->
Double
->
[
Pointer
]
->
[
Pointer
]
filterPointers
proxi
thr
pts
=
filter
(
\
(
_
,
w
)
->
filterProximity
proxi
thr
w
)
pts
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
candidates
fil
proxi
docs
thr
ego
=
reduceDiagos
::
Map
Date
Cooc
->
Map
Int
Double
reduceDiagos
diagos
=
mapKeys
(
\
(
k
,
_
)
->
k
)
$
foldl
(
\
acc
diago
->
unionWith
(
+
)
acc
diago
)
empty
(
elems
diagos
)
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Map
Date
Cooc
->
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
candidates
fil
proxi
docs
diagos
thr
ego
=
if
(
null
$
filterPointers
proxi
thr
$
getPeriodPointers
fil
ego
)
-- | let's find new pointers
then
if
null
nextPointers
...
...
@@ -155,8 +152,7 @@ phyloGroupMatching candidates fil proxi docs thr ego =
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
reverse
$
sortOn
snd
$
head'
"pointers"
nextPointers
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
else
addPointers
ego
fil
TemporalPointer
$
filterPointers
proxi
thr
$
getPeriodPointers
fil
ego
else
ego
where
nextPointers
::
[[
Pointer
]]
nextPointers
=
take
1
...
...
@@ -164,13 +160,16 @@ phyloGroupMatching candidates fil proxi docs thr ego =
-- | for each time frame, process the proximity on relevant pairs of targeted groups
$
scanl
(
\
acc
groups
->
let
periods
=
nub
$
map
_phylo_groupPeriod
$
concat
groups
docs'
=
(
filterDocs
docs
([
ego
^.
phylo_groupPeriod
]
++
periods
))
pairs
=
makePairs'
ego
(
concat
groups
)
periods
(
getPeriodPointers
fil
ego
)
fil
thr
proxi
docs
nbdocs
=
sum
$
elems
$
(
filterDocs
docs
([
ego
^.
phylo_groupPeriod
]
++
periods
))
diago
=
reduceDiagos
$
filterDiago
diagos
([
ego
^.
phylo_groupPeriod
]
++
periods
)
-- | important resize nbdocs et diago dans le make pairs
pairs
=
makePairs'
ego
(
concat
groups
)
periods
(
getPeriodPointers
fil
ego
)
fil
thr
proxi
docs
diagos
in
acc
++
(
filterPointers
proxi
thr
$
concat
$
map
(
\
(
c
,
c'
)
->
-- | process the proximity between the current group and a pair of candidates
let
proximity
=
toProximity
docs'
proxi
ego
c
c'
let
proximity
=
toProximity
nbdocs
diago
proxi
ego
c
c'
in
if
(
c
==
c'
)
then
[(
getGroupId
c
,
proximity
)]
else
[(
getGroupId
c
,
proximity
),(
getGroupId
c'
,
proximity
)]
)
pairs
))
[]
...
...
@@ -180,6 +179,9 @@ phyloGroupMatching candidates fil proxi docs thr ego =
filterDocs
::
Map
Date
Double
->
[
PhyloPeriodId
]
->
Map
Date
Double
filterDocs
d
pds
=
restrictKeys
d
$
periodsToYears
pds
filterDiago
::
Map
Date
Cooc
->
[
PhyloPeriodId
]
->
Map
Date
Cooc
filterDiago
diago
pds
=
restrictKeys
diago
$
periodsToYears
pds
-----------------------------
-- | Matching Processing | --
...
...
@@ -200,26 +202,29 @@ getCandidates ego targets =
)
groups'
)
targets
phyloBranchMatching
::
Int
->
[
PhyloPeriodId
]
->
Proximity
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
phyloBranchMatching
frame
periods
proximity
thr
docs
branch
=
-- traceBranchMatching proximity thr
matchByPeriods
$
groupByField
_phylo_groupPeriod
branch
where
--------------------------------------
matchByPeriods
::
Map
PhyloPeriodId
[
PhyloGroup
]
->
[
PhyloGroup
]
matchByPeriods
branch'
=
foldl'
(
\
acc
prd
->
let
periodsPar
=
getNextPeriods
ToParents
frame
prd
periods
periodsChi
=
getNextPeriods
ToChilds
frame
prd
periods
candidatesPar
=
map
(
\
prd'
->
findWithDefault
[]
prd'
branch'
)
periodsPar
candidatesChi
=
map
(
\
prd'
->
findWithDefault
[]
prd'
branch'
)
periodsChi
docsPar
=
filterDocs
docs
([
prd
]
++
periodsPar
)
docsChi
=
filterDocs
docs
([
prd
]
++
periodsChi
)
egos
=
map
(
\
ego
->
phyloGroupMatching
(
getCandidates
ego
candidatesPar
)
ToParents
proximity
docsPar
thr
$
phyloGroupMatching
(
getCandidates
ego
candidatesChi
)
ToChilds
proximity
docsChi
thr
ego
)
$
findWithDefault
[]
prd
branch'
egos'
=
egos
`
using
`
parList
rdeepseq
in
acc
++
egos'
)
[]
periods
matchGroupsToGroups
::
Int
->
[
PhyloPeriodId
]
->
Proximity
->
Double
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloGroup
]
->
[
PhyloGroup
]
matchGroupsToGroups
frame
periods
proximity
thr
docs
coocs
groups
=
let
groups'
=
groupByField
_phylo_groupPeriod
groups
in
foldl'
(
\
acc
prd
->
let
-- | 1) find the parents/childs matching periods
periodsPar
=
getNextPeriods
ToParents
frame
prd
periods
periodsChi
=
getNextPeriods
ToChilds
frame
prd
periods
-- | 2) find the parents/childs matching candidates
candidatesPar
=
map
(
\
prd'
->
findWithDefault
[]
prd'
groups'
)
periodsPar
candidatesChi
=
map
(
\
prd'
->
findWithDefault
[]
prd'
groups'
)
periodsChi
-- | 3) find the parents/child number of docs by years
docsPar
=
filterDocs
docs
([
prd
]
++
periodsPar
)
docsChi
=
filterDocs
docs
([
prd
]
++
periodsChi
)
-- | 4) find the parents/child diago by years
diagoPar
=
filterDiago
(
map
coocToDiago
coocs
)
([
prd
]
++
periodsPar
)
diagoChi
=
filterDiago
(
map
coocToDiago
coocs
)
([
prd
]
++
periodsPar
)
-- | 5) match in parallel all the groups (egos) to their possible candidates
egos
=
map
(
\
ego
->
phyloGroupMatching
(
getCandidates
ego
candidatesPar
)
ToParents
proximity
docsPar
diagoPar
thr
$
phyloGroupMatching
(
getCandidates
ego
candidatesChi
)
ToChilds
proximity
docsChi
diagoChi
thr
ego
)
$
findWithDefault
[]
prd
groups'
egos'
=
egos
`
using
`
parList
rdeepseq
in
acc
++
egos'
)
[]
periods
-----------------------
...
...
@@ -270,6 +275,7 @@ groupsToBranches groups =
++
(
map
fst
$
group
^.
phylo_groupPeriodParents
)
++
(
map
fst
$
group
^.
phylo_groupPeriodChilds
)
)
$
elems
groups
-- | first find the related components by inside each ego's period
-- | a supprimer
graph'
=
map
relatedComponents
egos
-- | then run it for the all the periods
graph
=
zip
[
1
..
]
...
...
@@ -288,8 +294,13 @@ reduceFrequency frequency branches =
updateThr
::
Double
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
updateThr
thr
branches
=
map
(
\
b
->
map
(
\
g
->
g
&
phylo_groupMeta
.~
(
singleton
"thr"
[
thr
]))
b
)
branches
seqMatching
::
Proximity
->
Double
->
Map
Int
Double
->
Int
->
Double
->
Int
->
Map
Date
Double
->
[
PhyloPeriodId
]
->
[([
PhyloGroup
],
Bool
)]
->
([
PhyloGroup
],
Bool
)
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
seqMatching
proximity
beta
frequency
minBranch
egoThr
frame
docs
periods
done
ego
rest
=
-- | Sequentially break each branch of a phylo where
-- done = all the allready broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
breakBranches
::
Proximity
->
Double
->
Map
Int
Double
->
Int
->
Double
->
Int
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloPeriodId
]
->
[([
PhyloGroup
],
Bool
)]
->
([
PhyloGroup
],
Bool
)
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
breakBranches
proximity
beta
frequency
minBranch
thr
frame
docs
coocs
periods
done
ego
rest
=
-- | 1) keep or not the new division of ego
let
done'
=
done
++
(
if
snd
ego
then
(
if
((
null
(
fst
ego'
))
||
(
quality
>
quality'
))
...
...
@@ -310,8 +321,8 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg
-- | 2) if there is no more branches in rest then return else continue
if
null
rest
then
done'
else
seqMatching
proximity
beta
frequency
minBranch
egoThr
frame
d
ocs
periods
done'
(
head'
"
seqMatching"
rest
)
(
tail'
"seqMatching
"
rest
)
else
breakBranches
proximity
beta
frequency
minBranch
thr
frame
docs
co
ocs
periods
done'
(
head'
"
breakBranches"
rest
)
(
tail'
"breakBranches
"
rest
)
where
--------------------------------------
quality
::
Double
...
...
@@ -320,11 +331,11 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg
ego'
::
([[
PhyloGroup
]],[[
PhyloGroup
]])
ego'
=
let
branches
=
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
phyloBranchMatching
frame
periods
proximity
egoThr
d
ocs
(
fst
ego
)
$
matchGroupsToGroups
frame
periods
proximity
thr
docs
co
ocs
(
fst
ego
)
branches'
=
branches
`
using
`
parList
rdeepseq
in
partition
(
\
b
->
(
length
$
nub
$
map
_phylo_groupPeriod
b
)
>=
minBranch
)
$
if
(
length
branches'
>
1
)
then
updateThr
egoT
hr
branches'
then
updateThr
t
hr
branches'
else
branches'
--------------------------------------
quality'
::
Double
...
...
@@ -332,15 +343,17 @@ seqMatching proximity beta frequency minBranch egoThr frame docs periods done eg
((
map
fst
done
)
++
(
fst
ego'
)
++
(
snd
ego'
)
++
(
map
fst
rest
))
recursiveMatching'
::
Proximity
->
Double
->
Int
->
Map
Int
Double
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
recursiveMatching'
proximity
beta
minBranch
frequency
egoThr
frame
periods
docs
branches
=
if
(
egoThr
>=
1
)
||
((
not
.
or
)
$
map
snd
branches
)
seaLevelMatching
::
Proximity
->
Double
->
Int
->
Map
Int
Double
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Map
Date
Cooc
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
seaLevelMatching
proximity
beta
minBranch
frequency
thr
frame
periods
docs
coocs
branches
=
-- | if there is no branch to break or if sea level > 1 then end
if
(
thr
>=
1
)
||
((
not
.
or
)
$
map
snd
branches
)
then
branches
else
let
branches'
=
seqMatching
proximity
beta
frequency
minBranch
egoThr
frame
docs
periods
[]
(
head'
"recursiveMatching"
branches
)
(
tail'
"recursiveMatching"
branches
)
-- | break all the possible branches at the current sea level
let
branches'
=
breakBranches
proximity
beta
frequency
minBranch
thr
frame
docs
coocs
periods
[]
(
head'
"seaLevelMatching"
branches
)
(
tail'
"seaLevelMatching"
branches
)
frequency'
=
reduceFrequency
frequency
(
map
fst
branches'
)
in
recursiveMatching'
proximity
beta
minBranch
frequency'
(
egoThr
+
(
getThresholdStep
proximity
))
frame
periods
d
ocs
branches'
in
seaLevelMatching
proximity
beta
minBranch
frequency'
(
thr
+
(
getThresholdStep
proximity
))
frame
periods
docs
co
ocs
branches'
temporalMatching
::
Phylo
->
Phylo
...
...
@@ -348,23 +361,27 @@ temporalMatching phylo = updatePhyloGroups 1
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
traceMatchEnd
$
concat
branches
)
phylo
where
-- | 2)
init the recursiveMatching
-- | 2)
process the temporal matching by elevating sea level
branches
::
[[
PhyloGroup
]]
branches
=
map
fst
$
recursiveMatching'
(
phyloProximity
$
getConfig
phylo
)
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
(
phylo
^.
phylo_termFreq
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phylo
^.
phylo_timeDocs
)
groups
$
seaLevelMatching
(
phyloProximity
$
getConfig
phylo
)
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
(
phylo
^.
phylo_termFreq
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
phylo
^.
phylo_timeCooc
)
groups
-- | 1) for each group process an initial temporal Matching
-- | here we suppose that all the groups of level 1 are part of the same big branch
groups
::
[([
PhyloGroup
],
Bool
)]
groups
=
map
(
\
b
->
(
b
,(
length
$
nub
$
map
_phylo_groupPeriod
b
)
>=
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)))
$
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
phyloBranchMatching
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phyloProximity
$
getConfig
phylo
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
traceTemporalMatching
$
getGroupsFromLevel
1
phylo
)
\ No newline at end of file
$
matchGroupsToGroups
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phyloProximity
$
getConfig
phylo
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
phylo
^.
phylo_timeCooc
)
(
traceTemporalMatching
$
getGroupsFromLevel
1
phylo
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment