Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
258f6aec
Commit
258f6aec
authored
Nov 29, 2022
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
refactoring
parent
8faf7d6d
Pipeline
#3432
failed with stage
in 73 minutes and 22 seconds
Changes
7
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
83 additions
and
75 deletions
+83
-75
Main.hs
bin/gargantext-phylo/Main.hs
+1
-1
Phylo.hs
src/Gargantext/Core/Viz/Phylo.hs
+8
-8
PhyloExport.hs
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
+14
-14
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+12
-4
PhyloTools.hs
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
+12
-12
SynchronicClustering.hs
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
+3
-3
TemporalMatching.hs
src/Gargantext/Core/Viz/Phylo/TemporalMatching.hs
+33
-33
No files found.
bin/gargantext-phylo/Main.hs
View file @
258f6aec
...
...
@@ -152,7 +152,7 @@ seaToLabel config = case (seaElevation config) of
sensToLabel
::
PhyloConfig
->
[
Char
]
sensToLabel
config
=
case
(
phyloProxim
ity
config
)
of
sensToLabel
config
=
case
(
similar
ity
config
)
of
Hamming
_
_
->
undefined
WeightedLogJaccard
s
_
->
(
"WeightedLogJaccard_"
<>
show
s
)
WeightedLogSim
s
_
->
(
"WeightedLogSim-sens_"
<>
show
s
)
...
...
src/Gargantext/Core/Viz/Phylo.hs
View file @
258f6aec
...
...
@@ -71,7 +71,7 @@ data SeaElevation =
instance
ToSchema
SeaElevation
data
Proxim
ity
=
data
Similar
ity
=
WeightedLogJaccard
{
_wlj_sensibility
::
Double
,
_wlj_minSharedNgrams
::
Int
}
...
...
@@ -84,7 +84,7 @@ data Proximity =
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Proxim
ity
where
instance
ToSchema
Similar
ity
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
...
...
@@ -179,7 +179,7 @@ data PhyloConfig =
,
listParser
::
ListParser
,
phyloName
::
Text
,
phyloScale
::
Int
,
phyloProximity
::
Proxim
ity
,
similarity
::
Similar
ity
,
seaElevation
::
SeaElevation
,
findAncestors
::
Bool
,
phyloSynchrony
::
Synchrony
...
...
@@ -205,7 +205,7 @@ data PhyloSubConfig =
subConfig2config
::
PhyloSubConfig
->
PhyloConfig
subConfig2config
subConfig
=
defaultConfig
{
phyloProximity
=
WeightedLogJaccard
(
_sc_phyloProximity
subConfig
)
1
subConfig2config
subConfig
=
defaultConfig
{
similarity
=
WeightedLogJaccard
(
_sc_phyloProximity
subConfig
)
1
,
phyloSynchrony
=
ByProximityThreshold
(
_sc_phyloSynchrony
subConfig
)
0
AllBranches
MergeAllGroups
,
phyloQuality
=
Quality
(
_sc_phyloQuality
subConfig
)
1
,
timeUnit
=
_sc_timeUnit
subConfig
...
...
@@ -223,7 +223,7 @@ defaultConfig =
,
listParser
=
V4
,
phyloName
=
pack
"Phylo Name"
,
phyloScale
=
2
,
phyloProximity
=
WeightedLogJaccard
0.5
1
,
similarity
=
WeightedLogJaccard
0.5
1
,
seaElevation
=
Constante
0.1
0.1
,
findAncestors
=
False
,
phyloSynchrony
=
ByProximityThreshold
0.5
0
AllBranches
MergeAllGroups
...
...
@@ -251,8 +251,8 @@ instance ToJSON CorpusParser
instance
FromJSON
ListParser
instance
ToJSON
ListParser
instance
FromJSON
Proxim
ity
instance
ToJSON
Proxim
ity
instance
FromJSON
Similar
ity
instance
ToJSON
Similar
ity
instance
FromJSON
SeaElevation
instance
ToJSON
SeaElevation
...
...
@@ -592,7 +592,7 @@ instance ToSchema PhyloExport where
makeLenses
''
P
hyloConfig
makeLenses
''
P
hyloSubConfig
makeLenses
''
P
roxim
ity
makeLenses
''
S
imilar
ity
makeLenses
''
S
eaElevation
makeLenses
''
Q
uality
makeLenses
''
C
luster
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
View file @
258f6aec
...
...
@@ -25,7 +25,7 @@ import Data.Vector (Vector)
import
Debug.Trace
(
trace
)
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
filterDocs
,
filterDiago
,
reduceDiagos
,
to
Proxim
ity
,
getNextPeriods
)
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
filterDocs
,
filterDiago
,
reduceDiagos
,
to
Similar
ity
,
getNextPeriods
)
import
Gargantext.Prelude
hiding
(
scale
)
import
Prelude
(
writeFile
)
import
System.FilePath
...
...
@@ -288,9 +288,9 @@ exportToDot phylo export =
{- 8) create the edges between the branches
-- _ <- mapM (\(bId,bId') ->
-- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
-- (Text.pack $ show(branchIdsTo
Proxim
ity bId bId'
-- (getThresholdInit $ phylo
Proxim
ity $ getConfig phylo)
-- (getThresholdStep $ phylo
Proxim
ity $ getConfig phylo))) BranchToBranch
-- (Text.pack $ show(branchIdsTo
Similar
ity bId bId'
-- (getThresholdInit $ phylo
Similar
ity $ getConfig phylo)
-- (getThresholdStep $ phylo
Similar
ity $ getConfig phylo))) BranchToBranch
-- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
-}
...
...
@@ -595,23 +595,23 @@ getGroupThr step g =
breaks
=
(
g
^.
phylo_groupMeta
)
!
"breaks"
in
(
last'
"export"
(
take
(
round
$
(
last'
"export"
breaks
)
+
1
)
seaLvl
))
-
step
toAncestor
::
Double
->
Map
Int
Double
->
Proxim
ity
->
Double
->
[
PhyloGroup
]
->
PhyloGroup
->
PhyloGroup
toAncestor
nbDocs
diago
proxim
ity
step
candidates
ego
=
toAncestor
::
Double
->
Map
Int
Double
->
Similar
ity
->
Double
->
[
PhyloGroup
]
->
PhyloGroup
->
PhyloGroup
toAncestor
nbDocs
diago
similar
ity
step
candidates
ego
=
let
curr
=
ego
^.
phylo_groupAncestors
in
ego
&
phylo_groupAncestors
.~
(
curr
++
(
map
(
\
(
g
,
w
)
->
(
getGroupId
g
,
w
))
$
filter
(
\
(
g
,
w
)
->
(
w
>
0
)
&&
(
w
>=
(
min
(
getGroupThr
step
ego
)
(
getGroupThr
step
g
))))
$
map
(
\
g
->
(
g
,
to
Proximity
nbDocs
diago
proxim
ity
(
ego
^.
phylo_groupNgrams
)
(
g
^.
phylo_groupNgrams
)
(
g
^.
phylo_groupNgrams
)))
$
map
(
\
g
->
(
g
,
to
Similarity
nbDocs
diago
similar
ity
(
ego
^.
phylo_groupNgrams
)
(
g
^.
phylo_groupNgrams
)
(
g
^.
phylo_groupNgrams
)))
$
filter
(
\
g
->
g
^.
phylo_groupBranchId
/=
ego
^.
phylo_groupBranchId
)
candidates
))
headsToAncestors
::
Double
->
Map
Int
Double
->
Proxim
ity
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
->
[
PhyloGroup
]
headsToAncestors
nbDocs
diago
proxim
ity
step
heads
acc
=
headsToAncestors
::
Double
->
Map
Int
Double
->
Similar
ity
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
->
[
PhyloGroup
]
headsToAncestors
nbDocs
diago
similar
ity
step
heads
acc
=
if
(
null
heads
)
then
acc
else
let
ego
=
head'
"headsToAncestors"
heads
heads'
=
tail'
"headsToAncestors"
heads
in
headsToAncestors
nbDocs
diago
proximity
step
heads'
(
acc
++
[
toAncestor
nbDocs
diago
proxim
ity
step
heads'
ego
])
in
headsToAncestors
nbDocs
diago
similarity
step
heads'
(
acc
++
[
toAncestor
nbDocs
diago
similar
ity
step
heads'
ego
])
toHorizon
::
Phylo
->
Phylo
...
...
@@ -645,13 +645,13 @@ toHorizon phylo =
noHeads
=
groups
\\
heads
nbDocs
=
sum
$
elems
$
filterDocs
(
phylo
^.
phylo_timeDocs
)
[
prd
]
diago
=
reduceDiagos
$
filterDiago
(
phylo
^.
phylo_timeCooc
)
[
prd
]
proximity
=
(
phyloProxim
ity
$
getConfig
phylo
)
sim
=
(
similar
ity
$
getConfig
phylo
)
step
=
case
getSeaElevation
phylo
of
Constante
_
s
->
s
Adaptative
_
->
0
-- in headsToAncestors nbDocs diago
proxim
ity heads groups []
in
map
(
\
ego
->
toAncestor
nbDocs
diago
proximity
step
noHeads
ego
)
$
headsToAncestors
nbDocs
diago
proximity
step
heads
[]
-- in headsToAncestors nbDocs diago
Similar
ity heads groups []
in
map
(
\
ego
->
toAncestor
nbDocs
diago
sim
step
noHeads
ego
)
$
headsToAncestors
nbDocs
diago
sim
step
heads
[]
)
periods
-- | 3) process this task concurrently
newGroups
::
[[
PhyloGroup
]]
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
258f6aec
...
...
@@ -31,10 +31,11 @@ import Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toHorizon
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
import
Gargantext.Core.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
temporalMatching
,
getNextPeriods
,
filterDocs
,
filterDiago
,
reduceDiagos
,
to
Proxim
ity
)
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
temporalMatching
,
getNextPeriods
,
filterDocs
,
filterDiago
,
reduceDiagos
,
to
Similar
ity
)
import
Gargantext.Prelude
import
qualified
Data.Set
as
Set
import
qualified
Data.List
as
List
import
qualified
Data.Vector
as
Vector
------------------
...
...
@@ -78,6 +79,13 @@ toPhylo phylowithoutLink = trace ("# flatPhylo groups " <> show(length $ getGrou
-----------------------------
{-
-- create a square ladder
-}
squareLadder
::
[
Double
]
->
[
Double
]
squareLadder
ladder
=
List
.
map
(
\
x
->
x
*
x
)
ladder
{-
-- create an adaptative diachronic 'sea elevation' ladder
-}
...
...
@@ -107,7 +115,7 @@ constDiachronicLadder curr step ladder =
-}
scanSimilarity
::
Scale
->
Phylo
->
Phylo
scanSimilarity
lvl
phylo
=
let
proximity
=
phyloProxim
ity
$
getConfig
phylo
let
proximity
=
similar
ity
$
getConfig
phylo
scanning
=
foldlWithKey
(
\
acc
pId
pds
->
-- 1) process period by period
let
egos
=
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
...
...
@@ -124,7 +132,7 @@ scanSimilarity lvl phylo =
map
(
\
(
id'
,
ngrams'
)
->
let
nbDocs
=
(
sum
.
elems
)
$
filterDocs
docs
([
idToPrd
id
,
idToPrd
id'
])
diago
=
reduceDiagos
$
filterDiago
diagos
([
idToPrd
id
,
idToPrd
id'
])
in
((
id
,
id'
),
to
Proxim
ity
nbDocs
diago
proximity
ngrams
ngrams'
ngrams'
)
in
((
id
,
id'
),
to
Similar
ity
nbDocs
diago
proximity
ngrams
ngrams'
ngrams'
)
)
$
filter
(
\
(
_
,
ngrams'
)
->
(
not
.
null
)
$
intersect
ngrams
ngrams'
)
targets
)
egos
pairs'
=
pairs
`
using
`
parList
rdeepseq
...
...
@@ -173,7 +181,7 @@ clusterToGroup fis pId pId' lvl idx coocs = PhyloGroup pId pId' lvl idx ""
addTemporalLinksToPhylo
::
Phylo
->
Phylo
addTemporalLinksToPhylo
phylowithoutLink
=
case
strategy
of
Constante
start
gap
->
temporalMatching
(
constDiachronicLadder
start
gap
Set
.
empty
)
phylowithoutLink
Adaptative
steps
->
temporalMatching
(
adaptDiachronicLadder
steps
(
phylowithoutLink
^.
phylo_diaSimScan
)
Set
.
empty
)
phylowithoutLink
Adaptative
steps
->
temporalMatching
(
squareLadder
$
adaptDiachronicLadder
steps
(
phylowithoutLink
^.
phylo_diaSimScan
)
Set
.
empty
)
phylowithoutLink
where
strategy
::
SeaElevation
strategy
=
getSeaElevation
phylowithoutLink
...
...
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
View file @
258f6aec
...
...
@@ -330,16 +330,16 @@ getPeriodPointers fil g =
ToChildsMemory
->
undefined
ToParentsMemory
->
undefined
filter
Proximity
::
Proxim
ity
->
Double
->
Double
->
Bool
filter
Proximity
proxim
ity
thr
local
=
case
proxim
ity
of
filter
Similarity
::
Similar
ity
->
Double
->
Double
->
Bool
filter
Similarity
similar
ity
thr
local
=
case
similar
ity
of
WeightedLogJaccard
_
_
->
local
>=
thr
WeightedLogSim
_
_
->
local
>=
thr
Hamming
_
_
->
undefined
get
ProximityName
::
Proxim
ity
->
String
get
ProximityName
proxim
ity
=
case
proxim
ity
of
get
SimilarityName
::
Similar
ity
->
String
get
SimilarityName
similar
ity
=
case
similar
ity
of
WeightedLogJaccard
_
_
->
"WLJaccard"
WeightedLogSim
_
_
->
"WeightedLogSim"
Hamming
_
_
->
"Hamming"
...
...
@@ -578,16 +578,16 @@ traceSynchronyStart phylo =
-------------------
-- |
Proxim
ity | --
-- |
Similar
ity | --
-------------------
getSensibility
::
Proxim
ity
->
Double
getSensibility
::
Similar
ity
->
Double
getSensibility
proxi
=
case
proxi
of
WeightedLogJaccard
s
_
->
s
WeightedLogSim
s
_
->
s
Hamming
_
_
->
undefined
getMinSharedNgrams
::
Proxim
ity
->
Int
getMinSharedNgrams
::
Similar
ity
->
Int
getMinSharedNgrams
proxi
=
case
proxi
of
WeightedLogJaccard
_
m
->
m
WeightedLogSim
_
m
->
m
...
...
@@ -605,8 +605,8 @@ intersectInit acc lst lst' =
then
intersectInit
(
acc
++
[
head'
"intersectInit"
lst
])
(
tail
lst
)
(
tail
lst'
)
else
acc
branchIdsTo
Proxim
ity
::
PhyloBranchId
->
PhyloBranchId
->
Double
->
Double
->
Double
branchIdsTo
Proxim
ity
id
id'
thrInit
thrStep
=
thrInit
+
thrStep
*
(
fromIntegral
$
length
$
intersectInit
[]
(
snd
id
)
(
snd
id'
))
branchIdsTo
Similar
ity
::
PhyloBranchId
->
PhyloBranchId
->
Double
->
Double
->
Double
branchIdsTo
Similar
ity
id
id'
thrInit
thrStep
=
thrInit
+
thrStep
*
(
fromIntegral
$
length
$
intersectInit
[]
(
snd
id
)
(
snd
id'
))
ngramsInBranches
::
[[
PhyloGroup
]]
->
[
Int
]
ngramsInBranches
branches
=
nub
$
foldl
(
\
acc
g
->
acc
++
(
g
^.
phylo_groupNgrams
))
[]
$
concat
branches
...
...
@@ -662,4 +662,4 @@ traceTemporalMatching groups =
traceGroupsProxi
::
[
Double
]
->
[
Double
]
traceGroupsProxi
l
=
trace
(
"
\n
"
<>
"-- | "
<>
show
(
List
.
length
l
)
<>
" computed pairs of groups
proxim
ity"
<>
"
\n
"
)
l
trace
(
"
\n
"
<>
"-- | "
<>
show
(
List
.
length
l
)
<>
" computed pairs of groups
Similar
ity"
<>
"
\n
"
)
l
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
View file @
258f6aec
...
...
@@ -124,7 +124,7 @@ toDiamonds groups = foldl' (\acc groups' ->
$
foldl'
(
\
acc
g
->
acc
++
(
map
(
\
(
id
,
_
)
->
(
id
,[
g
])
)
$
g
^.
phylo_groupPeriodParents
)
)
[]
groups
groupsToEdges
::
Proxim
ity
->
Synchrony
->
Double
->
Map
Int
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
::
Similar
ity
->
Synchrony
->
Double
->
Map
Int
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
prox
sync
nbDocs
diago
groups
=
case
sync
of
ByProximityThreshold
thr
sens
_
strat
->
...
...
@@ -153,7 +153,7 @@ toParentId :: PhyloGroup -> PhyloGroupId
toParentId
child
=
((
child
^.
phylo_groupPeriod
,
child
^.
phylo_groupScale
+
1
),
child
^.
phylo_groupIndex
)
reduceGroups
::
Proxim
ity
->
Synchrony
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceGroups
::
Similar
ity
->
Synchrony
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceGroups
prox
sync
docs
diagos
branch
=
-- 1) reduce a branch as a set of periods & groups
let
periods
=
fromListWith
(
++
)
...
...
@@ -197,7 +197,7 @@ levelUpAncestors groups =
synchronicClustering
::
Phylo
->
Phylo
synchronicClustering
phylo
=
let
prox
=
phyloProxim
ity
$
getConfig
phylo
let
prox
=
similar
ity
$
getConfig
phylo
sync
=
phyloSynchrony
$
getConfig
phylo
docs
=
phylo
^.
phylo_timeDocs
diagos
=
map
coocToDiago
$
phylo
^.
phylo_timeCooc
...
...
src/Gargantext/Core/Viz/Phylo/TemporalMatching.hs
View file @
258f6aec
...
...
@@ -119,9 +119,9 @@ weightedLogSim' sens nbDocs diago ego_ngrams target_ngrams
{-
-- perform a seamilarity measure between a given group and a pair of targeted groups
-}
to
Proximity
::
Double
->
Map
Int
Double
->
Proxim
ity
->
[
Int
]
->
[
Int
]
->
[
Int
]
->
Double
to
Proximity
nbDocs
diago
proxim
ity
egoNgrams
targetNgrams
targetNgrams'
=
case
proxim
ity
of
to
Similarity
::
Double
->
Map
Int
Double
->
Similar
ity
->
[
Int
]
->
[
Int
]
->
[
Int
]
->
Double
to
Similarity
nbDocs
diago
similar
ity
egoNgrams
targetNgrams
targetNgrams'
=
case
similar
ity
of
WeightedLogJaccard
sens
_
->
let
pairNgrams
=
if
targetNgrams
==
targetNgrams'
then
targetNgrams
...
...
@@ -147,7 +147,7 @@ findLastPeriod fil periods = case fil of
ToChildsMemory
->
undefined
ToParentsMemory
->
undefined
removeOldPointers
::
[
Pointer
]
->
Filiation
->
Double
->
Proxim
ity
->
Period
removeOldPointers
::
[
Pointer
]
->
Filiation
->
Double
->
Similar
ity
->
Period
->
[((
PhyloGroupId
,[
Int
]),(
PhyloGroupId
,[
Int
]))]
->
[((
PhyloGroupId
,[
Int
]),(
PhyloGroupId
,[
Int
]))]
removeOldPointers
oldPointers
fil
thr
prox
prd
pairs
...
...
@@ -166,11 +166,11 @@ removeOldPointers oldPointers fil thr prox prd pairs
||
(((
fst
.
fst
.
fst
)
id'
)
>
(
fst
lastMatchedPrd
)))
pairs
|
otherwise
=
[]
filterPointers
::
Proxim
ity
->
Double
->
[
Pointer
]
->
[
Pointer
]
filterPointers
proxi
thr
pts
=
filter
(
\
(
_
,
w
)
->
filter
Proxim
ity
proxi
thr
w
)
pts
filterPointers
::
Similar
ity
->
Double
->
[
Pointer
]
->
[
Pointer
]
filterPointers
proxi
thr
pts
=
filter
(
\
(
_
,
w
)
->
filter
Similar
ity
proxi
thr
w
)
pts
filterPointers'
::
Proxim
ity
->
Double
->
[(
Pointer
,[
Int
])]
->
[(
Pointer
,[
Int
])]
filterPointers'
proxi
thr
pts
=
filter
(
\
((
_
,
w
),
_
)
->
filter
Proxim
ity
proxi
thr
w
)
pts
filterPointers'
::
Similar
ity
->
Double
->
[(
Pointer
,[
Int
])]
->
[(
Pointer
,[
Int
])]
filterPointers'
proxi
thr
pts
=
filter
(
\
((
_
,
w
),
_
)
->
filter
Similar
ity
proxi
thr
w
)
pts
reduceDiagos
::
Map
Date
Cooc
->
Map
Int
Double
...
...
@@ -231,7 +231,7 @@ groupsToBranches groups =
{-
-- find the best pair/singleton of parents/childs for a given group
-}
makePairs
::
(
PhyloGroupId
,[
Int
])
->
[(
PhyloGroupId
,[
Int
])]
->
[
Period
]
->
[
Pointer
]
->
Filiation
->
Double
->
Proxim
ity
makePairs
::
(
PhyloGroupId
,[
Int
])
->
[(
PhyloGroupId
,[
Int
])]
->
[
Period
]
->
[
Pointer
]
->
Filiation
->
Double
->
Similar
ity
->
Map
Date
Double
->
Map
Date
Cooc
->
[((
PhyloGroupId
,[
Int
]),(
PhyloGroupId
,[
Int
]))]
makePairs
(
egoId
,
egoNgrams
)
candidates
periods
oldPointers
fil
thr
prox
docs
diagos
=
if
(
null
periods
)
...
...
@@ -248,7 +248,7 @@ makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs di
$
filter
(
\
(
id
,
ngrams
)
->
let
nbDocs
=
(
sum
.
elems
)
$
filterDocs
docs
([(
fst
.
fst
)
egoId
,
(
fst
.
fst
)
id
])
diago
=
reduceDiagos
$
filterDiago
diagos
([(
fst
.
fst
)
egoId
,
(
fst
.
fst
)
id
])
in
(
to
Proxim
ity
nbDocs
diago
prox
egoNgrams
egoNgrams
ngrams
)
>=
thr
in
(
to
Similar
ity
nbDocs
diago
prox
egoNgrams
egoNgrams
ngrams
)
>=
thr
)
candidates
--------------------------------------
lastPrd
::
Period
...
...
@@ -258,7 +258,7 @@ makePairs (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs di
{-
-- find the best temporal links between a given group and its parents/childs
-}
phyloGroupMatching
::
[[(
PhyloGroupId
,[
Int
])]]
->
Filiation
->
Proxim
ity
->
Map
Date
Double
->
Map
Date
Cooc
phyloGroupMatching
::
[[(
PhyloGroupId
,[
Int
])]]
->
Filiation
->
Similar
ity
->
Map
Date
Double
->
Map
Date
Cooc
->
Double
->
[
Pointer
]
->
(
PhyloGroupId
,[
Int
])
->
[
Pointer
]
phyloGroupMatching
candidates
filiation
proxi
docs
diagos
thr
oldPointers
(
id
,
ngrams
)
=
if
(
null
$
filterPointers
proxi
thr
oldPointers
)
...
...
@@ -266,10 +266,10 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng
then
if
null
nextPointers
then
[]
else
filterPointersByPeriod
filiation
-- 2) keep only the best set of pointers grouped by
proxim
ity
-- 2) keep only the best set of pointers grouped by
Similar
ity
$
head'
"phyloGroupMatching"
$
groupBy
(
\
pt
pt'
->
(
snd
.
fst
)
pt
==
(
snd
.
fst
)
pt'
)
-- 1) find the first time frame where at leats one pointer satisfies the
proxim
ity threshold
-- 1) find the first time frame where at leats one pointer satisfies the
Similar
ity threshold
$
sortBy
(
comparing
(
Down
.
snd
.
fst
))
$
head'
"pointers"
nextPointers
else
oldPointers
where
...
...
@@ -277,29 +277,29 @@ phyloGroupMatching candidates filiation proxi docs diagos thr oldPointers (id,ng
nextPointers
=
take
1
-- stop as soon as we find a time frame where at least one singleton / pair satisfies the threshold
$
dropWhile
(
null
)
-- for each time frame, process the
proxim
ity on relevant pairs of targeted groups
-- for each time frame, process the
Similar
ity on relevant pairs of targeted groups
$
scanl
(
\
acc
targets
->
let
periods
=
nub
$
map
(
fst
.
fst
.
fst
)
targets
lastPrd
=
findLastPeriod
filiation
periods
nbdocs
=
sum
$
elems
$
(
filterDocs
docs
([(
fst
.
fst
)
id
]
++
periods
))
diago
=
reduceDiagos
$
filterDiago
diagos
([(
fst
.
fst
)
id
]
++
periods
)
singletons
=
process
Proxim
ity
nbdocs
diago
$
map
(
\
g
->
(
g
,
g
))
$
filter
(
\
g
->
(
fst
.
fst
.
fst
)
g
==
lastPrd
)
targets
singletons
=
process
Similar
ity
nbdocs
diago
$
map
(
\
g
->
(
g
,
g
))
$
filter
(
\
g
->
(
fst
.
fst
.
fst
)
g
==
lastPrd
)
targets
pairs
=
makePairs
(
id
,
ngrams
)
targets
periods
oldPointers
filiation
thr
proxi
docs
diagos
in
if
(
null
singletons
)
then
acc
++
(
process
Proxim
ity
nbdocs
diago
pairs
)
then
acc
++
(
process
Similar
ity
nbdocs
diago
pairs
)
else
acc
++
singletons
)
[]
$
map
concat
$
inits
candidates
-- groups from [[1900],[1900,1901],[1900,1901,1902],...]
-----------------------------
process
Proxim
ity
::
Double
->
Map
Int
Double
->
[((
PhyloGroupId
,[
Int
]),(
PhyloGroupId
,[
Int
]))]
->
[(
Pointer
,[
Int
])]
process
Proxim
ity
nbdocs
diago
targets
=
filterPointers'
proxi
thr
process
Similar
ity
::
Double
->
Map
Int
Double
->
[((
PhyloGroupId
,[
Int
]),(
PhyloGroupId
,[
Int
]))]
->
[(
Pointer
,[
Int
])]
process
Similar
ity
nbdocs
diago
targets
=
filterPointers'
proxi
thr
$
concat
$
map
(
\
(
c
,
c'
)
->
let
proximity
=
toProxim
ity
nbdocs
diago
proxi
ngrams
(
snd
c
)
(
snd
c'
)
let
similarity
=
toSimilar
ity
nbdocs
diago
proxi
ngrams
(
snd
c
)
(
snd
c'
)
in
if
((
c
==
c'
)
||
(
snd
c
==
snd
c'
))
then
[((
fst
c
,
proxim
ity
),
snd
c
)]
else
[((
fst
c
,
proximity
),
snd
c
),((
fst
c'
,
proxim
ity
),
snd
c'
)]
)
targets
then
[((
fst
c
,
similar
ity
),
snd
c
)]
else
[((
fst
c
,
similarity
),
snd
c
),((
fst
c'
,
similar
ity
),
snd
c'
)]
)
targets
{-
...
...
@@ -329,8 +329,8 @@ getCandidates minNgrams ego targets =
{-
-- set up and start performing the upstream/downstream inter‐temporal matching period by period
-}
reconstructTemporalLinks
::
Int
->
[
Period
]
->
Proxim
ity
->
Double
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reconstructTemporalLinks
frame
periods
proxim
ity
thr
docs
coocs
groups
=
reconstructTemporalLinks
::
Int
->
[
Period
]
->
Similar
ity
->
Double
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reconstructTemporalLinks
frame
periods
similar
ity
thr
docs
coocs
groups
=
let
groups'
=
groupByField
_phylo_groupPeriod
groups
in
foldl'
(
\
acc
prd
->
let
-- 1) find the parents/childs matching periods
...
...
@@ -347,9 +347,9 @@ reconstructTemporalLinks frame periods proximity thr docs coocs groups =
diagoChi
=
filterDiago
(
map
coocToDiago
coocs
)
([
prd
]
++
periodsPar
)
-- 5) match in parallel all the groups (egos) to their possible candidates
egos
=
map
(
\
ego
->
let
pointersPar
=
phyloGroupMatching
(
getCandidates
(
getMinSharedNgrams
proximity
)
ego
candidatesPar
)
ToParents
proxim
ity
docsPar
diagoPar
let
pointersPar
=
phyloGroupMatching
(
getCandidates
(
getMinSharedNgrams
similarity
)
ego
candidatesPar
)
ToParents
similar
ity
docsPar
diagoPar
thr
(
getPeriodPointers
ToParents
ego
)
(
getGroupId
ego
,
ego
^.
phylo_groupNgrams
)
pointersChi
=
phyloGroupMatching
(
getCandidates
(
getMinSharedNgrams
proximity
)
ego
candidatesChi
)
ToChilds
proxim
ity
docsChi
diagoChi
pointersChi
=
phyloGroupMatching
(
getCandidates
(
getMinSharedNgrams
similarity
)
ego
candidatesChi
)
ToChilds
similar
ity
docsChi
diagoChi
thr
(
getPeriodPointers
ToChilds
ego
)
(
getGroupId
ego
,
ego
^.
phylo_groupNgrams
)
in
addPointers
ToChilds
TemporalPointer
pointersChi
$
addPointers
ToParents
TemporalPointer
pointersPar
...
...
@@ -364,7 +364,7 @@ reconstructTemporalLinks frame periods proximity thr docs coocs groups =
{-
-- reconstruct a phylomemetic network from a list of groups and from a given threshold
-}
toPhylomemeticNetwork
::
Int
->
[
Period
]
->
Proxim
ity
->
Double
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloGroup
]
->
[
Branch
]
toPhylomemeticNetwork
::
Int
->
[
Period
]
->
Similar
ity
->
Double
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloGroup
]
->
[
Branch
]
toPhylomemeticNetwork
timescale
periods
similarity
thr
docs
coocs
groups
=
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
reconstructTemporalLinks
timescale
periods
similarity
thr
docs
coocs
groups
...
...
@@ -523,7 +523,7 @@ thrToMeta thr branches =
-- done = all the already separated branches
-- rest = all the branches we still have to separate
-}
separateBranches
::
Double
->
Proxim
ity
->
Double
->
Map
Int
Double
->
Int
->
Double
->
Double
separateBranches
::
Double
->
Similar
ity
->
Double
->
Map
Int
Double
->
Int
->
Double
->
Double
->
Int
->
Map
Date
Double
->
Map
Date
Cooc
->
[
Period
]
->
[(
Branch
,
ShouldTry
)]
->
(
Branch
,
ShouldTry
)
->
[(
Branch
,
ShouldTry
)]
->
[(
Branch
,
ShouldTry
)]
...
...
@@ -578,13 +578,13 @@ separateBranches fdt similarity lambda frequency minBranch thr rise timescale do
{-
-- perform the sea-level rise algorithm, browse the similarity ladder and check that we can try out the next step
-}
seaLevelRise
::
Double
->
Proxim
ity
->
Double
->
Int
->
Map
Int
Double
seaLevelRise
::
Double
->
Similar
ity
->
Double
->
Int
->
Map
Int
Double
->
[
Double
]
->
Double
->
Int
->
[
Period
]
->
Map
Date
Double
->
Map
Date
Cooc
->
[(
Branch
,
ShouldTry
)]
->
([(
Branch
,
ShouldTry
)],
FinalQuality
)
seaLevelRise
fdt
proxim
ity
lambda
minBranch
frequency
ladder
rise
frame
periods
docs
coocs
branches
=
seaLevelRise
fdt
similar
ity
lambda
minBranch
frequency
ladder
rise
frame
periods
docs
coocs
branches
=
-- if the ladder is empty or thr > 1 or there is no branch to break then stop
if
(
null
ladder
)
||
((
List
.
head
ladder
)
>
1
)
||
(
stopRise
branches
)
then
(
branches
,
toPhyloQuality
fdt
lambda
frequency
(
map
fst
branches
))
...
...
@@ -596,9 +596,9 @@ seaLevelRise fdt proximity lambda minBranch frequency ladder rise frame periods
<>
" ξ = "
<>
printf
"%.5f"
(
globalAccuracy
frequency
(
map
fst
branches
))
<>
" ρ = "
<>
printf
"%.5f"
(
globalRecall
frequency
(
map
fst
branches
))
<>
" branches = "
<>
show
(
length
branches
))
$
separateBranches
fdt
proxim
ity
lambda
frequency
minBranch
thr
rise
frame
docs
coocs
periods
$
separateBranches
fdt
similar
ity
lambda
frequency
minBranch
thr
rise
frame
docs
coocs
periods
[]
(
List
.
head
branches
)
(
List
.
tail
branches
)
in
seaLevelRise
fdt
proxim
ity
lambda
minBranch
frequency
(
List
.
tail
ladder
)
(
rise
+
1
)
frame
periods
docs
coocs
branches'
in
seaLevelRise
fdt
similar
ity
lambda
minBranch
frequency
(
List
.
tail
ladder
)
(
rise
+
1
)
frame
periods
docs
coocs
branches'
where
--------
stopRise
::
[(
Branch
,
ShouldTry
)]
->
Bool
...
...
@@ -624,7 +624,7 @@ temporalMatching ladder phylo = updatePhyloGroups 1
--- 2) process the temporal matching by elevating the similarity ladder
sea
::
([(
Branch
,
ShouldTry
)],
FinalQuality
)
sea
=
seaLevelRise
(
fromIntegral
$
Vector
.
length
$
getRoots
phylo
)
(
phyloProxim
ity
$
getConfig
phylo
)
(
similar
ity
$
getConfig
phylo
)
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
(
phylo
^.
phylo_termFreq
)
...
...
@@ -641,7 +641,7 @@ temporalMatching ladder phylo = updatePhyloGroups 1
seabed
=
map
(
\
b
->
(
b
,(
length
$
nub
$
map
_phylo_groupPeriod
b
)
>=
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)))
$
toPhylomemeticNetwork
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phyloProxim
ity
$
getConfig
phylo
)
(
similar
ity
$
getConfig
phylo
)
(
List
.
head
ladder
)
(
phylo
^.
phylo_timeDocs
)
(
phylo
^.
phylo_timeCooc
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment