Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
2d0a7430
Commit
2d0a7430
authored
Jan 14, 2020
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add adaptative and constante sea level elevation
parent
a9fc87aa
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
386 additions
and
152 deletions
+386
-152
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+27
-7
PhyloExample.hs
src/Gargantext/Viz/Phylo/PhyloExample.hs
+7
-2
PhyloExport.hs
src/Gargantext/Viz/Phylo/PhyloExport.hs
+47
-23
PhyloMaker.hs
src/Gargantext/Viz/Phylo/PhyloMaker.hs
+46
-12
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+44
-46
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+35
-34
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+180
-28
No files found.
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
2d0a7430
...
...
@@ -57,12 +57,22 @@ data CorpusParser =
|
Csv
{
_csv_limit
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
SeaElevation
=
Constante
{
_cons_start
::
Double
,
_cons_step
::
Double
}
|
Adaptative
{
_adap_granularity
::
Double
}
deriving
(
Show
,
Generic
,
Eq
)
data
Proximity
=
WeightedLogJaccard
{
_wlj_sensibility
::
Double
,
_wlj_thresholdInit
::
Double
,
_wlj_thresholdStep
::
Double
}
-- , _wlj_thresholdInit :: Double
-- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double
}
|
Hamming
deriving
(
Show
,
Generic
,
Eq
)
...
...
@@ -114,6 +124,7 @@ data Config =
,
phyloName
::
Text
,
phyloLevel
::
Int
,
phyloProximity
::
Proximity
,
seaElevation
::
SeaElevation
,
phyloSynchrony
::
Synchrony
,
phyloQuality
::
Quality
,
timeUnit
::
TimeUnit
...
...
@@ -132,8 +143,9 @@ defaultConfig =
,
corpusParser
=
Csv
1000
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
2
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloSynchrony
=
ByProximityThreshold
0.1
10
AllBranches
MergeAllGroups
,
phyloProximity
=
WeightedLogJaccard
10
,
seaElevation
=
Adaptative
25
,
phyloSynchrony
=
ByProximityThreshold
0.6
10
SiblingBranches
MergeAllGroups
,
phyloQuality
=
Quality
0.1
1
,
timeUnit
=
Year
3
1
5
,
clique
=
Fis
1
5
...
...
@@ -148,6 +160,8 @@ instance FromJSON CorpusParser
instance
ToJSON
CorpusParser
instance
FromJSON
Proximity
instance
ToJSON
Proximity
instance
FromJSON
SeaElevation
instance
ToJSON
SeaElevation
instance
FromJSON
TimeUnit
instance
ToJSON
TimeUnit
instance
FromJSON
Clique
...
...
@@ -253,6 +267,7 @@ data Phylo =
,
_phylo_timeCooc
::
!
(
Map
Date
Cooc
)
,
_phylo_timeDocs
::
!
(
Map
Date
Double
)
,
_phylo_termFreq
::
!
(
Map
Int
Double
)
,
_phylo_groupsProxi
::
!
(
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
)
,
_phylo_param
::
PhyloParam
,
_phylo_periods
::
Map
PhyloPeriodId
PhyloPeriod
}
...
...
@@ -366,9 +381,13 @@ data PhyloLabel =
data
PhyloBranch
=
PhyloBranch
{
_branch_id
::
PhyloBranchId
,
_branch_canonId
::
[
Int
]
,
_branch_seaLevel
::
[
Double
]
,
_branch_x
::
Double
,
_branch_y
::
Double
,
_branch_label
::
Text
,
_branch_meta
::
Map
Text
[
Double
]
}
deriving
(
Generic
,
Show
)
}
deriving
(
Generic
,
Show
,
Eq
)
data
PhyloExport
=
PhyloExport
...
...
@@ -382,6 +401,7 @@ data PhyloExport =
makeLenses
''
C
onfig
makeLenses
''
P
roximity
makeLenses
''
S
eaElevation
makeLenses
''
Q
uality
makeLenses
''
C
lique
makeLenses
''
P
hyloLabel
...
...
src/Gargantext/Viz/Phylo/PhyloExample.hs
View file @
2d0a7430
...
...
@@ -30,7 +30,7 @@ import Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.PhyloMaker
import
Gargantext.Viz.Phylo.PhyloExport
import
Gargantext.Viz.Phylo.TemporalMatching
(
t
emporalMatching
)
import
Gargantext.Viz.Phylo.TemporalMatching
(
adaptativeTemporalMatching
,
constanteT
emporalMatching
)
import
Gargantext.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Control.Lens
...
...
@@ -60,7 +60,12 @@ phylo2 = synchronicClustering phylo1
-----------------------------------------------
phylo1
::
Phylo
phylo1
=
temporalMatching
phylo1
=
case
(
getSeaElevation
phyloBase
)
of
Constante
s
g
->
constanteTemporalMatching
s
g
$
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
Adaptative
s
->
adaptativeTemporalMatching
s
$
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
...
...
src/Gargantext/Viz/Phylo/PhyloExport.hs
View file @
2d0a7430
...
...
@@ -17,8 +17,8 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloExport
where
import
Data.Map
(
Map
,
fromList
,
empty
,
fromListWith
,
insert
,
(
!
),
elems
,
unionWith
,
findWithDefault
,
toList
,
delete
)
import
Data.List
((
++
),
sort
,
nub
,
concat
,
sortOn
,
reverse
,
groupBy
,
union
,
(
\\
),
(
!!
),
init
,
partition
,
unwords
,
nubBy
)
import
Data.Map
(
Map
,
fromList
,
empty
,
fromListWith
,
insert
,
(
!
),
elems
,
unionWith
,
findWithDefault
,
toList
)
import
Data.List
((
++
),
sort
,
nub
,
concat
,
sortOn
,
reverse
,
groupBy
,
union
,
(
\\
),
(
!!
),
init
,
partition
,
unwords
,
nubBy
,
inits
)
import
Data.Vector
(
Vector
)
import
Prelude
(
writeFile
)
...
...
@@ -116,7 +116,11 @@ branchToDotNode b =
([
FillColor
[
toWColor
CornSilk
],
FontName
"Arial"
,
FontSize
40
,
Shape
Egg
,
Style
[
SItem
Bold
[]
],
Label
(
toDotLabel
$
b
^.
branch_label
)]
<>
(
metaToAttr
$
b
^.
branch_meta
)
<>
[
toAttr
"nodeType"
"branch"
,
toAttr
"branchId"
(
pack
$
unwords
(
map
show
$
snd
$
b
^.
branch_id
))
])
,
toAttr
"branchId"
(
pack
$
unwords
(
map
show
$
snd
$
b
^.
branch_id
))
,
toAttr
"branch_x"
(
fromStrict
$
Text
.
pack
$
(
show
$
b
^.
branch_x
))
,
toAttr
"branch_y"
(
fromStrict
$
Text
.
pack
$
(
show
$
b
^.
branch_y
))
,
toAttr
"label"
(
pack
$
show
$
b
^.
branch_label
)
])
periodToDotNode
::
(
Date
,
Date
)
->
Dot
DotId
periodToDotNode
prd
=
...
...
@@ -130,7 +134,7 @@ periodToDotNode prd =
groupToDotNode
::
Vector
Ngrams
->
PhyloGroup
->
Dot
DotId
groupToDotNode
fdt
g
=
node
(
groupIdToDotId
$
getGroupId
g
)
([
FontName
"Arial"
,
Shape
BoxShape
,
toLabel
(
groupToTable
fdt
g
)]
([
FontName
"Arial"
,
Shape
Square
,
penWidth
4
,
toLabel
(
groupToTable
fdt
g
)]
<>
[
toAttr
"nodeType"
"group"
,
toAttr
"from"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
,
toAttr
"to"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
...
...
@@ -141,7 +145,7 @@ groupToDotNode fdt g =
toDotEdge
::
DotId
->
DotId
->
Text
.
Text
->
EdgeType
->
Dot
DotId
toDotEdge
source
target
lbl
edgeType
=
edge
source
target
(
case
edgeType
of
GroupToGroup
->
[
Width
10
,
Color
[
toWColor
Black
],
Constraint
True
GroupToGroup
->
[
Width
3
,
penWidth
4
,
Color
[
toWColor
Black
],
Constraint
True
,
Label
(
StrLabel
$
fromStrict
lbl
)]
BranchToGroup
->
[
Width
3
,
Color
[
toWColor
Black
],
ArrowHead
(
AType
[(
ArrMod
FilledArrow
RightSide
,
DotArrow
)])
,
Label
(
StrLabel
$
fromStrict
lbl
)]
...
...
@@ -174,12 +178,9 @@ exportToDot phylo export =
<>
[(
toAttr
(
fromStrict
"phyloFoundations"
)
$
pack
$
show
(
length
$
Vector
.
toList
$
getRoots
phylo
))
,(
toAttr
(
fromStrict
"phyloTerms"
)
$
pack
$
show
(
length
$
nub
$
concat
$
map
(
\
g
->
g
^.
phylo_groupNgrams
)
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"phyloDocs"
)
$
pack
$
show
(
sum
$
elems
$
phylo
^.
phylo_timeDocs
))
,(
toAttr
(
fromStrict
"phyloPeriods"
)
$
pack
$
show
(
length
$
elems
$
phylo
^.
phylo_periods
))
,(
toAttr
(
fromStrict
"phyloBranches"
)
$
pack
$
show
(
length
$
export
^.
export_branches
))
,(
toAttr
(
fromStrict
"phyloGroups"
)
$
pack
$
show
(
length
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"proxiName"
)
$
pack
$
show
(
getProximityName
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"proxiInit"
)
$
pack
$
show
(
getProximityInit
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"proxiStep"
)
$
pack
$
show
(
getProximityStep
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"quaGranularity"
)
$
pack
$
show
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
))
])
...
...
@@ -232,12 +233,12 @@ exportToDot phylo export =
)
$
nubBy
(
\
combi
combi'
->
fst
combi
==
fst
combi'
)
$
listToCombi'
$
getPeriodIds
phylo
-- | 8) create the edges between the branches
_
<-
mapM
(
\
(
bId
,
bId'
)
->
toDotEdge
(
branchIdToDotId
bId
)
(
branchIdToDotId
bId'
)
(
Text
.
pack
$
show
(
branchIdsToProximity
bId
bId'
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
getThresholdStep
$
phyloProximity
$
getConfig
phylo
)))
BranchToBranch
)
$
nubBy
(
\
combi
combi'
->
fst
combi
==
fst
combi'
)
$
listToCombi'
$
map
_branch_id
$
export
^.
export_branches
--
_ <- mapM (\(bId,bId') ->
--
toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
--
(Text.pack $ show(branchIdsToProximity bId bId'
--
(getThresholdInit $ phyloProximity $ getConfig phylo)
--
(getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
--
) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
graphAttrs
[
Rank
SameRank
]
...
...
@@ -418,8 +419,8 @@ processLabels labels foundations export =
toDynamics
::
Int
->
[
PhyloGroup
]
->
PhyloGroup
->
Map
Int
(
Date
,
Date
)
->
Double
toDynamics
n
parents
g
roup
m
=
let
prd
=
g
roup
^.
phylo_groupPeriod
toDynamics
n
parents
g
m
=
let
prd
=
g
^.
phylo_groupPeriod
end
=
last'
"dynamics"
(
sort
$
map
snd
$
elems
m
)
in
if
(((
snd
prd
)
==
(
snd
$
m
!
n
))
&&
(
snd
prd
/=
end
))
-- | decrease
...
...
@@ -467,15 +468,38 @@ toPhyloExport phylo = exportToDot phylo
$
processMetrics
export
where
export
::
PhyloExport
export
=
PhyloExport
groups
branches
export
=
PhyloExport
groups
$
map
(
\
(
x
,
b
)
->
b
&
branch_x
.~
x
)
$
zip
branchesGaps
branches
--------------------------------------
branchesGaps
::
[
Double
]
branchesGaps
=
map
sum
$
inits
$
map
(
\
(
b
,
x
)
->
b
^.
branch_y
+
0.05
-
x
)
$
zip
branches
$
([
0
]
++
(
map
(
\
(
b
,
b'
)
->
let
idx
=
length
$
commonPrefix
(
b
^.
branch_canonId
)
(
b'
^.
branch_canonId
)
[]
in
(
b'
^.
branch_seaLevel
)
!!
(
idx
-
1
)
)
$
listToSeq
branches
))
--------------------------------------
branches
::
[
PhyloBranch
]
branches
=
map
(
\
bId
->
PhyloBranch
bId
""
empty
)
$
nub
$
map
_phylo_groupBranchId
groups
branches
=
map
(
\
g
->
let
seaLvl
=
(
g
^.
phylo_groupMeta
)
!
"seaLevels"
breaks
=
(
g
^.
phylo_groupMeta
)
!
"breaks"
canonId
=
take
(
round
$
(
last'
"export"
breaks
)
+
2
)
(
snd
$
g
^.
phylo_groupBranchId
)
in
trace
(
show
(
canonId
))
$
PhyloBranch
(
g
^.
phylo_groupBranchId
)
canonId
seaLvl
0
(
last'
"export"
(
take
(
round
$
(
last'
"export"
breaks
)
+
1
)
seaLvl
))
""
empty
)
$
map
(
\
gs
->
head'
"export"
gs
)
$
groupBy
(
\
g
g'
->
g
^.
phylo_groupBranchId
==
g'
^.
phylo_groupBranchId
)
$
sortOn
(
\
g
->
g
^.
phylo_groupBranchId
)
groups
--------------------------------------
groups
::
[
PhyloGroup
]
groups
=
traceExportGroups
$
processDynamics
$
map
(
\
g
->
g
&
phylo_groupMeta
%~
delete
"dynamics"
)
$
getGroupsFromLevel
(
phyloLevel
$
getConfig
phylo
)
$
tracePhyloInfo
phylo
...
...
src/Gargantext/Viz/Phylo/PhyloMaker.hs
View file @
2d0a7430
...
...
@@ -15,15 +15,15 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloMaker
where
import
Data.List
(
concat
,
nub
,
partition
,
sort
,
(
++
),
group
)
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
restrictKeys
,
singleton
)
import
Data.List
(
concat
,
nub
,
partition
,
sort
,
(
++
),
group
,
intersect
,
null
)
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
restrictKeys
,
foldlWithKey
)
import
Data.Set
(
size
)
import
Data.Vector
(
Vector
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.TemporalMatching
(
temporalMatching
)
import
Gargantext.Viz.Phylo.TemporalMatching
(
adaptativeTemporalMatching
,
constanteTemporalMatching
,
getNextPeriods
,
filterDocs
,
filterDiago
,
reduceDiagos
,
toProximity
)
import
Gargantext.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Gargantext.Text.Context
(
TermList
)
import
Gargantext.Text.Metrics.FrequentItemSet
(
fisWithSizePolyMap
,
Size
(
..
))
...
...
@@ -43,7 +43,8 @@ import qualified Data.Set as Set
toPhylo
::
[
Document
]
->
TermList
->
Config
->
Phylo
toPhylo
docs
lst
conf
=
traceToPhylo
(
phyloLevel
conf
)
$
toPhylo
docs
lst
conf
=
trace
(
"# phylo1 groups "
<>
show
(
length
$
getGroupsFromLevel
1
phylo1
))
$
traceToPhylo
(
phyloLevel
conf
)
$
if
(
phyloLevel
conf
)
>
1
then
foldl'
(
\
phylo'
_
->
synchronicClustering
phylo'
)
phylo1
[
2
..
(
phyloLevel
conf
)]
else
phylo1
...
...
@@ -62,8 +63,35 @@ toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $
-- | To Phylo 1 | --
--------------------
appendGroups
::
(
a
->
Double
->
PhyloPeriodId
->
Level
->
Int
->
Vector
Ngrams
->
[
Cooc
]
->
PhyloGroup
)
->
Level
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
toGroupsProxi
::
Level
->
Phylo
->
Phylo
toGroupsProxi
lvl
phylo
=
let
proximity
=
phyloProximity
$
getConfig
phylo
groupsProxi
=
foldlWithKey
(
\
acc
pId
pds
->
-- 1) process period by period
let
egos
=
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
elems
$
view
(
phylo_periodLevels
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
lvl
)
.
phylo_levelGroups
)
pds
next
=
getNextPeriods
ToParents
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
pId
(
keys
$
phylo
^.
phylo_periods
)
targets
=
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
getGroupsFromLevelPeriods
lvl
next
phylo
docs
=
filterDocs
(
phylo
^.
phylo_timeDocs
)
([
pId
]
++
next
)
diagos
=
filterDiago
(
phylo
^.
phylo_timeCooc
)
([
pId
]
++
next
)
-- 2) compute the pairs in parallel
pairs
=
map
(
\
(
id
,
ngrams
)
->
map
(
\
(
id'
,
ngrams'
)
->
let
nbDocs
=
(
sum
.
elems
)
$
filterDocs
docs
([
idToPrd
id
,
idToPrd
id'
])
diago
=
reduceDiagos
$
filterDiago
diagos
([
idToPrd
id
,
idToPrd
id'
])
in
((
id
,
id'
),
toProximity
nbDocs
diago
proximity
ngrams
ngrams'
ngrams'
)
)
$
filter
(
\
(
_
,
ngrams'
)
->
(
not
.
null
)
$
intersect
ngrams
ngrams'
)
targets
)
egos
pairs'
=
pairs
`
using
`
parList
rdeepseq
in
acc
++
(
concat
pairs'
)
)
[]
$
phylo
^.
phylo_periods
in
phylo
&
phylo_groupsProxi
.~
((
traceGroupsProxi
.
fromList
)
groupsProxi
)
appendGroups
::
(
a
->
PhyloPeriodId
->
Level
->
Int
->
Vector
Ngrams
->
[
Cooc
]
->
PhyloGroup
)
->
Level
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
f
lvl
m
phylo
=
trace
(
"
\n
"
<>
"-- | Append "
<>
show
(
length
$
concat
$
elems
m
)
<>
" groups to Level "
<>
show
(
lvl
)
<>
"
\n
"
)
$
over
(
phylo_periods
.
traverse
...
...
@@ -76,7 +104,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
in
phyloLvl
&
phylo_levelGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
groups
++
[
(((
pId
,
lvl
),
length
groups
)
,
f
obj
(
getPhyloThresholdInit
phylo
)
pId
lvl
(
length
groups
)
(
getRoots
phylo
)
,
f
obj
pId
lvl
(
length
groups
)
(
getRoots
phylo
)
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[
pId
]))
]
)
[]
phyloCUnit
)
else
...
...
@@ -84,20 +112,25 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phylo
cliqueToGroup
::
PhyloClique
->
Double
->
PhyloPeriodId
->
Level
->
Int
->
Vector
Ngrams
->
[
Cooc
]
->
PhyloGroup
cliqueToGroup
fis
thr
pId
lvl
idx
fdt
coocs
=
cliqueToGroup
::
PhyloClique
->
PhyloPeriodId
->
Level
->
Int
->
Vector
Ngrams
->
[
Cooc
]
->
PhyloGroup
cliqueToGroup
fis
pId
lvl
idx
fdt
coocs
=
let
ngrams
=
ngramsToIdx
(
Set
.
toList
$
fis
^.
phyloClique_nodes
)
fdt
in
PhyloGroup
pId
lvl
idx
""
(
fis
^.
phyloClique_support
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
1
,[
0
])
-- | branchid (lvl,[path in the branching tree])
(
singleton
"thr"
[
thr
])
(
fromList
[(
"breaks"
,[
0
]),(
"seaLevels"
,[
0
])
])
[]
[]
[]
[]
toPhylo1
::
[
Document
]
->
Phylo
->
Phylo
toPhylo1
docs
phyloBase
=
temporalMatching
toPhylo1
docs
phyloBase
=
case
(
getSeaElevation
phyloBase
)
of
Constante
start
gap
->
constanteTemporalMatching
start
gap
$
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
Adaptative
steps
->
adaptativeTemporalMatching
steps
$
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
where
--------------------------------------
...
...
@@ -247,5 +280,6 @@ toPhyloBase docs lst conf =
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleNb
docs
)
(
docsToTermFreq
docs
(
foundations
^.
foundations_roots
))
empty
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
initPhyloLevels
1
prd
)))
periods
)
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
2d0a7430
...
...
@@ -34,6 +34,7 @@ import Control.Lens hiding (Level)
import
qualified
Data.Vector
as
Vector
import
qualified
Data.List
as
List
import
qualified
Data.Set
as
Set
import
qualified
Data.Map
as
Map
------------
-- | Io | --
...
...
@@ -66,6 +67,7 @@ roundToStr = printf "%0.*f"
countSup
::
Double
->
[
Double
]
->
Int
countSup
s
l
=
length
$
filter
(
>
s
)
l
dropByIdx
::
Int
->
[
a
]
->
[
a
]
dropByIdx
k
l
=
take
k
l
++
drop
(
k
+
1
)
l
...
...
@@ -76,6 +78,15 @@ elemIndex' e l = case (List.elemIndex e l) of
Just
i
->
i
commonPrefix
::
Eq
a
=>
[
a
]
->
[
a
]
->
[
a
]
->
[
a
]
commonPrefix
lst
lst'
acc
=
if
(
null
lst
||
null
lst'
)
then
acc
else
if
(
head'
"commonPrefix"
lst
==
head'
"commonPrefix"
lst'
)
then
commonPrefix
(
tail
lst
)
(
tail
lst'
)
(
acc
++
[
head'
"commonPrefix"
lst
])
else
acc
---------------------
-- | Foundations | --
---------------------
...
...
@@ -249,7 +260,7 @@ idToPrd :: PhyloGroupId -> PhyloPeriodId
idToPrd
id
=
(
fst
.
fst
)
id
getGroupThr
::
PhyloGroup
->
Double
getGroupThr
group
=
head'
"getGroupThr"
((
group
^.
phylo_groupMeta
)
!
"thr
"
)
getGroupThr
group
=
last'
"getGroupThr"
((
group
^.
phylo_groupMeta
)
!
"breaks
"
)
groupByField
::
Ord
a
=>
(
PhyloGroup
->
a
)
->
[
PhyloGroup
]
->
Map
a
[
PhyloGroup
]
groupByField
toField
groups
=
fromListWith
(
++
)
$
map
(
\
g
->
(
toField
g
,
[
g
]))
groups
...
...
@@ -263,28 +274,15 @@ getPeriodPointers fil group =
filterProximity
::
Proximity
->
Double
->
Double
->
Bool
filterProximity
proximity
thr
local
=
case
proximity
of
WeightedLogJaccard
_
_
_
->
local
>=
thr
WeightedLogJaccard
_
->
local
>=
thr
Hamming
->
undefined
getProximityName
::
Proximity
->
String
getProximityName
proximity
=
case
proximity
of
WeightedLogJaccard
_
_
_
->
"WLJaccard"
WeightedLogJaccard
_
->
"WLJaccard"
Hamming
->
"Hamming"
getProximityInit
::
Proximity
->
Double
getProximityInit
proximity
=
case
proximity
of
WeightedLogJaccard
_
i
_
->
i
Hamming
->
undefined
getProximityStep
::
Proximity
->
Double
getProximityStep
proximity
=
case
proximity
of
WeightedLogJaccard
_
_
s
->
s
Hamming
->
undefined
---------------
-- | Phylo | --
---------------
...
...
@@ -318,13 +316,8 @@ getLevels phylo = nub
.
traverse
.
phylo_periodLevels
)
phylo
getPhyloThresholdInit
::
Phylo
->
Double
getPhyloThresholdInit
phylo
=
getThresholdInit
(
phyloProximity
(
getConfig
phylo
))
getPhyloThresholdStep
::
Phylo
->
Double
getPhyloThresholdStep
phylo
=
getThresholdStep
(
phyloProximity
(
getConfig
phylo
))
getSeaElevation
::
Phylo
->
SeaElevation
getSeaElevation
phylo
=
seaElevation
(
getConfig
phylo
)
getConfig
::
Phylo
->
Config
...
...
@@ -350,6 +343,26 @@ getGroupsFromLevel lvl phylo =
.
phylo_levelGroups
)
phylo
getGroupsFromLevelPeriods
::
Level
->
[
PhyloPeriodId
]
->
Phylo
->
[
PhyloGroup
]
getGroupsFromLevelPeriods
lvl
periods
phylo
=
elems
$
view
(
phylo_periods
.
traverse
.
filtered
(
\
phyloPrd
->
elem
(
phyloPrd
^.
phylo_periodPeriod
)
periods
)
.
phylo_periodLevels
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
lvl
)
.
phylo_levelGroups
)
phylo
getGroupsFromPeriods
::
Level
->
Map
PhyloPeriodId
PhyloPeriod
->
[
PhyloGroup
]
getGroupsFromPeriods
lvl
periods
=
elems
$
view
(
traverse
.
phylo_periodLevels
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
lvl
)
.
phylo_levelGroups
)
periods
updatePhyloGroups
::
Level
->
Map
PhyloGroupId
PhyloGroup
->
Phylo
->
Phylo
updatePhyloGroups
lvl
m
phylo
=
over
(
phylo_periods
...
...
@@ -407,27 +420,7 @@ traceSynchronyStart phylo =
getSensibility
::
Proximity
->
Double
getSensibility
proxi
=
case
proxi
of
WeightedLogJaccard
s
_
_
->
s
Hamming
->
undefined
getThresholdInit
::
Proximity
->
Double
getThresholdInit
proxi
=
case
proxi
of
WeightedLogJaccard
_
t
_
->
t
Hamming
->
undefined
getThresholdStep
::
Proximity
->
Double
getThresholdStep
proxi
=
case
proxi
of
WeightedLogJaccard
_
_
s
->
s
Hamming
->
undefined
traceBranchMatching
::
Proximity
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
traceBranchMatching
proxi
thr
groups
=
case
proxi
of
WeightedLogJaccard
_
i
s
->
trace
(
roundToStr
2
thr
<>
" "
<>
foldl
(
\
acc
_
->
acc
<>
"."
)
"."
[(
10
*
i
),(
10
*
i
+
10
*
s
)
..
(
10
*
thr
)]
<>
" "
<>
show
(
length
groups
)
<>
" groups"
)
groups
WeightedLogJaccard
s
->
s
Hamming
->
undefined
----------------
...
...
@@ -495,3 +488,8 @@ traceMatchEnd groups =
traceTemporalMatching
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceTemporalMatching
groups
=
trace
(
"
\n
"
<>
"-- | Start temporal matching for "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
traceGroupsProxi
::
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
->
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
traceGroupsProxi
m
=
trace
(
"
\n
"
<>
"-- | "
<>
show
(
Map
.
size
m
)
<>
" computed pairs of groups proximity"
<>
"
\n
"
)
m
\ No newline at end of file
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
2d0a7430
...
...
@@ -21,8 +21,9 @@ import Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.TemporalMatching
(
weightedLogJaccard'
,
filterDiago
,
reduceDiagos
)
import
Gargantext.Viz.Phylo.PhyloExport
(
processDynamics
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
,
sortOn
,
init
,
all
,
group
,
maximum
,
groupBy
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
,
singleton
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
,
sortOn
,
all
,
groupBy
,
group
,
maximum
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
)
import
Data.Text
(
Text
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
...
...
@@ -37,7 +38,7 @@ import qualified Data.Set as Set
-------------------------
mergeBranchIds
::
[[
Int
]]
->
[
Int
]
mergeBranchIds
ids
=
(
head'
"mergeBranchIds"
.
sort
.
mostFreq
)
ids
mergeBranchIds
ids
=
(
head'
"mergeBranchIds"
.
sort
.
mostFreq
'
)
ids
where
-- | 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]]
...
...
@@ -46,13 +47,19 @@ mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq) ids
-- inf = (fst . minimum) groupIds
-- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- | 1) find the most frequent ids
mostFreq
::
[[
Int
]]
->
[[
Int
]]
mostFreq
ids'
=
mostFreq
'
::
[[
Int
]]
->
[[
Int
]]
mostFreq
'
ids'
=
let
groupIds
=
(
map
(
\
gIds
->
(
length
gIds
,
head'
"gIds"
gIds
))
.
group
.
sort
)
ids'
sup
=
(
fst
.
maximum
)
groupIds
in
map
snd
$
filter
(
\
gIds
->
fst
gIds
==
sup
)
groupIds
mergeMeta
::
[
Int
]
->
[
PhyloGroup
]
->
Map
Text
[
Double
]
mergeMeta
bId
groups
=
let
ego
=
head'
"mergeMeta"
$
filter
(
\
g
->
(
snd
(
g
^.
phylo_groupBranchId
))
==
bId
)
groups
in
fromList
[(
"breaks"
,(
ego
^.
phylo_groupMeta
)
!
"breaks"
),(
"seaLevels"
,(
ego
^.
phylo_groupMeta
)
!
"seaLevels"
)]
groupsToBranches'
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
groupsToBranches'
groups
=
-- | run the related component algorithm
...
...
@@ -64,10 +71,8 @@ groupsToBranches' groups =
in
map
(
\
ids
->
let
groups'
=
elems
$
restrictKeys
groups
(
Set
.
fromList
ids
)
bId
=
mergeBranchIds
$
map
(
\
g
->
snd
$
g
^.
phylo_groupBranchId
)
groups'
in
map
(
\
g
->
g
&
phylo_groupBranchId
%~
(
\
(
lvl
,
_
)
->
(
lvl
+
1
,
bId
)))
groups'
)
graph
in
map
(
\
g
->
g
&
phylo_groupBranchId
%~
(
\
(
lvl
,
_
)
->
(
lvl
,
bId
)))
groups'
)
graph
getLastThr
::
[
PhyloGroup
]
->
Double
getLastThr
childs
=
maximum
$
concat
$
map
(
\
g
->
(
g
^.
phylo_groupMeta
)
!
"thr"
)
childs
mergeGroups
::
[
Cooc
]
->
PhyloGroupId
->
Map
PhyloGroupId
PhyloGroupId
->
[
PhyloGroup
]
->
PhyloGroup
mergeGroups
coocs
id
mapIds
childs
=
...
...
@@ -75,11 +80,15 @@ mergeGroups coocs id mapIds childs =
in
PhyloGroup
(
fst
$
fst
id
)
(
snd
$
fst
id
)
(
snd
id
)
""
(
sum
$
map
_phylo_groupSupport
childs
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
((
snd
$
fst
id
),
(
mergeBranchIds
$
map
(
\
g
->
snd
$
g
^.
phylo_groupBranchId
)
childs
)
)
(
singleton
"thr"
[
getLastThr
childs
]
)
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
((
snd
$
fst
id
),
bId
)
(
mergeMeta
bId
childs
)
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodParents
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodChilds
childs
)
where
--------------------
bId
::
[
Int
]
bId
=
mergeBranchIds
$
map
(
\
g
->
snd
$
g
^.
phylo_groupBranchId
)
childs
--------------------
updatePointers
::
[
Pointer
]
->
[
Pointer
]
updatePointers
pointers
=
map
(
\
(
pId
,
w
)
->
(
mapIds
!
pId
,
w
))
pointers
...
...
@@ -163,7 +172,7 @@ groupsToEdges prox sync nbDocs diago groups =
toEdges
::
Double
->
[(
PhyloGroup
,
PhyloGroup
)]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
toEdges
sens
edges
=
case
prox
of
WeightedLogJaccard
_
_
_
->
map
(
\
(
g
,
g'
)
->
WeightedLogJaccard
_
->
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard'
sens
nbDocs
diago
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
edges
_
->
undefined
...
...
@@ -198,21 +207,13 @@ reduceGroups prox sync docs diagos branch =
$
toRelatedComponents
groups
edges
)
periods
getGroupRealBId
::
Double
->
PhyloGroup
->
[
Int
]
getGroupRealBId
step
g
=
let
nb
=
round
(
getGroupThr
g
/
step
)
+
2
in
take
nb
(
snd
$
g
^.
phylo_groupBranchId
)
adjustClustering
::
Synchrony
->
Double
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
adjustClustering
sync
step
branches
=
case
sync
of
ByProximityThreshold
_
_
scope
_
->
case
scope
of
adjustClustering
::
Synchrony
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
adjustClustering
sync
branches
=
case
sync
of
ByProximityThreshold
_
_
scope
_
->
case
scope
of
SingleBranch
->
branches
SiblingBranches
->
groupBy
(
\
g
g'
->
(
init
$
getGroupRealBId
step
g
)
==
(
init
$
getGroupRealBId
step
g'
))
SiblingBranches
->
groupBy
(
\
g
g'
->
(
last'
"adjustClustering"
$
(
g
^.
phylo_groupMeta
)
!
"breaks"
)
==
(
last'
"adjustClustering"
$
(
g'
^.
phylo_groupMeta
)
!
"breaks"
))
$
sortOn
_phylo_groupBranchId
$
concat
branches
-- SiblingBranches -> elems $ fromListWith (++) $ map (\b -> ((init . snd . _phylo_groupBranchId) $ head' "adjustClustering" b,b)) branches
AllBranches
->
[
concat
branches
]
ByProximityDistribution
_
_
->
branches
...
...
@@ -226,7 +227,7 @@ synchronicClustering phylo =
diagos
=
map
coocToDiago
$
phylo
^.
phylo_timeCooc
newBranches
=
map
(
\
branch
->
reduceGroups
prox
sync
docs
diagos
branch
)
$
map
processDynamics
$
adjustClustering
sync
(
getPhyloThresholdStep
phylo
)
$
adjustClustering
sync
$
phyloToLastBranches
$
traceSynchronyStart
phylo
newBranches'
=
newBranches
`
using
`
parList
rdeepseq
...
...
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
2d0a7430
...
...
@@ -15,18 +15,19 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.TemporalMatching
where
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
union
,
dropWhile
,
partition
,
or
)
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
findWithDefault
,
keys
,
(
!
),
singleton
,
empty
,
mapKeys
)
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
union
,
dropWhile
,
partition
,
or
,
sort
,
(
!!
)
)
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
findWithDefault
,
keys
,
(
!
),
(
!?
),
filterWithKey
,
singleton
,
empty
,
mapKeys
,
adjust
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
-- import Prelude (logBase
)
import
Prelude
(
floor
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
--
import Debug.Trace (trace)
import
Debug.Trace
(
trace
)
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
...
...
@@ -77,7 +78,7 @@ weightedLogJaccard' sens nbDocs diago ngrams ngrams'
toProximity
::
Double
->
Map
Int
Double
->
Proximity
->
[
Int
]
->
[
Int
]
->
[
Int
]
->
Double
toProximity
nbDocs
diago
proximity
egoNgrams
targetNgrams
targetNgrams'
=
case
proximity
of
WeightedLogJaccard
sens
_
_
->
WeightedLogJaccard
sens
->
let
pairNgrams
=
if
targetNgrams
==
targetNgrams'
then
targetNgrams
else
union
targetNgrams
targetNgrams'
...
...
@@ -268,9 +269,9 @@ toPhyloQuality' beta freq branches =
$
keys
freq
-----------------------------
-- |
Adaptative
Matching | --
-----------------------------
-----------------------------
-------
-- |
Constant Temporal
Matching | --
-----------------------------
-------
groupsToBranches
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
...
...
@@ -299,18 +300,21 @@ reduceFrequency frequency branches =
restrictKeys
frequency
(
Set
.
fromList
$
(
nub
.
concat
)
$
map
_phylo_groupNgrams
$
concat
branches
)
updateThr
::
Double
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
updateThr
thr
branches
=
map
(
\
b
->
map
(
\
g
->
g
&
phylo_groupMeta
.~
(
singleton
"thr"
[
thr
]))
b
)
branches
updateThr
thr
branches
=
map
(
\
b
->
map
(
\
g
->
g
&
phylo_groupMeta
.~
(
singleton
"seaLevels"
(((
g
^.
phylo_groupMeta
)
!
"seaLevels"
)
++
[
thr
])))
b
)
branches
-- | Sequentially break each branch of a phylo where
-- done = all the allready broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
breakBranches
::
Proximity
->
Double
->
Map
Int
Double
->
Int
->
Double
->
Int
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloPeriodId
]
->
[([
PhyloGroup
],
Bool
)]
->
([
PhyloGroup
],
Bool
)
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
breakBranches
proximity
beta
frequency
minBranch
thr
frame
docs
coocs
periods
done
ego
rest
=
breakBranches
::
Proximity
->
Double
->
Map
Int
Double
->
Int
->
Double
->
Double
->
Double
->
Int
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloPeriodId
]
->
[([
PhyloGroup
],
Bool
)]
->
([
PhyloGroup
],
Bool
)
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
breakBranches
proximity
beta
frequency
minBranch
thr
depth
elevation
frame
docs
coocs
periods
done
ego
rest
=
-- | 1) keep or not the new division of ego
let
done'
=
done
++
(
if
snd
ego
then
(
if
((
null
(
fst
ego'
))
||
(
quality
>
quality'
))
then
(
if
((
null
(
fst
ego'
))
||
(
quality
>
quality'
))
then
-- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
-- <> " | " <> show(length $ fst ego) <> " groups : "
...
...
@@ -328,7 +332,7 @@ breakBranches proximity beta frequency minBranch thr frame docs coocs periods do
-- | 2) if there is no more branches in rest then return else continue
if
null
rest
then
done'
else
breakBranches
proximity
beta
frequency
minBranch
thr
frame
docs
coocs
periods
else
breakBranches
proximity
beta
frequency
minBranch
thr
depth
elevation
frame
docs
coocs
periods
done'
(
head'
"breakBranches"
rest
)
(
tail'
"breakBranches"
rest
)
where
--------------------------------------
...
...
@@ -341,41 +345,43 @@ breakBranches proximity beta frequency minBranch thr frame docs coocs periods do
$
matchGroupsToGroups
frame
periods
proximity
thr
docs
coocs
(
fst
ego
)
branches'
=
branches
`
using
`
parList
rdeepseq
in
partition
(
\
b
->
(
length
$
nub
$
map
_phylo_groupPeriod
b
)
>=
minBranch
)
$
if
(
length
branches'
>
1
)
then
updateThr
thr
branches'
else
branches'
$
thrToMeta
thr
$
depthToMeta
(
elevation
-
depth
)
branches'
--------------------------------------
quality'
::
Double
quality'
=
toPhyloQuality'
beta
frequency
((
map
fst
done
)
++
(
fst
ego'
)
++
(
snd
ego'
)
++
(
map
fst
rest
))
seaLevelMatching
::
Proximity
->
Double
->
Int
->
Map
Int
Double
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Map
Date
Cooc
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
seaLevelMatching
proximity
beta
minBranch
frequency
thr
frame
periods
docs
coocs
branches
=
-- | if there is no branch to break or if sea level > 1 then end
seaLevelMatching
::
Proximity
->
Double
->
Int
->
Map
Int
Double
->
Double
->
Double
->
Double
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Map
Date
Cooc
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
seaLevelMatching
proximity
beta
minBranch
frequency
thr
step
depth
elevation
frame
periods
docs
coocs
branches
=
-- | if there is no branch to break or if seaLvl level > 1 then end
if
(
thr
>=
1
)
||
((
not
.
or
)
$
map
snd
branches
)
then
branches
else
-- | break all the possible branches at the current sea level
let
branches'
=
breakBranches
proximity
beta
frequency
minBranch
thr
frame
docs
coocs
periods
-- | break all the possible branches at the current sea
Lvl
level
let
branches'
=
breakBranches
proximity
beta
frequency
minBranch
thr
depth
elevation
frame
docs
coocs
periods
[]
(
head'
"seaLevelMatching"
branches
)
(
tail'
"seaLevelMatching"
branches
)
frequency'
=
reduceFrequency
frequency
(
map
fst
branches'
)
in
seaLevelMatching
proximity
beta
minBranch
frequency'
(
thr
+
(
getThresholdStep
proximity
))
frame
periods
docs
coocs
branches'
in
seaLevelMatching
proximity
beta
minBranch
frequency'
(
thr
+
step
)
step
(
depth
-
1
)
elevation
frame
periods
docs
coocs
branches'
temporalMatching
::
Phylo
->
Phylo
temporalMatching
phylo
=
updatePhyloGroups
1
constanteTemporalMatching
::
Double
->
Double
->
Phylo
->
Phylo
constanteTemporalMatching
start
step
phylo
=
updatePhyloGroups
1
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
traceMatchEnd
$
concat
branches
)
phylo
where
-- | 2) process the temporal matching by elevating sea level
-- | 2) process the temporal matching by elevating sea
Lvl
level
branches
::
[[
PhyloGroup
]]
branches
=
map
fst
$
seaLevelMatching
(
phyloProximity
$
getConfig
phylo
)
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
(
phylo
^.
phylo_termFreq
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
start
step
(
fromIntegral
$
round
(((
1
-
start
)
/
step
)
-
1
))
(
fromIntegral
$
round
((
1
-
start
)
/
step
))
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phylo
^.
phylo_timeDocs
)
...
...
@@ -388,7 +394,153 @@ temporalMatching phylo = updatePhyloGroups 1
$
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
matchGroupsToGroups
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phyloProximity
$
getConfig
phylo
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
start
(
phylo
^.
phylo_timeDocs
)
(
phylo
^.
phylo_timeCooc
)
(
traceTemporalMatching
$
getGroupsFromLevel
1
phylo
)
--------------------------------------
-- | Adaptative Temporal Matching | --
--------------------------------------
thrToMeta
::
Double
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
thrToMeta
thr
branches
=
map
(
\
b
->
map
(
\
g
->
g
&
phylo_groupMeta
.~
(
adjust
(
\
lst
->
lst
++
[
thr
])
"seaLevels"
(
g
^.
phylo_groupMeta
)))
b
)
branches
depthToMeta
::
Double
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
depthToMeta
depth
branches
=
let
break
=
length
branches
>
1
in
map
(
\
b
->
map
(
\
g
->
if
break
then
g
&
phylo_groupMeta
.~
(
adjust
(
\
lst
->
lst
++
[
depth
])
"breaks"
(
g
^.
phylo_groupMeta
))
else
g
)
b
)
branches
reduceTupleMapByKeys
::
Eq
a
=>
[
a
]
->
Map
(
a
,
a
)
Double
->
Map
(
a
,
a
)
Double
reduceTupleMapByKeys
ks
m
=
filterWithKey
(
\
(
k
,
k'
)
_
->
(
elem
k
ks
)
&&
(
elem
k'
ks
))
m
getInTupleMap
::
Ord
a
=>
Map
(
a
,
a
)
Double
->
a
->
a
->
Double
getInTupleMap
m
k
k'
|
isJust
(
m
!?
(
k
,
k'
))
=
m
!
(
k
,
k'
)
|
isJust
(
m
!?
(
k'
,
k
))
=
m
!
(
k'
,
k
)
|
otherwise
=
0
toThreshold
::
Double
->
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
->
Double
toThreshold
lvl
proxiGroups
=
let
idx
=
((
Map
.
size
proxiGroups
)
`
div
`
(
floor
lvl
))
-
1
in
if
idx
>=
0
then
(
sort
$
elems
proxiGroups
)
!!
idx
else
1
-- done = all the allready broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
adaptativeBreakBranches
::
Proximity
->
Double
->
Double
->
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
->
Double
->
Map
Int
Double
->
Int
->
Int
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloPeriodId
]
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
->
([
PhyloGroup
],(
Bool
,[
Double
]))
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
adaptativeBreakBranches
proxiConf
depth
elevation
groupsProxi
beta
frequency
minBranch
frame
docs
coocs
periods
done
ego
rest
=
-- | 1) keep or not the new division of ego
let
done'
=
done
++
(
if
(
fst
.
snd
)
ego
then
(
if
((
null
(
fst
ego'
))
||
(
quality
>
quality'
))
then
[(
concat
$
thrToMeta
thr
$
[
fst
ego
],(
False
,
((
snd
.
snd
)
ego
)))]
else
(
(
map
(
\
e
->
(
e
,(
True
,
((
snd
.
snd
)
ego
)
++
[
thr
])))
(
fst
ego'
))
++
(
map
(
\
e
->
(
e
,(
False
,
((
snd
.
snd
)
ego
))))
(
snd
ego'
))))
else
[(
concat
$
thrToMeta
thr
$
[
fst
ego
],
snd
ego
)])
in
-- | uncomment let .. in for debugging
-- let part1 = partition (snd) done'
-- part2 = partition (snd) rest
-- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
-- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
-- ) $
-- | 2) if there is no more branches in rest then return else continue
if
null
rest
then
done'
else
adaptativeBreakBranches
proxiConf
depth
elevation
groupsProxi
beta
frequency
minBranch
frame
docs
coocs
periods
done'
(
head'
"breakBranches"
rest
)
(
tail'
"breakBranches"
rest
)
where
--------------------------------------
thr
::
Double
thr
=
toThreshold
depth
$
Map
.
filter
(
\
v
->
v
>
(
last'
"breakBranches"
$
(
snd
.
snd
)
ego
))
$
reduceTupleMapByKeys
(
map
getGroupId
$
fst
ego
)
groupsProxi
--------------------------------------
quality
::
Double
quality
=
toPhyloQuality'
beta
frequency
((
map
fst
done
)
++
[
fst
ego
]
++
(
map
fst
rest
))
--------------------------------------
ego'
::
([[
PhyloGroup
]],[[
PhyloGroup
]])
ego'
=
let
branches
=
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
matchGroupsToGroups
frame
periods
proxiConf
thr
docs
coocs
(
fst
ego
)
branches'
=
branches
`
using
`
parList
rdeepseq
in
partition
(
\
b
->
(
length
$
nub
$
map
_phylo_groupPeriod
b
)
>
minBranch
)
$
thrToMeta
thr
$
depthToMeta
(
elevation
-
depth
)
branches'
--------------------------------------
quality'
::
Double
quality'
=
toPhyloQuality'
beta
frequency
((
map
fst
done
)
++
(
fst
ego'
)
++
(
snd
ego'
)
++
(
map
fst
rest
))
adaptativeSeaLevelMatching
::
Proximity
->
Double
->
Double
->
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
->
Double
->
Int
->
Map
Int
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Map
Date
Cooc
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
adaptativeSeaLevelMatching
proxiConf
depth
elevation
groupsProxi
beta
minBranch
frequency
frame
periods
docs
coocs
branches
=
-- | if there is no branch to break or if seaLvl level >= depth then end
if
(
Map
.
null
groupsProxi
)
||
(
depth
<=
0
)
||
((
not
.
or
)
$
map
(
fst
.
snd
)
branches
)
then
branches
else
-- | break all the possible branches at the current seaLvl level
let
branches'
=
adaptativeBreakBranches
proxiConf
depth
elevation
groupsProxi
beta
frequency
minBranch
frame
docs
coocs
periods
[]
(
head'
"seaLevelMatching"
branches
)
(
tail'
"seaLevelMatching"
branches
)
frequency'
=
reduceFrequency
frequency
(
map
fst
branches'
)
groupsProxi'
=
reduceTupleMapByKeys
(
map
(
getGroupId
)
$
concat
$
map
(
fst
)
$
filter
(
fst
.
snd
)
branches'
)
groupsProxi
-- thr = toThreshold depth groupsProxi
in
trace
(
"
\n
"
<>
foldl
(
\
acc
_
->
acc
<>
"🌊 "
)
""
[
0
..
(
elevation
-
depth
)]
<>
" [✓ "
<>
show
(
length
$
filter
(
fst
.
snd
)
branches'
)
<>
"("
<>
show
(
length
$
concat
$
map
(
fst
)
$
filter
(
fst
.
snd
)
branches'
)
<>
")|✗ "
<>
show
(
length
$
filter
(
not
.
fst
.
snd
)
branches'
)
<>
"("
<>
show
(
length
$
concat
$
map
(
fst
)
$
filter
(
not
.
fst
.
snd
)
branches'
)
<>
")]"
<>
" thr = "
)
$
adaptativeSeaLevelMatching
proxiConf
(
depth
-
1
)
elevation
groupsProxi'
beta
minBranch
frequency'
frame
periods
docs
coocs
branches'
adaptativeTemporalMatching
::
Double
->
Phylo
->
Phylo
adaptativeTemporalMatching
elevation
phylo
=
updatePhyloGroups
1
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
traceMatchEnd
$
concat
branches
)
phylo
where
-- | 2) process the temporal matching by elevating seaLvl level
branches
::
[[
PhyloGroup
]]
branches
=
map
fst
$
adaptativeSeaLevelMatching
(
phyloProximity
$
getConfig
phylo
)
(
elevation
-
1
)
elevation
(
phylo
^.
phylo_groupsProxi
)
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
(
phylo
^.
phylo_termFreq
)
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
phylo
^.
phylo_timeCooc
)
groups
-- | 1) for each group process an initial temporal Matching
-- | here we suppose that all the groups of level 1 are part of the same big branch
groups
::
[([
PhyloGroup
],(
Bool
,[
Double
]))]
groups
=
map
(
\
b
->
(
b
,((
length
$
nub
$
map
_phylo_groupPeriod
b
)
>=
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
),[
thr
])))
$
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
matchGroupsToGroups
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phyloProximity
$
getConfig
phylo
)
thr
(
phylo
^.
phylo_timeDocs
)
(
phylo
^.
phylo_timeCooc
)
(
traceTemporalMatching
$
getGroupsFromLevel
1
phylo
)
--------------------------------------
thr
::
Double
thr
=
toThreshold
elevation
(
phylo
^.
phylo_groupsProxi
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment