Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Julien Moutinho
haskell-gargantext
Commits
70a2339e
Commit
70a2339e
authored
Feb 20, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev-phylo' into dev
parents
01d762cb
ddeafce7
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
873 additions
and
500 deletions
+873
-500
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+1
-1
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+63
-31
PhyloExample.hs
src/Gargantext/Viz/Phylo/PhyloExample.hs
+19
-7
PhyloExport.hs
src/Gargantext/Viz/Phylo/PhyloExport.hs
+101
-39
PhyloMaker.hs
src/Gargantext/Viz/Phylo/PhyloMaker.hs
+110
-45
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+69
-55
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+105
-42
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+405
-280
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
70a2339e
...
@@ -167,7 +167,7 @@ main = do
...
@@ -167,7 +167,7 @@ main = do
printIOMsg
"End of reconstruction, start the export"
printIOMsg
"End of reconstruction, start the export"
let
dot
=
toPhyloExport
phylo
let
dot
=
toPhyloExport
phylo
let
output
=
(
outputPath
config
)
let
output
=
(
outputPath
config
)
<>
(
unpack
$
phyloName
config
)
<>
(
unpack
$
phyloName
config
)
...
...
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
70a2339e
...
@@ -57,22 +57,39 @@ data CorpusParser =
...
@@ -57,22 +57,39 @@ data CorpusParser =
|
Csv
{
_csv_limit
::
Int
}
|
Csv
{
_csv_limit
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
data
SeaElevation
=
Constante
{
_cons_start
::
Double
,
_cons_step
::
Double
}
|
Adaptative
{
_adap_granularity
::
Double
}
deriving
(
Show
,
Generic
,
Eq
)
data
Proximity
=
data
Proximity
=
WeightedLogJaccard
WeightedLogJaccard
{
_wlj_sensibility
::
Double
{
_wlj_sensibility
::
Double
,
_wlj_thresholdInit
::
Double
-- , _wlj_thresholdInit :: Double
,
_wlj_thresholdStep
::
Double
}
-- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double
}
|
Hamming
|
Hamming
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
data
SynchronyScope
=
SingleBranch
|
SiblingBranches
|
AllBranches
deriving
(
Show
,
Generic
,
Eq
)
data
SynchronyStrategy
=
MergeRegularGroups
|
MergeAllGroups
deriving
(
Show
,
Generic
,
Eq
)
data
Synchrony
=
data
Synchrony
=
ByProximityThreshold
ByProximityThreshold
{
_bpt_threshold
::
Double
{
_bpt_threshold
::
Double
,
_bpt_sensibility
::
Double
}
,
_bpt_sensibility
::
Double
,
_bpt_scope
::
SynchronyScope
,
_bpt_strategy
::
SynchronyStrategy
}
|
ByProximityDistribution
|
ByProximityDistribution
{
_bpd_sensibility
::
Double
}
{
_bpd_sensibility
::
Double
,
_bpd_strategy
::
SynchronyStrategy
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
...
@@ -84,16 +101,18 @@ data TimeUnit =
...
@@ -84,16 +101,18 @@ data TimeUnit =
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
data
C
ontextualUnit
=
data
C
lique
=
Fis
Fis
{
_fis_support
::
Int
{
_fis_support
::
Int
,
_fis_size
::
Int
}
,
_fis_size
::
Int
}
|
MaxClique
{
_mcl_size
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
data
Quality
=
data
Quality
=
Quality
{
_qua_
relevance
::
Double
Quality
{
_qua_
granularity
::
Double
,
_qua_minBranch
::
Int
}
,
_qua_minBranch
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
...
@@ -105,10 +124,11 @@ data Config =
...
@@ -105,10 +124,11 @@ data Config =
,
phyloName
::
Text
,
phyloName
::
Text
,
phyloLevel
::
Int
,
phyloLevel
::
Int
,
phyloProximity
::
Proximity
,
phyloProximity
::
Proximity
,
seaElevation
::
SeaElevation
,
phyloSynchrony
::
Synchrony
,
phyloSynchrony
::
Synchrony
,
phyloQuality
::
Quality
,
phyloQuality
::
Quality
,
timeUnit
::
TimeUnit
,
timeUnit
::
TimeUnit
,
c
ontextualUnit
::
ContextualUnit
,
c
lique
::
Clique
,
exportLabel
::
[
PhyloLabel
]
,
exportLabel
::
[
PhyloLabel
]
,
exportSort
::
Sort
,
exportSort
::
Sort
,
exportFilter
::
[
Filter
]
,
exportFilter
::
[
Filter
]
...
@@ -123,11 +143,12 @@ defaultConfig =
...
@@ -123,11 +143,12 @@ defaultConfig =
,
corpusParser
=
Csv
1000
,
corpusParser
=
Csv
1000
,
phyloName
=
pack
"Default Phylo"
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
2
,
phyloLevel
=
2
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloProximity
=
WeightedLogJaccard
10
,
phyloSynchrony
=
ByProximityDistribution
0
,
seaElevation
=
Constante
0
0.1
,
phyloQuality
=
Quality
0.1
1
,
phyloSynchrony
=
ByProximityThreshold
0.5
10
SiblingBranches
MergeAllGroups
,
phyloQuality
=
Quality
0.6
1
,
timeUnit
=
Year
3
1
5
,
timeUnit
=
Year
3
1
5
,
c
ontextualUnit
=
Fis
1
5
,
c
lique
=
Fis
1
5
,
exportLabel
=
[
BranchLabel
MostInclusive
2
,
GroupLabel
MostEmergentInclusive
2
]
,
exportLabel
=
[
BranchLabel
MostInclusive
2
,
GroupLabel
MostEmergentInclusive
2
]
,
exportSort
=
ByHierarchy
,
exportSort
=
ByHierarchy
,
exportFilter
=
[
ByBranchSize
2
]
,
exportFilter
=
[
ByBranchSize
2
]
...
@@ -139,10 +160,12 @@ instance FromJSON CorpusParser
...
@@ -139,10 +160,12 @@ instance FromJSON CorpusParser
instance
ToJSON
CorpusParser
instance
ToJSON
CorpusParser
instance
FromJSON
Proximity
instance
FromJSON
Proximity
instance
ToJSON
Proximity
instance
ToJSON
Proximity
instance
FromJSON
SeaElevation
instance
ToJSON
SeaElevation
instance
FromJSON
TimeUnit
instance
FromJSON
TimeUnit
instance
ToJSON
TimeUnit
instance
ToJSON
TimeUnit
instance
FromJSON
C
ontextualUnit
instance
FromJSON
C
lique
instance
ToJSON
C
ontextualUnit
instance
ToJSON
C
lique
instance
FromJSON
PhyloLabel
instance
FromJSON
PhyloLabel
instance
ToJSON
PhyloLabel
instance
ToJSON
PhyloLabel
instance
FromJSON
Tagger
instance
FromJSON
Tagger
...
@@ -153,6 +176,10 @@ instance FromJSON Order
...
@@ -153,6 +176,10 @@ instance FromJSON Order
instance
ToJSON
Order
instance
ToJSON
Order
instance
FromJSON
Filter
instance
FromJSON
Filter
instance
ToJSON
Filter
instance
ToJSON
Filter
instance
FromJSON
SynchronyScope
instance
ToJSON
SynchronyScope
instance
FromJSON
SynchronyStrategy
instance
ToJSON
SynchronyStrategy
instance
FromJSON
Synchrony
instance
FromJSON
Synchrony
instance
ToJSON
Synchrony
instance
ToJSON
Synchrony
instance
FromJSON
Quality
instance
FromJSON
Quality
...
@@ -239,6 +266,8 @@ data Phylo =
...
@@ -239,6 +266,8 @@ data Phylo =
Phylo
{
_phylo_foundations
::
PhyloFoundations
Phylo
{
_phylo_foundations
::
PhyloFoundations
,
_phylo_timeCooc
::
!
(
Map
Date
Cooc
)
,
_phylo_timeCooc
::
!
(
Map
Date
Cooc
)
,
_phylo_timeDocs
::
!
(
Map
Date
Double
)
,
_phylo_timeDocs
::
!
(
Map
Date
Double
)
,
_phylo_termFreq
::
!
(
Map
Int
Double
)
,
_phylo_groupsProxi
::
!
(
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
)
,
_phylo_param
::
PhyloParam
,
_phylo_param
::
PhyloParam
,
_phylo_periods
::
Map
PhyloPeriodId
PhyloPeriod
,
_phylo_periods
::
Map
PhyloPeriodId
PhyloPeriod
}
}
...
@@ -310,21 +339,17 @@ data Filiation = ToParents | ToChilds deriving (Generic, Show)
...
@@ -310,21 +339,17 @@ data Filiation = ToParents | ToChilds deriving (Generic, Show)
data
PointerType
=
TemporalPointer
|
LevelPointer
deriving
(
Generic
,
Show
)
data
PointerType
=
TemporalPointer
|
LevelPointer
deriving
(
Generic
,
Show
)
---------------------------
----------------------
-- | Frequent Item Set | --
-- | Phylo Clique | --
---------------------------
----------------------
-- | Clique : Set of ngrams cooccurring in the same Document
type
Clique
=
Set
Ngrams
-- | Support : Number of Documents where a Clique occurs
-- | Support : Number of Documents where a Clique occurs
type
Support
=
Int
type
Support
=
Int
-- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
data
PhyloClique
=
PhyloClique
data
PhyloFis
=
PhyloFis
{
_phyloClique_nodes
::
Set
Ngrams
{
_phyloFis_clique
::
Clique
,
_phyloClique_support
::
Support
,
_phyloFis_support
::
Support
,
_phyloClique_period
::
(
Date
,
Date
)
,
_phyloFis_period
::
(
Date
,
Date
)
}
deriving
(
Generic
,
NFData
,
Show
,
Eq
)
}
deriving
(
Generic
,
NFData
,
Show
,
Eq
)
...
@@ -356,9 +381,15 @@ data PhyloLabel =
...
@@ -356,9 +381,15 @@ data PhyloLabel =
data
PhyloBranch
=
data
PhyloBranch
=
PhyloBranch
PhyloBranch
{
_branch_id
::
PhyloBranchId
{
_branch_id
::
PhyloBranchId
,
_branch_label
::
Text
,
_branch_canonId
::
[
Int
]
,
_branch_meta
::
Map
Text
[
Double
]
,
_branch_seaLevel
::
[
Double
]
}
deriving
(
Generic
,
Show
)
,
_branch_x
::
Double
,
_branch_y
::
Double
,
_branch_w
::
Double
,
_branch_t
::
Double
,
_branch_label
::
Text
,
_branch_meta
::
Map
Text
[
Double
]
}
deriving
(
Generic
,
Show
,
Eq
)
data
PhyloExport
=
data
PhyloExport
=
PhyloExport
PhyloExport
...
@@ -372,12 +403,13 @@ data PhyloExport =
...
@@ -372,12 +403,13 @@ data PhyloExport =
makeLenses
''
C
onfig
makeLenses
''
C
onfig
makeLenses
''
P
roximity
makeLenses
''
P
roximity
makeLenses
''
S
eaElevation
makeLenses
''
Q
uality
makeLenses
''
Q
uality
makeLenses
''
C
ontextualUnit
makeLenses
''
C
lique
makeLenses
''
P
hyloLabel
makeLenses
''
P
hyloLabel
makeLenses
''
T
imeUnit
makeLenses
''
T
imeUnit
makeLenses
''
P
hyloFoundations
makeLenses
''
P
hyloFoundations
makeLenses
''
P
hylo
Fis
makeLenses
''
P
hylo
Clique
makeLenses
''
P
hylo
makeLenses
''
P
hylo
makeLenses
''
P
hyloPeriod
makeLenses
''
P
hyloPeriod
makeLenses
''
P
hyloLevel
makeLenses
''
P
hyloLevel
...
...
src/Gargantext/Viz/Phylo/PhyloExample.hs
View file @
70a2339e
...
@@ -30,7 +30,7 @@ import Gargantext.Viz.AdaptativePhylo
...
@@ -30,7 +30,7 @@ import Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.PhyloMaker
import
Gargantext.Viz.Phylo.PhyloMaker
import
Gargantext.Viz.Phylo.PhyloExport
import
Gargantext.Viz.Phylo.PhyloExport
import
Gargantext.Viz.Phylo.TemporalMatching
(
t
emporalMatching
)
import
Gargantext.Viz.Phylo.TemporalMatching
(
adaptativeTemporalMatching
,
constanteT
emporalMatching
)
import
Gargantext.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Gargantext.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Control.Lens
import
Control.Lens
...
@@ -38,6 +38,9 @@ import Data.GraphViz.Types.Generalised (DotGraph)
...
@@ -38,6 +38,9 @@ import Data.GraphViz.Types.Generalised (DotGraph)
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Vector
as
Vector
---------------------------------
-- | STEP 5 | -- Export the phylo
---------------------------------
phyloExport
::
IO
()
phyloExport
::
IO
()
phyloExport
=
dotToFile
"/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot"
phyloDot
phyloExport
=
dotToFile
"/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot"
phyloDot
...
@@ -45,6 +48,10 @@ phyloExport = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot"
...
@@ -45,6 +48,10 @@ phyloExport = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot"
phyloDot
::
DotGraph
DotId
phyloDot
::
DotGraph
DotId
phyloDot
=
toPhyloExport
phylo2
phyloDot
=
toPhyloExport
phylo2
--------------------------------------------------
-- | STEP 4 | -- Process the synchronic clustering
--------------------------------------------------
phylo2
::
Phylo
phylo2
::
Phylo
phylo2
=
synchronicClustering
phylo1
phylo2
=
synchronicClustering
phylo1
...
@@ -53,17 +60,22 @@ phylo2 = synchronicClustering phylo1
...
@@ -53,17 +60,22 @@ phylo2 = synchronicClustering phylo1
-----------------------------------------------
-----------------------------------------------
phylo1
::
Phylo
phylo1
::
Phylo
phylo1
=
temporalMatching
phylo1
=
case
(
getSeaElevation
phyloBase
)
of
$
appendGroups
fisToGroup
1
phyloFis
phyloBase
Constante
s
g
->
constanteTemporalMatching
s
g
$
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
Adaptative
s
->
adaptativeTemporalMatching
s
$
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
---------------------------------------------
---------------------------------------------
-- | STEP 2 | -- Build the
frequent items set
-- | STEP 2 | -- Build the
cliques
---------------------------------------------
---------------------------------------------
phylo
Fis
::
Map
(
Date
,
Date
)
[
PhyloFis
]
phylo
Clique
::
Map
(
Date
,
Date
)
[
PhyloClique
]
phylo
Fis
=
toPhyloFis
docsByPeriods
(
getFisSupport
$
contextualUnit
config
)
(
getFisSize
$
contextualUnit
config
)
phylo
Clique
=
toPhyloClique
phyloBase
docsByPeriods
docsByPeriods
::
Map
(
Date
,
Date
)
[
Document
]
docsByPeriods
::
Map
(
Date
,
Date
)
[
Document
]
...
@@ -96,7 +108,7 @@ config =
...
@@ -96,7 +108,7 @@ config =
defaultConfig
{
phyloName
=
"Cesar et Cleopatre"
defaultConfig
{
phyloName
=
"Cesar et Cleopatre"
,
phyloLevel
=
2
,
phyloLevel
=
2
,
exportFilter
=
[
ByBranchSize
0
]
,
exportFilter
=
[
ByBranchSize
0
]
,
c
ontextualUnit
=
Fis
0
0
}
,
c
lique
=
Fis
0
0
}
docs
::
[
Document
]
docs
::
[
Document
]
...
...
src/Gargantext/Viz/Phylo/PhyloExport.hs
View file @
70a2339e
...
@@ -18,7 +18,7 @@ Portability : POSIX
...
@@ -18,7 +18,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloExport
where
module
Gargantext.Viz.Phylo.PhyloExport
where
import
Data.Map
(
Map
,
fromList
,
empty
,
fromListWith
,
insert
,
(
!
),
elems
,
unionWith
,
findWithDefault
,
toList
)
import
Data.Map
(
Map
,
fromList
,
empty
,
fromListWith
,
insert
,
(
!
),
elems
,
unionWith
,
findWithDefault
,
toList
)
import
Data.List
((
++
),
sort
,
nub
,
concat
,
sortOn
,
reverse
,
groupBy
,
union
,
(
\\
),
(
!!
),
init
,
partition
,
unwords
,
nubBy
)
import
Data.List
((
++
),
sort
,
nub
,
concat
,
sortOn
,
reverse
,
groupBy
,
union
,
(
\\
),
(
!!
),
init
,
partition
,
unwords
,
nubBy
,
inits
,
elemIndex
)
import
Data.Vector
(
Vector
)
import
Data.Vector
(
Vector
)
import
Prelude
(
writeFile
)
import
Prelude
(
writeFile
)
...
@@ -36,6 +36,7 @@ import System.FilePath
...
@@ -36,6 +36,7 @@ import System.FilePath
import
Debug.Trace
(
trace
)
import
Debug.Trace
(
trace
)
import
qualified
Data.Text
as
Text
import
qualified
Data.Text
as
Text
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Text.Lazy
as
Lazy
import
qualified
Data.Text.Lazy
as
Lazy
import
qualified
Data.GraphViz.Attributes.HTML
as
H
import
qualified
Data.GraphViz.Attributes.HTML
as
H
...
@@ -105,44 +106,51 @@ groupToTable fdt g = H.Table H.HTable
...
@@ -105,44 +106,51 @@ groupToTable fdt g = H.Table H.HTable
<>
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
<>
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
<>
(
fromStrict
" , "
)
<>
(
fromStrict
" , "
)
<>
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
<>
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
<>
(
fromStrict
" ) "
))]]
<>
(
fromStrict
" ) "
)
<>
(
pack
$
show
(
getGroupId
g
)))]]
--------------------------------------
--------------------------------------
branchToDotNode
::
PhyloBranch
->
Dot
DotId
branchToDotNode
::
PhyloBranch
->
Int
->
Dot
DotId
branchToDotNode
b
=
branchToDotNode
b
bId
=
node
(
branchIdToDotId
$
b
^.
branch_id
)
node
(
branchIdToDotId
$
b
^.
branch_id
)
([
FillColor
[
toWColor
CornSilk
],
FontName
"Arial"
,
FontSize
40
,
Shape
Egg
,
Style
[
SItem
Bold
[]
],
Label
(
toDotLabel
$
b
^.
branch_label
)]
([
FillColor
[
toWColor
CornSilk
],
FontName
"Arial"
,
FontSize
40
,
Shape
Egg
,
Style
[
SItem
Bold
[]
],
Label
(
toDotLabel
$
b
^.
branch_label
)]
<>
(
metaToAttr
$
b
^.
branch_meta
)
<>
(
metaToAttr
$
b
^.
branch_meta
)
<>
[
toAttr
"nodeType"
"branch"
<>
[
toAttr
"nodeType"
"branch"
,
toAttr
"branchId"
(
pack
$
unwords
(
map
show
$
snd
$
b
^.
branch_id
))
])
,
toAttr
"bId"
(
pack
$
show
bId
)
,
toAttr
"branchId"
(
pack
$
unwords
(
map
show
$
snd
$
b
^.
branch_id
))
,
toAttr
"branch_x"
(
fromStrict
$
Text
.
pack
$
(
show
$
b
^.
branch_x
))
,
toAttr
"branch_y"
(
fromStrict
$
Text
.
pack
$
(
show
$
b
^.
branch_y
))
,
toAttr
"label"
(
pack
$
show
$
b
^.
branch_label
)
])
periodToDotNode
::
(
Date
,
Date
)
->
Dot
DotId
periodToDotNode
::
(
Date
,
Date
)
->
Dot
DotId
periodToDotNode
prd
=
periodToDotNode
prd
=
node
(
periodIdToDotId
prd
)
node
(
periodIdToDotId
prd
)
([
Shape
Squar
e
,
FontSize
50
,
Label
(
toDotLabel
$
Text
.
pack
(
show
(
fst
prd
)
<>
" "
<>
show
(
snd
prd
)))]
([
Shape
BoxShap
e
,
FontSize
50
,
Label
(
toDotLabel
$
Text
.
pack
(
show
(
fst
prd
)
<>
" "
<>
show
(
snd
prd
)))]
<>
[
toAttr
"nodeType"
"period"
<>
[
toAttr
"nodeType"
"period"
,
toAttr
"from"
(
fromStrict
$
Text
.
pack
$
(
show
$
fst
prd
))
,
toAttr
"from"
(
fromStrict
$
Text
.
pack
$
(
show
$
fst
prd
))
,
toAttr
"to"
(
fromStrict
$
Text
.
pack
$
(
show
$
snd
prd
))])
,
toAttr
"to"
(
fromStrict
$
Text
.
pack
$
(
show
$
snd
prd
))])
groupToDotNode
::
Vector
Ngrams
->
PhyloGroup
->
Dot
DotId
groupToDotNode
::
Vector
Ngrams
->
PhyloGroup
->
Int
->
Dot
DotId
groupToDotNode
fdt
g
=
groupToDotNode
fdt
g
bId
=
node
(
groupIdToDotId
$
getGroupId
g
)
node
(
groupIdToDotId
$
getGroupId
g
)
([
FontName
"Arial"
,
Shape
Square
,
toLabel
(
groupToTable
fdt
g
)]
([
FontName
"Arial"
,
Shape
Square
,
penWidth
4
,
toLabel
(
groupToTable
fdt
g
)]
<>
[
toAttr
"nodeType"
"group"
<>
[
toAttr
"nodeType"
"group"
,
toAttr
"from"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
,
toAttr
"from"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
,
toAttr
"to"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
,
toAttr
"to"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
,
toAttr
"branchId"
(
pack
$
unwords
(
init
$
map
show
$
snd
$
g
^.
phylo_groupBranchId
))
,
toAttr
"branchId"
(
pack
$
unwords
(
init
$
map
show
$
snd
$
g
^.
phylo_groupBranchId
))
,
toAttr
"bId"
(
pack
$
show
bId
)
,
toAttr
"support"
(
pack
$
show
(
g
^.
phylo_groupSupport
))])
,
toAttr
"support"
(
pack
$
show
(
g
^.
phylo_groupSupport
))])
toDotEdge
::
DotId
->
DotId
->
Text
.
Text
->
EdgeType
->
Dot
DotId
toDotEdge
::
DotId
->
DotId
->
Text
.
Text
->
EdgeType
->
Dot
DotId
toDotEdge
source
target
lbl
edgeType
=
edge
source
target
toDotEdge
source
target
lbl
edgeType
=
edge
source
target
(
case
edgeType
of
(
case
edgeType
of
GroupToGroup
->
[
Width
10
,
Color
[
toWColor
Black
],
Constraint
True
GroupToGroup
->
[
Width
3
,
penWidth
4
,
Color
[
toWColor
Black
],
Constraint
True
,
Label
(
StrLabel
$
fromStrict
lbl
)]
,
Label
(
StrLabel
$
fromStrict
lbl
)]
<>
[
toAttr
"edgeType"
"link"
]
BranchToGroup
->
[
Width
3
,
Color
[
toWColor
Black
],
ArrowHead
(
AType
[(
ArrMod
FilledArrow
RightSide
,
DotArrow
)])
BranchToGroup
->
[
Width
3
,
Color
[
toWColor
Black
],
ArrowHead
(
AType
[(
ArrMod
FilledArrow
RightSide
,
DotArrow
)])
,
Label
(
StrLabel
$
fromStrict
lbl
)]
,
Label
(
StrLabel
$
fromStrict
lbl
)]
<>
[
toAttr
"edgeType"
"branchLink"
]
BranchToBranch
->
[
Width
2
,
Color
[
toWColor
Black
],
Style
[
SItem
Dashed
[]
],
ArrowHead
(
AType
[(
ArrMod
FilledArrow
BothSides
,
DotArrow
)])
BranchToBranch
->
[
Width
2
,
Color
[
toWColor
Black
],
Style
[
SItem
Dashed
[]
],
ArrowHead
(
AType
[(
ArrMod
FilledArrow
BothSides
,
DotArrow
)])
,
Label
(
StrLabel
$
fromStrict
lbl
)]
,
Label
(
StrLabel
$
fromStrict
lbl
)]
PeriodToPeriod
->
[
Width
5
,
Color
[
toWColor
Black
]])
PeriodToPeriod
->
[
Width
5
,
Color
[
toWColor
Black
]])
...
@@ -155,10 +163,17 @@ mergePointers groups =
...
@@ -155,10 +163,17 @@ mergePointers groups =
in
unionWith
(
\
w
w'
->
max
w
w'
)
toChilds
toParents
in
unionWith
(
\
w
w'
->
max
w
w'
)
toChilds
toParents
toBid
::
PhyloGroup
->
[
PhyloBranch
]
->
Int
toBid
g
bs
=
let
b'
=
head'
"toBid"
(
filter
(
\
b
->
b
^.
branch_id
==
g
^.
phylo_groupBranchId
)
bs
)
in
fromJust
$
elemIndex
b'
bs
exportToDot
::
Phylo
->
PhyloExport
->
DotGraph
DotId
exportToDot
::
Phylo
->
PhyloExport
->
DotGraph
DotId
exportToDot
phylo
export
=
exportToDot
phylo
export
=
trace
(
"
\n
-- | Convert "
<>
show
(
length
$
export
^.
export_branches
)
<>
" branches and "
trace
(
"
\n
-- | Convert "
<>
show
(
length
$
export
^.
export_branches
)
<>
" branches and "
<>
show
(
length
$
export
^.
export_groups
)
<>
" groups to a dot file
\n
"
)
$
<>
show
(
length
$
export
^.
export_groups
)
<>
" groups "
<>
show
(
length
$
nub
$
concat
$
map
(
\
g
->
g
^.
phylo_groupNgrams
)
$
export
^.
export_groups
)
<>
" terms to a dot file
\n\n
"
<>
"##########################"
)
$
digraph
((
Str
.
fromStrict
)
$
(
phyloName
$
getConfig
phylo
))
$
do
digraph
((
Str
.
fromStrict
)
$
(
phyloName
$
getConfig
phylo
))
$
do
-- | 1) init the dot graph
-- | 1) init the dot graph
...
@@ -167,11 +182,12 @@ exportToDot phylo export =
...
@@ -167,11 +182,12 @@ exportToDot phylo export =
,
Ratio
FillRatio
,
Ratio
FillRatio
,
Style
[
SItem
Filled
[]
],
Color
[
toWColor
White
]]
,
Style
[
SItem
Filled
[]
],
Color
[
toWColor
White
]]
-- | home made attributes
-- | home made attributes
<>
[(
toAttr
(
fromStrict
"nbDocs"
)
$
pack
$
show
(
sum
$
elems
$
phylo
^.
phylo_timeDocs
))
<>
[(
toAttr
(
fromStrict
"phyloFoundations"
)
$
pack
$
show
(
length
$
Vector
.
toList
$
getRoots
phylo
))
,(
toAttr
(
fromStrict
"proxiName"
)
$
pack
$
show
(
getProximityName
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"phyloTerms"
)
$
pack
$
show
(
length
$
nub
$
concat
$
map
(
\
g
->
g
^.
phylo_groupNgrams
)
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"proxiInit"
)
$
pack
$
show
(
getProximityInit
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"phyloDocs"
)
$
pack
$
show
(
sum
$
elems
$
phylo
^.
phylo_timeDocs
))
,(
toAttr
(
fromStrict
"proxiStep"
)
$
pack
$
show
(
getProximityStep
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"phyloPeriods"
)
$
pack
$
show
(
length
$
elems
$
phylo
^.
phylo_periods
))
,(
toAttr
(
fromStrict
"quaFactor"
)
$
pack
$
show
(
_qua_relevance
$
phyloQuality
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"phyloBranches"
)
$
pack
$
show
(
length
$
export
^.
export_branches
))
,(
toAttr
(
fromStrict
"phyloGroups"
)
$
pack
$
show
(
length
$
export
^.
export_groups
))
])
])
...
@@ -191,7 +207,7 @@ exportToDot phylo export =
...
@@ -191,7 +207,7 @@ exportToDot phylo export =
-- mapM branchToDotNode branches
-- mapM branchToDotNode branches
-- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
-- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
mapM
branchToDotNode
$
export
^.
export_branches
mapM
(
\
b
->
branchToDotNode
b
(
fromJust
$
elemIndex
b
(
export
^.
export_branches
)))
$
export
^.
export_branches
-- | 5) create a layer for each period
-- | 5) create a layer for each period
_
<-
mapM
(
\
period
->
_
<-
mapM
(
\
period
->
...
@@ -200,7 +216,7 @@ exportToDot phylo export =
...
@@ -200,7 +216,7 @@ exportToDot phylo export =
periodToDotNode
period
periodToDotNode
period
-- | 6) create a node for each group
-- | 6) create a node for each group
mapM
(
\
g
->
groupToDotNode
(
getRoots
phylo
)
g
)
(
filter
(
\
g
->
g
^.
phylo_groupPeriod
==
period
)
$
export
^.
export_groups
)
mapM
(
\
g
->
groupToDotNode
(
getRoots
phylo
)
g
(
toBid
g
(
export
^.
export_branches
))
)
(
filter
(
\
g
->
g
^.
phylo_groupPeriod
==
period
)
$
export
^.
export_groups
)
)
$
getPeriodIds
phylo
)
$
getPeriodIds
phylo
-- | 7) create the edges between a branch and its first groups
-- | 7) create the edges between a branch and its first groups
...
@@ -224,12 +240,12 @@ exportToDot phylo export =
...
@@ -224,12 +240,12 @@ exportToDot phylo export =
)
$
nubBy
(
\
combi
combi'
->
fst
combi
==
fst
combi'
)
$
listToCombi'
$
getPeriodIds
phylo
)
$
nubBy
(
\
combi
combi'
->
fst
combi
==
fst
combi'
)
$
listToCombi'
$
getPeriodIds
phylo
-- | 8) create the edges between the branches
-- | 8) create the edges between the branches
_
<-
mapM
(
\
(
bId
,
bId'
)
->
--
_ <- mapM (\(bId,bId') ->
toDotEdge
(
branchIdToDotId
bId
)
(
branchIdToDotId
bId'
)
--
toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
(
Text
.
pack
$
show
(
branchIdsToProximity
bId
bId'
--
(Text.pack $ show(branchIdsToProximity bId bId'
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
--
(getThresholdInit $ phyloProximity $ getConfig phylo)
(
getThresholdStep
$
phyloProximity
$
getConfig
phylo
)))
BranchToBranch
--
(getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
)
$
nubBy
(
\
combi
combi'
->
fst
combi
==
fst
combi'
)
$
listToCombi'
$
map
_branch_id
$
export
^.
export_branches
--
) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
graphAttrs
[
Rank
SameRank
]
graphAttrs
[
Rank
SameRank
]
...
@@ -261,11 +277,25 @@ processFilters filters qua export =
...
@@ -261,11 +277,25 @@ processFilters filters qua export =
-- | Sort | --
-- | Sort | --
--------------
--------------
branchToIso
::
[
PhyloBranch
]
->
[
PhyloBranch
]
branchToIso
branches
=
let
steps
=
map
sum
$
inits
$
map
(
\
(
b
,
x
)
->
b
^.
branch_y
+
0.05
-
x
)
$
zip
branches
$
([
0
]
++
(
map
(
\
(
b
,
b'
)
->
let
idx
=
length
$
commonPrefix
(
b
^.
branch_canonId
)
(
b'
^.
branch_canonId
)
[]
in
(
b'
^.
branch_seaLevel
)
!!
(
idx
-
1
)
)
$
listToSeq
branches
))
in
map
(
\
(
x
,
b
)
->
b
&
branch_x
.~
x
)
$
zip
steps
branches
sortByHierarchy
::
Int
->
[
PhyloBranch
]
->
[
PhyloBranch
]
sortByHierarchy
::
Int
->
[
PhyloBranch
]
->
[
PhyloBranch
]
sortByHierarchy
depth
branches
=
sortByHierarchy
depth
branches
=
if
(
length
branches
==
1
)
if
(
length
branches
==
1
)
then
branches
then
branch
ToIso
branch
es
else
concat
else
branchToIso
$
concat
$
map
(
\
branches'
->
$
map
(
\
branches'
->
let
partitions
=
partition
(
\
b
->
depth
+
1
==
((
length
.
snd
)
$
b
^.
branch_id
))
branches'
let
partitions
=
partition
(
\
b
->
depth
+
1
==
((
length
.
snd
)
$
b
^.
branch_id
))
branches'
in
(
sortOn
(
\
b
->
(
b
^.
branch_meta
)
!
"birth"
)
(
fst
partitions
))
in
(
sortOn
(
\
b
->
(
b
^.
branch_meta
)
!
"birth"
)
(
fst
partitions
))
...
@@ -338,11 +368,12 @@ branchDating export =
...
@@ -338,11 +368,12 @@ branchDating export =
$
foldl'
(
\
acc
g
->
if
(
g
^.
phylo_groupBranchId
==
b
^.
branch_id
)
$
foldl'
(
\
acc
g
->
if
(
g
^.
phylo_groupBranchId
==
b
^.
branch_id
)
then
acc
++
[
g
^.
phylo_groupPeriod
]
then
acc
++
[
g
^.
phylo_groupPeriod
]
else
acc
)
[]
$
export
^.
export_groups
else
acc
)
[]
$
export
^.
export_groups
periods
=
nub
groups
birth
=
fst
$
head'
"birth"
groups
birth
=
fst
$
head'
"birth"
groups
age
=
(
snd
$
last'
"age"
groups
)
-
birth
age
=
(
snd
$
last'
"age"
groups
)
-
birth
in
b
&
branch_meta
%~
insert
"birth"
[
fromIntegral
birth
]
in
b
&
branch_meta
%~
insert
"birth"
[
fromIntegral
birth
]
&
branch_meta
%~
insert
"age"
[
fromIntegral
age
]
&
branch_meta
%~
insert
"age"
[
fromIntegral
age
]
&
branch_meta
%~
insert
"size"
[
fromIntegral
$
length
group
s
]
)
export
&
branch_meta
%~
insert
"size"
[
fromIntegral
$
length
period
s
]
)
export
processMetrics
::
PhyloExport
->
PhyloExport
processMetrics
::
PhyloExport
->
PhyloExport
processMetrics
export
=
ngramsMetrics
processMetrics
export
=
ngramsMetrics
...
@@ -409,8 +440,8 @@ processLabels labels foundations export =
...
@@ -409,8 +440,8 @@ processLabels labels foundations export =
toDynamics
::
Int
->
[
PhyloGroup
]
->
PhyloGroup
->
Map
Int
(
Date
,
Date
)
->
Double
toDynamics
::
Int
->
[
PhyloGroup
]
->
PhyloGroup
->
Map
Int
(
Date
,
Date
)
->
Double
toDynamics
n
parents
g
roup
m
=
toDynamics
n
parents
g
m
=
let
prd
=
g
roup
^.
phylo_groupPeriod
let
prd
=
g
^.
phylo_groupPeriod
end
=
last'
"dynamics"
(
sort
$
map
snd
$
elems
m
)
end
=
last'
"dynamics"
(
sort
$
map
snd
$
elems
m
)
in
if
(((
snd
prd
)
==
(
snd
$
m
!
n
))
&&
(
snd
prd
/=
end
))
in
if
(((
snd
prd
)
==
(
snd
$
m
!
n
))
&&
(
snd
prd
/=
end
))
-- | decrease
-- | decrease
...
@@ -429,7 +460,7 @@ toDynamics n parents group m =
...
@@ -429,7 +460,7 @@ toDynamics n parents group m =
processDynamics
::
[
PhyloGroup
]
->
[
PhyloGroup
]
processDynamics
::
[
PhyloGroup
]
->
[
PhyloGroup
]
processDynamics
groups
=
processDynamics
groups
=
map
(
\
g
->
map
(
\
g
->
let
parents
=
filter
(
\
g'
->
(
g
^.
phylo_groupBranchId
==
g'
^.
phylo_groupBranchId
)
let
parents
=
filter
(
\
g'
->
(
g
^.
phylo_groupBranchId
==
g'
^.
phylo_groupBranchId
)
&&
((
fst
$
g
^.
phylo_groupPeriod
)
>
(
fst
$
g'
^.
phylo_groupPeriod
)))
groups
&&
((
fst
$
g
^.
phylo_groupPeriod
)
>
(
fst
$
g'
^.
phylo_groupPeriod
)))
groups
...
@@ -449,7 +480,6 @@ processDynamics groups =
...
@@ -449,7 +480,6 @@ processDynamics groups =
-- | phyloExport | --
-- | phyloExport | --
---------------------
---------------------
toPhyloExport
::
Phylo
->
DotGraph
DotId
toPhyloExport
::
Phylo
->
DotGraph
DotId
toPhyloExport
phylo
=
exportToDot
phylo
toPhyloExport
phylo
=
exportToDot
phylo
$
processFilters
(
exportFilter
$
getConfig
phylo
)
(
phyloQuality
$
getConfig
phylo
)
$
processFilters
(
exportFilter
$
getConfig
phylo
)
(
phyloQuality
$
getConfig
phylo
)
...
@@ -458,15 +488,47 @@ toPhyloExport phylo = exportToDot phylo
...
@@ -458,15 +488,47 @@ toPhyloExport phylo = exportToDot phylo
$
processMetrics
export
$
processMetrics
export
where
where
export
::
PhyloExport
export
::
PhyloExport
export
=
PhyloExport
groups
branches
export
=
PhyloExport
groups
branches
--------------------------------------
--------------------------------------
branches
::
[
PhyloBranch
]
branches
::
[
PhyloBranch
]
branches
=
traceExportBranches
$
map
(
\
bId
->
PhyloBranch
bId
""
empty
)
$
nub
$
map
_phylo_groupBranchId
groups
branches
=
map
(
\
g
->
let
seaLvl
=
(
g
^.
phylo_groupMeta
)
!
"seaLevels"
breaks
=
(
g
^.
phylo_groupMeta
)
!
"breaks"
canonId
=
take
(
round
$
(
last'
"export"
breaks
)
+
2
)
(
snd
$
g
^.
phylo_groupBranchId
)
in
PhyloBranch
(
g
^.
phylo_groupBranchId
)
canonId
seaLvl
0
(
last'
"export"
(
take
(
round
$
(
last'
"export"
breaks
)
+
1
)
seaLvl
))
0
0
""
empty
)
$
map
(
\
gs
->
head'
"export"
gs
)
$
groupBy
(
\
g
g'
->
g
^.
phylo_groupBranchId
==
g'
^.
phylo_groupBranchId
)
$
sortOn
(
\
g
->
g
^.
phylo_groupBranchId
)
groups
--------------------------------------
--------------------------------------
groups
::
[
PhyloGroup
]
groups
::
[
PhyloGroup
]
groups
=
processDynamics
groups
=
traceExportGroups
$
getGroupsFromLevel
(
phyloLevel
$
getConfig
phylo
)
phylo
$
processDynamics
$
getGroupsFromLevel
(
phyloLevel
$
getConfig
phylo
)
$
tracePhyloInfo
phylo
traceExportBranches
::
[
PhyloBranch
]
->
[
PhyloBranch
]
traceExportBranches
::
[
PhyloBranch
]
->
[
PhyloBranch
]
traceExportBranches
branches
=
trace
(
"
\n
"
<>
"-- | Export "
<>
show
(
length
branches
)
<>
" branches"
)
branches
traceExportBranches
branches
=
trace
(
"
\n
"
<>
"-- | Export "
<>
show
(
length
branches
)
<>
" branches"
)
branches
tracePhyloInfo
::
Phylo
->
Phylo
tracePhyloInfo
phylo
=
trace
(
"
\n
"
<>
"##########################"
<>
"
\n\n
"
<>
"-- | Phylo with β = "
<>
show
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
)
<>
" applied to "
<>
show
(
length
$
Vector
.
toList
$
getRoots
phylo
)
<>
" foundations"
)
phylo
traceExportGroups
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceExportGroups
groups
=
trace
(
"
\n
"
<>
"-- | Export "
<>
show
(
length
$
nub
$
map
(
\
g
->
g
^.
phylo_groupBranchId
)
groups
)
<>
" branches, "
<>
show
(
length
groups
)
<>
" groups and "
<>
show
(
length
$
nub
$
concat
$
map
(
\
g
->
g
^.
phylo_groupNgrams
)
groups
)
<>
" terms"
)
groups
src/Gargantext/Viz/Phylo/PhyloMaker.hs
View file @
70a2339e
...
@@ -15,15 +15,15 @@ Portability : POSIX
...
@@ -15,15 +15,15 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloMaker
where
module
Gargantext.Viz.Phylo.PhyloMaker
where
import
Data.List
(
concat
,
nub
,
partition
,
sort
,
(
++
))
import
Data.List
(
concat
,
nub
,
partition
,
sort
,
(
++
)
,
group
,
intersect
,
null
,
sortOn
,
groupBy
)
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
restrictKeys
)
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
restrictKeys
,
foldlWithKey
)
import
Data.Set
(
size
)
import
Data.Set
(
size
)
import
Data.Vector
(
Vector
)
import
Data.Vector
(
Vector
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.TemporalMatching
(
temporalMatching
)
import
Gargantext.Viz.Phylo.TemporalMatching
(
adaptativeTemporalMatching
,
constanteTemporalMatching
,
getNextPeriods
,
filterDocs
,
filterDiago
,
reduceDiagos
,
toProximity
)
import
Gargantext.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Gargantext.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Gargantext.Text.Context
(
TermList
)
import
Gargantext.Text.Context
(
TermList
)
import
Gargantext.Text.Metrics.FrequentItemSet
(
fisWithSizePolyMap
,
Size
(
..
))
import
Gargantext.Text.Metrics.FrequentItemSet
(
fisWithSizePolyMap
,
Size
(
..
))
...
@@ -43,7 +43,8 @@ import qualified Data.Set as Set
...
@@ -43,7 +43,8 @@ import qualified Data.Set as Set
toPhylo
::
[
Document
]
->
TermList
->
Config
->
Phylo
toPhylo
::
[
Document
]
->
TermList
->
Config
->
Phylo
toPhylo
docs
lst
conf
=
traceToPhylo
(
phyloLevel
conf
)
$
toPhylo
docs
lst
conf
=
trace
(
"# phylo1 groups "
<>
show
(
length
$
getGroupsFromLevel
1
phylo1
))
$
traceToPhylo
(
phyloLevel
conf
)
$
if
(
phyloLevel
conf
)
>
1
if
(
phyloLevel
conf
)
>
1
then
foldl'
(
\
phylo'
_
->
synchronicClustering
phylo'
)
phylo1
[
2
..
(
phyloLevel
conf
)]
then
foldl'
(
\
phylo'
_
->
synchronicClustering
phylo'
)
phylo1
[
2
..
(
phyloLevel
conf
)]
else
phylo1
else
phylo1
...
@@ -62,6 +63,33 @@ toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $
...
@@ -62,6 +63,33 @@ toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $
-- | To Phylo 1 | --
-- | To Phylo 1 | --
--------------------
--------------------
toGroupsProxi
::
Level
->
Phylo
->
Phylo
toGroupsProxi
lvl
phylo
=
let
proximity
=
phyloProximity
$
getConfig
phylo
groupsProxi
=
foldlWithKey
(
\
acc
pId
pds
->
-- 1) process period by period
let
egos
=
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
elems
$
view
(
phylo_periodLevels
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
lvl
)
.
phylo_levelGroups
)
pds
next
=
getNextPeriods
ToParents
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
pId
(
keys
$
phylo
^.
phylo_periods
)
targets
=
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
getGroupsFromLevelPeriods
lvl
next
phylo
docs
=
filterDocs
(
phylo
^.
phylo_timeDocs
)
([
pId
]
++
next
)
diagos
=
filterDiago
(
phylo
^.
phylo_timeCooc
)
([
pId
]
++
next
)
-- 2) compute the pairs in parallel
pairs
=
map
(
\
(
id
,
ngrams
)
->
map
(
\
(
id'
,
ngrams'
)
->
let
nbDocs
=
(
sum
.
elems
)
$
filterDocs
docs
([
idToPrd
id
,
idToPrd
id'
])
diago
=
reduceDiagos
$
filterDiago
diagos
([
idToPrd
id
,
idToPrd
id'
])
in
((
id
,
id'
),
toProximity
nbDocs
diago
proximity
ngrams
ngrams'
ngrams'
)
)
$
filter
(
\
(
_
,
ngrams'
)
->
(
not
.
null
)
$
intersect
ngrams
ngrams'
)
targets
)
egos
pairs'
=
pairs
`
using
`
parList
rdeepseq
in
acc
++
(
concat
pairs'
)
)
[]
$
phylo
^.
phylo_periods
in
phylo
&
phylo_groupsProxi
.~
((
traceGroupsProxi
.
fromList
)
groupsProxi
)
appendGroups
::
(
a
->
PhyloPeriodId
->
Level
->
Int
->
Vector
Ngrams
->
[
Cooc
]
->
PhyloGroup
)
->
Level
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
::
(
a
->
PhyloPeriodId
->
Level
->
Int
->
Vector
Ngrams
->
[
Cooc
]
->
PhyloGroup
)
->
Level
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
f
lvl
m
phylo
=
trace
(
"
\n
"
<>
"-- | Append "
<>
show
(
length
$
concat
$
elems
m
)
<>
" groups to Level "
<>
show
(
lvl
)
<>
"
\n
"
)
appendGroups
f
lvl
m
phylo
=
trace
(
"
\n
"
<>
"-- | Append "
<>
show
(
length
$
concat
$
elems
m
)
<>
" groups to Level "
<>
show
(
lvl
)
<>
"
\n
"
)
...
@@ -72,40 +100,44 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
...
@@ -72,40 +100,44 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
(
\
phyloLvl
->
if
lvl
==
(
phyloLvl
^.
phylo_levelLevel
)
(
\
phyloLvl
->
if
lvl
==
(
phyloLvl
^.
phylo_levelLevel
)
then
then
let
pId
=
phyloLvl
^.
phylo_levelPeriod
let
pId
=
phyloLvl
^.
phylo_levelPeriod
phylo
Fis
=
m
!
pId
phylo
CUnit
=
m
!
pId
in
phyloLvl
in
phyloLvl
&
phylo_levelGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
&
phylo_levelGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
groups
++
[
(((
pId
,
lvl
),
length
groups
)
groups
++
[
(((
pId
,
lvl
),
length
groups
)
,
f
obj
pId
lvl
(
length
groups
)
(
getRoots
phylo
)
,
f
obj
pId
lvl
(
length
groups
)
(
getRoots
phylo
)
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[
pId
]))
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[
pId
]))
]
)
[]
phylo
Fis
)
]
)
[]
phylo
CUnit
)
else
else
phyloLvl
)
phyloLvl
)
phylo
phylo
fisToGroup
::
PhyloFis
->
PhyloPeriodId
->
Level
->
Int
->
Vector
Ngrams
->
[
Cooc
]
->
PhyloGroup
cliqueToGroup
::
PhyloClique
->
PhyloPeriodId
->
Level
->
Int
->
Vector
Ngrams
->
[
Cooc
]
->
PhyloGroup
fis
ToGroup
fis
pId
lvl
idx
fdt
coocs
=
clique
ToGroup
fis
pId
lvl
idx
fdt
coocs
=
let
ngrams
=
ngramsToIdx
(
Set
.
toList
$
fis
^.
phylo
Fis_clique
)
fdt
let
ngrams
=
ngramsToIdx
(
Set
.
toList
$
fis
^.
phylo
Clique_nodes
)
fdt
in
PhyloGroup
pId
lvl
idx
""
in
PhyloGroup
pId
lvl
idx
""
(
fis
^.
phylo
Fis
_support
)
(
fis
^.
phylo
Clique
_support
)
ngrams
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
ngramsToCooc
ngrams
coocs
)
(
1
,[
0
])
(
1
,[
0
])
-- | branchid (lvl,[path in the branching tree])
empty
(
fromList
[(
"breaks"
,[
0
]),(
"seaLevels"
,[
0
])])
[]
[]
[]
[]
[]
[]
[]
[]
toPhylo1
::
[
Document
]
->
Phylo
->
Phylo
toPhylo1
::
[
Document
]
->
Phylo
->
Phylo
toPhylo1
docs
phyloBase
=
temporalMatching
toPhylo1
docs
phyloBase
=
case
(
getSeaElevation
phyloBase
)
of
$
appendGroups
fisToGroup
1
phyloFis
phyloBase
Constante
start
gap
->
constanteTemporalMatching
start
gap
$
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
Adaptative
steps
->
adaptativeTemporalMatching
steps
$
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
where
where
--------------------------------------
--------------------------------------
phylo
Fis
::
Map
(
Date
,
Date
)
[
PhyloFis
]
phylo
Clique
::
Map
(
Date
,
Date
)
[
PhyloClique
]
phylo
Fis
=
toPhyloFis
docs'
(
getFisSupport
$
contextualUnit
$
getConfig
phyloBase
)
(
getFisSize
$
contextualUnit
$
getConfig
phyloBase
)
phylo
Clique
=
toPhyloClique
phyloBase
docs'
--------------------------------------
--------------------------------------
docs'
::
Map
(
Date
,
Date
)
[
Document
]
docs'
::
Map
(
Date
,
Date
)
[
Document
]
docs'
=
groupDocsByPeriod
date
(
getPeriodIds
phyloBase
)
docs
docs'
=
groupDocsByPeriod
'
date
(
getPeriodIds
phyloBase
)
docs
--------------------------------------
--------------------------------------
...
@@ -115,54 +147,59 @@ toPhylo1 docs phyloBase = temporalMatching
...
@@ -115,54 +147,59 @@ toPhylo1 docs phyloBase = temporalMatching
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filter
Fis
::
Bool
->
Int
->
(
Int
->
[
PhyloFis
]
->
[
PhyloFis
])
->
Map
(
Date
,
Date
)
[
PhyloFis
]
->
Map
(
Date
,
Date
)
[
PhyloFis
]
filter
Clique
::
Bool
->
Int
->
(
Int
->
[
PhyloClique
]
->
[
PhyloClique
])
->
Map
(
Date
,
Date
)
[
PhyloClique
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
filter
Fis
keep
thr
f
m
=
case
keep
of
filter
Clique
keep
thr
f
m
=
case
keep
of
False
->
map
(
\
l
->
f
thr
l
)
m
False
->
map
(
\
l
->
f
thr
l
)
m
True
->
map
(
\
l
->
keepFilled
(
f
)
thr
l
)
m
True
->
map
(
\
l
->
keepFilled
(
f
)
thr
l
)
m
-- | To filter Fis with small Support
-- | To filter Fis with small Support
filter
FisBySupport
::
Int
->
[
PhyloFis
]
->
[
PhyloFis
]
filter
CliqueBySupport
::
Int
->
[
PhyloClique
]
->
[
PhyloClique
]
filter
FisBySupport
thr
l
=
filter
(
\
fis
->
(
fis
^.
phyloFis
_support
)
>=
thr
)
l
filter
CliqueBySupport
thr
l
=
filter
(
\
clq
->
(
clq
^.
phyloClique
_support
)
>=
thr
)
l
-- | To filter Fis with small Clique size
-- | To filter Fis with small Clique size
filter
FisByClique
::
Int
->
[
PhyloFis
]
->
[
PhyloFis
]
filter
CliqueBySize
::
Int
->
[
PhyloClique
]
->
[
PhyloClique
]
filter
FisByClique
thr
l
=
filter
(
\
fis
->
(
size
$
fis
^.
phyloFis_clique
)
>=
thr
)
l
filter
CliqueBySize
thr
l
=
filter
(
\
clq
->
(
size
$
clq
^.
phyloClique_nodes
)
>=
thr
)
l
-- | To filter nested Fis
-- | To filter nested Fis
filter
FisByNested
::
Map
(
Date
,
Date
)
[
PhyloFis
]
->
Map
(
Date
,
Date
)
[
PhyloFis
]
filter
CliqueByNested
::
Map
(
Date
,
Date
)
[
PhyloClique
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
filter
Fis
ByNested
m
=
filter
Clique
ByNested
m
=
let
fis
=
map
(
\
l
->
let
clq
=
map
(
\
l
->
foldl
(
\
mem
f
->
if
(
any
(
\
f'
->
isNested
(
Set
.
toList
$
f'
^.
phylo
Fis_clique
)
(
Set
.
toList
$
f
^.
phyloFis_clique
))
mem
)
foldl
(
\
mem
f
->
if
(
any
(
\
f'
->
isNested
(
Set
.
toList
$
f'
^.
phylo
Clique_nodes
)
(
Set
.
toList
$
f
^.
phyloClique_nodes
))
mem
)
then
mem
then
mem
else
else
let
fMax
=
filter
(
\
f'
->
not
$
isNested
(
Set
.
toList
$
f
^.
phylo
Fis_clique
)
(
Set
.
toList
$
f'
^.
phyloFis_clique
))
mem
let
fMax
=
filter
(
\
f'
->
not
$
isNested
(
Set
.
toList
$
f
^.
phylo
Clique_nodes
)
(
Set
.
toList
$
f'
^.
phyloClique_nodes
))
mem
in
fMax
++
[
f
]
)
[]
l
)
in
fMax
++
[
f
]
)
[]
l
)
$
elems
m
$
elems
m
fis'
=
fis
`
using
`
parList
rdeepseq
clq'
=
clq
`
using
`
parList
rdeepseq
in
fromList
$
zip
(
keys
m
)
fis
'
in
fromList
$
zip
(
keys
m
)
clq
'
-- | To transform a time map of docs innto a time map of Fis with some filters
-- | To transform a time map of docs innto a time map of Fis with some filters
toPhyloFis
::
Map
(
Date
,
Date
)
[
Document
]
->
Int
->
Int
->
Map
(
Date
,
Date
)
[
PhyloFis
]
toPhyloClique
::
Phylo
->
Map
(
Date
,
Date
)
[
Document
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
toPhyloFis
phyloDocs
support
clique
=
traceFis
"Filtered Fis"
toPhyloClique
phylo
phyloDocs
=
case
(
clique
$
getConfig
phylo
)
of
$
filterFisByNested
Fis
s
s'
->
-- traceFis "Filtered Fis"
$
traceFis
"Filtered by clique size"
filterCliqueByNested
$
filterFis
True
clique
(
filterFisByClique
)
-- $ traceFis "Filtered by clique size"
$
traceFis
"Filtered by support"
$
filterClique
True
s'
(
filterCliqueBySize
)
$
filterFis
True
support
(
filterFisBySupport
)
-- $ traceFis "Filtered by support"
$
traceFis
"Unfiltered Fis"
phyloFis
$
filterClique
True
s
(
filterCliqueBySupport
)
-- $ traceFis "Unfiltered Fis"
phyloClique
MaxClique
_
->
undefined
where
where
--------------------------------------
--------------------------------------
phyloFis
::
Map
(
Date
,
Date
)
[
PhyloFis
]
phyloClique
::
Map
(
Date
,
Date
)
[
PhyloClique
]
phyloFis
=
phyloClique
=
case
(
clique
$
getConfig
phylo
)
of
let
fis
=
map
(
\
(
prd
,
docs
)
->
let
lst
=
toList
$
fisWithSizePolyMap
(
Segment
1
20
)
1
(
map
text
docs
)
Fis
_
_
->
let
fis
=
map
(
\
(
prd
,
docs
)
->
in
(
prd
,
map
(
\
f
->
PhyloFis
(
fst
f
)
(
snd
f
)
prd
)
lst
))
let
lst
=
toList
$
fisWithSizePolyMap
(
Segment
1
20
)
1
(
map
text
docs
)
$
toList
phyloDocs
in
(
prd
,
map
(
\
f
->
PhyloClique
(
fst
f
)
(
snd
f
)
prd
)
lst
))
fis'
=
fis
`
using
`
parList
rdeepseq
$
toList
phyloDocs
in
fromList
fis'
fis'
=
fis
`
using
`
parList
rdeepseq
in
fromList
fis'
MaxClique
_
->
undefined
--------------------------------------
--------------------------------------
...
@@ -188,6 +225,21 @@ docsToTimeScaleCooc docs fdt =
...
@@ -188,6 +225,21 @@ docsToTimeScaleCooc docs fdt =
-- | to Phylo Base | --
-- | to Phylo Base | --
-----------------------
-----------------------
-- | To group a list of Documents by fixed periods
groupDocsByPeriod'
::
(
NFData
doc
,
Ord
date
,
Enum
date
)
=>
(
doc
->
date
)
->
[(
date
,
date
)]
->
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
groupDocsByPeriod'
f
pds
docs
=
let
docs'
=
groupBy
(
\
d
d'
->
f
d
==
f
d'
)
$
sortOn
f
docs
periods
=
map
(
inPeriode
f
docs'
)
pds
periods'
=
periods
`
using
`
parList
rdeepseq
in
trace
(
"
\n
"
<>
"-- | Group "
<>
show
(
length
docs
)
<>
" docs by "
<>
show
(
length
pds
)
<>
" periods"
<>
"
\n
"
)
$
fromList
$
zip
pds
periods'
where
--------------------------------------
inPeriode
::
Ord
b
=>
(
t
->
b
)
->
[[
t
]]
->
(
b
,
b
)
->
[
t
]
inPeriode
f'
h
(
start
,
end
)
=
concat
$
fst
$
partition
(
\
d
->
f'
(
head'
"inPeriode"
d
)
>=
start
&&
f'
(
head'
"inPeriode"
d
)
<=
end
)
h
-- | To group a list of Documents by fixed periods
-- | To group a list of Documents by fixed periods
groupDocsByPeriod
::
(
NFData
doc
,
Ord
date
,
Enum
date
)
=>
(
doc
->
date
)
->
[(
date
,
date
)]
->
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
groupDocsByPeriod
::
(
NFData
doc
,
Ord
date
,
Enum
date
)
=>
(
doc
->
date
)
->
[(
date
,
date
)]
->
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
...
@@ -206,6 +258,17 @@ groupDocsByPeriod f pds es =
...
@@ -206,6 +258,17 @@ groupDocsByPeriod f pds es =
--------------------------------------
--------------------------------------
docsToTermFreq
::
[
Document
]
->
Vector
Ngrams
->
Map
Int
Double
docsToTermFreq
docs
fdt
=
let
nbDocs
=
fromIntegral
$
length
docs
freqs
=
map
(
/
(
nbDocs
))
$
fromList
$
map
(
\
lst
->
(
head'
"docsToTermFreq"
lst
,
fromIntegral
$
length
lst
))
$
group
$
sort
$
concat
$
map
(
\
d
->
nub
$
ngramsToIdx
(
text
d
)
fdt
)
docs
sumFreqs
=
sum
$
elems
freqs
in
map
(
/
sumFreqs
)
freqs
-- | To count the number of docs by unit of time
-- | To count the number of docs by unit of time
docsToTimeScaleNb
::
[
Document
]
->
Map
Date
Double
docsToTimeScaleNb
::
[
Document
]
->
Map
Date
Double
docsToTimeScaleNb
docs
=
docsToTimeScaleNb
docs
=
...
@@ -230,5 +293,7 @@ toPhyloBase docs lst conf =
...
@@ -230,5 +293,7 @@ toPhyloBase docs lst conf =
$
Phylo
foundations
$
Phylo
foundations
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleNb
docs
)
(
docsToTimeScaleNb
docs
)
(
docsToTermFreq
docs
(
foundations
^.
foundations_roots
))
empty
params
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
initPhyloLevels
1
prd
)))
periods
)
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
initPhyloLevels
1
prd
)))
periods
)
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
70a2339e
...
@@ -34,6 +34,7 @@ import Control.Lens hiding (Level)
...
@@ -34,6 +34,7 @@ import Control.Lens hiding (Level)
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Vector
as
Vector
import
qualified
Data.List
as
List
import
qualified
Data.List
as
List
import
qualified
Data.Set
as
Set
import
qualified
Data.Set
as
Set
import
qualified
Data.Map
as
Map
------------
------------
-- | Io | --
-- | Io | --
...
@@ -66,6 +67,7 @@ roundToStr = printf "%0.*f"
...
@@ -66,6 +67,7 @@ roundToStr = printf "%0.*f"
countSup
::
Double
->
[
Double
]
->
Int
countSup
::
Double
->
[
Double
]
->
Int
countSup
s
l
=
length
$
filter
(
>
s
)
l
countSup
s
l
=
length
$
filter
(
>
s
)
l
dropByIdx
::
Int
->
[
a
]
->
[
a
]
dropByIdx
::
Int
->
[
a
]
->
[
a
]
dropByIdx
k
l
=
take
k
l
++
drop
(
k
+
1
)
l
dropByIdx
k
l
=
take
k
l
++
drop
(
k
+
1
)
l
...
@@ -76,6 +78,15 @@ elemIndex' e l = case (List.elemIndex e l) of
...
@@ -76,6 +78,15 @@ elemIndex' e l = case (List.elemIndex e l) of
Just
i
->
i
Just
i
->
i
commonPrefix
::
Eq
a
=>
[
a
]
->
[
a
]
->
[
a
]
->
[
a
]
commonPrefix
lst
lst'
acc
=
if
(
null
lst
||
null
lst'
)
then
acc
else
if
(
head'
"commonPrefix"
lst
==
head'
"commonPrefix"
lst'
)
then
commonPrefix
(
tail
lst
)
(
tail
lst'
)
(
acc
++
[
head'
"commonPrefix"
lst
])
else
acc
---------------------
---------------------
-- | Foundations | --
-- | Foundations | --
---------------------
---------------------
...
@@ -162,44 +173,44 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l)
...
@@ -162,44 +173,44 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l)
else
f
thr
l
else
f
thr
l
traceClique
::
Map
(
Date
,
Date
)
[
Phylo
Fis
]
->
String
traceClique
::
Map
(
Date
,
Date
)
[
Phylo
Clique
]
->
String
traceClique
mFis
=
foldl
(
\
msg
cpt
->
msg
<>
show
(
countSup
cpt
cliques
)
<>
" (>"
<>
show
(
cpt
)
<>
") "
)
""
[
1
..
6
]
traceClique
mFis
=
foldl
(
\
msg
cpt
->
msg
<>
show
(
countSup
cpt
cliques
)
<>
" (>"
<>
show
(
cpt
)
<>
") "
)
""
[
1
..
6
]
where
where
--------------------------------------
--------------------------------------
cliques
::
[
Double
]
cliques
::
[
Double
]
cliques
=
sort
$
map
(
fromIntegral
.
size
.
_phylo
Fis_clique
)
$
concat
$
elems
mFis
cliques
=
sort
$
map
(
fromIntegral
.
size
.
_phylo
Clique_nodes
)
$
concat
$
elems
mFis
--------------------------------------
--------------------------------------
traceSupport
::
Map
(
Date
,
Date
)
[
Phylo
Fis
]
->
String
traceSupport
::
Map
(
Date
,
Date
)
[
Phylo
Clique
]
->
String
traceSupport
mFis
=
foldl
(
\
msg
cpt
->
msg
<>
show
(
countSup
cpt
supports
)
<>
" (>"
<>
show
(
cpt
)
<>
") "
)
""
[
1
..
6
]
traceSupport
mFis
=
foldl
(
\
msg
cpt
->
msg
<>
show
(
countSup
cpt
supports
)
<>
" (>"
<>
show
(
cpt
)
<>
") "
)
""
[
1
..
6
]
where
where
--------------------------------------
--------------------------------------
supports
::
[
Double
]
supports
::
[
Double
]
supports
=
sort
$
map
(
fromIntegral
.
_phylo
Fis
_support
)
$
concat
$
elems
mFis
supports
=
sort
$
map
(
fromIntegral
.
_phylo
Clique
_support
)
$
concat
$
elems
mFis
--------------------------------------
--------------------------------------
traceFis
::
[
Char
]
->
Map
(
Date
,
Date
)
[
Phylo
Fis
]
->
Map
(
Date
,
Date
)
[
PhyloFis
]
traceFis
::
[
Char
]
->
Map
(
Date
,
Date
)
[
Phylo
Clique
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
traceFis
msg
mFis
=
trace
(
"
\n
"
<>
"-- | "
<>
msg
<>
" : "
<>
show
(
sum
$
map
length
$
elems
mFis
)
<>
"
\n
"
traceFis
msg
mFis
=
trace
(
"
\n
"
<>
"-- | "
<>
msg
<>
" : "
<>
show
(
sum
$
map
length
$
elems
mFis
)
<>
"
\n
"
<>
"Support : "
<>
(
traceSupport
mFis
)
<>
"
\n
"
<>
"Support : "
<>
(
traceSupport
mFis
)
<>
"
\n
"
<>
"
Clique
: "
<>
(
traceClique
mFis
)
<>
"
\n
"
)
mFis
<>
"
Nb Ngrams
: "
<>
(
traceClique
mFis
)
<>
"
\n
"
)
mFis
---------------
----------
---------------
-- | C
ontextual unit
| --
-- | C
lique
| --
---------------
----------
---------------
get
FisSupport
::
ContextualUnit
->
Int
get
CliqueSupport
::
Clique
->
Int
get
Fis
Support
unit
=
case
unit
of
get
Clique
Support
unit
=
case
unit
of
Fis
s
_
->
s
Fis
s
_
->
s
-- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support")
MaxClique
_
->
0
get
FisSize
::
ContextualUnit
->
Int
get
CliqueSize
::
Clique
->
Int
get
Fis
Size
unit
=
case
unit
of
get
Clique
Size
unit
=
case
unit
of
Fis
_
s
->
s
Fis
_
s
->
s
-- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size")
MaxClique
s
->
s
--------------
--------------
...
@@ -227,6 +238,8 @@ sumCooc cooc cooc' = unionWith (+) cooc cooc'
...
@@ -227,6 +238,8 @@ sumCooc cooc cooc' = unionWith (+) cooc cooc'
getTrace
::
Cooc
->
Double
getTrace
::
Cooc
->
Double
getTrace
cooc
=
sum
$
elems
$
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
cooc
getTrace
cooc
=
sum
$
elems
$
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
cooc
coocToDiago
::
Cooc
->
Cooc
coocToDiago
cooc
=
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
cooc
-- | To build the local cooc matrix of each phylogroup
-- | To build the local cooc matrix of each phylogroup
ngramsToCooc
::
[
Int
]
->
[
Cooc
]
->
Cooc
ngramsToCooc
::
[
Int
]
->
[
Cooc
]
->
Cooc
...
@@ -243,6 +256,12 @@ ngramsToCooc ngrams coocs =
...
@@ -243,6 +256,12 @@ ngramsToCooc ngrams coocs =
getGroupId
::
PhyloGroup
->
PhyloGroupId
getGroupId
::
PhyloGroup
->
PhyloGroupId
getGroupId
group
=
((
group
^.
phylo_groupPeriod
,
group
^.
phylo_groupLevel
),
group
^.
phylo_groupIndex
)
getGroupId
group
=
((
group
^.
phylo_groupPeriod
,
group
^.
phylo_groupLevel
),
group
^.
phylo_groupIndex
)
idToPrd
::
PhyloGroupId
->
PhyloPeriodId
idToPrd
id
=
(
fst
.
fst
)
id
getGroupThr
::
PhyloGroup
->
Double
getGroupThr
group
=
last'
"getGroupThr"
((
group
^.
phylo_groupMeta
)
!
"breaks"
)
groupByField
::
Ord
a
=>
(
PhyloGroup
->
a
)
->
[
PhyloGroup
]
->
Map
a
[
PhyloGroup
]
groupByField
::
Ord
a
=>
(
PhyloGroup
->
a
)
->
[
PhyloGroup
]
->
Map
a
[
PhyloGroup
]
groupByField
toField
groups
=
fromListWith
(
++
)
$
map
(
\
g
->
(
toField
g
,
[
g
]))
groups
groupByField
toField
groups
=
fromListWith
(
++
)
$
map
(
\
g
->
(
toField
g
,
[
g
]))
groups
...
@@ -255,34 +274,21 @@ getPeriodPointers fil group =
...
@@ -255,34 +274,21 @@ getPeriodPointers fil group =
filterProximity
::
Proximity
->
Double
->
Double
->
Bool
filterProximity
::
Proximity
->
Double
->
Double
->
Bool
filterProximity
proximity
thr
local
=
filterProximity
proximity
thr
local
=
case
proximity
of
case
proximity
of
WeightedLogJaccard
_
_
_
->
local
>=
thr
WeightedLogJaccard
_
->
local
>=
thr
Hamming
->
undefined
Hamming
->
undefined
getProximityName
::
Proximity
->
String
getProximityName
::
Proximity
->
String
getProximityName
proximity
=
getProximityName
proximity
=
case
proximity
of
case
proximity
of
WeightedLogJaccard
_
_
_
->
"WLJaccard"
WeightedLogJaccard
_
->
"WLJaccard"
Hamming
->
"Hamming"
Hamming
->
"Hamming"
getProximityInit
::
Proximity
->
Double
getProximityInit
proximity
=
case
proximity
of
WeightedLogJaccard
_
i
_
->
i
Hamming
->
undefined
getProximityStep
::
Proximity
->
Double
getProximityStep
proximity
=
case
proximity
of
WeightedLogJaccard
_
_
s
->
s
Hamming
->
undefined
---------------
---------------
-- | Phylo | --
-- | Phylo | --
---------------
---------------
addPointers
::
PhyloGroup
->
Filiation
->
PointerType
->
[
Pointer
]
->
PhyloGroup
addPointers
::
Filiation
->
PointerType
->
[
Pointer
]
->
PhyloGroup
->
PhyloGroup
addPointers
group
fil
pty
pointers
=
addPointers
fil
pty
pointers
group
=
case
pty
of
case
pty
of
TemporalPointer
->
case
fil
of
TemporalPointer
->
case
fil
of
ToChilds
->
group
&
phylo_groupPeriodChilds
.~
pointers
ToChilds
->
group
&
phylo_groupPeriodChilds
.~
pointers
...
@@ -310,6 +316,9 @@ getLevels phylo = nub
...
@@ -310,6 +316,9 @@ getLevels phylo = nub
.
traverse
.
traverse
.
phylo_periodLevels
)
phylo
.
phylo_periodLevels
)
phylo
getSeaElevation
::
Phylo
->
SeaElevation
getSeaElevation
phylo
=
seaElevation
(
getConfig
phylo
)
getConfig
::
Phylo
->
Config
getConfig
::
Phylo
->
Config
getConfig
phylo
=
(
phylo
^.
phylo_param
)
^.
phyloParam_config
getConfig
phylo
=
(
phylo
^.
phylo_param
)
^.
phyloParam_config
...
@@ -334,6 +343,26 @@ getGroupsFromLevel lvl phylo =
...
@@ -334,6 +343,26 @@ getGroupsFromLevel lvl phylo =
.
phylo_levelGroups
)
phylo
.
phylo_levelGroups
)
phylo
getGroupsFromLevelPeriods
::
Level
->
[
PhyloPeriodId
]
->
Phylo
->
[
PhyloGroup
]
getGroupsFromLevelPeriods
lvl
periods
phylo
=
elems
$
view
(
phylo_periods
.
traverse
.
filtered
(
\
phyloPrd
->
elem
(
phyloPrd
^.
phylo_periodPeriod
)
periods
)
.
phylo_periodLevels
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
lvl
)
.
phylo_levelGroups
)
phylo
getGroupsFromPeriods
::
Level
->
Map
PhyloPeriodId
PhyloPeriod
->
[
PhyloGroup
]
getGroupsFromPeriods
lvl
periods
=
elems
$
view
(
traverse
.
phylo_periodLevels
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
lvl
)
.
phylo_levelGroups
)
periods
updatePhyloGroups
::
Level
->
Map
PhyloGroupId
PhyloGroup
->
Phylo
->
Phylo
updatePhyloGroups
::
Level
->
Map
PhyloGroupId
PhyloGroup
->
Phylo
->
Phylo
updatePhyloGroups
lvl
m
phylo
=
updatePhyloGroups
lvl
m
phylo
=
over
(
phylo_periods
over
(
phylo_periods
...
@@ -391,27 +420,7 @@ traceSynchronyStart phylo =
...
@@ -391,27 +420,7 @@ traceSynchronyStart phylo =
getSensibility
::
Proximity
->
Double
getSensibility
::
Proximity
->
Double
getSensibility
proxi
=
case
proxi
of
getSensibility
proxi
=
case
proxi
of
WeightedLogJaccard
s
_
_
->
s
WeightedLogJaccard
s
->
s
Hamming
->
undefined
getThresholdInit
::
Proximity
->
Double
getThresholdInit
proxi
=
case
proxi
of
WeightedLogJaccard
_
t
_
->
t
Hamming
->
undefined
getThresholdStep
::
Proximity
->
Double
getThresholdStep
proxi
=
case
proxi
of
WeightedLogJaccard
_
_
s
->
s
Hamming
->
undefined
traceBranchMatching
::
Proximity
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
traceBranchMatching
proxi
thr
groups
=
case
proxi
of
WeightedLogJaccard
_
i
s
->
trace
(
roundToStr
2
thr
<>
" "
<>
foldl
(
\
acc
_
->
acc
<>
"."
)
"."
[(
10
*
i
),(
10
*
i
+
10
*
s
)
..
(
10
*
thr
)]
<>
" "
<>
show
(
length
groups
)
<>
" groups"
)
groups
Hamming
->
undefined
Hamming
->
undefined
----------------
----------------
...
@@ -478,4 +487,9 @@ traceMatchEnd groups =
...
@@ -478,4 +487,9 @@ traceMatchEnd groups =
traceTemporalMatching
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceTemporalMatching
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceTemporalMatching
groups
=
traceTemporalMatching
groups
=
trace
(
"
\n
"
<>
"-- | Start temporal matching for "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
trace
(
"
\n
"
<>
"-- | Start temporal matching for "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
\ No newline at end of file
traceGroupsProxi
::
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
->
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
traceGroupsProxi
m
=
trace
(
"
\n
"
<>
"-- | "
<>
show
(
Map
.
size
m
)
<>
" computed pairs of groups proximity"
<>
"
\n
"
)
m
\ No newline at end of file
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
70a2339e
...
@@ -18,34 +18,77 @@ module Gargantext.Viz.Phylo.SynchronicClustering where
...
@@ -18,34 +18,77 @@ module Gargantext.Viz.Phylo.SynchronicClustering where
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.TemporalMatching
(
weightedLogJaccard
)
import
Gargantext.Viz.Phylo.TemporalMatching
(
weightedLogJaccard'
,
filterDiago
,
reduceDiagos
)
import
Gargantext.Viz.Phylo.PhyloExport
(
processDynamics
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
,
sortOn
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
,
sortOn
,
all
,
groupBy
,
group
,
maximum
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
)
import
Data.Text
(
Text
)
import
Control.Lens
hiding
(
Level
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
-- import Debug.Trace (trace)
import
qualified
Data.Map
as
Map
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
-------------------------
-------------------------
-- | New Level Maker | --
-- | New Level Maker | --
-------------------------
-------------------------
toBranchId
::
PhyloGroup
->
PhyloBranchId
mergeBranchIds
::
[[
Int
]]
->
[
Int
]
toBranchId
child
=
((
child
^.
phylo_groupLevel
)
+
1
,
snd
(
child
^.
phylo_groupBranchId
))
mergeBranchIds
ids
=
(
head'
"mergeBranchIds"
.
sort
.
mostFreq'
)
ids
where
-- | 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]]
-- mostUpLeft ids' =
-- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
-- inf = (fst . minimum) groupIds
-- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- | 1) find the most frequent ids
mostFreq'
::
[[
Int
]]
->
[[
Int
]]
mostFreq'
ids'
=
let
groupIds
=
(
map
(
\
gIds
->
(
length
gIds
,
head'
"gIds"
gIds
))
.
group
.
sort
)
ids'
sup
=
(
fst
.
maximum
)
groupIds
in
map
snd
$
filter
(
\
gIds
->
fst
gIds
==
sup
)
groupIds
mergeMeta
::
[
Int
]
->
[
PhyloGroup
]
->
Map
Text
[
Double
]
mergeMeta
bId
groups
=
let
ego
=
head'
"mergeMeta"
$
filter
(
\
g
->
(
snd
(
g
^.
phylo_groupBranchId
))
==
bId
)
groups
in
fromList
[(
"breaks"
,(
ego
^.
phylo_groupMeta
)
!
"breaks"
),(
"seaLevels"
,(
ego
^.
phylo_groupMeta
)
!
"seaLevels"
)]
groupsToBranches'
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
groupsToBranches'
groups
=
-- | run the related component algorithm
let
egos
=
map
(
\
g
->
[
getGroupId
g
]
++
(
map
fst
$
g
^.
phylo_groupPeriodParents
)
++
(
map
fst
$
g
^.
phylo_groupPeriodChilds
)
)
$
elems
groups
graph
=
relatedComponents
egos
-- | update each group's branch id
in
map
(
\
ids
->
let
groups'
=
elems
$
restrictKeys
groups
(
Set
.
fromList
ids
)
bId
=
mergeBranchIds
$
map
(
\
g
->
snd
$
g
^.
phylo_groupBranchId
)
groups'
in
map
(
\
g
->
g
&
phylo_groupBranchId
%~
(
\
(
lvl
,
_
)
->
(
lvl
,
bId
)))
groups'
)
graph
mergeGroups
::
[
Cooc
]
->
PhyloGroupId
->
Map
PhyloGroupId
PhyloGroupId
->
[
PhyloGroup
]
->
PhyloGroup
mergeGroups
::
[
Cooc
]
->
PhyloGroupId
->
Map
PhyloGroupId
PhyloGroupId
->
[
PhyloGroup
]
->
PhyloGroup
mergeGroups
coocs
id
mapIds
childs
=
mergeGroups
coocs
id
mapIds
childs
=
let
ngrams
=
(
sort
.
nub
.
concat
)
$
map
_phylo_groupNgrams
childs
let
ngrams
=
(
sort
.
nub
.
concat
)
$
map
_phylo_groupNgrams
childs
in
PhyloGroup
(
fst
$
fst
id
)
(
snd
$
fst
id
)
(
snd
id
)
""
in
PhyloGroup
(
fst
$
fst
id
)
(
snd
$
fst
id
)
(
snd
id
)
""
(
sum
$
map
_phylo_groupSupport
childs
)
ngrams
(
sum
$
map
_phylo_groupSupport
childs
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
toBranchId
(
head'
"mergeGroups"
childs
))
(
ngramsToCooc
ngrams
coocs
)
empty
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
((
snd
$
fst
id
),
bId
)
(
mergeMeta
bId
childs
)
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodParents
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodParents
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodChilds
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodChilds
childs
)
where
where
--------------------
bId
::
[
Int
]
bId
=
mergeBranchIds
$
map
(
\
g
->
snd
$
g
^.
phylo_groupBranchId
)
childs
--------------------
updatePointers
::
[
Pointer
]
->
[
Pointer
]
updatePointers
::
[
Pointer
]
->
[
Pointer
]
updatePointers
pointers
=
map
(
\
(
pId
,
w
)
->
(
mapIds
!
pId
,
w
))
pointers
updatePointers
pointers
=
map
(
\
(
pId
,
w
)
->
(
mapIds
!
pId
,
w
))
pointers
...
@@ -57,42 +100,47 @@ addPhyloLevel lvl phylo =
...
@@ -57,42 +100,47 @@ addPhyloLevel lvl phylo =
%~
(
insert
(
phyloPrd
^.
phylo_periodPeriod
,
lvl
)
(
PhyloLevel
(
phyloPrd
^.
phylo_periodPeriod
)
lvl
empty
)))
phylo
%~
(
insert
(
phyloPrd
^.
phylo_periodPeriod
,
lvl
)
(
PhyloLevel
(
phyloPrd
^.
phylo_periodPeriod
)
lvl
empty
)))
phylo
toNextLevel
::
Phylo
->
[
PhyloGroup
]
->
Phylo
toNextLevel
'
::
Phylo
->
[
PhyloGroup
]
->
Phylo
toNextLevel
phylo
groups
=
toNextLevel
'
phylo
groups
=
let
curLvl
=
getLastLevel
phylo
let
curLvl
=
getLastLevel
phylo
oldGroups
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
getLevelParentId
g
))
groups
oldGroups
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
getLevelParentId
g
))
groups
newGroups
=
fromListWith
(
++
)
newGroups
=
concat
$
groupsToBranches'
-- | 5) group the parents by periods
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
foldlWithKey
(
\
acc
id
groups'
->
$
foldlWithKey
(
\
acc
id
groups'
->
-- | 4) create the parent group
-- | 4) create the parent group
let
parent
=
mergeGroups
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[(
fst
.
fst
)
id
])
id
oldGroups
groups'
let
parent
=
mergeGroups
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[(
fst
.
fst
)
id
])
id
oldGroups
groups'
in
acc
++
[
(
parent
^.
phylo_groupPeriod
,
[
parent
])
])
[]
in
acc
++
[
parent
])
[]
-- | 3) group the current groups by parentId
-- | 3) group the current groups by parentId
$
fromListWith
(
++
)
$
map
(
\
g
->
(
getLevelParentId
g
,
[
g
]))
groups
$
fromListWith
(
++
)
$
map
(
\
g
->
(
getLevelParentId
g
,
[
g
]))
groups
newPeriods
=
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,
[
g
]))
newGroups
in
traceSynchronyEnd
in
traceSynchronyEnd
$
over
(
phylo_periods
.
traverse
.
phylo_periodLevels
.
traverse
$
over
(
phylo_periods
.
traverse
.
phylo_periodLevels
.
traverse
-- | 6) update each period at curLvl + 1
-- | 6) update each period at curLvl + 1
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
(
curLvl
+
1
)))
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
(
curLvl
+
1
)))
-- | 7) by adding the parents
-- | 7) by adding the parents
(
\
phyloLvl
->
(
\
phyloLvl
->
if
member
(
phyloLvl
^.
phylo_levelPeriod
)
new
Group
s
if
member
(
phyloLvl
^.
phylo_levelPeriod
)
new
Period
s
then
phyloLvl
&
phylo_levelGroups
then
phyloLvl
&
phylo_levelGroups
.~
fromList
(
map
(
\
g
->
(
getGroupId
g
,
g
))
$
new
Group
s
!
(
phyloLvl
^.
phylo_levelPeriod
))
.~
fromList
(
map
(
\
g
->
(
getGroupId
g
,
g
))
$
new
Period
s
!
(
phyloLvl
^.
phylo_levelPeriod
))
else
phyloLvl
)
else
phyloLvl
)
-- | 2) add the curLvl + 1 phyloLevel to the phylo
-- | 2) add the curLvl + 1 phyloLevel to the phylo
$
addPhyloLevel
(
curLvl
+
1
)
$
addPhyloLevel
(
curLvl
+
1
)
-- | 1) update the current groups (with level parent pointers) in the phylo
-- | 1) update the current groups (with level parent pointers) in the phylo
$
updatePhyloGroups
curLvl
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
groups
)
phylo
$
updatePhyloGroups
curLvl
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
groups
)
phylo
--------------------
--------------------
-- | Clustering | --
-- | Clustering | --
--------------------
--------------------
toPairs
::
SynchronyStrategy
->
[
PhyloGroup
]
->
[(
PhyloGroup
,
PhyloGroup
)]
toPairs
::
[
PhyloGroup
]
->
[(
PhyloGroup
,
PhyloGroup
)]
toPairs
strategy
groups
=
case
strategy
of
toPairs
groups
=
filter
(
\
(
g
,
g'
)
->
(
not
.
null
)
$
intersect
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
))
MergeRegularGroups
->
pairs
$
listToCombi'
groups
$
filter
(
\
g
->
all
(
==
3
)
$
(
g
^.
phylo_groupMeta
)
!
"dynamics"
)
groups
MergeAllGroups
->
pairs
groups
where
pairs
::
[
PhyloGroup
]
->
[(
PhyloGroup
,
PhyloGroup
)]
pairs
gs
=
filter
(
\
(
g
,
g'
)
->
(
not
.
null
)
$
intersect
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
))
(
listToCombi'
gs
)
toDiamonds
::
[
PhyloGroup
]
->
[[
PhyloGroup
]]
toDiamonds
::
[
PhyloGroup
]
->
[[
PhyloGroup
]]
...
@@ -108,26 +156,25 @@ toDiamonds groups = foldl' (\acc groups' ->
...
@@ -108,26 +156,25 @@ toDiamonds groups = foldl' (\acc groups' ->
$
foldl'
(
\
acc
g
->
acc
++
(
map
(
\
(
id
,
_
)
->
(
id
,[
g
])
)
$
g
^.
phylo_groupPeriodParents
)
)
[]
groups
$
foldl'
(
\
acc
g
->
acc
++
(
map
(
\
(
id
,
_
)
->
(
id
,[
g
])
)
$
g
^.
phylo_groupPeriodParents
)
)
[]
groups
groupsToEdges
::
Proximity
->
Synchrony
->
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
::
Proximity
->
Synchrony
->
Double
->
Map
Int
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
prox
sync
docs
groups
=
groupsToEdges
prox
sync
nbDocs
diago
groups
=
case
sync
of
case
sync
of
ByProximityThreshold
t
s
->
filter
(
\
(
_
,
w
)
->
w
>=
t
)
ByProximityThreshold
t
hr
sens
_
strat
->
$
toEdges
s
filter
(
\
(
_
,
w
)
->
w
>=
thr
)
$
toPairs
group
s
$
toEdges
sen
s
$
toPairs
strat
groups
ByProximityDistribution
s
->
ByProximityDistribution
s
ens
strat
->
let
diamonds
=
sortOn
snd
let
diamonds
=
sortOn
snd
$
toEdges
s
$
concat
$
toEdges
s
ens
$
concat
$
map
toPairs
$
toDiamonds
groups
$
map
(
\
gs
->
toPairs
strat
gs
)
$
toDiamonds
groups
in
take
(
div
(
length
diamonds
)
2
)
diamonds
in
take
(
div
(
length
diamonds
)
2
)
diamonds
where
where
toEdges
::
Double
->
[(
PhyloGroup
,
PhyloGroup
)]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
toEdges
::
Double
->
[(
PhyloGroup
,
PhyloGroup
)]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
toEdges
sens
edges
=
toEdges
sens
edges
=
case
prox
of
case
prox
of
WeightedLogJaccard
_
_
_
->
map
(
\
(
g
,
g'
)
->
WeightedLogJaccard
_
->
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard
sens
docs
((
g
,
g'
),
weightedLogJaccard'
sens
nbDocs
diago
(
g
^.
phylo_groupCooc
)
(
g'
^.
phylo_groupCooc
)
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
edges
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
edges
_
->
undefined
_
->
undefined
...
@@ -142,21 +189,34 @@ toParentId :: PhyloGroup -> PhyloGroupId
...
@@ -142,21 +189,34 @@ toParentId :: PhyloGroup -> PhyloGroupId
toParentId
child
=
((
child
^.
phylo_groupPeriod
,
child
^.
phylo_groupLevel
+
1
),
child
^.
phylo_groupIndex
)
toParentId
child
=
((
child
^.
phylo_groupPeriod
,
child
^.
phylo_groupLevel
+
1
),
child
^.
phylo_groupIndex
)
reduce
Branch
::
Proximity
->
Synchrony
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduce
Groups
::
Proximity
->
Synchrony
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduce
Branch
prox
sync
docs
branch
=
reduce
Groups
prox
sync
docs
diagos
branch
=
-- | 1) reduce a branch as a set of periods & groups
-- | 1) reduce a branch as a set of periods & groups
let
periods
=
fromListWith
(
++
)
let
periods
=
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,[
g
]))
branch
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,[
g
]))
branch
in
(
concat
.
concat
.
elems
)
in
(
concat
.
concat
.
elems
)
$
mapWithKey
(
\
prd
groups
->
$
mapWithKey
(
\
prd
groups
->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let
edges
=
groupsToEdges
prox
sync
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
groups
let
diago
=
reduceDiagos
$
filterDiago
diagos
[
prd
]
edges
=
groupsToEdges
prox
sync
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
diago
groups
in
map
(
\
comp
->
in
map
(
\
comp
->
-- | 4) add to each groups their futur level parent group
-- | 4) add to each groups their futur level parent group
let
parentId
=
toParentId
(
head'
"parentId"
comp
)
let
parentId
=
toParentId
(
head'
"parentId"
comp
)
in
map
(
\
g
->
g
&
phylo_groupLevelParents
%~
(
++
[(
parentId
,
1
)])
)
comp
)
in
map
(
\
g
->
g
&
phylo_groupLevelParents
%~
(
++
[(
parentId
,
1
)])
)
comp
)
-- |3) reduce the graph a a set of related components
-- |3) reduce the graph a a set of related components
$
toRelatedComponents
groups
edges
)
periods
$
toRelatedComponents
groups
edges
)
periods
adjustClustering
::
Synchrony
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
adjustClustering
sync
branches
=
case
sync
of
ByProximityThreshold
_
_
scope
_
->
case
scope
of
SingleBranch
->
branches
SiblingBranches
->
groupBy
(
\
g
g'
->
(
last'
"adjustClustering"
$
(
g
^.
phylo_groupMeta
)
!
"breaks"
)
==
(
last'
"adjustClustering"
$
(
g'
^.
phylo_groupMeta
)
!
"breaks"
))
$
sortOn
_phylo_groupBranchId
$
concat
branches
AllBranches
->
[
concat
branches
]
ByProximityDistribution
_
_
->
branches
synchronicClustering
::
Phylo
->
Phylo
synchronicClustering
::
Phylo
->
Phylo
...
@@ -164,11 +224,14 @@ synchronicClustering phylo =
...
@@ -164,11 +224,14 @@ synchronicClustering phylo =
let
prox
=
phyloProximity
$
getConfig
phylo
let
prox
=
phyloProximity
$
getConfig
phylo
sync
=
phyloSynchrony
$
getConfig
phylo
sync
=
phyloSynchrony
$
getConfig
phylo
docs
=
phylo
^.
phylo_timeDocs
docs
=
phylo
^.
phylo_timeDocs
branches
=
map
(
\
branch
->
reduceBranch
prox
sync
docs
branch
)
diagos
=
map
coocToDiago
$
phylo
^.
phylo_timeCooc
$
phyloToLastBranches
newBranches
=
map
(
\
branch
->
reduceGroups
prox
sync
docs
diagos
branch
)
$
traceSynchronyStart
phylo
$
map
processDynamics
branches'
=
branches
`
using
`
parList
rdeepseq
$
adjustClustering
sync
in
toNextLevel
phylo
$
concat
branches'
$
phyloToLastBranches
$
traceSynchronyStart
phylo
newBranches'
=
newBranches
`
using
`
parList
rdeepseq
in
toNextLevel'
phylo
$
concat
newBranches'
----------------
----------------
...
...
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
70a2339e
...
@@ -15,18 +15,19 @@ Portability : POSIX
...
@@ -15,18 +15,19 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.TemporalMatching
where
module
Gargantext.Viz.Phylo.TemporalMatching
where
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
union
,
dropWhile
,
partition
,
delete
)
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
union
,
dropWhile
,
partition
,
or
,
sort
,
(
!!
)
)
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
,
findWithDefault
,
keys
,
(
!
),
filterWithKey
)
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
findWithDefault
,
keys
,
(
!
),
(
!?
),
filterWithKey
,
singleton
,
empty
,
mapKeys
,
adjust
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.PhyloTools
-- import Prelude (logBase
)
import
Prelude
(
floor
)
import
Control.Lens
hiding
(
Level
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Debug.Trace
(
trace
)
import
Debug.Trace
(
trace
)
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Set
as
Set
...
@@ -35,30 +36,29 @@ import qualified Data.Set as Set
...
@@ -35,30 +36,29 @@ import qualified Data.Set as Set
-------------------
-------------------
-- |
Process the inverse sumLog
-- |
To compute a jaccard similarity between two lists
sumInvLog
::
Double
->
[
Double
]
->
Double
jaccard
::
[
Int
]
->
[
Int
]
->
Double
sumInvLog
s
l
=
foldl
(
\
mem
x
->
mem
+
(
1
/
log
(
s
+
x
)))
0
l
jaccard
inter'
union'
=
((
fromIntegral
.
length
)
$
inter'
)
/
((
fromIntegral
.
length
)
$
union'
)
-- | Process the sumLog
-- | Process the
inverse
sumLog
sum
Log
::
Double
->
[
Double
]
->
Double
sum
InvLog'
::
Double
->
Double
->
[
Double
]
->
Double
sum
Log
s
l
=
foldl
(
\
mem
x
->
mem
+
log
(
s
+
x
))
0
l
sum
InvLog'
s
nb
diago
=
foldl
(
\
mem
occ
->
mem
+
(
1
/
(
log
(
occ
+
s
)
/
log
(
nb
+
s
))))
0
diago
-- |
To compute a jaccard similarity between two lists
-- |
Process the sumLog
jaccard
::
[
Int
]
->
[
Int
]
->
Double
sumLog'
::
Double
->
Double
->
[
Double
]
->
Double
jaccard
inter'
union'
=
((
fromIntegral
.
length
)
$
inter'
)
/
((
fromIntegral
.
length
)
$
union'
)
sumLog'
s
nb
diago
=
foldl
(
\
mem
occ
->
mem
+
(
log
(
occ
+
s
)
/
log
(
nb
+
s
)))
0
diago
-- | To process a WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard'
::
Double
->
Double
->
Map
Int
Double
->
[
Int
]
->
[
Int
]
->
Double
weightedLogJaccard
::
Double
->
Double
->
Cooc
->
Cooc
->
[
Int
]
->
[
Int
]
->
Double
weightedLogJaccard'
sens
nbDocs
diago
ngrams
ngrams'
weightedLogJaccard
sens
docs
cooc
cooc'
ngrams
ngrams'
|
null
ngramsInter
=
0
|
null
ngramsInter
=
0
|
ngramsInter
==
ngramsUnion
=
1
|
ngramsInter
==
ngramsUnion
=
1
|
sens
==
0
=
jaccard
ngramsInter
ngramsUnion
|
sens
==
0
=
jaccard
ngramsInter
ngramsUnion
|
sens
>
0
=
(
sumInvLog
sens
coocInter
)
/
(
sumInvLog
sens
cooc
Union
)
|
sens
>
0
=
(
sumInvLog
'
sens
nbDocs
diagoInter
)
/
(
sumInvLog'
sens
nbDocs
diago
Union
)
|
otherwise
=
(
sumLog
sens
coocInter
)
/
(
sumLog
sens
coocUnion
)
|
otherwise
=
(
sumLog
'
sens
nbDocs
diagoInter
)
/
(
sumLog'
sens
nbDocs
diagoUnion
)
where
where
--------------------------------------
--------------------------------------
ngramsInter
::
[
Int
]
ngramsInter
::
[
Int
]
ngramsInter
=
intersect
ngrams
ngrams'
ngramsInter
=
intersect
ngrams
ngrams'
...
@@ -66,122 +66,125 @@ weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
...
@@ -66,122 +66,125 @@ weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
ngramsUnion
::
[
Int
]
ngramsUnion
::
[
Int
]
ngramsUnion
=
union
ngrams
ngrams'
ngramsUnion
=
union
ngrams
ngrams'
--------------------------------------
--------------------------------------
coocInter
::
[
Double
]
diagoInter
::
[
Double
]
coocInter
=
elems
$
map
(
/
docs
)
$
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
$
intersectionWith
(
+
)
cooc
cooc'
diagoInter
=
elems
$
restrictKeys
diago
(
Set
.
fromList
ngramsInter
)
-- coocInter = elems $ map (/docs) $ intersectionWith (+) cooc cooc'
--------------------------------------
--------------------------------------
diagoUnion
::
[
Double
]
coocUnion
::
[
Double
]
diagoUnion
=
elems
$
restrictKeys
diago
(
Set
.
fromList
ngramsUnion
)
coocUnion
=
elems
$
map
(
/
docs
)
$
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
$
unionWith
(
+
)
cooc
cooc'
--------------------------------------
--------------------------------------
-- | To choose a proximity function
pickProximity
::
Proximity
->
Double
->
Cooc
->
Cooc
->
[
Int
]
->
[
Int
]
->
Double
pickProximity
proximity
docs
cooc
cooc'
ngrams
ngrams'
=
case
proximity
of
WeightedLogJaccard
sens
_
_
->
weightedLogJaccard
sens
docs
cooc
cooc'
ngrams
ngrams'
Hamming
->
undefined
-- | To process the proximity between a current group and a pair of targets group
-- | To process the proximity between a current group and a pair of targets group
toProximity
::
Map
Date
Double
->
Proximity
->
PhyloGroup
->
PhyloGroup
->
PhyloGroup
->
Double
toProximity
::
Double
->
Map
Int
Double
->
Proximity
->
[
Int
]
->
[
Int
]
->
[
Int
]
->
Double
toProximity
docs
proximity
ego
target
target'
=
toProximity
nbDocs
diago
proximity
egoNgrams
targetNgrams
targetNgrams'
=
let
docs'
=
sum
$
elems
docs
case
proximity
of
cooc
=
if
target
==
target'
WeightedLogJaccard
sens
->
then
(
target
^.
phylo_groupCooc
)
let
pairNgrams
=
if
targetNgrams
==
targetNgrams'
else
sumCooc
(
target
^.
phylo_groupCooc
)
(
target'
^.
phylo_groupCooc
)
then
targetNgrams
ngrams
=
if
target
==
target'
else
union
targetNgrams
targetNgrams'
then
(
target
^.
phylo_groupNgrams
)
in
weightedLogJaccard'
sens
nbDocs
diago
egoNgrams
pairNgrams
else
union
(
target
^.
phylo_groupNgrams
)
(
target'
^.
phylo_groupNgrams
)
Hamming
->
undefined
in
pickProximity
proximity
docs'
(
ego
^.
phylo_groupCooc
)
cooc
(
ego
^.
phylo_groupNgrams
)
ngrams
------------------------
------------------------
-- | Local Matching | --
-- | Local Matching | --
------------------------
------------------------
toLastPeriod
::
Filiation
->
[
PhyloPeriodId
]
->
PhyloPeriodId
findLastPeriod
::
Filiation
->
[
PhyloPeriodId
]
->
PhyloPeriodId
toLastPeriod
fil
periods
=
case
fil
of
findLastPeriod
fil
periods
=
case
fil
of
ToParents
->
head'
"toLastPeriod"
(
sortOn
fst
periods
)
ToParents
->
head'
"findLastPeriod"
(
sortOn
fst
periods
)
ToChilds
->
last'
"toLastPeriod"
(
sortOn
fst
periods
)
ToChilds
->
last'
"findLastPeriod"
(
sortOn
fst
periods
)
toLazyPairs
::
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
PhyloPeriodId
->
[(
PhyloGroup
,
PhyloGroup
)]
->
[(
PhyloGroup
,
PhyloGroup
)]
-- | To filter pairs of candidates related to old pointers periods
toLazyPairs
pointers
fil
thr
prox
prd
pairs
=
removeOldPointers
::
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
PhyloPeriodId
if
null
pointers
then
pairs
->
[((
PhyloGroupId
,[
Int
]),(
PhyloGroupId
,[
Int
]))]
else
let
rest
=
filterPointers
prox
thr
pointers
->
[((
PhyloGroupId
,[
Int
]),(
PhyloGroupId
,[
Int
]))]
in
if
null
rest
removeOldPointers
oldPointers
fil
thr
prox
prd
pairs
then
let
prd'
=
toLastPeriod
fil
(
map
(
fst
.
fst
.
fst
)
pointers
)
|
null
oldPointers
=
pairs
in
if
prd'
==
prd
|
null
(
filterPointers
prox
thr
oldPointers
)
=
then
[]
let
lastMatchedPrd
=
findLastPeriod
fil
(
map
(
fst
.
fst
.
fst
)
oldPointers
)
else
filter
(
\
(
g
,
g'
)
->
in
if
lastMatchedPrd
==
prd
case
fil
of
then
[]
ToParents
->
((
fst
$
g
^.
phylo_groupPeriod
)
<
(
fst
prd'
))
else
filter
(
\
((
id
,
_
),(
id'
,
_
))
->
||
((
fst
$
g'
^.
phylo_groupPeriod
)
<
(
fst
prd'
))
case
fil
of
ToChilds
->
((
fst
$
g
^.
phylo_groupPeriod
)
>
(
fst
prd'
))
ToParents
->
(((
fst
.
fst
.
fst
)
id
)
<
(
fst
lastMatchedPrd
))
||
((
fst
$
g'
^.
phylo_groupPeriod
)
>
(
fst
prd'
)))
pairs
||
(((
fst
.
fst
.
fst
)
id'
)
<
(
fst
lastMatchedPrd
))
else
[]
ToChilds
->
(((
fst
.
fst
.
fst
)
id
)
>
(
fst
lastMatchedPrd
))
||
(((
fst
.
fst
.
fst
)
id'
)
>
(
fst
lastMatchedPrd
)))
pairs
|
otherwise
=
[]
-- | Find pairs of valuable candidates to be matched
makePairs'
::
PhyloGroup
->
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
Map
Date
Double
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs'
ego
candidates
periods
pointers
fil
thr
prox
docs
=
makePairs'
::
(
PhyloGroupId
,[
Int
])
->
[(
PhyloGroupId
,[
Int
])]
->
[
PhyloPeriodId
]
->
[
Pointer
]
->
Filiation
->
Double
->
Proximity
case
null
periods
of
->
Map
Date
Double
->
Map
Date
Cooc
->
[((
PhyloGroupId
,[
Int
]),(
PhyloGroupId
,[
Int
]))]
True
->
[]
makePairs'
(
egoId
,
egoNgrams
)
candidates
periods
oldPointers
fil
thr
prox
docs
diagos
=
False
->
toLazyPairs
pointers
fil
thr
prox
lastPrd
if
(
null
periods
)
-- | at least on of the pair candidates should be from the last added period
then
[]
$
filter
(
\
(
g
,
g'
)
->
((
g
^.
phylo_groupPeriod
)
==
lastPrd
)
else
removeOldPointers
oldPointers
fil
thr
prox
lastPrd
||
((
g'
^.
phylo_groupPeriod
)
==
lastPrd
))
-- | at least on of the pair candidates should be from the last added period
$
listToKeys
$
filter
(
\
((
id
,
_
),(
id'
,
_
))
->
((
fst
.
fst
)
id
==
lastPrd
)
||
((
fst
.
fst
)
id'
==
lastPrd
))
$
filter
(
\
g
->
(
g
^.
phylo_groupPeriod
==
lastPrd
)
$
listToKeys
||
((
toProximity
docs
prox
ego
ego
g
)
>=
thr
))
candidates
$
filter
(
\
(
id
,
ngrams
)
->
let
nbDocs
=
(
sum
.
elems
)
$
filterDocs
docs
([(
fst
.
fst
)
egoId
,
(
fst
.
fst
)
id
])
diago
=
reduceDiagos
$
filterDiago
diagos
([(
fst
.
fst
)
egoId
,
(
fst
.
fst
)
id
])
in
(
toProximity
nbDocs
diago
prox
egoNgrams
egoNgrams
ngrams
)
>=
thr
)
candidates
where
where
lastPrd
::
PhyloPeriodId
lastPrd
::
PhyloPeriodId
lastPrd
=
to
LastPeriod
fil
periods
lastPrd
=
find
LastPeriod
fil
periods
filterPointers
::
Proximity
->
Double
->
[
Pointer
]
->
[
Pointer
]
filterPointers
::
Proximity
->
Double
->
[
Pointer
]
->
[
Pointer
]
filterPointers
proxi
thr
pts
=
filter
(
\
(
_
,
w
)
->
filterProximity
proxi
thr
w
)
pts
filterPointers
proxi
thr
pts
=
filter
(
\
(
_
,
w
)
->
filterProximity
proxi
thr
w
)
pts
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Double
->
PhyloGroup
->
PhyloGroup
reduceDiagos
::
Map
Date
Cooc
->
Map
Int
Double
phyloGroupMatching
candidates
fil
proxi
docs
thr
ego
=
reduceDiagos
diagos
=
mapKeys
(
\
(
k
,
_
)
->
k
)
case
null
nextPointers
of
$
foldl
(
\
acc
diago
->
unionWith
(
+
)
acc
diago
)
empty
(
elems
diagos
)
-- | let's find new pointers
True
->
if
null
$
filterPointers
proxi
thr
$
getPeriodPointers
fil
ego
then
addPointers
ego
fil
TemporalPointer
[]
phyloGroupMatching
::
[[(
PhyloGroupId
,[
Int
])]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Map
Date
Cooc
-- | or keep the old ones
->
Double
->
[
Pointer
]
->
(
PhyloGroupId
,[
Int
])
->
[
Pointer
]
else
addPointers
ego
fil
TemporalPointer
phyloGroupMatching
candidates
fil
proxi
docs
diagos
thr
oldPointers
(
id
,
ngrams
)
=
$
filterPointers
proxi
thr
$
getPeriodPointers
fil
ego
if
(
null
$
filterPointers
proxi
thr
oldPointers
)
False
->
addPointers
ego
fil
TemporalPointer
-- | let's find new pointers
$
head'
"phyloGroupMatching"
then
if
null
nextPointers
-- | Keep only the best set of pointers grouped by proximity
then
[]
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
else
head'
"phyloGroupMatching"
$
reverse
$
sortOn
snd
$
head'
"pointers"
-- | Keep only the best set of pointers grouped by proximity
$
nextPointers
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
$
reverse
$
sortOn
snd
$
head'
"pointers"
nextPointers
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
else
oldPointers
where
where
nextPointers
::
[[
Pointer
]]
nextPointers
::
[[
Pointer
]]
nextPointers
=
take
1
nextPointers
=
take
1
$
dropWhile
(
null
)
$
dropWhile
(
null
)
-- | for each time frame, process the proximity on relevant pairs of targeted groups
-- | for each time frame, process the proximity on relevant pairs of targeted groups
$
scanl
(
\
acc
groups
->
$
scanl
(
\
acc
groups
->
let
periods
=
nub
$
map
_phylo_groupPeriod
$
concat
groups
let
periods
=
nub
$
map
(
fst
.
fst
.
fst
)
$
concat
groups
docs'
=
(
filterDocs
docs
([
ego
^.
phylo_groupPeriod
]
++
periods
))
nbdocs
=
sum
$
elems
$
(
filterDocs
docs
([(
fst
.
fst
)
id
]
++
periods
))
pairs
=
makePairs'
ego
(
concat
groups
)
periods
(
getPeriodPointers
fil
ego
)
fil
thr
proxi
docs
diago
=
reduceDiagos
$
filterDiago
diagos
([(
fst
.
fst
)
id
]
++
periods
)
-- | important resize nbdocs et diago dans le make pairs
pairs
=
makePairs'
(
id
,
ngrams
)
(
concat
groups
)
periods
oldPointers
fil
thr
proxi
docs
diagos
in
acc
++
(
filterPointers
proxi
thr
in
acc
++
(
filterPointers
proxi
thr
$
concat
$
concat
$
map
(
\
(
c
,
c'
)
->
$
map
(
\
(
c
,
c'
)
->
-- | process the proximity between the current group and a pair of candidates
-- | process the proximity between the current group and a pair of candidates
let
proximity
=
toProximity
docs'
proxi
ego
c
c'
let
proximity
=
toProximity
nbdocs
diago
proxi
ngrams
(
snd
c
)
(
snd
c'
)
in
if
(
c
==
c'
)
in
if
(
c
==
c'
)
then
[(
getGroupId
c
,
proximity
)]
then
[(
fst
c
,
proximity
)]
else
[(
getGroupId
c
,
proximity
),(
getGroupId
c'
,
proximity
)]
)
pairs
))
[]
else
[(
fst
c
,
proximity
),(
fst
c'
,
proximity
)]
)
pairs
))
[]
$
inits
candidates
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
$
inits
candidates
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
filterDocs
::
Map
Date
Double
->
[
PhyloPeriodId
]
->
Map
Date
Double
filterDocs
::
Map
Date
Double
->
[
PhyloPeriodId
]
->
Map
Date
Double
filterDocs
d
pds
=
restrictKeys
d
$
periodsToYears
pds
filterDocs
d
pds
=
restrictKeys
d
$
periodsToYears
pds
filterDiago
::
Map
Date
Cooc
->
[
PhyloPeriodId
]
->
Map
Date
Cooc
filterDiago
diago
pds
=
restrictKeys
diago
$
periodsToYears
pds
-----------------------------
-----------------------------
-- | Matching Processing | --
-- | Matching Processing | --
...
@@ -195,40 +198,41 @@ getNextPeriods fil max' pId pIds =
...
@@ -195,40 +198,41 @@ getNextPeriods fil max' pId pIds =
ToParents
->
take
max'
$
(
reverse
.
fst
)
$
splitAt
(
elemIndex'
pId
pIds
)
pIds
ToParents
->
take
max'
$
(
reverse
.
fst
)
$
splitAt
(
elemIndex'
pId
pIds
)
pIds
getCandidates
::
Filiation
->
PhyloGroup
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
getCandidates
::
PhyloGroup
->
[[(
PhyloGroupId
,[
Int
])]]
->
[[(
PhyloGroupId
,[
Int
])]]
getCandidates
fil
ego
targets
=
getCandidates
ego
targets
=
case
fil
of
map
(
\
groups'
->
ToChilds
->
targets'
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
ego
^.
phylo_groupNgrams
)
(
snd
g'
)
ToParents
->
reverse
targets'
)
groups'
)
targets
where
targets'
::
[[
PhyloGroup
]]
targets'
=
matchGroupsToGroups
::
Int
->
[
PhyloPeriodId
]
->
Proximity
->
Double
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloGroup
]
->
[
PhyloGroup
]
map
(
\
groups'
->
matchGroupsToGroups
frame
periods
proximity
thr
docs
coocs
groups
=
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
ego
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)
let
groups'
=
groupByField
_phylo_groupPeriod
groups
)
groups'
)
targets
in
foldl'
(
\
acc
prd
->
let
-- | 1) find the parents/childs matching periods
periodsPar
=
getNextPeriods
ToParents
frame
prd
periods
phyloBranchMatching
::
Int
->
[
PhyloPeriodId
]
->
Proximity
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
periodsChi
=
getNextPeriods
ToChilds
frame
prd
periods
phyloBranchMatching
frame
periods
proximity
thr
docs
branch
=
traceBranchMatching
proximity
thr
-- | 2) find the parents/childs matching candidates
-- $ matchByPeriods ToParents
candidatesPar
=
map
(
\
prd'
->
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
findWithDefault
[]
prd'
groups'
)
periodsPar
-- $ groupByField _phylo_groupPeriod
candidatesChi
=
map
(
\
prd'
->
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
findWithDefault
[]
prd'
groups'
)
periodsChi
$
matchByPeriods
-- | 3) find the parents/child number of docs by years
$
groupByField
_phylo_groupPeriod
branch
docsPar
=
filterDocs
docs
([
prd
]
++
periodsPar
)
where
docsChi
=
filterDocs
docs
([
prd
]
++
periodsChi
)
--------------------------------------
-- | 4) find the parents/child diago by years
matchByPeriods
::
Map
PhyloPeriodId
[
PhyloGroup
]
->
[
PhyloGroup
]
diagoPar
=
filterDiago
(
map
coocToDiago
coocs
)
([
prd
]
++
periodsPar
)
matchByPeriods
branch'
=
foldl'
(
\
acc
prd
->
diagoChi
=
filterDiago
(
map
coocToDiago
coocs
)
([
prd
]
++
periodsPar
)
let
periodsPar
=
getNextPeriods
ToParents
frame
prd
periods
-- | 5) match in parallel all the groups (egos) to their possible candidates
periodsChi
=
getNextPeriods
ToChilds
frame
prd
periods
egos
=
map
(
\
ego
->
candidatesPar
=
map
(
\
prd'
->
findWithDefault
[]
prd'
branch'
)
periodsPar
let
pointersPar
=
phyloGroupMatching
(
getCandidates
ego
candidatesPar
)
ToParents
proximity
docsPar
diagoPar
candidatesChi
=
map
(
\
prd'
->
findWithDefault
[]
prd'
branch'
)
periodsChi
thr
(
getPeriodPointers
ToParents
ego
)
(
getGroupId
ego
,
ego
^.
phylo_groupNgrams
)
docsPar
=
filterDocs
docs
([
prd
]
++
periodsPar
)
pointersChi
=
phyloGroupMatching
(
getCandidates
ego
candidatesChi
)
ToChilds
proximity
docsChi
diagoChi
docsChi
=
filterDocs
docs
([
prd
]
++
periodsChi
)
thr
(
getPeriodPointers
ToChilds
ego
)
(
getGroupId
ego
,
ego
^.
phylo_groupNgrams
)
egos
=
map
(
\
ego
->
phyloGroupMatching
(
getCandidates
ToParents
ego
candidatesPar
)
ToParents
proximity
docsPar
thr
in
addPointers
ToChilds
TemporalPointer
pointersChi
$
phyloGroupMatching
(
getCandidates
ToChilds
ego
candidatesChi
)
ToChilds
proximity
docsChi
thr
ego
)
$
addPointers
ToParents
TemporalPointer
pointersPar
ego
)
$
findWithDefault
[]
prd
branch'
$
findWithDefault
[]
prd
groups'
egos'
=
egos
`
using
`
parList
rdeepseq
egos'
=
egos
`
using
`
parList
rdeepseq
in
acc
++
egos'
)
[]
periods
in
acc
++
egos'
)
[]
periods
-----------------------
-----------------------
...
@@ -236,85 +240,38 @@ phyloBranchMatching frame periods proximity thr docs branch = traceBranchMatchin
...
@@ -236,85 +240,38 @@ phyloBranchMatching frame periods proximity thr docs branch = traceBranchMatchin
-----------------------
-----------------------
count
::
Eq
a
=>
a
->
[
a
]
->
Int
count
x
=
length
.
filter
(
==
x
)
termFreq'
::
Int
->
[
PhyloGroup
]
->
Double
termFreq'
term
groups
=
let
ngrams
=
concat
$
map
_phylo_groupNgrams
groups
in
log
((
fromIntegral
$
count
term
ngrams
)
/
(
fromIntegral
$
length
ngrams
))
relevantBranches
::
Int
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
relevantBranches
::
Int
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
relevantBranches
term
branches
=
relevantBranches
term
branches
=
filter
(
\
groups
->
(
any
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
groups
))
branches
filter
(
\
groups
->
(
any
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
groups
))
branches
branchCov'
::
[
PhyloGroup
]
->
[[
PhyloGroup
]]
->
Double
fScore
::
Double
->
Int
->
[
PhyloGroup
]
->
[[
PhyloGroup
]]
->
Double
branchCov'
branch
branches
=
fScore
beta
i
bk
bks
=
(
fromIntegral
$
length
branch
)
/
(
fromIntegral
$
length
$
concat
branches
)
let
recall
=
(
(
fromIntegral
$
length
$
filter
(
\
g
->
elem
i
$
g
^.
phylo_groupNgrams
)
bk
)
/
(
fromIntegral
$
length
$
filter
(
\
g
->
elem
i
$
g
^.
phylo_groupNgrams
)
$
concat
bks
))
accuracy
=
(
(
fromIntegral
$
length
$
filter
(
\
g
->
elem
i
$
g
^.
phylo_groupNgrams
)
bk
)
toRecall
::
Double
->
Int
->
Int
->
[[
PhyloGroup
]]
->
Double
/
(
fromIntegral
$
length
bk
))
toRecall
freq
term
border
branches
=
in
((
1
+
beta
**
2
)
*
accuracy
*
recall
)
-- | given a random term in a phylo
/
(((
beta
**
2
)
*
accuracy
+
recall
))
freq
-- | for each relevant branches
*
(
sum
$
map
(
\
branch
->
-- | given its local coverage
((
branchCov'
branch
branches'
)
/
(
sum
$
map
(
\
b
->
branchCov'
b
branches'
)
branches'
))
-- | compute the local recall
*
(
(
fromIntegral
$
length
$
filter
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
branch
)
/
(
(
fromIntegral
$
length
$
filter
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
$
concat
branches'
)
-- | with a ponderation from border branches
+
(
fromIntegral
border
))
))
branches'
)
where
branches'
::
[[
PhyloGroup
]]
branches'
=
relevantBranches
term
branches
toAccuracy
::
Double
->
Int
->
[[
PhyloGroup
]]
->
Double
toAccuracy
freq
term
branches
=
if
(
null
branches
)
then
0
else
-- | given a random term in a phylo
freq
-- | for each relevant branches
*
(
sum
$
map
(
\
branch
->
-- | given its local coverage
((
branchCov'
branch
branches'
)
/
(
sum
$
map
(
\
b
->
branchCov'
b
branches'
)
branches'
))
-- | compute the local accuracy
*
(
(
fromIntegral
$
length
$
filter
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
branch
)
/
(
fromIntegral
$
length
branch
)))
branches'
)
where
branches'
::
[[
PhyloGroup
]]
branches'
=
relevantBranches
term
branches
toPhyloQuality
::
Double
->
Map
Int
Double
->
Int
->
Double
->
[[
PhyloGroup
]]
->
Double
toPhyloQuality
beta
frequency
border
oldAcc
branches
=
-- trace (" rec : " <> show(recall)) $
-- trace (" acc : " <> show(accuracy)) $
if
(
null
branches
)
then
0
else
((
1
+
beta
**
2
)
*
accuracy
*
recall
)
/
(((
beta
**
2
)
*
accuracy
+
recall
))
where
-- | for each term compute the global accuracy
accuracy
::
Double
accuracy
=
oldAcc
+
(
sum
$
map
(
\
term
->
toAccuracy
(
frequency
!
term
)
term
branches
)
$
keys
frequency
)
-- | for each term compute the global recall
recall
::
Double
recall
=
sum
$
map
(
\
term
->
toRecall
(
frequency
!
term
)
term
border
branches
)
$
keys
frequency
toBorderAccuracy
::
Map
Int
Double
->
[[
PhyloGroup
]
]
->
Double
wk
::
[
PhyloGroup
]
->
Double
toBorderAccuracy
freq
branches
=
sum
$
map
(
\
t
->
toAccuracy
(
freq
!
t
)
t
branches
)
$
keys
freq
wk
bk
=
fromIntegral
$
length
bk
-----------------------------
toPhyloQuality'
::
Double
->
Map
Int
Double
->
[[
PhyloGroup
]]
->
Double
-- | Adaptative Matching | --
toPhyloQuality'
beta
freq
branches
=
-----------------------------
if
(
null
branches
)
then
0
else
sum
$
map
(
\
i
->
let
bks
=
relevantBranches
i
branches
in
(
freq
!
i
)
*
(
sum
$
map
(
\
bk
->
((
wk
bk
)
/
(
sum
$
map
wk
bks
))
*
(
fScore
beta
i
bk
bks
))
bks
))
$
keys
freq
------------------------------------
-- | Constant Temporal Matching | --
------------------------------------
groupsToBranches
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
groupsToBranches
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
...
@@ -326,6 +283,7 @@ groupsToBranches groups =
...
@@ -326,6 +283,7 @@ groupsToBranches groups =
++
(
map
fst
$
group
^.
phylo_groupPeriodParents
)
++
(
map
fst
$
group
^.
phylo_groupPeriodParents
)
++
(
map
fst
$
group
^.
phylo_groupPeriodChilds
)
)
$
elems
groups
++
(
map
fst
$
group
^.
phylo_groupPeriodChilds
)
)
$
elems
groups
-- | first find the related components by inside each ego's period
-- | first find the related components by inside each ego's period
-- | a supprimer
graph'
=
map
relatedComponents
egos
graph'
=
map
relatedComponents
egos
-- | then run it for the all the periods
-- | then run it for the all the periods
graph
=
zip
[
1
..
]
graph
=
zip
[
1
..
]
...
@@ -341,81 +299,248 @@ reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
...
@@ -341,81 +299,248 @@ reduceFrequency :: Map Int Double -> [[PhyloGroup]] -> Map Int Double
reduceFrequency
frequency
branches
=
reduceFrequency
frequency
branches
=
restrictKeys
frequency
(
Set
.
fromList
$
(
nub
.
concat
)
$
map
_phylo_groupNgrams
$
concat
branches
)
restrictKeys
frequency
(
Set
.
fromList
$
(
nub
.
concat
)
$
map
_phylo_groupNgrams
$
concat
branches
)
updateThr
::
Double
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
alterBorder
::
Int
->
[[
PhyloGroup
]]
->
[
PhyloGroup
]
->
Int
updateThr
thr
branches
=
map
(
\
b
->
map
(
\
g
->
alterBorder
border
branches
branch
=
border
+
(
length
$
concat
branches
)
-
(
length
branch
)
g
&
phylo_groupMeta
.~
(
singleton
"seaLevels"
(((
g
^.
phylo_groupMeta
)
!
"seaLevels"
)
++
[
thr
])))
b
)
branches
recursiveMatching
::
Proximity
->
Double
->
Int
->
Map
Int
Double
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Double
->
Int
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
-- | Sequentially break each branch of a phylo where
recursiveMatching
proximity
beta
minBranch
frequency
egoThr
frame
periods
docs
quality
border
oldAcc
groups
=
-- done = all the allready broken branches
if
((
egoThr
>=
1
)
||
(
quality
>
quality'
)
||
((
length
$
concat
$
snd
branches'
)
==
(
length
groups
)))
-- ego = the current branch we want to break
then
-- rest = the branches we still have to break
trace
(
" ✗ F(β) = "
<>
show
(
quality
)
<>
" (vs) "
<>
show
(
quality'
)
<>
"
\n
"
breakBranches
::
Proximity
->
Double
->
Map
Int
Double
->
Int
->
Double
->
Double
->
Double
<>
" |✓ "
<>
show
(
length
$
fst
branches'
)
<>
show
(
map
length
$
fst
branches'
)
->
Int
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloPeriodId
]
->
[([
PhyloGroup
],
Bool
)]
->
([
PhyloGroup
],
Bool
)
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
<>
" |✗ "
<>
show
(
length
$
snd
branches'
)
<>
"["
<>
show
(
length
$
concat
$
snd
branches'
)
<>
"]"
)
$
breakBranches
proximity
beta
frequency
minBranch
thr
depth
elevation
frame
docs
coocs
periods
done
ego
rest
=
groups
-- | 1) keep or not the new division of ego
else
let
done'
=
done
++
(
if
snd
ego
let
next
=
map
(
\
b
->
recursiveMatching
proximity
beta
minBranch
then
(
reduceFrequency
frequency
(
fst
branches'
))
(
if
((
null
(
fst
ego'
))
||
(
quality
>
quality'
))
(
egoThr
+
(
getThresholdStep
proximity
))
then
frame
periods
docs
quality'
-- trace (" ✗ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
(
alterBorder
border
(
fst
branches'
)
b
)
-- <> " | " <> show(length $ fst ego) <> " groups : "
(
oldAcc
+
(
toBorderAccuracy
frequency
(
delete
b
((
fst
branches'
)
++
(
snd
branches'
)))))
-- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
b
)
(
fst
branches'
)
-- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
in
trace
(
" ✓ F(β) = "
<>
show
(
quality
)
<>
" (vs) "
<>
show
(
quality'
)
<>
"
\n
"
[(
fst
ego
,
False
)]
<>
" |✓ "
<>
show
(
length
$
fst
branches'
)
<>
show
(
map
length
$
fst
branches'
)
else
<>
" |✗ "
<>
show
(
length
$
snd
branches'
)
<>
"["
<>
show
(
length
$
concat
$
snd
branches'
)
<>
"]"
)
$
-- trace (" ✓ F(β) = " <> show(quality) <> " (vs) " <> show(quality')
concat
(
next
++
(
snd
branches'
))
-- <> " | " <> show(length $ fst ego) <> " groups : "
-- <> " |✓ " <> show(length $ fst ego') <> show(map length $ fst ego')
-- <> " |✗ " <> show(length $ snd ego') <> "[" <> show(length $ concat $ snd ego') <> "]")
((
map
(
\
e
->
(
e
,
True
))
(
fst
ego'
))
++
(
map
(
\
e
->
(
e
,
False
))
(
snd
ego'
))))
else
[
ego
])
in
-- | 2) if there is no more branches in rest then return else continue
if
null
rest
then
done'
else
breakBranches
proximity
beta
frequency
minBranch
thr
depth
elevation
frame
docs
coocs
periods
done'
(
head'
"breakBranches"
rest
)
(
tail'
"breakBranches"
rest
)
where
--------------------------------------
quality
::
Double
quality
=
toPhyloQuality'
beta
frequency
((
map
fst
done
)
++
[
fst
ego
]
++
(
map
fst
rest
))
--------------------------------------
ego'
::
([[
PhyloGroup
]],[[
PhyloGroup
]])
ego'
=
let
branches
=
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
matchGroupsToGroups
frame
periods
proximity
thr
docs
coocs
(
fst
ego
)
branches'
=
branches
`
using
`
parList
rdeepseq
in
partition
(
\
b
->
(
length
$
nub
$
map
_phylo_groupPeriod
b
)
>=
minBranch
)
$
thrToMeta
thr
$
depthToMeta
(
elevation
-
depth
)
branches'
--------------------------------------
quality'
::
Double
quality'
=
toPhyloQuality'
beta
frequency
((
map
fst
done
)
++
(
fst
ego'
)
++
(
snd
ego'
)
++
(
map
fst
rest
))
seaLevelMatching
::
Proximity
->
Double
->
Int
->
Map
Int
Double
->
Double
->
Double
->
Double
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Map
Date
Cooc
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
seaLevelMatching
proximity
beta
minBranch
frequency
thr
step
depth
elevation
frame
periods
docs
coocs
branches
=
-- | if there is no branch to break or if seaLvl level > 1 then end
if
(
thr
>=
1
)
||
((
not
.
or
)
$
map
snd
branches
)
then
branches
else
-- | break all the possible branches at the current seaLvl level
let
branches'
=
breakBranches
proximity
beta
frequency
minBranch
thr
depth
elevation
frame
docs
coocs
periods
[]
(
head'
"seaLevelMatching"
branches
)
(
tail'
"seaLevelMatching"
branches
)
frequency'
=
reduceFrequency
frequency
(
map
fst
branches'
)
in
seaLevelMatching
proximity
beta
minBranch
frequency'
(
thr
+
step
)
step
(
depth
-
1
)
elevation
frame
periods
docs
coocs
branches'
constanteTemporalMatching
::
Double
->
Double
->
Phylo
->
Phylo
constanteTemporalMatching
start
step
phylo
=
updatePhyloGroups
1
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
traceMatchEnd
$
concat
branches
)
phylo
where
-- | 2) process the temporal matching by elevating seaLvl level
branches
::
[[
PhyloGroup
]]
branches
=
map
fst
$
seaLevelMatching
(
phyloProximity
$
getConfig
phylo
)
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
(
phylo
^.
phylo_termFreq
)
start
step
((((
1
-
start
)
/
step
)
-
1
))
(((
1
-
start
)
/
step
))
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
phylo
^.
phylo_timeCooc
)
groups
-- | 1) for each group process an initial temporal Matching
-- | here we suppose that all the groups of level 1 are part of the same big branch
groups
::
[([
PhyloGroup
],
Bool
)]
groups
=
map
(
\
b
->
(
b
,(
length
$
nub
$
map
_phylo_groupPeriod
b
)
>=
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)))
$
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
matchGroupsToGroups
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phyloProximity
$
getConfig
phylo
)
start
(
phylo
^.
phylo_timeDocs
)
(
phylo
^.
phylo_timeCooc
)
(
traceTemporalMatching
$
getGroupsFromLevel
1
phylo
)
--------------------------------------
-- | Adaptative Temporal Matching | --
--------------------------------------
thrToMeta
::
Double
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
thrToMeta
thr
branches
=
map
(
\
b
->
map
(
\
g
->
g
&
phylo_groupMeta
.~
(
adjust
(
\
lst
->
lst
++
[
thr
])
"seaLevels"
(
g
^.
phylo_groupMeta
)))
b
)
branches
depthToMeta
::
Double
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
depthToMeta
depth
branches
=
let
break
=
length
branches
>
1
in
map
(
\
b
->
map
(
\
g
->
if
break
then
g
&
phylo_groupMeta
.~
(
adjust
(
\
lst
->
lst
++
[
depth
])
"breaks"
(
g
^.
phylo_groupMeta
))
else
g
)
b
)
branches
reduceTupleMapByKeys
::
Eq
a
=>
[
a
]
->
Map
(
a
,
a
)
Double
->
Map
(
a
,
a
)
Double
reduceTupleMapByKeys
ks
m
=
filterWithKey
(
\
(
k
,
k'
)
_
->
(
elem
k
ks
)
&&
(
elem
k'
ks
))
m
getInTupleMap
::
Ord
a
=>
Map
(
a
,
a
)
Double
->
a
->
a
->
Double
getInTupleMap
m
k
k'
|
isJust
(
m
!?
(
k
,
k'
))
=
m
!
(
k
,
k'
)
|
isJust
(
m
!?
(
k'
,
k
))
=
m
!
(
k'
,
k
)
|
otherwise
=
0
toThreshold
::
Double
->
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
->
Double
toThreshold
lvl
proxiGroups
=
let
idx
=
((
Map
.
size
proxiGroups
)
`
div
`
(
floor
lvl
))
-
1
in
if
idx
>=
0
then
(
sort
$
elems
proxiGroups
)
!!
idx
else
1
-- done = all the allready broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
adaptativeBreakBranches
::
Proximity
->
Double
->
Double
->
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
->
Double
->
Map
Int
Double
->
Int
->
Int
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloPeriodId
]
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
->
([
PhyloGroup
],(
Bool
,[
Double
]))
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
adaptativeBreakBranches
proxiConf
depth
elevation
groupsProxi
beta
frequency
minBranch
frame
docs
coocs
periods
done
ego
rest
=
-- | 1) keep or not the new division of ego
let
done'
=
done
++
(
if
(
fst
.
snd
)
ego
then
(
if
((
null
(
fst
ego'
))
||
(
quality
>
quality'
))
then
[(
concat
$
thrToMeta
thr
$
[
fst
ego
],(
False
,
((
snd
.
snd
)
ego
)))]
else
(
(
map
(
\
e
->
(
e
,(
True
,
((
snd
.
snd
)
ego
)
++
[
thr
])))
(
fst
ego'
))
++
(
map
(
\
e
->
(
e
,(
False
,
((
snd
.
snd
)
ego
))))
(
snd
ego'
))))
else
[(
concat
$
thrToMeta
thr
$
[
fst
ego
],
snd
ego
)])
in
-- | uncomment let .. in for debugging
-- let part1 = partition (snd) done'
-- part2 = partition (snd) rest
-- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
-- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
-- ) $
-- | 2) if there is no more branches in rest then return else continue
if
null
rest
then
done'
else
adaptativeBreakBranches
proxiConf
depth
elevation
groupsProxi
beta
frequency
minBranch
frame
docs
coocs
periods
done'
(
head'
"breakBranches"
rest
)
(
tail'
"breakBranches"
rest
)
where
where
-- | 2) for each of the possible next branches process the phyloQuality score
--------------------------------------
thr
::
Double
thr
=
toThreshold
depth
$
Map
.
filter
(
\
v
->
v
>
(
last'
"breakBranches"
$
(
snd
.
snd
)
ego
))
$
reduceTupleMapByKeys
(
map
getGroupId
$
fst
ego
)
groupsProxi
--------------------------------------
quality
::
Double
quality
=
toPhyloQuality'
beta
frequency
((
map
fst
done
)
++
[
fst
ego
]
++
(
map
fst
rest
))
--------------------------------------
ego'
::
([[
PhyloGroup
]],[[
PhyloGroup
]])
ego'
=
let
branches
=
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
matchGroupsToGroups
frame
periods
proxiConf
thr
docs
coocs
(
fst
ego
)
branches'
=
branches
`
using
`
parList
rdeepseq
in
partition
(
\
b
->
(
length
$
nub
$
map
_phylo_groupPeriod
b
)
>
minBranch
)
$
thrToMeta
thr
$
depthToMeta
(
elevation
-
depth
)
branches'
--------------------------------------
quality'
::
Double
quality'
::
Double
quality'
=
toPhyloQuality
beta
frequency
border
oldAcc
((
fst
branches'
)
++
(
snd
branches'
))
quality'
=
toPhyloQuality'
beta
frequency
-- | 1) for each local branch process a temporal matching then find the resulting branches
((
map
fst
done
)
++
(
fst
ego'
)
++
(
snd
ego'
)
++
(
map
fst
rest
))
branches'
::
([[
PhyloGroup
]],[[
PhyloGroup
]])
branches'
=
let
branches
=
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
adaptativeSeaLevelMatching
::
Proximity
->
Double
->
Double
->
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
$
phyloBranchMatching
frame
periods
proximity
egoThr
docs
groups
->
Double
->
Int
->
Map
Int
Double
in
partition
(
\
b
->
length
b
>=
minBranch
)
(
branches
`
using
`
parList
rdeepseq
)
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Map
Date
Cooc
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
adaptativeSeaLevelMatching
proxiConf
depth
elevation
groupsProxi
beta
minBranch
frequency
frame
periods
docs
coocs
branches
=
temporalMatching
::
Phylo
->
Phylo
-- | if there is no branch to break or if seaLvl level >= depth then end
temporalMatching
phylo
=
updatePhyloGroups
1
branches'
phylo
if
(
Map
.
null
groupsProxi
)
||
(
depth
<=
0
)
||
((
not
.
or
)
$
map
(
fst
.
snd
)
branches
)
then
branches
else
-- | break all the possible branches at the current seaLvl level
let
branches'
=
adaptativeBreakBranches
proxiConf
depth
elevation
groupsProxi
beta
frequency
minBranch
frame
docs
coocs
periods
[]
(
head'
"seaLevelMatching"
branches
)
(
tail'
"seaLevelMatching"
branches
)
frequency'
=
reduceFrequency
frequency
(
map
fst
branches'
)
groupsProxi'
=
reduceTupleMapByKeys
(
map
(
getGroupId
)
$
concat
$
map
(
fst
)
$
filter
(
fst
.
snd
)
branches'
)
groupsProxi
-- thr = toThreshold depth groupsProxi
in
trace
(
"
\n
"
<>
foldl
(
\
acc
_
->
acc
<>
"🌊 "
)
""
[
0
..
(
elevation
-
depth
)]
<>
" [✓ "
<>
show
(
length
$
filter
(
fst
.
snd
)
branches'
)
<>
"("
<>
show
(
length
$
concat
$
map
(
fst
)
$
filter
(
fst
.
snd
)
branches'
)
<>
")|✗ "
<>
show
(
length
$
filter
(
not
.
fst
.
snd
)
branches'
)
<>
"("
<>
show
(
length
$
concat
$
map
(
fst
)
$
filter
(
not
.
fst
.
snd
)
branches'
)
<>
")]"
<>
" thr = "
)
$
adaptativeSeaLevelMatching
proxiConf
(
depth
-
1
)
elevation
groupsProxi'
beta
minBranch
frequency'
frame
periods
docs
coocs
branches'
adaptativeTemporalMatching
::
Double
->
Phylo
->
Phylo
adaptativeTemporalMatching
elevation
phylo
=
updatePhyloGroups
1
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
traceMatchEnd
$
concat
branches
)
phylo
where
where
-- | 5) apply the recursive matching
-- | 2) process the temporal matching by elevating seaLvl level
branches'
::
Map
PhyloGroupId
PhyloGroup
branches
::
[[
PhyloGroup
]]
branches'
=
branches
=
map
fst
let
next
=
trace
(
" ✓ F(β) = "
<>
show
(
quality
)
$
adaptativeSeaLevelMatching
(
phyloProximity
$
getConfig
phylo
)
<>
" |✓ "
<>
show
(
length
$
fst
branches
)
<>
show
(
map
length
$
fst
branches
)
(
elevation
-
1
)
<>
" |✗ "
<>
show
(
length
$
snd
branches
)
<>
"["
<>
show
(
length
$
concat
$
snd
branches
)
<>
"]"
)
elevation
$
map
(
\
branch
->
recursiveMatching
(
phyloProximity
$
getConfig
phylo
)
(
phylo
^.
phylo_groupsProxi
)
(
_qua_relevance
$
phyloQuality
$
getConfig
phylo
)
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
(
reduceFrequency
frequency
(
fst
branches
))
(
phylo
^.
phylo_termFreq
)
(
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
+
(
getThresholdStep
$
phyloProximity
$
getConfig
phylo
))
(
getPeriodIds
phylo
)
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
getPeriodIds
phylo
)
(
phylo
^.
phylo_timeCooc
)
(
phylo
^.
phylo_timeDocs
)
quality
(
alterBorder
0
(
fst
branches
)
branch
)
groups
(
toBorderAccuracy
frequency
(
delete
branch
((
fst
branches
)
++
(
snd
branches
))))
branch
)
(
fst
branches
)
in
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
traceMatchEnd
(
concat
(
next
++
(
snd
branches
)))
-- | 4) process the quality score
quality
::
Double
quality
=
toPhyloQuality
(
_qua_relevance
$
phyloQuality
$
getConfig
phylo
)
frequency
0
0
((
fst
branches
)
++
(
snd
branches
))
-- | 3) process the constants of the quality score
frequency
::
Map
Int
Double
frequency
=
let
terms
=
ngramsInBranches
((
fst
branches
)
++
(
snd
branches
))
freqs
=
map
(
\
t
->
termFreq'
t
$
concat
((
fst
branches
)
++
(
snd
branches
)))
terms
in
fromList
$
map
(
\
(
t
,
freq
)
->
(
t
,
freq
/
(
sum
freqs
)))
$
zip
terms
freqs
-- | 2) group into branches
branches
::
([[
PhyloGroup
]],[[
PhyloGroup
]])
branches
=
partition
(
\
b
->
length
b
>=
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
))
$
groupsToBranches
$
fromList
$
map
(
\
group
->
(
getGroupId
group
,
group
))
groups'
-- | 1) for each group process an initial temporal Matching
-- | 1) for each group process an initial temporal Matching
groups'
::
[
PhyloGroup
]
-- | here we suppose that all the groups of level 1 are part of the same big branch
groups'
=
phyloBranchMatching
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
groups
::
[([
PhyloGroup
],(
Bool
,[
Double
]))]
(
phyloProximity
$
getConfig
phylo
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
groups
=
map
(
\
b
->
(
b
,((
length
$
nub
$
map
_phylo_groupPeriod
b
)
>=
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
),[
thr
])))
(
phylo
^.
phylo_timeDocs
)
$
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
(
traceTemporalMatching
$
getGroupsFromLevel
1
phylo
)
$
matchGroupsToGroups
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phyloProximity
$
getConfig
phylo
)
thr
(
phylo
^.
phylo_timeDocs
)
(
phylo
^.
phylo_timeCooc
)
(
traceTemporalMatching
$
getGroupsFromLevel
1
phylo
)
--------------------------------------
thr
::
Double
thr
=
toThreshold
elevation
(
phylo
^.
phylo_groupsProxi
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment