Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
145
Issues
145
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
eeeb82c8
Commit
eeeb82c8
authored
Oct 22, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev-phylo' into dev-merge
parents
76e11752
077bf19a
Pipeline
#592
failed with stage
Changes
11
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
1262 additions
and
215 deletions
+1262
-215
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+23
-18
package.yaml
package.yaml
+3
-0
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+89
-9
Example.hs
src/Gargantext/Viz/Phylo/Example.hs
+2
-2
PhyloExample.hs
src/Gargantext/Viz/Phylo/PhyloExample.hs
+17
-2
PhyloExport.hs
src/Gargantext/Viz/Phylo/PhyloExport.hs
+457
-1
PhyloMaker.hs
src/Gargantext/Viz/Phylo/PhyloMaker.hs
+15
-16
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+208
-14
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+170
-12
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+277
-140
Tools.hs
src/Gargantext/Viz/Phylo/Tools.hs
+1
-1
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
eeeb82c8
...
...
@@ -26,7 +26,7 @@ import Data.ByteString.Lazy (ByteString)
import
Data.Maybe
(
isJust
,
fromJust
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
,
take
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unwords
)
import
Data.Text
(
Text
,
unwords
,
unpack
)
import
Gargantext.Prelude
import
Gargantext.Database.Types.Node
(
HyperdataDocument
(
..
))
...
...
@@ -37,6 +37,9 @@ import Gargantext.Text.List.CSV (csvGraphTermList)
import
Gargantext.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloMaker
(
toPhylo
)
import
Gargantext.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
)
import
Gargantext.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
-- import Gargantext.Viz.Phylo.SynchronicClustering (synchronicDistance')
import
GHC.IO
(
FilePath
)
import
Prelude
(
Either
(
..
))
...
...
@@ -54,21 +57,6 @@ import qualified Gargantext.Text.Corpus.Parsers.CSV as Csv
---------------
-- | To print an important message as an IO()
printIOMsg
::
String
->
IO
()
printIOMsg
msg
=
putStrLn
(
"
\n
"
<>
"------------"
<>
"
\n
"
<>
"-- | "
<>
msg
<>
"
\n
"
)
-- | To print a comment as an IO()
printIOComment
::
String
->
IO
()
printIOComment
cmt
=
putStrLn
(
"
\n
"
<>
cmt
<>
"
\n
"
)
-- | To get all the files in a directory or just a file
getFilesFromPath
::
FilePath
->
IO
([
FilePath
])
getFilesFromPath
path
=
do
...
...
@@ -166,6 +154,23 @@ main = do
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOMsg
"Reconstruct the Phylo"
let
phylo
=
toPhylo
corpus
mapList
config
printIOMsg
"End of reconstruction"
\ No newline at end of file
-- | probes
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_synchronic_distance_cumu_jaccard.txt")
-- $ synchronicDistance' phylo 1
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_inflexion_points.txt")
-- $ inflexionPoints phylo 1
printIOMsg
"End of reconstruction, start the export"
let
dot
=
toPhyloExport
phylo
let
output
=
(
outputPath
config
)
<>
(
unpack
$
phyloName
config
)
<>
"_V2.dot"
dotToFile
output
dot
\ No newline at end of file
package.yaml
View file @
eeeb82c8
...
...
@@ -72,6 +72,9 @@ library:
-
Gargantext.Viz.AdaptativePhylo
-
Gargantext.Viz.Phylo.PhyloMaker
-
Gargantext.Viz.Phylo.Tools
-
Gargantext.Viz.Phylo.PhyloTools
-
Gargantext.Viz.Phylo.PhyloExport
-
Gargantext.Viz.Phylo.SynchronicClustering
-
Gargantext.Viz.Phylo.Example
-
Gargantext.Viz.Phylo.LevelMaker
-
Gargantext.Viz.Phylo.View.Export
...
...
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
eeeb82c8
...
...
@@ -44,6 +44,8 @@ import GHC.IO (FilePath)
import
Control.DeepSeq
(
NFData
)
import
Control.Lens
(
makeLenses
)
import
qualified
Data.Text.Lazy
as
TextLazy
----------------
-- | Config | --
...
...
@@ -65,6 +67,15 @@ data Proximity =
deriving
(
Show
,
Generic
,
Eq
)
data
Synchrony
=
ByProximityThreshold
{
_bpt_threshold
::
Double
,
_bpt_sensibility
::
Double
}
|
ByProximityDistribution
{
_bpd_sensibility
::
Double
}
deriving
(
Show
,
Generic
,
Eq
)
data
TimeUnit
=
Year
{
_year_period
::
Int
...
...
@@ -80,6 +91,12 @@ data ContextualUnit =
deriving
(
Show
,
Generic
,
Eq
)
data
Quality
=
Quality
{
_qua_relevance
::
Double
,
_qua_minBranch
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
Config
=
Config
{
corpusPath
::
FilePath
,
listPath
::
FilePath
...
...
@@ -88,9 +105,13 @@ data Config =
,
phyloName
::
Text
,
phyloLevel
::
Int
,
phyloProximity
::
Proximity
,
phyloSynchrony
::
Synchrony
,
phyloQuality
::
Quality
,
timeUnit
::
TimeUnit
,
contextualUnit
::
ContextualUnit
,
branchSize
::
Int
,
exportLabel
::
[
PhyloLabel
]
,
exportSort
::
Sort
,
exportFilter
::
[
Filter
]
}
deriving
(
Show
,
Generic
,
Eq
)
...
...
@@ -102,10 +123,14 @@ defaultConfig =
,
corpusParser
=
Csv
1000
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
2
,
phyloProximity
=
WeightedLogJaccard
10
0
0.05
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloSynchrony
=
ByProximityDistribution
0
,
phyloQuality
=
Quality
0.1
1
,
timeUnit
=
Year
3
1
5
,
contextualUnit
=
Fis
2
4
,
branchSize
=
3
,
contextualUnit
=
Fis
1
5
,
exportLabel
=
[
BranchLabel
MostInclusive
2
,
GroupLabel
MostEmergentInclusive
2
]
,
exportSort
=
ByHierarchy
,
exportFilter
=
[
ByBranchSize
2
]
}
instance
FromJSON
Config
...
...
@@ -118,6 +143,20 @@ instance FromJSON TimeUnit
instance
ToJSON
TimeUnit
instance
FromJSON
ContextualUnit
instance
ToJSON
ContextualUnit
instance
FromJSON
PhyloLabel
instance
ToJSON
PhyloLabel
instance
FromJSON
Tagger
instance
ToJSON
Tagger
instance
FromJSON
Sort
instance
ToJSON
Sort
instance
FromJSON
Order
instance
ToJSON
Order
instance
FromJSON
Filter
instance
ToJSON
Filter
instance
FromJSON
Synchrony
instance
ToJSON
Synchrony
instance
FromJSON
Quality
instance
ToJSON
Quality
-- | Software parameters
...
...
@@ -248,17 +287,18 @@ data PhyloGroup =
PhyloGroup
{
_phylo_groupPeriod
::
(
Date
,
Date
)
,
_phylo_groupLevel
::
Level
,
_phylo_groupIndex
::
Int
,
_phylo_groupLabel
::
Text
,
_phylo_groupSupport
::
Support
,
_phylo_groupNgrams
::
[
Int
]
,
_phylo_groupCooc
::
!
(
Cooc
)
,
_phylo_groupBranchId
::
PhyloBranchId
,
_phylo_groupMeta
::
Map
Text
[
Double
]
,
_phylo_groupLevelParents
::
[
Pointer
]
,
_phylo_groupLevelChilds
::
[
Pointer
]
,
_phylo_groupPeriodParents
::
[
Pointer
]
,
_phylo_groupPeriodChilds
::
[
Pointer
]
,
_phylo_groupGhostPointers
::
[
Pointer
]
}
deriving
(
Generic
,
Show
,
Eq
)
deriving
(
Generic
,
Show
,
Eq
,
NFData
)
-- | Weight : A generic mesure that can be associated with an Id
type
Weight
=
Double
...
...
@@ -266,8 +306,6 @@ type Weight = Double
-- | Pointer : A weighted pointer to a given PhyloGroup
type
Pointer
=
(
PhyloGroupId
,
Weight
)
type
Link
=
((
PhyloGroupId
,
PhyloGroupId
),
Weight
)
data
Filiation
=
ToParents
|
ToChilds
deriving
(
Generic
,
Show
)
data
PointerType
=
TemporalPointer
|
LevelPointer
deriving
(
Generic
,
Show
)
...
...
@@ -290,13 +328,53 @@ data PhyloFis = PhyloFis
}
deriving
(
Generic
,
NFData
,
Show
,
Eq
)
----------------
-- | Export | --
----------------
type
DotId
=
TextLazy
.
Text
data
EdgeType
=
GroupToGroup
|
BranchToGroup
|
BranchToBranch
|
PeriodToPeriod
deriving
(
Show
,
Generic
,
Eq
)
data
Filter
=
ByBranchSize
{
_branch_size
::
Double
}
deriving
(
Show
,
Generic
,
Eq
)
data
Order
=
Asc
|
Desc
deriving
(
Show
,
Generic
,
Eq
)
data
Sort
=
ByBirthDate
{
_sort_order
::
Order
}
|
ByHierarchy
deriving
(
Show
,
Generic
,
Eq
)
data
Tagger
=
MostInclusive
|
MostEmergentInclusive
deriving
(
Show
,
Generic
,
Eq
)
data
PhyloLabel
=
BranchLabel
{
_branch_labelTagger
::
Tagger
,
_branch_labelSize
::
Int
}
|
GroupLabel
{
_group_labelTagger
::
Tagger
,
_group_labelSize
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
PhyloBranch
=
PhyloBranch
{
_branch_id
::
PhyloBranchId
,
_branch_label
::
Text
,
_branch_meta
::
Map
Text
[
Double
]
}
deriving
(
Generic
,
Show
)
data
PhyloExport
=
PhyloExport
{
_export_groups
::
[
PhyloGroup
]
,
_export_branches
::
[
PhyloBranch
]
}
deriving
(
Generic
,
Show
)
----------------
-- | Lenses | --
----------------
makeLenses
''
C
onfig
makeLenses
''
P
roximity
makeLenses
''
Q
uality
makeLenses
''
C
ontextualUnit
makeLenses
''
P
hyloLabel
makeLenses
''
T
imeUnit
makeLenses
''
P
hyloFoundations
makeLenses
''
P
hyloFis
...
...
@@ -305,6 +383,8 @@ makeLenses ''PhyloPeriod
makeLenses
''
P
hyloLevel
makeLenses
''
P
hyloGroup
makeLenses
''
P
hyloParam
makeLenses
''
P
hyloExport
makeLenses
''
P
hyloBranch
------------------------
-- | JSON instances | --
...
...
src/Gargantext/Viz/Phylo/Example.hs
View file @
eeeb82c8
...
...
@@ -83,7 +83,7 @@ queryViewEx = "level=3"
phyloQueryView
::
PhyloQueryView
phyloQueryView
=
PhyloQueryView
2
Merge
False
2
[
BranchAge
,
BranchBirth
,
BranchGroups
]
[]
[
BranchPeakInc
,
GroupLabelIncDyn
]
(
Just
(
ByBranchBirth
,
Asc
))
Json
Flat
True
phyloQueryView
=
PhyloQueryView
1
Merge
False
1
[
BranchAge
,
BranchBirth
,
BranchGroups
]
[]
[
BranchPeakInc
,
GroupLabelIncDyn
]
(
Just
(
ByBranchBirth
,
Asc
))
Json
Flat
True
--------------------------------------------------
...
...
@@ -110,7 +110,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild
::
PhyloQueryBuild
phyloQueryBuild
=
PhyloQueryBuild
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
3
1
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
5
20
)
5
0.8
0.5
4
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.3
0
)
3
1
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
9
10
)
5
0.8
0.5
4
1
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.3
0
)
...
...
src/Gargantext/Viz/Phylo/PhyloExample.hs
View file @
eeeb82c8
...
...
@@ -29,18 +29,32 @@ import Gargantext.Text.Terms.Mono (monoTexts)
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.PhyloMaker
import
Gargantext.Viz.Phylo.PhyloExport
import
Gargantext.Viz.Phylo.TemporalMatching
(
temporalMatching
)
import
Gargantext.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Control.Lens
import
Data.GraphViz.Types.Generalised
(
DotGraph
)
import
qualified
Data.Vector
as
Vector
phyloExport
::
IO
()
phyloExport
=
dotToFile
"/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot"
phyloDot
phyloDot
::
DotGraph
DotId
phyloDot
=
toPhyloExport
phylo2
phylo2
::
Phylo
phylo2
=
synchronicClustering
phylo1
-----------------------------------------------
-- | STEP 3 | -- Build the Level 1 of the Phylo
-----------------------------------------------
phylo1
::
Phylo
phylo1
=
appendGroups
fisToGroup
1
phyloFis
phyloBase
phylo1
=
temporalMatching
$
appendGroups
fisToGroup
1
phyloFis
phyloBase
---------------------------------------------
...
...
@@ -80,7 +94,8 @@ nbDocsByYear = docsToTimeScaleNb docs
config
::
Config
config
=
defaultConfig
{
phyloName
=
"Cesar et Cleopatre"
,
branchSize
=
0
,
phyloLevel
=
2
,
exportFilter
=
[
ByBranchSize
0
]
,
contextualUnit
=
Fis
0
0
}
...
...
src/Gargantext/Viz/Phylo/PhyloExport.hs
View file @
eeeb82c8
...
...
@@ -12,5 +12,461 @@ Portability : POSIX
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
module
Gargantext.Viz.Phylo.PhyloExport
where
import
Data.Map
(
Map
,
fromList
,
empty
,
fromListWith
,
insert
,
(
!
),
elems
,
unionWith
,
findWithDefault
,
toList
)
import
Data.List
((
++
),
sort
,
nub
,
concat
,
sortOn
,
reverse
,
groupBy
,
union
,
(
\\
),
(
!!
),
init
,
partition
,
unwords
,
nubBy
)
import
Data.Vector
(
Vector
)
import
Prelude
(
writeFile
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Control.Lens
import
Data.GraphViz
hiding
(
DotGraph
,
Order
)
import
Data.GraphViz.Types.Generalised
(
DotGraph
)
import
Data.GraphViz.Attributes.Complete
hiding
(
EdgeType
,
Order
)
import
Data.GraphViz.Types.Monadic
import
Data.Text.Lazy
(
fromStrict
,
pack
,
unpack
)
import
System.FilePath
import
Debug.Trace
(
trace
)
import
qualified
Data.Text
as
Text
import
qualified
Data.Text.Lazy
as
Lazy
import
qualified
Data.GraphViz.Attributes.HTML
as
H
--------------------
-- | Dot export | --
--------------------
dotToFile
::
FilePath
->
DotGraph
DotId
->
IO
()
dotToFile
filePath
dotG
=
writeFile
filePath
$
dotToString
dotG
dotToString
::
DotGraph
DotId
->
[
Char
]
dotToString
dotG
=
unpack
(
printDotGraph
dotG
)
dynamicToColor
::
Double
->
H
.
Attribute
dynamicToColor
d
|
d
==
0
=
H
.
BGColor
(
toColor
LightCoral
)
|
d
==
1
=
H
.
BGColor
(
toColor
Khaki
)
|
d
==
2
=
H
.
BGColor
(
toColor
SkyBlue
)
|
otherwise
=
H
.
Color
(
toColor
Black
)
pickLabelColor
::
[
Double
]
->
H
.
Attribute
pickLabelColor
lst
|
elem
0
lst
=
dynamicToColor
0
|
elem
2
lst
=
dynamicToColor
2
|
elem
1
lst
=
dynamicToColor
1
|
otherwise
=
dynamicToColor
3
toDotLabel
::
Text
.
Text
->
Label
toDotLabel
lbl
=
StrLabel
$
fromStrict
lbl
toAttr
::
AttributeName
->
Lazy
.
Text
->
CustomAttribute
toAttr
k
v
=
customAttribute
k
v
metaToAttr
::
Map
Text
.
Text
[
Double
]
->
[
CustomAttribute
]
metaToAttr
meta
=
map
(
\
(
k
,
v
)
->
toAttr
(
fromStrict
k
)
$
(
pack
.
unwords
)
$
map
show
v
)
$
toList
meta
groupIdToDotId
::
PhyloGroupId
->
DotId
groupIdToDotId
(((
d
,
d'
),
lvl
),
idx
)
=
(
fromStrict
.
Text
.
pack
)
$
(
"group"
<>
(
show
d
)
<>
(
show
d'
)
<>
(
show
lvl
)
<>
(
show
idx
))
branchIdToDotId
::
PhyloBranchId
->
DotId
branchIdToDotId
bId
=
(
fromStrict
.
Text
.
pack
)
$
(
"branch"
<>
show
(
snd
bId
))
periodIdToDotId
::
PhyloPeriodId
->
DotId
periodIdToDotId
prd
=
(
fromStrict
.
Text
.
pack
)
$
(
"period"
<>
show
(
fst
prd
)
<>
show
(
snd
prd
))
groupToTable
::
Vector
Ngrams
->
PhyloGroup
->
H
.
Label
groupToTable
fdt
g
=
H
.
Table
H
.
HTable
{
H
.
tableFontAttrs
=
Just
[
H
.
PointSize
14
,
H
.
Align
H
.
HLeft
]
,
H
.
tableAttrs
=
[
H
.
Border
0
,
H
.
CellBorder
0
,
H
.
BGColor
(
toColor
White
)]
,
H
.
tableRows
=
[
header
]
<>
[
H
.
Cells
[
H
.
LabelCell
[
H
.
Height
10
]
$
H
.
Text
[
H
.
Str
$
fromStrict
""
]]]
<>
(
map
ngramsToRow
$
splitEvery
4
$
reverse
$
sortOn
(
snd
.
snd
)
$
zip
(
ngramsToText
fdt
(
g
^.
phylo_groupNgrams
))
$
zip
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)
((
g
^.
phylo_groupMeta
)
!
"inclusion"
))}
where
--------------------------------------
ngramsToRow
::
[(
Ngrams
,(
Double
,
Double
))]
->
H
.
Row
ngramsToRow
ns
=
H
.
Cells
$
map
(
\
(
n
,(
d
,
_
))
->
H
.
LabelCell
[
H
.
Align
H
.
HLeft
,
dynamicToColor
d
]
$
H
.
Text
[
H
.
Str
$
fromStrict
n
])
ns
--------------------------------------
header
::
H
.
Row
header
=
H
.
Cells
[
H
.
LabelCell
[
pickLabelColor
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)]
$
H
.
Text
[
H
.
Str
$
(((
fromStrict
.
Text
.
toUpper
)
$
g
^.
phylo_groupLabel
)
<>
(
fromStrict
" ( "
)
<>
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
<>
(
fromStrict
" , "
)
<>
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
<>
(
fromStrict
" ) "
))]]
--------------------------------------
branchToDotNode
::
PhyloBranch
->
Dot
DotId
branchToDotNode
b
=
node
(
branchIdToDotId
$
b
^.
branch_id
)
([
FillColor
[
toWColor
CornSilk
],
FontName
"Arial"
,
FontSize
40
,
Shape
Egg
,
Style
[
SItem
Bold
[]
],
Label
(
toDotLabel
$
b
^.
branch_label
)]
<>
(
metaToAttr
$
b
^.
branch_meta
)
<>
[
toAttr
"nodeType"
"branch"
,
toAttr
"branchId"
(
pack
$
unwords
(
map
show
$
snd
$
b
^.
branch_id
))
])
periodToDotNode
::
(
Date
,
Date
)
->
Dot
DotId
periodToDotNode
prd
=
node
(
periodIdToDotId
prd
)
([
Shape
Square
,
FontSize
50
,
Label
(
toDotLabel
$
Text
.
pack
(
show
(
fst
prd
)
<>
" "
<>
show
(
snd
prd
)))]
<>
[
toAttr
"nodeType"
"period"
,
toAttr
"from"
(
fromStrict
$
Text
.
pack
$
(
show
$
fst
prd
))
,
toAttr
"to"
(
fromStrict
$
Text
.
pack
$
(
show
$
snd
prd
))])
groupToDotNode
::
Vector
Ngrams
->
PhyloGroup
->
Dot
DotId
groupToDotNode
fdt
g
=
node
(
groupIdToDotId
$
getGroupId
g
)
([
FontName
"Arial"
,
Shape
Square
,
toLabel
(
groupToTable
fdt
g
)]
<>
[
toAttr
"nodeType"
"group"
,
toAttr
"from"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
,
toAttr
"to"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
,
toAttr
"branchId"
(
pack
$
unwords
(
init
$
map
show
$
snd
$
g
^.
phylo_groupBranchId
))
,
toAttr
"support"
(
pack
$
show
(
g
^.
phylo_groupSupport
))])
toDotEdge
::
DotId
->
DotId
->
Text
.
Text
->
EdgeType
->
Dot
DotId
toDotEdge
source
target
lbl
edgeType
=
edge
source
target
(
case
edgeType
of
GroupToGroup
->
[
Width
10
,
Color
[
toWColor
Black
],
Constraint
True
,
Label
(
StrLabel
$
fromStrict
lbl
)]
BranchToGroup
->
[
Width
3
,
Color
[
toWColor
Black
],
ArrowHead
(
AType
[(
ArrMod
FilledArrow
RightSide
,
DotArrow
)])
,
Label
(
StrLabel
$
fromStrict
lbl
)]
BranchToBranch
->
[
Width
2
,
Color
[
toWColor
Black
],
Style
[
SItem
Dashed
[]
],
ArrowHead
(
AType
[(
ArrMod
FilledArrow
BothSides
,
DotArrow
)])
,
Label
(
StrLabel
$
fromStrict
lbl
)]
PeriodToPeriod
->
[
Width
5
,
Color
[
toWColor
Black
]])
mergePointers
::
[
PhyloGroup
]
->
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
mergePointers
groups
=
let
toChilds
=
fromList
$
concat
$
map
(
\
g
->
map
(
\
(
target
,
w
)
->
((
getGroupId
g
,
target
),
w
))
$
g
^.
phylo_groupPeriodChilds
)
groups
toParents
=
fromList
$
concat
$
map
(
\
g
->
map
(
\
(
target
,
w
)
->
((
target
,
getGroupId
g
),
w
))
$
g
^.
phylo_groupPeriodParents
)
groups
in
unionWith
(
\
w
w'
->
max
w
w'
)
toChilds
toParents
exportToDot
::
Phylo
->
PhyloExport
->
DotGraph
DotId
exportToDot
phylo
export
=
trace
(
"
\n
-- | Convert "
<>
show
(
length
$
export
^.
export_branches
)
<>
" branches and "
<>
show
(
length
$
export
^.
export_groups
)
<>
" groups to a dot file
\n
"
)
$
digraph
((
Str
.
fromStrict
)
$
(
phyloName
$
getConfig
phylo
))
$
do
-- | 1) init the dot graph
graphAttrs
(
[
Label
(
toDotLabel
$
(
phyloName
$
getConfig
phylo
))]
<>
[
FontSize
30
,
LabelLoc
VTop
,
NodeSep
1
,
RankSep
[
1
],
Rank
SameRank
,
Splines
SplineEdges
,
Overlap
ScaleOverlaps
,
Ratio
FillRatio
,
Style
[
SItem
Filled
[]
],
Color
[
toWColor
White
]]
-- | home made attributes
<>
[(
toAttr
(
fromStrict
"nbDocs"
)
$
pack
$
show
(
sum
$
elems
$
phylo
^.
phylo_timeDocs
))
,(
toAttr
(
fromStrict
"proxiName"
)
$
pack
$
show
(
getProximityName
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"proxiInit"
)
$
pack
$
show
(
getProximityInit
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"proxiStep"
)
$
pack
$
show
(
getProximityStep
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"quaFactor"
)
$
pack
$
show
(
_qua_relevance
$
phyloQuality
$
getConfig
phylo
))
])
-- toAttr (fromStrict k) $ (pack . unwords) $ map show v
-- | 2) create a layer for the branches labels
subgraph
(
Str
"Branches peaks"
)
$
do
graphAttrs
[
Rank
SameRank
]
-- | 3) group the branches by hierarchy
-- mapM (\branches ->
-- subgraph (Str "Branches clade") $ do
-- graphAttrs [Rank SameRank]
-- -- | 4) create a node for each branch
-- mapM branchToDotNode branches
-- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
mapM
branchToDotNode
$
export
^.
export_branches
-- | 5) create a layer for each period
_
<-
mapM
(
\
period
->
subgraph
((
Str
.
fromStrict
.
Text
.
pack
)
$
(
"Period"
<>
show
(
fst
period
)
<>
show
(
snd
period
)))
$
do
graphAttrs
[
Rank
SameRank
]
periodToDotNode
period
-- | 6) create a node for each group
mapM
(
\
g
->
groupToDotNode
(
getRoots
phylo
)
g
)
(
filter
(
\
g
->
g
^.
phylo_groupPeriod
==
period
)
$
export
^.
export_groups
)
)
$
getPeriodIds
phylo
-- | 7) create the edges between a branch and its first groups
_
<-
mapM
(
\
(
bId
,
groups
)
->
mapM
(
\
g
->
toDotEdge
(
branchIdToDotId
bId
)
(
groupIdToDotId
$
getGroupId
g
)
""
BranchToGroup
)
groups
)
$
toList
$
map
(
\
groups
->
head'
"toDot"
$
groupBy
(
\
g
g'
->
g'
^.
phylo_groupPeriod
==
g
^.
phylo_groupPeriod
)
$
sortOn
(
fst
.
_phylo_groupPeriod
)
groups
)
$
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupBranchId
,[
g
]))
$
export
^.
export_groups
-- | 8) create the edges between the groups
_
<-
mapM
(
\
((
k
,
k'
),
_
)
->
toDotEdge
(
groupIdToDotId
k
)
(
groupIdToDotId
k'
)
""
GroupToGroup
)
$
(
toList
.
mergePointers
)
$
export
^.
export_groups
-- | 7) create the edges between the periods
_
<-
mapM
(
\
(
prd
,
prd'
)
->
toDotEdge
(
periodIdToDotId
prd
)
(
periodIdToDotId
prd'
)
""
PeriodToPeriod
)
$
nubBy
(
\
combi
combi'
->
fst
combi
==
fst
combi'
)
$
listToCombi'
$
getPeriodIds
phylo
-- | 8) create the edges between the branches
_
<-
mapM
(
\
(
bId
,
bId'
)
->
toDotEdge
(
branchIdToDotId
bId
)
(
branchIdToDotId
bId'
)
(
Text
.
pack
$
show
(
branchIdsToProximity
bId
bId'
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
getThresholdStep
$
phyloProximity
$
getConfig
phylo
)))
BranchToBranch
)
$
nubBy
(
\
combi
combi'
->
fst
combi
==
fst
combi'
)
$
listToCombi'
$
map
_branch_id
$
export
^.
export_branches
graphAttrs
[
Rank
SameRank
]
----------------
-- | Filter | --
----------------
filterByBranchSize
::
Double
->
PhyloExport
->
PhyloExport
filterByBranchSize
thr
export
=
let
branches'
=
partition
(
\
b
->
head'
"filter"
((
b
^.
branch_meta
)
!
"size"
)
>=
thr
)
$
export
^.
export_branches
in
export
&
export_branches
.~
(
fst
branches'
)
&
export_groups
%~
(
filter
(
\
g
->
not
$
elem
(
g
^.
phylo_groupBranchId
)
(
map
_branch_id
$
snd
branches'
)))
processFilters
::
[
Filter
]
->
Quality
->
PhyloExport
->
PhyloExport
processFilters
filters
qua
export
=
foldl
(
\
export'
f
->
case
f
of
ByBranchSize
thr
->
if
(
thr
<
(
fromIntegral
$
qua
^.
qua_minBranch
))
then
filterByBranchSize
(
fromIntegral
$
qua
^.
qua_minBranch
)
export'
else
filterByBranchSize
thr
export'
)
export
filters
--------------
-- | Sort | --
--------------
sortByHierarchy
::
Int
->
[
PhyloBranch
]
->
[
PhyloBranch
]
sortByHierarchy
depth
branches
=
if
(
length
branches
==
1
)
then
branches
else
concat
$
map
(
\
branches'
->
let
partitions
=
partition
(
\
b
->
depth
+
1
==
((
length
.
snd
)
$
b
^.
branch_id
))
branches'
in
(
sortOn
(
\
b
->
(
b
^.
branch_meta
)
!
"birth"
)
(
fst
partitions
))
++
(
sortByHierarchy
(
depth
+
1
)
(
snd
partitions
)))
$
groupBy
(
\
b
b'
->
((
take
depth
.
snd
)
$
b
^.
branch_id
)
==
((
take
depth
.
snd
)
$
b'
^.
branch_id
)
)
$
sortOn
(
\
b
->
(
take
depth
.
snd
)
$
b
^.
branch_id
)
branches
sortByBirthDate
::
Order
->
PhyloExport
->
PhyloExport
sortByBirthDate
order
export
=
let
branches
=
sortOn
(
\
b
->
(
b
^.
branch_meta
)
!
"birth"
)
$
export
^.
export_branches
branches'
=
case
order
of
Asc
->
branches
Desc
->
reverse
branches
in
export
&
export_branches
.~
branches'
processSort
::
Sort
->
PhyloExport
->
PhyloExport
processSort
sort'
export
=
case
sort'
of
ByBirthDate
o
->
sortByBirthDate
o
export
ByHierarchy
->
export
&
export_branches
.~
sortByHierarchy
0
(
export
^.
export_branches
)
-----------------
-- | Metrics | --
-----------------
-- | Return the conditional probability of i knowing j
conditional
::
Ord
a
=>
Map
(
a
,
a
)
Double
->
a
->
a
->
Double
conditional
m
i
j
=
(
findWithDefault
0
(
i
,
j
)
m
)
/
(
m
!
(
j
,
j
))
-- | Return the genericity score of a given ngram
genericity
::
Map
(
Int
,
Int
)
Double
->
[
Int
]
->
Int
->
Double
genericity
m
l
i
=
(
(
sum
$
map
(
\
j
->
conditional
m
i
j
)
l
)
-
(
sum
$
map
(
\
j
->
conditional
m
j
i
)
l
))
/
(
fromIntegral
$
(
length
l
)
+
1
)
-- | Return the specificity score of a given ngram
specificity
::
Map
(
Int
,
Int
)
Double
->
[
Int
]
->
Int
->
Double
specificity
m
l
i
=
(
(
sum
$
map
(
\
j
->
conditional
m
j
i
)
l
)
-
(
sum
$
map
(
\
j
->
conditional
m
i
j
)
l
))
/
(
fromIntegral
$
(
length
l
)
+
1
)
-- | Return the inclusion score of a given ngram
inclusion
::
Map
(
Int
,
Int
)
Double
->
[
Int
]
->
Int
->
Double
inclusion
m
l
i
=
(
(
sum
$
map
(
\
j
->
conditional
m
j
i
)
l
)
+
(
sum
$
map
(
\
j
->
conditional
m
i
j
)
l
))
/
(
fromIntegral
$
(
length
l
)
+
1
)
ngramsMetrics
::
PhyloExport
->
PhyloExport
ngramsMetrics
export
=
over
(
export_groups
.
traverse
)
(
\
g
->
g
&
phylo_groupMeta
%~
insert
"genericity"
(
map
(
\
n
->
genericity
(
g
^.
phylo_groupCooc
)
((
g
^.
phylo_groupNgrams
)
\\
[
n
])
n
)
$
g
^.
phylo_groupNgrams
)
&
phylo_groupMeta
%~
insert
"specificity"
(
map
(
\
n
->
specificity
(
g
^.
phylo_groupCooc
)
((
g
^.
phylo_groupNgrams
)
\\
[
n
])
n
)
$
g
^.
phylo_groupNgrams
)
&
phylo_groupMeta
%~
insert
"inclusion"
(
map
(
\
n
->
inclusion
(
g
^.
phylo_groupCooc
)
((
g
^.
phylo_groupNgrams
)
\\
[
n
])
n
)
$
g
^.
phylo_groupNgrams
)
)
export
branchDating
::
PhyloExport
->
PhyloExport
branchDating
export
=
over
(
export_branches
.
traverse
)
(
\
b
->
let
groups
=
sortOn
fst
$
foldl'
(
\
acc
g
->
if
(
g
^.
phylo_groupBranchId
==
b
^.
branch_id
)
then
acc
++
[
g
^.
phylo_groupPeriod
]
else
acc
)
[]
$
export
^.
export_groups
birth
=
fst
$
head'
"birth"
groups
age
=
(
snd
$
last'
"age"
groups
)
-
birth
in
b
&
branch_meta
%~
insert
"birth"
[
fromIntegral
birth
]
&
branch_meta
%~
insert
"age"
[
fromIntegral
age
]
&
branch_meta
%~
insert
"size"
[
fromIntegral
$
length
groups
]
)
export
processMetrics
::
PhyloExport
->
PhyloExport
processMetrics
export
=
ngramsMetrics
$
branchDating
export
-----------------
-- | Taggers | --
-----------------
getNthMostMeta
::
Int
->
[
Double
]
->
[
Int
]
->
[
Int
]
getNthMostMeta
nth
meta
ns
=
map
(
\
(
idx
,
_
)
->
(
ns
!!
idx
))
$
take
nth
$
reverse
$
sortOn
snd
$
zip
[
0
..
]
meta
mostInclusive
::
Int
->
Vector
Ngrams
->
PhyloExport
->
PhyloExport
mostInclusive
nth
foundations
export
=
over
(
export_branches
.
traverse
)
(
\
b
->
let
groups
=
filter
(
\
g
->
g
^.
phylo_groupBranchId
==
b
^.
branch_id
)
$
export
^.
export_groups
cooc
=
foldl
(
\
acc
g
->
unionWith
(
+
)
acc
(
g
^.
phylo_groupCooc
))
empty
groups
ngrams
=
sort
$
foldl
(
\
acc
g
->
union
acc
(
g
^.
phylo_groupNgrams
))
[]
groups
inc
=
map
(
\
n
->
inclusion
cooc
(
ngrams
\\
[
n
])
n
)
ngrams
lbl
=
ngramsToLabel
foundations
$
getNthMostMeta
nth
inc
ngrams
in
b
&
branch_label
.~
lbl
)
export
mostEmergentInclusive
::
Int
->
Vector
Ngrams
->
PhyloExport
->
PhyloExport
mostEmergentInclusive
nth
foundations
export
=
over
(
export_groups
.
traverse
)
(
\
g
->
let
lbl
=
ngramsToLabel
foundations
$
take
nth
$
map
(
\
(
_
,(
_
,
idx
))
->
idx
)
$
concat
$
map
(
\
groups
->
sortOn
(
fst
.
snd
)
groups
)
$
groupBy
((
==
)
`
on
`
fst
)
$
reverse
$
sortOn
fst
$
zip
((
g
^.
phylo_groupMeta
)
!
"inclusion"
)
$
zip
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)
(
g
^.
phylo_groupNgrams
)
in
g
&
phylo_groupLabel
.~
lbl
)
export
processLabels
::
[
PhyloLabel
]
->
Vector
Ngrams
->
PhyloExport
->
PhyloExport
processLabels
labels
foundations
export
=
foldl
(
\
export'
label
->
case
label
of
GroupLabel
tagger
nth
->
case
tagger
of
MostEmergentInclusive
->
mostEmergentInclusive
nth
foundations
export'
_
->
panic
"[ERR][Viz.Phylo.PhyloExport] unknown tagger"
BranchLabel
tagger
nth
->
case
tagger
of
MostInclusive
->
mostInclusive
nth
foundations
export'
_
->
panic
"[ERR][Viz.Phylo.PhyloExport] unknown tagger"
)
export
labels
------------------
-- | Dynamics | --
------------------
toDynamics
::
Int
->
[
PhyloGroup
]
->
PhyloGroup
->
Map
Int
(
Date
,
Date
)
->
Double
toDynamics
n
parents
group
m
=
let
prd
=
group
^.
phylo_groupPeriod
end
=
last'
"dynamics"
(
sort
$
map
snd
$
elems
m
)
in
if
(((
snd
prd
)
==
(
snd
$
m
!
n
))
&&
(
snd
prd
/=
end
))
-- | decrease
then
2
else
if
((
fst
prd
)
==
(
fst
$
m
!
n
))
-- | recombination
then
0
else
if
isNew
-- | emergence
then
1
else
3
where
--------------------------------------
isNew
::
Bool
isNew
=
not
$
elem
n
$
concat
$
map
_phylo_groupNgrams
parents
processDynamics
::
[
PhyloGroup
]
->
[
PhyloGroup
]
processDynamics
groups
=
map
(
\
g
->
let
parents
=
filter
(
\
g'
->
(
g
^.
phylo_groupBranchId
==
g'
^.
phylo_groupBranchId
)
&&
((
fst
$
g
^.
phylo_groupPeriod
)
>
(
fst
$
g'
^.
phylo_groupPeriod
)))
groups
in
g
&
phylo_groupMeta
%~
insert
"dynamics"
(
map
(
\
n
->
toDynamics
n
parents
g
mapNgrams
)
$
g
^.
phylo_groupNgrams
)
)
groups
where
--------------------------------------
mapNgrams
::
Map
Int
(
Date
,
Date
)
mapNgrams
=
map
(
\
dates
->
let
dates'
=
sort
dates
in
(
head'
"dynamics"
dates'
,
last'
"dynamics"
dates'
))
$
fromListWith
(
++
)
$
foldl
(
\
acc
g
->
acc
++
(
map
(
\
n
->
(
n
,[
fst
$
g
^.
phylo_groupPeriod
,
snd
$
g
^.
phylo_groupPeriod
]))
$
(
g
^.
phylo_groupNgrams
)))
[]
groups
---------------------
-- | phyloExport | --
---------------------
toPhyloExport
::
Phylo
->
DotGraph
DotId
toPhyloExport
phylo
=
exportToDot
phylo
$
processFilters
(
exportFilter
$
getConfig
phylo
)
(
phyloQuality
$
getConfig
phylo
)
$
processSort
(
exportSort
$
getConfig
phylo
)
$
processLabels
(
exportLabel
$
getConfig
phylo
)
(
getRoots
phylo
)
$
processMetrics
export
where
export
::
PhyloExport
export
=
PhyloExport
groups
branches
--------------------------------------
branches
::
[
PhyloBranch
]
branches
=
traceExportBranches
$
map
(
\
bId
->
PhyloBranch
bId
""
empty
)
$
nub
$
map
_phylo_groupBranchId
groups
--------------------------------------
groups
::
[
PhyloGroup
]
groups
=
processDynamics
$
getGroupsFromLevel
(
phyloLevel
$
getConfig
phylo
)
phylo
traceExportBranches
::
[
PhyloBranch
]
->
[
PhyloBranch
]
traceExportBranches
branches
=
trace
(
"
\n
"
<>
"-- | Export "
<>
show
(
length
branches
)
<>
" branches"
)
branches
src/Gargantext/Viz/Phylo/PhyloMaker.hs
View file @
eeeb82c8
...
...
@@ -16,13 +16,15 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloMaker
where
import
Data.List
(
concat
,
nub
,
partition
,
sort
,
(
++
))
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
filterWithKey
,
restrictKeys
)
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
restrictKeys
)
import
Data.Set
(
size
)
import
Data.Vector
(
Vector
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.TemporalMatching
(
temporalMatching
)
import
Gargantext.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Gargantext.Text.Context
(
TermList
)
import
Gargantext.Text.Metrics.FrequentItemSet
(
fisWithSizePolyMap
,
Size
(
..
))
...
...
@@ -41,7 +43,10 @@ import qualified Data.Set as Set
toPhylo
::
[
Document
]
->
TermList
->
Config
->
Phylo
toPhylo
docs
lst
conf
=
phylo1
toPhylo
docs
lst
conf
=
traceToPhylo
(
phyloLevel
conf
)
$
if
(
phyloLevel
conf
)
>
1
then
foldl'
(
\
phylo'
_
->
synchronicClustering
phylo'
)
phylo1
[
2
..
(
phyloLevel
conf
)]
else
phylo1
where
--------------------------------------
phylo1
::
Phylo
...
...
@@ -82,16 +87,18 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
fisToGroup
::
PhyloFis
->
PhyloPeriodId
->
Level
->
Int
->
Vector
Ngrams
->
[
Cooc
]
->
PhyloGroup
fisToGroup
fis
pId
lvl
idx
fdt
coocs
=
let
ngrams
=
ngramsToIdx
(
Set
.
toList
$
fis
^.
phyloFis_clique
)
fdt
in
PhyloGroup
pId
lvl
idx
in
PhyloGroup
pId
lvl
idx
""
(
fis
^.
phyloFis_support
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
1
,
[]
)
[]
[]
[]
[]
[]
(
1
,[
0
])
empty
[]
[]
[]
[]
toPhylo1
::
[
Document
]
->
Phylo
->
Phylo
toPhylo1
docs
phyloBase
=
appendGroups
fisToGroup
1
phyloFis
phyloBase
toPhylo1
docs
phyloBase
=
temporalMatching
$
appendGroups
fisToGroup
1
phyloFis
phyloBase
where
--------------------------------------
phyloFis
::
Map
(
Date
,
Date
)
[
PhyloFis
]
...
...
@@ -164,14 +171,6 @@ toPhyloFis phyloDocs support clique = traceFis "Filtered Fis"
--------------------
-- | To build the local cooc matrix of each phylogroup
ngramsToCooc
::
[
Int
]
->
[
Cooc
]
->
Cooc
ngramsToCooc
ngrams
coocs
=
let
cooc
=
foldl
(
\
acc
cooc'
->
sumCooc
acc
cooc'
)
empty
coocs
pairs
=
listToKeys
ngrams
in
filterWithKey
(
\
k
_
->
elem
k
pairs
)
cooc
-- | To transform the docs into a time map of coocurency matrix
docsToTimeScaleCooc
::
[
Document
]
->
Vector
Ngrams
->
Map
Date
Cooc
docsToTimeScaleCooc
docs
fdt
=
...
...
@@ -232,4 +231,4 @@ toPhyloBase docs lst conf =
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleNb
docs
)
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
initPhyloLevels
(
phyloLevel
conf
)
prd
)))
periods
)
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
initPhyloLevels
1
prd
)))
periods
)
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
eeeb82c8
...
...
@@ -17,13 +17,16 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloTools
where
import
Data.Vector
(
Vector
,
elemIndex
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
)
import
Data.Set
(
Set
,
size
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
,
tails
,
nubBy
)
import
Data.Set
(
Set
,
size
,
disjoint
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unwords
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Text.Printf
import
Debug.Trace
(
trace
)
import
Control.Lens
hiding
(
Level
)
...
...
@@ -32,14 +35,40 @@ import qualified Data.Vector as Vector
import
qualified
Data.List
as
List
import
qualified
Data.Set
as
Set
------------
-- | Io | --
------------
-- | To print an important message as an IO()
printIOMsg
::
String
->
IO
()
printIOMsg
msg
=
putStrLn
(
"
\n
"
<>
"------------"
<>
"
\n
"
<>
"-- | "
<>
msg
<>
"
\n
"
)
-- | To print a comment as an IO()
printIOComment
::
String
->
IO
()
printIOComment
cmt
=
putStrLn
(
"
\n
"
<>
cmt
<>
"
\n
"
)
--------------
-- | Misc | --
--------------
roundToStr
::
(
PrintfArg
a
,
Floating
a
)
=>
Int
->
a
->
String
roundToStr
=
printf
"%0.*f"
countSup
::
Double
->
[
Double
]
->
Int
countSup
s
l
=
length
$
filter
(
>
s
)
l
dropByIdx
::
Int
->
[
a
]
->
[
a
]
dropByIdx
k
l
=
take
k
l
++
drop
(
k
+
1
)
l
elemIndex'
::
Eq
a
=>
a
->
[
a
]
->
Int
elemIndex'
e
l
=
case
(
List
.
elemIndex
e
l
)
of
...
...
@@ -60,6 +89,15 @@ isRoots n ns = Vector.elem n ns
ngramsToIdx
::
[
Ngrams
]
->
Vector
Ngrams
->
[
Int
]
ngramsToIdx
ns
fdt
=
map
(
\
n
->
fromJust
$
elemIndex
n
fdt
)
ns
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel
::
Vector
Ngrams
->
[
Int
]
->
Text
ngramsToLabel
ngrams
l
=
unwords
$
tail'
"ngramsToLabel"
$
concat
$
map
(
\
n
->
[
"|"
,
n
])
$
ngramsToText
ngrams
l
-- | To transform a list of Ngrams Indexes into a list of Text
ngramsToText
::
Vector
Ngrams
->
[
Int
]
->
[
Text
]
ngramsToText
ngrams
l
=
map
(
\
idx
->
ngrams
Vector
.!
idx
)
l
--------------
-- | Time | --
...
...
@@ -168,7 +206,6 @@ getFisSize unit = case unit of
-- | Cooc | --
--------------
listToCombi'
::
[
a
]
->
[(
a
,
a
)]
listToCombi'
l
=
[(
x
,
y
)
|
(
x
:
rest
)
<-
tails
l
,
y
<-
rest
]
...
...
@@ -181,12 +218,24 @@ listToKeys lst = (listToCombi' lst) ++ (listToEqual' lst)
listToMatrix
::
[
Int
]
->
Map
(
Int
,
Int
)
Double
listToMatrix
lst
=
fromList
$
map
(
\
k
->
(
k
,
1
))
$
listToKeys
$
sort
lst
listToSeq
::
Eq
a
=>
[
a
]
->
[(
a
,
a
)]
listToSeq
l
=
nubBy
(
\
x
y
->
fst
x
==
fst
y
)
$
[
(
x
,
y
)
|
(
x
:
rest
)
<-
tails
l
,
y
<-
rest
]
sumCooc
::
Cooc
->
Cooc
->
Cooc
sumCooc
cooc
cooc'
=
unionWith
(
+
)
cooc
cooc'
getTrace
::
Cooc
->
Double
getTrace
cooc
=
sum
$
elems
$
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
cooc
-- | To build the local cooc matrix of each phylogroup
ngramsToCooc
::
[
Int
]
->
[
Cooc
]
->
Cooc
ngramsToCooc
ngrams
coocs
=
let
cooc
=
foldl
(
\
acc
cooc'
->
sumCooc
acc
cooc'
)
empty
coocs
pairs
=
listToKeys
ngrams
in
filterWithKey
(
\
k
_
->
elem
k
pairs
)
cooc
--------------------
-- | PhyloGroup | --
--------------------
...
...
@@ -194,6 +243,40 @@ getTrace cooc = sum $ elems $ filterWithKey (\(k,k') _ -> k == k') cooc
getGroupId
::
PhyloGroup
->
PhyloGroupId
getGroupId
group
=
((
group
^.
phylo_groupPeriod
,
group
^.
phylo_groupLevel
),
group
^.
phylo_groupIndex
)
groupByField
::
Ord
a
=>
(
PhyloGroup
->
a
)
->
[
PhyloGroup
]
->
Map
a
[
PhyloGroup
]
groupByField
toField
groups
=
fromListWith
(
++
)
$
map
(
\
g
->
(
toField
g
,
[
g
]))
groups
getPeriodPointers
::
Filiation
->
PhyloGroup
->
[
Pointer
]
getPeriodPointers
fil
group
=
case
fil
of
ToChilds
->
group
^.
phylo_groupPeriodChilds
ToParents
->
group
^.
phylo_groupPeriodParents
filterProximity
::
Proximity
->
Double
->
Double
->
Bool
filterProximity
proximity
thr
local
=
case
proximity
of
WeightedLogJaccard
_
_
_
->
local
>=
thr
Hamming
->
undefined
getProximityName
::
Proximity
->
String
getProximityName
proximity
=
case
proximity
of
WeightedLogJaccard
_
_
_
->
"WLJaccard"
Hamming
->
"Hamming"
getProximityInit
::
Proximity
->
Double
getProximityInit
proximity
=
case
proximity
of
WeightedLogJaccard
_
i
_
->
i
Hamming
->
undefined
getProximityStep
::
Proximity
->
Double
getProximityStep
proximity
=
case
proximity
of
WeightedLogJaccard
_
_
s
->
s
Hamming
->
undefined
---------------
-- | Phylo | --
---------------
...
...
@@ -202,11 +285,11 @@ addPointers :: PhyloGroup -> Filiation -> PointerType -> [Pointer] -> PhyloGroup
addPointers
group
fil
pty
pointers
=
case
pty
of
TemporalPointer
->
case
fil
of
ToChilds
->
group
&
phylo_groupPeriodChilds
%~
(
++
pointers
)
ToParents
->
group
&
phylo_groupPeriodParents
%~
(
++
pointers
)
ToChilds
->
group
&
phylo_groupPeriodChilds
.~
pointers
ToParents
->
group
&
phylo_groupPeriodParents
.~
pointers
LevelPointer
->
case
fil
of
ToChilds
->
group
&
phylo_groupLevelChilds
%~
(
++
pointers
)
ToParents
->
group
&
phylo_groupLevelParents
%~
(
++
pointers
)
ToChilds
->
group
&
phylo_groupLevelChilds
.~
pointers
ToParents
->
group
&
phylo_groupLevelParents
.~
pointers
getPeriodIds
::
Phylo
->
[(
Date
,
Date
)]
...
...
@@ -214,6 +297,19 @@ getPeriodIds phylo = sortOn fst
$
keys
$
phylo
^.
phylo_periods
getLevelParentId
::
PhyloGroup
->
PhyloGroupId
getLevelParentId
g
=
fst
$
head'
"getLevelParentId"
$
g
^.
phylo_groupLevelParents
getLastLevel
::
Phylo
->
Level
getLastLevel
phylo
=
last'
"lastLevel"
$
getLevels
phylo
getLevels
::
Phylo
->
[
Level
]
getLevels
phylo
=
nub
$
map
snd
$
keys
$
view
(
phylo_periods
.
traverse
.
phylo_periodLevels
)
phylo
getConfig
::
Phylo
->
Config
getConfig
phylo
=
(
phylo
^.
phylo_param
)
^.
phyloParam_config
...
...
@@ -222,6 +318,11 @@ getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
getRoots
::
Phylo
->
Vector
Ngrams
getRoots
phylo
=
(
phylo
^.
phylo_foundations
)
^.
foundations_roots
phyloToLastBranches
::
Phylo
->
[[
PhyloGroup
]]
phyloToLastBranches
phylo
=
elems
$
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupBranchId
,
[
g
]))
$
getGroupsFromLevel
(
last'
"byBranches"
$
getLevels
phylo
)
phylo
getGroupsFromLevel
::
Level
->
Phylo
->
[
PhyloGroup
]
getGroupsFromLevel
lvl
phylo
=
...
...
@@ -250,13 +351,38 @@ updatePhyloGroups lvl m phylo =
else
group
)
phylo
------------------
-- | Pointers | --
------------------
traceToPhylo
::
Level
->
Phylo
->
Phylo
traceToPhylo
lvl
phylo
=
trace
(
"
\n
"
<>
"-- | End of phylo making at level "
<>
show
(
lvl
)
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
lvl
phylo
)
<>
" groups and "
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
lvl
phylo
)
<>
" branches"
<>
"
\n
"
)
phylo
--------------------
-- | Clustering | --
--------------------
pointersToLinks
::
PhyloGroupId
->
[
Pointer
]
->
[
Link
]
pointersToLinks
id
pointers
=
map
(
\
p
->
((
id
,
fst
p
),
snd
p
))
pointers
relatedComponents
::
Ord
a
=>
[[
a
]]
->
[[
a
]]
relatedComponents
graph
=
foldl'
(
\
acc
groups
->
if
(
null
acc
)
then
acc
++
[
groups
]
else
let
acc'
=
partition
(
\
groups'
->
disjoint
(
Set
.
fromList
groups'
)
(
Set
.
fromList
groups
))
acc
in
(
fst
acc'
)
++
[
nub
$
concat
$
(
snd
acc'
)
++
[
groups
]])
[]
graph
traceSynchronyEnd
::
Phylo
->
Phylo
traceSynchronyEnd
phylo
=
trace
(
"
\n
"
<>
"-- | End synchronic clustering at level "
<>
show
(
getLastLevel
phylo
)
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" groups"
<>
" and "
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" branches"
<>
"
\n
"
)
phylo
traceSynchronyStart
::
Phylo
->
Phylo
traceSynchronyStart
phylo
=
trace
(
"
\n
"
<>
"-- | Start synchronic clustering at level "
<>
show
(
getLastLevel
phylo
)
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" groups"
<>
" and "
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" branches"
<>
"
\n
"
)
phylo
-------------------
...
...
@@ -279,9 +405,77 @@ getThresholdStep proxi = case proxi of
Hamming
->
undefined
traceBranchMatching
::
Proximity
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
traceBranchMatching
proxi
thr
groups
=
case
proxi
of
WeightedLogJaccard
_
i
s
->
trace
(
roundToStr
2
thr
<>
" "
<>
foldl
(
\
acc
_
->
acc
<>
"."
)
"."
[(
10
*
i
),(
10
*
i
+
10
*
s
)
..
(
10
*
thr
)]
<>
" "
<>
show
(
length
groups
)
<>
" groups"
)
groups
Hamming
->
undefined
----------------
-- | Branch | --
----------------
intersectInit
::
Eq
a
=>
[
a
]
->
[
a
]
->
[
a
]
->
[
a
]
intersectInit
acc
lst
lst'
=
if
(
null
lst
)
||
(
null
lst'
)
then
acc
else
if
(
head'
"intersectInit"
lst
)
==
(
head'
"intersectInit"
lst'
)
then
intersectInit
(
acc
++
[
head'
"intersectInit"
lst
])
(
tail
lst
)
(
tail
lst'
)
else
acc
branchIdsToProximity
::
PhyloBranchId
->
PhyloBranchId
->
Double
->
Double
->
Double
branchIdsToProximity
id
id'
thrInit
thrStep
=
thrInit
+
thrStep
*
(
fromIntegral
$
length
$
intersectInit
[]
(
snd
id
)
(
snd
id'
))
ngramsInBranches
::
[[
PhyloGroup
]]
->
[
Int
]
ngramsInBranches
branches
=
nub
$
foldl
(
\
acc
g
->
acc
++
(
g
^.
phylo_groupNgrams
))
[]
$
concat
branches
traceMatchSuccess
::
Double
->
Double
->
Double
->
[[[
PhyloGroup
]]]
->
[[[
PhyloGroup
]]]
traceMatchSuccess
thr
qua
qua'
nextBranches
=
trace
(
"
\n
"
<>
"-- local branches : "
<>
(
init
$
show
((
init
.
init
.
snd
)
$
(
head'
"trace"
$
head'
"trace"
$
head'
"trace"
nextBranches
)
^.
phylo_groupBranchId
))
<>
",(1.."
<>
show
(
length
nextBranches
)
<>
")]"
<>
" | "
<>
show
((
length
.
concat
.
concat
)
nextBranches
)
<>
" groups"
<>
"
\n
"
<>
" - splited with success in "
<>
show
(
map
length
nextBranches
)
<>
" sub-branches"
<>
"
\n
"
<>
" - for the local threshold "
<>
show
(
thr
)
<>
" ( quality : "
<>
show
(
qua
)
<>
" < "
<>
show
(
qua'
)
<>
")
\n
"
)
nextBranches
traceMatchFailure
::
Double
->
Double
->
Double
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
traceMatchFailure
thr
qua
qua'
branches
=
trace
(
"
\n
"
<>
"-- local branches : "
<>
(
init
$
show
((
init
.
snd
)
$
(
head'
"trace"
$
head'
"trace"
branches
)
^.
phylo_groupBranchId
))
<>
",(1.."
<>
show
(
length
branches
)
<>
")]"
<>
" | "
<>
show
(
length
$
concat
branches
)
<>
" groups"
<>
"
\n
"
<>
" - split with failure for the local threshold "
<>
show
(
thr
)
<>
" ( quality : "
<>
show
(
qua
)
<>
" > "
<>
show
(
qua'
)
<>
")
\n
"
)
branches
traceMatchNoSplit
::
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
traceMatchNoSplit
branches
=
trace
(
"
\n
"
<>
"-- local branches : "
<>
(
init
$
show
((
init
.
snd
)
$
(
head'
"trace"
$
head'
"trace"
branches
)
^.
phylo_groupBranchId
))
<>
",(1.."
<>
show
(
length
branches
)
<>
")]"
<>
" | "
<>
show
(
length
$
concat
branches
)
<>
" groups"
<>
"
\n
"
<>
" - unable to split in smaller branches"
<>
"
\n
"
)
branches
traceMatchLimit
::
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
traceMatchLimit
branches
=
trace
(
"
\n
"
<>
"-- local branches : "
<>
(
init
$
show
((
init
.
snd
)
$
(
head'
"trace"
$
head'
"trace"
branches
)
^.
phylo_groupBranchId
))
<>
",(1.."
<>
show
(
length
branches
)
<>
")]"
<>
" | "
<>
show
(
length
$
concat
branches
)
<>
" groups"
<>
"
\n
"
<>
" - unable to increase the threshold above 1"
<>
"
\n
"
)
branches
traceMatchEnd
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceMatchEnd
groups
=
trace
(
"
\n
"
<>
"-- | End temporal matching with "
<>
show
(
length
$
nub
$
map
(
\
g
->
g
^.
phylo_groupBranchId
)
groups
)
<>
" branches and "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
traceTemporalMatching
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceTemporalMatching
groups
=
trace
(
"
\n
"
<>
"-- | Start temporal matching for "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
\ No newline at end of file
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
eeeb82c8
...
...
@@ -16,22 +16,180 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.SynchronicClustering
where
import
Gargantext.Prelude
-- import Gargantext.Viz.AdaptativePhylo
-- import Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.TemporalMatching
(
weightedLogJaccard
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
,
sortOn
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
qualified
Data.Map
as
Map
-------------------------
-- | New Level Maker | --
-------------------------
toBranchId
::
PhyloGroup
->
PhyloBranchId
toBranchId
child
=
((
child
^.
phylo_groupLevel
)
+
1
,
snd
(
child
^.
phylo_groupBranchId
))
mergeGroups
::
[
Cooc
]
->
PhyloGroupId
->
Map
PhyloGroupId
PhyloGroupId
->
[
PhyloGroup
]
->
PhyloGroup
mergeGroups
coocs
id
mapIds
childs
=
let
ngrams
=
(
sort
.
nub
.
concat
)
$
map
_phylo_groupNgrams
childs
in
PhyloGroup
(
fst
$
fst
id
)
(
snd
$
fst
id
)
(
snd
id
)
""
(
sum
$
map
_phylo_groupSupport
childs
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
toBranchId
(
head'
"mergeGroups"
childs
))
empty
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodParents
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodChilds
childs
)
where
updatePointers
::
[
Pointer
]
->
[
Pointer
]
updatePointers
pointers
=
map
(
\
(
pId
,
w
)
->
(
mapIds
!
pId
,
w
))
pointers
addPhyloLevel
::
Level
->
Phylo
->
Phylo
addPhyloLevel
lvl
phylo
=
over
(
phylo_periods
.
traverse
)
(
\
phyloPrd
->
phyloPrd
&
phylo_periodLevels
%~
(
insert
(
phyloPrd
^.
phylo_periodPeriod
,
lvl
)
(
PhyloLevel
(
phyloPrd
^.
phylo_periodPeriod
)
lvl
empty
)))
phylo
toNextLevel
::
Phylo
->
[
PhyloGroup
]
->
Phylo
toNextLevel
phylo
groups
=
let
curLvl
=
getLastLevel
phylo
oldGroups
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
getLevelParentId
g
))
groups
newGroups
=
fromListWith
(
++
)
-- | 5) group the parents by periods
$
foldlWithKey
(
\
acc
id
groups'
->
-- | 4) create the parent group
let
parent
=
mergeGroups
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[(
fst
.
fst
)
id
])
id
oldGroups
groups'
in
acc
++
[(
parent
^.
phylo_groupPeriod
,
[
parent
])])
[]
-- | 3) group the current groups by parentId
$
fromListWith
(
++
)
$
map
(
\
g
->
(
getLevelParentId
g
,
[
g
]))
groups
in
traceSynchronyEnd
$
over
(
phylo_periods
.
traverse
.
phylo_periodLevels
.
traverse
-- | 6) update each period at curLvl + 1
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
(
curLvl
+
1
)))
-- | 7) by adding the parents
(
\
phyloLvl
->
if
member
(
phyloLvl
^.
phylo_levelPeriod
)
newGroups
then
phyloLvl
&
phylo_levelGroups
.~
fromList
(
map
(
\
g
->
(
getGroupId
g
,
g
))
$
newGroups
!
(
phyloLvl
^.
phylo_levelPeriod
))
else
phyloLvl
)
-- | 2) add the curLvl + 1 phyloLevel to the phylo
$
addPhyloLevel
(
curLvl
+
1
)
-- | 1) update the current groups (with level parent pointers) in the phylo
$
updatePhyloGroups
curLvl
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
groups
)
phylo
import
Data.List
(
foldl'
,
(
++
),
null
,
intersect
,
(
\\
),
union
,
nub
,
concat
)
--------------------
-- | Clustering | --
--------------------
relatedComponents
::
Eq
a
=>
[[
a
]]
->
[[
a
]]
relatedComponents
graphs
=
foldl'
(
\
mem
groups
->
if
(
null
mem
)
then
mem
++
[
groups
]
else
let
related
=
filter
(
\
groups'
->
(
not
.
null
)
$
intersect
groups
groups'
)
mem
in
if
(
null
related
)
then
mem
++
[
groups
]
else
(
mem
\\
related
)
++
[
union
groups
(
nub
$
concat
related
)]
)
[]
graphs
\ No newline at end of file
toPairs
::
[
PhyloGroup
]
->
[(
PhyloGroup
,
PhyloGroup
)]
toPairs
groups
=
filter
(
\
(
g
,
g'
)
->
(
not
.
null
)
$
intersect
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
))
$
listToCombi'
groups
toDiamonds
::
[
PhyloGroup
]
->
[[
PhyloGroup
]]
toDiamonds
groups
=
foldl'
(
\
acc
groups'
->
acc
++
(
elems
$
Map
.
filter
(
\
v
->
length
v
>
1
)
$
fromListWith
(
++
)
$
foldl'
(
\
acc'
g
->
acc'
++
(
map
(
\
(
id
,
_
)
->
(
id
,[
g
])
)
$
g
^.
phylo_groupPeriodChilds
))
[]
groups'
))
[]
$
elems
$
Map
.
filter
(
\
v
->
length
v
>
1
)
$
fromListWith
(
++
)
$
foldl'
(
\
acc
g
->
acc
++
(
map
(
\
(
id
,
_
)
->
(
id
,[
g
])
)
$
g
^.
phylo_groupPeriodParents
)
)
[]
groups
groupsToEdges
::
Proximity
->
Synchrony
->
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
prox
sync
docs
groups
=
case
sync
of
ByProximityThreshold
t
s
->
filter
(
\
(
_
,
w
)
->
w
>=
t
)
$
toEdges
s
$
toPairs
groups
ByProximityDistribution
s
->
let
diamonds
=
sortOn
snd
$
toEdges
s
$
concat
$
map
toPairs
$
toDiamonds
groups
in
take
(
div
(
length
diamonds
)
2
)
diamonds
where
toEdges
::
Double
->
[(
PhyloGroup
,
PhyloGroup
)]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
toEdges
sens
edges
=
case
prox
of
WeightedLogJaccard
_
_
_
->
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard
sens
docs
(
g
^.
phylo_groupCooc
)
(
g'
^.
phylo_groupCooc
)
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
edges
_
->
undefined
toRelatedComponents
::
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
->
[[
PhyloGroup
]]
toRelatedComponents
nodes
edges
=
let
ref
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
nodes
clusters
=
relatedComponents
$
((
map
(
\
((
g
,
g'
),
_
)
->
[
getGroupId
g
,
getGroupId
g'
])
edges
)
++
(
map
(
\
g
->
[
getGroupId
g
])
nodes
))
in
map
(
\
cluster
->
map
(
\
gId
->
ref
!
gId
)
cluster
)
clusters
toParentId
::
PhyloGroup
->
PhyloGroupId
toParentId
child
=
((
child
^.
phylo_groupPeriod
,
child
^.
phylo_groupLevel
+
1
),
child
^.
phylo_groupIndex
)
reduceBranch
::
Proximity
->
Synchrony
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceBranch
prox
sync
docs
branch
=
-- | 1) reduce a branch as a set of periods & groups
let
periods
=
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,[
g
]))
branch
in
(
concat
.
concat
.
elems
)
$
mapWithKey
(
\
prd
groups
->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let
edges
=
groupsToEdges
prox
sync
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
groups
in
map
(
\
comp
->
-- | 4) add to each groups their futur level parent group
let
parentId
=
toParentId
(
head'
"parentId"
comp
)
in
map
(
\
g
->
g
&
phylo_groupLevelParents
%~
(
++
[(
parentId
,
1
)])
)
comp
)
-- |3) reduce the graph a a set of related components
$
toRelatedComponents
groups
edges
)
periods
synchronicClustering
::
Phylo
->
Phylo
synchronicClustering
phylo
=
let
prox
=
phyloProximity
$
getConfig
phylo
sync
=
phyloSynchrony
$
getConfig
phylo
docs
=
phylo
^.
phylo_timeDocs
branches
=
map
(
\
branch
->
reduceBranch
prox
sync
docs
branch
)
$
phyloToLastBranches
$
traceSynchronyStart
phylo
branches'
=
branches
`
using
`
parList
rdeepseq
in
toNextLevel
phylo
$
concat
branches'
----------------
-- | probes | --
----------------
-- synchronicDistance :: Phylo -> Level -> String
-- synchronicDistance phylo lvl =
-- foldl' (\acc branch ->
-- acc <> (foldl' (\acc' period ->
-- acc' <> let prox = phyloProximity $ getConfig phylo
-- sync = phyloSynchrony $ getConfig phylo
-- docs = _phylo_timeDocs phylo
-- prd = _phylo_groupPeriod $ head' "distance" period
-- edges = groupsToEdges prox 0.1 (_bpt_sensibility sync)
-- ((sum . elems) $ restrictKeys docs $ periodsToYears [_phylo_groupPeriod $ head' "distance" period]) period
-- in foldl' (\mem (_,w) ->
-- mem <> show (prd)
-- <> "\t"
-- <> show (w)
-- <> "\n"
-- ) "" edges
-- ) "" $ elems $ groupByField _phylo_groupPeriod branch)
-- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo
\ No newline at end of file
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
eeeb82c8
...
...
@@ -15,19 +15,21 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.TemporalMatching
where
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
find
,
groupBy
,
scanl
,
nub
,
union
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
filterWithKey
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
,
findWithDefault
)
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
union
,
dropWhile
,
partition
,
delete
)
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
,
findWithDefault
,
keys
,
(
!
),
filterWithKey
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.SynchronicClustering
import
Prelude
(
logBase
)
--
import Prelude (logBase)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Debug.Trace
(
trace
)
import
qualified
Data.Set
as
Set
-------------------
-- | Proximity | --
-------------------
...
...
@@ -65,10 +67,11 @@ weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
ngramsUnion
=
union
ngrams
ngrams'
--------------------------------------
coocInter
::
[
Double
]
coocInter
=
elems
$
map
(
/
docs
)
$
intersectionWith
(
+
)
cooc
cooc'
coocInter
=
elems
$
map
(
/
docs
)
$
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
$
intersectionWith
(
+
)
cooc
cooc'
-- coocInter = elems $ map (/docs) $ intersectionWith (+) cooc cooc'
--------------------------------------
coocUnion
::
[
Double
]
coocUnion
=
elems
$
map
(
/
docs
)
$
unionWith
(
+
)
cooc
cooc'
coocUnion
=
elems
$
map
(
/
docs
)
$
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
$
unionWith
(
+
)
cooc
cooc'
--------------------------------------
...
...
@@ -79,13 +82,6 @@ pickProximity proximity docs cooc cooc' ngrams ngrams' = case proximity of
Hamming
->
undefined
filterProximity
::
Proximity
->
Double
->
Double
->
Bool
filterProximity
proximity
thr
local
=
case
proximity
of
WeightedLogJaccard
_
_
_
->
local
>=
thr
Hamming
->
undefined
-- | To process the proximity between a current group and a pair of targets group
toProximity
::
Map
Date
Double
->
Proximity
->
PhyloGroup
->
PhyloGroup
->
PhyloGroup
->
Double
toProximity
docs
proximity
ego
target
target'
=
...
...
@@ -103,50 +99,88 @@ toProximity docs proximity ego target target' =
-- | Local Matching | --
------------------------
toLastPeriod
::
Filiation
->
[
PhyloPeriodId
]
->
PhyloPeriodId
toLastPeriod
fil
periods
=
case
fil
of
ToParents
->
head'
"toLastPeriod"
(
sortOn
fst
periods
)
ToChilds
->
last'
"toLastPeriod"
(
sortOn
fst
periods
)
toLazyPairs
::
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
PhyloPeriodId
->
[(
PhyloGroup
,
PhyloGroup
)]
->
[(
PhyloGroup
,
PhyloGroup
)]
toLazyPairs
pointers
fil
thr
prox
prd
pairs
=
if
null
pointers
then
pairs
else
let
rest
=
filterPointers
prox
thr
pointers
in
if
null
rest
then
let
prd'
=
toLastPeriod
fil
(
map
(
fst
.
fst
.
fst
)
pointers
)
in
if
prd'
==
prd
then
[]
else
filter
(
\
(
g
,
g'
)
->
case
fil
of
ToParents
->
((
fst
$
g
^.
phylo_groupPeriod
)
<
(
fst
prd'
))
||
((
fst
$
g'
^.
phylo_groupPeriod
)
<
(
fst
prd'
))
ToChilds
->
((
fst
$
g
^.
phylo_groupPeriod
)
>
(
fst
prd'
))
||
((
fst
$
g'
^.
phylo_groupPeriod
)
>
(
fst
prd'
)))
pairs
else
[]
-- | Find pairs of valuable candidates to be matched
makePairs
::
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs
candidates
periods
=
case
null
periods
of
makePairs'
::
PhyloGroup
->
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
Map
Date
Double
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs'
ego
candidates
periods
pointers
fil
thr
prox
docs
=
case
null
periods
of
True
->
[]
False
->
toLazyPairs
pointers
fil
thr
prox
lastPrd
-- | at least on of the pair candidates should be from the last added period
False
->
filter
(
\
(
cdt
,
cdt'
)
->
(
inLastPeriod
cdt
periods
)
||
(
inLastPeriod
cdt'
periods
))
$
listToKeys
candidates
$
filter
(
\
(
g
,
g'
)
->
((
g
^.
phylo_groupPeriod
)
==
lastPrd
)
||
((
g'
^.
phylo_groupPeriod
)
==
lastPrd
))
$
listToKeys
$
filter
(
\
g
->
(
g
^.
phylo_groupPeriod
==
lastPrd
)
||
((
toProximity
docs
prox
ego
ego
g
)
>=
thr
))
candidates
where
inLastPeriod
::
PhyloGroup
->
[
PhyloPeriodId
]
->
Bool
inLastPeriod
g
prds
=
(
g
^.
phylo_groupPeriod
)
==
(
last'
"makePairs"
prds
)
lastPrd
::
PhyloPeriodId
lastPrd
=
toLastPeriod
fil
periods
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
candidates
fil
proxi
docs
thr
ego
=
case
pointers
of
Nothing
->
addPointers
ego
fil
TemporalPointer
[]
Just
pts
->
addPointers
ego
fil
TemporalPointer
filterPointers
::
Proximity
->
Double
->
[
Pointer
]
->
[
Pointer
]
filterPointers
proxi
thr
pts
=
filter
(
\
(
_
,
w
)
->
filterProximity
proxi
thr
w
)
pts
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
candidates
fil
proxi
docs
thr
ego
=
case
null
nextPointers
of
-- | let's find new pointers
True
->
if
null
$
filterPointers
proxi
thr
$
getPeriodPointers
fil
ego
then
addPointers
ego
fil
TemporalPointer
[]
-- | or keep the old ones
else
addPointers
ego
fil
TemporalPointer
$
filterPointers
proxi
thr
$
getPeriodPointers
fil
ego
False
->
addPointers
ego
fil
TemporalPointer
$
head'
"phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
reverse
$
sortOn
snd
pts
$
reverse
$
sortOn
snd
$
head'
"pointers"
$
nextPointers
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
where
pointers
::
Maybe
[
Pointer
]
pointers
=
find
(
not
.
null
)
nextPointers
::
[[
Pointer
]]
nextPointers
=
take
1
$
dropWhile
(
null
)
-- | for each time frame, process the proximity on relevant pairs of targeted groups
$
scanl
(
\
acc
groups
->
let
periods
=
nub
$
map
(
\
g'
->
g'
^.
phylo_groupPeriod
)
$
concat
groups
pairs
=
makePairs
(
concat
groups
)
periods
in
acc
++
(
filter
(
\
(
_
,
proximity
)
->
filterProximity
proxi
thr
proximity
)
let
periods
=
nub
$
map
_phylo_groupPeriod
$
concat
groups
docs'
=
(
filterDocs
docs
([
ego
^.
phylo_groupPeriod
]
++
periods
))
pairs
=
makePairs'
ego
(
concat
groups
)
periods
(
getPeriodPointers
fil
ego
)
fil
thr
proxi
docs
in
acc
++
(
filterPointers
proxi
thr
$
concat
$
map
(
\
(
c
,
c'
)
->
-- | process the proximity between the current group and a pair of candidates
let
proximity
=
toProximity
(
filterDocs
docs
periods
)
proxi
ego
c
c'
let
proximity
=
toProximity
docs'
proxi
ego
c
c'
in
if
(
c
==
c'
)
then
[(
getGroupId
c
,
proximity
)]
else
[(
getGroupId
c
,
proximity
),(
getGroupId
c'
,
proximity
)]
)
pairs
)
)
[]
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
$
inits
candidates
--------------------------------------
filterDocs
::
Map
Date
Double
->
[
PhyloPeriodId
]
->
Map
Date
Double
filterDocs
d
pds
=
restrictKeys
d
$
periodsToYears
pds
else
[(
getGroupId
c
,
proximity
),(
getGroupId
c'
,
proximity
)]
)
pairs
))
[]
$
inits
candidates
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
filterDocs
::
Map
Date
Double
->
[
PhyloPeriodId
]
->
Map
Date
Double
filterDocs
d
pds
=
restrictKeys
d
$
periodsToYears
pds
-----------------------------
...
...
@@ -161,30 +195,40 @@ getNextPeriods fil max' pId pIds =
ToParents
->
take
max'
$
(
reverse
.
fst
)
$
splitAt
(
elemIndex'
pId
pIds
)
pIds
getCandidates
::
Filiation
->
PhyloGroup
->
[
PhyloPeriodId
]
->
[
PhyloGroup
]
->
[[
PhyloGroup
]]
getCandidates
fil
ego
pIds
targets
=
getCandidates
::
Filiation
->
PhyloGroup
->
[
[
PhyloGroup
]
]
->
[[
PhyloGroup
]]
getCandidates
fil
ego
targets
=
case
fil
of
ToChilds
->
targets'
ToParents
->
reverse
targets'
where
targets'
::
[[
PhyloGroup
]]
targets'
=
map
(
\
groups'
->
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
ego
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
))
groups'
)
$
elems
$
filterWithKey
(
\
k
_
->
elem
k
pIds
)
$
fromListWith
(
++
)
$
sortOn
(
fst
.
fst
)
$
map
(
\
g'
->
(
g'
^.
phylo_groupPeriod
,[
g'
]))
targets
processMatching
::
Int
->
[
PhyloPeriodId
]
->
Proximity
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
processMatching
max'
periods
proximity
thr
docs
groups
=
map
(
\
group
->
let
childs
=
getCandidates
ToChilds
group
(
getNextPeriods
ToChilds
max'
(
group
^.
phylo_groupPeriod
)
periods
)
groups
parents
=
getCandidates
ToParents
group
(
getNextPeriods
ToParents
max'
(
group
^.
phylo_groupPeriod
)
periods
)
groups
in
phyloGroupMatching
parents
ToParents
proximity
docs
thr
$
phyloGroupMatching
childs
ToChilds
proximity
docs
thr
group
)
groups
targets'
=
map
(
\
groups'
->
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
ego
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)
)
groups'
)
targets
phyloBranchMatching
::
Int
->
[
PhyloPeriodId
]
->
Proximity
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
phyloBranchMatching
frame
periods
proximity
thr
docs
branch
=
traceBranchMatching
proximity
thr
-- $ matchByPeriods ToParents
-- $ groupByField _phylo_groupPeriod
$
matchByPeriods
$
groupByField
_phylo_groupPeriod
branch
where
--------------------------------------
matchByPeriods
::
Map
PhyloPeriodId
[
PhyloGroup
]
->
[
PhyloGroup
]
matchByPeriods
branch'
=
foldl'
(
\
acc
prd
->
let
periodsPar
=
getNextPeriods
ToParents
frame
prd
periods
periodsChi
=
getNextPeriods
ToChilds
frame
prd
periods
candidatesPar
=
map
(
\
prd'
->
findWithDefault
[]
prd'
branch'
)
periodsPar
candidatesChi
=
map
(
\
prd'
->
findWithDefault
[]
prd'
branch'
)
periodsChi
docsPar
=
filterDocs
docs
([
prd
]
++
periodsPar
)
docsChi
=
filterDocs
docs
([
prd
]
++
periodsChi
)
egos
=
map
(
\
ego
->
phyloGroupMatching
(
getCandidates
ToParents
ego
candidatesPar
)
ToParents
proximity
docsPar
thr
$
phyloGroupMatching
(
getCandidates
ToChilds
ego
candidatesChi
)
ToChilds
proximity
docsChi
thr
ego
)
$
findWithDefault
[]
prd
branch'
egos'
=
egos
`
using
`
parList
rdeepseq
in
acc
++
egos'
)
[]
periods
-----------------------
...
...
@@ -192,35 +236,80 @@ processMatching max' periods proximity thr docs groups =
-----------------------
termFreq
::
Int
->
[[
PhyloGroup
]]
->
Double
termFreq
term
branches
=
(
sum
$
map
(
\
g
->
findWithDefault
0
(
term
,
term
)
(
g
^.
phylo_groupCooc
))
$
concat
branches
)
/
(
sum
$
map
(
\
g
->
getTrace
$
g
^.
phylo_groupCooc
)
$
concat
branches
)
count
::
Eq
a
=>
a
->
[
a
]
->
Int
count
x
=
length
.
filter
(
==
x
)
termFreq'
::
Int
->
[
PhyloGroup
]
->
Double
termFreq'
term
groups
=
let
ngrams
=
concat
$
map
_phylo_groupNgrams
groups
in
log
((
fromIntegral
$
count
term
ngrams
)
/
(
fromIntegral
$
length
ngrams
))
entropy
::
[[
PhyloGroup
]]
->
Double
entropy
branches
=
let
terms
=
ngramsInBranches
branches
in
sum
$
map
(
\
term
->
(
1
/
log
(
termFreq
term
branches
))
/
(
sum
$
map
(
\
branch
->
1
/
log
(
termFreq
term
[
branch
]))
branches
)
*
(
sum
$
map
(
\
branch
->
let
q
=
branchObs
term
(
length
$
concat
branches
)
branch
in
q
*
logBase
2
q
)
branches
)
)
terms
where
-- | Probability to observe a branch given a random term of the phylo
branchObs
::
Int
->
Int
->
[
PhyloGroup
]
->
Double
branchObs
term
total
branch
=
(
fromIntegral
$
length
$
filter
(
\
g
->
elem
term
$
g
^.
phylo_groupNgrams
)
branch
)
/
(
fromIntegral
total
)
relevantBranches
::
Int
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
relevantBranches
term
branches
=
filter
(
\
groups
->
(
any
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
groups
))
branches
branchCov'
::
[
PhyloGroup
]
->
[[
PhyloGroup
]]
->
Double
branchCov'
branch
branches
=
(
fromIntegral
$
length
branch
)
/
(
fromIntegral
$
length
$
concat
branches
)
homogeneity
::
[[
PhyloGroup
]]
->
Double
homogeneity
_
=
undefined
-- where
-- branchCov :: [PhyloGroup] -> Int -> Double
-- branchCov branch total = (fromIntegral $ length branch) / (fromIntegral total)
toRecall
::
Double
->
Int
->
Int
->
[[
PhyloGroup
]]
->
Double
toRecall
freq
term
border
branches
=
-- | given a random term in a phylo
freq
-- | for each relevant branches
*
(
sum
$
map
(
\
branch
->
-- | given its local coverage
((
branchCov'
branch
branches'
)
/
(
sum
$
map
(
\
b
->
branchCov'
b
branches'
)
branches'
))
-- | compute the local recall
*
(
(
fromIntegral
$
length
$
filter
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
branch
)
/
(
(
fromIntegral
$
length
$
filter
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
$
concat
branches'
)
-- | with a ponderation from border branches
+
(
fromIntegral
border
))
))
branches'
)
where
branches'
::
[[
PhyloGroup
]]
branches'
=
relevantBranches
term
branches
toAccuracy
::
Double
->
Int
->
[[
PhyloGroup
]]
->
Double
toAccuracy
freq
term
branches
=
if
(
null
branches
)
then
0
else
-- | given a random term in a phylo
freq
-- | for each relevant branches
*
(
sum
$
map
(
\
branch
->
-- | given its local coverage
((
branchCov'
branch
branches'
)
/
(
sum
$
map
(
\
b
->
branchCov'
b
branches'
)
branches'
))
-- | compute the local accuracy
*
(
(
fromIntegral
$
length
$
filter
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
branch
)
/
(
fromIntegral
$
length
branch
)))
branches'
)
where
branches'
::
[[
PhyloGroup
]]
branches'
=
relevantBranches
term
branches
toPhyloQuality
::
Double
->
Map
Int
Double
->
Int
->
Double
->
[[
PhyloGroup
]]
->
Double
toPhyloQuality
beta
frequency
border
oldAcc
branches
=
-- trace (" rec : " <> show(recall)) $
-- trace (" acc : " <> show(accuracy)) $
if
(
null
branches
)
then
0
else
((
1
+
beta
**
2
)
*
accuracy
*
recall
)
/
(((
beta
**
2
)
*
accuracy
+
recall
))
where
-- | for each term compute the global accuracy
accuracy
::
Double
accuracy
=
oldAcc
+
(
sum
$
map
(
\
term
->
toAccuracy
(
frequency
!
term
)
term
branches
)
$
keys
frequency
)
-- | for each term compute the global recall
recall
::
Double
recall
=
sum
$
map
(
\
term
->
toRecall
(
frequency
!
term
)
term
border
branches
)
$
keys
frequency
to
PhyloQuality
::
[[
PhyloGroup
]]
->
Double
to
PhyloQuality
branches
=
sqrt
(
homogeneity
branches
/
entropy
branches
)
to
BorderAccuracy
::
Map
Int
Double
->
[[
PhyloGroup
]]
->
Double
to
BorderAccuracy
freq
branches
=
sum
$
map
(
\
t
->
toAccuracy
(
freq
!
t
)
t
branches
)
$
keys
freq
-----------------------------
...
...
@@ -231,54 +320,102 @@ toPhyloQuality branches = sqrt (homogeneity branches / entropy branches)
groupsToBranches
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
groupsToBranches
groups
=
-- | run the related component algorithm
let
graph
=
zip
[
1
..
]
$
relatedComponents
let
egos
=
groupBy
(
\
gs
gs'
->
(
fst
$
fst
$
head'
"egos"
gs
)
==
(
fst
$
fst
$
head'
"egos"
gs'
))
$
sortOn
(
\
gs
->
fst
$
fst
$
head'
"egos"
gs
)
$
map
(
\
group
->
[
getGroupId
group
]
++
(
map
fst
$
group
^.
phylo_groupPeriodParents
)
++
(
map
fst
$
group
^.
phylo_groupPeriodChilds
)
)
$
elems
groups
-- | first find the related components by inside each ego's period
graph'
=
map
relatedComponents
egos
-- | then run it for the all the periods
graph
=
zip
[
1
..
]
$
relatedComponents
$
concat
(
graph'
`
using
`
parList
rdeepseq
)
-- | update each group's branch id
in
map
(
\
(
bId
,
ids
)
->
map
(
\
group
->
group
&
phylo_groupBranchId
%~
(
\
(
lvl
,
lst
)
->
(
lvl
,
lst
++
[
bId
])))
let
groups'
=
map
(
\
group
->
group
&
phylo_groupBranchId
%~
(
\
(
lvl
,
lst
)
->
(
lvl
,
lst
++
[
bId
])))
$
elems
$
restrictKeys
groups
(
Set
.
fromList
ids
)
)
graph
recursiveMatching
::
Proximity
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
recursiveMatching
proximity
thr
max'
periods
docs
quality
groups
=
case
quality
<
quality'
of
-- | success : we localy improve the quality of the branch, let's go deeper
True
->
concat
$
map
(
\
branch
->
recursiveMatching
proximity
(
thr
+
(
getThresholdStep
proximity
))
max'
periods
docs
quality'
branch
)
branches
-- | failure : last step was the local maximum, let's validate it
False
->
groups
in
groups'
`
using
`
parList
rdeepseq
)
graph
reduceFrequency
::
Map
Int
Double
->
[[
PhyloGroup
]]
->
Map
Int
Double
reduceFrequency
frequency
branches
=
restrictKeys
frequency
(
Set
.
fromList
$
(
nub
.
concat
)
$
map
_phylo_groupNgrams
$
concat
branches
)
alterBorder
::
Int
->
[[
PhyloGroup
]]
->
[
PhyloGroup
]
->
Int
alterBorder
border
branches
branch
=
border
+
(
length
$
concat
branches
)
-
(
length
branch
)
recursiveMatching
::
Proximity
->
Double
->
Int
->
Map
Int
Double
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Double
->
Int
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
recursiveMatching
proximity
beta
minBranch
frequency
egoThr
frame
periods
docs
quality
border
oldAcc
groups
=
if
((
egoThr
>=
1
)
||
(
quality
>
quality'
)
||
((
length
$
concat
$
snd
branches'
)
==
(
length
groups
)))
then
trace
(
" ✗ F(β) = "
<>
show
(
quality
)
<>
" (vs) "
<>
show
(
quality'
)
<>
"
\n
"
<>
" |✓ "
<>
show
(
length
$
fst
branches'
)
<>
show
(
map
length
$
fst
branches'
)
<>
" |✗ "
<>
show
(
length
$
snd
branches'
)
<>
"["
<>
show
(
length
$
concat
$
snd
branches'
)
<>
"]"
)
$
groups
else
let
next
=
map
(
\
b
->
recursiveMatching
proximity
beta
minBranch
(
reduceFrequency
frequency
(
fst
branches'
))
(
egoThr
+
(
getThresholdStep
proximity
))
frame
periods
docs
quality'
(
alterBorder
border
(
fst
branches'
)
b
)
(
oldAcc
+
(
toBorderAccuracy
frequency
(
delete
b
((
fst
branches'
)
++
(
snd
branches'
)))))
b
)
(
fst
branches'
)
in
trace
(
" ✓ F(β) = "
<>
show
(
quality
)
<>
" (vs) "
<>
show
(
quality'
)
<>
"
\n
"
<>
" |✓ "
<>
show
(
length
$
fst
branches'
)
<>
show
(
map
length
$
fst
branches'
)
<>
" |✗ "
<>
show
(
length
$
snd
branches'
)
<>
"["
<>
show
(
length
$
concat
$
snd
branches'
)
<>
"]"
)
$
concat
(
next
++
(
snd
branches'
))
where
-- | 3) process a quality score on the local set of branches
-- | 2) for each of the possible next branches process the phyloQuality score
quality'
::
Double
quality'
=
toPhyloQuality
branches
-- | 2) group the new groups into branches
branches
::
[[
PhyloGroup
]]
branches
=
groupsToBranches
$
fromList
$
map
(
\
group
->
(
getGroupId
group
,
group
))
groups'
-- | 1) process a temporal matching for each group
groups'
::
[
PhyloGroup
]
groups'
=
processMatching
max'
periods
proximity
thr
docs
groups
quality'
=
toPhyloQuality
beta
frequency
border
oldAcc
((
fst
branches'
)
++
(
snd
branches'
))
-- | 1) for each local branch process a temporal matching then find the resulting branches
branches'
::
([[
PhyloGroup
]],[[
PhyloGroup
]])
branches'
=
let
branches
=
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
phyloBranchMatching
frame
periods
proximity
egoThr
docs
groups
in
partition
(
\
b
->
length
b
>=
minBranch
)
(
branches
`
using
`
parList
rdeepseq
)
temporalMatching
::
Phylo
->
Phylo
temporalMatching
phylo
=
updatePhyloGroups
1
branches
phylo
temporalMatching
phylo
=
updatePhyloGroups
1
branches
'
phylo
where
-- | 2) run the recursive matching to find the best repartition among branches
branches
::
Map
PhyloGroupId
PhyloGroup
branches
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
recursiveMatching
(
phyloProximity
$
getConfig
phylo
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
-- | 5) apply the recursive matching
branches'
::
Map
PhyloGroupId
PhyloGroup
branches'
=
let
next
=
trace
(
" ✓ F(β) = "
<>
show
(
quality
)
<>
" |✓ "
<>
show
(
length
$
fst
branches
)
<>
show
(
map
length
$
fst
branches
)
<>
" |✗ "
<>
show
(
length
$
snd
branches
)
<>
"["
<>
show
(
length
$
concat
$
snd
branches
)
<>
"]"
)
$
map
(
\
branch
->
recursiveMatching
(
phyloProximity
$
getConfig
phylo
)
(
_qua_relevance
$
phyloQuality
$
getConfig
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
(
reduceFrequency
frequency
(
fst
branches
))
(
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
+
(
getThresholdStep
$
phyloProximity
$
getConfig
phylo
))
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
toPhyloQuality
[
groups'
])
groups'
(
phylo
^.
phylo_timeDocs
)
quality
(
alterBorder
0
(
fst
branches
)
branch
)
(
toBorderAccuracy
frequency
(
delete
branch
((
fst
branches
)
++
(
snd
branches
))))
branch
)
(
fst
branches
)
in
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
traceMatchEnd
(
concat
(
next
++
(
snd
branches
)))
-- | 4) process the quality score
quality
::
Double
quality
=
toPhyloQuality
(
_qua_relevance
$
phyloQuality
$
getConfig
phylo
)
frequency
0
0
((
fst
branches
)
++
(
snd
branches
))
-- | 3) process the constants of the quality score
frequency
::
Map
Int
Double
frequency
=
let
terms
=
ngramsInBranches
((
fst
branches
)
++
(
snd
branches
))
freqs
=
map
(
\
t
->
termFreq'
t
$
concat
((
fst
branches
)
++
(
snd
branches
)))
terms
in
fromList
$
map
(
\
(
t
,
freq
)
->
(
t
,
freq
/
(
sum
freqs
)))
$
zip
terms
freqs
-- | 2) group into branches
branches
::
([[
PhyloGroup
]],[[
PhyloGroup
]])
branches
=
partition
(
\
b
->
length
b
>=
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
))
$
groupsToBranches
$
fromList
$
map
(
\
group
->
(
getGroupId
group
,
group
))
groups'
-- | 1) for each group process an initial temporal Matching
groups'
::
[
PhyloGroup
]
groups'
=
process
Matching
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
groups'
=
phyloBranch
Matching
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phyloProximity
$
getConfig
phylo
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
getGroupsFromLevel
1
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
traceTemporalMatching
$
getGroupsFromLevel
1
phylo
)
src/Gargantext/Viz/Phylo/Tools.hs
View file @
eeeb82c8
...
...
@@ -815,7 +815,7 @@ getProximity cluster = case cluster of
-- | To initialize all the Cluster / Proximity with their default parameters
initFis
::
Maybe
Bool
->
Maybe
Support
->
Maybe
Int
->
FisParams
initFis
(
def
True
->
kmf
)
(
def
2
->
min'
)
(
def
4
->
thr
)
=
FisParams
kmf
min'
thr
initFis
(
def
True
->
kmf
)
(
def
0
->
min'
)
(
def
0
->
thr
)
=
FisParams
kmf
min'
thr
initHamming
::
Maybe
Double
->
HammingParams
initHamming
(
def
0.01
->
sens
)
=
HammingParams
sens
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment