Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
2d0a7430
Commit
2d0a7430
authored
Jan 14, 2020
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add adaptative and constante sea level elevation
parent
a9fc87aa
Changes
7
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
386 additions
and
152 deletions
+386
-152
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+27
-7
PhyloExample.hs
src/Gargantext/Viz/Phylo/PhyloExample.hs
+7
-2
PhyloExport.hs
src/Gargantext/Viz/Phylo/PhyloExport.hs
+47
-23
PhyloMaker.hs
src/Gargantext/Viz/Phylo/PhyloMaker.hs
+46
-12
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+44
-46
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+35
-34
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+180
-28
No files found.
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
2d0a7430
...
...
@@ -57,12 +57,22 @@ data CorpusParser =
|
Csv
{
_csv_limit
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
SeaElevation
=
Constante
{
_cons_start
::
Double
,
_cons_step
::
Double
}
|
Adaptative
{
_adap_granularity
::
Double
}
deriving
(
Show
,
Generic
,
Eq
)
data
Proximity
=
WeightedLogJaccard
{
_wlj_sensibility
::
Double
,
_wlj_thresholdInit
::
Double
,
_wlj_thresholdStep
::
Double
}
-- , _wlj_thresholdInit :: Double
-- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double
}
|
Hamming
deriving
(
Show
,
Generic
,
Eq
)
...
...
@@ -114,6 +124,7 @@ data Config =
,
phyloName
::
Text
,
phyloLevel
::
Int
,
phyloProximity
::
Proximity
,
seaElevation
::
SeaElevation
,
phyloSynchrony
::
Synchrony
,
phyloQuality
::
Quality
,
timeUnit
::
TimeUnit
...
...
@@ -132,8 +143,9 @@ defaultConfig =
,
corpusParser
=
Csv
1000
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
2
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloSynchrony
=
ByProximityThreshold
0.1
10
AllBranches
MergeAllGroups
,
phyloProximity
=
WeightedLogJaccard
10
,
seaElevation
=
Adaptative
25
,
phyloSynchrony
=
ByProximityThreshold
0.6
10
SiblingBranches
MergeAllGroups
,
phyloQuality
=
Quality
0.1
1
,
timeUnit
=
Year
3
1
5
,
clique
=
Fis
1
5
...
...
@@ -148,6 +160,8 @@ instance FromJSON CorpusParser
instance
ToJSON
CorpusParser
instance
FromJSON
Proximity
instance
ToJSON
Proximity
instance
FromJSON
SeaElevation
instance
ToJSON
SeaElevation
instance
FromJSON
TimeUnit
instance
ToJSON
TimeUnit
instance
FromJSON
Clique
...
...
@@ -253,6 +267,7 @@ data Phylo =
,
_phylo_timeCooc
::
!
(
Map
Date
Cooc
)
,
_phylo_timeDocs
::
!
(
Map
Date
Double
)
,
_phylo_termFreq
::
!
(
Map
Int
Double
)
,
_phylo_groupsProxi
::
!
(
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
)
,
_phylo_param
::
PhyloParam
,
_phylo_periods
::
Map
PhyloPeriodId
PhyloPeriod
}
...
...
@@ -366,9 +381,13 @@ data PhyloLabel =
data
PhyloBranch
=
PhyloBranch
{
_branch_id
::
PhyloBranchId
,
_branch_label
::
Text
,
_branch_meta
::
Map
Text
[
Double
]
}
deriving
(
Generic
,
Show
)
,
_branch_canonId
::
[
Int
]
,
_branch_seaLevel
::
[
Double
]
,
_branch_x
::
Double
,
_branch_y
::
Double
,
_branch_label
::
Text
,
_branch_meta
::
Map
Text
[
Double
]
}
deriving
(
Generic
,
Show
,
Eq
)
data
PhyloExport
=
PhyloExport
...
...
@@ -382,6 +401,7 @@ data PhyloExport =
makeLenses
''
C
onfig
makeLenses
''
P
roximity
makeLenses
''
S
eaElevation
makeLenses
''
Q
uality
makeLenses
''
C
lique
makeLenses
''
P
hyloLabel
...
...
src/Gargantext/Viz/Phylo/PhyloExample.hs
View file @
2d0a7430
...
...
@@ -30,7 +30,7 @@ import Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.PhyloMaker
import
Gargantext.Viz.Phylo.PhyloExport
import
Gargantext.Viz.Phylo.TemporalMatching
(
t
emporalMatching
)
import
Gargantext.Viz.Phylo.TemporalMatching
(
adaptativeTemporalMatching
,
constanteT
emporalMatching
)
import
Gargantext.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Control.Lens
...
...
@@ -60,7 +60,12 @@ phylo2 = synchronicClustering phylo1
-----------------------------------------------
phylo1
::
Phylo
phylo1
=
temporalMatching
phylo1
=
case
(
getSeaElevation
phyloBase
)
of
Constante
s
g
->
constanteTemporalMatching
s
g
$
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
Adaptative
s
->
adaptativeTemporalMatching
s
$
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
...
...
src/Gargantext/Viz/Phylo/PhyloExport.hs
View file @
2d0a7430
...
...
@@ -17,8 +17,8 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloExport
where
import
Data.Map
(
Map
,
fromList
,
empty
,
fromListWith
,
insert
,
(
!
),
elems
,
unionWith
,
findWithDefault
,
toList
,
delete
)
import
Data.List
((
++
),
sort
,
nub
,
concat
,
sortOn
,
reverse
,
groupBy
,
union
,
(
\\
),
(
!!
),
init
,
partition
,
unwords
,
nubBy
)
import
Data.Map
(
Map
,
fromList
,
empty
,
fromListWith
,
insert
,
(
!
),
elems
,
unionWith
,
findWithDefault
,
toList
)
import
Data.List
((
++
),
sort
,
nub
,
concat
,
sortOn
,
reverse
,
groupBy
,
union
,
(
\\
),
(
!!
),
init
,
partition
,
unwords
,
nubBy
,
inits
)
import
Data.Vector
(
Vector
)
import
Prelude
(
writeFile
)
...
...
@@ -116,7 +116,11 @@ branchToDotNode b =
([
FillColor
[
toWColor
CornSilk
],
FontName
"Arial"
,
FontSize
40
,
Shape
Egg
,
Style
[
SItem
Bold
[]
],
Label
(
toDotLabel
$
b
^.
branch_label
)]
<>
(
metaToAttr
$
b
^.
branch_meta
)
<>
[
toAttr
"nodeType"
"branch"
,
toAttr
"branchId"
(
pack
$
unwords
(
map
show
$
snd
$
b
^.
branch_id
))
])
,
toAttr
"branchId"
(
pack
$
unwords
(
map
show
$
snd
$
b
^.
branch_id
))
,
toAttr
"branch_x"
(
fromStrict
$
Text
.
pack
$
(
show
$
b
^.
branch_x
))
,
toAttr
"branch_y"
(
fromStrict
$
Text
.
pack
$
(
show
$
b
^.
branch_y
))
,
toAttr
"label"
(
pack
$
show
$
b
^.
branch_label
)
])
periodToDotNode
::
(
Date
,
Date
)
->
Dot
DotId
periodToDotNode
prd
=
...
...
@@ -130,7 +134,7 @@ periodToDotNode prd =
groupToDotNode
::
Vector
Ngrams
->
PhyloGroup
->
Dot
DotId
groupToDotNode
fdt
g
=
node
(
groupIdToDotId
$
getGroupId
g
)
([
FontName
"Arial"
,
Shape
BoxShape
,
toLabel
(
groupToTable
fdt
g
)]
([
FontName
"Arial"
,
Shape
Square
,
penWidth
4
,
toLabel
(
groupToTable
fdt
g
)]
<>
[
toAttr
"nodeType"
"group"
,
toAttr
"from"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
,
toAttr
"to"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
...
...
@@ -141,7 +145,7 @@ groupToDotNode fdt g =
toDotEdge
::
DotId
->
DotId
->
Text
.
Text
->
EdgeType
->
Dot
DotId
toDotEdge
source
target
lbl
edgeType
=
edge
source
target
(
case
edgeType
of
GroupToGroup
->
[
Width
10
,
Color
[
toWColor
Black
],
Constraint
True
GroupToGroup
->
[
Width
3
,
penWidth
4
,
Color
[
toWColor
Black
],
Constraint
True
,
Label
(
StrLabel
$
fromStrict
lbl
)]
BranchToGroup
->
[
Width
3
,
Color
[
toWColor
Black
],
ArrowHead
(
AType
[(
ArrMod
FilledArrow
RightSide
,
DotArrow
)])
,
Label
(
StrLabel
$
fromStrict
lbl
)]
...
...
@@ -174,12 +178,9 @@ exportToDot phylo export =
<>
[(
toAttr
(
fromStrict
"phyloFoundations"
)
$
pack
$
show
(
length
$
Vector
.
toList
$
getRoots
phylo
))
,(
toAttr
(
fromStrict
"phyloTerms"
)
$
pack
$
show
(
length
$
nub
$
concat
$
map
(
\
g
->
g
^.
phylo_groupNgrams
)
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"phyloDocs"
)
$
pack
$
show
(
sum
$
elems
$
phylo
^.
phylo_timeDocs
))
,(
toAttr
(
fromStrict
"phyloPeriods"
)
$
pack
$
show
(
length
$
elems
$
phylo
^.
phylo_periods
))
,(
toAttr
(
fromStrict
"phyloBranches"
)
$
pack
$
show
(
length
$
export
^.
export_branches
))
,(
toAttr
(
fromStrict
"phyloGroups"
)
$
pack
$
show
(
length
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"proxiName"
)
$
pack
$
show
(
getProximityName
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"proxiInit"
)
$
pack
$
show
(
getProximityInit
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"proxiStep"
)
$
pack
$
show
(
getProximityStep
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"quaGranularity"
)
$
pack
$
show
(
_qua_granularity
$
phyloQuality
$
getConfig
phylo
))
])
...
...
@@ -232,12 +233,12 @@ exportToDot phylo export =
)
$
nubBy
(
\
combi
combi'
->
fst
combi
==
fst
combi'
)
$
listToCombi'
$
getPeriodIds
phylo
-- | 8) create the edges between the branches
_
<-
mapM
(
\
(
bId
,
bId'
)
->
toDotEdge
(
branchIdToDotId
bId
)
(
branchIdToDotId
bId'
)
(
Text
.
pack
$
show
(
branchIdsToProximity
bId
bId'
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
getThresholdStep
$
phyloProximity
$
getConfig
phylo
)))
BranchToBranch
)
$
nubBy
(
\
combi
combi'
->
fst
combi
==
fst
combi'
)
$
listToCombi'
$
map
_branch_id
$
export
^.
export_branches
--
_ <- mapM (\(bId,bId') ->
--
toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
--
(Text.pack $ show(branchIdsToProximity bId bId'
--
(getThresholdInit $ phyloProximity $ getConfig phylo)
--
(getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
--
) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
graphAttrs
[
Rank
SameRank
]
...
...
@@ -418,8 +419,8 @@ processLabels labels foundations export =
toDynamics
::
Int
->
[
PhyloGroup
]
->
PhyloGroup
->
Map
Int
(
Date
,
Date
)
->
Double
toDynamics
n
parents
g
roup
m
=
let
prd
=
g
roup
^.
phylo_groupPeriod
toDynamics
n
parents
g
m
=
let
prd
=
g
^.
phylo_groupPeriod
end
=
last'
"dynamics"
(
sort
$
map
snd
$
elems
m
)
in
if
(((
snd
prd
)
==
(
snd
$
m
!
n
))
&&
(
snd
prd
/=
end
))
-- | decrease
...
...
@@ -438,7 +439,7 @@ toDynamics n parents group m =
processDynamics
::
[
PhyloGroup
]
->
[
PhyloGroup
]
processDynamics
groups
=
processDynamics
groups
=
map
(
\
g
->
let
parents
=
filter
(
\
g'
->
(
g
^.
phylo_groupBranchId
==
g'
^.
phylo_groupBranchId
)
&&
((
fst
$
g
^.
phylo_groupPeriod
)
>
(
fst
$
g'
^.
phylo_groupPeriod
)))
groups
...
...
@@ -467,15 +468,38 @@ toPhyloExport phylo = exportToDot phylo
$
processMetrics
export
where
export
::
PhyloExport
export
=
PhyloExport
groups
branches
export
=
PhyloExport
groups
$
map
(
\
(
x
,
b
)
->
b
&
branch_x
.~
x
)
$
zip
branchesGaps
branches
--------------------------------------
branches
::
[
PhyloBranch
]
branches
=
map
(
\
bId
->
PhyloBranch
bId
""
empty
)
$
nub
$
map
_phylo_groupBranchId
groups
branchesGaps
::
[
Double
]
branchesGaps
=
map
sum
$
inits
$
map
(
\
(
b
,
x
)
->
b
^.
branch_y
+
0.05
-
x
)
$
zip
branches
$
([
0
]
++
(
map
(
\
(
b
,
b'
)
->
let
idx
=
length
$
commonPrefix
(
b
^.
branch_canonId
)
(
b'
^.
branch_canonId
)
[]
in
(
b'
^.
branch_seaLevel
)
!!
(
idx
-
1
)
)
$
listToSeq
branches
))
--------------------------------------
branches
::
[
PhyloBranch
]
branches
=
map
(
\
g
->
let
seaLvl
=
(
g
^.
phylo_groupMeta
)
!
"seaLevels"
breaks
=
(
g
^.
phylo_groupMeta
)
!
"breaks"
canonId
=
take
(
round
$
(
last'
"export"
breaks
)
+
2
)
(
snd
$
g
^.
phylo_groupBranchId
)
in
trace
(
show
(
canonId
))
$
PhyloBranch
(
g
^.
phylo_groupBranchId
)
canonId
seaLvl
0
(
last'
"export"
(
take
(
round
$
(
last'
"export"
breaks
)
+
1
)
seaLvl
))
""
empty
)
$
map
(
\
gs
->
head'
"export"
gs
)
$
groupBy
(
\
g
g'
->
g
^.
phylo_groupBranchId
==
g'
^.
phylo_groupBranchId
)
$
sortOn
(
\
g
->
g
^.
phylo_groupBranchId
)
groups
--------------------------------------
groups
::
[
PhyloGroup
]
groups
=
traceExportGroups
$
processDynamics
$
map
(
\
g
->
g
&
phylo_groupMeta
%~
delete
"dynamics"
)
$
processDynamics
$
getGroupsFromLevel
(
phyloLevel
$
getConfig
phylo
)
$
tracePhyloInfo
phylo
...
...
src/Gargantext/Viz/Phylo/PhyloMaker.hs
View file @
2d0a7430
...
...
@@ -15,15 +15,15 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloMaker
where
import
Data.List
(
concat
,
nub
,
partition
,
sort
,
(
++
),
group
)
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
restrictKeys
,
singleton
)
import
Data.List
(
concat
,
nub
,
partition
,
sort
,
(
++
),
group
,
intersect
,
null
)
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
restrictKeys
,
foldlWithKey
)
import
Data.Set
(
size
)
import
Data.Vector
(
Vector
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.TemporalMatching
(
temporalMatching
)
import
Gargantext.Viz.Phylo.TemporalMatching
(
adaptativeTemporalMatching
,
constanteTemporalMatching
,
getNextPeriods
,
filterDocs
,
filterDiago
,
reduceDiagos
,
toProximity
)
import
Gargantext.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Gargantext.Text.Context
(
TermList
)
import
Gargantext.Text.Metrics.FrequentItemSet
(
fisWithSizePolyMap
,
Size
(
..
))
...
...
@@ -43,7 +43,8 @@ import qualified Data.Set as Set
toPhylo
::
[
Document
]
->
TermList
->
Config
->
Phylo
toPhylo
docs
lst
conf
=
traceToPhylo
(
phyloLevel
conf
)
$
toPhylo
docs
lst
conf
=
trace
(
"# phylo1 groups "
<>
show
(
length
$
getGroupsFromLevel
1
phylo1
))
$
traceToPhylo
(
phyloLevel
conf
)
$
if
(
phyloLevel
conf
)
>
1
then
foldl'
(
\
phylo'
_
->
synchronicClustering
phylo'
)
phylo1
[
2
..
(
phyloLevel
conf
)]
else
phylo1
...
...
@@ -62,8 +63,35 @@ toPhylo docs lst conf = traceToPhylo (phyloLevel conf) $
-- | To Phylo 1 | --
--------------------
appendGroups
::
(
a
->
Double
->
PhyloPeriodId
->
Level
->
Int
->
Vector
Ngrams
->
[
Cooc
]
->
PhyloGroup
)
->
Level
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
toGroupsProxi
::
Level
->
Phylo
->
Phylo
toGroupsProxi
lvl
phylo
=
let
proximity
=
phyloProximity
$
getConfig
phylo
groupsProxi
=
foldlWithKey
(
\
acc
pId
pds
->
-- 1) process period by period
let
egos
=
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
elems
$
view
(
phylo_periodLevels
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
lvl
)
.
phylo_levelGroups
)
pds
next
=
getNextPeriods
ToParents
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
pId
(
keys
$
phylo
^.
phylo_periods
)
targets
=
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
getGroupsFromLevelPeriods
lvl
next
phylo
docs
=
filterDocs
(
phylo
^.
phylo_timeDocs
)
([
pId
]
++
next
)
diagos
=
filterDiago
(
phylo
^.
phylo_timeCooc
)
([
pId
]
++
next
)
-- 2) compute the pairs in parallel
pairs
=
map
(
\
(
id
,
ngrams
)
->
map
(
\
(
id'
,
ngrams'
)
->
let
nbDocs
=
(
sum
.
elems
)
$
filterDocs
docs
([
idToPrd
id
,
idToPrd
id'
])
diago
=
reduceDiagos
$
filterDiago
diagos
([
idToPrd
id
,
idToPrd
id'
])
in
((
id
,
id'
),
toProximity
nbDocs
diago
proximity
ngrams
ngrams'
ngrams'
)
)
$
filter
(
\
(
_
,
ngrams'
)
->
(
not
.
null
)
$
intersect
ngrams
ngrams'
)
targets
)
egos
pairs'
=
pairs
`
using
`
parList
rdeepseq
in
acc
++
(
concat
pairs'
)
)
[]
$
phylo
^.
phylo_periods
in
phylo
&
phylo_groupsProxi
.~
((
traceGroupsProxi
.
fromList
)
groupsProxi
)
appendGroups
::
(
a
->
PhyloPeriodId
->
Level
->
Int
->
Vector
Ngrams
->
[
Cooc
]
->
PhyloGroup
)
->
Level
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
f
lvl
m
phylo
=
trace
(
"
\n
"
<>
"-- | Append "
<>
show
(
length
$
concat
$
elems
m
)
<>
" groups to Level "
<>
show
(
lvl
)
<>
"
\n
"
)
$
over
(
phylo_periods
.
traverse
...
...
@@ -76,7 +104,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
in
phyloLvl
&
phylo_levelGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
groups
++
[
(((
pId
,
lvl
),
length
groups
)
,
f
obj
(
getPhyloThresholdInit
phylo
)
pId
lvl
(
length
groups
)
(
getRoots
phylo
)
,
f
obj
pId
lvl
(
length
groups
)
(
getRoots
phylo
)
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[
pId
]))
]
)
[]
phyloCUnit
)
else
...
...
@@ -84,21 +112,26 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phylo
cliqueToGroup
::
PhyloClique
->
Double
->
PhyloPeriodId
->
Level
->
Int
->
Vector
Ngrams
->
[
Cooc
]
->
PhyloGroup
cliqueToGroup
fis
thr
pId
lvl
idx
fdt
coocs
=
cliqueToGroup
::
PhyloClique
->
PhyloPeriodId
->
Level
->
Int
->
Vector
Ngrams
->
[
Cooc
]
->
PhyloGroup
cliqueToGroup
fis
pId
lvl
idx
fdt
coocs
=
let
ngrams
=
ngramsToIdx
(
Set
.
toList
$
fis
^.
phyloClique_nodes
)
fdt
in
PhyloGroup
pId
lvl
idx
""
(
fis
^.
phyloClique_support
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
1
,[
0
])
-- | branchid (lvl,[path in the branching tree])
(
singleton
"thr"
[
thr
])
(
fromList
[(
"breaks"
,[
0
]),(
"seaLevels"
,[
0
])
])
[]
[]
[]
[]
toPhylo1
::
[
Document
]
->
Phylo
->
Phylo
toPhylo1
docs
phyloBase
=
temporalMatching
$
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
toPhylo1
docs
phyloBase
=
case
(
getSeaElevation
phyloBase
)
of
Constante
start
gap
->
constanteTemporalMatching
start
gap
$
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
Adaptative
steps
->
adaptativeTemporalMatching
steps
$
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
where
--------------------------------------
phyloClique
::
Map
(
Date
,
Date
)
[
PhyloClique
]
...
...
@@ -247,5 +280,6 @@ toPhyloBase docs lst conf =
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleNb
docs
)
(
docsToTermFreq
docs
(
foundations
^.
foundations_roots
))
empty
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
initPhyloLevels
1
prd
)))
periods
)
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
2d0a7430
...
...
@@ -34,6 +34,7 @@ import Control.Lens hiding (Level)
import
qualified
Data.Vector
as
Vector
import
qualified
Data.List
as
List
import
qualified
Data.Set
as
Set
import
qualified
Data.Map
as
Map
------------
-- | Io | --
...
...
@@ -66,6 +67,7 @@ roundToStr = printf "%0.*f"
countSup
::
Double
->
[
Double
]
->
Int
countSup
s
l
=
length
$
filter
(
>
s
)
l
dropByIdx
::
Int
->
[
a
]
->
[
a
]
dropByIdx
k
l
=
take
k
l
++
drop
(
k
+
1
)
l
...
...
@@ -76,6 +78,15 @@ elemIndex' e l = case (List.elemIndex e l) of
Just
i
->
i
commonPrefix
::
Eq
a
=>
[
a
]
->
[
a
]
->
[
a
]
->
[
a
]
commonPrefix
lst
lst'
acc
=
if
(
null
lst
||
null
lst'
)
then
acc
else
if
(
head'
"commonPrefix"
lst
==
head'
"commonPrefix"
lst'
)
then
commonPrefix
(
tail
lst
)
(
tail
lst'
)
(
acc
++
[
head'
"commonPrefix"
lst
])
else
acc
---------------------
-- | Foundations | --
---------------------
...
...
@@ -249,7 +260,7 @@ idToPrd :: PhyloGroupId -> PhyloPeriodId
idToPrd
id
=
(
fst
.
fst
)
id
getGroupThr
::
PhyloGroup
->
Double
getGroupThr
group
=
head'
"getGroupThr"
((
group
^.
phylo_groupMeta
)
!
"thr
"
)
getGroupThr
group
=
last'
"getGroupThr"
((
group
^.
phylo_groupMeta
)
!
"breaks
"
)
groupByField
::
Ord
a
=>
(
PhyloGroup
->
a
)
->
[
PhyloGroup
]
->
Map
a
[
PhyloGroup
]
groupByField
toField
groups
=
fromListWith
(
++
)
$
map
(
\
g
->
(
toField
g
,
[
g
]))
groups
...
...
@@ -263,27 +274,14 @@ getPeriodPointers fil group =
filterProximity
::
Proximity
->
Double
->
Double
->
Bool
filterProximity
proximity
thr
local
=
case
proximity
of
WeightedLogJaccard
_
_
_
->
local
>=
thr
WeightedLogJaccard
_
->
local
>=
thr
Hamming
->
undefined
getProximityName
::
Proximity
->
String
getProximityName
proximity
=
case
proximity
of
WeightedLogJaccard
_
_
_
->
"WLJaccard"
Hamming
->
"Hamming"
getProximityInit
::
Proximity
->
Double
getProximityInit
proximity
=
case
proximity
of
WeightedLogJaccard
_
i
_
->
i
Hamming
->
undefined
getProximityStep
::
Proximity
->
Double
getProximityStep
proximity
=
case
proximity
of
WeightedLogJaccard
_
_
s
->
s
Hamming
->
undefined
WeightedLogJaccard
_
->
"WLJaccard"
Hamming
->
"Hamming"
---------------
-- | Phylo | --
...
...
@@ -318,13 +316,8 @@ getLevels phylo = nub
.
traverse
.
phylo_periodLevels
)
phylo
getPhyloThresholdInit
::
Phylo
->
Double
getPhyloThresholdInit
phylo
=
getThresholdInit
(
phyloProximity
(
getConfig
phylo
))
getPhyloThresholdStep
::
Phylo
->
Double
getPhyloThresholdStep
phylo
=
getThresholdStep
(
phyloProximity
(
getConfig
phylo
))
getSeaElevation
::
Phylo
->
SeaElevation
getSeaElevation
phylo
=
seaElevation
(
getConfig
phylo
)
getConfig
::
Phylo
->
Config
...
...
@@ -350,6 +343,26 @@ getGroupsFromLevel lvl phylo =
.
phylo_levelGroups
)
phylo
getGroupsFromLevelPeriods
::
Level
->
[
PhyloPeriodId
]
->
Phylo
->
[
PhyloGroup
]
getGroupsFromLevelPeriods
lvl
periods
phylo
=
elems
$
view
(
phylo_periods
.
traverse
.
filtered
(
\
phyloPrd
->
elem
(
phyloPrd
^.
phylo_periodPeriod
)
periods
)
.
phylo_periodLevels
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
lvl
)
.
phylo_levelGroups
)
phylo
getGroupsFromPeriods
::
Level
->
Map
PhyloPeriodId
PhyloPeriod
->
[
PhyloGroup
]
getGroupsFromPeriods
lvl
periods
=
elems
$
view
(
traverse
.
phylo_periodLevels
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
lvl
)
.
phylo_levelGroups
)
periods
updatePhyloGroups
::
Level
->
Map
PhyloGroupId
PhyloGroup
->
Phylo
->
Phylo
updatePhyloGroups
lvl
m
phylo
=
over
(
phylo_periods
...
...
@@ -407,27 +420,7 @@ traceSynchronyStart phylo =
getSensibility
::
Proximity
->
Double
getSensibility
proxi
=
case
proxi
of
WeightedLogJaccard
s
_
_
->
s
Hamming
->
undefined
getThresholdInit
::
Proximity
->
Double
getThresholdInit
proxi
=
case
proxi
of
WeightedLogJaccard
_
t
_
->
t
Hamming
->
undefined
getThresholdStep
::
Proximity
->
Double
getThresholdStep
proxi
=
case
proxi
of
WeightedLogJaccard
_
_
s
->
s
Hamming
->
undefined
traceBranchMatching
::
Proximity
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
traceBranchMatching
proxi
thr
groups
=
case
proxi
of
WeightedLogJaccard
_
i
s
->
trace
(
roundToStr
2
thr
<>
" "
<>
foldl
(
\
acc
_
->
acc
<>
"."
)
"."
[(
10
*
i
),(
10
*
i
+
10
*
s
)
..
(
10
*
thr
)]
<>
" "
<>
show
(
length
groups
)
<>
" groups"
)
groups
WeightedLogJaccard
s
->
s
Hamming
->
undefined
----------------
...
...
@@ -494,4 +487,9 @@ traceMatchEnd groups =
traceTemporalMatching
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceTemporalMatching
groups
=
trace
(
"
\n
"
<>
"-- | Start temporal matching for "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
\ No newline at end of file
trace
(
"
\n
"
<>
"-- | Start temporal matching for "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
traceGroupsProxi
::
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
->
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
traceGroupsProxi
m
=
trace
(
"
\n
"
<>
"-- | "
<>
show
(
Map
.
size
m
)
<>
" computed pairs of groups proximity"
<>
"
\n
"
)
m
\ No newline at end of file
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
2d0a7430
...
...
@@ -21,8 +21,9 @@ import Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.TemporalMatching
(
weightedLogJaccard'
,
filterDiago
,
reduceDiagos
)
import
Gargantext.Viz.Phylo.PhyloExport
(
processDynamics
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
,
sortOn
,
init
,
all
,
group
,
maximum
,
groupBy
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
,
singleton
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
,
sortOn
,
all
,
groupBy
,
group
,
maximum
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
)
import
Data.Text
(
Text
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
...
...
@@ -37,20 +38,26 @@ import qualified Data.Set as Set
-------------------------
mergeBranchIds
::
[[
Int
]]
->
[
Int
]
mergeBranchIds
ids
=
(
head'
"mergeBranchIds"
.
sort
.
mostFreq
)
ids
mergeBranchIds
ids
=
(
head'
"mergeBranchIds"
.
sort
.
mostFreq
'
)
ids
where
-- | 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]]
-- mostUpLeft ids' =
-- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
-- inf = (fst . minimum) groupIds
--
in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- | 1) find the most frequent ids
mostFreq
::
[[
Int
]]
->
[[
Int
]]
mostFreq
ids'
=
let
groupIds
=
(
map
(
\
gIds
->
(
length
gIds
,
head'
"gIds"
gIds
))
.
group
.
sort
)
ids'
sup
=
(
fst
.
maximum
)
groupIds
in
map
snd
$
filter
(
\
gIds
->
fst
gIds
==
sup
)
groupIds
mostFreq'
::
[[
Int
]]
->
[[
Int
]]
mostFreq'
ids'
=
let
groupIds
=
(
map
(
\
gIds
->
(
length
gIds
,
head'
"gIds"
gIds
))
.
group
.
sort
)
ids'
sup
=
(
fst
.
maximum
)
groupIds
in
map
snd
$
filter
(
\
gIds
->
fst
gIds
==
sup
)
groupIds
mergeMeta
::
[
Int
]
->
[
PhyloGroup
]
->
Map
Text
[
Double
]
mergeMeta
bId
groups
=
let
ego
=
head'
"mergeMeta"
$
filter
(
\
g
->
(
snd
(
g
^.
phylo_groupBranchId
))
==
bId
)
groups
in
fromList
[(
"breaks"
,(
ego
^.
phylo_groupMeta
)
!
"breaks"
),(
"seaLevels"
,(
ego
^.
phylo_groupMeta
)
!
"seaLevels"
)]
groupsToBranches'
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
...
...
@@ -64,10 +71,8 @@ groupsToBranches' groups =
in
map
(
\
ids
->
let
groups'
=
elems
$
restrictKeys
groups
(
Set
.
fromList
ids
)
bId
=
mergeBranchIds
$
map
(
\
g
->
snd
$
g
^.
phylo_groupBranchId
)
groups'
in
map
(
\
g
->
g
&
phylo_groupBranchId
%~
(
\
(
lvl
,
_
)
->
(
lvl
+
1
,
bId
)))
groups'
)
graph
in
map
(
\
g
->
g
&
phylo_groupBranchId
%~
(
\
(
lvl
,
_
)
->
(
lvl
,
bId
)))
groups'
)
graph
getLastThr
::
[
PhyloGroup
]
->
Double
getLastThr
childs
=
maximum
$
concat
$
map
(
\
g
->
(
g
^.
phylo_groupMeta
)
!
"thr"
)
childs
mergeGroups
::
[
Cooc
]
->
PhyloGroupId
->
Map
PhyloGroupId
PhyloGroupId
->
[
PhyloGroup
]
->
PhyloGroup
mergeGroups
coocs
id
mapIds
childs
=
...
...
@@ -75,11 +80,15 @@ mergeGroups coocs id mapIds childs =
in
PhyloGroup
(
fst
$
fst
id
)
(
snd
$
fst
id
)
(
snd
id
)
""
(
sum
$
map
_phylo_groupSupport
childs
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
((
snd
$
fst
id
),
(
mergeBranchIds
$
map
(
\
g
->
snd
$
g
^.
phylo_groupBranchId
)
childs
)
)
(
singleton
"thr"
[
getLastThr
childs
]
)
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
((
snd
$
fst
id
),
bId
)
(
mergeMeta
bId
childs
)
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodParents
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodChilds
childs
)
where
where
--------------------
bId
::
[
Int
]
bId
=
mergeBranchIds
$
map
(
\
g
->
snd
$
g
^.
phylo_groupBranchId
)
childs
--------------------
updatePointers
::
[
Pointer
]
->
[
Pointer
]
updatePointers
pointers
=
map
(
\
(
pId
,
w
)
->
(
mapIds
!
pId
,
w
))
pointers
...
...
@@ -92,7 +101,7 @@ addPhyloLevel lvl phylo =
toNextLevel'
::
Phylo
->
[
PhyloGroup
]
->
Phylo
toNextLevel'
phylo
groups
=
toNextLevel'
phylo
groups
=
let
curLvl
=
getLastLevel
phylo
oldGroups
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
getLevelParentId
g
))
groups
newGroups
=
concat
$
groupsToBranches'
...
...
@@ -163,9 +172,9 @@ groupsToEdges prox sync nbDocs diago groups =
toEdges
::
Double
->
[(
PhyloGroup
,
PhyloGroup
)]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
toEdges
sens
edges
=
case
prox
of
WeightedLogJaccard
_
_
_
->
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard'
sens
nbDocs
diago
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
edges
WeightedLogJaccard
_
->
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard'
sens
nbDocs
diago
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
edges
_
->
undefined
...
...
@@ -181,7 +190,7 @@ toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1),
reduceGroups
::
Proximity
->
Synchrony
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceGroups
prox
sync
docs
diagos
branch
=
reduceGroups
prox
sync
docs
diagos
branch
=
-- | 1) reduce a branch as a set of periods & groups
let
periods
=
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,[
g
]))
branch
...
...
@@ -198,21 +207,13 @@ reduceGroups prox sync docs diagos branch =
$
toRelatedComponents
groups
edges
)
periods
getGroupRealBId
::
Double
->
PhyloGroup
->
[
Int
]
getGroupRealBId
step
g
=
let
nb
=
round
(
getGroupThr
g
/
step
)
+
2
in
take
nb
(
snd
$
g
^.
phylo_groupBranchId
)
adjustClustering
::
Synchrony
->
Double
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
adjustClustering
sync
step
branches
=
case
sync
of
ByProximityThreshold
_
_
scope
_
->
case
scope
of
adjustClustering
::
Synchrony
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
adjustClustering
sync
branches
=
case
sync
of
ByProximityThreshold
_
_
scope
_
->
case
scope
of
SingleBranch
->
branches
SiblingBranches
->
groupBy
(
\
g
g'
->
(
init
$
getGroupRealBId
step
g
)
==
(
init
$
getGroupRealBId
step
g'
))
SiblingBranches
->
groupBy
(
\
g
g'
->
(
last'
"adjustClustering"
$
(
g
^.
phylo_groupMeta
)
!
"breaks"
)
==
(
last'
"adjustClustering"
$
(
g'
^.
phylo_groupMeta
)
!
"breaks"
))
$
sortOn
_phylo_groupBranchId
$
concat
branches
-- SiblingBranches -> elems $ fromListWith (++) $ map (\b -> ((init . snd . _phylo_groupBranchId) $ head' "adjustClustering" b,b)) branches
AllBranches
->
[
concat
branches
]
ByProximityDistribution
_
_
->
branches
...
...
@@ -226,7 +227,7 @@ synchronicClustering phylo =
diagos
=
map
coocToDiago
$
phylo
^.
phylo_timeCooc
newBranches
=
map
(
\
branch
->
reduceGroups
prox
sync
docs
diagos
branch
)
$
map
processDynamics
$
adjustClustering
sync
(
getPhyloThresholdStep
phylo
)
$
adjustClustering
sync
$
phyloToLastBranches
$
traceSynchronyStart
phylo
newBranches'
=
newBranches
`
using
`
parList
rdeepseq
...
...
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
2d0a7430
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment