Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
e1b6117a
Commit
e1b6117a
authored
Feb 11, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] FlowPhylo implemented
parent
25cdbe65
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
135 additions
and
123 deletions
+135
-123
List.hs
src/Gargantext/API/Ngrams/List.hs
+4
-9
Prelude.hs
src/Gargantext/API/Ngrams/Prelude.hs
+20
-1
PhyloExport.hs
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
+111
-111
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+0
-2
No files found.
src/Gargantext/API/Ngrams/List.hs
View file @
e1b6117a
...
...
@@ -19,15 +19,16 @@ import Control.Lens hiding (elements, Indexed)
import
Data.Aeson
import
Data.Either
(
Either
(
..
))
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
Map
,
toList
,
fromList
)
import
Data.Map
(
Map
,
toList
)
import
Data.Maybe
(
catMaybes
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
,
concat
,
pack
)
import
Data.Vector
(
Vector
)
import
Gargantext.API.Admin.Orchestrator.Types
import
Gargantext.API.Ngrams
(
getNgramsTableMap
,
setListNgrams
)
import
Gargantext.API.Ngrams
(
setListNgrams
)
import
Gargantext.API.Ngrams.Tools
(
getTermsWith
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.API.Ngrams.Prelude
(
getNgramsList
)
import
Gargantext.API.Ngrams.List.Types
import
Gargantext.API.Prelude
(
GargServer
)
import
Gargantext.Core.NodeStory
...
...
@@ -110,7 +111,7 @@ csvApi = csvPostAsync
get
::
HasNodeStory
env
err
m
=>
ListId
->
m
(
Headers
'[
H
eader
"Content-Disposition"
Text
]
NgramsList
)
get
lId
=
do
lst
<-
get
'
lId
lst
<-
get
NgramsList
lId
let
(
NodeId
id'
)
=
lId
return
$
addHeader
(
concat
[
"attachment; filename=GarganText_NgramsList-"
,
pack
$
show
id'
...
...
@@ -118,12 +119,6 @@ get lId = do
]
)
lst
get'
::
HasNodeStory
env
err
m
=>
ListId
->
m
NgramsList
get'
lId
=
fromList
<$>
zip
ngramsTypes
<$>
mapM
(
getNgramsTableMap
lId
)
ngramsTypes
------------------------------------------------------------------------
-- TODO : purge list
-- TODO talk
...
...
src/Gargantext/API/Ngrams/Prelude.hs
View file @
e1b6117a
...
...
@@ -16,19 +16,38 @@ module Gargantext.API.Ngrams.Prelude
import
Data.Maybe
(
catMaybes
)
import
Control.Lens
(
view
)
import
Data.Map
(
fromList
)
import
Data.Hashable
(
Hashable
)
import
Data.Validity
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.Types
(
ListType
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
,
ngramsTypes
)
import
Gargantext.Prelude
import
Gargantext.Core.Text.List.Social.Prelude
import
Gargantext.API.Ngrams
(
getNgramsTableMap
)
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.NodeStory
(
HasNodeStory
)
import
Gargantext.Database.Admin.Types.Node
(
ListId
)
import
qualified
Data.HashMap.Strict
as
HM
import
qualified
Data.Map.Strict
as
Map
import
qualified
Data.List
as
List
import
qualified
Data.Text
as
Text
------------------------------------------------------------------------
getNgramsList
::
HasNodeStory
env
err
m
=>
ListId
->
m
NgramsList
getNgramsList
lId
=
fromList
<$>
zip
ngramsTypes
<$>
mapM
(
getNgramsTableMap
lId
)
ngramsTypes
getTermList
::
HasNodeStory
env
err
m
=>
ListId
->
ListType
->
NgramsType
->
m
(
Maybe
TermList
)
getTermList
lId
listType
ngramsType
=
do
ngramsList
<-
getNgramsList
lId
pure
$
toTermList
listType
ngramsType
ngramsList
------------------------------------------------------------------------
-- | Tools
-- Usage example: toTermList MapTerm NgramsTerms ngramsList
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
View file @
e1b6117a
...
...
@@ -15,7 +15,7 @@ module Gargantext.Core.Viz.Phylo.PhyloExport where
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Data.GraphViz
hiding
(
DotGraph
,
Order
)
import
Data.GraphViz.Attributes.Complete
hiding
(
EdgeType
,
Order
)
import
Data.GraphViz.Attributes.Complete
hiding
(
EdgeType
,
Order
)
import
Data.GraphViz.Types.Generalised
(
DotGraph
)
import
Data.GraphViz.Types.Monadic
import
Data.List
((
++
),
sort
,
nub
,
null
,
concat
,
sortOn
,
groupBy
,
union
,
(
\\
),
(
!!
),
init
,
partition
,
notElem
,
unwords
,
nubBy
,
inits
,
elemIndex
)
...
...
@@ -45,7 +45,7 @@ dotToString :: DotGraph DotId -> [Char]
dotToString
dotG
=
unpack
(
printDotGraph
dotG
)
dynamicToColor
::
Double
->
H
.
Attribute
dynamicToColor
d
dynamicToColor
d
|
d
==
0
=
H
.
BGColor
(
toColor
LightCoral
)
|
d
==
1
=
H
.
BGColor
(
toColor
Khaki
)
|
d
==
2
=
H
.
BGColor
(
toColor
SkyBlue
)
...
...
@@ -56,7 +56,7 @@ pickLabelColor lst
|
elem
0
lst
=
dynamicToColor
0
|
elem
2
lst
=
dynamicToColor
2
|
elem
1
lst
=
dynamicToColor
1
|
otherwise
=
dynamicToColor
3
|
otherwise
=
dynamicToColor
3
toDotLabel
::
Text
.
Text
->
Label
toDotLabel
lbl
=
StrLabel
$
fromStrict
lbl
...
...
@@ -82,30 +82,30 @@ groupToTable fdt g = H.Table H.HTable
,
H
.
tableAttrs
=
[
H
.
Border
0
,
H
.
CellBorder
0
,
H
.
BGColor
(
toColor
White
)]
,
H
.
tableRows
=
[
header
]
<>
[
H
.
Cells
[
H
.
LabelCell
[
H
.
Height
10
]
$
H
.
Text
[
H
.
Str
$
fromStrict
""
]]]
<>
(
map
ngramsToRow
$
splitEvery
4
<>
(
map
ngramsToRow
$
splitEvery
4
$
reverse
$
sortOn
(
snd
.
snd
)
$
zip
(
ngramsToText
fdt
(
g
^.
phylo_groupNgrams
))
$
zip
(
ngramsToText
fdt
(
g
^.
phylo_groupNgrams
))
$
zip
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)
((
g
^.
phylo_groupMeta
)
!
"inclusion"
))}
where
--------------------------------------
ngramsToRow
::
[(
Ngrams
,(
Double
,
Double
))]
->
H
.
Row
ngramsToRow
ns
=
H
.
Cells
$
map
(
\
(
n
,(
d
,
_
))
->
ngramsToRow
ns
=
H
.
Cells
$
map
(
\
(
n
,(
d
,
_
))
->
H
.
LabelCell
[
H
.
Align
H
.
HLeft
,
dynamicToColor
d
]
$
H
.
Text
[
H
.
Str
$
fromStrict
n
])
ns
--------------------------------------
header
::
H
.
Row
header
=
H
.
Cells
[
H
.
LabelCell
[
pickLabelColor
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)]
header
=
H
.
Cells
[
H
.
LabelCell
[
pickLabelColor
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)]
$
H
.
Text
[
H
.
Str
$
(((
fromStrict
.
Text
.
toUpper
)
$
g
^.
phylo_groupLabel
)
<>
(
fromStrict
" ( "
)
<>
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
<>
(
fromStrict
" , "
)
<>
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
<>
(
fromStrict
" ) "
)
<>
(
pack
$
show
(
getGroupId
g
)))]]
<>
(
pack
$
show
(
getGroupId
g
)))]]
--------------------------------------
branchToDotNode
::
PhyloBranch
->
Int
->
Dot
DotId
branchToDotNode
b
bId
=
branchToDotNode
b
bId
=
node
(
branchIdToDotId
$
b
^.
branch_id
)
([
FillColor
[
toWColor
CornSilk
],
FontName
"Arial"
,
FontSize
40
,
Shape
Egg
,
Style
[
SItem
Bold
[]
],
Label
(
toDotLabel
$
b
^.
branch_label
)]
<>
(
metaToAttr
$
b
^.
branch_meta
)
...
...
@@ -116,20 +116,20 @@ branchToDotNode b bId =
,
toAttr
"branch_y"
(
fromStrict
$
Text
.
pack
$
(
show
$
b
^.
branch_y
))
,
toAttr
"label"
(
pack
$
show
$
b
^.
branch_label
)
])
periodToDotNode
::
(
Date
,
Date
)
->
(
Text
.
Text
,
Text
.
Text
)
->
Dot
DotId
periodToDotNode
prd
prd'
=
node
(
periodIdToDotId
prd
)
([
Shape
BoxShape
,
FontSize
50
,
Label
(
toDotLabel
$
Text
.
pack
(
show
(
fst
prd
)
<>
" "
<>
show
(
snd
prd
)))]
<>
[
toAttr
"nodeType"
"period"
,
toAttr
"strFrom"
(
fromStrict
$
Text
.
pack
$
(
show
$
fst
prd'
))
,
toAttr
"strTo"
(
fromStrict
$
Text
.
pack
$
(
show
$
snd
prd'
))
,
toAttr
"strTo"
(
fromStrict
$
Text
.
pack
$
(
show
$
snd
prd'
))
,
toAttr
"from"
(
fromStrict
$
Text
.
pack
$
(
show
$
fst
prd
))
,
toAttr
"to"
(
fromStrict
$
Text
.
pack
$
(
show
$
snd
prd
))])
groupToDotNode
::
Vector
Ngrams
->
PhyloGroup
->
Int
->
Dot
DotId
groupToDotNode
fdt
g
bId
=
groupToDotNode
fdt
g
bId
=
node
(
groupIdToDotId
$
getGroupId
g
)
([
FontName
"Arial"
,
Shape
Square
,
penWidth
4
,
toLabel
(
groupToTable
fdt
g
)]
<>
[
toAttr
"nodeType"
"group"
...
...
@@ -137,7 +137,7 @@ groupToDotNode fdt g bId =
,
toAttr
"from"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
,
toAttr
"to"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
,
toAttr
"strFrom"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod'
))
,
toAttr
"strTo"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod'
))
,
toAttr
"strTo"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod'
))
,
toAttr
"branchId"
(
pack
$
unwords
(
init
$
map
show
$
snd
$
g
^.
phylo_groupBranchId
))
,
toAttr
"bId"
(
pack
$
show
bId
)
,
toAttr
"support"
(
pack
$
show
(
g
^.
phylo_groupSupport
))
...
...
@@ -149,7 +149,7 @@ groupToDotNode fdt g bId =
,
toAttr
"role"
(
pack
$
show
(
idxToLabel'
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)))
,
toAttr
"frequence"
(
pack
$
show
(
idxToLabel'
((
g
^.
phylo_groupMeta
)
!
"frequence"
)))
,
toAttr
"seaLvl"
(
pack
$
show
((
g
^.
phylo_groupMeta
)
!
"seaLevels"
))
])
])
toDotEdge'
::
DotId
->
DotId
->
[
Char
]
->
[
Char
]
->
EdgeType
->
Dot
DotId
...
...
@@ -175,7 +175,7 @@ toDotEdge source target lbl edgeType = edge source target
mergePointers
::
[
PhyloGroup
]
->
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
mergePointers
groups
=
mergePointers
groups
=
let
toChilds
=
fromList
$
concat
$
map
(
\
g
->
map
(
\
(
target
,
w
)
->
((
getGroupId
g
,
target
),
w
))
$
g
^.
phylo_groupPeriodChilds
)
groups
toParents
=
fromList
$
concat
$
map
(
\
g
->
map
(
\
(
target
,
w
)
->
((
target
,
getGroupId
g
),
w
))
$
g
^.
phylo_groupPeriodParents
)
groups
in
unionWith
(
\
w
w'
->
max
w
w'
)
toChilds
toParents
...
...
@@ -188,22 +188,22 @@ mergePointersMemory groups =
mergeAncestors
::
[
PhyloGroup
]
->
[((
PhyloGroupId
,
PhyloGroupId
),
Double
)]
mergeAncestors
groups
=
concat
$
map
(
\
g
->
map
(
\
(
target
,
w
)
->
((
getGroupId
g
,
target
),
w
))
$
g
^.
phylo_groupAncestors
)
$
map
(
\
g
->
map
(
\
(
target
,
w
)
->
((
getGroupId
g
,
target
),
w
))
$
g
^.
phylo_groupAncestors
)
$
filter
(
\
g
->
(
not
.
null
)
$
g
^.
phylo_groupAncestors
)
groups
toBid
::
PhyloGroup
->
[
PhyloBranch
]
->
Int
toBid
g
bs
=
toBid
g
bs
=
let
b'
=
head'
"toBid"
(
filter
(
\
b
->
b
^.
branch_id
==
g
^.
phylo_groupBranchId
)
bs
)
in
fromJust
$
elemIndex
b'
bs
exportToDot
::
Phylo
->
PhyloExport
->
DotGraph
DotId
exportToDot
phylo
export
=
exportToDot
phylo
export
=
trace
(
"
\n
-- | Convert "
<>
show
(
length
$
export
^.
export_branches
)
<>
" branches and "
<>
show
(
length
$
export
^.
export_groups
)
<>
" groups "
<>
show
(
length
$
export
^.
export_groups
)
<>
" groups "
<>
show
(
length
$
nub
$
concat
$
map
(
\
g
->
g
^.
phylo_groupNgrams
)
$
export
^.
export_groups
)
<>
" terms to a dot file
\n\n
"
<>
"##########################"
)
$
digraph
((
Str
.
fromStrict
)
$
(
phyloName
$
getConfig
phylo
))
$
do
digraph
((
Str
.
fromStrict
)
$
(
phyloName
$
getConfig
phylo
))
$
do
{- 1) init the dot graph -}
graphAttrs
(
[
Label
(
toDotLabel
$
(
phyloName
$
getConfig
phylo
))]
...
...
@@ -226,12 +226,12 @@ exportToDot phylo export =
-- toAttr (fromStrict k) $ (pack . unwords) $ map show v
-- 2) create a layer for the branches labels -}
subgraph
(
Str
"Branches peaks"
)
$
do
subgraph
(
Str
"Branches peaks"
)
$
do
-- graphAttrs [Rank SameRank]
{-
-- 3) group the branches by hierarchy
-- mapM (\branches ->
-- mapM (\branches ->
-- subgraph (Str "Branches clade") $ do
-- graphAttrs [Rank SameRank]
...
...
@@ -243,7 +243,7 @@ exportToDot phylo export =
{-- 5) create a layer for each period -}
_
<-
mapM
(
\
period
->
subgraph
((
Str
.
fromStrict
.
Text
.
pack
)
$
(
"Period"
<>
show
(
fst
$
_phylo_periodPeriod
period
)
<>
show
(
snd
$
_phylo_periodPeriod
period
)))
$
do
subgraph
((
Str
.
fromStrict
.
Text
.
pack
)
$
(
"Period"
<>
show
(
fst
$
_phylo_periodPeriod
period
)
<>
show
(
snd
$
_phylo_periodPeriod
period
)))
$
do
graphAttrs
[
Rank
SameRank
]
periodToDotNode
(
period
^.
phylo_periodPeriod
)
(
period
^.
phylo_periodPeriod'
)
...
...
@@ -253,16 +253,16 @@ exportToDot phylo export =
{-- 7) create the edges between a branch and its first groups -}
_
<-
mapM
(
\
(
bId
,
groups
)
->
mapM
(
\
g
->
toDotEdge
(
branchIdToDotId
bId
)
(
groupIdToDotId
$
getGroupId
g
)
""
BranchToGroup
)
groups
mapM
(
\
g
->
toDotEdge
(
branchIdToDotId
bId
)
(
groupIdToDotId
$
getGroupId
g
)
""
BranchToGroup
)
groups
)
$
toList
$
map
(
\
groups
->
head'
"toDot"
$
map
(
\
groups
->
head'
"toDot"
$
groupBy
(
\
g
g'
->
g'
^.
phylo_groupPeriod
==
g
^.
phylo_groupPeriod
)
$
sortOn
(
fst
.
_phylo_groupPeriod
)
groups
)
$
sortOn
(
fst
.
_phylo_groupPeriod
)
groups
)
$
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupBranchId
,[
g
]))
$
export
^.
export_groups
{- 8) create the edges between the groups -}
_
<-
mapM
(
\
((
k
,
k'
),
v
)
->
_
<-
mapM
(
\
((
k
,
k'
),
v
)
->
toDotEdge
(
groupIdToDotId
k
)
(
groupIdToDotId
k'
)
(
show
v
)
GroupToGroup
)
$
(
toList
.
mergePointers
)
$
export
^.
export_groups
...
...
@@ -275,15 +275,15 @@ exportToDot phylo export =
toDotEdge
(
groupIdToDotId
k
)
(
groupIdToDotId
k'
)
(
show
v
)
GroupToAncestor
)
$
mergeAncestors
$
export
^.
export_groups
-- 10) create the edges between the periods
-- 10) create the edges between the periods
_
<-
mapM
(
\
(
prd
,
prd'
)
->
toDotEdge
(
periodIdToDotId
prd
)
(
periodIdToDotId
prd'
)
""
PeriodToPeriod
)
$
nubBy
(
\
combi
combi'
->
fst
combi
==
fst
combi'
)
$
listToCombi'
$
getPeriodIds
phylo
{- 8) create the edges between the branches
{- 8) create the edges between the branches
-- _ <- mapM (\(bId,bId') ->
-- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
-- (Text.pack $ show(branchIdsToProximity bId bId'
-- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
-- (Text.pack $ show(branchIdsToProximity bId bId'
-- (getThresholdInit $ phyloProximity $ getConfig phylo)
-- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
-- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
...
...
@@ -298,18 +298,18 @@ exportToDot phylo export =
----------------
filterByBranchSize
::
Double
->
PhyloExport
->
PhyloExport
filterByBranchSize
thr
export
=
filterByBranchSize
thr
export
=
let
splited
=
partition
(
\
b
->
head'
"filter"
((
b
^.
branch_meta
)
!
"size"
)
>=
thr
)
$
export
^.
export_branches
in
export
&
export_branches
.~
(
fst
splited
)
&
export_groups
%~
(
filter
(
\
g
->
not
$
elem
(
g
^.
phylo_groupBranchId
)
(
map
_branch_id
$
snd
splited
)))
processFilters
::
[
Filter
]
->
Quality
->
PhyloExport
->
PhyloExport
processFilters
filters
qua
export
=
foldl
(
\
export'
f
->
case
f
of
processFilters
filters
qua
export
=
foldl
(
\
export'
f
->
case
f
of
ByBranchSize
thr
->
if
(
thr
<
(
fromIntegral
$
qua
^.
qua_minBranch
))
then
filterByBranchSize
(
fromIntegral
$
qua
^.
qua_minBranch
)
export'
else
filterByBranchSize
thr
export'
else
filterByBranchSize
thr
export'
)
export
filters
--------------
...
...
@@ -321,11 +321,11 @@ branchToIso branches =
let
steps
=
map
sum
$
inits
$
map
(
\
(
b
,
x
)
->
b
^.
branch_y
+
0.05
-
x
)
$
zip
branches
$
([
0
]
++
(
map
(
\
(
b
,
b'
)
->
$
zip
branches
$
([
0
]
++
(
map
(
\
(
b
,
b'
)
->
let
idx
=
length
$
commonPrefix
(
b
^.
branch_canonId
)
(
b'
^.
branch_canonId
)
[]
lmin
=
min
(
length
$
b
^.
branch_seaLevel
)
(
length
$
b'
^.
branch_seaLevel
)
in
lmin
=
min
(
length
$
b
^.
branch_seaLevel
)
(
length
$
b'
^.
branch_seaLevel
)
in
if
((
idx
-
1
)
>
((
length
$
b'
^.
branch_seaLevel
)
-
1
))
then
(
b'
^.
branch_seaLevel
)
!!
(
lmin
-
1
)
else
(
b'
^.
branch_seaLevel
)
!!
(
idx
-
1
)
...
...
@@ -334,10 +334,10 @@ branchToIso branches =
$
zip
steps
branches
branchToIso'
::
Double
->
Double
->
[
PhyloBranch
]
->
[
PhyloBranch
]
branchToIso'
start
step
branches
=
branchToIso'
start
step
branches
=
let
bx
=
map
(
\
l
->
(
sum
l
)
+
((
fromIntegral
$
length
l
)
*
0.5
))
$
inits
$
([
0
]
++
(
map
(
\
(
b
,
b'
)
->
$
([
0
]
++
(
map
(
\
(
b
,
b'
)
->
let
root
=
fromIntegral
$
length
$
commonPrefix
(
snd
$
b
^.
branch_id
)
(
snd
$
b'
^.
branch_id
)
[]
in
1
-
start
-
step
*
root
)
$
listToSeq
branches
))
in
map
(
\
(
x
,
b
)
->
b
&
branch_x
.~
x
)
...
...
@@ -348,17 +348,17 @@ sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
sortByHierarchy
depth
branches
=
if
(
length
branches
==
1
)
then
branches
else
concat
else
concat
$
map
(
\
branches'
->
let
partitions
=
partition
(
\
b
->
depth
+
1
==
((
length
.
snd
)
$
b
^.
branch_id
))
branches'
in
(
sortOn
(
\
b
->
(
b
^.
branch_meta
)
!
"birth"
)
(
fst
partitions
))
++
(
sortByHierarchy
(
depth
+
1
)
(
snd
partitions
)))
++
(
sortByHierarchy
(
depth
+
1
)
(
snd
partitions
)))
$
groupBy
(
\
b
b'
->
((
take
depth
.
snd
)
$
b
^.
branch_id
)
==
((
take
depth
.
snd
)
$
b'
^.
branch_id
)
)
$
sortOn
(
\
b
->
(
take
depth
.
snd
)
$
b
^.
branch_id
)
branches
sortByBirthDate
::
Order
->
PhyloExport
->
PhyloExport
sortByBirthDate
order
export
=
sortByBirthDate
order
export
=
let
branches
=
sortOn
(
\
b
->
(
b
^.
branch_meta
)
!
"birth"
)
$
export
^.
export_branches
branches'
=
case
order
of
Asc
->
branches
...
...
@@ -367,8 +367,8 @@ sortByBirthDate order export =
processSort
::
Sort
->
SeaElevation
->
PhyloExport
->
PhyloExport
processSort
sort'
elev
export
=
case
sort'
of
ByBirthDate
o
->
sortByBirthDate
o
export
ByHierarchy
->
export
&
export_branches
.~
(
branchToIso'
(
_cons_start
elev
)
(
_cons_step
elev
)
ByBirthDate
o
->
sortByBirthDate
o
export
ByHierarchy
->
export
&
export_branches
.~
(
branchToIso'
(
_cons_start
elev
)
(
_cons_step
elev
)
$
sortByHierarchy
0
(
export
^.
export_branches
))
...
...
@@ -376,26 +376,26 @@ processSort sort' elev export = case sort' of
-- | Metrics | --
-----------------
-- | Return the conditional probability of i knowing j
-- | Return the conditional probability of i knowing j
conditional
::
Ord
a
=>
Map
(
a
,
a
)
Double
->
a
->
a
->
Double
conditional
m
i
j
=
(
findWithDefault
0
(
i
,
j
)
m
)
conditional
m
i
j
=
(
findWithDefault
0
(
i
,
j
)
m
)
/
(
m
!
(
j
,
j
))
-- | Return the genericity score of a given ngram
genericity
::
Map
(
Int
,
Int
)
Double
->
[
Int
]
->
Int
->
Double
genericity
m
l
i
=
(
(
sum
$
map
(
\
j
->
conditional
m
i
j
)
l
)
genericity
::
Map
(
Int
,
Int
)
Double
->
[
Int
]
->
Int
->
Double
genericity
m
l
i
=
(
(
sum
$
map
(
\
j
->
conditional
m
i
j
)
l
)
-
(
sum
$
map
(
\
j
->
conditional
m
j
i
)
l
))
/
(
fromIntegral
$
(
length
l
)
+
1
)
-- | Return the specificity score of a given ngram
specificity
::
Map
(
Int
,
Int
)
Double
->
[
Int
]
->
Int
->
Double
specificity
::
Map
(
Int
,
Int
)
Double
->
[
Int
]
->
Int
->
Double
specificity
m
l
i
=
(
(
sum
$
map
(
\
j
->
conditional
m
j
i
)
l
)
-
(
sum
$
map
(
\
j
->
conditional
m
i
j
)
l
))
/
(
fromIntegral
$
(
length
l
)
+
1
)
-
(
sum
$
map
(
\
j
->
conditional
m
i
j
)
l
))
/
(
fromIntegral
$
(
length
l
)
+
1
)
-- | Return the inclusion score of a given ngram
inclusion
::
Map
(
Int
,
Int
)
Double
->
[
Int
]
->
Int
->
Double
inclusion
::
Map
(
Int
,
Int
)
Double
->
[
Int
]
->
Int
->
Double
inclusion
m
l
i
=
(
(
sum
$
map
(
\
j
->
conditional
m
j
i
)
l
)
+
(
sum
$
map
(
\
j
->
conditional
m
i
j
)
l
))
/
(
fromIntegral
$
(
length
l
)
+
1
)
...
...
@@ -404,14 +404,14 @@ ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
ngramsMetrics
phylo
export
=
over
(
export_groups
.
traverse
)
(
\
g
->
g
&
phylo_groupMeta
%~
insert
"genericity"
(
\
g
->
g
&
phylo_groupMeta
%~
insert
"genericity"
(
map
(
\
n
->
genericity
(
g
^.
phylo_groupCooc
)
((
g
^.
phylo_groupNgrams
)
\\
[
n
])
n
)
$
g
^.
phylo_groupNgrams
)
&
phylo_groupMeta
%~
insert
"specificity"
&
phylo_groupMeta
%~
insert
"specificity"
(
map
(
\
n
->
specificity
(
g
^.
phylo_groupCooc
)
((
g
^.
phylo_groupNgrams
)
\\
[
n
])
n
)
$
g
^.
phylo_groupNgrams
)
&
phylo_groupMeta
%~
insert
"inclusion"
&
phylo_groupMeta
%~
insert
"inclusion"
(
map
(
\
n
->
inclusion
(
g
^.
phylo_groupCooc
)
((
g
^.
phylo_groupNgrams
)
\\
[
n
])
n
)
$
g
^.
phylo_groupNgrams
)
&
phylo_groupMeta
%~
insert
"frequence"
(
map
(
\
n
->
getInMap
n
(
phylo
^.
phylo_lastTermFreq
))
$
g
^.
phylo_groupNgrams
)
&
phylo_groupMeta
%~
insert
"frequence"
(
map
(
\
n
->
getInMap
n
(
phylo
^.
phylo_lastTermFreq
))
$
g
^.
phylo_groupNgrams
)
)
export
...
...
@@ -419,32 +419,32 @@ branchDating :: PhyloExport -> PhyloExport
branchDating
export
=
over
(
export_branches
.
traverse
)
(
\
b
->
(
\
b
->
let
groups
=
sortOn
fst
$
foldl'
(
\
acc
g
->
if
(
g
^.
phylo_groupBranchId
==
b
^.
branch_id
)
then
acc
++
[
g
^.
phylo_groupPeriod
]
else
acc
)
[]
$
export
^.
export_groups
periods
=
nub
groups
birth
=
fst
$
head'
"birth"
groups
age
=
(
snd
$
last'
"age"
groups
)
-
birth
in
b
&
branch_meta
%~
insert
"birth"
[
fromIntegral
birth
]
age
=
(
snd
$
last'
"age"
groups
)
-
birth
in
b
&
branch_meta
%~
insert
"birth"
[
fromIntegral
birth
]
&
branch_meta
%~
insert
"age"
[
fromIntegral
age
]
&
branch_meta
%~
insert
"size"
[
fromIntegral
$
length
periods
]
)
export
processMetrics
::
Phylo
->
PhyloExport
->
PhyloExport
processMetrics
phylo
export
=
ngramsMetrics
phylo
$
branchDating
export
$
branchDating
export
-----------------
-- | Taggers | --
-----------------
-----------------
nk
::
Int
->
[[
Int
]]
->
Int
nk
n
groups
=
sum
$
map
(
\
g
->
if
(
elem
n
g
)
then
1
else
0
)
groups
else
0
)
groups
tf
::
Int
->
[[
Int
]]
->
Double
...
...
@@ -463,7 +463,7 @@ findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
findEmergences
groups
freq
=
let
ngrams
=
map
_phylo_groupNgrams
groups
dynamics
=
map
(
\
g
->
(
g
^.
phylo_groupMeta
)
!
"dynamics"
)
groups
emerging
=
nubBy
(
\
n1
n2
->
fst
n1
==
fst
n2
)
emerging
=
nubBy
(
\
n1
n2
->
fst
n1
==
fst
n2
)
$
concat
$
map
(
\
g
->
filter
(
\
(
_
,
d
)
->
d
==
0
)
$
zip
(
fst
g
)
(
snd
g
))
$
zip
ngrams
dynamics
in
reverse
$
sortOn
snd
$
map
(
\
(
n
,
_
)
->
if
(
member
n
freq
)
...
...
@@ -471,18 +471,18 @@ findEmergences groups freq =
else
(
n
,
0
))
emerging
mostEmergentTfIdf
::
Int
->
Map
Int
Double
->
Vector
Ngrams
->
PhyloExport
->
PhyloExport
mostEmergentTfIdf
nth
freq
foundations
export
=
mostEmergentTfIdf
::
Int
->
Map
Int
Double
->
Vector
Ngrams
->
PhyloExport
->
PhyloExport
mostEmergentTfIdf
nth
freq
foundations
export
=
over
(
export_branches
.
traverse
)
(
\
b
->
(
\
b
->
let
groups
=
filter
(
\
g
->
g
^.
phylo_groupBranchId
==
b
^.
branch_id
)
$
export
^.
export_groups
tfidf
=
findTfIdf
(
map
_phylo_groupNgrams
groups
)
emergences
=
findEmergences
groups
freq
selected
=
if
(
null
emergences
)
then
map
fst
$
take
nth
tfidf
else
[
fst
$
head'
"mostEmergentTfIdf"
emergences
]
++
(
map
fst
$
take
(
nth
-
1
)
$
filter
(
\
(
n
,
_
)
->
n
/=
(
fst
$
head'
"mostEmergentTfIdf"
emergences
))
tfidf
)
else
[
fst
$
head'
"mostEmergentTfIdf"
emergences
]
++
(
map
fst
$
take
(
nth
-
1
)
$
filter
(
\
(
n
,
_
)
->
n
/=
(
fst
$
head'
"mostEmergentTfIdf"
emergences
))
tfidf
)
in
b
&
branch_label
.~
(
ngramsToLabel
foundations
selected
))
export
...
...
@@ -490,14 +490,14 @@ getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
getNthMostMeta
nth
meta
ns
=
map
(
\
(
idx
,
_
)
->
(
ns
!!
idx
))
$
take
nth
$
reverse
$
sortOn
snd
$
zip
[
0
..
]
meta
$
sortOn
snd
$
zip
[
0
..
]
meta
mostInclusive
::
Int
->
Vector
Ngrams
->
PhyloExport
->
PhyloExport
mostInclusive
nth
foundations
export
=
over
(
export_branches
.
traverse
)
(
\
b
->
(
\
b
->
let
groups
=
filter
(
\
g
->
g
^.
phylo_groupBranchId
==
b
^.
branch_id
)
$
export
^.
export_groups
cooc
=
foldl
(
\
acc
g
->
unionWith
(
+
)
acc
(
g
^.
phylo_groupCooc
))
empty
groups
ngrams
=
sort
$
foldl
(
\
acc
g
->
union
acc
(
g
^.
phylo_groupNgrams
))
[]
groups
...
...
@@ -509,14 +509,14 @@ mostInclusive nth foundations export =
mostEmergentInclusive
::
Int
->
Vector
Ngrams
->
PhyloExport
->
PhyloExport
mostEmergentInclusive
nth
foundations
export
=
over
(
export_groups
.
traverse
)
(
\
g
->
.
traverse
)
(
\
g
->
let
lbl
=
ngramsToLabel
foundations
$
take
nth
$
take
nth
$
map
(
\
(
_
,(
_
,
idx
))
->
idx
)
$
concat
$
map
(
\
groups
->
sortOn
(
fst
.
snd
)
groups
)
$
groupBy
((
==
)
`
on
`
fst
)
$
reverse
$
sortOn
fst
$
groupBy
((
==
)
`
on
`
fst
)
$
reverse
$
sortOn
fst
$
zip
((
g
^.
phylo_groupMeta
)
!
"inclusion"
)
$
zip
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)
(
g
^.
phylo_groupNgrams
)
in
g
&
phylo_groupLabel
.~
lbl
)
export
...
...
@@ -524,26 +524,26 @@ mostEmergentInclusive nth foundations export =
processLabels
::
[
PhyloLabel
]
->
Vector
Ngrams
->
Map
Int
Double
->
PhyloExport
->
PhyloExport
processLabels
labels
foundations
freq
export
=
foldl
(
\
export'
label
->
foldl
(
\
export'
label
->
case
label
of
GroupLabel
tagger
nth
->
GroupLabel
tagger
nth
->
case
tagger
of
MostEmergentInclusive
->
mostEmergentInclusive
nth
foundations
export'
MostEmergentInclusive
->
mostEmergentInclusive
nth
foundations
export'
_
->
panic
"[ERR][Viz.Phylo.PhyloExport] unknown tagger"
BranchLabel
tagger
nth
->
case
tagger
of
MostInclusive
->
mostInclusive
nth
foundations
export'
MostEmergentTfIdf
->
mostEmergentTfIdf
nth
freq
foundations
export'
_
->
panic
"[ERR][Viz.Phylo.PhyloExport] unknown tagger"
)
export
labels
_
->
panic
"[ERR][Viz.Phylo.PhyloExport] unknown tagger"
)
export
labels
------------------
-- | Dynamics | --
------------------
------------------
toDynamics
::
Int
->
[
PhyloGroup
]
->
PhyloGroup
->
Map
Int
(
Date
,
Date
)
->
Double
toDynamics
n
parents
g
m
=
toDynamics
n
parents
g
m
=
let
prd
=
g
^.
phylo_groupPeriod
end
=
last'
"dynamics"
(
sort
$
map
snd
$
elems
m
)
in
if
(((
snd
prd
)
==
(
snd
$
m
!
n
))
&&
(
snd
prd
/=
end
))
...
...
@@ -557,7 +557,7 @@ toDynamics n parents g m =
then
1
else
3
where
--------------------------------------
--------------------------------------
isNew
::
Bool
isNew
=
not
$
elem
n
$
concat
$
map
_phylo_groupNgrams
parents
...
...
@@ -571,11 +571,11 @@ processDynamics groups =
where
--------------------------------------
mapNgrams
::
Map
Int
(
Date
,
Date
)
mapNgrams
=
map
(
\
dates
->
mapNgrams
=
map
(
\
dates
->
let
dates'
=
sort
dates
in
(
head'
"dynamics"
dates'
,
last'
"dynamics"
dates'
))
$
fromListWith
(
++
)
$
foldl
(
\
acc
g
->
acc
++
(
map
(
\
n
->
(
n
,[
fst
$
g
^.
phylo_groupPeriod
,
snd
$
g
^.
phylo_groupPeriod
]))
$
foldl
(
\
acc
g
->
acc
++
(
map
(
\
n
->
(
n
,[
fst
$
g
^.
phylo_groupPeriod
,
snd
$
g
^.
phylo_groupPeriod
]))
$
(
g
^.
phylo_groupNgrams
)))
[]
groups
...
...
@@ -584,17 +584,17 @@ processDynamics groups =
-----------------
getGroupThr
::
Double
->
PhyloGroup
->
Double
getGroupThr
step
g
=
getGroupThr
step
g
=
let
seaLvl
=
(
g
^.
phylo_groupMeta
)
!
"seaLevels"
breaks
=
(
g
^.
phylo_groupMeta
)
!
"breaks"
in
(
last'
"export"
(
take
(
round
$
(
last'
"export"
breaks
)
+
1
)
seaLvl
))
-
step
toAncestor
::
Double
->
Map
Int
Double
->
Proximity
->
Double
->
[
PhyloGroup
]
->
PhyloGroup
->
PhyloGroup
toAncestor
nbDocs
diago
proximity
step
candidates
ego
=
let
curr
=
ego
^.
phylo_groupAncestors
let
curr
=
ego
^.
phylo_groupAncestors
in
ego
&
phylo_groupAncestors
.~
(
curr
++
(
map
(
\
(
g
,
w
)
->
(
getGroupId
g
,
w
))
$
filter
(
\
(
g
,
w
)
->
(
w
>
0
)
&&
(
w
>=
(
min
(
getGroupThr
step
ego
)
(
getGroupThr
step
g
))))
$
map
(
\
g
->
(
g
,
toProximity
nbDocs
diago
proximity
(
ego
^.
phylo_groupNgrams
)
(
g
^.
phylo_groupNgrams
)
(
g
^.
phylo_groupNgrams
)))
$
map
(
\
g
->
(
g
,
toProximity
nbDocs
diago
proximity
(
ego
^.
phylo_groupNgrams
)
(
g
^.
phylo_groupNgrams
)
(
g
^.
phylo_groupNgrams
)))
$
filter
(
\
g
->
g
^.
phylo_groupBranchId
/=
ego
^.
phylo_groupBranchId
)
candidates
))
...
...
@@ -602,24 +602,24 @@ headsToAncestors :: Double -> Map Int Double -> Proximity -> Double -> [PhyloGro
headsToAncestors
nbDocs
diago
proximity
step
heads
acc
=
if
(
null
heads
)
then
acc
else
else
let
ego
=
head'
"headsToAncestors"
heads
heads'
=
tail'
"headsToAncestors"
heads
in
headsToAncestors
nbDocs
diago
proximity
step
heads'
(
acc
++
[
toAncestor
nbDocs
diago
proximity
step
heads'
ego
])
toHorizon
::
Phylo
->
Phylo
toHorizon
phylo
=
let
phyloAncestor
=
updatePhyloGroups
level
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
concat
toHorizon
phylo
=
let
phyloAncestor
=
updatePhyloGroups
level
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
concat
$
tracePhyloAncestors
newGroups
)
phylo
reBranched
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
concat
$
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
getGroupsFromLevel
level
phyloAncestor
in
updatePhyloGroups
level
reBranched
phylo
where
-- | 1) for each periods
-- | 1) for each periods
periods
::
[
PhyloPeriodId
]
periods
=
getPeriodIds
phylo
-- --
...
...
@@ -630,64 +630,64 @@ toHorizon phylo =
frame
=
getTimeFrame
$
timeUnit
$
getConfig
phylo
-- | 2) find ancestors between groups without parents
mapGroups
::
[[
PhyloGroup
]]
mapGroups
=
map
(
\
prd
->
mapGroups
=
map
(
\
prd
->
let
groups
=
getGroupsFromLevelPeriods
level
[
prd
]
phylo
childs
=
getPreviousChildIds
level
frame
prd
periods
phylo
childs
=
getPreviousChildIds
level
frame
prd
periods
phylo
-- maybe add a better filter for non isolated ancestors
heads
=
filter
(
\
g
->
(
not
.
null
)
$
(
g
^.
phylo_groupPeriodChilds
))
$
filter
(
\
g
->
null
(
g
^.
phylo_groupPeriodParents
)
&&
(
notElem
(
getGroupId
g
)
childs
))
groups
noHeads
=
groups
\\
heads
noHeads
=
groups
\\
heads
nbDocs
=
sum
$
elems
$
filterDocs
(
phylo
^.
phylo_timeDocs
)
[
prd
]
diago
=
reduceDiagos
$
filterDiago
(
phylo
^.
phylo_timeCooc
)
[
prd
]
proximity
=
(
phyloProximity
$
getConfig
phylo
)
step
=
case
getSeaElevation
phylo
of
Constante
_
s
->
s
Constante
_
s
->
s
Adaptative
_
->
undefined
-- in headsToAncestors nbDocs diago proximity heads groups []
in
map
(
\
ego
->
toAncestor
nbDocs
diago
proximity
step
noHeads
ego
)
in
map
(
\
ego
->
toAncestor
nbDocs
diago
proximity
step
noHeads
ego
)
$
headsToAncestors
nbDocs
diago
proximity
step
heads
[]
)
periods
-- | 3) process this task concurrently
newGroups
::
[[
PhyloGroup
]]
newGroups
=
mapGroups
`
using
`
parList
rdeepseq
newGroups
=
mapGroups
`
using
`
parList
rdeepseq
--------------------------------------
getPreviousChildIds
::
Level
->
Int
->
PhyloPeriodId
->
[
PhyloPeriodId
]
->
Phylo
->
[
PhyloGroupId
]
getPreviousChildIds
lvl
frame
curr
prds
phylo
=
getPreviousChildIds
lvl
frame
curr
prds
phylo
=
concat
$
map
((
map
fst
)
.
_phylo_groupPeriodChilds
)
$
getGroupsFromLevelPeriods
lvl
(
getNextPeriods
ToParents
frame
curr
prds
)
phylo
---------------------
-- | phyloExport | --
---------------------
---------------------
toPhyloExport
::
Phylo
->
DotGraph
DotId
toPhyloExport
phylo
=
exportToDot
phylo
$
processFilters
(
exportFilter
$
getConfig
phylo
)
(
phyloQuality
$
getConfig
phylo
)
$
processSort
(
exportSort
$
getConfig
phylo
)
(
getSeaElevation
phylo
)
$
processLabels
(
exportLabel
$
getConfig
phylo
)
(
getRoots
phylo
)
(
_phylo_lastTermFreq
phylo
)
$
processMetrics
phylo
export
$
processMetrics
phylo
export
where
export
::
PhyloExport
export
=
PhyloExport
groups
branches
export
=
PhyloExport
groups
branches
--------------------------------------
branches
::
[
PhyloBranch
]
branches
=
map
(
\
g
->
branches
=
map
(
\
g
->
let
seaLvl
=
(
g
^.
phylo_groupMeta
)
!
"seaLevels"
breaks
=
(
g
^.
phylo_groupMeta
)
!
"breaks"
canonId
=
take
(
round
$
(
last'
"export"
breaks
)
+
2
)
(
snd
$
g
^.
phylo_groupBranchId
)
in
PhyloBranch
(
g
^.
phylo_groupBranchId
)
in
PhyloBranch
(
g
^.
phylo_groupBranchId
)
canonId
seaLvl
0
0
(
last'
"export"
(
take
(
round
$
(
last'
"export"
breaks
)
+
1
)
seaLvl
))
0
0
""
empty
)
""
empty
)
$
map
(
\
gs
->
head'
"export"
gs
)
$
groupBy
(
\
g
g'
->
g
^.
phylo_groupBranchId
==
g'
^.
phylo_groupBranchId
)
$
sortOn
(
\
g
->
g
^.
phylo_groupBranchId
)
groups
--------------------------------------
--------------------------------------
groups
::
[
PhyloGroup
]
groups
=
traceExportGroups
$
processDynamics
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
e1b6117a
...
...
@@ -67,11 +67,9 @@ toPhylo phyloStep = trace ("# phylo1 groups " <> show(length $ getGroupsFromLeve
--------------------------------------
phylo1
::
Phylo
phylo1
=
toPhylo1
phyloStep
-- > AD to db here
--------------------------------------
--------------------
-- | To Phylo 1 | --
--------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment