Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
c60dc0b7
Commit
c60dc0b7
authored
Sep 13, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
end of phylo clustering
parent
bea591f6
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
132 additions
and
84 deletions
+132
-84
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+16
-18
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+1
-1
PhyloExample.hs
src/Gargantext/Viz/Phylo/PhyloExample.hs
+0
-1
PhyloExport.hs
src/Gargantext/Viz/Phylo/PhyloExport.hs
+4
-7
PhyloMaker.hs
src/Gargantext/Viz/Phylo/PhyloMaker.hs
+6
-5
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+44
-0
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+55
-46
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+6
-6
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
c60dc0b7
...
...
@@ -26,7 +26,7 @@ import Data.ByteString.Lazy (ByteString)
import
Data.Maybe
(
isJust
,
fromJust
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
,
take
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unwords
)
import
Data.Text
(
Text
,
unwords
,
unpack
)
import
Gargantext.Prelude
import
Gargantext.Database.Types.Node
(
HyperdataDocument
(
..
))
...
...
@@ -36,7 +36,9 @@ import Gargantext.Text.Corpus.Parsers (FileFormat(..),parseFile)
import
Gargantext.Text.List.CSV
(
csvGraphTermList
)
import
Gargantext.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloMaker
(
toPhylo
)
import
Gargantext.Viz.Phylo.PhyloMaker
(
toPhylo
)
import
Gargantext.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
)
import
Gargantext.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
GHC.IO
(
FilePath
)
import
Prelude
(
Either
(
..
))
...
...
@@ -54,21 +56,6 @@ import qualified Gargantext.Text.Corpus.Parsers.CSV as Csv
---------------
-- | To print an important message as an IO()
printIOMsg
::
String
->
IO
()
printIOMsg
msg
=
putStrLn
(
"
\n
"
<>
"------------"
<>
"
\n
"
<>
"-- | "
<>
msg
<>
"
\n
"
)
-- | To print a comment as an IO()
printIOComment
::
String
->
IO
()
printIOComment
cmt
=
putStrLn
(
"
\n
"
<>
cmt
<>
"
\n
"
)
-- | To get all the files in a directory or just a file
getFilesFromPath
::
FilePath
->
IO
([
FilePath
])
getFilesFromPath
path
=
do
...
...
@@ -166,6 +153,17 @@ main = do
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOMsg
"Reconstruct the Phylo"
let
phylo
=
toPhylo
corpus
mapList
config
printIOMsg
"End of reconstruction"
\ No newline at end of file
printIOMsg
"End of reconstruction, start the export"
let
dot
=
toPhyloExport
phylo
printIOMsg
"End of export to dot"
let
output
=
(
outputPath
config
)
<>
(
unpack
$
phyloName
config
)
<>
"_V2.dot"
dotToFile
output
dot
\ No newline at end of file
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
c60dc0b7
...
...
@@ -115,7 +115,7 @@ defaultConfig =
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
1
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloSynchrony
=
ByProximityThreshold
0.
4
,
phyloSynchrony
=
ByProximityThreshold
0.
1
,
timeUnit
=
Year
3
1
5
,
contextualUnit
=
Fis
2
4
,
exportLabel
=
[
BranchLabel
MostInclusive
2
,
GroupLabel
MostEmergentInclusive
2
]
...
...
src/Gargantext/Viz/Phylo/PhyloExample.hs
View file @
c60dc0b7
...
...
@@ -45,7 +45,6 @@ phyloExport = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot"
phyloDot
::
DotGraph
DotId
phyloDot
=
toPhyloExport
phylo2
phylo2
::
Phylo
phylo2
=
synchronicClustering
phylo1
...
...
src/Gargantext/Viz/Phylo/PhyloExport.hs
View file @
c60dc0b7
...
...
@@ -447,15 +447,12 @@ toPhyloExport phylo = exportToDot phylo
export
=
PhyloExport
groups
branches
--------------------------------------
branches
::
[
PhyloBranch
]
branches
=
traceBranches
$
map
(
\
bId
->
PhyloBranch
bId
""
empty
)
$
nub
$
map
_phylo_groupBranchId
groups
branches
=
trace
Export
Branches
$
map
(
\
bId
->
PhyloBranch
bId
""
empty
)
$
nub
$
map
_phylo_groupBranchId
groups
--------------------------------------
groups
::
[
PhyloGroup
]
groups
=
traceGroups
$
processDynamics
groups
=
processDynamics
$
getGroupsFromLevel
(
phyloLevel
$
getConfig
phylo
)
phylo
traceBranches
::
[
PhyloBranch
]
->
[
PhyloBranch
]
traceBranches
branches
=
trace
(
">>>> nb branches : "
<>
show
(
length
branches
))
branches
traceGroups
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceGroups
groups
=
trace
(
">>>> nb groups : "
<>
show
(
length
groups
))
groups
\ No newline at end of file
traceExportBranches
::
[
PhyloBranch
]
->
[
PhyloBranch
]
traceExportBranches
branches
=
trace
(
"
\n
"
<>
"-- | Export "
<>
show
(
length
branches
)
<>
" branches"
)
branches
src/Gargantext/Viz/Phylo/PhyloMaker.hs
View file @
c60dc0b7
...
...
@@ -43,13 +43,14 @@ import qualified Data.Set as Set
toPhylo
::
[
Document
]
->
TermList
->
Config
->
Phylo
toPhylo
docs
lst
conf
=
phylo1
toPhylo
docs
lst
conf
=
traceToPhylo
(
phyloLevel
conf
)
$
if
(
phyloLevel
conf
)
>
1
then
foldl'
(
\
phylo'
_
->
synchronicClustering
phylo'
)
phylo1
[
2
..
(
phyloLevel
conf
)]
else
phylo1
where
--------------------------------------
phylo1
::
Phylo
phylo1
=
synchronicClustering
$
temporalMatching
$
toPhylo1
docs
phyloBase
phylo1
=
toPhylo1
docs
phyloBase
--------------------------------------
phyloBase
::
Phylo
phyloBase
=
toPhyloBase
docs
lst
conf
...
...
@@ -230,4 +231,4 @@ toPhyloBase docs lst conf =
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleNb
docs
)
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
initPhyloLevels
(
phyloLevel
conf
)
prd
)))
periods
)
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
initPhyloLevels
1
prd
)))
periods
)
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
c60dc0b7
...
...
@@ -33,6 +33,25 @@ import qualified Data.Vector as Vector
import
qualified
Data.List
as
List
import
qualified
Data.Set
as
Set
------------
-- | Io | --
------------
-- | To print an important message as an IO()
printIOMsg
::
String
->
IO
()
printIOMsg
msg
=
putStrLn
(
"
\n
"
<>
"------------"
<>
"
\n
"
<>
"-- | "
<>
msg
<>
"
\n
"
)
-- | To print a comment as an IO()
printIOComment
::
String
->
IO
()
printIOComment
cmt
=
putStrLn
(
"
\n
"
<>
cmt
<>
"
\n
"
)
--------------
-- | Misc | --
--------------
...
...
@@ -232,6 +251,9 @@ getPeriodIds phylo = sortOn fst
$
keys
$
phylo
^.
phylo_periods
getLevelParentId
::
PhyloGroup
->
PhyloGroupId
getLevelParentId
g
=
fst
$
head'
"getLevelParentId"
$
g
^.
phylo_groupLevelParents
getLastLevel
::
Phylo
->
Level
getLastLevel
phylo
=
last'
"lastLevel"
$
getLevels
phylo
...
...
@@ -282,6 +304,13 @@ updatePhyloGroups lvl m phylo =
then
m
!
id
else
group
)
phylo
traceToPhylo
::
Level
->
Phylo
->
Phylo
traceToPhylo
lvl
phylo
=
trace
(
"
\n
"
<>
"-- | End of phylo making at level "
<>
show
(
lvl
)
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
lvl
phylo
)
<>
" groups and "
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
lvl
phylo
)
<>
" branches"
<>
"
\n
"
)
phylo
--------------------
-- | Clustering | --
--------------------
...
...
@@ -298,6 +327,21 @@ relatedComponents graphs = foldl' (\mem groups ->
else
(
mem
\\
related
)
++
[
union
groups
(
nub
$
concat
related
)]
)
[]
graphs
traceSynchronyEnd
::
Phylo
->
Phylo
traceSynchronyEnd
phylo
=
trace
(
"
\n
"
<>
"-- | End of synchronic clustering for level "
<>
show
(
getLastLevel
phylo
)
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" groups"
<>
" and "
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" branches"
<>
"
\n
"
)
phylo
traceSynchronyStart
::
Phylo
->
Phylo
traceSynchronyStart
phylo
=
trace
(
"
\n
"
<>
"-- | Start of synchronic clustering for level "
<>
show
(
getLastLevel
phylo
)
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" groups"
<>
" and "
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" branches"
<>
"
\n
"
)
phylo
-------------------
-- | Proximity | --
-------------------
...
...
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
c60dc0b7
...
...
@@ -31,51 +31,56 @@ import Debug.Trace (trace)
-- | New Level Maker | --
-------------------------
mergeGroups
::
[
Cooc
]
->
PhyloGroupId
->
[
PhyloGroup
]
->
PhyloGroup
mergeGroups
coocs
id
childs
=
toBranchId
::
PhyloGroup
->
PhyloBranchId
toBranchId
child
=
((
child
^.
phylo_groupLevel
)
+
1
,
snd
(
child
^.
phylo_groupBranchId
))
mergeGroups
::
[
Cooc
]
->
PhyloGroupId
->
Map
PhyloGroupId
PhyloGroupId
->
[
PhyloGroup
]
->
PhyloGroup
mergeGroups
coocs
id
mapIds
childs
=
let
ngrams
=
(
sort
.
nub
.
concat
)
$
map
_phylo_groupNgrams
childs
in
PhyloGroup
(
fst
$
fst
id
)
(
snd
$
fst
id
)
(
snd
id
)
""
(
sum
$
map
_phylo_groupSupport
childs
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(((
head'
"mergeGroups"
childs
)
^.
phylo_groupLevel
)
+
1
,
snd
((
head'
"mergeGroups"
childs
)
^.
phylo_groupBranchId
))
empty
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
(
concat
$
map
_phylo_groupPeriodParents
childs
)
(
concat
$
map
_phylo_groupPeriodChilds
childs
)
addNewLevel
::
Level
->
Phylo
->
Phylo
addNewLevel
lvl
phylo
=
over
(
phylo_periods
.
traverse
)
(
\
phyloPrd
->
phyloPrd
&
phylo_periodLevels
%~
(
insert
(
phyloPrd
^.
phylo_periodPeriod
,
lvl
+
1
)
(
PhyloLevel
(
phyloPrd
^.
phylo_periodPeriod
)
(
lvl
+
1
)
empty
)))
phylo
in
PhyloGroup
(
fst
$
fst
id
)
(
snd
$
fst
id
)
(
snd
id
)
""
(
sum
$
map
_phylo_groupSupport
childs
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
toBranchId
(
head'
"mergeGroups"
childs
))
empty
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodParents
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodChilds
childs
)
where
updatePointers
::
[
Pointer
]
->
[
Pointer
]
updatePointers
pointers
=
map
(
\
(
pId
,
w
)
->
(
mapIds
!
pId
,
w
))
pointers
addPhyloLevel
::
Level
->
Phylo
->
Phylo
addPhyloLevel
lvl
phylo
=
over
(
phylo_periods
.
traverse
)
(
\
phyloPrd
->
phyloPrd
&
phylo_periodLevels
%~
(
insert
(
phyloPrd
^.
phylo_periodPeriod
,
lvl
)
(
PhyloLevel
(
phyloPrd
^.
phylo_periodPeriod
)
lvl
empty
)))
phylo
toNextLevel
::
Phylo
->
[
PhyloGroup
]
->
Phylo
toNextLevel
phylo
groups
=
let
level
=
getLastLevel
phylo
phylo'
=
updatePhyloGroups
level
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
groups
)
phylo
nextGroups
=
fromListWith
(
++
)
$
foldlWithKey
(
\
acc
k
v
->
let
group
=
mergeGroups
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[
fst
$
fst
k
])
k
v
in
acc
++
[(
group
^.
phylo_groupPeriod
,[
group
])])
[]
$
fromListWith
(
++
)
$
map
(
\
g
->
(
fst
$
head'
"nextGroups"
$
g
^.
phylo_groupLevelParents
,[
g
]))
groups
in
trace
(
">>>>>>>>>>>>>>>>>>>>>>>>"
<>
show
(
nextGroups
))
over
(
phylo_periods
.
traverse
.
phylo_periodLevels
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
(
level
+
1
)))
(
\
phyloLvl
->
if
member
(
phyloLvl
^.
phylo_levelPeriod
)
nextGroups
then
phyloLvl
&
phylo_levelGroups
.~
fromList
(
map
(
\
g
->
(
getGroupId
g
,
g
))
$
nextGroups
!
(
phyloLvl
^.
phylo_levelPeriod
))
else
phyloLvl
)
$
addNewLevel
level
phylo'
let
curLvl
=
getLastLevel
phylo
oldGroups
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
getLevelParentId
g
))
groups
newGroups
=
fromListWith
(
++
)
-- | 5) group the parents by periods
$
foldlWithKey
(
\
acc
id
groups'
->
-- | 4) create the parent group
let
parent
=
mergeGroups
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[(
fst
.
fst
)
id
])
id
oldGroups
groups'
in
acc
++
[(
parent
^.
phylo_groupPeriod
,
[
parent
])])
[]
-- | 3) group the current groups by parentId
$
fromListWith
(
++
)
$
map
(
\
g
->
(
getLevelParentId
g
,
[
g
]))
groups
in
traceSynchronyEnd
$
over
(
phylo_periods
.
traverse
.
phylo_periodLevels
.
traverse
-- | 6) update each period at curLvl + 1
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
(
curLvl
+
1
)))
-- | 7) by adding the parents
(
\
phyloLvl
->
if
member
(
phyloLvl
^.
phylo_levelPeriod
)
newGroups
then
phyloLvl
&
phylo_levelGroups
.~
fromList
(
map
(
\
g
->
(
getGroupId
g
,
g
))
$
newGroups
!
(
phyloLvl
^.
phylo_levelPeriod
))
else
phyloLvl
)
-- | 2) add the curLvl + 1 phyloLevel to the phylo
$
addPhyloLevel
(
curLvl
+
1
)
-- | 1) update the current groups (with level parent pointers) in the phylo
$
updatePhyloGroups
curLvl
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
groups
)
phylo
--------------------
...
...
@@ -99,6 +104,9 @@ groupsToEdges prox thr docs groups =
toRelatedComponents
::
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
->
[[
PhyloGroup
]]
toRelatedComponents
nodes
edges
=
relatedComponents
$
((
map
(
\
((
g
,
g'
),
_
)
->
[
g
,
g'
])
edges
)
++
(
map
(
\
g
->
[
g
])
nodes
))
toParentId
::
PhyloGroup
->
PhyloGroupId
toParentId
child
=
((
child
^.
phylo_groupPeriod
,
child
^.
phylo_groupLevel
+
1
),
child
^.
phylo_groupIndex
)
reduceBranch
::
Proximity
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceBranch
prox
thr
docs
branch
=
...
...
@@ -106,15 +114,15 @@ reduceBranch prox thr docs branch =
let
periods
=
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,[
g
]))
branch
in
(
concat
.
concat
.
elems
)
$
mapWithKey
(
\
prd
groups
->
$
mapWithKey
(
\
prd
groups
->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let
edges
=
groupsToEdges
prox
thr
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
groups
in
map
(
\
(
idx
,
comp
)
->
in
map
(
\
comp
->
-- | 4) add to each groups their futur level parent group
let
parentId
=
(((
head'
"reduceBranch"
comp
)
^.
phylo_groupPeriod
,
1
+
(
head'
"reduceBranch"
comp
)
^.
phylo_groupLevel
),
idx
)
let
parentId
=
toParentId
(
head'
"parentId"
comp
)
in
map
(
\
g
->
g
&
phylo_groupLevelParents
%~
(
++
[(
parentId
,
1
)])
)
comp
)
-- |3) reduce the graph a a set of related components
$
zip
[
1
..
]
(
toRelatedComponents
groups
edges
)
)
periods
$
toRelatedComponents
groups
edges
)
periods
synchronicClustering
::
Phylo
->
Phylo
...
...
@@ -123,5 +131,6 @@ synchronicClustering phylo =
ByProximityThreshold
thr
->
toNextLevel
phylo
$
concat
$
map
(
\
branch
->
reduceBranch
(
phyloProximity
$
getConfig
phylo
)
thr
(
phylo
^.
phylo_timeDocs
)
branch
)
$
phyloToLastBranches
phylo
$
phyloToLastBranches
$
traceSynchronyStart
phylo
ByProximityDistribution
->
undefined
\ No newline at end of file
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
c60dc0b7
...
...
@@ -260,19 +260,19 @@ groupsToBranches groups =
recursiveMatching
::
Proximity
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Double
->
[[
PhyloGroup
]]
->
[
PhyloGroup
]
recursiveMatching
proximity
thr
frame
periods
docs
quality
branches
=
if
(
length
branches
==
(
length
$
concat
branches
))
then
concat
$
traceMatchNoSplit
branches
then
concat
branches
else
if
thr
>
1
then
concat
$
traceMatchLimit
branches
then
concat
branches
else
case
quality
<=
(
sum
nextQualities
)
of
-- | success : the new threshold improves the quality score, let's go deeper
-- | success : the new threshold improves the quality score, let's go deeper
(traceMatchSuccess thr quality (sum nextQualities))
True
->
concat
$
map
(
\
branches'
->
let
idx
=
fromJust
$
elemIndex
branches'
nextBranches
in
recursiveMatching
proximity
(
thr
+
(
getThresholdStep
proximity
))
frame
periods
docs
(
nextQualities
!!
idx
)
branches'
)
$
traceMatchSuccess
thr
quality
(
sum
nextQualities
)
nextBranches
-- | failure : last step was a local maximum of quality, let's validate it
False
->
concat
$
traceMatchFailure
thr
quality
(
sum
nextQualities
)
branches
$
nextBranches
-- | failure : last step was a local maximum of quality, let's validate it
(traceMatchFailure thr quality (sum nextQualities))
False
->
concat
branches
where
-- | 2) for each of the possible next branches process the phyloQuality score
nextQualities
::
[
Double
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment