Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
e2b8b663
Commit
e2b8b663
authored
Sep 05, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
temporal is close to be ok, start export
parent
5d618e53
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
180 additions
and
44 deletions
+180
-44
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+9
-1
PhyloExample.hs
src/Gargantext/Viz/Phylo/PhyloExample.hs
+3
-1
PhyloExport.hs
src/Gargantext/Viz/Phylo/PhyloExport.hs
+52
-1
PhyloMaker.hs
src/Gargantext/Viz/Phylo/PhyloMaker.hs
+5
-3
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+49
-6
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+62
-32
No files found.
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
e2b8b663
...
...
@@ -44,6 +44,8 @@ import GHC.IO (FilePath)
import
Control.DeepSeq
(
NFData
)
import
Control.Lens
(
makeLenses
)
import
qualified
Data.Text.Lazy
as
TextLazy
----------------
-- | Config | --
...
...
@@ -102,7 +104,7 @@ defaultConfig =
,
corpusParser
=
Csv
1000
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
2
,
phyloProximity
=
WeightedLogJaccard
10
0
0.
05
,
phyloProximity
=
WeightedLogJaccard
10
0
0.
2
,
timeUnit
=
Year
3
1
5
,
contextualUnit
=
Fis
2
4
,
branchSize
=
3
...
...
@@ -290,6 +292,12 @@ data PhyloFis = PhyloFis
}
deriving
(
Generic
,
NFData
,
Show
,
Eq
)
----------------
-- | Export | --
----------------
type
DotId
=
TextLazy
.
Text
----------------
-- | Lenses | --
----------------
...
...
src/Gargantext/Viz/Phylo/PhyloExample.hs
View file @
e2b8b663
...
...
@@ -29,6 +29,7 @@ import Gargantext.Text.Terms.Mono (monoTexts)
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.PhyloMaker
import
Gargantext.Viz.Phylo.TemporalMatching
(
temporalMatching
)
import
Control.Lens
...
...
@@ -40,7 +41,8 @@ import qualified Data.Vector as Vector
-----------------------------------------------
phylo1
::
Phylo
phylo1
=
appendGroups
fisToGroup
1
phyloFis
phyloBase
phylo1
=
temporalMatching
$
appendGroups
fisToGroup
1
phyloFis
phyloBase
---------------------------------------------
...
...
src/Gargantext/Viz/Phylo/PhyloExport.hs
View file @
e2b8b663
...
...
@@ -13,4 +13,55 @@ Portability : POSIX
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE MultiParamTypeClasses #-}
module
Gargantext.Viz.Phylo.PhyloExport
where
\ No newline at end of file
module
Gargantext.Viz.Phylo.PhyloExport
where
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Data.GraphViz.Types.Generalised
(
DotGraph
)
--------------------
-- | Dot export | --
--------------------
toDot
::
[
PhyloGroup
]
->
DotGraph
DotId
toDot
branches
=
undefined
----------------------
-- | post process | --
----------------------
processFilters
::
[
PhyloGroup
]
->
[
PhyloGroup
]
processFilters
branches
=
branches
processSort
::
[
PhyloGroup
]
->
[
PhyloGroup
]
processSort
branches
=
branches
processMetrics
::
[
PhyloGroup
]
->
[
PhyloGroup
]
processMetrics
branches
=
branches
processDynamics
::
[
PhyloGroup
]
->
[
PhyloGroup
]
processDynamics
branches
=
branches
processLabels
::
[
PhyloGroup
]
->
[
PhyloGroup
]
processLabels
branches
=
branches
phyloPostProcess
::
[
PhyloGroup
]
->
[
PhyloGroup
]
phyloPostProcess
branches
=
branches
---------------------
-- | phyloExport | --
---------------------
toPhyloExport
::
Phylo
->
DotGraph
DotId
toPhyloExport
phylo
=
toDot
$
phyloPostProcess
groups
where
groups
::
[
PhyloGroup
]
groups
=
getGroupsFromLevel
(
phyloLevel
$
getConfig
phylo
)
phylo
\ No newline at end of file
src/Gargantext/Viz/Phylo/PhyloMaker.hs
View file @
e2b8b663
...
...
@@ -23,6 +23,7 @@ import Data.Vector (Vector)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.TemporalMatching
(
temporalMatching
)
import
Gargantext.Text.Context
(
TermList
)
import
Gargantext.Text.Metrics.FrequentItemSet
(
fisWithSizePolyMap
,
Size
(
..
))
...
...
@@ -86,12 +87,13 @@ fisToGroup fis pId lvl idx fdt coocs =
(
fis
^.
phyloFis_support
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
1
,
[]
)
(
1
,[
0
])
[]
[]
[]
[]
[]
toPhylo1
::
[
Document
]
->
Phylo
->
Phylo
toPhylo1
docs
phyloBase
=
appendGroups
fisToGroup
1
phyloFis
phyloBase
toPhylo1
docs
phyloBase
=
temporalMatching
$
appendGroups
fisToGroup
1
phyloFis
phyloBase
where
--------------------------------------
phyloFis
::
Map
(
Date
,
Date
)
[
PhyloFis
]
...
...
@@ -181,7 +183,7 @@ docsToTimeScaleCooc docs fdt =
mCooc'
=
fromList
$
map
(
\
t
->
(
t
,
empty
))
$
toTimeScale
(
map
date
docs
)
1
in
trace
(
"
\n
"
<>
"-- | Build the coocurency matrix for "
<>
show
(
length
$
keys
mCooc'
)
<>
" unit of time"
<>
"
\n
"
)
in
trace
(
"
\n
"
<>
"-- | Build the coocurency matrix for "
<>
show
(
length
$
keys
mCooc'
)
<>
" unit of time"
<>
"
\n
"
)
$
unionWith
sumCooc
mCooc
mCooc'
...
...
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
e2b8b663
...
...
@@ -17,7 +17,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloTools
where
import
Data.Vector
(
Vector
,
elemIndex
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
)
import
Data.Set
(
Set
,
size
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
)
import
Data.String
(
String
)
...
...
@@ -202,11 +202,11 @@ addPointers :: PhyloGroup -> Filiation -> PointerType -> [Pointer] -> PhyloGroup
addPointers
group
fil
pty
pointers
=
case
pty
of
TemporalPointer
->
case
fil
of
ToChilds
->
group
&
phylo_groupPeriodChilds
%~
(
++
pointers
)
ToParents
->
group
&
phylo_groupPeriodParents
%~
(
++
pointers
)
ToChilds
->
group
&
phylo_groupPeriodChilds
.~
pointers
ToParents
->
group
&
phylo_groupPeriodParents
.~
pointers
LevelPointer
->
case
fil
of
ToChilds
->
group
&
phylo_groupLevelChilds
%~
(
++
pointers
)
ToParents
->
group
&
phylo_groupLevelParents
%~
(
++
pointers
)
ToChilds
->
group
&
phylo_groupLevelChilds
.~
pointers
ToParents
->
group
&
phylo_groupLevelParents
.~
pointers
getPeriodIds
::
Phylo
->
[(
Date
,
Date
)]
...
...
@@ -284,4 +284,47 @@ getThresholdStep proxi = case proxi of
----------------
ngramsInBranches
::
[[
PhyloGroup
]]
->
[
Int
]
ngramsInBranches
branches
=
nub
$
foldl
(
\
acc
g
->
acc
++
(
g
^.
phylo_groupNgrams
))
[]
$
concat
branches
\ No newline at end of file
ngramsInBranches
branches
=
nub
$
foldl
(
\
acc
g
->
acc
++
(
g
^.
phylo_groupNgrams
))
[]
$
concat
branches
traceMatchSuccess
::
Double
->
Double
->
Double
->
[[[
PhyloGroup
]]]
->
[[[
PhyloGroup
]]]
traceMatchSuccess
thr
qua
qua'
nextBranches
=
trace
(
"
\n
"
<>
"-- local branches : "
<>
(
init
$
show
((
init
.
init
.
snd
)
$
(
head'
"trace"
$
head'
"trace"
$
head'
"trace"
nextBranches
)
^.
phylo_groupBranchId
))
<>
",(1.."
<>
show
(
length
nextBranches
)
<>
")]"
<>
" | "
<>
show
((
length
.
concat
.
concat
)
nextBranches
)
<>
" groups"
<>
"
\n
"
<>
" - splited with success in "
<>
show
(
map
length
nextBranches
)
<>
" sub-branches"
<>
"
\n
"
<>
" - for the local threshold "
<>
show
(
thr
)
<>
" ( quality : "
<>
show
(
qua
)
<>
" < "
<>
show
(
qua'
)
<>
")
\n
"
)
nextBranches
traceMatchFailure
::
Double
->
Double
->
Double
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
traceMatchFailure
thr
qua
qua'
branches
=
trace
(
"
\n
"
<>
"-- local branches : "
<>
(
init
$
show
((
init
.
snd
)
$
(
head'
"trace"
$
head'
"trace"
branches
)
^.
phylo_groupBranchId
))
<>
",(1.."
<>
show
(
length
branches
)
<>
")]"
<>
" | "
<>
show
(
length
$
concat
branches
)
<>
" groups"
<>
"
\n
"
<>
" - split with failure for the local threshold "
<>
show
(
thr
)
<>
" ( quality : "
<>
show
(
qua
)
<>
" > "
<>
show
(
qua'
)
<>
")
\n
"
)
branches
traceMatchNoSplit
::
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
traceMatchNoSplit
branches
=
trace
(
"
\n
"
<>
"-- local branches : "
<>
(
init
$
show
((
init
.
snd
)
$
(
head'
"trace"
$
head'
"trace"
branches
)
^.
phylo_groupBranchId
))
<>
",(1.."
<>
show
(
length
branches
)
<>
")]"
<>
" | "
<>
show
(
length
$
concat
branches
)
<>
" groups"
<>
"
\n
"
<>
" - unable to split in smaller branches"
<>
"
\n
"
)
branches
traceMatchLimit
::
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
traceMatchLimit
branches
=
trace
(
"
\n
"
<>
"-- local branches : "
<>
(
init
$
show
((
init
.
snd
)
$
(
head'
"trace"
$
head'
"trace"
branches
)
^.
phylo_groupBranchId
))
<>
",(1.."
<>
show
(
length
branches
)
<>
")]"
<>
" | "
<>
show
(
length
$
concat
branches
)
<>
" groups"
<>
"
\n
"
<>
" - unable to increase the threshold above 1"
<>
"
\n
"
)
branches
traceMatchEnd
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceMatchEnd
groups
=
trace
(
"
\n
"
<>
"-- | End of temporal matching with "
<>
show
(
length
$
nub
$
map
(
\
g
->
g
^.
phylo_groupBranchId
)
groups
)
<>
" branches and "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
\ No newline at end of file
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
e2b8b663
...
...
@@ -15,7 +15,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.TemporalMatching
where
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
find
,
groupBy
,
scanl
,
nub
,
union
)
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
find
,
groupBy
,
scanl
,
nub
,
union
,
elemIndex
,
(
!!
)
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
filterWithKey
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
,
findWithDefault
)
import
Gargantext.Prelude
...
...
@@ -23,11 +23,13 @@ import Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.SynchronicClustering
import
Debug.Trace
(
trace
)
import
Prelude
(
logBase
)
import
Control.Lens
hiding
(
Level
)
import
qualified
Data.Set
as
Set
-------------------
-- | Proximity | --
-------------------
...
...
@@ -204,7 +206,9 @@ entropy branches =
/
(
sum
$
map
(
\
branch
->
1
/
log
(
termFreq
term
[
branch
]))
branches
)
*
(
sum
$
map
(
\
branch
->
let
q
=
branchObs
term
(
length
$
concat
branches
)
branch
in
q
*
logBase
2
q
)
branches
)
)
terms
in
if
(
q
==
0
)
then
0
else
-
q
*
logBase
2
q
)
branches
)
)
terms
where
-- | Probability to observe a branch given a random term of the phylo
branchObs
::
Int
->
Int
->
[
PhyloGroup
]
->
Double
...
...
@@ -213,10 +217,21 @@ entropy branches =
homogeneity
::
[[
PhyloGroup
]]
->
Double
homogeneity
_
=
undefined
-- where
-- branchCov :: [PhyloGroup] -> Int -> Double
-- branchCov branch total = (fromIntegral $ length branch) / (fromIntegral total)
homogeneity
branches
=
let
nbGroups
=
length
$
concat
branches
in
sum
$
map
(
\
branch
->
(
if
(
length
branch
==
nbGroups
)
then
1
else
(
1
/
log
(
branchCov
branch
nbGroups
))
/
(
sum
$
map
(
\
branch'
->
1
/
log
(
branchCov
branch'
nbGroups
))
branches
))
*
(
sum
$
map
(
\
term
->
(
termFreq
term
branches
)
/
(
sum
$
map
(
\
term'
->
termFreq
term'
branches
)
$
ngramsInBranches
[
branch
])
*
(
fromIntegral
$
sum
$
ngramsInBranches
[
filter
(
\
g
->
elem
term
$
g
^.
phylo_groupNgrams
)
branch
])
/
(
fromIntegral
$
sum
$
ngramsInBranches
[
branch
])
)
$
ngramsInBranches
[
branch
])
)
branches
where
branchCov
::
[
PhyloGroup
]
->
Int
->
Double
branchCov
branch
total
=
(
fromIntegral
$
length
branch
)
/
(
fromIntegral
total
)
toPhyloQuality
::
[[
PhyloGroup
]]
->
Double
...
...
@@ -243,42 +258,57 @@ groupsToBranches groups =
)
graph
recursiveMatching
::
Proximity
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
recursiveMatching
proximity
thr
max'
periods
docs
quality
groups
=
case
quality
<
quality'
of
-- | success : we localy improve the quality of the branch, let's go deeper
True
->
concat
$
map
(
\
branch
->
recursiveMatching
proximity
(
thr
+
(
getThresholdStep
proximity
))
max'
periods
docs
quality'
branch
)
branches
-- | failure : last step was the local maximum, let's validate it
False
->
groups
recursiveMatching
::
Proximity
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Double
->
[[
PhyloGroup
]]
->
[
PhyloGroup
]
recursiveMatching
proximity
thr
frame
periods
docs
quality
branches
=
if
(
length
branches
==
(
length
$
concat
branches
))
then
concat
$
traceMatchNoSplit
branches
else
if
thr
>
1
then
concat
$
traceMatchLimit
branches
else
case
quality
<=
(
sum
nextQualities
)
of
-- | success : the new threshold improves the quality score, let's go deeper
True
->
concat
$
map
(
\
branches'
->
let
idx
=
fromJust
$
elemIndex
branches'
nextBranches
in
recursiveMatching
proximity
(
thr
+
(
getThresholdStep
proximity
))
frame
periods
docs
(
nextQualities
!!
idx
)
branches'
)
$
traceMatchSuccess
thr
quality
(
sum
nextQualities
)
nextBranches
-- | failure : last step was a local maximum of quality, let's validate it
False
->
concat
$
traceMatchFailure
thr
quality
(
sum
nextQualities
)
branches
where
-- |
3) process a quality score on the local set of branches
quality'
::
Double
quality'
=
toPhyloQuality
b
ranches
-- |
2) group the new groups into branches
branches
::
[[
PhyloGroup
]]
branches
=
groupsToBranches
$
fromList
$
map
(
\
group
->
(
getGroupId
group
,
group
))
groups'
-- | 1) process a temporal matching for each group
groups'
::
[
PhyloGroup
]
groups'
=
processMatching
max'
periods
proximity
thr
docs
group
s
-- |
2) for each of the possible next branches process the phyloQuality score
nextQualities
::
[
Double
]
nextQualities
=
map
toPhyloQuality
nextB
ranches
-- |
1) for each local branch process a temporal matching then find the resulting branches
nextBranches
::
[[[
PhyloGroup
]
]]
nextBranches
=
map
(
\
branch
->
let
branch'
=
processMatching
frame
periods
proximity
thr
docs
branch
in
groupsToBranches
$
fromList
$
map
(
\
group
->
(
getGroupId
group
,
group
))
branch'
)
branche
s
temporalMatching
::
Phylo
->
Phylo
temporalMatching
phylo
=
updatePhyloGroups
1
branches
phylo
temporalMatching
phylo
=
updatePhyloGroups
1
branches
'
phylo
where
-- |
2
) run the recursive matching to find the best repartition among branches
branches
::
Map
PhyloGroupId
PhyloGroup
branches
=
fromList
-- |
4
) run the recursive matching to find the best repartition among branches
branches
'
::
Map
PhyloGroupId
PhyloGroup
branches
'
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
traceMatchEnd
$
recursiveMatching
(
phyloProximity
$
getConfig
phylo
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
+
(
getThresholdStep
$
phyloProximity
$
getConfig
phylo
))
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
toPhyloQuality
[
groups'
])
groups'
(
phylo
^.
phylo_timeDocs
)
quality
branches
-- | 3) process the quality score
quality
::
Double
quality
=
toPhyloQuality
branches
-- | 2) group into branches
branches
::
[[
PhyloGroup
]]
branches
=
groupsToBranches
$
fromList
$
map
(
\
group
->
(
getGroupId
group
,
group
))
$
trace
(
"
\n
"
<>
"-- | Init temporal matching for "
<>
show
(
length
$
groups'
)
<>
" groups"
<>
"
\n
"
)
groups'
-- | 1) for each group process an initial temporal Matching
groups'
::
[
PhyloGroup
]
groups'
=
processMatching
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phyloProximity
$
getConfig
phylo
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
getGroupsFromLevel
1
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
getGroupsFromLevel
1
phylo
)
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment