Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
175
Issues
175
List
Board
Labels
Milestones
Merge Requests
10
Merge Requests
10
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
e4e913ab
Commit
e4e913ab
authored
Aug 27, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
working on temporal matching
parent
91e81646
Pipeline
#545
failed with stage
Changes
4
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
259 additions
and
9 deletions
+259
-9
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+6
-1
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+63
-6
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+14
-1
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+176
-1
No files found.
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
e4e913ab
...
...
@@ -61,6 +61,7 @@ data Config =
,
phyloName
::
Text
,
phyloLevel
::
Int
,
timeUnit
::
Int
,
timeMatching
::
Int
,
timePeriod
::
Int
,
timeStep
::
Int
,
fisSupport
::
Int
...
...
@@ -78,6 +79,7 @@ defaultConfig =
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
2
,
timeUnit
=
1
,
timeMatching
=
5
,
timePeriod
=
3
,
timeStep
=
1
,
fisSupport
=
2
...
...
@@ -236,6 +238,9 @@ type Weight = Double
-- | Pointer : A weighted pointer to a given PhyloGroup
type
Pointer
=
(
PhyloGroupId
,
Weight
)
data
Filiation
=
ToParents
|
ToChilds
deriving
(
Generic
,
Show
)
data
PointerType
=
TemporalPointer
|
LevelPointer
deriving
(
Generic
,
Show
)
---------------------------
-- | Frequent Item Set | --
...
...
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
e4e913ab
...
...
@@ -18,17 +18,19 @@ module Gargantext.Viz.Phylo.PhyloTools where
import
Data.Vector
(
Vector
,
elemIndex
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
)
import
Data.Set
(
size
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
)
import
Data.Set
(
Set
,
size
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
)
)
import
Data.String
(
String
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Debug.Trace
(
trace
)
import
Control.Lens
import
Control.Lens
hiding
(
Level
)
import
qualified
Data.Vector
as
Vector
import
qualified
Data.List
as
List
import
qualified
Data.Set
as
Set
--------------
-- | Misc | --
...
...
@@ -39,6 +41,12 @@ countSup :: Double -> [Double] -> Int
countSup
s
l
=
length
$
filter
(
>
s
)
l
elemIndex'
::
Eq
a
=>
a
->
[
a
]
->
Int
elemIndex'
e
l
=
case
(
List
.
elemIndex
e
l
)
of
Nothing
->
panic
(
"[ERR][Viz.Phylo.PhyloTools] element not in list"
)
Just
i
->
i
---------------------
-- | Foundations | --
---------------------
...
...
@@ -57,6 +65,11 @@ ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
-- | Time | --
--------------
-- | To transform a list of periods into a set of Dates
periodsToYears
::
[(
Date
,
Date
)]
->
Set
Date
periodsToYears
periods
=
(
Set
.
fromList
.
sort
.
concat
)
$
map
(
\
(
d
,
d'
)
->
[
d
..
d'
])
periods
findBounds
::
[
Date
]
->
(
Date
,
Date
)
findBounds
dates
=
...
...
@@ -134,7 +147,7 @@ listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
listToEqual'
::
Eq
a
=>
[
a
]
->
[(
a
,
a
)]
listToEqual'
l
=
[(
x
,
y
)
|
x
<-
l
,
y
<-
l
,
x
==
y
]
listToKeys
::
[
Int
]
->
[(
Int
,
Int
)]
listToKeys
::
Eq
a
=>
[
a
]
->
[(
a
,
a
)]
listToKeys
lst
=
(
listToCombi'
lst
)
++
(
listToEqual'
lst
)
listToMatrix
::
[
Int
]
->
Map
(
Int
,
Int
)
Double
...
...
@@ -143,11 +156,28 @@ listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
sumCooc
::
Cooc
->
Cooc
->
Cooc
sumCooc
cooc
cooc'
=
unionWith
(
+
)
cooc
cooc'
---------------
-- | Phylo | --
---------------
getGroupId
::
PhyloGroup
->
PhyloGroupId
getGroupId
group
=
((
group
^.
phylo_groupPeriod
,
group
^.
phylo_groupLevel
),
group
^.
phylo_groupIndex
)
---------------
-- | Phylo | --
---------------
addPointers
::
PhyloGroup
->
Filiation
->
PointerType
->
[
Pointer
]
->
PhyloGroup
addPointers
group
fil
pty
pointers
=
case
pty
of
TemporalPointer
->
case
fil
of
ToChilds
->
group
&
phylo_groupPeriodChilds
%~
(
++
pointers
)
ToParents
->
group
&
phylo_groupPeriodParents
%~
(
++
pointers
)
LevelPointer
->
case
fil
of
ToChilds
->
group
&
phylo_groupLevelChilds
%~
(
++
pointers
)
ToParents
->
group
&
phylo_groupLevelParents
%~
(
++
pointers
)
getPeriodIds
::
Phylo
->
[(
Date
,
Date
)]
getPeriodIds
phylo
=
sortOn
fst
$
keys
...
...
@@ -160,3 +190,30 @@ getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
getRoots
::
Phylo
->
Vector
Ngrams
getRoots
phylo
=
(
phylo
^.
phylo_foundations
)
^.
foundations_roots
getGroupsFromLevel
::
Level
->
Phylo
->
[
PhyloGroup
]
getGroupsFromLevel
lvl
phylo
=
elems
$
view
(
phylo_periods
.
traverse
.
phylo_periodLevels
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
lvl
)
.
phylo_levelGroups
)
phylo
updatePhyloGroups
::
Level
->
Map
PhyloGroupId
PhyloGroup
->
Phylo
->
Phylo
updatePhyloGroups
lvl
m
phylo
=
over
(
phylo_periods
.
traverse
.
phylo_periodLevels
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
lvl
)
.
phylo_levelGroups
.
traverse
)
(
\
group
->
let
id
=
getGroupId
group
in
if
member
id
m
then
m
!
id
else
group
)
phylo
\ No newline at end of file
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
e4e913ab
...
...
@@ -14,3 +14,16 @@ Portability : POSIX
{-# LANGUAGE MultiParamTypeClasses #-}
module
Gargantext.Viz.Phylo.SynchronicClustering
where
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
--------------------
-- | Clustering | --
--------------------
relatedComponents
::
[
PhyloGroup
]
->
[[
PhyloGroup
]]
relatedComponents
groups
=
undefined
\ No newline at end of file
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
e4e913ab
...
...
@@ -14,3 +14,178 @@ Portability : POSIX
{-# LANGUAGE MultiParamTypeClasses #-}
module
Gargantext.Viz.Phylo.TemporalMatching
where
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
find
,
groupBy
,
scanl
,
any
,
nub
)
import
Data.Map
(
Map
,
fromList
,
toList
,
fromListWith
,
filterWithKey
,
elems
,
restrictKeys
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.SynchronicClustering
import
Control.Lens
hiding
(
Level
)
-------------------
-- | Proximity | --
-------------------
-- periodsToNbDocs :: [PhyloPeriodId] -> Phylo -> Double
-- periodsToNbDocs prds phylo = sum $ elems
-- $ restrictKeys (phylo ^. phylo_docsByYears)
-- $ periodsToYears prds
-- matchWithPairs :: PhyloGroup -> (PhyloGroup,PhyloGroup) -> Phylo -> Double
-- matchWithPairs g1 (g2,g3) p =
-- let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] p
-- cooc = if (g2 == g3)
-- then getGroupCooc g2
-- else unionWith (+) (getGroupCooc g2) (getGroupCooc g3)
-- ngrams = if (g2 == g3)
-- then getGroupNgrams g2
-- else union (getGroupNgrams g2) (getGroupNgrams g3)
-- in processProximity (getPhyloProximity p) nbDocs (getGroupCooc g1) cooc (getGroupNgrams g1) ngrams
toProximity
::
Map
Date
Double
->
PhyloGroup
->
PhyloGroup
->
PhyloGroup
->
Double
toProximity
docs
group
target
target'
=
let
nbDocs
=
sum
$
elems
docs
in
undefined
------------------------
-- | Local Matching | --
------------------------
makePairs
::
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
Double
->
Map
Date
Double
->
PhyloGroup
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs
candidates
periods
thr
docs
group
=
case
null
periods
of
True
->
[]
-- | at least on of the pair candidates should be from the last added period
False
->
filter
(
\
(
cdt
,
cdt'
)
->
(
inLastPeriod
cdt
periods
)
||
(
inLastPeriod
cdt'
periods
))
$
listToKeys
-- | remove poor candidates from previous periods
$
filter
(
\
cdt
->
(
inLastPeriod
cdt
periods
)
||
((
toProximity
(
reframeDocs
docs
periods
)
group
group
cdt
)
>=
thr
))
candidates
where
inLastPeriod
::
PhyloGroup
->
[
PhyloPeriodId
]
->
Bool
inLastPeriod
g
prds
=
(
g
^.
phylo_groupPeriod
)
==
(
last'
"makePairs"
prds
)
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Double
->
Map
Date
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
candidates
fil
thr
docs
group
=
case
pointers
of
Nothing
->
addPointers
group
fil
TemporalPointer
[]
Just
pts
->
addPointers
group
fil
TemporalPointer
$
head'
"phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
reverse
$
sortOn
snd
pts
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
where
pointers
::
Maybe
[
Pointer
]
pointers
=
find
(
not
.
null
)
-- | for each time frame, process the proximity on relevant pairs of targeted groups
$
scanl
(
\
acc
groups
->
let
periods
=
nub
$
map
(
\
g'
->
g'
^.
phylo_groupPeriod
)
$
concat
groups
pairs
=
makePairs
(
concat
groups
)
periods
thr
docs
group
in
acc
++
(
filter
(
\
(
_
,
proximity
)
->
proximity
>=
thr
)
$
concat
$
map
(
\
(
c
,
c'
)
->
-- | process the proximity between the current group and a pair of candidates
let
proximity
=
toProximity
(
reframeDocs
docs
periods
)
group
c
c'
in
if
(
c
==
c'
)
then
[(
getGroupId
c
,
proximity
)]
else
[(
getGroupId
c
,
proximity
),(
getGroupId
c'
,
proximity
)]
)
pairs
)
)
[]
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
$
inits
candidates
matchGroupToGroups
::
[[
PhyloGroup
]]
->
PhyloGroup
->
PhyloGroup
matchGroupToGroups
candidates
group
=
undefined
-----------------------------
-- | Adaptative Matching | --
-----------------------------
getNextPeriods
::
Filiation
->
Int
->
PhyloPeriodId
->
[
PhyloPeriodId
]
->
[
PhyloPeriodId
]
getNextPeriods
fil
max
pId
pIds
=
case
fil
of
ToChilds
->
take
max
$
(
tail
.
snd
)
$
splitAt
(
elemIndex'
pId
pIds
)
pIds
ToParents
->
take
max
$
(
reverse
.
fst
)
$
splitAt
(
elemIndex'
pId
pIds
)
pIds
getCandidates
::
Filiation
->
PhyloGroup
->
[
PhyloPeriodId
]
->
[
PhyloGroup
]
->
[[
PhyloGroup
]]
getCandidates
fil
g
pIds
targets
=
case
fil
of
ToChilds
->
targets'
ToParents
->
reverse
targets'
where
targets'
::
[[
PhyloGroup
]]
targets'
=
map
(
\
groups'
->
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
))
groups'
)
$
elems
$
filterWithKey
(
\
k
_
->
elem
k
pIds
)
$
fromListWith
(
++
)
$
sortOn
(
fst
.
fst
)
$
map
(
\
g'
->
(
g'
^.
phylo_groupPeriod
,[
g'
]))
targets
shouldBreak
::
Double
->
[(
Double
,[
PhyloGroup
])]
->
Bool
shouldBreak
thr
branches
=
any
(
\
(
quality
,
_
)
->
quality
<
thr
)
branches
toBranchQuality
::
[[
PhyloGroup
]]
->
[(
Double
,[
PhyloGroup
])]
toBranchQuality
branches
=
undefined
reframeDocs
::
Map
Date
Double
->
[
PhyloPeriodId
]
->
Map
Date
Double
reframeDocs
docs
periods
=
restrictKeys
docs
$
periodsToYears
periods
adaptativeMatching
::
Int
->
Double
->
Double
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[
PhyloGroup
]
adaptativeMatching
maxTime
thrStep
thrMatch
thrQua
docs
groups
candidates
periods
=
-- | check if we should break some of the new branches or not
case
shouldBreak
thrQua
branches'
of
True
->
concat
$
map
(
\
(
s
,
b
)
->
if
s
>=
thrQua
-- | we keep the branch as it is
then
b
-- | we break the branch using an increased temporal matching threshold
else
let
nextGroups
=
undefined
nextCandidates
=
undefined
nextPeriods
=
undefined
in
adaptativeMatching
maxTime
thrStep
(
thrMatch
+
thrStep
)
thrQua
(
reframeDocs
docs
nextPeriods
)
nextGroups
nextCandidates
nextPeriods
)
branches'
-- | the quality of all the new branches is sufficient
False
->
concat
branches
where
-- | 3) process a quality score for each new branch
branches'
::
[(
Double
,[
PhyloGroup
])]
branches'
=
toBranchQuality
branches
-- | 2) group the new groups into branches
branches
::
[[
PhyloGroup
]]
branches
=
relatedComponents
groups'
-- | 1) connect each group to its parents and childs
groups'
::
[
PhyloGroup
]
groups'
=
map
(
\
group
->
let
childs
=
getCandidates
ToChilds
group
(
getNextPeriods
ToChilds
maxTime
(
group
^.
phylo_groupPeriod
)
periods
)
candidates
parents
=
getCandidates
ToParents
group
(
getNextPeriods
ToParents
maxTime
(
group
^.
phylo_groupPeriod
)
periods
)
candidates
-- | match the group to its possible childs then parents
in
matchGroupToGroups
parents
$
matchGroupToGroups
childs
group
)
groups
temporalMatching
::
Phylo
->
Phylo
temporalMatching
phylo
=
let
branches
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
adaptativeMatching
(
timeMatching
$
getConfig
phylo
)
0
0
0
(
phylo
^.
phylo_timeDocs
)
(
getGroupsFromLevel
1
phylo
)
(
getGroupsFromLevel
1
phylo
)
(
getPeriodIds
phylo
)
in
updatePhyloGroups
1
branches
phylo
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment