Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
145
Issues
145
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
e4e913ab
Commit
e4e913ab
authored
Aug 27, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
working on temporal matching
parent
91e81646
Pipeline
#545
failed with stage
Changes
4
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
259 additions
and
9 deletions
+259
-9
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+6
-1
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+63
-6
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+14
-1
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+176
-1
No files found.
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
e4e913ab
...
...
@@ -61,6 +61,7 @@ data Config =
,
phyloName
::
Text
,
phyloLevel
::
Int
,
timeUnit
::
Int
,
timeMatching
::
Int
,
timePeriod
::
Int
,
timeStep
::
Int
,
fisSupport
::
Int
...
...
@@ -78,6 +79,7 @@ defaultConfig =
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
2
,
timeUnit
=
1
,
timeMatching
=
5
,
timePeriod
=
3
,
timeStep
=
1
,
fisSupport
=
2
...
...
@@ -236,6 +238,9 @@ type Weight = Double
-- | Pointer : A weighted pointer to a given PhyloGroup
type
Pointer
=
(
PhyloGroupId
,
Weight
)
data
Filiation
=
ToParents
|
ToChilds
deriving
(
Generic
,
Show
)
data
PointerType
=
TemporalPointer
|
LevelPointer
deriving
(
Generic
,
Show
)
---------------------------
-- | Frequent Item Set | --
...
...
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
e4e913ab
...
...
@@ -18,17 +18,19 @@ module Gargantext.Viz.Phylo.PhyloTools where
import
Data.Vector
(
Vector
,
elemIndex
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
)
import
Data.Set
(
size
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
)
import
Data.Set
(
Set
,
size
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
)
)
import
Data.String
(
String
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Debug.Trace
(
trace
)
import
Control.Lens
import
Control.Lens
hiding
(
Level
)
import
qualified
Data.Vector
as
Vector
import
qualified
Data.List
as
List
import
qualified
Data.Set
as
Set
--------------
-- | Misc | --
...
...
@@ -39,6 +41,12 @@ countSup :: Double -> [Double] -> Int
countSup
s
l
=
length
$
filter
(
>
s
)
l
elemIndex'
::
Eq
a
=>
a
->
[
a
]
->
Int
elemIndex'
e
l
=
case
(
List
.
elemIndex
e
l
)
of
Nothing
->
panic
(
"[ERR][Viz.Phylo.PhyloTools] element not in list"
)
Just
i
->
i
---------------------
-- | Foundations | --
---------------------
...
...
@@ -57,6 +65,11 @@ ngramsToIdx ns fdt = map (\n -> fromJust $ elemIndex n fdt) ns
-- | Time | --
--------------
-- | To transform a list of periods into a set of Dates
periodsToYears
::
[(
Date
,
Date
)]
->
Set
Date
periodsToYears
periods
=
(
Set
.
fromList
.
sort
.
concat
)
$
map
(
\
(
d
,
d'
)
->
[
d
..
d'
])
periods
findBounds
::
[
Date
]
->
(
Date
,
Date
)
findBounds
dates
=
...
...
@@ -134,7 +147,7 @@ listToCombi' l = [(x,y) | (x:rest) <- tails l, y <- rest]
listToEqual'
::
Eq
a
=>
[
a
]
->
[(
a
,
a
)]
listToEqual'
l
=
[(
x
,
y
)
|
x
<-
l
,
y
<-
l
,
x
==
y
]
listToKeys
::
[
Int
]
->
[(
Int
,
Int
)]
listToKeys
::
Eq
a
=>
[
a
]
->
[(
a
,
a
)]
listToKeys
lst
=
(
listToCombi'
lst
)
++
(
listToEqual'
lst
)
listToMatrix
::
[
Int
]
->
Map
(
Int
,
Int
)
Double
...
...
@@ -143,11 +156,28 @@ listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
sumCooc
::
Cooc
->
Cooc
->
Cooc
sumCooc
cooc
cooc'
=
unionWith
(
+
)
cooc
cooc'
---------------
-- | Phylo | --
---------------
getGroupId
::
PhyloGroup
->
PhyloGroupId
getGroupId
group
=
((
group
^.
phylo_groupPeriod
,
group
^.
phylo_groupLevel
),
group
^.
phylo_groupIndex
)
---------------
-- | Phylo | --
---------------
addPointers
::
PhyloGroup
->
Filiation
->
PointerType
->
[
Pointer
]
->
PhyloGroup
addPointers
group
fil
pty
pointers
=
case
pty
of
TemporalPointer
->
case
fil
of
ToChilds
->
group
&
phylo_groupPeriodChilds
%~
(
++
pointers
)
ToParents
->
group
&
phylo_groupPeriodParents
%~
(
++
pointers
)
LevelPointer
->
case
fil
of
ToChilds
->
group
&
phylo_groupLevelChilds
%~
(
++
pointers
)
ToParents
->
group
&
phylo_groupLevelParents
%~
(
++
pointers
)
getPeriodIds
::
Phylo
->
[(
Date
,
Date
)]
getPeriodIds
phylo
=
sortOn
fst
$
keys
...
...
@@ -160,3 +190,30 @@ getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
getRoots
::
Phylo
->
Vector
Ngrams
getRoots
phylo
=
(
phylo
^.
phylo_foundations
)
^.
foundations_roots
getGroupsFromLevel
::
Level
->
Phylo
->
[
PhyloGroup
]
getGroupsFromLevel
lvl
phylo
=
elems
$
view
(
phylo_periods
.
traverse
.
phylo_periodLevels
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
lvl
)
.
phylo_levelGroups
)
phylo
updatePhyloGroups
::
Level
->
Map
PhyloGroupId
PhyloGroup
->
Phylo
->
Phylo
updatePhyloGroups
lvl
m
phylo
=
over
(
phylo_periods
.
traverse
.
phylo_periodLevels
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
lvl
)
.
phylo_levelGroups
.
traverse
)
(
\
group
->
let
id
=
getGroupId
group
in
if
member
id
m
then
m
!
id
else
group
)
phylo
\ No newline at end of file
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
e4e913ab
...
...
@@ -14,3 +14,16 @@ Portability : POSIX
{-# LANGUAGE MultiParamTypeClasses #-}
module
Gargantext.Viz.Phylo.SynchronicClustering
where
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
--------------------
-- | Clustering | --
--------------------
relatedComponents
::
[
PhyloGroup
]
->
[[
PhyloGroup
]]
relatedComponents
groups
=
undefined
\ No newline at end of file
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
e4e913ab
...
...
@@ -14,3 +14,178 @@ Portability : POSIX
{-# LANGUAGE MultiParamTypeClasses #-}
module
Gargantext.Viz.Phylo.TemporalMatching
where
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
find
,
groupBy
,
scanl
,
any
,
nub
)
import
Data.Map
(
Map
,
fromList
,
toList
,
fromListWith
,
filterWithKey
,
elems
,
restrictKeys
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.SynchronicClustering
import
Control.Lens
hiding
(
Level
)
-------------------
-- | Proximity | --
-------------------
-- periodsToNbDocs :: [PhyloPeriodId] -> Phylo -> Double
-- periodsToNbDocs prds phylo = sum $ elems
-- $ restrictKeys (phylo ^. phylo_docsByYears)
-- $ periodsToYears prds
-- matchWithPairs :: PhyloGroup -> (PhyloGroup,PhyloGroup) -> Phylo -> Double
-- matchWithPairs g1 (g2,g3) p =
-- let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] p
-- cooc = if (g2 == g3)
-- then getGroupCooc g2
-- else unionWith (+) (getGroupCooc g2) (getGroupCooc g3)
-- ngrams = if (g2 == g3)
-- then getGroupNgrams g2
-- else union (getGroupNgrams g2) (getGroupNgrams g3)
-- in processProximity (getPhyloProximity p) nbDocs (getGroupCooc g1) cooc (getGroupNgrams g1) ngrams
toProximity
::
Map
Date
Double
->
PhyloGroup
->
PhyloGroup
->
PhyloGroup
->
Double
toProximity
docs
group
target
target'
=
let
nbDocs
=
sum
$
elems
docs
in
undefined
------------------------
-- | Local Matching | --
------------------------
makePairs
::
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
Double
->
Map
Date
Double
->
PhyloGroup
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs
candidates
periods
thr
docs
group
=
case
null
periods
of
True
->
[]
-- | at least on of the pair candidates should be from the last added period
False
->
filter
(
\
(
cdt
,
cdt'
)
->
(
inLastPeriod
cdt
periods
)
||
(
inLastPeriod
cdt'
periods
))
$
listToKeys
-- | remove poor candidates from previous periods
$
filter
(
\
cdt
->
(
inLastPeriod
cdt
periods
)
||
((
toProximity
(
reframeDocs
docs
periods
)
group
group
cdt
)
>=
thr
))
candidates
where
inLastPeriod
::
PhyloGroup
->
[
PhyloPeriodId
]
->
Bool
inLastPeriod
g
prds
=
(
g
^.
phylo_groupPeriod
)
==
(
last'
"makePairs"
prds
)
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Double
->
Map
Date
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
candidates
fil
thr
docs
group
=
case
pointers
of
Nothing
->
addPointers
group
fil
TemporalPointer
[]
Just
pts
->
addPointers
group
fil
TemporalPointer
$
head'
"phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
reverse
$
sortOn
snd
pts
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
where
pointers
::
Maybe
[
Pointer
]
pointers
=
find
(
not
.
null
)
-- | for each time frame, process the proximity on relevant pairs of targeted groups
$
scanl
(
\
acc
groups
->
let
periods
=
nub
$
map
(
\
g'
->
g'
^.
phylo_groupPeriod
)
$
concat
groups
pairs
=
makePairs
(
concat
groups
)
periods
thr
docs
group
in
acc
++
(
filter
(
\
(
_
,
proximity
)
->
proximity
>=
thr
)
$
concat
$
map
(
\
(
c
,
c'
)
->
-- | process the proximity between the current group and a pair of candidates
let
proximity
=
toProximity
(
reframeDocs
docs
periods
)
group
c
c'
in
if
(
c
==
c'
)
then
[(
getGroupId
c
,
proximity
)]
else
[(
getGroupId
c
,
proximity
),(
getGroupId
c'
,
proximity
)]
)
pairs
)
)
[]
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
$
inits
candidates
matchGroupToGroups
::
[[
PhyloGroup
]]
->
PhyloGroup
->
PhyloGroup
matchGroupToGroups
candidates
group
=
undefined
-----------------------------
-- | Adaptative Matching | --
-----------------------------
getNextPeriods
::
Filiation
->
Int
->
PhyloPeriodId
->
[
PhyloPeriodId
]
->
[
PhyloPeriodId
]
getNextPeriods
fil
max
pId
pIds
=
case
fil
of
ToChilds
->
take
max
$
(
tail
.
snd
)
$
splitAt
(
elemIndex'
pId
pIds
)
pIds
ToParents
->
take
max
$
(
reverse
.
fst
)
$
splitAt
(
elemIndex'
pId
pIds
)
pIds
getCandidates
::
Filiation
->
PhyloGroup
->
[
PhyloPeriodId
]
->
[
PhyloGroup
]
->
[[
PhyloGroup
]]
getCandidates
fil
g
pIds
targets
=
case
fil
of
ToChilds
->
targets'
ToParents
->
reverse
targets'
where
targets'
::
[[
PhyloGroup
]]
targets'
=
map
(
\
groups'
->
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
))
groups'
)
$
elems
$
filterWithKey
(
\
k
_
->
elem
k
pIds
)
$
fromListWith
(
++
)
$
sortOn
(
fst
.
fst
)
$
map
(
\
g'
->
(
g'
^.
phylo_groupPeriod
,[
g'
]))
targets
shouldBreak
::
Double
->
[(
Double
,[
PhyloGroup
])]
->
Bool
shouldBreak
thr
branches
=
any
(
\
(
quality
,
_
)
->
quality
<
thr
)
branches
toBranchQuality
::
[[
PhyloGroup
]]
->
[(
Double
,[
PhyloGroup
])]
toBranchQuality
branches
=
undefined
reframeDocs
::
Map
Date
Double
->
[
PhyloPeriodId
]
->
Map
Date
Double
reframeDocs
docs
periods
=
restrictKeys
docs
$
periodsToYears
periods
adaptativeMatching
::
Int
->
Double
->
Double
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[
PhyloGroup
]
adaptativeMatching
maxTime
thrStep
thrMatch
thrQua
docs
groups
candidates
periods
=
-- | check if we should break some of the new branches or not
case
shouldBreak
thrQua
branches'
of
True
->
concat
$
map
(
\
(
s
,
b
)
->
if
s
>=
thrQua
-- | we keep the branch as it is
then
b
-- | we break the branch using an increased temporal matching threshold
else
let
nextGroups
=
undefined
nextCandidates
=
undefined
nextPeriods
=
undefined
in
adaptativeMatching
maxTime
thrStep
(
thrMatch
+
thrStep
)
thrQua
(
reframeDocs
docs
nextPeriods
)
nextGroups
nextCandidates
nextPeriods
)
branches'
-- | the quality of all the new branches is sufficient
False
->
concat
branches
where
-- | 3) process a quality score for each new branch
branches'
::
[(
Double
,[
PhyloGroup
])]
branches'
=
toBranchQuality
branches
-- | 2) group the new groups into branches
branches
::
[[
PhyloGroup
]]
branches
=
relatedComponents
groups'
-- | 1) connect each group to its parents and childs
groups'
::
[
PhyloGroup
]
groups'
=
map
(
\
group
->
let
childs
=
getCandidates
ToChilds
group
(
getNextPeriods
ToChilds
maxTime
(
group
^.
phylo_groupPeriod
)
periods
)
candidates
parents
=
getCandidates
ToParents
group
(
getNextPeriods
ToParents
maxTime
(
group
^.
phylo_groupPeriod
)
periods
)
candidates
-- | match the group to its possible childs then parents
in
matchGroupToGroups
parents
$
matchGroupToGroups
childs
group
)
groups
temporalMatching
::
Phylo
->
Phylo
temporalMatching
phylo
=
let
branches
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
adaptativeMatching
(
timeMatching
$
getConfig
phylo
)
0
0
0
(
phylo
^.
phylo_timeDocs
)
(
getGroupsFromLevel
1
phylo
)
(
getGroupsFromLevel
1
phylo
)
(
getPeriodIds
phylo
)
in
updatePhyloGroups
1
branches
phylo
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment