Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
ff80ee2f
Commit
ff80ee2f
authored
May 24, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix temporalMatching
parent
38d3a9cd
Pipeline
#415
failed with stage
Changes
11
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
244 additions
and
175 deletions
+244
-175
Phylo.hs
src/Gargantext/Viz/Phylo.hs
+3
-0
Cluster.hs
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
+7
-11
Cooc.hs
src/Gargantext/Viz/Phylo/Aggregates/Cooc.hs
+41
-2
Document.hs
src/Gargantext/Viz/Phylo/Aggregates/Document.hs
+9
-4
Fis.hs
src/Gargantext/Viz/Phylo/Aggregates/Fis.hs
+2
-2
Example.hs
src/Gargantext/Viz/Phylo/Example.hs
+10
-3
LevelMaker.hs
src/Gargantext/Viz/Phylo/LevelMaker.hs
+28
-12
LinkMaker.hs
src/Gargantext/Viz/Phylo/LinkMaker.hs
+76
-86
Proximity.hs
src/Gargantext/Viz/Phylo/Metrics/Proximity.hs
+24
-49
Tools.hs
src/Gargantext/Viz/Phylo/Tools.hs
+43
-5
Filters.hs
src/Gargantext/Viz/Phylo/View/Filters.hs
+1
-1
No files found.
src/Gargantext/Viz/Phylo.hs
View file @
ff80ee2f
...
...
@@ -77,6 +77,8 @@ data Phylo =
Phylo
{
_phylo_duration
::
(
Start
,
End
)
,
_phylo_foundations
::
PhyloFoundations
,
_phylo_periods
::
[
PhyloPeriod
]
,
_phylo_docsByYears
::
Map
Date
Double
,
_phylo_cooc
::
Map
Date
(
Map
(
Int
,
Int
)
Double
)
,
_phylo_param
::
PhyloParam
}
deriving
(
Generic
,
Show
,
Eq
)
...
...
@@ -150,6 +152,7 @@ data PhyloGroup =
,
_phylo_groupNgrams
::
[
Int
]
,
_phylo_groupMeta
::
Map
Text
Double
,
_phylo_groupBranchId
::
Maybe
PhyloBranchId
,
_phylo_groupCooc
::
Map
(
Int
,
Int
)
Double
,
_phylo_groupPeriodParents
::
[
Pointer
]
,
_phylo_groupPeriodChilds
::
[
Pointer
]
...
...
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
View file @
ff80ee2f
...
...
@@ -25,7 +25,7 @@ import Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.Metrics.Proximity
import
Gargantext.Viz.Phylo.Metrics.Clustering
import
Gargantext.Viz.Phylo.
Aggregates.Cooc
import
Gargantext.Viz.Phylo.
LinkMaker
import
qualified
Data.Map
as
Map
import
qualified
Data.Vector.Storable
as
VS
...
...
@@ -49,15 +49,11 @@ graphToClusters clust (nodes,edges) = case clust of
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph
::
Proximity
->
[
PhyloGroup
]
->
Map
(
Int
,
Int
)
Double
->
([
GroupNode
],[
GroupEdge
])
groupsToGraph
prox
gs
cooc
=
case
prox
of
-- WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), traceSim x y (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc) p
-- $ weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
-- $ getCandidates gs)
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard'
sens
(
getGroupNgrams
x
)
(
getGroupNgrams
y
)
cooc
))
$
getCandidates
gs
)
Hamming
(
HammingParams
_
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
hamming
(
getSubCooc
(
getGroupNgrams
x
)
cooc
)
(
getSubCooc
(
getGroupNgrams
y
)
cooc
)))
$
getCandidates
gs
)
groupsToGraph
::
Double
->
Proximity
->
[
PhyloGroup
]
->
([
GroupNode
],[
GroupEdge
])
groupsToGraph
nbDocs
prox
gs
=
case
prox
of
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard
sens
(
getGroupCooc
x
)
(
getGroupCooc
y
)
nbDocs
))
$
getCandidates
gs
)
Hamming
(
HammingParams
_
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
hamming
(
getGroupCooc
x
)
(
getGroupCooc
y
)))
$
getCandidates
gs
)
_
->
undefined
...
...
@@ -84,7 +80,7 @@ phyloToClusters lvl clus p = Map.fromList
--------------------------------------
graphs
::
[([
GroupNode
],[
GroupEdge
])]
graphs
=
traceGraph
lvl
(
getThreshold
prox
)
$
map
(
\
prd
->
groupsToGraph
prox
(
getGroupsWithFilters
lvl
prd
p
)
(
getCooc
[
prd
]
p
))
periods
$
map
(
\
prd
->
groupsToGraph
(
periodsToNbDocs
[
prd
]
p
)
prox
(
getGroupsWithFilters
lvl
prd
p
))
periods
--------------------------------------
prox
::
Proximity
prox
=
getProximity
clus
...
...
src/Gargantext/Viz/Phylo/Aggregates/Cooc.hs
View file @
ff80ee2f
...
...
@@ -17,8 +17,10 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.Aggregates.Cooc
where
import
Data.List
(
union
,
concat
,
nub
)
import
Data.Map
(
Map
,
elems
,
adjust
,
filterWithKey
)
import
Data.List
(
union
,
concat
,
nub
,
sort
)
import
Data.Map
(
Map
,
elems
,
adjust
,
filterWithKey
,
fromListWith
,
fromList
,
restrictKeys
)
import
Data.Set
(
Set
)
import
Data.Vector
(
Vector
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
...
...
@@ -83,5 +85,42 @@ getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)
--------------------------------------
-- | To transform a list of index into a cooc matrix
listToCooc
::
[
Int
]
->
Map
(
Int
,
Int
)
Double
listToCooc
lst
=
fromList
$
map
(
\
combi
->
(
combi
,
1
))
$
listToFullCombi
lst
-- | To transform a list of ngrams into a list of indexes
ngramsToIdx
::
[
Ngrams
]
->
Vector
Ngrams
->
[
Int
]
ngramsToIdx
ns
v
=
sort
$
map
(
\
n
->
getIdxInVector
n
v
)
ns
-- | To build the cooc matrix by years out of the corpus
docsToCooc
::
[
Document
]
->
Vector
Ngrams
->
Map
Date
(
Map
(
Int
,
Int
)
Double
)
docsToCooc
docs
fdt
=
fromListWith
sumCooc
$
map
(
\
(
d
,
l
)
->
(
d
,
listToCooc
l
))
$
map
(
\
doc
->
(
date
doc
,
ngramsToIdx
(
text
doc
)
fdt
))
docs
-- | To sum all the docs produced during a list of years
sumDocsByYears
::
Set
Date
->
Map
Date
Double
->
Double
sumDocsByYears
years
m
=
sum
$
elems
$
restrictKeys
m
years
-- | To get the cooc matrix of a group
groupToCooc
::
PhyloGroup
->
Phylo
->
Map
(
Int
,
Int
)
Double
groupToCooc
g
p
=
getMiniCooc
(
listToFullCombi
$
getGroupNgrams
g
)
(
periodsToYears
[
getGroupPeriod
g
])
(
getPhyloCooc
p
)
-- | To get the union of the cooc matrix of two groups
unionOfCooc
::
PhyloGroup
->
PhyloGroup
->
Phylo
->
Map
(
Int
,
Int
)
Double
unionOfCooc
g
g'
p
=
sumCooc
(
groupToCooc
g
p
)
(
groupToCooc
g'
p
)
-- phyloCooc :: Map (Int, Int) Double
-- phyloCooc = fisToCooc phyloFis phylo1_0_1
src/Gargantext/Viz/Phylo/Aggregates/Document.hs
View file @
ff80ee2f
...
...
@@ -17,8 +17,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.Aggregates.Document
where
import
Data.List
(
last
)
import
Data.Map
(
Map
)
import
Data.Map
(
Map
,
fromListWith
)
import
Data.Text
(
Text
)
import
Data.Tuple
(
fst
)
import
Data.Vector
(
Vector
)
...
...
@@ -32,7 +31,7 @@ import qualified Data.Vector as Vector
-- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods
::
(
Eq
date
,
Enum
date
)
=>
Grain
->
Step
->
(
date
,
date
)
->
[(
date
,
date
)]
initPeriods
g
s
(
start
,
end
)
=
map
(
\
l
->
(
head'
"Doc"
l
,
last
l
))
initPeriods
g
s
(
start
,
end
)
=
map
(
\
l
->
(
head'
"Doc"
l
,
last
'
"Doc"
l
))
$
chunkAlong
g
s
[
start
..
end
]
...
...
@@ -45,7 +44,7 @@ groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds
inPeriode
::
Ord
b
=>
(
t
->
b
)
->
[
t
]
->
(
b
,
b
)
->
[
t
]
inPeriode
f'
h
(
start
,
end
)
=
fst
$
List
.
partition
(
\
d
->
f'
d
>=
start
&&
f'
d
<=
end
)
h
--------------------------------------
--------------------------------------
-- | To parse a list of Documents by filtering on a Vector of Ngrams
...
...
@@ -54,4 +53,10 @@ parseDocs roots c = map (\(d,t)
->
Document
d
(
filter
(
\
x
->
Vector
.
elem
x
roots
)
$
monoTexts
t
))
c
-- | To count the number of documents by year
countDocs
::
[(
Date
,
a
)]
->
Map
Date
Double
countDocs
corpus
=
fromListWith
(
+
)
$
map
(
\
(
d
,
_
)
->
(
d
,
1
))
corpus
src/Gargantext/Viz/Phylo/Aggregates/Fis.hs
View file @
ff80ee2f
...
...
@@ -43,12 +43,12 @@ filterFis keep thr f m = case keep of
-- | To filter Fis with small Support
filterFisBySupport
::
Int
->
[
PhyloFis
]
->
[
PhyloFis
]
filterFisBySupport
thr
l
=
filter
(
\
fis
->
getSupport
fis
>
thr
)
l
filterFisBySupport
thr
l
=
filter
(
\
fis
->
getSupport
fis
>
=
thr
)
l
-- | To filter Fis with small Clique size
filterFisByClique
::
Int
->
[
PhyloFis
]
->
[
PhyloFis
]
filterFisByClique
thr
l
=
filter
(
\
fis
->
(
size
$
getClique
fis
)
>
thr
)
l
filterFisByClique
thr
l
=
filter
(
\
fis
->
(
size
$
getClique
fis
)
>
=
thr
)
l
-- | To filter nested Fis
...
...
src/Gargantext/Viz/Phylo/Example.hs
View file @
ff80ee2f
...
...
@@ -40,6 +40,7 @@ import Gargantext.Text.Context (TermList)
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Aggregates.Cluster
import
Gargantext.Viz.Phylo.Aggregates.Document
import
Gargantext.Viz.Phylo.Aggregates.Cooc
import
Gargantext.Viz.Phylo.Aggregates.Fis
import
Gargantext.Viz.Phylo.BranchMaker
import
Gargantext.Viz.Phylo.LevelMaker
...
...
@@ -104,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild
::
PhyloQueryBuild
phyloQueryBuild
=
PhyloQueryBuild
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
3
10
)
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.5
0
)
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
1
20
)
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.3
0
)
...
...
@@ -154,7 +155,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster
::
Map
(
Date
,
Date
)
[
PhyloCluster
]
phyloCluster
=
phyloToClusters
1
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.05
10
)
phyloBranch1
phyloCluster
=
phyloToClusters
3
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.05
10
)
phyloBranch1
----------------------------------
...
...
@@ -226,7 +227,13 @@ phyloDocs = corpusToDocs corpus phyloBase
phyloBase
::
Phylo
phyloBase
=
initPhyloBase
periods
(
PhyloFoundations
foundationsRoots
termList
)
defaultPhyloParam
phyloBase
=
initPhyloBase
periods
(
PhyloFoundations
foundationsRoots
termList
)
nbDocs
cooc
defaultPhyloParam
cooc
::
Map
Date
(
Map
(
Int
,
Int
)
Double
)
cooc
=
docsToCooc
(
parseDocs
foundationsRoots
corpus
)
foundationsRoots
nbDocs
::
Map
Date
Double
nbDocs
=
countDocs
corpus
periods
::
[(
Date
,
Date
)]
periods
=
initPeriods
5
3
...
...
src/Gargantext/Viz/Phylo/LevelMaker.hs
View file @
ff80ee2f
...
...
@@ -32,6 +32,7 @@ import Gargantext.Viz.Phylo.Aggregates.Fis
import
Gargantext.Viz.Phylo.BranchMaker
import
Gargantext.Viz.Phylo.LinkMaker
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.Aggregates.Cooc
import
Gargantext.Text.Context
(
TermList
)
import
qualified
Data.Vector.Storable
as
VS
...
...
@@ -60,7 +61,7 @@ instance PhyloLevelMaker PhyloCluster
|
otherwise
=
panic
(
"[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2"
)
--------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
toPhyloGroups
lvl
(
d
,
d'
)
l
m
_
=
map
(
\
(
idx
,
cluster
)
->
clusterToGroup
(
d
,
d'
)
lvl
idx
""
cluster
m
)
$
zip
[
1
..
]
l
toPhyloGroups
lvl
(
d
,
d'
)
l
m
p
=
map
(
\
(
idx
,
cluster
)
->
clusterToGroup
(
d
,
d'
)
lvl
idx
""
cluster
m
p
)
$
zip
[
1
..
]
l
--------------------------------------
...
...
@@ -94,9 +95,12 @@ instance PhyloLevelMaker Document
-- | To transform a Cluster into a Phylogroup
clusterToGroup
::
PhyloPeriodId
->
Level
->
Int
->
Text
->
PhyloCluster
->
Map
(
Date
,
Date
)
[
PhyloCluster
]
->
PhyloGroup
clusterToGroup
prd
lvl
idx
lbl
groups
_m
=
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
empty
Nothing
[]
[]
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
groups
)
clusterToGroup
::
PhyloPeriodId
->
Level
->
Int
->
Text
->
PhyloCluster
->
Map
(
Date
,
Date
)
[
PhyloCluster
]
->
Phylo
->
PhyloGroup
clusterToGroup
prd
lvl
idx
lbl
groups
_m
p
=
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
empty
Nothing
(
getMiniCooc
(
listToFullCombi
ngrams
)
(
periodsToYears
[
prd
])
(
getPhyloCooc
p
))
[]
[]
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
groups
)
where
--------------------------------------
ngrams
::
[
Int
]
...
...
@@ -107,7 +111,9 @@ clusterToGroup prd lvl idx lbl groups _m =
-- | To transform a Clique into a PhyloGroup
cliqueToGroup
::
PhyloPeriodId
->
Level
->
Int
->
Text
->
PhyloFis
->
Phylo
->
PhyloGroup
cliqueToGroup
prd
lvl
idx
lbl
fis
p
=
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
(
singleton
"support"
(
fromIntegral
$
getSupport
fis
))
Nothing
[]
[]
[]
[]
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
(
singleton
"support"
(
fromIntegral
$
getSupport
fis
))
Nothing
(
getMiniCooc
(
listToFullCombi
ngrams
)
(
periodsToYears
[
prd
])
(
getPhyloCooc
p
))
[]
[]
[]
[]
where
--------------------------------------
ngrams
::
[
Int
]
...
...
@@ -120,7 +126,9 @@ cliqueToGroup prd lvl idx lbl fis p =
-- | To transform a list of Ngrams into a PhyloGroup
ngramsToGroup
::
PhyloPeriodId
->
Level
->
Int
->
Text
->
[
Ngrams
]
->
Phylo
->
PhyloGroup
ngramsToGroup
prd
lvl
idx
lbl
ngrams
p
=
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
(
sort
$
map
(
\
x
->
getIdxInRoots
x
p
)
ngrams
)
empty
Nothing
[]
[]
[]
[]
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
(
sort
$
map
(
\
x
->
getIdxInRoots
x
p
)
ngrams
)
empty
Nothing
(
getMiniCooc
(
listToFullCombi
$
sort
$
map
(
\
x
->
getIdxInRoots
x
p
)
ngrams
)
(
periodsToYears
[
prd
])
(
getPhyloCooc
p
))
[]
[]
[]
[]
-- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
...
...
@@ -141,10 +149,6 @@ toNthLevel lvlMax prox clus p
|
otherwise
=
toNthLevel
lvlMax
prox
clus
$
traceBranches
(
lvl
+
1
)
$
setPhyloBranches
(
lvl
+
1
)
-- $ traceTempoMatching Descendant (lvl + 1)
-- $ interTempoMatching Descendant (lvl + 1) prox
-- $ traceTempoMatching Ascendant (lvl + 1)
-- $ interTempoMatching Ascendant (lvl + 1) prox
$
transposePeriodLinks
(
lvl
+
1
)
$
setLevelLinks
(
lvl
,
lvl
+
1
)
$
addPhyloLevel
(
lvl
+
1
)
...
...
@@ -207,8 +211,14 @@ instance PhyloMaker [(Date, Text)]
phyloBase
=
tracePhyloBase
$
toPhyloBase
q
(
initPhyloParam
(
Just
defaultPhyloVersion
)
(
Just
defaultSoftware
)
(
Just
q
))
c
roots
termList
--------------------------------------
--------------------------------------
toPhyloBase
q
p
c
roots
termList
=
initPhyloBase
periods
foundations
p
toPhyloBase
q
p
c
roots
termList
=
initPhyloBase
periods
foundations
nbDocs
cooc
p
where
--------------------------------------
cooc
::
Map
Date
(
Map
(
Int
,
Int
)
Double
)
cooc
=
docsToCooc
(
parseDocs
(
foundations
^.
phylo_foundationsRoots
)
c
)
(
foundations
^.
phylo_foundationsRoots
)
--------------------------------------
nbDocs
::
Map
Date
Double
nbDocs
=
countDocs
c
--------------------------------------
foundations
::
PhyloFoundations
foundations
=
PhyloFoundations
(
initFoundationsRoots
roots
)
termList
...
...
@@ -240,8 +250,14 @@ instance PhyloMaker [Document]
phyloBase
=
tracePhyloBase
$
toPhyloBase
q
(
initPhyloParam
(
Just
defaultPhyloVersion
)
(
Just
defaultSoftware
)
(
Just
q
))
c
roots
termList
--------------------------------------
--------------------------------------
toPhyloBase
q
p
c
roots
termList
=
initPhyloBase
periods
foundations
p
toPhyloBase
q
p
c
roots
termList
=
initPhyloBase
periods
foundations
nbDocs
cooc
p
where
--------------------------------------
cooc
::
Map
Date
(
Map
(
Int
,
Int
)
Double
)
cooc
=
docsToCooc
c
(
foundations
^.
phylo_foundationsRoots
)
--------------------------------------
nbDocs
::
Map
Date
Double
nbDocs
=
countDocs
$
map
(
\
doc
->
(
date
doc
,
text
doc
))
c
--------------------------------------
foundations
::
PhyloFoundations
foundations
=
PhyloFoundations
(
initFoundationsRoots
roots
)
termList
...
...
src/Gargantext/Viz/Phylo/LinkMaker.hs
View file @
ff80ee2f
...
...
@@ -18,14 +18,13 @@ module Gargantext.Viz.Phylo.LinkMaker
where
import
Control.Lens
hiding
(
both
,
Level
)
import
Data.List
((
++
),
sortOn
,
null
,
tail
,
splitAt
,
elem
,
concat
,
sort
,
delete
,
intersect
)
import
Data.List
((
++
),
sortOn
,
null
,
tail
,
splitAt
,
elem
,
concat
,
sort
,
delete
,
intersect
,
nub
,
groupBy
)
import
Data.Tuple.Extra
import
Data.Map
(
Map
,(
!
),
fromListWith
)
import
Data.Map
(
Map
,(
!
),
fromListWith
,
elems
,
restrictKeys
,
unionWith
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.Metrics.Proximity
import
Gargantext.Viz.Phylo.Aggregates.Cooc
import
qualified
Data.List
as
List
import
qualified
Data.Maybe
as
Maybe
import
qualified
Data.Map
as
Map
...
...
@@ -34,9 +33,9 @@ import qualified Data.Vector.Storable as VS
import
Debug.Trace
(
trace
)
import
Numeric.Statistics
(
percentile
)
--
--------------------------------------------------------------------
--
--
| Make links from Level to Level
-----------------------------
--
| From Level to level |
--
--
---------------------------
-- | To choose a LevelLink strategy based an a given Level
...
...
@@ -82,17 +81,9 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLeve
--------------------------------------
------------------------------------------------------------------------
-- | Make links from Period to Period
-- | To apply the corresponding proximity function based on a given Proximity
applyProximity
::
Proximity
->
PhyloGroup
->
PhyloGroup
->
Map
(
Int
,
Int
)
Double
->
(
PhyloGroupId
,
Double
)
applyProximity
prox
g1
g2
cooc
=
case
prox
of
-- WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard s (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
WeightedLogJaccard
(
WLJParams
_
s
)
->
((
getGroupId
g2
),
weightedLogJaccard'
s
(
getGroupNgrams
g1
)
(
getGroupNgrams
g2
)
cooc
)
Hamming
(
HammingParams
_
)
->
((
getGroupId
g2
),
hamming
(
getSubCooc
(
getGroupNgrams
g1
)
cooc
)
(
getSubCooc
(
getGroupNgrams
g2
)
cooc
))
_
->
panic
(
"[ERR][Viz.Phylo.Example.applyProximity] Proximity function not defined"
)
-------------------------------
-- | From Period to Period | --
-------------------------------
-- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
...
...
@@ -113,35 +104,57 @@ getNextPeriods to' id l = case to' of
--------------------------------------
-- | To find the best candidates regarding a given proximity
findBestCandidates'
::
Filiation
->
Int
->
Int
->
Proximity
->
[
PhyloPeriodId
]
->
[
PhyloGroup
]
->
PhyloGroup
->
Phylo
->
([
Pointer
],[
Double
])
findBestCandidates'
fil
depth
limit
prox
prds
gs
g
p
|
depth
>
limit
||
null
next
=
(
[]
,
[]
)
|
(
not
.
null
)
bestScores
=
(
take
2
bestScores
,
map
snd
scores
)
|
otherwise
=
findBestCandidates'
fil
(
depth
+
1
)
limit
prox
prds
gs
g
p
where
-- | To get the number of docs produced during a list of periods
periodsToNbDocs
::
[
PhyloPeriodId
]
->
Phylo
->
Double
periodsToNbDocs
prds
phylo
=
sum
$
elems
$
restrictKeys
(
phylo
^.
phylo_docsByYears
)
$
periodsToYears
prds
-- | To process a given Proximity
processProximity
::
Proximity
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Double
->
Double
processProximity
proximity
cooc
cooc'
nbDocs
=
case
proximity
of
WeightedLogJaccard
(
WLJParams
_
sens
)
->
weightedLogJaccard
sens
cooc
cooc'
nbDocs
Hamming
(
HammingParams
_
)
->
hamming
cooc
cooc'
_
->
panic
"[ERR][Viz.Phylo.LinkMaker.processProximity] Unknown proximity"
-- | Find the best candidates to be time-linked with a group g1 (recursively until the limit of periods is reached)
-- | 1) find the next periods and get the mini cooc matrix of g1
-- | 2) build the pairs of candidates (single groups or tuples)
-- | 3) process the proximity mesure and select the best ones to create the pointers (ie: all the max)
findBestCandidates
::
Filiation
->
Int
->
Int
->
Proximity
->
[(
Date
,
Date
)]
->
[
PhyloGroup
]
->
PhyloGroup
->
Phylo
->
([
Pointer
],[
Double
])
findBestCandidates
filiation
depth
limit
proximity
periods
candidates
g1
phylo
|
depth
>
limit
||
null
nextPeriods
=
(
[]
,
[]
)
|
(
not
.
null
)
pointers
=
(
head'
"findBestCandidates"
$
groupBy
(
\
x
y
->
snd
x
==
snd
y
)
pointers
,
map
snd
similarities
)
|
otherwise
=
findBestCandidates
filiation
(
depth
+
1
)
limit
proximity
periods
candidates
g1
phylo
where
--------------------------------------
pointers
::
[(
PhyloGroupId
,
Double
)]
pointers
=
reverse
$
sortOn
snd
$
filter
(
\
(
_
,
score
)
->
case
proximity
of
WeightedLogJaccard
(
WLJParams
thr
_
)
->
score
>=
thr
Hamming
(
HammingParams
thr
)
->
score
<=
thr
_
->
panic
"[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Unknown proximity"
)
similarities
--------------------------------------
next
::
[
PhyloPeriodId
]
next
=
take
depth
prds
similarities
::
[(
PhyloGroupId
,
Double
)]
similarities
=
concat
$
map
(
\
(
g2
,
g3
)
->
let
nbDocs
=
periodsToNbDocs
[(
getGroupPeriod
g1
),(
getGroupPeriod
g2
),(
getGroupPeriod
g3
)]
phylo
cooc2
=
getGroupCooc
g2
cooc3
=
getGroupCooc
g3
score
=
processProximity
proximity
cooc1
(
unionWith
(
+
)
cooc2
cooc3
)
nbDocs
in
nub
$
[(
getGroupId
g2
,
score
),(
getGroupId
g3
,
score
)])
pairsOfCandidates
--------------------------------------
cooc
::
Map
(
Int
,
Int
)
Double
cooc
=
getCooc
next
p
pairsOfCandidates
::
[(
PhyloGroup
,
PhyloGroup
)]
pairsOfCandidates
=
listToFullCombi
$
filter
(
\
g
->
elem
(
getGroupPeriod
g
)
nextPeriods
)
candidates
--------------------------------------
c
andidates
::
[
PhyloGroup
]
c
andidates
=
filter
(
\
g'
->
elem
(
getGroupPeriod
g'
)
next
)
gs
c
ooc1
::
Map
(
Int
,
Int
)
Double
c
ooc1
=
getGroupCooc
g1
--------------------------------------
scores
::
[(
PhyloGroupId
,
Double
)]
scores
=
map
(
\
g'
->
applyProximity
prox
g
g'
cooc
)
candidates
--------------------------------------
bestScores
::
[(
PhyloGroupId
,
Double
)]
bestScores
=
reverse
$
sortOn
snd
$
filter
(
\
(
_id
,
score
)
->
case
prox
of
WeightedLogJaccard
(
WLJParams
thr
_
)
->
score
>=
thr
Hamming
(
HammingParams
thr
)
->
score
<=
thr
Filiation
->
panic
"[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Filiation"
)
scores
--------------------------------------
nextPeriods
::
[(
Date
,
Date
)]
nextPeriods
=
take
depth
periods
--------------------------------------
-- | To add some Pointer to a PhyloGroup
...
...
@@ -189,56 +202,31 @@ toBranches mem gs
--------------------------------------
-- | a init avec la [[head groups]] et la tail groups
toBranches'
::
[[[
Int
]]]
->
[[
Int
]]
->
[[[
Int
]]]
toBranches'
mem
gs
|
null
gs
=
mem
|
otherwise
=
toBranches'
mem'
$
tail
gs
where
--------------------------------------
mem'
::
[[[
Int
]]]
mem'
=
if
(
null
withHead
)
then
mem
++
[[
head'
"toBranches"
gs
]]
else
(
filter
(
\
gs'
->
not
$
elem
gs'
withHead
)
mem
)
++
[(
concat
withHead
)
++
[
head'
"toBranches"
gs
]]
--------------------------------------
withHead
::
[[[
Int
]]]
withHead
=
filter
(
\
gs'
->
(
not
.
null
)
$
intersect
(
concat
gs'
)
(
head'
"toBranches"
gs
)
)
mem
--------------------------------------
-- | To apply the intertemporal matching to Phylo at a given level
-- | To process an intertemporal matching task to a Phylo at a given level
-- | 1) split all groups (of the level) in branches (ie:related components sharing at least one ngram)
-- | 2) for each branch, for each group find the best candidates (by Filiation and Proximity) and create the corresponding pointers
-- | 3) update all the groups with the new pointers if they exist
interTempoMatching
::
Filiation
->
Level
->
Proximity
->
Phylo
->
Phylo
interTempoMatching
fil
lvl
prox
p
=
traceMatching
fil
lvl
(
getThreshold
prox
)
scores
$
updateGroups
fil
lvl
pointers
p
interTempoMatching
fil
lvl
prox
p
=
traceMatching
fil
lvl
(
getThreshold
prox
)
debug
$
updateGroups
fil
lvl
pointersMap
p
where
--------------------------------------
pointers
::
Map
PhyloGroupId
[
Pointer
]
pointers
=
Map
.
fromList
$
map
(
\
(
id
,
x
)
->
(
id
,
fst
x
))
candidates
--------------------------------------
scores
::
[
Double
]
scores
=
sort
$
concat
$
map
(
snd
.
snd
)
candidates
--------------------------------------
-- candidates' :: [(PhyloGroupId,([Pointer],[Double]))]
-- candidates' = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) gs
debug
::
[
Double
]
debug
=
sort
$
concat
$
map
(
snd
.
snd
)
pointers
--------------------------------------
candidates
::
[(
PhyloGroupId
,([
Pointer
],[
Double
]))
]
candidates
=
concat
$
map
(
\
b
->
map
(
\
g
->
(
getGroupId
g
,
findBestCandidates'
fil
1
5
prox
(
getNextPeriods
fil
(
getGroupPeriod
g
)
prds
)
(
filterCandidates
g
gs
)
g
p
))
b
)
b
s
pointersMap
::
Map
PhyloGroupId
[
Pointer
]
pointersMap
=
Map
.
fromList
$
map
(
\
(
id
,
x
)
->
(
id
,
fst
x
))
pointer
s
--------------------------------------
gs
::
[
PhyloGroup
]
gs
=
getGroupsWithLevel
lvl
p
pointers
::
[(
PhyloGroupId
,([
Pointer
],[
Double
]))]
pointers
=
concat
$
map
(
\
branche
->
map
(
\
g
->
(
getGroupId
g
,
findBestCandidates
fil
1
5
prox
(
getNextPeriods
fil
(
getGroupPeriod
g
)
(
getPhyloPeriods
p
))
(
filterCandidates
g
branche
)
g
p
)
)
branche
)
branches
--------------------------------------
bs
::
[[
PhyloGroup
]]
bs
=
tracePreBranches
$
toBranches
[[
head'
"interTempoMatching"
gs
]]
$
tail
gs
--------------------------------------
prds
::
[
PhyloPeriodId
]
prds
=
getPhyloPeriods
p
branches
::
[[
PhyloGroup
]]
branches
=
tracePreBranches
$
toBranches
[[
head'
"interTempoMatching"
(
getGroupsWithLevel
lvl
p
)]]
$
tail
(
getGroupsWithLevel
lvl
p
)
--------------------------------------
...
...
@@ -256,6 +244,7 @@ toLevelUp lst p = Map.toList
--------------------------------------
-- | Transpose the parent/child pointers from one level to another
transposePeriodLinks
::
Level
->
Phylo
->
Phylo
transposePeriodLinks
lvl
p
=
alterGroupWithLevel
(
\
g
->
...
...
@@ -269,6 +258,7 @@ transposePeriodLinks lvl p = alterGroupWithLevel
--------------------------------------
)
lvl
p
----------------
-- | Tracer | --
----------------
...
...
src/Gargantext/Viz/Phylo/Metrics/Proximity.hs
View file @
ff80ee2f
...
...
@@ -17,65 +17,40 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.Metrics.Proximity
where
import
Data.List
(
null
,
intersect
,
union
)
import
Data.Map
(
Map
,
elems
,
unionWith
,
intersectionWith
,
intersection
,
size
)
import
Data.List
(
null
)
import
Data.Map
(
Map
,
elems
,
unionWith
,
intersectionWith
,
intersection
,
size
,
keys
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo.Aggregates.Cooc
-- import Debug.Trace (trace)
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard
::
Double
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Double
weightedLogJaccard
s
f1
f2
|
null
wUnion
=
0
|
wUnion
==
wInter
=
1
|
s
==
0
=
(
fromIntegral
$
length
wInter
)
/
(
fromIntegral
$
length
wUnion
)
|
s
>
0
=
(
sumInvLog
wInter
)
/
(
sumInvLog
wUnion
)
|
otherwise
=
(
sumLog
wInter
)
/
(
sumLog
wUnion
)
where
--------------------------------------
wInter
::
[
Double
]
wInter
=
elems
$
intersectionWith
(
+
)
f1
f2
--------------------------------------
wUnion
::
[
Double
]
wUnion
=
elems
$
unionWith
(
+
)
f1
f2
--------------------------------------
sumInvLog
::
[
Double
]
->
Double
sumInvLog
l
=
foldl
(
\
mem
x
->
mem
+
(
1
/
log
(
s
+
x
)))
0
l
--------------------------------------
sumLog
::
[
Double
]
->
Double
sumLog
l
=
foldl
(
\
mem
x
->
mem
+
log
(
s
+
x
))
0
l
--------------------------------------
sumInvLog
::
Double
->
[
Double
]
->
Double
sumInvLog
s
l
=
foldl
(
\
mem
x
->
mem
+
(
1
/
log
(
s
+
x
)))
0
l
sumLog
::
Double
->
[
Double
]
->
Double
sumLog
s
l
=
foldl
(
\
mem
x
->
mem
+
log
(
s
+
x
))
0
l
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard'
::
Double
->
[
Int
]
->
[
Int
]
->
Map
(
Int
,
Int
)
Double
->
Double
weightedLogJaccard'
s
idx
idx'
cooc
|
null
idxUnion
=
0
|
idxUnion
==
idxInter
=
1
|
s
==
0
=
(
fromIntegral
$
length
idxInter
)
/
(
fromIntegral
$
length
idxUnion
)
|
s
>
0
=
(
sumInvLog
wInter
)
/
(
sumInvLog
wUnion
)
|
otherwise
=
(
sumLog
wInter
)
/
(
sumLog
wUnion
)
-- | To process WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard
::
Double
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Double
->
Double
weightedLogJaccard
sens
cooc
cooc'
nbDocs
|
null
union'
=
0
|
union'
==
inter'
=
1
|
sens
==
0
=
(
fromIntegral
$
length
$
keys
inter'
)
/
(
fromIntegral
$
length
$
keys
union'
)
|
sens
>
0
=
(
sumInvLog
sens
$
elems
wInter
)
/
(
sumInvLog
sens
$
elems
wUnion
)
|
otherwise
=
(
sumLog
sens
$
elems
wInter
)
/
(
sumLog
sens
$
elems
wUnion
)
where
--------------------------------------
wInter
::
[
Double
]
wInter
=
elems
$
getSubCooc
idxInter
cooc
--------------------------------------
wUnion
::
[
Double
]
wUnion
=
elems
$
getSubCooc
idxUnion
cooc
--------------------------------------
idxInter
::
[
Int
]
idxInter
=
intersect
idx
idx'
wInter
::
Map
(
Int
,
Int
)
Double
wInter
=
map
(
/
nbDocs
)
inter'
--------------------------------------
idxUnion
::
[
Int
]
idxUnion
=
union
idx
idx
'
wUnion
::
Map
(
Int
,
Int
)
Double
wUnion
=
map
(
/
nbDocs
)
union
'
--------------------------------------
sumInvLog
::
[
Double
]
->
Double
sumInvLog
l
=
foldl
(
\
mem
x
->
mem
+
(
1
/
log
(
s
+
x
)))
0
l
inter'
::
Map
(
Int
,
Int
)
Double
inter'
=
intersectionWith
(
+
)
cooc
cooc'
--------------------------------------
union'
::
Map
(
Int
,
Int
)
Double
union'
=
unionWith
(
+
)
cooc
cooc'
--------------------------------------
sumLog
::
[
Double
]
->
Double
sumLog
l
=
foldl
(
\
mem
x
->
mem
+
log
(
s
+
x
))
0
l
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields
...
...
src/Gargantext/Viz/Phylo/Tools.hs
View file @
ff80ee2f
...
...
@@ -20,9 +20,9 @@ module Gargantext.Viz.Phylo.Tools
where
import
Control.Lens
hiding
(
both
,
Level
,
Empty
)
import
Data.List
(
filter
,
intersect
,
(
++
),
sort
,
null
,
tail
,
last
,
tails
,
delete
,
nub
,
sortOn
,
nubBy
)
import
Data.List
(
filter
,
intersect
,
(
++
),
sort
,
null
,
tail
,
last
,
tails
,
delete
,
nub
,
sortOn
,
nubBy
,
concat
)
import
Data.Maybe
(
mapMaybe
,
fromMaybe
)
import
Data.Map
(
Map
,
mapKeys
,
member
,
(
!
))
import
Data.Map
(
Map
,
mapKeys
,
member
,
(
!
)
,
restrictKeys
,
elems
,
empty
,
filterWithKey
,
unionWith
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
,
toLower
,
unwords
)
import
Data.Tuple.Extra
...
...
@@ -160,8 +160,8 @@ initFoundationsRoots :: [Ngrams] -> Vector Ngrams
initFoundationsRoots
l
=
Vector
.
fromList
$
map
phyloAnalyzer
l
-- | To init the base of a Phylo from a List of Periods and Foundations
initPhyloBase
::
[(
Date
,
Date
)]
->
PhyloFoundations
->
PhyloParam
->
Phylo
initPhyloBase
pds
fds
prm
=
Phylo
((
fst
.
(
head'
"initPhyloBase"
))
pds
,
(
snd
.
last
)
pds
)
fds
(
map
(
\
pd
->
initPhyloPeriod
pd
[]
)
pds
)
prm
initPhyloBase
::
[(
Date
,
Date
)]
->
PhyloFoundations
->
Map
Date
Double
->
Map
Date
(
Map
(
Int
,
Int
)
Double
)
->
PhyloParam
->
Phylo
initPhyloBase
pds
fds
nbDocs
cooc
prm
=
Phylo
((
fst
.
(
head'
"initPhyloBase"
))
pds
,
(
snd
.
last
)
pds
)
fds
(
map
(
\
pd
->
initPhyloPeriod
pd
[]
)
pds
)
nbDocs
cooc
prm
-- | To init the param of a Phylo
initPhyloParam
::
Maybe
Text
->
Maybe
Software
->
Maybe
PhyloQueryBuild
->
PhyloParam
...
...
@@ -175,6 +175,11 @@ getLastLevel p = (last . sort)
.
traverse
.
phylo_periodLevels
)
p
-- | To get all the coocurency matrix of a phylo
getPhyloCooc
::
Phylo
->
Map
Date
(
Map
(
Int
,
Int
)
Double
)
getPhyloCooc
p
=
p
^.
phylo_cooc
--------------------
-- | PhyloRoots | --
...
...
@@ -194,6 +199,11 @@ getIdxInRoots n p = case (elemIndex n (getFoundationsRoots p)) of
Nothing
->
panic
"[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just
idx
->
idx
getIdxInVector
::
Ngrams
->
Vector
Ngrams
->
Int
getIdxInVector
n
ns
=
case
(
elemIndex
n
ns
)
of
Nothing
->
panic
"[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just
idx
->
idx
--------------------
-- | PhyloGroup | --
--------------------
...
...
@@ -242,6 +252,10 @@ getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId
=
_phylo_groupId
getGroupCooc
::
PhyloGroup
->
Map
(
Int
,
Int
)
Double
getGroupCooc
=
_phylo_groupCooc
-- | To get the level out of the id of a PhyloGroup
getGroupLevel
::
PhyloGroup
->
Int
getGroupLevel
=
snd
.
fst
.
getGroupId
...
...
@@ -380,10 +394,29 @@ initGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> Phylo -> PhyloGroup
initGroup
ngrams
lbl
idx
lvl
from'
to'
p
=
PhyloGroup
(((
from'
,
to'
),
lvl
),
idx
)
lbl
(
sort
$
map
(
\
x
->
getIdxInRoots
x
p
)
ngrams
)
idxs
(
Map
.
empty
)
Nothing
(
getMiniCooc
(
listToFullCombi
idxs
)
(
periodsToYears
[(
from'
,
to'
)])
(
getPhyloCooc
p
))
[]
[]
[]
[]
where
idxs
=
sort
$
map
(
\
x
->
getIdxInRoots
x
p
)
ngrams
-- | To sum two coocurency Matrix
sumCooc
::
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
sumCooc
m
m'
=
unionWith
(
+
)
m
m'
-- | To build the mini cooc matrix of each group
getMiniCooc
::
[(
Int
,
Int
)]
->
Set
Date
->
Map
Date
(
Map
(
Int
,
Int
)
Double
)
->
Map
(
Int
,
Int
)
Double
getMiniCooc
pairs
years
cooc
=
filterWithKey
(
\
(
n
,
n'
)
_
->
elem
(
n
,
n'
)
pairs
)
cooc'
where
--------------------------------------
cooc'
::
Map
(
Int
,
Int
)
Double
cooc'
=
foldl
(
\
m
m'
->
sumCooc
m
m'
)
empty
$
elems
$
restrictKeys
cooc
years
--------------------------------------
---------------------
...
...
@@ -418,6 +451,11 @@ initPhyloPeriod :: PhyloPeriodId -> [PhyloLevel] -> PhyloPeriod
initPhyloPeriod
id
l
=
PhyloPeriod
id
l
-- | To transform a list of periods into a set of Dates
periodsToYears
::
[(
Date
,
Date
)]
->
Set
Date
periodsToYears
periods
=
(
Set
.
fromList
.
sort
.
concat
)
[[
d
,
d'
]
|
(
d
,
d'
)
<-
periods
]
--------------------
-- | PhyloLevel | --
--------------------
...
...
src/Gargantext/Viz/Phylo/View/Filters.hs
View file @
ff80ee2f
...
...
@@ -71,7 +71,7 @@ filterSizeBranch min' v = cleanNodesEdges v v'
where
--------------------------------------
v'
::
PhyloView
v'
=
v
&
pv_branches
%~
(
filter
(
\
b
->
(
length
$
filter
(
\
n
->
(
getBranchId
b
)
==
(
getNodeBranchId
n
))
$
getNodesInBranches
v
)
>
min'
))
v'
=
v
&
pv_branches
%~
(
filter
(
\
b
->
(
length
$
filter
(
\
n
->
(
getBranchId
b
)
==
(
getNodeBranchId
n
))
$
getNodesInBranches
v
)
>
=
min'
))
--------------------------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment