Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
148
Issues
148
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
ff80ee2f
Commit
ff80ee2f
authored
May 24, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix temporalMatching
parent
38d3a9cd
Pipeline
#415
failed with stage
Changes
11
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
244 additions
and
175 deletions
+244
-175
Phylo.hs
src/Gargantext/Viz/Phylo.hs
+3
-0
Cluster.hs
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
+7
-11
Cooc.hs
src/Gargantext/Viz/Phylo/Aggregates/Cooc.hs
+41
-2
Document.hs
src/Gargantext/Viz/Phylo/Aggregates/Document.hs
+9
-4
Fis.hs
src/Gargantext/Viz/Phylo/Aggregates/Fis.hs
+2
-2
Example.hs
src/Gargantext/Viz/Phylo/Example.hs
+10
-3
LevelMaker.hs
src/Gargantext/Viz/Phylo/LevelMaker.hs
+28
-12
LinkMaker.hs
src/Gargantext/Viz/Phylo/LinkMaker.hs
+76
-86
Proximity.hs
src/Gargantext/Viz/Phylo/Metrics/Proximity.hs
+24
-49
Tools.hs
src/Gargantext/Viz/Phylo/Tools.hs
+43
-5
Filters.hs
src/Gargantext/Viz/Phylo/View/Filters.hs
+1
-1
No files found.
src/Gargantext/Viz/Phylo.hs
View file @
ff80ee2f
...
...
@@ -77,6 +77,8 @@ data Phylo =
Phylo
{
_phylo_duration
::
(
Start
,
End
)
,
_phylo_foundations
::
PhyloFoundations
,
_phylo_periods
::
[
PhyloPeriod
]
,
_phylo_docsByYears
::
Map
Date
Double
,
_phylo_cooc
::
Map
Date
(
Map
(
Int
,
Int
)
Double
)
,
_phylo_param
::
PhyloParam
}
deriving
(
Generic
,
Show
,
Eq
)
...
...
@@ -150,6 +152,7 @@ data PhyloGroup =
,
_phylo_groupNgrams
::
[
Int
]
,
_phylo_groupMeta
::
Map
Text
Double
,
_phylo_groupBranchId
::
Maybe
PhyloBranchId
,
_phylo_groupCooc
::
Map
(
Int
,
Int
)
Double
,
_phylo_groupPeriodParents
::
[
Pointer
]
,
_phylo_groupPeriodChilds
::
[
Pointer
]
...
...
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
View file @
ff80ee2f
...
...
@@ -25,7 +25,7 @@ import Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.Metrics.Proximity
import
Gargantext.Viz.Phylo.Metrics.Clustering
import
Gargantext.Viz.Phylo.
Aggregates.Cooc
import
Gargantext.Viz.Phylo.
LinkMaker
import
qualified
Data.Map
as
Map
import
qualified
Data.Vector.Storable
as
VS
...
...
@@ -49,15 +49,11 @@ graphToClusters clust (nodes,edges) = case clust of
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph
::
Proximity
->
[
PhyloGroup
]
->
Map
(
Int
,
Int
)
Double
->
([
GroupNode
],[
GroupEdge
])
groupsToGraph
prox
gs
cooc
=
case
prox
of
-- WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), traceSim x y (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc) p
-- $ weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
-- $ getCandidates gs)
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard'
sens
(
getGroupNgrams
x
)
(
getGroupNgrams
y
)
cooc
))
$
getCandidates
gs
)
Hamming
(
HammingParams
_
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
hamming
(
getSubCooc
(
getGroupNgrams
x
)
cooc
)
(
getSubCooc
(
getGroupNgrams
y
)
cooc
)))
$
getCandidates
gs
)
groupsToGraph
::
Double
->
Proximity
->
[
PhyloGroup
]
->
([
GroupNode
],[
GroupEdge
])
groupsToGraph
nbDocs
prox
gs
=
case
prox
of
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard
sens
(
getGroupCooc
x
)
(
getGroupCooc
y
)
nbDocs
))
$
getCandidates
gs
)
Hamming
(
HammingParams
_
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
hamming
(
getGroupCooc
x
)
(
getGroupCooc
y
)))
$
getCandidates
gs
)
_
->
undefined
...
...
@@ -84,7 +80,7 @@ phyloToClusters lvl clus p = Map.fromList
--------------------------------------
graphs
::
[([
GroupNode
],[
GroupEdge
])]
graphs
=
traceGraph
lvl
(
getThreshold
prox
)
$
map
(
\
prd
->
groupsToGraph
prox
(
getGroupsWithFilters
lvl
prd
p
)
(
getCooc
[
prd
]
p
))
periods
$
map
(
\
prd
->
groupsToGraph
(
periodsToNbDocs
[
prd
]
p
)
prox
(
getGroupsWithFilters
lvl
prd
p
))
periods
--------------------------------------
prox
::
Proximity
prox
=
getProximity
clus
...
...
src/Gargantext/Viz/Phylo/Aggregates/Cooc.hs
View file @
ff80ee2f
...
...
@@ -17,8 +17,10 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.Aggregates.Cooc
where
import
Data.List
(
union
,
concat
,
nub
)
import
Data.Map
(
Map
,
elems
,
adjust
,
filterWithKey
)
import
Data.List
(
union
,
concat
,
nub
,
sort
)
import
Data.Map
(
Map
,
elems
,
adjust
,
filterWithKey
,
fromListWith
,
fromList
,
restrictKeys
)
import
Data.Set
(
Set
)
import
Data.Vector
(
Vector
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
...
...
@@ -83,5 +85,42 @@ getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)
--------------------------------------
-- | To transform a list of index into a cooc matrix
listToCooc
::
[
Int
]
->
Map
(
Int
,
Int
)
Double
listToCooc
lst
=
fromList
$
map
(
\
combi
->
(
combi
,
1
))
$
listToFullCombi
lst
-- | To transform a list of ngrams into a list of indexes
ngramsToIdx
::
[
Ngrams
]
->
Vector
Ngrams
->
[
Int
]
ngramsToIdx
ns
v
=
sort
$
map
(
\
n
->
getIdxInVector
n
v
)
ns
-- | To build the cooc matrix by years out of the corpus
docsToCooc
::
[
Document
]
->
Vector
Ngrams
->
Map
Date
(
Map
(
Int
,
Int
)
Double
)
docsToCooc
docs
fdt
=
fromListWith
sumCooc
$
map
(
\
(
d
,
l
)
->
(
d
,
listToCooc
l
))
$
map
(
\
doc
->
(
date
doc
,
ngramsToIdx
(
text
doc
)
fdt
))
docs
-- | To sum all the docs produced during a list of years
sumDocsByYears
::
Set
Date
->
Map
Date
Double
->
Double
sumDocsByYears
years
m
=
sum
$
elems
$
restrictKeys
m
years
-- | To get the cooc matrix of a group
groupToCooc
::
PhyloGroup
->
Phylo
->
Map
(
Int
,
Int
)
Double
groupToCooc
g
p
=
getMiniCooc
(
listToFullCombi
$
getGroupNgrams
g
)
(
periodsToYears
[
getGroupPeriod
g
])
(
getPhyloCooc
p
)
-- | To get the union of the cooc matrix of two groups
unionOfCooc
::
PhyloGroup
->
PhyloGroup
->
Phylo
->
Map
(
Int
,
Int
)
Double
unionOfCooc
g
g'
p
=
sumCooc
(
groupToCooc
g
p
)
(
groupToCooc
g'
p
)
-- phyloCooc :: Map (Int, Int) Double
-- phyloCooc = fisToCooc phyloFis phylo1_0_1
src/Gargantext/Viz/Phylo/Aggregates/Document.hs
View file @
ff80ee2f
...
...
@@ -17,8 +17,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.Aggregates.Document
where
import
Data.List
(
last
)
import
Data.Map
(
Map
)
import
Data.Map
(
Map
,
fromListWith
)
import
Data.Text
(
Text
)
import
Data.Tuple
(
fst
)
import
Data.Vector
(
Vector
)
...
...
@@ -32,7 +31,7 @@ import qualified Data.Vector as Vector
-- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods
::
(
Eq
date
,
Enum
date
)
=>
Grain
->
Step
->
(
date
,
date
)
->
[(
date
,
date
)]
initPeriods
g
s
(
start
,
end
)
=
map
(
\
l
->
(
head'
"Doc"
l
,
last
l
))
initPeriods
g
s
(
start
,
end
)
=
map
(
\
l
->
(
head'
"Doc"
l
,
last
'
"Doc"
l
))
$
chunkAlong
g
s
[
start
..
end
]
...
...
@@ -45,7 +44,7 @@ groupDocsByPeriod f pds es = Map.fromList $ zip pds $ map (inPeriode f es) pds
inPeriode
::
Ord
b
=>
(
t
->
b
)
->
[
t
]
->
(
b
,
b
)
->
[
t
]
inPeriode
f'
h
(
start
,
end
)
=
fst
$
List
.
partition
(
\
d
->
f'
d
>=
start
&&
f'
d
<=
end
)
h
--------------------------------------
--------------------------------------
-- | To parse a list of Documents by filtering on a Vector of Ngrams
...
...
@@ -54,4 +53,10 @@ parseDocs roots c = map (\(d,t)
->
Document
d
(
filter
(
\
x
->
Vector
.
elem
x
roots
)
$
monoTexts
t
))
c
-- | To count the number of documents by year
countDocs
::
[(
Date
,
a
)]
->
Map
Date
Double
countDocs
corpus
=
fromListWith
(
+
)
$
map
(
\
(
d
,
_
)
->
(
d
,
1
))
corpus
src/Gargantext/Viz/Phylo/Aggregates/Fis.hs
View file @
ff80ee2f
...
...
@@ -43,12 +43,12 @@ filterFis keep thr f m = case keep of
-- | To filter Fis with small Support
filterFisBySupport
::
Int
->
[
PhyloFis
]
->
[
PhyloFis
]
filterFisBySupport
thr
l
=
filter
(
\
fis
->
getSupport
fis
>
thr
)
l
filterFisBySupport
thr
l
=
filter
(
\
fis
->
getSupport
fis
>
=
thr
)
l
-- | To filter Fis with small Clique size
filterFisByClique
::
Int
->
[
PhyloFis
]
->
[
PhyloFis
]
filterFisByClique
thr
l
=
filter
(
\
fis
->
(
size
$
getClique
fis
)
>
thr
)
l
filterFisByClique
thr
l
=
filter
(
\
fis
->
(
size
$
getClique
fis
)
>
=
thr
)
l
-- | To filter nested Fis
...
...
src/Gargantext/Viz/Phylo/Example.hs
View file @
ff80ee2f
...
...
@@ -40,6 +40,7 @@ import Gargantext.Text.Context (TermList)
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Aggregates.Cluster
import
Gargantext.Viz.Phylo.Aggregates.Document
import
Gargantext.Viz.Phylo.Aggregates.Cooc
import
Gargantext.Viz.Phylo.Aggregates.Fis
import
Gargantext.Viz.Phylo.BranchMaker
import
Gargantext.Viz.Phylo.LevelMaker
...
...
@@ -104,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild
::
PhyloQueryBuild
phyloQueryBuild
=
PhyloQueryBuild
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
3
10
)
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.5
0
)
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
1
20
)
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.3
0
)
...
...
@@ -154,7 +155,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster
::
Map
(
Date
,
Date
)
[
PhyloCluster
]
phyloCluster
=
phyloToClusters
1
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.05
10
)
phyloBranch1
phyloCluster
=
phyloToClusters
3
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.05
10
)
phyloBranch1
----------------------------------
...
...
@@ -226,7 +227,13 @@ phyloDocs = corpusToDocs corpus phyloBase
phyloBase
::
Phylo
phyloBase
=
initPhyloBase
periods
(
PhyloFoundations
foundationsRoots
termList
)
defaultPhyloParam
phyloBase
=
initPhyloBase
periods
(
PhyloFoundations
foundationsRoots
termList
)
nbDocs
cooc
defaultPhyloParam
cooc
::
Map
Date
(
Map
(
Int
,
Int
)
Double
)
cooc
=
docsToCooc
(
parseDocs
foundationsRoots
corpus
)
foundationsRoots
nbDocs
::
Map
Date
Double
nbDocs
=
countDocs
corpus
periods
::
[(
Date
,
Date
)]
periods
=
initPeriods
5
3
...
...
src/Gargantext/Viz/Phylo/LevelMaker.hs
View file @
ff80ee2f
...
...
@@ -32,6 +32,7 @@ import Gargantext.Viz.Phylo.Aggregates.Fis
import
Gargantext.Viz.Phylo.BranchMaker
import
Gargantext.Viz.Phylo.LinkMaker
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.Aggregates.Cooc
import
Gargantext.Text.Context
(
TermList
)
import
qualified
Data.Vector.Storable
as
VS
...
...
@@ -60,7 +61,7 @@ instance PhyloLevelMaker PhyloCluster
|
otherwise
=
panic
(
"[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2"
)
--------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
toPhyloGroups
lvl
(
d
,
d'
)
l
m
_
=
map
(
\
(
idx
,
cluster
)
->
clusterToGroup
(
d
,
d'
)
lvl
idx
""
cluster
m
)
$
zip
[
1
..
]
l
toPhyloGroups
lvl
(
d
,
d'
)
l
m
p
=
map
(
\
(
idx
,
cluster
)
->
clusterToGroup
(
d
,
d'
)
lvl
idx
""
cluster
m
p
)
$
zip
[
1
..
]
l
--------------------------------------
...
...
@@ -94,9 +95,12 @@ instance PhyloLevelMaker Document
-- | To transform a Cluster into a Phylogroup
clusterToGroup
::
PhyloPeriodId
->
Level
->
Int
->
Text
->
PhyloCluster
->
Map
(
Date
,
Date
)
[
PhyloCluster
]
->
PhyloGroup
clusterToGroup
prd
lvl
idx
lbl
groups
_m
=
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
empty
Nothing
[]
[]
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
groups
)
clusterToGroup
::
PhyloPeriodId
->
Level
->
Int
->
Text
->
PhyloCluster
->
Map
(
Date
,
Date
)
[
PhyloCluster
]
->
Phylo
->
PhyloGroup
clusterToGroup
prd
lvl
idx
lbl
groups
_m
p
=
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
empty
Nothing
(
getMiniCooc
(
listToFullCombi
ngrams
)
(
periodsToYears
[
prd
])
(
getPhyloCooc
p
))
[]
[]
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
groups
)
where
--------------------------------------
ngrams
::
[
Int
]
...
...
@@ -107,7 +111,9 @@ clusterToGroup prd lvl idx lbl groups _m =
-- | To transform a Clique into a PhyloGroup
cliqueToGroup
::
PhyloPeriodId
->
Level
->
Int
->
Text
->
PhyloFis
->
Phylo
->
PhyloGroup
cliqueToGroup
prd
lvl
idx
lbl
fis
p
=
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
(
singleton
"support"
(
fromIntegral
$
getSupport
fis
))
Nothing
[]
[]
[]
[]
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
(
singleton
"support"
(
fromIntegral
$
getSupport
fis
))
Nothing
(
getMiniCooc
(
listToFullCombi
ngrams
)
(
periodsToYears
[
prd
])
(
getPhyloCooc
p
))
[]
[]
[]
[]
where
--------------------------------------
ngrams
::
[
Int
]
...
...
@@ -120,7 +126,9 @@ cliqueToGroup prd lvl idx lbl fis p =
-- | To transform a list of Ngrams into a PhyloGroup
ngramsToGroup
::
PhyloPeriodId
->
Level
->
Int
->
Text
->
[
Ngrams
]
->
Phylo
->
PhyloGroup
ngramsToGroup
prd
lvl
idx
lbl
ngrams
p
=
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
(
sort
$
map
(
\
x
->
getIdxInRoots
x
p
)
ngrams
)
empty
Nothing
[]
[]
[]
[]
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
(
sort
$
map
(
\
x
->
getIdxInRoots
x
p
)
ngrams
)
empty
Nothing
(
getMiniCooc
(
listToFullCombi
$
sort
$
map
(
\
x
->
getIdxInRoots
x
p
)
ngrams
)
(
periodsToYears
[
prd
])
(
getPhyloCooc
p
))
[]
[]
[]
[]
-- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
...
...
@@ -141,10 +149,6 @@ toNthLevel lvlMax prox clus p
|
otherwise
=
toNthLevel
lvlMax
prox
clus
$
traceBranches
(
lvl
+
1
)
$
setPhyloBranches
(
lvl
+
1
)
-- $ traceTempoMatching Descendant (lvl + 1)
-- $ interTempoMatching Descendant (lvl + 1) prox
-- $ traceTempoMatching Ascendant (lvl + 1)
-- $ interTempoMatching Ascendant (lvl + 1) prox
$
transposePeriodLinks
(
lvl
+
1
)
$
setLevelLinks
(
lvl
,
lvl
+
1
)
$
addPhyloLevel
(
lvl
+
1
)
...
...
@@ -207,8 +211,14 @@ instance PhyloMaker [(Date, Text)]
phyloBase
=
tracePhyloBase
$
toPhyloBase
q
(
initPhyloParam
(
Just
defaultPhyloVersion
)
(
Just
defaultSoftware
)
(
Just
q
))
c
roots
termList
--------------------------------------
--------------------------------------
toPhyloBase
q
p
c
roots
termList
=
initPhyloBase
periods
foundations
p
toPhyloBase
q
p
c
roots
termList
=
initPhyloBase
periods
foundations
nbDocs
cooc
p
where
--------------------------------------
cooc
::
Map
Date
(
Map
(
Int
,
Int
)
Double
)
cooc
=
docsToCooc
(
parseDocs
(
foundations
^.
phylo_foundationsRoots
)
c
)
(
foundations
^.
phylo_foundationsRoots
)
--------------------------------------
nbDocs
::
Map
Date
Double
nbDocs
=
countDocs
c
--------------------------------------
foundations
::
PhyloFoundations
foundations
=
PhyloFoundations
(
initFoundationsRoots
roots
)
termList
...
...
@@ -240,8 +250,14 @@ instance PhyloMaker [Document]
phyloBase
=
tracePhyloBase
$
toPhyloBase
q
(
initPhyloParam
(
Just
defaultPhyloVersion
)
(
Just
defaultSoftware
)
(
Just
q
))
c
roots
termList
--------------------------------------
--------------------------------------
toPhyloBase
q
p
c
roots
termList
=
initPhyloBase
periods
foundations
p
toPhyloBase
q
p
c
roots
termList
=
initPhyloBase
periods
foundations
nbDocs
cooc
p
where
--------------------------------------
cooc
::
Map
Date
(
Map
(
Int
,
Int
)
Double
)
cooc
=
docsToCooc
c
(
foundations
^.
phylo_foundationsRoots
)
--------------------------------------
nbDocs
::
Map
Date
Double
nbDocs
=
countDocs
$
map
(
\
doc
->
(
date
doc
,
text
doc
))
c
--------------------------------------
foundations
::
PhyloFoundations
foundations
=
PhyloFoundations
(
initFoundationsRoots
roots
)
termList
...
...
src/Gargantext/Viz/Phylo/LinkMaker.hs
View file @
ff80ee2f
...
...
@@ -18,14 +18,13 @@ module Gargantext.Viz.Phylo.LinkMaker
where
import
Control.Lens
hiding
(
both
,
Level
)
import
Data.List
((
++
),
sortOn
,
null
,
tail
,
splitAt
,
elem
,
concat
,
sort
,
delete
,
intersect
)
import
Data.List
((
++
),
sortOn
,
null
,
tail
,
splitAt
,
elem
,
concat
,
sort
,
delete
,
intersect
,
nub
,
groupBy
)
import
Data.Tuple.Extra
import
Data.Map
(
Map
,(
!
),
fromListWith
)
import
Data.Map
(
Map
,(
!
),
fromListWith
,
elems
,
restrictKeys
,
unionWith
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.Metrics.Proximity
import
Gargantext.Viz.Phylo.Aggregates.Cooc
import
qualified
Data.List
as
List
import
qualified
Data.Maybe
as
Maybe
import
qualified
Data.Map
as
Map
...
...
@@ -34,9 +33,9 @@ import qualified Data.Vector.Storable as VS
import
Debug.Trace
(
trace
)
import
Numeric.Statistics
(
percentile
)
--
--------------------------------------------------------------------
--
--
| Make links from Level to Level
-----------------------------
--
| From Level to level |
--
--
---------------------------
-- | To choose a LevelLink strategy based an a given Level
...
...
@@ -82,17 +81,9 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLeve
--------------------------------------
------------------------------------------------------------------------
-- | Make links from Period to Period
-- | To apply the corresponding proximity function based on a given Proximity
applyProximity
::
Proximity
->
PhyloGroup
->
PhyloGroup
->
Map
(
Int
,
Int
)
Double
->
(
PhyloGroupId
,
Double
)
applyProximity
prox
g1
g2
cooc
=
case
prox
of
-- WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard s (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
WeightedLogJaccard
(
WLJParams
_
s
)
->
((
getGroupId
g2
),
weightedLogJaccard'
s
(
getGroupNgrams
g1
)
(
getGroupNgrams
g2
)
cooc
)
Hamming
(
HammingParams
_
)
->
((
getGroupId
g2
),
hamming
(
getSubCooc
(
getGroupNgrams
g1
)
cooc
)
(
getSubCooc
(
getGroupNgrams
g2
)
cooc
))
_
->
panic
(
"[ERR][Viz.Phylo.Example.applyProximity] Proximity function not defined"
)
-------------------------------
-- | From Period to Period | --
-------------------------------
-- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
...
...
@@ -113,35 +104,57 @@ getNextPeriods to' id l = case to' of
--------------------------------------
-- | To find the best candidates regarding a given proximity
findBestCandidates'
::
Filiation
->
Int
->
Int
->
Proximity
->
[
PhyloPeriodId
]
->
[
PhyloGroup
]
->
PhyloGroup
->
Phylo
->
([
Pointer
],[
Double
])
findBestCandidates'
fil
depth
limit
prox
prds
gs
g
p
|
depth
>
limit
||
null
next
=
(
[]
,
[]
)
|
(
not
.
null
)
bestScores
=
(
take
2
bestScores
,
map
snd
scores
)
|
otherwise
=
findBestCandidates'
fil
(
depth
+
1
)
limit
prox
prds
gs
g
p
where
-- | To get the number of docs produced during a list of periods
periodsToNbDocs
::
[
PhyloPeriodId
]
->
Phylo
->
Double
periodsToNbDocs
prds
phylo
=
sum
$
elems
$
restrictKeys
(
phylo
^.
phylo_docsByYears
)
$
periodsToYears
prds
-- | To process a given Proximity
processProximity
::
Proximity
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Double
->
Double
processProximity
proximity
cooc
cooc'
nbDocs
=
case
proximity
of
WeightedLogJaccard
(
WLJParams
_
sens
)
->
weightedLogJaccard
sens
cooc
cooc'
nbDocs
Hamming
(
HammingParams
_
)
->
hamming
cooc
cooc'
_
->
panic
"[ERR][Viz.Phylo.LinkMaker.processProximity] Unknown proximity"
-- | Find the best candidates to be time-linked with a group g1 (recursively until the limit of periods is reached)
-- | 1) find the next periods and get the mini cooc matrix of g1
-- | 2) build the pairs of candidates (single groups or tuples)
-- | 3) process the proximity mesure and select the best ones to create the pointers (ie: all the max)
findBestCandidates
::
Filiation
->
Int
->
Int
->
Proximity
->
[(
Date
,
Date
)]
->
[
PhyloGroup
]
->
PhyloGroup
->
Phylo
->
([
Pointer
],[
Double
])
findBestCandidates
filiation
depth
limit
proximity
periods
candidates
g1
phylo
|
depth
>
limit
||
null
nextPeriods
=
(
[]
,
[]
)
|
(
not
.
null
)
pointers
=
(
head'
"findBestCandidates"
$
groupBy
(
\
x
y
->
snd
x
==
snd
y
)
pointers
,
map
snd
similarities
)
|
otherwise
=
findBestCandidates
filiation
(
depth
+
1
)
limit
proximity
periods
candidates
g1
phylo
where
--------------------------------------
pointers
::
[(
PhyloGroupId
,
Double
)]
pointers
=
reverse
$
sortOn
snd
$
filter
(
\
(
_
,
score
)
->
case
proximity
of
WeightedLogJaccard
(
WLJParams
thr
_
)
->
score
>=
thr
Hamming
(
HammingParams
thr
)
->
score
<=
thr
_
->
panic
"[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Unknown proximity"
)
similarities
--------------------------------------
next
::
[
PhyloPeriodId
]
next
=
take
depth
prds
similarities
::
[(
PhyloGroupId
,
Double
)]
similarities
=
concat
$
map
(
\
(
g2
,
g3
)
->
let
nbDocs
=
periodsToNbDocs
[(
getGroupPeriod
g1
),(
getGroupPeriod
g2
),(
getGroupPeriod
g3
)]
phylo
cooc2
=
getGroupCooc
g2
cooc3
=
getGroupCooc
g3
score
=
processProximity
proximity
cooc1
(
unionWith
(
+
)
cooc2
cooc3
)
nbDocs
in
nub
$
[(
getGroupId
g2
,
score
),(
getGroupId
g3
,
score
)])
pairsOfCandidates
--------------------------------------
cooc
::
Map
(
Int
,
Int
)
Double
cooc
=
getCooc
next
p
pairsOfCandidates
::
[(
PhyloGroup
,
PhyloGroup
)]
pairsOfCandidates
=
listToFullCombi
$
filter
(
\
g
->
elem
(
getGroupPeriod
g
)
nextPeriods
)
candidates
--------------------------------------
c
andidates
::
[
PhyloGroup
]
c
andidates
=
filter
(
\
g'
->
elem
(
getGroupPeriod
g'
)
next
)
gs
c
ooc1
::
Map
(
Int
,
Int
)
Double
c
ooc1
=
getGroupCooc
g1
--------------------------------------
scores
::
[(
PhyloGroupId
,
Double
)]
scores
=
map
(
\
g'
->
applyProximity
prox
g
g'
cooc
)
candidates
--------------------------------------
bestScores
::
[(
PhyloGroupId
,
Double
)]
bestScores
=
reverse
$
sortOn
snd
$
filter
(
\
(
_id
,
score
)
->
case
prox
of
WeightedLogJaccard
(
WLJParams
thr
_
)
->
score
>=
thr
Hamming
(
HammingParams
thr
)
->
score
<=
thr
Filiation
->
panic
"[ERR][Viz.Phylo.LinkMaker.findBestCandidates] Filiation"
)
scores
--------------------------------------
nextPeriods
::
[(
Date
,
Date
)]
nextPeriods
=
take
depth
periods
--------------------------------------
-- | To add some Pointer to a PhyloGroup
...
...
@@ -189,56 +202,31 @@ toBranches mem gs
--------------------------------------
-- | a init avec la [[head groups]] et la tail groups
toBranches'
::
[[[
Int
]]]
->
[[
Int
]]
->
[[[
Int
]]]
toBranches'
mem
gs
|
null
gs
=
mem
|
otherwise
=
toBranches'
mem'
$
tail
gs
where
--------------------------------------
mem'
::
[[[
Int
]]]
mem'
=
if
(
null
withHead
)
then
mem
++
[[
head'
"toBranches"
gs
]]
else
(
filter
(
\
gs'
->
not
$
elem
gs'
withHead
)
mem
)
++
[(
concat
withHead
)
++
[
head'
"toBranches"
gs
]]
--------------------------------------
withHead
::
[[[
Int
]]]
withHead
=
filter
(
\
gs'
->
(
not
.
null
)
$
intersect
(
concat
gs'
)
(
head'
"toBranches"
gs
)
)
mem
--------------------------------------
-- | To apply the intertemporal matching to Phylo at a given level
-- | To process an intertemporal matching task to a Phylo at a given level
-- | 1) split all groups (of the level) in branches (ie:related components sharing at least one ngram)
-- | 2) for each branch, for each group find the best candidates (by Filiation and Proximity) and create the corresponding pointers
-- | 3) update all the groups with the new pointers if they exist
interTempoMatching
::
Filiation
->
Level
->
Proximity
->
Phylo
->
Phylo
interTempoMatching
fil
lvl
prox
p
=
traceMatching
fil
lvl
(
getThreshold
prox
)
scores
$
updateGroups
fil
lvl
pointers
p
interTempoMatching
fil
lvl
prox
p
=
traceMatching
fil
lvl
(
getThreshold
prox
)
debug
$
updateGroups
fil
lvl
pointersMap
p
where
--------------------------------------
pointers
::
Map
PhyloGroupId
[
Pointer
]
pointers
=
Map
.
fromList
$
map
(
\
(
id
,
x
)
->
(
id
,
fst
x
))
candidates
--------------------------------------
scores
::
[
Double
]
scores
=
sort
$
concat
$
map
(
snd
.
snd
)
candidates
--------------------------------------
-- candidates' :: [(PhyloGroupId,([Pointer],[Double]))]
-- candidates' = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) gs
debug
::
[
Double
]
debug
=
sort
$
concat
$
map
(
snd
.
snd
)
pointers
--------------------------------------
candidates
::
[(
PhyloGroupId
,([
Pointer
],[
Double
]))
]
candidates
=
concat
$
map
(
\
b
->
map
(
\
g
->
(
getGroupId
g
,
findBestCandidates'
fil
1
5
prox
(
getNextPeriods
fil
(
getGroupPeriod
g
)
prds
)
(
filterCandidates
g
gs
)
g
p
))
b
)
b
s
pointersMap
::
Map
PhyloGroupId
[
Pointer
]
pointersMap
=
Map
.
fromList
$
map
(
\
(
id
,
x
)
->
(
id
,
fst
x
))
pointer
s
--------------------------------------
gs
::
[
PhyloGroup
]
gs
=
getGroupsWithLevel
lvl
p
pointers
::
[(
PhyloGroupId
,([
Pointer
],[
Double
]))]
pointers
=
concat
$
map
(
\
branche
->
map
(
\
g
->
(
getGroupId
g
,
findBestCandidates
fil
1
5
prox
(
getNextPeriods
fil
(
getGroupPeriod
g
)
(
getPhyloPeriods
p
))
(
filterCandidates
g
branche
)
g
p
)
)
branche
)
branches
--------------------------------------
bs
::
[[
PhyloGroup
]]
bs
=
tracePreBranches
$
toBranches
[[
head'
"interTempoMatching"
gs
]]
$
tail
gs
--------------------------------------
prds
::
[
PhyloPeriodId
]
prds
=
getPhyloPeriods
p
branches
::
[[
PhyloGroup
]]
branches
=
tracePreBranches
$
toBranches
[[
head'
"interTempoMatching"
(
getGroupsWithLevel
lvl
p
)]]
$
tail
(
getGroupsWithLevel
lvl
p
)
--------------------------------------
...
...
@@ -256,6 +244,7 @@ toLevelUp lst p = Map.toList
--------------------------------------
-- | Transpose the parent/child pointers from one level to another
transposePeriodLinks
::
Level
->
Phylo
->
Phylo
transposePeriodLinks
lvl
p
=
alterGroupWithLevel
(
\
g
->
...
...
@@ -269,6 +258,7 @@ transposePeriodLinks lvl p = alterGroupWithLevel
--------------------------------------
)
lvl
p
----------------
-- | Tracer | --
----------------
...
...
src/Gargantext/Viz/Phylo/Metrics/Proximity.hs
View file @
ff80ee2f
...
...
@@ -17,65 +17,40 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.Metrics.Proximity
where
import
Data.List
(
null
,
intersect
,
union
)
import
Data.Map
(
Map
,
elems
,
unionWith
,
intersectionWith
,
intersection
,
size
)
import
Data.List
(
null
)
import
Data.Map
(
Map
,
elems
,
unionWith
,
intersectionWith
,
intersection
,
size
,
keys
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo.Aggregates.Cooc
-- import Debug.Trace (trace)
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard
::
Double
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Double
weightedLogJaccard
s
f1
f2
|
null
wUnion
=
0
|
wUnion
==
wInter
=
1
|
s
==
0
=
(
fromIntegral
$
length
wInter
)
/
(
fromIntegral
$
length
wUnion
)
|
s
>
0
=
(
sumInvLog
wInter
)
/
(
sumInvLog
wUnion
)
|
otherwise
=
(
sumLog
wInter
)
/
(
sumLog
wUnion
)
where
--------------------------------------
wInter
::
[
Double
]
wInter
=
elems
$
intersectionWith
(
+
)
f1
f2
--------------------------------------
wUnion
::
[
Double
]
wUnion
=
elems
$
unionWith
(
+
)
f1
f2
--------------------------------------
sumInvLog
::
[
Double
]
->
Double
sumInvLog
l
=
foldl
(
\
mem
x
->
mem
+
(
1
/
log
(
s
+
x
)))
0
l
--------------------------------------
sumLog
::
[
Double
]
->
Double
sumLog
l
=
foldl
(
\
mem
x
->
mem
+
log
(
s
+
x
))
0
l
--------------------------------------
sumInvLog
::
Double
->
[
Double
]
->
Double
sumInvLog
s
l
=
foldl
(
\
mem
x
->
mem
+
(
1
/
log
(
s
+
x
)))
0
l
sumLog
::
Double
->
[
Double
]
->
Double
sumLog
s
l
=
foldl
(
\
mem
x
->
mem
+
log
(
s
+
x
))
0
l
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard'
::
Double
->
[
Int
]
->
[
Int
]
->
Map
(
Int
,
Int
)
Double
->
Double
weightedLogJaccard'
s
idx
idx'
cooc
|
null
idxUnion
=
0
|
idxUnion
==
idxInter
=
1
|
s
==
0
=
(
fromIntegral
$
length
idxInter
)
/
(
fromIntegral
$
length
idxUnion
)
|
s
>
0
=
(
sumInvLog
wInter
)
/
(
sumInvLog
wUnion
)
|
otherwise
=
(
sumLog
wInter
)
/
(
sumLog
wUnion
)
-- | To process WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard
::
Double
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Double
->
Double
weightedLogJaccard
sens
cooc
cooc'
nbDocs
|
null
union'
=
0
|
union'
==
inter'
=
1
|
sens
==
0
=
(
fromIntegral
$
length
$
keys
inter'
)
/
(
fromIntegral
$
length
$
keys
union'
)
|
sens
>
0
=
(
sumInvLog
sens
$
elems
wInter
)
/
(
sumInvLog
sens
$
elems
wUnion
)
|
otherwise
=
(
sumLog
sens
$
elems
wInter
)
/
(
sumLog
sens
$
elems
wUnion
)
where
--------------------------------------
wInter
::
[
Double
]
wInter
=
elems
$
getSubCooc
idxInter
cooc
--------------------------------------
wUnion
::
[
Double
]
wUnion
=
elems
$
getSubCooc
idxUnion
cooc
--------------------------------------
idxInter
::
[
Int
]
idxInter
=
intersect
idx
idx'
wInter
::
Map
(
Int
,
Int
)
Double
wInter
=
map
(
/
nbDocs
)
inter'
--------------------------------------
idxUnion
::
[
Int
]
idxUnion
=
union
idx
idx
'
wUnion
::
Map
(
Int
,
Int
)
Double
wUnion
=
map
(
/
nbDocs
)
union
'
--------------------------------------
sumInvLog
::
[
Double
]
->
Double
sumInvLog
l
=
foldl
(
\
mem
x
->
mem
+
(
1
/
log
(
s
+
x
)))
0
l
inter'
::
Map
(
Int
,
Int
)
Double
inter'
=
intersectionWith
(
+
)
cooc
cooc'
--------------------------------------
union'
::
Map
(
Int
,
Int
)
Double
union'
=
unionWith
(
+
)
cooc
cooc'
--------------------------------------
sumLog
::
[
Double
]
->
Double
sumLog
l
=
foldl
(
\
mem
x
->
mem
+
log
(
s
+
x
))
0
l
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields
...
...
src/Gargantext/Viz/Phylo/Tools.hs
View file @
ff80ee2f
...
...
@@ -20,9 +20,9 @@ module Gargantext.Viz.Phylo.Tools
where
import
Control.Lens
hiding
(
both
,
Level
,
Empty
)
import
Data.List
(
filter
,
intersect
,
(
++
),
sort
,
null
,
tail
,
last
,
tails
,
delete
,
nub
,
sortOn
,
nubBy
)
import
Data.List
(
filter
,
intersect
,
(
++
),
sort
,
null
,
tail
,
last
,
tails
,
delete
,
nub
,
sortOn
,
nubBy
,
concat
)
import
Data.Maybe
(
mapMaybe
,
fromMaybe
)
import
Data.Map
(
Map
,
mapKeys
,
member
,
(
!
))
import
Data.Map
(
Map
,
mapKeys
,
member
,
(
!
)
,
restrictKeys
,
elems
,
empty
,
filterWithKey
,
unionWith
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
,
toLower
,
unwords
)
import
Data.Tuple.Extra
...
...
@@ -160,8 +160,8 @@ initFoundationsRoots :: [Ngrams] -> Vector Ngrams
initFoundationsRoots
l
=
Vector
.
fromList
$
map
phyloAnalyzer
l
-- | To init the base of a Phylo from a List of Periods and Foundations
initPhyloBase
::
[(
Date
,
Date
)]
->
PhyloFoundations
->
PhyloParam
->
Phylo
initPhyloBase
pds
fds
prm
=
Phylo
((
fst
.
(
head'
"initPhyloBase"
))
pds
,
(
snd
.
last
)
pds
)
fds
(
map
(
\
pd
->
initPhyloPeriod
pd
[]
)
pds
)
prm
initPhyloBase
::
[(
Date
,
Date
)]
->
PhyloFoundations
->
Map
Date
Double
->
Map
Date
(
Map
(
Int
,
Int
)
Double
)
->
PhyloParam
->
Phylo
initPhyloBase
pds
fds
nbDocs
cooc
prm
=
Phylo
((
fst
.
(
head'
"initPhyloBase"
))
pds
,
(
snd
.
last
)
pds
)
fds
(
map
(
\
pd
->
initPhyloPeriod
pd
[]
)
pds
)
nbDocs
cooc
prm
-- | To init the param of a Phylo
initPhyloParam
::
Maybe
Text
->
Maybe
Software
->
Maybe
PhyloQueryBuild
->
PhyloParam
...
...
@@ -175,6 +175,11 @@ getLastLevel p = (last . sort)
.
traverse
.
phylo_periodLevels
)
p
-- | To get all the coocurency matrix of a phylo
getPhyloCooc
::
Phylo
->
Map
Date
(
Map
(
Int
,
Int
)
Double
)
getPhyloCooc
p
=
p
^.
phylo_cooc
--------------------
-- | PhyloRoots | --
...
...
@@ -194,6 +199,11 @@ getIdxInRoots n p = case (elemIndex n (getFoundationsRoots p)) of
Nothing
->
panic
"[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just
idx
->
idx
getIdxInVector
::
Ngrams
->
Vector
Ngrams
->
Int
getIdxInVector
n
ns
=
case
(
elemIndex
n
ns
)
of
Nothing
->
panic
"[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just
idx
->
idx
--------------------
-- | PhyloGroup | --
--------------------
...
...
@@ -242,6 +252,10 @@ getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId
=
_phylo_groupId
getGroupCooc
::
PhyloGroup
->
Map
(
Int
,
Int
)
Double
getGroupCooc
=
_phylo_groupCooc
-- | To get the level out of the id of a PhyloGroup
getGroupLevel
::
PhyloGroup
->
Int
getGroupLevel
=
snd
.
fst
.
getGroupId
...
...
@@ -380,10 +394,29 @@ initGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> Phylo -> PhyloGroup
initGroup
ngrams
lbl
idx
lvl
from'
to'
p
=
PhyloGroup
(((
from'
,
to'
),
lvl
),
idx
)
lbl
(
sort
$
map
(
\
x
->
getIdxInRoots
x
p
)
ngrams
)
idxs
(
Map
.
empty
)
Nothing
(
getMiniCooc
(
listToFullCombi
idxs
)
(
periodsToYears
[(
from'
,
to'
)])
(
getPhyloCooc
p
))
[]
[]
[]
[]
where
idxs
=
sort
$
map
(
\
x
->
getIdxInRoots
x
p
)
ngrams
-- | To sum two coocurency Matrix
sumCooc
::
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
sumCooc
m
m'
=
unionWith
(
+
)
m
m'
-- | To build the mini cooc matrix of each group
getMiniCooc
::
[(
Int
,
Int
)]
->
Set
Date
->
Map
Date
(
Map
(
Int
,
Int
)
Double
)
->
Map
(
Int
,
Int
)
Double
getMiniCooc
pairs
years
cooc
=
filterWithKey
(
\
(
n
,
n'
)
_
->
elem
(
n
,
n'
)
pairs
)
cooc'
where
--------------------------------------
cooc'
::
Map
(
Int
,
Int
)
Double
cooc'
=
foldl
(
\
m
m'
->
sumCooc
m
m'
)
empty
$
elems
$
restrictKeys
cooc
years
--------------------------------------
---------------------
...
...
@@ -418,6 +451,11 @@ initPhyloPeriod :: PhyloPeriodId -> [PhyloLevel] -> PhyloPeriod
initPhyloPeriod
id
l
=
PhyloPeriod
id
l
-- | To transform a list of periods into a set of Dates
periodsToYears
::
[(
Date
,
Date
)]
->
Set
Date
periodsToYears
periods
=
(
Set
.
fromList
.
sort
.
concat
)
[[
d
,
d'
]
|
(
d
,
d'
)
<-
periods
]
--------------------
-- | PhyloLevel | --
--------------------
...
...
src/Gargantext/Viz/Phylo/View/Filters.hs
View file @
ff80ee2f
...
...
@@ -71,7 +71,7 @@ filterSizeBranch min' v = cleanNodesEdges v v'
where
--------------------------------------
v'
::
PhyloView
v'
=
v
&
pv_branches
%~
(
filter
(
\
b
->
(
length
$
filter
(
\
n
->
(
getBranchId
b
)
==
(
getNodeBranchId
n
))
$
getNodesInBranches
v
)
>
min'
))
v'
=
v
&
pv_branches
%~
(
filter
(
\
b
->
(
length
$
filter
(
\
n
->
(
getBranchId
b
)
==
(
getNodeBranchId
n
))
$
getNodesInBranches
v
)
>
=
min'
))
--------------------------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment