Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
ff80ee2f
Commit
ff80ee2f
authored
May 24, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix temporalMatching
parent
38d3a9cd
Pipeline
#415
failed with stage
Changes
11
Pipelines
1
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
244 additions
and
175 deletions
+244
-175
Phylo.hs
src/Gargantext/Viz/Phylo.hs
+3
-0
Cluster.hs
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
+7
-11
Cooc.hs
src/Gargantext/Viz/Phylo/Aggregates/Cooc.hs
+41
-2
Document.hs
src/Gargantext/Viz/Phylo/Aggregates/Document.hs
+9
-4
Fis.hs
src/Gargantext/Viz/Phylo/Aggregates/Fis.hs
+2
-2
Example.hs
src/Gargantext/Viz/Phylo/Example.hs
+10
-3
LevelMaker.hs
src/Gargantext/Viz/Phylo/LevelMaker.hs
+28
-12
LinkMaker.hs
src/Gargantext/Viz/Phylo/LinkMaker.hs
+76
-86
Proximity.hs
src/Gargantext/Viz/Phylo/Metrics/Proximity.hs
+24
-49
Tools.hs
src/Gargantext/Viz/Phylo/Tools.hs
+43
-5
Filters.hs
src/Gargantext/Viz/Phylo/View/Filters.hs
+1
-1
No files found.
src/Gargantext/Viz/Phylo.hs
View file @
ff80ee2f
...
...
@@ -77,6 +77,8 @@ data Phylo =
Phylo
{
_phylo_duration
::
(
Start
,
End
)
,
_phylo_foundations
::
PhyloFoundations
,
_phylo_periods
::
[
PhyloPeriod
]
,
_phylo_docsByYears
::
Map
Date
Double
,
_phylo_cooc
::
Map
Date
(
Map
(
Int
,
Int
)
Double
)
,
_phylo_param
::
PhyloParam
}
deriving
(
Generic
,
Show
,
Eq
)
...
...
@@ -150,6 +152,7 @@ data PhyloGroup =
,
_phylo_groupNgrams
::
[
Int
]
,
_phylo_groupMeta
::
Map
Text
Double
,
_phylo_groupBranchId
::
Maybe
PhyloBranchId
,
_phylo_groupCooc
::
Map
(
Int
,
Int
)
Double
,
_phylo_groupPeriodParents
::
[
Pointer
]
,
_phylo_groupPeriodChilds
::
[
Pointer
]
...
...
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
View file @
ff80ee2f
...
...
@@ -25,7 +25,7 @@ import Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.Metrics.Proximity
import
Gargantext.Viz.Phylo.Metrics.Clustering
import
Gargantext.Viz.Phylo.
Aggregates.Cooc
import
Gargantext.Viz.Phylo.
LinkMaker
import
qualified
Data.Map
as
Map
import
qualified
Data.Vector.Storable
as
VS
...
...
@@ -49,15 +49,11 @@ graphToClusters clust (nodes,edges) = case clust of
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph
::
Proximity
->
[
PhyloGroup
]
->
Map
(
Int
,
Int
)
Double
->
([
GroupNode
],[
GroupEdge
])
groupsToGraph
prox
gs
cooc
=
case
prox
of
-- WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), traceSim x y (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc) p
-- $ weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
-- $ getCandidates gs)
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard'
sens
(
getGroupNgrams
x
)
(
getGroupNgrams
y
)
cooc
))
$
getCandidates
gs
)
Hamming
(
HammingParams
_
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
hamming
(
getSubCooc
(
getGroupNgrams
x
)
cooc
)
(
getSubCooc
(
getGroupNgrams
y
)
cooc
)))
groupsToGraph
::
Double
->
Proximity
->
[
PhyloGroup
]
->
([
GroupNode
],[
GroupEdge
])
groupsToGraph
nbDocs
prox
gs
=
case
prox
of
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard
sens
(
getGroupCooc
x
)
(
getGroupCooc
y
)
nbDocs
))
$
getCandidates
gs
)
Hamming
(
HammingParams
_
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
hamming
(
getGroupCooc
x
)
(
getGroupCooc
y
)))
$
getCandidates
gs
)
_
->
undefined
...
...
@@ -84,7 +80,7 @@ phyloToClusters lvl clus p = Map.fromList
--------------------------------------
graphs
::
[([
GroupNode
],[
GroupEdge
])]
graphs
=
traceGraph
lvl
(
getThreshold
prox
)
$
map
(
\
prd
->
groupsToGraph
prox
(
getGroupsWithFilters
lvl
prd
p
)
(
getCooc
[
prd
]
p
))
periods
$
map
(
\
prd
->
groupsToGraph
(
periodsToNbDocs
[
prd
]
p
)
prox
(
getGroupsWithFilters
lvl
prd
p
))
periods
--------------------------------------
prox
::
Proximity
prox
=
getProximity
clus
...
...
src/Gargantext/Viz/Phylo/Aggregates/Cooc.hs
View file @
ff80ee2f
...
...
@@ -17,8 +17,10 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.Aggregates.Cooc
where
import
Data.List
(
union
,
concat
,
nub
)
import
Data.Map
(
Map
,
elems
,
adjust
,
filterWithKey
)
import
Data.List
(
union
,
concat
,
nub
,
sort
)
import
Data.Map
(
Map
,
elems
,
adjust
,
filterWithKey
,
fromListWith
,
fromList
,
restrictKeys
)
import
Data.Set
(
Set
)
import
Data.Vector
(
Vector
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
...
...
@@ -83,5 +85,42 @@ getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)
--------------------------------------
-- | To transform a list of index into a cooc matrix
listToCooc
::
[
Int
]
->
Map
(
Int
,
Int
)
Double
listToCooc
lst
=
fromList
$
map
(
\
combi
->
(
combi
,
1
))
$
listToFullCombi
lst
-- | To transform a list of ngrams into a list of indexes
ngramsToIdx
::
[
Ngrams
]
->
Vector
Ngrams
->
[
Int
]
ngramsToIdx
ns
v
=
sort
$
map
(
\
n
->
getIdxInVector
n
v
)
ns
-- | To build the cooc matrix by years out of the corpus
docsToCooc
::
[
Document
]
->
Vector
Ngrams
->
Map
Date
(
Map
(
Int
,
Int
)
Double
)
docsToCooc
docs
fdt
=
fromListWith
sumCooc
$
map
(
\
(
d
,
l
)
->
(
d
,
listToCooc
l
))
$
map
(
\
doc
->
(
date
doc
,
ngramsToIdx
(
text
doc
)
fdt
))
docs
-- | To sum all the docs produced during a list of years
sumDocsByYears
::
Set
Date
->
Map
Date
Double
->
Double
sumDocsByYears
years
m
=
sum
$
elems
$
restrictKeys
m
years
-- | To get the cooc matrix of a group
groupToCooc
::
PhyloGroup
->
Phylo
->
Map
(
Int
,
Int
)
Double
groupToCooc
g
p
=
getMiniCooc
(
listToFullCombi
$
getGroupNgrams
g
)
(
periodsToYears
[
getGroupPeriod
g
])
(
getPhyloCooc
p
)
-- | To get the union of the cooc matrix of two groups
unionOfCooc
::
PhyloGroup
->
PhyloGroup
->
Phylo
->
Map
(
Int
,
Int
)
Double
unionOfCooc
g
g'
p
=
sumCooc
(
groupToCooc
g
p
)
(
groupToCooc
g'
p
)
-- phyloCooc :: Map (Int, Int) Double
-- phyloCooc = fisToCooc phyloFis phylo1_0_1
src/Gargantext/Viz/Phylo/Aggregates/Document.hs
View file @
ff80ee2f
...
...
@@ -17,8 +17,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.Aggregates.Document
where
import
Data.List
(
last
)
import
Data.Map
(
Map
)
import
Data.Map
(
Map
,
fromListWith
)
import
Data.Text
(
Text
)
import
Data.Tuple
(
fst
)
import
Data.Vector
(
Vector
)
...
...
@@ -32,7 +31,7 @@ import qualified Data.Vector as Vector
-- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods
::
(
Eq
date
,
Enum
date
)
=>
Grain
->
Step
->
(
date
,
date
)
->
[(
date
,
date
)]
initPeriods
g
s
(
start
,
end
)
=
map
(
\
l
->
(
head'
"Doc"
l
,
last
l
))
initPeriods
g
s
(
start
,
end
)
=
map
(
\
l
->
(
head'
"Doc"
l
,
last
'
"Doc"
l
))
$
chunkAlong
g
s
[
start
..
end
]
...
...
@@ -54,4 +53,10 @@ parseDocs roots c = map (\(d,t)
->
Document
d
(
filter
(
\
x
->
Vector
.
elem
x
roots
)
$
monoTexts
t
))
c
-- | To count the number of documents by year
countDocs
::
[(
Date
,
a
)]
->
Map
Date
Double
countDocs
corpus
=
fromListWith
(
+
)
$
map
(
\
(
d
,
_
)
->
(
d
,
1
))
corpus
src/Gargantext/Viz/Phylo/Aggregates/Fis.hs
View file @
ff80ee2f
...
...
@@ -43,12 +43,12 @@ filterFis keep thr f m = case keep of
-- | To filter Fis with small Support
filterFisBySupport
::
Int
->
[
PhyloFis
]
->
[
PhyloFis
]
filterFisBySupport
thr
l
=
filter
(
\
fis
->
getSupport
fis
>
thr
)
l
filterFisBySupport
thr
l
=
filter
(
\
fis
->
getSupport
fis
>
=
thr
)
l
-- | To filter Fis with small Clique size
filterFisByClique
::
Int
->
[
PhyloFis
]
->
[
PhyloFis
]
filterFisByClique
thr
l
=
filter
(
\
fis
->
(
size
$
getClique
fis
)
>
thr
)
l
filterFisByClique
thr
l
=
filter
(
\
fis
->
(
size
$
getClique
fis
)
>
=
thr
)
l
-- | To filter nested Fis
...
...
src/Gargantext/Viz/Phylo/Example.hs
View file @
ff80ee2f
...
...
@@ -40,6 +40,7 @@ import Gargantext.Text.Context (TermList)
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Aggregates.Cluster
import
Gargantext.Viz.Phylo.Aggregates.Document
import
Gargantext.Viz.Phylo.Aggregates.Cooc
import
Gargantext.Viz.Phylo.Aggregates.Fis
import
Gargantext.Viz.Phylo.BranchMaker
import
Gargantext.Viz.Phylo.LevelMaker
...
...
@@ -104,7 +105,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild
::
PhyloQueryBuild
phyloQueryBuild
=
PhyloQueryBuild
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
3
10
)
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.5
0
)
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
1
20
)
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.3
0
)
...
...
@@ -154,7 +155,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster
::
Map
(
Date
,
Date
)
[
PhyloCluster
]
phyloCluster
=
phyloToClusters
1
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.05
10
)
phyloBranch1
phyloCluster
=
phyloToClusters
3
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.05
10
)
phyloBranch1
----------------------------------
...
...
@@ -226,7 +227,13 @@ phyloDocs = corpusToDocs corpus phyloBase
phyloBase
::
Phylo
phyloBase
=
initPhyloBase
periods
(
PhyloFoundations
foundationsRoots
termList
)
defaultPhyloParam
phyloBase
=
initPhyloBase
periods
(
PhyloFoundations
foundationsRoots
termList
)
nbDocs
cooc
defaultPhyloParam
cooc
::
Map
Date
(
Map
(
Int
,
Int
)
Double
)
cooc
=
docsToCooc
(
parseDocs
foundationsRoots
corpus
)
foundationsRoots
nbDocs
::
Map
Date
Double
nbDocs
=
countDocs
corpus
periods
::
[(
Date
,
Date
)]
periods
=
initPeriods
5
3
...
...
src/Gargantext/Viz/Phylo/LevelMaker.hs
View file @
ff80ee2f
...
...
@@ -32,6 +32,7 @@ import Gargantext.Viz.Phylo.Aggregates.Fis
import
Gargantext.Viz.Phylo.BranchMaker
import
Gargantext.Viz.Phylo.LinkMaker
import
Gargantext.Viz.Phylo.Tools
import
Gargantext.Viz.Phylo.Aggregates.Cooc
import
Gargantext.Text.Context
(
TermList
)
import
qualified
Data.Vector.Storable
as
VS
...
...
@@ -60,7 +61,7 @@ instance PhyloLevelMaker PhyloCluster
|
otherwise
=
panic
(
"[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2"
)
--------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
toPhyloGroups
lvl
(
d
,
d'
)
l
m
_
=
map
(
\
(
idx
,
cluster
)
->
clusterToGroup
(
d
,
d'
)
lvl
idx
""
cluster
m
)
$
zip
[
1
..
]
l
toPhyloGroups
lvl
(
d
,
d'
)
l
m
p
=
map
(
\
(
idx
,
cluster
)
->
clusterToGroup
(
d
,
d'
)
lvl
idx
""
cluster
m
p
)
$
zip
[
1
..
]
l
--------------------------------------
...
...
@@ -94,9 +95,12 @@ instance PhyloLevelMaker Document
-- | To transform a Cluster into a Phylogroup
clusterToGroup
::
PhyloPeriodId
->
Level
->
Int
->
Text
->
PhyloCluster
->
Map
(
Date
,
Date
)
[
PhyloCluster
]
->
PhyloGroup
clusterToGroup
prd
lvl
idx
lbl
groups
_m
=
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
empty
Nothing
[]
[]
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
groups
)
clusterToGroup
::
PhyloPeriodId
->
Level
->
Int
->
Text
->
PhyloCluster
->
Map
(
Date
,
Date
)
[
PhyloCluster
]
->
Phylo
->
PhyloGroup
clusterToGroup
prd
lvl
idx
lbl
groups
_m
p
=
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
empty
Nothing
(
getMiniCooc
(
listToFullCombi
ngrams
)
(
periodsToYears
[
prd
])
(
getPhyloCooc
p
))
[]
[]
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
groups
)
where
--------------------------------------
ngrams
::
[
Int
]
...
...
@@ -107,7 +111,9 @@ clusterToGroup prd lvl idx lbl groups _m =
-- | To transform a Clique into a PhyloGroup
cliqueToGroup
::
PhyloPeriodId
->
Level
->
Int
->
Text
->
PhyloFis
->
Phylo
->
PhyloGroup
cliqueToGroup
prd
lvl
idx
lbl
fis
p
=
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
(
singleton
"support"
(
fromIntegral
$
getSupport
fis
))
Nothing
[]
[]
[]
[]
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
ngrams
(
singleton
"support"
(
fromIntegral
$
getSupport
fis
))
Nothing
(
getMiniCooc
(
listToFullCombi
ngrams
)
(
periodsToYears
[
prd
])
(
getPhyloCooc
p
))
[]
[]
[]
[]
where
--------------------------------------
ngrams
::
[
Int
]
...
...
@@ -120,7 +126,9 @@ cliqueToGroup prd lvl idx lbl fis p =
-- | To transform a list of Ngrams into a PhyloGroup
ngramsToGroup
::
PhyloPeriodId
->
Level
->
Int
->
Text
->
[
Ngrams
]
->
Phylo
->
PhyloGroup
ngramsToGroup
prd
lvl
idx
lbl
ngrams
p
=
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
(
sort
$
map
(
\
x
->
getIdxInRoots
x
p
)
ngrams
)
empty
Nothing
[]
[]
[]
[]
PhyloGroup
((
prd
,
lvl
),
idx
)
lbl
(
sort
$
map
(
\
x
->
getIdxInRoots
x
p
)
ngrams
)
empty
Nothing
(
getMiniCooc
(
listToFullCombi
$
sort
$
map
(
\
x
->
getIdxInRoots
x
p
)
ngrams
)
(
periodsToYears
[
prd
])
(
getPhyloCooc
p
))
[]
[]
[]
[]
-- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
...
...
@@ -141,10 +149,6 @@ toNthLevel lvlMax prox clus p
|
otherwise
=
toNthLevel
lvlMax
prox
clus
$
traceBranches
(
lvl
+
1
)
$
setPhyloBranches
(
lvl
+
1
)
-- $ traceTempoMatching Descendant (lvl + 1)
-- $ interTempoMatching Descendant (lvl + 1) prox
-- $ traceTempoMatching Ascendant (lvl + 1)
-- $ interTempoMatching Ascendant (lvl + 1) prox
$
transposePeriodLinks
(
lvl
+
1
)
$
setLevelLinks
(
lvl
,
lvl
+
1
)
$
addPhyloLevel
(
lvl
+
1
)
...
...
@@ -207,8 +211,14 @@ instance PhyloMaker [(Date, Text)]
phyloBase
=
tracePhyloBase
$
toPhyloBase
q
(
initPhyloParam
(
Just
defaultPhyloVersion
)
(
Just
defaultSoftware
)
(
Just
q
))
c
roots
termList
--------------------------------------
--------------------------------------
toPhyloBase
q
p
c
roots
termList
=
initPhyloBase
periods
foundations
p
toPhyloBase
q
p
c
roots
termList
=
initPhyloBase
periods
foundations
nbDocs
cooc
p
where
--------------------------------------
cooc
::
Map
Date
(
Map
(
Int
,
Int
)
Double
)
cooc
=
docsToCooc
(
parseDocs
(
foundations
^.
phylo_foundationsRoots
)
c
)
(
foundations
^.
phylo_foundationsRoots
)
--------------------------------------
nbDocs
::
Map
Date
Double
nbDocs
=
countDocs
c
--------------------------------------
foundations
::
PhyloFoundations
foundations
=
PhyloFoundations
(
initFoundationsRoots
roots
)
termList
...
...
@@ -240,8 +250,14 @@ instance PhyloMaker [Document]
phyloBase
=
tracePhyloBase
$
toPhyloBase
q
(
initPhyloParam
(
Just
defaultPhyloVersion
)
(
Just
defaultSoftware
)
(
Just
q
))
c
roots
termList
--------------------------------------
--------------------------------------
toPhyloBase
q
p
c
roots
termList
=
initPhyloBase
periods
foundations
p
toPhyloBase
q
p
c
roots
termList
=
initPhyloBase
periods
foundations
nbDocs
cooc
p
where
--------------------------------------
cooc
::
Map
Date
(
Map
(
Int
,
Int
)
Double
)
cooc
=
docsToCooc
c
(
foundations
^.
phylo_foundationsRoots
)
--------------------------------------
nbDocs
::
Map
Date
Double
nbDocs
=
countDocs
$
map
(
\
doc
->
(
date
doc
,
text
doc
))
c
--------------------------------------
foundations
::
PhyloFoundations
foundations
=
PhyloFoundations
(
initFoundationsRoots
roots
)
termList
...
...
src/Gargantext/Viz/Phylo/LinkMaker.hs
View file @
ff80ee2f
This diff is collapsed.
Click to expand it.
src/Gargantext/Viz/Phylo/Metrics/Proximity.hs
View file @
ff80ee2f
...
...
@@ -17,65 +17,40 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.Metrics.Proximity
where
import
Data.List
(
null
,
intersect
,
union
)
import
Data.Map
(
Map
,
elems
,
unionWith
,
intersectionWith
,
intersection
,
size
)
import
Data.List
(
null
)
import
Data.Map
(
Map
,
elems
,
unionWith
,
intersectionWith
,
intersection
,
size
,
keys
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo.Aggregates.Cooc
-- import Debug.Trace (trace)
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard
::
Double
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Double
weightedLogJaccard
s
f1
f2
|
null
wUnion
=
0
|
wUnion
==
wInter
=
1
|
s
==
0
=
(
fromIntegral
$
length
wInter
)
/
(
fromIntegral
$
length
wUnion
)
|
s
>
0
=
(
sumInvLog
wInter
)
/
(
sumInvLog
wUnion
)
|
otherwise
=
(
sumLog
wInter
)
/
(
sumLog
wUnion
)
where
--------------------------------------
wInter
::
[
Double
]
wInter
=
elems
$
intersectionWith
(
+
)
f1
f2
--------------------------------------
wUnion
::
[
Double
]
wUnion
=
elems
$
unionWith
(
+
)
f1
f2
--------------------------------------
sumInvLog
::
[
Double
]
->
Double
sumInvLog
l
=
foldl
(
\
mem
x
->
mem
+
(
1
/
log
(
s
+
x
)))
0
l
--------------------------------------
sumLog
::
[
Double
]
->
Double
sumLog
l
=
foldl
(
\
mem
x
->
mem
+
log
(
s
+
x
))
0
l
--------------------------------------
sumInvLog
::
Double
->
[
Double
]
->
Double
sumInvLog
s
l
=
foldl
(
\
mem
x
->
mem
+
(
1
/
log
(
s
+
x
)))
0
l
sumLog
::
Double
->
[
Double
]
->
Double
sumLog
s
l
=
foldl
(
\
mem
x
->
mem
+
log
(
s
+
x
))
0
l
-- | To process
the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard
'
::
Double
->
[
Int
]
->
[
Int
]
->
Map
(
Int
,
Int
)
Double
->
Double
weightedLogJaccard
'
s
idx
idx'
cooc
|
null
idxUnion
=
0
|
idxUnion
==
idxInter
=
1
|
s
==
0
=
(
fromIntegral
$
length
idxInter
)
/
(
fromIntegral
$
length
idxUnion
)
|
s
>
0
=
(
sumInvLog
wInter
)
/
(
sumInvLog
wUnion
)
|
otherwise
=
(
sumLog
wInter
)
/
(
sumLog
wUnion
)
-- | To process
WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard
::
Double
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Double
->
Double
weightedLogJaccard
sens
cooc
cooc'
nbDocs
|
null
union'
=
0
|
union'
==
inter'
=
1
|
s
ens
==
0
=
(
fromIntegral
$
length
$
keys
inter'
)
/
(
fromIntegral
$
length
$
keys
union'
)
|
s
ens
>
0
=
(
sumInvLog
sens
$
elems
wInter
)
/
(
sumInvLog
sens
$
elems
wUnion
)
|
otherwise
=
(
sumLog
sens
$
elems
wInter
)
/
(
sumLog
sens
$
elems
wUnion
)
where
--------------------------------------
wInter
::
[
Double
]
wInter
=
elems
$
getSubCooc
idxInter
cooc
--------------------------------------
wUnion
::
[
Double
]
wUnion
=
elems
$
getSubCooc
idxUnion
cooc
wInter
::
Map
(
Int
,
Int
)
Double
wInter
=
map
(
/
nbDocs
)
inter'
--------------------------------------
idxInter
::
[
Int
]
idxInter
=
intersect
idx
idx
'
wUnion
::
Map
(
Int
,
Int
)
Double
wUnion
=
map
(
/
nbDocs
)
union
'
--------------------------------------
i
dxUnion
::
[
Int
]
i
dxUnion
=
union
idx
idx
'
i
nter'
::
Map
(
Int
,
Int
)
Double
i
nter'
=
intersectionWith
(
+
)
cooc
cooc
'
--------------------------------------
sumInvLog
::
[
Double
]
->
Double
sumInvLog
l
=
foldl
(
\
mem
x
->
mem
+
(
1
/
log
(
s
+
x
)))
0
l
union'
::
Map
(
Int
,
Int
)
Double
union'
=
unionWith
(
+
)
cooc
cooc'
--------------------------------------
sumLog
::
[
Double
]
->
Double
sumLog
l
=
foldl
(
\
mem
x
->
mem
+
log
(
s
+
x
))
0
l
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields
...
...
src/Gargantext/Viz/Phylo/Tools.hs
View file @
ff80ee2f
...
...
@@ -20,9 +20,9 @@ module Gargantext.Viz.Phylo.Tools
where
import
Control.Lens
hiding
(
both
,
Level
,
Empty
)
import
Data.List
(
filter
,
intersect
,
(
++
),
sort
,
null
,
tail
,
last
,
tails
,
delete
,
nub
,
sortOn
,
nubBy
)
import
Data.List
(
filter
,
intersect
,
(
++
),
sort
,
null
,
tail
,
last
,
tails
,
delete
,
nub
,
sortOn
,
nubBy
,
concat
)
import
Data.Maybe
(
mapMaybe
,
fromMaybe
)
import
Data.Map
(
Map
,
mapKeys
,
member
,
(
!
))
import
Data.Map
(
Map
,
mapKeys
,
member
,
(
!
)
,
restrictKeys
,
elems
,
empty
,
filterWithKey
,
unionWith
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
,
toLower
,
unwords
)
import
Data.Tuple.Extra
...
...
@@ -160,8 +160,8 @@ initFoundationsRoots :: [Ngrams] -> Vector Ngrams
initFoundationsRoots
l
=
Vector
.
fromList
$
map
phyloAnalyzer
l
-- | To init the base of a Phylo from a List of Periods and Foundations
initPhyloBase
::
[(
Date
,
Date
)]
->
PhyloFoundations
->
PhyloParam
->
Phylo
initPhyloBase
pds
fds
prm
=
Phylo
((
fst
.
(
head'
"initPhyloBase"
))
pds
,
(
snd
.
last
)
pds
)
fds
(
map
(
\
pd
->
initPhyloPeriod
pd
[]
)
pds
)
prm
initPhyloBase
::
[(
Date
,
Date
)]
->
PhyloFoundations
->
Map
Date
Double
->
Map
Date
(
Map
(
Int
,
Int
)
Double
)
->
PhyloParam
->
Phylo
initPhyloBase
pds
fds
nbDocs
cooc
prm
=
Phylo
((
fst
.
(
head'
"initPhyloBase"
))
pds
,
(
snd
.
last
)
pds
)
fds
(
map
(
\
pd
->
initPhyloPeriod
pd
[]
)
pds
)
nbDocs
cooc
prm
-- | To init the param of a Phylo
initPhyloParam
::
Maybe
Text
->
Maybe
Software
->
Maybe
PhyloQueryBuild
->
PhyloParam
...
...
@@ -175,6 +175,11 @@ getLastLevel p = (last . sort)
.
traverse
.
phylo_periodLevels
)
p
-- | To get all the coocurency matrix of a phylo
getPhyloCooc
::
Phylo
->
Map
Date
(
Map
(
Int
,
Int
)
Double
)
getPhyloCooc
p
=
p
^.
phylo_cooc
--------------------
-- | PhyloRoots | --
...
...
@@ -194,6 +199,11 @@ getIdxInRoots n p = case (elemIndex n (getFoundationsRoots p)) of
Nothing
->
panic
"[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just
idx
->
idx
getIdxInVector
::
Ngrams
->
Vector
Ngrams
->
Int
getIdxInVector
n
ns
=
case
(
elemIndex
n
ns
)
of
Nothing
->
panic
"[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots"
Just
idx
->
idx
--------------------
-- | PhyloGroup | --
--------------------
...
...
@@ -242,6 +252,10 @@ getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId
=
_phylo_groupId
getGroupCooc
::
PhyloGroup
->
Map
(
Int
,
Int
)
Double
getGroupCooc
=
_phylo_groupCooc
-- | To get the level out of the id of a PhyloGroup
getGroupLevel
::
PhyloGroup
->
Int
getGroupLevel
=
snd
.
fst
.
getGroupId
...
...
@@ -380,10 +394,29 @@ initGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> Phylo -> PhyloGroup
initGroup
ngrams
lbl
idx
lvl
from'
to'
p
=
PhyloGroup
(((
from'
,
to'
),
lvl
),
idx
)
lbl
(
sort
$
map
(
\
x
->
getIdxInRoots
x
p
)
ngrams
)
idxs
(
Map
.
empty
)
Nothing
(
getMiniCooc
(
listToFullCombi
idxs
)
(
periodsToYears
[(
from'
,
to'
)])
(
getPhyloCooc
p
))
[]
[]
[]
[]
where
idxs
=
sort
$
map
(
\
x
->
getIdxInRoots
x
p
)
ngrams
-- | To sum two coocurency Matrix
sumCooc
::
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
sumCooc
m
m'
=
unionWith
(
+
)
m
m'
-- | To build the mini cooc matrix of each group
getMiniCooc
::
[(
Int
,
Int
)]
->
Set
Date
->
Map
Date
(
Map
(
Int
,
Int
)
Double
)
->
Map
(
Int
,
Int
)
Double
getMiniCooc
pairs
years
cooc
=
filterWithKey
(
\
(
n
,
n'
)
_
->
elem
(
n
,
n'
)
pairs
)
cooc'
where
--------------------------------------
cooc'
::
Map
(
Int
,
Int
)
Double
cooc'
=
foldl
(
\
m
m'
->
sumCooc
m
m'
)
empty
$
elems
$
restrictKeys
cooc
years
--------------------------------------
---------------------
...
...
@@ -418,6 +451,11 @@ initPhyloPeriod :: PhyloPeriodId -> [PhyloLevel] -> PhyloPeriod
initPhyloPeriod
id
l
=
PhyloPeriod
id
l
-- | To transform a list of periods into a set of Dates
periodsToYears
::
[(
Date
,
Date
)]
->
Set
Date
periodsToYears
periods
=
(
Set
.
fromList
.
sort
.
concat
)
[[
d
,
d'
]
|
(
d
,
d'
)
<-
periods
]
--------------------
-- | PhyloLevel | --
--------------------
...
...
src/Gargantext/Viz/Phylo/View/Filters.hs
View file @
ff80ee2f
...
...
@@ -71,7 +71,7 @@ filterSizeBranch min' v = cleanNodesEdges v v'
where
--------------------------------------
v'
::
PhyloView
v'
=
v
&
pv_branches
%~
(
filter
(
\
b
->
(
length
$
filter
(
\
n
->
(
getBranchId
b
)
==
(
getNodeBranchId
n
))
$
getNodesInBranches
v
)
>
min'
))
v'
=
v
&
pv_branches
%~
(
filter
(
\
b
->
(
length
$
filter
(
\
n
->
(
getBranchId
b
)
==
(
getNodeBranchId
n
))
$
getNodesInBranches
v
)
>
=
min'
))
--------------------------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment