Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
3517130e
Commit
3517130e
authored
Oct 08, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
looking at the recall
parent
406ae431
Pipeline
#586
failed with stage
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
83 additions
and
14 deletions
+83
-14
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+1
-1
PhyloExport.hs
src/Gargantext/Viz/Phylo/PhyloExport.hs
+10
-4
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+22
-1
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+50
-8
No files found.
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
3517130e
...
...
@@ -125,7 +125,7 @@ defaultConfig =
,
phyloLevel
=
1
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloSynchrony
=
ByProximityDistribution
0
,
phyloQuality
=
Quality
1
1
,
phyloQuality
=
Quality
0.5
1
,
timeUnit
=
Year
3
1
5
,
contextualUnit
=
Fis
1
4
,
exportLabel
=
[
BranchLabel
MostInclusive
2
,
GroupLabel
MostEmergentInclusive
2
]
...
...
src/Gargantext/Viz/Phylo/PhyloExport.hs
View file @
3517130e
...
...
@@ -114,7 +114,7 @@ branchToDotNode b =
([
FillColor
[
toWColor
CornSilk
],
FontName
"Arial"
,
FontSize
40
,
Shape
Egg
,
Style
[
SItem
Bold
[]
],
Label
(
toDotLabel
$
b
^.
branch_label
)]
<>
(
metaToAttr
$
b
^.
branch_meta
)
<>
[
toAttr
"nodeType"
"branch"
,
toAttr
"branchId"
(
pack
$
show
(
snd
$
b
^.
branch_id
))
])
,
toAttr
"branchId"
(
pack
$
unwords
(
init
$
map
show
$
snd
$
b
^.
branch_id
))
])
periodToDotNode
::
(
Date
,
Date
)
->
Dot
DotId
periodToDotNode
prd
=
...
...
@@ -132,7 +132,7 @@ groupToDotNode fdt g =
<>
[
toAttr
"nodeType"
"group"
,
toAttr
"from"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
,
toAttr
"to"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
,
toAttr
"branchId"
(
pack
$
show
(
snd
$
g
^.
phylo_groupBranchId
))])
,
toAttr
"branchId"
(
pack
$
unwords
(
init
$
map
show
$
snd
$
g
^.
phylo_groupBranchId
))])
toDotEdge
::
DotId
->
DotId
->
Text
.
Text
->
EdgeType
->
Dot
DotId
...
...
@@ -164,8 +164,14 @@ exportToDot phylo export =
graphAttrs
(
[
Label
(
toDotLabel
$
(
phyloName
$
getConfig
phylo
))]
<>
[
FontSize
30
,
LabelLoc
VTop
,
NodeSep
1
,
RankSep
[
1
],
Rank
SameRank
,
Splines
SplineEdges
,
Overlap
ScaleOverlaps
,
Ratio
FillRatio
,
Style
[
SItem
Filled
[]
],
Color
[
toWColor
White
]
,
(
toAttr
(
fromStrict
"nbDocs"
)
$
pack
$
show
(
sum
$
elems
$
phylo
^.
phylo_timeDocs
))])
,
Style
[
SItem
Filled
[]
],
Color
[
toWColor
White
]]
-- | home made attributes
<>
[(
toAttr
(
fromStrict
"nbDocs"
)
$
pack
$
show
(
sum
$
elems
$
phylo
^.
phylo_timeDocs
))
,(
toAttr
(
fromStrict
"proxiName"
)
$
pack
$
show
(
getProximityName
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"proxiInit"
)
$
pack
$
show
(
getProximityInit
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"proxiStep"
)
$
pack
$
show
(
getProximityStep
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"quaFactor"
)
$
pack
$
show
(
_qua_relevance
$
phyloQuality
$
getConfig
phylo
))
])
-- toAttr (fromStrict k) $ (pack . unwords) $ map show v
...
...
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
3517130e
...
...
@@ -66,6 +66,9 @@ roundToStr = printf "%0.*f"
countSup
::
Double
->
[
Double
]
->
Int
countSup
s
l
=
length
$
filter
(
>
s
)
l
dropByIdx
::
Int
->
[
a
]
->
[
a
]
dropByIdx
k
l
=
take
k
l
++
drop
(
k
+
1
)
l
elemIndex'
::
Eq
a
=>
a
->
[
a
]
->
Int
elemIndex'
e
l
=
case
(
List
.
elemIndex
e
l
)
of
...
...
@@ -253,8 +256,26 @@ filterProximity :: Proximity -> Double -> Double -> Bool
filterProximity
proximity
thr
local
=
case
proximity
of
WeightedLogJaccard
_
_
_
->
local
>=
thr
Hamming
->
undefined
Hamming
->
undefined
getProximityName
::
Proximity
->
String
getProximityName
proximity
=
case
proximity
of
WeightedLogJaccard
_
_
_
->
"WLJaccard"
Hamming
->
"Hamming"
getProximityInit
::
Proximity
->
Double
getProximityInit
proximity
=
case
proximity
of
WeightedLogJaccard
_
i
_
->
i
Hamming
->
undefined
getProximityStep
::
Proximity
->
Double
getProximityStep
proximity
=
case
proximity
of
WeightedLogJaccard
_
_
s
->
s
Hamming
->
undefined
---------------
-- | Phylo | --
...
...
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
3517130e
...
...
@@ -285,6 +285,36 @@ toAccuracy freq term thr branches =
branches'
::
[[
PhyloGroup
]]
branches'
=
relevantBranches
term
thr
branches
toRecallWeighted
::
Double
->
[
Double
]
->
[
Double
]
toRecallWeighted
old
curr
=
let
old'
=
old
+
sum
curr
in
map
(
\
r
->
(
r
/
old'
)
*
r
)
curr
toRecall'
::
Quality
->
Map
Int
Double
->
[[
PhyloGroup
]]
->
Double
toRecall'
quality
frequency
branches
=
let
terms
=
keys
frequency
in
sum
$
map
(
\
term
->
toRecall
(
frequency
!
term
)
term
(
quality
^.
qua_minBranch
)
branches
)
terms
toPhyloQuality
::
Quality
->
Map
Int
Double
->
Double
->
[[
PhyloGroup
]]
->
Double
toPhyloQuality
quality
frequency
recall
branches
=
if
(
foldl'
(
\
acc
b
->
acc
&&
(
length
b
<
(
quality
^.
qua_minBranch
)))
True
branches
)
-- | the local phylo is composed of small branches
then
0
else
let
relevance
=
quality
^.
qua_relevance
-- | compute the F score for a given relevance
in
((
1
+
relevance
**
2
)
*
accuracy
*
recall
)
/
(((
relevance
**
2
)
*
accuracy
+
recall
))
where
terms
::
[
Int
]
terms
=
keys
frequency
-- | for each term compute the global accuracy
accuracy
::
Double
accuracy
=
sum
$
map
(
\
term
->
toAccuracy
(
frequency
!
term
)
term
(
quality
^.
qua_minBranch
)
branches
)
terms
toPhyloQuality'
::
Quality
->
Map
Int
Double
->
[[
PhyloGroup
]]
->
Double
toPhyloQuality'
quality
frequency
branches
=
...
...
@@ -295,7 +325,7 @@ toPhyloQuality' quality frequency branches =
let
relevance
=
quality
^.
qua_relevance
-- | compute the F score for a given relevance
in
((
1
+
relevance
**
2
)
*
accuracy
*
recall
)
/
(((
relevance
**
2
)
*
accuracy
+
recall
))
/
(((
relevance
**
2
)
*
accuracy
+
recall
))
where
terms
::
[
Int
]
terms
=
keys
frequency
...
...
@@ -334,8 +364,8 @@ groupsToBranches groups =
in
groups'
`
using
`
parList
rdeepseq
)
graph
recursiveMatching
::
Proximity
->
Quality
->
Map
Int
Double
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Double
->
[[
PhyloGroup
]]
->
[
PhyloGroup
]
recursiveMatching
proximity
qua
freq
thr
frame
periods
docs
quality
branches
=
recursiveMatching
::
Proximity
->
Quality
->
Map
Int
Double
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Double
->
Double
->
[[
PhyloGroup
]]
->
[
PhyloGroup
]
recursiveMatching
proximity
qua
freq
thr
frame
periods
docs
quality
oldRecall
branches
=
if
(
length
branches
==
(
length
$
concat
branches
))
then
concat
branches
else
if
thr
>=
1
...
...
@@ -347,14 +377,22 @@ recursiveMatching proximity qua freq thr frame periods docs quality branches =
True
->
concat
$
map
(
\
branches'
->
let
idx
=
fromJust
$
elemIndex
branches'
nextBranches
in
recursiveMatching
proximity
qua
freq
(
thr
+
(
getThresholdStep
proximity
))
frame
periods
docs
(
nextQualities
!!
idx
)
branches'
)
in
recursiveMatching
proximity
qua
freq
(
thr
+
(
getThresholdStep
proximity
))
frame
periods
docs
(
nextQualities
!!
idx
)
(
sum
$
dropByIdx
idx
nextRecalls
)
branches'
)
$
nextBranches
-- | failure : last step was a local maximum of quality, let's validate it (traceMatchFailure thr quality (sum nextQualities))
False
->
concat
branches
where
-- | 2) for each of the possible next branches process the phyloQuality score
nextQualities
::
[
Double
]
nextQualities
=
map
(
\
nextBranch
->
toPhyloQuality'
qua
freq
nextBranch
)
nextBranches
nextQualities
=
map
(
\
(
nextBranch
,
recall
)
->
toPhyloQuality
qua
freq
recall
nextBranch
)
$
zip
nextBranches
nextRecalls
-- nextQualities = map (\nextBranch -> toPhyloQuality' qua freq nextBranch) nextBranches
-------
nextRecalls
::
[
Double
]
nextRecalls
=
toRecallWeighted
oldRecall
$
map
(
\
nextBranch
->
toRecall'
qua
freq
nextBranch
)
nextBranches
-- | 1) for each local branch process a temporal matching then find the resulting branches
nextBranches
::
[[[
PhyloGroup
]]]
nextBranches
=
...
...
@@ -380,13 +418,17 @@ temporalMatching phylo = updatePhyloGroups 1 branches' phylo
+
(
getThresholdStep
$
phyloProximity
$
getConfig
phylo
))
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phylo
^.
phylo_timeDocs
)
quality
branches
(
phylo
^.
phylo_timeDocs
)
quality
recall
branches
-- | 3) process the quality score
quality
::
Double
quality
=
toPhyloQuality'
(
phyloQuality
$
getConfig
phylo
)
frequency
branches
quality
=
toPhyloQuality
(
phyloQuality
$
getConfig
phylo
)
frequency
recall
branches
-- quality = toPhyloQuality' (phyloQuality $ getConfig phylo) frequency branches
-------
recall
::
Double
recall
=
toRecall'
(
phyloQuality
$
getConfig
phylo
)
frequency
branches
-- | 3) process the constants of the quality score
frequency
::
Map
Int
Double
frequency
=
frequency
=
let
terms
=
ngramsInBranches
branches
in
fromList
$
map
(
\
t
->
(
t
,
((
termFreq'
t
$
concat
branches
)
/
(
sum
$
map
(
\
t'
->
termFreq'
t'
$
concat
branches
)
terms
))))
terms
-- | 2) group into branches
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment