Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
406ae431
Commit
406ae431
authored
Oct 02, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
optimisation for temporal matching
parent
b0826576
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
193 additions
and
136 deletions
+193
-136
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+13
-2
Example.hs
src/Gargantext/Viz/Phylo/Example.hs
+2
-2
PhyloExport.hs
src/Gargantext/Viz/Phylo/PhyloExport.hs
+8
-4
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+169
-127
Tools.hs
src/Gargantext/Viz/Phylo/Tools.hs
+1
-1
No files found.
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
406ae431
...
...
@@ -88,7 +88,13 @@ data ContextualUnit =
Fis
{
_fis_support
::
Int
,
_fis_size
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
data
Quality
=
Quality
{
_qua_relevance
::
Double
,
_qua_minBranch
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
Config
=
...
...
@@ -100,6 +106,7 @@ data Config =
,
phyloLevel
::
Int
,
phyloProximity
::
Proximity
,
phyloSynchrony
::
Synchrony
,
phyloQuality
::
Quality
,
timeUnit
::
TimeUnit
,
contextualUnit
::
ContextualUnit
,
exportLabel
::
[
PhyloLabel
]
...
...
@@ -118,8 +125,9 @@ defaultConfig =
,
phyloLevel
=
1
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloSynchrony
=
ByProximityDistribution
0
,
phyloQuality
=
Quality
1
1
,
timeUnit
=
Year
3
1
5
,
contextualUnit
=
Fis
2
4
,
contextualUnit
=
Fis
1
4
,
exportLabel
=
[
BranchLabel
MostInclusive
2
,
GroupLabel
MostEmergentInclusive
2
]
,
exportSort
=
ByHierarchy
,
exportFilter
=
[
ByBranchSize
2
]
...
...
@@ -147,6 +155,8 @@ instance FromJSON Filter
instance
ToJSON
Filter
instance
FromJSON
Synchrony
instance
ToJSON
Synchrony
instance
FromJSON
Quality
instance
ToJSON
Quality
-- | Software parameters
...
...
@@ -362,6 +372,7 @@ data PhyloExport =
makeLenses
''
C
onfig
makeLenses
''
P
roximity
makeLenses
''
Q
uality
makeLenses
''
C
ontextualUnit
makeLenses
''
P
hyloLabel
makeLenses
''
T
imeUnit
...
...
src/Gargantext/Viz/Phylo/Example.hs
View file @
406ae431
...
...
@@ -83,7 +83,7 @@ queryViewEx = "level=3"
phyloQueryView
::
PhyloQueryView
phyloQueryView
=
PhyloQueryView
2
Merge
False
2
[
BranchAge
,
BranchBirth
,
BranchGroups
]
[]
[
BranchPeakInc
,
GroupLabelIncDyn
]
(
Just
(
ByBranchBirth
,
Asc
))
Json
Flat
True
phyloQueryView
=
PhyloQueryView
1
Merge
False
1
[
BranchAge
,
BranchBirth
,
BranchGroups
]
[]
[
BranchPeakInc
,
GroupLabelIncDyn
]
(
Just
(
ByBranchBirth
,
Asc
))
Json
Flat
True
--------------------------------------------------
...
...
@@ -110,7 +110,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild
::
PhyloQueryBuild
phyloQueryBuild
=
PhyloQueryBuild
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
3
1
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
5
20
)
5
0.8
0.5
4
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.3
0
)
3
1
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
9
10
)
5
0.8
0.5
4
1
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.3
0
)
...
...
src/Gargantext/Viz/Phylo/PhyloExport.hs
View file @
406ae431
...
...
@@ -156,6 +156,8 @@ mergePointers groups =
exportToDot
::
Phylo
->
PhyloExport
->
DotGraph
DotId
exportToDot
phylo
export
=
trace
(
"
\n
-- | Convert "
<>
show
(
length
$
export
^.
export_branches
)
<>
" branches and "
<>
show
(
length
$
export
^.
export_groups
)
<>
" groups to a dot file
\n
"
)
$
digraph
((
Str
.
fromStrict
)
$
(
phyloName
$
getConfig
phylo
))
$
do
-- | 1) init the dot graph
...
...
@@ -238,10 +240,12 @@ filterByBranchSize thr export =
&
export_groups
%~
(
filter
(
\
g
->
not
$
elem
(
g
^.
phylo_groupBranchId
)
(
map
_branch_id
$
snd
branches'
)))
processFilters
::
[
Filter
]
->
PhyloExport
->
PhyloExport
processFilters
filters
export
=
processFilters
::
[
Filter
]
->
Quality
->
PhyloExport
->
PhyloExport
processFilters
filters
qua
export
=
foldl
(
\
export'
f
->
case
f
of
ByBranchSize
thr
->
filterByBranchSize
thr
export'
ByBranchSize
thr
->
if
(
thr
<
(
fromIntegral
$
qua
^.
qua_minBranch
))
then
filterByBranchSize
(
fromIntegral
$
qua
^.
qua_minBranch
)
export'
else
filterByBranchSize
thr
export'
)
export
filters
--------------
...
...
@@ -439,7 +443,7 @@ processDynamics groups =
toPhyloExport
::
Phylo
->
DotGraph
DotId
toPhyloExport
phylo
=
exportToDot
phylo
$
processFilters
(
exportFilter
$
getConfig
phylo
)
$
processFilters
(
exportFilter
$
getConfig
phylo
)
(
phyloQuality
$
getConfig
phylo
)
$
processSort
(
exportSort
$
getConfig
phylo
)
$
processLabels
(
exportLabel
$
getConfig
phylo
)
(
getRoots
phylo
)
$
processMetrics
export
...
...
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
406ae431
...
...
@@ -15,14 +15,14 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.TemporalMatching
where
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
union
,
elemIndex
,
(
!!
),
dropWhile
,
partition
)
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
,
findWithDefault
,
filterWithKey
)
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
union
,
elemIndex
,
(
!!
),
dropWhile
)
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
,
findWithDefault
,
filterWithKey
,
keys
,
(
!
)
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Prelude
(
logBase
)
--
import Prelude (logBase)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
-- import Debug.Trace (trace)
...
...
@@ -98,90 +98,84 @@ toProximity docs proximity ego target target' =
-- | Local Matching | --
------------------------
toLastPeriod
::
Filiation
->
[
PhyloPeriodId
]
->
PhyloPeriodId
toLastPeriod
fil
periods
=
case
fil
of
ToParents
->
head'
"toLastPeriod"
(
sortOn
fst
periods
)
ToChilds
->
last'
"toLastPeriod"
(
sortOn
fst
periods
)
toLazyPairs
::
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
PhyloPeriodId
->
[(
PhyloGroup
,
PhyloGroup
)]
->
[(
PhyloGroup
,
PhyloGroup
)]
toLazyPairs
pointers
fil
thr
prox
prd
pairs
=
if
null
pointers
then
pairs
else
let
rest
=
filterPointers
prox
thr
pointers
in
if
null
rest
then
let
prd'
=
toLastPeriod
fil
(
map
(
fst
.
fst
.
fst
)
pointers
)
in
if
prd'
==
prd
then
[]
else
filter
(
\
(
g
,
g'
)
->
case
fil
of
ToParents
->
((
fst
$
g
^.
phylo_groupPeriod
)
<
(
fst
prd'
))
||
((
fst
$
g'
^.
phylo_groupPeriod
)
<
(
fst
prd'
))
ToChilds
->
((
fst
$
g
^.
phylo_groupPeriod
)
>
(
fst
prd'
))
||
((
fst
$
g'
^.
phylo_groupPeriod
)
>
(
fst
prd'
)))
pairs
else
[]
-- | Find pairs of valuable candidates to be matched
makePairs
::
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[
PhyloPeriodId
]
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs
candidates
periods
periods'
=
case
null
periods
of
True
->
[]
-- | at least on of the pair candidates should be from the last added period
False
->
filter
(
\
(
cdt
,
cdt'
)
->
((
inLastPeriod
cdt
periods
)
||
(
inLastPeriod
cdt'
periods
))
&&
(
not
$
inOldPeriods
cdt
periods'
)
&&
(
not
$
inOldPeriods
cdt'
periods'
))
$
listToKeys
candidates
makePairs'
::
PhyloGroup
->
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
Map
Date
Double
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs'
ego
candidates
periods
pointers
fil
thr
prox
docs
=
case
null
periods
of
True
->
[]
False
->
toLazyPairs
pointers
fil
thr
prox
lastPrd
-- | at least on of the pair candidates should be from the last added period
$
filter
(
\
(
g
,
g'
)
->
((
g
^.
phylo_groupPeriod
)
==
lastPrd
)
||
((
g'
^.
phylo_groupPeriod
)
==
lastPrd
))
$
listToKeys
$
filter
(
\
g
->
(
g
^.
phylo_groupPeriod
==
lastPrd
)
||
((
toProximity
docs
prox
ego
ego
g
)
>=
thr
))
candidates
where
inLastPeriod
::
PhyloGroup
->
[
PhyloPeriodId
]
->
Bool
inLastPeriod
g
prds
=
(
g
^.
phylo_groupPeriod
)
==
(
last'
"makePairs"
prds
)
--------------------------------------
inOldPeriods
::
PhyloGroup
->
[
PhyloPeriodId
]
->
Bool
inOldPeriods
g
prds
=
elem
(
g
^.
phylo_groupPeriod
)
prds
lastPrd
::
PhyloPeriodId
lastPrd
=
toLastPeriod
fil
periods
keepOldOnes
::
Filiation
->
Proximity
->
Double
->
PhyloGroup
->
Bool
keepOldOnes
fil
proxi
thr
ego
=
any
(
\
(
_
,
w
)
->
filterProximity
proxi
thr
w
)
$
getPeriodPointers
fil
ego
filterPointers
::
Proximity
->
Double
->
[
Pointer
]
->
[
Pointer
]
filterPointers
proxi
thr
pts
=
filter
(
\
(
_
,
w
)
->
filterProximity
proxi
thr
w
)
pts
findLastPeriod
::
Filiation
->
[
Pointer
]
->
PhyloPeriodId
findLastPeriod
fil
pts
=
case
fil
of
ToParents
->
head'
"findLastPeriod"
$
sortOn
fst
$
map
(
fst
.
fst
.
fst
)
pts
ToChilds
->
head'
"findLastPeriod"
$
reverse
$
sortOn
fst
$
map
(
fst
.
fst
.
fst
)
pts
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
candidates
fil
proxi
docs
thr
ego
=
if
keepOldOnes
fil
proxi
thr
ego
-- | keep some of the old pointers
then
addPointers
ego
fil
TemporalPointer
$
filterPointers
proxi
thr
$
getPeriodPointers
fil
ego
else
case
null
pointers
of
case
null
nextPointers
of
-- | let's find new pointers
True
->
addPointers
ego
fil
TemporalPointer
[]
True
->
if
null
$
filterPointers
proxi
thr
$
getPeriodPointers
fil
ego
then
addPointers
ego
fil
TemporalPointer
[]
-- | or keep the old ones
else
addPointers
ego
fil
TemporalPointer
$
filterPointers
proxi
thr
$
getPeriodPointers
fil
ego
False
->
addPointers
ego
fil
TemporalPointer
$
head'
"phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
reverse
$
sortOn
snd
$
head'
"pointers"
pointers
$
reverse
$
sortOn
snd
$
head'
"pointers"
$
nextPointers
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
where
--------------------------------------
oldPeriods
::
[
PhyloPeriodId
]
->
[
PhyloPeriodId
]
oldPeriods
periods
=
if
(
null
$
getPeriodPointers
fil
ego
)
then
[]
else
let
period
=
findLastPeriod
fil
$
getPeriodPointers
fil
ego
in
fst
$
partition
(
\
prd
->
case
fil
of
ToChilds
->
prd
<=
period
ToParents
->
prd
>=
period
)
periods
--------------------------------------
pointers
::
[[
Pointer
]]
pointers
=
take
1
nextPointers
::
[[
Pointer
]]
nextPointers
=
take
1
$
dropWhile
(
null
)
-- | for each time frame, process the proximity on relevant pairs of targeted groups
$
scanl
(
\
acc
groups
->
let
periods
=
nub
$
concat
$
map
(
\
gs
->
if
null
gs
then
[]
else
[
_phylo_groupPeriod
$
head'
"pointers"
gs
])
groups
periods'
=
oldPeriods
periods
pairs
=
makePairs
(
concat
groups
)
periods
periods'
in
acc
++
(
filterPointers
proxi
thr
$
concat
$
map
(
\
(
c
,
c'
)
->
-- | process the proximity between the current group and a pair of candidates
let
proximity
=
toProximity
(
filterDocs
docs
([
ego
^.
phylo_groupPeriod
]
++
periods
))
proxi
ego
c
c'
in
if
(
c
==
c'
)
then
[(
getGroupId
c
,
proximity
)]
else
[(
getGroupId
c
,
proximity
),(
getGroupId
c'
,
proximity
)]
)
pairs
)
)
[]
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
$
inits
candidates
let
periods
=
nub
$
map
_phylo_groupPeriod
$
concat
groups
docs'
=
(
filterDocs
docs
([
ego
^.
phylo_groupPeriod
]
++
periods
))
pairs
=
makePairs'
ego
(
concat
groups
)
periods
(
getPeriodPointers
fil
ego
)
fil
thr
proxi
docs
in
acc
++
(
filterPointers
proxi
thr
$
concat
$
map
(
\
(
c
,
c'
)
->
-- | process the proximity between the current group and a pair of candidates
let
proximity
=
toProximity
docs'
proxi
ego
c
c'
in
if
(
c
==
c'
)
then
[(
getGroupId
c
,
proximity
)]
else
[(
getGroupId
c
,
proximity
),(
getGroupId
c'
,
proximity
)]
)
pairs
))
[]
$
inits
candidates
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
filterDocs
::
Map
Date
Double
->
[
PhyloPeriodId
]
->
Map
Date
Double
...
...
@@ -215,18 +209,22 @@ getCandidates fil ego targets =
phyloBranchMatching
::
Int
->
[
PhyloPeriodId
]
->
Proximity
->
Double
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
phyloBranchMatching
frame
periods
proximity
thr
docs
branch
=
traceBranchMatching
proximity
thr
$
matchByPeriods
ToParents
$
groupByField
_phylo_groupPeriod
$
matchByPeriods
ToChilds
--
$ matchByPeriods ToParents
--
$ groupByField _phylo_groupPeriod
$
matchByPeriods
$
groupByField
_phylo_groupPeriod
branch
where
--------------------------------------
matchByPeriods
::
Filiation
->
Map
PhyloPeriodId
[
PhyloGroup
]
->
[
PhyloGroup
]
matchByPeriods
fil
branch'
=
foldl'
(
\
acc
prd
->
let
periods'
=
getNextPeriods
fil
frame
prd
periods
candidates
=
map
(
\
prd'
->
findWithDefault
[]
prd'
branch'
)
periods'
docs'
=
filterDocs
docs
([
prd
]
++
periods'
)
egos
=
map
(
\
g
->
phyloGroupMatching
(
getCandidates
fil
g
candidates
)
fil
proximity
docs'
thr
g
)
matchByPeriods
::
Map
PhyloPeriodId
[
PhyloGroup
]
->
[
PhyloGroup
]
matchByPeriods
branch'
=
foldl'
(
\
acc
prd
->
let
periodsPar
=
getNextPeriods
ToParents
frame
prd
periods
periodsChi
=
getNextPeriods
ToChilds
frame
prd
periods
candidatesPar
=
map
(
\
prd'
->
findWithDefault
[]
prd'
branch'
)
periodsPar
candidatesChi
=
map
(
\
prd'
->
findWithDefault
[]
prd'
branch'
)
periodsChi
docsPar
=
filterDocs
docs
([
prd
]
++
periodsPar
)
docsChi
=
filterDocs
docs
([
prd
]
++
periodsChi
)
egos
=
map
(
\
ego
->
phyloGroupMatching
(
getCandidates
ToParents
ego
candidatesPar
)
ToParents
proximity
docsPar
thr
$
phyloGroupMatching
(
getCandidates
ToChilds
ego
candidatesChi
)
ToChilds
proximity
docsChi
thr
ego
)
$
findWithDefault
[]
prd
branch'
egos'
=
egos
`
using
`
parList
rdeepseq
in
acc
++
egos'
)
[]
periods
...
...
@@ -237,48 +235,78 @@ phyloBranchMatching frame periods proximity thr docs branch = traceBranchMatchin
-----------------------
termFreq
::
Int
->
[[
PhyloGroup
]]
->
Double
termFreq
term
branches
=
(
sum
$
map
(
\
g
->
findWithDefault
0
(
term
,
term
)
(
g
^.
phylo_groupCooc
))
$
concat
branches
)
/
(
sum
$
map
(
\
g
->
getTrace
$
g
^.
phylo_groupCooc
)
$
concat
branches
)
entropy
::
[[
PhyloGroup
]]
->
Double
entropy
branches
=
let
terms
=
ngramsInBranches
branches
in
sum
$
map
(
\
term
->
(
1
/
log
(
termFreq
term
branches
))
/
(
sum
$
map
(
\
branch
->
1
/
log
(
termFreq
term
[
branch
]))
branches
)
*
(
sum
$
map
(
\
branch
->
let
q
=
branchObs
term
(
length
$
concat
branches
)
branch
in
if
(
q
==
0
)
then
0
else
-
q
*
logBase
2
q
)
branches
)
)
terms
where
-- | Probability to observe a branch given a random term of the phylo
branchObs
::
Int
->
Int
->
[
PhyloGroup
]
->
Double
branchObs
term
total
branch
=
(
fromIntegral
$
length
$
filter
(
\
g
->
elem
term
$
g
^.
phylo_groupNgrams
)
branch
)
/
(
fromIntegral
total
)
homogeneity
::
[[
PhyloGroup
]]
->
Double
homogeneity
branches
=
let
nbGroups
=
length
$
concat
branches
in
sum
$
map
(
\
branch
->
(
if
(
length
branch
==
nbGroups
)
then
1
else
(
1
/
log
(
branchCov
branch
nbGroups
))
/
(
sum
$
map
(
\
branch'
->
1
/
log
(
branchCov
branch'
nbGroups
))
branches
))
*
(
sum
$
map
(
\
term
->
(
termFreq
term
branches
)
/
(
sum
$
map
(
\
term'
->
termFreq
term'
branches
)
$
ngramsInBranches
[
branch
])
*
(
fromIntegral
$
sum
$
ngramsInBranches
[
filter
(
\
g
->
elem
term
$
g
^.
phylo_groupNgrams
)
branch
])
/
(
fromIntegral
$
sum
$
ngramsInBranches
[
branch
])
)
$
ngramsInBranches
[
branch
])
)
branches
count
::
Eq
a
=>
a
->
[
a
]
->
Int
count
x
=
length
.
filter
(
==
x
)
termFreq'
::
Int
->
[
PhyloGroup
]
->
Double
termFreq'
term
groups
=
let
ngrams
=
concat
$
map
_phylo_groupNgrams
groups
in
(
fromIntegral
$
count
term
ngrams
)
/
(
fromIntegral
$
length
ngrams
)
relevantBranches
::
Int
->
Int
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
relevantBranches
term
thr
branches
=
filter
(
\
groups
->
(
length
groups
>=
thr
)
&&
(
any
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
groups
))
branches
branchCov'
::
[
PhyloGroup
]
->
[[
PhyloGroup
]]
->
Double
branchCov'
branch
branches
=
(
fromIntegral
$
length
branch
)
/
(
fromIntegral
$
length
$
concat
branches
)
toRecall
::
Double
->
Int
->
Int
->
[[
PhyloGroup
]]
->
Double
toRecall
freq
term
thr
branches
=
-- | given a random term in a phylo
freq
-- | for each relevant branches
*
(
sum
$
map
(
\
branch
->
-- | given its local coverage
((
branchCov'
branch
branches'
)
/
(
sum
$
map
(
\
b
->
branchCov'
b
branches'
)
branches'
))
-- | compute the local recall
*
(
(
fromIntegral
$
length
$
filter
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
branch
)
/
(
fromIntegral
$
length
$
filter
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
$
concat
branches'
)))
branches'
)
where
branches'
::
[[
PhyloGroup
]]
branches'
=
relevantBranches
term
thr
branches
toAccuracy
::
Double
->
Int
->
Int
->
[[
PhyloGroup
]]
->
Double
toAccuracy
freq
term
thr
branches
=
-- | given a random term in a phylo
freq
-- | for each relevant branches
*
(
sum
$
map
(
\
branch
->
-- | given its local coverage
((
branchCov'
branch
branches'
)
/
(
sum
$
map
(
\
b
->
branchCov'
b
branches'
)
branches'
))
-- | compute the local accuracy
*
(
(
fromIntegral
$
length
$
filter
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
branch
)
/
(
fromIntegral
$
length
branch
)))
branches'
)
where
branchCov
::
[
PhyloGroup
]
->
Int
->
Double
branchCov
branch
total
=
(
fromIntegral
$
length
branch
)
/
(
fromIntegral
total
)
branches'
::
[[
PhyloGroup
]]
branches'
=
relevantBranches
term
thr
branches
toPhyloQuality'
::
Quality
->
Map
Int
Double
->
[[
PhyloGroup
]]
->
Double
toPhyloQuality'
quality
frequency
branches
=
if
(
foldl'
(
\
acc
b
->
acc
&&
(
length
b
<
(
quality
^.
qua_minBranch
)))
True
branches
)
-- | the local phylo is composed of small branches
then
0
else
let
relevance
=
quality
^.
qua_relevance
-- | compute the F score for a given relevance
in
((
1
+
relevance
**
2
)
*
accuracy
*
recall
)
/
(((
relevance
**
2
)
*
accuracy
+
recall
))
where
terms
::
[
Int
]
terms
=
keys
frequency
-- | for each term compute the global accuracy
accuracy
::
Double
accuracy
=
sum
$
map
(
\
term
->
toAccuracy
(
frequency
!
term
)
term
(
quality
^.
qua_minBranch
)
branches
)
terms
-- | for each term compute the global recall
recall
::
Double
recall
=
sum
$
map
(
\
term
->
toRecall
(
frequency
!
term
)
term
(
quality
^.
qua_minBranch
)
branches
)
terms
toPhyloQuality
::
[[
PhyloGroup
]]
->
Double
toPhyloQuality
branches
=
sqrt
(
homogeneity
branches
/
entropy
branches
)
-----------------------------
...
...
@@ -289,37 +317,44 @@ toPhyloQuality branches = sqrt (homogeneity branches / entropy branches)
groupsToBranches
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
groupsToBranches
groups
=
-- | run the related component algorithm
let
graph
=
zip
[
1
..
]
$
relatedComponents
$
map
(
\
group
->
[
getGroupId
group
]
let
egos
=
groupBy
(
\
gs
gs'
->
(
fst
$
fst
$
head'
"egos"
gs
)
==
(
fst
$
fst
$
head'
"egos"
gs'
))
$
sortOn
(
\
gs
->
fst
$
fst
$
head'
"egos"
gs
)
$
map
(
\
group
->
[
getGroupId
group
]
++
(
map
fst
$
group
^.
phylo_groupPeriodParents
)
++
(
map
fst
$
group
^.
phylo_groupPeriodChilds
)
)
$
elems
groups
-- | first find the related components by inside each ego's period
graph'
=
map
relatedComponents
egos
-- | then run it for the all the periods
graph
=
zip
[
1
..
]
$
relatedComponents
$
concat
(
graph'
`
using
`
parList
rdeepseq
)
-- | update each group's branch id
in
map
(
\
(
bId
,
ids
)
->
map
(
\
group
->
group
&
phylo_groupBranchId
%~
(
\
(
lvl
,
lst
)
->
(
lvl
,
lst
++
[
bId
])))
$
elems
$
restrictKeys
groups
(
Set
.
fromList
ids
))
graph
let
groups'
=
map
(
\
group
->
group
&
phylo_groupBranchId
%~
(
\
(
lvl
,
lst
)
->
(
lvl
,
lst
++
[
bId
])))
$
elems
$
restrictKeys
groups
(
Set
.
fromList
ids
)
in
groups'
`
using
`
parList
rdeepseq
)
graph
recursiveMatching
::
Proximity
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Double
->
[[
PhyloGroup
]]
->
[
PhyloGroup
]
recursiveMatching
proximity
thr
frame
periods
docs
quality
branches
=
recursiveMatching
::
Proximity
->
Quality
->
Map
Int
Double
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Double
->
[[
PhyloGroup
]]
->
[
PhyloGroup
]
recursiveMatching
proximity
qua
freq
thr
frame
periods
docs
quality
branches
=
if
(
length
branches
==
(
length
$
concat
branches
))
then
concat
branches
else
if
thr
>
1
else
if
thr
>
=
1
then
concat
branches
else
-- trace (show(quality) <> " (vs) sum of " <> show(nextQualities))
case
quality
<=
(
sum
nextQualities
)
of
-- | success : the new threshold improves the quality score, let's go deeper (traceMatchSuccess thr quality (sum nextQualities))
True
->
concat
$
map
(
\
branches'
->
let
idx
=
fromJust
$
elemIndex
branches'
nextBranches
in
recursiveMatching
proximity
(
thr
+
(
getThresholdStep
proximity
))
frame
periods
docs
(
nextQualities
!!
idx
)
branches'
)
let
idx
=
fromJust
$
elemIndex
branches'
nextBranches
in
recursiveMatching
proximity
qua
freq
(
thr
+
(
getThresholdStep
proximity
))
frame
periods
docs
(
nextQualities
!!
idx
)
branches'
)
$
nextBranches
-- | failure : last step was a local maximum of quality, let's validate it (traceMatchFailure thr quality (sum nextQualities))
False
->
concat
branches
where
-- | 2) for each of the possible next branches process the phyloQuality score
nextQualities
::
[
Double
]
nextQualities
=
map
toPhyloQuality
nextBranches
nextQualities
=
map
(
\
nextBranch
->
toPhyloQuality'
qua
freq
nextBranch
)
nextBranches
-- | 1) for each local branch process a temporal matching then find the resulting branches
nextBranches
::
[[[
PhyloGroup
]]]
nextBranches
=
...
...
@@ -338,7 +373,9 @@ temporalMatching phylo = updatePhyloGroups 1 branches' phylo
branches'
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
traceMatchEnd
$
recursiveMatching
(
phyloProximity
$
getConfig
phylo
)
$
recursiveMatching
(
phyloProximity
$
getConfig
phylo
)
(
phyloQuality
$
getConfig
phylo
)
frequency
(
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
+
(
getThresholdStep
$
phyloProximity
$
getConfig
phylo
))
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
...
...
@@ -346,7 +383,12 @@ temporalMatching phylo = updatePhyloGroups 1 branches' phylo
(
phylo
^.
phylo_timeDocs
)
quality
branches
-- | 3) process the quality score
quality
::
Double
quality
=
toPhyloQuality
branches
quality
=
toPhyloQuality'
(
phyloQuality
$
getConfig
phylo
)
frequency
branches
-- | 3) process the constants of the quality score
frequency
::
Map
Int
Double
frequency
=
let
terms
=
ngramsInBranches
branches
in
fromList
$
map
(
\
t
->
(
t
,
((
termFreq'
t
$
concat
branches
)
/
(
sum
$
map
(
\
t'
->
termFreq'
t'
$
concat
branches
)
terms
))))
terms
-- | 2) group into branches
branches
::
[[
PhyloGroup
]]
branches
=
groupsToBranches
$
fromList
$
map
(
\
group
->
(
getGroupId
group
,
group
))
groups'
...
...
src/Gargantext/Viz/Phylo/Tools.hs
View file @
406ae431
...
...
@@ -815,7 +815,7 @@ getProximity cluster = case cluster of
-- | To initialize all the Cluster / Proximity with their default parameters
initFis
::
Maybe
Bool
->
Maybe
Support
->
Maybe
Int
->
FisParams
initFis
(
def
True
->
kmf
)
(
def
2
->
min'
)
(
def
4
->
thr
)
=
FisParams
kmf
min'
thr
initFis
(
def
True
->
kmf
)
(
def
0
->
min'
)
(
def
0
->
thr
)
=
FisParams
kmf
min'
thr
initHamming
::
Maybe
Double
->
HammingParams
initHamming
(
def
0.01
->
sens
)
=
HammingParams
sens
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment