Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Julien Moutinho
haskell-gargantext
Commits
7fef621e
Commit
7fef621e
authored
Oct 24, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add the maxClique (in progress)
parent
4f17f5dd
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
51 additions
and
206 deletions
+51
-206
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+10
-12
PhyloExample.hs
src/Gargantext/Viz/Phylo/PhyloExample.hs
+2
-2
PhyloExport.hs
src/Gargantext/Viz/Phylo/PhyloExport.hs
+1
-1
PhyloMaker.hs
src/Gargantext/Viz/Phylo/PhyloMaker.hs
+23
-21
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+12
-12
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+3
-158
No files found.
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
7fef621e
...
@@ -88,11 +88,13 @@ data ContextualUnit =
...
@@ -88,11 +88,13 @@ data ContextualUnit =
Fis
Fis
{
_fis_support
::
Int
{
_fis_support
::
Int
,
_fis_size
::
Int
}
,
_fis_size
::
Int
}
|
MaxClique
{
_clique_size
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
data
Quality
=
data
Quality
=
Quality
{
_qua_
relevance
::
Double
Quality
{
_qua_
granularity
::
Double
,
_qua_minBranch
::
Int
}
,
_qua_minBranch
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
...
@@ -125,7 +127,7 @@ defaultConfig =
...
@@ -125,7 +127,7 @@ defaultConfig =
,
phyloLevel
=
2
,
phyloLevel
=
2
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloSynchrony
=
ByProximityDistribution
0
,
phyloSynchrony
=
ByProximityDistribution
0
,
phyloQuality
=
Quality
1
0
3
,
phyloQuality
=
Quality
0
3
,
timeUnit
=
Year
3
1
5
,
timeUnit
=
Year
3
1
5
,
contextualUnit
=
Fis
1
5
,
contextualUnit
=
Fis
1
5
,
exportLabel
=
[
BranchLabel
MostInclusive
2
,
GroupLabel
MostEmergentInclusive
2
]
,
exportLabel
=
[
BranchLabel
MostInclusive
2
,
GroupLabel
MostEmergentInclusive
2
]
...
@@ -315,17 +317,13 @@ data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
...
@@ -315,17 +317,13 @@ data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
-- | Frequent Item Set | --
-- | Frequent Item Set | --
---------------------------
---------------------------
-- | Clique : Set of ngrams cooccurring in the same Document
type
Clique
=
Set
Ngrams
-- | Support : Number of Documents where a Clique occurs
-- | Support : Number of Documents where a Clique occurs
type
Support
=
Int
type
Support
=
Int
-- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
data
PhyloCUnit
=
PhyloCUnit
data
PhyloFis
=
PhyloFis
{
_phyloCUnit_nodes
::
Set
Ngrams
{
_phyloFis_clique
::
Clique
,
_phyloCUnit_support
::
Support
,
_phyloFis_support
::
Support
,
_phyloCUnit_period
::
(
Date
,
Date
)
,
_phyloFis_period
::
(
Date
,
Date
)
}
deriving
(
Generic
,
NFData
,
Show
,
Eq
)
}
deriving
(
Generic
,
NFData
,
Show
,
Eq
)
...
@@ -378,7 +376,7 @@ makeLenses ''ContextualUnit
...
@@ -378,7 +376,7 @@ makeLenses ''ContextualUnit
makeLenses
''
P
hyloLabel
makeLenses
''
P
hyloLabel
makeLenses
''
T
imeUnit
makeLenses
''
T
imeUnit
makeLenses
''
P
hyloFoundations
makeLenses
''
P
hyloFoundations
makeLenses
''
P
hylo
Fis
makeLenses
''
P
hylo
CUnit
makeLenses
''
P
hylo
makeLenses
''
P
hylo
makeLenses
''
P
hyloPeriod
makeLenses
''
P
hyloPeriod
makeLenses
''
P
hyloLevel
makeLenses
''
P
hyloLevel
...
...
src/Gargantext/Viz/Phylo/PhyloExample.hs
View file @
7fef621e
...
@@ -62,8 +62,8 @@ phylo1 = temporalMatching
...
@@ -62,8 +62,8 @@ phylo1 = temporalMatching
---------------------------------------------
---------------------------------------------
phyloFis
::
Map
(
Date
,
Date
)
[
Phylo
Fis
]
phyloFis
::
Map
(
Date
,
Date
)
[
Phylo
CUnit
]
phyloFis
=
toPhyloFis
docsByPeriods
(
get
FisSupport
$
contextualUnit
config
)
(
getFis
Size
$
contextualUnit
config
)
phyloFis
=
toPhyloFis
docsByPeriods
(
get
ContextualUnitSupport
$
contextualUnit
config
)
(
getContextualUnit
Size
$
contextualUnit
config
)
docsByPeriods
::
Map
(
Date
,
Date
)
[
Document
]
docsByPeriods
::
Map
(
Date
,
Date
)
[
Document
]
...
...
src/Gargantext/Viz/Phylo/PhyloExport.hs
View file @
7fef621e
...
@@ -171,7 +171,7 @@ exportToDot phylo export =
...
@@ -171,7 +171,7 @@ exportToDot phylo export =
,(
toAttr
(
fromStrict
"proxiName"
)
$
pack
$
show
(
getProximityName
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"proxiName"
)
$
pack
$
show
(
getProximityName
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"proxiInit"
)
$
pack
$
show
(
getProximityInit
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"proxiInit"
)
$
pack
$
show
(
getProximityInit
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"proxiStep"
)
$
pack
$
show
(
getProximityStep
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"proxiStep"
)
$
pack
$
show
(
getProximityStep
$
phyloProximity
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"quaFactor"
)
$
pack
$
show
(
_qua_
relevance
$
phyloQuality
$
getConfig
phylo
))
,(
toAttr
(
fromStrict
"quaFactor"
)
$
pack
$
show
(
_qua_
granularity
$
phyloQuality
$
getConfig
phylo
))
])
])
...
...
src/Gargantext/Viz/Phylo/PhyloMaker.hs
View file @
7fef621e
...
@@ -72,23 +72,23 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
...
@@ -72,23 +72,23 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
(
\
phyloLvl
->
if
lvl
==
(
phyloLvl
^.
phylo_levelLevel
)
(
\
phyloLvl
->
if
lvl
==
(
phyloLvl
^.
phylo_levelLevel
)
then
then
let
pId
=
phyloLvl
^.
phylo_levelPeriod
let
pId
=
phyloLvl
^.
phylo_levelPeriod
phylo
Fis
=
m
!
pId
phylo
CUnit
=
m
!
pId
in
phyloLvl
in
phyloLvl
&
phylo_levelGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
&
phylo_levelGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
groups
++
[
(((
pId
,
lvl
),
length
groups
)
groups
++
[
(((
pId
,
lvl
),
length
groups
)
,
f
obj
pId
lvl
(
length
groups
)
(
getRoots
phylo
)
,
f
obj
pId
lvl
(
length
groups
)
(
getRoots
phylo
)
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[
pId
]))
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[
pId
]))
]
)
[]
phylo
Fis
)
]
)
[]
phylo
CUnit
)
else
else
phyloLvl
)
phyloLvl
)
phylo
phylo
fisToGroup
::
Phylo
Fis
->
PhyloPeriodId
->
Level
->
Int
->
Vector
Ngrams
->
[
Cooc
]
->
PhyloGroup
fisToGroup
::
Phylo
CUnit
->
PhyloPeriodId
->
Level
->
Int
->
Vector
Ngrams
->
[
Cooc
]
->
PhyloGroup
fisToGroup
fis
pId
lvl
idx
fdt
coocs
=
fisToGroup
fis
pId
lvl
idx
fdt
coocs
=
let
ngrams
=
ngramsToIdx
(
Set
.
toList
$
fis
^.
phylo
Fis_clique
)
fdt
let
ngrams
=
ngramsToIdx
(
Set
.
toList
$
fis
^.
phylo
CUnit_nodes
)
fdt
in
PhyloGroup
pId
lvl
idx
""
in
PhyloGroup
pId
lvl
idx
""
(
fis
^.
phylo
Fis
_support
)
(
fis
^.
phylo
CUnit
_support
)
ngrams
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
ngramsToCooc
ngrams
coocs
)
(
1
,[
0
])
(
1
,[
0
])
...
@@ -98,11 +98,13 @@ fisToGroup fis pId lvl idx fdt coocs =
...
@@ -98,11 +98,13 @@ fisToGroup fis pId lvl idx fdt coocs =
toPhylo1
::
[
Document
]
->
Phylo
->
Phylo
toPhylo1
::
[
Document
]
->
Phylo
->
Phylo
toPhylo1
docs
phyloBase
=
temporalMatching
toPhylo1
docs
phyloBase
=
temporalMatching
$
appendGroups
fisToGroup
1
phylo
Fis
phyloBase
$
appendGroups
fisToGroup
1
phylo
CUnit
phyloBase
where
where
--------------------------------------
--------------------------------------
phyloFis
::
Map
(
Date
,
Date
)
[
PhyloFis
]
phyloCUnit
::
Map
(
Date
,
Date
)
[
PhyloCUnit
]
phyloFis
=
toPhyloFis
docs'
(
getFisSupport
$
contextualUnit
$
getConfig
phyloBase
)
(
getFisSize
$
contextualUnit
$
getConfig
phyloBase
)
phyloCUnit
=
case
(
contextualUnit
$
getConfig
phyloBase
)
of
Fis
s
s'
->
toPhyloFis
docs'
s
s'
MaxClique
_
->
undefined
--------------------------------------
--------------------------------------
docs'
::
Map
(
Date
,
Date
)
[
Document
]
docs'
::
Map
(
Date
,
Date
)
[
Document
]
docs'
=
groupDocsByPeriod
date
(
getPeriodIds
phyloBase
)
docs
docs'
=
groupDocsByPeriod
date
(
getPeriodIds
phyloBase
)
docs
...
@@ -115,30 +117,30 @@ toPhylo1 docs phyloBase = temporalMatching
...
@@ -115,30 +117,30 @@ toPhylo1 docs phyloBase = temporalMatching
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterFis
::
Bool
->
Int
->
(
Int
->
[
Phylo
Fis
]
->
[
PhyloFis
])
->
Map
(
Date
,
Date
)
[
PhyloFis
]
->
Map
(
Date
,
Date
)
[
PhyloFis
]
filterFis
::
Bool
->
Int
->
(
Int
->
[
Phylo
CUnit
]
->
[
PhyloCUnit
])
->
Map
(
Date
,
Date
)
[
PhyloCUnit
]
->
Map
(
Date
,
Date
)
[
PhyloCUnit
]
filterFis
keep
thr
f
m
=
case
keep
of
filterFis
keep
thr
f
m
=
case
keep
of
False
->
map
(
\
l
->
f
thr
l
)
m
False
->
map
(
\
l
->
f
thr
l
)
m
True
->
map
(
\
l
->
keepFilled
(
f
)
thr
l
)
m
True
->
map
(
\
l
->
keepFilled
(
f
)
thr
l
)
m
-- | To filter Fis with small Support
-- | To filter Fis with small Support
filterFisBySupport
::
Int
->
[
Phylo
Fis
]
->
[
PhyloFis
]
filterFisBySupport
::
Int
->
[
Phylo
CUnit
]
->
[
PhyloCUnit
]
filterFisBySupport
thr
l
=
filter
(
\
fis
->
(
fis
^.
phylo
Fis
_support
)
>=
thr
)
l
filterFisBySupport
thr
l
=
filter
(
\
fis
->
(
fis
^.
phylo
CUnit
_support
)
>=
thr
)
l
-- | To filter Fis with small Clique size
-- | To filter Fis with small Clique size
filterFisByClique
::
Int
->
[
Phylo
Fis
]
->
[
PhyloFis
]
filterFisByClique
::
Int
->
[
Phylo
CUnit
]
->
[
PhyloCUnit
]
filterFisByClique
thr
l
=
filter
(
\
fis
->
(
size
$
fis
^.
phylo
Fis_clique
)
>=
thr
)
l
filterFisByClique
thr
l
=
filter
(
\
fis
->
(
size
$
fis
^.
phylo
CUnit_nodes
)
>=
thr
)
l
-- | To filter nested Fis
-- | To filter nested Fis
filterFisByNested
::
Map
(
Date
,
Date
)
[
Phylo
Fis
]
->
Map
(
Date
,
Date
)
[
PhyloFis
]
filterFisByNested
::
Map
(
Date
,
Date
)
[
Phylo
CUnit
]
->
Map
(
Date
,
Date
)
[
PhyloCUnit
]
filterFisByNested
m
=
filterFisByNested
m
=
let
fis
=
map
(
\
l
->
let
fis
=
map
(
\
l
->
foldl
(
\
mem
f
->
if
(
any
(
\
f'
->
isNested
(
Set
.
toList
$
f'
^.
phylo
Fis_clique
)
(
Set
.
toList
$
f
^.
phyloFis_clique
))
mem
)
foldl
(
\
mem
f
->
if
(
any
(
\
f'
->
isNested
(
Set
.
toList
$
f'
^.
phylo
CUnit_nodes
)
(
Set
.
toList
$
f
^.
phyloCUnit_nodes
))
mem
)
then
mem
then
mem
else
else
let
fMax
=
filter
(
\
f'
->
not
$
isNested
(
Set
.
toList
$
f
^.
phylo
Fis_clique
)
(
Set
.
toList
$
f'
^.
phyloFis_clique
))
mem
let
fMax
=
filter
(
\
f'
->
not
$
isNested
(
Set
.
toList
$
f
^.
phylo
CUnit_nodes
)
(
Set
.
toList
$
f'
^.
phyloCUnit_nodes
))
mem
in
fMax
++
[
f
]
)
[]
l
)
in
fMax
++
[
f
]
)
[]
l
)
$
elems
m
$
elems
m
fis'
=
fis
`
using
`
parList
rdeepseq
fis'
=
fis
`
using
`
parList
rdeepseq
...
@@ -146,7 +148,7 @@ filterFisByNested m =
...
@@ -146,7 +148,7 @@ filterFisByNested m =
-- | To transform a time map of docs innto a time map of Fis with some filters
-- | To transform a time map of docs innto a time map of Fis with some filters
toPhyloFis
::
Map
(
Date
,
Date
)
[
Document
]
->
Int
->
Int
->
Map
(
Date
,
Date
)
[
Phylo
Fis
]
toPhyloFis
::
Map
(
Date
,
Date
)
[
Document
]
->
Int
->
Int
->
Map
(
Date
,
Date
)
[
Phylo
CUnit
]
toPhyloFis
phyloDocs
support
clique
=
traceFis
"Filtered Fis"
toPhyloFis
phyloDocs
support
clique
=
traceFis
"Filtered Fis"
$
filterFisByNested
$
filterFisByNested
$
traceFis
"Filtered by clique size"
$
traceFis
"Filtered by clique size"
...
@@ -156,10 +158,10 @@ toPhyloFis phyloDocs support clique = traceFis "Filtered Fis"
...
@@ -156,10 +158,10 @@ toPhyloFis phyloDocs support clique = traceFis "Filtered Fis"
$
traceFis
"Unfiltered Fis"
phyloFis
$
traceFis
"Unfiltered Fis"
phyloFis
where
where
--------------------------------------
--------------------------------------
phyloFis
::
Map
(
Date
,
Date
)
[
Phylo
Fis
]
phyloFis
::
Map
(
Date
,
Date
)
[
Phylo
CUnit
]
phyloFis
=
phyloFis
=
let
fis
=
map
(
\
(
prd
,
docs
)
->
let
lst
=
toList
$
fisWithSizePolyMap
(
Segment
1
20
)
1
(
map
text
docs
)
let
fis
=
map
(
\
(
prd
,
docs
)
->
let
lst
=
toList
$
fisWithSizePolyMap
(
Segment
1
20
)
1
(
map
text
docs
)
in
(
prd
,
map
(
\
f
->
Phylo
Fis
(
fst
f
)
(
snd
f
)
prd
)
lst
))
in
(
prd
,
map
(
\
f
->
Phylo
CUnit
(
fst
f
)
(
snd
f
)
prd
)
lst
))
$
toList
phyloDocs
$
toList
phyloDocs
fis'
=
fis
`
using
`
parList
rdeepseq
fis'
=
fis
`
using
`
parList
rdeepseq
in
fromList
fis'
in
fromList
fis'
...
@@ -209,9 +211,9 @@ groupDocsByPeriod f pds es =
...
@@ -209,9 +211,9 @@ groupDocsByPeriod f pds es =
docsToTermFreq
::
[
Document
]
->
Vector
Ngrams
->
Map
Int
Double
docsToTermFreq
::
[
Document
]
->
Vector
Ngrams
->
Map
Int
Double
docsToTermFreq
docs
fdt
=
docsToTermFreq
docs
fdt
=
let
nbDocs
=
fromIntegral
$
length
docs
let
nbDocs
=
fromIntegral
$
length
docs
freqs
=
map
(
/
nbDocs
)
freqs
=
map
(
/
(
log
nbDocs
)
)
$
fromList
$
fromList
$
map
(
\
lst
->
(
head'
"docsToTermFreq"
lst
,
fromIntegral
$
length
lst
))
$
map
(
\
lst
->
(
head'
"docsToTermFreq"
lst
,
log
$
fromIntegral
$
length
lst
))
$
group
$
sort
$
concat
$
map
(
\
d
->
nub
$
ngramsToIdx
(
text
d
)
fdt
)
docs
$
group
$
sort
$
concat
$
map
(
\
d
->
nub
$
ngramsToIdx
(
text
d
)
fdt
)
docs
sumFreqs
=
sum
$
elems
freqs
sumFreqs
=
sum
$
elems
freqs
in
map
(
/
sumFreqs
)
freqs
in
map
(
/
sumFreqs
)
freqs
...
...
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
7fef621e
...
@@ -162,28 +162,28 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l)
...
@@ -162,28 +162,28 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l)
else
f
thr
l
else
f
thr
l
traceClique
::
Map
(
Date
,
Date
)
[
Phylo
Fis
]
->
String
traceClique
::
Map
(
Date
,
Date
)
[
Phylo
CUnit
]
->
String
traceClique
mFis
=
foldl
(
\
msg
cpt
->
msg
<>
show
(
countSup
cpt
cliques
)
<>
" (>"
<>
show
(
cpt
)
<>
") "
)
""
[
1
..
6
]
traceClique
mFis
=
foldl
(
\
msg
cpt
->
msg
<>
show
(
countSup
cpt
cliques
)
<>
" (>"
<>
show
(
cpt
)
<>
") "
)
""
[
1
..
6
]
where
where
--------------------------------------
--------------------------------------
cliques
::
[
Double
]
cliques
::
[
Double
]
cliques
=
sort
$
map
(
fromIntegral
.
size
.
_phylo
Fis_clique
)
$
concat
$
elems
mFis
cliques
=
sort
$
map
(
fromIntegral
.
size
.
_phylo
CUnit_nodes
)
$
concat
$
elems
mFis
--------------------------------------
--------------------------------------
traceSupport
::
Map
(
Date
,
Date
)
[
Phylo
Fis
]
->
String
traceSupport
::
Map
(
Date
,
Date
)
[
Phylo
CUnit
]
->
String
traceSupport
mFis
=
foldl
(
\
msg
cpt
->
msg
<>
show
(
countSup
cpt
supports
)
<>
" (>"
<>
show
(
cpt
)
<>
") "
)
""
[
1
..
6
]
traceSupport
mFis
=
foldl
(
\
msg
cpt
->
msg
<>
show
(
countSup
cpt
supports
)
<>
" (>"
<>
show
(
cpt
)
<>
") "
)
""
[
1
..
6
]
where
where
--------------------------------------
--------------------------------------
supports
::
[
Double
]
supports
::
[
Double
]
supports
=
sort
$
map
(
fromIntegral
.
_phylo
Fis
_support
)
$
concat
$
elems
mFis
supports
=
sort
$
map
(
fromIntegral
.
_phylo
CUnit
_support
)
$
concat
$
elems
mFis
--------------------------------------
--------------------------------------
traceFis
::
[
Char
]
->
Map
(
Date
,
Date
)
[
Phylo
Fis
]
->
Map
(
Date
,
Date
)
[
PhyloFis
]
traceFis
::
[
Char
]
->
Map
(
Date
,
Date
)
[
Phylo
CUnit
]
->
Map
(
Date
,
Date
)
[
PhyloCUnit
]
traceFis
msg
mFis
=
trace
(
"
\n
"
<>
"-- | "
<>
msg
<>
" : "
<>
show
(
sum
$
map
length
$
elems
mFis
)
<>
"
\n
"
traceFis
msg
mFis
=
trace
(
"
\n
"
<>
"-- | "
<>
msg
<>
" : "
<>
show
(
sum
$
map
length
$
elems
mFis
)
<>
"
\n
"
<>
"Support : "
<>
(
traceSupport
mFis
)
<>
"
\n
"
<>
"Support : "
<>
(
traceSupport
mFis
)
<>
"
\n
"
<>
"
Clique
: "
<>
(
traceClique
mFis
)
<>
"
\n
"
)
mFis
<>
"
Nb Ngrams
: "
<>
(
traceClique
mFis
)
<>
"
\n
"
)
mFis
-------------------------
-------------------------
...
@@ -191,15 +191,15 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l
...
@@ -191,15 +191,15 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l
-------------------------
-------------------------
get
Fis
Support
::
ContextualUnit
->
Int
get
ContextualUnit
Support
::
ContextualUnit
->
Int
get
Fis
Support
unit
=
case
unit
of
get
ContextualUnit
Support
unit
=
case
unit
of
Fis
s
_
->
s
Fis
s
_
->
s
-- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support")
MaxClique
_
->
0
get
Fis
Size
::
ContextualUnit
->
Int
get
ContextualUnit
Size
::
ContextualUnit
->
Int
get
Fis
Size
unit
=
case
unit
of
get
ContextualUnit
Size
unit
=
case
unit
of
Fis
_
s
->
s
Fis
_
s
->
s
-- _ -> panic ("[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size")
MaxClique
s
->
s
--------------
--------------
...
...
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
7fef621e
...
@@ -15,7 +15,7 @@ Portability : POSIX
...
@@ -15,7 +15,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.TemporalMatching
where
module
Gargantext.Viz.Phylo.TemporalMatching
where
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
union
,
dropWhile
,
partition
,
delete
,
or
)
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
groupBy
,
scanl
,
nub
,
union
,
dropWhile
,
partition
,
or
)
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
,
findWithDefault
,
keys
,
(
!
),
filterWithKey
)
import
Data.Map
(
Map
,
fromList
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
,
findWithDefault
,
keys
,
(
!
),
filterWithKey
)
import
Gargantext.Prelude
import
Gargantext.Prelude
...
@@ -234,61 +234,10 @@ phyloBranchMatching frame periods proximity thr docs branch = traceBranchMatchin
...
@@ -234,61 +234,10 @@ phyloBranchMatching frame periods proximity thr docs branch = traceBranchMatchin
-----------------------
-----------------------
count
::
Eq
a
=>
a
->
[
a
]
->
Int
count
x
=
length
.
filter
(
==
x
)
termFreq'
::
Int
->
[
PhyloGroup
]
->
Double
termFreq'
term
groups
=
let
ngrams
=
concat
$
map
_phylo_groupNgrams
groups
in
log
((
fromIntegral
$
count
term
ngrams
)
/
(
fromIntegral
$
length
ngrams
))
relevantBranches
::
Int
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
relevantBranches
::
Int
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
relevantBranches
term
branches
=
relevantBranches
term
branches
=
filter
(
\
groups
->
(
any
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
groups
))
branches
filter
(
\
groups
->
(
any
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
groups
))
branches
branchCov'
::
[
PhyloGroup
]
->
[[
PhyloGroup
]]
->
Double
branchCov'
branch
branches
=
(
fromIntegral
$
length
branch
)
/
(
fromIntegral
$
length
$
concat
branches
)
toRecall
::
Double
->
Int
->
Int
->
[[
PhyloGroup
]]
->
Double
toRecall
freq
term
border
branches
=
-- | given a random term in a phylo
freq
-- | for each relevant branches
*
(
sum
$
map
(
\
branch
->
-- | given its local coverage
((
branchCov'
branch
branches'
)
/
(
sum
$
map
(
\
b
->
branchCov'
b
branches'
)
branches'
))
-- | compute the local recall
*
(
(
fromIntegral
$
length
$
filter
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
branch
)
/
(
(
fromIntegral
$
length
$
filter
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
$
concat
branches'
)
-- | with a ponderation from border branches
+
(
fromIntegral
border
))
))
branches'
)
where
branches'
::
[[
PhyloGroup
]]
branches'
=
relevantBranches
term
branches
toAccuracy
::
Double
->
Int
->
[[
PhyloGroup
]]
->
Double
toAccuracy
freq
term
branches
=
if
(
null
branches
)
then
0
else
-- | given a random term in a phylo
freq
-- | for each relevant branches
*
(
sum
$
map
(
\
branch
->
-- | given its local coverage
((
branchCov'
branch
branches'
)
/
(
sum
$
map
(
\
b
->
branchCov'
b
branches'
)
branches'
))
-- | compute the local accuracy
*
(
(
fromIntegral
$
length
$
filter
(
\
group
->
elem
term
$
group
^.
phylo_groupNgrams
)
branch
)
/
(
fromIntegral
$
length
branch
)))
branches'
)
where
branches'
::
[[
PhyloGroup
]]
branches'
=
relevantBranches
term
branches
fScore
::
Double
->
Int
->
[
PhyloGroup
]
->
[[
PhyloGroup
]]
->
Double
fScore
::
Double
->
Int
->
[
PhyloGroup
]
->
[[
PhyloGroup
]]
->
Double
fScore
beta
i
bk
bks
=
fScore
beta
i
bk
bks
=
let
recall
=
(
(
fromIntegral
$
length
$
filter
(
\
g
->
elem
i
$
g
^.
phylo_groupNgrams
)
bk
)
let
recall
=
(
(
fromIntegral
$
length
$
filter
(
\
g
->
elem
i
$
g
^.
phylo_groupNgrams
)
bk
)
...
@@ -314,28 +263,6 @@ toPhyloQuality' beta freq branches =
...
@@ -314,28 +263,6 @@ toPhyloQuality' beta freq branches =
$
keys
freq
$
keys
freq
toPhyloQuality
::
Double
->
Map
Int
Double
->
Int
->
Double
->
[[
PhyloGroup
]]
->
Double
toPhyloQuality
beta
frequency
border
oldAcc
branches
=
-- trace (" rec : " <> show(recall)) $
-- trace (" acc : " <> show(accuracy)) $
if
(
null
branches
)
then
0
else
((
1
+
beta
**
2
)
*
accuracy
*
recall
)
/
(((
beta
**
2
)
*
accuracy
+
recall
))
where
-- | for each term compute the global accuracy
accuracy
::
Double
accuracy
=
oldAcc
+
(
sum
$
map
(
\
term
->
toAccuracy
(
frequency
!
term
)
term
branches
)
$
keys
frequency
)
-- | for each term compute the global recall
recall
::
Double
recall
=
sum
$
map
(
\
term
->
toRecall
(
frequency
!
term
)
term
border
branches
)
$
keys
frequency
toBorderAccuracy
::
Map
Int
Double
->
[[
PhyloGroup
]]
->
Double
toBorderAccuracy
freq
branches
=
sum
$
map
(
\
t
->
toAccuracy
(
freq
!
t
)
t
branches
)
$
keys
freq
-----------------------------
-----------------------------
-- | Adaptative Matching | --
-- | Adaptative Matching | --
-----------------------------
-----------------------------
...
@@ -366,12 +293,6 @@ reduceFrequency frequency branches =
...
@@ -366,12 +293,6 @@ reduceFrequency frequency branches =
restrictKeys
frequency
(
Set
.
fromList
$
(
nub
.
concat
)
$
map
_phylo_groupNgrams
$
concat
branches
)
restrictKeys
frequency
(
Set
.
fromList
$
(
nub
.
concat
)
$
map
_phylo_groupNgrams
$
concat
branches
)
alterBorder
::
Int
->
[[
PhyloGroup
]]
->
[
PhyloGroup
]
->
Int
alterBorder
border
branches
branch
=
border
+
(
length
$
concat
branches
)
-
(
length
branch
)
-- | Important ne pas virer les filtree mais les mettre en false
seqMatching
::
Proximity
->
Double
->
Map
Int
Double
->
Int
->
Double
->
Int
->
Map
Date
Double
->
[
PhyloPeriodId
]
->
[([
PhyloGroup
],
Bool
)]
->
([
PhyloGroup
],
Bool
)
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
seqMatching
::
Proximity
->
Double
->
Map
Int
Double
->
Int
->
Double
->
Int
->
Map
Date
Double
->
[
PhyloPeriodId
]
->
[([
PhyloGroup
],
Bool
)]
->
([
PhyloGroup
],
Bool
)
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
seqMatching
proximity
beta
frequency
minBranch
egoThr
frame
docs
periods
done
ego
rest
=
seqMatching
proximity
beta
frequency
minBranch
egoThr
frame
docs
periods
done
ego
rest
=
-- | 1) keep or not the new division of ego
-- | 1) keep or not the new division of ego
...
@@ -422,38 +343,6 @@ recursiveMatching' proximity beta minBranch frequency egoThr frame periods docs
...
@@ -422,38 +343,6 @@ recursiveMatching' proximity beta minBranch frequency egoThr frame periods docs
in
recursiveMatching'
proximity
beta
minBranch
frequency'
(
egoThr
+
(
getThresholdStep
proximity
))
frame
periods
docs
branches'
in
recursiveMatching'
proximity
beta
minBranch
frequency'
(
egoThr
+
(
getThresholdStep
proximity
))
frame
periods
docs
branches'
recursiveMatching
::
Proximity
->
Double
->
Int
->
Map
Int
Double
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Double
->
Int
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
recursiveMatching
proximity
beta
minBranch
frequency
egoThr
frame
periods
docs
quality
border
oldAcc
groups
=
if
((
egoThr
>=
1
)
||
(
quality
>
quality'
)
||
((
length
$
concat
$
snd
branches'
)
==
(
length
groups
)))
then
trace
(
" ✗ F(β) = "
<>
show
(
quality
)
<>
" (vs) "
<>
show
(
quality'
)
<>
"
\n
"
<>
" |✓ "
<>
show
(
length
$
fst
branches'
)
<>
show
(
map
length
$
fst
branches'
)
<>
" |✗ "
<>
show
(
length
$
snd
branches'
)
<>
"["
<>
show
(
length
$
concat
$
snd
branches'
)
<>
"]"
)
$
groups
else
let
next
=
map
(
\
b
->
recursiveMatching
proximity
beta
minBranch
(
reduceFrequency
frequency
(
fst
branches'
))
(
egoThr
+
(
getThresholdStep
proximity
))
frame
periods
docs
quality'
(
alterBorder
border
(
fst
branches'
)
b
)
(
oldAcc
+
(
toBorderAccuracy
frequency
(
delete
b
((
fst
branches'
)
++
(
snd
branches'
)))))
b
)
(
fst
branches'
)
in
trace
(
" ✓ F(β) = "
<>
show
(
quality
)
<>
" (vs) "
<>
show
(
quality'
)
<>
"
\n
"
<>
" |✓ "
<>
show
(
length
$
fst
branches'
)
<>
show
(
map
length
$
fst
branches'
)
<>
" |✗ "
<>
show
(
length
$
snd
branches'
)
<>
"["
<>
show
(
length
$
concat
$
snd
branches'
)
<>
"]"
)
$
concat
(
next
++
(
snd
branches'
))
where
-- | 2) for each of the possible next branches process the phyloQuality score
quality'
::
Double
quality'
=
toPhyloQuality
beta
frequency
border
oldAcc
((
fst
branches'
)
++
(
snd
branches'
))
-- | 1) for each local branch process a temporal matching then find the resulting branches
branches'
::
([[
PhyloGroup
]],[[
PhyloGroup
]])
branches'
=
let
branches
=
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
phyloBranchMatching
frame
periods
proximity
egoThr
docs
groups
in
partition
(
\
b
->
length
b
>=
minBranch
)
(
branches
`
using
`
parList
rdeepseq
)
temporalMatching
::
Phylo
->
Phylo
temporalMatching
::
Phylo
->
Phylo
temporalMatching
phylo
=
updatePhyloGroups
1
temporalMatching
phylo
=
updatePhyloGroups
1
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
traceMatchEnd
$
concat
branches
)
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
traceMatchEnd
$
concat
branches
)
...
@@ -463,7 +352,7 @@ temporalMatching phylo = updatePhyloGroups 1
...
@@ -463,7 +352,7 @@ temporalMatching phylo = updatePhyloGroups 1
branches
::
[[
PhyloGroup
]]
branches
::
[[
PhyloGroup
]]
branches
=
map
fst
branches
=
map
fst
$
recursiveMatching'
(
phyloProximity
$
getConfig
phylo
)
$
recursiveMatching'
(
phyloProximity
$
getConfig
phylo
)
(
_qua_
relevance
$
phyloQuality
$
getConfig
phylo
)
(
_qua_
granularity
$
phyloQuality
$
getConfig
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
(
phylo
^.
phylo_termFreq
)
(
phylo
^.
phylo_termFreq
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
...
@@ -477,47 +366,3 @@ temporalMatching phylo = updatePhyloGroups 1
...
@@ -477,47 +366,3 @@ temporalMatching phylo = updatePhyloGroups 1
(
phyloProximity
$
getConfig
phylo
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
phyloProximity
$
getConfig
phylo
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
phylo
^.
phylo_timeDocs
)
(
traceTemporalMatching
$
getGroupsFromLevel
1
phylo
)
(
traceTemporalMatching
$
getGroupsFromLevel
1
phylo
)
\ No newline at end of file
temporalMatching'
::
Phylo
->
Phylo
temporalMatching'
phylo
=
updatePhyloGroups
1
branches'
phylo
where
-- | 5) apply the recursive matching
branches'
::
Map
PhyloGroupId
PhyloGroup
branches'
=
let
next
=
trace
(
" ✓ F(β) = "
<>
show
(
quality
)
<>
" |✓ "
<>
show
(
length
$
fst
branches
)
<>
show
(
map
length
$
fst
branches
)
<>
" |✗ "
<>
show
(
length
$
snd
branches
)
<>
"["
<>
show
(
length
$
concat
$
snd
branches
)
<>
"]"
)
$
map
(
\
branch
->
recursiveMatching
(
phyloProximity
$
getConfig
phylo
)
(
_qua_relevance
$
phyloQuality
$
getConfig
phylo
)
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)
(
reduceFrequency
frequency
(
fst
branches
))
(
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
+
(
getThresholdStep
$
phyloProximity
$
getConfig
phylo
))
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phylo
^.
phylo_timeDocs
)
quality
(
alterBorder
0
(
fst
branches
)
branch
)
(
toBorderAccuracy
frequency
(
delete
branch
((
fst
branches
)
++
(
snd
branches
))))
branch
)
(
fst
branches
)
in
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
traceMatchEnd
(
concat
(
next
++
(
snd
branches
)))
-- | 4) process the quality score
quality
::
Double
quality
=
toPhyloQuality
(
_qua_relevance
$
phyloQuality
$
getConfig
phylo
)
frequency
0
0
((
fst
branches
)
++
(
snd
branches
))
-- | 3) process the constants of the quality score
frequency
::
Map
Int
Double
frequency
=
let
terms
=
ngramsInBranches
((
fst
branches
)
++
(
snd
branches
))
freqs
=
map
(
\
t
->
termFreq'
t
$
concat
((
fst
branches
)
++
(
snd
branches
)))
terms
in
fromList
$
map
(
\
(
t
,
freq
)
->
(
t
,
freq
/
(
sum
freqs
)))
$
zip
terms
freqs
-- | 2) group into branches
branches
::
([[
PhyloGroup
]],[[
PhyloGroup
]])
branches
=
partition
(
\
b
->
length
b
>=
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
))
$
groupsToBranches
$
fromList
$
map
(
\
group
->
(
getGroupId
group
,
group
))
groups'
-- | 1) for each group process an initial temporal Matching
groups'
::
[
PhyloGroup
]
groups'
=
phyloBranchMatching
(
getTimeFrame
$
timeUnit
$
getConfig
phylo
)
(
getPeriodIds
phylo
)
(
phyloProximity
$
getConfig
phylo
)
(
getThresholdInit
$
phyloProximity
$
getConfig
phylo
)
(
phylo
^.
phylo_timeDocs
)
(
traceTemporalMatching
$
getGroupsFromLevel
1
phylo
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment