Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
165
Issues
165
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
b0913118
Verified
Commit
b0913118
authored
Jun 09, 2025
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[phylo] small, mechanical refactorings
parent
f54f2036
Pipeline
#7649
failed with stages
in 97 minutes and 44 seconds
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
71 additions
and
74 deletions
+71
-74
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+21
-19
PhyloTools.hs
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
+50
-55
No files found.
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
b0913118
...
@@ -120,8 +120,10 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
...
@@ -120,8 +120,10 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
where
where
--------
--------
-- 2) find the local maxima in the quality distribution
-- 2) find the local maxima in the quality distribution
-- TODO (seeg, #471) head throws errors when list is too short.
-- TODO (seeg, #471) head throws errors when list is too short
-- I propose this implementation, but I'm not sure of the length of the list
-- (i.e. List.head . List.tail requires at least 2 elements in the
-- list). I propose this implementation, but I'm not sure of the
-- length of the list
-- maxima = if List.length qua' > 1 then
-- maxima = if List.length qua' > 1 then
-- [snd (List.head qua') > snd (List.head $ List.tail qua')] ++
-- [snd (List.head qua') > snd (List.head $ List.tail qua')] ++
-- (findMaxima qua') ++
-- (findMaxima qua') ++
...
@@ -134,9 +136,9 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
...
@@ -134,9 +136,9 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
-- 1.2)
-- 1.2)
qua'
::
[(
Double
,
Double
)]
qua'
::
[(
Double
,
Double
)]
qua'
=
foldl
(
\
acc
(
s
,
q
)
->
qua'
=
foldl
(
\
acc
(
s
,
q
)
->
if
length
acc
==
0
if
null
acc
then
[(
s
,
q
)]
then
[(
s
,
q
)]
else
if
(
snd
(
List
.
last
acc
)
)
==
q
else
if
snd
(
List
.
last
acc
)
==
q
then
acc
then
acc
else
acc
++
[(
s
,
q
)]
else
acc
++
[(
s
,
q
)]
)
[]
$
zip
(
Set
.
toList
similarities
)
qua
)
[]
$
zip
(
Set
.
toList
similarities
)
qua
...
@@ -145,10 +147,10 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
...
@@ -145,10 +147,10 @@ evolvSeaLadder nbFdt lambda freq similarities graph = map snd
qua
::
[
Double
]
qua
::
[
Double
]
qua
=
parMap
rpar
(
\
thr
->
qua
=
parMap
rpar
(
\
thr
->
let
edges
=
filter
(
\
edge
->
snd
edge
>=
thr
)
graph
let
edges
=
filter
(
\
edge
->
snd
edge
>=
thr
)
graph
nodes
=
nubOrd
$
concat
$
m
ap
(
\
((
n
,
n'
),
_
)
->
[
n
,
n'
])
edges
nodes
=
nubOrd
$
concat
M
ap
(
\
((
n
,
n'
),
_
)
->
[
n
,
n'
])
edges
branches
=
toRelatedComponents
nodes
edges
branches
=
toRelatedComponents
nodes
edges
in
toPhyloQuality
nbFdt
lambda
freq
branches
in
toPhyloQuality
nbFdt
lambda
freq
branches
)
$
(
Set
.
toList
similarities
)
)
$
Set
.
toList
similarities
{-
{-
...
@@ -220,7 +222,7 @@ appendGroups f lvl m phylo =
...
@@ -220,7 +222,7 @@ appendGroups f lvl m phylo =
-- select the cooc of the periods
-- select the cooc of the periods
(
elems
$
restrictKeys
(
getCoocByDate
phylo
)
$
periodsToYears
[
pId
])
(
elems
$
restrictKeys
(
getCoocByDate
phylo
)
$
periodsToYears
[
pId
])
-- select and merge the roots count of the periods
-- select and merge the roots count of the periods
(
foldl
(
\
acc
count
->
unionWith
(
+
)
acc
count
)
empty
(
foldl
(
\
acc
count
->
unionWith
(
+
)
acc
count
)
empty
$
elems
$
restrictKeys
(
getRootsCountByDate
phylo
)
$
periodsToYears
[
pId
]))
$
elems
$
restrictKeys
(
getRootsCountByDate
phylo
)
$
periodsToYears
[
pId
]))
]
)
[]
phyloCUnit
)
]
)
[]
phyloCUnit
)
else
else
...
@@ -416,9 +418,9 @@ groupDocsByPeriod' f pds docs =
...
@@ -416,9 +418,9 @@ groupDocsByPeriod' f pds docs =
let
docs'
=
groupBy
(
\
d
d'
->
f
d
==
f
d'
)
$
sortOn
f
docs
let
docs'
=
groupBy
(
\
d
d'
->
f
d
==
f
d'
)
$
sortOn
f
docs
periods
=
parMap
rpar
(
inPeriode
f
docs'
)
pds
periods
=
parMap
rpar
(
inPeriode
f
docs'
)
pds
in
tracePhylo
(
"
\n
"
<>
"-- | Group "
in
tracePhylo
(
"
\n
"
<>
"-- | Group "
<>
show
(
length
docs
)
<>
show
(
length
docs
)
<>
" docs by "
<>
" docs by "
<>
show
(
length
pds
)
<>
" periods"
<>
"
\n
"
::
Text
)
<>
show
(
length
pds
)
<>
" periods"
<>
"
\n
"
::
Text
)
$
fromList
$
zip
pds
periods
$
fromList
$
zip
pds
periods
where
where
--------------------------------------
--------------------------------------
...
@@ -435,8 +437,8 @@ groupDocsByPeriod f pds es =
...
@@ -435,8 +437,8 @@ groupDocsByPeriod f pds es =
let
periods
=
parMap
rpar
(
inPeriode
f
es
)
pds
let
periods
=
parMap
rpar
(
inPeriode
f
es
)
pds
in
tracePhylo
(
"
\n
"
<>
"-- | Group "
in
tracePhylo
(
"
\n
"
<>
"-- | Group "
<>
show
(
length
es
)
<>
" docs by "
<>
show
(
length
es
)
<>
" docs by "
<>
show
(
length
pds
)
<>
" periods"
<>
"
\n
"
::
Text
)
<>
show
(
length
pds
)
<>
" periods"
<>
"
\n
"
::
Text
)
$
fromList
$
zip
pds
periods
$
fromList
$
zip
pds
periods
where
where
--------------------------------------
--------------------------------------
...
@@ -465,7 +467,7 @@ docsToTermCount docs roots = fromList
...
@@ -465,7 +467,7 @@ docsToTermCount docs roots = fromList
docsToTimeTermCount
::
[
Document
]
->
Vector
Ngrams
->
(
Map
Date
(
Map
Int
Double
))
docsToTimeTermCount
::
[
Document
]
->
Vector
Ngrams
->
(
Map
Date
(
Map
Int
Double
))
docsToTimeTermCount
docs
roots
=
docsToTimeTermCount
docs
roots
=
let
docs'
=
Map
.
map
(
\
l
->
fromList
$
map
(
\
lst
->
(
head'
"docsToTimeTermCount"
lst
,
fromIntegral
$
length
lst
))
let
docs'
=
Map
.
map
(
\
l
->
fromList
$
map
(
\
lst
->
(
head'
"docsToTimeTermCount"
lst
,
fromIntegral
$
length
lst
))
$
group
$
D
.
sort
l
)
$
group
$
D
.
sort
l
)
$
fromListWith
(
++
)
$
fromListWith
(
++
)
...
@@ -492,9 +494,9 @@ docsToTimeScaleNb docs =
...
@@ -492,9 +494,9 @@ docsToTimeScaleNb docs =
let
docs'
=
fromListWith
(
+
)
$
map
(
\
d
->
(
date
d
,
1
))
docs
let
docs'
=
fromListWith
(
+
)
$
map
(
\
d
->
(
date
d
,
1
))
docs
time
=
fromList
$
map
(
\
t
->
(
t
,
0
))
$
toTimeScale
(
keys
docs'
)
1
time
=
fromList
$
map
(
\
t
->
(
t
,
0
))
$
toTimeScale
(
keys
docs'
)
1
in
tracePhylo
(
"
\n
"
<>
"-- | Group "
in
tracePhylo
(
"
\n
"
<>
"-- | Group "
<>
show
(
length
docs
)
<>
show
(
length
docs
)
<>
" docs by "
<>
" docs by "
<>
show
(
length
time
)
<>
show
(
length
time
)
<>
" unit of time"
<>
"
\n
"
::
Text
)
<>
" unit of time"
<>
"
\n
"
::
Text
)
$
unionWith
(
+
)
time
docs'
$
unionWith
(
+
)
time
docs'
...
@@ -506,7 +508,7 @@ initPhyloScales lvlMax pId =
...
@@ -506,7 +508,7 @@ initPhyloScales lvlMax pId =
setDefault
::
PhyloConfig
->
TimeUnit
->
Int
->
PhyloConfig
setDefault
::
PhyloConfig
->
TimeUnit
->
Int
->
PhyloConfig
setDefault
conf
timeScale
nbDocs
=
defaultConfig
setDefault
conf
timeScale
nbDocs
=
defaultConfig
{
corpusPath
=
(
corpusPath
conf
)
{
corpusPath
=
(
corpusPath
conf
)
,
listPath
=
(
listPath
conf
)
,
listPath
=
(
listPath
conf
)
,
outputPath
=
(
outputPath
conf
)
,
outputPath
=
(
outputPath
conf
)
...
@@ -515,11 +517,11 @@ setDefault conf timeScale nbDocs = defaultConfig
...
@@ -515,11 +517,11 @@ setDefault conf timeScale nbDocs = defaultConfig
,
phyloName
=
(
phyloName
conf
)
,
phyloName
=
(
phyloName
conf
)
,
defaultMode
=
True
,
defaultMode
=
True
,
timeUnit
=
timeScale
,
timeUnit
=
timeScale
,
clique
=
Fis
(
toSupport
nbDocs
)
3
}
,
clique
=
Fis
(
toSupport
nbDocs
)
3
}
where
where
--------------------------------------
--------------------------------------
toSupport
::
Int
->
Support
toSupport
::
Int
->
Support
toSupport
n
toSupport
n
|
n
<
500
=
1
|
n
<
500
=
1
|
n
<
1000
=
2
|
n
<
1000
=
2
|
n
<
2000
=
3
|
n
<
2000
=
3
...
@@ -548,9 +550,9 @@ initPhylo docs conf =
...
@@ -548,9 +550,9 @@ initPhylo docs conf =
else
defaultPhyloParam
{
_phyloParam_config
=
conf
}
else
defaultPhyloParam
{
_phyloParam_config
=
conf
}
periods
=
toPeriods
(
D
.
sort
$
D
.
nub
$
map
date
docs
)
(
getTimePeriod
timeScale
)
(
getTimeStep
timeScale
)
periods
=
toPeriods
(
D
.
sort
$
D
.
nub
$
map
date
docs
)
(
getTimePeriod
timeScale
)
(
getTimeStep
timeScale
)
in
tracePhylo
(
"
\n
"
<>
"-- | Init a phylo out of "
in
tracePhylo
(
"
\n
"
<>
"-- | Init a phylo out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
::
Text
)
<>
show
(
length
docs
)
<>
" docs
\n
"
::
Text
)
$
tracePhylo
(
"
\n
"
<>
"-- | lambda "
$
tracePhylo
(
"
\n
"
<>
"-- | lambda "
<>
show
(
_qua_granularity
$
phyloQuality
$
_phyloParam_config
params
)
::
Text
)
<>
show
(
_qua_granularity
$
phyloQuality
$
_phyloParam_config
params
)
::
Text
)
$
Phylo
foundations
$
Phylo
foundations
docsSources
docsSources
docsCounts
docsCounts
...
...
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
View file @
b0913118
...
@@ -11,11 +11,10 @@ Portability : POSIX
...
@@ -11,11 +11,10 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE ViewPatterns #-}
module
Gargantext.Core.Viz.Phylo.PhyloTools
where
module
Gargantext.Core.Viz.Phylo.PhyloTools
where
import
Control.Lens
hiding
(
Level
)
import
Control.Lens
(
over
,
filtered
,
view
,
(
%~
)
)
import
Data.List
(
union
,
nub
,
init
,
tail
,
partition
,
nubBy
,
(
!!
))
import
Data.List
(
union
,
nub
,
init
,
tail
,
partition
,
nubBy
,
(
!!
))
import
Data.List
qualified
as
List
import
Data.List
qualified
as
List
import
Data.Map
(
elems
,
empty
,
fromList
,
findWithDefault
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
restrictKeys
)
import
Data.Map
(
elems
,
empty
,
fromList
,
findWithDefault
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
restrictKeys
)
...
@@ -30,7 +29,7 @@ import Data.Vector (Vector, elemIndex)
...
@@ -30,7 +29,7 @@ import Data.Vector (Vector, elemIndex)
import
Data.Vector
qualified
as
Vector
import
Data.Vector
qualified
as
Vector
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Prelude
hiding
(
empty
)
import
Gargantext.Prelude
hiding
(
empty
)
import
Text.Printf
import
Text.Printf
(
PrintfArg
,
printf
)
------------
------------
-- | Io | --
-- | Io | --
...
@@ -68,7 +67,7 @@ truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
...
@@ -68,7 +67,7 @@ truncate' x n = (fromIntegral $ (floor (x * t) :: Int)) / t
getInMap
::
Int
->
Map
Int
Double
->
Double
getInMap
::
Int
->
Map
Int
Double
->
Double
getInMap
k
m
=
getInMap
k
m
=
if
(
member
k
m
)
if
member
k
m
then
m
!
k
then
m
!
k
else
0
else
0
...
@@ -85,18 +84,16 @@ dropByIdx k l = take k l ++ drop (k+1) l
...
@@ -85,18 +84,16 @@ dropByIdx k l = take k l ++ drop (k+1) l
elemIndex'
::
Eq
a
=>
a
->
[
a
]
->
Int
elemIndex'
::
Eq
a
=>
a
->
[
a
]
->
Int
elemIndex'
e
l
=
case
(
List
.
elemIndex
e
l
)
of
elemIndex'
e
l
=
case
List
.
elemIndex
e
l
of
Nothing
->
panic
(
"[ERR][Viz.Phylo.PhyloTools] element not in list"
)
Nothing
->
panic
"[ERR][Viz.Phylo.PhyloTools] element not in list"
Just
i
->
i
Just
i
->
i
commonPrefix
::
Eq
a
=>
[
a
]
->
[
a
]
->
[
a
]
->
[
a
]
commonPrefix
::
Eq
a
=>
[
a
]
->
[
a
]
->
[
a
]
->
[
a
]
commonPrefix
lst
lst'
acc
=
commonPrefix
lst
lst'
acc
if
(
null
lst
||
null
lst'
)
|
null
lst
||
null
lst'
=
acc
then
acc
|
head'
"commonPrefix"
lst
==
head'
"commonPrefix"
lst'
=
commonPrefix
(
tail
lst
)
(
tail
lst'
)
(
acc
++
[
head'
"commonPrefix"
lst
])
else
if
(
head'
"commonPrefix"
lst
==
head'
"commonPrefix"
lst'
)
|
otherwise
=
acc
then
commonPrefix
(
tail
lst
)
(
tail
lst'
)
(
acc
++
[
head'
"commonPrefix"
lst
])
else
acc
---------------------
---------------------
...
@@ -118,13 +115,13 @@ sourcesToIdx ss ps = nub $ map (\s -> fromJust $ elemIndex s ps) ss
...
@@ -118,13 +115,13 @@ sourcesToIdx ss ps = nub $ map (\s -> fromJust $ elemIndex s ps) ss
-- | To transform a list of Ngrams Indexes into a Label
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel
::
Vector
Ngrams
->
[
Int
]
->
Text
ngramsToLabel
::
Vector
Ngrams
->
[
Int
]
->
Text
ngramsToLabel
ngrams
l
=
Text
.
unwords
$
tail'
"ngramsToLabel"
$
concat
$
map
(
\
n
->
[
"|"
,
n
])
$
ngramsToText
ngrams
l
ngramsToLabel
ngrams
l
=
Text
.
unwords
$
tail'
"ngramsToLabel"
$
concat
Map
(
\
n
->
[
"|"
,
n
])
(
ngramsToText
ngrams
l
)
idxToLabel
::
[
Int
]
->
String
idxToLabel
::
[
Int
]
->
String
idxToLabel
l
=
List
.
unwords
$
tail'
"idxToLabel"
$
concat
$
m
ap
(
\
n
->
[
"|"
,
show
n
])
l
idxToLabel
l
=
List
.
unwords
$
tail'
"idxToLabel"
$
concat
M
ap
(
\
n
->
[
"|"
,
show
n
])
l
idxToLabel'
::
[
Double
]
->
String
idxToLabel'
::
[
Double
]
->
String
idxToLabel'
l
=
List
.
unwords
$
tail'
"idxToLabel"
$
concat
$
m
ap
(
\
n
->
[
"|"
,
show
n
])
l
idxToLabel'
l
=
List
.
unwords
$
tail'
"idxToLabel"
$
concat
M
ap
(
\
n
->
[
"|"
,
show
n
])
l
-- | To transform a list of Ngrams Indexes into a list of Text
-- | To transform a list of Ngrams Indexes into a list of Text
ngramsToText
::
Vector
Ngrams
->
[
Int
]
->
[
Text
]
ngramsToText
::
Vector
Ngrams
->
[
Int
]
->
[
Text
]
...
@@ -137,8 +134,7 @@ ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
...
@@ -137,8 +134,7 @@ ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
-- | To transform a list of periods into a set of Dates
-- | To transform a list of periods into a set of Dates
periodsToYears
::
[(
Date
,
Date
)]
->
Set
Date
periodsToYears
::
[(
Date
,
Date
)]
->
Set
Date
periodsToYears
periods
=
(
Set
.
fromList
.
sort
.
concat
)
periodsToYears
periods
=
(
Set
.
fromList
.
sort
)
(
concatMap
(
\
(
d
,
d'
)
->
[
d
..
d'
])
periods
)
$
map
(
\
(
d
,
d'
)
->
[
d
..
d'
])
periods
findBounds
::
[
Date
]
->
(
Date
,
Date
)
findBounds
::
[
Date
]
->
(
Date
,
Date
)
...
@@ -159,7 +155,7 @@ toFstDate ds = snd
...
@@ -159,7 +155,7 @@ toFstDate ds = snd
$
head'
"firstDate"
$
head'
"firstDate"
$
sortOn
fst
$
sortOn
fst
$
map
(
\
d
->
$
map
(
\
d
->
let
d'
=
fromMaybe
(
error
"toFstDate"
)
$
readMaybe
(
filter
(
\
c
->
notElem
c
[
'U'
,
'T'
,
'C'
,
' '
,
':'
,
'-'
])
$
unpack
d
)
::
Int
let
d'
=
fromMaybe
(
error
"toFstDate"
)
$
readMaybe
(
filter
(
\
c
->
c
`
notElem
`
[
'U'
,
'T'
,
'C'
,
' '
,
':'
,
'-'
])
$
unpack
d
)
::
Int
in
(
d'
,
d
))
ds
in
(
d'
,
d
))
ds
toLstDate
::
[
Text
]
->
Text
toLstDate
::
[
Text
]
->
Text
...
@@ -168,12 +164,12 @@ toLstDate ds = snd
...
@@ -168,12 +164,12 @@ toLstDate ds = snd
$
reverse
$
reverse
$
sortOn
fst
$
sortOn
fst
$
map
(
\
d
->
$
map
(
\
d
->
let
d'
=
fromMaybe
(
error
"toLstDate"
)
$
readMaybe
(
filter
(
\
c
->
notElem
c
[
'U'
,
'T'
,
'C'
,
' '
,
':'
,
'-'
])
$
unpack
d
)
::
Int
let
d'
=
fromMaybe
(
error
"toLstDate"
)
$
readMaybe
(
filter
(
\
c
->
c
`
notElem
`
[
'U'
,
'T'
,
'C'
,
' '
,
':'
,
'-'
])
$
unpack
d
)
::
Int
in
(
d'
,
d
))
ds
in
(
d'
,
d
))
ds
getTimeScale
::
Phylo
->
[
Char
]
getTimeScale
::
Phylo
->
[
Char
]
getTimeScale
p
=
case
(
timeUnit
$
getConfig
p
)
of
getTimeScale
p
=
case
timeUnit
$
getConfig
p
of
Epoch
{}
->
"epoch"
Epoch
{}
->
"epoch"
Year
{}
->
"year"
Year
{}
->
"year"
Month
{}
->
"month"
Month
{}
->
"month"
...
@@ -228,21 +224,21 @@ isNested l l'
...
@@ -228,21 +224,21 @@ isNested l l'
-- | To filter Fis with small Support but by keeping non empty Periods
-- | To filter Fis with small Support but by keeping non empty Periods
keepFilled
::
(
Int
->
[
a
]
->
[
a
])
->
Int
->
[
a
]
->
[
a
]
keepFilled
::
(
Int
->
[
a
]
->
[
a
])
->
Int
->
[
a
]
->
[
a
]
keepFilled
f
thr
l
=
if
(
null
$
f
thr
l
)
&&
(
not
$
null
l
)
keepFilled
f
thr
l
=
if
null
(
f
thr
l
)
&&
not
(
null
l
)
then
keepFilled
f
(
thr
-
1
)
l
then
keepFilled
f
(
thr
-
1
)
l
else
f
thr
l
else
f
thr
l
-- | General workhorse to use in lieu of /trace/. It decides at compile
-- | General workhorse to use in lieu of /trace/. It decides at compile
-- time whether or not debug logs are enabled.
-- time whether or not debug logs are enabled.
tracePhylo
::
(
Print
s
,
IsString
s
)
=>
s
->
a
->
a
tracePhylo
::
(
Print
s
,
IsString
s
)
=>
s
->
a
->
a
#
if
NO_PHYLO_DEBUG_LOGS
tracePhylo
_
p
=
p
#
else
tracePhylo
msg
p
=
trace
msg
p
tracePhylo
msg
p
=
trace
msg
p
#
endif
traceClique
::
Map
(
Date
,
Date
)
[
Clustering
]
->
String
traceClique
::
Map
(
Date
,
Date
)
[
Clustering
]
->
String
traceClique
mFis
=
foldl
(
\
msg
cpt
->
msg
<>
show
(
countSup
cpt
cliques
)
<>
" (>"
<>
show
(
cpt
)
<>
") "
)
""
[
1
..
6
]
traceClique
mFis
=
foldl
(
\
msg
cpt
->
msg
<>
show
(
countSup
cpt
cliques
)
<>
" (>"
<>
show
cpt
<>
") "
)
""
[
1
..
6
]
where
where
--------------------------------------
--------------------------------------
cliques
::
[
Double
]
cliques
::
[
Double
]
...
@@ -251,7 +247,7 @@ traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>
...
@@ -251,7 +247,7 @@ traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>
traceSupport
::
Map
(
Date
,
Date
)
[
Clustering
]
->
String
traceSupport
::
Map
(
Date
,
Date
)
[
Clustering
]
->
String
traceSupport
mFis
=
foldl
(
\
msg
cpt
->
msg
<>
show
(
countSup
cpt
supports
)
<>
" (>"
<>
show
(
cpt
)
<>
") "
)
""
[
1
..
6
]
traceSupport
mFis
=
foldl
(
\
msg
cpt
->
msg
<>
show
(
countSup
cpt
supports
)
<>
" (>"
<>
show
cpt
<>
") "
)
""
[
1
..
6
]
where
where
--------------------------------------
--------------------------------------
supports
::
[
Double
]
supports
::
[
Double
]
...
@@ -321,7 +317,7 @@ ngramsToCooc :: [Int] -> [Cooc] -> Cooc
...
@@ -321,7 +317,7 @@ ngramsToCooc :: [Int] -> [Cooc] -> Cooc
ngramsToCooc
ngrams
coocs
=
ngramsToCooc
ngrams
coocs
=
let
cooc
=
foldl
(
\
acc
cooc'
->
sumCooc
acc
cooc'
)
empty
coocs
let
cooc
=
foldl
(
\
acc
cooc'
->
sumCooc
acc
cooc'
)
empty
coocs
pairs
=
listToKeys
ngrams
pairs
=
listToKeys
ngrams
in
filterWithKey
(
\
k
_
->
elem
k
pairs
)
cooc
in
filterWithKey
(
\
k
_
->
k
`
elem
`
pairs
)
cooc
-----------------
-----------------
...
@@ -333,14 +329,14 @@ ngramsToCooc ngrams coocs =
...
@@ -333,14 +329,14 @@ ngramsToCooc ngrams coocs =
-- density is defined in Callon M, Courtial JP, Laville F (1991) Co-word analysis as a tool for describing
-- density is defined in Callon M, Courtial JP, Laville F (1991) Co-word analysis as a tool for describing
-- the network of interaction between basic and technological research: The case of polymer chemistry.
-- the network of interaction between basic and technological research: The case of polymer chemistry.
-- Scientometric 22: 155–205.
-- Scientometric 22: 155–205.
ngramsToDensity
::
[
Int
]
->
[
Cooc
]
->
(
Map
Int
Double
)
->
Double
ngramsToDensity
::
[
Int
]
->
[
Cooc
]
->
Map
Int
Double
->
Double
ngramsToDensity
ngrams
coocs
rootsCount
=
ngramsToDensity
ngrams
coocs
rootsCount
=
let
cooc
=
foldl
(
\
acc
cooc'
->
sumCooc
acc
cooc'
)
empty
coocs
let
cooc
=
foldl
(
\
acc
cooc'
->
sumCooc
acc
cooc'
)
empty
coocs
pairs
=
listToCombi'
ngrams
pairs
=
listToCombi'
ngrams
density
=
map
(
\
(
i
,
j
)
->
density
=
map
(
\
(
i
,
j
)
->
let
nij
=
findWithDefault
0
(
i
,
j
)
cooc
let
nij
=
findWithDefault
0
(
i
,
j
)
cooc
in
(
nij
*
nij
)
/
((
rootsCount
!
i
)
*
(
rootsCount
!
j
)))
pairs
in
(
nij
*
nij
)
/
((
rootsCount
!
i
)
*
(
rootsCount
!
j
)))
pairs
in
(
sum
density
)
/
(
fromIntegral
$
length
ngrams
)
in
sum
density
/
fromIntegral
(
length
ngrams
)
...
@@ -351,14 +347,15 @@ ngramsToDensity ngrams coocs rootsCount =
...
@@ -351,14 +347,15 @@ ngramsToDensity ngrams coocs rootsCount =
-- | find the local maxima in a list of values
-- | find the local maxima in a list of values
findMaxima
::
[(
Double
,
Double
)]
->
[
Bool
]
findMaxima
::
[(
Double
,
Double
)]
->
[
Bool
]
findMaxima
lst
=
map
(
hasMax
)
$
toChunk
3
lst
findMaxima
lst
=
map
hasMax
$
toChunk
3
lst
where
where
------
------
hasMax
::
[(
Double
,
Double
)]
->
Bool
hasMax
::
[(
Double
,
Double
)]
->
Bool
hasMax
chunk
=
hasMax
chunk
=
if
(
length
chunk
)
/=
3
(
length
chunk
==
3
)
&&
then
False
(
let
snds
=
snd
<$>
chunk
else
(
snd
(
chunk
!!
0
)
<
snd
(
chunk
!!
1
))
&&
(
snd
(
chunk
!!
2
)
<
snd
(
chunk
!!
1
))
in
(
snds
!!
0
<
snds
!!
1
)
&&
(
snds
!!
2
<
snds
!!
1
))
-- | split a list into chunks of size n
-- | split a list into chunks of size n
...
@@ -539,7 +536,7 @@ getCoocByDate :: Phylo -> Map Date Cooc
...
@@ -539,7 +536,7 @@ getCoocByDate :: Phylo -> Map Date Cooc
getCoocByDate
phylo
=
coocByDate
(
phylo
^.
phylo_counts
)
getCoocByDate
phylo
=
coocByDate
(
phylo
^.
phylo_counts
)
getRootsCountByDate
::
Phylo
->
Map
Date
(
Map
Int
Double
)
getRootsCountByDate
::
Phylo
->
Map
Date
(
Map
Int
Double
)
getRootsCountByDate
phylo
=
rootsCountByDate
(
phylo
^.
phylo_counts
)
getRootsCountByDate
phylo
=
rootsCountByDate
(
phylo
^.
phylo_counts
)
getDocsByDate
::
Phylo
->
Map
Date
Double
getDocsByDate
::
Phylo
->
Map
Date
Double
getDocsByDate
phylo
=
docsByDate
(
phylo
^.
phylo_counts
)
getDocsByDate
phylo
=
docsByDate
(
phylo
^.
phylo_counts
)
...
@@ -555,19 +552,19 @@ getLastRootsFreq phylo = lastRootsFreq (phylo ^. phylo_counts)
...
@@ -555,19 +552,19 @@ getLastRootsFreq phylo = lastRootsFreq (phylo ^. phylo_counts)
setConfig
::
PhyloConfig
->
Phylo
->
Phylo
setConfig
::
PhyloConfig
->
Phylo
->
Phylo
setConfig
config
phylo
=
phylo
setConfig
config
phylo
=
phylo
&
phylo_param
.~
(
PhyloParam
&
phylo_param
.~
PhyloParam
{
((
phylo
^.
phylo_param
)
^
.
phyloParam_version
)
_phyloParam_version
=
phylo
^.
(
phylo_param
.
phyloParam_version
)
((
phylo
^.
phylo_param
)
^
.
phyloParam_software
)
,
_phyloParam_software
=
phylo
^.
(
phylo_param
.
phyloParam_software
)
config
)
,
_phyloParam_config
=
config
}
-- & phylo_param & phyloParam_config & phyloParam_config .~ config
-- & phylo_param & phyloParam_config & phyloParam_config .~ config
getRoots
::
Phylo
->
Vector
Ngrams
getRoots
::
Phylo
->
Vector
Ngrams
getRoots
phylo
=
(
phylo
^.
phylo_foundations
)
^.
foundations_roots
getRoots
phylo
=
phylo
^.
(
phylo_foundations
.
foundations_roots
)
getRootsInGroups
::
Phylo
->
Map
Int
[
PhyloGroupId
]
getRootsInGroups
::
Phylo
->
Map
Int
[
PhyloGroupId
]
getRootsInGroups
phylo
=
(
phylo
^.
phylo_foundations
)
^.
foundations_rootsInGroups
getRootsInGroups
phylo
=
phylo
^.
(
phylo_foundations
.
foundations_rootsInGroups
)
getSources
::
Phylo
->
Vector
Text
getSources
::
Phylo
->
Vector
Text
getSources
phylo
=
_sources
(
phylo
^.
phylo_sources
)
getSources
phylo
=
_sources
(
phylo
^.
phylo_sources
)
...
@@ -594,7 +591,7 @@ getGroupsFromScalePeriods :: Scale -> [Period] -> Phylo -> [PhyloGroup]
...
@@ -594,7 +591,7 @@ getGroupsFromScalePeriods :: Scale -> [Period] -> Phylo -> [PhyloGroup]
getGroupsFromScalePeriods
lvl
periods
phylo
=
getGroupsFromScalePeriods
lvl
periods
phylo
=
elems
$
view
(
phylo_periods
elems
$
view
(
phylo_periods
.
traverse
.
traverse
.
filtered
(
\
phyloPrd
->
elem
(
phyloPrd
^.
phylo_periodPeriod
)
periods
)
.
filtered
(
\
phyloPrd
->
(
phyloPrd
^.
phylo_periodPeriod
)
`
elem
`
periods
)
.
phylo_periodScales
.
phylo_periodScales
.
traverse
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_scaleScale
==
lvl
)
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_scaleScale
==
lvl
)
...
@@ -704,7 +701,7 @@ relatedComponents graph = foldl' (\branches groups ->
...
@@ -704,7 +701,7 @@ relatedComponents graph = foldl' (\branches groups ->
toRelatedComponents
::
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
->
[[
PhyloGroup
]]
toRelatedComponents
::
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
->
[[
PhyloGroup
]]
toRelatedComponents
nodes
edges
=
toRelatedComponents
nodes
edges
=
let
ref
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
nodes
let
ref
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
nodes
clusters
=
relatedComponents
$
(
(
map
(
\
((
g
,
g'
),
_
)
->
[
getGroupId
g
,
getGroupId
g'
])
edges
)
++
(
map
(
\
g
->
[
getGroupId
g
])
nodes
))
clusters
=
relatedComponents
$
(
map
(
\
((
g
,
g'
),
_
)
->
[
getGroupId
g
,
getGroupId
g'
])
edges
++
(
map
(
\
g
->
[
getGroupId
g
])
nodes
))
in
map
(
\
cluster
->
map
(
\
gId
->
ref
!
gId
)
cluster
)
clusters
in
map
(
\
cluster
->
map
(
\
gId
->
ref
!
gId
)
cluster
)
clusters
...
@@ -746,15 +743,13 @@ getMinSharedNgrams proxi = case proxi of
...
@@ -746,15 +743,13 @@ getMinSharedNgrams proxi = case proxi of
----------------
----------------
intersectInit
::
Eq
a
=>
[
a
]
->
[
a
]
->
[
a
]
->
[
a
]
intersectInit
::
Eq
a
=>
[
a
]
->
[
a
]
->
[
a
]
->
[
a
]
intersectInit
acc
lst
lst'
=
intersectInit
acc
lst
lst'
if
(
null
lst
)
||
(
null
lst'
)
|
null
lst
||
null
lst'
=
acc
then
acc
|
head'
"intersectInit"
lst
==
head'
"intersectInit"
lst'
=
intersectInit
(
acc
++
[
head'
"intersectInit"
lst
])
(
tail
lst
)
(
tail
lst'
)
else
if
(
head'
"intersectInit"
lst
)
==
(
head'
"intersectInit"
lst'
)
|
otherwise
=
acc
then
intersectInit
(
acc
++
[
head'
"intersectInit"
lst
])
(
tail
lst
)
(
tail
lst'
)
else
acc
branchIdsToSimilarity
::
PhyloBranchId
->
PhyloBranchId
->
Double
->
Double
->
Double
branchIdsToSimilarity
::
PhyloBranchId
->
PhyloBranchId
->
Double
->
Double
->
Double
branchIdsToSimilarity
id
id'
thrInit
thrStep
=
thrInit
+
thrStep
*
(
fromIntegral
$
length
$
intersectInit
[]
(
snd
id
)
(
snd
id'
))
branchIdsToSimilarity
id
id'
thrInit
thrStep
=
thrInit
+
thrStep
*
fromIntegral
(
length
$
intersectInit
[]
(
snd
id
)
(
snd
id'
))
ngramsInBranches
::
[[
PhyloGroup
]]
->
[
Int
]
ngramsInBranches
::
[[
PhyloGroup
]]
->
[
Int
]
ngramsInBranches
branches
=
nub
$
foldl
(
\
acc
g
->
acc
++
(
g
^.
phylo_groupNgrams
))
[]
$
concat
branches
ngramsInBranches
branches
=
nub
$
foldl
(
\
acc
g
->
acc
++
(
g
^.
phylo_groupNgrams
))
[]
$
concat
branches
...
@@ -769,7 +764,7 @@ traceMatchSuccess thr qua qua' nextBranches =
...
@@ -769,7 +764,7 @@ traceMatchSuccess thr qua qua' nextBranches =
<>
" | "
<>
show
((
length
.
concat
.
concat
)
nextBranches
)
<>
" groups"
<>
"
\n
"
<>
" | "
<>
show
((
length
.
concat
.
concat
)
nextBranches
)
<>
" groups"
<>
"
\n
"
<>
" - splited with success in "
<>
show
(
map
length
nextBranches
)
<>
" sub-branches"
<>
"
\n
"
<>
" - splited with success in "
<>
show
(
map
length
nextBranches
)
<>
" sub-branches"
<>
"
\n
"
<>
" - for the local threshold "
<>
show
(
thr
)
<>
" - for the local threshold "
<>
show
(
thr
)
<>
" ( quality : "
<>
show
(
qua
)
<>
" < "
<>
show
(
qua'
)
<>
")
\n
"
::
Text
<>
" ( quality : "
<>
show
(
qua
)
<>
" < "
<>
show
(
qua'
)
<>
")
\n
"
::
Text
)
nextBranches
)
nextBranches
...
@@ -780,7 +775,7 @@ traceMatchFailure thr qua qua' branches =
...
@@ -780,7 +775,7 @@ traceMatchFailure thr qua qua' branches =
<>
",(1.."
<>
show
(
length
branches
)
<>
")]"
<>
",(1.."
<>
show
(
length
branches
)
<>
")]"
<>
" | "
<>
show
(
length
$
concat
branches
)
<>
" groups"
<>
"
\n
"
<>
" | "
<>
show
(
length
$
concat
branches
)
<>
" groups"
<>
"
\n
"
<>
" - split with failure for the local threshold "
<>
show
(
thr
)
<>
" - split with failure for the local threshold "
<>
show
(
thr
)
<>
" ( quality : "
<>
show
(
qua
)
<>
" > "
<>
show
(
qua'
)
<>
")
\n
"
::
Text
<>
" ( quality : "
<>
show
(
qua
)
<>
" > "
<>
show
(
qua'
)
<>
")
\n
"
::
Text
)
branches
)
branches
...
@@ -812,9 +807,9 @@ traceMatchEnd groups =
...
@@ -812,9 +807,9 @@ traceMatchEnd groups =
traceTemporalMatching
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceTemporalMatching
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceTemporalMatching
groups
=
traceTemporalMatching
groups
=
tracePhylo
(
"
\n
"
<>
"-- | Start temporal matching for "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
::
Text
)
groups
tracePhylo
(
"
\n
"
<>
"-- | Start temporal matching for "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
::
Text
)
groups
traceGroupsProxi
::
[
Double
]
->
[
Double
]
traceGroupsProxi
::
[
Double
]
->
[
Double
]
traceGroupsProxi
l
=
traceGroupsProxi
l
=
tracePhylo
(
"
\n
"
<>
"-- | "
<>
show
(
List
.
length
l
)
<>
" computed pairs of groups Similarity"
<>
"
\n
"
::
Text
)
l
tracePhylo
(
"
\n
"
<>
"-- | "
<>
show
(
List
.
length
l
)
<>
" computed pairs of groups Similarity"
<>
"
\n
"
::
Text
)
l
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment