Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
160
Issues
160
List
Board
Labels
Milestones
Merge Requests
14
Merge Requests
14
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
8e353331
Commit
8e353331
authored
Jun 20, 2023
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
[MERGE] Phylo
parents
334d2b2d
e93416f8
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
132 additions
and
52 deletions
+132
-52
Main.hs
bin/gargantext-phylo/Main.hs
+28
-6
gargantext.cabal
gargantext.cabal
+1
-1
Phylo.hs
src/Gargantext/Core/Viz/Phylo.hs
+5
-1
Tools.hs
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
+5
-3
Example.hs
src/Gargantext/Core/Viz/Phylo/Example.hs
+1
-0
PhyloExport.hs
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
+24
-24
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+32
-13
PhyloTools.hs
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
+29
-2
SynchronicClustering.hs
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
+7
-2
No files found.
bin/gargantext-phylo/Main.hs
View file @
8e353331
...
...
@@ -21,7 +21,7 @@ import Control.Concurrent.Async (mapConcurrently)
import
Crypto.Hash.SHA256
(
hash
)
import
Data.Aeson
import
Data.Either
(
Either
(
..
),
fromRight
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
,
sort
,
tail
)
import
Data.List.Split
import
Data.Maybe
(
fromMaybe
)
import
Data.String
(
String
)
...
...
@@ -39,7 +39,7 @@ import Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloWithoutLink
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
,
toPeriods
,
getTimePeriod
,
getTimeStep
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
...
...
@@ -89,7 +89,7 @@ wosToDocs limit patterns time path = do
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
)
time
)
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
time
)
<$>
concat
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
...
...
@@ -109,6 +109,7 @@ csvToDocs parser patterns time path =
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
time
)
<$>
snd
<$>
either
(
\
err
->
panic
$
cs
$
"CSV error"
<>
(
show
err
))
identity
<$>
Csv
.
readCSVFile
path
Csv'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
...
...
@@ -117,18 +118,35 @@ csvToDocs parser patterns time path =
(
termsInText
patterns
$
(
csv'_title
row
)
<>
" "
<>
(
csv'_abstract
row
))
(
Just
$
csv'_weight
row
)
(
map
(
T
.
strip
.
pack
)
$
splitOn
";"
(
unpack
$
(
csv'_source
row
)))
time
)
<$>
snd
<$>
Csv
.
readWeightedCsv
path
-- To parse a file into a list of Document
fileToDocs
'
::
CorpusParser
->
FilePath
->
TimeUnit
->
TermList
->
IO
[
Document
]
fileToDocs
'
parser
path
time
lst
=
do
fileToDocs
Advanced
::
CorpusParser
->
FilePath
->
TimeUnit
->
TermList
->
IO
[
Document
]
fileToDocs
Advanced
parser
path
time
lst
=
do
let
patterns
=
buildPatterns
lst
case
parser
of
Wos
limit
->
wosToDocs
limit
patterns
time
path
Csv
_
->
csvToDocs
parser
patterns
time
path
Csv'
_
->
csvToDocs
parser
patterns
time
path
fileToDocsDefault
::
CorpusParser
->
FilePath
->
[
TimeUnit
]
->
TermList
->
IO
[
Document
]
fileToDocsDefault
parser
path
timeUnits
lst
=
if
length
timeUnits
>
0
then
do
let
timeUnit
=
(
head'
"fileToDocsDefault"
timeUnits
)
docs
<-
fileToDocsAdvanced
parser
path
timeUnit
lst
let
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
timeUnit
)
(
getTimeStep
timeUnit
)
if
(
length
periods
<
3
)
then
fileToDocsDefault
parser
path
(
tail
timeUnits
)
lst
else
pure
docs
else
panic
"this corpus is incompatible with the phylomemy reconstruction"
-- on passe à passer la time unit dans la conf envoyé au phyloMaker
-- dans le phyloMaker si default est true alors dans le setDefault ou pense à utiliser la TimeUnit de la conf
---------------
-- | Label | --
...
...
@@ -251,7 +269,11 @@ main = do
printIOMsg
"Parse the corpus"
mapList
<-
fileToList
(
listParser
config
)
(
listPath
config
)
corpus
<-
fileToDocs'
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
corpus
<-
if
(
defaultMode
config
)
then
fileToDocsDefault
(
corpusParser
config
)
(
corpusPath
config
)
[
Year
3
1
5
,
Month
3
1
5
,
Week
4
2
5
]
mapList
else
fileToDocsAdvanced
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOComment
(
show
(
length
$
nub
$
concat
$
map
text
corpus
)
<>
" Size ngs_coterms"
)
...
...
gargantext.cabal
View file @
8e353331
src/Gargantext/Core/Viz/Phylo.hs
View file @
8e353331
...
...
@@ -135,7 +135,7 @@ data TimeUnit =
{
_day_period
::
Int
,
_day_step
::
Int
,
_day_matchingFrame
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
,
NFData
)
instance
ToSchema
TimeUnit
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
...
...
@@ -355,6 +355,7 @@ data Document = Document
,
text
::
[
Ngrams
]
,
weight
::
Maybe
Double
,
sources
::
[
Text
]
,
docTime
::
TimeUnit
}
deriving
(
Eq
,
Show
,
Generic
,
NFData
)
...
...
@@ -372,6 +373,7 @@ data PhyloFoundations = PhyloFoundations
data
PhyloCounts
=
PhyloCounts
{
coocByDate
::
!
(
Map
Date
Cooc
)
,
docsByDate
::
!
(
Map
Date
Double
)
,
rootsCountByDate
::
!
(
Map
Date
(
Map
Int
Double
))
,
rootsCount
::
!
(
Map
Int
Double
)
,
rootsFreq
::
!
(
Map
Int
Double
)
,
lastRootsFreq
::
!
(
Map
Int
Double
)
...
...
@@ -487,8 +489,10 @@ data PhyloGroup =
,
_phylo_groupSources
::
[
Int
]
,
_phylo_groupNgrams
::
[
Int
]
,
_phylo_groupCooc
::
!
(
Cooc
)
,
_phylo_groupDensity
::
Double
,
_phylo_groupBranchId
::
PhyloBranchId
,
_phylo_groupMeta
::
Map
Text
[
Double
]
,
_phylo_groupRootsCount
::
Map
Int
Double
,
_phylo_groupScaleParents
::
[
Pointer
]
,
_phylo_groupScaleChilds
::
[
Pointer
]
,
_phylo_groupPeriodParents
::
[
Pointer
]
...
...
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
View file @
8e353331
...
...
@@ -91,7 +91,9 @@ flowPhyloAPI config cId = do
corpus
<-
corpusIdtoDocuments
(
timeUnit
config
)
cId
let
phyloWithCliques
=
toPhyloWithoutLink
corpus
config
-- writePhylo phyloWithCliquesFile phyloWithCliques
pure
$
toPhylo
(
setConfig
config
phyloWithCliques
)
printDebug
"PhyloConfig old: "
config
pure
$
toPhylo
$
setConfig
config
phyloWithCliques
--------------------------------------------------------------------
corpusIdtoDocuments
::
TimeUnit
->
CorpusId
->
GargNoServer
[
Document
]
...
...
@@ -120,7 +122,7 @@ toPhyloDocs patterns time d =
(
fromIntegral
$
fromMaybe
1
$
_hd_publication_year
d
)
(
fromMaybe
1
$
_hd_publication_month
d
)
(
fromMaybe
1
$
_hd_publication_day
d
)
time
)
(
termsInText'
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
(
termsInText'
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
time
...
...
@@ -138,7 +140,7 @@ context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
text'
=
maybe
[]
toText
$
Map
.
lookup
contextId
ngs_terms
sources'
=
maybe
[]
toText
$
Map
.
lookup
contextId
ngs_sources
pure
$
Document
date
date'
text'
Nothing
sources'
pure
$
Document
date
date'
text'
Nothing
sources'
(
Year
3
1
5
)
-- TODO better default date and log the errors to improve data quality
...
...
src/Gargantext/Core/Viz/Phylo/Example.hs
View file @
8e353331
...
...
@@ -111,6 +111,7 @@ docs = map (\(d,t)
(
filter
(
\
n
->
isRoots
n
(
foundations
^.
foundations_roots
))
$
monoTexts
t
)
Nothing
[]
(
Year
3
1
5
)
)
corpus
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
View file @
8e353331
...
...
@@ -143,10 +143,7 @@ periodToDotNode prd prd' =
groupToDotNode
::
Vector
Ngrams
->
PhyloGroup
->
Int
->
Dot
DotId
groupToDotNode
fdt
g
bId
=
node
(
groupIdToDotId
$
getGroupId
g
)
(
[
FontName
"Arial"
,
Shape
Square
,
penWidth
4
,
toLabel
(
groupToTable
fdt
g
)
]
([
FontName
"Arial"
,
Shape
Square
,
penWidth
4
,
toLabel
(
groupToTable
fdt
g
)]
<>
[
toAttr
"nodeType"
"group"
,
toAttr
"gid"
(
groupIdToDotId
$
getGroupId
g
)
,
toAttr
"from"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
...
...
@@ -159,6 +156,8 @@ groupToDotNode fdt g bId =
,
toAttr
"weight"
(
pack
$
show
(
g
^.
phylo_groupWeight
))
,
toAttr
"source"
(
pack
$
show
(
nub
$
g
^.
phylo_groupSources
))
,
toAttr
"sourceFull"
(
pack
$
show
(
g
^.
phylo_groupSources
))
,
toAttr
"density"
(
pack
$
show
(
g
^.
phylo_groupDensity
))
,
toAttr
"cooc"
(
pack
$
show
(
g
^.
phylo_groupCooc
))
,
toAttr
"lbl"
(
pack
$
show
(
ngramsToLabel
fdt
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"foundation"
(
pack
$
show
(
idxToLabel
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"role"
(
pack
$
show
(
idxToLabel'
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)))
...
...
@@ -166,7 +165,6 @@ groupToDotNode fdt g bId =
,
toAttr
"seaLvl"
(
pack
$
show
((
g
^.
phylo_groupMeta
)
!
"seaLevels"
))
])
toDotEdge'
::
DotId
->
DotId
->
[
Char
]
->
[
Char
]
->
EdgeType
->
Dot
DotId
toDotEdge'
source
target
thr
w
edgeType
=
edge
source
target
(
case
edgeType
of
...
...
@@ -447,8 +445,10 @@ branchDating export =
else
acc
)
[]
$
export
^.
export_groups
periods
=
nub
groups
birth
=
fst
$
head'
"birth"
groups
age
=
(
snd
$
last'
"age"
groups
)
-
birth
death
=
snd
$
last'
"death"
groups
age
=
death
-
birth
in
b
&
branch_meta
%~
insert
"birth"
[
fromIntegral
birth
]
&
branch_meta
%~
insert
"death"
[
fromIntegral
death
]
&
branch_meta
%~
insert
"age"
[
fromIntegral
age
]
&
branch_meta
%~
insert
"size"
[
fromIntegral
$
length
periods
]
)
export
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
8e353331
...
...
@@ -34,6 +34,7 @@ import Gargantext.Core.Viz.Phylo.TemporalMatching (toPhyloQuality, temporalMatch
import
Gargantext.Prelude
import
qualified
Data.Set
as
Set
import
qualified
Data.Map
as
Map
import
qualified
Data.List
as
List
import
qualified
Data.Vector
as
Vector
...
...
@@ -191,7 +192,7 @@ findSeaLadder phylo = case getSeaElevation phylo of
in
acc
++
(
concat
pairs'
)
)
[]
$
keys
$
phylo
^.
phylo_periods
appendGroups
::
(
a
->
Period
->
(
Text
,
Text
)
->
Scale
->
Int
->
[
Cooc
]
->
PhyloGroup
)
->
Scale
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
::
(
a
->
Period
->
(
Text
,
Text
)
->
Scale
->
Int
->
[
Cooc
]
->
Map
Int
Double
->
PhyloGroup
)
->
Scale
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
f
lvl
m
phylo
=
trace
(
"
\n
"
<>
"-- | Append "
<>
show
(
length
$
concat
$
elems
m
)
<>
" groups to Level "
<>
show
(
lvl
)
<>
"
\n
"
)
$
over
(
phylo_periods
.
traverse
...
...
@@ -206,23 +207,28 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
&
phylo_scaleGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
groups
++
[
(((
pId
,
lvl
),
length
groups
)
,
f
obj
pId
pId'
lvl
(
length
groups
)
(
elems
$
restrictKeys
(
getCoocByDate
phylo
)
$
periodsToYears
[
pId
]))
-- select the cooc of the periods
(
elems
$
restrictKeys
(
getCoocByDate
phylo
)
$
periodsToYears
[
pId
])
-- select and merge the roots count of the periods
(
foldl
(
\
acc
count
->
unionWith
(
+
)
acc
count
)
empty
$
elems
$
restrictKeys
(
getRootsCountByDate
phylo
)
$
periodsToYears
[
pId
]))
]
)
[]
phyloCUnit
)
else
phyloLvl
)
phylo
clusterToGroup
::
Clustering
->
Period
->
(
Text
,
Text
)
->
Scale
->
Int
->
[
Cooc
]
->
PhyloGroup
clusterToGroup
fis
pId
pId'
lvl
idx
coocs
=
PhyloGroup
pId
pId'
lvl
idx
""
clusterToGroup
::
Clustering
->
Period
->
(
Text
,
Text
)
->
Scale
->
Int
->
[
Cooc
]
->
Map
Int
Double
->
PhyloGroup
clusterToGroup
fis
pId
pId'
lvl
idx
coocs
rootsCount
=
PhyloGroup
pId
pId'
lvl
idx
""
(
fis
^.
clustering_support
)
(
fis
^.
clustering_visWeighting
)
(
fis
^.
clustering_visFiltering
)
(
fis
^.
clustering_roots
)
(
ngramsToCooc
(
fis
^.
clustering_roots
)
coocs
)
(
ngramsToDensity
(
fis
^.
clustering_roots
)
coocs
rootsCount
)
(
1
,[
0
])
-- branchid (lvl,[path in the branching tree])
(
fromList
[(
"breaks"
,[
0
]),(
"seaLevels"
,[
0
])])
[]
[]
[]
[]
[]
[]
[]
rootsCount
[]
[]
[]
[]
[]
[]
[]
-----------------------
...
...
@@ -446,6 +452,16 @@ docsToTermCount docs roots = fromList
docsToTimeTermCount
::
[
Document
]
->
Vector
Ngrams
->
(
Map
Date
(
Map
Int
Double
))
docsToTimeTermCount
docs
roots
=
let
docs'
=
Map
.
map
(
\
l
->
fromList
$
map
(
\
lst
->
(
head'
"docsToTimeTermCount"
lst
,
fromIntegral
$
length
lst
))
$
group
$
sort
l
)
$
fromListWith
(
++
)
$
map
(
\
d
->
(
date
d
,
nub
$
ngramsToIdx
(
text
d
)
roots
))
docs
time
=
fromList
$
map
(
\
t
->
(
t
,
Map
.
empty
))
$
toTimeScale
(
keys
docs'
)
1
in
unionWith
(
Map
.
union
)
time
docs'
docsToLastTermFreq
::
Int
->
[
Document
]
->
Vector
Ngrams
->
Map
Int
Double
docsToLastTermFreq
n
docs
fdt
=
let
last
=
take
n
$
reverse
$
sort
$
map
date
docs
...
...
@@ -472,15 +488,15 @@ initPhyloScales lvlMax pId =
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
PhyloScale
pId
(
""
,
""
)
lvl
empty
))
[
1
..
lvlMax
]
setDefault
::
PhyloConfig
->
PhyloConfig
setDefault
conf
=
conf
{
setDefault
::
PhyloConfig
->
TimeUnit
->
PhyloConfig
setDefault
conf
timeScale
=
conf
{
phyloScale
=
2
,
similarity
=
WeightedLogJaccard
0.5
2
,
findAncestors
=
True
,
phyloSynchrony
=
ByProximityThreshold
0.6
0
SiblingBranches
MergeAllGroups
,
phyloQuality
=
Quality
0.5
3
,
timeUnit
=
Year
3
1
3
,
clique
=
MaxClique
5
30
ByNeighbours
,
timeUnit
=
timeScale
,
clique
=
Fis
3
5
,
exportLabel
=
[
BranchLabel
MostEmergentTfIdf
2
,
GroupLabel
MostEmergentInclusive
2
],
exportSort
=
ByHierarchy
Desc
,
exportFilter
=
[
ByBranchSize
3
]
...
...
@@ -492,18 +508,21 @@ setDefault conf = conf {
initPhylo
::
[
Document
]
->
PhyloConfig
->
Phylo
initPhylo
docs
conf
=
let
roots
=
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
timeScale
=
head'
"initPhylo"
$
map
docTime
docs
foundations
=
PhyloFoundations
roots
empty
docsSources
=
PhyloSources
(
Vector
.
fromList
$
nub
$
concat
$
map
sources
docs
)
docsCounts
=
PhyloCounts
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleNb
docs
)
(
docsToTimeTermCount
docs
(
foundations
^.
foundations_roots
))
(
docsToTermCount
docs
(
foundations
^.
foundations_roots
))
(
docsToTermFreq
docs
(
foundations
^.
foundations_roots
))
(
docsToLastTermFreq
(
getTimePeriod
$
timeUnit
conf
)
docs
(
foundations
^.
foundations_roots
))
(
docsToLastTermFreq
(
getTimePeriod
timeScale
)
docs
(
foundations
^.
foundations_roots
))
params
=
if
(
defaultMode
conf
)
then
defaultPhyloParam
{
_phyloParam_config
=
setDefault
conf
}
then
defaultPhyloParam
{
_phyloParam_config
=
setDefault
conf
timeScale
}
else
defaultPhyloParam
{
_phyloParam_config
=
conf
}
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
timeScale
)
(
getTimeStep
timeScale
)
in
trace
(
"
\n
"
<>
"-- | Init a phylo out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
$
trace
(
"
\n
"
<>
"-- | lambda "
<>
show
(
_qua_granularity
$
phyloQuality
$
_phyloParam_config
params
))
$
Phylo
foundations
docsSources
docsCounts
...
...
@@ -511,4 +530,4 @@ initPhylo docs conf =
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
""
,
""
)
(
initPhyloScales
1
prd
)))
periods
)
0
(
_qua_granularity
$
phyloQuality
$
conf
)
(
_qua_granularity
$
phyloQuality
$
_phyloParam_config
params
)
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
View file @
8e353331
...
...
@@ -14,7 +14,7 @@ module Gargantext.Core.Viz.Phylo.PhyloTools where
import
Control.Lens
hiding
(
Level
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
iterate
,
transpose
,
partition
,
tails
,
nubBy
,
group
,
notElem
,
(
!!
))
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.Map
(
Map
,
elems
,
fromList
,
findWithDefault
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.Set
(
Set
,
disjoint
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unpack
)
...
...
@@ -313,6 +313,27 @@ ngramsToCooc ngrams coocs =
in
filterWithKey
(
\
k
_
->
elem
k
pairs
)
cooc
-----------------
-- | Density | --
-----------------
-- | To build the density of a phylogroup
-- density is defined in Callon M, Courtial JP, Laville F (1991) Co-word analysis as a tool for describing
-- the network of interaction between basic and technological research: The case of polymer chemistry.
-- Scientometric 22: 155–205.
ngramsToDensity
::
[
Int
]
->
[
Cooc
]
->
(
Map
Int
Double
)
->
Double
ngramsToDensity
ngrams
coocs
rootsCount
=
let
cooc
=
foldl
(
\
acc
cooc'
->
sumCooc
acc
cooc'
)
empty
coocs
pairs
=
listToCombi'
ngrams
density
=
map
(
\
(
i
,
j
)
->
let
nij
=
findWithDefault
0
(
i
,
j
)
cooc
in
(
nij
*
nij
)
/
((
rootsCount
!
i
)
*
(
rootsCount
!
j
)))
pairs
in
(
sum
density
)
/
(
fromIntegral
$
length
ngrams
)
------------------
-- | Defaults | --
------------------
...
...
@@ -458,6 +479,9 @@ getPeriodIds phylo = sortOn fst
$
keys
$
phylo
^.
phylo_periods
getLastDate
::
Phylo
->
Date
getLastDate
phylo
=
snd
$
last'
"lastDate"
$
getPeriodIds
phylo
getLevelParentId
::
PhyloGroup
->
PhyloGroupId
getLevelParentId
g
=
fst
$
head'
"getLevelParentId"
$
g
^.
phylo_groupScaleParents
...
...
@@ -495,7 +519,7 @@ getConfig :: Phylo -> PhyloConfig
getConfig
phylo
=
(
phylo
^.
phylo_param
)
^.
phyloParam_config
getLevel
::
Phylo
->
Double
getLevel
phylo
=
_phylo_level
phylo
getLevel
phylo
=
(
phyloQuality
(
getConfig
phylo
))
^.
qua_granularity
getLadder
::
Phylo
->
[
Double
]
getLadder
phylo
=
phylo
^.
phylo_seaLadder
...
...
@@ -503,6 +527,9 @@ getLadder phylo = phylo ^. phylo_seaLadder
getCoocByDate
::
Phylo
->
Map
Date
Cooc
getCoocByDate
phylo
=
coocByDate
(
phylo
^.
phylo_counts
)
getRootsCountByDate
::
Phylo
->
Map
Date
(
Map
Int
Double
)
getRootsCountByDate
phylo
=
rootsCountByDate
(
phylo
^.
phylo_counts
)
getDocsByDate
::
Phylo
->
Map
Date
Double
getDocsByDate
phylo
=
docsByDate
(
phylo
^.
phylo_counts
)
...
...
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
View file @
8e353331
...
...
@@ -16,7 +16,7 @@ import Control.Lens hiding (Level)
import
Control.Monad
(
sequence
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
,
sortOn
,
groupBy
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
,
unionWith
)
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
processDynamics
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
...
...
@@ -32,6 +32,7 @@ import qualified Data.Map as Map
mergeGroups
::
[
Cooc
]
->
PhyloGroupId
->
Map
PhyloGroupId
PhyloGroupId
->
[
PhyloGroup
]
->
PhyloGroup
mergeGroups
coocs
id
mapIds
childs
=
let
ngrams
=
(
sort
.
nub
.
concat
)
$
map
_phylo_groupNgrams
childs
counts
=
foldl
(
\
acc
count
->
unionWith
(
+
)
acc
count
)
empty
$
map
_phylo_groupRootsCount
childs
in
PhyloGroup
(
fst
$
fst
id
)
(
_phylo_groupPeriod'
$
head'
"mergeGroups"
childs
)
(
snd
$
fst
id
)
(
snd
id
)
""
(
sum
$
map
_phylo_groupSupport
childs
)
...
...
@@ -40,8 +41,12 @@ mergeGroups coocs id mapIds childs =
(
concat
$
map
_phylo_groupSources
childs
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
ngramsToDensity
ngrams
coocs
counts
)
-- todo add density here
((
snd
$
fst
id
),
bId
)
(
mergeMeta
bId
childs
)
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
(
mergeMeta
bId
childs
)
counts
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodParents
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodChilds
childs
)
(
mergeAncestors
$
concat
$
map
_phylo_groupAncestors
childs
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment