Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Julien Moutinho
haskell-gargantext
Commits
8e353331
Commit
8e353331
authored
Jun 20, 2023
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
[MERGE] Phylo
parents
334d2b2d
e93416f8
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
132 additions
and
52 deletions
+132
-52
Main.hs
bin/gargantext-phylo/Main.hs
+28
-6
gargantext.cabal
gargantext.cabal
+1
-1
Phylo.hs
src/Gargantext/Core/Viz/Phylo.hs
+5
-1
Tools.hs
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
+5
-3
Example.hs
src/Gargantext/Core/Viz/Phylo/Example.hs
+1
-0
PhyloExport.hs
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
+24
-24
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+32
-13
PhyloTools.hs
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
+29
-2
SynchronicClustering.hs
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
+7
-2
No files found.
bin/gargantext-phylo/Main.hs
View file @
8e353331
...
...
@@ -21,7 +21,7 @@ import Control.Concurrent.Async (mapConcurrently)
import
Crypto.Hash.SHA256
(
hash
)
import
Data.Aeson
import
Data.Either
(
Either
(
..
),
fromRight
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
,
sort
,
tail
)
import
Data.List.Split
import
Data.Maybe
(
fromMaybe
)
import
Data.String
(
String
)
...
...
@@ -39,7 +39,7 @@ import Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloWithoutLink
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
,
toPeriods
,
getTimePeriod
,
getTimeStep
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
...
...
@@ -89,7 +89,7 @@ wosToDocs limit patterns time path = do
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
)
time
)
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
time
)
<$>
concat
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
...
...
@@ -109,6 +109,7 @@ csvToDocs parser patterns time path =
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
time
)
<$>
snd
<$>
either
(
\
err
->
panic
$
cs
$
"CSV error"
<>
(
show
err
))
identity
<$>
Csv
.
readCSVFile
path
Csv'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
...
...
@@ -117,18 +118,35 @@ csvToDocs parser patterns time path =
(
termsInText
patterns
$
(
csv'_title
row
)
<>
" "
<>
(
csv'_abstract
row
))
(
Just
$
csv'_weight
row
)
(
map
(
T
.
strip
.
pack
)
$
splitOn
";"
(
unpack
$
(
csv'_source
row
)))
time
)
<$>
snd
<$>
Csv
.
readWeightedCsv
path
-- To parse a file into a list of Document
fileToDocs
'
::
CorpusParser
->
FilePath
->
TimeUnit
->
TermList
->
IO
[
Document
]
fileToDocs
'
parser
path
time
lst
=
do
fileToDocs
Advanced
::
CorpusParser
->
FilePath
->
TimeUnit
->
TermList
->
IO
[
Document
]
fileToDocs
Advanced
parser
path
time
lst
=
do
let
patterns
=
buildPatterns
lst
case
parser
of
Wos
limit
->
wosToDocs
limit
patterns
time
path
Csv
_
->
csvToDocs
parser
patterns
time
path
Csv'
_
->
csvToDocs
parser
patterns
time
path
fileToDocsDefault
::
CorpusParser
->
FilePath
->
[
TimeUnit
]
->
TermList
->
IO
[
Document
]
fileToDocsDefault
parser
path
timeUnits
lst
=
if
length
timeUnits
>
0
then
do
let
timeUnit
=
(
head'
"fileToDocsDefault"
timeUnits
)
docs
<-
fileToDocsAdvanced
parser
path
timeUnit
lst
let
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
timeUnit
)
(
getTimeStep
timeUnit
)
if
(
length
periods
<
3
)
then
fileToDocsDefault
parser
path
(
tail
timeUnits
)
lst
else
pure
docs
else
panic
"this corpus is incompatible with the phylomemy reconstruction"
-- on passe à passer la time unit dans la conf envoyé au phyloMaker
-- dans le phyloMaker si default est true alors dans le setDefault ou pense à utiliser la TimeUnit de la conf
---------------
-- | Label | --
...
...
@@ -251,7 +269,11 @@ main = do
printIOMsg
"Parse the corpus"
mapList
<-
fileToList
(
listParser
config
)
(
listPath
config
)
corpus
<-
fileToDocs'
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
corpus
<-
if
(
defaultMode
config
)
then
fileToDocsDefault
(
corpusParser
config
)
(
corpusPath
config
)
[
Year
3
1
5
,
Month
3
1
5
,
Week
4
2
5
]
mapList
else
fileToDocsAdvanced
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOComment
(
show
(
length
$
nub
$
concat
$
map
text
corpus
)
<>
" Size ngs_coterms"
)
...
...
gargantext.cabal
View file @
8e353331
...
...
@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.6.9.9.6.6
version:
0.0.6.9.9.6.6
synopsis: Search, map, share
description: Please see README.md
category: Data
...
...
src/Gargantext/Core/Viz/Phylo.hs
View file @
8e353331
...
...
@@ -135,7 +135,7 @@ data TimeUnit =
{
_day_period
::
Int
,
_day_step
::
Int
,
_day_matchingFrame
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
,
NFData
)
instance
ToSchema
TimeUnit
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
...
...
@@ -355,6 +355,7 @@ data Document = Document
,
text
::
[
Ngrams
]
,
weight
::
Maybe
Double
,
sources
::
[
Text
]
,
docTime
::
TimeUnit
}
deriving
(
Eq
,
Show
,
Generic
,
NFData
)
...
...
@@ -372,6 +373,7 @@ data PhyloFoundations = PhyloFoundations
data
PhyloCounts
=
PhyloCounts
{
coocByDate
::
!
(
Map
Date
Cooc
)
,
docsByDate
::
!
(
Map
Date
Double
)
,
rootsCountByDate
::
!
(
Map
Date
(
Map
Int
Double
))
,
rootsCount
::
!
(
Map
Int
Double
)
,
rootsFreq
::
!
(
Map
Int
Double
)
,
lastRootsFreq
::
!
(
Map
Int
Double
)
...
...
@@ -487,8 +489,10 @@ data PhyloGroup =
,
_phylo_groupSources
::
[
Int
]
,
_phylo_groupNgrams
::
[
Int
]
,
_phylo_groupCooc
::
!
(
Cooc
)
,
_phylo_groupDensity
::
Double
,
_phylo_groupBranchId
::
PhyloBranchId
,
_phylo_groupMeta
::
Map
Text
[
Double
]
,
_phylo_groupRootsCount
::
Map
Int
Double
,
_phylo_groupScaleParents
::
[
Pointer
]
,
_phylo_groupScaleChilds
::
[
Pointer
]
,
_phylo_groupPeriodParents
::
[
Pointer
]
...
...
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
View file @
8e353331
...
...
@@ -91,7 +91,9 @@ flowPhyloAPI config cId = do
corpus
<-
corpusIdtoDocuments
(
timeUnit
config
)
cId
let
phyloWithCliques
=
toPhyloWithoutLink
corpus
config
-- writePhylo phyloWithCliquesFile phyloWithCliques
pure
$
toPhylo
(
setConfig
config
phyloWithCliques
)
printDebug
"PhyloConfig old: "
config
pure
$
toPhylo
$
setConfig
config
phyloWithCliques
--------------------------------------------------------------------
corpusIdtoDocuments
::
TimeUnit
->
CorpusId
->
GargNoServer
[
Document
]
...
...
@@ -120,7 +122,7 @@ toPhyloDocs patterns time d =
(
fromIntegral
$
fromMaybe
1
$
_hd_publication_year
d
)
(
fromMaybe
1
$
_hd_publication_month
d
)
(
fromMaybe
1
$
_hd_publication_day
d
)
time
)
(
termsInText'
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
(
termsInText'
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
time
...
...
@@ -138,7 +140,7 @@ context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
text'
=
maybe
[]
toText
$
Map
.
lookup
contextId
ngs_terms
sources'
=
maybe
[]
toText
$
Map
.
lookup
contextId
ngs_sources
pure
$
Document
date
date'
text'
Nothing
sources'
pure
$
Document
date
date'
text'
Nothing
sources'
(
Year
3
1
5
)
-- TODO better default date and log the errors to improve data quality
...
...
src/Gargantext/Core/Viz/Phylo/Example.hs
View file @
8e353331
...
...
@@ -111,6 +111,7 @@ docs = map (\(d,t)
(
filter
(
\
n
->
isRoots
n
(
foundations
^.
foundations_roots
))
$
monoTexts
t
)
Nothing
[]
(
Year
3
1
5
)
)
corpus
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
View file @
8e353331
...
...
@@ -143,29 +143,27 @@ periodToDotNode prd prd' =
groupToDotNode
::
Vector
Ngrams
->
PhyloGroup
->
Int
->
Dot
DotId
groupToDotNode
fdt
g
bId
=
node
(
groupIdToDotId
$
getGroupId
g
)
(
[
FontName
"Arial"
,
Shape
Square
,
penWidth
4
,
toLabel
(
groupToTable
fdt
g
)
]
<>
[
toAttr
"nodeType"
"group"
,
toAttr
"gid"
(
groupIdToDotId
$
getGroupId
g
)
,
toAttr
"from"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
,
toAttr
"to"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
,
toAttr
"strFrom"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod'
))
,
toAttr
"strTo"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod'
))
,
toAttr
"branchId"
(
pack
$
unwords
(
init
$
map
show
$
snd
$
g
^.
phylo_groupBranchId
))
,
toAttr
"bId"
(
pack
$
show
bId
)
,
toAttr
"support"
(
pack
$
show
(
g
^.
phylo_groupSupport
))
,
toAttr
"weight"
(
pack
$
show
(
g
^.
phylo_groupWeight
))
,
toAttr
"source"
(
pack
$
show
(
nub
$
g
^.
phylo_groupSources
))
,
toAttr
"sourceFull"
(
pack
$
show
(
g
^.
phylo_groupSources
))
,
toAttr
"lbl"
(
pack
$
show
(
ngramsToLabel
fdt
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"foundation"
(
pack
$
show
(
idxToLabel
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"role"
(
pack
$
show
(
idxToLabel'
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)))
,
toAttr
"frequence"
(
pack
$
show
(
idxToLabel'
((
g
^.
phylo_groupMeta
)
!
"frequence"
)))
,
toAttr
"seaLvl"
(
pack
$
show
((
g
^.
phylo_groupMeta
)
!
"seaLevels"
))
])
([
FontName
"Arial"
,
Shape
Square
,
penWidth
4
,
toLabel
(
groupToTable
fdt
g
)]
<>
[
toAttr
"nodeType"
"group"
,
toAttr
"gid"
(
groupIdToDotId
$
getGroupId
g
)
,
toAttr
"from"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
,
toAttr
"to"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
,
toAttr
"strFrom"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod'
))
,
toAttr
"strTo"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod'
))
,
toAttr
"branchId"
(
pack
$
unwords
(
init
$
map
show
$
snd
$
g
^.
phylo_groupBranchId
))
,
toAttr
"bId"
(
pack
$
show
bId
)
,
toAttr
"support"
(
pack
$
show
(
g
^.
phylo_groupSupport
))
,
toAttr
"weight"
(
pack
$
show
(
g
^.
phylo_groupWeight
))
,
toAttr
"source"
(
pack
$
show
(
nub
$
g
^.
phylo_groupSources
))
,
toAttr
"sourceFull"
(
pack
$
show
(
g
^.
phylo_groupSources
))
,
toAttr
"density"
(
pack
$
show
(
g
^.
phylo_groupDensity
))
,
toAttr
"cooc"
(
pack
$
show
(
g
^.
phylo_groupCooc
))
,
toAttr
"lbl"
(
pack
$
show
(
ngramsToLabel
fdt
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"foundation"
(
pack
$
show
(
idxToLabel
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"role"
(
pack
$
show
(
idxToLabel'
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)))
,
toAttr
"frequence"
(
pack
$
show
(
idxToLabel'
((
g
^.
phylo_groupMeta
)
!
"frequence"
)))
,
toAttr
"seaLvl"
(
pack
$
show
((
g
^.
phylo_groupMeta
)
!
"seaLevels"
))
])
toDotEdge'
::
DotId
->
DotId
->
[
Char
]
->
[
Char
]
->
EdgeType
->
Dot
DotId
toDotEdge'
source
target
thr
w
edgeType
=
edge
source
target
...
...
@@ -447,8 +445,10 @@ branchDating export =
else
acc
)
[]
$
export
^.
export_groups
periods
=
nub
groups
birth
=
fst
$
head'
"birth"
groups
age
=
(
snd
$
last'
"age"
groups
)
-
birth
death
=
snd
$
last'
"death"
groups
age
=
death
-
birth
in
b
&
branch_meta
%~
insert
"birth"
[
fromIntegral
birth
]
&
branch_meta
%~
insert
"death"
[
fromIntegral
death
]
&
branch_meta
%~
insert
"age"
[
fromIntegral
age
]
&
branch_meta
%~
insert
"size"
[
fromIntegral
$
length
periods
]
)
export
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
8e353331
...
...
@@ -34,6 +34,7 @@ import Gargantext.Core.Viz.Phylo.TemporalMatching (toPhyloQuality, temporalMatch
import
Gargantext.Prelude
import
qualified
Data.Set
as
Set
import
qualified
Data.Map
as
Map
import
qualified
Data.List
as
List
import
qualified
Data.Vector
as
Vector
...
...
@@ -191,7 +192,7 @@ findSeaLadder phylo = case getSeaElevation phylo of
in
acc
++
(
concat
pairs'
)
)
[]
$
keys
$
phylo
^.
phylo_periods
appendGroups
::
(
a
->
Period
->
(
Text
,
Text
)
->
Scale
->
Int
->
[
Cooc
]
->
PhyloGroup
)
->
Scale
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
::
(
a
->
Period
->
(
Text
,
Text
)
->
Scale
->
Int
->
[
Cooc
]
->
Map
Int
Double
->
PhyloGroup
)
->
Scale
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
f
lvl
m
phylo
=
trace
(
"
\n
"
<>
"-- | Append "
<>
show
(
length
$
concat
$
elems
m
)
<>
" groups to Level "
<>
show
(
lvl
)
<>
"
\n
"
)
$
over
(
phylo_periods
.
traverse
...
...
@@ -206,23 +207,28 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
&
phylo_scaleGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
groups
++
[
(((
pId
,
lvl
),
length
groups
)
,
f
obj
pId
pId'
lvl
(
length
groups
)
(
elems
$
restrictKeys
(
getCoocByDate
phylo
)
$
periodsToYears
[
pId
]))
-- select the cooc of the periods
(
elems
$
restrictKeys
(
getCoocByDate
phylo
)
$
periodsToYears
[
pId
])
-- select and merge the roots count of the periods
(
foldl
(
\
acc
count
->
unionWith
(
+
)
acc
count
)
empty
$
elems
$
restrictKeys
(
getRootsCountByDate
phylo
)
$
periodsToYears
[
pId
]))
]
)
[]
phyloCUnit
)
else
phyloLvl
)
phylo
clusterToGroup
::
Clustering
->
Period
->
(
Text
,
Text
)
->
Scale
->
Int
->
[
Cooc
]
->
PhyloGroup
clusterToGroup
fis
pId
pId'
lvl
idx
coocs
=
PhyloGroup
pId
pId'
lvl
idx
""
clusterToGroup
::
Clustering
->
Period
->
(
Text
,
Text
)
->
Scale
->
Int
->
[
Cooc
]
->
Map
Int
Double
->
PhyloGroup
clusterToGroup
fis
pId
pId'
lvl
idx
coocs
rootsCount
=
PhyloGroup
pId
pId'
lvl
idx
""
(
fis
^.
clustering_support
)
(
fis
^.
clustering_visWeighting
)
(
fis
^.
clustering_visFiltering
)
(
fis
^.
clustering_roots
)
(
ngramsToCooc
(
fis
^.
clustering_roots
)
coocs
)
(
ngramsToDensity
(
fis
^.
clustering_roots
)
coocs
rootsCount
)
(
1
,[
0
])
-- branchid (lvl,[path in the branching tree])
(
fromList
[(
"breaks"
,[
0
]),(
"seaLevels"
,[
0
])])
[]
[]
[]
[]
[]
[]
[]
rootsCount
[]
[]
[]
[]
[]
[]
[]
-----------------------
...
...
@@ -446,6 +452,16 @@ docsToTermCount docs roots = fromList
docsToTimeTermCount
::
[
Document
]
->
Vector
Ngrams
->
(
Map
Date
(
Map
Int
Double
))
docsToTimeTermCount
docs
roots
=
let
docs'
=
Map
.
map
(
\
l
->
fromList
$
map
(
\
lst
->
(
head'
"docsToTimeTermCount"
lst
,
fromIntegral
$
length
lst
))
$
group
$
sort
l
)
$
fromListWith
(
++
)
$
map
(
\
d
->
(
date
d
,
nub
$
ngramsToIdx
(
text
d
)
roots
))
docs
time
=
fromList
$
map
(
\
t
->
(
t
,
Map
.
empty
))
$
toTimeScale
(
keys
docs'
)
1
in
unionWith
(
Map
.
union
)
time
docs'
docsToLastTermFreq
::
Int
->
[
Document
]
->
Vector
Ngrams
->
Map
Int
Double
docsToLastTermFreq
n
docs
fdt
=
let
last
=
take
n
$
reverse
$
sort
$
map
date
docs
...
...
@@ -472,15 +488,15 @@ initPhyloScales lvlMax pId =
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
PhyloScale
pId
(
""
,
""
)
lvl
empty
))
[
1
..
lvlMax
]
setDefault
::
PhyloConfig
->
PhyloConfig
setDefault
conf
=
conf
{
setDefault
::
PhyloConfig
->
TimeUnit
->
PhyloConfig
setDefault
conf
timeScale
=
conf
{
phyloScale
=
2
,
similarity
=
WeightedLogJaccard
0.5
2
,
findAncestors
=
True
,
phyloSynchrony
=
ByProximityThreshold
0.6
0
SiblingBranches
MergeAllGroups
,
phyloQuality
=
Quality
0.5
3
,
timeUnit
=
Year
3
1
3
,
clique
=
MaxClique
5
30
ByNeighbours
,
timeUnit
=
timeScale
,
clique
=
Fis
3
5
,
exportLabel
=
[
BranchLabel
MostEmergentTfIdf
2
,
GroupLabel
MostEmergentInclusive
2
],
exportSort
=
ByHierarchy
Desc
,
exportFilter
=
[
ByBranchSize
3
]
...
...
@@ -492,18 +508,21 @@ setDefault conf = conf {
initPhylo
::
[
Document
]
->
PhyloConfig
->
Phylo
initPhylo
docs
conf
=
let
roots
=
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
timeScale
=
head'
"initPhylo"
$
map
docTime
docs
foundations
=
PhyloFoundations
roots
empty
docsSources
=
PhyloSources
(
Vector
.
fromList
$
nub
$
concat
$
map
sources
docs
)
docsCounts
=
PhyloCounts
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleNb
docs
)
(
docsToTimeTermCount
docs
(
foundations
^.
foundations_roots
))
(
docsToTermCount
docs
(
foundations
^.
foundations_roots
))
(
docsToTermFreq
docs
(
foundations
^.
foundations_roots
))
(
docsToLastTermFreq
(
getTimePeriod
$
timeUnit
conf
)
docs
(
foundations
^.
foundations_roots
))
(
docsToLastTermFreq
(
getTimePeriod
timeScale
)
docs
(
foundations
^.
foundations_roots
))
params
=
if
(
defaultMode
conf
)
then
defaultPhyloParam
{
_phyloParam_config
=
setDefault
conf
}
then
defaultPhyloParam
{
_phyloParam_config
=
setDefault
conf
timeScale
}
else
defaultPhyloParam
{
_phyloParam_config
=
conf
}
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
timeScale
)
(
getTimeStep
timeScale
)
in
trace
(
"
\n
"
<>
"-- | Init a phylo out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
$
trace
(
"
\n
"
<>
"-- | lambda "
<>
show
(
_qua_granularity
$
phyloQuality
$
_phyloParam_config
params
))
$
Phylo
foundations
docsSources
docsCounts
...
...
@@ -511,4 +530,4 @@ initPhylo docs conf =
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
""
,
""
)
(
initPhyloScales
1
prd
)))
periods
)
0
(
_qua_granularity
$
phyloQuality
$
conf
)
(
_qua_granularity
$
phyloQuality
$
_phyloParam_config
params
)
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
View file @
8e353331
...
...
@@ -14,7 +14,7 @@ module Gargantext.Core.Viz.Phylo.PhyloTools where
import
Control.Lens
hiding
(
Level
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
iterate
,
transpose
,
partition
,
tails
,
nubBy
,
group
,
notElem
,
(
!!
))
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.Map
(
Map
,
elems
,
fromList
,
findWithDefault
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.Set
(
Set
,
disjoint
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unpack
)
...
...
@@ -313,6 +313,27 @@ ngramsToCooc ngrams coocs =
in
filterWithKey
(
\
k
_
->
elem
k
pairs
)
cooc
-----------------
-- | Density | --
-----------------
-- | To build the density of a phylogroup
-- density is defined in Callon M, Courtial JP, Laville F (1991) Co-word analysis as a tool for describing
-- the network of interaction between basic and technological research: The case of polymer chemistry.
-- Scientometric 22: 155–205.
ngramsToDensity
::
[
Int
]
->
[
Cooc
]
->
(
Map
Int
Double
)
->
Double
ngramsToDensity
ngrams
coocs
rootsCount
=
let
cooc
=
foldl
(
\
acc
cooc'
->
sumCooc
acc
cooc'
)
empty
coocs
pairs
=
listToCombi'
ngrams
density
=
map
(
\
(
i
,
j
)
->
let
nij
=
findWithDefault
0
(
i
,
j
)
cooc
in
(
nij
*
nij
)
/
((
rootsCount
!
i
)
*
(
rootsCount
!
j
)))
pairs
in
(
sum
density
)
/
(
fromIntegral
$
length
ngrams
)
------------------
-- | Defaults | --
------------------
...
...
@@ -458,6 +479,9 @@ getPeriodIds phylo = sortOn fst
$
keys
$
phylo
^.
phylo_periods
getLastDate
::
Phylo
->
Date
getLastDate
phylo
=
snd
$
last'
"lastDate"
$
getPeriodIds
phylo
getLevelParentId
::
PhyloGroup
->
PhyloGroupId
getLevelParentId
g
=
fst
$
head'
"getLevelParentId"
$
g
^.
phylo_groupScaleParents
...
...
@@ -495,7 +519,7 @@ getConfig :: Phylo -> PhyloConfig
getConfig
phylo
=
(
phylo
^.
phylo_param
)
^.
phyloParam_config
getLevel
::
Phylo
->
Double
getLevel
phylo
=
_phylo_level
phylo
getLevel
phylo
=
(
phyloQuality
(
getConfig
phylo
))
^.
qua_granularity
getLadder
::
Phylo
->
[
Double
]
getLadder
phylo
=
phylo
^.
phylo_seaLadder
...
...
@@ -503,6 +527,9 @@ getLadder phylo = phylo ^. phylo_seaLadder
getCoocByDate
::
Phylo
->
Map
Date
Cooc
getCoocByDate
phylo
=
coocByDate
(
phylo
^.
phylo_counts
)
getRootsCountByDate
::
Phylo
->
Map
Date
(
Map
Int
Double
)
getRootsCountByDate
phylo
=
rootsCountByDate
(
phylo
^.
phylo_counts
)
getDocsByDate
::
Phylo
->
Map
Date
Double
getDocsByDate
phylo
=
docsByDate
(
phylo
^.
phylo_counts
)
...
...
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
View file @
8e353331
...
...
@@ -16,7 +16,7 @@ import Control.Lens hiding (Level)
import
Control.Monad
(
sequence
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
,
sortOn
,
groupBy
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
,
unionWith
)
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
processDynamics
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
...
...
@@ -32,6 +32,7 @@ import qualified Data.Map as Map
mergeGroups
::
[
Cooc
]
->
PhyloGroupId
->
Map
PhyloGroupId
PhyloGroupId
->
[
PhyloGroup
]
->
PhyloGroup
mergeGroups
coocs
id
mapIds
childs
=
let
ngrams
=
(
sort
.
nub
.
concat
)
$
map
_phylo_groupNgrams
childs
counts
=
foldl
(
\
acc
count
->
unionWith
(
+
)
acc
count
)
empty
$
map
_phylo_groupRootsCount
childs
in
PhyloGroup
(
fst
$
fst
id
)
(
_phylo_groupPeriod'
$
head'
"mergeGroups"
childs
)
(
snd
$
fst
id
)
(
snd
id
)
""
(
sum
$
map
_phylo_groupSupport
childs
)
...
...
@@ -40,8 +41,12 @@ mergeGroups coocs id mapIds childs =
(
concat
$
map
_phylo_groupSources
childs
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
ngramsToDensity
ngrams
coocs
counts
)
-- todo add density here
((
snd
$
fst
id
),
bId
)
(
mergeMeta
bId
childs
)
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
(
mergeMeta
bId
childs
)
counts
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodParents
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodChilds
childs
)
(
mergeAncestors
$
concat
$
map
_phylo_groupAncestors
childs
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment