Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
160
Issues
160
List
Board
Labels
Milestones
Merge Requests
14
Merge Requests
14
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
79d0ea23
Verified
Commit
79d0ea23
authored
Jun 20, 2023
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev' into 571-dev-node-corpus-api-search-fixes-take-2
parents
011a5304
138d2f86
Changes
15
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
8041 additions
and
85 deletions
+8041
-85
CHANGELOG.md
CHANGELOG.md
+7
-0
Main.hs
bin/gargantext-phylo/Main.hs
+28
-6
gargantext.cabal
gargantext.cabal
+4
-2
package.yaml
package.yaml
+4
-1
JSON.hs
src-test/Offline/JSON.hs
+30
-2
Phylo.hs
src/Gargantext/Core/Types/Phylo.hs
+406
-28
Phylo.hs
src/Gargantext/Core/Viz/Phylo.hs
+6
-2
Tools.hs
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
+5
-3
Example.hs
src/Gargantext/Core/Viz/Phylo/Example.hs
+1
-0
PhyloExport.hs
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
+24
-24
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+32
-13
PhyloTools.hs
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
+29
-2
SynchronicClustering.hs
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
+7
-2
bpa_phylo_test.json
test-data/phylo/bpa_phylo_test.json
+6140
-0
open_science.json
test-data/phylo/open_science.json
+1318
-0
No files found.
CHANGELOG.md
View file @
79d0ea23
## Version 0.0.6.9.9.6.7
*
[
FRONT
][
FIX
][
Node Corpus / API search (#571)
](
https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/571
)
*
[
BACK
][
FIX
]
PhyloTypes
*
[
FRONT
][
FIX
][
[Node Doc
]
Annotation: select/add ngram to map terms doesn
'
t seems to sync in frontend render (#563)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/563)
*
[
FRONT
][
FIX
][
[Graph
]
Rearrange the graph toolbar (#567)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/567)
## Version 0.0.6.9.9.6.6
*
[
BACK|FRONT
][
WARNING
]
Button to launch API corpus is broken for now
...
...
bin/gargantext-phylo/Main.hs
View file @
79d0ea23
...
...
@@ -21,7 +21,7 @@ import Control.Concurrent.Async (mapConcurrently)
import
Crypto.Hash.SHA256
(
hash
)
import
Data.Aeson
import
Data.Either
(
Either
(
..
),
fromRight
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
,
sort
,
tail
)
import
Data.List.Split
import
Data.Maybe
(
fromMaybe
)
import
Data.String
(
String
)
...
...
@@ -39,7 +39,7 @@ import Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloWithoutLink
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
,
toPeriods
,
getTimePeriod
,
getTimeStep
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
...
...
@@ -89,7 +89,7 @@ wosToDocs limit patterns time path = do
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
)
time
)
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
time
)
<$>
concat
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
...
...
@@ -109,6 +109,7 @@ csvToDocs parser patterns time path =
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
time
)
<$>
snd
<$>
either
(
\
err
->
panic
$
cs
$
"CSV error"
<>
(
show
err
))
identity
<$>
Csv
.
readCSVFile
path
Csv'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
...
...
@@ -117,18 +118,35 @@ csvToDocs parser patterns time path =
(
termsInText
patterns
$
(
csv'_title
row
)
<>
" "
<>
(
csv'_abstract
row
))
(
Just
$
csv'_weight
row
)
(
map
(
T
.
strip
.
pack
)
$
splitOn
";"
(
unpack
$
(
csv'_source
row
)))
time
)
<$>
snd
<$>
Csv
.
readWeightedCsv
path
-- To parse a file into a list of Document
fileToDocs
'
::
CorpusParser
->
FilePath
->
TimeUnit
->
TermList
->
IO
[
Document
]
fileToDocs
'
parser
path
time
lst
=
do
fileToDocs
Advanced
::
CorpusParser
->
FilePath
->
TimeUnit
->
TermList
->
IO
[
Document
]
fileToDocs
Advanced
parser
path
time
lst
=
do
let
patterns
=
buildPatterns
lst
case
parser
of
Wos
limit
->
wosToDocs
limit
patterns
time
path
Csv
_
->
csvToDocs
parser
patterns
time
path
Csv'
_
->
csvToDocs
parser
patterns
time
path
fileToDocsDefault
::
CorpusParser
->
FilePath
->
[
TimeUnit
]
->
TermList
->
IO
[
Document
]
fileToDocsDefault
parser
path
timeUnits
lst
=
if
length
timeUnits
>
0
then
do
let
timeUnit
=
(
head'
"fileToDocsDefault"
timeUnits
)
docs
<-
fileToDocsAdvanced
parser
path
timeUnit
lst
let
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
timeUnit
)
(
getTimeStep
timeUnit
)
if
(
length
periods
<
3
)
then
fileToDocsDefault
parser
path
(
tail
timeUnits
)
lst
else
pure
docs
else
panic
"this corpus is incompatible with the phylomemy reconstruction"
-- on passe à passer la time unit dans la conf envoyé au phyloMaker
-- dans le phyloMaker si default est true alors dans le setDefault ou pense à utiliser la TimeUnit de la conf
---------------
-- | Label | --
...
...
@@ -251,7 +269,11 @@ main = do
printIOMsg
"Parse the corpus"
mapList
<-
fileToList
(
listParser
config
)
(
listPath
config
)
corpus
<-
fileToDocs'
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
corpus
<-
if
(
defaultMode
config
)
then
fileToDocsDefault
(
corpusParser
config
)
(
corpusPath
config
)
[
Year
3
1
5
,
Month
3
1
5
,
Week
4
2
5
]
mapList
else
fileToDocsAdvanced
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOComment
(
show
(
length
$
nub
$
concat
$
map
text
corpus
)
<>
" Size ngs_coterms"
)
...
...
gargantext.cabal
View file @
79d0ea23
...
...
@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack
name: gargantext
version:
0.0.6.9.9.6.6
version:
0.0.6.9.9.6.7
synopsis: Search, map, share
description: Please see README.md
category: Data
...
...
@@ -26,6 +26,8 @@ data-files:
ekg-assets/chart_line_add.png
ekg-assets/cross.png
test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json
test-data/phylo/bpa_phylo_test.json
test-data/phylo/open_science.json
library
exposed-modules:
...
...
@@ -81,6 +83,7 @@ library
Gargantext.Core.Types.Individu
Gargantext.Core.Types.Main
Gargantext.Core.Types.Query
Gargantext.Core.Types.Phylo
Gargantext.Core.Utils
Gargantext.Core.Utils.Prefix
Gargantext.Core.Viz.Graph
...
...
@@ -235,7 +238,6 @@ library
Gargantext.Core.Text.Terms.Multi.PosTagging
Gargantext.Core.Text.Terms.Multi.PosTagging.Types
Gargantext.Core.Text.Upload
Gargantext.Core.Types.Phylo
Gargantext.Core.Types.Search
Gargantext.Core.Utils.DateUtils
Gargantext.Core.Viz
...
...
package.yaml
View file @
79d0ea23
...
...
@@ -6,7 +6,7 @@ name: gargantext
# | | | +----- Layers * : New versions with API additions
# | | | | +--- Layers * : New versions without API breaking changes
# | | | | |
version
:
'
0.0.6.9.9.6.
6
'
version
:
'
0.0.6.9.9.6.
7
'
synopsis
:
Search, map, share
description
:
Please see README.md
category
:
Data
...
...
@@ -45,6 +45,8 @@ data-files:
-
ekg-assets/chart_line_add.png
-
ekg-assets/cross.png
-
test-data/ngrams/GarganText_NgramsTerms-QuantumComputing.json
-
test-data/phylo/bpa_phylo_test.json
-
test-data/phylo/open_science.json
library
:
source-dirs
:
src
ghc-options
:
...
...
@@ -109,6 +111,7 @@ library:
-
Gargantext.Core.Types.Individu
-
Gargantext.Core.Types.Main
-
Gargantext.Core.Types.Query
-
Gargantext.Core.Types.Phylo
-
Gargantext.Core.Utils
-
Gargantext.Core.Utils.Prefix
-
Gargantext.Core.Viz.Graph
...
...
src-test/Offline/JSON.hs
View file @
79d0ea23
...
...
@@ -9,16 +9,21 @@ import Data.Aeson
import
Data.Either
import
Gargantext.API.Node.Corpus.New
import
Gargantext.API.Node.Corpus.Types
import
Gargantext.Core.Types.Phylo
import
Gargantext.Core.Viz.Phylo.API
import
Prelude
import
Test.Tasty
import
Test.Tasty.HUnit
import
Test.Tasty.QuickCheck
import
Text.RawString.QQ
import
qualified
Data.ByteString
as
B
import
qualified
Data.ByteString.Lazy.Char8
as
C8
import
Paths_gargantext
jsonRoundtrip
::
(
Show
a
,
FromJSON
a
,
ToJSON
a
,
Eq
a
)
=>
a
->
Property
jsonRoundtrip
a
=
eitherDecode
(
encode
a
)
===
Right
a
jsonRoundtrip
a
=
counterexample
(
"Parsed JSON: "
<>
C8
.
unpack
(
encode
a
))
$
eitherDecode
(
encode
a
)
===
Right
a
tests
::
TestTree
tests
=
testGroup
"JSON"
[
...
...
@@ -26,7 +31,14 @@ tests = testGroup "JSON" [
,
testProperty
"WithQuery roundtrips"
(
jsonRoundtrip
@
WithQuery
)
,
testCase
"WithQuery frontend compliance"
testWithQueryFrontend
,
testGroup
"Phylo"
[
testProperty
"PhyloData"
(
jsonRoundtrip
@
PhyloData
)
testProperty
"PeriodToNode"
(
jsonRoundtrip
@
PeriodToNodeData
)
,
testProperty
"GraphData"
(
jsonRoundtrip
@
GraphData
)
,
testProperty
"GraphDataData"
(
jsonRoundtrip
@
GraphDataData
)
,
testProperty
"ObjectData"
(
jsonRoundtrip
@
ObjectData
)
,
testProperty
"PhyloData"
(
jsonRoundtrip
@
PhyloData
)
,
testProperty
"LayerData"
(
jsonRoundtrip
@
LayerData
)
,
testCase
"can parse bpa_phylo_test.json"
testParseBpaPhylo
,
testCase
"can parse open_science.json"
testOpenSciencePhylo
]
]
...
...
@@ -40,3 +52,19 @@ testWithQueryFrontend = do
-- instances, this test would fail, and we will be notified.
cannedWithQueryPayload
::
String
cannedWithQueryPayload
=
[
r
|
{"query":"Haskell","node_id":138,"lang":"EN","flowListWith":{"type":"MyListsFirst"},"datafield":"External Arxiv","databases":"Arxiv"}
|]
testParseBpaPhylo
::
Assertion
testParseBpaPhylo
=
do
pth
<-
getDataFileName
"test-data/phylo/bpa_phylo_test.json"
jsonBlob
<-
B
.
readFile
pth
case
eitherDecodeStrict'
@
GraphData
jsonBlob
of
Left
err
->
error
err
Right
_
->
pure
()
testOpenSciencePhylo
::
Assertion
testOpenSciencePhylo
=
do
pth
<-
getDataFileName
"test-data/phylo/open_science.json"
jsonBlob
<-
B
.
readFile
pth
case
eitherDecodeStrict'
@
PhyloData
jsonBlob
of
Left
err
->
error
err
Right
_
->
pure
()
src/Gargantext/Core/Types/Phylo.hs
View file @
79d0ea23
This diff is collapsed.
Click to expand it.
src/Gargantext/Core/Viz/Phylo.hs
View file @
79d0ea23
...
...
@@ -135,7 +135,7 @@ data TimeUnit =
{
_day_period
::
Int
,
_day_step
::
Int
,
_day_matchingFrame
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
,
NFData
)
instance
ToSchema
TimeUnit
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
...
...
@@ -227,7 +227,7 @@ defaultConfig =
,
phyloScale
=
2
,
similarity
=
WeightedLogJaccard
0.5
1
,
seaElevation
=
Constante
0.1
0.1
,
defaultMode
=
Tru
e
,
defaultMode
=
Fals
e
,
findAncestors
=
False
,
phyloSynchrony
=
ByProximityThreshold
0.5
0
AllBranches
MergeAllGroups
,
phyloQuality
=
Quality
0.5
1
...
...
@@ -355,6 +355,7 @@ data Document = Document
,
text
::
[
Ngrams
]
,
weight
::
Maybe
Double
,
sources
::
[
Text
]
,
docTime
::
TimeUnit
}
deriving
(
Eq
,
Show
,
Generic
,
NFData
)
...
...
@@ -372,6 +373,7 @@ data PhyloFoundations = PhyloFoundations
data
PhyloCounts
=
PhyloCounts
{
coocByDate
::
!
(
Map
Date
Cooc
)
,
docsByDate
::
!
(
Map
Date
Double
)
,
rootsCountByDate
::
!
(
Map
Date
(
Map
Int
Double
))
,
rootsCount
::
!
(
Map
Int
Double
)
,
rootsFreq
::
!
(
Map
Int
Double
)
,
lastRootsFreq
::
!
(
Map
Int
Double
)
...
...
@@ -487,8 +489,10 @@ data PhyloGroup =
,
_phylo_groupSources
::
[
Int
]
,
_phylo_groupNgrams
::
[
Int
]
,
_phylo_groupCooc
::
!
(
Cooc
)
,
_phylo_groupDensity
::
Double
,
_phylo_groupBranchId
::
PhyloBranchId
,
_phylo_groupMeta
::
Map
Text
[
Double
]
,
_phylo_groupRootsCount
::
Map
Int
Double
,
_phylo_groupScaleParents
::
[
Pointer
]
,
_phylo_groupScaleChilds
::
[
Pointer
]
,
_phylo_groupPeriodParents
::
[
Pointer
]
...
...
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
View file @
79d0ea23
...
...
@@ -91,7 +91,9 @@ flowPhyloAPI config cId = do
corpus
<-
corpusIdtoDocuments
(
timeUnit
config
)
cId
let
phyloWithCliques
=
toPhyloWithoutLink
corpus
config
-- writePhylo phyloWithCliquesFile phyloWithCliques
pure
$
toPhylo
(
setConfig
config
phyloWithCliques
)
printDebug
"PhyloConfig old: "
config
pure
$
toPhylo
$
setConfig
config
phyloWithCliques
--------------------------------------------------------------------
corpusIdtoDocuments
::
TimeUnit
->
CorpusId
->
GargNoServer
[
Document
]
...
...
@@ -120,7 +122,7 @@ toPhyloDocs patterns time d =
(
fromIntegral
$
fromMaybe
1
$
_hd_publication_year
d
)
(
fromMaybe
1
$
_hd_publication_month
d
)
(
fromMaybe
1
$
_hd_publication_day
d
)
time
)
(
termsInText'
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
(
termsInText'
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
time
...
...
@@ -138,7 +140,7 @@ context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
text'
=
maybe
[]
toText
$
Map
.
lookup
contextId
ngs_terms
sources'
=
maybe
[]
toText
$
Map
.
lookup
contextId
ngs_sources
pure
$
Document
date
date'
text'
Nothing
sources'
pure
$
Document
date
date'
text'
Nothing
sources'
(
Year
3
1
5
)
-- TODO better default date and log the errors to improve data quality
...
...
src/Gargantext/Core/Viz/Phylo/Example.hs
View file @
79d0ea23
...
...
@@ -111,6 +111,7 @@ docs = map (\(d,t)
(
filter
(
\
n
->
isRoots
n
(
foundations
^.
foundations_roots
))
$
monoTexts
t
)
Nothing
[]
(
Year
3
1
5
)
)
corpus
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
View file @
79d0ea23
...
...
@@ -143,29 +143,27 @@ periodToDotNode prd prd' =
groupToDotNode
::
Vector
Ngrams
->
PhyloGroup
->
Int
->
Dot
DotId
groupToDotNode
fdt
g
bId
=
node
(
groupIdToDotId
$
getGroupId
g
)
(
[
FontName
"Arial"
,
Shape
Square
,
penWidth
4
,
toLabel
(
groupToTable
fdt
g
)
]
<>
[
toAttr
"nodeType"
"group"
,
toAttr
"gid"
(
groupIdToDotId
$
getGroupId
g
)
,
toAttr
"from"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
,
toAttr
"to"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
,
toAttr
"strFrom"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod'
))
,
toAttr
"strTo"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod'
))
,
toAttr
"branchId"
(
pack
$
unwords
(
init
$
map
show
$
snd
$
g
^.
phylo_groupBranchId
))
,
toAttr
"bId"
(
pack
$
show
bId
)
,
toAttr
"support"
(
pack
$
show
(
g
^.
phylo_groupSupport
))
,
toAttr
"weight"
(
pack
$
show
(
g
^.
phylo_groupWeight
))
,
toAttr
"source"
(
pack
$
show
(
nub
$
g
^.
phylo_groupSources
))
,
toAttr
"sourceFull"
(
pack
$
show
(
g
^.
phylo_groupSources
))
,
toAttr
"lbl"
(
pack
$
show
(
ngramsToLabel
fdt
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"foundation"
(
pack
$
show
(
idxToLabel
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"role"
(
pack
$
show
(
idxToLabel'
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)))
,
toAttr
"frequence"
(
pack
$
show
(
idxToLabel'
((
g
^.
phylo_groupMeta
)
!
"frequence"
)))
,
toAttr
"seaLvl"
(
pack
$
show
((
g
^.
phylo_groupMeta
)
!
"seaLevels"
))
])
([
FontName
"Arial"
,
Shape
Square
,
penWidth
4
,
toLabel
(
groupToTable
fdt
g
)]
<>
[
toAttr
"nodeType"
"group"
,
toAttr
"gid"
(
groupIdToDotId
$
getGroupId
g
)
,
toAttr
"from"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
,
toAttr
"to"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
,
toAttr
"strFrom"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod'
))
,
toAttr
"strTo"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod'
))
,
toAttr
"branchId"
(
pack
$
unwords
(
init
$
map
show
$
snd
$
g
^.
phylo_groupBranchId
))
,
toAttr
"bId"
(
pack
$
show
bId
)
,
toAttr
"support"
(
pack
$
show
(
g
^.
phylo_groupSupport
))
,
toAttr
"weight"
(
pack
$
show
(
g
^.
phylo_groupWeight
))
,
toAttr
"source"
(
pack
$
show
(
nub
$
g
^.
phylo_groupSources
))
,
toAttr
"sourceFull"
(
pack
$
show
(
g
^.
phylo_groupSources
))
,
toAttr
"density"
(
pack
$
show
(
g
^.
phylo_groupDensity
))
,
toAttr
"cooc"
(
pack
$
show
(
g
^.
phylo_groupCooc
))
,
toAttr
"lbl"
(
pack
$
show
(
ngramsToLabel
fdt
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"foundation"
(
pack
$
show
(
idxToLabel
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"role"
(
pack
$
show
(
idxToLabel'
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)))
,
toAttr
"frequence"
(
pack
$
show
(
idxToLabel'
((
g
^.
phylo_groupMeta
)
!
"frequence"
)))
,
toAttr
"seaLvl"
(
pack
$
show
((
g
^.
phylo_groupMeta
)
!
"seaLevels"
))
])
toDotEdge'
::
DotId
->
DotId
->
[
Char
]
->
[
Char
]
->
EdgeType
->
Dot
DotId
toDotEdge'
source
target
thr
w
edgeType
=
edge
source
target
...
...
@@ -447,8 +445,10 @@ branchDating export =
else
acc
)
[]
$
export
^.
export_groups
periods
=
nub
groups
birth
=
fst
$
head'
"birth"
groups
age
=
(
snd
$
last'
"age"
groups
)
-
birth
death
=
snd
$
last'
"death"
groups
age
=
death
-
birth
in
b
&
branch_meta
%~
insert
"birth"
[
fromIntegral
birth
]
&
branch_meta
%~
insert
"death"
[
fromIntegral
death
]
&
branch_meta
%~
insert
"age"
[
fromIntegral
age
]
&
branch_meta
%~
insert
"size"
[
fromIntegral
$
length
periods
]
)
export
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
79d0ea23
...
...
@@ -34,6 +34,7 @@ import Gargantext.Core.Viz.Phylo.TemporalMatching (toPhyloQuality, temporalMatch
import
Gargantext.Prelude
import
qualified
Data.Set
as
Set
import
qualified
Data.Map
as
Map
import
qualified
Data.List
as
List
import
qualified
Data.Vector
as
Vector
...
...
@@ -191,7 +192,7 @@ findSeaLadder phylo = case getSeaElevation phylo of
in
acc
++
(
concat
pairs'
)
)
[]
$
keys
$
phylo
^.
phylo_periods
appendGroups
::
(
a
->
Period
->
(
Text
,
Text
)
->
Scale
->
Int
->
[
Cooc
]
->
PhyloGroup
)
->
Scale
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
::
(
a
->
Period
->
(
Text
,
Text
)
->
Scale
->
Int
->
[
Cooc
]
->
Map
Int
Double
->
PhyloGroup
)
->
Scale
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
f
lvl
m
phylo
=
trace
(
"
\n
"
<>
"-- | Append "
<>
show
(
length
$
concat
$
elems
m
)
<>
" groups to Level "
<>
show
(
lvl
)
<>
"
\n
"
)
$
over
(
phylo_periods
.
traverse
...
...
@@ -206,23 +207,28 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
&
phylo_scaleGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
groups
++
[
(((
pId
,
lvl
),
length
groups
)
,
f
obj
pId
pId'
lvl
(
length
groups
)
(
elems
$
restrictKeys
(
getCoocByDate
phylo
)
$
periodsToYears
[
pId
]))
-- select the cooc of the periods
(
elems
$
restrictKeys
(
getCoocByDate
phylo
)
$
periodsToYears
[
pId
])
-- select and merge the roots count of the periods
(
foldl
(
\
acc
count
->
unionWith
(
+
)
acc
count
)
empty
$
elems
$
restrictKeys
(
getRootsCountByDate
phylo
)
$
periodsToYears
[
pId
]))
]
)
[]
phyloCUnit
)
else
phyloLvl
)
phylo
clusterToGroup
::
Clustering
->
Period
->
(
Text
,
Text
)
->
Scale
->
Int
->
[
Cooc
]
->
PhyloGroup
clusterToGroup
fis
pId
pId'
lvl
idx
coocs
=
PhyloGroup
pId
pId'
lvl
idx
""
clusterToGroup
::
Clustering
->
Period
->
(
Text
,
Text
)
->
Scale
->
Int
->
[
Cooc
]
->
Map
Int
Double
->
PhyloGroup
clusterToGroup
fis
pId
pId'
lvl
idx
coocs
rootsCount
=
PhyloGroup
pId
pId'
lvl
idx
""
(
fis
^.
clustering_support
)
(
fis
^.
clustering_visWeighting
)
(
fis
^.
clustering_visFiltering
)
(
fis
^.
clustering_roots
)
(
ngramsToCooc
(
fis
^.
clustering_roots
)
coocs
)
(
ngramsToDensity
(
fis
^.
clustering_roots
)
coocs
rootsCount
)
(
1
,[
0
])
-- branchid (lvl,[path in the branching tree])
(
fromList
[(
"breaks"
,[
0
]),(
"seaLevels"
,[
0
])])
[]
[]
[]
[]
[]
[]
[]
rootsCount
[]
[]
[]
[]
[]
[]
[]
-----------------------
...
...
@@ -446,6 +452,16 @@ docsToTermCount docs roots = fromList
docsToTimeTermCount
::
[
Document
]
->
Vector
Ngrams
->
(
Map
Date
(
Map
Int
Double
))
docsToTimeTermCount
docs
roots
=
let
docs'
=
Map
.
map
(
\
l
->
fromList
$
map
(
\
lst
->
(
head'
"docsToTimeTermCount"
lst
,
fromIntegral
$
length
lst
))
$
group
$
sort
l
)
$
fromListWith
(
++
)
$
map
(
\
d
->
(
date
d
,
nub
$
ngramsToIdx
(
text
d
)
roots
))
docs
time
=
fromList
$
map
(
\
t
->
(
t
,
Map
.
empty
))
$
toTimeScale
(
keys
docs'
)
1
in
unionWith
(
Map
.
union
)
time
docs'
docsToLastTermFreq
::
Int
->
[
Document
]
->
Vector
Ngrams
->
Map
Int
Double
docsToLastTermFreq
n
docs
fdt
=
let
last
=
take
n
$
reverse
$
sort
$
map
date
docs
...
...
@@ -472,15 +488,15 @@ initPhyloScales lvlMax pId =
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
PhyloScale
pId
(
""
,
""
)
lvl
empty
))
[
1
..
lvlMax
]
setDefault
::
PhyloConfig
->
PhyloConfig
setDefault
conf
=
conf
{
setDefault
::
PhyloConfig
->
TimeUnit
->
PhyloConfig
setDefault
conf
timeScale
=
conf
{
phyloScale
=
2
,
similarity
=
WeightedLogJaccard
0.5
2
,
findAncestors
=
True
,
phyloSynchrony
=
ByProximityThreshold
0.6
0
SiblingBranches
MergeAllGroups
,
phyloQuality
=
Quality
0.5
3
,
timeUnit
=
Year
3
1
3
,
clique
=
MaxClique
5
30
ByNeighbours
,
timeUnit
=
timeScale
,
clique
=
Fis
3
5
,
exportLabel
=
[
BranchLabel
MostEmergentTfIdf
2
,
GroupLabel
MostEmergentInclusive
2
],
exportSort
=
ByHierarchy
Desc
,
exportFilter
=
[
ByBranchSize
3
]
...
...
@@ -492,18 +508,21 @@ setDefault conf = conf {
initPhylo
::
[
Document
]
->
PhyloConfig
->
Phylo
initPhylo
docs
conf
=
let
roots
=
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
timeScale
=
head'
"initPhylo"
$
map
docTime
docs
foundations
=
PhyloFoundations
roots
empty
docsSources
=
PhyloSources
(
Vector
.
fromList
$
nub
$
concat
$
map
sources
docs
)
docsCounts
=
PhyloCounts
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleNb
docs
)
(
docsToTimeTermCount
docs
(
foundations
^.
foundations_roots
))
(
docsToTermCount
docs
(
foundations
^.
foundations_roots
))
(
docsToTermFreq
docs
(
foundations
^.
foundations_roots
))
(
docsToLastTermFreq
(
getTimePeriod
$
timeUnit
conf
)
docs
(
foundations
^.
foundations_roots
))
(
docsToLastTermFreq
(
getTimePeriod
timeScale
)
docs
(
foundations
^.
foundations_roots
))
params
=
if
(
defaultMode
conf
)
then
defaultPhyloParam
{
_phyloParam_config
=
setDefault
conf
}
then
defaultPhyloParam
{
_phyloParam_config
=
setDefault
conf
timeScale
}
else
defaultPhyloParam
{
_phyloParam_config
=
conf
}
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
timeScale
)
(
getTimeStep
timeScale
)
in
trace
(
"
\n
"
<>
"-- | Init a phylo out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
$
trace
(
"
\n
"
<>
"-- | lambda "
<>
show
(
_qua_granularity
$
phyloQuality
$
_phyloParam_config
params
))
$
Phylo
foundations
docsSources
docsCounts
...
...
@@ -511,4 +530,4 @@ initPhylo docs conf =
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
""
,
""
)
(
initPhyloScales
1
prd
)))
periods
)
0
(
_qua_granularity
$
phyloQuality
$
conf
)
(
_qua_granularity
$
phyloQuality
$
_phyloParam_config
params
)
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
View file @
79d0ea23
...
...
@@ -14,7 +14,7 @@ module Gargantext.Core.Viz.Phylo.PhyloTools where
import
Control.Lens
hiding
(
Level
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
iterate
,
transpose
,
partition
,
tails
,
nubBy
,
group
,
notElem
,
(
!!
))
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.Map
(
Map
,
elems
,
fromList
,
findWithDefault
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.Set
(
Set
,
disjoint
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unpack
)
...
...
@@ -313,6 +313,27 @@ ngramsToCooc ngrams coocs =
in
filterWithKey
(
\
k
_
->
elem
k
pairs
)
cooc
-----------------
-- | Density | --
-----------------
-- | To build the density of a phylogroup
-- density is defined in Callon M, Courtial JP, Laville F (1991) Co-word analysis as a tool for describing
-- the network of interaction between basic and technological research: The case of polymer chemistry.
-- Scientometric 22: 155–205.
ngramsToDensity
::
[
Int
]
->
[
Cooc
]
->
(
Map
Int
Double
)
->
Double
ngramsToDensity
ngrams
coocs
rootsCount
=
let
cooc
=
foldl
(
\
acc
cooc'
->
sumCooc
acc
cooc'
)
empty
coocs
pairs
=
listToCombi'
ngrams
density
=
map
(
\
(
i
,
j
)
->
let
nij
=
findWithDefault
0
(
i
,
j
)
cooc
in
(
nij
*
nij
)
/
((
rootsCount
!
i
)
*
(
rootsCount
!
j
)))
pairs
in
(
sum
density
)
/
(
fromIntegral
$
length
ngrams
)
------------------
-- | Defaults | --
------------------
...
...
@@ -458,6 +479,9 @@ getPeriodIds phylo = sortOn fst
$
keys
$
phylo
^.
phylo_periods
getLastDate
::
Phylo
->
Date
getLastDate
phylo
=
snd
$
last'
"lastDate"
$
getPeriodIds
phylo
getLevelParentId
::
PhyloGroup
->
PhyloGroupId
getLevelParentId
g
=
fst
$
head'
"getLevelParentId"
$
g
^.
phylo_groupScaleParents
...
...
@@ -495,7 +519,7 @@ getConfig :: Phylo -> PhyloConfig
getConfig
phylo
=
(
phylo
^.
phylo_param
)
^.
phyloParam_config
getLevel
::
Phylo
->
Double
getLevel
phylo
=
_phylo_level
phylo
getLevel
phylo
=
(
phyloQuality
(
getConfig
phylo
))
^.
qua_granularity
getLadder
::
Phylo
->
[
Double
]
getLadder
phylo
=
phylo
^.
phylo_seaLadder
...
...
@@ -503,6 +527,9 @@ getLadder phylo = phylo ^. phylo_seaLadder
getCoocByDate
::
Phylo
->
Map
Date
Cooc
getCoocByDate
phylo
=
coocByDate
(
phylo
^.
phylo_counts
)
getRootsCountByDate
::
Phylo
->
Map
Date
(
Map
Int
Double
)
getRootsCountByDate
phylo
=
rootsCountByDate
(
phylo
^.
phylo_counts
)
getDocsByDate
::
Phylo
->
Map
Date
Double
getDocsByDate
phylo
=
docsByDate
(
phylo
^.
phylo_counts
)
...
...
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
View file @
79d0ea23
...
...
@@ -16,7 +16,7 @@ import Control.Lens hiding (Level)
import
Control.Monad
(
sequence
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
,
sortOn
,
groupBy
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
,
unionWith
)
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
processDynamics
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
...
...
@@ -32,6 +32,7 @@ import qualified Data.Map as Map
mergeGroups
::
[
Cooc
]
->
PhyloGroupId
->
Map
PhyloGroupId
PhyloGroupId
->
[
PhyloGroup
]
->
PhyloGroup
mergeGroups
coocs
id
mapIds
childs
=
let
ngrams
=
(
sort
.
nub
.
concat
)
$
map
_phylo_groupNgrams
childs
counts
=
foldl
(
\
acc
count
->
unionWith
(
+
)
acc
count
)
empty
$
map
_phylo_groupRootsCount
childs
in
PhyloGroup
(
fst
$
fst
id
)
(
_phylo_groupPeriod'
$
head'
"mergeGroups"
childs
)
(
snd
$
fst
id
)
(
snd
id
)
""
(
sum
$
map
_phylo_groupSupport
childs
)
...
...
@@ -40,8 +41,12 @@ mergeGroups coocs id mapIds childs =
(
concat
$
map
_phylo_groupSources
childs
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
ngramsToDensity
ngrams
coocs
counts
)
-- todo add density here
((
snd
$
fst
id
),
bId
)
(
mergeMeta
bId
childs
)
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
(
mergeMeta
bId
childs
)
counts
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodParents
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodChilds
childs
)
(
mergeAncestors
$
concat
$
map
_phylo_groupAncestors
childs
)
...
...
test-data/phylo/bpa_phylo_test.json
0 → 100644
View file @
79d0ea23
This diff is collapsed.
Click to expand it.
test-data/phylo/open_science.json
0 → 100644
View file @
79d0ea23
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment