Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
199
Issues
199
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
67c70cdb
Commit
67c70cdb
authored
Jun 12, 2023
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
adaptative time scale with phylo 1click
parent
03327f23
Pipeline
#4189
failed with stage
in 83 minutes and 20 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
41 additions
and
16 deletions
+41
-16
Main.hs
bin/gargantext-phylo/Main.hs
+28
-6
Phylo.hs
src/Gargantext/Core/Viz/Phylo.hs
+2
-1
Tools.hs
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
+1
-1
Example.hs
src/Gargantext/Core/Viz/Phylo/Example.hs
+1
-0
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+9
-8
No files found.
bin/gargantext-phylo/Main.hs
View file @
67c70cdb
...
...
@@ -21,7 +21,7 @@ import Control.Concurrent.Async (mapConcurrently)
import
Crypto.Hash.SHA256
(
hash
)
import
Data.Aeson
import
Data.Either
(
Either
(
..
),
fromRight
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
,
sort
,
tail
)
import
Data.List.Split
import
Data.Maybe
(
fromMaybe
)
import
Data.String
(
String
)
...
...
@@ -39,7 +39,7 @@ import Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloWithoutLink
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
,
toPeriods
,
getTimePeriod
,
getTimeStep
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
...
...
@@ -89,7 +89,7 @@ wosToDocs limit patterns time path = do
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
)
time
)
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
time
)
<$>
concat
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
...
...
@@ -109,6 +109,7 @@ csvToDocs parser patterns time path =
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
time
)
<$>
snd
<$>
either
(
\
err
->
panic
$
cs
$
"CSV error"
<>
(
show
err
))
identity
<$>
Csv
.
readCSVFile
path
Csv'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
...
...
@@ -117,18 +118,35 @@ csvToDocs parser patterns time path =
(
termsInText
patterns
$
(
csv'_title
row
)
<>
" "
<>
(
csv'_abstract
row
))
(
Just
$
csv'_weight
row
)
(
map
(
T
.
strip
.
pack
)
$
splitOn
";"
(
unpack
$
(
csv'_source
row
)))
time
)
<$>
snd
<$>
Csv
.
readWeightedCsv
path
-- To parse a file into a list of Document
fileToDocs
'
::
CorpusParser
->
FilePath
->
TimeUnit
->
TermList
->
IO
[
Document
]
fileToDocs
'
parser
path
time
lst
=
do
fileToDocs
Advanced
::
CorpusParser
->
FilePath
->
TimeUnit
->
TermList
->
IO
[
Document
]
fileToDocs
Advanced
parser
path
time
lst
=
do
let
patterns
=
buildPatterns
lst
case
parser
of
Wos
limit
->
wosToDocs
limit
patterns
time
path
Csv
_
->
csvToDocs
parser
patterns
time
path
Csv'
_
->
csvToDocs
parser
patterns
time
path
fileToDocsDefault
::
CorpusParser
->
FilePath
->
[
TimeUnit
]
->
TermList
->
IO
[
Document
]
fileToDocsDefault
parser
path
timeUnits
lst
=
if
length
timeUnits
>
0
then
do
let
timeUnit
=
(
head'
"fileToDocsDefault"
timeUnits
)
docs
<-
fileToDocsAdvanced
parser
path
timeUnit
lst
let
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
timeUnit
)
(
getTimeStep
timeUnit
)
if
(
length
periods
<
3
)
then
fileToDocsDefault
parser
path
(
tail
timeUnits
)
lst
else
pure
docs
else
panic
"this corpus is incompatible with the phylomemy reconstruction"
-- on passe à passer la time unit dans la conf envoyé au phyloMaker
-- dans le phyloMaker si default est true alors dans le setDefault ou pense à utiliser la TimeUnit de la conf
---------------
-- | Label | --
...
...
@@ -251,7 +269,11 @@ main = do
printIOMsg
"Parse the corpus"
mapList
<-
fileToList
(
listParser
config
)
(
listPath
config
)
corpus
<-
fileToDocs'
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
corpus
<-
if
(
defaultMode
config
)
then
fileToDocsDefault
(
corpusParser
config
)
(
corpusPath
config
)
[
Year
3
1
5
,
Month
3
1
5
,
Week
4
2
5
]
mapList
else
fileToDocsAdvanced
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOComment
(
show
(
length
$
nub
$
concat
$
map
text
corpus
)
<>
" Size ngs_coterms"
)
...
...
src/Gargantext/Core/Viz/Phylo.hs
View file @
67c70cdb
...
...
@@ -135,7 +135,7 @@ data TimeUnit =
{
_day_period
::
Int
,
_day_step
::
Int
,
_day_matchingFrame
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
,
NFData
)
instance
ToSchema
TimeUnit
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
...
...
@@ -354,6 +354,7 @@ data Document = Document
,
text
::
[
Ngrams
]
,
weight
::
Maybe
Double
,
sources
::
[
Text
]
,
docTime
::
TimeUnit
}
deriving
(
Eq
,
Show
,
Generic
,
NFData
)
...
...
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
View file @
67c70cdb
...
...
@@ -128,7 +128,7 @@ context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
text'
=
maybe
[]
toText
$
Map
.
lookup
contextId
ngs_terms
sources'
=
maybe
[]
toText
$
Map
.
lookup
contextId
ngs_sources
pure
$
Document
date
date'
text'
Nothing
sources'
pure
$
Document
date
date'
text'
Nothing
sources'
(
Year
3
1
5
)
context2date
::
Context
HyperdataDocument
->
TimeUnit
->
Maybe
(
Date
,
Text
)
...
...
src/Gargantext/Core/Viz/Phylo/Example.hs
View file @
67c70cdb
...
...
@@ -111,6 +111,7 @@ docs = map (\(d,t)
(
filter
(
\
n
->
isRoots
n
(
foundations
^.
foundations_roots
))
$
monoTexts
t
)
Nothing
[]
(
Year
3
1
5
)
)
corpus
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
67c70cdb
...
...
@@ -489,15 +489,15 @@ initPhyloScales lvlMax pId =
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
PhyloScale
pId
(
""
,
""
)
lvl
empty
))
[
1
..
lvlMax
]
setDefault
::
PhyloConfig
->
PhyloConfig
setDefault
conf
=
conf
{
setDefault
::
PhyloConfig
->
TimeUnit
->
PhyloConfig
setDefault
conf
timeScale
=
conf
{
phyloScale
=
2
,
similarity
=
WeightedLogJaccard
0.5
2
,
findAncestors
=
True
,
phyloSynchrony
=
ByProximityThreshold
0.6
0
SiblingBranches
MergeAllGroups
,
phyloQuality
=
Quality
0.5
3
,
timeUnit
=
Year
3
1
3
,
clique
=
MaxClique
5
30
ByNeighbours
,
timeUnit
=
timeScale
,
clique
=
Fis
3
5
,
exportLabel
=
[
BranchLabel
MostEmergentTfIdf
2
,
GroupLabel
MostEmergentInclusive
2
],
exportSort
=
ByHierarchy
Desc
,
exportFilter
=
[
ByBranchSize
3
]
...
...
@@ -509,6 +509,7 @@ setDefault conf = conf {
initPhylo
::
[
Document
]
->
PhyloConfig
->
Phylo
initPhylo
docs
conf
=
let
roots
=
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
timeScale
=
head'
"initPhylo"
$
map
docTime
docs
foundations
=
PhyloFoundations
roots
empty
docsSources
=
PhyloSources
(
Vector
.
fromList
$
nub
$
concat
$
map
sources
docs
)
docsCounts
=
PhyloCounts
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
...
...
@@ -516,11 +517,11 @@ initPhylo docs conf =
(
docsToTimeTermCount
docs
(
foundations
^.
foundations_roots
))
(
docsToTermCount
docs
(
foundations
^.
foundations_roots
))
(
docsToTermFreq
docs
(
foundations
^.
foundations_roots
))
(
docsToLastTermFreq
(
getTimePeriod
$
timeUnit
conf
)
docs
(
foundations
^.
foundations_roots
))
(
docsToLastTermFreq
(
getTimePeriod
timeScale
)
docs
(
foundations
^.
foundations_roots
))
params
=
if
(
defaultMode
conf
)
then
defaultPhyloParam
{
_phyloParam_config
=
setDefault
conf
}
then
defaultPhyloParam
{
_phyloParam_config
=
setDefault
conf
timeScale
}
else
defaultPhyloParam
{
_phyloParam_config
=
conf
}
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
timeScale
)
(
getTimeStep
timeScale
)
in
trace
(
"
\n
"
<>
"-- | Init a phylo out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
$
Phylo
foundations
docsSources
...
...
@@ -529,4 +530,4 @@ initPhylo docs conf =
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
""
,
""
)
(
initPhyloScales
1
prd
)))
periods
)
0
(
_qua_granularity
$
phyloQuality
$
conf
)
(
_qua_granularity
$
phyloQuality
$
_phyloParam_config
params
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment