Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Grégoire Locqueville
haskell-gargantext
Commits
67c70cdb
Commit
67c70cdb
authored
Jun 12, 2023
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
adaptative time scale with phylo 1click
parent
03327f23
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
41 additions
and
16 deletions
+41
-16
Main.hs
bin/gargantext-phylo/Main.hs
+28
-6
Phylo.hs
src/Gargantext/Core/Viz/Phylo.hs
+2
-1
Tools.hs
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
+1
-1
Example.hs
src/Gargantext/Core/Viz/Phylo/Example.hs
+1
-0
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+9
-8
No files found.
bin/gargantext-phylo/Main.hs
View file @
67c70cdb
...
...
@@ -21,7 +21,7 @@ import Control.Concurrent.Async (mapConcurrently)
import
Crypto.Hash.SHA256
(
hash
)
import
Data.Aeson
import
Data.Either
(
Either
(
..
),
fromRight
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
,
sort
,
tail
)
import
Data.List.Split
import
Data.Maybe
(
fromMaybe
)
import
Data.String
(
String
)
...
...
@@ -39,7 +39,7 @@ import Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.API.Tools
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloWithoutLink
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
,
toPeriods
,
getTimePeriod
,
getTimeStep
)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
...
...
@@ -89,7 +89,7 @@ wosToDocs limit patterns time path = do
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
)
time
)
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
time
)
<$>
concat
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
...
...
@@ -109,6 +109,7 @@ csvToDocs parser patterns time path =
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
time
)
<$>
snd
<$>
either
(
\
err
->
panic
$
cs
$
"CSV error"
<>
(
show
err
))
identity
<$>
Csv
.
readCSVFile
path
Csv'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
...
...
@@ -117,18 +118,35 @@ csvToDocs parser patterns time path =
(
termsInText
patterns
$
(
csv'_title
row
)
<>
" "
<>
(
csv'_abstract
row
))
(
Just
$
csv'_weight
row
)
(
map
(
T
.
strip
.
pack
)
$
splitOn
";"
(
unpack
$
(
csv'_source
row
)))
time
)
<$>
snd
<$>
Csv
.
readWeightedCsv
path
-- To parse a file into a list of Document
fileToDocs
'
::
CorpusParser
->
FilePath
->
TimeUnit
->
TermList
->
IO
[
Document
]
fileToDocs
'
parser
path
time
lst
=
do
fileToDocs
Advanced
::
CorpusParser
->
FilePath
->
TimeUnit
->
TermList
->
IO
[
Document
]
fileToDocs
Advanced
parser
path
time
lst
=
do
let
patterns
=
buildPatterns
lst
case
parser
of
Wos
limit
->
wosToDocs
limit
patterns
time
path
Csv
_
->
csvToDocs
parser
patterns
time
path
Csv'
_
->
csvToDocs
parser
patterns
time
path
fileToDocsDefault
::
CorpusParser
->
FilePath
->
[
TimeUnit
]
->
TermList
->
IO
[
Document
]
fileToDocsDefault
parser
path
timeUnits
lst
=
if
length
timeUnits
>
0
then
do
let
timeUnit
=
(
head'
"fileToDocsDefault"
timeUnits
)
docs
<-
fileToDocsAdvanced
parser
path
timeUnit
lst
let
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
timeUnit
)
(
getTimeStep
timeUnit
)
if
(
length
periods
<
3
)
then
fileToDocsDefault
parser
path
(
tail
timeUnits
)
lst
else
pure
docs
else
panic
"this corpus is incompatible with the phylomemy reconstruction"
-- on passe à passer la time unit dans la conf envoyé au phyloMaker
-- dans le phyloMaker si default est true alors dans le setDefault ou pense à utiliser la TimeUnit de la conf
---------------
-- | Label | --
...
...
@@ -251,7 +269,11 @@ main = do
printIOMsg
"Parse the corpus"
mapList
<-
fileToList
(
listParser
config
)
(
listPath
config
)
corpus
<-
fileToDocs'
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
corpus
<-
if
(
defaultMode
config
)
then
fileToDocsDefault
(
corpusParser
config
)
(
corpusPath
config
)
[
Year
3
1
5
,
Month
3
1
5
,
Week
4
2
5
]
mapList
else
fileToDocsAdvanced
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOComment
(
show
(
length
$
nub
$
concat
$
map
text
corpus
)
<>
" Size ngs_coterms"
)
...
...
src/Gargantext/Core/Viz/Phylo.hs
View file @
67c70cdb
...
...
@@ -135,7 +135,7 @@ data TimeUnit =
{
_day_period
::
Int
,
_day_step
::
Int
,
_day_matchingFrame
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
,
NFData
)
instance
ToSchema
TimeUnit
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
...
...
@@ -354,6 +354,7 @@ data Document = Document
,
text
::
[
Ngrams
]
,
weight
::
Maybe
Double
,
sources
::
[
Text
]
,
docTime
::
TimeUnit
}
deriving
(
Eq
,
Show
,
Generic
,
NFData
)
...
...
src/Gargantext/Core/Viz/Phylo/API/Tools.hs
View file @
67c70cdb
...
...
@@ -128,7 +128,7 @@ context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
text'
=
maybe
[]
toText
$
Map
.
lookup
contextId
ngs_terms
sources'
=
maybe
[]
toText
$
Map
.
lookup
contextId
ngs_sources
pure
$
Document
date
date'
text'
Nothing
sources'
pure
$
Document
date
date'
text'
Nothing
sources'
(
Year
3
1
5
)
context2date
::
Context
HyperdataDocument
->
TimeUnit
->
Maybe
(
Date
,
Text
)
...
...
src/Gargantext/Core/Viz/Phylo/Example.hs
View file @
67c70cdb
...
...
@@ -111,6 +111,7 @@ docs = map (\(d,t)
(
filter
(
\
n
->
isRoots
n
(
foundations
^.
foundations_roots
))
$
monoTexts
t
)
Nothing
[]
(
Year
3
1
5
)
)
corpus
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
67c70cdb
...
...
@@ -489,15 +489,15 @@ initPhyloScales lvlMax pId =
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
PhyloScale
pId
(
""
,
""
)
lvl
empty
))
[
1
..
lvlMax
]
setDefault
::
PhyloConfig
->
PhyloConfig
setDefault
conf
=
conf
{
setDefault
::
PhyloConfig
->
TimeUnit
->
PhyloConfig
setDefault
conf
timeScale
=
conf
{
phyloScale
=
2
,
similarity
=
WeightedLogJaccard
0.5
2
,
findAncestors
=
True
,
phyloSynchrony
=
ByProximityThreshold
0.6
0
SiblingBranches
MergeAllGroups
,
phyloQuality
=
Quality
0.5
3
,
timeUnit
=
Year
3
1
3
,
clique
=
MaxClique
5
30
ByNeighbours
,
timeUnit
=
timeScale
,
clique
=
Fis
3
5
,
exportLabel
=
[
BranchLabel
MostEmergentTfIdf
2
,
GroupLabel
MostEmergentInclusive
2
],
exportSort
=
ByHierarchy
Desc
,
exportFilter
=
[
ByBranchSize
3
]
...
...
@@ -509,6 +509,7 @@ setDefault conf = conf {
initPhylo
::
[
Document
]
->
PhyloConfig
->
Phylo
initPhylo
docs
conf
=
let
roots
=
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
timeScale
=
head'
"initPhylo"
$
map
docTime
docs
foundations
=
PhyloFoundations
roots
empty
docsSources
=
PhyloSources
(
Vector
.
fromList
$
nub
$
concat
$
map
sources
docs
)
docsCounts
=
PhyloCounts
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
...
...
@@ -516,11 +517,11 @@ initPhylo docs conf =
(
docsToTimeTermCount
docs
(
foundations
^.
foundations_roots
))
(
docsToTermCount
docs
(
foundations
^.
foundations_roots
))
(
docsToTermFreq
docs
(
foundations
^.
foundations_roots
))
(
docsToLastTermFreq
(
getTimePeriod
$
timeUnit
conf
)
docs
(
foundations
^.
foundations_roots
))
(
docsToLastTermFreq
(
getTimePeriod
timeScale
)
docs
(
foundations
^.
foundations_roots
))
params
=
if
(
defaultMode
conf
)
then
defaultPhyloParam
{
_phyloParam_config
=
setDefault
conf
}
then
defaultPhyloParam
{
_phyloParam_config
=
setDefault
conf
timeScale
}
else
defaultPhyloParam
{
_phyloParam_config
=
conf
}
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
timeScale
)
(
getTimeStep
timeScale
)
in
trace
(
"
\n
"
<>
"-- | Init a phylo out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
$
Phylo
foundations
docsSources
...
...
@@ -529,4 +530,4 @@ initPhylo docs conf =
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
""
,
""
)
(
initPhyloScales
1
prd
)))
periods
)
0
(
_qua_granularity
$
phyloQuality
$
conf
)
(
_qua_granularity
$
phyloQuality
$
_phyloParam_config
params
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment