Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
6bddaf45
Commit
6bddaf45
authored
Apr 23, 2021
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add source and fixe date
parent
d41e40d9
Pipeline
#1445
failed with stage
Changes
10
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
251 additions
and
97 deletions
+251
-97
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+69
-25
package.yaml
package.yaml
+2
-1
CSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
+22
-22
FrequentItemSet.hs
src/Gargantext/Core/Text/Metrics/FrequentItemSet.hs
+4
-4
AdaptativePhylo.hs
src/Gargantext/Core/Viz/AdaptativePhylo.hs
+40
-13
PhyloExample.hs
src/Gargantext/Core/Viz/Phylo/PhyloExample.hs
+3
-1
PhyloExport.hs
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
+14
-7
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+34
-15
PhyloTools.hs
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
+57
-5
SynchronicClustering.hs
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
+6
-4
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
6bddaf45
...
...
@@ -19,13 +19,14 @@ module Main where
import
Data.Aeson
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unwords
,
unpack
,
replace
)
import
Data.Text
(
Text
,
unwords
,
unpack
,
replace
,
pack
)
import
Crypto.Hash.SHA256
(
hash
)
import
Gargantext.Prelude
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_w_title
,
csv_w_abstract
,
csv_w_publication_year
,
csv_w_weight
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
csv'_weight
)
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
...
...
@@ -36,11 +37,13 @@ import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
import
GHC.IO
(
FilePath
)
import
Prelude
(
Either
(
Left
,
Right
))
import
Prelude
(
Either
(
Left
,
Right
)
,
toInteger
)
import
System.Environment
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Data.Time.Calendar
(
fromGregorian
,
diffGregorianDurationClip
,
cdMonths
,
diffDays
,
showGregorian
)
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Vector
as
Vector
...
...
@@ -60,9 +63,36 @@ data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
getFilesFromPath
::
FilePath
->
IO
([
FilePath
])
getFilesFromPath
path
=
do
if
(
isSuffixOf
"/"
path
)
then
(
listDirectory
path
)
then
(
listDirectory
path
)
else
return
[
path
]
---------------
-- | Dates | --
---------------
toMonths
::
Integer
->
Int
->
Int
->
Date
toMonths
y
m
d
=
fromIntegral
$
cdMonths
$
diffGregorianDurationClip
(
fromGregorian
y
m
d
)
(
fromGregorian
0000
0
0
)
toDays
::
Integer
->
Int
->
Int
->
Date
toDays
y
m
d
=
fromIntegral
$
diffDays
(
fromGregorian
y
m
d
)
(
fromGregorian
0000
0
0
)
toPhyloDate
::
Int
->
Int
->
Int
->
TimeUnit
->
Date
toPhyloDate
y
m
d
tu
=
case
tu
of
Year
_
_
_
->
y
Month
_
_
_
->
toMonths
(
toInteger
y
)
m
d
Week
_
_
_
->
div
(
toDays
(
toInteger
y
)
m
d
)
7
Day
_
_
_
->
toDays
(
toInteger
y
)
m
d
toPhyloDate'
::
Int
->
Int
->
Int
->
Text
toPhyloDate'
y
m
d
=
pack
$
showGregorian
$
fromGregorian
(
toInteger
y
)
m
d
--------------
-- | Json | --
--------------
...
...
@@ -83,16 +113,23 @@ termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList p
-- | To transform a Wos file (or [file]) into a list of Docs
wosToDocs
::
Int
->
Patterns
->
FilePath
->
IO
([
Document
])
wosToDocs
limit
patterns
path
=
do
wosToDocs
::
Int
->
Patterns
->
TimeUnit
->
FilePath
->
IO
([
Document
])
wosToDocs
limit
patterns
time
path
=
do
files
<-
getFilesFromPath
path
take
limit
<$>
map
(
\
d
->
let
date'
=
fromJust
$
_hd_publication_year
d
title
=
fromJust
$
_hd_title
d
<$>
map
(
\
d
->
let
title
=
fromJust
$
_hd_title
d
abstr
=
if
(
isJust
$
_hd_abstract
d
)
then
fromJust
$
_hd_abstract
d
else
""
in
Document
date'
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
)
in
Document
(
toPhyloDate
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
)
time
)
(
toPhyloDate'
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
))
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
<$>
concat
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
...
...
@@ -101,32 +138,36 @@ wosToDocs limit patterns path = do
-- To transform a Csv file into a list of Document
csvToDocs
::
CorpusParser
->
Patterns
->
FilePath
->
IO
([
Document
])
csvToDocs
parser
patterns
path
=
csvToDocs
::
CorpusParser
->
Patterns
->
TimeUnit
->
FilePath
->
IO
([
Document
])
csvToDocs
parser
patterns
time
path
=
case
parser
of
Wos
_
->
undefined
Csv
limit
->
Vector
.
toList
Wos
_
->
undefined
Csv
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
csv_publication_year
row
)
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
))
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
)
<$>
snd
<$>
Csv
.
readFile
path
Csv
Weighted
limit
->
Vector
.
toList
Csv
'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
csv_w_publication_year
row
)
(
termsInText
patterns
$
(
csv_w_title
row
)
<>
" "
<>
(
csv_w_abstract
row
))
(
Just
$
csv_w_weight
row
)
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
(
toPhyloDate'
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
))
(
termsInText
patterns
$
(
csv'_title
row
)
<>
" "
<>
(
csv'_abstract
row
))
(
Just
$
csv'_weight
row
)
[
csv'_source
row
]
)
<$>
snd
<$>
Csv
.
readWeightedCsv
path
-- To parse a file into a list of Document
fileToDocs'
::
CorpusParser
->
FilePath
->
TermList
->
IO
[
Document
]
fileToDocs'
parser
path
lst
=
do
fileToDocs'
::
CorpusParser
->
FilePath
->
T
imeUnit
->
T
ermList
->
IO
[
Document
]
fileToDocs'
parser
path
time
lst
=
do
let
patterns
=
buildPatterns
lst
case
parser
of
Wos
limit
->
wosToDocs
limit
patterns
path
Csv
_
->
csvToDocs
parser
patterns
path
Csv
Weighted
_
->
csvToDocs
parser
patterns
path
Wos
limit
->
wosToDocs
limit
patterns
time
path
Csv
_
->
csvToDocs
parser
patterns
time
path
Csv
'
_
->
csvToDocs
parser
patterns
time
path
---------------
...
...
@@ -137,7 +178,10 @@ fileToDocs' parser path lst = do
-- Config time parameters to label
timeToLabel
::
Config
->
[
Char
]
timeToLabel
config
=
case
(
timeUnit
config
)
of
Year
p
s
f
->
(
"time"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Year
p
s
f
->
(
"time_years"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Month
p
s
f
->
(
"time_months"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Week
p
s
f
->
(
"time_weeks"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Day
p
s
f
->
(
"time_days"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
seaToLabel
::
Config
->
[
Char
]
...
...
@@ -240,7 +284,7 @@ main = do
printIOMsg
"Parse the corpus"
mapList
<-
csvMapTermList
(
listPath
config
)
corpus
<-
fileToDocs'
(
corpusParser
config
)
(
corpusPath
config
)
mapList
corpus
<-
fileToDocs'
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOMsg
"Reconstruct the phylo"
...
...
package.yaml
View file @
6bddaf45
...
...
@@ -330,7 +330,8 @@ executables:
-
optparse-generic
-
split
-
unordered-containers
-
cryptohash
-
cryptohash
-
time
gargantext-import
:
main
:
Main.hs
...
...
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
View file @
6bddaf45
...
...
@@ -398,28 +398,28 @@ parseCsv' bs = V.toList $ V.map csv2doc $ snd $ readCsvLazyBS bs
------------------------------------------------------------------------
-- Csv v3 weighted for phylo
data
WeightedCsv
=
WeightedCsv
{
csv
_w
_title
::
!
Text
,
csv
_w
_source
::
!
Text
,
csv
_w
_publication_year
::
!
Int
,
csv
_w
_publication_month
::
!
Int
,
csv
_w
_publication_day
::
!
Int
,
csv
_w
_abstract
::
!
Text
,
csv
_w
_authors
::
!
Text
,
csv
_w
_weight
::
!
Double
}
deriving
(
Show
)
instance
FromNamedRecord
WeightedCsv
where
parseNamedRecord
r
=
WeightedCsv
<$>
r
.:
"title"
<*>
r
.:
"source"
<*>
r
.:
"publication_year"
<*>
r
.:
"publication_month"
<*>
r
.:
"publication_day"
<*>
r
.:
"abstract"
<*>
r
.:
"authors"
<*>
r
.:
"weight"
readWeightedCsv
::
FilePath
->
IO
(
Header
,
Vector
WeightedCsv
)
data
Csv'
=
Csv'
{
csv
'
_title
::
!
Text
,
csv
'
_source
::
!
Text
,
csv
'
_publication_year
::
!
Int
,
csv
'
_publication_month
::
!
Int
,
csv
'
_publication_day
::
!
Int
,
csv
'
_abstract
::
!
Text
,
csv
'
_authors
::
!
Text
,
csv
'
_weight
::
!
Double
}
deriving
(
Show
)
instance
FromNamedRecord
Csv'
where
parseNamedRecord
r
=
Csv'
<$>
r
.:
"title"
<*>
r
.:
"source"
<*>
r
.:
"publication_year"
<*>
r
.:
"publication_month"
<*>
r
.:
"publication_day"
<*>
r
.:
"abstract"
<*>
r
.:
"authors"
<*>
r
.:
"weight"
readWeightedCsv
::
FilePath
->
IO
(
Header
,
Vector
Csv'
)
readWeightedCsv
fp
=
fmap
(
\
bs
->
case
decodeByNameWith
csvDecodeOptions
bs
of
...
...
src/Gargantext/Core/Text/Metrics/FrequentItemSet.hs
View file @
6bddaf45
...
...
@@ -128,17 +128,17 @@ fisWithSizePolyMap n f is =
isSublistOf
::
Ord
a
=>
[
a
]
->
[
a
]
->
Bool
isSublistOf
sub
lst
=
all
(
\
i
->
elem
i
lst
)
sub
reIndexFis
::
Ord
a
=>
[([
a
],
b
)]
->
[
Fis'
a
]
->
[(
Fis'
a
,[
b
]
)]
reIndexFis
::
Ord
a
=>
[([
a
],
(
b
,
c
))]
->
[
Fis'
a
]
->
[(
Fis'
a
,([
b
],[
c
])
)]
reIndexFis
items
fis
=
map
(
\
f
->
let
docs
=
filter
(
\
(
lst
,
_
)
->
isSublistOf
(
_fisItemSet
f
)
lst
)
items
in
(
f
,
map
snd
docs
))
fis
in
(
f
,
(
map
(
fst
.
snd
)
docs
,
map
(
snd
.
snd
)
docs
)
))
fis
wsum
::
[
Maybe
Double
]
->
Maybe
Double
wsum
lst
=
fmap
sum
$
sequence
lst
fisWithSizePolyMap'
::
Ord
a
=>
Size
->
Frequency
->
[([
a
],
Maybe
Double
)]
->
Map
(
Set
a
)
(
Int
,
Maybe
Double
)
fisWithSizePolyMap'
::
Ord
a
=>
Size
->
Frequency
->
[([
a
],
(
Maybe
Double
,[
Int
]))]
->
Map
(
Set
a
)
(
Int
,
(
Maybe
Double
,[
Int
])
)
fisWithSizePolyMap'
n
f
is
=
Map
.
fromList
$
map
(
\
(
fis
,
ws
)
->
(
Set
.
fromList
(
_fisItemSet
fis
),(
_fisCount
fis
,(
wsum
w
s
))))
$
map
(
\
(
fis
,
(
ws
,
sources
))
->
(
Set
.
fromList
(
_fisItemSet
fis
),(
_fisCount
fis
,(
wsum
ws
,
concat
source
s
))))
$
reIndexFis
is
$
fisWithSizePoly2
n
f
(
map
fst
is
)
...
...
src/Gargantext/Core/Viz/AdaptativePhylo.hs
View file @
6bddaf45
...
...
@@ -50,9 +50,9 @@ import qualified Data.Text.Lazy as TextLazy
data
CorpusParser
=
Wos
{
_wos_limit
::
Int
}
|
Csv
{
_csv_limit
::
Int
}
|
Csv
Weighted
{
_csvw
_limit
::
Int
}
Wos
{
_wos_limit
::
Int
}
|
Csv
{
_csv_limit
::
Int
}
|
Csv
'
{
_csv'
_limit
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
SeaElevation
=
...
...
@@ -107,6 +107,18 @@ data TimeUnit =
{
_year_period
::
Int
,
_year_step
::
Int
,
_year_matchingFrame
::
Int
}
|
Month
{
_month_period
::
Int
,
_month_step
::
Int
,
_month_matchingFrame
::
Int
}
|
Week
{
_week_period
::
Int
,
_week_step
::
Int
,
_week_matchingFrame
::
Int
}
|
Day
{
_day_period
::
Int
,
_day_step
::
Int
,
_day_matchingFrame
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
CliqueFilter
=
ByThreshold
|
ByNeighbours
deriving
(
Show
,
Generic
,
Eq
)
...
...
@@ -238,11 +250,14 @@ type Date = Int
-- | Ngrams : a contiguous sequence of n terms
type
Ngrams
=
Text
-- | Document : a piece of Text linked to a Date
-- Document : a piece of Text linked to a Date
-- date = computational date; date' = original string date yyyy-mm-dd
data
Document
=
Document
{
date
::
Date
,
text
::
[
Ngrams
]
,
weight
::
Maybe
Double
{
date
::
Date
,
date'
::
Text
,
text
::
[
Ngrams
]
,
weight
::
Maybe
Double
,
sources
::
[
Text
]
}
deriving
(
Eq
,
Show
,
Generic
,
NFData
)
...
...
@@ -258,6 +273,10 @@ data PhyloFoundations = PhyloFoundations
}
deriving
(
Generic
,
Show
,
Eq
)
data
PhyloSources
=
PhyloSources
{
_sources
::
!
(
Vector
Text
)
}
deriving
(
Generic
,
Show
,
Eq
)
---------------------------
-- | Coocurency Matrix | --
---------------------------
...
...
@@ -280,6 +299,7 @@ type Cooc = Map (Int,Int) Double
-- periods : the temporal steps of a phylomemy
data
Phylo
=
Phylo
{
_phylo_foundations
::
PhyloFoundations
,
_phylo_sources
::
PhyloSources
,
_phylo_timeCooc
::
!
(
Map
Date
Cooc
)
,
_phylo_timeDocs
::
!
(
Map
Date
Double
)
,
_phylo_termFreq
::
!
(
Map
Int
Double
)
...
...
@@ -299,8 +319,9 @@ type PhyloPeriodId = (Date,Date)
-- id: tuple (start date, end date) of the temporal step of the phylomemy
-- levels: levels of granularity
data
PhyloPeriod
=
PhyloPeriod
{
_phylo_periodPeriod
::
(
Date
,
Date
)
,
_phylo_periodLevels
::
Map
PhyloLevelId
PhyloLevel
PhyloPeriod
{
_phylo_periodPeriod
::
(
Date
,
Date
)
,
_phylo_periodPeriod'
::
(
Text
,
Text
)
,
_phylo_periodLevels
::
Map
PhyloLevelId
PhyloLevel
}
deriving
(
Generic
,
Show
,
Eq
)
...
...
@@ -316,9 +337,10 @@ type PhyloLevelId = (PhyloPeriodId,Level)
-- Level 1: First level of clustering (the Fis)
-- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
data
PhyloLevel
=
PhyloLevel
{
_phylo_levelPeriod
::
(
Date
,
Date
)
,
_phylo_levelLevel
::
Level
,
_phylo_levelGroups
::
Map
PhyloGroupId
PhyloGroup
PhyloLevel
{
_phylo_levelPeriod
::
(
Date
,
Date
)
,
_phylo_levelPeriod'
::
(
Text
,
Text
)
,
_phylo_levelLevel
::
Level
,
_phylo_levelGroups
::
Map
PhyloGroupId
PhyloGroup
}
deriving
(
Generic
,
Show
,
Eq
)
...
...
@@ -332,11 +354,13 @@ type PhyloBranchId = (Level, [Int])
-- | PhyloGroup : group of ngrams at each level and period
data
PhyloGroup
=
PhyloGroup
{
_phylo_groupPeriod
::
(
Date
,
Date
)
,
_phylo_groupPeriod'
::
(
Text
,
Text
)
,
_phylo_groupLevel
::
Level
,
_phylo_groupIndex
::
Int
,
_phylo_groupIndex
::
Int
,
_phylo_groupLabel
::
Text
,
_phylo_groupSupport
::
Support
,
_phylo_groupWeight
::
Maybe
Double
,
_phylo_groupSources
::
[
Int
]
,
_phylo_groupNgrams
::
[
Int
]
,
_phylo_groupCooc
::
!
(
Cooc
)
,
_phylo_groupBranchId
::
PhyloBranchId
...
...
@@ -371,6 +395,7 @@ data PhyloClique = PhyloClique
,
_phyloClique_support
::
Support
,
_phyloClique_period
::
(
Date
,
Date
)
,
_phyloClique_weight
::
Maybe
Double
,
_phyloClique_sources
::
[
Int
]
}
deriving
(
Generic
,
NFData
,
Show
,
Eq
)
----------------
...
...
@@ -444,6 +469,8 @@ makeLenses ''PhyloBranch
instance
FromJSON
Phylo
instance
ToJSON
Phylo
instance
FromJSON
PhyloSources
instance
ToJSON
PhyloSources
instance
FromJSON
PhyloParam
instance
ToJSON
PhyloParam
instance
FromJSON
PhyloPeriod
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExample.hs
View file @
6bddaf45
...
...
@@ -109,9 +109,11 @@ config =
docs
::
[
Document
]
docs
=
map
(
\
(
d
,
t
)
->
Document
d
->
Document
d
""
(
filter
(
\
n
->
isRoots
n
(
foundations
^.
foundations_roots
))
$
monoTexts
t
)
Nothing
[]
)
corpus
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
View file @
6bddaf45
...
...
@@ -120,11 +120,13 @@ branchToDotNode b bId =
,
toAttr
"label"
(
pack
$
show
$
b
^.
branch_label
)
])
periodToDotNode
::
(
Date
,
Date
)
->
Dot
DotId
periodToDotNode
prd
=
periodToDotNode
::
(
Date
,
Date
)
->
(
Text
.
Text
,
Text
.
Text
)
->
Dot
DotId
periodToDotNode
prd
prd'
=
node
(
periodIdToDotId
prd
)
([
Shape
BoxShape
,
FontSize
50
,
Label
(
toDotLabel
$
Text
.
pack
(
show
(
fst
prd
)
<>
" "
<>
show
(
snd
prd
)))]
<>
[
toAttr
"nodeType"
"period"
<>
[
toAttr
"nodeType"
"period"
,
toAttr
"strFrom"
(
fromStrict
$
Text
.
pack
$
(
show
$
fst
prd'
))
,
toAttr
"strTo"
(
fromStrict
$
Text
.
pack
$
(
show
$
snd
prd'
))
,
toAttr
"from"
(
fromStrict
$
Text
.
pack
$
(
show
$
fst
prd
))
,
toAttr
"to"
(
fromStrict
$
Text
.
pack
$
(
show
$
snd
prd
))])
...
...
@@ -136,10 +138,13 @@ groupToDotNode fdt g bId =
<>
[
toAttr
"nodeType"
"group"
,
toAttr
"from"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
,
toAttr
"to"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
,
toAttr
"strFrom"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod'
))
,
toAttr
"strTo"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod'
))
,
toAttr
"branchId"
(
pack
$
unwords
(
init
$
map
show
$
snd
$
g
^.
phylo_groupBranchId
))
,
toAttr
"bId"
(
pack
$
show
bId
)
,
toAttr
"support"
(
pack
$
show
(
g
^.
phylo_groupSupport
))
,
toAttr
"weight"
(
pack
$
show
(
g
^.
phylo_groupWeight
))
,
toAttr
"source"
(
pack
$
show
(
nub
$
g
^.
phylo_groupSources
))
,
toAttr
"lbl"
(
pack
$
show
(
ngramsToLabel
fdt
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"foundation"
(
pack
$
show
(
idxToLabel
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"role"
(
pack
$
show
(
idxToLabel'
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)))
...
...
@@ -194,6 +199,8 @@ exportToDot phylo export =
,(
toAttr
(
fromStrict
"phyloPeriods"
)
$
pack
$
show
(
length
$
elems
$
phylo
^.
phylo_periods
))
,(
toAttr
(
fromStrict
"phyloBranches"
)
$
pack
$
show
(
length
$
export
^.
export_branches
))
,(
toAttr
(
fromStrict
"phyloGroups"
)
$
pack
$
show
(
length
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"phyloSources"
)
$
pack
$
show
(
Vector
.
toList
$
getSources
phylo
))
,(
toAttr
(
fromStrict
"phyloTimeScale"
)
$
pack
$
getTimeScale
phylo
)
-- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
])
...
...
@@ -218,13 +225,13 @@ exportToDot phylo export =
{-- 5) create a layer for each period -}
_
<-
mapM
(
\
period
->
subgraph
((
Str
.
fromStrict
.
Text
.
pack
)
$
(
"Period"
<>
show
(
fst
period
)
<>
show
(
sn
d
period
)))
$
do
subgraph
((
Str
.
fromStrict
.
Text
.
pack
)
$
(
"Period"
<>
show
(
fst
$
_phylo_periodPeriod
period
)
<>
show
(
snd
$
_phylo_periodPerio
d
period
)))
$
do
graphAttrs
[
Rank
SameRank
]
periodToDotNode
period
periodToDotNode
(
period
^.
phylo_periodPeriod
)
(
period
^.
phylo_periodPeriod'
)
{-- 6) create a node for each group -}
mapM
(
\
g
->
groupToDotNode
(
getRoots
phylo
)
g
(
toBid
g
(
export
^.
export_branches
)))
(
filter
(
\
g
->
g
^.
phylo_groupPeriod
==
period
)
$
export
^.
export_groups
)
)
$
getPeriodIds
phylo
mapM
(
\
g
->
groupToDotNode
(
getRoots
phylo
)
g
(
toBid
g
(
export
^.
export_branches
)))
(
filter
(
\
g
->
g
^.
phylo_groupPeriod
==
(
period
^.
phylo_periodPeriod
)
)
$
export
^.
export_groups
)
)
$
phylo
^.
phylo_periods
{-- 7) create the edges between a branch and its first groups -}
_
<-
mapM
(
\
(
bId
,
groups
)
->
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
6bddaf45
...
...
@@ -14,6 +14,7 @@ module Gargantext.Core.Viz.Phylo.PhyloMaker where
import
Data.List
(
concat
,
nub
,
partition
,
sort
,
(
++
),
group
,
intersect
,
null
,
sortOn
,
groupBy
,
tail
)
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
restrictKeys
,
foldlWithKey
,
insert
)
import
Data.Vector
(
Vector
)
import
Data.Text
(
Text
)
import
Gargantext.Prelude
import
Gargantext.Core.Viz.AdaptativePhylo
...
...
@@ -104,7 +105,7 @@ toGroupsProxi lvl phylo =
in
phylo
&
phylo_groupsProxi
.~
((
traceGroupsProxi
.
fromList
)
groupsProxi
)
appendGroups
::
(
a
->
PhyloPeriodId
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
)
->
Level
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
::
(
a
->
PhyloPeriodId
->
(
Text
,
Text
)
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
)
->
Level
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
f
lvl
m
phylo
=
trace
(
"
\n
"
<>
"-- | Append "
<>
show
(
length
$
concat
$
elems
m
)
<>
" groups to Level "
<>
show
(
lvl
)
<>
"
\n
"
)
$
over
(
phylo_periods
.
traverse
...
...
@@ -112,12 +113,13 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
.
traverse
)
(
\
phyloLvl
->
if
lvl
==
(
phyloLvl
^.
phylo_levelLevel
)
then
let
pId
=
phyloLvl
^.
phylo_levelPeriod
let
pId
=
phyloLvl
^.
phylo_levelPeriod
pId'
=
phyloLvl
^.
phylo_levelPeriod'
phyloCUnit
=
m
!
pId
in
phyloLvl
&
phylo_levelGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
groups
++
[
(((
pId
,
lvl
),
length
groups
)
,
f
obj
pId
lvl
(
length
groups
)
,
f
obj
pId
pId'
lvl
(
length
groups
)
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[
pId
]))
]
)
[]
phyloCUnit
)
else
...
...
@@ -125,10 +127,11 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phylo
cliqueToGroup
::
PhyloClique
->
PhyloPeriodId
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
cliqueToGroup
fis
pId
lvl
idx
coocs
=
PhyloGroup
pId
lvl
idx
""
cliqueToGroup
::
PhyloClique
->
PhyloPeriodId
->
(
Text
,
Text
)
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
cliqueToGroup
fis
pId
pId'
lvl
idx
coocs
=
PhyloGroup
pId
pId'
lvl
idx
""
(
fis
^.
phyloClique_support
)
(
fis
^.
phyloClique_weight
)
(
fis
^.
phyloClique_sources
)
(
fis
^.
phyloClique_nodes
)
(
ngramsToCooc
(
fis
^.
phyloClique_nodes
)
coocs
)
(
1
,[
0
])
-- branchid (lvl,[path in the branching tree])
...
...
@@ -143,14 +146,27 @@ toPhylo1 phyloStep = case (getSeaElevation phyloStep) of
-----------------------
-- | To Phylo Step | --
-----------------------
-----------------------
indexDates'
::
Map
(
Date
,
Date
)
[
Document
]
->
Map
(
Date
,
Date
)
(
Text
,
Text
)
indexDates'
m
=
map
(
\
docs
->
let
ds
=
map
(
\
d
->
date'
d
)
docs
f
=
if
(
null
ds
)
then
""
else
toFstDate
ds
l
=
if
(
null
ds
)
then
""
else
toLstDate
ds
in
(
f
,
l
))
m
-- To build the first phylo step from docs and terms
toPhyloStep
::
[
Document
]
->
TermList
->
Config
->
Phylo
toPhyloStep
docs
lst
conf
=
case
(
getSeaElevation
phyloBase
)
of
Constante
_
_
->
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
Adaptative
_
->
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
Constante
_
_
->
appendGroups
cliqueToGroup
1
phyloClique
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
Adaptative
_
->
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
where
--------------------------------------
phyloClique
::
Map
(
Date
,
Date
)
[
PhyloClique
]
...
...
@@ -219,12 +235,12 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
Fis
_
_
->
let
fis
=
map
(
\
(
prd
,
docs
)
->
case
(
corpusParser
$
getConfig
phylo
)
of
Csv
Weighted
_
->
let
lst
=
toList
$
fisWithSizePolyMap'
(
Segment
1
20
)
1
(
map
(
\
d
->
(
ngramsToIdx
(
text
d
)
(
getRoots
phylo
),
weight
d
))
docs
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
((
fst
.
snd
)
f
)
prd
((
snd
.
snd
)
f
))
lst
)
Csv
'
_
->
let
lst
=
toList
$
fisWithSizePolyMap'
(
Segment
1
20
)
1
(
map
(
\
d
->
(
ngramsToIdx
(
text
d
)
(
getRoots
phylo
),
(
weight
d
,
(
sourcesToIdx
(
sources
d
)
(
getSources
phylo
)))
))
docs
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
((
fst
.
snd
)
f
)
prd
((
fst
.
snd
.
snd
)
f
)
(((
snd
.
snd
.
snd
)
f
)
))
lst
)
_
->
let
lst
=
toList
$
fisWithSizePolyMap
(
Segment
1
20
)
1
(
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
(
snd
f
)
prd
Nothing
)
lst
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
(
snd
f
)
prd
Nothing
[]
)
lst
)
)
$
toList
phyloDocs
fis'
=
fis
`
using
`
parList
rdeepseq
...
...
@@ -235,7 +251,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$
foldl
sumCooc
empty
$
map
listToMatrix
$
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
in
(
prd
,
map
(
\
cl
->
PhyloClique
cl
0
prd
Nothing
)
$
getMaxCliques
filterType
Conditional
thr
cooc
))
in
(
prd
,
map
(
\
cl
->
PhyloClique
cl
0
prd
Nothing
[]
)
$
getMaxCliques
filterType
Conditional
thr
cooc
))
$
toList
phyloDocs
mcl'
=
mcl
`
using
`
parList
rdeepseq
in
fromList
mcl'
...
...
@@ -342,17 +358,20 @@ docsToTimeScaleNb docs =
initPhyloLevels
::
Int
->
PhyloPeriodId
->
Map
PhyloLevelId
PhyloLevel
initPhyloLevels
lvlMax
pId
=
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
PhyloLevel
pId
lvl
empty
))
[
1
..
lvlMax
]
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
PhyloLevel
pId
(
""
,
""
)
lvl
empty
))
[
1
..
lvlMax
]
-- To init the basic elements of a Phylo
toPhyloBase
::
[
Document
]
->
TermList
->
Config
->
Phylo
toPhyloBase
docs
lst
conf
=
let
foundations
=
PhyloFoundations
(
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
)
lst
docsSources
=
PhyloSources
(
Vector
.
fromList
$
nub
$
concat
$
map
sources
docs
)
params
=
defaultPhyloParam
{
_phyloParam_config
=
conf
}
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
in
trace
(
"
\n
"
<>
"-- | Create PhyloBase out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
$
Phylo
foundations
docsSources
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleNb
docs
)
(
docsToTermFreq
docs
(
foundations
^.
foundations_roots
))
...
...
@@ -360,4 +379,4 @@ toPhyloBase docs lst conf =
empty
empty
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
initPhyloLevels
1
prd
)))
periods
)
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
""
,
""
)
(
initPhyloLevels
1
prd
)))
periods
)
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
View file @
6bddaf45
...
...
@@ -17,9 +17,9 @@ import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tai
import
Data.Set
(
Set
,
disjoint
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.String
(
String
)
import
Data.Text
(
Text
)
import
Data.Text
(
Text
,
unpack
)
import
Prelude
(
floor
)
import
Prelude
(
floor
,
read
)
import
Gargantext.Prelude
import
Gargantext.Core.Viz.AdaptativePhylo
...
...
@@ -115,6 +115,10 @@ isRoots n ns = Vector.elem n ns
ngramsToIdx
::
[
Ngrams
]
->
Vector
Ngrams
->
[
Int
]
ngramsToIdx
ns
fdt
=
map
(
\
n
->
fromJust
$
elemIndex
n
fdt
)
ns
-- | To transform a list of sources into a list of sources' index
sourcesToIdx
::
[
Text
]
->
Vector
Text
->
[
Int
]
sourcesToIdx
ss
ps
=
nub
$
map
(
\
s
->
fromJust
$
elemIndex
s
ps
)
ss
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel
::
Vector
Ngrams
->
[
Int
]
->
Text
ngramsToLabel
ngrams
l
=
Text
.
unwords
$
tail'
"ngramsToLabel"
$
concat
$
map
(
\
n
->
[
"|"
,
n
])
$
ngramsToText
ngrams
l
...
...
@@ -153,6 +157,32 @@ toPeriods dates p s =
$
chunkAlong
p
s
[
start
..
end
]
toFstDate
::
[
Text
]
->
Text
toFstDate
ds
=
snd
$
head'
"firstDate"
$
sortOn
fst
$
map
(
\
d
->
let
d'
=
read
(
filter
(
\
c
->
c
/=
'-'
)
$
unpack
d
)
::
Int
in
(
d'
,
d
))
ds
toLstDate
::
[
Text
]
->
Text
toLstDate
ds
=
snd
$
head'
"firstDate"
$
reverse
$
sortOn
fst
$
map
(
\
d
->
let
d'
=
read
(
filter
(
\
c
->
c
/=
'-'
)
$
unpack
d
)
::
Int
in
(
d'
,
d
))
ds
getTimeScale
::
Phylo
->
[
Char
]
getTimeScale
p
=
case
(
timeUnit
$
getConfig
p
)
of
Year
_
_
_
->
"year"
Month
_
_
_
->
"month"
Week
_
_
_
->
"week"
Day
_
_
_
->
"day"
-- | Get a regular & ascendante timeScale from a given list of dates
toTimeScale
::
[
Date
]
->
Int
->
[
Date
]
toTimeScale
dates
step
=
...
...
@@ -162,15 +192,24 @@ toTimeScale dates step =
getTimeStep
::
TimeUnit
->
Int
getTimeStep
time
=
case
time
of
Year
_
s
_
->
s
Year
_
s
_
->
s
Month
_
s
_
->
s
Week
_
s
_
->
s
Day
_
s
_
->
s
getTimePeriod
::
TimeUnit
->
Int
getTimePeriod
time
=
case
time
of
Year
p
_
_
->
p
Year
p
_
_
->
p
Month
p
_
_
->
p
Week
p
_
_
->
p
Day
p
_
_
->
p
getTimeFrame
::
TimeUnit
->
Int
getTimeFrame
time
=
case
time
of
Year
_
_
f
->
f
Year
_
_
f
->
f
Month
_
_
f
->
f
Week
_
_
f
->
f
Day
_
_
f
->
f
-------------
-- | Fis | --
...
...
@@ -359,6 +398,9 @@ setConfig config phylo = phylo
getRoots
::
Phylo
->
Vector
Ngrams
getRoots
phylo
=
(
phylo
^.
phylo_foundations
)
^.
foundations_roots
getSources
::
Phylo
->
Vector
Text
getSources
phylo
=
_sources
(
phylo
^.
phylo_sources
)
phyloToLastBranches
::
Phylo
->
[[
PhyloGroup
]]
phyloToLastBranches
phylo
=
elems
$
fromListWith
(
++
)
...
...
@@ -411,6 +453,16 @@ updatePhyloGroups lvl m phylo =
then
m
!
id
else
g
)
phylo
updatePeriods
::
Map
(
Date
,
Date
)
(
Text
,
Text
)
->
Phylo
->
Phylo
updatePeriods
periods'
phylo
=
over
(
phylo_periods
.
traverse
)
(
\
prd
->
let
prd'
=
periods'
!
(
prd
^.
phylo_periodPeriod
)
lvls
=
map
(
\
lvl
->
lvl
&
phylo_levelPeriod'
.~
prd'
)
$
prd
^.
phylo_periodLevels
in
prd
&
phylo_periodPeriod'
.~
prd'
&
phylo_periodLevels
.~
lvls
)
phylo
traceToPhylo
::
Level
->
Phylo
->
Phylo
traceToPhylo
lvl
phylo
=
...
...
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
View file @
6bddaf45
...
...
@@ -36,10 +36,12 @@ import qualified Data.Map as Map
mergeGroups
::
[
Cooc
]
->
PhyloGroupId
->
Map
PhyloGroupId
PhyloGroupId
->
[
PhyloGroup
]
->
PhyloGroup
mergeGroups
coocs
id
mapIds
childs
=
let
ngrams
=
(
sort
.
nub
.
concat
)
$
map
_phylo_groupNgrams
childs
in
PhyloGroup
(
fst
$
fst
id
)
(
snd
$
fst
id
)
(
snd
id
)
""
in
PhyloGroup
(
fst
$
fst
id
)
(
_phylo_groupPeriod'
$
head'
"mergeGroups"
childs
)
(
snd
$
fst
id
)
(
snd
id
)
""
(
sum
$
map
_phylo_groupSupport
childs
)
(
fmap
sum
$
sequence
$
map
_phylo_groupWeight
childs
)
$
map
_phylo_groupWeight
childs
)
(
concat
$
map
_phylo_groupSources
childs
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
((
snd
$
fst
id
),
bId
)
...
...
@@ -58,12 +60,12 @@ mergeGroups coocs id mapIds childs =
mergeAncestors
::
[
Pointer
]
->
[
Pointer
]
mergeAncestors
pointers
=
Map
.
toList
$
fromListWith
max
pointers
addPhyloLevel
::
Level
->
Phylo
->
Phylo
addPhyloLevel
lvl
phylo
=
over
(
phylo_periods
.
traverse
)
(
\
phyloPrd
->
phyloPrd
&
phylo_periodLevels
%~
(
insert
(
phyloPrd
^.
phylo_periodPeriod
,
lvl
)
(
PhyloLevel
(
phyloPrd
^.
phylo_periodPeriod
)
lvl
empty
)))
phylo
%~
(
insert
(
phyloPrd
^.
phylo_periodPeriod
,
lvl
)
(
PhyloLevel
(
phyloPrd
^.
phylo_periodPeriod
)
(
phyloPrd
^.
phylo_periodPeriod'
)
lvl
empty
)))
phylo
toNextLevel'
::
Phylo
->
[
PhyloGroup
]
->
Phylo
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment