Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
149
Issues
149
List
Board
Labels
Milestones
Merge Requests
5
Merge Requests
5
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
6bddaf45
Commit
6bddaf45
authored
Apr 23, 2021
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add source and fixe date
parent
d41e40d9
Pipeline
#1445
failed with stage
Changes
10
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
251 additions
and
97 deletions
+251
-97
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+69
-25
package.yaml
package.yaml
+2
-1
CSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
+22
-22
FrequentItemSet.hs
src/Gargantext/Core/Text/Metrics/FrequentItemSet.hs
+4
-4
AdaptativePhylo.hs
src/Gargantext/Core/Viz/AdaptativePhylo.hs
+40
-13
PhyloExample.hs
src/Gargantext/Core/Viz/Phylo/PhyloExample.hs
+3
-1
PhyloExport.hs
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
+14
-7
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+34
-15
PhyloTools.hs
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
+57
-5
SynchronicClustering.hs
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
+6
-4
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
6bddaf45
...
...
@@ -19,13 +19,14 @@ module Main where
import
Data.Aeson
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unwords
,
unpack
,
replace
)
import
Data.Text
(
Text
,
unwords
,
unpack
,
replace
,
pack
)
import
Crypto.Hash.SHA256
(
hash
)
import
Gargantext.Prelude
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_w_title
,
csv_w_abstract
,
csv_w_publication_year
,
csv_w_weight
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
csv'_weight
)
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
...
...
@@ -36,11 +37,13 @@ import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
import
GHC.IO
(
FilePath
)
import
Prelude
(
Either
(
Left
,
Right
))
import
Prelude
(
Either
(
Left
,
Right
)
,
toInteger
)
import
System.Environment
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Data.Time.Calendar
(
fromGregorian
,
diffGregorianDurationClip
,
cdMonths
,
diffDays
,
showGregorian
)
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Vector
as
Vector
...
...
@@ -60,9 +63,36 @@ data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
getFilesFromPath
::
FilePath
->
IO
([
FilePath
])
getFilesFromPath
path
=
do
if
(
isSuffixOf
"/"
path
)
then
(
listDirectory
path
)
then
(
listDirectory
path
)
else
return
[
path
]
---------------
-- | Dates | --
---------------
toMonths
::
Integer
->
Int
->
Int
->
Date
toMonths
y
m
d
=
fromIntegral
$
cdMonths
$
diffGregorianDurationClip
(
fromGregorian
y
m
d
)
(
fromGregorian
0000
0
0
)
toDays
::
Integer
->
Int
->
Int
->
Date
toDays
y
m
d
=
fromIntegral
$
diffDays
(
fromGregorian
y
m
d
)
(
fromGregorian
0000
0
0
)
toPhyloDate
::
Int
->
Int
->
Int
->
TimeUnit
->
Date
toPhyloDate
y
m
d
tu
=
case
tu
of
Year
_
_
_
->
y
Month
_
_
_
->
toMonths
(
toInteger
y
)
m
d
Week
_
_
_
->
div
(
toDays
(
toInteger
y
)
m
d
)
7
Day
_
_
_
->
toDays
(
toInteger
y
)
m
d
toPhyloDate'
::
Int
->
Int
->
Int
->
Text
toPhyloDate'
y
m
d
=
pack
$
showGregorian
$
fromGregorian
(
toInteger
y
)
m
d
--------------
-- | Json | --
--------------
...
...
@@ -83,16 +113,23 @@ termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList p
-- | To transform a Wos file (or [file]) into a list of Docs
wosToDocs
::
Int
->
Patterns
->
FilePath
->
IO
([
Document
])
wosToDocs
limit
patterns
path
=
do
wosToDocs
::
Int
->
Patterns
->
TimeUnit
->
FilePath
->
IO
([
Document
])
wosToDocs
limit
patterns
time
path
=
do
files
<-
getFilesFromPath
path
take
limit
<$>
map
(
\
d
->
let
date'
=
fromJust
$
_hd_publication_year
d
title
=
fromJust
$
_hd_title
d
<$>
map
(
\
d
->
let
title
=
fromJust
$
_hd_title
d
abstr
=
if
(
isJust
$
_hd_abstract
d
)
then
fromJust
$
_hd_abstract
d
else
""
in
Document
date'
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
)
in
Document
(
toPhyloDate
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
)
time
)
(
toPhyloDate'
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
))
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
<$>
concat
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
...
...
@@ -101,32 +138,36 @@ wosToDocs limit patterns path = do
-- To transform a Csv file into a list of Document
csvToDocs
::
CorpusParser
->
Patterns
->
FilePath
->
IO
([
Document
])
csvToDocs
parser
patterns
path
=
csvToDocs
::
CorpusParser
->
Patterns
->
TimeUnit
->
FilePath
->
IO
([
Document
])
csvToDocs
parser
patterns
time
path
=
case
parser
of
Wos
_
->
undefined
Csv
limit
->
Vector
.
toList
Wos
_
->
undefined
Csv
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
csv_publication_year
row
)
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
))
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
)
<$>
snd
<$>
Csv
.
readFile
path
Csv
Weighted
limit
->
Vector
.
toList
Csv
'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
csv_w_publication_year
row
)
(
termsInText
patterns
$
(
csv_w_title
row
)
<>
" "
<>
(
csv_w_abstract
row
))
(
Just
$
csv_w_weight
row
)
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
(
toPhyloDate'
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
))
(
termsInText
patterns
$
(
csv'_title
row
)
<>
" "
<>
(
csv'_abstract
row
))
(
Just
$
csv'_weight
row
)
[
csv'_source
row
]
)
<$>
snd
<$>
Csv
.
readWeightedCsv
path
-- To parse a file into a list of Document
fileToDocs'
::
CorpusParser
->
FilePath
->
TermList
->
IO
[
Document
]
fileToDocs'
parser
path
lst
=
do
fileToDocs'
::
CorpusParser
->
FilePath
->
T
imeUnit
->
T
ermList
->
IO
[
Document
]
fileToDocs'
parser
path
time
lst
=
do
let
patterns
=
buildPatterns
lst
case
parser
of
Wos
limit
->
wosToDocs
limit
patterns
path
Csv
_
->
csvToDocs
parser
patterns
path
Csv
Weighted
_
->
csvToDocs
parser
patterns
path
Wos
limit
->
wosToDocs
limit
patterns
time
path
Csv
_
->
csvToDocs
parser
patterns
time
path
Csv
'
_
->
csvToDocs
parser
patterns
time
path
---------------
...
...
@@ -137,7 +178,10 @@ fileToDocs' parser path lst = do
-- Config time parameters to label
timeToLabel
::
Config
->
[
Char
]
timeToLabel
config
=
case
(
timeUnit
config
)
of
Year
p
s
f
->
(
"time"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Year
p
s
f
->
(
"time_years"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Month
p
s
f
->
(
"time_months"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Week
p
s
f
->
(
"time_weeks"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Day
p
s
f
->
(
"time_days"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
seaToLabel
::
Config
->
[
Char
]
...
...
@@ -240,7 +284,7 @@ main = do
printIOMsg
"Parse the corpus"
mapList
<-
csvMapTermList
(
listPath
config
)
corpus
<-
fileToDocs'
(
corpusParser
config
)
(
corpusPath
config
)
mapList
corpus
<-
fileToDocs'
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOMsg
"Reconstruct the phylo"
...
...
package.yaml
View file @
6bddaf45
...
...
@@ -330,7 +330,8 @@ executables:
-
optparse-generic
-
split
-
unordered-containers
-
cryptohash
-
cryptohash
-
time
gargantext-import
:
main
:
Main.hs
...
...
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
View file @
6bddaf45
...
...
@@ -398,28 +398,28 @@ parseCsv' bs = V.toList $ V.map csv2doc $ snd $ readCsvLazyBS bs
------------------------------------------------------------------------
-- Csv v3 weighted for phylo
data
WeightedCsv
=
WeightedCsv
{
csv
_w
_title
::
!
Text
,
csv
_w
_source
::
!
Text
,
csv
_w
_publication_year
::
!
Int
,
csv
_w
_publication_month
::
!
Int
,
csv
_w
_publication_day
::
!
Int
,
csv
_w
_abstract
::
!
Text
,
csv
_w
_authors
::
!
Text
,
csv
_w
_weight
::
!
Double
}
deriving
(
Show
)
instance
FromNamedRecord
WeightedCsv
where
parseNamedRecord
r
=
WeightedCsv
<$>
r
.:
"title"
<*>
r
.:
"source"
<*>
r
.:
"publication_year"
<*>
r
.:
"publication_month"
<*>
r
.:
"publication_day"
<*>
r
.:
"abstract"
<*>
r
.:
"authors"
<*>
r
.:
"weight"
readWeightedCsv
::
FilePath
->
IO
(
Header
,
Vector
WeightedCsv
)
data
Csv'
=
Csv'
{
csv
'
_title
::
!
Text
,
csv
'
_source
::
!
Text
,
csv
'
_publication_year
::
!
Int
,
csv
'
_publication_month
::
!
Int
,
csv
'
_publication_day
::
!
Int
,
csv
'
_abstract
::
!
Text
,
csv
'
_authors
::
!
Text
,
csv
'
_weight
::
!
Double
}
deriving
(
Show
)
instance
FromNamedRecord
Csv'
where
parseNamedRecord
r
=
Csv'
<$>
r
.:
"title"
<*>
r
.:
"source"
<*>
r
.:
"publication_year"
<*>
r
.:
"publication_month"
<*>
r
.:
"publication_day"
<*>
r
.:
"abstract"
<*>
r
.:
"authors"
<*>
r
.:
"weight"
readWeightedCsv
::
FilePath
->
IO
(
Header
,
Vector
Csv'
)
readWeightedCsv
fp
=
fmap
(
\
bs
->
case
decodeByNameWith
csvDecodeOptions
bs
of
...
...
src/Gargantext/Core/Text/Metrics/FrequentItemSet.hs
View file @
6bddaf45
...
...
@@ -128,17 +128,17 @@ fisWithSizePolyMap n f is =
isSublistOf
::
Ord
a
=>
[
a
]
->
[
a
]
->
Bool
isSublistOf
sub
lst
=
all
(
\
i
->
elem
i
lst
)
sub
reIndexFis
::
Ord
a
=>
[([
a
],
b
)]
->
[
Fis'
a
]
->
[(
Fis'
a
,[
b
]
)]
reIndexFis
::
Ord
a
=>
[([
a
],
(
b
,
c
))]
->
[
Fis'
a
]
->
[(
Fis'
a
,([
b
],[
c
])
)]
reIndexFis
items
fis
=
map
(
\
f
->
let
docs
=
filter
(
\
(
lst
,
_
)
->
isSublistOf
(
_fisItemSet
f
)
lst
)
items
in
(
f
,
map
snd
docs
))
fis
in
(
f
,
(
map
(
fst
.
snd
)
docs
,
map
(
snd
.
snd
)
docs
)
))
fis
wsum
::
[
Maybe
Double
]
->
Maybe
Double
wsum
lst
=
fmap
sum
$
sequence
lst
fisWithSizePolyMap'
::
Ord
a
=>
Size
->
Frequency
->
[([
a
],
Maybe
Double
)]
->
Map
(
Set
a
)
(
Int
,
Maybe
Double
)
fisWithSizePolyMap'
::
Ord
a
=>
Size
->
Frequency
->
[([
a
],
(
Maybe
Double
,[
Int
]))]
->
Map
(
Set
a
)
(
Int
,
(
Maybe
Double
,[
Int
])
)
fisWithSizePolyMap'
n
f
is
=
Map
.
fromList
$
map
(
\
(
fis
,
ws
)
->
(
Set
.
fromList
(
_fisItemSet
fis
),(
_fisCount
fis
,(
wsum
w
s
))))
$
map
(
\
(
fis
,
(
ws
,
sources
))
->
(
Set
.
fromList
(
_fisItemSet
fis
),(
_fisCount
fis
,(
wsum
ws
,
concat
source
s
))))
$
reIndexFis
is
$
fisWithSizePoly2
n
f
(
map
fst
is
)
...
...
src/Gargantext/Core/Viz/AdaptativePhylo.hs
View file @
6bddaf45
...
...
@@ -50,9 +50,9 @@ import qualified Data.Text.Lazy as TextLazy
data
CorpusParser
=
Wos
{
_wos_limit
::
Int
}
|
Csv
{
_csv_limit
::
Int
}
|
Csv
Weighted
{
_csvw
_limit
::
Int
}
Wos
{
_wos_limit
::
Int
}
|
Csv
{
_csv_limit
::
Int
}
|
Csv
'
{
_csv'
_limit
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
SeaElevation
=
...
...
@@ -107,6 +107,18 @@ data TimeUnit =
{
_year_period
::
Int
,
_year_step
::
Int
,
_year_matchingFrame
::
Int
}
|
Month
{
_month_period
::
Int
,
_month_step
::
Int
,
_month_matchingFrame
::
Int
}
|
Week
{
_week_period
::
Int
,
_week_step
::
Int
,
_week_matchingFrame
::
Int
}
|
Day
{
_day_period
::
Int
,
_day_step
::
Int
,
_day_matchingFrame
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
CliqueFilter
=
ByThreshold
|
ByNeighbours
deriving
(
Show
,
Generic
,
Eq
)
...
...
@@ -238,11 +250,14 @@ type Date = Int
-- | Ngrams : a contiguous sequence of n terms
type
Ngrams
=
Text
-- | Document : a piece of Text linked to a Date
-- Document : a piece of Text linked to a Date
-- date = computational date; date' = original string date yyyy-mm-dd
data
Document
=
Document
{
date
::
Date
,
text
::
[
Ngrams
]
,
weight
::
Maybe
Double
{
date
::
Date
,
date'
::
Text
,
text
::
[
Ngrams
]
,
weight
::
Maybe
Double
,
sources
::
[
Text
]
}
deriving
(
Eq
,
Show
,
Generic
,
NFData
)
...
...
@@ -258,6 +273,10 @@ data PhyloFoundations = PhyloFoundations
}
deriving
(
Generic
,
Show
,
Eq
)
data
PhyloSources
=
PhyloSources
{
_sources
::
!
(
Vector
Text
)
}
deriving
(
Generic
,
Show
,
Eq
)
---------------------------
-- | Coocurency Matrix | --
---------------------------
...
...
@@ -280,6 +299,7 @@ type Cooc = Map (Int,Int) Double
-- periods : the temporal steps of a phylomemy
data
Phylo
=
Phylo
{
_phylo_foundations
::
PhyloFoundations
,
_phylo_sources
::
PhyloSources
,
_phylo_timeCooc
::
!
(
Map
Date
Cooc
)
,
_phylo_timeDocs
::
!
(
Map
Date
Double
)
,
_phylo_termFreq
::
!
(
Map
Int
Double
)
...
...
@@ -299,8 +319,9 @@ type PhyloPeriodId = (Date,Date)
-- id: tuple (start date, end date) of the temporal step of the phylomemy
-- levels: levels of granularity
data
PhyloPeriod
=
PhyloPeriod
{
_phylo_periodPeriod
::
(
Date
,
Date
)
,
_phylo_periodLevels
::
Map
PhyloLevelId
PhyloLevel
PhyloPeriod
{
_phylo_periodPeriod
::
(
Date
,
Date
)
,
_phylo_periodPeriod'
::
(
Text
,
Text
)
,
_phylo_periodLevels
::
Map
PhyloLevelId
PhyloLevel
}
deriving
(
Generic
,
Show
,
Eq
)
...
...
@@ -316,9 +337,10 @@ type PhyloLevelId = (PhyloPeriodId,Level)
-- Level 1: First level of clustering (the Fis)
-- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
data
PhyloLevel
=
PhyloLevel
{
_phylo_levelPeriod
::
(
Date
,
Date
)
,
_phylo_levelLevel
::
Level
,
_phylo_levelGroups
::
Map
PhyloGroupId
PhyloGroup
PhyloLevel
{
_phylo_levelPeriod
::
(
Date
,
Date
)
,
_phylo_levelPeriod'
::
(
Text
,
Text
)
,
_phylo_levelLevel
::
Level
,
_phylo_levelGroups
::
Map
PhyloGroupId
PhyloGroup
}
deriving
(
Generic
,
Show
,
Eq
)
...
...
@@ -332,11 +354,13 @@ type PhyloBranchId = (Level, [Int])
-- | PhyloGroup : group of ngrams at each level and period
data
PhyloGroup
=
PhyloGroup
{
_phylo_groupPeriod
::
(
Date
,
Date
)
,
_phylo_groupPeriod'
::
(
Text
,
Text
)
,
_phylo_groupLevel
::
Level
,
_phylo_groupIndex
::
Int
,
_phylo_groupIndex
::
Int
,
_phylo_groupLabel
::
Text
,
_phylo_groupSupport
::
Support
,
_phylo_groupWeight
::
Maybe
Double
,
_phylo_groupSources
::
[
Int
]
,
_phylo_groupNgrams
::
[
Int
]
,
_phylo_groupCooc
::
!
(
Cooc
)
,
_phylo_groupBranchId
::
PhyloBranchId
...
...
@@ -371,6 +395,7 @@ data PhyloClique = PhyloClique
,
_phyloClique_support
::
Support
,
_phyloClique_period
::
(
Date
,
Date
)
,
_phyloClique_weight
::
Maybe
Double
,
_phyloClique_sources
::
[
Int
]
}
deriving
(
Generic
,
NFData
,
Show
,
Eq
)
----------------
...
...
@@ -444,6 +469,8 @@ makeLenses ''PhyloBranch
instance
FromJSON
Phylo
instance
ToJSON
Phylo
instance
FromJSON
PhyloSources
instance
ToJSON
PhyloSources
instance
FromJSON
PhyloParam
instance
ToJSON
PhyloParam
instance
FromJSON
PhyloPeriod
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExample.hs
View file @
6bddaf45
...
...
@@ -109,9 +109,11 @@ config =
docs
::
[
Document
]
docs
=
map
(
\
(
d
,
t
)
->
Document
d
->
Document
d
""
(
filter
(
\
n
->
isRoots
n
(
foundations
^.
foundations_roots
))
$
monoTexts
t
)
Nothing
[]
)
corpus
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
View file @
6bddaf45
...
...
@@ -120,11 +120,13 @@ branchToDotNode b bId =
,
toAttr
"label"
(
pack
$
show
$
b
^.
branch_label
)
])
periodToDotNode
::
(
Date
,
Date
)
->
Dot
DotId
periodToDotNode
prd
=
periodToDotNode
::
(
Date
,
Date
)
->
(
Text
.
Text
,
Text
.
Text
)
->
Dot
DotId
periodToDotNode
prd
prd'
=
node
(
periodIdToDotId
prd
)
([
Shape
BoxShape
,
FontSize
50
,
Label
(
toDotLabel
$
Text
.
pack
(
show
(
fst
prd
)
<>
" "
<>
show
(
snd
prd
)))]
<>
[
toAttr
"nodeType"
"period"
<>
[
toAttr
"nodeType"
"period"
,
toAttr
"strFrom"
(
fromStrict
$
Text
.
pack
$
(
show
$
fst
prd'
))
,
toAttr
"strTo"
(
fromStrict
$
Text
.
pack
$
(
show
$
snd
prd'
))
,
toAttr
"from"
(
fromStrict
$
Text
.
pack
$
(
show
$
fst
prd
))
,
toAttr
"to"
(
fromStrict
$
Text
.
pack
$
(
show
$
snd
prd
))])
...
...
@@ -136,10 +138,13 @@ groupToDotNode fdt g bId =
<>
[
toAttr
"nodeType"
"group"
,
toAttr
"from"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
,
toAttr
"to"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
,
toAttr
"strFrom"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod'
))
,
toAttr
"strTo"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod'
))
,
toAttr
"branchId"
(
pack
$
unwords
(
init
$
map
show
$
snd
$
g
^.
phylo_groupBranchId
))
,
toAttr
"bId"
(
pack
$
show
bId
)
,
toAttr
"support"
(
pack
$
show
(
g
^.
phylo_groupSupport
))
,
toAttr
"weight"
(
pack
$
show
(
g
^.
phylo_groupWeight
))
,
toAttr
"source"
(
pack
$
show
(
nub
$
g
^.
phylo_groupSources
))
,
toAttr
"lbl"
(
pack
$
show
(
ngramsToLabel
fdt
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"foundation"
(
pack
$
show
(
idxToLabel
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"role"
(
pack
$
show
(
idxToLabel'
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)))
...
...
@@ -194,6 +199,8 @@ exportToDot phylo export =
,(
toAttr
(
fromStrict
"phyloPeriods"
)
$
pack
$
show
(
length
$
elems
$
phylo
^.
phylo_periods
))
,(
toAttr
(
fromStrict
"phyloBranches"
)
$
pack
$
show
(
length
$
export
^.
export_branches
))
,(
toAttr
(
fromStrict
"phyloGroups"
)
$
pack
$
show
(
length
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"phyloSources"
)
$
pack
$
show
(
Vector
.
toList
$
getSources
phylo
))
,(
toAttr
(
fromStrict
"phyloTimeScale"
)
$
pack
$
getTimeScale
phylo
)
-- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
])
...
...
@@ -218,13 +225,13 @@ exportToDot phylo export =
{-- 5) create a layer for each period -}
_
<-
mapM
(
\
period
->
subgraph
((
Str
.
fromStrict
.
Text
.
pack
)
$
(
"Period"
<>
show
(
fst
period
)
<>
show
(
sn
d
period
)))
$
do
subgraph
((
Str
.
fromStrict
.
Text
.
pack
)
$
(
"Period"
<>
show
(
fst
$
_phylo_periodPeriod
period
)
<>
show
(
snd
$
_phylo_periodPerio
d
period
)))
$
do
graphAttrs
[
Rank
SameRank
]
periodToDotNode
period
periodToDotNode
(
period
^.
phylo_periodPeriod
)
(
period
^.
phylo_periodPeriod'
)
{-- 6) create a node for each group -}
mapM
(
\
g
->
groupToDotNode
(
getRoots
phylo
)
g
(
toBid
g
(
export
^.
export_branches
)))
(
filter
(
\
g
->
g
^.
phylo_groupPeriod
==
period
)
$
export
^.
export_groups
)
)
$
getPeriodIds
phylo
mapM
(
\
g
->
groupToDotNode
(
getRoots
phylo
)
g
(
toBid
g
(
export
^.
export_branches
)))
(
filter
(
\
g
->
g
^.
phylo_groupPeriod
==
(
period
^.
phylo_periodPeriod
)
)
$
export
^.
export_groups
)
)
$
phylo
^.
phylo_periods
{-- 7) create the edges between a branch and its first groups -}
_
<-
mapM
(
\
(
bId
,
groups
)
->
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
6bddaf45
...
...
@@ -14,6 +14,7 @@ module Gargantext.Core.Viz.Phylo.PhyloMaker where
import
Data.List
(
concat
,
nub
,
partition
,
sort
,
(
++
),
group
,
intersect
,
null
,
sortOn
,
groupBy
,
tail
)
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
restrictKeys
,
foldlWithKey
,
insert
)
import
Data.Vector
(
Vector
)
import
Data.Text
(
Text
)
import
Gargantext.Prelude
import
Gargantext.Core.Viz.AdaptativePhylo
...
...
@@ -104,7 +105,7 @@ toGroupsProxi lvl phylo =
in
phylo
&
phylo_groupsProxi
.~
((
traceGroupsProxi
.
fromList
)
groupsProxi
)
appendGroups
::
(
a
->
PhyloPeriodId
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
)
->
Level
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
::
(
a
->
PhyloPeriodId
->
(
Text
,
Text
)
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
)
->
Level
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
f
lvl
m
phylo
=
trace
(
"
\n
"
<>
"-- | Append "
<>
show
(
length
$
concat
$
elems
m
)
<>
" groups to Level "
<>
show
(
lvl
)
<>
"
\n
"
)
$
over
(
phylo_periods
.
traverse
...
...
@@ -112,12 +113,13 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
.
traverse
)
(
\
phyloLvl
->
if
lvl
==
(
phyloLvl
^.
phylo_levelLevel
)
then
let
pId
=
phyloLvl
^.
phylo_levelPeriod
let
pId
=
phyloLvl
^.
phylo_levelPeriod
pId'
=
phyloLvl
^.
phylo_levelPeriod'
phyloCUnit
=
m
!
pId
in
phyloLvl
&
phylo_levelGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
groups
++
[
(((
pId
,
lvl
),
length
groups
)
,
f
obj
pId
lvl
(
length
groups
)
,
f
obj
pId
pId'
lvl
(
length
groups
)
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[
pId
]))
]
)
[]
phyloCUnit
)
else
...
...
@@ -125,10 +127,11 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phylo
cliqueToGroup
::
PhyloClique
->
PhyloPeriodId
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
cliqueToGroup
fis
pId
lvl
idx
coocs
=
PhyloGroup
pId
lvl
idx
""
cliqueToGroup
::
PhyloClique
->
PhyloPeriodId
->
(
Text
,
Text
)
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
cliqueToGroup
fis
pId
pId'
lvl
idx
coocs
=
PhyloGroup
pId
pId'
lvl
idx
""
(
fis
^.
phyloClique_support
)
(
fis
^.
phyloClique_weight
)
(
fis
^.
phyloClique_sources
)
(
fis
^.
phyloClique_nodes
)
(
ngramsToCooc
(
fis
^.
phyloClique_nodes
)
coocs
)
(
1
,[
0
])
-- branchid (lvl,[path in the branching tree])
...
...
@@ -143,14 +146,27 @@ toPhylo1 phyloStep = case (getSeaElevation phyloStep) of
-----------------------
-- | To Phylo Step | --
-----------------------
-----------------------
indexDates'
::
Map
(
Date
,
Date
)
[
Document
]
->
Map
(
Date
,
Date
)
(
Text
,
Text
)
indexDates'
m
=
map
(
\
docs
->
let
ds
=
map
(
\
d
->
date'
d
)
docs
f
=
if
(
null
ds
)
then
""
else
toFstDate
ds
l
=
if
(
null
ds
)
then
""
else
toLstDate
ds
in
(
f
,
l
))
m
-- To build the first phylo step from docs and terms
toPhyloStep
::
[
Document
]
->
TermList
->
Config
->
Phylo
toPhyloStep
docs
lst
conf
=
case
(
getSeaElevation
phyloBase
)
of
Constante
_
_
->
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
Adaptative
_
->
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
Constante
_
_
->
appendGroups
cliqueToGroup
1
phyloClique
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
Adaptative
_
->
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
where
--------------------------------------
phyloClique
::
Map
(
Date
,
Date
)
[
PhyloClique
]
...
...
@@ -219,12 +235,12 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
Fis
_
_
->
let
fis
=
map
(
\
(
prd
,
docs
)
->
case
(
corpusParser
$
getConfig
phylo
)
of
Csv
Weighted
_
->
let
lst
=
toList
$
fisWithSizePolyMap'
(
Segment
1
20
)
1
(
map
(
\
d
->
(
ngramsToIdx
(
text
d
)
(
getRoots
phylo
),
weight
d
))
docs
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
((
fst
.
snd
)
f
)
prd
((
snd
.
snd
)
f
))
lst
)
Csv
'
_
->
let
lst
=
toList
$
fisWithSizePolyMap'
(
Segment
1
20
)
1
(
map
(
\
d
->
(
ngramsToIdx
(
text
d
)
(
getRoots
phylo
),
(
weight
d
,
(
sourcesToIdx
(
sources
d
)
(
getSources
phylo
)))
))
docs
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
((
fst
.
snd
)
f
)
prd
((
fst
.
snd
.
snd
)
f
)
(((
snd
.
snd
.
snd
)
f
)
))
lst
)
_
->
let
lst
=
toList
$
fisWithSizePolyMap
(
Segment
1
20
)
1
(
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
(
snd
f
)
prd
Nothing
)
lst
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
(
snd
f
)
prd
Nothing
[]
)
lst
)
)
$
toList
phyloDocs
fis'
=
fis
`
using
`
parList
rdeepseq
...
...
@@ -235,7 +251,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$
foldl
sumCooc
empty
$
map
listToMatrix
$
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
in
(
prd
,
map
(
\
cl
->
PhyloClique
cl
0
prd
Nothing
)
$
getMaxCliques
filterType
Conditional
thr
cooc
))
in
(
prd
,
map
(
\
cl
->
PhyloClique
cl
0
prd
Nothing
[]
)
$
getMaxCliques
filterType
Conditional
thr
cooc
))
$
toList
phyloDocs
mcl'
=
mcl
`
using
`
parList
rdeepseq
in
fromList
mcl'
...
...
@@ -342,17 +358,20 @@ docsToTimeScaleNb docs =
initPhyloLevels
::
Int
->
PhyloPeriodId
->
Map
PhyloLevelId
PhyloLevel
initPhyloLevels
lvlMax
pId
=
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
PhyloLevel
pId
lvl
empty
))
[
1
..
lvlMax
]
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
PhyloLevel
pId
(
""
,
""
)
lvl
empty
))
[
1
..
lvlMax
]
-- To init the basic elements of a Phylo
toPhyloBase
::
[
Document
]
->
TermList
->
Config
->
Phylo
toPhyloBase
docs
lst
conf
=
let
foundations
=
PhyloFoundations
(
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
)
lst
docsSources
=
PhyloSources
(
Vector
.
fromList
$
nub
$
concat
$
map
sources
docs
)
params
=
defaultPhyloParam
{
_phyloParam_config
=
conf
}
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
in
trace
(
"
\n
"
<>
"-- | Create PhyloBase out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
$
Phylo
foundations
docsSources
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleNb
docs
)
(
docsToTermFreq
docs
(
foundations
^.
foundations_roots
))
...
...
@@ -360,4 +379,4 @@ toPhyloBase docs lst conf =
empty
empty
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
initPhyloLevels
1
prd
)))
periods
)
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
""
,
""
)
(
initPhyloLevels
1
prd
)))
periods
)
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
View file @
6bddaf45
...
...
@@ -17,9 +17,9 @@ import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tai
import
Data.Set
(
Set
,
disjoint
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.String
(
String
)
import
Data.Text
(
Text
)
import
Data.Text
(
Text
,
unpack
)
import
Prelude
(
floor
)
import
Prelude
(
floor
,
read
)
import
Gargantext.Prelude
import
Gargantext.Core.Viz.AdaptativePhylo
...
...
@@ -115,6 +115,10 @@ isRoots n ns = Vector.elem n ns
ngramsToIdx
::
[
Ngrams
]
->
Vector
Ngrams
->
[
Int
]
ngramsToIdx
ns
fdt
=
map
(
\
n
->
fromJust
$
elemIndex
n
fdt
)
ns
-- | To transform a list of sources into a list of sources' index
sourcesToIdx
::
[
Text
]
->
Vector
Text
->
[
Int
]
sourcesToIdx
ss
ps
=
nub
$
map
(
\
s
->
fromJust
$
elemIndex
s
ps
)
ss
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel
::
Vector
Ngrams
->
[
Int
]
->
Text
ngramsToLabel
ngrams
l
=
Text
.
unwords
$
tail'
"ngramsToLabel"
$
concat
$
map
(
\
n
->
[
"|"
,
n
])
$
ngramsToText
ngrams
l
...
...
@@ -153,6 +157,32 @@ toPeriods dates p s =
$
chunkAlong
p
s
[
start
..
end
]
toFstDate
::
[
Text
]
->
Text
toFstDate
ds
=
snd
$
head'
"firstDate"
$
sortOn
fst
$
map
(
\
d
->
let
d'
=
read
(
filter
(
\
c
->
c
/=
'-'
)
$
unpack
d
)
::
Int
in
(
d'
,
d
))
ds
toLstDate
::
[
Text
]
->
Text
toLstDate
ds
=
snd
$
head'
"firstDate"
$
reverse
$
sortOn
fst
$
map
(
\
d
->
let
d'
=
read
(
filter
(
\
c
->
c
/=
'-'
)
$
unpack
d
)
::
Int
in
(
d'
,
d
))
ds
getTimeScale
::
Phylo
->
[
Char
]
getTimeScale
p
=
case
(
timeUnit
$
getConfig
p
)
of
Year
_
_
_
->
"year"
Month
_
_
_
->
"month"
Week
_
_
_
->
"week"
Day
_
_
_
->
"day"
-- | Get a regular & ascendante timeScale from a given list of dates
toTimeScale
::
[
Date
]
->
Int
->
[
Date
]
toTimeScale
dates
step
=
...
...
@@ -162,15 +192,24 @@ toTimeScale dates step =
getTimeStep
::
TimeUnit
->
Int
getTimeStep
time
=
case
time
of
Year
_
s
_
->
s
Year
_
s
_
->
s
Month
_
s
_
->
s
Week
_
s
_
->
s
Day
_
s
_
->
s
getTimePeriod
::
TimeUnit
->
Int
getTimePeriod
time
=
case
time
of
Year
p
_
_
->
p
Year
p
_
_
->
p
Month
p
_
_
->
p
Week
p
_
_
->
p
Day
p
_
_
->
p
getTimeFrame
::
TimeUnit
->
Int
getTimeFrame
time
=
case
time
of
Year
_
_
f
->
f
Year
_
_
f
->
f
Month
_
_
f
->
f
Week
_
_
f
->
f
Day
_
_
f
->
f
-------------
-- | Fis | --
...
...
@@ -359,6 +398,9 @@ setConfig config phylo = phylo
getRoots
::
Phylo
->
Vector
Ngrams
getRoots
phylo
=
(
phylo
^.
phylo_foundations
)
^.
foundations_roots
getSources
::
Phylo
->
Vector
Text
getSources
phylo
=
_sources
(
phylo
^.
phylo_sources
)
phyloToLastBranches
::
Phylo
->
[[
PhyloGroup
]]
phyloToLastBranches
phylo
=
elems
$
fromListWith
(
++
)
...
...
@@ -411,6 +453,16 @@ updatePhyloGroups lvl m phylo =
then
m
!
id
else
g
)
phylo
updatePeriods
::
Map
(
Date
,
Date
)
(
Text
,
Text
)
->
Phylo
->
Phylo
updatePeriods
periods'
phylo
=
over
(
phylo_periods
.
traverse
)
(
\
prd
->
let
prd'
=
periods'
!
(
prd
^.
phylo_periodPeriod
)
lvls
=
map
(
\
lvl
->
lvl
&
phylo_levelPeriod'
.~
prd'
)
$
prd
^.
phylo_periodLevels
in
prd
&
phylo_periodPeriod'
.~
prd'
&
phylo_periodLevels
.~
lvls
)
phylo
traceToPhylo
::
Level
->
Phylo
->
Phylo
traceToPhylo
lvl
phylo
=
...
...
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
View file @
6bddaf45
...
...
@@ -36,10 +36,12 @@ import qualified Data.Map as Map
mergeGroups
::
[
Cooc
]
->
PhyloGroupId
->
Map
PhyloGroupId
PhyloGroupId
->
[
PhyloGroup
]
->
PhyloGroup
mergeGroups
coocs
id
mapIds
childs
=
let
ngrams
=
(
sort
.
nub
.
concat
)
$
map
_phylo_groupNgrams
childs
in
PhyloGroup
(
fst
$
fst
id
)
(
snd
$
fst
id
)
(
snd
id
)
""
in
PhyloGroup
(
fst
$
fst
id
)
(
_phylo_groupPeriod'
$
head'
"mergeGroups"
childs
)
(
snd
$
fst
id
)
(
snd
id
)
""
(
sum
$
map
_phylo_groupSupport
childs
)
(
fmap
sum
$
sequence
$
map
_phylo_groupWeight
childs
)
$
map
_phylo_groupWeight
childs
)
(
concat
$
map
_phylo_groupSources
childs
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
((
snd
$
fst
id
),
bId
)
...
...
@@ -58,12 +60,12 @@ mergeGroups coocs id mapIds childs =
mergeAncestors
::
[
Pointer
]
->
[
Pointer
]
mergeAncestors
pointers
=
Map
.
toList
$
fromListWith
max
pointers
addPhyloLevel
::
Level
->
Phylo
->
Phylo
addPhyloLevel
lvl
phylo
=
over
(
phylo_periods
.
traverse
)
(
\
phyloPrd
->
phyloPrd
&
phylo_periodLevels
%~
(
insert
(
phyloPrd
^.
phylo_periodPeriod
,
lvl
)
(
PhyloLevel
(
phyloPrd
^.
phylo_periodPeriod
)
lvl
empty
)))
phylo
%~
(
insert
(
phyloPrd
^.
phylo_periodPeriod
,
lvl
)
(
PhyloLevel
(
phyloPrd
^.
phylo_periodPeriod
)
(
phyloPrd
^.
phylo_periodPeriod'
)
lvl
empty
)))
phylo
toNextLevel'
::
Phylo
->
[
PhyloGroup
]
->
Phylo
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment