Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
9863bd4f
Commit
9863bd4f
authored
May 04, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
[FIX] merge with dev-phylo
parents
928c717f
6bddaf45
Pipeline
#1460
failed with stage
Changes
10
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
320 additions
and
88 deletions
+320
-88
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+92
-43
package.yaml
package.yaml
+2
-1
CSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
+31
-0
FrequentItemSet.hs
src/Gargantext/Core/Text/Metrics/FrequentItemSet.hs
+25
-0
AdaptativePhylo.hs
src/Gargantext/Core/Viz/AdaptativePhylo.hs
+42
-12
PhyloExample.hs
src/Gargantext/Core/Viz/Phylo/PhyloExample.hs
+6
-2
PhyloExport.hs
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
+15
-7
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+40
-14
PhyloTools.hs
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
+57
-5
SynchronicClustering.hs
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
+10
-4
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
9863bd4f
...
@@ -19,13 +19,14 @@ module Main where
...
@@ -19,13 +19,14 @@ module Main where
import
Data.Aeson
import
Data.Aeson
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.String
(
String
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unwords
,
unpack
,
replace
)
import
Data.Text
(
Text
,
unwords
,
unpack
,
replace
,
pack
)
import
Crypto.Hash.SHA256
(
hash
)
import
Crypto.Hash.SHA256
(
hash
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
csv'_weight
)
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
...
@@ -33,23 +34,25 @@ import Gargantext.Core.Viz.AdaptativePhylo
...
@@ -33,23 +34,25 @@ import Gargantext.Core.Viz.AdaptativePhylo
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloStep
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloStep
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
)
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
import
GHC.IO
(
FilePath
)
import
GHC.IO
(
FilePath
)
import
Prelude
(
Either
(
Left
,
Right
))
import
Prelude
(
Either
(
Left
,
Right
)
,
toInteger
)
import
System.Environment
import
System.Environment
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Data.Time.Calendar
(
fromGregorian
,
diffGregorianDurationClip
,
cdMonths
,
diffDays
,
showGregorian
)
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Vector
as
Vector
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
qualified
Data.Text
as
T
import
qualified
Data.Text
as
T
-- import Debug.Trace (trace)
data
PhyloStage
=
PhyloWithCliques
|
PhyloWithLinks
deriving
(
Show
)
data
PhyloStage
=
PhyloWithCliques
|
PhyloWithLinks
deriving
(
Show
)
---------------
---------------
-- | Tools | --
-- | Tools | --
...
@@ -60,9 +63,35 @@ data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
...
@@ -60,9 +63,35 @@ data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
getFilesFromPath
::
FilePath
->
IO
([
FilePath
])
getFilesFromPath
::
FilePath
->
IO
([
FilePath
])
getFilesFromPath
path
=
do
getFilesFromPath
path
=
do
if
(
isSuffixOf
"/"
path
)
if
(
isSuffixOf
"/"
path
)
then
(
listDirectory
path
)
then
(
listDirectory
path
)
else
return
[
path
]
else
return
[
path
]
---------------
-- | Dates | --
---------------
toMonths
::
Integer
->
Int
->
Int
->
Date
toMonths
y
m
d
=
fromIntegral
$
cdMonths
$
diffGregorianDurationClip
(
fromGregorian
y
m
d
)
(
fromGregorian
0000
0
0
)
toDays
::
Integer
->
Int
->
Int
->
Date
toDays
y
m
d
=
fromIntegral
$
diffDays
(
fromGregorian
y
m
d
)
(
fromGregorian
0000
0
0
)
toPhyloDate
::
Int
->
Int
->
Int
->
TimeUnit
->
Date
toPhyloDate
y
m
d
tu
=
case
tu
of
Year
_
_
_
->
y
Month
_
_
_
->
toMonths
(
toInteger
y
)
m
d
Week
_
_
_
->
div
(
toDays
(
toInteger
y
)
m
d
)
7
Day
_
_
_
->
toDays
(
toInteger
y
)
m
d
toPhyloDate'
::
Int
->
Int
->
Int
->
Text
toPhyloDate'
y
m
d
=
pack
$
showGregorian
$
fromGregorian
(
toInteger
y
)
m
d
--------------
--------------
-- | Json | --
-- | Json | --
...
@@ -79,26 +108,28 @@ readJson path = Lazy.readFile path
...
@@ -79,26 +108,28 @@ readJson path = Lazy.readFile path
----------------
----------------
-- | To filter the Ngrams of a document based on the termList
-- | To filter the Ngrams of a document based on the termList
filterTerms
::
Patterns
->
(
a
,
Text
)
->
(
a
,
[
Text
])
termsInText
::
Patterns
->
Text
->
[
Text
]
filterTerms
patterns
(
y
,
d
)
=
(
y
,
termsInText
patterns
d
)
termsInText
pats
txt
=
nub
$
concat
$
map
(
map
unwords
)
$
extractTermsWithList
pats
txt
where
--------------------------------------
termsInText
::
Patterns
->
Text
->
[
Text
]
-- | To transform a Wos file (or [file]) into a list of Docs
termsInText
pats
txt
=
nub
$
concat
$
map
(
map
unwords
)
$
extractTermsWithList
pats
txt
wosToDocs
::
Int
->
Patterns
->
TimeUnit
->
FilePath
->
IO
([
Document
])
--------------------------------------
wosToDocs
limit
patterns
time
path
=
do
-- | To transform a Wos file (or [file]) into a readable corpus
wosToCorpus
::
Int
->
FilePath
->
IO
([(
Int
,
Text
)])
wosToCorpus
limit
path
=
do
files
<-
getFilesFromPath
path
files
<-
getFilesFromPath
path
take
limit
take
limit
<$>
map
(
\
d
->
let
date'
=
fromJust
$
_hd_publication_year
d
<$>
map
(
\
d
->
let
title
=
fromJust
$
_hd_title
d
title
=
fromJust
$
_hd_title
d
abstr
=
if
(
isJust
$
_hd_abstract
d
)
abstr
=
if
(
isJust
$
_hd_abstract
d
)
then
fromJust
$
_hd_abstract
d
then
fromJust
$
_hd_abstract
d
else
""
else
""
in
(
date'
,
title
<>
" "
<>
abstr
))
in
Document
(
toPhyloDate
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
)
time
)
(
toPhyloDate'
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
))
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
<$>
concat
<$>
concat
<$>
mapConcurrently
(
\
file
->
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
...
@@ -106,33 +137,51 @@ wosToCorpus limit path = do
...
@@ -106,33 +137,51 @@ wosToCorpus limit path = do
<$>
parseFile
WOS
(
path
<>
file
)
)
files
<$>
parseFile
WOS
(
path
<>
file
)
)
files
-- | To transform a Csv file into a readable corpus
-- To transform a Csv file into a list of Document
csvToCorpus
::
Int
->
FilePath
->
IO
([(
Int
,
Text
)])
csvToDocs
::
CorpusParser
->
Patterns
->
TimeUnit
->
FilePath
->
IO
([
Document
])
csvToCorpus
limit
path
=
Vector
.
toList
csvToDocs
parser
patterns
time
path
=
<$>
Vector
.
take
limit
case
parser
of
<$>
Vector
.
map
(
\
row
->
(
csv_publication_year
row
,
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
)))
Wos
_
->
undefined
<$>
snd
<$>
Csv
.
readFile
path
Csv
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
)
time
)
-- | To use the correct parser given a CorpusType
(
toPhyloDate'
(
csv_publication_year
row
)
(
csv_publication_month
row
)
(
csv_publication_day
row
))
fileToCorpus
::
CorpusParser
->
FilePath
->
IO
([(
Int
,
Text
)])
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
fileToCorpus
parser
path
=
case
parser
of
Nothing
Wos
limit
->
wosToCorpus
limit
path
[]
Csv
limit
->
csvToCorpus
limit
path
)
<$>
snd
<$>
Csv
.
readFile
path
Csv'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
(
toPhyloDate'
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
))
(
termsInText
patterns
$
(
csv'_title
row
)
<>
" "
<>
(
csv'_abstract
row
))
(
Just
$
csv'_weight
row
)
[
csv'_source
row
]
)
<$>
snd
<$>
Csv
.
readWeightedCsv
path
-- To parse a file into a list of Document
fileToDocs'
::
CorpusParser
->
FilePath
->
TimeUnit
->
TermList
->
IO
[
Document
]
fileToDocs'
parser
path
time
lst
=
do
let
patterns
=
buildPatterns
lst
case
parser
of
Wos
limit
->
wosToDocs
limit
patterns
time
path
Csv
_
->
csvToDocs
parser
patterns
time
path
Csv'
_
->
csvToDocs
parser
patterns
time
path
-- | To parse a file into a list of Document
---------------
fileToDocs
::
CorpusParser
->
FilePath
->
TermList
->
IO
[
Document
]
-- | Label | --
fileToDocs
parser
path
lst
=
do
---------------
corpus
<-
fileToCorpus
parser
path
let
patterns
=
buildPatterns
lst
pure
$
map
(
(
\
(
y
,
t
)
->
Document
y
t
)
.
filterTerms
patterns
)
corpus
-- Config time parameters to label
-- Config time parameters to label
timeToLabel
::
Config
->
[
Char
]
timeToLabel
::
Config
->
[
Char
]
timeToLabel
config
=
case
(
timeUnit
config
)
of
timeToLabel
config
=
case
(
timeUnit
config
)
of
Year
p
s
f
->
(
"time"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Year
p
s
f
->
(
"time_years"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Month
p
s
f
->
(
"time_months"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Week
p
s
f
->
(
"time_weeks"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Day
p
s
f
->
(
"time_days"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
seaToLabel
::
Config
->
[
Char
]
seaToLabel
::
Config
->
[
Char
]
...
@@ -235,7 +284,7 @@ main = do
...
@@ -235,7 +284,7 @@ main = do
printIOMsg
"Parse the corpus"
printIOMsg
"Parse the corpus"
mapList
<-
csvMapTermList
(
listPath
config
)
mapList
<-
csvMapTermList
(
listPath
config
)
corpus
<-
fileToDocs
(
corpusParser
config
)
(
corpusPath
config
)
mapList
corpus
<-
fileToDocs
'
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOMsg
"Reconstruct the phylo"
printIOMsg
"Reconstruct the phylo"
...
...
package.yaml
View file @
9863bd4f
...
@@ -330,7 +330,8 @@ executables:
...
@@ -330,7 +330,8 @@ executables:
-
optparse-generic
-
optparse-generic
-
split
-
split
-
unordered-containers
-
unordered-containers
-
cryptohash
-
cryptohash
-
time
gargantext-import
:
gargantext-import
:
main
:
Main.hs
main
:
Main.hs
...
...
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
View file @
9863bd4f
...
@@ -395,3 +395,34 @@ parseCsv fp = V.toList <$> V.map csv2doc <$> snd <$> readFile fp
...
@@ -395,3 +395,34 @@ parseCsv fp = V.toList <$> V.map csv2doc <$> snd <$> readFile fp
parseCsv'
::
BL
.
ByteString
->
[
HyperdataDocument
]
parseCsv'
::
BL
.
ByteString
->
[
HyperdataDocument
]
parseCsv'
bs
=
V
.
toList
$
V
.
map
csv2doc
$
snd
$
readCsvLazyBS
bs
parseCsv'
bs
=
V
.
toList
$
V
.
map
csv2doc
$
snd
$
readCsvLazyBS
bs
------------------------------------------------------------------------
-- Csv v3 weighted for phylo
data
Csv'
=
Csv'
{
csv'_title
::
!
Text
,
csv'_source
::
!
Text
,
csv'_publication_year
::
!
Int
,
csv'_publication_month
::
!
Int
,
csv'_publication_day
::
!
Int
,
csv'_abstract
::
!
Text
,
csv'_authors
::
!
Text
,
csv'_weight
::
!
Double
}
deriving
(
Show
)
instance
FromNamedRecord
Csv'
where
parseNamedRecord
r
=
Csv'
<$>
r
.:
"title"
<*>
r
.:
"source"
<*>
r
.:
"publication_year"
<*>
r
.:
"publication_month"
<*>
r
.:
"publication_day"
<*>
r
.:
"abstract"
<*>
r
.:
"authors"
<*>
r
.:
"weight"
readWeightedCsv
::
FilePath
->
IO
(
Header
,
Vector
Csv'
)
readWeightedCsv
fp
=
fmap
(
\
bs
->
case
decodeByNameWith
csvDecodeOptions
bs
of
Left
e
->
panic
(
pack
e
)
Right
corpus
->
corpus
)
$
BL
.
readFile
fp
\ No newline at end of file
src/Gargantext/Core/Text/Metrics/FrequentItemSet.hs
View file @
9863bd4f
...
@@ -21,6 +21,7 @@ module Gargantext.Core.Text.Metrics.FrequentItemSet
...
@@ -21,6 +21,7 @@ module Gargantext.Core.Text.Metrics.FrequentItemSet
,
fisWithSizePoly
,
fisWithSizePoly
,
fisWithSizePoly2
,
fisWithSizePoly2
,
fisWithSizePolyMap
,
fisWithSizePolyMap
,
fisWithSizePolyMap'
,
module
HLCM
,
module
HLCM
)
)
where
where
...
@@ -35,6 +36,8 @@ import qualified Data.Map.Strict as Map
...
@@ -35,6 +36,8 @@ import qualified Data.Map.Strict as Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Set
as
Set
import
qualified
Data.Vector
as
V
import
qualified
Data.Vector
as
V
import
Control.Monad
(
sequence
)
data
Size
=
Point
Int
|
Segment
Int
Int
data
Size
=
Point
Int
|
Segment
Int
Int
------------------------------------------------------------------------
------------------------------------------------------------------------
...
@@ -120,6 +123,28 @@ fisWithSizePolyMap n f is =
...
@@ -120,6 +123,28 @@ fisWithSizePolyMap n f is =
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
---- Weighted [[Item]]
isSublistOf
::
Ord
a
=>
[
a
]
->
[
a
]
->
Bool
isSublistOf
sub
lst
=
all
(
\
i
->
elem
i
lst
)
sub
reIndexFis
::
Ord
a
=>
[([
a
],(
b
,
c
))]
->
[
Fis'
a
]
->
[(
Fis'
a
,([
b
],[
c
]))]
reIndexFis
items
fis
=
map
(
\
f
->
let
docs
=
filter
(
\
(
lst
,
_
)
->
isSublistOf
(
_fisItemSet
f
)
lst
)
items
in
(
f
,
(
map
(
fst
.
snd
)
docs
,
map
(
snd
.
snd
)
docs
)))
fis
wsum
::
[
Maybe
Double
]
->
Maybe
Double
wsum
lst
=
fmap
sum
$
sequence
lst
fisWithSizePolyMap'
::
Ord
a
=>
Size
->
Frequency
->
[([
a
],
(
Maybe
Double
,[
Int
]))]
->
Map
(
Set
a
)
(
Int
,
(
Maybe
Double
,[
Int
]))
fisWithSizePolyMap'
n
f
is
=
Map
.
fromList
$
map
(
\
(
fis
,(
ws
,
sources
))
->
(
Set
.
fromList
(
_fisItemSet
fis
),(
_fisCount
fis
,(
wsum
ws
,
concat
sources
))))
$
reIndexFis
is
$
fisWithSizePoly2
n
f
(
map
fst
is
)
------------------------------------------------------------------------
------------------------------------------------------------------------
--
--
---- | /!\ indexes are not the same:
---- | /!\ indexes are not the same:
...
...
src/Gargantext/Core/Viz/AdaptativePhylo.hs
View file @
9863bd4f
...
@@ -50,8 +50,9 @@ import qualified Data.Text.Lazy as TextLazy
...
@@ -50,8 +50,9 @@ import qualified Data.Text.Lazy as TextLazy
data
CorpusParser
=
data
CorpusParser
=
Wos
{
_wos_limit
::
Int
}
Wos
{
_wos_limit
::
Int
}
|
Csv
{
_csv_limit
::
Int
}
|
Csv
{
_csv_limit
::
Int
}
|
Csv'
{
_csv'_limit
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
data
SeaElevation
=
data
SeaElevation
=
...
@@ -106,6 +107,18 @@ data TimeUnit =
...
@@ -106,6 +107,18 @@ data TimeUnit =
{
_year_period
::
Int
{
_year_period
::
Int
,
_year_step
::
Int
,
_year_step
::
Int
,
_year_matchingFrame
::
Int
}
,
_year_matchingFrame
::
Int
}
|
Month
{
_month_period
::
Int
,
_month_step
::
Int
,
_month_matchingFrame
::
Int
}
|
Week
{
_week_period
::
Int
,
_week_step
::
Int
,
_week_matchingFrame
::
Int
}
|
Day
{
_day_period
::
Int
,
_day_step
::
Int
,
_day_matchingFrame
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
data
CliqueFilter
=
ByThreshold
|
ByNeighbours
deriving
(
Show
,
Generic
,
Eq
)
data
CliqueFilter
=
ByThreshold
|
ByNeighbours
deriving
(
Show
,
Generic
,
Eq
)
...
@@ -231,17 +244,20 @@ defaultPhyloParam =
...
@@ -231,17 +244,20 @@ defaultPhyloParam =
-- | Document | --
-- | Document | --
------------------
------------------
-- | Date : a simple Integer
-- | Date : a simple Integer
type
Date
=
Int
type
Date
=
Int
-- | Ngrams : a contiguous sequence of n terms
-- | Ngrams : a contiguous sequence of n terms
type
Ngrams
=
Text
type
Ngrams
=
Text
-- | Document : a piece of Text linked to a Date
-- Document : a piece of Text linked to a Date
-- date = computational date; date' = original string date yyyy-mm-dd
data
Document
=
Document
data
Document
=
Document
{
date
::
Date
{
date
::
Date
,
text
::
[
Ngrams
]
,
date'
::
Text
,
text
::
[
Ngrams
]
,
weight
::
Maybe
Double
,
sources
::
[
Text
]
}
deriving
(
Eq
,
Show
,
Generic
,
NFData
)
}
deriving
(
Eq
,
Show
,
Generic
,
NFData
)
...
@@ -257,6 +273,10 @@ data PhyloFoundations = PhyloFoundations
...
@@ -257,6 +273,10 @@ data PhyloFoundations = PhyloFoundations
}
deriving
(
Generic
,
Show
,
Eq
)
}
deriving
(
Generic
,
Show
,
Eq
)
data
PhyloSources
=
PhyloSources
{
_sources
::
!
(
Vector
Text
)
}
deriving
(
Generic
,
Show
,
Eq
)
---------------------------
---------------------------
-- | Coocurency Matrix | --
-- | Coocurency Matrix | --
---------------------------
---------------------------
...
@@ -279,6 +299,7 @@ type Cooc = Map (Int,Int) Double
...
@@ -279,6 +299,7 @@ type Cooc = Map (Int,Int) Double
-- periods : the temporal steps of a phylomemy
-- periods : the temporal steps of a phylomemy
data
Phylo
=
data
Phylo
=
Phylo
{
_phylo_foundations
::
PhyloFoundations
Phylo
{
_phylo_foundations
::
PhyloFoundations
,
_phylo_sources
::
PhyloSources
,
_phylo_timeCooc
::
!
(
Map
Date
Cooc
)
,
_phylo_timeCooc
::
!
(
Map
Date
Cooc
)
,
_phylo_timeDocs
::
!
(
Map
Date
Double
)
,
_phylo_timeDocs
::
!
(
Map
Date
Double
)
,
_phylo_termFreq
::
!
(
Map
Int
Double
)
,
_phylo_termFreq
::
!
(
Map
Int
Double
)
...
@@ -298,8 +319,9 @@ type PhyloPeriodId = (Date,Date)
...
@@ -298,8 +319,9 @@ type PhyloPeriodId = (Date,Date)
-- id: tuple (start date, end date) of the temporal step of the phylomemy
-- id: tuple (start date, end date) of the temporal step of the phylomemy
-- levels: levels of granularity
-- levels: levels of granularity
data
PhyloPeriod
=
data
PhyloPeriod
=
PhyloPeriod
{
_phylo_periodPeriod
::
(
Date
,
Date
)
PhyloPeriod
{
_phylo_periodPeriod
::
(
Date
,
Date
)
,
_phylo_periodLevels
::
Map
PhyloLevelId
PhyloLevel
,
_phylo_periodPeriod'
::
(
Text
,
Text
)
,
_phylo_periodLevels
::
Map
PhyloLevelId
PhyloLevel
}
deriving
(
Generic
,
Show
,
Eq
)
}
deriving
(
Generic
,
Show
,
Eq
)
...
@@ -315,9 +337,10 @@ type PhyloLevelId = (PhyloPeriodId,Level)
...
@@ -315,9 +337,10 @@ type PhyloLevelId = (PhyloPeriodId,Level)
-- Level 1: First level of clustering (the Fis)
-- Level 1: First level of clustering (the Fis)
-- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
-- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
data
PhyloLevel
=
data
PhyloLevel
=
PhyloLevel
{
_phylo_levelPeriod
::
(
Date
,
Date
)
PhyloLevel
{
_phylo_levelPeriod
::
(
Date
,
Date
)
,
_phylo_levelLevel
::
Level
,
_phylo_levelPeriod'
::
(
Text
,
Text
)
,
_phylo_levelGroups
::
Map
PhyloGroupId
PhyloGroup
,
_phylo_levelLevel
::
Level
,
_phylo_levelGroups
::
Map
PhyloGroupId
PhyloGroup
}
}
deriving
(
Generic
,
Show
,
Eq
)
deriving
(
Generic
,
Show
,
Eq
)
...
@@ -331,10 +354,13 @@ type PhyloBranchId = (Level, [Int])
...
@@ -331,10 +354,13 @@ type PhyloBranchId = (Level, [Int])
-- | PhyloGroup : group of ngrams at each level and period
-- | PhyloGroup : group of ngrams at each level and period
data
PhyloGroup
=
data
PhyloGroup
=
PhyloGroup
{
_phylo_groupPeriod
::
(
Date
,
Date
)
PhyloGroup
{
_phylo_groupPeriod
::
(
Date
,
Date
)
,
_phylo_groupPeriod'
::
(
Text
,
Text
)
,
_phylo_groupLevel
::
Level
,
_phylo_groupLevel
::
Level
,
_phylo_groupIndex
::
Int
,
_phylo_groupIndex
::
Int
,
_phylo_groupLabel
::
Text
,
_phylo_groupLabel
::
Text
,
_phylo_groupSupport
::
Support
,
_phylo_groupSupport
::
Support
,
_phylo_groupWeight
::
Maybe
Double
,
_phylo_groupSources
::
[
Int
]
,
_phylo_groupNgrams
::
[
Int
]
,
_phylo_groupNgrams
::
[
Int
]
,
_phylo_groupCooc
::
!
(
Cooc
)
,
_phylo_groupCooc
::
!
(
Cooc
)
,
_phylo_groupBranchId
::
PhyloBranchId
,
_phylo_groupBranchId
::
PhyloBranchId
...
@@ -368,6 +394,8 @@ data PhyloClique = PhyloClique
...
@@ -368,6 +394,8 @@ data PhyloClique = PhyloClique
{
_phyloClique_nodes
::
[
Int
]
{
_phyloClique_nodes
::
[
Int
]
,
_phyloClique_support
::
Support
,
_phyloClique_support
::
Support
,
_phyloClique_period
::
(
Date
,
Date
)
,
_phyloClique_period
::
(
Date
,
Date
)
,
_phyloClique_weight
::
Maybe
Double
,
_phyloClique_sources
::
[
Int
]
}
deriving
(
Generic
,
NFData
,
Show
,
Eq
)
}
deriving
(
Generic
,
NFData
,
Show
,
Eq
)
----------------
----------------
...
@@ -441,6 +469,8 @@ makeLenses ''PhyloBranch
...
@@ -441,6 +469,8 @@ makeLenses ''PhyloBranch
instance
FromJSON
Phylo
instance
FromJSON
Phylo
instance
ToJSON
Phylo
instance
ToJSON
Phylo
instance
FromJSON
PhyloSources
instance
ToJSON
PhyloSources
instance
FromJSON
PhyloParam
instance
FromJSON
PhyloParam
instance
ToJSON
PhyloParam
instance
ToJSON
PhyloParam
instance
FromJSON
PhyloPeriod
instance
FromJSON
PhyloPeriod
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExample.hs
View file @
9863bd4f
...
@@ -109,8 +109,12 @@ config =
...
@@ -109,8 +109,12 @@ config =
docs
::
[
Document
]
docs
::
[
Document
]
docs
=
map
(
\
(
d
,
t
)
docs
=
map
(
\
(
d
,
t
)
->
Document
d
(
filter
(
\
n
->
isRoots
n
(
foundations
^.
foundations_roots
))
->
Document
d
$
monoTexts
t
))
corpus
""
(
filter
(
\
n
->
isRoots
n
(
foundations
^.
foundations_roots
))
$
monoTexts
t
)
Nothing
[]
)
corpus
foundations
::
PhyloFoundations
foundations
::
PhyloFoundations
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
View file @
9863bd4f
...
@@ -120,11 +120,13 @@ branchToDotNode b bId =
...
@@ -120,11 +120,13 @@ branchToDotNode b bId =
,
toAttr
"label"
(
pack
$
show
$
b
^.
branch_label
)
,
toAttr
"label"
(
pack
$
show
$
b
^.
branch_label
)
])
])
periodToDotNode
::
(
Date
,
Date
)
->
Dot
DotId
periodToDotNode
::
(
Date
,
Date
)
->
(
Text
.
Text
,
Text
.
Text
)
->
Dot
DotId
periodToDotNode
prd
=
periodToDotNode
prd
prd'
=
node
(
periodIdToDotId
prd
)
node
(
periodIdToDotId
prd
)
([
Shape
BoxShape
,
FontSize
50
,
Label
(
toDotLabel
$
Text
.
pack
(
show
(
fst
prd
)
<>
" "
<>
show
(
snd
prd
)))]
([
Shape
BoxShape
,
FontSize
50
,
Label
(
toDotLabel
$
Text
.
pack
(
show
(
fst
prd
)
<>
" "
<>
show
(
snd
prd
)))]
<>
[
toAttr
"nodeType"
"period"
<>
[
toAttr
"nodeType"
"period"
,
toAttr
"strFrom"
(
fromStrict
$
Text
.
pack
$
(
show
$
fst
prd'
))
,
toAttr
"strTo"
(
fromStrict
$
Text
.
pack
$
(
show
$
snd
prd'
))
,
toAttr
"from"
(
fromStrict
$
Text
.
pack
$
(
show
$
fst
prd
))
,
toAttr
"from"
(
fromStrict
$
Text
.
pack
$
(
show
$
fst
prd
))
,
toAttr
"to"
(
fromStrict
$
Text
.
pack
$
(
show
$
snd
prd
))])
,
toAttr
"to"
(
fromStrict
$
Text
.
pack
$
(
show
$
snd
prd
))])
...
@@ -136,9 +138,13 @@ groupToDotNode fdt g bId =
...
@@ -136,9 +138,13 @@ groupToDotNode fdt g bId =
<>
[
toAttr
"nodeType"
"group"
<>
[
toAttr
"nodeType"
"group"
,
toAttr
"from"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
,
toAttr
"from"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod
))
,
toAttr
"to"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
,
toAttr
"to"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod
))
,
toAttr
"strFrom"
(
pack
$
show
(
fst
$
g
^.
phylo_groupPeriod'
))
,
toAttr
"strTo"
(
pack
$
show
(
snd
$
g
^.
phylo_groupPeriod'
))
,
toAttr
"branchId"
(
pack
$
unwords
(
init
$
map
show
$
snd
$
g
^.
phylo_groupBranchId
))
,
toAttr
"branchId"
(
pack
$
unwords
(
init
$
map
show
$
snd
$
g
^.
phylo_groupBranchId
))
,
toAttr
"bId"
(
pack
$
show
bId
)
,
toAttr
"bId"
(
pack
$
show
bId
)
,
toAttr
"support"
(
pack
$
show
(
g
^.
phylo_groupSupport
))
,
toAttr
"support"
(
pack
$
show
(
g
^.
phylo_groupSupport
))
,
toAttr
"weight"
(
pack
$
show
(
g
^.
phylo_groupWeight
))
,
toAttr
"source"
(
pack
$
show
(
nub
$
g
^.
phylo_groupSources
))
,
toAttr
"lbl"
(
pack
$
show
(
ngramsToLabel
fdt
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"lbl"
(
pack
$
show
(
ngramsToLabel
fdt
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"foundation"
(
pack
$
show
(
idxToLabel
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"foundation"
(
pack
$
show
(
idxToLabel
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"role"
(
pack
$
show
(
idxToLabel'
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)))
,
toAttr
"role"
(
pack
$
show
(
idxToLabel'
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)))
...
@@ -193,6 +199,8 @@ exportToDot phylo export =
...
@@ -193,6 +199,8 @@ exportToDot phylo export =
,(
toAttr
(
fromStrict
"phyloPeriods"
)
$
pack
$
show
(
length
$
elems
$
phylo
^.
phylo_periods
))
,(
toAttr
(
fromStrict
"phyloPeriods"
)
$
pack
$
show
(
length
$
elems
$
phylo
^.
phylo_periods
))
,(
toAttr
(
fromStrict
"phyloBranches"
)
$
pack
$
show
(
length
$
export
^.
export_branches
))
,(
toAttr
(
fromStrict
"phyloBranches"
)
$
pack
$
show
(
length
$
export
^.
export_branches
))
,(
toAttr
(
fromStrict
"phyloGroups"
)
$
pack
$
show
(
length
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"phyloGroups"
)
$
pack
$
show
(
length
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"phyloSources"
)
$
pack
$
show
(
Vector
.
toList
$
getSources
phylo
))
,(
toAttr
(
fromStrict
"phyloTimeScale"
)
$
pack
$
getTimeScale
phylo
)
-- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
-- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
])
])
...
@@ -217,13 +225,13 @@ exportToDot phylo export =
...
@@ -217,13 +225,13 @@ exportToDot phylo export =
{-- 5) create a layer for each period -}
{-- 5) create a layer for each period -}
_
<-
mapM
(
\
period
->
_
<-
mapM
(
\
period
->
subgraph
((
Str
.
fromStrict
.
Text
.
pack
)
$
(
"Period"
<>
show
(
fst
period
)
<>
show
(
sn
d
period
)))
$
do
subgraph
((
Str
.
fromStrict
.
Text
.
pack
)
$
(
"Period"
<>
show
(
fst
$
_phylo_periodPeriod
period
)
<>
show
(
snd
$
_phylo_periodPerio
d
period
)))
$
do
graphAttrs
[
Rank
SameRank
]
graphAttrs
[
Rank
SameRank
]
periodToDotNode
period
periodToDotNode
(
period
^.
phylo_periodPeriod
)
(
period
^.
phylo_periodPeriod'
)
{-- 6) create a node for each group -}
{-- 6) create a node for each group -}
mapM
(
\
g
->
groupToDotNode
(
getRoots
phylo
)
g
(
toBid
g
(
export
^.
export_branches
)))
(
filter
(
\
g
->
g
^.
phylo_groupPeriod
==
period
)
$
export
^.
export_groups
)
mapM
(
\
g
->
groupToDotNode
(
getRoots
phylo
)
g
(
toBid
g
(
export
^.
export_branches
)))
(
filter
(
\
g
->
g
^.
phylo_groupPeriod
==
(
period
^.
phylo_periodPeriod
)
)
$
export
^.
export_groups
)
)
$
getPeriodIds
phylo
)
$
phylo
^.
phylo_periods
{-- 7) create the edges between a branch and its first groups -}
{-- 7) create the edges between a branch and its first groups -}
_
<-
mapM
(
\
(
bId
,
groups
)
->
_
<-
mapM
(
\
(
bId
,
groups
)
->
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
9863bd4f
...
@@ -14,6 +14,7 @@ module Gargantext.Core.Viz.Phylo.PhyloMaker where
...
@@ -14,6 +14,7 @@ module Gargantext.Core.Viz.Phylo.PhyloMaker where
import
Data.List
(
concat
,
nub
,
partition
,
sort
,
(
++
),
group
,
intersect
,
null
,
sortOn
,
groupBy
,
tail
)
import
Data.List
(
concat
,
nub
,
partition
,
sort
,
(
++
),
group
,
intersect
,
null
,
sortOn
,
groupBy
,
tail
)
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
restrictKeys
,
foldlWithKey
,
insert
)
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
restrictKeys
,
foldlWithKey
,
insert
)
import
Data.Vector
(
Vector
)
import
Data.Vector
(
Vector
)
import
Data.Text
(
Text
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core.Viz.AdaptativePhylo
import
Gargantext.Core.Viz.AdaptativePhylo
...
@@ -21,7 +22,7 @@ import Gargantext.Core.Viz.Phylo.PhyloTools
...
@@ -21,7 +22,7 @@ import Gargantext.Core.Viz.Phylo.PhyloTools
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
adaptativeTemporalMatching
,
constanteTemporalMatching
,
getNextPeriods
,
filterDocs
,
filterDiago
,
reduceDiagos
,
toProximity
)
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
adaptativeTemporalMatching
,
constanteTemporalMatching
,
getNextPeriods
,
filterDocs
,
filterDiago
,
reduceDiagos
,
toProximity
)
import
Gargantext.Core.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Gargantext.Core.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Metrics.FrequentItemSet
(
fisWithSizePolyMap
,
Size
(
..
))
import
Gargantext.Core.Text.Metrics.FrequentItemSet
(
fisWithSizePolyMap
,
fisWithSizePolyMap'
,
Size
(
..
))
import
Gargantext.Core.Methods.Graph.MaxClique
(
getMaxCliques
)
import
Gargantext.Core.Methods.Graph.MaxClique
(
getMaxCliques
)
import
Gargantext.Core.Methods.Distances
(
Distance
(
Conditional
))
import
Gargantext.Core.Methods.Distances
(
Distance
(
Conditional
))
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toHorizon
)
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toHorizon
)
...
@@ -104,7 +105,7 @@ toGroupsProxi lvl phylo =
...
@@ -104,7 +105,7 @@ toGroupsProxi lvl phylo =
in
phylo
&
phylo_groupsProxi
.~
((
traceGroupsProxi
.
fromList
)
groupsProxi
)
in
phylo
&
phylo_groupsProxi
.~
((
traceGroupsProxi
.
fromList
)
groupsProxi
)
appendGroups
::
(
a
->
PhyloPeriodId
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
)
->
Level
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
::
(
a
->
PhyloPeriodId
->
(
Text
,
Text
)
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
)
->
Level
->
Map
(
Date
,
Date
)
[
a
]
->
Phylo
->
Phylo
appendGroups
f
lvl
m
phylo
=
trace
(
"
\n
"
<>
"-- | Append "
<>
show
(
length
$
concat
$
elems
m
)
<>
" groups to Level "
<>
show
(
lvl
)
<>
"
\n
"
)
appendGroups
f
lvl
m
phylo
=
trace
(
"
\n
"
<>
"-- | Append "
<>
show
(
length
$
concat
$
elems
m
)
<>
" groups to Level "
<>
show
(
lvl
)
<>
"
\n
"
)
$
over
(
phylo_periods
$
over
(
phylo_periods
.
traverse
.
traverse
...
@@ -112,12 +113,13 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
...
@@ -112,12 +113,13 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
.
traverse
)
.
traverse
)
(
\
phyloLvl
->
if
lvl
==
(
phyloLvl
^.
phylo_levelLevel
)
(
\
phyloLvl
->
if
lvl
==
(
phyloLvl
^.
phylo_levelLevel
)
then
then
let
pId
=
phyloLvl
^.
phylo_levelPeriod
let
pId
=
phyloLvl
^.
phylo_levelPeriod
pId'
=
phyloLvl
^.
phylo_levelPeriod'
phyloCUnit
=
m
!
pId
phyloCUnit
=
m
!
pId
in
phyloLvl
in
phyloLvl
&
phylo_levelGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
&
phylo_levelGroups
.~
(
fromList
$
foldl
(
\
groups
obj
->
groups
++
[
(((
pId
,
lvl
),
length
groups
)
groups
++
[
(((
pId
,
lvl
),
length
groups
)
,
f
obj
pId
lvl
(
length
groups
)
,
f
obj
pId
pId'
lvl
(
length
groups
)
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[
pId
]))
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[
pId
]))
]
)
[]
phyloCUnit
)
]
)
[]
phyloCUnit
)
else
else
...
@@ -125,9 +127,11 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
...
@@ -125,9 +127,11 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
phylo
phylo
cliqueToGroup
::
PhyloClique
->
PhyloPeriodId
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
cliqueToGroup
::
PhyloClique
->
PhyloPeriodId
->
(
Text
,
Text
)
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
cliqueToGroup
fis
pId
lvl
idx
coocs
=
PhyloGroup
pId
lvl
idx
""
cliqueToGroup
fis
pId
pId'
lvl
idx
coocs
=
PhyloGroup
pId
pId'
lvl
idx
""
(
fis
^.
phyloClique_support
)
(
fis
^.
phyloClique_support
)
(
fis
^.
phyloClique_weight
)
(
fis
^.
phyloClique_sources
)
(
fis
^.
phyloClique_nodes
)
(
fis
^.
phyloClique_nodes
)
(
ngramsToCooc
(
fis
^.
phyloClique_nodes
)
coocs
)
(
ngramsToCooc
(
fis
^.
phyloClique_nodes
)
coocs
)
(
1
,[
0
])
-- branchid (lvl,[path in the branching tree])
(
1
,[
0
])
-- branchid (lvl,[path in the branching tree])
...
@@ -142,14 +146,27 @@ toPhylo1 phyloStep = case (getSeaElevation phyloStep) of
...
@@ -142,14 +146,27 @@ toPhylo1 phyloStep = case (getSeaElevation phyloStep) of
-----------------------
-----------------------
-- | To Phylo Step | --
-- | To Phylo Step | --
-----------------------
-----------------------
indexDates'
::
Map
(
Date
,
Date
)
[
Document
]
->
Map
(
Date
,
Date
)
(
Text
,
Text
)
indexDates'
m
=
map
(
\
docs
->
let
ds
=
map
(
\
d
->
date'
d
)
docs
f
=
if
(
null
ds
)
then
""
else
toFstDate
ds
l
=
if
(
null
ds
)
then
""
else
toLstDate
ds
in
(
f
,
l
))
m
-- To build the first phylo step from docs and terms
-- To build the first phylo step from docs and terms
toPhyloStep
::
[
Document
]
->
TermList
->
Config
->
Phylo
toPhyloStep
::
[
Document
]
->
TermList
->
Config
->
Phylo
toPhyloStep
docs
lst
conf
=
case
(
getSeaElevation
phyloBase
)
of
toPhyloStep
docs
lst
conf
=
case
(
getSeaElevation
phyloBase
)
of
Constante
_
_
->
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
Constante
_
_
->
appendGroups
cliqueToGroup
1
phyloClique
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
Adaptative
_
->
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
phyloBase
Adaptative
_
->
toGroupsProxi
1
$
appendGroups
cliqueToGroup
1
phyloClique
(
updatePeriods
(
indexDates'
docs'
)
phyloBase
)
where
where
--------------------------------------
--------------------------------------
phyloClique
::
Map
(
Date
,
Date
)
[
PhyloClique
]
phyloClique
::
Map
(
Date
,
Date
)
[
PhyloClique
]
...
@@ -217,8 +234,14 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
...
@@ -217,8 +234,14 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
phyloClique
=
case
(
clique
$
getConfig
phylo
)
of
phyloClique
=
case
(
clique
$
getConfig
phylo
)
of
Fis
_
_
->
Fis
_
_
->
let
fis
=
map
(
\
(
prd
,
docs
)
->
let
fis
=
map
(
\
(
prd
,
docs
)
->
let
lst
=
toList
$
fisWithSizePolyMap
(
Segment
1
20
)
1
(
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
)
case
(
corpusParser
$
getConfig
phylo
)
of
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
(
snd
f
)
prd
)
lst
))
Csv'
_
->
let
lst
=
toList
$
fisWithSizePolyMap'
(
Segment
1
20
)
1
(
map
(
\
d
->
(
ngramsToIdx
(
text
d
)
(
getRoots
phylo
),
(
weight
d
,
(
sourcesToIdx
(
sources
d
)
(
getSources
phylo
)))))
docs
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
((
fst
.
snd
)
f
)
prd
((
fst
.
snd
.
snd
)
f
)
(((
snd
.
snd
.
snd
)
f
)))
lst
)
_
->
let
lst
=
toList
$
fisWithSizePolyMap
(
Segment
1
20
)
1
(
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
(
snd
f
)
prd
Nothing
[]
)
lst
)
)
$
toList
phyloDocs
$
toList
phyloDocs
fis'
=
fis
`
using
`
parList
rdeepseq
fis'
=
fis
`
using
`
parList
rdeepseq
in
fromList
fis'
in
fromList
fis'
...
@@ -228,7 +251,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
...
@@ -228,7 +251,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$
foldl
sumCooc
empty
$
foldl
sumCooc
empty
$
map
listToMatrix
$
map
listToMatrix
$
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
$
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
in
(
prd
,
map
(
\
cl
->
PhyloClique
cl
0
prd
)
$
getMaxCliques
filterType
Conditional
thr
cooc
))
in
(
prd
,
map
(
\
cl
->
PhyloClique
cl
0
prd
Nothing
[]
)
$
getMaxCliques
filterType
Conditional
thr
cooc
))
$
toList
phyloDocs
$
toList
phyloDocs
mcl'
=
mcl
`
using
`
parList
rdeepseq
mcl'
=
mcl
`
using
`
parList
rdeepseq
in
fromList
mcl'
in
fromList
mcl'
...
@@ -335,17 +358,20 @@ docsToTimeScaleNb docs =
...
@@ -335,17 +358,20 @@ docsToTimeScaleNb docs =
initPhyloLevels
::
Int
->
PhyloPeriodId
->
Map
PhyloLevelId
PhyloLevel
initPhyloLevels
::
Int
->
PhyloPeriodId
->
Map
PhyloLevelId
PhyloLevel
initPhyloLevels
lvlMax
pId
=
initPhyloLevels
lvlMax
pId
=
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
PhyloLevel
pId
lvl
empty
))
[
1
..
lvlMax
]
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
PhyloLevel
pId
(
""
,
""
)
lvl
empty
))
[
1
..
lvlMax
]
-- To init the basic elements of a Phylo
-- To init the basic elements of a Phylo
toPhyloBase
::
[
Document
]
->
TermList
->
Config
->
Phylo
toPhyloBase
::
[
Document
]
->
TermList
->
Config
->
Phylo
toPhyloBase
docs
lst
conf
=
toPhyloBase
docs
lst
conf
=
let
foundations
=
PhyloFoundations
(
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
)
lst
let
foundations
=
PhyloFoundations
(
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
)
lst
docsSources
=
PhyloSources
(
Vector
.
fromList
$
nub
$
concat
$
map
sources
docs
)
params
=
defaultPhyloParam
{
_phyloParam_config
=
conf
}
params
=
defaultPhyloParam
{
_phyloParam_config
=
conf
}
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
in
trace
(
"
\n
"
<>
"-- | Create PhyloBase out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
in
trace
(
"
\n
"
<>
"-- | Create PhyloBase out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
$
Phylo
foundations
$
Phylo
foundations
docsSources
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleNb
docs
)
(
docsToTimeScaleNb
docs
)
(
docsToTermFreq
docs
(
foundations
^.
foundations_roots
))
(
docsToTermFreq
docs
(
foundations
^.
foundations_roots
))
...
@@ -353,4 +379,4 @@ toPhyloBase docs lst conf =
...
@@ -353,4 +379,4 @@ toPhyloBase docs lst conf =
empty
empty
empty
empty
params
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
initPhyloLevels
1
prd
)))
periods
)
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
""
,
""
)
(
initPhyloLevels
1
prd
)))
periods
)
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
View file @
9863bd4f
...
@@ -17,9 +17,9 @@ import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tai
...
@@ -17,9 +17,9 @@ import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tai
import
Data.Set
(
Set
,
disjoint
)
import
Data.Set
(
Set
,
disjoint
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.String
(
String
)
import
Data.String
(
String
)
import
Data.Text
(
Text
)
import
Data.Text
(
Text
,
unpack
)
import
Prelude
(
floor
)
import
Prelude
(
floor
,
read
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core.Viz.AdaptativePhylo
import
Gargantext.Core.Viz.AdaptativePhylo
...
@@ -115,6 +115,10 @@ isRoots n ns = Vector.elem n ns
...
@@ -115,6 +115,10 @@ isRoots n ns = Vector.elem n ns
ngramsToIdx
::
[
Ngrams
]
->
Vector
Ngrams
->
[
Int
]
ngramsToIdx
::
[
Ngrams
]
->
Vector
Ngrams
->
[
Int
]
ngramsToIdx
ns
fdt
=
map
(
\
n
->
fromJust
$
elemIndex
n
fdt
)
ns
ngramsToIdx
ns
fdt
=
map
(
\
n
->
fromJust
$
elemIndex
n
fdt
)
ns
-- | To transform a list of sources into a list of sources' index
sourcesToIdx
::
[
Text
]
->
Vector
Text
->
[
Int
]
sourcesToIdx
ss
ps
=
nub
$
map
(
\
s
->
fromJust
$
elemIndex
s
ps
)
ss
-- | To transform a list of Ngrams Indexes into a Label
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel
::
Vector
Ngrams
->
[
Int
]
->
Text
ngramsToLabel
::
Vector
Ngrams
->
[
Int
]
->
Text
ngramsToLabel
ngrams
l
=
Text
.
unwords
$
tail'
"ngramsToLabel"
$
concat
$
map
(
\
n
->
[
"|"
,
n
])
$
ngramsToText
ngrams
l
ngramsToLabel
ngrams
l
=
Text
.
unwords
$
tail'
"ngramsToLabel"
$
concat
$
map
(
\
n
->
[
"|"
,
n
])
$
ngramsToText
ngrams
l
...
@@ -153,6 +157,32 @@ toPeriods dates p s =
...
@@ -153,6 +157,32 @@ toPeriods dates p s =
$
chunkAlong
p
s
[
start
..
end
]
$
chunkAlong
p
s
[
start
..
end
]
toFstDate
::
[
Text
]
->
Text
toFstDate
ds
=
snd
$
head'
"firstDate"
$
sortOn
fst
$
map
(
\
d
->
let
d'
=
read
(
filter
(
\
c
->
c
/=
'-'
)
$
unpack
d
)
::
Int
in
(
d'
,
d
))
ds
toLstDate
::
[
Text
]
->
Text
toLstDate
ds
=
snd
$
head'
"firstDate"
$
reverse
$
sortOn
fst
$
map
(
\
d
->
let
d'
=
read
(
filter
(
\
c
->
c
/=
'-'
)
$
unpack
d
)
::
Int
in
(
d'
,
d
))
ds
getTimeScale
::
Phylo
->
[
Char
]
getTimeScale
p
=
case
(
timeUnit
$
getConfig
p
)
of
Year
_
_
_
->
"year"
Month
_
_
_
->
"month"
Week
_
_
_
->
"week"
Day
_
_
_
->
"day"
-- | Get a regular & ascendante timeScale from a given list of dates
-- | Get a regular & ascendante timeScale from a given list of dates
toTimeScale
::
[
Date
]
->
Int
->
[
Date
]
toTimeScale
::
[
Date
]
->
Int
->
[
Date
]
toTimeScale
dates
step
=
toTimeScale
dates
step
=
...
@@ -162,15 +192,24 @@ toTimeScale dates step =
...
@@ -162,15 +192,24 @@ toTimeScale dates step =
getTimeStep
::
TimeUnit
->
Int
getTimeStep
::
TimeUnit
->
Int
getTimeStep
time
=
case
time
of
getTimeStep
time
=
case
time
of
Year
_
s
_
->
s
Year
_
s
_
->
s
Month
_
s
_
->
s
Week
_
s
_
->
s
Day
_
s
_
->
s
getTimePeriod
::
TimeUnit
->
Int
getTimePeriod
::
TimeUnit
->
Int
getTimePeriod
time
=
case
time
of
getTimePeriod
time
=
case
time
of
Year
p
_
_
->
p
Year
p
_
_
->
p
Month
p
_
_
->
p
Week
p
_
_
->
p
Day
p
_
_
->
p
getTimeFrame
::
TimeUnit
->
Int
getTimeFrame
::
TimeUnit
->
Int
getTimeFrame
time
=
case
time
of
getTimeFrame
time
=
case
time
of
Year
_
_
f
->
f
Year
_
_
f
->
f
Month
_
_
f
->
f
Week
_
_
f
->
f
Day
_
_
f
->
f
-------------
-------------
-- | Fis | --
-- | Fis | --
...
@@ -359,6 +398,9 @@ setConfig config phylo = phylo
...
@@ -359,6 +398,9 @@ setConfig config phylo = phylo
getRoots
::
Phylo
->
Vector
Ngrams
getRoots
::
Phylo
->
Vector
Ngrams
getRoots
phylo
=
(
phylo
^.
phylo_foundations
)
^.
foundations_roots
getRoots
phylo
=
(
phylo
^.
phylo_foundations
)
^.
foundations_roots
getSources
::
Phylo
->
Vector
Text
getSources
phylo
=
_sources
(
phylo
^.
phylo_sources
)
phyloToLastBranches
::
Phylo
->
[[
PhyloGroup
]]
phyloToLastBranches
::
Phylo
->
[[
PhyloGroup
]]
phyloToLastBranches
phylo
=
elems
phyloToLastBranches
phylo
=
elems
$
fromListWith
(
++
)
$
fromListWith
(
++
)
...
@@ -411,6 +453,16 @@ updatePhyloGroups lvl m phylo =
...
@@ -411,6 +453,16 @@ updatePhyloGroups lvl m phylo =
then
m
!
id
then
m
!
id
else
g
)
phylo
else
g
)
phylo
updatePeriods
::
Map
(
Date
,
Date
)
(
Text
,
Text
)
->
Phylo
->
Phylo
updatePeriods
periods'
phylo
=
over
(
phylo_periods
.
traverse
)
(
\
prd
->
let
prd'
=
periods'
!
(
prd
^.
phylo_periodPeriod
)
lvls
=
map
(
\
lvl
->
lvl
&
phylo_levelPeriod'
.~
prd'
)
$
prd
^.
phylo_periodLevels
in
prd
&
phylo_periodPeriod'
.~
prd'
&
phylo_periodLevels
.~
lvls
)
phylo
traceToPhylo
::
Level
->
Phylo
->
Phylo
traceToPhylo
::
Level
->
Phylo
->
Phylo
traceToPhylo
lvl
phylo
=
traceToPhylo
lvl
phylo
=
...
...
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
View file @
9863bd4f
...
@@ -22,6 +22,7 @@ import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty,
...
@@ -22,6 +22,7 @@ import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty,
import
Control.Lens
hiding
(
Level
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Control.Monad
(
sequence
)
-- import Debug.Trace (trace)
-- import Debug.Trace (trace)
import
qualified
Data.Map
as
Map
import
qualified
Data.Map
as
Map
...
@@ -35,8 +36,13 @@ import qualified Data.Map as Map
...
@@ -35,8 +36,13 @@ import qualified Data.Map as Map
mergeGroups
::
[
Cooc
]
->
PhyloGroupId
->
Map
PhyloGroupId
PhyloGroupId
->
[
PhyloGroup
]
->
PhyloGroup
mergeGroups
::
[
Cooc
]
->
PhyloGroupId
->
Map
PhyloGroupId
PhyloGroupId
->
[
PhyloGroup
]
->
PhyloGroup
mergeGroups
coocs
id
mapIds
childs
=
mergeGroups
coocs
id
mapIds
childs
=
let
ngrams
=
(
sort
.
nub
.
concat
)
$
map
_phylo_groupNgrams
childs
let
ngrams
=
(
sort
.
nub
.
concat
)
$
map
_phylo_groupNgrams
childs
in
PhyloGroup
(
fst
$
fst
id
)
(
snd
$
fst
id
)
(
snd
id
)
""
in
PhyloGroup
(
fst
$
fst
id
)
(
_phylo_groupPeriod'
$
head'
"mergeGroups"
childs
)
(
sum
$
map
_phylo_groupSupport
childs
)
ngrams
(
snd
$
fst
id
)
(
snd
id
)
""
(
sum
$
map
_phylo_groupSupport
childs
)
(
fmap
sum
$
sequence
$
map
_phylo_groupWeight
childs
)
(
concat
$
map
_phylo_groupSources
childs
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
ngramsToCooc
ngrams
coocs
)
((
snd
$
fst
id
),
bId
)
((
snd
$
fst
id
),
bId
)
(
mergeMeta
bId
childs
)
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
(
mergeMeta
bId
childs
)
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
...
@@ -54,12 +60,12 @@ mergeGroups coocs id mapIds childs =
...
@@ -54,12 +60,12 @@ mergeGroups coocs id mapIds childs =
mergeAncestors
::
[
Pointer
]
->
[
Pointer
]
mergeAncestors
::
[
Pointer
]
->
[
Pointer
]
mergeAncestors
pointers
=
Map
.
toList
$
fromListWith
max
pointers
mergeAncestors
pointers
=
Map
.
toList
$
fromListWith
max
pointers
addPhyloLevel
::
Level
->
Phylo
->
Phylo
addPhyloLevel
::
Level
->
Phylo
->
Phylo
addPhyloLevel
lvl
phylo
=
addPhyloLevel
lvl
phylo
=
over
(
phylo_periods
.
traverse
)
over
(
phylo_periods
.
traverse
)
(
\
phyloPrd
->
phyloPrd
&
phylo_periodLevels
(
\
phyloPrd
->
phyloPrd
&
phylo_periodLevels
%~
(
insert
(
phyloPrd
^.
phylo_periodPeriod
,
lvl
)
(
PhyloLevel
(
phyloPrd
^.
phylo_periodPeriod
)
lvl
empty
)))
phylo
%~
(
insert
(
phyloPrd
^.
phylo_periodPeriod
,
lvl
)
(
PhyloLevel
(
phyloPrd
^.
phylo_periodPeriod
)
(
phyloPrd
^.
phylo_periodPeriod'
)
lvl
empty
)))
phylo
toNextLevel'
::
Phylo
->
[
PhyloGroup
]
->
Phylo
toNextLevel'
::
Phylo
->
[
PhyloGroup
]
->
Phylo
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment