Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
145
Issues
145
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
74d2038a
Commit
74d2038a
authored
May 21, 2021
by
qlobbe
Committed by
Alexandre Delanoë
Feb 14, 2022
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add a list parser param
parent
878907d0
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
312 additions
and
203 deletions
+312
-203
Main.hs
bin/gargantext-phylo/Main.hs
+72
-96
package.yaml
package.yaml
+2
-0
Phylo.hs
src/Gargantext/Core/Viz/Phylo.hs
+128
-21
PhyloTools.hs
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
+110
-86
No files found.
bin/gargantext-phylo/Main.hs
View file @
74d2038a
...
...
@@ -16,43 +16,40 @@ Adaptative Phylo binaries
module
Main
where
-- import Debug.Trace (trace)
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Crypto.Hash.SHA256
(
hash
)
import
Data.Aeson
import
Data.Either
(
Either
(
..
))
import
Data.Either
(
Either
(
..
)
,
fromRight
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.List.Split
import
Data.Maybe
(
fromMaybe
)
import
Data.String
(
String
)
import
GHC.IO
(
FilePath
)
import
qualified
Prelude
as
Prelude
import
System.Environment
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
Data.Text
(
Text
,
unwords
,
unpack
,
replace
,
pack
)
import
Data.Time.Calendar
(
fromGregorian
,
diffGregorianDurationClip
,
cdMonths
,
diffDays
,
showGregorian
)
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Text
as
T
import
Gargantext.Prelude
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
GHC.IO
(
FilePath
)
import
Gargantext.API.Ngrams.Prelude
(
toTermList
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
csv'_weight
)
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
csv'_weight
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.API
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloStep
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
)
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
import
Gargantext.Core.Viz.Phylo.API
(
toPhyloDate
,
toPhyloDate'
)
-- import Debug.Trace (trace)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
System.Environment
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Text
as
T
import
qualified
Data.Vector
as
Vector
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
data
PhyloStage
=
PhyloWithCliques
|
PhyloWithLinks
deriving
(
Show
)
...
...
@@ -60,24 +57,13 @@ data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
-- | Tools | --
---------------
-- | To get all the files in a directory or just a file
getFilesFromPath
::
FilePath
->
IO
([
FilePath
])
getFilesFromPath
::
FilePath
->
IO
[
FilePath
]
getFilesFromPath
path
=
do
if
(
isSuffixOf
"/"
path
)
then
(
listDirectory
path
)
else
return
[
path
]
--------------
-- | Json | --
--------------
-- | To read and decode a Json file
readJson
::
FilePath
->
IO
Lazy
.
ByteString
readJson
path
=
Lazy
.
readFile
path
----------------
-- | Parser | --
----------------
...
...
@@ -90,31 +76,26 @@ termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList p
-- | To transform a Wos file (or [file]) into a list of Docs
wosToDocs
::
Int
->
Patterns
->
TimeUnit
->
FilePath
->
IO
[
Document
]
wosToDocs
limit
patterns
time
path
=
do
files
<-
getFilesFromPath
path
let
parseFile'
file
=
do
eParsed
<-
parseFile
WOS
(
path
<>
file
)
case
eParsed
of
Right
ps
->
pure
ps
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
take
limit
<$>
map
(
\
d
->
let
title
=
fromJust
$
_hd_title
d
abstr
=
if
(
isJust
$
_hd_abstract
d
)
then
fromJust
$
_hd_abstract
d
else
""
in
Document
(
toPhyloDate
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
)
time
)
(
toPhyloDate'
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
))
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
<$>
concat
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
&&
(
isJust
$
_hd_title
d
))
<$>
parseFile'
file
)
files
files
<-
getFilesFromPath
path
take
limit
<$>
map
(
\
d
->
let
title
=
fromJust
$
_hd_title
d
abstr
=
if
(
isJust
$
_hd_abstract
d
)
then
fromJust
$
_hd_abstract
d
else
""
in
Document
(
toPhyloDate
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
)
time
)
(
toPhyloDate'
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
)
time
)
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
<$>
concat
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
&&
(
isJust
$
_hd_title
d
))
<$>
fromRight
[]
<$>
parseFile
WOS
(
path
<>
file
)
)
files
-- To transform a Csv file into a list of Document
...
...
@@ -122,31 +103,21 @@ csvToDocs :: CorpusParser -> Patterns -> TimeUnit -> FilePath -> IO [Document]
csvToDocs
parser
patterns
time
path
=
case
parser
of
Wos
_
->
undefined
Csv
limit
->
do
eR
<-
Csv
.
readFile
path
case
eR
of
Right
r
->
pure
$
Vector
.
toList
$
Vector
.
take
limit
$
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
))
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
)
$
snd
r
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
Csv
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
)
time
)
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
)
<$>
snd
<$>
either
(
\
err
->
panic
$
cs
$
"CSV error"
<>
(
show
err
))
identity
<$>
Csv
.
readFile
path
Csv'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
(
toPhyloDate'
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
))
(
toPhyloDate'
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
(
termsInText
patterns
$
(
csv'_title
row
)
<>
" "
<>
(
csv'_abstract
row
))
(
Just
$
csv'_weight
row
)
[
csv'_source
row
]
(
map
(
T
.
strip
.
pack
)
$
splitOn
";"
(
unpack
$
(
csv'_source
row
)))
)
<$>
snd
<$>
Csv
.
readWeightedCsv
path
...
...
@@ -168,10 +139,11 @@ fileToDocs' parser path time lst = do
-- Config time parameters to label
timeToLabel
::
Config
->
[
Char
]
timeToLabel
config
=
case
(
timeUnit
config
)
of
Year
p
s
f
->
(
"time_years"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Month
p
s
f
->
(
"time_months"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Week
p
s
f
->
(
"time_weeks"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Day
p
s
f
->
(
"time_days"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Epoch
p
s
f
->
(
"time_epochs"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Year
p
s
f
->
(
"time_years"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Month
p
s
f
->
(
"time_months"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Week
p
s
f
->
(
"time_weeks"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Day
p
s
f
->
(
"time_days"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
seaToLabel
::
Config
->
[
Char
]
...
...
@@ -182,7 +154,7 @@ seaToLabel config = case (seaElevation config) of
sensToLabel
::
Config
->
[
Char
]
sensToLabel
config
=
case
(
phyloProximity
config
)
of
Hamming
->
undefined
Hamming
_
->
undefined
WeightedLogJaccard
s
->
(
"WeightedLogJaccard_"
<>
show
s
)
WeightedLogSim
s
->
(
"WeightedLogSim-sens_"
<>
show
s
)
...
...
@@ -240,18 +212,23 @@ configToSha stage config = unpack
<>
(
show
(
phyloLevel
config
))
writePhylo
::
[
Char
]
->
Phylo
->
IO
()
writePhylo
path
phylo
=
Lazy
.
writeFile
path
$
encode
phylo
readPhylo
::
[
Char
]
->
IO
Phylo
readPhylo
path
=
do
phyloJson
<-
(
eitherDecode
<$>
readJson
path
)
::
IO
(
Either
String
Phylo
)
case
phyloJson
of
readListV4
::
[
Char
]
->
IO
NgramsList
readListV4
path
=
do
listJson
<-
(
eitherDecode
<$>
readJson
path
)
::
IO
(
Either
String
NgramsList
)
case
listJson
of
Left
err
->
do
putStrLn
err
undefined
Right
phylo
->
pure
phylo
Right
listV4
->
pure
listV4
fileToList
::
ListParser
->
FilePath
->
IO
TermList
fileToList
parser
path
=
case
parser
of
V3
->
csvMapTermList
path
V4
->
fromJust
<$>
toTermList
MapTerm
NgramsTerms
<$>
readListV4
path
--------------
...
...
@@ -273,7 +250,7 @@ main = do
Right
config
->
do
printIOMsg
"Parse the corpus"
mapList
<-
csvMapTermList
(
listPath
config
)
mapList
<-
fileToList
(
listParser
config
)
(
listPath
config
)
corpus
<-
fileToDocs'
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
...
...
@@ -297,7 +274,6 @@ main = do
-- let phylo = toPhylo (setConfig config phyloStep)
-- QL: 2 files read from disk
phyloWithLinks
<-
if
phyloWithLinksExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file with intertemporal links"
...
...
package.yaml
View file @
74d2038a
...
...
@@ -54,6 +54,7 @@ library:
-
Gargantext.API.Ngrams
-
Gargantext.API.Ngrams.Tools
-
Gargantext.API.Ngrams.Types
-
Gargantext.API.Ngrams.Prelude
-
Gargantext.API.Admin.Settings
-
Gargantext.API.Admin.EnvTypes
-
Gargantext.API.Admin.Types
...
...
@@ -73,6 +74,7 @@ library:
-
Gargantext.Database.Query.Table.Node
-
Gargantext.Database.Query.Table.Node.UpdateOpaleye
-
Gargantext.Database.Query.Table.NgramsPostag
-
Gargantext.Database.Schema.Ngrams
-
Gargantext.Database.Prelude
-
Gargantext.Database.Admin.Trigger.Init
-
Gargantext.Database.Admin.Config
...
...
src/Gargantext/Core/Viz/Phylo.hs
View file @
74d2038a
{-
|
{-
Module : Gargantext.Core.Viz.AdaptativePhylo
Description : Phylomemy definitions and types.
Copyright : (c) CNRS, 2017-Present
...
...
@@ -26,6 +26,8 @@ one 8, e54847.
module
Gargantext.Core.Viz.Phylo
where
import
Data.Swagger
import
Gargantext.Core.Utils.Prefix
(
unPrefixSwagger
)
import
Control.DeepSeq
(
NFData
)
import
Control.Lens
(
makeLenses
)
import
Data.Aeson
...
...
@@ -50,6 +52,14 @@ data CorpusParser =
|
Csv'
{
_csv'_limit
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
CorpusParser
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_"
)
data
ListParser
=
V3
|
V4
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
ListParser
data
SeaElevation
=
Constante
{
_cons_start
::
Double
...
...
@@ -58,6 +68,8 @@ data SeaElevation =
{
_adap_granularity
::
Double
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
SeaElevation
data
Proximity
=
WeightedLogJaccard
{
_wlj_sensibility
::
Double
...
...
@@ -77,13 +89,23 @@ data Proximity =
-- , _wlj_elevation :: Double
-}
}
|
Hamming
|
Hamming
{
_wlj_sensibility
::
Double
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Proximity
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
SynchronyScope
=
SingleBranch
|
SiblingBranches
|
AllBranches
deriving
(
Show
,
Generic
,
Eq
,
ToSchema
)
data
SynchronyScope
=
SingleBranch
|
SiblingBranches
|
AllBranches
deriving
(
Show
,
Generic
,
Eq
)
data
SynchronyStrategy
=
MergeRegularGroups
|
MergeAllGroups
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
SynchronyStrategy
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
SynchronyStrategy
=
MergeRegularGroups
|
MergeAllGroups
deriving
(
Show
,
Generic
,
Eq
)
data
Synchrony
=
ByProximityThreshold
...
...
@@ -96,9 +118,17 @@ data Synchrony =
,
_bpd_strategy
::
SynchronyStrategy
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Synchrony
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_"
)
data
TimeUnit
=
Year
Epoch
{
_epoch_period
::
Int
,
_epoch_step
::
Int
,
_epoch_matchingFrame
::
Int
}
|
Year
{
_year_period
::
Int
,
_year_step
::
Int
,
_year_matchingFrame
::
Int
}
...
...
@@ -116,8 +146,17 @@ data TimeUnit =
,
_day_matchingFrame
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
TimeUnit
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
CliqueFilter
=
ByThreshold
|
ByNeighbours
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
CliqueFilter
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
Clique
=
Fis
{
_fis_support
::
Int
...
...
@@ -128,18 +167,26 @@ data Clique =
,
_mcl_filter
::
CliqueFilter
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Clique
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
Quality
=
Quality
{
_qua_granularity
::
Double
,
_qua_minBranch
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Quality
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_qua_"
)
data
Config
=
Config
{
corpusPath
::
FilePath
,
listPath
::
FilePath
,
outputPath
::
FilePath
,
corpusParser
::
CorpusParser
,
listParser
::
ListParser
,
phyloName
::
Text
,
phyloLevel
::
Int
,
phyloProximity
::
Proximity
...
...
@@ -154,25 +201,28 @@ data Config =
,
exportFilter
::
[
Filter
]
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Config
defaultConfig
::
Config
defaultConfig
=
Config
{
corpusPath
=
""
,
listPath
=
""
,
outputPath
=
""
,
corpusParser
=
Csv
1000
,
phyloName
=
pack
"Default Phylo"
Config
{
corpusPath
=
"corpus.csv"
-- useful for commandline only
,
listPath
=
"list.csv"
-- useful for commandline only
,
outputPath
=
"data/"
,
corpusParser
=
Csv
100000
,
listParser
=
V4
,
phyloName
=
pack
"Phylo Name"
,
phyloLevel
=
2
,
phyloProximity
=
WeightedLogJaccard
10
,
phyloProximity
=
WeightedLogJaccard
0.5
,
seaElevation
=
Constante
0.1
0.1
,
findAncestors
=
Tru
e
,
phyloSynchrony
=
ByProximityThreshold
0.
1
10
Sibling
Branches
MergeAllGroups
,
phyloQuality
=
Quality
0
1
,
findAncestors
=
Fals
e
,
phyloSynchrony
=
ByProximityThreshold
0.
5
0
All
Branches
MergeAllGroups
,
phyloQuality
=
Quality
0
.5
1
,
timeUnit
=
Year
3
1
5
,
clique
=
MaxClique
0
3
ByNeighbours
,
clique
=
MaxClique
5
0.0001
ByThreshold
,
exportLabel
=
[
BranchLabel
MostEmergentTfIdf
2
,
GroupLabel
MostEmergentInclusive
2
]
,
exportSort
=
ByHierarchy
,
exportFilter
=
[
ByBranchSize
2
]
,
exportSort
=
ByHierarchy
Desc
,
exportFilter
=
[
ByBranchSize
3
]
}
instance
FromJSON
Config
...
...
@@ -181,6 +231,9 @@ instance ToJSON Config
instance
FromJSON
CorpusParser
instance
ToJSON
CorpusParser
instance
FromJSON
ListParser
instance
ToJSON
ListParser
instance
FromJSON
Proximity
instance
ToJSON
Proximity
...
...
@@ -230,6 +283,11 @@ data Software =
,
_software_version
::
Text
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
Software
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_software_"
)
defaultSoftware
::
Software
defaultSoftware
=
Software
{
_software_name
=
pack
"Gargantext"
...
...
@@ -243,6 +301,11 @@ data PhyloParam =
,
_phyloParam_config
::
Config
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloParam
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_phyloParam_"
)
defaultPhyloParam
::
PhyloParam
defaultPhyloParam
=
PhyloParam
{
_phyloParam_version
=
pack
"v2.adaptative"
...
...
@@ -283,10 +346,16 @@ data PhyloFoundations = PhyloFoundations
,
_foundations_mapList
::
TermList
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloFoundations
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_foundations_"
)
data
PhyloSources
=
PhyloSources
{
_sources
::
!
(
Vector
Text
)
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloSources
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_"
)
---------------------------
-- | Coocurency Matrix | --
...
...
@@ -322,6 +391,9 @@ data Phylo =
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
Phylo
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_phylo_"
)
-- | PhyloPeriodId : the id of a given period
type
PhyloPeriodId
=
(
Date
,
Date
)
...
...
@@ -335,6 +407,10 @@ data PhyloPeriod =
,
_phylo_periodLevels
::
Map
PhyloLevelId
PhyloLevel
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloPeriod
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_phylo_"
)
-- | Level : a level of clustering
type
Level
=
Int
...
...
@@ -355,6 +431,9 @@ data PhyloLevel =
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloLevel
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_phylo_"
)
type
PhyloGroupId
=
(
PhyloLevelId
,
Int
)
...
...
@@ -381,16 +460,25 @@ data PhyloGroup =
,
_phylo_groupPeriodParents
::
[
Pointer
]
,
_phylo_groupPeriodChilds
::
[
Pointer
]
,
_phylo_groupAncestors
::
[
Pointer
]
,
_phylo_groupPeriodMemoryParents
::
[
Pointer'
]
,
_phylo_groupPeriodMemoryChilds
::
[
Pointer'
]
}
deriving
(
Generic
,
Show
,
Eq
,
NFData
)
instance
ToSchema
PhyloGroup
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_phylo_"
)
-- | Weight : A generic mesure that can be associated with an Id
type
Weight
=
Double
type
Thr
=
Double
-- | Pointer : A weighted pointer to a given PhyloGroup
type
Pointer
=
(
PhyloGroupId
,
Weight
)
-- | Pointer' : A weighted pointer to a given PhyloGroup with a lower bounded threshold
type
Pointer'
=
(
PhyloGroupId
,
(
Thr
,
Weight
))
data
Filiation
=
ToParents
|
ToChilds
deriving
(
Generic
,
Show
)
data
Filiation
=
ToParents
|
ToChilds
|
ToParentsMemory
|
ToChildsMemory
deriving
(
Generic
,
Show
)
data
PointerType
=
TemporalPointer
|
LevelPointer
deriving
(
Generic
,
Show
)
...
...
@@ -415,15 +503,24 @@ data PhyloClique = PhyloClique
type
DotId
=
TextLazy
.
Text
data
EdgeType
=
GroupToGroup
|
BranchToGroup
|
BranchToBranch
|
GroupToAncestor
|
PeriodToPeriod
deriving
(
Show
,
Generic
,
Eq
)
data
EdgeType
=
GroupToGroup
|
GroupToGroupMemory
|
BranchToGroup
|
BranchToBranch
|
GroupToAncestor
|
PeriodToPeriod
deriving
(
Show
,
Generic
,
Eq
)
data
Filter
=
ByBranchSize
{
_branch_size
::
Double
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Filter
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
Order
=
Asc
|
Desc
deriving
(
Show
,
Generic
,
Eq
,
ToSchema
)
data
Order
=
Asc
|
Desc
deriving
(
Show
,
Generic
,
Eq
)
data
Sort
=
ByBirthDate
{
_sort_order
::
Order
}
|
ByHierarchy
{
_sort_order
::
Order
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Sort
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_sort_"
)
data
Sort
=
ByBirthDate
{
_sort_order
::
Order
}
|
ByHierarchy
deriving
(
Show
,
Generic
,
Eq
)
data
Tagger
=
MostInclusive
|
MostEmergentInclusive
|
MostEmergentTfIdf
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Tagger
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
PhyloLabel
=
BranchLabel
...
...
@@ -434,6 +531,10 @@ data PhyloLabel =
,
_group_labelSize
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
PhyloLabel
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_"
)
data
PhyloBranch
=
PhyloBranch
{
_branch_id
::
PhyloBranchId
...
...
@@ -447,11 +548,17 @@ data PhyloBranch =
,
_branch_meta
::
Map
Text
[
Double
]
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloBranch
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_branch_"
)
data
PhyloExport
=
PhyloExport
{
_export_groups
::
[
PhyloGroup
]
,
_export_branches
::
[
PhyloBranch
]
}
deriving
(
Generic
,
Show
)
instance
ToSchema
PhyloExport
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_export_"
)
----------------
-- | Lenses | --
...
...
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
View file @
74d2038a
...
...
@@ -13,7 +13,7 @@ Portability : POSIX
module
Gargantext.Core.Viz.Phylo.PhyloTools
where
import
Control.Lens
hiding
(
Level
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
,
tails
,
nubBy
,
group
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
,
tails
,
nubBy
,
group
,
notElem
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.Set
(
Set
,
disjoint
)
import
Data.String
(
String
)
...
...
@@ -36,9 +36,9 @@ import qualified Data.Vector as Vector
-- | To print an important message as an IO()
printIOMsg
::
String
->
IO
()
printIOMsg
msg
=
printIOMsg
msg
=
putStrLn
(
"
\n
"
<>
"------------"
<>
"------------"
<>
"
\n
"
<>
"-- | "
<>
msg
<>
"
\n
"
)
...
...
@@ -59,13 +59,13 @@ printIOComment cmt =
truncate'
::
Double
->
Int
->
Double
truncate'
x
n
=
(
fromIntegral
$
(
floor
(
x
*
t
)
::
Int
))
/
t
where
where
--------------
t
::
Double
t
=
10
^
n
getInMap
::
Int
->
Map
Int
Double
->
Double
getInMap
k
m
=
getInMap
k
m
=
if
(
member
k
m
)
then
m
!
k
else
0
...
...
@@ -140,15 +140,15 @@ periodsToYears periods = (Set.fromList . sort . concat)
findBounds
::
[
Date
]
->
(
Date
,
Date
)
findBounds
dates
=
findBounds
dates
=
let
dates'
=
sort
dates
in
(
head'
"findBounds"
dates'
,
last'
"findBounds"
dates'
)
toPeriods
::
[
Date
]
->
Int
->
Int
->
[(
Date
,
Date
)]
toPeriods
dates
p
s
=
toPeriods
dates
p
s
=
let
(
start
,
end
)
=
findBounds
dates
in
map
(
\
dates'
->
(
head'
"toPeriods"
dates'
,
last'
"toPeriods"
dates'
))
in
map
(
\
dates'
->
(
head'
"toPeriods"
dates'
,
last'
"toPeriods"
dates'
))
$
chunkAlong
p
s
[
start
..
end
]
...
...
@@ -156,8 +156,8 @@ toFstDate :: [Text] -> Text
toFstDate
ds
=
snd
$
head'
"firstDate"
$
sortOn
fst
$
map
(
\
d
->
let
d'
=
read
(
filter
(
\
c
->
c
/=
'-'
)
$
unpack
d
)
::
Int
$
map
(
\
d
->
let
d'
=
read
(
filter
(
\
c
->
notElem
c
[
'U'
,
'T'
,
'C'
,
' '
,
':'
,
'-'
]
)
$
unpack
d
)
::
Int
in
(
d'
,
d
))
ds
toLstDate
::
[
Text
]
->
Text
...
...
@@ -165,46 +165,50 @@ toLstDate ds = snd
$
head'
"firstDate"
$
reverse
$
sortOn
fst
$
map
(
\
d
->
let
d'
=
read
(
filter
(
\
c
->
c
/=
'-'
)
$
unpack
d
)
::
Int
in
(
d'
,
d
))
ds
$
map
(
\
d
->
let
d'
=
read
(
filter
(
\
c
->
notElem
c
[
'U'
,
'T'
,
'C'
,
' '
,
':'
,
'-'
]
)
$
unpack
d
)
::
Int
in
(
d'
,
d
))
ds
getTimeScale
::
Phylo
->
[
Char
]
getTimeScale
p
=
case
(
timeUnit
$
getConfig
p
)
of
Epoch
_
_
_
->
"epoch"
Year
_
_
_
->
"year"
Month
_
_
_
->
"month"
Week
_
_
_
->
"week"
Day
_
_
_
->
"day"
Week
_
_
_
->
"week"
Day
_
_
_
->
"day"
-- | Get a regular & ascendante timeScale from a given list of dates
toTimeScale
::
[
Date
]
->
Int
->
[
Date
]
toTimeScale
dates
step
=
toTimeScale
dates
step
=
let
(
start
,
end
)
=
findBounds
dates
in
[
start
,
(
start
+
step
)
..
end
]
getTimeStep
::
TimeUnit
->
Int
getTimeStep
time
=
case
time
of
getTimeStep
time
=
case
time
of
Epoch
_
s
_
->
s
Year
_
s
_
->
s
Month
_
s
_
->
s
Week
_
s
_
->
s
Day
_
s
_
->
s
Month
_
s
_
->
s
Week
_
s
_
->
s
Day
_
s
_
->
s
getTimePeriod
::
TimeUnit
->
Int
getTimePeriod
time
=
case
time
of
getTimePeriod
time
=
case
time
of
Epoch
p
_
_
->
p
Year
p
_
_
->
p
Month
p
_
_
->
p
Week
p
_
_
->
p
Day
p
_
_
->
p
Month
p
_
_
->
p
Week
p
_
_
->
p
Day
p
_
_
->
p
getTimeFrame
::
TimeUnit
->
Int
getTimeFrame
time
=
case
time
of
getTimeFrame
time
=
case
time
of
Epoch
_
_
f
->
f
Year
_
_
f
->
f
Month
_
_
f
->
f
Week
_
_
f
->
f
Day
_
_
f
->
f
Day
_
_
f
->
f
-------------
-- | Fis | --
...
...
@@ -217,7 +221,7 @@ isNested l l'
|
null
l'
=
True
|
length
l'
>
length
l
=
False
|
(
union
l
l'
)
==
l
=
True
|
otherwise
=
False
|
otherwise
=
False
-- | To filter Fis with small Support but by keeping non empty Periods
...
...
@@ -233,7 +237,7 @@ traceClique mFis = foldl (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>
--------------------------------------
cliques
::
[
Double
]
cliques
=
sort
$
map
(
fromIntegral
.
length
.
_phyloClique_nodes
)
$
concat
$
elems
mFis
--------------------------------------
--------------------------------------
traceSupport
::
Map
(
Date
,
Date
)
[
PhyloClique
]
->
String
...
...
@@ -242,7 +246,7 @@ traceSupport mFis = foldl (\msg cpt -> msg <> show (countSup cpt supports) <> "
--------------------------------------
supports
::
[
Double
]
supports
=
sort
$
map
(
fromIntegral
.
_phyloClique_support
)
$
concat
$
elems
mFis
--------------------------------------
--------------------------------------
traceFis
::
[
Char
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
...
...
@@ -257,12 +261,12 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l
getCliqueSupport
::
Clique
->
Int
getCliqueSupport
unit
=
case
unit
of
getCliqueSupport
unit
=
case
unit
of
Fis
s
_
->
s
MaxClique
_
_
_
->
0
getCliqueSize
::
Clique
->
Int
getCliqueSize
unit
=
case
unit
of
getCliqueSize
unit
=
case
unit
of
Fis
_
s
->
s
MaxClique
s
_
_
->
s
...
...
@@ -292,7 +296,7 @@ listToSeq l = nubBy (\x y -> fst x == fst y) $ [ (x,y) | (x:rest) <- tails l, y
sumCooc
::
Cooc
->
Cooc
->
Cooc
sumCooc
cooc
cooc'
=
unionWith
(
+
)
cooc
cooc'
getTrace
::
Cooc
->
Double
getTrace
::
Cooc
->
Double
getTrace
cooc
=
sum
$
elems
$
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
cooc
coocToDiago
::
Cooc
->
Cooc
...
...
@@ -310,7 +314,7 @@ ngramsToCooc ngrams coocs =
-- | PhyloGroup | --
--------------------
getGroupId
::
PhyloGroup
->
PhyloGroupId
getGroupId
::
PhyloGroup
->
PhyloGroupId
getGroupId
g
=
((
g
^.
phylo_groupPeriod
,
g
^.
phylo_groupLevel
),
g
^.
phylo_groupIndex
)
idToPrd
::
PhyloGroupId
->
PhyloPeriodId
...
...
@@ -320,38 +324,58 @@ groupByField :: Ord a => (PhyloGroup -> a) -> [PhyloGroup] -> Map a [PhyloGroup
groupByField
toField
groups
=
fromListWith
(
++
)
$
map
(
\
g
->
(
toField
g
,
[
g
]))
groups
getPeriodPointers
::
Filiation
->
PhyloGroup
->
[
Pointer
]
getPeriodPointers
fil
g
=
case
fil
of
getPeriodPointers
fil
g
=
case
fil
of
ToChilds
->
g
^.
phylo_groupPeriodChilds
ToParents
->
g
^.
phylo_groupPeriodParents
ToChildsMemory
->
undefined
ToParentsMemory
->
undefined
filterProximity
::
Proximity
->
Double
->
Double
->
Bool
filterProximity
proximity
thr
local
=
filterProximity
proximity
thr
local
=
case
proximity
of
WeightedLogJaccard
_
->
local
>=
thr
WeightedLogSim
_
->
local
>=
thr
Hamming
->
undefined
WeightedLogSim
_
->
local
>=
thr
Hamming
_
->
undefined
getProximityName
::
Proximity
->
String
getProximityName
proximity
=
case
proximity
of
WeightedLogJaccard
_
->
"WLJaccard"
WeightedLogSim
_
->
"WeightedLogSim"
Hamming
->
"Hamming"
WeightedLogSim
_
->
"WeightedLogSim"
Hamming
_
->
"Hamming"
---------------
-- | Phylo | --
---------------
addPointers
::
Filiation
->
PointerType
->
[
Pointer
]
->
PhyloGroup
->
PhyloGroup
addPointers
fil
pty
pointers
g
=
case
pty
of
TemporalPointer
->
case
fil
of
addPointers
fil
pty
pointers
g
=
case
pty
of
TemporalPointer
->
case
fil
of
ToChilds
->
g
&
phylo_groupPeriodChilds
.~
pointers
ToParents
->
g
&
phylo_groupPeriodParents
.~
pointers
LevelPointer
->
case
fil
of
ToChildsMemory
->
undefined
ToParentsMemory
->
undefined
LevelPointer
->
case
fil
of
ToChilds
->
g
&
phylo_groupLevelChilds
.~
pointers
ToParents
->
g
&
phylo_groupLevelParents
.~
pointers
ToChildsMemory
->
undefined
ToParentsMemory
->
undefined
toPointer'
::
Double
->
Pointer
->
Pointer'
toPointer'
thr
pt
=
(
fst
pt
,(
thr
,
snd
pt
))
addMemoryPointers
::
Filiation
->
PointerType
->
Double
->
[
Pointer
]
->
PhyloGroup
->
PhyloGroup
addMemoryPointers
fil
pty
thr
pointers
g
=
case
pty
of
TemporalPointer
->
case
fil
of
ToChilds
->
undefined
ToParents
->
undefined
ToChildsMemory
->
g
&
phylo_groupPeriodMemoryChilds
.~
(
concat
[(
g
^.
phylo_groupPeriodMemoryChilds
),(
map
(
\
pt
->
toPointer'
thr
pt
)
pointers
)])
ToParentsMemory
->
g
&
phylo_groupPeriodMemoryParents
.~
(
concat
[(
g
^.
phylo_groupPeriodMemoryParents
),(
map
(
\
pt
->
toPointer'
thr
pt
)
pointers
)])
LevelPointer
->
undefined
getPeriodIds
::
Phylo
->
[(
Date
,
Date
)]
...
...
@@ -359,14 +383,14 @@ getPeriodIds phylo = sortOn fst
$
keys
$
phylo
^.
phylo_periods
getLevelParentId
::
PhyloGroup
->
PhyloGroupId
getLevelParentId
::
PhyloGroup
->
PhyloGroupId
getLevelParentId
g
=
fst
$
head'
"getLevelParentId"
$
g
^.
phylo_groupLevelParents
getLastLevel
::
Phylo
->
Level
getLastLevel
phylo
=
last'
"lastLevel"
$
getLevels
phylo
getLevels
::
Phylo
->
[
Level
]
getLevels
phylo
=
nub
getLevels
phylo
=
nub
$
map
snd
$
keys
$
view
(
phylo_periods
.
traverse
...
...
@@ -381,10 +405,10 @@ getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config
setConfig
::
Config
->
Phylo
->
Phylo
setConfig
config
phylo
=
phylo
&
phylo_param
.~
(
PhyloParam
((
phylo
^.
phylo_param
)
^.
phyloParam_version
)
((
phylo
^.
phylo_param
)
^.
phyloParam_software
)
setConfig
config
phylo
=
phylo
&
phylo_param
.~
(
PhyloParam
((
phylo
^.
phylo_param
)
^.
phyloParam_version
)
((
phylo
^.
phylo_param
)
^.
phyloParam_software
)
config
)
-- & phylo_param & phyloParam_config & phyloParam_config .~ config
...
...
@@ -397,13 +421,13 @@ getSources :: Phylo -> Vector Text
getSources
phylo
=
_sources
(
phylo
^.
phylo_sources
)
phyloToLastBranches
::
Phylo
->
[[
PhyloGroup
]]
phyloToLastBranches
phylo
=
elems
phyloToLastBranches
phylo
=
elems
$
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupBranchId
,
[
g
]))
$
getGroupsFromLevel
(
last'
"byBranches"
$
getLevels
phylo
)
phylo
getGroupsFromLevel
::
Level
->
Phylo
->
[
PhyloGroup
]
getGroupsFromLevel
lvl
phylo
=
getGroupsFromLevel
lvl
phylo
=
elems
$
view
(
phylo_periods
.
traverse
.
phylo_periodLevels
...
...
@@ -413,18 +437,18 @@ getGroupsFromLevel lvl phylo =
getGroupsFromLevelPeriods
::
Level
->
[
PhyloPeriodId
]
->
Phylo
->
[
PhyloGroup
]
getGroupsFromLevelPeriods
lvl
periods
phylo
=
getGroupsFromLevelPeriods
lvl
periods
phylo
=
elems
$
view
(
phylo_periods
.
traverse
.
filtered
(
\
phyloPrd
->
elem
(
phyloPrd
^.
phylo_periodPeriod
)
periods
)
.
phylo_periodLevels
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
lvl
)
.
phylo_levelGroups
)
phylo
.
phylo_levelGroups
)
phylo
getGroupsFromPeriods
::
Level
->
Map
PhyloPeriodId
PhyloPeriod
->
[
PhyloGroup
]
getGroupsFromPeriods
lvl
periods
=
getGroupsFromPeriods
lvl
periods
=
elems
$
view
(
traverse
.
phylo_periodLevels
.
traverse
...
...
@@ -433,25 +457,25 @@ getGroupsFromPeriods lvl periods =
updatePhyloGroups
::
Level
->
Map
PhyloGroupId
PhyloGroup
->
Phylo
->
Phylo
updatePhyloGroups
lvl
m
phylo
=
updatePhyloGroups
lvl
m
phylo
=
over
(
phylo_periods
.
traverse
.
phylo_periodLevels
.
traverse
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
lvl
)
.
phylo_levelGroups
.
traverse
)
(
\
g
->
.
traverse
)
(
\
g
->
let
id
=
getGroupId
g
in
if
member
id
m
in
if
member
id
m
then
m
!
id
else
g
)
phylo
updatePeriods
::
Map
(
Date
,
Date
)
(
Text
,
Text
)
->
Phylo
->
Phylo
updatePeriods
periods'
phylo
=
over
(
phylo_periods
.
traverse
)
(
\
prd
->
updatePeriods
periods'
phylo
=
over
(
phylo_periods
.
traverse
)
(
\
prd
->
let
prd'
=
periods'
!
(
prd
^.
phylo_periodPeriod
)
lvls
=
map
(
\
lvl
->
lvl
&
phylo_levelPeriod'
.~
prd'
)
$
prd
^.
phylo_periodLevels
in
prd
&
phylo_periodPeriod'
.~
prd'
...
...
@@ -460,10 +484,10 @@ updatePeriods periods' phylo =
traceToPhylo
::
Level
->
Phylo
->
Phylo
traceToPhylo
lvl
phylo
=
traceToPhylo
lvl
phylo
=
trace
(
"
\n
"
<>
"-- | End of phylo making at level "
<>
show
(
lvl
)
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
lvl
phylo
)
<>
" groups and "
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
lvl
phylo
)
<>
" branches"
<>
"
\n
"
)
phylo
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
lvl
phylo
)
<>
" branches"
<>
"
\n
"
)
phylo
--------------------
-- | Clustering | --
...
...
@@ -474,28 +498,28 @@ mergeBranchIds ids = (head' "mergeBranchIds" . sort . mostFreq') ids
where
-- | 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]]
-- mostUpLeft ids' =
-- mostUpLeft ids' =
-- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
-- inf = (fst . minimum) groupIds
-- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
-- | 1) find the most frequent ids
mostFreq'
::
[[
Int
]]
->
[[
Int
]]
mostFreq'
ids'
=
mostFreq'
ids'
=
let
groupIds
=
(
map
(
\
gIds
->
(
length
gIds
,
head'
"gIds"
gIds
))
.
group
.
sort
)
ids'
sup
=
(
fst
.
maximum
)
groupIds
in
map
snd
$
filter
(
\
gIds
->
fst
gIds
==
sup
)
groupIds
mergeMeta
::
[
Int
]
->
[
PhyloGroup
]
->
Map
Text
[
Double
]
mergeMeta
bId
groups
=
let
ego
=
head'
"mergeMeta"
$
filter
(
\
g
->
(
snd
(
g
^.
phylo_groupBranchId
))
==
bId
)
groups
in
fromList
[(
"breaks"
,(
ego
^.
phylo_groupMeta
)
!
"breaks"
),(
"seaLevels"
,(
ego
^.
phylo_groupMeta
)
!
"seaLevels"
)]
mergeMeta
bId
groups
=
let
ego
=
head'
"mergeMeta"
$
filter
(
\
g
->
(
snd
(
g
^.
phylo_groupBranchId
))
==
bId
)
groups
in
fromList
[(
"breaks"
,(
ego
^.
phylo_groupMeta
)
!
"breaks"
),(
"seaLevels"
,(
ego
^.
phylo_groupMeta
)
!
"seaLevels"
)]
groupsToBranches
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
groupsToBranches
groups
=
{- run the related component algorithm -}
let
egos
=
map
(
\
g
->
[
getGroupId
g
]
let
egos
=
map
(
\
g
->
[
getGroupId
g
]
++
(
map
fst
$
g
^.
phylo_groupPeriodParents
)
++
(
map
fst
$
g
^.
phylo_groupPeriodChilds
)
++
(
map
fst
$
g
^.
phylo_groupAncestors
))
$
elems
groups
...
...
@@ -510,30 +534,30 @@ relatedComponents :: Ord a => [[a]] -> [[a]]
relatedComponents
graph
=
foldl'
(
\
acc
groups
->
if
(
null
acc
)
then
acc
++
[
groups
]
else
else
let
acc'
=
partition
(
\
groups'
->
disjoint
(
Set
.
fromList
groups'
)
(
Set
.
fromList
groups
))
acc
in
(
fst
acc'
)
++
[
nub
$
concat
$
(
snd
acc'
)
++
[
groups
]])
[]
graph
toRelatedComponents
::
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
->
[[
PhyloGroup
]]
toRelatedComponents
nodes
edges
=
toRelatedComponents
nodes
edges
=
let
ref
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
nodes
clusters
=
relatedComponents
$
((
map
(
\
((
g
,
g'
),
_
)
->
[
getGroupId
g
,
getGroupId
g'
])
edges
)
++
(
map
(
\
g
->
[
getGroupId
g
])
nodes
))
in
map
(
\
cluster
->
map
(
\
gId
->
ref
!
gId
)
cluster
)
clusters
clusters
=
relatedComponents
$
((
map
(
\
((
g
,
g'
),
_
)
->
[
getGroupId
g
,
getGroupId
g'
])
edges
)
++
(
map
(
\
g
->
[
getGroupId
g
])
nodes
))
in
map
(
\
cluster
->
map
(
\
gId
->
ref
!
gId
)
cluster
)
clusters
traceSynchronyEnd
::
Phylo
->
Phylo
traceSynchronyEnd
phylo
=
trace
(
"-- | End synchronic clustering at level "
<>
show
(
getLastLevel
phylo
)
traceSynchronyEnd
phylo
=
trace
(
"-- | End synchronic clustering at level "
<>
show
(
getLastLevel
phylo
)
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" groups"
<>
" and "
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" branches"
<>
"
\n
"
)
phylo
traceSynchronyStart
::
Phylo
->
Phylo
traceSynchronyStart
phylo
=
trace
(
"
\n
"
<>
"-- | Start synchronic clustering at level "
<>
show
(
getLastLevel
phylo
)
traceSynchronyStart
phylo
=
trace
(
"
\n
"
<>
"-- | Start synchronic clustering at level "
<>
show
(
getLastLevel
phylo
)
<>
" with "
<>
show
(
length
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" groups"
<>
" and "
<>
show
(
length
$
nub
$
map
_phylo_groupBranchId
$
getGroupsFromLevel
(
getLastLevel
phylo
)
phylo
)
<>
" branches"
<>
"
\n
"
)
phylo
<>
"
\n
"
)
phylo
-------------------
...
...
@@ -541,10 +565,10 @@ traceSynchronyStart phylo =
-------------------
getSensibility
::
Proximity
->
Double
getSensibility
proxi
=
case
proxi
of
getSensibility
proxi
=
case
proxi
of
WeightedLogJaccard
s
->
s
WeightedLogSim
s
->
s
Hamming
->
undefined
WeightedLogSim
s
->
s
Hamming
_
->
undefined
----------------
-- | Branch | --
...
...
@@ -599,7 +623,7 @@ traceMatchLimit branches =
<>
",(1.."
<>
show
(
length
branches
)
<>
")]"
<>
" | "
<>
show
(
length
$
concat
branches
)
<>
" groups"
<>
"
\n
"
<>
" - unable to increase the threshold above 1"
<>
"
\n
"
)
branches
)
branches
traceMatchEnd
::
[
PhyloGroup
]
->
[
PhyloGroup
]
...
...
@@ -609,10 +633,10 @@ traceMatchEnd groups =
traceTemporalMatching
::
[
PhyloGroup
]
->
[
PhyloGroup
]
traceTemporalMatching
groups
=
traceTemporalMatching
groups
=
trace
(
"
\n
"
<>
"-- | Start temporal matching for "
<>
show
(
length
groups
)
<>
" groups"
<>
"
\n
"
)
groups
traceGroupsProxi
::
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
->
Map
(
PhyloGroupId
,
PhyloGroupId
)
Double
traceGroupsProxi
m
=
traceGroupsProxi
m
=
trace
(
"
\n
"
<>
"-- | "
<>
show
(
Map
.
size
m
)
<>
" computed pairs of groups proximity"
<>
"
\n
"
)
m
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment