Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Julien Moutinho
haskell-gargantext
Commits
74d2038a
Commit
74d2038a
authored
May 21, 2021
by
qlobbe
Committed by
Alexandre Delanoë
Feb 14, 2022
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add a list parser param
parent
878907d0
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
312 additions
and
203 deletions
+312
-203
Main.hs
bin/gargantext-phylo/Main.hs
+72
-96
package.yaml
package.yaml
+2
-0
Phylo.hs
src/Gargantext/Core/Viz/Phylo.hs
+128
-21
PhyloTools.hs
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
+110
-86
No files found.
bin/gargantext-phylo/Main.hs
View file @
74d2038a
...
...
@@ -16,43 +16,40 @@ Adaptative Phylo binaries
module
Main
where
-- import Debug.Trace (trace)
import
Control.Concurrent.Async
(
mapConcurrently
)
import
Crypto.Hash.SHA256
(
hash
)
import
Data.Aeson
import
Data.Either
(
Either
(
..
))
import
Data.Either
(
Either
(
..
)
,
fromRight
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
)
import
Data.List.Split
import
Data.Maybe
(
fromMaybe
)
import
Data.String
(
String
)
import
GHC.IO
(
FilePath
)
import
qualified
Prelude
as
Prelude
import
System.Environment
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
Data.Text
(
Text
,
unwords
,
unpack
,
replace
,
pack
)
import
Data.Time.Calendar
(
fromGregorian
,
diffGregorianDurationClip
,
cdMonths
,
diffDays
,
showGregorian
)
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Vector
as
Vector
import
qualified
Data.Text
as
T
import
Gargantext.Prelude
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
GHC.IO
(
FilePath
)
import
Gargantext.API.Ngrams.Prelude
(
toTermList
)
import
Gargantext.API.Ngrams.Types
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
csv'_weight
)
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_publication_month
,
csv_publication_day
,
csv'_source
,
csv'_title
,
csv'_abstract
,
csv'_publication_year
,
csv'_publication_month
,
csv'_publication_day
,
csv'_weight
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Viz.Phylo
import
Gargantext.Core.Viz.Phylo.API
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
import
Gargantext.Core.Viz.Phylo.PhyloMaker
(
toPhylo
,
toPhyloStep
)
import
Gargantext.Core.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
,
setConfig
)
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
import
Gargantext.Core.Viz.Phylo.API
(
toPhyloDate
,
toPhyloDate'
)
-- import Debug.Trace (trace)
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
(
..
))
import
Gargantext.Prelude
import
System.Directory
(
listDirectory
,
doesFileExist
)
import
System.Environment
import
qualified
Data.ByteString.Char8
as
C8
import
qualified
Data.ByteString.Lazy
as
Lazy
import
qualified
Data.Text
as
T
import
qualified
Data.Vector
as
Vector
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
data
PhyloStage
=
PhyloWithCliques
|
PhyloWithLinks
deriving
(
Show
)
...
...
@@ -60,24 +57,13 @@ data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
-- | Tools | --
---------------
-- | To get all the files in a directory or just a file
getFilesFromPath
::
FilePath
->
IO
([
FilePath
])
getFilesFromPath
::
FilePath
->
IO
[
FilePath
]
getFilesFromPath
path
=
do
if
(
isSuffixOf
"/"
path
)
then
(
listDirectory
path
)
else
return
[
path
]
--------------
-- | Json | --
--------------
-- | To read and decode a Json file
readJson
::
FilePath
->
IO
Lazy
.
ByteString
readJson
path
=
Lazy
.
readFile
path
----------------
-- | Parser | --
----------------
...
...
@@ -91,11 +77,6 @@ termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList p
wosToDocs
::
Int
->
Patterns
->
TimeUnit
->
FilePath
->
IO
[
Document
]
wosToDocs
limit
patterns
time
path
=
do
files
<-
getFilesFromPath
path
let
parseFile'
file
=
do
eParsed
<-
parseFile
WOS
(
path
<>
file
)
case
eParsed
of
Right
ps
->
pure
ps
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
take
limit
<$>
map
(
\
d
->
let
title
=
fromJust
$
_hd_title
d
abstr
=
if
(
isJust
$
_hd_abstract
d
)
...
...
@@ -108,13 +89,13 @@ wosToDocs limit patterns time path = do
(
toPhyloDate'
(
fromIntegral
$
fromJust
$
_hd_publication_year
d
)
(
fromJust
$
_hd_publication_month
d
)
(
fromJust
$
_hd_publication_day
d
)
)
(
fromJust
$
_hd_publication_day
d
)
time
)
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
[]
)
<$>
concat
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
&&
(
isJust
$
_hd_title
d
))
<$>
parseFile'
file
)
files
<$>
fromRight
[]
<$>
parseFile
WOS
(
path
<>
file
)
)
files
-- To transform a Csv file into a list of Document
...
...
@@ -122,31 +103,21 @@ csvToDocs :: CorpusParser -> Patterns -> TimeUnit -> FilePath -> IO [Document]
csvToDocs
parser
patterns
time
path
=
case
parser
of
Wos
_
->
undefined
Csv
limit
->
do
eR
<-
Csv
.
readFile
path
case
eR
of
Right
r
->
pure
$
Vector
.
toList
$
Vector
.
take
limit
$
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
))
Csv
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
)
time
)
(
toPhyloDate'
(
Csv
.
fromMIntOrDec
Csv
.
defaultYear
$
csv_publication_year
row
)
(
fromMaybe
Csv
.
defaultMonth
$
csv_publication_month
row
)
(
fromMaybe
Csv
.
defaultDay
$
csv_publication_day
row
)
time
)
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
Nothing
[]
)
$
snd
r
Left
e
->
panic
$
"Error: "
<>
(
pack
e
)
)
<$>
snd
<$>
either
(
\
err
->
panic
$
cs
$
"CSV error"
<>
(
show
err
))
identity
<$>
Csv
.
readFile
path
Csv'
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
toPhyloDate
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
(
toPhyloDate'
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
))
(
toPhyloDate'
(
csv'_publication_year
row
)
(
csv'_publication_month
row
)
(
csv'_publication_day
row
)
time
)
(
termsInText
patterns
$
(
csv'_title
row
)
<>
" "
<>
(
csv'_abstract
row
))
(
Just
$
csv'_weight
row
)
[
csv'_source
row
]
(
map
(
T
.
strip
.
pack
)
$
splitOn
";"
(
unpack
$
(
csv'_source
row
)))
)
<$>
snd
<$>
Csv
.
readWeightedCsv
path
...
...
@@ -168,8 +139,9 @@ fileToDocs' parser path time lst = do
-- Config time parameters to label
timeToLabel
::
Config
->
[
Char
]
timeToLabel
config
=
case
(
timeUnit
config
)
of
Epoch
p
s
f
->
(
"time_epochs"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Year
p
s
f
->
(
"time_years"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Month
p
s
f
->
(
"time_months"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Month
p
s
f
->
(
"time_months"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Week
p
s
f
->
(
"time_weeks"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
Day
p
s
f
->
(
"time_days"
<>
"_"
<>
(
show
p
)
<>
"_"
<>
(
show
s
)
<>
"_"
<>
(
show
f
))
...
...
@@ -182,7 +154,7 @@ seaToLabel config = case (seaElevation config) of
sensToLabel
::
Config
->
[
Char
]
sensToLabel
config
=
case
(
phyloProximity
config
)
of
Hamming
->
undefined
Hamming
_
->
undefined
WeightedLogJaccard
s
->
(
"WeightedLogJaccard_"
<>
show
s
)
WeightedLogSim
s
->
(
"WeightedLogSim-sens_"
<>
show
s
)
...
...
@@ -240,18 +212,23 @@ configToSha stage config = unpack
<>
(
show
(
phyloLevel
config
))
writePhylo
::
[
Char
]
->
Phylo
->
IO
()
writePhylo
path
phylo
=
Lazy
.
writeFile
path
$
encode
phylo
readPhylo
::
[
Char
]
->
IO
Phylo
readPhylo
path
=
do
phyloJson
<-
(
eitherDecode
<$>
readJson
path
)
::
IO
(
Either
String
Phylo
)
case
phyloJson
of
readListV4
::
[
Char
]
->
IO
NgramsList
readListV4
path
=
do
listJson
<-
(
eitherDecode
<$>
readJson
path
)
::
IO
(
Either
String
NgramsList
)
case
listJson
of
Left
err
->
do
putStrLn
err
undefined
Right
phylo
->
pure
phylo
Right
listV4
->
pure
listV4
fileToList
::
ListParser
->
FilePath
->
IO
TermList
fileToList
parser
path
=
case
parser
of
V3
->
csvMapTermList
path
V4
->
fromJust
<$>
toTermList
MapTerm
NgramsTerms
<$>
readListV4
path
--------------
...
...
@@ -273,7 +250,7 @@ main = do
Right
config
->
do
printIOMsg
"Parse the corpus"
mapList
<-
csvMapTermList
(
listPath
config
)
mapList
<-
fileToList
(
listParser
config
)
(
listPath
config
)
corpus
<-
fileToDocs'
(
corpusParser
config
)
(
corpusPath
config
)
(
timeUnit
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
...
...
@@ -297,7 +274,6 @@ main = do
-- let phylo = toPhylo (setConfig config phyloStep)
-- QL: 2 files read from disk
phyloWithLinks
<-
if
phyloWithLinksExists
then
do
printIOMsg
"Reconstruct the phylo from an existing file with intertemporal links"
...
...
package.yaml
View file @
74d2038a
...
...
@@ -54,6 +54,7 @@ library:
-
Gargantext.API.Ngrams
-
Gargantext.API.Ngrams.Tools
-
Gargantext.API.Ngrams.Types
-
Gargantext.API.Ngrams.Prelude
-
Gargantext.API.Admin.Settings
-
Gargantext.API.Admin.EnvTypes
-
Gargantext.API.Admin.Types
...
...
@@ -73,6 +74,7 @@ library:
-
Gargantext.Database.Query.Table.Node
-
Gargantext.Database.Query.Table.Node.UpdateOpaleye
-
Gargantext.Database.Query.Table.NgramsPostag
-
Gargantext.Database.Schema.Ngrams
-
Gargantext.Database.Prelude
-
Gargantext.Database.Admin.Trigger.Init
-
Gargantext.Database.Admin.Config
...
...
src/Gargantext/Core/Viz/Phylo.hs
View file @
74d2038a
{-
|
{-
Module : Gargantext.Core.Viz.AdaptativePhylo
Description : Phylomemy definitions and types.
Copyright : (c) CNRS, 2017-Present
...
...
@@ -26,6 +26,8 @@ one 8, e54847.
module
Gargantext.Core.Viz.Phylo
where
import
Data.Swagger
import
Gargantext.Core.Utils.Prefix
(
unPrefixSwagger
)
import
Control.DeepSeq
(
NFData
)
import
Control.Lens
(
makeLenses
)
import
Data.Aeson
...
...
@@ -50,6 +52,14 @@ data CorpusParser =
|
Csv'
{
_csv'_limit
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
CorpusParser
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_"
)
data
ListParser
=
V3
|
V4
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
ListParser
data
SeaElevation
=
Constante
{
_cons_start
::
Double
...
...
@@ -58,6 +68,8 @@ data SeaElevation =
{
_adap_granularity
::
Double
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
SeaElevation
data
Proximity
=
WeightedLogJaccard
{
_wlj_sensibility
::
Double
...
...
@@ -77,13 +89,23 @@ data Proximity =
-- , _wlj_elevation :: Double
-}
}
|
Hamming
|
Hamming
{
_wlj_sensibility
::
Double
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Proximity
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
SynchronyScope
=
SingleBranch
|
SiblingBranches
|
AllBranches
deriving
(
Show
,
Generic
,
Eq
,
ToSchema
)
data
SynchronyStrategy
=
MergeRegularGroups
|
MergeAllGroups
deriving
(
Show
,
Generic
,
Eq
)
data
SynchronyScope
=
SingleBranch
|
SiblingBranches
|
AllBranches
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
SynchronyStrategy
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
SynchronyStrategy
=
MergeRegularGroups
|
MergeAllGroups
deriving
(
Show
,
Generic
,
Eq
)
data
Synchrony
=
ByProximityThreshold
...
...
@@ -96,9 +118,17 @@ data Synchrony =
,
_bpd_strategy
::
SynchronyStrategy
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Synchrony
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_"
)
data
TimeUnit
=
Year
Epoch
{
_epoch_period
::
Int
,
_epoch_step
::
Int
,
_epoch_matchingFrame
::
Int
}
|
Year
{
_year_period
::
Int
,
_year_step
::
Int
,
_year_matchingFrame
::
Int
}
...
...
@@ -116,8 +146,17 @@ data TimeUnit =
,
_day_matchingFrame
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
TimeUnit
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
CliqueFilter
=
ByThreshold
|
ByNeighbours
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
CliqueFilter
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
Clique
=
Fis
{
_fis_support
::
Int
...
...
@@ -128,18 +167,26 @@ data Clique =
,
_mcl_filter
::
CliqueFilter
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Clique
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
Quality
=
Quality
{
_qua_granularity
::
Double
,
_qua_minBranch
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Quality
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_qua_"
)
data
Config
=
Config
{
corpusPath
::
FilePath
,
listPath
::
FilePath
,
outputPath
::
FilePath
,
corpusParser
::
CorpusParser
,
listParser
::
ListParser
,
phyloName
::
Text
,
phyloLevel
::
Int
,
phyloProximity
::
Proximity
...
...
@@ -154,25 +201,28 @@ data Config =
,
exportFilter
::
[
Filter
]
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Config
defaultConfig
::
Config
defaultConfig
=
Config
{
corpusPath
=
""
,
listPath
=
""
,
outputPath
=
""
,
corpusParser
=
Csv
1000
,
phyloName
=
pack
"Default Phylo"
Config
{
corpusPath
=
"corpus.csv"
-- useful for commandline only
,
listPath
=
"list.csv"
-- useful for commandline only
,
outputPath
=
"data/"
,
corpusParser
=
Csv
100000
,
listParser
=
V4
,
phyloName
=
pack
"Phylo Name"
,
phyloLevel
=
2
,
phyloProximity
=
WeightedLogJaccard
10
,
phyloProximity
=
WeightedLogJaccard
0.5
,
seaElevation
=
Constante
0.1
0.1
,
findAncestors
=
Tru
e
,
phyloSynchrony
=
ByProximityThreshold
0.
1
10
Sibling
Branches
MergeAllGroups
,
phyloQuality
=
Quality
0
1
,
findAncestors
=
Fals
e
,
phyloSynchrony
=
ByProximityThreshold
0.
5
0
All
Branches
MergeAllGroups
,
phyloQuality
=
Quality
0
.5
1
,
timeUnit
=
Year
3
1
5
,
clique
=
MaxClique
0
3
ByNeighbours
,
clique
=
MaxClique
5
0.0001
ByThreshold
,
exportLabel
=
[
BranchLabel
MostEmergentTfIdf
2
,
GroupLabel
MostEmergentInclusive
2
]
,
exportSort
=
ByHierarchy
,
exportFilter
=
[
ByBranchSize
2
]
,
exportSort
=
ByHierarchy
Desc
,
exportFilter
=
[
ByBranchSize
3
]
}
instance
FromJSON
Config
...
...
@@ -181,6 +231,9 @@ instance ToJSON Config
instance
FromJSON
CorpusParser
instance
ToJSON
CorpusParser
instance
FromJSON
ListParser
instance
ToJSON
ListParser
instance
FromJSON
Proximity
instance
ToJSON
Proximity
...
...
@@ -230,6 +283,11 @@ data Software =
,
_software_version
::
Text
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
Software
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_software_"
)
defaultSoftware
::
Software
defaultSoftware
=
Software
{
_software_name
=
pack
"Gargantext"
...
...
@@ -243,6 +301,11 @@ data PhyloParam =
,
_phyloParam_config
::
Config
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloParam
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_phyloParam_"
)
defaultPhyloParam
::
PhyloParam
defaultPhyloParam
=
PhyloParam
{
_phyloParam_version
=
pack
"v2.adaptative"
...
...
@@ -283,10 +346,16 @@ data PhyloFoundations = PhyloFoundations
,
_foundations_mapList
::
TermList
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloFoundations
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_foundations_"
)
data
PhyloSources
=
PhyloSources
{
_sources
::
!
(
Vector
Text
)
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloSources
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_"
)
---------------------------
-- | Coocurency Matrix | --
...
...
@@ -322,6 +391,9 @@ data Phylo =
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
Phylo
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_phylo_"
)
-- | PhyloPeriodId : the id of a given period
type
PhyloPeriodId
=
(
Date
,
Date
)
...
...
@@ -335,6 +407,10 @@ data PhyloPeriod =
,
_phylo_periodLevels
::
Map
PhyloLevelId
PhyloLevel
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloPeriod
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_phylo_"
)
-- | Level : a level of clustering
type
Level
=
Int
...
...
@@ -355,6 +431,9 @@ data PhyloLevel =
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloLevel
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_phylo_"
)
type
PhyloGroupId
=
(
PhyloLevelId
,
Int
)
...
...
@@ -381,16 +460,25 @@ data PhyloGroup =
,
_phylo_groupPeriodParents
::
[
Pointer
]
,
_phylo_groupPeriodChilds
::
[
Pointer
]
,
_phylo_groupAncestors
::
[
Pointer
]
,
_phylo_groupPeriodMemoryParents
::
[
Pointer'
]
,
_phylo_groupPeriodMemoryChilds
::
[
Pointer'
]
}
deriving
(
Generic
,
Show
,
Eq
,
NFData
)
instance
ToSchema
PhyloGroup
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_phylo_"
)
-- | Weight : A generic mesure that can be associated with an Id
type
Weight
=
Double
type
Thr
=
Double
-- | Pointer : A weighted pointer to a given PhyloGroup
type
Pointer
=
(
PhyloGroupId
,
Weight
)
-- | Pointer' : A weighted pointer to a given PhyloGroup with a lower bounded threshold
type
Pointer'
=
(
PhyloGroupId
,
(
Thr
,
Weight
))
data
Filiation
=
ToParents
|
ToChilds
deriving
(
Generic
,
Show
)
data
Filiation
=
ToParents
|
ToChilds
|
ToParentsMemory
|
ToChildsMemory
deriving
(
Generic
,
Show
)
data
PointerType
=
TemporalPointer
|
LevelPointer
deriving
(
Generic
,
Show
)
...
...
@@ -415,15 +503,24 @@ data PhyloClique = PhyloClique
type
DotId
=
TextLazy
.
Text
data
EdgeType
=
GroupToGroup
|
BranchToGroup
|
BranchToBranch
|
GroupToAncestor
|
PeriodToPeriod
deriving
(
Show
,
Generic
,
Eq
)
data
EdgeType
=
GroupToGroup
|
GroupToGroupMemory
|
BranchToGroup
|
BranchToBranch
|
GroupToAncestor
|
PeriodToPeriod
deriving
(
Show
,
Generic
,
Eq
)
data
Filter
=
ByBranchSize
{
_branch_size
::
Double
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Filter
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
Order
=
Asc
|
Desc
deriving
(
Show
,
Generic
,
Eq
,
ToSchema
)
data
Order
=
Asc
|
Desc
deriving
(
Show
,
Generic
,
Eq
)
data
Sort
=
ByBirthDate
{
_sort_order
::
Order
}
|
ByHierarchy
{
_sort_order
::
Order
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Sort
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_sort_"
)
data
Sort
=
ByBirthDate
{
_sort_order
::
Order
}
|
ByHierarchy
deriving
(
Show
,
Generic
,
Eq
)
data
Tagger
=
MostInclusive
|
MostEmergentInclusive
|
MostEmergentTfIdf
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Tagger
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
PhyloLabel
=
BranchLabel
...
...
@@ -434,6 +531,10 @@ data PhyloLabel =
,
_group_labelSize
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
PhyloLabel
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_"
)
data
PhyloBranch
=
PhyloBranch
{
_branch_id
::
PhyloBranchId
...
...
@@ -447,11 +548,17 @@ data PhyloBranch =
,
_branch_meta
::
Map
Text
[
Double
]
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloBranch
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_branch_"
)
data
PhyloExport
=
PhyloExport
{
_export_groups
::
[
PhyloGroup
]
,
_export_branches
::
[
PhyloBranch
]
}
deriving
(
Generic
,
Show
)
instance
ToSchema
PhyloExport
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_export_"
)
----------------
-- | Lenses | --
...
...
src/Gargantext/Core/Viz/Phylo/PhyloTools.hs
View file @
74d2038a
...
...
@@ -13,7 +13,7 @@ Portability : POSIX
module
Gargantext.Core.Viz.Phylo.PhyloTools
where
import
Control.Lens
hiding
(
Level
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
,
tails
,
nubBy
,
group
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
,
init
,
tail
,
partition
,
tails
,
nubBy
,
group
,
notElem
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
),
filterWithKey
,
fromListWith
,
empty
,
restrictKeys
)
import
Data.Set
(
Set
,
disjoint
)
import
Data.String
(
String
)
...
...
@@ -157,7 +157,7 @@ toFstDate ds = snd
$
head'
"firstDate"
$
sortOn
fst
$
map
(
\
d
->
let
d'
=
read
(
filter
(
\
c
->
c
/=
'-'
)
$
unpack
d
)
::
Int
let
d'
=
read
(
filter
(
\
c
->
notElem
c
[
'U'
,
'T'
,
'C'
,
' '
,
':'
,
'-'
]
)
$
unpack
d
)
::
Int
in
(
d'
,
d
))
ds
toLstDate
::
[
Text
]
->
Text
...
...
@@ -166,12 +166,13 @@ toLstDate ds = snd
$
reverse
$
sortOn
fst
$
map
(
\
d
->
let
d'
=
read
(
filter
(
\
c
->
c
/=
'-'
)
$
unpack
d
)
::
Int
let
d'
=
read
(
filter
(
\
c
->
notElem
c
[
'U'
,
'T'
,
'C'
,
' '
,
':'
,
'-'
]
)
$
unpack
d
)
::
Int
in
(
d'
,
d
))
ds
getTimeScale
::
Phylo
->
[
Char
]
getTimeScale
p
=
case
(
timeUnit
$
getConfig
p
)
of
Epoch
_
_
_
->
"epoch"
Year
_
_
_
->
"year"
Month
_
_
_
->
"month"
Week
_
_
_
->
"week"
...
...
@@ -187,6 +188,7 @@ toTimeScale dates step =
getTimeStep
::
TimeUnit
->
Int
getTimeStep
time
=
case
time
of
Epoch
_
s
_
->
s
Year
_
s
_
->
s
Month
_
s
_
->
s
Week
_
s
_
->
s
...
...
@@ -194,6 +196,7 @@ getTimeStep time = case time of
getTimePeriod
::
TimeUnit
->
Int
getTimePeriod
time
=
case
time
of
Epoch
p
_
_
->
p
Year
p
_
_
->
p
Month
p
_
_
->
p
Week
p
_
_
->
p
...
...
@@ -201,6 +204,7 @@ getTimePeriod time = case time of
getTimeFrame
::
TimeUnit
->
Int
getTimeFrame
time
=
case
time
of
Epoch
_
_
f
->
f
Year
_
_
f
->
f
Month
_
_
f
->
f
Week
_
_
f
->
f
...
...
@@ -324,20 +328,22 @@ getPeriodPointers fil g =
case
fil
of
ToChilds
->
g
^.
phylo_groupPeriodChilds
ToParents
->
g
^.
phylo_groupPeriodParents
ToChildsMemory
->
undefined
ToParentsMemory
->
undefined
filterProximity
::
Proximity
->
Double
->
Double
->
Bool
filterProximity
proximity
thr
local
=
case
proximity
of
WeightedLogJaccard
_
->
local
>=
thr
WeightedLogSim
_
->
local
>=
thr
Hamming
->
undefined
Hamming
_
->
undefined
getProximityName
::
Proximity
->
String
getProximityName
proximity
=
case
proximity
of
WeightedLogJaccard
_
->
"WLJaccard"
WeightedLogSim
_
->
"WeightedLogSim"
Hamming
->
"Hamming"
Hamming
_
->
"Hamming"
---------------
-- | Phylo | --
...
...
@@ -349,9 +355,27 @@ addPointers fil pty pointers g =
TemporalPointer
->
case
fil
of
ToChilds
->
g
&
phylo_groupPeriodChilds
.~
pointers
ToParents
->
g
&
phylo_groupPeriodParents
.~
pointers
ToChildsMemory
->
undefined
ToParentsMemory
->
undefined
LevelPointer
->
case
fil
of
ToChilds
->
g
&
phylo_groupLevelChilds
.~
pointers
ToParents
->
g
&
phylo_groupLevelParents
.~
pointers
ToChildsMemory
->
undefined
ToParentsMemory
->
undefined
toPointer'
::
Double
->
Pointer
->
Pointer'
toPointer'
thr
pt
=
(
fst
pt
,(
thr
,
snd
pt
))
addMemoryPointers
::
Filiation
->
PointerType
->
Double
->
[
Pointer
]
->
PhyloGroup
->
PhyloGroup
addMemoryPointers
fil
pty
thr
pointers
g
=
case
pty
of
TemporalPointer
->
case
fil
of
ToChilds
->
undefined
ToParents
->
undefined
ToChildsMemory
->
g
&
phylo_groupPeriodMemoryChilds
.~
(
concat
[(
g
^.
phylo_groupPeriodMemoryChilds
),(
map
(
\
pt
->
toPointer'
thr
pt
)
pointers
)])
ToParentsMemory
->
g
&
phylo_groupPeriodMemoryParents
.~
(
concat
[(
g
^.
phylo_groupPeriodMemoryParents
),(
map
(
\
pt
->
toPointer'
thr
pt
)
pointers
)])
LevelPointer
->
undefined
getPeriodIds
::
Phylo
->
[(
Date
,
Date
)]
...
...
@@ -544,7 +568,7 @@ getSensibility :: Proximity -> Double
getSensibility
proxi
=
case
proxi
of
WeightedLogJaccard
s
->
s
WeightedLogSim
s
->
s
Hamming
->
undefined
Hamming
_
->
undefined
----------------
-- | Branch | --
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment