Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Grégoire Locqueville
haskell-gargantext
Commits
a7cafc56
Commit
a7cafc56
authored
Apr 24, 2024
by
lobbeque
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
comment the phylo.hs file
parent
95553245
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
101 additions
and
22 deletions
+101
-22
Phylo.hs
src/Gargantext/Core/Viz/Phylo.hs
+101
-22
No files found.
src/Gargantext/Core/Viz/Phylo.hs
View file @
a7cafc56
...
...
@@ -41,70 +41,114 @@ import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import
Gargantext.Prelude
import
qualified
Data.Text.Lazy
as
TextLazy
---------------------
-- | PhyloConfig | --
---------------------
-- | CorpusParser : control which csv columns should be taken into account for reconstructing a phylo
data
CorpusParser
=
Wos
{
_wos_limit
::
Int
}
|
Csv
{
_csv_limit
::
Int
}
|
Csv'
{
_csv'_limit
::
Int
}
Wos
-- not used anymore
{
_wos_limit
::
Int
}
|
Csv
-- consider Publication_Day, Publication_Month, Publication_Year, Authors, Title, Abstract
{
_csv_limit
::
Int
}
|
Csv'
-- consider Publication_Day, Publication_Month, Publication_Year, Authors, Title, Abstract, Source, Weight
{
_csv'_limit
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
CorpusParser
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_"
)
data
ListParser
=
V3
|
V4
deriving
(
Show
,
Generic
,
Eq
)
-- | ListParser : is the map list in Csv or in Json?
data
ListParser
=
V3
-- the map list is in Csv
|
V4
-- the map list is in Json
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
ListParser
-- | SeaElevation : for a given level of observation,
-- define a set of similarity values that will be tested by the sea level rise algorithm
data
SeaElevation
=
Constante
-- test a constant set of values (see Gargantext.Core.Viz.Phylo.PhyloMaker.constSeaLadder)
{
_cons_start
::
Double
,
_cons_gap
::
Double
}
|
Adaptative
-- test a set of values that matches the similarity spectrum of the corpus
-- (see Gargantext.Core.Viz.Phylo.PhyloMaker.constSeaLadder)
{
_adap_steps
::
Double
}
|
Evolving
-- test a set of values that tries to directly maximize the quality of the phylo
-- the similarity spectrum of the corpus (see Gargantext.Core.Viz.Phylo.PhyloMaker.evolvSeaLadder)
{
_evol_neighborhood
::
Bool
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
SeaElevation
-- | PhyloSimilarity : define the similarity measure used for inter-temporal matching
data
PhyloSimilarity
=
WeightedLogJaccard
-- the default one (see Gargantext.Core.Viz.Phylo.TemporalMatching.weightedLogJaccard')
{
_wlj_sensibility
::
Double
,
_wlj_minSharedNgrams
::
Int
}
|
WeightedLogSim
-- not used
{
_wls_sensibility
::
Double
,
_wls_minSharedNgrams
::
Int
}
|
Hamming
-- not implemented
{
_hmg_sensibility
::
Double
,
_hmg_minSharedNgrams
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
PhyloSimilarity
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
SynchronyScope
=
SingleBranch
|
SiblingBranches
|
AllBranches
-- | SynchronyScope : define which groups should be considered by the synchronic clustering
data
SynchronyScope
=
SingleBranch
-- consider only the groups belonging to the same branch
|
SiblingBranches
-- consider only the groups belonging to sibling branches
-- (ie. branches that split at the same level of similarity)
|
AllBranches
-- consider every group of every branch
deriving
(
Show
,
Generic
,
Eq
,
ToSchema
)
data
SynchronyStrategy
=
MergeRegularGroups
|
MergeAllGroups
-- | SynchronyStrategy : define which groups should be merged when satisfying the synchronic clustering
data
SynchronyStrategy
=
MergeRegularGroups
-- only merge groups that don't contain emerging or declining ngrams
|
MergeAllGroups
-- merge every group
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
SynchronyStrategy
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
-- | Synchrony : define the synchronic clustering strategy
data
Synchrony
=
ByProximityThreshold
-- select all groups that satisfy a given similarity threshold
{
_bpt_threshold
::
Double
,
_bpt_sensibility
::
Double
,
_bpt_scope
::
SynchronyScope
,
_bpt_strategy
::
SynchronyStrategy
}
|
ByProximityDistribution
-- select the top x groups sorted by similarity
{
_bpd_sensibility
::
Double
,
_bpd_strategy
::
SynchronyStrategy
}
deriving
(
Show
,
Generic
,
Eq
)
...
...
@@ -113,7 +157,10 @@ instance ToSchema Synchrony where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_"
)
-- | TimeUnit : define the temporal granularity
-- period = size of a PhyloPeriod
-- step = step between two subsequent PhyloPeriod
-- frame = maximum number of PhyloPeriod considered for intertemporal matching
data
TimeUnit
=
Epoch
{
_epoch_period
::
Int
...
...
@@ -141,18 +188,28 @@ instance ToSchema TimeUnit where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
data
MaxCliqueFilter
=
ByThreshold
|
ByNeighbours
deriving
(
Show
,
Generic
,
Eq
)
-- | MaxCliqueFilter : define a strategy for computing MaxClique
data
MaxCliqueFilter
=
ByThreshold
-- consider ngrams whose confidence probability satisfies a given threshold
|
ByNeighbours
-- consider the top x ngrams sorted by confidence probability
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
MaxCliqueFilter
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
-- | Cluster : define a ngrams clustering method for computing PhyloGroups
-- Reference : Uno, Takeaki et al. “LCM ver. 2: Efficient Mining Algorithms for Frequent/Closed/Maximal Itemsets.”
-- Workshop on Frequent Itemset Mining Implementations (2004).
data
Cluster
=
Fis
-- frequent item set can be filtered by support and size
{
_fis_support
::
Int
,
_fis_size
::
Int
}
|
MaxClique
-- max clique can be filtered by size and threshold
{
_mcl_size
::
Int
,
_mcl_threshold
::
Double
,
_mcl_filter
::
MaxCliqueFilter
}
...
...
@@ -162,15 +219,21 @@ instance ToSchema Cluster where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
""
)
-- | Quality : define a level of observation
data
Quality
=
Quality
{
_qua_granularity
::
Double
,
_qua_minBranch
::
Int
}
Quality
-- _qua_granularity <=> level of observation or λ in (chavalarias, lobbe & delanoe 2021)
-- if λ = 0 then we have one big branch
-- if λ = 1 then we have many little branches
{
_qua_granularity
::
Double
,
_qua_minBranch
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
instance
ToSchema
Quality
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_qua_"
)
-- | PhyloConfig : full list of parameters used to reconstruct a Phylomemy from the command line
data
PhyloConfig
=
PhyloConfig
{
corpusPath
::
FilePath
,
listPath
::
FilePath
...
...
@@ -197,6 +260,8 @@ data PhyloConfig =
-- | SubConfig API & 1Click | --
--------------------------------
-- | PhyloSubConfigAPI : selected list of parameters used to reconstruct a Phylomemy from the API
data
PhyloSubConfigAPI
=
PhyloSubConfigAPI
{
_sc_phyloProximity
::
Double
,
_sc_phyloSynchrony
::
Double
...
...
@@ -223,6 +288,8 @@ subConfigAPI2config subConfig = defaultConfig
-- | SubConfig 1Click | --
--------------------------
-- | defaultConfig : default configuration used by the 1'Click feature
defaultConfig
::
PhyloConfig
defaultConfig
=
PhyloConfig
{
corpusPath
=
"corpus.csv"
-- useful for commandline only
...
...
@@ -304,7 +371,7 @@ instance FromJSON Quality
instance
ToJSON
Quality
-- | Software parameters
-- | Software
: software
parameters
data
Software
=
Software
{
_software_name
::
Text
,
_software_version
::
Text
...
...
@@ -314,14 +381,13 @@ instance ToSchema Software where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_software_"
)
defaultSoftware
::
Software
defaultSoftware
=
Software
{
_software_name
=
pack
"GarganText"
,
_software_version
=
pack
"v5"
}
-- |
G
lobal parameters of a Phylo
-- |
PhyloParam : g
lobal parameters of a Phylo
data
PhyloParam
=
PhyloParam
{
_phyloParam_version
::
Text
,
_phyloParam_software
::
Software
...
...
@@ -332,7 +398,6 @@ instance ToSchema PhyloParam where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_phyloParam_"
)
defaultPhyloParam
::
PhyloParam
defaultPhyloParam
=
PhyloParam
{
_phyloParam_version
=
pack
"v3"
...
...
@@ -353,14 +418,18 @@ type DateStr = Text
-- | Ngrams : a contiguous sequence of n terms
type
Ngrams
=
Text
-- Document : a piece of Text linked to a Date
-- date = computational date; date' = original string date yyyy-mm-dd
-- Export Database to Document
-- | Document : a piece of Text linked to a Date
data
Document
=
Document
{
date
::
Date
-- datatype Date {unDate :: Int}
,
date'
::
DateStr
-- show date
{
date
::
Date
-- the Int date used to compute the periods, groups, etc.
-- created by toPhyloDate in Gargantext.Core.Viz.Phylo.API.Tools
,
date'
::
DateStr
-- the original String date (yyyy-mm-dd) that will be displayed in the interface
-- created by toPhyloDate' in Gargantext.Core.Viz.Phylo.API.Tools
,
text
::
[
Ngrams
]
,
weight
::
Maybe
Double
-- a Double attached to each Document that will be used to set up the size of the phylogroup in the interface
-- only taken into account when CorpusParser is Csv'
,
sources
::
[
Text
]
,
docTime
::
TimeUnit
}
deriving
(
Eq
,
Show
,
Generic
,
NFData
)
...
...
@@ -371,12 +440,14 @@ data Document = Document
--------------------
-- |
The Foundations of a Phylo created from a given TermList
-- |
PhyloFoundations : store and index all the ngrams (named roots) that will appear in the Phylomemy
data
PhyloFoundations
=
PhyloFoundations
{
_foundations_roots
::
(
Vector
Ngrams
)
,
_foundations_rootsInGroups
::
Map
Int
[
PhyloGroupId
]
-- map of roots associated to groups
}
deriving
(
Generic
,
Show
,
Eq
)
-- | PhyloCounts : store various counters related to roots or dates
data
PhyloCounts
=
PhyloCounts
{
coocByDate
::
!
(
Map
Date
Cooc
)
,
docsByDate
::
!
(
Map
Date
Double
)
...
...
@@ -386,9 +457,12 @@ data PhyloCounts = PhyloCounts
,
lastRootsFreq
::
!
(
Map
Int
Double
)
}
deriving
(
Generic
,
Show
,
Eq
)
-- | PhyloSources : store sources that will be used in the interface to highlight some PhyloGroups
data
PhyloSources
=
PhyloSources
{
_sources
::
!
(
Vector
Text
)
}
deriving
(
Generic
,
Show
,
Eq
)
instance
ToSchema
PhyloFoundations
where
declareNamedSchema
=
genericDeclareNamedSchema
(
unPrefixSwagger
"_foundations_"
)
instance
ToSchema
PhyloCounts
where
...
...
@@ -521,10 +595,13 @@ type Thr = Double
-- | Pointer : A weighted pointer to a given PhyloGroup
type
Pointer
=
(
PhyloGroupId
,
Weight
)
-- | Pointer' : A weighted pointer to a given PhyloGroup with a lower bounded threshold
type
Pointer'
=
(
PhyloGroupId
,
(
Thr
,
Weight
))
data
Filiation
=
ToParents
|
ToChilds
|
ToParentsMemory
|
ToChildsMemory
deriving
(
Generic
,
Show
)
data
PointerType
=
TemporalPointer
|
ScalePointer
deriving
(
Generic
,
Show
)
...
...
@@ -535,6 +612,8 @@ data PointerType = TemporalPointer | ScalePointer deriving (Generic, Show)
-- | Support : Number of Documents where a Cluster occurs
type
Support
=
Int
-- | Clustering : define the structure of a cluster of ngrams
data
Clustering
=
Clustering
{
_clustering_roots
::
[
Int
]
,
_clustering_support
::
Support
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment