Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
eeeb82c8
Commit
eeeb82c8
authored
Oct 22, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev-phylo' into dev-merge
parents
76e11752
077bf19a
Pipeline
#592
failed with stage
Changes
11
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
1262 additions
and
215 deletions
+1262
-215
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+23
-18
package.yaml
package.yaml
+3
-0
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+89
-9
Example.hs
src/Gargantext/Viz/Phylo/Example.hs
+2
-2
PhyloExample.hs
src/Gargantext/Viz/Phylo/PhyloExample.hs
+17
-2
PhyloExport.hs
src/Gargantext/Viz/Phylo/PhyloExport.hs
+457
-1
PhyloMaker.hs
src/Gargantext/Viz/Phylo/PhyloMaker.hs
+15
-16
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+208
-14
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+170
-12
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+277
-140
Tools.hs
src/Gargantext/Viz/Phylo/Tools.hs
+1
-1
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
eeeb82c8
...
...
@@ -26,7 +26,7 @@ import Data.ByteString.Lazy (ByteString)
import
Data.Maybe
(
isJust
,
fromJust
)
import
Data.List
(
concat
,
nub
,
isSuffixOf
,
take
)
import
Data.String
(
String
)
import
Data.Text
(
Text
,
unwords
)
import
Data.Text
(
Text
,
unwords
,
unpack
)
import
Gargantext.Prelude
import
Gargantext.Database.Types.Node
(
HyperdataDocument
(
..
))
...
...
@@ -36,7 +36,10 @@ import Gargantext.Text.Corpus.Parsers (FileFormat(..),parseFile)
import
Gargantext.Text.List.CSV
(
csvGraphTermList
)
import
Gargantext.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloMaker
(
toPhylo
)
import
Gargantext.Viz.Phylo.PhyloMaker
(
toPhylo
)
import
Gargantext.Viz.Phylo.PhyloTools
(
printIOMsg
,
printIOComment
)
import
Gargantext.Viz.Phylo.PhyloExport
(
toPhyloExport
,
dotToFile
)
-- import Gargantext.Viz.Phylo.SynchronicClustering (synchronicDistance')
import
GHC.IO
(
FilePath
)
import
Prelude
(
Either
(
..
))
...
...
@@ -54,21 +57,6 @@ import qualified Gargantext.Text.Corpus.Parsers.CSV as Csv
---------------
-- | To print an important message as an IO()
printIOMsg
::
String
->
IO
()
printIOMsg
msg
=
putStrLn
(
"
\n
"
<>
"------------"
<>
"
\n
"
<>
"-- | "
<>
msg
<>
"
\n
"
)
-- | To print a comment as an IO()
printIOComment
::
String
->
IO
()
printIOComment
cmt
=
putStrLn
(
"
\n
"
<>
cmt
<>
"
\n
"
)
-- | To get all the files in a directory or just a file
getFilesFromPath
::
FilePath
->
IO
([
FilePath
])
getFilesFromPath
path
=
do
...
...
@@ -166,6 +154,23 @@ main = do
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOMsg
"Reconstruct the Phylo"
let
phylo
=
toPhylo
corpus
mapList
config
printIOMsg
"End of reconstruction"
\ No newline at end of file
-- | probes
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_synchronic_distance_cumu_jaccard.txt")
-- $ synchronicDistance' phylo 1
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_inflexion_points.txt")
-- $ inflexionPoints phylo 1
printIOMsg
"End of reconstruction, start the export"
let
dot
=
toPhyloExport
phylo
let
output
=
(
outputPath
config
)
<>
(
unpack
$
phyloName
config
)
<>
"_V2.dot"
dotToFile
output
dot
\ No newline at end of file
package.yaml
View file @
eeeb82c8
...
...
@@ -72,6 +72,9 @@ library:
-
Gargantext.Viz.AdaptativePhylo
-
Gargantext.Viz.Phylo.PhyloMaker
-
Gargantext.Viz.Phylo.Tools
-
Gargantext.Viz.Phylo.PhyloTools
-
Gargantext.Viz.Phylo.PhyloExport
-
Gargantext.Viz.Phylo.SynchronicClustering
-
Gargantext.Viz.Phylo.Example
-
Gargantext.Viz.Phylo.LevelMaker
-
Gargantext.Viz.Phylo.View.Export
...
...
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
eeeb82c8
...
...
@@ -44,6 +44,8 @@ import GHC.IO (FilePath)
import
Control.DeepSeq
(
NFData
)
import
Control.Lens
(
makeLenses
)
import
qualified
Data.Text.Lazy
as
TextLazy
----------------
-- | Config | --
...
...
@@ -65,6 +67,15 @@ data Proximity =
deriving
(
Show
,
Generic
,
Eq
)
data
Synchrony
=
ByProximityThreshold
{
_bpt_threshold
::
Double
,
_bpt_sensibility
::
Double
}
|
ByProximityDistribution
{
_bpd_sensibility
::
Double
}
deriving
(
Show
,
Generic
,
Eq
)
data
TimeUnit
=
Year
{
_year_period
::
Int
...
...
@@ -77,7 +88,13 @@ data ContextualUnit =
Fis
{
_fis_support
::
Int
,
_fis_size
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
data
Quality
=
Quality
{
_qua_relevance
::
Double
,
_qua_minBranch
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
Config
=
...
...
@@ -88,9 +105,13 @@ data Config =
,
phyloName
::
Text
,
phyloLevel
::
Int
,
phyloProximity
::
Proximity
,
phyloSynchrony
::
Synchrony
,
phyloQuality
::
Quality
,
timeUnit
::
TimeUnit
,
contextualUnit
::
ContextualUnit
,
branchSize
::
Int
,
exportLabel
::
[
PhyloLabel
]
,
exportSort
::
Sort
,
exportFilter
::
[
Filter
]
}
deriving
(
Show
,
Generic
,
Eq
)
...
...
@@ -102,10 +123,14 @@ defaultConfig =
,
corpusParser
=
Csv
1000
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
2
,
phyloProximity
=
WeightedLogJaccard
10
0
0.05
,
phyloProximity
=
WeightedLogJaccard
10
0
0.1
,
phyloSynchrony
=
ByProximityDistribution
0
,
phyloQuality
=
Quality
0.1
1
,
timeUnit
=
Year
3
1
5
,
contextualUnit
=
Fis
2
4
,
branchSize
=
3
,
contextualUnit
=
Fis
1
5
,
exportLabel
=
[
BranchLabel
MostInclusive
2
,
GroupLabel
MostEmergentInclusive
2
]
,
exportSort
=
ByHierarchy
,
exportFilter
=
[
ByBranchSize
2
]
}
instance
FromJSON
Config
...
...
@@ -118,6 +143,20 @@ instance FromJSON TimeUnit
instance
ToJSON
TimeUnit
instance
FromJSON
ContextualUnit
instance
ToJSON
ContextualUnit
instance
FromJSON
PhyloLabel
instance
ToJSON
PhyloLabel
instance
FromJSON
Tagger
instance
ToJSON
Tagger
instance
FromJSON
Sort
instance
ToJSON
Sort
instance
FromJSON
Order
instance
ToJSON
Order
instance
FromJSON
Filter
instance
ToJSON
Filter
instance
FromJSON
Synchrony
instance
ToJSON
Synchrony
instance
FromJSON
Quality
instance
ToJSON
Quality
-- | Software parameters
...
...
@@ -248,17 +287,18 @@ data PhyloGroup =
PhyloGroup
{
_phylo_groupPeriod
::
(
Date
,
Date
)
,
_phylo_groupLevel
::
Level
,
_phylo_groupIndex
::
Int
,
_phylo_groupLabel
::
Text
,
_phylo_groupSupport
::
Support
,
_phylo_groupNgrams
::
[
Int
]
,
_phylo_groupCooc
::
!
(
Cooc
)
,
_phylo_groupBranchId
::
PhyloBranchId
,
_phylo_groupMeta
::
Map
Text
[
Double
]
,
_phylo_groupLevelParents
::
[
Pointer
]
,
_phylo_groupLevelChilds
::
[
Pointer
]
,
_phylo_groupPeriodParents
::
[
Pointer
]
,
_phylo_groupPeriodChilds
::
[
Pointer
]
,
_phylo_groupGhostPointers
::
[
Pointer
]
}
deriving
(
Generic
,
Show
,
Eq
)
deriving
(
Generic
,
Show
,
Eq
,
NFData
)
-- | Weight : A generic mesure that can be associated with an Id
type
Weight
=
Double
...
...
@@ -266,8 +306,6 @@ type Weight = Double
-- | Pointer : A weighted pointer to a given PhyloGroup
type
Pointer
=
(
PhyloGroupId
,
Weight
)
type
Link
=
((
PhyloGroupId
,
PhyloGroupId
),
Weight
)
data
Filiation
=
ToParents
|
ToChilds
deriving
(
Generic
,
Show
)
data
PointerType
=
TemporalPointer
|
LevelPointer
deriving
(
Generic
,
Show
)
...
...
@@ -290,13 +328,53 @@ data PhyloFis = PhyloFis
}
deriving
(
Generic
,
NFData
,
Show
,
Eq
)
----------------
-- | Export | --
----------------
type
DotId
=
TextLazy
.
Text
data
EdgeType
=
GroupToGroup
|
BranchToGroup
|
BranchToBranch
|
PeriodToPeriod
deriving
(
Show
,
Generic
,
Eq
)
data
Filter
=
ByBranchSize
{
_branch_size
::
Double
}
deriving
(
Show
,
Generic
,
Eq
)
data
Order
=
Asc
|
Desc
deriving
(
Show
,
Generic
,
Eq
)
data
Sort
=
ByBirthDate
{
_sort_order
::
Order
}
|
ByHierarchy
deriving
(
Show
,
Generic
,
Eq
)
data
Tagger
=
MostInclusive
|
MostEmergentInclusive
deriving
(
Show
,
Generic
,
Eq
)
data
PhyloLabel
=
BranchLabel
{
_branch_labelTagger
::
Tagger
,
_branch_labelSize
::
Int
}
|
GroupLabel
{
_group_labelTagger
::
Tagger
,
_group_labelSize
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
PhyloBranch
=
PhyloBranch
{
_branch_id
::
PhyloBranchId
,
_branch_label
::
Text
,
_branch_meta
::
Map
Text
[
Double
]
}
deriving
(
Generic
,
Show
)
data
PhyloExport
=
PhyloExport
{
_export_groups
::
[
PhyloGroup
]
,
_export_branches
::
[
PhyloBranch
]
}
deriving
(
Generic
,
Show
)
----------------
-- | Lenses | --
----------------
makeLenses
''
C
onfig
makeLenses
''
P
roximity
makeLenses
''
Q
uality
makeLenses
''
C
ontextualUnit
makeLenses
''
P
hyloLabel
makeLenses
''
T
imeUnit
makeLenses
''
P
hyloFoundations
makeLenses
''
P
hyloFis
...
...
@@ -305,6 +383,8 @@ makeLenses ''PhyloPeriod
makeLenses
''
P
hyloLevel
makeLenses
''
P
hyloGroup
makeLenses
''
P
hyloParam
makeLenses
''
P
hyloExport
makeLenses
''
P
hyloBranch
------------------------
-- | JSON instances | --
...
...
src/Gargantext/Viz/Phylo/Example.hs
View file @
eeeb82c8
...
...
@@ -83,7 +83,7 @@ queryViewEx = "level=3"
phyloQueryView
::
PhyloQueryView
phyloQueryView
=
PhyloQueryView
2
Merge
False
2
[
BranchAge
,
BranchBirth
,
BranchGroups
]
[]
[
BranchPeakInc
,
GroupLabelIncDyn
]
(
Just
(
ByBranchBirth
,
Asc
))
Json
Flat
True
phyloQueryView
=
PhyloQueryView
1
Merge
False
1
[
BranchAge
,
BranchBirth
,
BranchGroups
]
[]
[
BranchPeakInc
,
GroupLabelIncDyn
]
(
Just
(
ByBranchBirth
,
Asc
))
Json
Flat
True
--------------------------------------------------
...
...
@@ -110,7 +110,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild
::
PhyloQueryBuild
phyloQueryBuild
=
PhyloQueryBuild
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
3
1
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
5
20
)
5
0.8
0.5
4
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.3
0
)
3
1
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
9
10
)
5
0.8
0.5
4
1
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.3
0
)
...
...
src/Gargantext/Viz/Phylo/PhyloExample.hs
View file @
eeeb82c8
...
...
@@ -29,18 +29,32 @@ import Gargantext.Text.Terms.Mono (monoTexts)
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.PhyloMaker
import
Gargantext.Viz.Phylo.PhyloExport
import
Gargantext.Viz.Phylo.TemporalMatching
(
temporalMatching
)
import
Gargantext.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Control.Lens
import
Data.GraphViz.Types.Generalised
(
DotGraph
)
import
qualified
Data.Vector
as
Vector
phyloExport
::
IO
()
phyloExport
=
dotToFile
"/home/qlobbe/data/phylo/output/cesar_cleopatre_V2.dot"
phyloDot
phyloDot
::
DotGraph
DotId
phyloDot
=
toPhyloExport
phylo2
phylo2
::
Phylo
phylo2
=
synchronicClustering
phylo1
-----------------------------------------------
-- | STEP 3 | -- Build the Level 1 of the Phylo
-----------------------------------------------
phylo1
::
Phylo
phylo1
=
appendGroups
fisToGroup
1
phyloFis
phyloBase
phylo1
=
temporalMatching
$
appendGroups
fisToGroup
1
phyloFis
phyloBase
---------------------------------------------
...
...
@@ -80,7 +94,8 @@ nbDocsByYear = docsToTimeScaleNb docs
config
::
Config
config
=
defaultConfig
{
phyloName
=
"Cesar et Cleopatre"
,
branchSize
=
0
,
phyloLevel
=
2
,
exportFilter
=
[
ByBranchSize
0
]
,
contextualUnit
=
Fis
0
0
}
...
...
src/Gargantext/Viz/Phylo/PhyloExport.hs
View file @
eeeb82c8
This diff is collapsed.
Click to expand it.
src/Gargantext/Viz/Phylo/PhyloMaker.hs
View file @
eeeb82c8
...
...
@@ -16,13 +16,15 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloMaker
where
import
Data.List
(
concat
,
nub
,
partition
,
sort
,
(
++
))
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
filterWithKey
,
restrictKeys
)
import
Data.Map
(
Map
,
fromListWith
,
keys
,
unionWith
,
fromList
,
empty
,
toList
,
elems
,
(
!
),
restrictKeys
)
import
Data.Set
(
size
)
import
Data.Vector
(
Vector
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.TemporalMatching
(
temporalMatching
)
import
Gargantext.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Gargantext.Text.Context
(
TermList
)
import
Gargantext.Text.Metrics.FrequentItemSet
(
fisWithSizePolyMap
,
Size
(
..
))
...
...
@@ -41,7 +43,10 @@ import qualified Data.Set as Set
toPhylo
::
[
Document
]
->
TermList
->
Config
->
Phylo
toPhylo
docs
lst
conf
=
phylo1
toPhylo
docs
lst
conf
=
traceToPhylo
(
phyloLevel
conf
)
$
if
(
phyloLevel
conf
)
>
1
then
foldl'
(
\
phylo'
_
->
synchronicClustering
phylo'
)
phylo1
[
2
..
(
phyloLevel
conf
)]
else
phylo1
where
--------------------------------------
phylo1
::
Phylo
...
...
@@ -82,16 +87,18 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
fisToGroup
::
PhyloFis
->
PhyloPeriodId
->
Level
->
Int
->
Vector
Ngrams
->
[
Cooc
]
->
PhyloGroup
fisToGroup
fis
pId
lvl
idx
fdt
coocs
=
let
ngrams
=
ngramsToIdx
(
Set
.
toList
$
fis
^.
phyloFis_clique
)
fdt
in
PhyloGroup
pId
lvl
idx
in
PhyloGroup
pId
lvl
idx
""
(
fis
^.
phyloFis_support
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
1
,
[]
)
[]
[]
[]
[]
[]
(
1
,[
0
])
empty
[]
[]
[]
[]
toPhylo1
::
[
Document
]
->
Phylo
->
Phylo
toPhylo1
docs
phyloBase
=
appendGroups
fisToGroup
1
phyloFis
phyloBase
toPhylo1
docs
phyloBase
=
temporalMatching
$
appendGroups
fisToGroup
1
phyloFis
phyloBase
where
--------------------------------------
phyloFis
::
Map
(
Date
,
Date
)
[
PhyloFis
]
...
...
@@ -164,14 +171,6 @@ toPhyloFis phyloDocs support clique = traceFis "Filtered Fis"
--------------------
-- | To build the local cooc matrix of each phylogroup
ngramsToCooc
::
[
Int
]
->
[
Cooc
]
->
Cooc
ngramsToCooc
ngrams
coocs
=
let
cooc
=
foldl
(
\
acc
cooc'
->
sumCooc
acc
cooc'
)
empty
coocs
pairs
=
listToKeys
ngrams
in
filterWithKey
(
\
k
_
->
elem
k
pairs
)
cooc
-- | To transform the docs into a time map of coocurency matrix
docsToTimeScaleCooc
::
[
Document
]
->
Vector
Ngrams
->
Map
Date
Cooc
docsToTimeScaleCooc
docs
fdt
=
...
...
@@ -181,7 +180,7 @@ docsToTimeScaleCooc docs fdt =
mCooc'
=
fromList
$
map
(
\
t
->
(
t
,
empty
))
$
toTimeScale
(
map
date
docs
)
1
in
trace
(
"
\n
"
<>
"-- | Build the coocurency matrix for "
<>
show
(
length
$
keys
mCooc'
)
<>
" unit of time"
<>
"
\n
"
)
in
trace
(
"
\n
"
<>
"-- | Build the coocurency matrix for "
<>
show
(
length
$
keys
mCooc'
)
<>
" unit of time"
<>
"
\n
"
)
$
unionWith
sumCooc
mCooc
mCooc'
...
...
@@ -232,4 +231,4 @@ toPhyloBase docs lst conf =
(
docsToTimeScaleCooc
docs
(
foundations
^.
foundations_roots
))
(
docsToTimeScaleNb
docs
)
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
initPhyloLevels
(
phyloLevel
conf
)
prd
)))
periods
)
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
initPhyloLevels
1
prd
)))
periods
)
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
eeeb82c8
This diff is collapsed.
Click to expand it.
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
eeeb82c8
...
...
@@ -16,22 +16,180 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.SynchronicClustering
where
import
Gargantext.Prelude
-- import Gargantext.Viz.AdaptativePhylo
-- import Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.TemporalMatching
(
weightedLogJaccard
)
import
Data.List
((
++
),
null
,
intersect
,
nub
,
concat
,
sort
,
sortOn
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
foldlWithKey
,
(
!
),
insert
,
empty
,
restrictKeys
,
elems
,
mapWithKey
,
member
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
qualified
Data.Map
as
Map
-------------------------
-- | New Level Maker | --
-------------------------
toBranchId
::
PhyloGroup
->
PhyloBranchId
toBranchId
child
=
((
child
^.
phylo_groupLevel
)
+
1
,
snd
(
child
^.
phylo_groupBranchId
))
mergeGroups
::
[
Cooc
]
->
PhyloGroupId
->
Map
PhyloGroupId
PhyloGroupId
->
[
PhyloGroup
]
->
PhyloGroup
mergeGroups
coocs
id
mapIds
childs
=
let
ngrams
=
(
sort
.
nub
.
concat
)
$
map
_phylo_groupNgrams
childs
in
PhyloGroup
(
fst
$
fst
id
)
(
snd
$
fst
id
)
(
snd
id
)
""
(
sum
$
map
_phylo_groupSupport
childs
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
toBranchId
(
head'
"mergeGroups"
childs
))
empty
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodParents
childs
)
(
updatePointers
$
concat
$
map
_phylo_groupPeriodChilds
childs
)
where
updatePointers
::
[
Pointer
]
->
[
Pointer
]
updatePointers
pointers
=
map
(
\
(
pId
,
w
)
->
(
mapIds
!
pId
,
w
))
pointers
addPhyloLevel
::
Level
->
Phylo
->
Phylo
addPhyloLevel
lvl
phylo
=
over
(
phylo_periods
.
traverse
)
(
\
phyloPrd
->
phyloPrd
&
phylo_periodLevels
%~
(
insert
(
phyloPrd
^.
phylo_periodPeriod
,
lvl
)
(
PhyloLevel
(
phyloPrd
^.
phylo_periodPeriod
)
lvl
empty
)))
phylo
toNextLevel
::
Phylo
->
[
PhyloGroup
]
->
Phylo
toNextLevel
phylo
groups
=
let
curLvl
=
getLastLevel
phylo
oldGroups
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
getLevelParentId
g
))
groups
newGroups
=
fromListWith
(
++
)
-- | 5) group the parents by periods
$
foldlWithKey
(
\
acc
id
groups'
->
-- | 4) create the parent group
let
parent
=
mergeGroups
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[(
fst
.
fst
)
id
])
id
oldGroups
groups'
in
acc
++
[(
parent
^.
phylo_groupPeriod
,
[
parent
])])
[]
-- | 3) group the current groups by parentId
$
fromListWith
(
++
)
$
map
(
\
g
->
(
getLevelParentId
g
,
[
g
]))
groups
in
traceSynchronyEnd
$
over
(
phylo_periods
.
traverse
.
phylo_periodLevels
.
traverse
-- | 6) update each period at curLvl + 1
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
(
curLvl
+
1
)))
-- | 7) by adding the parents
(
\
phyloLvl
->
if
member
(
phyloLvl
^.
phylo_levelPeriod
)
newGroups
then
phyloLvl
&
phylo_levelGroups
.~
fromList
(
map
(
\
g
->
(
getGroupId
g
,
g
))
$
newGroups
!
(
phyloLvl
^.
phylo_levelPeriod
))
else
phyloLvl
)
-- | 2) add the curLvl + 1 phyloLevel to the phylo
$
addPhyloLevel
(
curLvl
+
1
)
-- | 1) update the current groups (with level parent pointers) in the phylo
$
updatePhyloGroups
curLvl
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
groups
)
phylo
import
Data.List
(
foldl'
,
(
++
),
null
,
intersect
,
(
\\
),
union
,
nub
,
concat
)
--------------------
-- | Clustering | --
--------------------
relatedComponents
::
Eq
a
=>
[[
a
]]
->
[[
a
]]
relatedComponents
graphs
=
foldl'
(
\
mem
groups
->
if
(
null
mem
)
then
mem
++
[
groups
]
else
let
related
=
filter
(
\
groups'
->
(
not
.
null
)
$
intersect
groups
groups'
)
mem
in
if
(
null
related
)
then
mem
++
[
groups
]
else
(
mem
\\
related
)
++
[
union
groups
(
nub
$
concat
related
)]
)
[]
graphs
\ No newline at end of file
toPairs
::
[
PhyloGroup
]
->
[(
PhyloGroup
,
PhyloGroup
)]
toPairs
groups
=
filter
(
\
(
g
,
g'
)
->
(
not
.
null
)
$
intersect
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
))
$
listToCombi'
groups
toDiamonds
::
[
PhyloGroup
]
->
[[
PhyloGroup
]]
toDiamonds
groups
=
foldl'
(
\
acc
groups'
->
acc
++
(
elems
$
Map
.
filter
(
\
v
->
length
v
>
1
)
$
fromListWith
(
++
)
$
foldl'
(
\
acc'
g
->
acc'
++
(
map
(
\
(
id
,
_
)
->
(
id
,[
g
])
)
$
g
^.
phylo_groupPeriodChilds
))
[]
groups'
))
[]
$
elems
$
Map
.
filter
(
\
v
->
length
v
>
1
)
$
fromListWith
(
++
)
$
foldl'
(
\
acc
g
->
acc
++
(
map
(
\
(
id
,
_
)
->
(
id
,[
g
])
)
$
g
^.
phylo_groupPeriodParents
)
)
[]
groups
groupsToEdges
::
Proximity
->
Synchrony
->
Double
->
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
groupsToEdges
prox
sync
docs
groups
=
case
sync
of
ByProximityThreshold
t
s
->
filter
(
\
(
_
,
w
)
->
w
>=
t
)
$
toEdges
s
$
toPairs
groups
ByProximityDistribution
s
->
let
diamonds
=
sortOn
snd
$
toEdges
s
$
concat
$
map
toPairs
$
toDiamonds
groups
in
take
(
div
(
length
diamonds
)
2
)
diamonds
where
toEdges
::
Double
->
[(
PhyloGroup
,
PhyloGroup
)]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
toEdges
sens
edges
=
case
prox
of
WeightedLogJaccard
_
_
_
->
map
(
\
(
g
,
g'
)
->
((
g
,
g'
),
weightedLogJaccard
sens
docs
(
g
^.
phylo_groupCooc
)
(
g'
^.
phylo_groupCooc
)
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)))
edges
_
->
undefined
toRelatedComponents
::
[
PhyloGroup
]
->
[((
PhyloGroup
,
PhyloGroup
),
Double
)]
->
[[
PhyloGroup
]]
toRelatedComponents
nodes
edges
=
let
ref
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
nodes
clusters
=
relatedComponents
$
((
map
(
\
((
g
,
g'
),
_
)
->
[
getGroupId
g
,
getGroupId
g'
])
edges
)
++
(
map
(
\
g
->
[
getGroupId
g
])
nodes
))
in
map
(
\
cluster
->
map
(
\
gId
->
ref
!
gId
)
cluster
)
clusters
toParentId
::
PhyloGroup
->
PhyloGroupId
toParentId
child
=
((
child
^.
phylo_groupPeriod
,
child
^.
phylo_groupLevel
+
1
),
child
^.
phylo_groupIndex
)
reduceBranch
::
Proximity
->
Synchrony
->
Map
Date
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceBranch
prox
sync
docs
branch
=
-- | 1) reduce a branch as a set of periods & groups
let
periods
=
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,[
g
]))
branch
in
(
concat
.
concat
.
elems
)
$
mapWithKey
(
\
prd
groups
->
-- | 2) for each period, transform the groups as a proximity graph filtered by a threshold
let
edges
=
groupsToEdges
prox
sync
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
groups
in
map
(
\
comp
->
-- | 4) add to each groups their futur level parent group
let
parentId
=
toParentId
(
head'
"parentId"
comp
)
in
map
(
\
g
->
g
&
phylo_groupLevelParents
%~
(
++
[(
parentId
,
1
)])
)
comp
)
-- |3) reduce the graph a a set of related components
$
toRelatedComponents
groups
edges
)
periods
synchronicClustering
::
Phylo
->
Phylo
synchronicClustering
phylo
=
let
prox
=
phyloProximity
$
getConfig
phylo
sync
=
phyloSynchrony
$
getConfig
phylo
docs
=
phylo
^.
phylo_timeDocs
branches
=
map
(
\
branch
->
reduceBranch
prox
sync
docs
branch
)
$
phyloToLastBranches
$
traceSynchronyStart
phylo
branches'
=
branches
`
using
`
parList
rdeepseq
in
toNextLevel
phylo
$
concat
branches'
----------------
-- | probes | --
----------------
-- synchronicDistance :: Phylo -> Level -> String
-- synchronicDistance phylo lvl =
-- foldl' (\acc branch ->
-- acc <> (foldl' (\acc' period ->
-- acc' <> let prox = phyloProximity $ getConfig phylo
-- sync = phyloSynchrony $ getConfig phylo
-- docs = _phylo_timeDocs phylo
-- prd = _phylo_groupPeriod $ head' "distance" period
-- edges = groupsToEdges prox 0.1 (_bpt_sensibility sync)
-- ((sum . elems) $ restrictKeys docs $ periodsToYears [_phylo_groupPeriod $ head' "distance" period]) period
-- in foldl' (\mem (_,w) ->
-- mem <> show (prd)
-- <> "\t"
-- <> show (w)
-- <> "\n"
-- ) "" edges
-- ) "" $ elems $ groupByField _phylo_groupPeriod branch)
-- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo
\ No newline at end of file
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
eeeb82c8
This diff is collapsed.
Click to expand it.
src/Gargantext/Viz/Phylo/Tools.hs
View file @
eeeb82c8
...
...
@@ -815,7 +815,7 @@ getProximity cluster = case cluster of
-- | To initialize all the Cluster / Proximity with their default parameters
initFis
::
Maybe
Bool
->
Maybe
Support
->
Maybe
Int
->
FisParams
initFis
(
def
True
->
kmf
)
(
def
2
->
min'
)
(
def
4
->
thr
)
=
FisParams
kmf
min'
thr
initFis
(
def
True
->
kmf
)
(
def
0
->
min'
)
(
def
0
->
thr
)
=
FisParams
kmf
min'
thr
initHamming
::
Maybe
Double
->
HammingParams
initHamming
(
def
0.01
->
sens
)
=
HammingParams
sens
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment