Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
92b4221b
Commit
92b4221b
authored
Aug 29, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add branching
parent
d3097207
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
232 additions
and
94 deletions
+232
-94
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+8
-8
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+45
-21
PhyloExample.hs
src/Gargantext/Viz/Phylo/PhyloExample.hs
+5
-6
PhyloMaker.hs
src/Gargantext/Viz/Phylo/PhyloMaker.hs
+12
-13
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+50
-1
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+10
-2
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+102
-43
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
92b4221b
...
@@ -128,16 +128,16 @@ csvToCorpus limit path = Vector.toList
...
@@ -128,16 +128,16 @@ csvToCorpus limit path = Vector.toList
-- | To use the correct parser given a CorpusType
-- | To use the correct parser given a CorpusType
fileToCorpus
::
CorpusParser
->
Int
->
FilePath
->
IO
([(
Int
,
Text
)])
fileToCorpus
::
CorpusParser
->
FilePath
->
IO
([(
Int
,
Text
)])
fileToCorpus
parser
limit
path
=
case
parser
of
fileToCorpus
parser
path
=
case
parser
of
Wos
->
wosToCorpus
limit
path
Wos
limit
->
wosToCorpus
limit
path
Csv
->
csvToCorpus
limit
path
Csv
limit
->
csvToCorpus
limit
path
-- | To parse a file into a list of Document
-- | To parse a file into a list of Document
fileToDocs
::
CorpusParser
->
Int
->
FilePath
->
TermList
->
IO
[
Document
]
fileToDocs
::
CorpusParser
->
FilePath
->
TermList
->
IO
[
Document
]
fileToDocs
parser
limit
path
lst
=
do
fileToDocs
parser
path
lst
=
do
corpus
<-
fileToCorpus
parser
limit
path
corpus
<-
fileToCorpus
parser
path
let
patterns
=
buildPatterns
lst
let
patterns
=
buildPatterns
lst
pure
$
map
(
(
\
(
y
,
t
)
->
Document
y
t
)
.
filterTerms
patterns
)
corpus
pure
$
map
(
(
\
(
y
,
t
)
->
Document
y
t
)
.
filterTerms
patterns
)
corpus
...
@@ -162,7 +162,7 @@ main = do
...
@@ -162,7 +162,7 @@ main = do
printIOMsg
"Parse the corpus"
printIOMsg
"Parse the corpus"
mapList
<-
csvGraphTermList
(
listPath
config
)
mapList
<-
csvGraphTermList
(
listPath
config
)
corpus
<-
fileToDocs
(
corpusParser
config
)
(
corpus
Limit
config
)
(
corpus
Path
config
)
mapList
corpus
<-
fileToDocs
(
corpusParser
config
)
(
corpusPath
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOMsg
"Reconstruct the Phylo"
printIOMsg
"Reconstruct the Phylo"
...
...
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
92b4221b
...
@@ -50,46 +50,63 @@ import Control.Lens (makeLenses)
...
@@ -50,46 +50,63 @@ import Control.Lens (makeLenses)
----------------
----------------
data
CorpusParser
=
Wos
|
Csv
deriving
(
Show
,
Generic
,
Eq
)
data
CorpusParser
=
Wos
{
_wos_limit
::
Int
}
|
Csv
{
_csv_limit
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
Proximity
=
WeightedLogJaccard
{
_sensibility
::
Double
}
data
Proximity
=
WeightedLogJaccard
{
_wlj_sensibility
::
Double
,
_wlj_thresholdInit
::
Double
,
_wlj_thresholdStep
::
Double
}
|
Hamming
|
Hamming
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
data
TimeUnit
=
Year
{
_year_period
::
Int
,
_year_step
::
Int
,
_year_matchingFrame
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
ContextualUnit
=
Fis
{
_fis_support
::
Int
,
_fis_size
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
Config
=
data
Config
=
Config
{
corpusPath
::
FilePath
Config
{
corpusPath
::
FilePath
,
listPath
::
FilePath
,
listPath
::
FilePath
,
outputPath
::
FilePath
,
outputPath
::
FilePath
,
corpusParser
::
CorpusParser
,
corpusParser
::
CorpusParser
,
corpusLimit
::
Int
,
phyloName
::
Text
,
phyloName
::
Text
,
phyloLevel
::
Int
,
phyloLevel
::
Int
,
phyloQuality
::
Double
,
phyloProximity
::
Proximity
,
phyloProximity
::
Proximity
,
timeUnit
::
Int
,
timeUnit
::
TimeUnit
,
maxTimeMatch
::
Int
,
contextualUnit
::
ContextualUnit
,
timePeriod
::
Int
,
timeStep
::
Int
,
fisSupport
::
Int
,
fisSize
::
Int
,
branchSize
::
Int
,
branchSize
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
}
deriving
(
Show
,
Generic
,
Eq
)
defaultConfig
::
Config
defaultConfig
::
Config
defaultConfig
=
defaultConfig
=
Config
{
corpusPath
=
""
Config
{
corpusPath
=
""
,
listPath
=
""
,
listPath
=
""
,
outputPath
=
""
,
outputPath
=
""
,
corpusParser
=
Csv
,
corpusParser
=
Csv
1000
,
corpusLimit
=
1000
,
phyloName
=
pack
"Default Phylo"
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
2
,
phyloLevel
=
2
,
phyloProximity
=
WeightedLogJaccard
10
,
phyloQuality
=
0.5
,
timeUnit
=
1
,
phyloProximity
=
WeightedLogJaccard
10
0
0.05
,
maxTimeMatch
=
5
,
timeUnit
=
Year
3
1
5
,
timePeriod
=
3
,
contextualUnit
=
Fis
2
4
,
timeStep
=
1
,
fisSupport
=
2
,
fisSize
=
4
,
branchSize
=
3
,
branchSize
=
3
}
}
...
@@ -99,6 +116,10 @@ instance FromJSON CorpusParser
...
@@ -99,6 +116,10 @@ instance FromJSON CorpusParser
instance
ToJSON
CorpusParser
instance
ToJSON
CorpusParser
instance
FromJSON
Proximity
instance
FromJSON
Proximity
instance
ToJSON
Proximity
instance
ToJSON
Proximity
instance
FromJSON
TimeUnit
instance
ToJSON
TimeUnit
instance
FromJSON
ContextualUnit
instance
ToJSON
ContextualUnit
-- | Software parameters
-- | Software parameters
...
@@ -237,7 +258,7 @@ data PhyloGroup =
...
@@ -237,7 +258,7 @@ data PhyloGroup =
,
_phylo_groupLevelChilds
::
[
Pointer
]
,
_phylo_groupLevelChilds
::
[
Pointer
]
,
_phylo_groupPeriodParents
::
[
Pointer
]
,
_phylo_groupPeriodParents
::
[
Pointer
]
,
_phylo_groupPeriodChilds
::
[
Pointer
]
,
_phylo_groupPeriodChilds
::
[
Pointer
]
,
_phylo_group
BreakPointer
::
Maybe
Pointer
,
_phylo_group
GhostPointers
::
[
Pointer
]
}
}
deriving
(
Generic
,
Show
,
Eq
)
deriving
(
Generic
,
Show
,
Eq
)
...
@@ -276,6 +297,9 @@ data PhyloFis = PhyloFis
...
@@ -276,6 +297,9 @@ data PhyloFis = PhyloFis
----------------
----------------
makeLenses
''
C
onfig
makeLenses
''
C
onfig
makeLenses
''
P
roximity
makeLenses
''
C
ontextualUnit
makeLenses
''
T
imeUnit
makeLenses
''
P
hyloFoundations
makeLenses
''
P
hyloFoundations
makeLenses
''
P
hyloFis
makeLenses
''
P
hyloFis
makeLenses
''
P
hylo
makeLenses
''
P
hylo
...
...
src/Gargantext/Viz/Phylo/PhyloExample.hs
View file @
92b4221b
...
@@ -49,7 +49,7 @@ phylo1 = appendGroups fisToGroup 1 phyloFis phyloBase
...
@@ -49,7 +49,7 @@ phylo1 = appendGroups fisToGroup 1 phyloFis phyloBase
phyloFis
::
Map
(
Date
,
Date
)
[
PhyloFis
]
phyloFis
::
Map
(
Date
,
Date
)
[
PhyloFis
]
phyloFis
=
toPhyloFis
docsByPeriods
(
fisSupport
config
)
(
fisSize
config
)
phyloFis
=
toPhyloFis
docsByPeriods
(
getFisSupport
$
contextualUnit
config
)
(
getFisSize
$
contextualUnit
config
)
docsByPeriods
::
Map
(
Date
,
Date
)
[
Document
]
docsByPeriods
::
Map
(
Date
,
Date
)
[
Document
]
...
@@ -66,23 +66,22 @@ phyloBase = toPhyloBase docs mapList config
...
@@ -66,23 +66,22 @@ phyloBase = toPhyloBase docs mapList config
phyloCooc
::
Map
Date
Cooc
phyloCooc
::
Map
Date
Cooc
phyloCooc
=
docsTo
CoocByYear
docs
(
foundations
^.
foundations_roots
)
config
phyloCooc
=
docsTo
TimeScaleCooc
docs
(
foundations
^.
foundations_roots
)
config
periods
::
[(
Date
,
Date
)]
periods
::
[(
Date
,
Date
)]
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
timePeriod
config
)
(
timeStep
config
)
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
config
)
(
getTimeStep
$
timeUnit
config
)
nbDocsByYear
::
Map
Date
Double
nbDocsByYear
::
Map
Date
Double
nbDocsByYear
=
nbDocsByTime
docs
(
timeUnit
config
)
nbDocsByYear
=
docsToTimeScaleNb
docs
config
::
Config
config
::
Config
config
=
config
=
defaultConfig
{
phyloName
=
"Cesar et Cleopatre"
defaultConfig
{
phyloName
=
"Cesar et Cleopatre"
,
branchSize
=
0
,
branchSize
=
0
,
fisSupport
=
0
,
contextualUnit
=
Fis
0
0
}
,
fisSize
=
0
}
docs
::
[
Document
]
docs
::
[
Document
]
...
...
src/Gargantext/Viz/Phylo/PhyloMaker.hs
View file @
92b4221b
...
@@ -87,8 +87,7 @@ fisToGroup fis pId lvl idx fdt coocs =
...
@@ -87,8 +87,7 @@ fisToGroup fis pId lvl idx fdt coocs =
ngrams
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
ngramsToCooc
ngrams
coocs
)
(
1
,
[]
)
(
1
,
[]
)
[]
[]
[]
[]
[]
[]
[]
[]
[]
Nothing
toPhylo1
::
[
Document
]
->
Phylo
->
Phylo
toPhylo1
::
[
Document
]
->
Phylo
->
Phylo
...
@@ -96,7 +95,7 @@ toPhylo1 docs phyloBase = appendGroups fisToGroup 1 phyloFis phyloBase
...
@@ -96,7 +95,7 @@ toPhylo1 docs phyloBase = appendGroups fisToGroup 1 phyloFis phyloBase
where
where
--------------------------------------
--------------------------------------
phyloFis
::
Map
(
Date
,
Date
)
[
PhyloFis
]
phyloFis
::
Map
(
Date
,
Date
)
[
PhyloFis
]
phyloFis
=
toPhyloFis
docs'
(
fisSupport
$
getConfig
phyloBase
)
(
fisSize
$
getConfig
phyloBase
)
phyloFis
=
toPhyloFis
docs'
(
getFisSupport
$
contextualUnit
$
getConfig
phyloBase
)
(
getFisSize
$
contextualUnit
$
getConfig
phyloBase
)
--------------------------------------
--------------------------------------
docs'
::
Map
(
Date
,
Date
)
[
Document
]
docs'
::
Map
(
Date
,
Date
)
[
Document
]
docs'
=
groupDocsByPeriod
date
(
getPeriodIds
phyloBase
)
docs
docs'
=
groupDocsByPeriod
date
(
getPeriodIds
phyloBase
)
docs
...
@@ -174,14 +173,14 @@ ngramsToCooc ngrams coocs =
...
@@ -174,14 +173,14 @@ ngramsToCooc ngrams coocs =
-- | To transform the docs into a time map of coocurency matrix
-- | To transform the docs into a time map of coocurency matrix
docsTo
CoocByYear
::
[
Document
]
->
Vector
Ngrams
->
Config
->
Map
Date
Cooc
docsTo
TimeScaleCooc
::
[
Document
]
->
Vector
Ngrams
->
Config
->
Map
Date
Cooc
docsTo
CoocByYear
docs
fdt
conf
=
docsTo
TimeScaleCooc
docs
fdt
conf
=
let
mCooc
=
fromListWith
sumCooc
let
mCooc
=
fromListWith
sumCooc
$
map
(
\
(
_d
,
l
)
->
(
_d
,
listToMatrix
l
))
$
map
(
\
(
_d
,
l
)
->
(
_d
,
listToMatrix
l
))
$
map
(
\
doc
->
(
date
doc
,
sort
$
ngramsToIdx
(
text
doc
)
fdt
))
docs
$
map
(
\
doc
->
(
date
doc
,
sort
$
ngramsToIdx
(
text
doc
)
fdt
))
docs
mCooc'
=
fromList
mCooc'
=
fromList
$
map
(
\
t
->
(
t
,
empty
))
$
map
(
\
t
->
(
t
,
empty
))
$
toTimeScale
(
map
date
docs
)
(
timeUnit
conf
)
$
toTimeScale
(
map
date
docs
)
1
in
trace
(
"
\n
"
<>
"-- | Build the coocurency matrix for "
<>
show
(
length
$
keys
mCooc'
)
<>
" unit of time"
<>
"
\n
"
)
in
trace
(
"
\n
"
<>
"-- | Build the coocurency matrix for "
<>
show
(
length
$
keys
mCooc'
)
<>
" unit of time"
<>
"
\n
"
)
$
unionWith
sumCooc
mCooc
mCooc'
$
unionWith
sumCooc
mCooc
mCooc'
...
@@ -208,11 +207,11 @@ groupDocsByPeriod f pds es =
...
@@ -208,11 +207,11 @@ groupDocsByPeriod f pds es =
--------------------------------------
--------------------------------------
-- | To count the number of docs by unit of time
(like a year)
-- | To count the number of docs by unit of time
nbDocsByTime
::
[
Document
]
->
Int
->
Map
Date
Double
docsToTimeScaleNb
::
[
Document
]
->
Map
Date
Double
nbDocsByTime
docs
step
=
docsToTimeScaleNb
docs
=
let
docs'
=
fromListWith
(
+
)
$
map
(
\
d
->
(
date
d
,
1
))
docs
let
docs'
=
fromListWith
(
+
)
$
map
(
\
d
->
(
date
d
,
1
))
docs
time
=
fromList
$
map
(
\
t
->
(
t
,
0
))
$
toTimeScale
(
keys
docs'
)
step
time
=
fromList
$
map
(
\
t
->
(
t
,
0
))
$
toTimeScale
(
keys
docs'
)
1
in
trace
(
"
\n
"
<>
"-- | Group "
<>
show
(
length
docs
)
<>
" docs by "
<>
show
(
length
time
)
<>
" unit of time"
<>
"
\n
"
)
in
trace
(
"
\n
"
<>
"-- | Group "
<>
show
(
length
docs
)
<>
" docs by "
<>
show
(
length
time
)
<>
" unit of time"
<>
"
\n
"
)
$
unionWith
(
+
)
time
docs'
$
unionWith
(
+
)
time
docs'
...
@@ -227,10 +226,10 @@ toPhyloBase :: [Document] -> TermList -> Config -> Phylo
...
@@ -227,10 +226,10 @@ toPhyloBase :: [Document] -> TermList -> Config -> Phylo
toPhyloBase
docs
lst
conf
=
toPhyloBase
docs
lst
conf
=
let
foundations
=
PhyloFoundations
(
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
)
lst
let
foundations
=
PhyloFoundations
(
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
)
lst
params
=
defaultPhyloParam
{
_phyloParam_config
=
conf
}
params
=
defaultPhyloParam
{
_phyloParam_config
=
conf
}
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
timePeriod
conf
)
(
timeStep
conf
)
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
in
trace
(
"
\n
"
<>
"-- | Create PhyloBase out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
in
trace
(
"
\n
"
<>
"-- | Create PhyloBase out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
$
Phylo
foundations
$
Phylo
foundations
(
docsTo
CoocByYear
docs
(
foundations
^.
foundations_roots
)
conf
)
(
docsTo
TimeScaleCooc
docs
(
foundations
^.
foundations_roots
)
conf
)
(
nbDocsByTime
docs
$
timeUnit
conf
)
(
docsToTimeScaleNb
docs
)
params
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
initPhyloLevels
(
phyloLevel
conf
)
prd
)))
periods
)
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
initPhyloLevels
(
phyloLevel
conf
)
prd
)))
periods
)
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
92b4221b
...
@@ -91,6 +91,18 @@ toTimeScale dates step =
...
@@ -91,6 +91,18 @@ toTimeScale dates step =
in
[
start
,
(
start
+
step
)
..
end
]
in
[
start
,
(
start
+
step
)
..
end
]
getTimeStep
::
TimeUnit
->
Int
getTimeStep
time
=
case
time
of
Year
_
s
_
->
s
getTimePeriod
::
TimeUnit
->
Int
getTimePeriod
time
=
case
time
of
Year
p
_
_
->
p
getTimeFrame
::
TimeUnit
->
Int
getTimeFrame
time
=
case
time
of
Year
_
_
f
->
f
-------------
-------------
-- | Fis | --
-- | Fis | --
-------------
-------------
...
@@ -136,6 +148,22 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l
...
@@ -136,6 +148,22 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l
<>
"Clique : "
<>
(
traceClique
mFis
)
<>
"
\n
"
)
mFis
<>
"Clique : "
<>
(
traceClique
mFis
)
<>
"
\n
"
)
mFis
-------------------------
-- | Contextual unit | --
-------------------------
getFisSupport
::
ContextualUnit
->
Int
getFisSupport
unit
=
case
unit
of
Fis
s
_
->
s
_
->
panic
(
"[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support"
)
getFisSize
::
ContextualUnit
->
Int
getFisSize
unit
=
case
unit
of
Fis
_
s
->
s
_
->
panic
(
"[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size"
)
--------------
--------------
-- | Cooc | --
-- | Cooc | --
--------------
--------------
...
@@ -223,6 +251,7 @@ updatePhyloGroups lvl m phylo =
...
@@ -223,6 +251,7 @@ updatePhyloGroups lvl m phylo =
-- | Pointers | --
-- | Pointers | --
------------------
------------------
pointersToLinks
::
PhyloGroupId
->
[
Pointer
]
->
[
Link
]
pointersToLinks
::
PhyloGroupId
->
[
Pointer
]
->
[
Link
]
pointersToLinks
id
pointers
=
map
(
\
p
->
((
id
,
fst
p
),
snd
p
))
pointers
pointersToLinks
id
pointers
=
map
(
\
p
->
((
id
,
fst
p
),
snd
p
))
pointers
...
@@ -230,3 +259,23 @@ mergeLinks :: [Link] -> [Link] -> [Link]
...
@@ -230,3 +259,23 @@ mergeLinks :: [Link] -> [Link] -> [Link]
mergeLinks
toChilds
toParents
=
mergeLinks
toChilds
toParents
=
let
toChilds'
=
fromList
$
map
(
\
((
from
,
to
),
w
)
->
((
to
,
from
),
w
))
toChilds
let
toChilds'
=
fromList
$
map
(
\
((
from
,
to
),
w
)
->
((
to
,
from
),
w
))
toChilds
in
toList
$
unionWith
max
(
fromList
toParents
)
toChilds'
in
toList
$
unionWith
max
(
fromList
toParents
)
toChilds'
-------------------
-- | Proximity | --
-------------------
getSensibility
::
Proximity
->
Double
getSensibility
proxi
=
case
proxi
of
WeightedLogJaccard
s
_
_
->
s
Hamming
->
undefined
getThresholdInit
::
Proximity
->
Double
getThresholdInit
proxi
=
case
proxi
of
WeightedLogJaccard
_
t
_
->
t
Hamming
->
undefined
getThresholdStep
::
Proximity
->
Double
getThresholdStep
proxi
=
case
proxi
of
WeightedLogJaccard
_
_
s
->
s
Hamming
->
undefined
\ No newline at end of file
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
92b4221b
...
@@ -19,11 +19,19 @@ import Gargantext.Prelude
...
@@ -19,11 +19,19 @@ import Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.PhyloTools
import
Data.List
(
foldl'
,
(
++
),
null
,
intersect
,
(
\\
),
union
,
nub
,
concat
)
--------------------
--------------------
-- | Clustering | --
-- | Clustering | --
--------------------
--------------------
relatedComponents
::
[
PhyloGroup
]
->
[[
PhyloGroup
]]
relatedComponents
::
Eq
a
=>
[[
a
]]
->
[[
a
]]
relatedComponents
groups
=
undefined
relatedComponents
graphs
=
foldl'
(
\
mem
groups
->
\ No newline at end of file
if
(
null
mem
)
then
mem
++
[
groups
]
else
let
related
=
filter
(
\
groups'
->
(
not
.
null
)
$
intersect
groups
groups'
)
mem
in
if
(
null
related
)
then
mem
++
[
groups
]
else
(
mem
\\
related
)
++
[
union
groups
(
nub
$
concat
related
)]
)
[]
graphs
\ No newline at end of file
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
92b4221b
...
@@ -16,7 +16,7 @@ Portability : POSIX
...
@@ -16,7 +16,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.TemporalMatching
where
module
Gargantext.Viz.Phylo.TemporalMatching
where
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
find
,
groupBy
,
scanl
,
any
,
nub
,
union
)
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
find
,
groupBy
,
scanl
,
any
,
nub
,
union
)
import
Data.Map
(
Map
,
fromList
,
toList
,
fromListWith
,
filterWithKey
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
)
import
Data.Map
(
Map
,
fromList
,
toList
,
fromListWith
,
filterWithKey
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
,
member
,
(
!
)
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.AdaptativePhylo
...
@@ -25,6 +25,8 @@ import Gargantext.Viz.Phylo.SynchronicClustering
...
@@ -25,6 +25,8 @@ import Gargantext.Viz.Phylo.SynchronicClustering
import
Control.Lens
hiding
(
Level
)
import
Control.Lens
hiding
(
Level
)
import
qualified
Data.Set
as
Set
-------------------
-------------------
-- | Proximity | --
-- | Proximity | --
...
@@ -73,7 +75,7 @@ weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
...
@@ -73,7 +75,7 @@ weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
-- | To choose a proximity function
-- | To choose a proximity function
pickProximity
::
Proximity
->
Double
->
Cooc
->
Cooc
->
[
Int
]
->
[
Int
]
->
Double
pickProximity
::
Proximity
->
Double
->
Cooc
->
Cooc
->
[
Int
]
->
[
Int
]
->
Double
pickProximity
proximity
docs
cooc
cooc'
ngrams
ngrams'
=
case
proximity
of
pickProximity
proximity
docs
cooc
cooc'
ngrams
ngrams'
=
case
proximity
of
WeightedLogJaccard
sens
->
weightedLogJaccard
sens
docs
cooc
cooc'
ngrams
ngrams'
WeightedLogJaccard
sens
_
_
->
weightedLogJaccard
sens
docs
cooc
cooc'
ngrams
ngrams'
Hamming
->
undefined
Hamming
->
undefined
...
@@ -96,23 +98,20 @@ toProximity docs proximity group target target' =
...
@@ -96,23 +98,20 @@ toProximity docs proximity group target target' =
-- | Find pairs of valuable candidates to be matched
-- | Find pairs of valuable candidates to be matched
makePairs
::
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
Double
->
Map
Date
Double
->
Proximity
->
PhyloGroup
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs
::
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
PhyloGroup
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs
candidates
periods
thr
docs
proximity
group
=
case
null
periods
of
makePairs
candidates
periods
docs
group
=
case
null
periods
of
True
->
[]
True
->
[]
-- | at least on of the pair candidates should be from the last added period
-- | at least on of the pair candidates should be from the last added period
False
->
filter
(
\
(
cdt
,
cdt'
)
->
(
inLastPeriod
cdt
periods
)
False
->
filter
(
\
(
cdt
,
cdt'
)
->
(
inLastPeriod
cdt
periods
)
||
(
inLastPeriod
cdt'
periods
))
||
(
inLastPeriod
cdt'
periods
))
$
listToKeys
$
listToKeys
candidates
-- | remove poor candidates from previous periods
$
filter
(
\
cdt
->
(
inLastPeriod
cdt
periods
)
||
((
toProximity
(
reframeDocs
docs
periods
)
proximity
group
group
cdt
)
>=
thr
))
candidates
where
where
inLastPeriod
::
PhyloGroup
->
[
PhyloPeriodId
]
->
Bool
inLastPeriod
::
PhyloGroup
->
[
PhyloPeriodId
]
->
Bool
inLastPeriod
g
prds
=
(
g
^.
phylo_groupPeriod
)
==
(
last'
"makePairs"
prds
)
inLastPeriod
g
prds
=
(
g
^.
phylo_groupPeriod
)
==
(
last'
"makePairs"
prds
)
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Double
->
Map
Date
Double
->
Proximity
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
candidates
fil
thr
docs
proxi
group
=
case
pointers
of
phyloGroupMatching
candidates
fil
proxi
docs
group
=
case
pointers
of
Nothing
->
addPointers
group
fil
TemporalPointer
[]
Nothing
->
addPointers
group
fil
TemporalPointer
[]
Just
pts
->
addPointers
group
fil
TemporalPointer
Just
pts
->
addPointers
group
fil
TemporalPointer
$
head'
"phyloGroupMatching"
$
head'
"phyloGroupMatching"
...
@@ -126,18 +125,55 @@ phyloGroupMatching candidates fil thr docs proxi group = case pointers of
...
@@ -126,18 +125,55 @@ phyloGroupMatching candidates fil thr docs proxi group = case pointers of
-- | for each time frame, process the proximity on relevant pairs of targeted groups
-- | for each time frame, process the proximity on relevant pairs of targeted groups
$
scanl
(
\
acc
groups
->
$
scanl
(
\
acc
groups
->
let
periods
=
nub
$
map
(
\
g'
->
g'
^.
phylo_groupPeriod
)
$
concat
groups
let
periods
=
nub
$
map
(
\
g'
->
g'
^.
phylo_groupPeriod
)
$
concat
groups
pairs
=
makePairs
(
concat
groups
)
periods
thr
docs
proxi
group
pairs
=
makePairs
(
concat
groups
)
periods
docs
group
in
acc
++
(
filter
(
\
(
_
,
proximity
)
->
proximity
>=
thr
)
in
acc
++
(
concat
$
concat
$
map
(
\
(
c
,
c'
)
->
$
map
(
\
(
c
,
c'
)
->
-- | process the proximity between the current group and a pair of candidates
-- | process the proximity between the current group and a pair of candidates
let
proximity
=
toProximity
(
reframe
Docs
docs
periods
)
proxi
group
c
c'
let
proximity
=
toProximity
(
filter
Docs
docs
periods
)
proxi
group
c
c'
in
if
(
c
==
c'
)
in
if
(
c
==
c'
)
then
[(
getGroupId
c
,
proximity
)]
then
[(
getGroupId
c
,
proximity
)]
else
[(
getGroupId
c
,
proximity
),(
getGroupId
c'
,
proximity
)]
)
pairs
)
else
[(
getGroupId
c
,
proximity
),(
getGroupId
c'
,
proximity
)]
)
pairs
)
)
[]
)
[]
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
$
inits
candidates
$
inits
candidates
--------------------------------------
filterDocs
::
Map
Date
Double
->
[
PhyloPeriodId
]
->
Map
Date
Double
filterDocs
d
pds
=
restrictKeys
d
$
periodsToYears
pds
------------------
-- | Pointers | --
------------------
-- ghostHunter :: [[PhyloGroup]] -> [[PhyloGroup]]
-- ghostHunter branches =
-- map (\branch ->
-- -- | il manque une référence au group source de chaque pointer
-- let pointers = elems $ fromList
-- $ map (\pt -> (groupIds ! (fst pt),pt))
-- $ filter (\pt -> member (fst pt) groupIds) $ concat $ map (\g -> g ^. phylo_groupGhostPointers) branch
-- in undefined
-- ) branches
-- where
-- groupIds :: Map PhyloGroupId Int
-- groupIds = fromList $ map (\g -> (getGroupId g, last' "ghostHunter" $ snd $ g ^. phylo_groupBranchId)) $ concat branches
-- --------------------------------------
-- selectBest :: [Pointers] -> [Pointers]
-- se
filterPointers
::
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
filterPointers
thr
groups
=
map
(
\
group
->
let
ghosts
=
filter
(
\
(
_
,
w
)
->
w
<
thr
)
$
group
^.
phylo_groupPeriodParents
in
group
&
phylo_groupPeriodParents
%~
(
filter
(
\
(
_
,
w
)
->
w
>=
thr
))
&
phylo_groupPeriodChilds
%~
(
filter
(
\
(
_
,
w
)
->
w
>=
thr
))
&
phylo_groupGhostPointers
%~
(
++
ghosts
)
)
groups
-----------------------------
-----------------------------
...
@@ -174,14 +210,26 @@ toBranchQuality :: [[PhyloGroup]] -> [(Double,[PhyloGroup])]
...
@@ -174,14 +210,26 @@ toBranchQuality :: [[PhyloGroup]] -> [(Double,[PhyloGroup])]
toBranchQuality
branches
=
undefined
toBranchQuality
branches
=
undefined
reframeDocs
::
Map
Date
Double
->
[
PhyloPeriodId
]
->
Map
Date
Double
groupsToBranches
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
reframeDocs
docs
periods
=
restrictKeys
docs
$
periodsToYears
periods
groupsToBranches
groups
=
-- | run the related component algorithm
let
graph
=
zip
[
1
..
]
$
relatedComponents
$
map
(
\
group
->
[
getGroupId
group
]
++
(
map
fst
$
group
^.
phylo_groupPeriodParents
)
++
(
map
fst
$
group
^.
phylo_groupPeriodChilds
)
)
$
elems
groups
-- | update each group's branch id
in
map
(
\
(
bId
,
ids
)
->
map
(
\
group
->
group
&
phylo_groupBranchId
%~
(
\
(
lvl
,
lst
)
->
(
lvl
,
lst
++
[
bId
])))
$
elems
$
restrictKeys
groups
(
Set
.
fromList
ids
)
)
graph
-- findGhostLinks :: [Link] -> [[Link]] -> Map PhyloGroupId
-- findGhostLinks :: [Link] -> [[Link]] -> Map PhyloGroupId
adaptativeMatching
::
Int
->
Double
->
Double
->
Double
->
Map
Date
Double
->
Proximity
->
[
PhyloGroup
]
->
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[
PhyloGroup
]
adaptativeMatching
::
Proximity
->
Double
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
adaptativeMatching
maxTime
thrStep
thrMatch
thrQua
docs
proximity
groups
candidates
period
s
=
adaptativeMatching
proximity
thr
thrQua
group
s
=
-- | check if we should break some of the new branches or not
-- | check if we should break some of the new branches or not
case
shouldBreak
thrQua
branches'
of
case
shouldBreak
thrQua
branches'
of
True
->
concat
$
map
(
\
(
s
,
b
)
->
True
->
concat
$
map
(
\
(
s
,
b
)
->
...
@@ -190,12 +238,7 @@ adaptativeMatching maxTime thrStep thrMatch thrQua docs proximity groups candida
...
@@ -190,12 +238,7 @@ adaptativeMatching maxTime thrStep thrMatch thrQua docs proximity groups candida
then
b
then
b
-- | we break the branch using an increased temporal matching threshold
-- | we break the branch using an increased temporal matching threshold
else
let
nextGroups
=
undefined
else
let
nextGroups
=
undefined
nextCandidates
=
undefined
in
adaptativeMatching
proximity
(
thr
+
(
getThresholdStep
proximity
))
thrQua
nextGroups
nextPeriods
=
undefined
in
adaptativeMatching
maxTime
thrStep
(
thrMatch
+
thrStep
)
thrQua
(
reframeDocs
docs
nextPeriods
)
proximity
nextGroups
nextCandidates
nextPeriods
)
branches'
)
branches'
-- | the quality of all the new branches is sufficient
-- | the quality of all the new branches is sufficient
False
->
concat
branches
False
->
concat
branches
...
@@ -205,25 +248,41 @@ adaptativeMatching maxTime thrStep thrMatch thrQua docs proximity groups candida
...
@@ -205,25 +248,41 @@ adaptativeMatching maxTime thrStep thrMatch thrQua docs proximity groups candida
branches'
=
toBranchQuality
branches
branches'
=
toBranchQuality
branches
-- | 2) group the new groups into branches
-- | 2) group the new groups into branches
branches
::
[[
PhyloGroup
]]
branches
::
[[
PhyloGroup
]]
branches
=
relatedComponents
groups'
branches
=
groupsToBranches
$
fromList
$
map
(
\
group
->
(
getGroupId
group
,
group
))
groups'
-- | 1)
connect each group to its parents and childs
-- | 1)
filter the pointers of each groups regarding the current state of the quality threshold
groups'
::
[
PhyloGroup
]
groups'
::
[
PhyloGroup
]
groups'
=
map
(
\
group
->
groups'
=
filterPointers
thr
groups
let
childs
=
getCandidates
ToChilds
group
(
getNextPeriods
ToChilds
maxTime
(
group
^.
phylo_groupPeriod
)
periods
)
candidates
parents
=
getCandidates
ToParents
group
(
getNextPeriods
ToParents
maxTime
(
group
^.
phylo_groupPeriod
)
periods
)
candidates
-- | match the group to its possible childs then parents
in
phyloGroupMatching
parents
ToParents
thrMatch
docs
proximity
$
phyloGroupMatching
childs
ToChilds
thrMatch
docs
proximity
group
)
groups
temporalMatching
::
Phylo
->
Phylo
temporalMatching
::
Phylo
->
Phylo
temporalMatching
phylo
=
temporalMatching
phylo
=
updatePhyloGroups
1
branches
phylo
let
branches
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
where
$
adaptativeMatching
(
maxTimeMatch
$
getConfig
phylo
)
0
0
0
-- | 4) find the ghost links and postprocess the branches
(
phylo
^.
phylo_timeDocs
)
branches'
::
Map
PhyloGroupId
PhyloGroup
(
phyloProximity
$
getConfig
phylo
)
branches'
=
undefined
(
getGroupsFromLevel
1
phylo
)
(
getGroupsFromLevel
1
phylo
)
(
getPeriodIds
phylo
)
-- | 3) run the adaptative matching to find the best repartition among branches
in
updatePhyloGroups
1
branches
phylo
branches
::
Map
PhyloGroupId
PhyloGroup
\ No newline at end of file
branches
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
adaptativeMatching
proximity
(
getThresholdInit
proximity
)
(
phyloQuality
$
getConfig
phylo
)
groups'
-- | 2) for each group process an initial temporal Matching
groups'
::
[
PhyloGroup
]
groups'
=
let
maxTime
=
getTimeFrame
$
timeUnit
$
getConfig
phylo
periods
=
getPeriodIds
phylo
docs
=
phylo
^.
phylo_timeDocs
--------------------------------------
in
map
(
\
group
->
let
childs
=
getCandidates
ToChilds
group
(
getNextPeriods
ToChilds
maxTime
(
group
^.
phylo_groupPeriod
)
periods
)
groups
parents
=
getCandidates
ToParents
group
(
getNextPeriods
ToParents
maxTime
(
group
^.
phylo_groupPeriod
)
periods
)
groups
in
phyloGroupMatching
parents
ToParents
proximity
docs
$
phyloGroupMatching
childs
ToChilds
proximity
docs
group
)
groups
-- | 1) start with all the groups from a given level
groups
::
[
PhyloGroup
]
groups
=
getGroupsFromLevel
1
phylo
--------------------------------------
proximity
::
Proximity
proximity
=
phyloProximity
$
getConfig
phylo
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment