Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
145
Issues
145
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
92b4221b
Commit
92b4221b
authored
Aug 29, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add branching
parent
d3097207
Pipeline
#548
failed with stage
Changes
7
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
232 additions
and
94 deletions
+232
-94
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+8
-8
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+45
-21
PhyloExample.hs
src/Gargantext/Viz/Phylo/PhyloExample.hs
+5
-6
PhyloMaker.hs
src/Gargantext/Viz/Phylo/PhyloMaker.hs
+12
-13
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+50
-1
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+10
-2
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+102
-43
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
92b4221b
...
...
@@ -128,16 +128,16 @@ csvToCorpus limit path = Vector.toList
-- | To use the correct parser given a CorpusType
fileToCorpus
::
CorpusParser
->
Int
->
FilePath
->
IO
([(
Int
,
Text
)])
fileToCorpus
parser
limit
path
=
case
parser
of
Wos
->
wosToCorpus
limit
path
Csv
->
csvToCorpus
limit
path
fileToCorpus
::
CorpusParser
->
FilePath
->
IO
([(
Int
,
Text
)])
fileToCorpus
parser
path
=
case
parser
of
Wos
limit
->
wosToCorpus
limit
path
Csv
limit
->
csvToCorpus
limit
path
-- | To parse a file into a list of Document
fileToDocs
::
CorpusParser
->
Int
->
FilePath
->
TermList
->
IO
[
Document
]
fileToDocs
parser
limit
path
lst
=
do
corpus
<-
fileToCorpus
parser
limit
path
fileToDocs
::
CorpusParser
->
FilePath
->
TermList
->
IO
[
Document
]
fileToDocs
parser
path
lst
=
do
corpus
<-
fileToCorpus
parser
path
let
patterns
=
buildPatterns
lst
pure
$
map
(
(
\
(
y
,
t
)
->
Document
y
t
)
.
filterTerms
patterns
)
corpus
...
...
@@ -162,7 +162,7 @@ main = do
printIOMsg
"Parse the corpus"
mapList
<-
csvGraphTermList
(
listPath
config
)
corpus
<-
fileToDocs
(
corpusParser
config
)
(
corpus
Limit
config
)
(
corpus
Path
config
)
mapList
corpus
<-
fileToDocs
(
corpusParser
config
)
(
corpusPath
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOMsg
"Reconstruct the Phylo"
...
...
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
92b4221b
...
...
@@ -50,46 +50,63 @@ import Control.Lens (makeLenses)
----------------
data
CorpusParser
=
Wos
|
Csv
deriving
(
Show
,
Generic
,
Eq
)
data
CorpusParser
=
Wos
{
_wos_limit
::
Int
}
|
Csv
{
_csv_limit
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
Proximity
=
WeightedLogJaccard
{
_wlj_sensibility
::
Double
,
_wlj_thresholdInit
::
Double
,
_wlj_thresholdStep
::
Double
}
|
Hamming
deriving
(
Show
,
Generic
,
Eq
)
data
TimeUnit
=
Year
{
_year_period
::
Int
,
_year_step
::
Int
,
_year_matchingFrame
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
ContextualUnit
=
Fis
{
_fis_support
::
Int
,
_fis_size
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
Proximity
=
WeightedLogJaccard
{
_sensibility
::
Double
}
|
Hamming
deriving
(
Show
,
Generic
,
Eq
)
data
Config
=
Config
{
corpusPath
::
FilePath
,
listPath
::
FilePath
,
outputPath
::
FilePath
,
corpusParser
::
CorpusParser
,
corpusLimit
::
Int
,
phyloName
::
Text
,
phyloLevel
::
Int
,
phyloQuality
::
Double
,
phyloProximity
::
Proximity
,
timeUnit
::
Int
,
maxTimeMatch
::
Int
,
timePeriod
::
Int
,
timeStep
::
Int
,
fisSupport
::
Int
,
fisSize
::
Int
,
timeUnit
::
TimeUnit
,
contextualUnit
::
ContextualUnit
,
branchSize
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
defaultConfig
::
Config
defaultConfig
=
Config
{
corpusPath
=
""
,
listPath
=
""
,
outputPath
=
""
,
corpusParser
=
Csv
,
corpusLimit
=
1000
,
corpusParser
=
Csv
1000
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
2
,
phyloProximity
=
WeightedLogJaccard
10
,
timeUnit
=
1
,
maxTimeMatch
=
5
,
timePeriod
=
3
,
timeStep
=
1
,
fisSupport
=
2
,
fisSize
=
4
,
phyloQuality
=
0.5
,
phyloProximity
=
WeightedLogJaccard
10
0
0.05
,
timeUnit
=
Year
3
1
5
,
contextualUnit
=
Fis
2
4
,
branchSize
=
3
}
...
...
@@ -99,6 +116,10 @@ instance FromJSON CorpusParser
instance
ToJSON
CorpusParser
instance
FromJSON
Proximity
instance
ToJSON
Proximity
instance
FromJSON
TimeUnit
instance
ToJSON
TimeUnit
instance
FromJSON
ContextualUnit
instance
ToJSON
ContextualUnit
-- | Software parameters
...
...
@@ -237,7 +258,7 @@ data PhyloGroup =
,
_phylo_groupLevelChilds
::
[
Pointer
]
,
_phylo_groupPeriodParents
::
[
Pointer
]
,
_phylo_groupPeriodChilds
::
[
Pointer
]
,
_phylo_group
BreakPointer
::
Maybe
Pointer
,
_phylo_group
GhostPointers
::
[
Pointer
]
}
deriving
(
Generic
,
Show
,
Eq
)
...
...
@@ -276,6 +297,9 @@ data PhyloFis = PhyloFis
----------------
makeLenses
''
C
onfig
makeLenses
''
P
roximity
makeLenses
''
C
ontextualUnit
makeLenses
''
T
imeUnit
makeLenses
''
P
hyloFoundations
makeLenses
''
P
hyloFis
makeLenses
''
P
hylo
...
...
src/Gargantext/Viz/Phylo/PhyloExample.hs
View file @
92b4221b
...
...
@@ -49,7 +49,7 @@ phylo1 = appendGroups fisToGroup 1 phyloFis phyloBase
phyloFis
::
Map
(
Date
,
Date
)
[
PhyloFis
]
phyloFis
=
toPhyloFis
docsByPeriods
(
fisSupport
config
)
(
fisSize
config
)
phyloFis
=
toPhyloFis
docsByPeriods
(
getFisSupport
$
contextualUnit
config
)
(
getFisSize
$
contextualUnit
config
)
docsByPeriods
::
Map
(
Date
,
Date
)
[
Document
]
...
...
@@ -66,23 +66,22 @@ phyloBase = toPhyloBase docs mapList config
phyloCooc
::
Map
Date
Cooc
phyloCooc
=
docsTo
CoocByYear
docs
(
foundations
^.
foundations_roots
)
config
phyloCooc
=
docsTo
TimeScaleCooc
docs
(
foundations
^.
foundations_roots
)
config
periods
::
[(
Date
,
Date
)]
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
timePeriod
config
)
(
timeStep
config
)
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
config
)
(
getTimeStep
$
timeUnit
config
)
nbDocsByYear
::
Map
Date
Double
nbDocsByYear
=
nbDocsByTime
docs
(
timeUnit
config
)
nbDocsByYear
=
docsToTimeScaleNb
docs
config
::
Config
config
=
defaultConfig
{
phyloName
=
"Cesar et Cleopatre"
,
branchSize
=
0
,
fisSupport
=
0
,
fisSize
=
0
}
,
contextualUnit
=
Fis
0
0
}
docs
::
[
Document
]
...
...
src/Gargantext/Viz/Phylo/PhyloMaker.hs
View file @
92b4221b
...
...
@@ -87,8 +87,7 @@ fisToGroup fis pId lvl idx fdt coocs =
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
1
,
[]
)
[]
[]
[]
[]
Nothing
[]
[]
[]
[]
[]
toPhylo1
::
[
Document
]
->
Phylo
->
Phylo
...
...
@@ -96,7 +95,7 @@ toPhylo1 docs phyloBase = appendGroups fisToGroup 1 phyloFis phyloBase
where
--------------------------------------
phyloFis
::
Map
(
Date
,
Date
)
[
PhyloFis
]
phyloFis
=
toPhyloFis
docs'
(
fisSupport
$
getConfig
phyloBase
)
(
fisSize
$
getConfig
phyloBase
)
phyloFis
=
toPhyloFis
docs'
(
getFisSupport
$
contextualUnit
$
getConfig
phyloBase
)
(
getFisSize
$
contextualUnit
$
getConfig
phyloBase
)
--------------------------------------
docs'
::
Map
(
Date
,
Date
)
[
Document
]
docs'
=
groupDocsByPeriod
date
(
getPeriodIds
phyloBase
)
docs
...
...
@@ -174,14 +173,14 @@ ngramsToCooc ngrams coocs =
-- | To transform the docs into a time map of coocurency matrix
docsTo
CoocByYear
::
[
Document
]
->
Vector
Ngrams
->
Config
->
Map
Date
Cooc
docsTo
CoocByYear
docs
fdt
conf
=
docsTo
TimeScaleCooc
::
[
Document
]
->
Vector
Ngrams
->
Config
->
Map
Date
Cooc
docsTo
TimeScaleCooc
docs
fdt
conf
=
let
mCooc
=
fromListWith
sumCooc
$
map
(
\
(
_d
,
l
)
->
(
_d
,
listToMatrix
l
))
$
map
(
\
doc
->
(
date
doc
,
sort
$
ngramsToIdx
(
text
doc
)
fdt
))
docs
mCooc'
=
fromList
$
map
(
\
t
->
(
t
,
empty
))
$
toTimeScale
(
map
date
docs
)
(
timeUnit
conf
)
$
toTimeScale
(
map
date
docs
)
1
in
trace
(
"
\n
"
<>
"-- | Build the coocurency matrix for "
<>
show
(
length
$
keys
mCooc'
)
<>
" unit of time"
<>
"
\n
"
)
$
unionWith
sumCooc
mCooc
mCooc'
...
...
@@ -208,11 +207,11 @@ groupDocsByPeriod f pds es =
--------------------------------------
-- | To count the number of docs by unit of time
(like a year)
nbDocsByTime
::
[
Document
]
->
Int
->
Map
Date
Double
nbDocsByTime
docs
step
=
-- | To count the number of docs by unit of time
docsToTimeScaleNb
::
[
Document
]
->
Map
Date
Double
docsToTimeScaleNb
docs
=
let
docs'
=
fromListWith
(
+
)
$
map
(
\
d
->
(
date
d
,
1
))
docs
time
=
fromList
$
map
(
\
t
->
(
t
,
0
))
$
toTimeScale
(
keys
docs'
)
step
time
=
fromList
$
map
(
\
t
->
(
t
,
0
))
$
toTimeScale
(
keys
docs'
)
1
in
trace
(
"
\n
"
<>
"-- | Group "
<>
show
(
length
docs
)
<>
" docs by "
<>
show
(
length
time
)
<>
" unit of time"
<>
"
\n
"
)
$
unionWith
(
+
)
time
docs'
...
...
@@ -227,10 +226,10 @@ toPhyloBase :: [Document] -> TermList -> Config -> Phylo
toPhyloBase
docs
lst
conf
=
let
foundations
=
PhyloFoundations
(
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
)
lst
params
=
defaultPhyloParam
{
_phyloParam_config
=
conf
}
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
timePeriod
conf
)
(
timeStep
conf
)
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
in
trace
(
"
\n
"
<>
"-- | Create PhyloBase out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
$
Phylo
foundations
(
docsTo
CoocByYear
docs
(
foundations
^.
foundations_roots
)
conf
)
(
nbDocsByTime
docs
$
timeUnit
conf
)
(
docsTo
TimeScaleCooc
docs
(
foundations
^.
foundations_roots
)
conf
)
(
docsToTimeScaleNb
docs
)
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
initPhyloLevels
(
phyloLevel
conf
)
prd
)))
periods
)
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
92b4221b
...
...
@@ -91,6 +91,18 @@ toTimeScale dates step =
in
[
start
,
(
start
+
step
)
..
end
]
getTimeStep
::
TimeUnit
->
Int
getTimeStep
time
=
case
time
of
Year
_
s
_
->
s
getTimePeriod
::
TimeUnit
->
Int
getTimePeriod
time
=
case
time
of
Year
p
_
_
->
p
getTimeFrame
::
TimeUnit
->
Int
getTimeFrame
time
=
case
time
of
Year
_
_
f
->
f
-------------
-- | Fis | --
-------------
...
...
@@ -136,6 +148,22 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l
<>
"Clique : "
<>
(
traceClique
mFis
)
<>
"
\n
"
)
mFis
-------------------------
-- | Contextual unit | --
-------------------------
getFisSupport
::
ContextualUnit
->
Int
getFisSupport
unit
=
case
unit
of
Fis
s
_
->
s
_
->
panic
(
"[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support"
)
getFisSize
::
ContextualUnit
->
Int
getFisSize
unit
=
case
unit
of
Fis
_
s
->
s
_
->
panic
(
"[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size"
)
--------------
-- | Cooc | --
--------------
...
...
@@ -223,10 +251,31 @@ updatePhyloGroups lvl m phylo =
-- | Pointers | --
------------------
pointersToLinks
::
PhyloGroupId
->
[
Pointer
]
->
[
Link
]
pointersToLinks
id
pointers
=
map
(
\
p
->
((
id
,
fst
p
),
snd
p
))
pointers
mergeLinks
::
[
Link
]
->
[
Link
]
->
[
Link
]
mergeLinks
toChilds
toParents
=
let
toChilds'
=
fromList
$
map
(
\
((
from
,
to
),
w
)
->
((
to
,
from
),
w
))
toChilds
in
toList
$
unionWith
max
(
fromList
toParents
)
toChilds'
\ No newline at end of file
in
toList
$
unionWith
max
(
fromList
toParents
)
toChilds'
-------------------
-- | Proximity | --
-------------------
getSensibility
::
Proximity
->
Double
getSensibility
proxi
=
case
proxi
of
WeightedLogJaccard
s
_
_
->
s
Hamming
->
undefined
getThresholdInit
::
Proximity
->
Double
getThresholdInit
proxi
=
case
proxi
of
WeightedLogJaccard
_
t
_
->
t
Hamming
->
undefined
getThresholdStep
::
Proximity
->
Double
getThresholdStep
proxi
=
case
proxi
of
WeightedLogJaccard
_
_
s
->
s
Hamming
->
undefined
\ No newline at end of file
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
92b4221b
...
...
@@ -19,11 +19,19 @@ import Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Data.List
(
foldl'
,
(
++
),
null
,
intersect
,
(
\\
),
union
,
nub
,
concat
)
--------------------
-- | Clustering | --
--------------------
relatedComponents
::
[
PhyloGroup
]
->
[[
PhyloGroup
]]
relatedComponents
groups
=
undefined
\ No newline at end of file
relatedComponents
::
Eq
a
=>
[[
a
]]
->
[[
a
]]
relatedComponents
graphs
=
foldl'
(
\
mem
groups
->
if
(
null
mem
)
then
mem
++
[
groups
]
else
let
related
=
filter
(
\
groups'
->
(
not
.
null
)
$
intersect
groups
groups'
)
mem
in
if
(
null
related
)
then
mem
++
[
groups
]
else
(
mem
\\
related
)
++
[
union
groups
(
nub
$
concat
related
)]
)
[]
graphs
\ No newline at end of file
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
92b4221b
...
...
@@ -16,7 +16,7 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.TemporalMatching
where
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
find
,
groupBy
,
scanl
,
any
,
nub
,
union
)
import
Data.Map
(
Map
,
fromList
,
toList
,
fromListWith
,
filterWithKey
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
)
import
Data.Map
(
Map
,
fromList
,
toList
,
fromListWith
,
filterWithKey
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
,
member
,
(
!
)
)
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
...
...
@@ -25,6 +25,8 @@ import Gargantext.Viz.Phylo.SynchronicClustering
import
Control.Lens
hiding
(
Level
)
import
qualified
Data.Set
as
Set
-------------------
-- | Proximity | --
...
...
@@ -73,8 +75,8 @@ weightedLogJaccard sens docs cooc cooc' ngrams ngrams'
-- | To choose a proximity function
pickProximity
::
Proximity
->
Double
->
Cooc
->
Cooc
->
[
Int
]
->
[
Int
]
->
Double
pickProximity
proximity
docs
cooc
cooc'
ngrams
ngrams'
=
case
proximity
of
WeightedLogJaccard
sens
->
weightedLogJaccard
sens
docs
cooc
cooc'
ngrams
ngrams'
Hamming
->
undefined
WeightedLogJaccard
sens
_
_
->
weightedLogJaccard
sens
docs
cooc
cooc'
ngrams
ngrams'
Hamming
->
undefined
-- | To process the proximity between a current group and a pair of targets group
...
...
@@ -96,23 +98,20 @@ toProximity docs proximity group target target' =
-- | Find pairs of valuable candidates to be matched
makePairs
::
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
Double
->
Map
Date
Double
->
Proximity
->
PhyloGroup
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs
candidates
periods
thr
docs
proximity
group
=
case
null
periods
of
makePairs
::
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
PhyloGroup
->
[(
PhyloGroup
,
PhyloGroup
)]
makePairs
candidates
periods
docs
group
=
case
null
periods
of
True
->
[]
-- | at least on of the pair candidates should be from the last added period
False
->
filter
(
\
(
cdt
,
cdt'
)
->
(
inLastPeriod
cdt
periods
)
||
(
inLastPeriod
cdt'
periods
))
$
listToKeys
-- | remove poor candidates from previous periods
$
filter
(
\
cdt
->
(
inLastPeriod
cdt
periods
)
||
((
toProximity
(
reframeDocs
docs
periods
)
proximity
group
group
cdt
)
>=
thr
))
candidates
$
listToKeys
candidates
where
inLastPeriod
::
PhyloGroup
->
[
PhyloPeriodId
]
->
Bool
inLastPeriod
g
prds
=
(
g
^.
phylo_groupPeriod
)
==
(
last'
"makePairs"
prds
)
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Double
->
Map
Date
Double
->
Proximity
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
candidates
fil
thr
docs
proxi
group
=
case
pointers
of
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
candidates
fil
proxi
docs
group
=
case
pointers
of
Nothing
->
addPointers
group
fil
TemporalPointer
[]
Just
pts
->
addPointers
group
fil
TemporalPointer
$
head'
"phyloGroupMatching"
...
...
@@ -126,18 +125,55 @@ phyloGroupMatching candidates fil thr docs proxi group = case pointers of
-- | for each time frame, process the proximity on relevant pairs of targeted groups
$
scanl
(
\
acc
groups
->
let
periods
=
nub
$
map
(
\
g'
->
g'
^.
phylo_groupPeriod
)
$
concat
groups
pairs
=
makePairs
(
concat
groups
)
periods
thr
docs
proxi
group
in
acc
++
(
filter
(
\
(
_
,
proximity
)
->
proximity
>=
thr
)
$
concat
pairs
=
makePairs
(
concat
groups
)
periods
docs
group
in
acc
++
(
concat
$
map
(
\
(
c
,
c'
)
->
-- | process the proximity between the current group and a pair of candidates
let
proximity
=
toProximity
(
reframe
Docs
docs
periods
)
proxi
group
c
c'
let
proximity
=
toProximity
(
filter
Docs
docs
periods
)
proxi
group
c
c'
in
if
(
c
==
c'
)
then
[(
getGroupId
c
,
proximity
)]
else
[(
getGroupId
c
,
proximity
),(
getGroupId
c'
,
proximity
)]
)
pairs
)
)
[]
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
$
inits
candidates
--------------------------------------
filterDocs
::
Map
Date
Double
->
[
PhyloPeriodId
]
->
Map
Date
Double
filterDocs
d
pds
=
restrictKeys
d
$
periodsToYears
pds
------------------
-- | Pointers | --
------------------
-- ghostHunter :: [[PhyloGroup]] -> [[PhyloGroup]]
-- ghostHunter branches =
-- map (\branch ->
-- -- | il manque une référence au group source de chaque pointer
-- let pointers = elems $ fromList
-- $ map (\pt -> (groupIds ! (fst pt),pt))
-- $ filter (\pt -> member (fst pt) groupIds) $ concat $ map (\g -> g ^. phylo_groupGhostPointers) branch
-- in undefined
-- ) branches
-- where
-- groupIds :: Map PhyloGroupId Int
-- groupIds = fromList $ map (\g -> (getGroupId g, last' "ghostHunter" $ snd $ g ^. phylo_groupBranchId)) $ concat branches
-- --------------------------------------
-- selectBest :: [Pointers] -> [Pointers]
-- se
filterPointers
::
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
filterPointers
thr
groups
=
map
(
\
group
->
let
ghosts
=
filter
(
\
(
_
,
w
)
->
w
<
thr
)
$
group
^.
phylo_groupPeriodParents
in
group
&
phylo_groupPeriodParents
%~
(
filter
(
\
(
_
,
w
)
->
w
>=
thr
))
&
phylo_groupPeriodChilds
%~
(
filter
(
\
(
_
,
w
)
->
w
>=
thr
))
&
phylo_groupGhostPointers
%~
(
++
ghosts
)
)
groups
-----------------------------
...
...
@@ -174,14 +210,26 @@ toBranchQuality :: [[PhyloGroup]] -> [(Double,[PhyloGroup])]
toBranchQuality
branches
=
undefined
reframeDocs
::
Map
Date
Double
->
[
PhyloPeriodId
]
->
Map
Date
Double
reframeDocs
docs
periods
=
restrictKeys
docs
$
periodsToYears
periods
groupsToBranches
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
groupsToBranches
groups
=
-- | run the related component algorithm
let
graph
=
zip
[
1
..
]
$
relatedComponents
$
map
(
\
group
->
[
getGroupId
group
]
++
(
map
fst
$
group
^.
phylo_groupPeriodParents
)
++
(
map
fst
$
group
^.
phylo_groupPeriodChilds
)
)
$
elems
groups
-- | update each group's branch id
in
map
(
\
(
bId
,
ids
)
->
map
(
\
group
->
group
&
phylo_groupBranchId
%~
(
\
(
lvl
,
lst
)
->
(
lvl
,
lst
++
[
bId
])))
$
elems
$
restrictKeys
groups
(
Set
.
fromList
ids
)
)
graph
-- findGhostLinks :: [Link] -> [[Link]] -> Map PhyloGroupId
adaptativeMatching
::
Int
->
Double
->
Double
->
Double
->
Map
Date
Double
->
Proximity
->
[
PhyloGroup
]
->
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[
PhyloGroup
]
adaptativeMatching
maxTime
thrStep
thrMatch
thrQua
docs
proximity
groups
candidates
period
s
=
adaptativeMatching
::
Proximity
->
Double
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
adaptativeMatching
proximity
thr
thrQua
group
s
=
-- | check if we should break some of the new branches or not
case
shouldBreak
thrQua
branches'
of
True
->
concat
$
map
(
\
(
s
,
b
)
->
...
...
@@ -190,12 +238,7 @@ adaptativeMatching maxTime thrStep thrMatch thrQua docs proximity groups candida
then
b
-- | we break the branch using an increased temporal matching threshold
else
let
nextGroups
=
undefined
nextCandidates
=
undefined
nextPeriods
=
undefined
in
adaptativeMatching
maxTime
thrStep
(
thrMatch
+
thrStep
)
thrQua
(
reframeDocs
docs
nextPeriods
)
proximity
nextGroups
nextCandidates
nextPeriods
in
adaptativeMatching
proximity
(
thr
+
(
getThresholdStep
proximity
))
thrQua
nextGroups
)
branches'
-- | the quality of all the new branches is sufficient
False
->
concat
branches
...
...
@@ -205,25 +248,41 @@ adaptativeMatching maxTime thrStep thrMatch thrQua docs proximity groups candida
branches'
=
toBranchQuality
branches
-- | 2) group the new groups into branches
branches
::
[[
PhyloGroup
]]
branches
=
relatedComponents
groups'
-- | 1)
connect each group to its parents and childs
branches
=
groupsToBranches
$
fromList
$
map
(
\
group
->
(
getGroupId
group
,
group
))
groups'
-- | 1)
filter the pointers of each groups regarding the current state of the quality threshold
groups'
::
[
PhyloGroup
]
groups'
=
map
(
\
group
->
let
childs
=
getCandidates
ToChilds
group
(
getNextPeriods
ToChilds
maxTime
(
group
^.
phylo_groupPeriod
)
periods
)
candidates
parents
=
getCandidates
ToParents
group
(
getNextPeriods
ToParents
maxTime
(
group
^.
phylo_groupPeriod
)
periods
)
candidates
-- | match the group to its possible childs then parents
in
phyloGroupMatching
parents
ToParents
thrMatch
docs
proximity
$
phyloGroupMatching
childs
ToChilds
thrMatch
docs
proximity
group
)
groups
groups'
=
filterPointers
thr
groups
temporalMatching
::
Phylo
->
Phylo
temporalMatching
phylo
=
let
branches
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
adaptativeMatching
(
maxTimeMatch
$
getConfig
phylo
)
0
0
0
(
phylo
^.
phylo_timeDocs
)
(
phyloProximity
$
getConfig
phylo
)
(
getGroupsFromLevel
1
phylo
)
(
getGroupsFromLevel
1
phylo
)
(
getPeriodIds
phylo
)
in
updatePhyloGroups
1
branches
phylo
\ No newline at end of file
temporalMatching
phylo
=
updatePhyloGroups
1
branches
phylo
where
-- | 4) find the ghost links and postprocess the branches
branches'
::
Map
PhyloGroupId
PhyloGroup
branches'
=
undefined
-- | 3) run the adaptative matching to find the best repartition among branches
branches
::
Map
PhyloGroupId
PhyloGroup
branches
=
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
adaptativeMatching
proximity
(
getThresholdInit
proximity
)
(
phyloQuality
$
getConfig
phylo
)
groups'
-- | 2) for each group process an initial temporal Matching
groups'
::
[
PhyloGroup
]
groups'
=
let
maxTime
=
getTimeFrame
$
timeUnit
$
getConfig
phylo
periods
=
getPeriodIds
phylo
docs
=
phylo
^.
phylo_timeDocs
--------------------------------------
in
map
(
\
group
->
let
childs
=
getCandidates
ToChilds
group
(
getNextPeriods
ToChilds
maxTime
(
group
^.
phylo_groupPeriod
)
periods
)
groups
parents
=
getCandidates
ToParents
group
(
getNextPeriods
ToParents
maxTime
(
group
^.
phylo_groupPeriod
)
periods
)
groups
in
phyloGroupMatching
parents
ToParents
proximity
docs
$
phyloGroupMatching
childs
ToChilds
proximity
docs
group
)
groups
-- | 1) start with all the groups from a given level
groups
::
[
PhyloGroup
]
groups
=
getGroupsFromLevel
1
phylo
--------------------------------------
proximity
::
Proximity
proximity
=
phyloProximity
$
getConfig
phylo
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment