Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
141
Issues
141
List
Board
Labels
Milestones
Merge Requests
5
Merge Requests
5
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
63e3a6fd
Commit
63e3a6fd
authored
Jul 01, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] Haddock documentation ok
parent
43fe729b
Pipeline
#921
failed with stage
Changes
12
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
105 additions
and
110 deletions
+105
-110
Types.hs
src/Gargantext/API/Admin/Orchestrator/Types.hs
+1
-1
Terms.hs
src/Gargantext/Text/Terms.hs
+1
-1
Eleve.hs
src/Gargantext/Text/Terms/Eleve.hs
+1
-1
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+3
-1
Matrice.hs
src/Gargantext/Viz/Graph/Distances/Matrice.hs
+7
-4
LevelMaker.hs
src/Gargantext/Viz/Phylo/LevelMaker.hs
+4
-4
LinkMaker.hs
src/Gargantext/Viz/Phylo/LinkMaker.hs
+4
-6
Metrics.hs
src/Gargantext/Viz/Phylo/Metrics.hs
+3
-9
PhyloExport.hs
src/Gargantext/Viz/Phylo/PhyloExport.hs
+18
-20
PhyloMaker.hs
src/Gargantext/Viz/Phylo/PhyloMaker.hs
+14
-14
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+15
-15
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+34
-34
No files found.
src/Gargantext/API/Admin/Orchestrator/Types.hs
View file @
63e3a6fd
...
...
@@ -27,13 +27,13 @@ instance Arbitrary a => Arbitrary (JobOutput a) where
arbitrary
=
JobOutput
<$>
arbitrary
-- | Main Types
-- TODO IsidoreAuth
data
ExternalAPIs
=
All
|
PubMed
|
HAL
|
IsTex
|
Isidore
deriving
(
Show
,
Eq
,
Enum
,
Bounded
,
Generic
)
-- | IsidoreAuth
-- | Main Instances
...
...
src/Gargantext/Text/Terms.hs
View file @
63e3a6fd
...
...
@@ -175,6 +175,6 @@ newTries n t = buildTries n (fmap toToken $ uniText t)
uniText
::
Text
->
[[
Text
]]
uniText
=
map
(
List
.
filter
(
not
.
isPunctuation
))
.
map
tokenize
.
sentences
--
|
TODO get sentences according to lang
.
sentences
-- TODO get sentences according to lang
.
Text
.
toLower
src/Gargantext/Text/Terms/Eleve.hs
View file @
63e3a6fd
...
...
@@ -73,12 +73,12 @@ subst (src, dst) x | sim src x = dst
|
otherwise
=
x
------------------------------------------------------------------------
-- | TODO: Show Instance only used for debugging
type
Entropy
e
=
(
Fractional
e
,
Floating
e
,
P
.
RealFloat
e
,
Show
e
-- ^ TODO: only used for debugging
)
------------------------------------------------------------------------
-- | Example and tests for development
...
...
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
63e3a6fd
...
...
@@ -65,11 +65,13 @@ data SeaElevation =
data
Proximity
=
WeightedLogJaccard
{
_wlj_sensibility
::
Double
{
_wlj_sensibility
::
Double
{-
-- , _wlj_thresholdInit :: Double
-- , _wlj_thresholdStep :: Double
-- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double
-}
}
|
Hamming
deriving
(
Show
,
Generic
,
Eq
)
...
...
src/Gargantext/Viz/Graph/Distances/Matrice.hs
View file @
63e3a6fd
...
...
@@ -235,13 +235,16 @@ conditional' m = ( run $ ie $ map fromIntegral $ use m
-- \[N_{m} = \sum_{i,i \neq i}^{m} \sum_{j, j \neq j}^{m} S_{ij}\]
--
distributional
::
Matrix
Int
->
Matrix
Double
distributional
m
=
run
-- $ matMiniMax
distributional
m
=
run
{-
-- $ matMiniMax
-- $ ri
-- $ myMin
-- $ myMin
-}
$
filter'
0
$
s_mi
$
map
fromIntegral
-- ^ from Int to Double
$
use
m
-- ^ push matrix in Accelerate type
$
map
fromIntegral
{- from Int to Double -}
$
use
m
{- push matrix in Accelerate type -}
where
-- filter m = zipWith (\a b -> max a b) m (transpose m)
...
...
src/Gargantext/Viz/Phylo/LevelMaker.hs
View file @
63e3a6fd
...
...
@@ -207,7 +207,7 @@ toNthLevel lvlMax prox clus p
|
otherwise
=
toNthLevel
lvlMax
prox
clus
$
traceBranches
(
lvl
+
1
)
$
setPhyloBranches
(
lvl
+
1
)
-- $ transposePeriodLinks (lvl + 1)
--
\
$ transposePeriodLinks (lvl + 1)
$
traceTranspose
(
lvl
+
1
)
Descendant
$
transposeLinks
(
lvl
+
1
)
Descendant
$
traceTranspose
(
lvl
+
1
)
Ascendant
...
...
@@ -230,15 +230,15 @@ toNthLevel lvlMax prox clus p
toPhylo1
::
Cluster
->
Proximity
->
Map
(
Date
,
Date
)
[
Document
]
->
Phylo
->
Phylo
toPhylo1
clus
prox
d
p
=
case
clus
of
Fis
(
FisParams
k
s
t
)
->
traceBranches
1
-- $ reLinkPhyloBranches 1
-- $ traceBranches 1
--
\
$ reLinkPhyloBranches 1
--
\
$ traceBranches 1
$
setPhyloBranches
1
$
traceTempoMatching
Descendant
1
$
interTempoMatching
Descendant
1
prox
$
traceTempoMatching
Ascendant
1
$
interTempoMatching
Ascendant
1
prox
$
tracePhyloN
1
-- $ setLevelLinks (0,1)
--
\
$ setLevelLinks (0,1)
$
addPhyloLevel
1
(
getPhyloFis
phyloFis
)
$
trace
(
show
(
size
$
getPhyloFis
phyloFis
)
<>
" Fis created"
)
$
phyloFis
where
...
...
src/Gargantext/Viz/Phylo/LinkMaker.hs
View file @
63e3a6fd
...
...
@@ -126,15 +126,15 @@ phyloGroupMatching :: [PhyloPeriodId] -> PhyloGroup -> Phylo -> [Pointer]
phyloGroupMatching
periods
g
p
=
case
pointers
of
Nothing
->
[]
Just
pts
->
head'
"phyloGroupMatching"
--
|
Keep only the best set of pointers grouped by proximity
-- Keep only the best set of pointers grouped by proximity
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
reverse
$
sortOn
snd
pts
--
|
Find the first time frame where at leats one pointer satisfies the proximity threshold
-- Find the first time frame where at leats one pointer satisfies the proximity threshold
where
--------------------------------------
pointers
::
Maybe
[
Pointer
]
pointers
=
find
(
not
.
null
)
--
|
For each time frame, process the Proximity on relevant pairs of targeted groups
-- For each time frame, process the Proximity on relevant pairs of targeted groups
$
scanl
(
\
acc
frame
->
let
pairs
=
makePairs
frame
g
p
in
acc
++
(
filter
(
\
(
_
,
proxi
)
->
filterProximity
proxi
(
getPhyloProximity
p
))
...
...
@@ -145,7 +145,7 @@ phyloGroupMatching periods g p = case pointers of
if
(
t
==
t'
)
then
[(
getGroupId
t
,
proxi
)]
else
[(
getGroupId
t
,
proxi
),(
getGroupId
t'
,
proxi
)]
)
pairs
)
)
[]
--
|
[[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
-- [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
$
inits
periods
--------------------------------------
...
...
@@ -218,8 +218,6 @@ interTempoMatching fil lvl _ p = updateGroups fil lvl (Map.fromList pointers) p
------------------------------------------------------------------------
-- | Make links from Period to Period after level 1
listToTuple
::
(
a
->
b
)
->
[
a
]
->
[(
b
,
a
)]
listToTuple
f
l
=
map
(
\
x
->
(
f
x
,
x
))
l
...
...
src/Gargantext/Viz/Phylo/Metrics.hs
View file @
63e3a6fd
...
...
@@ -90,13 +90,13 @@ findDynamics n pv pn m =
bid
=
fromJust
$
(
pn
^.
pn_bid
)
end
=
last'
"dynamics"
(
sort
$
map
snd
$
elems
m
)
in
if
(((
snd
prd
)
==
(
snd
$
m
!
n
))
&&
(
snd
prd
/=
end
))
--
|
emergence
-- emergence
then
2
else
if
((
fst
prd
)
==
(
fst
$
m
!
n
))
--
|
recombination
-- recombination
then
0
else
if
(
not
$
sharedWithParents
(
fst
prd
)
bid
n
pv
)
--
|
decrease
-- decrease
then
1
else
3
...
...
@@ -175,9 +175,3 @@ hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (s
inter
::
Map
(
Int
,
Int
)
Double
inter
=
intersection
f1
f2
--------------------------------------
src/Gargantext/Viz/Phylo/PhyloExport.hs
View file @
63e3a6fd
...
...
@@ -171,12 +171,12 @@ exportToDot phylo export =
<>
"##########################"
)
$
digraph
((
Str
.
fromStrict
)
$
(
phyloName
$
getConfig
phylo
))
$
do
-- | 1) init the dot graph
{- 1) init the dot graph -}
graphAttrs
(
[
Label
(
toDotLabel
$
(
phyloName
$
getConfig
phylo
))]
<>
[
FontSize
30
,
LabelLoc
VTop
,
NodeSep
1
,
RankSep
[
1
],
Rank
SameRank
,
Splines
SplineEdges
,
Overlap
ScaleOverlaps
,
Ratio
FillRatio
,
Style
[
SItem
Filled
[]
],
Color
[
toWColor
White
]]
-- | home made attributes
{-- home made attributes -}
<>
[(
toAttr
(
fromStrict
"phyloFoundations"
)
$
pack
$
show
(
length
$
Vector
.
toList
$
getRoots
phylo
))
,(
toAttr
(
fromStrict
"phyloTerms"
)
$
pack
$
show
(
length
$
nub
$
concat
$
map
(
\
g
->
g
^.
phylo_groupNgrams
)
$
export
^.
export_groups
))
,(
toAttr
(
fromStrict
"phyloDocs"
)
$
pack
$
show
(
sum
$
elems
$
phylo
^.
phylo_timeDocs
))
...
...
@@ -185,36 +185,36 @@ exportToDot phylo export =
,(
toAttr
(
fromStrict
"phyloGroups"
)
$
pack
$
show
(
length
$
export
^.
export_groups
))
])
{-
-- toAttr (fromStrict k) $ (pack . unwords) $ map show v
--
| 2) create a layer for the branches labels
--
2) create a layer for the branches labels -}
subgraph
(
Str
"Branches peaks"
)
$
do
graphAttrs
[
Rank
SameRank
]
--
|
3) group the branches by hierarchy
{-
-- 3) group the branches by hierarchy
-- mapM (\branches ->
-- subgraph (Str "Branches clade") $ do
-- graphAttrs [Rank SameRank]
-- --
|
4) create a node for each branch
-- -- 4) create a node for each branch
-- mapM branchToDotNode branches
-- ) $ elems $ fromListWith (++) $ map (\b -> ((init . snd) $ b ^. branch_id,[b])) $ export ^. export_branches
-}
mapM
(
\
b
->
branchToDotNode
b
(
fromJust
$
elemIndex
b
(
export
^.
export_branches
)))
$
export
^.
export_branches
-- | 5) create a layer for each period
{-- 5) create a layer for each period -}
_
<-
mapM
(
\
period
->
subgraph
((
Str
.
fromStrict
.
Text
.
pack
)
$
(
"Period"
<>
show
(
fst
period
)
<>
show
(
snd
period
)))
$
do
graphAttrs
[
Rank
SameRank
]
periodToDotNode
period
-- | 6) create a node for each group
{-- 6) create a node for each group -}
mapM
(
\
g
->
groupToDotNode
(
getRoots
phylo
)
g
(
toBid
g
(
export
^.
export_branches
)))
(
filter
(
\
g
->
g
^.
phylo_groupPeriod
==
period
)
$
export
^.
export_groups
)
)
$
getPeriodIds
phylo
-- | 7) create the edges between a branch and its first groups
{-- 7) create the edges between a branch and its first groups -}
_
<-
mapM
(
\
(
bId
,
groups
)
->
mapM
(
\
g
->
toDotEdge
(
branchIdToDotId
bId
)
(
groupIdToDotId
$
getGroupId
g
)
""
BranchToGroup
)
groups
)
...
...
@@ -224,31 +224,29 @@ exportToDot phylo export =
$
sortOn
(
fst
.
_phylo_groupPeriod
)
groups
)
$
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupBranchId
,[
g
]))
$
export
^.
export_groups
-- | 8) create the edges between the groups
{- 8) create the edges between the groups -}
_
<-
mapM
(
\
((
k
,
k'
),
_
)
->
toDotEdge
(
groupIdToDotId
k
)
(
groupIdToDotId
k'
)
""
GroupToGroup
)
$
(
toList
.
mergePointers
)
$
export
^.
export_groups
-- | 7) create the edges between the periods
{- 7) create the edges between the periods -}
_
<-
mapM
(
\
(
prd
,
prd'
)
->
toDotEdge
(
periodIdToDotId
prd
)
(
periodIdToDotId
prd'
)
""
PeriodToPeriod
)
$
nubBy
(
\
combi
combi'
->
fst
combi
==
fst
combi'
)
$
listToCombi'
$
getPeriodIds
phylo
-- |
8) create the edges between the branches
{-
8) create the edges between the branches
-- _ <- mapM (\(bId,bId') ->
-- toDotEdge (branchIdToDotId bId) (branchIdToDotId bId')
-- (Text.pack $ show(branchIdsToProximity bId bId'
-- (getThresholdInit $ phyloProximity $ getConfig phylo)
-- (getThresholdStep $ phyloProximity $ getConfig phylo))) BranchToBranch
-- ) $ nubBy (\combi combi' -> fst combi == fst combi') $ listToCombi' $ map _branch_id $ export ^. export_branches
-}
graphAttrs
[
Rank
SameRank
]
----------------
-- | Filter | --
----------------
...
...
@@ -439,13 +437,13 @@ toDynamics n parents g m =
let
prd
=
g
^.
phylo_groupPeriod
end
=
last'
"dynamics"
(
sort
$
map
snd
$
elems
m
)
in
if
(((
snd
prd
)
==
(
snd
$
m
!
n
))
&&
(
snd
prd
/=
end
))
-- | decrease
{- decrease -}
then
2
else
if
((
fst
prd
)
==
(
fst
$
m
!
n
))
-- | recombination
{- recombination -}
then
0
else
if
isNew
-- | emergence
{- emergence -}
then
1
else
3
where
...
...
src/Gargantext/Viz/Phylo/PhyloMaker.hs
View file @
63e3a6fd
...
...
@@ -115,7 +115,7 @@ cliqueToGroup fis pId lvl idx fdt coocs =
(
fis
^.
phyloClique_support
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
1
,[
0
])
--
|
branchid (lvl,[path in the branching tree])
(
1
,[
0
])
-- branchid (lvl,[path in the branching tree])
(
fromList
[(
"breaks"
,[
0
]),(
"seaLevels"
,[
0
])])
[]
[]
[]
[]
...
...
@@ -142,24 +142,24 @@ toPhylo1 docs phyloBase = case (getSeaElevation phyloBase) of
---------------------------
--
|
To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
-- To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterClique
::
Bool
->
Int
->
(
Int
->
[
PhyloClique
]
->
[
PhyloClique
])
->
Map
(
Date
,
Date
)
[
PhyloClique
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
filterClique
keep
thr
f
m
=
case
keep
of
False
->
map
(
\
l
->
f
thr
l
)
m
True
->
map
(
\
l
->
keepFilled
(
f
)
thr
l
)
m
--
|
To filter Fis with small Support
-- To filter Fis with small Support
filterCliqueBySupport
::
Int
->
[
PhyloClique
]
->
[
PhyloClique
]
filterCliqueBySupport
thr
l
=
filter
(
\
clq
->
(
clq
^.
phyloClique_support
)
>=
thr
)
l
--
|
To filter Fis with small Clique size
-- To filter Fis with small Clique size
filterCliqueBySize
::
Int
->
[
PhyloClique
]
->
[
PhyloClique
]
filterCliqueBySize
thr
l
=
filter
(
\
clq
->
(
size
$
clq
^.
phyloClique_nodes
)
>=
thr
)
l
--
|
To filter nested Fis
-- To filter nested Fis
filterCliqueByNested
::
Map
(
Date
,
Date
)
[
PhyloClique
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
filterCliqueByNested
m
=
let
clq
=
map
(
\
l
->
...
...
@@ -173,16 +173,16 @@ filterCliqueByNested m =
in
fromList
$
zip
(
keys
m
)
clq'
--
|
To transform a time map of docs innto a time map of Fis with some filters
-- To transform a time map of docs innto a time map of Fis with some filters
toPhyloClique
::
Phylo
->
Map
(
Date
,
Date
)
[
Document
]
->
Map
(
Date
,
Date
)
[
PhyloClique
]
toPhyloClique
phylo
phyloDocs
=
case
(
clique
$
getConfig
phylo
)
of
Fis
s
s'
->
-- traceFis "Filtered Fis"
filterCliqueByNested
-- $ traceFis "Filtered by clique size"
{- \$ traceFis "Filtered by clique size" -}
$
filterClique
True
s'
(
filterCliqueBySize
)
-- $ traceFis "Filtered by support"
{- \$ traceFis "Filtered by support" -}
$
filterClique
True
s
(
filterCliqueBySupport
)
-- $ traceFis "Unfiltered Fis"
{- \$ traceFis "Unfiltered Fis" -}
phyloClique
MaxClique
_
->
undefined
where
...
...
@@ -204,7 +204,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
--------------------
--
|
To transform the docs into a time map of coocurency matrix
-- To transform the docs into a time map of coocurency matrix
docsToTimeScaleCooc
::
[
Document
]
->
Vector
Ngrams
->
Map
Date
Cooc
docsToTimeScaleCooc
docs
fdt
=
let
mCooc
=
fromListWith
sumCooc
...
...
@@ -221,7 +221,7 @@ docsToTimeScaleCooc docs fdt =
-- | to Phylo Base | --
-----------------------
--
|
To group a list of Documents by fixed periods
-- To group a list of Documents by fixed periods
groupDocsByPeriod'
::
(
NFData
doc
,
Ord
date
,
Enum
date
)
=>
(
doc
->
date
)
->
[(
date
,
date
)]
->
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
groupDocsByPeriod'
f
pds
docs
=
let
docs'
=
groupBy
(
\
d
d'
->
f
d
==
f
d'
)
$
sortOn
f
docs
...
...
@@ -237,7 +237,7 @@ groupDocsByPeriod' f pds docs =
--
|
To group a list of Documents by fixed periods
-- To group a list of Documents by fixed periods
groupDocsByPeriod
::
(
NFData
doc
,
Ord
date
,
Enum
date
)
=>
(
doc
->
date
)
->
[(
date
,
date
)]
->
[
doc
]
->
Map
(
date
,
date
)
[
doc
]
groupDocsByPeriod
_
_
[]
=
panic
"[ERR][Viz.Phylo.PhyloMaker] Empty [Documents] can not have any periods"
groupDocsByPeriod
f
pds
es
=
...
...
@@ -265,7 +265,7 @@ docsToTermFreq docs fdt =
in
map
(
/
sumFreqs
)
freqs
--
|
To count the number of docs by unit of time
-- To count the number of docs by unit of time
docsToTimeScaleNb
::
[
Document
]
->
Map
Date
Double
docsToTimeScaleNb
docs
=
let
docs'
=
fromListWith
(
+
)
$
map
(
\
d
->
(
date
d
,
1
))
docs
...
...
@@ -279,7 +279,7 @@ initPhyloLevels lvlMax pId =
fromList
$
map
(
\
lvl
->
((
pId
,
lvl
),
PhyloLevel
pId
lvl
empty
))
[
1
..
lvlMax
]
--
|
To init the basic elements of a Phylo
-- To init the basic elements of a Phylo
toPhyloBase
::
[
Document
]
->
TermList
->
Config
->
Phylo
toPhyloBase
docs
lst
conf
=
let
foundations
=
PhyloFoundations
(
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
)
lst
...
...
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
63e3a6fd
...
...
@@ -36,13 +36,13 @@ import qualified Data.Set as Set
mergeBranchIds
::
[[
Int
]]
->
[
Int
]
mergeBranchIds
ids
=
(
head'
"mergeBranchIds"
.
sort
.
mostFreq'
)
ids
where
--
|
2) find the most Up Left ids in the hierarchy of similarity
-- 2) find the most Up Left ids in the hierarchy of similarity
-- mostUpLeft :: [[Int]] -> [[Int]]
-- mostUpLeft ids' =
-- let groupIds = (map (\gIds -> (length $ head' "gIds" gIds, head' "gIds" gIds)) . groupBy (\id id' -> length id == length id') . sortOn length) ids'
-- inf = (fst . minimum) groupIds
-- in map snd $ filter (\gIds -> fst gIds == inf) groupIds
--
|
1) find the most frequent ids
-- 1) find the most frequent ids
mostFreq'
::
[[
Int
]]
->
[[
Int
]]
mostFreq'
ids'
=
let
groupIds
=
(
map
(
\
gIds
->
(
length
gIds
,
head'
"gIds"
gIds
))
.
group
.
sort
)
ids'
...
...
@@ -58,12 +58,12 @@ mergeMeta bId groups =
groupsToBranches'
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
groupsToBranches'
groups
=
--
|
run the related component algorithm
-- run the related component algorithm
let
egos
=
map
(
\
g
->
[
getGroupId
g
]
++
(
map
fst
$
g
^.
phylo_groupPeriodParents
)
++
(
map
fst
$
g
^.
phylo_groupPeriodChilds
)
)
$
elems
groups
graph
=
relatedComponents
egos
--
|
update each group's branch id
-- update each group's branch id
in
map
(
\
ids
->
let
groups'
=
elems
$
restrictKeys
groups
(
Set
.
fromList
ids
)
bId
=
mergeBranchIds
$
map
(
\
g
->
snd
$
g
^.
phylo_groupBranchId
)
groups'
...
...
@@ -103,26 +103,26 @@ toNextLevel' phylo groups =
newGroups
=
concat
$
groupsToBranches'
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
foldlWithKey
(
\
acc
id
groups'
->
--
|
4) create the parent group
-- 4) create the parent group
let
parent
=
mergeGroups
(
elems
$
restrictKeys
(
phylo
^.
phylo_timeCooc
)
$
periodsToYears
[(
fst
.
fst
)
id
])
id
oldGroups
groups'
in
acc
++
[
parent
])
[]
--
|
3) group the current groups by parentId
-- 3) group the current groups by parentId
$
fromListWith
(
++
)
$
map
(
\
g
->
(
getLevelParentId
g
,
[
g
]))
groups
newPeriods
=
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,
[
g
]))
newGroups
in
traceSynchronyEnd
$
over
(
phylo_periods
.
traverse
.
phylo_periodLevels
.
traverse
--
|
6) update each period at curLvl + 1
-- 6) update each period at curLvl + 1
.
filtered
(
\
phyloLvl
->
phyloLvl
^.
phylo_levelLevel
==
(
curLvl
+
1
)))
--
|
7) by adding the parents
-- 7) by adding the parents
(
\
phyloLvl
->
if
member
(
phyloLvl
^.
phylo_levelPeriod
)
newPeriods
then
phyloLvl
&
phylo_levelGroups
.~
fromList
(
map
(
\
g
->
(
getGroupId
g
,
g
))
$
newPeriods
!
(
phyloLvl
^.
phylo_levelPeriod
))
else
phyloLvl
)
--
|
2) add the curLvl + 1 phyloLevel to the phylo
-- 2) add the curLvl + 1 phyloLevel to the phylo
$
addPhyloLevel
(
curLvl
+
1
)
--
|
1) update the current groups (with level parent pointers) in the phylo
-- 1) update the current groups (with level parent pointers) in the phylo
$
updatePhyloGroups
curLvl
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
groups
)
phylo
--------------------
...
...
@@ -187,19 +187,19 @@ toParentId child = ((child ^. phylo_groupPeriod, child ^. phylo_groupLevel + 1),
reduceGroups
::
Proximity
->
Synchrony
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloGroup
]
->
[
PhyloGroup
]
reduceGroups
prox
sync
docs
diagos
branch
=
--
|
1) reduce a branch as a set of periods & groups
-- 1) reduce a branch as a set of periods & groups
let
periods
=
fromListWith
(
++
)
$
map
(
\
g
->
(
g
^.
phylo_groupPeriod
,[
g
]))
branch
in
(
concat
.
concat
.
elems
)
$
mapWithKey
(
\
prd
groups
->
--
|
2) for each period, transform the groups as a proximity graph filtered by a threshold
-- 2) for each period, transform the groups as a proximity graph filtered by a threshold
let
diago
=
reduceDiagos
$
filterDiago
diagos
[
prd
]
edges
=
groupsToEdges
prox
sync
((
sum
.
elems
)
$
restrictKeys
docs
$
periodsToYears
[
prd
])
diago
groups
in
map
(
\
comp
->
--
|
4) add to each groups their futur level parent group
-- 4) add to each groups their futur level parent group
let
parentId
=
toParentId
(
head'
"parentId"
comp
)
in
map
(
\
g
->
g
&
phylo_groupLevelParents
%~
(
++
[(
parentId
,
1
)])
)
comp
)
--
|
3) reduce the graph a a set of related components
-- 3) reduce the graph a a set of related components
$
toRelatedComponents
groups
edges
)
periods
...
...
@@ -251,4 +251,4 @@ synchronicClustering phylo =
-- <> "\n"
-- ) "" edges
-- ) "" $ elems $ groupByField _phylo_groupPeriod branch)
-- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo
\ No newline at end of file
-- ) "period\tdistance\n" $ elems $ groupByField _phylo_groupBranchId $ getGroupsFromLevel lvl phylo
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
63e3a6fd
...
...
@@ -117,7 +117,7 @@ makePairs' (egoId, egoNgrams) candidates periods oldPointers fil thr prox docs d
if
(
null
periods
)
then
[]
else
removeOldPointers
oldPointers
fil
thr
prox
lastPrd
-- | at least on of the pair candidates should be from the last added period
{- at least on of the pair candidates should be from the last added period -}
$
filter
(
\
((
id
,
_
),(
id'
,
_
))
->
((
fst
.
fst
)
id
==
lastPrd
)
||
((
fst
.
fst
)
id'
==
lastPrd
))
$
listToKeys
$
filter
(
\
(
id
,
ngrams
)
->
...
...
@@ -143,36 +143,36 @@ phyloGroupMatching :: [[(PhyloGroupId,[Int])]] -> Filiation -> Proximity -> Map
->
Double
->
[
Pointer
]
->
(
PhyloGroupId
,[
Int
])
->
[
Pointer
]
phyloGroupMatching
candidates
fil
proxi
docs
diagos
thr
oldPointers
(
id
,
ngrams
)
=
if
(
null
$
filterPointers
proxi
thr
oldPointers
)
-- | let's find new pointers
{- let's find new pointers -}
then
if
null
nextPointers
then
[]
else
head'
"phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
{- Keep only the best set of pointers grouped by proximity -}
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
reverse
$
sortOn
snd
$
head'
"pointers"
nextPointers
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
{- Find the first time frame where at leats one pointer satisfies the proximity threshold -}
else
oldPointers
where
nextPointers
::
[[
Pointer
]]
nextPointers
=
take
1
$
dropWhile
(
null
)
-- | for each time frame, process the proximity on relevant pairs of targeted groups
{- for each time frame, process the proximity on relevant pairs of targeted groups -}
$
scanl
(
\
acc
groups
->
let
periods
=
nub
$
map
(
fst
.
fst
.
fst
)
$
concat
groups
nbdocs
=
sum
$
elems
$
(
filterDocs
docs
([(
fst
.
fst
)
id
]
++
periods
))
diago
=
reduceDiagos
$
filterDiago
diagos
([(
fst
.
fst
)
id
]
++
periods
)
-- | important resize nbdocs et diago dans le make pairs
{- important resize nbdocs et diago dans le make pairs -}
pairs
=
makePairs'
(
id
,
ngrams
)
(
concat
groups
)
periods
oldPointers
fil
thr
proxi
docs
diagos
in
acc
++
(
filterPointers
proxi
thr
$
concat
$
map
(
\
(
c
,
c'
)
->
-- | process the proximity between the current group and a pair of candidates
{- process the proximity between the current group and a pair of candidates -}
let
proximity
=
toProximity
nbdocs
diago
proxi
ngrams
(
snd
c
)
(
snd
c'
)
in
if
(
c
==
c'
)
then
[(
fst
c
,
proximity
)]
else
[(
fst
c
,
proximity
),(
fst
c'
,
proximity
)]
)
pairs
))
[]
$
inits
candidates
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
$
inits
candidates
{- groups from [[1900],[1900,1901],[1900,1901,1902],...] -}
filterDocs
::
Map
Date
Double
->
[
PhyloPeriodId
]
->
Map
Date
Double
...
...
@@ -205,19 +205,19 @@ matchGroupsToGroups :: Int -> [PhyloPeriodId] -> Proximity -> Double -> Map Date
matchGroupsToGroups
frame
periods
proximity
thr
docs
coocs
groups
=
let
groups'
=
groupByField
_phylo_groupPeriod
groups
in
foldl'
(
\
acc
prd
->
let
--
|
1) find the parents/childs matching periods
let
-- 1) find the parents/childs matching periods
periodsPar
=
getNextPeriods
ToParents
frame
prd
periods
periodsChi
=
getNextPeriods
ToChilds
frame
prd
periods
--
|
2) find the parents/childs matching candidates
-- 2) find the parents/childs matching candidates
candidatesPar
=
map
(
\
prd'
->
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
findWithDefault
[]
prd'
groups'
)
periodsPar
candidatesChi
=
map
(
\
prd'
->
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
findWithDefault
[]
prd'
groups'
)
periodsChi
--
|
3) find the parents/child number of docs by years
-- 3) find the parents/child number of docs by years
docsPar
=
filterDocs
docs
([
prd
]
++
periodsPar
)
docsChi
=
filterDocs
docs
([
prd
]
++
periodsChi
)
--
|
4) find the parents/child diago by years
-- 4) find the parents/child diago by years
diagoPar
=
filterDiago
(
map
coocToDiago
coocs
)
([
prd
]
++
periodsPar
)
diagoChi
=
filterDiago
(
map
coocToDiago
coocs
)
([
prd
]
++
periodsPar
)
--
|
5) match in parallel all the groups (egos) to their possible candidates
-- 5) match in parallel all the groups (egos) to their possible candidates
egos
=
map
(
\
ego
->
let
pointersPar
=
phyloGroupMatching
(
getCandidates
ego
candidatesPar
)
ToParents
proximity
docsPar
diagoPar
thr
(
getPeriodPointers
ToParents
ego
)
(
getGroupId
ego
,
ego
^.
phylo_groupNgrams
)
...
...
@@ -272,19 +272,19 @@ toPhyloQuality' beta freq branches =
groupsToBranches
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
groupsToBranches
groups
=
--
|
run the related component algorithm
-- run the related component algorithm
let
egos
=
groupBy
(
\
gs
gs'
->
(
fst
$
fst
$
head'
"egos"
gs
)
==
(
fst
$
fst
$
head'
"egos"
gs'
))
$
sortOn
(
\
gs
->
fst
$
fst
$
head'
"egos"
gs
)
$
map
(
\
group
->
[
getGroupId
group
]
++
(
map
fst
$
group
^.
phylo_groupPeriodParents
)
++
(
map
fst
$
group
^.
phylo_groupPeriodChilds
)
)
$
elems
groups
--
|
first find the related components by inside each ego's period
--
|
a supprimer
-- first find the related components by inside each ego's period
-- a supprimer
graph'
=
map
relatedComponents
egos
--
|
then run it for the all the periods
-- then run it for the all the periods
graph
=
zip
[
1
..
]
$
relatedComponents
$
concat
(
graph'
`
using
`
parList
rdeepseq
)
--
|
update each group's branch id
-- update each group's branch id
in
map
(
\
(
bId
,
ids
)
->
let
groups'
=
map
(
\
group
->
group
&
phylo_groupBranchId
%~
(
\
(
lvl
,
lst
)
->
(
lvl
,
lst
++
[
bId
])))
$
elems
$
restrictKeys
groups
(
Set
.
fromList
ids
)
...
...
@@ -300,14 +300,14 @@ updateThr thr branches = map (\b -> map (\g ->
g
&
phylo_groupMeta
.~
(
singleton
"seaLevels"
(((
g
^.
phylo_groupMeta
)
!
"seaLevels"
)
++
[
thr
])))
b
)
branches
--
|
Sequentially break each branch of a phylo where
-- Sequentially break each branch of a phylo where
-- done = all the allready broken branches
-- ego = the current branch we want to break
-- rest = the branches we still have to break
breakBranches
::
Proximity
->
Double
->
Map
Int
Double
->
Int
->
Double
->
Double
->
Double
->
Int
->
Map
Date
Double
->
Map
Date
Cooc
->
[
PhyloPeriodId
]
->
[([
PhyloGroup
],
Bool
)]
->
([
PhyloGroup
],
Bool
)
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
breakBranches
proximity
beta
frequency
minBranch
thr
depth
elevation
frame
docs
coocs
periods
done
ego
rest
=
--
|
1) keep or not the new division of ego
-- 1) keep or not the new division of ego
let
done'
=
done
++
(
if
snd
ego
then
(
if
((
null
(
fst
ego'
))
||
(
quality
>
quality'
))
...
...
@@ -325,7 +325,7 @@ breakBranches proximity beta frequency minBranch thr depth elevation frame docs
((
map
(
\
e
->
(
e
,
True
))
(
fst
ego'
))
++
(
map
(
\
e
->
(
e
,
False
))
(
snd
ego'
))))
else
[
ego
])
in
--
|
2) if there is no more branches in rest then return else continue
-- 2) if there is no more branches in rest then return else continue
if
null
rest
then
done'
else
breakBranches
proximity
beta
frequency
minBranch
thr
depth
elevation
frame
docs
coocs
periods
...
...
@@ -352,11 +352,11 @@ breakBranches proximity beta frequency minBranch thr depth elevation frame docs
seaLevelMatching
::
Proximity
->
Double
->
Int
->
Map
Int
Double
->
Double
->
Double
->
Double
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Map
Date
Cooc
->
[([
PhyloGroup
],
Bool
)]
->
[([
PhyloGroup
],
Bool
)]
seaLevelMatching
proximity
beta
minBranch
frequency
thr
step
depth
elevation
frame
periods
docs
coocs
branches
=
--
|
if there is no branch to break or if seaLvl level > 1 then end
-- if there is no branch to break or if seaLvl level > 1 then end
if
(
thr
>=
1
)
||
((
not
.
or
)
$
map
snd
branches
)
then
branches
else
--
|
break all the possible branches at the current seaLvl level
-- break all the possible branches at the current seaLvl level
let
branches'
=
breakBranches
proximity
beta
frequency
minBranch
thr
depth
elevation
frame
docs
coocs
periods
[]
(
head'
"seaLevelMatching"
branches
)
(
tail'
"seaLevelMatching"
branches
)
frequency'
=
reduceFrequency
frequency
(
map
fst
branches'
)
...
...
@@ -368,7 +368,7 @@ constanteTemporalMatching start step phylo = updatePhyloGroups 1
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
traceMatchEnd
$
concat
branches
)
phylo
where
--
|
2) process the temporal matching by elevating seaLvl level
-- 2) process the temporal matching by elevating seaLvl level
branches
::
[[
PhyloGroup
]]
branches
=
map
fst
$
seaLevelMatching
(
phyloProximity
$
getConfig
phylo
)
...
...
@@ -383,8 +383,8 @@ constanteTemporalMatching start step phylo = updatePhyloGroups 1
(
phylo
^.
phylo_timeDocs
)
(
phylo
^.
phylo_timeCooc
)
groups
--
|
1) for each group process an initial temporal Matching
--
|
here we suppose that all the groups of level 1 are part of the same big branch
-- 1) for each group process an initial temporal Matching
-- here we suppose that all the groups of level 1 are part of the same big branch
groups
::
[([
PhyloGroup
],
Bool
)]
groups
=
map
(
\
b
->
(
b
,(
length
$
nub
$
map
_phylo_groupPeriod
b
)
>=
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
)))
$
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
...
...
@@ -441,7 +441,7 @@ adaptativeBreakBranches :: Proximity -> Double -> Double -> Map (PhyloGroupId,Ph
->
[
PhyloPeriodId
]
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
->
([
PhyloGroup
],(
Bool
,[
Double
]))
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
adaptativeBreakBranches
proxiConf
depth
elevation
groupsProxi
beta
frequency
minBranch
frame
docs
coocs
periods
done
ego
rest
=
--
|
1) keep or not the new division of ego
-- 1) keep or not the new division of ego
let
done'
=
done
++
(
if
(
fst
.
snd
)
ego
then
(
if
((
null
(
fst
ego'
))
||
(
quality
>
quality'
))
then
...
...
@@ -451,13 +451,13 @@ adaptativeBreakBranches proxiConf depth elevation groupsProxi beta frequency min
++
(
map
(
\
e
->
(
e
,(
False
,
((
snd
.
snd
)
ego
))))
(
snd
ego'
))))
else
[(
concat
$
thrToMeta
thr
$
[
fst
ego
],
snd
ego
)])
in
--
|
uncomment let .. in for debugging
-- uncomment let .. in for debugging
-- let part1 = partition (snd) done'
-- part2 = partition (snd) rest
-- in trace ( "[✓ " <> show(length $ fst part1) <> "(" <> show(length $ concat $ map (fst) $ fst part1) <> ")|✗ " <> show(length $ snd part1) <> "(" <> show(length $ concat $ map (fst) $ snd part1) <> ")] "
-- <> "[✓ " <> show(length $ fst part2) <> "(" <> show(length $ concat $ map (fst) $ fst part2) <> ")|✗ " <> show(length $ snd part2) <> "(" <> show(length $ concat $ map (fst) $ snd part2) <> ")]"
-- ) $
--
|
2) if there is no more branches in rest then return else continue
-- 2) if there is no more branches in rest then return else continue
if
null
rest
then
done'
else
adaptativeBreakBranches
proxiConf
depth
elevation
groupsProxi
beta
frequency
minBranch
frame
docs
coocs
periods
...
...
@@ -489,11 +489,11 @@ adaptativeSeaLevelMatching :: Proximity -> Double -> Double -> Map (PhyloGroupId
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Map
Date
Cooc
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
->
[([
PhyloGroup
],(
Bool
,[
Double
]))]
adaptativeSeaLevelMatching
proxiConf
depth
elevation
groupsProxi
beta
minBranch
frequency
frame
periods
docs
coocs
branches
=
--
|
if there is no branch to break or if seaLvl level >= depth then end
-- if there is no branch to break or if seaLvl level >= depth then end
if
(
Map
.
null
groupsProxi
)
||
(
depth
<=
0
)
||
((
not
.
or
)
$
map
(
fst
.
snd
)
branches
)
then
branches
else
--
|
break all the possible branches at the current seaLvl level
-- break all the possible branches at the current seaLvl level
let
branches'
=
adaptativeBreakBranches
proxiConf
depth
elevation
groupsProxi
beta
frequency
minBranch
frame
docs
coocs
periods
[]
(
head'
"seaLevelMatching"
branches
)
(
tail'
"seaLevelMatching"
branches
)
frequency'
=
reduceFrequency
frequency
(
map
fst
branches'
)
...
...
@@ -511,7 +511,7 @@ adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
(
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
$
traceMatchEnd
$
concat
branches
)
phylo
where
--
|
2) process the temporal matching by elevating seaLvl level
-- 2) process the temporal matching by elevating seaLvl level
branches
::
[[
PhyloGroup
]]
branches
=
map
fst
$
adaptativeSeaLevelMatching
(
phyloProximity
$
getConfig
phylo
)
...
...
@@ -526,8 +526,8 @@ adaptativeTemporalMatching elevation phylo = updatePhyloGroups 1
(
phylo
^.
phylo_timeDocs
)
(
phylo
^.
phylo_timeCooc
)
groups
--
|
1) for each group process an initial temporal Matching
--
|
here we suppose that all the groups of level 1 are part of the same big branch
-- 1) for each group process an initial temporal Matching
-- here we suppose that all the groups of level 1 are part of the same big branch
groups
::
[([
PhyloGroup
],(
Bool
,[
Double
]))]
groups
=
map
(
\
b
->
(
b
,((
length
$
nub
$
map
_phylo_groupPeriod
b
)
>=
(
_qua_minBranch
$
phyloQuality
$
getConfig
phylo
),[
thr
])))
$
groupsToBranches
$
fromList
$
map
(
\
g
->
(
getGroupId
g
,
g
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment