Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
purescript-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
152
Issues
152
List
Board
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
purescript-gargantext
Commits
0225091d
Commit
0225091d
authored
Nov 19, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
remove groups from time matching and work only on ngrams and ids
parent
f5393047
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
60 additions
and
50 deletions
+60
-50
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+5
-2
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+55
-48
No files found.
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
0225091d
...
@@ -245,6 +245,9 @@ ngramsToCooc ngrams coocs =
...
@@ -245,6 +245,9 @@ ngramsToCooc ngrams coocs =
getGroupId
::
PhyloGroup
->
PhyloGroupId
getGroupId
::
PhyloGroup
->
PhyloGroupId
getGroupId
group
=
((
group
^.
phylo_groupPeriod
,
group
^.
phylo_groupLevel
),
group
^.
phylo_groupIndex
)
getGroupId
group
=
((
group
^.
phylo_groupPeriod
,
group
^.
phylo_groupLevel
),
group
^.
phylo_groupIndex
)
idToPrd
::
PhyloGroupId
->
PhyloPeriodId
idToPrd
id
=
(
fst
.
fst
)
id
getGroupThr
::
PhyloGroup
->
Double
getGroupThr
::
PhyloGroup
->
Double
getGroupThr
group
=
head'
"getGroupThr"
((
group
^.
phylo_groupMeta
)
!
"thr"
)
getGroupThr
group
=
head'
"getGroupThr"
((
group
^.
phylo_groupMeta
)
!
"thr"
)
...
@@ -286,8 +289,8 @@ getProximityStep proximity =
...
@@ -286,8 +289,8 @@ getProximityStep proximity =
-- | Phylo | --
-- | Phylo | --
---------------
---------------
addPointers
::
PhyloGroup
->
Filiation
->
PointerType
->
[
Pointer
]
->
PhyloGroup
addPointers
::
Filiation
->
PointerType
->
[
Pointer
]
->
PhyloGroup
->
PhyloGroup
addPointers
group
fil
pty
pointers
=
addPointers
fil
pty
pointers
group
=
case
pty
of
case
pty
of
TemporalPointer
->
case
fil
of
TemporalPointer
->
case
fil
of
ToChilds
->
group
&
phylo_groupPeriodChilds
.~
pointers
ToChilds
->
group
&
phylo_groupPeriodChilds
.~
pointers
...
...
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
0225091d
...
@@ -74,14 +74,14 @@ weightedLogJaccard' sens nbDocs diago ngrams ngrams'
...
@@ -74,14 +74,14 @@ weightedLogJaccard' sens nbDocs diago ngrams ngrams'
-- | To process the proximity between a current group and a pair of targets group
-- | To process the proximity between a current group and a pair of targets group
toProximity
::
Double
->
Map
Int
Double
->
Proximity
->
PhyloGroup
->
PhyloGroup
->
PhyloGroup
->
Double
toProximity
::
Double
->
Map
Int
Double
->
Proximity
->
[
Int
]
->
[
Int
]
->
[
Int
]
->
Double
toProximity
nbDocs
diago
proximity
ego
target
target
'
=
toProximity
nbDocs
diago
proximity
ego
Ngrams
targetNgrams
targetNgrams
'
=
case
proximity
of
case
proximity
of
WeightedLogJaccard
sens
_
_
->
WeightedLogJaccard
sens
_
_
->
let
targetsNgrams
=
if
target
==
target
'
let
pairNgrams
=
if
targetNgrams
==
targetNgrams
'
then
(
target
^.
phylo_groupNgrams
)
then
targetNgrams
else
union
(
target
^.
phylo_groupNgrams
)
(
target'
^.
phylo_groupNgrams
)
else
union
targetNgrams
targetNgrams'
in
weightedLogJaccard'
sens
nbDocs
diago
(
ego
^.
phylo_groupNgrams
)
targets
Ngrams
in
weightedLogJaccard'
sens
nbDocs
diago
egoNgrams
pair
Ngrams
Hamming
->
undefined
Hamming
->
undefined
...
@@ -96,39 +96,41 @@ findLastPeriod fil periods = case fil of
...
@@ -96,39 +96,41 @@ findLastPeriod fil periods = case fil of
-- | To filter pairs of candidates related to old pointers periods
-- | To filter pairs of candidates related to old pointers periods
removeOldPointers
::
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
PhyloPeriodId
->
[(
PhyloGroup
,
PhyloGroup
)]
->
[(
PhyloGroup
,
PhyloGroup
)]
removeOldPointers
::
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
PhyloPeriodId
->
[((
PhyloGroupId
,[
Int
]),(
PhyloGroupId
,[
Int
]))]
->
[((
PhyloGroupId
,[
Int
]),(
PhyloGroupId
,[
Int
]))]
removeOldPointers
oldPointers
fil
thr
prox
prd
pairs
removeOldPointers
oldPointers
fil
thr
prox
prd
pairs
|
null
oldPointers
=
pairs
|
null
oldPointers
=
pairs
|
null
(
filterPointers
prox
thr
oldPointers
)
=
|
null
(
filterPointers
prox
thr
oldPointers
)
=
let
lastMatchedPrd
=
findLastPeriod
fil
(
map
(
fst
.
fst
.
fst
)
oldPointers
)
let
lastMatchedPrd
=
findLastPeriod
fil
(
map
(
fst
.
fst
.
fst
)
oldPointers
)
in
if
lastMatchedPrd
==
prd
in
if
lastMatchedPrd
==
prd
then
[]
then
[]
else
filter
(
\
(
g
,
g'
)
->
else
filter
(
\
(
(
id
,
_
),(
id'
,
_
)
)
->
case
fil
of
case
fil
of
ToParents
->
((
fst
$
g
^.
phylo_groupPeriod
)
<
(
fst
lastMatchedPrd
))
ToParents
->
((
(
fst
.
fst
.
fst
)
id
)
<
(
fst
lastMatchedPrd
))
||
((
fst
$
g'
^.
phylo_groupPeriod
)
<
(
fst
lastMatchedPrd
))
||
((
(
fst
.
fst
.
fst
)
id'
)
<
(
fst
lastMatchedPrd
))
ToChilds
->
((
fst
$
g
^.
phylo_groupPeriod
)
>
(
fst
lastMatchedPrd
))
ToChilds
->
((
(
fst
.
fst
.
fst
)
id
)
>
(
fst
lastMatchedPrd
))
||
((
fst
$
g'
^.
phylo_groupPeriod
)
>
(
fst
lastMatchedPrd
)))
pairs
||
((
(
fst
.
fst
.
fst
)
id'
)
>
(
fst
lastMatchedPrd
)))
pairs
|
otherwise
=
[]
|
otherwise
=
[]
-- | Find pairs of valuable candidates to be matched
makePairs'
::
(
PhyloGroupId
,[
Int
])
->
[(
PhyloGroupId
,[
Int
])]
->
[
PhyloPeriodId
]
->
[
Pointer
]
->
Filiation
->
Double
->
Proximity
makePairs'
::
PhyloGroup
->
[
PhyloGroup
]
->
[
PhyloPeriodId
]
->
[
Pointer
]
->
Filiation
->
Double
->
Proximity
->
Map
Date
Double
->
Map
Date
Cooc
->
[(
PhyloGroup
,
PhyloGroup
)]
->
Map
Date
Double
->
Map
Date
Cooc
->
[((
PhyloGroupId
,[
Int
]),(
PhyloGroupId
,[
Int
])
)]
makePairs'
ego
candidates
periods
oldPointers
fil
thr
prox
docs
diagos
=
makePairs'
(
egoId
,
egoNgrams
)
candidates
periods
oldPointers
fil
thr
prox
docs
diagos
=
case
null
periods
of
if
(
null
periods
)
True
->
[]
then
[]
False
->
removeOldPointers
oldPointers
fil
thr
prox
lastPrd
else
removeOldPointers
oldPointers
fil
thr
prox
lastPrd
-- | at least on of the pair candidates should be from the last added period
-- | at least on of the pair candidates should be from the last added period
$
filter
(
\
(
g
,
g'
)
->
((
g
^.
phylo_groupPeriod
)
==
lastPrd
)
$
filter
(
\
((
id
,
_
),(
id'
,
_
))
->
((
fst
.
fst
)
id
==
lastPrd
)
||
((
fst
.
fst
)
id'
==
lastPrd
)
)
||
((
g'
^.
phylo_groupPeriod
)
==
lastPrd
))
$
listToKeys
$
listToKeys
$
filter
(
\
(
id
,
ngrams
)
->
$
filter
(
\
g
->
let
nbDocs
=
sum
$
elems
$
(
filterDocs
docs
([
ego
^.
phylo_groupPeriod
,
g
^.
phylo_groupPeriod
])
)
let
nbDocs
=
(
sum
.
elems
)
$
filterDocs
docs
([(
fst
.
fst
)
egoId
,
(
fst
.
fst
)
id
]
)
diago
=
reduceDiagos
$
filterDiago
diagos
([
ego
^.
phylo_groupPeriod
,
g
^.
phylo_groupPerio
d
])
diago
=
reduceDiagos
$
filterDiago
diagos
([(
fst
.
fst
)
egoId
,
(
fst
.
fst
)
i
d
])
in
(
g
^.
phylo_groupPeriod
==
lastPrd
)
in
(
toProximity
nbDocs
diago
prox
egoNgrams
egoNgrams
ngrams
)
>=
thr
||
((
toProximity
nbDocs
diago
prox
ego
ego
g
)
>=
thr
))
candidates
)
candidates
where
where
lastPrd
::
PhyloPeriodId
lastPrd
::
PhyloPeriodId
lastPrd
=
findLastPeriod
fil
periods
lastPrd
=
findLastPeriod
fil
periods
filterPointers
::
Proximity
->
Double
->
[
Pointer
]
->
[
Pointer
]
filterPointers
::
Proximity
->
Double
->
[
Pointer
]
->
[
Pointer
]
...
@@ -140,39 +142,39 @@ reduceDiagos diagos = mapKeys (\(k,_) -> k)
...
@@ -140,39 +142,39 @@ reduceDiagos diagos = mapKeys (\(k,_) -> k)
$
foldl
(
\
acc
diago
->
unionWith
(
+
)
acc
diago
)
empty
(
elems
diagos
)
$
foldl
(
\
acc
diago
->
unionWith
(
+
)
acc
diago
)
empty
(
elems
diagos
)
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Map
Date
Cooc
->
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
::
[[(
PhyloGroupId
,[
Int
])]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Map
Date
Cooc
phyloGroupMatching
candidates
fil
proxi
docs
diagos
thr
ego
=
->
Double
->
[
Pointer
]
->
(
PhyloGroupId
,[
Int
])
->
[
Pointer
]
if
(
null
$
filterPointers
proxi
thr
$
getPeriodPointers
fil
ego
)
phyloGroupMatching
candidates
fil
proxi
docs
diagos
thr
oldPointers
(
id
,
ngrams
)
=
if
(
null
$
filterPointers
proxi
thr
oldPointers
)
-- | let's find new pointers
-- | let's find new pointers
then
if
null
nextPointers
then
if
null
nextPointers
then
addPointers
ego
fil
TemporalPointer
[]
then
[]
else
addPointers
ego
fil
TemporalPointer
else
head'
"phyloGroupMatching"
$
head'
"phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
-- | Keep only the best set of pointers grouped by proximity
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
reverse
$
sortOn
snd
$
head'
"pointers"
nextPointers
$
reverse
$
sortOn
snd
$
head'
"pointers"
nextPointers
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
-- | Find the first time frame where at leats one pointer satisfies the proximity threshold
else
ego
else
oldPointers
where
where
nextPointers
::
[[
Pointer
]]
nextPointers
::
[[
Pointer
]]
nextPointers
=
take
1
nextPointers
=
take
1
$
dropWhile
(
null
)
$
dropWhile
(
null
)
-- | for each time frame, process the proximity on relevant pairs of targeted groups
-- | for each time frame, process the proximity on relevant pairs of targeted groups
$
scanl
(
\
acc
groups
->
$
scanl
(
\
acc
groups
->
let
periods
=
nub
$
map
_phylo_groupPeriod
$
concat
groups
let
periods
=
nub
$
map
(
fst
.
fst
.
fst
)
$
concat
groups
nbdocs
=
sum
$
elems
$
(
filterDocs
docs
([
ego
^.
phylo_groupPerio
d
]
++
periods
))
nbdocs
=
sum
$
elems
$
(
filterDocs
docs
([
(
fst
.
fst
)
i
d
]
++
periods
))
diago
=
reduceDiagos
diago
=
reduceDiagos
$
filterDiago
diagos
([
ego
^.
phylo_groupPerio
d
]
++
periods
)
$
filterDiago
diagos
([
(
fst
.
fst
)
i
d
]
++
periods
)
-- | important resize nbdocs et diago dans le make pairs
-- | important resize nbdocs et diago dans le make pairs
pairs
=
makePairs'
ego
(
concat
groups
)
periods
(
getPeriodPointers
fil
ego
)
fil
thr
proxi
docs
diagos
pairs
=
makePairs'
(
id
,
ngrams
)
(
concat
groups
)
periods
oldPointers
fil
thr
proxi
docs
diagos
in
acc
++
(
filterPointers
proxi
thr
in
acc
++
(
filterPointers
proxi
thr
$
concat
$
concat
$
map
(
\
(
c
,
c'
)
->
$
map
(
\
(
c
,
c'
)
->
-- | process the proximity between the current group and a pair of candidates
-- | process the proximity between the current group and a pair of candidates
let
proximity
=
toProximity
nbdocs
diago
proxi
ego
c
c'
let
proximity
=
toProximity
nbdocs
diago
proxi
ngrams
(
snd
c
)
(
snd
c'
)
in
if
(
c
==
c'
)
in
if
(
c
==
c'
)
then
[(
getGroupId
c
,
proximity
)]
then
[(
fst
c
,
proximity
)]
else
[(
getGroupId
c
,
proximity
),(
getGroupId
c'
,
proximity
)]
)
pairs
))
[]
else
[(
fst
c
,
proximity
),(
fst
c'
,
proximity
)]
)
pairs
))
[]
$
inits
candidates
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
$
inits
candidates
-- | groups from [[1900],[1900,1901],[1900,1901,1902],...]
...
@@ -195,10 +197,10 @@ getNextPeriods fil max' pId pIds =
...
@@ -195,10 +197,10 @@ getNextPeriods fil max' pId pIds =
ToParents
->
take
max'
$
(
reverse
.
fst
)
$
splitAt
(
elemIndex'
pId
pIds
)
pIds
ToParents
->
take
max'
$
(
reverse
.
fst
)
$
splitAt
(
elemIndex'
pId
pIds
)
pIds
getCandidates
::
PhyloGroup
->
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
getCandidates
::
PhyloGroup
->
[[
(
PhyloGroupId
,[
Int
])]]
->
[[(
PhyloGroupId
,[
Int
])
]]
getCandidates
ego
targets
=
getCandidates
ego
targets
=
map
(
\
groups'
->
map
(
\
groups'
->
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
ego
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
)
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
ego
^.
phylo_groupNgrams
)
(
snd
g'
)
)
groups'
)
targets
)
groups'
)
targets
...
@@ -210,8 +212,8 @@ matchGroupsToGroups frame periods proximity thr docs coocs groups =
...
@@ -210,8 +212,8 @@ matchGroupsToGroups frame periods proximity thr docs coocs groups =
periodsPar
=
getNextPeriods
ToParents
frame
prd
periods
periodsPar
=
getNextPeriods
ToParents
frame
prd
periods
periodsChi
=
getNextPeriods
ToChilds
frame
prd
periods
periodsChi
=
getNextPeriods
ToChilds
frame
prd
periods
-- | 2) find the parents/childs matching candidates
-- | 2) find the parents/childs matching candidates
candidatesPar
=
map
(
\
prd'
->
findWithDefault
[]
prd'
groups'
)
periodsPar
candidatesPar
=
map
(
\
prd'
->
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
findWithDefault
[]
prd'
groups'
)
periodsPar
candidatesChi
=
map
(
\
prd'
->
findWithDefault
[]
prd'
groups'
)
periodsChi
candidatesChi
=
map
(
\
prd'
->
map
(
\
g
->
(
getGroupId
g
,
g
^.
phylo_groupNgrams
))
$
findWithDefault
[]
prd'
groups'
)
periodsChi
-- | 3) find the parents/child number of docs by years
-- | 3) find the parents/child number of docs by years
docsPar
=
filterDocs
docs
([
prd
]
++
periodsPar
)
docsPar
=
filterDocs
docs
([
prd
]
++
periodsPar
)
docsChi
=
filterDocs
docs
([
prd
]
++
periodsChi
)
docsChi
=
filterDocs
docs
([
prd
]
++
periodsChi
)
...
@@ -219,8 +221,13 @@ matchGroupsToGroups frame periods proximity thr docs coocs groups =
...
@@ -219,8 +221,13 @@ matchGroupsToGroups frame periods proximity thr docs coocs groups =
diagoPar
=
filterDiago
(
map
coocToDiago
coocs
)
([
prd
]
++
periodsPar
)
diagoPar
=
filterDiago
(
map
coocToDiago
coocs
)
([
prd
]
++
periodsPar
)
diagoChi
=
filterDiago
(
map
coocToDiago
coocs
)
([
prd
]
++
periodsPar
)
diagoChi
=
filterDiago
(
map
coocToDiago
coocs
)
([
prd
]
++
periodsPar
)
-- | 5) match in parallel all the groups (egos) to their possible candidates
-- | 5) match in parallel all the groups (egos) to their possible candidates
egos
=
map
(
\
ego
->
phyloGroupMatching
(
getCandidates
ego
candidatesPar
)
ToParents
proximity
docsPar
diagoPar
thr
egos
=
map
(
\
ego
->
$
phyloGroupMatching
(
getCandidates
ego
candidatesChi
)
ToChilds
proximity
docsChi
diagoChi
thr
ego
)
let
pointersPar
=
phyloGroupMatching
(
getCandidates
ego
candidatesPar
)
ToParents
proximity
docsPar
diagoPar
thr
(
getPeriodPointers
ToParents
ego
)
(
getGroupId
ego
,
ego
^.
phylo_groupNgrams
)
pointersChi
=
phyloGroupMatching
(
getCandidates
ego
candidatesChi
)
ToChilds
proximity
docsChi
diagoChi
thr
(
getPeriodPointers
ToChilds
ego
)
(
getGroupId
ego
,
ego
^.
phylo_groupNgrams
)
in
addPointers
ToChilds
TemporalPointer
pointersChi
$
addPointers
ToParents
TemporalPointer
pointersPar
ego
)
$
findWithDefault
[]
prd
groups'
$
findWithDefault
[]
prd
groups'
egos'
=
egos
`
using
`
parList
rdeepseq
egos'
=
egos
`
using
`
parList
rdeepseq
in
acc
++
egos'
in
acc
++
egos'
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment