Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
6fc1eb02
Commit
6fc1eb02
authored
Sep 03, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
phyloQuality
parent
99b5de7d
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
60 additions
and
22 deletions
+60
-22
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+14
-8
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+46
-14
No files found.
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
6fc1eb02
...
@@ -17,9 +17,9 @@ Portability : POSIX
...
@@ -17,9 +17,9 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.PhyloTools
where
module
Gargantext.Viz.Phylo.PhyloTools
where
import
Data.Vector
(
Vector
,
elemIndex
)
import
Data.Vector
(
Vector
,
elemIndex
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
)
import
Data.List
(
sort
,
concat
,
null
,
union
,
(
++
),
tails
,
sortOn
,
nub
)
import
Data.Set
(
Set
,
size
)
import
Data.Set
(
Set
,
size
)
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
))
import
Data.Map
(
Map
,
elems
,
fromList
,
unionWith
,
keys
,
member
,
(
!
)
,
filterWithKey
)
import
Data.String
(
String
)
import
Data.String
(
String
)
import
Gargantext.Prelude
import
Gargantext.Prelude
...
@@ -184,6 +184,9 @@ listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
...
@@ -184,6 +184,9 @@ listToMatrix lst = fromList $ map (\k -> (k,1)) $ listToKeys $ sort lst
sumCooc
::
Cooc
->
Cooc
->
Cooc
sumCooc
::
Cooc
->
Cooc
->
Cooc
sumCooc
cooc
cooc'
=
unionWith
(
+
)
cooc
cooc'
sumCooc
cooc
cooc'
=
unionWith
(
+
)
cooc
cooc'
getTrace
::
Cooc
->
Double
getTrace
cooc
=
sum
$
elems
$
filterWithKey
(
\
(
k
,
k'
)
_
->
k
==
k'
)
cooc
--------------------
--------------------
-- | PhyloGroup | --
-- | PhyloGroup | --
--------------------
--------------------
...
@@ -255,11 +258,6 @@ updatePhyloGroups lvl m phylo =
...
@@ -255,11 +258,6 @@ updatePhyloGroups lvl m phylo =
pointersToLinks
::
PhyloGroupId
->
[
Pointer
]
->
[
Link
]
pointersToLinks
::
PhyloGroupId
->
[
Pointer
]
->
[
Link
]
pointersToLinks
id
pointers
=
map
(
\
p
->
((
id
,
fst
p
),
snd
p
))
pointers
pointersToLinks
id
pointers
=
map
(
\
p
->
((
id
,
fst
p
),
snd
p
))
pointers
-- mergeLinks :: [Link] -> [Link] -> [Link]
-- mergeLinks toChilds toParents =
-- let toChilds' = fromList $ map (\((from,to),w) -> ((to,from),w)) toChilds
-- in toList $ unionWith max (fromList toParents) toChilds'
-------------------
-------------------
-- | Proximity | --
-- | Proximity | --
...
@@ -278,4 +276,12 @@ getThresholdInit proxi = case proxi of
...
@@ -278,4 +276,12 @@ getThresholdInit proxi = case proxi of
getThresholdStep
::
Proximity
->
Double
getThresholdStep
::
Proximity
->
Double
getThresholdStep
proxi
=
case
proxi
of
getThresholdStep
proxi
=
case
proxi
of
WeightedLogJaccard
_
_
s
->
s
WeightedLogJaccard
_
_
s
->
s
Hamming
->
undefined
Hamming
->
undefined
\ No newline at end of file
----------------
-- | Branch | --
----------------
ngramsInBranches
::
[[
PhyloGroup
]]
->
[
Int
]
ngramsInBranches
branches
=
nub
$
foldl
(
\
acc
g
->
acc
++
(
g
^.
phylo_groupNgrams
))
[]
$
concat
branches
\ No newline at end of file
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
6fc1eb02
...
@@ -16,13 +16,14 @@ Portability : POSIX
...
@@ -16,13 +16,14 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.TemporalMatching
where
module
Gargantext.Viz.Phylo.TemporalMatching
where
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
find
,
groupBy
,
scanl
,
nub
,
union
)
import
Data.List
(
concat
,
splitAt
,
tail
,
sortOn
,
(
++
),
intersect
,
null
,
inits
,
find
,
groupBy
,
scanl
,
nub
,
union
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
filterWithKey
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
)
import
Data.Map
(
Map
,
fromList
,
fromListWith
,
filterWithKey
,
elems
,
restrictKeys
,
unionWith
,
intersectionWith
,
findWithDefault
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.PhyloTools
import
Gargantext.Viz.Phylo.SynchronicClustering
import
Gargantext.Viz.Phylo.SynchronicClustering
import
Prelude
(
logBase
)
import
Control.Lens
hiding
(
Level
)
import
Control.Lens
hiding
(
Level
)
import
qualified
Data.Set
as
Set
import
qualified
Data.Set
as
Set
...
@@ -87,7 +88,7 @@ filterProximity proximity thr local =
...
@@ -87,7 +88,7 @@ filterProximity proximity thr local =
-- | To process the proximity between a current group and a pair of targets group
-- | To process the proximity between a current group and a pair of targets group
toProximity
::
Map
Date
Double
->
Proximity
->
PhyloGroup
->
PhyloGroup
->
PhyloGroup
->
Double
toProximity
::
Map
Date
Double
->
Proximity
->
PhyloGroup
->
PhyloGroup
->
PhyloGroup
->
Double
toProximity
docs
proximity
group
target
target'
=
toProximity
docs
proximity
ego
target
target'
=
let
docs'
=
sum
$
elems
docs
let
docs'
=
sum
$
elems
docs
cooc
=
if
target
==
target'
cooc
=
if
target
==
target'
then
(
target
^.
phylo_groupCooc
)
then
(
target
^.
phylo_groupCooc
)
...
@@ -95,7 +96,7 @@ toProximity docs proximity group target target' =
...
@@ -95,7 +96,7 @@ toProximity docs proximity group target target' =
ngrams
=
if
target
==
target'
ngrams
=
if
target
==
target'
then
(
target
^.
phylo_groupNgrams
)
then
(
target
^.
phylo_groupNgrams
)
else
union
(
target
^.
phylo_groupNgrams
)
(
target'
^.
phylo_groupNgrams
)
else
union
(
target
^.
phylo_groupNgrams
)
(
target'
^.
phylo_groupNgrams
)
in
pickProximity
proximity
docs'
(
group
^.
phylo_groupCooc
)
cooc
(
group
^.
phylo_groupNgrams
)
ngrams
in
pickProximity
proximity
docs'
(
ego
^.
phylo_groupCooc
)
cooc
(
ego
^.
phylo_groupNgrams
)
ngrams
------------------------
------------------------
...
@@ -117,9 +118,9 @@ makePairs candidates periods = case null periods of
...
@@ -117,9 +118,9 @@ makePairs candidates periods = case null periods of
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
::
[[
PhyloGroup
]]
->
Filiation
->
Proximity
->
Map
Date
Double
->
Double
->
PhyloGroup
->
PhyloGroup
phyloGroupMatching
candidates
fil
proxi
docs
thr
group
=
case
pointers
of
phyloGroupMatching
candidates
fil
proxi
docs
thr
ego
=
case
pointers
of
Nothing
->
addPointers
group
fil
TemporalPointer
[]
Nothing
->
addPointers
ego
fil
TemporalPointer
[]
Just
pts
->
addPointers
group
fil
TemporalPointer
Just
pts
->
addPointers
ego
fil
TemporalPointer
$
head'
"phyloGroupMatching"
$
head'
"phyloGroupMatching"
-- | Keep only the best set of pointers grouped by proximity
-- | Keep only the best set of pointers grouped by proximity
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
$
groupBy
(
\
pt
pt'
->
snd
pt
==
snd
pt'
)
...
@@ -136,7 +137,7 @@ phyloGroupMatching candidates fil proxi docs thr group = case pointers of
...
@@ -136,7 +137,7 @@ phyloGroupMatching candidates fil proxi docs thr group = case pointers of
$
concat
$
concat
$
map
(
\
(
c
,
c'
)
->
$
map
(
\
(
c
,
c'
)
->
-- | process the proximity between the current group and a pair of candidates
-- | process the proximity between the current group and a pair of candidates
let
proximity
=
toProximity
(
filterDocs
docs
periods
)
proxi
group
c
c'
let
proximity
=
toProximity
(
filterDocs
docs
periods
)
proxi
ego
c
c'
in
if
(
c
==
c'
)
in
if
(
c
==
c'
)
then
[(
getGroupId
c
,
proximity
)]
then
[(
getGroupId
c
,
proximity
)]
else
[(
getGroupId
c
,
proximity
),(
getGroupId
c'
,
proximity
)]
)
pairs
)
else
[(
getGroupId
c
,
proximity
),(
getGroupId
c'
,
proximity
)]
)
pairs
)
...
@@ -161,13 +162,13 @@ getNextPeriods fil max' pId pIds =
...
@@ -161,13 +162,13 @@ getNextPeriods fil max' pId pIds =
getCandidates
::
Filiation
->
PhyloGroup
->
[
PhyloPeriodId
]
->
[
PhyloGroup
]
->
[[
PhyloGroup
]]
getCandidates
::
Filiation
->
PhyloGroup
->
[
PhyloPeriodId
]
->
[
PhyloGroup
]
->
[[
PhyloGroup
]]
getCandidates
fil
g
pIds
targets
=
getCandidates
fil
ego
pIds
targets
=
case
fil
of
case
fil
of
ToChilds
->
targets'
ToChilds
->
targets'
ToParents
->
reverse
targets'
ToParents
->
reverse
targets'
where
where
targets'
::
[[
PhyloGroup
]]
targets'
::
[[
PhyloGroup
]]
targets'
=
map
(
\
groups'
->
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
g
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
))
groups'
)
$
elems
targets'
=
map
(
\
groups'
->
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
ego
^.
phylo_groupNgrams
)
(
g'
^.
phylo_groupNgrams
))
groups'
)
$
elems
$
filterWithKey
(
\
k
_
->
elem
k
pIds
)
$
filterWithKey
(
\
k
_
->
elem
k
pIds
)
$
fromListWith
(
++
)
$
fromListWith
(
++
)
$
sortOn
(
fst
.
fst
)
$
sortOn
(
fst
.
fst
)
...
@@ -186,13 +187,44 @@ processMatching max' periods proximity thr docs groups =
...
@@ -186,13 +187,44 @@ processMatching max' periods proximity thr docs groups =
)
groups
)
groups
-----------------------------
-----------------------
-- | Adaptative Matching | --
-- | Phylo Quality | --
-----------------------------
-----------------------
termFreq
::
Int
->
[[
PhyloGroup
]]
->
Double
termFreq
term
branches
=
(
sum
$
map
(
\
g
->
findWithDefault
0
(
term
,
term
)
(
g
^.
phylo_groupCooc
))
$
concat
branches
)
/
(
sum
$
map
(
\
g
->
getTrace
$
g
^.
phylo_groupCooc
)
$
concat
branches
)
entropy
::
[[
PhyloGroup
]]
->
Double
entropy
branches
=
let
terms
=
ngramsInBranches
branches
in
sum
$
map
(
\
term
->
(
1
/
log
(
termFreq
term
branches
))
/
(
sum
$
map
(
\
branch
->
1
/
log
(
termFreq
term
[
branch
]))
branches
)
*
(
sum
$
map
(
\
branch
->
let
q
=
branchObs
term
(
length
$
concat
branches
)
branch
in
q
*
logBase
2
q
)
branches
)
)
terms
where
-- | Probability to observe a branch given a random term of the phylo
branchObs
::
Int
->
Int
->
[
PhyloGroup
]
->
Double
branchObs
term
total
branch
=
(
fromIntegral
$
length
$
filter
(
\
g
->
elem
term
$
g
^.
phylo_groupNgrams
)
branch
)
/
(
fromIntegral
total
)
homogeneity
::
[[
PhyloGroup
]]
->
Double
homogeneity
branches
=
undefined
where
branchCov
::
toPhyloQuality
::
[[
PhyloGroup
]]
->
Double
toPhyloQuality
::
[[
PhyloGroup
]]
->
Double
toPhyloQuality
_
=
undefined
toPhyloQuality
branches
=
sqrt
(
homogeneity
branches
/
entropy
branches
)
-----------------------------
-- | Adaptative Matching | --
-----------------------------
groupsToBranches
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
groupsToBranches
::
Map
PhyloGroupId
PhyloGroup
->
[[
PhyloGroup
]]
...
@@ -211,7 +243,7 @@ groupsToBranches groups =
...
@@ -211,7 +243,7 @@ groupsToBranches groups =
recursiveMatching
::
Proximity
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
recursiveMatching
::
Proximity
->
Double
->
Int
->
[
PhyloPeriodId
]
->
Map
Date
Double
->
Double
->
[
PhyloGroup
]
->
[
PhyloGroup
]
recursiveMatching
proximity
thr
max'
periods
docs
quality
groups
=
recursiveMatching
proximity
thr
max'
periods
docs
quality
groups
=
case
quality
<
quality'
of
case
quality
<
quality'
of
-- | success : we localy improve the quality of the branch, let's go deeper
-- | success : we localy improve the quality of the branch, let's go deeper
True
->
concat
True
->
concat
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment