Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
148
Issues
148
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
38d3a9cd
Commit
38d3a9cd
authored
May 21, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
remove bad tracers
parent
5e332ef8
Pipeline
#403
failed with stage
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
109 additions
and
20 deletions
+109
-20
Main.hs
bin/gargantext-phylo/Main.hs
+2
-0
Cluster.hs
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
+9
-10
Example.hs
src/Gargantext/Viz/Phylo/Example.hs
+3
-3
LinkMaker.hs
src/Gargantext/Viz/Phylo/LinkMaker.hs
+59
-3
Proximity.hs
src/Gargantext/Viz/Phylo/Metrics/Proximity.hs
+36
-4
No files found.
bin/gargantext-phylo/Main.hs
View file @
38d3a9cd
...
...
@@ -115,7 +115,9 @@ filterTerms patterns (y,d) = (y,termsInText patterns d)
-- | To transform a Csv nfile into a readable corpus
csvToCorpus
::
Limit
->
CorpusPath
->
IO
([(
Int
,
Text
)])
csvToCorpus
limit
csv
=
DV
.
toList
-- . DV.reverse
.
DV
.
take
limit
-- . DV.reverse
.
DV
.
map
(
\
n
->
(
csv_publication_year
n
,
(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)))
.
snd
<$>
CSV
.
readFile
csv
...
...
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
View file @
38d3a9cd
...
...
@@ -49,10 +49,12 @@ graphToClusters clust (nodes,edges) = case clust of
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph
::
Proximity
->
[
PhyloGroup
]
->
Map
(
Int
,
Int
)
Double
->
Phylo
->
([
GroupNode
],[
GroupEdge
])
groupsToGraph
prox
gs
cooc
p
=
case
prox
of
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
traceSim
x
y
(
getSubCooc
(
getGroupNgrams
x
)
cooc
)
(
getSubCooc
(
getGroupNgrams
y
)
cooc
)
p
$
weightedLogJaccard
sens
(
getSubCooc
(
getGroupNgrams
x
)
cooc
)
(
getSubCooc
(
getGroupNgrams
y
)
cooc
)))
groupsToGraph
::
Proximity
->
[
PhyloGroup
]
->
Map
(
Int
,
Int
)
Double
->
([
GroupNode
],[
GroupEdge
])
groupsToGraph
prox
gs
cooc
=
case
prox
of
-- WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), traceSim x y (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc) p
-- $ weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
-- $ getCandidates gs)
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard'
sens
(
getGroupNgrams
x
)
(
getGroupNgrams
y
)
cooc
))
$
getCandidates
gs
)
Hamming
(
HammingParams
_
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
hamming
(
getSubCooc
(
getGroupNgrams
x
)
cooc
)
(
getSubCooc
(
getGroupNgrams
y
)
cooc
)))
$
getCandidates
gs
)
...
...
@@ -82,7 +84,7 @@ phyloToClusters lvl clus p = Map.fromList
--------------------------------------
graphs
::
[([
GroupNode
],[
GroupEdge
])]
graphs
=
traceGraph
lvl
(
getThreshold
prox
)
$
map
(
\
prd
->
groupsToGraph
prox
(
getGroupsWithFilters
lvl
prd
p
)
(
getCooc
[
prd
]
p
)
p
)
periods
$
map
(
\
prd
->
groupsToGraph
prox
(
getGroupsWithFilters
lvl
prd
p
)
(
getCooc
[
prd
]
p
))
periods
--------------------------------------
prox
::
Proximity
prox
=
getProximity
clus
...
...
@@ -100,7 +102,6 @@ phyloToClusters lvl clus p = Map.fromList
traceGraph
::
Level
->
Double
->
[([
GroupNode
],[
GroupEdge
])]
->
[([
GroupNode
],[
GroupEdge
])]
traceGraph
lvl
thr
g
=
trace
(
"----
\n
Unfiltered clustering in Phylo"
<>
show
(
lvl
)
<>
" :
\n
"
<>
"count : "
<>
show
(
length
lst
)
<>
" potential edges ("
<>
show
(
length
$
filter
(
>=
thr
)
lst
)
<>
" >= "
<>
show
(
thr
)
<>
")
\n
"
<>
show
(
lst
)
<>
"
\n
"
<>
"similarity : "
<>
show
(
percentile
25
(
VS
.
fromList
lst
))
<>
" (25%) "
<>
show
(
percentile
50
(
VS
.
fromList
lst
))
<>
" (50%) "
<>
show
(
percentile
75
(
VS
.
fromList
lst
))
<>
" (75%) "
...
...
@@ -120,7 +121,5 @@ traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <>
lst
=
sort
$
map
snd
$
concat
$
map
snd
g
traceSim
::
PhyloGroup
->
PhyloGroup
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Phylo
->
Double
->
Double
traceSim
g
g'
_
_
p
sim
=
trace
(
show
(
getGroupText
g
p
)
<>
" [vs] "
<>
show
(
getGroupText
g'
p
)
<>
" = "
<>
show
(
sim
)
<>
"
\n
"
-- <> show (c) <> " [vs] " <> show (c') <> " = " <> show (sim)
)
sim
-- traceSim :: PhyloGroup -> PhyloGroup -> Phylo -> Double -> Double
-- traceSim g g' p sim = trace (show (getGroupText g p) <> " [vs] " <> show (getGroupText g' p) <> " = " <> show (sim) <> "\n") sim
src/Gargantext/Viz/Phylo/Example.hs
View file @
38d3a9cd
...
...
@@ -55,7 +55,7 @@ import qualified Data.List as List
------------------------------------------------------
export
::
IO
()
export
=
dotToFile
"/home/qlobbe/data/
epique
/output/cesar_cleopatre.dot"
phyloDot
export
=
dotToFile
"/home/qlobbe/data/
phylo
/output/cesar_cleopatre.dot"
phyloDot
phyloDot
::
DotGraph
DotId
phyloDot
=
viewToDot
phyloView
...
...
@@ -77,7 +77,7 @@ queryViewEx = "level=3"
phyloQueryView
::
PhyloQueryView
phyloQueryView
=
PhyloQueryView
1
Merge
False
1
[
BranchAge
]
[]
[
BranchPeakFreq
,
GroupLabelCooc
]
(
Just
(
ByBranchAge
,
Asc
))
Json
Flat
True
phyloQueryView
=
PhyloQueryView
2
Merge
False
1
[
BranchAge
]
[]
[
BranchPeakFreq
,
GroupLabelCooc
]
(
Just
(
ByBranchAge
,
Asc
))
Json
Flat
True
--------------------------------------------------
...
...
@@ -104,7 +104,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild
::
PhyloQueryBuild
phyloQueryBuild
=
PhyloQueryBuild
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
1
10
)
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.13
0
)
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
3
10
)
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.5
0
)
...
...
src/Gargantext/Viz/Phylo/LinkMaker.hs
View file @
38d3a9cd
...
...
@@ -89,7 +89,8 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLeve
-- | To apply the corresponding proximity function based on a given Proximity
applyProximity
::
Proximity
->
PhyloGroup
->
PhyloGroup
->
Map
(
Int
,
Int
)
Double
->
(
PhyloGroupId
,
Double
)
applyProximity
prox
g1
g2
cooc
=
case
prox
of
WeightedLogJaccard
(
WLJParams
_
s
)
->
((
getGroupId
g2
),
weightedLogJaccard
s
(
getSubCooc
(
getGroupNgrams
g1
)
cooc
)
(
getSubCooc
(
getGroupNgrams
g2
)
cooc
))
-- WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard s (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
WeightedLogJaccard
(
WLJParams
_
s
)
->
((
getGroupId
g2
),
weightedLogJaccard'
s
(
getGroupNgrams
g1
)
(
getGroupNgrams
g2
)
cooc
)
Hamming
(
HammingParams
_
)
->
((
getGroupId
g2
),
hamming
(
getSubCooc
(
getGroupNgrams
g1
)
cooc
)
(
getSubCooc
(
getGroupNgrams
g2
)
cooc
))
_
->
panic
(
"[ERR][Viz.Phylo.Example.applyProximity] Proximity function not defined"
)
...
...
@@ -163,7 +164,52 @@ updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if (getGroupLevel
-- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target
filterCandidates
::
PhyloGroup
->
[
PhyloGroup
]
->
[
PhyloGroup
]
filterCandidates
g
gs
=
filter
(
\
g'
->
(
not
.
null
)
$
intersect
(
getGroupNgrams
g
)
(
getGroupNgrams
g'
))
$
delete
g
gs
$
delete
g
gs
-- | a init avec la [[head groups]] et la tail groups
toBranches
::
[[
PhyloGroup
]]
->
[
PhyloGroup
]
->
[[
PhyloGroup
]]
toBranches
mem
gs
|
null
gs
=
mem
|
otherwise
=
toBranches
mem'
$
tail
gs
where
--------------------------------------
mem'
::
[[
PhyloGroup
]]
mem'
=
if
(
null
withHead
)
then
mem
++
[[
head'
"toBranches"
gs
]]
else
(
filter
(
\
gs'
->
not
$
elem
gs'
withHead
)
mem
)
++
[(
concat
withHead
)
++
[
head'
"toBranches"
gs
]]
--------------------------------------
withHead
::
[[
PhyloGroup
]]
withHead
=
filter
(
\
gs'
->
(
not
.
null
)
$
intersect
(
concat
$
map
getGroupNgrams
gs'
)
(
getGroupNgrams
$
(
head'
"toBranches"
gs
))
)
mem
--------------------------------------
-- | a init avec la [[head groups]] et la tail groups
toBranches'
::
[[[
Int
]]]
->
[[
Int
]]
->
[[[
Int
]]]
toBranches'
mem
gs
|
null
gs
=
mem
|
otherwise
=
toBranches'
mem'
$
tail
gs
where
--------------------------------------
mem'
::
[[[
Int
]]]
mem'
=
if
(
null
withHead
)
then
mem
++
[[
head'
"toBranches"
gs
]]
else
(
filter
(
\
gs'
->
not
$
elem
gs'
withHead
)
mem
)
++
[(
concat
withHead
)
++
[
head'
"toBranches"
gs
]]
--------------------------------------
withHead
::
[[[
Int
]]]
withHead
=
filter
(
\
gs'
->
(
not
.
null
)
$
intersect
(
concat
gs'
)
(
head'
"toBranches"
gs
)
)
mem
--------------------------------------
...
...
@@ -179,12 +225,18 @@ interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) sc
scores
::
[
Double
]
scores
=
sort
$
concat
$
map
(
snd
.
snd
)
candidates
--------------------------------------
-- candidates' :: [(PhyloGroupId,([Pointer],[Double]))]
-- candidates' = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) gs
--------------------------------------
candidates
::
[(
PhyloGroupId
,([
Pointer
],[
Double
]))]
candidates
=
map
(
\
g
->
(
getGroupId
g
,
findBestCandidates'
fil
1
5
prox
(
getNextPeriods
fil
(
getGroupPeriod
g
)
prds
)
(
filterCandidates
g
gs
)
g
p
))
g
s
candidates
=
concat
$
map
(
\
b
->
map
(
\
g
->
(
getGroupId
g
,
findBestCandidates'
fil
1
5
prox
(
getNextPeriods
fil
(
getGroupPeriod
g
)
prds
)
(
filterCandidates
g
gs
)
g
p
))
b
)
b
s
--------------------------------------
gs
::
[
PhyloGroup
]
gs
=
getGroupsWithLevel
lvl
p
--------------------------------------
bs
::
[[
PhyloGroup
]]
bs
=
tracePreBranches
$
toBranches
[[
head'
"interTempoMatching"
gs
]]
$
tail
gs
--------------------------------------
prds
::
[
PhyloPeriodId
]
prds
=
getPhyloPeriods
p
--------------------------------------
...
...
@@ -230,3 +282,7 @@ traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered
<>
show
(
percentile
75
(
VS
.
fromList
lst
))
<>
" (75%) "
<>
show
(
percentile
90
(
VS
.
fromList
lst
))
<>
" (90%)
\n
"
)
p
tracePreBranches
::
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
tracePreBranches
bs
=
trace
(
show
(
length
bs
)
<>
" pre-branches"
<>
"
\n
"
<>
"with sizes : "
<>
show
(
map
length
bs
)
<>
"
\n
"
)
bs
src/Gargantext/Viz/Phylo/Metrics/Proximity.hs
View file @
38d3a9cd
...
...
@@ -17,18 +17,19 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.Metrics.Proximity
where
import
Data.List
(
null
)
import
Data.List
(
null
,
intersect
,
union
)
import
Data.Map
(
Map
,
elems
,
unionWith
,
intersectionWith
,
intersection
,
size
)
import
Gargantext.Prelude
import
Debug.Trace
(
trace
)
import
Gargantext.Viz.Phylo.Aggregates.Cooc
-- import Debug.Trace (trace)
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard
::
Double
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Double
weightedLogJaccard
s
f1
f2
|
null
wUnion
=
0
|
wUnion
==
wInter
=
1
|
s
==
0
=
trace
(
"==0"
)
$
(
fromIntegral
$
length
wInter
)
/
(
fromIntegral
$
length
wUnion
)
|
s
>
0
=
trace
(
">0"
)
$
(
sumInvLog
wInter
)
/
(
sumInvLog
wUnion
)
|
s
==
0
=
(
fromIntegral
$
length
wInter
)
/
(
fromIntegral
$
length
wUnion
)
|
s
>
0
=
(
sumInvLog
wInter
)
/
(
sumInvLog
wUnion
)
|
otherwise
=
(
sumLog
wInter
)
/
(
sumLog
wUnion
)
where
--------------------------------------
...
...
@@ -46,6 +47,37 @@ weightedLogJaccard s f1 f2
--------------------------------------
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard'
::
Double
->
[
Int
]
->
[
Int
]
->
Map
(
Int
,
Int
)
Double
->
Double
weightedLogJaccard'
s
idx
idx'
cooc
|
null
idxUnion
=
0
|
idxUnion
==
idxInter
=
1
|
s
==
0
=
(
fromIntegral
$
length
idxInter
)
/
(
fromIntegral
$
length
idxUnion
)
|
s
>
0
=
(
sumInvLog
wInter
)
/
(
sumInvLog
wUnion
)
|
otherwise
=
(
sumLog
wInter
)
/
(
sumLog
wUnion
)
where
--------------------------------------
wInter
::
[
Double
]
wInter
=
elems
$
getSubCooc
idxInter
cooc
--------------------------------------
wUnion
::
[
Double
]
wUnion
=
elems
$
getSubCooc
idxUnion
cooc
--------------------------------------
idxInter
::
[
Int
]
idxInter
=
intersect
idx
idx'
--------------------------------------
idxUnion
::
[
Int
]
idxUnion
=
union
idx
idx'
--------------------------------------
sumInvLog
::
[
Double
]
->
Double
sumInvLog
l
=
foldl
(
\
mem
x
->
mem
+
(
1
/
log
(
s
+
x
)))
0
l
--------------------------------------
sumLog
::
[
Double
]
->
Double
sumLog
l
=
foldl
(
\
mem
x
->
mem
+
log
(
s
+
x
))
0
l
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields
hamming
::
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Double
hamming
f1
f2
=
fromIntegral
$
max
((
size
inter
)
-
(
size
f1
))
((
size
inter
)
-
(
size
f2
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment