Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
38d3a9cd
Commit
38d3a9cd
authored
May 21, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
remove bad tracers
parent
5e332ef8
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
109 additions
and
20 deletions
+109
-20
Main.hs
bin/gargantext-phylo/Main.hs
+2
-0
Cluster.hs
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
+9
-10
Example.hs
src/Gargantext/Viz/Phylo/Example.hs
+3
-3
LinkMaker.hs
src/Gargantext/Viz/Phylo/LinkMaker.hs
+59
-3
Proximity.hs
src/Gargantext/Viz/Phylo/Metrics/Proximity.hs
+36
-4
No files found.
bin/gargantext-phylo/Main.hs
View file @
38d3a9cd
...
...
@@ -115,7 +115,9 @@ filterTerms patterns (y,d) = (y,termsInText patterns d)
-- | To transform a Csv nfile into a readable corpus
csvToCorpus
::
Limit
->
CorpusPath
->
IO
([(
Int
,
Text
)])
csvToCorpus
limit
csv
=
DV
.
toList
-- . DV.reverse
.
DV
.
take
limit
-- . DV.reverse
.
DV
.
map
(
\
n
->
(
csv_publication_year
n
,
(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)))
.
snd
<$>
CSV
.
readFile
csv
...
...
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
View file @
38d3a9cd
...
...
@@ -49,10 +49,12 @@ graphToClusters clust (nodes,edges) = case clust of
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph
::
Proximity
->
[
PhyloGroup
]
->
Map
(
Int
,
Int
)
Double
->
Phylo
->
([
GroupNode
],[
GroupEdge
])
groupsToGraph
prox
gs
cooc
p
=
case
prox
of
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
traceSim
x
y
(
getSubCooc
(
getGroupNgrams
x
)
cooc
)
(
getSubCooc
(
getGroupNgrams
y
)
cooc
)
p
$
weightedLogJaccard
sens
(
getSubCooc
(
getGroupNgrams
x
)
cooc
)
(
getSubCooc
(
getGroupNgrams
y
)
cooc
)))
groupsToGraph
::
Proximity
->
[
PhyloGroup
]
->
Map
(
Int
,
Int
)
Double
->
([
GroupNode
],[
GroupEdge
])
groupsToGraph
prox
gs
cooc
=
case
prox
of
-- WeightedLogJaccard (WLJParams _ sens) -> (gs, map (\(x,y) -> ((x,y), traceSim x y (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc) p
-- $ weightedLogJaccard sens (getSubCooc (getGroupNgrams x) cooc) (getSubCooc (getGroupNgrams y) cooc)))
-- $ getCandidates gs)
WeightedLogJaccard
(
WLJParams
_
sens
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard'
sens
(
getGroupNgrams
x
)
(
getGroupNgrams
y
)
cooc
))
$
getCandidates
gs
)
Hamming
(
HammingParams
_
)
->
(
gs
,
map
(
\
(
x
,
y
)
->
((
x
,
y
),
hamming
(
getSubCooc
(
getGroupNgrams
x
)
cooc
)
(
getSubCooc
(
getGroupNgrams
y
)
cooc
)))
$
getCandidates
gs
)
...
...
@@ -82,7 +84,7 @@ phyloToClusters lvl clus p = Map.fromList
--------------------------------------
graphs
::
[([
GroupNode
],[
GroupEdge
])]
graphs
=
traceGraph
lvl
(
getThreshold
prox
)
$
map
(
\
prd
->
groupsToGraph
prox
(
getGroupsWithFilters
lvl
prd
p
)
(
getCooc
[
prd
]
p
)
p
)
periods
$
map
(
\
prd
->
groupsToGraph
prox
(
getGroupsWithFilters
lvl
prd
p
)
(
getCooc
[
prd
]
p
))
periods
--------------------------------------
prox
::
Proximity
prox
=
getProximity
clus
...
...
@@ -100,7 +102,6 @@ phyloToClusters lvl clus p = Map.fromList
traceGraph
::
Level
->
Double
->
[([
GroupNode
],[
GroupEdge
])]
->
[([
GroupNode
],[
GroupEdge
])]
traceGraph
lvl
thr
g
=
trace
(
"----
\n
Unfiltered clustering in Phylo"
<>
show
(
lvl
)
<>
" :
\n
"
<>
"count : "
<>
show
(
length
lst
)
<>
" potential edges ("
<>
show
(
length
$
filter
(
>=
thr
)
lst
)
<>
" >= "
<>
show
(
thr
)
<>
")
\n
"
<>
show
(
lst
)
<>
"
\n
"
<>
"similarity : "
<>
show
(
percentile
25
(
VS
.
fromList
lst
))
<>
" (25%) "
<>
show
(
percentile
50
(
VS
.
fromList
lst
))
<>
" (50%) "
<>
show
(
percentile
75
(
VS
.
fromList
lst
))
<>
" (75%) "
...
...
@@ -120,7 +121,5 @@ traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <>
lst
=
sort
$
map
snd
$
concat
$
map
snd
g
traceSim
::
PhyloGroup
->
PhyloGroup
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Phylo
->
Double
->
Double
traceSim
g
g'
_
_
p
sim
=
trace
(
show
(
getGroupText
g
p
)
<>
" [vs] "
<>
show
(
getGroupText
g'
p
)
<>
" = "
<>
show
(
sim
)
<>
"
\n
"
-- <> show (c) <> " [vs] " <> show (c') <> " = " <> show (sim)
)
sim
-- traceSim :: PhyloGroup -> PhyloGroup -> Phylo -> Double -> Double
-- traceSim g g' p sim = trace (show (getGroupText g p) <> " [vs] " <> show (getGroupText g' p) <> " = " <> show (sim) <> "\n") sim
src/Gargantext/Viz/Phylo/Example.hs
View file @
38d3a9cd
...
...
@@ -55,7 +55,7 @@ import qualified Data.List as List
------------------------------------------------------
export
::
IO
()
export
=
dotToFile
"/home/qlobbe/data/
epique
/output/cesar_cleopatre.dot"
phyloDot
export
=
dotToFile
"/home/qlobbe/data/
phylo
/output/cesar_cleopatre.dot"
phyloDot
phyloDot
::
DotGraph
DotId
phyloDot
=
viewToDot
phyloView
...
...
@@ -77,7 +77,7 @@ queryViewEx = "level=3"
phyloQueryView
::
PhyloQueryView
phyloQueryView
=
PhyloQueryView
1
Merge
False
1
[
BranchAge
]
[]
[
BranchPeakFreq
,
GroupLabelCooc
]
(
Just
(
ByBranchAge
,
Asc
))
Json
Flat
True
phyloQueryView
=
PhyloQueryView
2
Merge
False
1
[
BranchAge
]
[]
[
BranchPeakFreq
,
GroupLabelCooc
]
(
Just
(
ByBranchAge
,
Asc
))
Json
Flat
True
--------------------------------------------------
...
...
@@ -104,7 +104,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild
::
PhyloQueryBuild
phyloQueryBuild
=
PhyloQueryBuild
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
1
10
)
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.13
0
)
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.
3
10
)
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.5
0
)
...
...
src/Gargantext/Viz/Phylo/LinkMaker.hs
View file @
38d3a9cd
...
...
@@ -89,7 +89,8 @@ setLevelLinks (lvl,lvl') p = alterPhyloGroups (\gs -> map (\g -> if getGroupLeve
-- | To apply the corresponding proximity function based on a given Proximity
applyProximity
::
Proximity
->
PhyloGroup
->
PhyloGroup
->
Map
(
Int
,
Int
)
Double
->
(
PhyloGroupId
,
Double
)
applyProximity
prox
g1
g2
cooc
=
case
prox
of
WeightedLogJaccard
(
WLJParams
_
s
)
->
((
getGroupId
g2
),
weightedLogJaccard
s
(
getSubCooc
(
getGroupNgrams
g1
)
cooc
)
(
getSubCooc
(
getGroupNgrams
g2
)
cooc
))
-- WeightedLogJaccard (WLJParams _ s) -> ((getGroupId g2), weightedLogJaccard s (getSubCooc (getGroupNgrams g1) cooc) (getSubCooc (getGroupNgrams g2) cooc))
WeightedLogJaccard
(
WLJParams
_
s
)
->
((
getGroupId
g2
),
weightedLogJaccard'
s
(
getGroupNgrams
g1
)
(
getGroupNgrams
g2
)
cooc
)
Hamming
(
HammingParams
_
)
->
((
getGroupId
g2
),
hamming
(
getSubCooc
(
getGroupNgrams
g1
)
cooc
)
(
getSubCooc
(
getGroupNgrams
g2
)
cooc
))
_
->
panic
(
"[ERR][Viz.Phylo.Example.applyProximity] Proximity function not defined"
)
...
...
@@ -166,6 +167,51 @@ filterCandidates g gs = filter (\g' -> (not . null) $ intersect (getGroupNgrams
$
delete
g
gs
-- | a init avec la [[head groups]] et la tail groups
toBranches
::
[[
PhyloGroup
]]
->
[
PhyloGroup
]
->
[[
PhyloGroup
]]
toBranches
mem
gs
|
null
gs
=
mem
|
otherwise
=
toBranches
mem'
$
tail
gs
where
--------------------------------------
mem'
::
[[
PhyloGroup
]]
mem'
=
if
(
null
withHead
)
then
mem
++
[[
head'
"toBranches"
gs
]]
else
(
filter
(
\
gs'
->
not
$
elem
gs'
withHead
)
mem
)
++
[(
concat
withHead
)
++
[
head'
"toBranches"
gs
]]
--------------------------------------
withHead
::
[[
PhyloGroup
]]
withHead
=
filter
(
\
gs'
->
(
not
.
null
)
$
intersect
(
concat
$
map
getGroupNgrams
gs'
)
(
getGroupNgrams
$
(
head'
"toBranches"
gs
))
)
mem
--------------------------------------
-- | a init avec la [[head groups]] et la tail groups
toBranches'
::
[[[
Int
]]]
->
[[
Int
]]
->
[[[
Int
]]]
toBranches'
mem
gs
|
null
gs
=
mem
|
otherwise
=
toBranches'
mem'
$
tail
gs
where
--------------------------------------
mem'
::
[[[
Int
]]]
mem'
=
if
(
null
withHead
)
then
mem
++
[[
head'
"toBranches"
gs
]]
else
(
filter
(
\
gs'
->
not
$
elem
gs'
withHead
)
mem
)
++
[(
concat
withHead
)
++
[
head'
"toBranches"
gs
]]
--------------------------------------
withHead
::
[[[
Int
]]]
withHead
=
filter
(
\
gs'
->
(
not
.
null
)
$
intersect
(
concat
gs'
)
(
head'
"toBranches"
gs
)
)
mem
--------------------------------------
-- | To apply the intertemporal matching to Phylo at a given level
interTempoMatching
::
Filiation
->
Level
->
Proximity
->
Phylo
->
Phylo
...
...
@@ -179,12 +225,18 @@ interTempoMatching fil lvl prox p = traceMatching fil lvl (getThreshold prox) sc
scores
::
[
Double
]
scores
=
sort
$
concat
$
map
(
snd
.
snd
)
candidates
--------------------------------------
-- candidates' :: [(PhyloGroupId,([Pointer],[Double]))]
-- candidates' = map (\g -> ( getGroupId g, findBestCandidates' fil 1 5 prox (getNextPeriods fil (getGroupPeriod g) prds) (filterCandidates g gs) g p)) gs
--------------------------------------
candidates
::
[(
PhyloGroupId
,([
Pointer
],[
Double
]))]
candidates
=
map
(
\
g
->
(
getGroupId
g
,
findBestCandidates'
fil
1
5
prox
(
getNextPeriods
fil
(
getGroupPeriod
g
)
prds
)
(
filterCandidates
g
gs
)
g
p
))
g
s
candidates
=
concat
$
map
(
\
b
->
map
(
\
g
->
(
getGroupId
g
,
findBestCandidates'
fil
1
5
prox
(
getNextPeriods
fil
(
getGroupPeriod
g
)
prds
)
(
filterCandidates
g
gs
)
g
p
))
b
)
b
s
--------------------------------------
gs
::
[
PhyloGroup
]
gs
=
getGroupsWithLevel
lvl
p
--------------------------------------
bs
::
[[
PhyloGroup
]]
bs
=
tracePreBranches
$
toBranches
[[
head'
"interTempoMatching"
gs
]]
$
tail
gs
--------------------------------------
prds
::
[
PhyloPeriodId
]
prds
=
getPhyloPeriods
p
--------------------------------------
...
...
@@ -230,3 +282,7 @@ traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered
<>
show
(
percentile
75
(
VS
.
fromList
lst
))
<>
" (75%) "
<>
show
(
percentile
90
(
VS
.
fromList
lst
))
<>
" (90%)
\n
"
)
p
tracePreBranches
::
[[
PhyloGroup
]]
->
[[
PhyloGroup
]]
tracePreBranches
bs
=
trace
(
show
(
length
bs
)
<>
" pre-branches"
<>
"
\n
"
<>
"with sizes : "
<>
show
(
map
length
bs
)
<>
"
\n
"
)
bs
src/Gargantext/Viz/Phylo/Metrics/Proximity.hs
View file @
38d3a9cd
...
...
@@ -17,18 +17,19 @@ Portability : POSIX
module
Gargantext.Viz.Phylo.Metrics.Proximity
where
import
Data.List
(
null
)
import
Data.List
(
null
,
intersect
,
union
)
import
Data.Map
(
Map
,
elems
,
unionWith
,
intersectionWith
,
intersection
,
size
)
import
Gargantext.Prelude
import
Debug.Trace
(
trace
)
import
Gargantext.Viz.Phylo.Aggregates.Cooc
-- import Debug.Trace (trace)
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard
::
Double
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Double
weightedLogJaccard
s
f1
f2
|
null
wUnion
=
0
|
wUnion
==
wInter
=
1
|
s
==
0
=
trace
(
"==0"
)
$
(
fromIntegral
$
length
wInter
)
/
(
fromIntegral
$
length
wUnion
)
|
s
>
0
=
trace
(
">0"
)
$
(
sumInvLog
wInter
)
/
(
sumInvLog
wUnion
)
|
s
==
0
=
(
fromIntegral
$
length
wInter
)
/
(
fromIntegral
$
length
wUnion
)
|
s
>
0
=
(
sumInvLog
wInter
)
/
(
sumInvLog
wUnion
)
|
otherwise
=
(
sumLog
wInter
)
/
(
sumLog
wUnion
)
where
--------------------------------------
...
...
@@ -46,6 +47,37 @@ weightedLogJaccard s f1 f2
--------------------------------------
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard'
::
Double
->
[
Int
]
->
[
Int
]
->
Map
(
Int
,
Int
)
Double
->
Double
weightedLogJaccard'
s
idx
idx'
cooc
|
null
idxUnion
=
0
|
idxUnion
==
idxInter
=
1
|
s
==
0
=
(
fromIntegral
$
length
idxInter
)
/
(
fromIntegral
$
length
idxUnion
)
|
s
>
0
=
(
sumInvLog
wInter
)
/
(
sumInvLog
wUnion
)
|
otherwise
=
(
sumLog
wInter
)
/
(
sumLog
wUnion
)
where
--------------------------------------
wInter
::
[
Double
]
wInter
=
elems
$
getSubCooc
idxInter
cooc
--------------------------------------
wUnion
::
[
Double
]
wUnion
=
elems
$
getSubCooc
idxUnion
cooc
--------------------------------------
idxInter
::
[
Int
]
idxInter
=
intersect
idx
idx'
--------------------------------------
idxUnion
::
[
Int
]
idxUnion
=
union
idx
idx'
--------------------------------------
sumInvLog
::
[
Double
]
->
Double
sumInvLog
l
=
foldl
(
\
mem
x
->
mem
+
(
1
/
log
(
s
+
x
)))
0
l
--------------------------------------
sumLog
::
[
Double
]
->
Double
sumLog
l
=
foldl
(
\
mem
x
->
mem
+
log
(
s
+
x
))
0
l
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields
hamming
::
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Double
hamming
f1
f2
=
fromIntegral
$
max
((
size
inter
)
-
(
size
f1
))
((
size
inter
)
-
(
size
f2
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment