Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
19e0bdc1
Commit
19e0bdc1
authored
Apr 19, 2019
by
Quentin Lobbé
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
tune the threshold and sensibility of the WeightedLogJaccard
parent
237a8f2b
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
58 additions
and
39 deletions
+58
-39
Main.hs
bin/gargantext-phylo/Main.hs
+16
-6
Cluster.hs
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
+8
-8
BranchMaker.hs
src/Gargantext/Viz/Phylo/BranchMaker.hs
+8
-2
Example.hs
src/Gargantext/Viz/Phylo/Example.hs
+5
-5
LevelMaker.hs
src/Gargantext/Viz/Phylo/LevelMaker.hs
+1
-1
Clustering.hs
src/Gargantext/Viz/Phylo/Metrics/Clustering.hs
+1
-1
Proximity.hs
src/Gargantext/Viz/Phylo/Metrics/Proximity.hs
+1
-0
Tools.hs
src/Gargantext/Viz/Phylo/Tools.hs
+15
-2
Export.hs
src/Gargantext/Viz/Phylo/View/Export.hs
+2
-2
Taggers.hs
src/Gargantext/Viz/Phylo/View/Taggers.hs
+1
-12
No files found.
bin/gargantext-phylo/Main.hs
View file @
19e0bdc1
...
...
@@ -30,6 +30,7 @@ import Gargantext.Prelude
import
Gargantext.Text.List.CSV
(
csvGraphTermList
)
import
Gargantext.Text.Parsers.CSV
(
readCsv
,
csv_title
,
csv_abstract
,
csv_publication_year
)
import
Gargantext.Text.Terms.WithList
import
System.Environment
import
Gargantext.Viz.Phylo
...
...
@@ -64,10 +65,8 @@ filterTerms patterns (year', doc) = (year',termsInText patterns doc)
termsInText
pats
txt
=
DL
.
nub
$
DL
.
concat
$
map
(
map
unwords
)
$
extractTermsWithList
pats
txt
-- csvToCorpus :: Int -> FilePath -> IO (DM.Map Int [Text])
csvToCorpus
::
Int
->
FilePath
->
IO
([(
Int
,
Text
)])
csvToCorpus
limit
csv
=
DV
.
toList
-- DM.fromListWith (<>)
.
DV
.
take
limit
.
DV
.
map
(
\
n
->
(
csv_publication_year
n
,
(
csv_title
n
)
<>
" "
<>
(
csv_abstract
n
)))
.
snd
<$>
readCsv
csv
...
...
@@ -87,20 +86,31 @@ parse limit corpus liste = do
main
::
IO
()
main
=
do
-- [corpusFile, termListFile, outputFile] <- getArgs
-- [corpusPath, termListPath, outputPath] <- getArgs
let
corpusPath
=
"/home/qlobbe/data/epique/corpus/cultural_evolution/texts/fullCorpus.csv"
let
termListPath
=
"/home/qlobbe/data/epique/corpus/cultural_evolution/termList.csv"
let
outputPath
=
"/home/qlobbe/data/epique/output/cultural_evolution.dot"
let
query
=
PhyloQueryBuild
"cultural_evolution"
"Test"
5
3
defaultFis
[]
[]
defaultWeightedLogJaccard
3
defaultRelatedComponents
let
query
=
PhyloQueryBuild
"cultural_evolution"
""
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0
0
)
2
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.1
10
)
let
queryView
=
PhyloQueryView
2
Merge
False
1
[
BranchAge
]
[
defaultSmallBranch
]
[
BranchPeakFreq
,
GroupLabelCooc
]
(
Just
(
ByBranchAge
,
Asc
))
Json
Flat
True
corpus
<-
parse
5000
corpusPath
termListPath
putStrLn
$
show
"-- Start parsing the corpus"
corpus
<-
parse
500
corpusPath
termListPath
let
foundations
=
DL
.
nub
$
DL
.
concat
$
map
text
corpus
-- putStrLn $ show $ csvGraphTermList termListPath
-- putStrLn $ show (map text corpus)
-- foundations <- DL.concat <$> DL.concat <$> map snd <$> csvGraphTermList termListPath
-- putStrLn $ show foundations
-- a <- map snd <$> csvGraphTermList liste
let
phylo
=
toPhylo
query
corpus
foundations
[]
...
...
src/Gargantext/Viz/Phylo/Aggregates/Cluster.hs
View file @
19e0bdc1
...
...
@@ -36,11 +36,11 @@ graphToClusters clust (nodes,edges) = case clust of
_
->
panic
"[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
-- | To transform a Phylo into Clusters of PhyloGroups at a given level
phyloToClusters
::
Level
->
Proximity
->
Cluster
->
Phylo
->
Map
(
Date
,
Date
)
[
PhyloCluster
]
phyloToClusters
lvl
prox
clus
p
=
Map
.
fromList
$
zip
(
getPhyloPeriods
p
)
(
map
(
\
prd
->
let
graph
=
groupsToGraph
prox
(
getGroupsWithFilters
lvl
prd
p
)
p
in
if
null
(
fst
graph
)
then
[]
else
graphToClusters
clus
graph
)
(
getPhyloPeriods
p
))
phyloToClusters
::
Level
->
Cluster
->
Phylo
->
Map
(
Date
,
Date
)
[
PhyloCluster
]
phyloToClusters
lvl
clus
p
=
Map
.
fromList
$
zip
(
getPhyloPeriods
p
)
(
map
(
\
prd
->
let
graph
=
groupsToGraph
(
getProximity
clus
)
(
getGroupsWithFilters
lvl
prd
p
)
p
in
if
null
(
fst
graph
)
then
[]
else
graphToClusters
clus
graph
)
(
getPhyloPeriods
p
))
src/Gargantext/Viz/Phylo/BranchMaker.hs
View file @
19e0bdc1
...
...
@@ -20,11 +20,13 @@ module Gargantext.Viz.Phylo.BranchMaker
import
Control.Lens
hiding
(
both
,
Level
)
import
Data.List
(
concat
,
nub
,(
++
),
tail
)
import
Data.Tuple
(
fst
,
snd
)
import
Data.Map
(
Map
,
fromList
,
toList
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Metrics.Clustering
import
Gargantext.Viz.Phylo.Metrics.Proximity
import
Gargantext.Viz.Phylo.Tools
-- import Debug.Trace (trace)
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
...
...
@@ -35,6 +37,10 @@ graphToBranches _lvl (nodes,edges) _p = concat
$
relatedComp
0
(
head'
"branchMaker"
nodes
)
(
tail
nodes
,
edges
)
[]
[]
mirror
::
Ord
a
=>
Map
(
a
,
a
)
b
->
Map
(
a
,
a
)
b
mirror
m
=
fromList
$
concat
$
map
(
\
((
k
,
k'
),
v
)
->
[((
k
,
k'
),
v
),((
k'
,
k
),
v
)])
$
toList
m
-- | To transform a list of PhyloGroups into a PhyloGraph by using a given Proximity mesure
groupsToGraph
::
Proximity
->
[
PhyloGroup
]
->
Phylo
->
GroupGraph
groupsToGraph
prox
groups
p
=
(
groups
,
edges
)
...
...
@@ -44,8 +50,8 @@ groupsToGraph prox groups p = (groups,edges)
Filiation
->
(
nub
.
concat
)
$
map
(
\
g
->
(
map
(
\
g'
->
((
g'
,
g
),
1
))
$
getGroupParents
g
p
)
++
(
map
(
\
g'
->
((
g
,
g'
),
1
))
$
getGroupChilds
g
p
))
groups
WeightedLogJaccard
(
WLJParams
thr
sens
)
->
filter
(
\
edge
->
snd
edge
>=
thr
)
$
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard
sens
(
getGroupCooc
x
)
(
unifySharedKeys
(
getGroupCooc
x
)
(
getGroupCooc
y
)
)))
WeightedLogJaccard
(
WLJParams
thr
sens
)
->
filter
(
\
(
_
,
v
)
->
v
>=
thr
)
$
map
(
\
(
x
,
y
)
->
((
x
,
y
),
weightedLogJaccard
sens
(
mirror
$
getGroupCooc
x
)
(
mirror
$
getGroupCooc
y
)))
$
listToDirectedCombi
groups
Hamming
(
HammingParams
thr
)
->
filter
(
\
edge
->
snd
edge
<=
thr
)
$
map
(
\
(
x
,
y
)
->
((
x
,
y
),
hamming
(
getGroupCooc
x
)
(
unifySharedKeys
(
getGroupCooc
x
)
(
getGroupCooc
y
))))
...
...
src/Gargantext/Viz/Phylo/Example.hs
View file @
19e0bdc1
...
...
@@ -54,7 +54,7 @@ import qualified Data.List as List
------------------------------------------------------
export
::
IO
()
export
=
dotToFile
"
./export_test"
"
cesar_cleopatre.dot"
phyloDot
export
=
dotToFile
"
/home/qlobbe/data/epique/output/
cesar_cleopatre.dot"
phyloDot
phyloDot
::
DotGraph
DotId
phyloDot
=
viewToDot
phyloView
...
...
@@ -103,7 +103,7 @@ queryEx = "title=Cesar et Cleôpatre"
phyloQueryBuild
::
PhyloQueryBuild
phyloQueryBuild
=
PhyloQueryBuild
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
5
3
defaultFis
[]
[]
defaultWeightedLogJaccard
3
defaultRelatedComponents
5
3
defaultFis
[]
[]
(
WeightedLogJaccard
$
WLJParams
0.0001
10
)
3
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.05
10
)
...
...
@@ -113,7 +113,7 @@ phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy
phylo6
::
Phylo
phylo6
=
toNthLevel
6
defaultWeightedLogJaccard
defaultRelatedComponents
phylo3
phylo6
=
toNthLevel
6
defaultWeightedLogJaccard
(
RelatedComponents
(
initRelatedComponents
(
Just
defaultWeightedLogJaccard
)))
phylo3
phylo3
::
Phylo
...
...
@@ -122,7 +122,7 @@ phylo3 = setPhyloBranches 3
$
interTempoMatching
Ascendant
3
defaultWeightedLogJaccard
$
setLevelLinks
(
2
,
3
)
$
addPhyloLevel
3
(
phyloToClusters
2
defaultWeightedLogJaccard
defaultRelatedComponents
phyloBranch2
)
(
phyloToClusters
2
(
RelatedComponents
(
initRelatedComponents
(
Just
defaultWeightedLogJaccard
)))
phyloBranch2
)
phyloBranch2
...
...
@@ -153,7 +153,7 @@ phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster
::
Map
(
Date
,
Date
)
[
PhyloCluster
]
phyloCluster
=
phyloToClusters
1
defaultWeightedLogJaccard
defaultRelatedComponents
phyloBranch1
phyloCluster
=
phyloToClusters
1
(
RelatedComponents
$
RCParams
$
WeightedLogJaccard
$
WLJParams
0.05
10
)
phyloBranch1
----------------------------------
...
...
src/Gargantext/Viz/Phylo/LevelMaker.hs
View file @
19e0bdc1
...
...
@@ -149,7 +149,7 @@ toNthLevel lvlMax prox clus p
$
interTempoMatching
Ascendant
(
lvl
+
1
)
prox
$
setLevelLinks
(
lvl
,
lvl
+
1
)
$
addPhyloLevel
(
lvl
+
1
)
(
phyloToClusters
lvl
(
getProximity
clus
)
clus
p
)
p
(
phyloToClusters
lvl
clus
p
)
p
where
--------------------------------------
lvl
::
Level
...
...
src/Gargantext/Viz/Phylo/Metrics/Clustering.hs
View file @
19e0bdc1
...
...
@@ -43,7 +43,7 @@ relatedComp idx curr (nodes,edges) next memo
|
otherwise
=
memo
++
[[
curr
]]
--------------------------------------
next'
::
[
PhyloGroup
]
next'
=
filter
(
\
x
->
not
$
elem
x
$
concat
memo
)
$
nub
$
next
++
(
getNeighbours
Tru
e
curr
edges
)
next'
=
filter
(
\
x
->
not
$
elem
x
$
concat
memo
)
$
nub
$
next
++
(
getNeighbours
Fals
e
curr
edges
)
--------------------------------------
nodes'
::
[
PhyloGroup
]
nodes'
=
filter
(
\
x
->
not
$
elem
x
next'
)
nodes
...
...
src/Gargantext/Viz/Phylo/Metrics/Proximity.hs
View file @
19e0bdc1
...
...
@@ -20,6 +20,7 @@ module Gargantext.Viz.Phylo.Metrics.Proximity
import
Data.List
(
null
)
import
Data.Map
(
Map
,
elems
,
unionWith
,
intersectionWith
,
intersection
,
size
)
import
Gargantext.Prelude
-- import Debug.Trace (trace)
-- | To process the weightedLogJaccard between two PhyloGroup fields
weightedLogJaccard
::
Double
->
Map
(
Int
,
Int
)
Double
->
Map
(
Int
,
Int
)
Double
->
Double
...
...
src/Gargantext/Viz/Phylo/Tools.hs
View file @
19e0bdc1
...
...
@@ -24,7 +24,7 @@ import Data.List (filter, intersect, (++), sort, null, tail, last, ta
import
Data.Maybe
(
mapMaybe
,
fromMaybe
)
import
Data.Map
(
Map
,
mapKeys
,
member
,
(
!
))
import
Data.Set
(
Set
)
import
Data.Text
(
Text
,
toLower
)
import
Data.Text
(
Text
,
toLower
,
unwords
)
import
Data.Tuple.Extra
import
Data.Vector
(
Vector
,
elemIndex
)
import
Gargantext.Prelude
...
...
@@ -125,6 +125,16 @@ listToUnDirectedCombiWith :: forall a b. (a -> b) -> [a] -> [(b,b)]
listToUnDirectedCombiWith
f
l
=
[
(
f
x
,
f
y
)
|
(
x
:
rest
)
<-
tails
l
,
y
<-
rest
]
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel
::
Vector
Ngrams
->
[
Int
]
->
Text
ngramsToLabel
ngrams
l
=
unwords
$
ngramsToText
ngrams
l
-- | To transform a list of Ngrams Indexes into a list of Text
ngramsToText
::
Vector
Ngrams
->
[
Int
]
->
[
Text
]
ngramsToText
ngrams
l
=
map
(
\
idx
->
ngrams
Vector
.!
idx
)
l
-- | To unify the keys (x,y) that Map 1 share with Map 2 such as: (x,y) <=> (y,x)
unifySharedKeys
::
Eq
a
=>
Ord
a
=>
Map
(
a
,
a
)
b
->
Map
(
a
,
a
)
b
->
Map
(
a
,
a
)
b
unifySharedKeys
m1
m2
=
mapKeys
(
\
(
x
,
y
)
->
if
member
(
y
,
x
)
m2
...
...
@@ -136,7 +146,6 @@ unifySharedKeys m1 m2 = mapKeys (\(x,y) -> if member (y,x) m2
-- | Phylo | --
---------------
-- | An analyzer ingests a Ngrams and generates a modified version of it
phyloAnalyzer
::
Ngrams
->
Ngrams
phyloAnalyzer
n
=
toLower
n
...
...
@@ -342,6 +351,10 @@ getGroupPeriodParents = _phylo_groupPeriodParents
getGroupPeriodParentsId
::
PhyloGroup
->
[
PhyloGroupId
]
getGroupPeriodParentsId
g
=
map
fst
$
getGroupPeriodParents
g
-- | To get the roots labels of a list of group ngrams
getGroupText
::
PhyloGroup
->
Phylo
->
[
Text
]
getGroupText
g
p
=
ngramsToText
(
getRootsLabels
p
)
(
getGroupNgrams
g
)
-- | To get all the PhyloGroup of a Phylo
getGroups
::
Phylo
->
[
PhyloGroup
]
...
...
src/Gargantext/Viz/Phylo/View/Export.hs
View file @
19e0bdc1
...
...
@@ -47,8 +47,8 @@ type DotId = T'.Text
-- | Dot to File | --
---------------------
dotToFile
::
FilePath
->
FilePath
->
DotGraph
DotId
->
IO
()
dotToFile
filePath
fileName
dotG
=
writeFile
(
combine
filePath
fileName
)
$
dotToString
dotG
dotToFile
::
FilePath
->
DotGraph
DotId
->
IO
()
dotToFile
filePath
dotG
=
writeFile
filePath
$
dotToString
dotG
dotToString
::
DotGraph
DotId
->
[
Char
]
dotToString
dotG
=
unpack
(
printDotGraph
dotG
)
...
...
src/Gargantext/Viz/Phylo/View/Taggers.hs
View file @
19e0bdc1
...
...
@@ -19,24 +19,13 @@ module Gargantext.Viz.Phylo.View.Taggers
import
Control.Lens
hiding
(
makeLenses
,
both
,
Level
)
import
Data.List
(
concat
,
nub
,
groupBy
,
sortOn
,
sort
)
import
Data.Text
(
Text
,
unwords
)
import
Data.Text
(
Text
)
import
Data.Tuple
(
fst
,
snd
)
import
Data.Vector
(
Vector
)
import
Gargantext.Prelude
import
Gargantext.Viz.Phylo
import
Gargantext.Viz.Phylo.Tools
import
qualified
Data.Map
as
Map
import
qualified
Data.Vector
as
Vector
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel
::
Vector
Ngrams
->
[
Int
]
->
Text
ngramsToLabel
ngrams
l
=
unwords
$
ngramsToText
ngrams
l
-- | To transform a list of Ngrams Indexes into a list of Text
ngramsToText
::
Vector
Ngrams
->
[
Int
]
->
[
Text
]
ngramsToText
ngrams
l
=
map
(
\
idx
->
ngrams
Vector
.!
idx
)
l
-- | To get the nth most frequent Ngrams in a list of PhyloGroups
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment