Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
92b4221b
Commit
92b4221b
authored
Aug 29, 2019
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add branching
parent
d3097207
Pipeline
#548
failed with stage
Changes
7
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
232 additions
and
94 deletions
+232
-94
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+8
-8
AdaptativePhylo.hs
src/Gargantext/Viz/AdaptativePhylo.hs
+45
-21
PhyloExample.hs
src/Gargantext/Viz/Phylo/PhyloExample.hs
+5
-6
PhyloMaker.hs
src/Gargantext/Viz/Phylo/PhyloMaker.hs
+12
-13
PhyloTools.hs
src/Gargantext/Viz/Phylo/PhyloTools.hs
+50
-1
SynchronicClustering.hs
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
+10
-2
TemporalMatching.hs
src/Gargantext/Viz/Phylo/TemporalMatching.hs
+102
-43
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
92b4221b
...
...
@@ -128,16 +128,16 @@ csvToCorpus limit path = Vector.toList
-- | To use the correct parser given a CorpusType
fileToCorpus
::
CorpusParser
->
Int
->
FilePath
->
IO
([(
Int
,
Text
)])
fileToCorpus
parser
limit
path
=
case
parser
of
Wos
->
wosToCorpus
limit
path
Csv
->
csvToCorpus
limit
path
fileToCorpus
::
CorpusParser
->
FilePath
->
IO
([(
Int
,
Text
)])
fileToCorpus
parser
path
=
case
parser
of
Wos
limit
->
wosToCorpus
limit
path
Csv
limit
->
csvToCorpus
limit
path
-- | To parse a file into a list of Document
fileToDocs
::
CorpusParser
->
Int
->
FilePath
->
TermList
->
IO
[
Document
]
fileToDocs
parser
limit
path
lst
=
do
corpus
<-
fileToCorpus
parser
limit
path
fileToDocs
::
CorpusParser
->
FilePath
->
TermList
->
IO
[
Document
]
fileToDocs
parser
path
lst
=
do
corpus
<-
fileToCorpus
parser
path
let
patterns
=
buildPatterns
lst
pure
$
map
(
(
\
(
y
,
t
)
->
Document
y
t
)
.
filterTerms
patterns
)
corpus
...
...
@@ -162,7 +162,7 @@ main = do
printIOMsg
"Parse the corpus"
mapList
<-
csvGraphTermList
(
listPath
config
)
corpus
<-
fileToDocs
(
corpusParser
config
)
(
corpus
Limit
config
)
(
corpus
Path
config
)
mapList
corpus
<-
fileToDocs
(
corpusParser
config
)
(
corpusPath
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOMsg
"Reconstruct the Phylo"
...
...
src/Gargantext/Viz/AdaptativePhylo.hs
View file @
92b4221b
...
...
@@ -50,46 +50,63 @@ import Control.Lens (makeLenses)
----------------
data
CorpusParser
=
Wos
|
Csv
deriving
(
Show
,
Generic
,
Eq
)
data
CorpusParser
=
Wos
{
_wos_limit
::
Int
}
|
Csv
{
_csv_limit
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
Proximity
=
WeightedLogJaccard
{
_wlj_sensibility
::
Double
,
_wlj_thresholdInit
::
Double
,
_wlj_thresholdStep
::
Double
}
|
Hamming
deriving
(
Show
,
Generic
,
Eq
)
data
TimeUnit
=
Year
{
_year_period
::
Int
,
_year_step
::
Int
,
_year_matchingFrame
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
ContextualUnit
=
Fis
{
_fis_support
::
Int
,
_fis_size
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
data
Proximity
=
WeightedLogJaccard
{
_sensibility
::
Double
}
|
Hamming
deriving
(
Show
,
Generic
,
Eq
)
data
Config
=
Config
{
corpusPath
::
FilePath
,
listPath
::
FilePath
,
outputPath
::
FilePath
,
corpusParser
::
CorpusParser
,
corpusLimit
::
Int
,
phyloName
::
Text
,
phyloLevel
::
Int
,
phyloQuality
::
Double
,
phyloProximity
::
Proximity
,
timeUnit
::
Int
,
maxTimeMatch
::
Int
,
timePeriod
::
Int
,
timeStep
::
Int
,
fisSupport
::
Int
,
fisSize
::
Int
,
timeUnit
::
TimeUnit
,
contextualUnit
::
ContextualUnit
,
branchSize
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
defaultConfig
::
Config
defaultConfig
=
Config
{
corpusPath
=
""
,
listPath
=
""
,
outputPath
=
""
,
corpusParser
=
Csv
,
corpusLimit
=
1000
,
corpusParser
=
Csv
1000
,
phyloName
=
pack
"Default Phylo"
,
phyloLevel
=
2
,
phyloProximity
=
WeightedLogJaccard
10
,
timeUnit
=
1
,
maxTimeMatch
=
5
,
timePeriod
=
3
,
timeStep
=
1
,
fisSupport
=
2
,
fisSize
=
4
,
phyloQuality
=
0.5
,
phyloProximity
=
WeightedLogJaccard
10
0
0.05
,
timeUnit
=
Year
3
1
5
,
contextualUnit
=
Fis
2
4
,
branchSize
=
3
}
...
...
@@ -99,6 +116,10 @@ instance FromJSON CorpusParser
instance
ToJSON
CorpusParser
instance
FromJSON
Proximity
instance
ToJSON
Proximity
instance
FromJSON
TimeUnit
instance
ToJSON
TimeUnit
instance
FromJSON
ContextualUnit
instance
ToJSON
ContextualUnit
-- | Software parameters
...
...
@@ -237,7 +258,7 @@ data PhyloGroup =
,
_phylo_groupLevelChilds
::
[
Pointer
]
,
_phylo_groupPeriodParents
::
[
Pointer
]
,
_phylo_groupPeriodChilds
::
[
Pointer
]
,
_phylo_group
BreakPointer
::
Maybe
Pointer
,
_phylo_group
GhostPointers
::
[
Pointer
]
}
deriving
(
Generic
,
Show
,
Eq
)
...
...
@@ -276,6 +297,9 @@ data PhyloFis = PhyloFis
----------------
makeLenses
''
C
onfig
makeLenses
''
P
roximity
makeLenses
''
C
ontextualUnit
makeLenses
''
T
imeUnit
makeLenses
''
P
hyloFoundations
makeLenses
''
P
hyloFis
makeLenses
''
P
hylo
...
...
src/Gargantext/Viz/Phylo/PhyloExample.hs
View file @
92b4221b
...
...
@@ -49,7 +49,7 @@ phylo1 = appendGroups fisToGroup 1 phyloFis phyloBase
phyloFis
::
Map
(
Date
,
Date
)
[
PhyloFis
]
phyloFis
=
toPhyloFis
docsByPeriods
(
fisSupport
config
)
(
fisSize
config
)
phyloFis
=
toPhyloFis
docsByPeriods
(
getFisSupport
$
contextualUnit
config
)
(
getFisSize
$
contextualUnit
config
)
docsByPeriods
::
Map
(
Date
,
Date
)
[
Document
]
...
...
@@ -66,23 +66,22 @@ phyloBase = toPhyloBase docs mapList config
phyloCooc
::
Map
Date
Cooc
phyloCooc
=
docsTo
CoocByYear
docs
(
foundations
^.
foundations_roots
)
config
phyloCooc
=
docsTo
TimeScaleCooc
docs
(
foundations
^.
foundations_roots
)
config
periods
::
[(
Date
,
Date
)]
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
timePeriod
config
)
(
timeStep
config
)
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
config
)
(
getTimeStep
$
timeUnit
config
)
nbDocsByYear
::
Map
Date
Double
nbDocsByYear
=
nbDocsByTime
docs
(
timeUnit
config
)
nbDocsByYear
=
docsToTimeScaleNb
docs
config
::
Config
config
=
defaultConfig
{
phyloName
=
"Cesar et Cleopatre"
,
branchSize
=
0
,
fisSupport
=
0
,
fisSize
=
0
}
,
contextualUnit
=
Fis
0
0
}
docs
::
[
Document
]
...
...
src/Gargantext/Viz/Phylo/PhyloMaker.hs
View file @
92b4221b
...
...
@@ -87,8 +87,7 @@ fisToGroup fis pId lvl idx fdt coocs =
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
1
,
[]
)
[]
[]
[]
[]
Nothing
[]
[]
[]
[]
[]
toPhylo1
::
[
Document
]
->
Phylo
->
Phylo
...
...
@@ -96,7 +95,7 @@ toPhylo1 docs phyloBase = appendGroups fisToGroup 1 phyloFis phyloBase
where
--------------------------------------
phyloFis
::
Map
(
Date
,
Date
)
[
PhyloFis
]
phyloFis
=
toPhyloFis
docs'
(
fisSupport
$
getConfig
phyloBase
)
(
fisSize
$
getConfig
phyloBase
)
phyloFis
=
toPhyloFis
docs'
(
getFisSupport
$
contextualUnit
$
getConfig
phyloBase
)
(
getFisSize
$
contextualUnit
$
getConfig
phyloBase
)
--------------------------------------
docs'
::
Map
(
Date
,
Date
)
[
Document
]
docs'
=
groupDocsByPeriod
date
(
getPeriodIds
phyloBase
)
docs
...
...
@@ -174,14 +173,14 @@ ngramsToCooc ngrams coocs =
-- | To transform the docs into a time map of coocurency matrix
docsTo
CoocByYear
::
[
Document
]
->
Vector
Ngrams
->
Config
->
Map
Date
Cooc
docsTo
CoocByYear
docs
fdt
conf
=
docsTo
TimeScaleCooc
::
[
Document
]
->
Vector
Ngrams
->
Config
->
Map
Date
Cooc
docsTo
TimeScaleCooc
docs
fdt
conf
=
let
mCooc
=
fromListWith
sumCooc
$
map
(
\
(
_d
,
l
)
->
(
_d
,
listToMatrix
l
))
$
map
(
\
doc
->
(
date
doc
,
sort
$
ngramsToIdx
(
text
doc
)
fdt
))
docs
mCooc'
=
fromList
$
map
(
\
t
->
(
t
,
empty
))
$
toTimeScale
(
map
date
docs
)
(
timeUnit
conf
)
$
toTimeScale
(
map
date
docs
)
1
in
trace
(
"
\n
"
<>
"-- | Build the coocurency matrix for "
<>
show
(
length
$
keys
mCooc'
)
<>
" unit of time"
<>
"
\n
"
)
$
unionWith
sumCooc
mCooc
mCooc'
...
...
@@ -208,11 +207,11 @@ groupDocsByPeriod f pds es =
--------------------------------------
-- | To count the number of docs by unit of time
(like a year)
nbDocsByTime
::
[
Document
]
->
Int
->
Map
Date
Double
nbDocsByTime
docs
step
=
-- | To count the number of docs by unit of time
docsToTimeScaleNb
::
[
Document
]
->
Map
Date
Double
docsToTimeScaleNb
docs
=
let
docs'
=
fromListWith
(
+
)
$
map
(
\
d
->
(
date
d
,
1
))
docs
time
=
fromList
$
map
(
\
t
->
(
t
,
0
))
$
toTimeScale
(
keys
docs'
)
step
time
=
fromList
$
map
(
\
t
->
(
t
,
0
))
$
toTimeScale
(
keys
docs'
)
1
in
trace
(
"
\n
"
<>
"-- | Group "
<>
show
(
length
docs
)
<>
" docs by "
<>
show
(
length
time
)
<>
" unit of time"
<>
"
\n
"
)
$
unionWith
(
+
)
time
docs'
...
...
@@ -227,10 +226,10 @@ toPhyloBase :: [Document] -> TermList -> Config -> Phylo
toPhyloBase
docs
lst
conf
=
let
foundations
=
PhyloFoundations
(
Vector
.
fromList
$
nub
$
concat
$
map
text
docs
)
lst
params
=
defaultPhyloParam
{
_phyloParam_config
=
conf
}
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
timePeriod
conf
)
(
timeStep
conf
)
periods
=
toPeriods
(
sort
$
nub
$
map
date
docs
)
(
getTimePeriod
$
timeUnit
conf
)
(
getTimeStep
$
timeUnit
conf
)
in
trace
(
"
\n
"
<>
"-- | Create PhyloBase out of "
<>
show
(
length
docs
)
<>
" docs
\n
"
)
$
Phylo
foundations
(
docsTo
CoocByYear
docs
(
foundations
^.
foundations_roots
)
conf
)
(
nbDocsByTime
docs
$
timeUnit
conf
)
(
docsTo
TimeScaleCooc
docs
(
foundations
^.
foundations_roots
)
conf
)
(
docsToTimeScaleNb
docs
)
params
(
fromList
$
map
(
\
prd
->
(
prd
,
PhyloPeriod
prd
(
initPhyloLevels
(
phyloLevel
conf
)
prd
)))
periods
)
src/Gargantext/Viz/Phylo/PhyloTools.hs
View file @
92b4221b
...
...
@@ -91,6 +91,18 @@ toTimeScale dates step =
in
[
start
,
(
start
+
step
)
..
end
]
getTimeStep
::
TimeUnit
->
Int
getTimeStep
time
=
case
time
of
Year
_
s
_
->
s
getTimePeriod
::
TimeUnit
->
Int
getTimePeriod
time
=
case
time
of
Year
p
_
_
->
p
getTimeFrame
::
TimeUnit
->
Int
getTimeFrame
time
=
case
time
of
Year
_
_
f
->
f
-------------
-- | Fis | --
-------------
...
...
@@ -136,6 +148,22 @@ traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map l
<>
"Clique : "
<>
(
traceClique
mFis
)
<>
"
\n
"
)
mFis
-------------------------
-- | Contextual unit | --
-------------------------
getFisSupport
::
ContextualUnit
->
Int
getFisSupport
unit
=
case
unit
of
Fis
s
_
->
s
_
->
panic
(
"[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a support"
)
getFisSize
::
ContextualUnit
->
Int
getFisSize
unit
=
case
unit
of
Fis
_
s
->
s
_
->
panic
(
"[ERR][Viz.Phylo.PhyloTools.getFisSupport] Only Fis has a clique size"
)
--------------
-- | Cooc | --
--------------
...
...
@@ -223,10 +251,31 @@ updatePhyloGroups lvl m phylo =
-- | Pointers | --
------------------
pointersToLinks
::
PhyloGroupId
->
[
Pointer
]
->
[
Link
]
pointersToLinks
id
pointers
=
map
(
\
p
->
((
id
,
fst
p
),
snd
p
))
pointers
mergeLinks
::
[
Link
]
->
[
Link
]
->
[
Link
]
mergeLinks
toChilds
toParents
=
let
toChilds'
=
fromList
$
map
(
\
((
from
,
to
),
w
)
->
((
to
,
from
),
w
))
toChilds
in
toList
$
unionWith
max
(
fromList
toParents
)
toChilds'
\ No newline at end of file
in
toList
$
unionWith
max
(
fromList
toParents
)
toChilds'
-------------------
-- | Proximity | --
-------------------
getSensibility
::
Proximity
->
Double
getSensibility
proxi
=
case
proxi
of
WeightedLogJaccard
s
_
_
->
s
Hamming
->
undefined
getThresholdInit
::
Proximity
->
Double
getThresholdInit
proxi
=
case
proxi
of
WeightedLogJaccard
_
t
_
->
t
Hamming
->
undefined
getThresholdStep
::
Proximity
->
Double
getThresholdStep
proxi
=
case
proxi
of
WeightedLogJaccard
_
_
s
->
s
Hamming
->
undefined
\ No newline at end of file
src/Gargantext/Viz/Phylo/SynchronicClustering.hs
View file @
92b4221b
...
...
@@ -19,11 +19,19 @@ import Gargantext.Prelude
import
Gargantext.Viz.AdaptativePhylo
import
Gargantext.Viz.Phylo.PhyloTools
import
Data.List
(
foldl'
,
(
++
),
null
,
intersect
,
(
\\
),
union
,
nub
,
concat
)
--------------------
-- | Clustering | --
--------------------
relatedComponents
::
[
PhyloGroup
]
->
[[
PhyloGroup
]]
relatedComponents
groups
=
undefined
\ No newline at end of file
relatedComponents
::
Eq
a
=>
[[
a
]]
->
[[
a
]]
relatedComponents
graphs
=
foldl'
(
\
mem
groups
->
if
(
null
mem
)
then
mem
++
[
groups
]
else
let
related
=
filter
(
\
groups'
->
(
not
.
null
)
$
intersect
groups
groups'
)
mem
in
if
(
null
related
)
then
mem
++
[
groups
]
else
(
mem
\\
related
)
++
[
union
groups
(
nub
$
concat
related
)]
)
[]
graphs
\ No newline at end of file
src/Gargantext/Viz/Phylo/TemporalMatching.hs
View file @
92b4221b
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment