Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
199
Issues
199
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
0b0ee22a
Commit
0b0ee22a
authored
Apr 12, 2021
by
qlobbe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add weighted csv
parent
3e6c4d4a
Pipeline
#1443
failed with stage
Changes
8
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
125 additions
and
48 deletions
+125
-48
Main.hs
bin/gargantext-adaptative-phylo/Main.hs
+42
-38
CSV.hs
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
+31
-0
FrequentItemSet.hs
src/Gargantext/Core/Text/Metrics/FrequentItemSet.hs
+25
-0
AdaptativePhylo.hs
src/Gargantext/Core/Viz/AdaptativePhylo.hs
+6
-3
PhyloExample.hs
src/Gargantext/Core/Viz/Phylo/PhyloExample.hs
+4
-2
PhyloExport.hs
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
+1
-0
PhyloMaker.hs
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
+11
-4
SynchronicClustering.hs
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
+5
-1
No files found.
bin/gargantext-adaptative-phylo/Main.hs
View file @
0b0ee22a
...
@@ -25,7 +25,7 @@ import Crypto.Hash.SHA256 (hash)
...
@@ -25,7 +25,7 @@ import Crypto.Hash.SHA256 (hash)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Database.Admin.Types.Hyperdata
(
HyperdataDocument
(
..
))
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
)
import
Gargantext.Core.Text.Corpus.Parsers.CSV
(
csv_title
,
csv_abstract
,
csv_publication_year
,
csv_w_title
,
csv_w_abstract
,
csv_w_publication_year
,
csv_w_weight
)
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Core.Text.Corpus.Parsers
(
FileFormat
(
..
),
parseFile
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.List.Formats.CSV
(
csvMapTermList
)
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
import
Gargantext.Core.Text.Terms.WithList
(
Patterns
,
buildPatterns
,
extractTermsWithList
)
...
@@ -46,10 +46,9 @@ import qualified Data.Vector as Vector
...
@@ -46,10 +46,9 @@ import qualified Data.Vector as Vector
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
qualified
Gargantext.Core.Text.Corpus.Parsers.CSV
as
Csv
import
qualified
Data.Text
as
T
import
qualified
Data.Text
as
T
-- import Debug.Trace (trace)
data
PhyloStage
=
PhyloWithCliques
|
PhyloWithLinks
deriving
(
Show
)
data
PhyloStage
=
PhyloWithCliques
|
PhyloWithLinks
deriving
(
Show
)
---------------
---------------
-- | Tools | --
-- | Tools | --
...
@@ -63,7 +62,6 @@ getFilesFromPath path = do
...
@@ -63,7 +62,6 @@ getFilesFromPath path = do
then
(
listDirectory
path
)
then
(
listDirectory
path
)
else
return
[
path
]
else
return
[
path
]
--------------
--------------
-- | Json | --
-- | Json | --
--------------
--------------
...
@@ -79,18 +77,13 @@ readJson path = Lazy.readFile path
...
@@ -79,18 +77,13 @@ readJson path = Lazy.readFile path
----------------
----------------
-- | To filter the Ngrams of a document based on the termList
-- | To filter the Ngrams of a document based on the termList
filterTerms
::
Patterns
->
(
a
,
Text
)
->
(
a
,
[
Text
])
termsInText
::
Patterns
->
Text
->
[
Text
]
filterTerms
patterns
(
y
,
d
)
=
(
y
,
termsInText
patterns
d
)
termsInText
pats
txt
=
nub
$
concat
$
map
(
map
unwords
)
$
extractTermsWithList
pats
txt
where
--------------------------------------
termsInText
::
Patterns
->
Text
->
[
Text
]
-- | To transform a Wos file (or [file]) into a list of Docs
termsInText
pats
txt
=
nub
$
concat
$
map
(
map
unwords
)
$
extractTermsWithList
pats
txt
wosToDocs
::
Int
->
Patterns
->
FilePath
->
IO
([
Document
])
--------------------------------------
wosToDocs
limit
patterns
path
=
do
-- | To transform a Wos file (or [file]) into a readable corpus
wosToCorpus
::
Int
->
FilePath
->
IO
([(
Int
,
Text
)])
wosToCorpus
limit
path
=
do
files
<-
getFilesFromPath
path
files
<-
getFilesFromPath
path
take
limit
take
limit
<$>
map
(
\
d
->
let
date'
=
fromJust
$
_hd_publication_year
d
<$>
map
(
\
d
->
let
date'
=
fromJust
$
_hd_publication_year
d
...
@@ -98,7 +91,7 @@ wosToCorpus limit path = do
...
@@ -98,7 +91,7 @@ wosToCorpus limit path = do
abstr
=
if
(
isJust
$
_hd_abstract
d
)
abstr
=
if
(
isJust
$
_hd_abstract
d
)
then
fromJust
$
_hd_abstract
d
then
fromJust
$
_hd_abstract
d
else
""
else
""
in
(
date'
,
title
<>
" "
<>
abstr
)
)
in
Document
date'
(
termsInText
patterns
$
title
<>
" "
<>
abstr
)
Nothing
)
<$>
concat
<$>
concat
<$>
mapConcurrently
(
\
file
->
<$>
mapConcurrently
(
\
file
->
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
filter
(
\
d
->
(
isJust
$
_hd_publication_year
d
)
...
@@ -106,27 +99,38 @@ wosToCorpus limit path = do
...
@@ -106,27 +99,38 @@ wosToCorpus limit path = do
<$>
parseFile
WOS
(
path
<>
file
)
)
files
<$>
parseFile
WOS
(
path
<>
file
)
)
files
-- | To transform a Csv file into a readable corpus
-- To transform a Csv file into a list of Document
csvToCorpus
::
Int
->
FilePath
->
IO
([(
Int
,
Text
)])
csvToDocs
::
CorpusParser
->
Patterns
->
FilePath
->
IO
([
Document
])
csvToCorpus
limit
path
=
Vector
.
toList
csvToDocs
parser
patterns
path
=
<$>
Vector
.
take
limit
case
parser
of
<$>
Vector
.
map
(
\
row
->
(
csv_publication_year
row
,
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
)))
Wos
_
->
undefined
<$>
snd
<$>
Csv
.
readFile
path
Csv
limit
->
Vector
.
toList
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
csv_publication_year
row
)
-- | To use the correct parser given a CorpusType
(
termsInText
patterns
$
(
csv_title
row
)
<>
" "
<>
(
csv_abstract
row
))
fileToCorpus
::
CorpusParser
->
FilePath
->
IO
([(
Int
,
Text
)])
Nothing
fileToCorpus
parser
path
=
case
parser
of
)
<$>
snd
<$>
Csv
.
readFile
path
Wos
limit
->
wosToCorpus
limit
path
CsvWeighted
limit
->
Vector
.
toList
Csv
limit
->
csvToCorpus
limit
path
<$>
Vector
.
take
limit
<$>
Vector
.
map
(
\
row
->
Document
(
csv_w_publication_year
row
)
(
termsInText
patterns
$
(
csv_w_title
row
)
<>
" "
<>
(
csv_w_abstract
row
))
(
Just
$
csv_w_weight
row
)
)
<$>
snd
<$>
Csv
.
readWeightedCsv
path
-- To parse a file into a list of Document
fileToDocs'
::
CorpusParser
->
FilePath
->
TermList
->
IO
[
Document
]
fileToDocs'
parser
path
lst
=
do
let
patterns
=
buildPatterns
lst
case
parser
of
Wos
limit
->
wosToDocs
limit
patterns
path
Csv
_
->
csvToDocs
parser
patterns
path
CsvWeighted
_
->
csvToDocs
parser
patterns
path
-- | To parse a file into a list of Document
---------------
fileToDocs
::
CorpusParser
->
FilePath
->
TermList
->
IO
[
Document
]
-- | Label | --
fileToDocs
parser
path
lst
=
do
---------------
corpus
<-
fileToCorpus
parser
path
let
patterns
=
buildPatterns
lst
pure
$
map
(
(
\
(
y
,
t
)
->
Document
y
t
)
.
filterTerms
patterns
)
corpus
-- Config time parameters to label
-- Config time parameters to label
...
@@ -235,7 +239,7 @@ main = do
...
@@ -235,7 +239,7 @@ main = do
printIOMsg
"Parse the corpus"
printIOMsg
"Parse the corpus"
mapList
<-
csvMapTermList
(
listPath
config
)
mapList
<-
csvMapTermList
(
listPath
config
)
corpus
<-
fileToDocs
(
corpusParser
config
)
(
corpusPath
config
)
mapList
corpus
<-
fileToDocs
'
(
corpusParser
config
)
(
corpusPath
config
)
mapList
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOComment
(
show
(
length
corpus
)
<>
" parsed docs from the corpus"
)
printIOMsg
"Reconstruct the phylo"
printIOMsg
"Reconstruct the phylo"
...
...
src/Gargantext/Core/Text/Corpus/Parsers/CSV.hs
View file @
0b0ee22a
...
@@ -395,3 +395,34 @@ parseCsv fp = V.toList <$> V.map csv2doc <$> snd <$> readFile fp
...
@@ -395,3 +395,34 @@ parseCsv fp = V.toList <$> V.map csv2doc <$> snd <$> readFile fp
parseCsv'
::
BL
.
ByteString
->
[
HyperdataDocument
]
parseCsv'
::
BL
.
ByteString
->
[
HyperdataDocument
]
parseCsv'
bs
=
V
.
toList
$
V
.
map
csv2doc
$
snd
$
readCsvLazyBS
bs
parseCsv'
bs
=
V
.
toList
$
V
.
map
csv2doc
$
snd
$
readCsvLazyBS
bs
------------------------------------------------------------------------
-- Csv v3 weighted for phylo
data
WeightedCsv
=
WeightedCsv
{
csv_w_title
::
!
Text
,
csv_w_source
::
!
Text
,
csv_w_publication_year
::
!
Int
,
csv_w_publication_month
::
!
Int
,
csv_w_publication_day
::
!
Int
,
csv_w_abstract
::
!
Text
,
csv_w_authors
::
!
Text
,
csv_w_weight
::
!
Double
}
deriving
(
Show
)
instance
FromNamedRecord
WeightedCsv
where
parseNamedRecord
r
=
WeightedCsv
<$>
r
.:
"title"
<*>
r
.:
"source"
<*>
r
.:
"publication_year"
<*>
r
.:
"publication_month"
<*>
r
.:
"publication_day"
<*>
r
.:
"abstract"
<*>
r
.:
"authors"
<*>
r
.:
"weight"
readWeightedCsv
::
FilePath
->
IO
(
Header
,
Vector
WeightedCsv
)
readWeightedCsv
fp
=
fmap
(
\
bs
->
case
decodeByNameWith
csvDecodeOptions
bs
of
Left
e
->
panic
(
pack
e
)
Right
corpus
->
corpus
)
$
BL
.
readFile
fp
\ No newline at end of file
src/Gargantext/Core/Text/Metrics/FrequentItemSet.hs
View file @
0b0ee22a
...
@@ -21,6 +21,7 @@ module Gargantext.Core.Text.Metrics.FrequentItemSet
...
@@ -21,6 +21,7 @@ module Gargantext.Core.Text.Metrics.FrequentItemSet
,
fisWithSizePoly
,
fisWithSizePoly
,
fisWithSizePoly2
,
fisWithSizePoly2
,
fisWithSizePolyMap
,
fisWithSizePolyMap
,
fisWithSizePolyMap'
,
module
HLCM
,
module
HLCM
)
)
where
where
...
@@ -35,6 +36,8 @@ import qualified Data.Map.Strict as Map
...
@@ -35,6 +36,8 @@ import qualified Data.Map.Strict as Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Set
as
Set
import
qualified
Data.Vector
as
V
import
qualified
Data.Vector
as
V
import
Control.Monad
(
sequence
)
data
Size
=
Point
Int
|
Segment
Int
Int
data
Size
=
Point
Int
|
Segment
Int
Int
------------------------------------------------------------------------
------------------------------------------------------------------------
...
@@ -120,6 +123,28 @@ fisWithSizePolyMap n f is =
...
@@ -120,6 +123,28 @@ fisWithSizePolyMap n f is =
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------
---- Weighted [[Item]]
isSublistOf
::
Ord
a
=>
[
a
]
->
[
a
]
->
Bool
isSublistOf
sub
lst
=
all
(
\
i
->
elem
i
lst
)
sub
reIndexFis
::
Ord
a
=>
[([
a
],
b
)]
->
[
Fis'
a
]
->
[(
Fis'
a
,[
b
])]
reIndexFis
items
fis
=
map
(
\
f
->
let
docs
=
filter
(
\
(
lst
,
_
)
->
isSublistOf
(
_fisItemSet
f
)
lst
)
items
in
(
f
,
map
snd
docs
))
fis
wsum
::
[
Maybe
Double
]
->
Maybe
Double
wsum
lst
=
fmap
sum
$
sequence
lst
fisWithSizePolyMap'
::
Ord
a
=>
Size
->
Frequency
->
[([
a
],
Maybe
Double
)]
->
Map
(
Set
a
)
(
Int
,
Maybe
Double
)
fisWithSizePolyMap'
n
f
is
=
Map
.
fromList
$
map
(
\
(
fis
,
ws
)
->
(
Set
.
fromList
(
_fisItemSet
fis
),(
_fisCount
fis
,(
wsum
ws
))))
$
reIndexFis
is
$
fisWithSizePoly2
n
f
(
map
fst
is
)
------------------------------------------------------------------------
------------------------------------------------------------------------
--
--
---- | /!\ indexes are not the same:
---- | /!\ indexes are not the same:
...
...
src/Gargantext/Core/Viz/AdaptativePhylo.hs
View file @
0b0ee22a
...
@@ -52,6 +52,7 @@ import qualified Data.Text.Lazy as TextLazy
...
@@ -52,6 +52,7 @@ import qualified Data.Text.Lazy as TextLazy
data
CorpusParser
=
data
CorpusParser
=
Wos
{
_wos_limit
::
Int
}
Wos
{
_wos_limit
::
Int
}
|
Csv
{
_csv_limit
::
Int
}
|
Csv
{
_csv_limit
::
Int
}
|
CsvWeighted
{
_csvw_limit
::
Int
}
deriving
(
Show
,
Generic
,
Eq
)
deriving
(
Show
,
Generic
,
Eq
)
data
SeaElevation
=
data
SeaElevation
=
...
@@ -231,7 +232,6 @@ defaultPhyloParam =
...
@@ -231,7 +232,6 @@ defaultPhyloParam =
-- | Document | --
-- | Document | --
------------------
------------------
-- | Date : a simple Integer
-- | Date : a simple Integer
type
Date
=
Int
type
Date
=
Int
...
@@ -240,8 +240,9 @@ type Ngrams = Text
...
@@ -240,8 +240,9 @@ type Ngrams = Text
-- | Document : a piece of Text linked to a Date
-- | Document : a piece of Text linked to a Date
data
Document
=
Document
data
Document
=
Document
{
date
::
Date
{
date
::
Date
,
text
::
[
Ngrams
]
,
text
::
[
Ngrams
]
,
weight
::
Maybe
Double
}
deriving
(
Eq
,
Show
,
Generic
,
NFData
)
}
deriving
(
Eq
,
Show
,
Generic
,
NFData
)
...
@@ -335,6 +336,7 @@ data PhyloGroup =
...
@@ -335,6 +336,7 @@ data PhyloGroup =
,
_phylo_groupIndex
::
Int
,
_phylo_groupIndex
::
Int
,
_phylo_groupLabel
::
Text
,
_phylo_groupLabel
::
Text
,
_phylo_groupSupport
::
Support
,
_phylo_groupSupport
::
Support
,
_phylo_groupWeight
::
Maybe
Double
,
_phylo_groupNgrams
::
[
Int
]
,
_phylo_groupNgrams
::
[
Int
]
,
_phylo_groupCooc
::
!
(
Cooc
)
,
_phylo_groupCooc
::
!
(
Cooc
)
,
_phylo_groupBranchId
::
PhyloBranchId
,
_phylo_groupBranchId
::
PhyloBranchId
...
@@ -368,6 +370,7 @@ data PhyloClique = PhyloClique
...
@@ -368,6 +370,7 @@ data PhyloClique = PhyloClique
{
_phyloClique_nodes
::
[
Int
]
{
_phyloClique_nodes
::
[
Int
]
,
_phyloClique_support
::
Support
,
_phyloClique_support
::
Support
,
_phyloClique_period
::
(
Date
,
Date
)
,
_phyloClique_period
::
(
Date
,
Date
)
,
_phyloClique_weight
::
Maybe
Double
}
deriving
(
Generic
,
NFData
,
Show
,
Eq
)
}
deriving
(
Generic
,
NFData
,
Show
,
Eq
)
----------------
----------------
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExample.hs
View file @
0b0ee22a
...
@@ -109,8 +109,10 @@ config =
...
@@ -109,8 +109,10 @@ config =
docs
::
[
Document
]
docs
::
[
Document
]
docs
=
map
(
\
(
d
,
t
)
docs
=
map
(
\
(
d
,
t
)
->
Document
d
(
filter
(
\
n
->
isRoots
n
(
foundations
^.
foundations_roots
))
->
Document
d
$
monoTexts
t
))
corpus
(
filter
(
\
n
->
isRoots
n
(
foundations
^.
foundations_roots
))
$
monoTexts
t
)
Nothing
)
corpus
foundations
::
PhyloFoundations
foundations
::
PhyloFoundations
...
...
src/Gargantext/Core/Viz/Phylo/PhyloExport.hs
View file @
0b0ee22a
...
@@ -139,6 +139,7 @@ groupToDotNode fdt g bId =
...
@@ -139,6 +139,7 @@ groupToDotNode fdt g bId =
,
toAttr
"branchId"
(
pack
$
unwords
(
init
$
map
show
$
snd
$
g
^.
phylo_groupBranchId
))
,
toAttr
"branchId"
(
pack
$
unwords
(
init
$
map
show
$
snd
$
g
^.
phylo_groupBranchId
))
,
toAttr
"bId"
(
pack
$
show
bId
)
,
toAttr
"bId"
(
pack
$
show
bId
)
,
toAttr
"support"
(
pack
$
show
(
g
^.
phylo_groupSupport
))
,
toAttr
"support"
(
pack
$
show
(
g
^.
phylo_groupSupport
))
,
toAttr
"weight"
(
pack
$
show
(
g
^.
phylo_groupWeight
))
,
toAttr
"lbl"
(
pack
$
show
(
ngramsToLabel
fdt
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"lbl"
(
pack
$
show
(
ngramsToLabel
fdt
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"foundation"
(
pack
$
show
(
idxToLabel
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"foundation"
(
pack
$
show
(
idxToLabel
(
g
^.
phylo_groupNgrams
)))
,
toAttr
"role"
(
pack
$
show
(
idxToLabel'
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)))
,
toAttr
"role"
(
pack
$
show
(
idxToLabel'
((
g
^.
phylo_groupMeta
)
!
"dynamics"
)))
...
...
src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs
View file @
0b0ee22a
...
@@ -21,7 +21,7 @@ import Gargantext.Core.Viz.Phylo.PhyloTools
...
@@ -21,7 +21,7 @@ import Gargantext.Core.Viz.Phylo.PhyloTools
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
adaptativeTemporalMatching
,
constanteTemporalMatching
,
getNextPeriods
,
filterDocs
,
filterDiago
,
reduceDiagos
,
toProximity
)
import
Gargantext.Core.Viz.Phylo.TemporalMatching
(
adaptativeTemporalMatching
,
constanteTemporalMatching
,
getNextPeriods
,
filterDocs
,
filterDiago
,
reduceDiagos
,
toProximity
)
import
Gargantext.Core.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Gargantext.Core.Viz.Phylo.SynchronicClustering
(
synchronicClustering
)
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Context
(
TermList
)
import
Gargantext.Core.Text.Metrics.FrequentItemSet
(
fisWithSizePolyMap
,
Size
(
..
))
import
Gargantext.Core.Text.Metrics.FrequentItemSet
(
fisWithSizePolyMap
,
fisWithSizePolyMap'
,
Size
(
..
))
import
Gargantext.Core.Methods.Graph.MaxClique
(
getMaxCliques
)
import
Gargantext.Core.Methods.Graph.MaxClique
(
getMaxCliques
)
import
Gargantext.Core.Methods.Distances
(
Distance
(
Conditional
))
import
Gargantext.Core.Methods.Distances
(
Distance
(
Conditional
))
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toHorizon
)
import
Gargantext.Core.Viz.Phylo.PhyloExport
(
toHorizon
)
...
@@ -128,6 +128,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
...
@@ -128,6 +128,7 @@ appendGroups f lvl m phylo = trace ("\n" <> "-- | Append " <> show (length $ co
cliqueToGroup
::
PhyloClique
->
PhyloPeriodId
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
cliqueToGroup
::
PhyloClique
->
PhyloPeriodId
->
Level
->
Int
->
[
Cooc
]
->
PhyloGroup
cliqueToGroup
fis
pId
lvl
idx
coocs
=
PhyloGroup
pId
lvl
idx
""
cliqueToGroup
fis
pId
lvl
idx
coocs
=
PhyloGroup
pId
lvl
idx
""
(
fis
^.
phyloClique_support
)
(
fis
^.
phyloClique_support
)
(
fis
^.
phyloClique_weight
)
(
fis
^.
phyloClique_nodes
)
(
fis
^.
phyloClique_nodes
)
(
ngramsToCooc
(
fis
^.
phyloClique_nodes
)
coocs
)
(
ngramsToCooc
(
fis
^.
phyloClique_nodes
)
coocs
)
(
1
,[
0
])
-- branchid (lvl,[path in the branching tree])
(
1
,[
0
])
-- branchid (lvl,[path in the branching tree])
...
@@ -217,8 +218,14 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
...
@@ -217,8 +218,14 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
phyloClique
=
case
(
clique
$
getConfig
phylo
)
of
phyloClique
=
case
(
clique
$
getConfig
phylo
)
of
Fis
_
_
->
Fis
_
_
->
let
fis
=
map
(
\
(
prd
,
docs
)
->
let
fis
=
map
(
\
(
prd
,
docs
)
->
let
lst
=
toList
$
fisWithSizePolyMap
(
Segment
1
20
)
1
(
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
)
case
(
corpusParser
$
getConfig
phylo
)
of
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
(
snd
f
)
prd
)
lst
))
CsvWeighted
_
->
let
lst
=
toList
$
fisWithSizePolyMap'
(
Segment
1
20
)
1
(
map
(
\
d
->
(
ngramsToIdx
(
text
d
)
(
getRoots
phylo
),
weight
d
))
docs
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
((
fst
.
snd
)
f
)
prd
((
snd
.
snd
)
f
))
lst
)
_
->
let
lst
=
toList
$
fisWithSizePolyMap
(
Segment
1
20
)
1
(
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
)
in
(
prd
,
map
(
\
f
->
PhyloClique
(
Set
.
toList
$
fst
f
)
(
snd
f
)
prd
Nothing
)
lst
)
)
$
toList
phyloDocs
$
toList
phyloDocs
fis'
=
fis
`
using
`
parList
rdeepseq
fis'
=
fis
`
using
`
parList
rdeepseq
in
fromList
fis'
in
fromList
fis'
...
@@ -228,7 +235,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
...
@@ -228,7 +235,7 @@ toPhyloClique phylo phyloDocs = case (clique $ getConfig phylo) of
$
foldl
sumCooc
empty
$
foldl
sumCooc
empty
$
map
listToMatrix
$
map
listToMatrix
$
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
$
map
(
\
d
->
ngramsToIdx
(
text
d
)
(
getRoots
phylo
))
docs
in
(
prd
,
map
(
\
cl
->
PhyloClique
cl
0
prd
)
$
getMaxCliques
filterType
Conditional
thr
cooc
))
in
(
prd
,
map
(
\
cl
->
PhyloClique
cl
0
prd
Nothing
)
$
getMaxCliques
filterType
Conditional
thr
cooc
))
$
toList
phyloDocs
$
toList
phyloDocs
mcl'
=
mcl
`
using
`
parList
rdeepseq
mcl'
=
mcl
`
using
`
parList
rdeepseq
in
fromList
mcl'
in
fromList
mcl'
...
...
src/Gargantext/Core/Viz/Phylo/SynchronicClustering.hs
View file @
0b0ee22a
...
@@ -22,6 +22,7 @@ import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty,
...
@@ -22,6 +22,7 @@ import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty,
import
Control.Lens
hiding
(
Level
)
import
Control.Lens
hiding
(
Level
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Control.Parallel.Strategies
(
parList
,
rdeepseq
,
using
)
import
Control.Monad
(
sequence
)
-- import Debug.Trace (trace)
-- import Debug.Trace (trace)
import
qualified
Data.Map
as
Map
import
qualified
Data.Map
as
Map
...
@@ -36,7 +37,10 @@ mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [Phylo
...
@@ -36,7 +37,10 @@ mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [Phylo
mergeGroups
coocs
id
mapIds
childs
=
mergeGroups
coocs
id
mapIds
childs
=
let
ngrams
=
(
sort
.
nub
.
concat
)
$
map
_phylo_groupNgrams
childs
let
ngrams
=
(
sort
.
nub
.
concat
)
$
map
_phylo_groupNgrams
childs
in
PhyloGroup
(
fst
$
fst
id
)
(
snd
$
fst
id
)
(
snd
id
)
""
in
PhyloGroup
(
fst
$
fst
id
)
(
snd
$
fst
id
)
(
snd
id
)
""
(
sum
$
map
_phylo_groupSupport
childs
)
ngrams
(
sum
$
map
_phylo_groupSupport
childs
)
(
fmap
sum
$
sequence
$
map
_phylo_groupWeight
childs
)
ngrams
(
ngramsToCooc
ngrams
coocs
)
(
ngramsToCooc
ngrams
coocs
)
((
snd
$
fst
id
),
bId
)
((
snd
$
fst
id
),
bId
)
(
mergeMeta
bId
childs
)
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
(
mergeMeta
bId
childs
)
[]
(
map
(
\
g
->
(
getGroupId
g
,
1
))
childs
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment