Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
c44c07c7
Commit
c44c07c7
authored
Jun 09, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[COOC FILTERING] workflow done but optimization issue.
parent
e5419936
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
51 additions
and
31 deletions
+51
-31
Pipeline.hs
src/Gargantext/Pipeline.hs
+10
-14
Metrics.hs
src/Gargantext/Text/Metrics.hs
+41
-15
Graph.hs
src/Gargantext/Viz/Graph.hs
+0
-2
No files found.
src/Gargantext/Pipeline.hs
View file @
c44c07c7
...
...
@@ -46,27 +46,23 @@ import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
-}
pipeline
path
=
do
workflow
lang
path
=
do
-- Text <- IO Text <- FilePath
text
<-
readFile
path
let
contexts
=
splitBy
(
Sentences
5
)
text
myterms
<-
extractTerms
Multi
FR
contexts
myterms
<-
extractTerms
Multi
lang
contexts
-- TODO filter (\t -> not . elem t stopList) myterms
-- TODO groupBy (Stem | GroupList)
let
myCooc
=
removeApax
$
cooc
myterms
--let (ti, fi) = createIndices myCooc
pure
True
--pure $ incExcSpeGen myCooc
let
myCooc
=
filterCooc
$
removeApax
$
cooc
myterms
-- Cooc -> Matrix
-- -- filter by spec/gen (dynmaic programming)
-- let theScores = M.filter (>0) $ score conditional myCoocFiltered
----
------ -- Matrix -> Clustering
------ pure $ bestpartition False $ map2graph $ toIndex ti theScores
-- partitions <- cLouvain theScores
-- pure partitions
--let (ti, fi) = createIndices myCooc
-- @np FIXME optimization issue of filterCooc (too much memory consumed)
pure
myCooc
-- Matrix -> Clustering
-- pure $ bestpartition False $ map2graph $ toIndex ti myCooc
--partitions <- cLouvain $ toIndex ti $ M.map (\v -> (fromIntegral v) :: Double) myCooc
--pure partitions
---- | Building : -> Graph -> JSON
src/Gargantext/Text/Metrics.hs
View file @
c44c07c7
...
...
@@ -16,6 +16,7 @@ noApax m = M.filter (>1) m
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
...
...
@@ -53,16 +54,21 @@ import qualified Data.Array.Accelerate as DAA
import
GHC.Real
(
round
)
--filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
--filterCooc m =
---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
----(ti, fi) = createIndices m
-- . fromIndex fi $ filterMat $ cooc2mat ti m
filterCooc
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
Map
(
t
,
t
)
Int
filterCooc
cc
=
filterCooc'
ts
cc
where
ts
=
map
_scored_terms
$
takeSome
350
5
2
$
coocScored
cc
filterCooc'
::
Ord
t
=>
[
t
]
->
Map
(
t
,
t
)
Int
->
Map
(
t
,
t
)
Int
filterCooc'
ts
m
=
foldl'
(
\
m'
k
->
M
.
insert
k
(
maybe
errMessage
identity
$
M
.
lookup
k
m
)
m'
)
M
.
empty
selection
where
errMessage
=
panic
"Filter cooc: no key"
selection
=
[(
x
,
y
)
|
x
<-
ts
,
y
<-
ts
,
x
>
y
]
type
MapListSize
=
Int
type
SampleBins
=
Double
type
Clusters
=
Int
type
MapListSize
=
Int
type
SampleBins
=
Double
type
Clusters
=
Int
-- | Map list creation
-- Kmeans split into (Clusters::Int) main clusters with Inclusion/Exclusion (relevance score)
...
...
@@ -83,23 +89,44 @@ takeSome l s k scores = L.take l
m
=
round
$
(
fromIntegral
$
length
scores
)
/
(
s
)
takeSample
n
m
xs
=
L
.
concat
$
map
(
L
.
take
n
)
$
L
.
reverse
$
map
(
L
.
sortOn
_scored_incExc
)
$
splitEvery
m
-- TODO use kmeans s instead of splitEvery
-- in order to split in s heteregenous parts
-- without homogeneous order hypothesis
$
splitEvery
m
$
L
.
reverse
$
L
.
sortOn
_scored_speGen
xs
data
Scored
t
=
Scored
{
_scored_terms
::
t
,
_scored_incExc
::
InclusionExclusion
,
_scored_speGen
::
SpecificityGenericity
data
Scored
t
=
Scored
{
_scored_terms
::
!
t
,
_scored_incExc
::
!
InclusionExclusion
,
_scored_speGen
::
!
SpecificityGenericity
}
deriving
(
Show
)
incExcSpeGen_sorted'
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
[
Scored
t
]
incExcSpeGen_sorted'
m
=
zipWith
(
\
(
i
,
t
)
(
inc
,
spe
)
->
Scored
t
inc
spe
)
(
M
.
toList
fi
)
scores
coocScored
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
[
Scored
t
]
coocScored
m
=
zipWith
(
\
(
i
,
t
)
(
inc
,
spe
)
->
Scored
t
inc
spe
)
(
M
.
toList
fi
)
scores
where
(
ti
,
fi
)
=
createIndices
m
(
is
,
ss
)
=
incExcSpeGen
$
cooc2mat
ti
m
scores
=
DAA
.
toList
$
DAA
.
run
$
DAA
.
zip
(
DAA
.
use
is
)
(
DAA
.
use
ss
)
incExcSpeGen_sorted
::
Ord
t
=>
Map
(
t
,
t
)
Int
->
([(
t
,
Double
)],[(
t
,
Double
)])
incExcSpeGen_sorted
m
=
both
ordonne
(
incExcSpeGen
$
cooc2mat
ti
m
)
where
...
...
@@ -108,7 +135,6 @@ incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)
metrics_text
::
Text
metrics_text
=
T
.
intercalate
" "
metrics_sentences
...
...
src/Gargantext/Viz/Graph.hs
View file @
c44c07c7
...
...
@@ -55,8 +55,6 @@ data Graph = Graph { g_nodes :: [Node]
}
deriving
(
Show
,
Generic
)
$
(
deriveJSON
(
unPrefix
"g_"
)
''
G
raph
)
-----------------------------------------------------------
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment