Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
165
Issues
165
List
Board
Labels
Milestones
Merge Requests
10
Merge Requests
10
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
0be01d72
Commit
0be01d72
authored
May 30, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[Pipeline] ok until clustering.
parent
4b81f9d1
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
58 additions
and
69 deletions
+58
-69
Pipeline.hs
src/Gargantext/Pipeline.hs
+12
-18
Context.hs
src/Gargantext/Text/Context.hs
+20
-12
CSV.hs
src/Gargantext/Text/Parsers/CSV.hs
+6
-6
Terms.hs
src/Gargantext/Text/Terms.hs
+4
-0
Matrice.hs
src/Gargantext/Viz/Graph/Distances/Matrice.hs
+12
-12
Index.hs
src/Gargantext/Viz/Graph/Index.hs
+4
-21
No files found.
src/Gargantext/Pipeline.hs
View file @
0be01d72
...
@@ -15,35 +15,29 @@ Portability : POSIX
...
@@ -15,35 +15,29 @@ Portability : POSIX
module
Gargantext.Pipeline
module
Gargantext.Pipeline
where
where
import
Data.Text
(
unpack
)
import
qualified
Data.Text
as
DT
import
Data.Text.IO
(
readFile
)
import
Data.Text.IO
(
readFile
)
----------------------------------------------
----------------------------------------------
----------------------------------------------
----------------------------------------------
import
Gargantext.Core
import
Gargantext.Core
import
Gargantext.Core.Types
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Viz.Graph.Index
(
map'
,
createIndexes
)
import
Gargantext.Viz.Graph.Index
(
score
)
import
Gargantext.Viz.Graph.Distances.Matrice
(
distributional
,
int2double
)
import
Gargantext.Viz.Graph.Distances.Matrice
(
distributional
)
import
Gargantext.Text.Metrics.Occurrences
import
Gargantext.Text.Metrics.Occurrences
import
Gargantext.Text.Terms
import
Gargantext.Text.Terms
import
Gargantext.Text.Context
import
Gargantext.Text.Context
import
Data.Array.Accelerate
as
A
pipeline
pth
=
do
pipeline
path
=
do
text
<-
readFile
pth
-- Text <- IO Text <- FilePath
let
contexts
=
splitBy
Sentences
4
text
text
<-
readFile
path
myterms
<-
mapM
(
terms
Multi
FR
)
contexts
let
contexts
=
splitBy
(
Sentences
3
)
text
-- todo filter stop words
myterms
<-
extractTerms
Multi
FR
contexts
-- TODO filter (\t -> not . elem t stopList) myterms
-- TODO groupBy (Stem | GroupList)
let
myCooc
=
removeApax
$
cooc
myterms
let
myCooc
=
removeApax
$
cooc
myterms
--pure myCooc
-- Cooc -> Matrix
-- Cooc map -> Matrix
pure
$
score
distributional
myCooc
--pure $ createIndexes myCooc
-- Matrix -> Clustering -> Graph -> JSON
pure
$
map'
int2double
myCooc
-- Matrix -> Graph
src/Gargantext/Text/Context.hs
View file @
0be01d72
...
@@ -14,26 +14,34 @@ Context of text management tool
...
@@ -14,26 +14,34 @@ Context of text management tool
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Context
where
module
Gargantext.Text.Context
where
import
Data.Text
(
Text
,
pack
,
unpack
,
length
)
import
Data.Text
(
Text
,
pack
,
unpack
,
length
)
import
Data.String
(
IsString
)
import
Data.String
(
IsString
)
import
Text.HTML.TagSoup
import
Text.HTML.TagSoup
(
parseTags
,
isTagText
,
Tag
(
..
))
import
Gargantext.Text
import
Gargantext.Text
import
Gargantext.Prelude
hiding
(
length
)
import
Gargantext.Prelude
hiding
(
length
)
data
SplitBy
=
Paragraph
|
Sentences
|
Chars
splitBy
::
SplitBy
->
Int
->
Text
->
[
Text
]
data
SplitContext
=
Chars
Int
|
Sentences
Int
|
Paragraphs
Int
splitBy
Chars
n
=
map
pack
.
chunkAlong
n
n
.
unpack
splitBy
Sentences
n
=
map
unsentences
.
chunkAlong
n
n
.
sentences
tag
=
parseTags
splitBy
Paragraph
_
=
map
removeTag
.
filter
isTagText
.
parseTags
-- | splitBy contexts of Chars or Sentences or Paragraphs
-- >> splitBy (Chars 0) "abcde"
-- ["a","b","c","d","e"]
-- >> splitBy (Chars 1) "abcde"
-- ["ab","bc","cd","de"]
-- >> splitBy (Chars 2) "abcde"
-- ["abc","bcd","cde"]
splitBy
::
SplitContext
->
Text
->
[
Text
]
splitBy
(
Chars
n
)
=
map
pack
.
chunkAlong
(
n
+
1
)
1
.
unpack
splitBy
(
Sentences
n
)
=
map
unsentences
.
chunkAlong
(
n
+
1
)
1
.
sentences
splitBy
(
Paragraphs
_
)
=
map
unTag
.
filter
isTagText
.
tag
where
where
removeTag
::
IsString
p
=>
Tag
p
->
p
unTag
::
IsString
p
=>
Tag
p
->
p
removeTag
(
TagText
x
)
=
x
unTag
(
TagText
x
)
=
x
removeTag
(
TagComment
x
)
=
x
unTag
_
=
""
removeTag
_
=
""
src/Gargantext/Text/Parsers/CSV.hs
View file @
0be01d72
...
@@ -56,7 +56,7 @@ toDocs v = V.toList
...
@@ -56,7 +56,7 @@ toDocs v = V.toList
(
V
.
enumFromN
1
(
V
.
length
v''
))
v''
(
V
.
enumFromN
1
(
V
.
length
v''
))
v''
where
where
v''
=
V
.
foldl
(
\
v'
sep
->
V
.
concatMap
(
splitDoc
(
docsSize
v'
)
sep
)
v'
)
v
seps
v''
=
V
.
foldl
(
\
v'
sep
->
V
.
concatMap
(
splitDoc
(
docsSize
v'
)
sep
)
v'
)
v
seps
seps
=
(
V
.
fromList
[
Paragraph
,
Sentences
,
Chars
])
seps
=
(
V
.
fromList
[
Paragraph
s
1
,
Sentences
3
,
Chars
3
])
---------------------------------------------------------------
---------------------------------------------------------------
fromDocs
::
Vector
Doc
->
Vector
CsvDoc
fromDocs
::
Vector
Doc
->
Vector
CsvDoc
...
@@ -69,7 +69,7 @@ fromDocs docs = V.map fromDocs' docs
...
@@ -69,7 +69,7 @@ fromDocs docs = V.map fromDocs' docs
-- TODO adapt the size of the paragraph according to the corpus average
-- TODO adapt the size of the paragraph according to the corpus average
splitDoc
::
Mean
->
Split
By
->
CsvDoc
->
Vector
CsvDoc
splitDoc
::
Mean
->
Split
Context
->
CsvDoc
->
Vector
CsvDoc
splitDoc
m
splt
doc
=
let
docSize
=
(
length
$
c_abstract
doc
)
in
splitDoc
m
splt
doc
=
let
docSize
=
(
length
$
c_abstract
doc
)
in
if
docSize
>
1000
if
docSize
>
1000
then
then
...
@@ -82,15 +82,15 @@ splitDoc m splt doc = let docSize = (length $ c_abstract doc) in
...
@@ -82,15 +82,15 @@ splitDoc m splt doc = let docSize = (length $ c_abstract doc) in
V
.
fromList
[
doc
]
V
.
fromList
[
doc
]
splitDoc'
::
Split
By
->
CsvDoc
->
Vector
CsvDoc
splitDoc'
::
Split
Context
->
CsvDoc
->
Vector
CsvDoc
splitDoc'
splt
(
CsvDoc
t
s
py
pm
pd
abst
auth
)
=
V
.
fromList
$
[
firstDoc
]
<>
nextDocs
splitDoc'
contextSize
(
CsvDoc
t
s
py
pm
pd
abst
auth
)
=
V
.
fromList
$
[
firstDoc
]
<>
nextDocs
where
where
firstDoc
=
CsvDoc
t
s
py
pm
pd
firstAbstract
auth
firstDoc
=
CsvDoc
t
s
py
pm
pd
firstAbstract
auth
firstAbstract
=
head'
abstracts
firstAbstract
=
head'
abstracts
nextDocs
=
map
(
\
txt
->
CsvDoc
(
head'
$
sentences
txt
)
s
py
pm
pd
(
unsentences
$
tail'
$
sentences
txt
)
auth
)
(
tail'
abstracts
)
nextDocs
=
map
(
\
txt
->
CsvDoc
(
head'
$
sentences
txt
)
s
py
pm
pd
(
unsentences
$
tail'
$
sentences
txt
)
auth
)
(
tail'
abstracts
)
abstracts
=
(
splitBy
splt
20
)
abst
abstracts
=
(
splitBy
$
contextSize
)
abst
head'
x
=
maybe
""
identity
(
head
x
)
head'
x
=
maybe
""
identity
(
head
x
)
tail'
x
=
maybe
[
""
]
identity
(
tailMay
x
)
tail'
x
=
maybe
[
""
]
identity
(
tailMay
x
)
...
...
src/Gargantext/Text/Terms.hs
View file @
0be01d72
...
@@ -34,6 +34,7 @@ module Gargantext.Text.Terms
...
@@ -34,6 +34,7 @@ module Gargantext.Text.Terms
where
where
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
Data.Traversable
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core
import
Gargantext.Core
...
@@ -46,6 +47,9 @@ data TermType = Mono | Multi
...
@@ -46,6 +47,9 @@ data TermType = Mono | Multi
-- remove Stop Words
-- remove Stop Words
-- map (filter (\t -> not . elem t)) $
-- map (filter (\t -> not . elem t)) $
------------------------------------------------------------------------
------------------------------------------------------------------------
extractTerms
::
Traversable
t
=>
TermType
->
Lang
->
t
Text
->
IO
(
t
[
Terms
])
extractTerms
termType
lang
=
mapM
(
terms
termType
lang
)
------------------------------------------------------------------------
terms
::
TermType
->
Lang
->
Text
->
IO
[
Terms
]
terms
::
TermType
->
Lang
->
Text
->
IO
[
Terms
]
terms
Mono
lang
txt
=
pure
$
monoterms'
lang
txt
terms
Mono
lang
txt
=
pure
$
monoterms'
lang
txt
terms
Multi
lang
txt
=
multiterms
lang
txt
terms
Multi
lang
txt
=
multiterms
lang
txt
...
...
src/Gargantext/Viz/Graph/Distances/Matrice.hs
View file @
0be01d72
...
@@ -36,10 +36,8 @@ Implementation use Accelerate library :
...
@@ -36,10 +36,8 @@ Implementation use Accelerate library :
module
Gargantext.Viz.Graph.Distances.Matrice
module
Gargantext.Viz.Graph.Distances.Matrice
where
where
--import Data.Array.Accelerate.Data.Bits
import
Data.Array.Accelerate.Interpreter
(
run
)
import
Data.Array.Accelerate
import
Data.Array.Accelerate
import
Data.Array.Accelerate.Interpreter
(
run
)
import
Data.Array.Accelerate.Smart
import
Data.Array.Accelerate.Smart
import
Data.Array.Accelerate.Type
import
Data.Array.Accelerate.Type
import
Data.Array.Accelerate.Array.Sugar
(
fromArr
,
Array
,
Z
)
import
Data.Array.Accelerate.Array.Sugar
(
fromArr
,
Array
,
Z
)
...
@@ -94,13 +92,6 @@ type SpecificityGenericity = Double
...
@@ -94,13 +92,6 @@ type SpecificityGenericity = Double
conditional
::
Matrix
Double
->
(
Matrix
InclusionExclusion
,
Matrix
SpecificityGenericity
)
conditional
::
Matrix
Double
->
(
Matrix
InclusionExclusion
,
Matrix
SpecificityGenericity
)
conditional
m
=
(
run
$
ie
(
use
m
),
run
$
sg
(
use
m
))
conditional
m
=
(
run
$
ie
(
use
m
),
run
$
sg
(
use
m
))
where
where
r
::
Rank
r
=
rank'
m
xs
::
Matrix'
Double
->
Matrix'
Double
xs
mat
=
zipWith
(
-
)
(
proba
r
mat
)
(
mkSum
r
$
proba
r
mat
)
ys
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
ys
mat
=
zipWith
(
-
)
(
proba
r
mat
)
(
mkSum
r
$
transpose
$
proba
r
mat
)
ie
::
Matrix'
Double
->
Matrix'
Double
ie
::
Matrix'
Double
->
Matrix'
Double
ie
mat
=
map
(
\
x
->
x
/
(
2
*
n
-
1
))
$
zipWith
(
+
)
(
xs
mat
)
(
ys
mat
)
ie
mat
=
map
(
\
x
->
x
/
(
2
*
n
-
1
))
$
zipWith
(
+
)
(
xs
mat
)
(
ys
mat
)
...
@@ -110,6 +101,13 @@ conditional m = (run $ ie (use m), run $ sg (use m))
...
@@ -110,6 +101,13 @@ conditional m = (run $ ie (use m), run $ sg (use m))
n
::
Exp
Double
n
::
Exp
Double
n
=
P
.
fromIntegral
r
n
=
P
.
fromIntegral
r
r
::
Rank
r
=
rank'
m
xs
::
Matrix'
Double
->
Matrix'
Double
xs
mat
=
zipWith
(
-
)
(
proba
r
mat
)
(
mkSum
r
$
proba
r
mat
)
ys
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
ys
mat
=
zipWith
(
-
)
(
proba
r
mat
)
(
mkSum
r
$
transpose
$
proba
r
mat
)
-- filter with threshold
-- filter with threshold
-----------------------------------------------------------------------
-----------------------------------------------------------------------
...
@@ -121,7 +119,9 @@ distributional m = run $ filter $ ri (map fromIntegral $ use m)
...
@@ -121,7 +119,9 @@ distributional m = run $ filter $ ri (map fromIntegral $ use m)
where
where
n
=
rank'
m
n
=
rank'
m
miniMax
m
=
map
(
\
x
->
ifThenElse
(
x
>
(
the
$
minimum
$
maximum
m
))
x
0
)
m
miniMax
m
=
map
(
\
x
->
ifThenElse
(
x
>
miniMax'
)
x
0
)
m
where
miniMax'
=
(
the
$
minimum
$
maximum
m
)
filter
m
=
zipWith
(
\
a
b
->
max
a
b
)
m
(
transpose
m
)
filter
m
=
zipWith
(
\
a
b
->
max
a
b
)
m
(
transpose
m
)
...
...
src/Gargantext/Viz/Graph/Index.hs
View file @
0be01d72
...
@@ -43,28 +43,12 @@ import Gargantext.Prelude
...
@@ -43,28 +43,12 @@ import Gargantext.Prelude
type
Index
=
Int
type
Index
=
Int
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
{-
map'' :: (Ord t) => (A.Matrix Int -> A.Matrix Double)
-> Map (t, t) Int
-> Map (t, t) Double
map'' f m = back . f' . from m
where
from (fs, m') = unzip $ M.toAscList m
f' = f $ A.fromList shape m'
shape = (A.Z A.:. n A.:. n)
back = M.fromAscList . zip fs . A.toList
-}
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
map'
::
(
Ord
t
)
=>
(
A
.
Matrix
Int
->
A
.
Matrix
Double
)
score
::
(
Ord
t
)
=>
(
A
.
Matrix
Int
->
A
.
Matrix
Double
)
->
Map
(
t
,
t
)
Int
->
Map
(
t
,
t
)
Int
->
Map
(
t
,
t
)
Double
->
Map
(
t
,
t
)
Double
map'
f
m
=
fromIndex
fromI
.
mat2cooc
.
f
$
cooc2mat
toI
m
score
f
m
=
fromIndex
fromI
.
mat2map
.
f
$
cooc2mat
toI
m
where
(
toI
,
fromI
)
=
createIndexes
m
map''
m
=
cooc2mat
toI
m
where
where
(
toI
,
fromI
)
=
createIndexes
m
(
toI
,
fromI
)
=
createIndexes
m
...
@@ -81,10 +65,9 @@ map2mat def n m = A.fromFunction shape (\(Z :. x :. y) -> fromMaybe def $ M.look
...
@@ -81,10 +65,9 @@ map2mat def n m = A.fromFunction shape (\(Z :. x :. y) -> fromMaybe def $ M.look
where
where
shape
=
(
Z
:.
n
:.
n
)
shape
=
(
Z
:.
n
:.
n
)
-- TODO rename mat2map
mat2map
::
(
Elt
a
,
Shape
(
Z
:.
Index
))
=>
mat2cooc
::
(
Elt
a
,
Shape
(
Z
:.
Index
))
=>
A
.
Array
(
Z
:.
Index
:.
Index
)
a
->
Map
(
Index
,
Index
)
a
A
.
Array
(
Z
:.
Index
:.
Index
)
a
->
Map
(
Index
,
Index
)
a
mat2
cooc
m
=
M
.
fromList
.
map
f
.
A
.
toList
.
A
.
run
.
A
.
indexed
$
A
.
use
m
mat2
map
m
=
M
.
fromList
.
map
f
.
A
.
toList
.
A
.
run
.
A
.
indexed
$
A
.
use
m
where
where
Z
:.
_
:.
n
=
A
.
arrayShape
m
Z
:.
_
:.
n
=
A
.
arrayShape
m
f
((
Z
:.
i
:.
j
),
x
)
=
((
i
,
j
),
x
)
f
((
Z
:.
i
:.
j
),
x
)
=
((
i
,
j
),
x
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment