Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
0be01d72
Commit
0be01d72
authored
May 30, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[Pipeline] ok until clustering.
parent
4b81f9d1
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
58 additions
and
69 deletions
+58
-69
Pipeline.hs
src/Gargantext/Pipeline.hs
+12
-18
Context.hs
src/Gargantext/Text/Context.hs
+20
-12
CSV.hs
src/Gargantext/Text/Parsers/CSV.hs
+6
-6
Terms.hs
src/Gargantext/Text/Terms.hs
+4
-0
Matrice.hs
src/Gargantext/Viz/Graph/Distances/Matrice.hs
+12
-12
Index.hs
src/Gargantext/Viz/Graph/Index.hs
+4
-21
No files found.
src/Gargantext/Pipeline.hs
View file @
0be01d72
...
...
@@ -15,35 +15,29 @@ Portability : POSIX
module
Gargantext.Pipeline
where
import
Data.Text
(
unpack
)
import
qualified
Data.Text
as
DT
import
Data.Text.IO
(
readFile
)
----------------------------------------------
----------------------------------------------
import
Gargantext.Core
import
Gargantext.Core.Types
import
Gargantext.Prelude
import
Gargantext.Viz.Graph.Index
(
map'
,
createIndexes
)
import
Gargantext.Viz.Graph.Distances.Matrice
(
distributional
,
int2double
)
import
Gargantext.Viz.Graph.Index
(
score
)
import
Gargantext.Viz.Graph.Distances.Matrice
(
distributional
)
import
Gargantext.Text.Metrics.Occurrences
import
Gargantext.Text.Terms
import
Gargantext.Text.Context
import
Data.Array.Accelerate
as
A
pipeline
pth
=
do
text
<-
readFile
pth
let
contexts
=
splitBy
Sentences
4
text
myterms
<-
mapM
(
terms
Multi
FR
)
contexts
-- todo filter stop words
pipeline
path
=
do
-- Text <- IO Text <- FilePath
text
<-
readFile
path
let
contexts
=
splitBy
(
Sentences
3
)
text
myterms
<-
extractTerms
Multi
FR
contexts
-- TODO filter (\t -> not . elem t stopList) myterms
-- TODO groupBy (Stem | GroupList)
let
myCooc
=
removeApax
$
cooc
myterms
--pure myCooc
-- Cooc map -> Matrix
--pure $ createIndexes myCooc
pure
$
map'
int2double
myCooc
-- Matrix -> Graph
-- Cooc -> Matrix
pure
$
score
distributional
myCooc
-- Matrix -> Clustering -> Graph -> JSON
src/Gargantext/Text/Context.hs
View file @
0be01d72
...
...
@@ -14,26 +14,34 @@ Context of text management tool
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.Context
where
module
Gargantext.Text.Context
where
import
Data.Text
(
Text
,
pack
,
unpack
,
length
)
import
Data.String
(
IsString
)
import
Text.HTML.TagSoup
import
Text.HTML.TagSoup
(
parseTags
,
isTagText
,
Tag
(
..
))
import
Gargantext.Text
import
Gargantext.Prelude
hiding
(
length
)
data
SplitBy
=
Paragraph
|
Sentences
|
Chars
splitBy
::
SplitBy
->
Int
->
Text
->
[
Text
]
splitBy
Chars
n
=
map
pack
.
chunkAlong
n
n
.
unpack
splitBy
Sentences
n
=
map
unsentences
.
chunkAlong
n
n
.
sentences
splitBy
Paragraph
_
=
map
removeTag
.
filter
isTagText
.
parseTags
data
SplitContext
=
Chars
Int
|
Sentences
Int
|
Paragraphs
Int
tag
=
parseTags
-- | splitBy contexts of Chars or Sentences or Paragraphs
-- >> splitBy (Chars 0) "abcde"
-- ["a","b","c","d","e"]
-- >> splitBy (Chars 1) "abcde"
-- ["ab","bc","cd","de"]
-- >> splitBy (Chars 2) "abcde"
-- ["abc","bcd","cde"]
splitBy
::
SplitContext
->
Text
->
[
Text
]
splitBy
(
Chars
n
)
=
map
pack
.
chunkAlong
(
n
+
1
)
1
.
unpack
splitBy
(
Sentences
n
)
=
map
unsentences
.
chunkAlong
(
n
+
1
)
1
.
sentences
splitBy
(
Paragraphs
_
)
=
map
unTag
.
filter
isTagText
.
tag
where
removeTag
::
IsString
p
=>
Tag
p
->
p
removeTag
(
TagText
x
)
=
x
removeTag
(
TagComment
x
)
=
x
removeTag
_
=
""
unTag
::
IsString
p
=>
Tag
p
->
p
unTag
(
TagText
x
)
=
x
unTag
_
=
""
src/Gargantext/Text/Parsers/CSV.hs
View file @
0be01d72
...
...
@@ -50,13 +50,13 @@ data Doc = Doc
deriving
(
Show
)
---------------------------------------------------------------
toDocs
::
Vector
CsvDoc
->
[
Doc
]
toDocs
v
=
V
.
toList
toDocs
v
=
V
.
toList
$
V
.
zipWith
(
\
nId
(
CsvDoc
t
s
py
pm
pd
abst
auth
)
->
Doc
nId
t
s
py
pm
pd
abst
auth
)
(
V
.
enumFromN
1
(
V
.
length
v''
))
v''
where
v''
=
V
.
foldl
(
\
v'
sep
->
V
.
concatMap
(
splitDoc
(
docsSize
v'
)
sep
)
v'
)
v
seps
seps
=
(
V
.
fromList
[
Paragraph
,
Sentences
,
Chars
])
seps
=
(
V
.
fromList
[
Paragraph
s
1
,
Sentences
3
,
Chars
3
])
---------------------------------------------------------------
fromDocs
::
Vector
Doc
->
Vector
CsvDoc
...
...
@@ -69,7 +69,7 @@ fromDocs docs = V.map fromDocs' docs
-- TODO adapt the size of the paragraph according to the corpus average
splitDoc
::
Mean
->
Split
By
->
CsvDoc
->
Vector
CsvDoc
splitDoc
::
Mean
->
Split
Context
->
CsvDoc
->
Vector
CsvDoc
splitDoc
m
splt
doc
=
let
docSize
=
(
length
$
c_abstract
doc
)
in
if
docSize
>
1000
then
...
...
@@ -82,15 +82,15 @@ splitDoc m splt doc = let docSize = (length $ c_abstract doc) in
V
.
fromList
[
doc
]
splitDoc'
::
Split
By
->
CsvDoc
->
Vector
CsvDoc
splitDoc'
splt
(
CsvDoc
t
s
py
pm
pd
abst
auth
)
=
V
.
fromList
$
[
firstDoc
]
<>
nextDocs
splitDoc'
::
Split
Context
->
CsvDoc
->
Vector
CsvDoc
splitDoc'
contextSize
(
CsvDoc
t
s
py
pm
pd
abst
auth
)
=
V
.
fromList
$
[
firstDoc
]
<>
nextDocs
where
firstDoc
=
CsvDoc
t
s
py
pm
pd
firstAbstract
auth
firstAbstract
=
head'
abstracts
nextDocs
=
map
(
\
txt
->
CsvDoc
(
head'
$
sentences
txt
)
s
py
pm
pd
(
unsentences
$
tail'
$
sentences
txt
)
auth
)
(
tail'
abstracts
)
abstracts
=
(
splitBy
splt
20
)
abst
abstracts
=
(
splitBy
$
contextSize
)
abst
head'
x
=
maybe
""
identity
(
head
x
)
tail'
x
=
maybe
[
""
]
identity
(
tailMay
x
)
...
...
src/Gargantext/Text/Terms.hs
View file @
0be01d72
...
...
@@ -34,6 +34,7 @@ module Gargantext.Text.Terms
where
import
Data.Text
(
Text
)
import
Data.Traversable
import
Gargantext.Prelude
import
Gargantext.Core
...
...
@@ -46,6 +47,9 @@ data TermType = Mono | Multi
-- remove Stop Words
-- map (filter (\t -> not . elem t)) $
------------------------------------------------------------------------
extractTerms
::
Traversable
t
=>
TermType
->
Lang
->
t
Text
->
IO
(
t
[
Terms
])
extractTerms
termType
lang
=
mapM
(
terms
termType
lang
)
------------------------------------------------------------------------
terms
::
TermType
->
Lang
->
Text
->
IO
[
Terms
]
terms
Mono
lang
txt
=
pure
$
monoterms'
lang
txt
terms
Multi
lang
txt
=
multiterms
lang
txt
...
...
src/Gargantext/Viz/Graph/Distances/Matrice.hs
View file @
0be01d72
...
...
@@ -36,10 +36,8 @@ Implementation use Accelerate library :
module
Gargantext.Viz.Graph.Distances.Matrice
where
--import Data.Array.Accelerate.Data.Bits
import
Data.Array.Accelerate.Interpreter
(
run
)
import
Data.Array.Accelerate
import
Data.Array.Accelerate.Interpreter
(
run
)
import
Data.Array.Accelerate.Smart
import
Data.Array.Accelerate.Type
import
Data.Array.Accelerate.Array.Sugar
(
fromArr
,
Array
,
Z
)
...
...
@@ -94,14 +92,7 @@ type SpecificityGenericity = Double
conditional
::
Matrix
Double
->
(
Matrix
InclusionExclusion
,
Matrix
SpecificityGenericity
)
conditional
m
=
(
run
$
ie
(
use
m
),
run
$
sg
(
use
m
))
where
r
::
Rank
r
=
rank'
m
xs
::
Matrix'
Double
->
Matrix'
Double
xs
mat
=
zipWith
(
-
)
(
proba
r
mat
)
(
mkSum
r
$
proba
r
mat
)
ys
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
ys
mat
=
zipWith
(
-
)
(
proba
r
mat
)
(
mkSum
r
$
transpose
$
proba
r
mat
)
ie
::
Matrix'
Double
->
Matrix'
Double
ie
mat
=
map
(
\
x
->
x
/
(
2
*
n
-
1
))
$
zipWith
(
+
)
(
xs
mat
)
(
ys
mat
)
sg
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
...
...
@@ -109,7 +100,14 @@ conditional m = (run $ ie (use m), run $ sg (use m))
n
::
Exp
Double
n
=
P
.
fromIntegral
r
r
::
Rank
r
=
rank'
m
xs
::
Matrix'
Double
->
Matrix'
Double
xs
mat
=
zipWith
(
-
)
(
proba
r
mat
)
(
mkSum
r
$
proba
r
mat
)
ys
::
Acc
(
Matrix
Double
)
->
Acc
(
Matrix
Double
)
ys
mat
=
zipWith
(
-
)
(
proba
r
mat
)
(
mkSum
r
$
transpose
$
proba
r
mat
)
-- filter with threshold
-----------------------------------------------------------------------
...
...
@@ -121,7 +119,9 @@ distributional m = run $ filter $ ri (map fromIntegral $ use m)
where
n
=
rank'
m
miniMax
m
=
map
(
\
x
->
ifThenElse
(
x
>
(
the
$
minimum
$
maximum
m
))
x
0
)
m
miniMax
m
=
map
(
\
x
->
ifThenElse
(
x
>
miniMax'
)
x
0
)
m
where
miniMax'
=
(
the
$
minimum
$
maximum
m
)
filter
m
=
zipWith
(
\
a
b
->
max
a
b
)
m
(
transpose
m
)
...
...
src/Gargantext/Viz/Graph/Index.hs
View file @
0be01d72
...
...
@@ -43,28 +43,12 @@ import Gargantext.Prelude
type
Index
=
Int
-------------------------------------------------------------------------------
{-
map'' :: (Ord t) => (A.Matrix Int -> A.Matrix Double)
-> Map (t, t) Int
-> Map (t, t) Double
map'' f m = back . f' . from m
where
from (fs, m') = unzip $ M.toAscList m
f' = f $ A.fromList shape m'
shape = (A.Z A.:. n A.:. n)
back = M.fromAscList . zip fs . A.toList
-}
-------------------------------------------------------------------------------
map'
::
(
Ord
t
)
=>
(
A
.
Matrix
Int
->
A
.
Matrix
Double
)
score
::
(
Ord
t
)
=>
(
A
.
Matrix
Int
->
A
.
Matrix
Double
)
->
Map
(
t
,
t
)
Int
->
Map
(
t
,
t
)
Double
map'
f
m
=
fromIndex
fromI
.
mat2cooc
.
f
$
cooc2mat
toI
m
where
(
toI
,
fromI
)
=
createIndexes
m
map''
m
=
cooc2mat
toI
m
score
f
m
=
fromIndex
fromI
.
mat2map
.
f
$
cooc2mat
toI
m
where
(
toI
,
fromI
)
=
createIndexes
m
...
...
@@ -81,10 +65,9 @@ map2mat def n m = A.fromFunction shape (\(Z :. x :. y) -> fromMaybe def $ M.look
where
shape
=
(
Z
:.
n
:.
n
)
-- TODO rename mat2map
mat2cooc
::
(
Elt
a
,
Shape
(
Z
:.
Index
))
=>
mat2map
::
(
Elt
a
,
Shape
(
Z
:.
Index
))
=>
A
.
Array
(
Z
:.
Index
:.
Index
)
a
->
Map
(
Index
,
Index
)
a
mat2
cooc
m
=
M
.
fromList
.
map
f
.
A
.
toList
.
A
.
run
.
A
.
indexed
$
A
.
use
m
mat2
map
m
=
M
.
fromList
.
map
f
.
A
.
toList
.
A
.
run
.
A
.
indexed
$
A
.
use
m
where
Z
:.
_
:.
n
=
A
.
arrayShape
m
f
((
Z
:.
i
:.
j
),
x
)
=
((
i
,
j
),
x
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment