Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
1237e415
Unverified
Commit
1237e415
authored
Jul 02, 2018
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
WIP Gargantext/Text/Terms/WithList
parent
cc31b225
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
86 additions
and
5 deletions
+86
-5
package.yaml
package.yaml
+1
-0
List.hs
src/Gargantext/Text/List.hs
+1
-0
CSV.hs
src/Gargantext/Text/List/CSV.hs
+2
-3
Types.hs
src/Gargantext/Text/List/Types.hs
+1
-1
Terms.hs
src/Gargantext/Text/Terms.hs
+4
-1
WithList.hs
src/Gargantext/Text/Terms/WithList.hs
+76
-0
stack.yaml
stack.yaml
+1
-0
No files found.
package.yaml
View file @
1237e415
...
...
@@ -70,6 +70,7 @@ library:
-
ini
-
jose-jwt
-
kmeans-vector
-
KMP
-
lens
-
logging-effect
-
matrix
...
...
src/Gargantext/Text/List.hs
View file @
1237e415
...
...
@@ -13,6 +13,7 @@ commentary with @some markup@.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.List
where
...
...
src/Gargantext/Text/List/CSV.hs
View file @
1237e415
...
...
@@ -17,7 +17,6 @@ CSV parser for Gargantext corpus files.
module
Gargantext.Text.List.CSV
where
import
GHC.Real
(
round
)
import
GHC.IO
(
FilePath
)
import
Control.Applicative
...
...
@@ -26,14 +25,14 @@ import Control.Monad (mzero)
import
Data.Char
(
ord
)
import
Data.Csv
import
Data.Either
(
Either
(
Left
,
Right
))
import
Data.Text
(
Text
,
pack
,
length
,
intercalate
)
import
Data.Text
(
Text
,
pack
)
import
qualified
Data.ByteString.Lazy
as
BL
import
Data.Vector
(
Vector
)
import
qualified
Data.Vector
as
V
import
Gargantext.Prelude
hiding
(
length
)
import
Gargantext.Text.List.Types
--
import Gargantext.Text.List.Types
------------------------------------------------------------------------
--csv2lists :: Vector CsvList -> Lists
...
...
src/Gargantext/Text/List/Types.hs
View file @
1237e415
...
...
@@ -18,7 +18,7 @@ module Gargantext.Text.List.Types where
import
Prelude
(
Bounded
,
Enum
,
minBound
,
maxBound
)
import
Data.Text
(
Text
)
import
Data.Map
(
Map
,
empty
,
fromList
,
insert
,
lookup
)
import
Data.Map
(
Map
,
empty
,
fromList
)
import
Gargantext.Prelude
-------------------------------------------------------------------
...
...
src/Gargantext/Text/Terms.hs
View file @
1237e415
...
...
@@ -33,6 +33,7 @@ compute graph
module
Gargantext.Text.Terms
where
import
qualified
Data.Set
as
Set
import
Data.Text
(
Text
)
import
Data.Traversable
...
...
@@ -41,8 +42,9 @@ import Gargantext.Core
import
Gargantext.Core.Types
import
Gargantext.Text.Terms.Multi
(
multiterms
)
import
Gargantext.Text.Terms.Mono
(
monoterms'
)
import
Gargantext.Text.Terms.WithList
(
Patterns
,
extractTermsWithList
)
data
TermType
lang
=
Mono
lang
|
Multi
lang
|
MonoMulti
lang
data
TermType
lang
=
Mono
lang
|
Multi
lang
|
MonoMulti
lang
|
WithList
Patterns
-- remove Stop Words
-- map (filter (\t -> not . elem t)) $
...
...
@@ -60,5 +62,6 @@ terms :: TermType Lang -> Text -> IO [Terms]
terms
(
Mono
lang
)
txt
=
pure
$
monoterms'
lang
txt
terms
(
Multi
lang
)
txt
=
multiterms
lang
txt
terms
(
MonoMulti
lang
)
txt
=
terms
(
Multi
lang
)
txt
terms
(
WithList
list
)
txt
=
pure
.
map
(
\
x
->
Terms
x
Set
.
empty
{-TODO-}
)
$
extractTermsWithList
list
txt
------------------------------------------------------------------------
src/Gargantext/Text/Terms/WithList.hs
0 → 100644
View file @
1237e415
{-|
Module : Gargantext.Text.Terms.WithList
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE BangPatterns #-}
module
Gargantext.Text.Terms.WithList
where
import
qualified
Data.Algorithms.KMP
as
KMP
import
Data.Char
(
isSpace
)
import
qualified
Data.Text
as
T
import
Data.Text
(
Text
)
import
qualified
Data.IntMap.Strict
as
IntMap
import
Gargantext.Prelude
import
Data.List
(
concatMap
)
type
Term
=
Text
type
Label
=
Term
type
Pattern
=
KMP
.
Table
Term
type
TermList
=
[(
Label
,
[[
Term
]])]
type
Patterns
=
[(
Pattern
,
Int
,
Label
)]
isMultiTermSep
::
Char
->
Bool
isMultiTermSep
=
(`
elem
`
",.:;?!(){}[]"
)
type
Sentence
a
=
[
a
]
-- or a nominal group
type
Corpus
a
=
[
Sentence
a
]
-- a list of sentences
replaceTerms
::
Patterns
->
Sentence
Term
->
Sentence
Label
replaceTerms
pats
terms
=
go
0
terms
where
go
_
[]
=
[]
go
!
ix
(
t
:
ts
)
=
case
IntMap
.
lookup
ix
m
of
Nothing
->
t
:
go
(
ix
+
1
)
ts
Just
(
len
,
label
)
->
label
:
go
(
ix
+
len
)
(
drop
(
len
-
1
)
ts
)
-- TODO is it what we want?
merge
(
len1
,
lab1
)
(
len2
,
lab2
)
=
if
len1
>
len2
then
(
len1
,
lab1
)
else
(
len2
,
lab2
)
m
=
IntMap
.
fromListWith
merge
[
(
ix
,
(
len
,
label
))
|
(
pat
,
len
,
label
)
<-
pats
,
ix
<-
KMP
.
match
pat
terms
]
buildPatterns
::
TermList
->
Patterns
buildPatterns
=
concatMap
buildPattern
where
buildPattern
(
label
,
alts
)
=
map
f
alts
where
f
alt
=
(
KMP
.
build
alt
,
length
alt
,
label
)
-- monoterms'' :: Lang -> Text -> [Terms]
-- monoterms'' l txt = map (text2terms l) $ monoterms txt
extractTermsWithList
::
Patterns
->
Text
->
Corpus
Label
extractTermsWithList
pats
=
map
(
replaceTerms
pats
)
.
map
(
T
.
split
isSpace
)
.
-- text2terms
T
.
split
isMultiTermSep
.
T
.
toLower
-- as in monoterms with a different list of seps
stack.yaml
View file @
1237e415
...
...
@@ -41,4 +41,5 @@ extra-deps:
-
servant-flatten-0.2
-
serialise-0.2.0.0
# imt-api-client
-
cborg-0.2.0.0
# imt-api-client
-
KMP-0.1.0.2
resolver
:
lts-11.10
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment