Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
1237e415
Unverified
Commit
1237e415
authored
Jul 02, 2018
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
WIP Gargantext/Text/Terms/WithList
parent
cc31b225
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
86 additions
and
5 deletions
+86
-5
package.yaml
package.yaml
+1
-0
List.hs
src/Gargantext/Text/List.hs
+1
-0
CSV.hs
src/Gargantext/Text/List/CSV.hs
+2
-3
Types.hs
src/Gargantext/Text/List/Types.hs
+1
-1
Terms.hs
src/Gargantext/Text/Terms.hs
+4
-1
WithList.hs
src/Gargantext/Text/Terms/WithList.hs
+76
-0
stack.yaml
stack.yaml
+1
-0
No files found.
package.yaml
View file @
1237e415
...
...
@@ -70,6 +70,7 @@ library:
-
ini
-
jose-jwt
-
kmeans-vector
-
KMP
-
lens
-
logging-effect
-
matrix
...
...
src/Gargantext/Text/List.hs
View file @
1237e415
...
...
@@ -13,6 +13,7 @@ commentary with @some markup@.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module
Gargantext.Text.List
where
...
...
src/Gargantext/Text/List/CSV.hs
View file @
1237e415
...
...
@@ -17,7 +17,6 @@ CSV parser for Gargantext corpus files.
module
Gargantext.Text.List.CSV
where
import
GHC.Real
(
round
)
import
GHC.IO
(
FilePath
)
import
Control.Applicative
...
...
@@ -26,14 +25,14 @@ import Control.Monad (mzero)
import
Data.Char
(
ord
)
import
Data.Csv
import
Data.Either
(
Either
(
Left
,
Right
))
import
Data.Text
(
Text
,
pack
,
length
,
intercalate
)
import
Data.Text
(
Text
,
pack
)
import
qualified
Data.ByteString.Lazy
as
BL
import
Data.Vector
(
Vector
)
import
qualified
Data.Vector
as
V
import
Gargantext.Prelude
hiding
(
length
)
import
Gargantext.Text.List.Types
--
import Gargantext.Text.List.Types
------------------------------------------------------------------------
--csv2lists :: Vector CsvList -> Lists
...
...
src/Gargantext/Text/List/Types.hs
View file @
1237e415
...
...
@@ -18,7 +18,7 @@ module Gargantext.Text.List.Types where
import
Prelude
(
Bounded
,
Enum
,
minBound
,
maxBound
)
import
Data.Text
(
Text
)
import
Data.Map
(
Map
,
empty
,
fromList
,
insert
,
lookup
)
import
Data.Map
(
Map
,
empty
,
fromList
)
import
Gargantext.Prelude
-------------------------------------------------------------------
...
...
src/Gargantext/Text/Terms.hs
View file @
1237e415
...
...
@@ -33,6 +33,7 @@ compute graph
module
Gargantext.Text.Terms
where
import
qualified
Data.Set
as
Set
import
Data.Text
(
Text
)
import
Data.Traversable
...
...
@@ -41,8 +42,9 @@ import Gargantext.Core
import
Gargantext.Core.Types
import
Gargantext.Text.Terms.Multi
(
multiterms
)
import
Gargantext.Text.Terms.Mono
(
monoterms'
)
import
Gargantext.Text.Terms.WithList
(
Patterns
,
extractTermsWithList
)
data
TermType
lang
=
Mono
lang
|
Multi
lang
|
MonoMulti
lang
data
TermType
lang
=
Mono
lang
|
Multi
lang
|
MonoMulti
lang
|
WithList
Patterns
-- remove Stop Words
-- map (filter (\t -> not . elem t)) $
...
...
@@ -60,5 +62,6 @@ terms :: TermType Lang -> Text -> IO [Terms]
terms
(
Mono
lang
)
txt
=
pure
$
monoterms'
lang
txt
terms
(
Multi
lang
)
txt
=
multiterms
lang
txt
terms
(
MonoMulti
lang
)
txt
=
terms
(
Multi
lang
)
txt
terms
(
WithList
list
)
txt
=
pure
.
map
(
\
x
->
Terms
x
Set
.
empty
{-TODO-}
)
$
extractTermsWithList
list
txt
------------------------------------------------------------------------
src/Gargantext/Text/Terms/WithList.hs
0 → 100644
View file @
1237e415
{-|
Module : Gargantext.Text.Terms.WithList
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE BangPatterns #-}
module
Gargantext.Text.Terms.WithList
where
import
qualified
Data.Algorithms.KMP
as
KMP
import
Data.Char
(
isSpace
)
import
qualified
Data.Text
as
T
import
Data.Text
(
Text
)
import
qualified
Data.IntMap.Strict
as
IntMap
import
Gargantext.Prelude
import
Data.List
(
concatMap
)
type
Term
=
Text
type
Label
=
Term
type
Pattern
=
KMP
.
Table
Term
type
TermList
=
[(
Label
,
[[
Term
]])]
type
Patterns
=
[(
Pattern
,
Int
,
Label
)]
isMultiTermSep
::
Char
->
Bool
isMultiTermSep
=
(`
elem
`
",.:;?!(){}[]"
)
type
Sentence
a
=
[
a
]
-- or a nominal group
type
Corpus
a
=
[
Sentence
a
]
-- a list of sentences
replaceTerms
::
Patterns
->
Sentence
Term
->
Sentence
Label
replaceTerms
pats
terms
=
go
0
terms
where
go
_
[]
=
[]
go
!
ix
(
t
:
ts
)
=
case
IntMap
.
lookup
ix
m
of
Nothing
->
t
:
go
(
ix
+
1
)
ts
Just
(
len
,
label
)
->
label
:
go
(
ix
+
len
)
(
drop
(
len
-
1
)
ts
)
-- TODO is it what we want?
merge
(
len1
,
lab1
)
(
len2
,
lab2
)
=
if
len1
>
len2
then
(
len1
,
lab1
)
else
(
len2
,
lab2
)
m
=
IntMap
.
fromListWith
merge
[
(
ix
,
(
len
,
label
))
|
(
pat
,
len
,
label
)
<-
pats
,
ix
<-
KMP
.
match
pat
terms
]
buildPatterns
::
TermList
->
Patterns
buildPatterns
=
concatMap
buildPattern
where
buildPattern
(
label
,
alts
)
=
map
f
alts
where
f
alt
=
(
KMP
.
build
alt
,
length
alt
,
label
)
-- monoterms'' :: Lang -> Text -> [Terms]
-- monoterms'' l txt = map (text2terms l) $ monoterms txt
extractTermsWithList
::
Patterns
->
Text
->
Corpus
Label
extractTermsWithList
pats
=
map
(
replaceTerms
pats
)
.
map
(
T
.
split
isSpace
)
.
-- text2terms
T
.
split
isMultiTermSep
.
T
.
toLower
-- as in monoterms with a different list of seps
stack.yaml
View file @
1237e415
...
...
@@ -41,4 +41,5 @@ extra-deps:
-
servant-flatten-0.2
-
serialise-0.2.0.0
# imt-api-client
-
cborg-0.2.0.0
# imt-api-client
-
KMP-0.1.0.2
resolver
:
lts-11.10
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment