Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
4f5491c3
Commit
4f5491c3
authored
Jul 03, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[WithList] adding labelPolicy.
parent
1237e415
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
65 additions
and
44 deletions
+65
-44
package.yaml
package.yaml
+1
-0
Context.hs
src/Gargantext/Text/Context.hs
+15
-1
Search.hs
src/Gargantext/Text/Search.hs
+3
-3
Terms.hs
src/Gargantext/Text/Terms.hs
+5
-3
Mono.hs
src/Gargantext/Text/Terms/Mono.hs
+30
-16
WithList.hs
src/Gargantext/Text/Terms/WithList.hs
+11
-21
No files found.
package.yaml
View file @
4f5491c3
...
...
@@ -29,6 +29,7 @@ library:
-
Gargantext.Core
-
Gargantext.Core.Types
-
Gargantext.Text
-
Gargantext.Text.List.CSV
-
Gargantext.Text.Search
-
Gargantext.Text.Parsers.CSV
-
Gargantext.API
...
...
src/Gargantext/Text/Context.hs
View file @
4f5491c3
...
...
@@ -7,7 +7,7 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Context of text management tool
Context of text management tool
, here are logic of main types.
-}
...
...
@@ -25,6 +25,20 @@ import Gargantext.Text
import
Gargantext.Prelude
hiding
(
length
)
------------------------------------------------------------------------
type
Term
=
Text
type
Label
=
Term
type
Sentence
a
=
[
a
]
-- or a nominal group
type
Corpus
a
=
[
Sentence
a
]
-- a list of sentences
-- type ConText a = [Sentence a]
-- type Corpus a = [ConText a]
------------------------------------------------------------------------
data
SplitContext
=
Chars
Int
|
Sentences
Int
|
Paragraphs
Int
tag
::
Text
->
[
Tag
Text
]
...
...
src/Gargantext/Text/Search.hs
View file @
4f5491c3
...
...
@@ -28,7 +28,7 @@ import Data.Ix
import
Data.Text
(
Text
)
import
Gargantext.Prelude
import
Gargantext.Text.Terms.Mono
(
mono
term
s
)
import
Gargantext.Text.Terms.Mono
(
mono
Text
s
)
import
Gargantext.Text.Terms.Mono.Stem
as
ST
import
Gargantext.Text.Parsers.CSV
...
...
@@ -58,8 +58,8 @@ docSearchConfig =
}
where
extractTerms
::
Doc
->
DocField
->
[
Text
]
extractTerms
doc
TitleField
=
mono
term
s
(
d_title
doc
)
extractTerms
doc
AbstractField
=
mono
term
s
(
d_abstract
doc
)
extractTerms
doc
TitleField
=
mono
Text
s
(
d_title
doc
)
extractTerms
doc
AbstractField
=
mono
Text
s
(
d_abstract
doc
)
normaliseQueryToken
::
Text
->
DocField
->
Text
normaliseQueryToken
tok
=
...
...
src/Gargantext/Text/Terms.hs
View file @
4f5491c3
...
...
@@ -41,7 +41,7 @@ import Gargantext.Prelude
import
Gargantext.Core
import
Gargantext.Core.Types
import
Gargantext.Text.Terms.Multi
(
multiterms
)
import
Gargantext.Text.Terms.Mono
(
mono
terms'
)
import
Gargantext.Text.Terms.Mono
(
mono
Terms
)
import
Gargantext.Text.Terms.WithList
(
Patterns
,
extractTermsWithList
)
data
TermType
lang
=
Mono
lang
|
Multi
lang
|
MonoMulti
lang
|
WithList
Patterns
...
...
@@ -59,9 +59,11 @@ extractTerms termTypeLang = mapM (terms termTypeLang)
-- MonoMulti : mono and multi
-- TODO : multi terms should exclude mono (intersection is not empty yet)
terms
::
TermType
Lang
->
Text
->
IO
[
Terms
]
terms
(
Mono
lang
)
txt
=
pure
$
mono
terms'
lang
txt
terms
(
Mono
lang
)
txt
=
pure
$
mono
Terms
lang
txt
terms
(
Multi
lang
)
txt
=
multiterms
lang
txt
terms
(
MonoMulti
lang
)
txt
=
terms
(
Multi
lang
)
txt
terms
(
WithList
list
)
txt
=
pure
.
map
(
\
x
->
Terms
x
Set
.
empty
{-TODO-}
)
$
extractTermsWithList
list
txt
terms
(
WithList
list
)
txt
=
pure
.
map
(
\
x
->
Terms
x
Set
.
empty
{-TODO-}
)
$
extractTermsWithList
labelPolicy
list
txt
where
labelPolicy
=
undefined
------------------------------------------------------------------------
src/Gargantext/Text/Terms/Mono.hs
View file @
4f5491c3
...
...
@@ -13,10 +13,17 @@ Mono-terms are Nterms where n == 1.
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.Text.Terms.Mono
(
mono
terms
,
monoterms'
)
module
Gargantext.Text.Terms.Mono
(
mono
Terms
,
monoTexts
,
monoTextsBySentence
)
where
import
Prelude
(
String
)
import
Data.Char
(
isSpace
)
import
Data.Text
(
Text
,
toLower
,
split
,
splitOn
,
pack
)
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
T
import
qualified
Data.List
as
L
import
qualified
Data.Set
as
S
import
Gargantext.Core
...
...
@@ -26,23 +33,30 @@ import Gargantext.Text.Terms.Mono.Stem (stem)
import
Gargantext.Prelude
--import Data.Char (isAlphaNum, isSpace)
monoterms'
::
Lang
->
Text
->
[
Terms
]
monoterms'
l
txt
=
map
(
text2terms
l
)
$
monoterms
txt
-- | TODO remove Num ?
--isGram c = isAlphaNum c
monoterms
::
Text
->
[
Text
]
monoterms
txt
=
map
toLower
$
split
isWord
txt
where
isWord
c
=
c
`
elem
`
[
' '
,
'
\'
'
,
','
,
';'
]
text2terms
::
Lang
->
Text
->
Terms
text2terms
lang
txt
=
Terms
label
stems
where
label
=
splitOn
(
pack
" "
)
txt
stems
=
S
.
fromList
$
map
(
stem
lang
)
label
-- | Sentence split separators
isSep
::
Char
->
Bool
isSep
=
(`
elem
`
(
",.:;?!(){}[]
\"
"
::
String
))
monoTerms
::
Lang
->
Text
->
[
Terms
]
monoTerms
l
txt
=
map
(
monoText2term
l
)
$
monoTexts
txt
monoTexts
::
Text
->
[
Text
]
monoTexts
=
L
.
concat
.
monoTextsBySentence
monoText2term
::
Lang
->
Text
->
Terms
monoText2term
lang
txt
=
Terms
[
txt
]
(
S
.
singleton
$
stem
lang
txt
)
monoTextsBySentence
::
Text
->
[[
Text
]]
monoTextsBySentence
=
map
(
T
.
split
isSpace
)
.
T
.
split
isSep
.
T
.
toLower
--monograms :: Text -> [Text]
--monograms xs = monograms $ toLower $ filter isGram xs
--isGram :: Char -> Bool
--isGram c = isAlphaNum c || isSpace c || c `elem` ['-','/','\'']
src/Gargantext/Text/Terms/WithList.hs
View file @
4f5491c3
...
...
@@ -16,32 +16,28 @@ commentary with @some markup@.
module
Gargantext.Text.Terms.WithList
where
import
Prelude
(
String
)
import
qualified
Data.Algorithms.KMP
as
KMP
import
Data.Char
(
isSpace
)
import
qualified
Data.Text
as
T
import
Data.Text
(
Text
)
import
qualified
Data.IntMap.Strict
as
IntMap
import
Gargantext.Text.Context
import
Gargantext.Text.Terms.Mono
(
monoTextsBySentence
)
import
Gargantext.Prelude
import
Data.List
(
concatMap
)
type
Term
=
Text
type
Label
=
Term
type
Pattern
=
KMP
.
Table
Term
type
TermList
=
[(
Label
,
[[
Term
]])]
type
Patterns
=
[(
Pattern
,
Int
,
Label
)]
isMultiTermSep
::
Char
->
Bool
isMultiTermSep
=
(`
elem
`
",.:;?!(){}[]"
)
type
Sentence
a
=
[
a
]
-- or a nominal group
type
Corpus
a
=
[
Sentence
a
]
-- a list of sentences
replaceTerms
::
Patterns
->
Sentence
Term
->
Sentence
Label
replaceTerms
pats
terms
=
go
0
terms
replaceTerms
::
(
Term
->
Label
)
->
Patterns
->
Sentence
Term
->
Sentence
Label
replaceTerms
labelPolicy
pats
terms
=
go
0
terms
where
go
_
[]
=
[]
go
!
ix
(
t
:
ts
)
=
...
...
@@ -50,9 +46,9 @@ replaceTerms pats terms = go 0 terms
Just
(
len
,
label
)
->
label
:
go
(
ix
+
len
)
(
drop
(
len
-
1
)
ts
)
--
TODO is it what we want?
--
| merge with labelPolicy (can be a Map Term label)
merge
(
len1
,
lab1
)
(
len2
,
lab2
)
=
if
len1
>
len2
then
(
len1
,
lab1
)
else
(
len2
,
lab2
)
if
(
labelPolicy
lab1
)
==
lab2
then
(
len2
,
lab2
)
else
(
len1
,
lab1
)
m
=
IntMap
.
fromListWith
merge
...
...
@@ -66,11 +62,5 @@ buildPatterns = concatMap buildPattern
where
f
alt
=
(
KMP
.
build
alt
,
length
alt
,
label
)
-- monoterms'' :: Lang -> Text -> [Terms]
-- monoterms'' l txt = map (text2terms l) $ monoterms txt
extractTermsWithList
::
Patterns
->
Text
->
Corpus
Label
extractTermsWithList
pats
=
map
(
replaceTerms
pats
)
.
map
(
T
.
split
isSpace
)
.
-- text2terms
T
.
split
isMultiTermSep
.
T
.
toLower
-- as in monoterms with a different list of seps
extractTermsWithList
::
(
Term
->
Label
)
->
Patterns
->
Text
->
Corpus
Label
extractTermsWithList
labelPolicy
pats
=
map
(
replaceTerms
labelPolicy
pats
)
.
monoTextsBySentence
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment