Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
4f5491c3
Commit
4f5491c3
authored
Jul 03, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[WithList] adding labelPolicy.
parent
1237e415
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
65 additions
and
44 deletions
+65
-44
package.yaml
package.yaml
+1
-0
Context.hs
src/Gargantext/Text/Context.hs
+15
-1
Search.hs
src/Gargantext/Text/Search.hs
+3
-3
Terms.hs
src/Gargantext/Text/Terms.hs
+5
-3
Mono.hs
src/Gargantext/Text/Terms/Mono.hs
+30
-16
WithList.hs
src/Gargantext/Text/Terms/WithList.hs
+11
-21
No files found.
package.yaml
View file @
4f5491c3
...
...
@@ -29,6 +29,7 @@ library:
-
Gargantext.Core
-
Gargantext.Core.Types
-
Gargantext.Text
-
Gargantext.Text.List.CSV
-
Gargantext.Text.Search
-
Gargantext.Text.Parsers.CSV
-
Gargantext.API
...
...
src/Gargantext/Text/Context.hs
View file @
4f5491c3
...
...
@@ -7,7 +7,7 @@ Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Context of text management tool
Context of text management tool
, here are logic of main types.
-}
...
...
@@ -25,6 +25,20 @@ import Gargantext.Text
import
Gargantext.Prelude
hiding
(
length
)
------------------------------------------------------------------------
type
Term
=
Text
type
Label
=
Term
type
Sentence
a
=
[
a
]
-- or a nominal group
type
Corpus
a
=
[
Sentence
a
]
-- a list of sentences
-- type ConText a = [Sentence a]
-- type Corpus a = [ConText a]
------------------------------------------------------------------------
data
SplitContext
=
Chars
Int
|
Sentences
Int
|
Paragraphs
Int
tag
::
Text
->
[
Tag
Text
]
...
...
src/Gargantext/Text/Search.hs
View file @
4f5491c3
...
...
@@ -28,7 +28,7 @@ import Data.Ix
import
Data.Text
(
Text
)
import
Gargantext.Prelude
import
Gargantext.Text.Terms.Mono
(
mono
term
s
)
import
Gargantext.Text.Terms.Mono
(
mono
Text
s
)
import
Gargantext.Text.Terms.Mono.Stem
as
ST
import
Gargantext.Text.Parsers.CSV
...
...
@@ -58,8 +58,8 @@ docSearchConfig =
}
where
extractTerms
::
Doc
->
DocField
->
[
Text
]
extractTerms
doc
TitleField
=
mono
term
s
(
d_title
doc
)
extractTerms
doc
AbstractField
=
mono
term
s
(
d_abstract
doc
)
extractTerms
doc
TitleField
=
mono
Text
s
(
d_title
doc
)
extractTerms
doc
AbstractField
=
mono
Text
s
(
d_abstract
doc
)
normaliseQueryToken
::
Text
->
DocField
->
Text
normaliseQueryToken
tok
=
...
...
src/Gargantext/Text/Terms.hs
View file @
4f5491c3
...
...
@@ -41,7 +41,7 @@ import Gargantext.Prelude
import
Gargantext.Core
import
Gargantext.Core.Types
import
Gargantext.Text.Terms.Multi
(
multiterms
)
import
Gargantext.Text.Terms.Mono
(
mono
terms'
)
import
Gargantext.Text.Terms.Mono
(
mono
Terms
)
import
Gargantext.Text.Terms.WithList
(
Patterns
,
extractTermsWithList
)
data
TermType
lang
=
Mono
lang
|
Multi
lang
|
MonoMulti
lang
|
WithList
Patterns
...
...
@@ -59,9 +59,11 @@ extractTerms termTypeLang = mapM (terms termTypeLang)
-- MonoMulti : mono and multi
-- TODO : multi terms should exclude mono (intersection is not empty yet)
terms
::
TermType
Lang
->
Text
->
IO
[
Terms
]
terms
(
Mono
lang
)
txt
=
pure
$
mono
terms'
lang
txt
terms
(
Mono
lang
)
txt
=
pure
$
mono
Terms
lang
txt
terms
(
Multi
lang
)
txt
=
multiterms
lang
txt
terms
(
MonoMulti
lang
)
txt
=
terms
(
Multi
lang
)
txt
terms
(
WithList
list
)
txt
=
pure
.
map
(
\
x
->
Terms
x
Set
.
empty
{-TODO-}
)
$
extractTermsWithList
list
txt
terms
(
WithList
list
)
txt
=
pure
.
map
(
\
x
->
Terms
x
Set
.
empty
{-TODO-}
)
$
extractTermsWithList
labelPolicy
list
txt
where
labelPolicy
=
undefined
------------------------------------------------------------------------
src/Gargantext/Text/Terms/Mono.hs
View file @
4f5491c3
...
...
@@ -13,10 +13,17 @@ Mono-terms are Nterms where n == 1.
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.Text.Terms.Mono
(
mono
terms
,
monoterms'
)
module
Gargantext.Text.Terms.Mono
(
mono
Terms
,
monoTexts
,
monoTextsBySentence
)
where
import
Prelude
(
String
)
import
Data.Char
(
isSpace
)
import
Data.Text
(
Text
,
toLower
,
split
,
splitOn
,
pack
)
import
Data.Text
(
Text
)
import
qualified
Data.Text
as
T
import
qualified
Data.List
as
L
import
qualified
Data.Set
as
S
import
Gargantext.Core
...
...
@@ -26,23 +33,30 @@ import Gargantext.Text.Terms.Mono.Stem (stem)
import
Gargantext.Prelude
--import Data.Char (isAlphaNum, isSpace)
monoterms'
::
Lang
->
Text
->
[
Terms
]
monoterms'
l
txt
=
map
(
text2terms
l
)
$
monoterms
txt
-- | TODO remove Num ?
--isGram c = isAlphaNum c
monoterms
::
Text
->
[
Text
]
monoterms
txt
=
map
toLower
$
split
isWord
txt
where
isWord
c
=
c
`
elem
`
[
' '
,
'
\'
'
,
','
,
';'
]
text2terms
::
Lang
->
Text
->
Terms
text2terms
lang
txt
=
Terms
label
stems
where
label
=
splitOn
(
pack
" "
)
txt
stems
=
S
.
fromList
$
map
(
stem
lang
)
label
-- | Sentence split separators
isSep
::
Char
->
Bool
isSep
=
(`
elem
`
(
",.:;?!(){}[]
\"
"
::
String
))
monoTerms
::
Lang
->
Text
->
[
Terms
]
monoTerms
l
txt
=
map
(
monoText2term
l
)
$
monoTexts
txt
monoTexts
::
Text
->
[
Text
]
monoTexts
=
L
.
concat
.
monoTextsBySentence
monoText2term
::
Lang
->
Text
->
Terms
monoText2term
lang
txt
=
Terms
[
txt
]
(
S
.
singleton
$
stem
lang
txt
)
monoTextsBySentence
::
Text
->
[[
Text
]]
monoTextsBySentence
=
map
(
T
.
split
isSpace
)
.
T
.
split
isSep
.
T
.
toLower
--monograms :: Text -> [Text]
--monograms xs = monograms $ toLower $ filter isGram xs
--isGram :: Char -> Bool
--isGram c = isAlphaNum c || isSpace c || c `elem` ['-','/','\'']
src/Gargantext/Text/Terms/WithList.hs
View file @
4f5491c3
...
...
@@ -16,32 +16,28 @@ commentary with @some markup@.
module
Gargantext.Text.Terms.WithList
where
import
Prelude
(
String
)
import
qualified
Data.Algorithms.KMP
as
KMP
import
Data.Char
(
isSpace
)
import
qualified
Data.Text
as
T
import
Data.Text
(
Text
)
import
qualified
Data.IntMap.Strict
as
IntMap
import
Gargantext.Text.Context
import
Gargantext.Text.Terms.Mono
(
monoTextsBySentence
)
import
Gargantext.Prelude
import
Data.List
(
concatMap
)
type
Term
=
Text
type
Label
=
Term
type
Pattern
=
KMP
.
Table
Term
type
TermList
=
[(
Label
,
[[
Term
]])]
type
Patterns
=
[(
Pattern
,
Int
,
Label
)]
isMultiTermSep
::
Char
->
Bool
isMultiTermSep
=
(`
elem
`
",.:;?!(){}[]"
)
type
Sentence
a
=
[
a
]
-- or a nominal group
type
Corpus
a
=
[
Sentence
a
]
-- a list of sentences
replaceTerms
::
Patterns
->
Sentence
Term
->
Sentence
Label
replaceTerms
pats
terms
=
go
0
terms
replaceTerms
::
(
Term
->
Label
)
->
Patterns
->
Sentence
Term
->
Sentence
Label
replaceTerms
labelPolicy
pats
terms
=
go
0
terms
where
go
_
[]
=
[]
go
!
ix
(
t
:
ts
)
=
...
...
@@ -50,9 +46,9 @@ replaceTerms pats terms = go 0 terms
Just
(
len
,
label
)
->
label
:
go
(
ix
+
len
)
(
drop
(
len
-
1
)
ts
)
--
TODO is it what we want?
--
| merge with labelPolicy (can be a Map Term label)
merge
(
len1
,
lab1
)
(
len2
,
lab2
)
=
if
len1
>
len2
then
(
len1
,
lab1
)
else
(
len2
,
lab2
)
if
(
labelPolicy
lab1
)
==
lab2
then
(
len2
,
lab2
)
else
(
len1
,
lab1
)
m
=
IntMap
.
fromListWith
merge
...
...
@@ -66,11 +62,5 @@ buildPatterns = concatMap buildPattern
where
f
alt
=
(
KMP
.
build
alt
,
length
alt
,
label
)
-- monoterms'' :: Lang -> Text -> [Terms]
-- monoterms'' l txt = map (text2terms l) $ monoterms txt
extractTermsWithList
::
Patterns
->
Text
->
Corpus
Label
extractTermsWithList
pats
=
map
(
replaceTerms
pats
)
.
map
(
T
.
split
isSpace
)
.
-- text2terms
T
.
split
isMultiTermSep
.
T
.
toLower
-- as in monoterms with a different list of seps
extractTermsWithList
::
(
Term
->
Label
)
->
Patterns
->
Text
->
Corpus
Label
extractTermsWithList
labelPolicy
pats
=
map
(
replaceTerms
labelPolicy
pats
)
.
monoTextsBySentence
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment