Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
a3490841
Commit
a3490841
authored
Sep 29, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'bayes'
parents
88585c12
3cbf7e51
Changes
11
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
283 additions
and
79 deletions
+283
-79
package.yaml
package.yaml
+3
-0
Core.hs
src/Gargantext/Core.hs
+2
-1
Prelude.hs
src/Gargantext/Prelude.hs
+1
-1
Count.hs
src/Gargantext/Text/Metrics/Count.hs
+3
-0
FrequentItemSet.hs
src/Gargantext/Text/Metrics/FrequentItemSet.hs
+3
-3
Date.hs
src/Gargantext/Text/Parsers/Date.hs
+1
-1
Stem.hs
src/Gargantext/Text/Terms/Mono/Stem.hs
+1
-0
Multi.hs
src/Gargantext/Text/Terms/Multi.hs
+1
-1
PosTagging.hs
src/Gargantext/Text/Terms/Multi/PosTagging.hs
+1
-0
RAKE.hs
src/Gargantext/Text/Terms/Multi/RAKE.hs
+11
-72
Stop.hs
src/Gargantext/Text/Terms/Stop.hs
+256
-0
No files found.
package.yaml
View file @
a3490841
...
...
@@ -53,6 +53,7 @@ library:
-
Gargantext.Text.Parsers.WOS
-
Gargantext.Text.Search
-
Gargantext.Text.Terms
-
Gargantext.Text.Terms.Stop
-
Gargantext.Text.Terms.Mono
-
Gargantext.Text.Terms.Multi.Lang.En
-
Gargantext.Text.Terms.Multi.Lang.Fr
...
...
@@ -112,6 +113,7 @@ library:
-
path-io
-
postgresql-simple
-
pretty
-
probability
-
product-profunctors
-
profunctors
-
protolude
...
...
@@ -165,6 +167,7 @@ executables:
-
-with-rtsopts=-N
-
-O2
-
-Wmissing-signatures
-
-Wcompat
dependencies
:
-
base
-
containers
...
...
src/Gargantext/Core.hs
View file @
a3490841
...
...
@@ -25,4 +25,5 @@ module Gargantext.Core
-- - SP == spanish (not implemented yet)
--
-- ... add your language and help us to implement it (:
data
Lang
=
EN
|
FR
data
Lang
=
EN
|
FR
|
DE
|
SP
|
CH
deriving
(
Show
,
Eq
,
Ord
)
src/Gargantext/Prelude.hs
View file @
a3490841
...
...
@@ -46,7 +46,7 @@ import Protolude ( Bool(True, False), Int, Int64, Double, Integer
,
abs
,
min
,
max
,
maximum
,
minimum
,
return
,
snd
,
truncate
,
(
+
),
(
*
),
(
/
),
(
-
),
(
.
),
(
$
),
(
&
),
(
**
),
(
^
),
(
<
),
(
>
),
log
,
Eq
,
(
==
),
(
>=
),
(
<=
),
(
<>
),
(
/=
)
,
(
&&
),
(
||
),
not
,
any
,
(
&&
),
(
||
),
not
,
any
,
all
,
fst
,
snd
,
toS
,
elem
,
die
,
mod
,
div
,
const
,
either
,
curry
,
uncurry
,
repeat
...
...
src/Gargantext/Text/Metrics/Count.hs
View file @
a3490841
...
...
@@ -144,6 +144,9 @@ occurrences = occurrencesOn _terms_stem
occurrencesOn
::
(
Ord
a
,
Ord
b
)
=>
(
a
->
b
)
->
[
a
]
->
Map
b
(
Map
a
Int
)
occurrencesOn
f
=
foldl'
(
\
m
a
->
insertWith
(
unionWith
(
+
))
(
f
a
)
(
singleton
a
1
)
m
)
empty
occurrencesWith
::
(
Foldable
list
,
Ord
k
,
Num
a
)
=>
(
b
->
k
)
->
list
b
->
Map
k
a
occurrencesWith
f
xs
=
foldl'
(
\
x
y
->
insertWith
(
+
)
(
f
y
)
1
x
)
empty
xs
-- TODO add groups and filter stops
sumOcc
::
Ord
a
=>
[
Occ
a
]
->
Occ
a
...
...
src/Gargantext/Text/Metrics/FrequentItemSet.hs
View file @
a3490841
...
...
@@ -16,7 +16,7 @@ Domain Specific Language to manage Frequent Item Set (FIS)
module
Gargantext.Text.Metrics.FrequentItemSet
(
Fis
,
Size
(
..
)
,
occ_hlcm
,
cooc_hlcm
,
all
,
between
,
all
Fis
,
between
,
fisWithSize
,
fisWith
,
fisWithSizePoly
...
...
@@ -51,8 +51,8 @@ occ_hlcm = fisWithSize (Point 1)
cooc_hlcm
::
Frequency
->
[[
Item
]]
->
[
Fis
]
cooc_hlcm
=
fisWithSize
(
Point
2
)
all
::
Frequency
->
[[
Item
]]
->
[
Fis
]
all
=
fisWith
Nothing
all
Fis
::
Frequency
->
[[
Item
]]
->
[
Fis
]
all
Fis
=
fisWith
Nothing
------------------------------------------------------------------------
between
::
(
Int
,
Int
)
->
Frequency
->
[[
Item
]]
->
[
Fis
]
...
...
src/Gargantext/Text/Parsers/Date.hs
View file @
a3490841
...
...
@@ -68,7 +68,7 @@ import Text.XML.HXT.DOM.Util (decimalStringToInt)
parserLang
::
Lang
->
DC
.
Lang
parserLang
FR
=
DC
.
FR
parserLang
EN
=
DC
.
EN
parserLang
_
=
panic
"not implemented"
-- | Final Date parser API
-- IO can be avoided here:
...
...
src/Gargantext/Text/Terms/Mono/Stem.hs
View file @
a3490841
...
...
@@ -55,6 +55,7 @@ stem lang = DT.pack . N.stem lang' . DT.unpack
lang'
=
case
lang
of
EN
->
N
.
English
FR
->
N
.
French
_
->
panic
$
DT
.
pack
"not implemented yet"
src/Gargantext/Text/Terms/Multi.hs
View file @
a3490841
...
...
@@ -57,4 +57,4 @@ tokenTags' lang t = map tokens2tokensTags
group
::
Lang
->
[
TokenTag
]
->
[
TokenTag
]
group
EN
=
En
.
group
group
FR
=
Fr
.
group
group
_
=
panic
$
pack
"group :: Lang not implemeted yet"
src/Gargantext/Text/Terms/Multi/PosTagging.hs
View file @
a3490841
...
...
@@ -124,6 +124,7 @@ corenlp' lang txt = do
EN
->
"{
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
-- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
FR
->
"{
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
parse.model
\"
:
\"
edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz
\"
,
\"
pos.model
\"
:
\"
edu/stanford/nlp/models/pos-tagger/french/french.tagger
\"
,
\"
tokenize.language
\"
:
\"
fr
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
_
->
panic
$
pack
"not implemented yet"
url
<-
parseRequest
$
"POST http://localhost:9000/?properties="
<>
properties
let
request
=
setRequestBodyLBS
(
cs
txt
)
url
httpJSON
request
...
...
src/Gargantext/Text/Terms/Multi/RAKE.hs
View file @
a3490841
...
...
@@ -29,13 +29,22 @@ list quality in time.
{-# LANGUAGE NoImplicitPrelude #-}
module
Gargantext.Text.Terms.Multi.RAKE
(
multiterms_rake
)
module
Gargantext.Text.Terms.Multi.RAKE
(
multiterms_rake
,
select
,
hardStopList
)
where
import
GHC.Real
(
round
)
import
Data.Text
(
Text
)
import
NLP.RAKE.Text
import
Gargantext.Text.Terms.Stop
(
stopList
)
import
Gargantext.Prelude
select
::
Double
->
[
a
]
->
[
a
]
select
part
ns
=
take
n
ns
where
n
=
round
$
part
*
(
fromIntegral
$
length
ns
)
multiterms_rake
::
Text
->
[
WordScore
]
multiterms_rake
=
candidates
hardStopList
defaultNosplit
...
...
@@ -43,74 +52,4 @@ multiterms_rake = candidates hardStopList
-- | StopList
hardStopList
::
StopwordsMap
hardStopList
=
mkStopwordsStr
[
"a"
,
"a's"
,
"able"
,
"about"
,
"above"
,
"apply"
,
"according"
,
"accordingly"
,
"across"
,
"actually"
,
"after"
,
"afterwards"
,
"again"
,
"against"
,
"ain't"
,
"all"
,
"allow"
,
"allows"
,
"almost"
,
"alone"
,
"along"
,
"already"
,
"also"
,
"although"
,
"always"
,
"am"
,
"among"
,
"amongst"
,
"an"
,
"and"
,
"another"
,
"any"
,
"anybody"
,
"anyhow"
,
"anyone"
,
"anything"
,
"anyway"
,
"anyways"
,
"anywhere"
,
"analyze"
,
"apart"
,
"appear"
,
"appreciate"
,
"appropriate"
,
"are"
,
"aren't"
,
"around"
,
"as"
,
"aside"
,
"ask"
,
"asking"
,
"associated"
,
"at"
,
"available"
,
"away"
,
"awfully"
,
"based"
,
"b"
,
"be"
,
"became"
,
"because"
,
"become"
,
"becomes"
,
"becoming"
,
"been"
,
"before"
,
"beforehand"
,
"behind"
,
"being"
,
"believe"
,
"below"
,
"beside"
,
"besides"
,
"best"
,
"better"
,
"between"
,
"beyond"
,
"both"
,
"brief"
,
"but"
,
"by"
,
"c"
,
"c'mon"
,
"c's"
,
"came"
,
"can"
,
"can't"
,
"cannot"
,
"cant"
,
"cause"
,
"causes"
,
"certain"
,
"certainly"
,
"changes"
,
"clearly"
,
"co"
,
"com"
,
"come"
,
"comes"
,
"common"
,
"concerning"
,
"consequently"
,
"consider"
,
"considering"
,
"contain"
,
"containing"
,
"contains"
,
"corresponding"
,
"could"
,
"couldn't"
,
"course"
,
"currently"
,
"d"
,
"definitely"
,
"described"
,
"detects"
,
"detecting"
,
"despite"
,
"did"
,
"didn't"
,
"different"
,
"do"
,
"does"
,
"doesn't"
,
"doing"
,
"don't"
,
"done"
,
"down"
,
"downwards"
,
"during"
,
"e"
,
"each"
,
"edu"
,
"eg"
,
"eight"
,
"either"
,
"else"
,
"elsewhere"
,
"enough"
,
"entirely"
,
"especially"
,
"et"
,
"etc"
,
"even"
,
"ever"
,
"every"
,
"everybody"
,
"everyone"
,
"everything"
,
"everywhere"
,
"ex"
,
"exactly"
,
"example"
,
"except"
,
"f"
,
"far"
,
"few"
,
"find"
,
"fifth"
,
"first"
,
"five"
,
"followed"
,
"following"
,
"follows"
,
"for"
,
"former"
,
"formerly"
,
"forth"
,
"four"
,
"from"
,
"further"
,
"furthermore"
,
"g"
,
"get"
,
"gets"
,
"getting"
,
"given"
,
"gives"
,
"go"
,
"goes"
,
"going"
,
"gone"
,
"got"
,
"gotten"
,
"greetings"
,
"h"
,
"had"
,
"hadn't"
,
"happens"
,
"hardly"
,
"has"
,
"hasn't"
,
"have"
,
"haven't"
,
"having"
,
"he"
,
"he's"
,
"hello"
,
"help"
,
"hence"
,
"her"
,
"here"
,
"here's"
,
"hereafter"
,
"hereby"
,
"herein"
,
"hereupon"
,
"hers"
,
"herself"
,
"hi"
,
"him"
,
"himself"
,
"his"
,
"hither"
,
"hopefully"
,
"how"
,
"howbeit"
,
"however"
,
"i"
,
"i'd"
,
"identify"
,
"i'll"
,
"i'm"
,
"i've"
,
"ie"
,
"if"
,
"ignored"
,
"immediate"
,
"in"
,
"inasmuch"
,
"inc"
,
"indeed"
,
"indicate"
,
"indicated"
,
"indicates"
,
"inner"
,
"insofar"
,
"instead"
,
"into"
,
"inward"
,
"is"
,
"isn't"
,
"it"
,
"it'd"
,
"it'll"
,
"it's"
,
"its"
,
"itself"
,
"j"
,
"just"
,
"k"
,
"keep"
,
"keeps"
,
"kept"
,
"know"
,
"known"
,
"knows"
,
"l"
,
"last"
,
"lately"
,
"later"
,
"latter"
,
"latterly"
,
"least"
,
"less"
,
"lest"
,
"let"
,
"let's"
,
"like"
,
"liked"
,
"likely"
,
"little"
,
"look"
,
"looking"
,
"looks"
,
"ltd"
,
"m"
,
"mainly"
,
"many"
,
"may"
,
"maybe"
,
"me"
,
"mean"
,
"meanwhile"
,
"merely"
,
"might"
,
"more"
,
"moreover"
,
"most"
,
"mostly"
,
"much"
,
"must"
,
"my"
,
"myself"
,
"n"
,
"name"
,
"namely"
,
"nd"
,
"near"
,
"nearly"
,
"necessary"
,
"need"
,
"needs"
,
"neither"
,
"never"
,
"nevertheless"
,
"new"
,
"next"
,
"nine"
,
"no"
,
"nobody"
,
"non"
,
"none"
,
"noone"
,
"nor"
,
"normally"
,
"not"
,
"nothing"
,
"novel"
,
"now"
,
"nowhere"
,
"o"
,
"obviously"
,
"of"
,
"off"
,
"often"
,
"oh"
,
"ok"
,
"okay"
,
"old"
,
"on"
,
"once"
,
"one"
,
"ones"
,
"only"
,
"onto"
,
"or"
,
"other"
,
"others"
,
"otherwise"
,
"ought"
,
"our"
,
"ours"
,
"ourselves"
,
"out"
,
"outside"
,
"over"
,
"overall"
,
"own"
,
"p"
,
"particular"
,
"particularly"
,
"per"
,
"perhaps"
,
"placed"
,
"please"
,
"plus"
,
"possible"
,
"presents"
,
"presumably"
,
"probably"
,
"provides"
,
"q"
,
"que"
,
"quite"
,
"qv"
,
"r"
,
"rather"
,
"rd"
,
"re"
,
"really"
,
"reasonably"
,
"regarding"
,
"regardless"
,
"regards"
,
"relatively"
,
"respectively"
,
"right"
,
"s"
,
"said"
,
"same"
,
"saw"
,
"say"
,
"saying"
,
"says"
,
"second"
,
"secondly"
,
"see"
,
"seeing"
,
"seem"
,
"seemed"
,
"seeming"
,
"seems"
,
"seen"
,
"self"
,
"selves"
,
"sensible"
,
"sent"
,
"serious"
,
"seriously"
,
"seven"
,
"several"
,
"shall"
,
"she"
,
"should"
,
"shouldn't"
,
"since"
,
"six"
,
"so"
,
"some"
,
"somebody"
,
"somehow"
,
"someone"
,
"something"
,
"sometime"
,
"sometimes"
,
"somewhat"
,
"somewhere"
,
"soon"
,
"sorry"
,
"specified"
,
"specify"
,
"specifying"
,
"still"
,
"sub"
,
"such"
,
"sup"
,
"sure"
,
"t"
,
"t's"
,
"take"
,
"taken"
,
"tell"
,
"tends"
,
"th"
,
"than"
,
"thank"
,
"thanks"
,
"thanx"
,
"that"
,
"that's"
,
"thats"
,
"the"
,
"their"
,
"theirs"
,
"them"
,
"themselves"
,
"then"
,
"thence"
,
"there"
,
"there's"
,
"thereafter"
,
"thereby"
,
"therefore"
,
"therein"
,
"theres"
,
"thereupon"
,
"these"
,
"they"
,
"they'd"
,
"they'll"
,
"they're"
,
"they've"
,
"think"
,
"third"
,
"this"
,
"thorough"
,
"thoroughly"
,
"those"
,
"though"
,
"three"
,
"through"
,
"throughout"
,
"thru"
,
"thus"
,
"to"
,
"together"
,
"too"
,
"took"
,
"toward"
,
"towards"
,
"tried"
,
"tries"
,
"truly"
,
"try"
,
"trying"
,
"twice"
,
"two"
,
"u"
,
"un"
,
"under"
,
"unfortunately"
,
"unless"
,
"unlikely"
,
"until"
,
"unto"
,
"up"
,
"upon"
,
"us"
,
"use"
,
"used"
,
"useful"
,
"uses"
,
"using"
,
"usually"
,
"uucp"
,
"v"
,
"value"
,
"various"
,
"very"
,
"via"
,
"viz"
,
"vs"
,
"w"
,
"want"
,
"wants"
,
"was"
,
"wasn't"
,
"way"
,
"we"
,
"we'd"
,
"we'll"
,
"we're"
,
"we've"
,
"welcome"
,
"well"
,
"went"
,
"were"
,
"weren't"
,
"what"
,
"what's"
,
"whatever"
,
"when"
,
"whence"
,
"whenever"
,
"where"
,
"where's"
,
"whereafter"
,
"whereas"
,
"whereby"
,
"wherein"
,
"whereupon"
,
"wherever"
,
"whether"
,
"which"
,
"while"
,
"whither"
,
"who"
,
"who's"
,
"whoever"
,
"whole"
,
"whom"
,
"whose"
,
"why"
,
"will"
,
"willing"
,
"wish"
,
"with"
,
"within"
,
"without"
,
"won't"
,
"wonder"
,
"would"
,
"wouldn't"
,
"x"
,
"y"
,
"yes"
,
"yet"
,
"you"
,
"you'd"
,
"you'll"
,
"you're"
,
"you've"
,
"your"
,
"yours"
,
"yourself"
,
"yourselves"
,
"z"
,
"zero"
]
hardStopList
=
mkStopwordsStr
stopList
src/Gargantext/Text/Terms/Stop.hs
0 → 100644
View file @
a3490841
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment