Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
f420c18a
Unverified
Commit
f420c18a
authored
Oct 03, 2018
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Build only Langs which are fully supported
parent
e9035df2
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
26 additions
and
18 deletions
+26
-18
Core.hs
src/Gargantext/Core.hs
+5
-2
Date.hs
src/Gargantext/Text/Parsers/Date.hs
+1
-1
Stem.hs
src/Gargantext/Text/Terms/Mono/Stem.hs
+1
-1
Multi.hs
src/Gargantext/Text/Terms/Multi.hs
+1
-1
PosTagging.hs
src/Gargantext/Text/Terms/Multi/PosTagging.hs
+1
-1
Stop.hs
src/Gargantext/Text/Terms/Stop.hs
+17
-12
No files found.
src/Gargantext/Core.hs
View file @
f420c18a
...
@@ -25,5 +25,8 @@ module Gargantext.Core
...
@@ -25,5 +25,8 @@ module Gargantext.Core
-- - SP == spanish (not implemented yet)
-- - SP == spanish (not implemented yet)
--
--
-- ... add your language and help us to implement it (:
-- ... add your language and help us to implement it (:
data
Lang
=
EN
|
FR
|
DE
|
SP
|
CH
data
Lang
=
EN
|
FR
-- | DE | SP | CH
deriving
(
Show
,
Eq
,
Ord
)
deriving
(
Show
,
Eq
,
Ord
,
Bounded
,
Enum
)
allLangs
::
[
Lang
]
allLangs
=
[
minBound
..
]
src/Gargantext/Text/Parsers/Date.hs
View file @
f420c18a
...
@@ -68,7 +68,7 @@ import Text.XML.HXT.DOM.Util (decimalStringToInt)
...
@@ -68,7 +68,7 @@ import Text.XML.HXT.DOM.Util (decimalStringToInt)
parserLang
::
Lang
->
DC
.
Lang
parserLang
::
Lang
->
DC
.
Lang
parserLang
FR
=
DC
.
FR
parserLang
FR
=
DC
.
FR
parserLang
EN
=
DC
.
EN
parserLang
EN
=
DC
.
EN
parserLang
_
=
panic
"not implemented"
--
parserLang _ = panic "not implemented"
-- | Final Date parser API
-- | Final Date parser API
-- IO can be avoided here:
-- IO can be avoided here:
...
...
src/Gargantext/Text/Terms/Mono/Stem.hs
View file @
f420c18a
...
@@ -55,7 +55,7 @@ stem lang = DT.pack . N.stem lang' . DT.unpack
...
@@ -55,7 +55,7 @@ stem lang = DT.pack . N.stem lang' . DT.unpack
lang'
=
case
lang
of
lang'
=
case
lang
of
EN
->
N
.
English
EN
->
N
.
English
FR
->
N
.
French
FR
->
N
.
French
_
->
panic
$
DT
.
pack
"not implemented yet"
--
_
->
panic
$
DT
.
pack
"not implemented yet"
src/Gargantext/Text/Terms/Multi.hs
View file @
f420c18a
...
@@ -57,4 +57,4 @@ tokenTags' lang t = map tokens2tokensTags
...
@@ -57,4 +57,4 @@ tokenTags' lang t = map tokens2tokensTags
group
::
Lang
->
[
TokenTag
]
->
[
TokenTag
]
group
::
Lang
->
[
TokenTag
]
->
[
TokenTag
]
group
EN
=
En
.
group
group
EN
=
En
.
group
group
FR
=
Fr
.
group
group
FR
=
Fr
.
group
group
_
=
panic
$
pack
"group :: Lang not implemeted yet"
--
group _ = panic $ pack "group :: Lang not implemeted yet"
src/Gargantext/Text/Terms/Multi/PosTagging.hs
View file @
f420c18a
...
@@ -124,7 +124,7 @@ corenlp' lang txt = do
...
@@ -124,7 +124,7 @@ corenlp' lang txt = do
EN
->
"{
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
EN
->
"{
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
-- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
-- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
FR
->
"{
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
parse.model
\"
:
\"
edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz
\"
,
\"
pos.model
\"
:
\"
edu/stanford/nlp/models/pos-tagger/french/french.tagger
\"
,
\"
tokenize.language
\"
:
\"
fr
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
FR
->
"{
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
parse.model
\"
:
\"
edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz
\"
,
\"
pos.model
\"
:
\"
edu/stanford/nlp/models/pos-tagger/french/french.tagger
\"
,
\"
tokenize.language
\"
:
\"
fr
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
_
->
panic
$
pack
"not implemented yet"
--
_ -> panic $ pack "not implemented yet"
url
<-
parseRequest
$
"POST http://localhost:9000/?properties="
<>
properties
url
<-
parseRequest
$
"POST http://localhost:9000/?properties="
<>
properties
let
request
=
setRequestBodyLBS
(
cs
txt
)
url
let
request
=
setRequestBodyLBS
(
cs
txt
)
url
httpJSON
request
httpJSON
request
...
...
src/Gargantext/Text/Terms/Stop.hs
View file @
f420c18a
...
@@ -33,15 +33,15 @@ import Data.String (String)
...
@@ -33,15 +33,15 @@ import Data.String (String)
import
Data.Text
(
pack
,
unpack
)
import
Data.Text
(
pack
,
unpack
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Core
(
Lang
(
..
))
import
Gargantext.Core
(
Lang
(
..
)
,
allLangs
)
import
Gargantext.Text.Terms.Mono
(
words
)
import
Gargantext.Text.Terms.Mono
(
words
)
import
Gargantext.Text.Metrics.Count
(
occurrencesWith
)
import
Gargantext.Text.Metrics.Count
(
occurrencesWith
)
import
Gargantext.Text.Samples.FR
as
FR
import
qualified
Gargantext.Text.Samples.FR
as
FR
import
Gargantext.Text.Samples.EN
as
EN
import
qualified
Gargantext.Text.Samples.EN
as
EN
import
Gargantext.Text.Samples.DE
as
DE
--import qualified
Gargantext.Text.Samples.DE as DE
import
Gargantext.Text.Samples.SP
as
SP
--import qualified
Gargantext.Text.Samples.SP as SP
import
Gargantext.Text.Samples.CH
as
CH
--import qualified
Gargantext.Text.Samples.CH as CH
------------------------------------------------------------------------
------------------------------------------------------------------------
data
Candidate
=
Candidate
{
stop
::
Double
data
Candidate
=
Candidate
{
stop
::
Double
...
@@ -88,13 +88,18 @@ detectLangs s = DL.reverse $ DL.sortOn snd
...
@@ -88,13 +88,18 @@ detectLangs s = DL.reverse $ DL.sortOn snd
$
toList
$
toList
$
detect
(
wordsToBook
[
0
..
2
]
s
)
testEL
$
detect
(
wordsToBook
[
0
..
2
]
s
)
testEL
textMining
::
Lang
->
String
textMining
EN
=
EN
.
textMining
textMining
FR
=
FR
.
textMining
--textMining DE = DE.textMining
--textMining SP = SP.textMining
--textMining CH = CH.textMining
langWord
::
Lang
->
LangWord
langWord
l
=
LangWord
l
(
textMining
l
)
testEL
::
EventLang
testEL
::
EventLang
testEL
=
toEventLangs
[
0
..
2
]
[
LangWord
EN
EN
.
textMining
testEL
=
toEventLangs
[
0
..
2
]
[
langWord
l
|
l
<-
allLangs
]
,
LangWord
FR
FR
.
textMining
,
LangWord
DE
DE
.
textMining
,
LangWord
SP
SP
.
textMining
,
LangWord
CH
CH
.
textMining
]
detect
::
EventBook
->
EventLang
->
LangProba
detect
::
EventBook
->
EventLang
->
LangProba
detect
(
EventBook
mapFreq
_
)
el
=
DM
.
unionsWith
(
+
)
$
map
(
\
(
s
,
n
)
->
DM
.
map
(
\
eb
->
(
fromIntegral
n
)
*
peb
s
eb
)
el
)
$
filter
(
\
x
->
fst
x
/=
" "
)
$
DM
.
toList
mapFreq
detect
(
EventBook
mapFreq
_
)
el
=
DM
.
unionsWith
(
+
)
$
map
(
\
(
s
,
n
)
->
DM
.
map
(
\
eb
->
(
fromIntegral
n
)
*
peb
s
eb
)
el
)
$
filter
(
\
x
->
fst
x
/=
" "
)
$
DM
.
toList
mapFreq
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment