Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
3cbf7e51
Commit
3cbf7e51
authored
Sep 29, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[BAYES] detect lang (todo proba density).
parent
704fe86f
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
25 additions
and
14 deletions
+25
-14
package.yaml
package.yaml
+1
-0
Core.hs
src/Gargantext/Core.hs
+1
-2
Date.hs
src/Gargantext/Text/Parsers/Date.hs
+1
-1
Stem.hs
src/Gargantext/Text/Terms/Mono/Stem.hs
+1
-0
Multi.hs
src/Gargantext/Text/Terms/Multi.hs
+1
-1
PosTagging.hs
src/Gargantext/Text/Terms/Multi/PosTagging.hs
+1
-0
RAKE.hs
src/Gargantext/Text/Terms/Multi/RAKE.hs
+1
-1
Stop.hs
src/Gargantext/Text/Terms/Stop.hs
+18
-9
No files found.
package.yaml
View file @
3cbf7e51
...
...
@@ -167,6 +167,7 @@ executables:
-
-with-rtsopts=-N
-
-O2
-
-Wmissing-signatures
-
-Wcompat
dependencies
:
-
base
-
containers
...
...
src/Gargantext/Core.hs
View file @
3cbf7e51
...
...
@@ -12,7 +12,6 @@ Portability : POSIX
module
Gargantext.Core
where
import
Gargantext.Prelude
------------------------------------------------------------------------
-- | Language of a Text
-- For simplicity, we assume the text has a homogeneous language
...
...
@@ -26,5 +25,5 @@ import Gargantext.Prelude
-- - SP == spanish (not implemented yet)
--
-- ... add your language and help us to implement it (:
data Lang
  = EN -- ^ English
  | FR -- ^ French
  | DE -- ^ presumably German — TODO confirm
  | SP -- ^ Spanish (listed as "not implemented yet" in the module comment)
  | CH -- ^ presumably Chinese — TODO confirm
  -- The constructor order is significant: the derived 'Ord' instance
  -- follows it.
  deriving (Show, Eq, Ord)
src/Gargantext/Text/Parsers/Date.hs
View file @
3cbf7e51
...
...
@@ -68,7 +68,7 @@ import Text.XML.HXT.DOM.Util (decimalStringToInt)
-- | Translate a Gargantext 'Lang' into the matching @DC.Lang@ value.
--
-- Only 'EN' and 'FR' are mapped; any other language calls 'panic' with
-- the message @"not implemented"@.
parserLang :: Lang -> DC.Lang
parserLang lang = case lang of
  EN -> DC.EN
  FR -> DC.FR
  _  -> panic "not implemented"
-- | Final Date parser API
-- IO can be avoided here:
...
...
src/Gargantext/Text/Terms/Mono/Stem.hs
View file @
3cbf7e51
...
...
@@ -55,6 +55,7 @@ stem lang = DT.pack . N.stem lang' . DT.unpack
lang'
=
case
lang
of
EN
->
N
.
English
FR
->
N
.
French
_
->
panic
$
DT
.
pack
"not implemented yet"
src/Gargantext/Text/Terms/Multi.hs
View file @
3cbf7e51
...
...
@@ -57,4 +57,4 @@ tokenTags' lang t = map tokens2tokensTags
-- | Dispatch token-tag grouping to the language-specific implementation.
--
-- 'EN' uses @En.group@ and 'FR' uses @Fr.group@; every other 'Lang'
-- panics.
--
-- NOTE(review): the panic message below contains the typo "implemeted";
-- it is kept byte-for-byte so the runtime output does not change.
group :: Lang -> [TokenTag] -> [TokenTag]
group lang = case lang of
  EN -> En.group
  FR -> Fr.group
  _  -> panic (pack "group :: Lang not implemeted yet")
src/Gargantext/Text/Terms/Multi/PosTagging.hs
View file @
3cbf7e51
...
...
@@ -124,6 +124,7 @@ corenlp' lang txt = do
EN
->
"{
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
-- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
FR
->
"{
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
parse.model
\"
:
\"
edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz
\"
,
\"
pos.model
\"
:
\"
edu/stanford/nlp/models/pos-tagger/french/french.tagger
\"
,
\"
tokenize.language
\"
:
\"
fr
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
_
->
panic
$
pack
"not implemented yet"
url
<-
parseRequest
$
"POST http://localhost:9000/?properties="
<>
properties
let
request
=
setRequestBodyLBS
(
cs
txt
)
url
httpJSON
request
...
...
src/Gargantext/Text/Terms/Multi/RAKE.hs
View file @
3cbf7e51
...
...
@@ -33,7 +33,7 @@ module Gargantext.Text.Terms.Multi.RAKE (multiterms_rake, select, hardStopList)
where
import
GHC.Real
(
round
)
import
Data.Text
(
Text
,
pack
)
import
Data.Text
(
Text
)
import
NLP.RAKE.Text
import
Gargantext.Text.Terms.Stop
(
stopList
)
...
...
src/Gargantext/Text/Terms/Stop.hs
View file @
3cbf7e51
...
...
@@ -37,6 +37,12 @@ import Gargantext.Core (Lang(..))
import
Gargantext.Text.Terms.Mono
(
words
)
import
Gargantext.Text.Metrics.Count
(
occurrencesWith
)
import
Gargantext.Text.Samples.FR
as
FR
import
Gargantext.Text.Samples.EN
as
EN
import
Gargantext.Text.Samples.DE
as
DE
import
Gargantext.Text.Samples.SP
as
SP
import
Gargantext.Text.Samples.CH
as
CH
------------------------------------------------------------------------
data
Candidate
=
Candidate
{
stop
::
Double
,
noStop
::
Double
...
...
@@ -78,18 +84,20 @@ type LangProba = Map Lang Double
------------------------------------------------------------------------
-- | Per-language scores for a string, computed against 'testEL'.
--
-- The input is turned into an 'EventBook' with @wordsToBook [0..2]@ and
-- handed to 'detect' together with 'testEL'.
detectLangs :: String -> LangProba
detectLangs s = detect (wordsToBook [0..2] s) testEL

-- | Former name of 'detectLangs', kept as an alias so existing callers
-- keep compiling. It previously carried its own copy of the same body.
estimeTest :: String -> LangProba
estimeTest = detectLangs
-- | Reference 'EventLang' used by the language detector.
--
-- Built with 'toEventLangs' over @[0..2]@ from one sample text per
-- language: the @textMining@ value of each @Gargantext.Text.Samples.*@
-- module imported by this file.
testEL :: EventLang
testEL = toEventLangs [0..2]
  [ LangWord EN EN.textMining
  , LangWord FR FR.textMining
  , LangWord DE DE.textMining
  , LangWord SP SP.textMining
  , LangWord CH CH.textMining
  ]
-- | Score every entry of an 'EventLang' against the events of an
-- 'EventBook'.
--
-- For each @(event, count)@ pair of the book's frequency map, except
-- the pair whose key is @" "@, every value @eb@ of @el@ is mapped to
-- @fromIntegral count * peb event eb@. The resulting per-event maps are
-- then summed key-wise with @DM.unionsWith (+)@.
--
-- The second field of the 'EventBook' is not used.
detect :: EventBook -> EventLang -> LangProba
detect (EventBook mapFreq _) el =
  DM.unionsWith (+)
    [ DM.map (\eb -> fromIntegral n * peb s eb) el
    | (s, n) <- DM.toList mapFreq
    , s /= " "
    ]

-- | Former name of 'detect', kept as an alias so existing callers keep
-- compiling. It previously carried its own copy of the same body.
estime :: EventBook -> EventLang -> LangProba
estime = detect
------------------------------------------------------------------------
-- | TODO: monoids
...
...
@@ -133,7 +141,7 @@ wordToBook :: [Int] -> Word -> EventBook
-- Build an 'EventBook' from a single word: @ef@ holds the occurrence
-- counts of its chunks, @en@ the number of chunks per entry of @ns@.
wordToBook ns txt = EventBook ef en
  where
    -- Chunk lists produced by allChunks'; they are zipped with @ns@
    -- below, so presumably there is one list per size in @ns@ — TODO
    -- confirm. The meaning of the constant 10 is not visible from here.
    chks = allChunks' ns 10 txt

    -- Pair each element of @ns@ with the length of the corresponding
    -- chunk list. Single binding, with no local name shadowing the
    -- @ns@ parameter.
    en = DM.fromList $ zip ns (map length chks)

    -- Occurrence map of every chunk list, merged with the left-biased
    -- 'DM.union' (on a key clash the earlier list's count is kept).
    ef = foldl' DM.union DM.empty $ map (occurrencesWith identity) chks
op
::
(
Freq
->
Freq
->
Freq
)
->
EventBook
->
EventBook
->
EventBook
...
...
@@ -163,6 +171,7 @@ sumProba ds x = sum $ map ((~?) ds) $ allChunks [0,2] 10 $ map toLower x
(
~?
)
ds
x
=
(
==
x
)
??
ds
------------------------------------------------------------------------
-- | Build the 'Candidate' of a string: 'stop' is its 'sumProba' under
-- @stopDist@, 'noStop' its 'sumProba' under @candDist@.
candidate :: [Char] -> Candidate
candidate x = Candidate
  { stop   = sumProba stopDist x
  , noStop = sumProba candDist x
  }
------------------------------------------------------------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment