Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
3cbf7e51
Commit
3cbf7e51
authored
Sep 29, 2018
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[BAYES] detect lang (todo proba density).
parent
704fe86f
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
25 additions
and
14 deletions
+25
-14
package.yaml
package.yaml
+1
-0
Core.hs
src/Gargantext/Core.hs
+1
-2
Date.hs
src/Gargantext/Text/Parsers/Date.hs
+1
-1
Stem.hs
src/Gargantext/Text/Terms/Mono/Stem.hs
+1
-0
Multi.hs
src/Gargantext/Text/Terms/Multi.hs
+1
-1
PosTagging.hs
src/Gargantext/Text/Terms/Multi/PosTagging.hs
+1
-0
RAKE.hs
src/Gargantext/Text/Terms/Multi/RAKE.hs
+1
-1
Stop.hs
src/Gargantext/Text/Terms/Stop.hs
+18
-9
No files found.
package.yaml
View file @
3cbf7e51
...
...
@@ -167,6 +167,7 @@ executables:
-
-with-rtsopts=-N
-
-O2
-
-Wmissing-signatures
-
-Wcompat
dependencies
:
-
base
-
containers
...
...
src/Gargantext/Core.hs
View file @
3cbf7e51
...
...
@@ -12,7 +12,6 @@ Portability : POSIX
module
Gargantext.Core
where
import
Gargantext.Prelude
------------------------------------------------------------------------
-- | Language of a Text
-- For simplicity, we suppose text has an homogenous language
...
...
@@ -26,5 +25,5 @@ import Gargantext.Prelude
-- - SP == spanish (not implemented yet)
--
-- ... add your language and help us to implement it (:
data
Lang
=
EN
|
FR
data
Lang
=
EN
|
FR
|
DE
|
SP
|
CH
deriving
(
Show
,
Eq
,
Ord
)
src/Gargantext/Text/Parsers/Date.hs
View file @
3cbf7e51
...
...
@@ -68,7 +68,7 @@ import Text.XML.HXT.DOM.Util (decimalStringToInt)
parserLang
::
Lang
->
DC
.
Lang
parserLang
FR
=
DC
.
FR
parserLang
EN
=
DC
.
EN
parserLang
_
=
panic
"not implemented"
-- | Final Date parser API
-- IO can be avoided here:
...
...
src/Gargantext/Text/Terms/Mono/Stem.hs
View file @
3cbf7e51
...
...
@@ -55,6 +55,7 @@ stem lang = DT.pack . N.stem lang' . DT.unpack
lang'
=
case
lang
of
EN
->
N
.
English
FR
->
N
.
French
_
->
panic
$
DT
.
pack
"not implemented yet"
src/Gargantext/Text/Terms/Multi.hs
View file @
3cbf7e51
...
...
@@ -57,4 +57,4 @@ tokenTags' lang t = map tokens2tokensTags
group
::
Lang
->
[
TokenTag
]
->
[
TokenTag
]
group
EN
=
En
.
group
group
FR
=
Fr
.
group
group
_
=
panic
$
pack
"group :: Lang not implemeted yet"
src/Gargantext/Text/Terms/Multi/PosTagging.hs
View file @
3cbf7e51
...
...
@@ -124,6 +124,7 @@ corenlp' lang txt = do
EN
->
"{
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
-- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
FR
->
"{
\"
annotators
\"
:
\"
tokenize,ssplit,pos,ner
\"
,
\"
parse.model
\"
:
\"
edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz
\"
,
\"
pos.model
\"
:
\"
edu/stanford/nlp/models/pos-tagger/french/french.tagger
\"
,
\"
tokenize.language
\"
:
\"
fr
\"
,
\"
outputFormat
\"
:
\"
json
\"
}"
_
->
panic
$
pack
"not implemented yet"
url
<-
parseRequest
$
"POST http://localhost:9000/?properties="
<>
properties
let
request
=
setRequestBodyLBS
(
cs
txt
)
url
httpJSON
request
...
...
src/Gargantext/Text/Terms/Multi/RAKE.hs
View file @
3cbf7e51
...
...
@@ -33,7 +33,7 @@ module Gargantext.Text.Terms.Multi.RAKE (multiterms_rake, select, hardStopList)
where
import
GHC.Real
(
round
)
import
Data.Text
(
Text
,
pack
)
import
Data.Text
(
Text
)
import
NLP.RAKE.Text
import
Gargantext.Text.Terms.Stop
(
stopList
)
...
...
src/Gargantext/Text/Terms/Stop.hs
View file @
3cbf7e51
...
...
@@ -37,6 +37,12 @@ import Gargantext.Core (Lang(..))
import
Gargantext.Text.Terms.Mono
(
words
)
import
Gargantext.Text.Metrics.Count
(
occurrencesWith
)
import
Gargantext.Text.Samples.FR
as
FR
import
Gargantext.Text.Samples.EN
as
EN
import
Gargantext.Text.Samples.DE
as
DE
import
Gargantext.Text.Samples.SP
as
SP
import
Gargantext.Text.Samples.CH
as
CH
------------------------------------------------------------------------
data
Candidate
=
Candidate
{
stop
::
Double
,
noStop
::
Double
...
...
@@ -78,18 +84,20 @@ type LangProba = Map Lang Double
------------------------------------------------------------------------
estimeTest
::
String
->
LangProba
estimeTest
s
=
estime
(
wordsToBook
[
0
..
2
]
s
)
testEL
detectLangs
::
String
->
LangProba
detectLangs
s
=
detect
(
wordsToBook
[
0
..
2
]
s
)
testEL
testEL
::
EventLang
testEL
=
toEventLangs
[
0
,
1
,
2
]
[
LangWord
EN
"Lovely day. This day."
,
LangWord
FR
"Belle journée, j'y vais."
,
LangWord
EN
"Hello Sir, how are you doing? I am fine thank you, good bye"
,
LangWord
FR
"Bonjour Monsieur, comment allez-vous? Je vais bien merci."
testEL
=
toEventLangs
[
0
..
2
]
[
LangWord
EN
EN
.
textMining
,
LangWord
FR
FR
.
textMining
,
LangWord
DE
DE
.
textMining
,
LangWord
SP
SP
.
textMining
,
LangWord
CH
CH
.
textMining
]
estime
::
EventBook
->
EventLang
->
LangProba
estime
(
EventBook
mapFreq
_
)
el
=
DM
.
unionsWith
(
+
)
$
map
(
\
(
s
,
n
)
->
DM
.
map
(
\
eb
->
(
fromIntegral
n
)
*
peb
s
eb
)
el
)
$
filter
(
\
x
->
fst
x
/=
" "
)
$
DM
.
toList
mapFreq
detect
::
EventBook
->
EventLang
->
LangProba
detect
(
EventBook
mapFreq
_
)
el
=
DM
.
unionsWith
(
+
)
$
map
(
\
(
s
,
n
)
->
DM
.
map
(
\
eb
->
(
fromIntegral
n
)
*
peb
s
eb
)
el
)
$
filter
(
\
x
->
fst
x
/=
" "
)
$
DM
.
toList
mapFreq
------------------------------------------------------------------------
-- | TODO: monoids
...
...
@@ -133,7 +141,7 @@ wordToBook :: [Int] -> Word -> EventBook
wordToBook
ns
txt
=
EventBook
ef
en
where
chks
=
allChunks'
ns
10
txt
en
=
DM
.
fromList
$
map
(
\
(
n
,
ns
)
->
(
n
,
length
ns
))
$
zip
ns
chks
en
=
DM
.
fromList
$
map
(
\
(
n
,
ns
'
)
->
(
n
,
length
ns'
))
$
zip
ns
chks
ef
=
foldl'
DM
.
union
DM
.
empty
$
map
(
occurrencesWith
identity
)
chks
op
::
(
Freq
->
Freq
->
Freq
)
->
EventBook
->
EventBook
->
EventBook
...
...
@@ -163,6 +171,7 @@ sumProba ds x = sum $ map ((~?) ds) $ allChunks [0,2] 10 $ map toLower x
(
~?
)
ds
x
=
(
==
x
)
??
ds
------------------------------------------------------------------------
candidate
::
[
Char
]
->
Candidate
candidate
x
=
Candidate
(
sumProba
stopDist
x
)
(
sumProba
candDist
x
)
------------------------------------------------------------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment