Build only Langs which are fully supported

parent e9035df2
@@ -25,5 +25,8 @@ module Gargantext.Core
 -- - SP == spanish (not implemented yet)
 --
 -- ... add your language and help us to implement it (:
-data Lang = EN | FR | DE | SP | CH
-  deriving (Show, Eq, Ord)
+data Lang = EN | FR -- | DE | SP | CH
+  deriving (Show, Eq, Ord, Bounded, Enum)
+
+allLangs :: [Lang]
+allLangs = [minBound ..]
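Note on the hunk above: deriving Bounded and Enum is what makes allLangs possible. A minimal, self-contained sketch of the idiom (not the real module, just the pattern):

-- [minBound ..] enumerates every constructor of a Bounded + Enum type,
-- so the list of supported languages never goes stale when a constructor
-- is commented in or out of Lang.
data Lang = EN | FR
  deriving (Show, Eq, Ord, Bounded, Enum)

allLangs :: [Lang]
allLangs = [minBound ..]

main :: IO ()
main = print allLangs   -- prints [EN,FR]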
@@ -68,7 +68,7 @@ import Text.XML.HXT.DOM.Util (decimalStringToInt)
 parserLang :: Lang -> DC.Lang
 parserLang FR = DC.FR
 parserLang EN = DC.EN
-parserLang _ = panic "not implemented"
+-- parserLang _ = panic "not implemented"

 -- | Final Date parser API
 -- IO can be avoided here:
...
@@ -55,7 +55,7 @@ stem lang = DT.pack . N.stem lang' . DT.unpack
     lang' = case lang of
       EN -> N.English
      FR -> N.French
-      _ -> panic $ DT.pack "not implemented yet"
+      --_ -> panic $ DT.pack "not implemented yet"
@@ -57,4 +57,4 @@ tokenTags' lang t = map tokens2tokensTags
 group :: Lang -> [TokenTag] -> [TokenTag]
 group EN = En.group
 group FR = Fr.group
-group _ = panic $ pack "group :: Lang not implemeted yet"
+-- group _ = panic $ pack "group :: Lang not implemeted yet"
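A hedged illustration of why the catch-all panic equations can be dropped here and in the other hunks: with only fully supported constructors left in Lang, functions such as group are total again, and if a constructor is later re-enabled without being handled, GHC's -Wincomplete-patterns (part of -Wall) reports the missing case at compile time instead of reaching panic at runtime. Simplified stand-in code, not the real module:

{-# OPTIONS_GHC -Wincomplete-patterns #-}
module GroupSketch where

-- DE re-enabled here only to trigger the warning in this sketch.
data Lang = EN | FR | DE
  deriving (Show, Eq)

group :: Lang -> [a] -> [a]
group EN = id
group FR = id
-- No DE equation: GHC warns that the pattern match is non-exhaustive.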
@@ -124,7 +124,7 @@ corenlp' lang txt = do
         EN -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
         -- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
         FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"parse.model\":\"edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz\", \"pos.model\":\"edu/stanford/nlp/models/pos-tagger/french/french.tagger\", \"tokenize.language\":\"fr\", \"outputFormat\": \"json\"}"
-        _ -> panic $ pack "not implemented yet"
+        -- _ -> panic $ pack "not implemented yet"
   url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties
   let request = setRequestBodyLBS (cs txt) url
   httpJSON request
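For comparison only, a sketch of building the same per-language CoreNLP properties with aeson instead of hand-escaped string literals; propsFor is a hypothetical helper and is not part of this patch (the patch keeps the literal strings above):

{-# LANGUAGE OverloadedStrings #-}
import Data.Aeson (Value, encode, object, (.=))
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.Text (Text)

data Lang = EN | FR deriving (Show, Eq)

-- Same key/value pairs as the EN and FR literals above, built structurally.
propsFor :: Lang -> Value
propsFor EN = object
  [ "annotators"   .= ("tokenize,ssplit,pos,ner" :: Text)
  , "outputFormat" .= ("json" :: Text)
  ]
propsFor FR = object
  [ "annotators"        .= ("tokenize,ssplit,pos,ner" :: Text)
  , "parse.model"       .= ("edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz" :: Text)
  , "pos.model"         .= ("edu/stanford/nlp/models/pos-tagger/french/french.tagger" :: Text)
  , "tokenize.language" .= ("fr" :: Text)
  , "outputFormat"      .= ("json" :: Text)
  ]

main :: IO ()
main = BL.putStrLn (encode (propsFor FR))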
...
@@ -33,15 +33,15 @@ import Data.String (String)
 import Data.Text (pack, unpack)
 import Gargantext.Prelude
-import Gargantext.Core (Lang(..))
+import Gargantext.Core (Lang(..), allLangs)
 import Gargantext.Text.Terms.Mono (words)
 import Gargantext.Text.Metrics.Count (occurrencesWith)

-import Gargantext.Text.Samples.FR as FR
-import Gargantext.Text.Samples.EN as EN
-import Gargantext.Text.Samples.DE as DE
-import Gargantext.Text.Samples.SP as SP
-import Gargantext.Text.Samples.CH as CH
+import qualified Gargantext.Text.Samples.FR as FR
+import qualified Gargantext.Text.Samples.EN as EN
+--import qualified Gargantext.Text.Samples.DE as DE
+--import qualified Gargantext.Text.Samples.SP as SP
+--import qualified Gargantext.Text.Samples.CH as CH

 ------------------------------------------------------------------------
 data Candidate = Candidate { stop :: Double
@@ -88,13 +88,18 @@ detectLangs s = DL.reverse $ DL.sortOn snd
                $ toList
                $ detect (wordsToBook [0..2] s) testEL

+textMining :: Lang -> String
+textMining EN = EN.textMining
+textMining FR = FR.textMining
+--textMining DE = DE.textMining
+--textMining SP = SP.textMining
+--textMining CH = CH.textMining
+
+langWord :: Lang -> LangWord
+langWord l = LangWord l (textMining l)
+
 testEL :: EventLang
-testEL = toEventLangs [0..2] [ LangWord EN EN.textMining
-                             , LangWord FR FR.textMining
-                             , LangWord DE DE.textMining
-                             , LangWord SP SP.textMining
-                             , LangWord CH CH.textMining
-                             ]
+testEL = toEventLangs [0..2] [ langWord l | l <- allLangs ]

 detect :: EventBook -> EventLang -> LangProba
 detect (EventBook mapFreq _) el = DM.unionsWith (+) $ map (\(s,n) -> DM.map (\eb -> (fromIntegral n) * peb s eb) el) $ filter (\x -> fst x /= " ") $ DM.toList mapFreq
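An end-to-end sketch of the testEL change, with hypothetical simplified stand-ins for LangWord and the Samples.* texts (the real EventLang machinery is omitted): commenting a constructor in or out of Lang is now the single switch that decides which language samples feed the detector.

data Lang = EN | FR deriving (Show, Eq, Ord, Bounded, Enum)

allLangs :: [Lang]
allLangs = [minBound ..]

-- Simplified stand-ins for the real LangWord and per-language sample texts.
data LangWord = LangWord Lang String deriving Show

textMining :: Lang -> String
textMining EN = "text mining sample"        -- stands in for EN.textMining
textMining FR = "échantillon de fouille"    -- stands in for FR.textMining

langWord :: Lang -> LangWord
langWord l = LangWord l (textMining l)

main :: IO ()
main = mapM_ print [ langWord l | l <- allLangs ]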
...