Build only Langs which are fully supported

parent e9035df2
......@@ -25,5 +25,8 @@ module Gargantext.Core
-- - SP == spanish (not implemented yet)
-- ... add your language and help us to implement it (:
data Lang = EN | FR | DE | SP | CH
deriving (Show, Eq, Ord)
data Lang = EN | FR -- | DE | SP | CH
deriving (Show, Eq, Ord, Bounded, Enum)
-- | Every constructor of 'Lang', enumerated from the first to the last
-- one through the derived 'Bounded' and 'Enum' instances.
allLangs :: [Lang]
allLangs = [minBound .. maxBound]
......@@ -68,7 +68,7 @@ import Text.XML.HXT.DOM.Util (decimalStringToInt)
-- | Translate a Gargantext 'Lang' into the matching @DC.Lang@ value.
--
-- Only 'FR' and 'EN' have an explicit mapping.
parserLang :: Lang -> DC.Lang
parserLang FR = DC.FR
parserLang EN = DC.EN
-- Any other language panics with "not implemented".
parserLang _ = panic "not implemented"
-- parserLang _ = panic "not implemented"
-- | Final Date parser API
-- IO can be avoided here:
......@@ -55,7 +55,7 @@ stem lang = DT.pack . N.stem lang' . DT.unpack
lang' = case lang of
EN -> N.English
FR -> N.French
_ -> panic $ DT.pack "not implemented yet"
--_ -> panic $ DT.pack "not implemented yet"
......@@ -57,4 +57,4 @@ tokenTags' lang t = map tokens2tokensTags
group :: Lang -> [TokenTag] -> [TokenTag]
group EN =
group FR =
group _ = panic $ pack "group :: Lang not implemeted yet"
-- group _ = panic $ pack "group :: Lang not implemeted yet"
......@@ -124,7 +124,7 @@ corenlp' lang txt = do
EN -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
-- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"parse.model\":\"edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz\", \"pos.model\":\"edu/stanford/nlp/models/pos-tagger/french/french.tagger\", \"tokenize.language\":\"fr\", \"outputFormat\": \"json\"}"
_ -> panic $ pack "not implemented yet"
-- _ -> panic $ pack "not implemented yet"
url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties
let request = setRequestBodyLBS (cs txt) url
httpJSON request
......@@ -33,15 +33,15 @@ import Data.String (String)
import Data.Text (pack, unpack)
import Gargantext.Prelude
import Gargantext.Core (Lang(..))
import Gargantext.Core (Lang(..), allLangs)
import Gargantext.Text.Terms.Mono (words)
import Gargantext.Text.Metrics.Count (occurrencesWith)
import Gargantext.Text.Samples.FR as FR
import Gargantext.Text.Samples.EN as EN
import Gargantext.Text.Samples.DE as DE
import Gargantext.Text.Samples.SP as SP
import Gargantext.Text.Samples.CH as CH
import qualified Gargantext.Text.Samples.FR as FR
import qualified Gargantext.Text.Samples.EN as EN
--import qualified Gargantext.Text.Samples.DE as DE
--import qualified Gargantext.Text.Samples.SP as SP
--import qualified Gargantext.Text.Samples.CH as CH
data Candidate = Candidate { stop :: Double
......@@ -88,13 +88,18 @@ detectLangs s = DL.reverse $ DL.sortOn snd
$ toList
$ detect (wordsToBook [0..2] s) testEL
-- | Sample text for a language, taken from the @textMining@ value of the
-- corresponding @Gargantext.Text.Samples@ module.
textMining :: Lang -> String
textMining lang = case lang of
  EN -> EN.textMining
  FR -> FR.textMining
  -- Languages below are disabled until they are supported again:
  -- DE -> DE.textMining
  -- SP -> SP.textMining
  -- CH -> CH.textMining
-- | Build the 'LangWord' of a language by pairing it with the sample
-- text returned by 'textMining' for that same language.
langWord :: Lang -> LangWord
langWord lang = LangWord lang sample
  where
    sample = textMining lang
testEL :: EventLang
testEL = toEventLangs [0..2] [ LangWord EN EN.textMining
, LangWord FR FR.textMining
, LangWord DE DE.textMining
, LangWord SP SP.textMining
, LangWord CH CH.textMining
testEL = toEventLangs [0..2] [ langWord l | l <- allLangs ]
-- | Merge, with 'DM.unionsWith' and @(+)@, one weighted result per entry
-- of the book's frequency map: for each key @s@ with count @n@ the
-- contribution is @fromIntegral n * peb s el@.
--
-- Entries whose key is @" "@ are dropped before weighting.
detect :: EventBook -> EventLang -> LangProba
detect (EventBook mapFreq _) el = DM.unionsWith (+) weighted
  where
    weighted =
      [ fromIntegral n * peb s el
      | (s, n) <- DM.toList mapFreq
      , s /= " "
      ]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment