......@@ -53,6 +53,7 @@ library:
- Gargantext.Text.Parsers.WOS
- Gargantext.Text.Search
- Gargantext.Text.Terms
- Gargantext.Text.Terms.Stop
- Gargantext.Text.Terms.Mono
- Gargantext.Text.Terms.Multi.Lang.En
- Gargantext.Text.Terms.Multi.Lang.Fr
......@@ -112,6 +113,7 @@ library:
- path-io
- postgresql-simple
- pretty
- probability
- product-profunctors
- profunctors
- protolude
......@@ -165,6 +167,7 @@ executables:
- -with-rtsopts=-N
- -O2
- -Wmissing-signatures
- -Wcompat
- base
- containers
......@@ -25,4 +25,5 @@ module Gargantext.Core
-- - SP == spanish (not implemented yet)
-- ... add your language and help us to implement it (:
-- | Languages known to the platform.
--
-- Derived 'Ord' follows declaration order: @EN < FR < DE < SP < CH@.
data Lang
  = EN -- ^ English
  | FR -- ^ French
  | DE -- ^ presumably German — TODO confirm
  | SP -- ^ Spanish (not implemented yet)
  | CH -- ^ presumably Chinese — TODO confirm
  deriving (Show, Eq, Ord)
......@@ -46,7 +46,7 @@ import Protolude ( Bool(True, False), Int, Int64, Double, Integer
, abs, min, max, maximum, minimum, return, snd, truncate
, (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
, Eq, (==), (>=), (<=), (<>), (/=)
, (&&), (||), not, any
, (&&), (||), not, any, all
, fst, snd, toS
, elem, die, mod, div, const, either
, curry, uncurry, repeat
......@@ -144,6 +144,9 @@ occurrences = occurrencesOn _terms_stem
-- | Count the elements of a list, grouped under the key that the projection
-- assigns to each of them.
--
-- The outer map is indexed by the projected key; each inner map records how
-- many times every original element carrying that key was seen.
occurrencesOn :: (Ord a, Ord b) => (a -> b) -> [a] -> Map b (Map a Int)
occurrencesOn project = foldl' step empty
  where
    -- Merge a one-element count into whatever is already stored for the key.
    step acc item =
      insertWith (unionWith (+)) (project item) (singleton item 1) acc
-- | Tally the elements of any 'Foldable' by the key the given function
-- computes for them: every element adds 1 to the counter of its key.
occurrencesWith :: (Foldable list, Ord k, Num a) => (b -> k) -> list b -> Map k a
occurrencesWith toKey = foldl' bump empty
  where
    -- Add one occurrence for the key of the current element.
    bump counts item = insertWith (+) (toKey item) 1 counts
-- TODO add groups and filter stops
sumOcc :: Ord a => [Occ a] -> Occ a
......@@ -16,7 +16,7 @@ Domain Specific Language to manage Frequent Item Set (FIS)
module Gargantext.Text.Metrics.FrequentItemSet
( Fis, Size(..)
, occ_hlcm, cooc_hlcm
, all, between
, allFis, between
, fisWithSize
, fisWith
, fisWithSizePoly
......@@ -51,8 +51,8 @@ occ_hlcm = fisWithSize (Point 1)
-- | 'fisWithSize' specialised to the size constraint @Point 2@.
cooc_hlcm :: Frequency -> [[Item]] -> [Fis]
cooc_hlcm freq items = fisWithSize (Point 2) freq items
-- | 'fisWith' called with 'Nothing' as its first argument
-- (presumably meaning "no size restriction" — TODO confirm).
all :: Frequency -> [[Item]] -> [Fis]
all freq items = fisWith Nothing freq items
-- | 'fisWith' called with 'Nothing' as its first argument
-- (presumably meaning "no size restriction" — TODO confirm).
allFis :: Frequency -> [[Item]] -> [Fis]
allFis freq items = fisWith Nothing freq items
between :: (Int, Int) -> Frequency -> [[Item]] -> [Fis]
......@@ -68,7 +68,7 @@ import Text.XML.HXT.DOM.Util (decimalStringToInt)
-- | Translate the project's 'Lang' into the @DC.Lang@ used by the date parser.
--
-- Only 'FR' and 'EN' are mapped; any other language panics.
parserLang :: Lang -> DC.Lang
parserLang lang = case lang of
  FR -> DC.FR
  EN -> DC.EN
  _  -> panic "not implemented"
-- | Final Date parser API
-- IO can be avoided here:
......@@ -55,6 +55,7 @@ stem lang = DT.pack . N.stem lang' . DT.unpack
lang' = case lang of
EN -> N.English
FR -> N.French
_ -> panic $ DT.pack "not implemented yet"
......@@ -57,4 +57,4 @@ tokenTags' lang t = map tokens2tokensTags
group :: Lang -> [TokenTag] -> [TokenTag]
group EN =
group FR =
group _ = panic $ pack "group :: Lang not implemeted yet"
......@@ -124,6 +124,7 @@ corenlp' lang txt = do
EN -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
-- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"parse.model\":\"edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz\", \"pos.model\":\"edu/stanford/nlp/models/pos-tagger/french/french.tagger\", \"tokenize.language\":\"fr\", \"outputFormat\": \"json\"}"
_ -> panic $ pack "not implemented yet"
url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties
let request = setRequestBodyLBS (cs txt) url
httpJSON request
......@@ -29,13 +29,22 @@ list quality in time.
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Terms.Multi.RAKE (multiterms_rake)
module Gargantext.Text.Terms.Multi.RAKE (multiterms_rake, select, hardStopList)
import GHC.Real (round)
import Data.Text (Text)
import NLP.RAKE.Text
import Gargantext.Text.Terms.Stop (stopList)
import Gargantext.Prelude
-- | Keep the leading fraction @part@ of a list.
--
-- The number of elements kept is @round (part * length ns)@. 'round' rounds
-- halves to the nearest even integer, so @select 0.5@ on a 5-element list
-- keeps 2 elements. Because the result is built with 'take', a @part@ of 1 or
-- more returns the whole list and a @part@ of 0 or less returns @[]@.
select :: Double -> [a] -> [a]
select part ns = take n ns
  where
    -- Number of leading elements to keep.
    n = round $ part * (fromIntegral $ length ns)
-- | Run 'candidates' over a text, using 'hardStopList' as the stopword map.
multiterms_rake :: Text -> [WordScore]
multiterms_rake txt = candidates hardStopList txt
......@@ -43,74 +52,4 @@ multiterms_rake = candidates hardStopList
-- | StopList
hardStopList :: StopwordsMap
hardStopList = mkStopwordsStr [
"available","away","awfully","based", "b","be","became","because","become",
hardStopList = mkStopwordsStr stopList
This diff is collapsed.
