Commit a3490841 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'bayes'

parents 88585c12 3cbf7e51
......@@ -53,6 +53,7 @@ library:
- Gargantext.Text.Parsers.WOS
- Gargantext.Text.Search
- Gargantext.Text.Terms
- Gargantext.Text.Terms.Stop
- Gargantext.Text.Terms.Mono
- Gargantext.Text.Terms.Multi.Lang.En
- Gargantext.Text.Terms.Multi.Lang.Fr
......@@ -112,6 +113,7 @@ library:
- path-io
- postgresql-simple
- pretty
- probability
- product-profunctors
- profunctors
- protolude
......@@ -165,6 +167,7 @@ executables:
- -with-rtsopts=-N
- -O2
- -Wmissing-signatures
- -Wcompat
dependencies:
- base
- containers
......
......@@ -25,4 +25,5 @@ module Gargantext.Core
-- - SP == spanish (not implemented yet)
--
-- ... add your language and help us to implement it (:
data Lang = EN | FR
data Lang = EN | FR | DE | SP | CH
deriving (Show, Eq, Ord)
......@@ -46,7 +46,7 @@ import Protolude ( Bool(True, False), Int, Int64, Double, Integer
, abs, min, max, maximum, minimum, return, snd, truncate
, (+), (*), (/), (-), (.), ($), (&), (**), (^), (<), (>), log
, Eq, (==), (>=), (<=), (<>), (/=)
, (&&), (||), not, any
, (&&), (||), not, any, all
, fst, snd, toS
, elem, die, mod, div, const, either
, curry, uncurry, repeat
......
......@@ -144,6 +144,9 @@ occurrences = occurrencesOn _terms_stem
occurrencesOn :: (Ord a, Ord b) => (a -> b) -> [a] -> Map b (Map a Int)
occurrencesOn f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a 1) m) empty
occurrencesWith :: (Foldable list, Ord k, Num a) => (b -> k) -> list b -> Map k a
occurrencesWith f xs = foldl' (\x y -> insertWith (+) (f y) 1 x) empty xs
-- TODO add groups and filter stops
sumOcc :: Ord a => [Occ a] -> Occ a
......
......@@ -16,7 +16,7 @@ Domain Specific Language to manage Frequent Item Set (FIS)
module Gargantext.Text.Metrics.FrequentItemSet
( Fis, Size(..)
, occ_hlcm, cooc_hlcm
, all, between
, allFis, between
, fisWithSize
, fisWith
, fisWithSizePoly
......@@ -51,8 +51,8 @@ occ_hlcm = fisWithSize (Point 1)
cooc_hlcm :: Frequency -> [[Item]] -> [Fis]
cooc_hlcm = fisWithSize (Point 2)
all :: Frequency -> [[Item]] -> [Fis]
all = fisWith Nothing
allFis :: Frequency -> [[Item]] -> [Fis]
allFis = fisWith Nothing
------------------------------------------------------------------------
between :: (Int, Int) -> Frequency -> [[Item]] -> [Fis]
......
......@@ -68,7 +68,7 @@ import Text.XML.HXT.DOM.Util (decimalStringToInt)
parserLang :: Lang -> DC.Lang
parserLang FR = DC.FR
parserLang EN = DC.EN
parserLang _ = panic "not implemented"
-- | Final Date parser API
-- IO can be avoided here:
......
......@@ -55,6 +55,7 @@ stem lang = DT.pack . N.stem lang' . DT.unpack
lang' = case lang of
EN -> N.English
FR -> N.French
_ -> panic $ DT.pack "not implemented yet"
......@@ -57,4 +57,4 @@ tokenTags' lang t = map tokens2tokensTags
group :: Lang -> [TokenTag] -> [TokenTag]
group EN = En.group
group FR = Fr.group
group _ = panic $ pack "group :: Lang not implemeted yet"
......@@ -124,6 +124,7 @@ corenlp' lang txt = do
EN -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
-- FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"outputFormat\": \"json\"}"
FR -> "{\"annotators\": \"tokenize,ssplit,pos,ner\", \"parse.model\":\"edu/stanford/nlp/models/lexparser/frenchFactored.ser.gz\", \"pos.model\":\"edu/stanford/nlp/models/pos-tagger/french/french.tagger\", \"tokenize.language\":\"fr\", \"outputFormat\": \"json\"}"
_ -> panic $ pack "not implemented yet"
url <- parseRequest $ "POST http://localhost:9000/?properties=" <> properties
let request = setRequestBodyLBS (cs txt) url
httpJSON request
......
......@@ -29,13 +29,22 @@ list quality in time.
{-# LANGUAGE NoImplicitPrelude #-}
module Gargantext.Text.Terms.Multi.RAKE (multiterms_rake)
module Gargantext.Text.Terms.Multi.RAKE (multiterms_rake, select, hardStopList)
where
import GHC.Real (round)
import Data.Text (Text)
import NLP.RAKE.Text
import Gargantext.Text.Terms.Stop (stopList)
import Gargantext.Prelude
select :: Double -> [a] -> [a]
select part ns = take n ns
where
n = round $ part * (fromIntegral $ length ns)
multiterms_rake :: Text -> [WordScore]
multiterms_rake = candidates hardStopList
defaultNosplit
......@@ -43,74 +52,4 @@ multiterms_rake = candidates hardStopList
-- | StopList
hardStopList :: StopwordsMap
hardStopList = mkStopwordsStr [
"a","a's","able","about","above","apply","according","accordingly",
"across","actually","after","afterwards","again","against",
"ain't","all","allow","allows","almost","alone","along",
"already","also","although","always","am","among","amongst",
"an","and","another","any","anybody","anyhow","anyone","anything",
"anyway","anyways","anywhere","analyze","apart","appear","appreciate","appropriate",
"are","aren't","around","as","aside","ask","asking","associated","at",
"available","away","awfully","based", "b","be","became","because","become",
"becomes","becoming","been","before","beforehand","behind","being",
"believe","below","beside","besides","best","better","between","beyond",
"both","brief","but","by","c","c'mon","c's","came","can","can't","cannot",
"cant","cause","causes","certain","certainly","changes","clearly","co",
"com","come","comes","common","concerning","consequently","consider","considering",
"contain","containing","contains","corresponding","could","couldn't","course",
"currently","d","definitely","described","detects","detecting","despite","did","didn't","different",
"do","does","doesn't","doing","don't","done","down","downwards","during","e",
"each","edu","eg","eight","either","else","elsewhere","enough","entirely",
"especially","et","etc","even","ever","every","everybody","everyone",
"everything","everywhere","ex","exactly","example","except","f","far",
"few","find","fifth","first","five","followed","following","follows","for",
"former","formerly","forth","four","from","further","furthermore","g",
"get","gets","getting","given","gives","go","goes","going","gone","got",
"gotten","greetings","h","had","hadn't","happens","hardly","has","hasn't",
"have","haven't","having","he","he's","hello","help","hence","her","here",
"here's","hereafter","hereby","herein","hereupon","hers","herself","hi",
"him","himself","his","hither","hopefully","how","howbeit","however","i",
"i'd","identify","i'll","i'm","i've","ie","if","ignored","immediate","in","inasmuch",
"inc","indeed","indicate","indicated","indicates","inner","insofar",
"instead","into","inward","is","isn't","it","it'd","it'll","it's","its",
"itself","j","just","k","keep","keeps","kept","know","known","knows","l",
"last","lately","later","latter","latterly","least","less","lest","let",
"let's","like","liked","likely","little","look","looking","looks","ltd",
"m","mainly","many","may","maybe","me","mean","meanwhile","merely","might",
"more","moreover","most","mostly","much","must","my","myself","n",
"name","namely","nd","near","nearly","necessary","need","needs","neither",
"never","nevertheless","new","next","nine","no","nobody","non","none",
"noone","nor","normally","not","nothing","novel","now","nowhere","o",
"obviously","of","off","often","oh","ok","okay","old","on","once","one",
"ones","only","onto","or","other","others","otherwise","ought","our",
"ours","ourselves","out","outside","over","overall","own","p","particular",
"particularly","per","perhaps","placed","please","plus","possible",
"presents","presumably","probably","provides","q","que","quite","qv","r","rather",
"rd","re","really","reasonably","regarding","regardless","regards",
"relatively","respectively","right","s","said","same","saw","say",
"saying","says","second","secondly","see","seeing","seem","seemed",
"seeming","seems","seen","self","selves","sensible","sent","serious",
"seriously","seven","several","shall","she","should","shouldn't","since",
"six","so","some","somebody","somehow","someone","something","sometime",
"sometimes","somewhat","somewhere","soon","sorry","specified","specify",
"specifying","still","sub","such","sup","sure","t","t's","take","taken",
"tell","tends","th","than","thank","thanks","thanx","that","that's",
"thats","the","their","theirs","them","themselves","then","thence","there",
"there's","thereafter","thereby","therefore","therein","theres",
"thereupon","these","they","they'd","they'll","they're","they've",
"think","third","this","thorough","thoroughly","those","though","three",
"through","throughout","thru","thus","to","together","too","took","toward",
"towards","tried","tries","truly","try","trying","twice","two","u","un",
"under","unfortunately","unless","unlikely","until","unto","up","upon",
"us","use","used","useful","uses","using","usually","uucp","v","value",
"various","very","via","viz","vs","w","want","wants","was","wasn't","way",
"we","we'd","we'll","we're","we've","welcome","well","went","were",
"weren't","what","what's","whatever","when","whence","whenever","where",
"where's","whereafter","whereas","whereby","wherein","whereupon",
"wherever","whether","which","while","whither","who","who's","whoever",
"whole","whom","whose","why","will","willing","wish","with","within",
"without","won't","wonder","would","wouldn't","x","y","yes","yet","you",
"you'd","you'll","you're","you've","your","yours","yourself","yourselves",
"z","zero"]
hardStopList = mkStopwordsStr stopList
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment