Commit cddf0f9e authored by Alexandre Delanoë

[FEAT] [LEARN] Refactoring: rename Date.split to dateSplit, generalize language detection from Gargantext.Text.Terms.Stop into Gargantext.Text.Terms.Learn (detectLang becomes detectLangDefault, built on a category-agnostic detectCat/toEvents), and move the English stop list to Gargantext.Text.Samples.EN.

parent 2ecc0288
@@ -69,7 +69,7 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
langText (OnlyText t2 ) = t2
langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
(utcTime, (pub_year, pub_month, pub_day)) <- Date.split l (maybe (Just "2019") (Just) d)
(utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit l (maybe (Just "2019") (Just) d)
pure $ HyperdataDocument (Just "IsidoreApi")
Nothing
......
@@ -53,7 +53,7 @@ import qualified Gargantext.Text.Corpus.Parsers.RIS as RIS
import Gargantext.Text.Corpus.Parsers.RIS.Presse (presseEnrich)
import qualified Gargantext.Text.Corpus.Parsers.Date as Date
import Gargantext.Text.Corpus.Parsers.CSV (parseHal, parseCsv)
import Gargantext.Text.Terms.Stop (detectLang)
import Gargantext.Text.Terms.Learn (detectLangDefault)
------------------------------------------------------------------------
type ParseError = String
@@ -97,11 +97,11 @@ toDoc :: FileFormat -> [(Text, Text)] -> IO HyperdataDocument
-- TODO use language for RIS
toDoc ff d = do
let abstract = lookup "abstract" d
let lang = maybe EN identity (join $ detectLang <$> (fmap (DT.take 50) abstract))
let lang = maybe EN identity (join $ detectLangDefault <$> (fmap (DT.take 50) abstract))
let dateToParse = DT.replace "-" " " <$> lookup "PY" d <> Just " " <> lookup "publication_date" d
(utcTime, (pub_year, pub_month, pub_day)) <- Date.split lang dateToParse
(utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit lang dateToParse
pure $ HyperdataDocument (Just $ DT.pack $ show ff)
(lookup "doi" d)
......
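The toDoc change above keeps the EN fallback while switching to detectLangDefault, which is only given the first 50 characters of the abstract. A minimal sketch of that fallback logic, assuming detectLangDefault :: Text -> Maybe Lang as declared in Terms.Learn below (guessLang is a hypothetical helper name, not part of the commit):

import Data.Maybe (fromMaybe)
import qualified Data.Text as DT
import Data.Text (Text)

-- Lang, EN and detectLangDefault come from the Gargantext modules in this diff.
-- Fall back to EN whenever nothing can be detected from the abstract prefix.
guessLang :: Maybe Text -> Lang
guessLang abstract = fromMaybe EN (detectLangDefault . DT.take 50 =<< abstract)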
@@ -18,7 +18,7 @@ DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Corpus.Parsers.Date (parse, parseRaw, split) where
module Gargantext.Text.Corpus.Parsers.Date (parse, parseRaw, dateSplit) where
import Data.HashMap.Strict as HM hiding (map)
import Data.Text (Text, unpack, splitOn, pack)
@@ -40,9 +40,9 @@ import qualified Duckling.Core as DC
------------------------------------------------------------------------
-- | Parse date to Ints
-- TODO add hours, minutes and seconds
split :: Lang -> Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
split _ Nothing = pure (Nothing, (Nothing, Nothing, Nothing))
split l (Just txt) = do
dateSplit :: Lang -> Maybe Text -> IO (Maybe UTCTime, (Maybe Year, Maybe Month, Maybe Day))
dateSplit _ Nothing = pure (Nothing, (Nothing, Nothing, Nothing))
dateSplit l (Just txt) = do
utcTime <- parse l txt
let (y, m, d) = split' utcTime
pure (Just utcTime, (Just y, Just m,Just d))
......
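dateSplit keeps the behaviour of the former split: Nothing yields (Nothing, (Nothing, Nothing, Nothing)), otherwise the Duckling-parsed UTCTime is split into year, month and day. A small usage sketch reusing the "12 avril 2010" example from the module header (the expected components assume the parse succeeds as documented there):

demo :: IO ()
demo = do
  (mUtc, (y, m, d)) <- dateSplit FR (Just "12 avril 2010")
  -- Per the parseDateRaw example in the module header: y = Just 2010, m = Just 4, d = Just 12.
  print (mUtc, (y, m, d))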
@@ -22,4 +22,89 @@ import Data.String (String)
textSample :: String
textSample = "Text mining, also referred to as text data mining, roughly equivalent to text analytics, is the process of deriving high-quality information from text. High-quality information is typically derived through the devising of patterns and trends through means such as statistical pattern learning. Text mining usually involves the process of structuring the input text (usually parsing, along with the addition of some derived linguistic features and the removal of others, and subsequent insertion into a database), deriving patterns within the structured data, and finally evaluation and interpretation of the output. 'High quality' in text mining usually refers to some combination of relevance, novelty, and interestingness. Typical text mining tasks include text categorization, text clustering, concept/entity extraction, production of granular taxonomies, sentiment analysis, document summarization, and entity relation modeling (i.e., learning relations between named entities). Text analysis involves information retrieval, lexical analysis to study word frequency distributions, pattern recognition, tagging/annotation, information extraction, data mining techniques including link and association analysis, visualization, and predictive analytics. The overarching goal is, essentially, to turn text into data for analysis, via application of natural language processing (NLP) and analytical methods. A typical application is to scan a set of documents written in a natural language and either model the document set for predictive classification purposes or populate a database or search index with the information extracted."
stopList :: [String]
stopList =
["a", "a's", "able", "about", "above", "according", "accordingly"
, "across", "actually", "after", "afterwards", "again", "against"
, "ain't", "all", "allow", "allows", "almost", "alone", "along"
, "already", "also", "although", "always", "am", "among", "amongst", "an"
, "analyze", "and", "another", "any", "anybody", "anyhow", "anyone"
, "anything", "anyway", "anyways", "anywhere", "apart", "appear"
, "apply", "appreciate", "appropriate", "are", "aren't", "around"
, "as", "aside", "ask", "asking", "associated", "at", "available"
, "away", "awfully", "b", "based", "be", "became", "because", "become"
, "becomes", "becoming", "been", "before", "beforehand", "behind"
, "being", "believe", "below", "beside", "besides", "best", "better"
, "between", "beyond", "both", "brief", "but", "by", "c", "c'mon", "c's"
, "came", "can", "can't", "cannot", "cant", "cause", "causes", "certain"
, "certainly", "changes", "clearly", "co", "com", "come", "comes"
, "common", "concerning", "consequently", "consider", "considering"
, "contain", "containing", "contains", "corresponding", "could"
, "couldn't", "course", "currently", "d", "definitely", "described"
, "despite", "detecting", "detects", "did", "didn't", "different", "do"
, "does", "doesn't", "doing", "don't", "done", "down", "downwards"
, "during", "e", "each", "edu", "eg", "eight", "either", "else"
, "elsewhere", "enough", "entirely", "especially", "et", "etc", "even"
, "ever", "every", "everybody", "everyone", "everything", "everywhere"
, "ex", "exactly", "example", "except", "f", "far", "few", "fifth"
, "find", "first", "five", "followed", "following", "follows", "for"
, "former", "formerly", "forth", "four", "from", "further", "furthermore"
, "g", "get", "gets", "getting", "gif", "given", "gives", "go", "goes"
, "going", "gone", "got", "gotten", "greetings", "h", "had", "hadn't"
, "happens", "hardly", "has", "hasn't", "have", "haven't", "having"
, "he", "he'd", "he'll", "he's", "hello", "help", "hence", "her"
, "here", "here's", "hereafter", "hereby", "herein", "hereupon", "hers"
, "herself", "hi", "him", "himself", "his", "hither", "hopefully", "how"
, "how's", "howbeit", "however", "i", "i'd", "i'll", "i'm", "i've"
, "identify", "ie", "if", "ignored", "immediate", "in", "inasmuch"
, "inc", "indeed", "indicate", "indicated", "indicates", "inner"
, "insofar", "instead", "into", "involves", "inward", "is", "isn't"
, "it", "it'd", "it'll", "it's", "its", "itself", "j", "just", "k"
, "keep", "keeps", "kept", "know", "known", "knows", "l", "last"
, "late", "lately", "later", "latter", "latterly", "least", "less"
, "lest", "let", "let's", "like", "liked", "likely", "little", "look"
, "looking", "looks", "ltd", "m", "main", "mainly", "many", "may"
, "maybe", "me", "mean", "meanwhile", "merely", "might", "min", "more"
, "moreover", "most", "mostly", "much", "must", "mustn't", "my", "myself"
, "n", "name", "namely", "nd", "near", "nearly", "necessary", "need"
, "needs", "neither", "never", "nevertheless", "new", "next", "nine"
, "no", "nobody", "non", "none", "noone", "nor", "normally", "not"
, "nothing", "novel", "now", "nowhere", "o", "obviously", "of", "off"
, "often", "oh", "ok", "okay", "old", "on", "once", "one", "ones"
, "only", "onto", "or", "other", "others", "otherwise", "ought", "our"
, "ours", "ourselves", "out", "outside", "over", "overall", "own", "p"
, "particular", "particularly", "per", "perhaps", "placed", "please"
, "plus", "possible", "presents", "presumably", "probably", "provides"
, "q", "que", "quite", "qv", "r", "rather", "rd", "re", "really"
, "reasonably", "regarding", "regardless", "regards", "relatively"
, "respectively", "right", "s", "said", "same", "saw", "say", "saying"
, "says", "sds", "second", "secondly", "see", "seeing", "seem", "seemed"
, "seeming", "seems", "seen", "self", "selves", "sensible", "sent"
, "serious", "seriously", "seven", "several", "shall", "shan't"
, "she", "she'd", "she'll", "she's", "should", "shouldn't", "since"
, "six", "so", "some", "somebody", "somehow", "someone", "something"
, "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry"
, "specified", "specify", "specifying", "still", "sub", "such", "sup"
, "sure", "t", "t's", "take", "taken", "tell", "tends", "th", "than"
, "thank", "thanks", "thanx", "that", "that's", "thats", "the", "their"
, "theirs", "them", "themselves", "then", "thence", "there", "there's"
, "thereafter", "thereby", "therefore", "therein", "theres", "thereupon"
, "these", "they", "they'd", "they'll", "they're", "they've", "think"
, "third", "this", "thorough", "thoroughly", "those", "though", "three"
, "through", "throughout", "thru", "thus", "to", "together", "too"
, "took", "toward", "towards", "tried", "tries", "truly", "try"
, "trying", "twice", "two", "u", "un", "under", "unfortunately"
, "unless", "unlikely", "until", "unto", "up", "upon", "us", "use"
, "used", "useful", "uses", "using", "usually", "uucp", "v", "value"
, "various", "very", "via", "viz", "vs", "w", "want", "wants", "was"
, "wasn't", "way", "we", "we'd", "we'll", "we're", "we've", "welcome"
, "well", "went", "were", "weren't", "what", "what's", "whatever", "when"
, "when's", "whence", "whenever", "where", "where's", "whereafter"
, "whereas", "whereby", "wherein", "whereupon", "wherever", "whether"
, "which", "while", "whither", "who", "who's", "whoever", "whole", "whom"
, "whose", "why", "why's", "will", "willing", "wish", "with", "within"
, "without", "won't", "wonder", "would", "wouldn't", "x", "y", "yes"
, "yet", "you", "you'd", "you'll", "you're", "you've", "your", "yours"
, "yourself", "yourselves", "z", "zero"]
@@ -16,14 +16,10 @@ Main type here is String.
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Text.Terms.Stop -- (detectLang, detectLangs, stopList)
module Gargantext.Text.Terms.Learn -- (detectLang, detectLangs, stopList)
where
import GHC.Base (Functor)
import Numeric.Probability.Distribution ((??))
import qualified Numeric.Probability.Distribution as D
import Data.Char (toLower)
--import Data.Char (toLower)
import qualified Data.List as DL
import Data.Maybe (maybe)
@@ -33,7 +29,7 @@ import qualified Data.Map.Strict as DM
import Data.String (String)
import Data.Text (Text)
import Data.Text (pack, unpack)
import Data.Text (pack, unpack, toLower)
import Data.Tuple.Extra (both)
import Gargantext.Prelude
@@ -52,34 +48,6 @@ data Candidate = Candidate { stop :: Double
, noStop :: Double
} deriving (Show)
-- * String preparation
-- | String prepare
blanks :: String -> String
blanks [] = []
blanks xs = [' '] <> xs <> [' ']
-- | Blocks increase the size of the word to ease computations
-- some border and unexpected effects can happen, need to be tested
blockOf :: Int -> String -> String
blockOf n = DL.concat . DL.take n . DL.repeat
-- | Chunks is the same function as splitBy in Context but for Strings,
-- not Text (without pack and unpack operations that are not needed).
chunks :: Int -> Int -> String -> [String]
chunks n m = DL.take m . filter (not . all (== ' '))
. chunkAlong (n+1) 1
. DL.concat
. DL.take 1000
. DL.repeat
. blanks
allChunks :: [Int] -> Int -> String -> [String]
allChunks ns m st = DL.concat $ map (\n -> chunks n m st) ns
allChunks' :: [Int] -> Int -> String -> [[String]]
allChunks' ns m st = map (\n -> chunks n m st) ns
------------------------------------------------------------------------
-- * Analyze candidate
type StringSize = Int
@@ -87,119 +55,121 @@ type TotalFreq = Int
type Freq = Int
type Word = String
data LangWord = LangWord Lang Word
data CatWord a = CatWord a Word
type CatProb a = Map a Double
type LangProba = Map Lang Double
type Events a = Map a EventBook
------------------------------------------------------------------------
detectLang :: Text -> Maybe Lang
detectLang = head . map fst . detectLangs
detectLangDefault :: Text -> Maybe Lang
detectLangDefault = detectCat 99 eventLang
where
eventLang :: Events Lang
eventLang = toEvents FR [0..2] 10 [ langWord l | l <- allLangs ]
detectLangs :: Text -> [(Lang, Double)]
detectLangs = detectLangs' . unpack
langWord :: Lang -> CatWord Lang
langWord l = CatWord l (textSample l)
detectLangs' :: String -> [(Lang, Double)]
detectLangs' s = DL.reverse $ DL.sortOn snd
$ toList
$ detect (wordsToBook [0..2] s) eventLang
textSample :: Lang -> String
textSample EN = EN.textSample
textSample FR = FR.textSample
--textSample DE = DE.textSample
--textSample SP = SP.textSample
--textSample CH = CH.textSample
part :: (Eq p, Fractional p) => p -> p -> p
part 0 _ = 0
part _ 0 = 0
part x y = x / y
detectStopDefault :: Text -> Maybe Bool
detectStopDefault = undefined
toProba :: (Eq b, Fractional b, Functor t, Foldable t) =>
t (a, b) -> t (a, b)
toProba xs = map (\(a,b) -> (a, part b total)) xs
detectDefault :: [(Bool, Text)] -> Text -> Maybe Bool
detectDefault events = detectCat 99 (priorEvents events)
where
total = sum $ map snd xs
textSample :: Lang -> String
textSample EN = EN.textSample
textSample FR = FR.textSample
--textSample DE = DE.textSample
--textSample SP = SP.textSample
--textSample CH = CH.textSample
priorEvents events' = toEvents True [0..2] 10 (map (\(a,b) -> CatWord a (unpack $ toLower b)) events')
langWord :: Lang -> LangWord
langWord l = LangWord l (textSample l)
------------------------------------------------------------------------
detectCat :: Ord a => Int -> Events a -> Text -> Maybe a
detectCat n es = head . map fst . (detectCat' n es) . unpack
where
detectCat' :: Ord a => Int -> Events a -> String -> [(a, Double)]
detectCat' n' es' s = DL.reverse $ DL.sortOn snd
$ toList
$ detectWith n' es' (wordsToBook [0..2] n' s)
eventLang :: EventLang
eventLang = toEventLangs [0..2] [ langWord l | l <- allLangs ]
detect :: EventBook -> EventLang -> LangProba
detect (EventBook mapFreq _) el =
detectWith :: Ord a => Int -> Events a -> EventBook -> CatProb a
detectWith n'' el (EventBook mapFreq _) =
DM.unionsWith (+)
$ map DM.fromList
$ map (\(s,n) -> map (\(l,f) -> (l, (fromIntegral n) * f)) $ toPrior s el)
$ map (\(s,m) -> map (\(l,f) -> (l, (fromIntegral m) * f)) $ toPrior n'' s el)
$ filter (\x -> fst x /= " ")
$ DM.toList mapFreq
------------------------------------------------------------------------
-- | TODO: monoids
type EventLang = Map Lang EventBook
toEventLangs :: [Int] -> [LangWord] -> EventLang
toEventLangs ns = foldl' (opLang (+)) (emptyEventLang ns) . map (toLang ns)
emptyEventLang :: [Int] -> EventLang
emptyEventLang ns = toLang ns (LangWord FR "")
toLang :: [Int] -> LangWord -> EventLang
toLang ns (LangWord l txt) = DM.fromList [(l, wordsToBook ns txt)]
opLang :: (Freq -> Freq -> Freq) -> EventLang -> EventLang -> EventLang
opLang f = DM.unionWith (op f)
------------------------------------------------------------------------
-- | TODO: monoids (but proba >= 0)
peb :: String -> EventBook -> Double
peb st (EventBook mapFreq mapN) = (fromIntegral a) / (fromIntegral b)
-- | TODO: monoids (but proba >= 0)
toPrior :: Int -> String -> Events a -> [(a, Double)]
toPrior n'' s el = prior n'' $ pebLang s el
where
a = maybe 0 identity $ DM.lookup st mapFreq
b = maybe 1 identity $ DM.lookup (length st) mapN
pebLang :: String -> Events a -> [(a, (Freq,TotalFreq))]
pebLang st = map (\(l,eb) -> (l, peb st eb)) . DM.toList
peb' :: String -> EventBook -> (Freq, TotalFreq)
peb' st (EventBook mapFreq mapN) = (fromIntegral a, fromIntegral b)
peb :: String -> EventBook -> (Freq, TotalFreq)
peb st (EventBook mapFreq mapN) = (fromIntegral a, fromIntegral b)
where
a = maybe 0 identity $ DM.lookup st mapFreq
b = maybe 1 identity $ DM.lookup (length st) mapN
------------------------------------------------------------------------
toPrior :: String -> EventLang -> [(Lang, Double)]
toPrior s el = prior $ pebLang s el
pebLang :: String -> EventLang -> [(Lang, (Freq,TotalFreq))]
pebLang st = map (\(l,eb) -> (l, peb' st eb)) . DM.toList
------------------------------------------------------------------------
prior :: [(Lang, (Freq, TotalFreq))] -> [(Lang, Double)]
prior ps = zip ls $ zipWith (\x y -> x^(99::Int) * y) (map (\(a,_) -> part a (sum $ map fst ps')) ps')
prior :: Int -> [(a, (Freq, TotalFreq))] -> [(a, Double)]
prior i ps = zip ls $ zipWith (\x y -> x^i * y) (map (\(a,_) -> part a (sum $ map fst ps')) ps')
(map (\(a,b) -> a / b) ps')
where
(ls, ps'') = DL.unzip ps
ps' = map (both fromIntegral) ps''
part :: (Eq p, Fractional p) => p -> p -> p
part 0 _ = 0
part _ 0 = 0
part x y = x / y
{-
toProba :: (Eq b, Fractional b, Functor t, Foldable t) =>
t (a, b) -> t (a, b)
toProba xs = map (\(a,b) -> (a, part b total)) xs
where
total = sum $ map snd xs
-}
-- | TODO: monoids
toEvents :: Ord a => a -> [Int] -> Int -> [CatWord a] -> Events a
toEvents e ns n = foldl' (opEvent (+)) (emptyEvent e ns n) . map (toEvent ns n)
where
emptyEvent :: Ord a => a -> [Int] -> Int -> Events a
emptyEvent e' ns' n'= toEvent ns' n' (CatWord e' "")
toEvent :: Ord a => [Int] -> Int -> CatWord a -> Events a
toEvent ns'' n'' (CatWord l txt) = DM.fromList [(l, wordsToBook ns'' n'' txt)]
opEvent :: Ord a => (Freq -> Freq -> Freq) -> Events a -> Events a -> Events a
opEvent f = DM.unionWith (op f)
------------------------------------------------------------------------
------------------------------------------------------------------------
data EventBook = EventBook { events_freq :: Map String Freq
, events_n :: Map StringSize TotalFreq
}
deriving (Show)
emptyEventBook :: [Int] -> EventBook
emptyEventBook ns = wordToBook ns " "
emptyEventBook :: [Int] -> Int -> EventBook
emptyEventBook ns n = wordToBook ns n " "
wordsToBook :: [Int] -> String -> EventBook
wordsToBook ns txt = foldl' (op (+)) (emptyEventBook ns) eventsBook
wordsToBook :: [Int] -> Int -> String -> EventBook
wordsToBook ns n txt = foldl' (op (+)) (emptyEventBook ns n) eventsBook
where
ws = map unpack $ words $ pack txt
eventsBook = map (wordToBook ns) ws
eventsBook = map (wordToBook ns n) ws
wordToBook :: [Int] -> Word -> EventBook
wordToBook ns txt = EventBook ef en
wordToBook :: [Int] -> Int -> Word -> EventBook
wordToBook ns n txt = EventBook ef en
where
chks = allChunks' ns 10 txt
en = DM.fromList $ map (\(n,ns') -> (n, length ns')) $ zip ns chks
chks = allChunks ns n txt
en = DM.fromList $ map (\(n',ns') -> (n', length ns')) $ zip ns chks
ef = foldl' DM.union DM.empty $ map (occurrencesWith identity) chks
op :: (Freq -> Freq -> Freq) -> EventBook -> EventBook -> EventBook
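With the category generalized from Lang to any Ord a, the same book-keeping can learn arbitrary classifiers from labelled sample texts. A hedged sketch built on the detectDefault declared above (labels and training strings are made up for illustration; the False sample echoes candList further down):

-- Illustrative only: a tiny stop-word/content-word classifier from two samples.
stopOrNot :: Text -> Maybe Bool
stopOrNot = detectDefault [ (True,  "the of and a to in is are was that it")
                          , (False, "france alexandre constitution train spam")
                          ]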
@@ -209,19 +179,49 @@ op f (EventBook ef1 en1)
------------------------------------------------------------------------
------------------------------------------------------------------------
allChunks :: [Int] -> Int -> String -> [[String]]
allChunks ns m st = map (\n -> chunks n m st) ns
-- | Chunks is the same function as splitBy in Context but for Strings,
-- not Text (without pack and unpack operations that are not needed).
chunks :: Int -> Int -> String -> [String]
chunks n m = DL.take m . filter (not . all (== ' '))
. chunkAlong (n+1) 1
. DL.concat
. DL.take 1000
. DL.repeat
. blanks
-- | String preparation
blanks :: String -> String
blanks [] = []
blanks xs = [' '] <> xs <> [' ']
{-
-- Some previous tests to be removed
--import GHC.Base (Functor)
--import Numeric.Probability.Distribution ((??))
--import qualified Numeric.Probability.Distribution as D
-- | Blocks increase the size of the word to ease computations
-- some border and unexpected effects can happen, need to be tested
blockOf :: Int -> String -> String
blockOf n = DL.concat . DL.take n . DL.repeat
-- * Make the distributions
makeDist :: [String] -> D.T Double String
makeDist = D.uniform . DL.concat . map (allChunks [0,2] 10)
makeDist = D.uniform . DL.concat . map (DL.concat . allChunks [0,2] 10)
stopDist :: D.T Double String
stopDist = makeDist stopList
stopDist = makeDist $ map show ([0..9]::[Int]) <> EN.stopList
candDist :: D.T Double String
candDist = makeDist candList
------------------------------------------------------------------------
sumProba :: Num a => D.T a String -> [Char] -> a
sumProba ds x = sum $ map ((~?) ds) $ allChunks [0,2] 10 $ map toLower x
sumProba ds x = sum $ map ((~?) ds) $ DL.concat $ allChunks [0,2] 10 $ map toLower x
-- | Get probability according a distribution
(~?) :: (Num prob, Eq a) => D.T prob a -> a -> prob
@@ -236,93 +236,4 @@ candList :: [String]
candList = [ "france", "alexandre", "mael", "constitution"
, "etats-unis", "associes", "car", "train", "spam"]
stopList :: [String]
stopList = map show ([0..9]::[Int]) <> stopListWords
stopListWords :: [String]
stopListWords =
["a", "a's", "able", "about", "above", "according", "accordingly"
, "across", "actually", "after", "afterwards", "again", "against"
, "ain't", "all", "allow", "allows", "almost", "alone", "along"
, "already", "also", "although", "always", "am", "among", "amongst", "an"
, "analyze", "and", "another", "any", "anybody", "anyhow", "anyone"
, "anything", "anyway", "anyways", "anywhere", "apart", "appear"
, "apply", "appreciate", "appropriate", "are", "aren't", "around"
, "as", "aside", "ask", "asking", "associated", "at", "available"
, "away", "awfully", "b", "based", "be", "became", "because", "become"
, "becomes", "becoming", "been", "before", "beforehand", "behind"
, "being", "believe", "below", "beside", "besides", "best", "better"
, "between", "beyond", "both", "brief", "but", "by", "c", "c'mon", "c's"
, "came", "can", "can't", "cannot", "cant", "cause", "causes", "certain"
, "certainly", "changes", "clearly", "co", "com", "come", "comes"
, "common", "concerning", "consequently", "consider", "considering"
, "contain", "containing", "contains", "corresponding", "could"
, "couldn't", "course", "currently", "d", "definitely", "described"
, "despite", "detecting", "detects", "did", "didn't", "different", "do"
, "does", "doesn't", "doing", "don't", "done", "down", "downwards"
, "during", "e", "each", "edu", "eg", "eight", "either", "else"
, "elsewhere", "enough", "entirely", "especially", "et", "etc", "even"
, "ever", "every", "everybody", "everyone", "everything", "everywhere"
, "ex", "exactly", "example", "except", "f", "far", "few", "fifth"
, "find", "first", "five", "followed", "following", "follows", "for"
, "former", "formerly", "forth", "four", "from", "further", "furthermore"
, "g", "get", "gets", "getting", "gif", "given", "gives", "go", "goes"
, "going", "gone", "got", "gotten", "greetings", "h", "had", "hadn't"
, "happens", "hardly", "has", "hasn't", "have", "haven't", "having"
, "he", "he'd", "he'll", "he's", "hello", "help", "hence", "her"
, "here", "here's", "hereafter", "hereby", "herein", "hereupon", "hers"
, "herself", "hi", "him", "himself", "his", "hither", "hopefully", "how"
, "how's", "howbeit", "however", "i", "i'd", "i'll", "i'm", "i've"
, "identify", "ie", "if", "ignored", "immediate", "in", "inasmuch"
, "inc", "indeed", "indicate", "indicated", "indicates", "inner"
, "insofar", "instead", "into", "involves", "inward", "is", "isn't"
, "it", "it'd", "it'll", "it's", "its", "itself", "j", "just", "k"
, "keep", "keeps", "kept", "know", "known", "knows", "l", "last"
, "late", "lately", "later", "latter", "latterly", "least", "less"
, "lest", "let", "let's", "like", "liked", "likely", "little", "look"
, "looking", "looks", "ltd", "m", "main", "mainly", "many", "may"
, "maybe", "me", "mean", "meanwhile", "merely", "might", "min", "more"
, "moreover", "most", "mostly", "much", "must", "mustn't", "my", "myself"
, "n", "name", "namely", "nd", "near", "nearly", "necessary", "need"
, "needs", "neither", "never", "nevertheless", "new", "next", "nine"
, "no", "nobody", "non", "none", "noone", "nor", "normally", "not"
, "nothing", "novel", "now", "nowhere", "o", "obviously", "of", "off"
, "often", "oh", "ok", "okay", "old", "on", "once", "one", "ones"
, "only", "onto", "or", "other", "others", "otherwise", "ought", "our"
, "ours", "ourselves", "out", "outside", "over", "overall", "own", "p"
, "particular", "particularly", "per", "perhaps", "placed", "please"
, "plus", "possible", "presents", "presumably", "probably", "provides"
, "q", "que", "quite", "qv", "r", "rather", "rd", "re", "really"
, "reasonably", "regarding", "regardless", "regards", "relatively"
, "respectively", "right", "s", "said", "same", "saw", "say", "saying"
, "says", "sds", "second", "secondly", "see", "seeing", "seem", "seemed"
, "seeming", "seems", "seen", "self", "selves", "sensible", "sent"
, "serious", "seriously", "seven", "several", "shall", "shan't"
, "she", "she'd", "she'll", "she's", "should", "shouldn't", "since"
, "six", "so", "some", "somebody", "somehow", "someone", "something"
, "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry"
, "specified", "specify", "specifying", "still", "sub", "such", "sup"
, "sure", "t", "t's", "take", "taken", "tell", "tends", "th", "than"
, "thank", "thanks", "thanx", "that", "that's", "thats", "the", "their"
, "theirs", "them", "themselves", "then", "thence", "there", "there's"
, "thereafter", "thereby", "therefore", "therein", "theres", "thereupon"
, "these", "they", "they'd", "they'll", "they're", "they've", "think"
, "third", "this", "thorough", "thoroughly", "those", "though", "three"
, "through", "throughout", "thru", "thus", "to", "together", "too"
, "took", "toward", "towards", "tried", "tries", "truly", "try"
, "trying", "twice", "two", "u", "un", "under", "unfortunately"
, "unless", "unlikely", "until", "unto", "up", "upon", "us", "use"
, "used", "useful", "uses", "using", "usually", "uucp", "v", "value"
, "various", "very", "via", "viz", "vs", "w", "want", "wants", "was"
, "wasn't", "way", "we", "we'd", "we'll", "we're", "we've", "welcome"
, "well", "went", "were", "weren't", "what", "what's", "whatever", "when"
, "when's", "whence", "whenever", "where", "where's", "whereafter"
, "whereas", "whereby", "wherein", "whereupon", "wherever", "whether"
, "which", "while", "whither", "who", "who's", "whoever", "whole", "whom"
, "whose", "why", "why's", "will", "willing", "wish", "with", "within"
, "without", "won't", "wonder", "would", "wouldn't", "x", "y", "yes"
, "yet", "you", "you'd", "you'll", "you're", "you've", "your", "yours"
, "yourself", "yourselves", "z", "zero"]
--}
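For any other category type the pipeline is identical: build Events with toEvents (its first argument seeds the empty event book), then score input text with detectCat. A hedged sketch with a String category and made-up sample texts:

fruitOrTool :: Text -> Maybe String
fruitOrTool = detectCat 99 events
  where
    events :: Events String
    events = toEvents "fruit" [0..2] 10
               [ CatWord "fruit" "apple banana cherry grape melon orange pear"
               , CatWord "tool"  "hammer wrench pliers saw drill screwdriver"
               ]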
@@ -36,7 +36,7 @@ import GHC.Real (round)
import Data.Text (Text)
import NLP.RAKE.Text
import Gargantext.Text.Terms.Stop (stopList)
import Gargantext.Text.Samples.EN (stopList)
import Gargantext.Prelude
select :: Double -> [a] -> [a]
......