Commit c9abf005 authored by Grégoire Locqueville's avatar Grégoire Locqueville

Removed some more dead code

parent ec1b4dd1
Pipeline #7142 canceled with stages
in 43 minutes and 6 seconds
...@@ -377,12 +377,7 @@ library ...@@ -377,12 +377,7 @@ library
Gargantext.Core.Text.Metrics.FrequentItemSet Gargantext.Core.Text.Metrics.FrequentItemSet
Gargantext.Core.Text.Metrics.SpeGen.IncExc Gargantext.Core.Text.Metrics.SpeGen.IncExc
Gargantext.Core.Text.Metrics.Utils Gargantext.Core.Text.Metrics.Utils
Gargantext.Core.Text.Samples.DE
Gargantext.Core.Text.Samples.EN Gargantext.Core.Text.Samples.EN
Gargantext.Core.Text.Samples.ES
Gargantext.Core.Text.Samples.FR
Gargantext.Core.Text.Samples.PL
Gargantext.Core.Text.Samples.ZH
Gargantext.Core.Text.Terms.Mono.Token.En Gargantext.Core.Text.Terms.Mono.Token.En
Gargantext.Core.Text.Terms.Multi.Group Gargantext.Core.Text.Terms.Multi.Group
Gargantext.Core.Text.Terms.Multi.PosTagging Gargantext.Core.Text.Terms.Multi.PosTagging
......
...@@ -195,10 +195,6 @@ mkNgramsElement :: NgramsTerm ...@@ -195,10 +195,6 @@ mkNgramsElement :: NgramsTerm
mkNgramsElement ngrams list' rp children = mkNgramsElement ngrams list' rp children =
NgramsElement ngrams (size (unNgramsTerm ngrams)) list' mempty (_rp_root <$> rp) (_rp_parent <$> rp) children NgramsElement ngrams (size (unNgramsTerm ngrams)) list' mempty (_rp_root <$> rp) (_rp_parent <$> rp) children
-- | Build an 'NgramsElement' for a term via 'mkNgramsElement', passing
-- 'Nothing' for the root/parent information and 'mempty' for the children.
-- The list type falls back to 'MapTerm' when the caller supplies 'Nothing'.
newNgramsElement :: Maybe ListType -> NgramsTerm -> NgramsElement
newNgramsElement mayList ngrams = mkNgramsElement ngrams listType Nothing mempty
  where
    listType = fromMaybe MapTerm mayList
instance ToSchema NgramsElement where instance ToSchema NgramsElement where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_ne_")
...@@ -467,9 +463,6 @@ newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch) ...@@ -467,9 +463,6 @@ newtype NgramsTablePatch = NgramsTablePatch (PatchMap NgramsTerm NgramsPatch)
deriving stock (Eq, Show, Generic) deriving stock (Eq, Show, Generic)
deriving newtype (ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable) deriving newtype (ToJSON, FromJSON, Semigroup, Monoid, Validity, Transformable)
-- | Wrap a plain 'Map' of per-term patches into an 'NgramsTablePatch'.
mkNgramsTablePatch :: Map NgramsTerm NgramsPatch -> NgramsTablePatch
mkNgramsTablePatch patches = NgramsTablePatch (PM.fromMap patches)
instance FromField NgramsTablePatch instance FromField NgramsTablePatch
where where
fromField = fromJSONField fromField = fromJSONField
......
...@@ -30,14 +30,7 @@ import Data.String (String) ...@@ -30,14 +30,7 @@ import Data.String (String)
import Data.Text (pack, unpack, toLower) import Data.Text (pack, unpack, toLower)
import Data.Tuple.Extra (both) import Data.Tuple.Extra (both)
import GHC.Generics import GHC.Generics
import Gargantext.Core (Lang(..), allLangs)
import Gargantext.Core.Text.Metrics.Count (occurrencesWith) import Gargantext.Core.Text.Metrics.Count (occurrencesWith)
import Gargantext.Core.Text.Samples.DE qualified as DE
import Gargantext.Core.Text.Samples.EN qualified as EN
import Gargantext.Core.Text.Samples.ES qualified as ES
import Gargantext.Core.Text.Samples.FR qualified as FR
import Gargantext.Core.Text.Samples.PL qualified as PL
import Gargantext.Core.Text.Samples.ZH qualified as ZH
import Gargantext.Core.Text.Terms.Mono (words) import Gargantext.Core.Text.Terms.Mono (words)
import Gargantext.Database.GargDB import Gargantext.Database.GargDB
import Gargantext.Prelude hiding (Word, toList, toLower, words) import Gargantext.Prelude hiding (Word, toList, toLower, words)
...@@ -73,20 +66,6 @@ instance (Serialise a, Ord a) => ReadFile (Events a) where ...@@ -73,20 +66,6 @@ instance (Serialise a, Ord a) => ReadFile (Events a) where
readFile' filepath = deserialise <$> BSL.readFile filepath readFile' filepath = deserialise <$> BSL.readFile filepath
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Placeholder for a default stop-word detector.
--
-- Not implemented: evaluating it aborts.  It panics with a message naming
-- this function (same convention as the other "not impl yet" panic in this
-- module) instead of a bare 'undefined', so a crash can be traced to its
-- origin.
detectStopDefault :: Text -> Maybe Bool
detectStopDefault = panic "[G.C.T.L:detectStopDefault] not impl yet"
-- | 'detectDefault' specialised to 'Bool' labels, with 'False' passed as its
-- first argument.
detectBool :: [(Bool, Text)] -> Text -> Maybe Bool
detectBool = detectDefault False
-- | 'detectDefaultWith' for events that already carry 'Text': the projection
-- function is 'identity'.
detectDefault :: Ord a => a -> [(a, Text)] -> Text -> Maybe a
detectDefault d events = detectDefaultWith identity d events
-- | Build priors from the labelled events with 'priorEventsWith', then
-- classify an input with 'detectDefaultWithPriors' using those priors.
-- The same projection @f@ is used for both steps.
detectDefaultWith :: Ord a => (b -> Text) -> a -> [(a, b)] -> b -> Maybe a
detectDefaultWith f d events =
  detectDefaultWithPriors f (priorEventsWith f d events)
detectDefaultWithPriors :: Ord b => (a -> Text) -> Events b -> a -> Maybe b detectDefaultWithPriors :: Ord b => (a -> Text) -> Events b -> a -> Maybe b
detectDefaultWithPriors f priors = detectCat 99 priors . f detectDefaultWithPriors f priors = detectCat 99 priors . f
...@@ -95,28 +74,6 @@ priorEventsWith f d e = toEvents d [0..2] 10 es ...@@ -95,28 +74,6 @@ priorEventsWith f d e = toEvents d [0..2] 10 es
where where
es = map (\(a,b) -> CatWord a (unpack $ toLower $ f b)) e es = map (\(a,b) -> CatWord a (unpack $ toLower $ f b)) e
------------------------------------------------------------------------
-- | Guess the language of a text by running 'detectCat' against events built
-- from one bundled sample text per language listed in 'allLangs'.
detectLangDefault :: Text -> Maybe Lang
detectLangDefault = detectCat 99 langEvents
  where
    -- One labelled sample per language, fed to 'toEvents'.
    langEvents :: Events Lang
    langEvents = toEvents FR [0..2] 10 (map sampleFor allLangs)

    sampleFor :: Lang -> CatWord Lang
    sampleFor lang = CatWord lang (sampleText lang)

    -- Languages without a bundled sample fall through to the panic.
    sampleText :: Lang -> String
    sampleText lang = case lang of
      EN -> EN.textSample
      FR -> FR.textSample
      DE -> DE.textSample
      ES -> ES.textSample
      ZH -> ZH.textSample
      PL -> PL.textSample
      _  -> panic "[G.C.T.L:detectLangDefault] not impl yet"
------------------------------------------------------------------------ ------------------------------------------------------------------------
detectCat :: Ord a => Int -> Events a -> Text -> Maybe a detectCat :: Ord a => Int -> Events a -> Text -> Maybe a
detectCat n es = head . map fst . (detectCat' n es) . unpack detectCat n es = head . map fst . (detectCat' n es) . unpack
......
...@@ -79,11 +79,6 @@ tsvDecodeOptions :: DecodeOptions ...@@ -79,11 +79,6 @@ tsvDecodeOptions :: DecodeOptions
tsvDecodeOptions = (defaultDecodeOptions tsvDecodeOptions = (defaultDecodeOptions
{decDelimiter = fromIntegral $ ord tsvListFieldDelimiter} {decDelimiter = fromIntegral $ ord tsvListFieldDelimiter}
) )
-- | Encoding options that use 'tsvListFieldDelimiter' as the field
-- delimiter; every other setting is left at 'defaultEncodeOptions'.
tsvEncodeOptions :: EncodeOptions
tsvEncodeOptions = defaultEncodeOptions { encDelimiter = delimiter }
  where
    delimiter = fromIntegral (ord tsvListFieldDelimiter)
------------------------------------------------------------------------ ------------------------------------------------------------------------
fromTsvListFile :: FilePath -> IO (Header, Vector TsvList) fromTsvListFile :: FilePath -> IO (Header, Vector TsvList)
fromTsvListFile fp = do fromTsvListFile fp = do
...@@ -92,7 +87,3 @@ fromTsvListFile fp = do ...@@ -92,7 +87,3 @@ fromTsvListFile fp = do
Left e -> panicTrace (pack e) Left e -> panicTrace (pack e)
Right tsvList -> pure tsvList Right tsvList -> pure tsvList
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Encode a header and its rows with 'tsvEncodeOptions' and write the
-- resulting bytes to the given path.
toTsvListFile :: FilePath -> (Header, Vector TsvList) -> IO ()
toTsvListFile fp (h, vs) = BL.writeFile fp encoded
  where
    encoded = encodeByNameWith tsvEncodeOptions h (V.toList vs)
------------------------------------------------------------------------
...@@ -96,12 +96,6 @@ toGroupedTreeInstitutes' institutesTree m = case HashMap.lookup Nothing m of ...@@ -96,12 +96,6 @@ toGroupedTreeInstitutes' institutesTree m = case HashMap.lookup Nothing m of
Nothing -> mempty Nothing -> mempty
Just m' -> toGroupedTreeInstitutes'' m m' institutesTree Just m' -> toGroupedTreeInstitutes'' m m' institutesTree
-- | Keep only the entries whose scores satisfy the predicate.
filterGroupedTree :: (GroupedTreeScores a -> Bool)
                  -> HashMap Parent (GroupedTreeScores a)
                  -> HashMap Parent (GroupedTreeScores a)
filterGroupedTree = HashMap.filter
toGroupedTree'' :: Eq a => HashMap (Maybe Parent) (HashMap NgramsTerm (GroupedTreeScores a)) toGroupedTree'' :: Eq a => HashMap (Maybe Parent) (HashMap NgramsTerm (GroupedTreeScores a))
-> (HashMap NgramsTerm (GroupedTreeScores a)) -> (HashMap NgramsTerm (GroupedTreeScores a))
-> HashMap Parent (GroupedTreeScores a) -> HashMap Parent (GroupedTreeScores a)
...@@ -153,4 +147,4 @@ toGroupedTreeInstitutes'' m notEmpty institutesTree ...@@ -153,4 +147,4 @@ toGroupedTreeInstitutes'' m notEmpty institutesTree
$ HashMap.lookup (Just key) dict' $ HashMap.lookup (Just key) dict'
) )
) )
val val
\ No newline at end of file
...@@ -21,7 +21,6 @@ module Gargantext.Core.Text.List.Social.Prelude ...@@ -21,7 +21,6 @@ module Gargantext.Core.Text.List.Social.Prelude
import Control.Lens import Control.Lens
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap import Data.HashMap.Strict qualified as HashMap
import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PatchMap import Data.Map.Strict.Patch qualified as PatchMap
import Data.Monoid import Data.Monoid
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
...@@ -79,21 +78,6 @@ instance Monoid FlowListScores where ...@@ -79,21 +78,6 @@ instance Monoid FlowListScores where
-- | Tools to inherit groupings -- | Tools to inherit groupings
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Tools -- | Tools
-- Merge a list of two-level maps into one.  Inner maps that share an outer
-- key are combined, and values that share both keys are added together.
parentUnionsMerge :: (Ord a, Ord b, Num c, Hashable a, Hashable b)
                  => [HashMap a (HashMap b c)]
                  -> HashMap a (HashMap b c)
parentUnionsMerge maps = HashMap.unionsWith (HashMap.unionWith (+)) maps
-- Exclusive union of parent maps.
-- The order of the list encodes a preference: [Private, Shared, Public]
-- means Private > Shared > Public.  If data have not been tagged privately,
-- the tags from the later maps are used instead.  When a key occurs in
-- several maps, only its first occurrence is kept and the others are ignored.
parentUnionsExcl :: (Ord a, Hashable a)
                 => [HashMap a b]
                 -> HashMap a b
parentUnionsExcl maps = HashMap.unions maps
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | Takes key with max value if and only if value > 0 -- | Takes key with max value if and only if value > 0
...@@ -118,9 +102,3 @@ keyWithMaxValue m = do ...@@ -118,9 +102,3 @@ keyWithMaxValue m = do
------------------------------------------------------------------------ ------------------------------------------------------------------------
unPatchMapToHashMap :: (Ord a, Hashable a) => PatchMap a b -> HashMap a b unPatchMapToHashMap :: (Ord a, Hashable a) => PatchMap a b -> HashMap a b
unPatchMapToHashMap = HashMap.fromList . PatchMap.toList unPatchMapToHashMap = HashMap.fromList . PatchMap.toList
-- | Convert a 'PatchMap' into a 'Map' by going through its association list.
unPatchMapToMap :: Ord a => PatchMap a b -> Map a b
unPatchMapToMap patch = Map.fromList (PatchMap.toList patch)
-- | Unwrap an 'NgramsTablePatch' and expose its per-term patches as a
-- 'HashMap' (via 'unPatchMapToHashMap').
unNgramsTablePatch :: NgramsTablePatch -> HashMap NgramsTerm NgramsPatch
unNgramsTablePatch tablePatch = case tablePatch of
  NgramsTablePatch inner -> unPatchMapToHashMap inner
{-|
Module : Gargantext.Core.Text.Samples.DE
Description : Sample of German Text
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Source: Wikipedia
Page : text mining
-}
module Gargantext.Core.Text.Samples.DE where
import Data.String (String)
-- | German sample text (per the module header: taken from the Wikipedia
-- page on text mining).
textSample :: String
textSample = "Text Mining, seltener auch Textmining, Text Data Mining oder Textual Data Mining, ist ein Bündel von Algorithmus-basierten Analyseverfahren zur Entdeckung von Bedeutungsstrukturen aus un- oder schwachstrukturierten Textdaten. Mit statistischen und linguistischen Mitteln erschließt Text-Mining-Software aus Texten Strukturen, die die Benutzer in die Lage versetzen sollen, Kerninformationen der verarbeiteten Texte schnell zu erkennen. Im Optimalfall liefern Text-Mining-Systeme Informationen, von denen die Benutzer zuvor nicht wissen, ob und dass sie in den verarbeiteten Texten enthalten sind. Bei zielgerichteter Anwendung sind Werkzeuge des Text Mining außerdem in der Lage, Hypothesen zu generieren, diese zu überprüfen und schrittweise zu verfeinern."
{-|
Module : Gargantext.Core.Text.Samples.ES
Description : Sample of Spanish Text
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Source: Wikipedia
Page : text mining
-}
module Gargantext.Core.Text.Samples.ES where
import Data.String (String)
-- | Spanish sample text (per the module header: taken from the Wikipedia
-- page on text mining).
textSample :: String
textSample = "La minería de textos se refiere al proceso de derivar información nueva de textos. A comienzos de los años ochenta surgieron los primeros esfuerzos de minería de textos que necesitaban una gran cantidad de esfuerzo humano, pero los avances tecnológicos han permitido que esta área progrese de manera rápida en la última década. La minería de textos es un área multidisciplinar basada en la recuperación de información, minería de datos, aprendizaje automático, estadísticas y la lingüística computacional. Como la mayor parte de la información (más de un 80%) se encuentra actualmente almacenada como texto, se cree que la minería de textos tiene un gran valor comercial."
{-|
Module : Gargantext.Core.Text.Samples.FR
Description : Sample of French Text
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Source: Wikipedia
Page : text mining
-}
module Gargantext.Core.Text.Samples.FR where
import Gargantext.Prelude ((<>))
import Data.String (String)
-- | French sample text (per the module header: taken from the Wikipedia
-- page on text mining), with one extra short sentence appended via '<>'.
textSample :: String
textSample = "La fouille de textes ou « l'extraction de connaissances » dans les textes est une spécialisation de la fouille de données et fait partie du domaine de l'intelligence artificielle. Cette technique est souvent désignée sous l'anglicisme text mining. Elle désigne un ensemble de traitements informatiques consistant à extraire des connaissances selon un critère de nouveauté ou de similarité dans des textes produits par des humains pour des humains. Dans la pratique, cela revient à mettre en algorithme un modèle simplifié des théories linguistiques dans des systèmes informatiques d'apprentissage et de statistiques. Les disciplines impliquées sont donc la linguistique calculatoire, l'ingénierie des langues, l'apprentissage artificiel, les statistiques et l'informatique." <> "Je pense donc je suis."
{-|
Module : Gargantext.Core.Text.Samples.PL
Description : Sample of Polish Text
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Source: Wikipedia
Page : text mining
-}
module Gargantext.Core.Text.Samples.PL where
import Data.String (String)
-- | Polish sample text (per the module header: taken from the Wikipedia
-- page on text mining).
textSample :: String
textSample = "Text mining (eksploracja tekstu) – ogólna nazwa metod eksploracji danych służących do wydobywania danych z tekstu i ich późniejszej obróbki. Metody text mining stosowane są np. do statystycznego przetwarzania: artykułów prasowych, wiadomości poczty elektronicznej, otwartych odpowiedzi na pytania ankietowe, opisów dolegliwości, podawanych przez pacjentów, komentarzy do sesji giełdowych i zdarzeń dotyczące spółek, życiorysów zawodowych i listów motywacyjnych, tekstów reklamacji konsumenckich. Text mining może polegać na znalezieniu kluczowych fraz, zdań, które zostają następnie zakodowane pod postacią zmiennych numerycznych. Później stosuje się metody statystyki i eksploracji danych w celu odkrycia zależności pomiędzy zmiennymi. Ze względu na to, że powstające zmienne są zwykle nominalne, szczególnie użyteczna jest analiza koszykowa."
{-|
Module : Gargantext.Core.Text.Samples.ZH
Description : Sample of Chinese Text
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Source: Wikipedia
Page : text mining
-}
module Gargantext.Core.Text.Samples.ZH where
import Data.String (String)
-- | Chinese sample text (per the module header: taken from the Wikipedia
-- page on text mining).
textSample :: String
textSample = "文本挖掘有时也被称为文字探勘、文本数据挖掘等,大致相当于文字分析,一般指文本处理过程中产生高质量的信息。高质量的信息通常通过分类和预测来产生,如模式识别。文本挖掘通常涉及输入文本的处理过程(通常进行分析,同时加上一些衍生语言特征以及消除杂音,随后插入到数据库中) ,产生结构化数据,并最终评价和解释输出。'高品质'的文本挖掘通常是指某种组合的相关性,新颖性和趣味性。典型的文本挖掘方法包括文本分类,文本聚类,概念/实体挖掘,生产精确分类,观点分析,文档摘要和实体关系模型(即,学习已命名实体之间的关系) 。 文本分析包括了信息检索、词典分析来研究词语的频数分布、模式识别、标签 注释、信息抽取,数据挖掘技术包括链接和关联分析、可视化和预测分析。本质上,首要的任务是,通过自然语言处理和分析方法,将文本转化为数据进行分析"
...@@ -10,7 +10,8 @@ roots = [ '^Main\.main$' ...@@ -10,7 +10,8 @@ roots = [ '^Main\.main$'
# Definitions whose name (or the name of the module they are in) suggests # Definitions whose name (or the name of the module they are in) suggests
# there is a good reason for them not to be included: "test", "example" # there is a good reason for them not to be included: "test", "example"
# Name begins with "test" or "trace": # Name begins with "example", "test" or "trace":
, '.*\.example[^\.]*$'
, '.*\.test[^\.]*$' , '.*\.test[^\.]*$'
, '.*\.trace[^\.]*$' , '.*\.trace[^\.]*$'
# Module is named "Example": # Module is named "Example":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment