Removed some more dead code and modules

e69e0599 · Grégoire Locqueville · 52110ac8 · e69e0599 · e69e0599 · e69e0599
Commit e69e0599 authored Dec 16, 2024 by Grégoire Locqueville
14 changed files
--- a/gargantext.cabal
+++ b/gargantext.cabal
@@ -217,7 +217,6 @@ library
      Gargantext.Core.Text.List.Group.WithStem
      Gargantext.Core.Text.List.Social
      Gargantext.Core.Text.Metrics
-      Gargantext.Core.Text.Metrics.CharByChar
      Gargantext.Core.Text.Metrics.Count
      Gargantext.Core.Text.Metrics.TFICF
      Gargantext.Core.Text.Ngrams
@@ -376,7 +375,6 @@ library
      Gargantext.Core.Text.List.Social.Prelude
      Gargantext.Core.Text.Metrics.FrequentItemSet
      Gargantext.Core.Text.Metrics.SpeGen.IncExc
-      Gargantext.Core.Text.Metrics.Utils
      Gargantext.Core.Text.Samples.EN
      Gargantext.Core.Text.Terms.Mono.Token.En
      Gargantext.Core.Text.Terms.Multi.Group
@@ -815,7 +813,6 @@ test-suite garg-test-tasty
      Test.Ngrams.Lang.En
      Test.Ngrams.Lang.Fr
      Test.Ngrams.Lang.Occurrences
-      Test.Ngrams.Metrics
      Test.Ngrams.NLP
      Test.Ngrams.Query
      Test.Ngrams.Query.PaginationCorpus

--- a/src/Gargantext/API/Metrics.hs
+++ b/src/Gargantext/API/Metrics.hs
@@ -25,7 +25,7 @@ import Gargantext.API.Ngrams.Types (QueryParamR, TabType, ngramsTypeFromTabType,
 import Gargantext.API.Prelude (IsGargServer)
 import Gargantext.API.Routes.Named.Metrics qualified as Named
 import Gargantext.Core.NodeStory (HasNodeStory)
-import Gargantext.Core.Text.Metrics (Scored(..), {-normalizeGlobal,-} normalizeLocal)
+import Gargantext.Core.Text.Metrics (Scored(..),  normalizeLocal)
 import Gargantext.Core.Types (CorpusId, ListId, ListType(..))
 import Gargantext.Core.Types.Query (Limit)
 import Gargantext.Core.Viz.Chart (chartData, histoData, treeData)

--- a/src/Gargantext/Core/Text/Metrics.hs
+++ b/src/Gargantext/Core/Text/Metrics.hs
@@ -58,17 +58,6 @@ instance Semigroup a => Semigroup (Scored a) where
               (b <> b')
               (c <> c')
-localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
-localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
-                                         (Map.toList fi)
-                                          scores
-  where
-    (ti, fi) = createIndices m
-    (is, ss) = incExcSpeGen $ cooc2mat Triangle ti m
-    scores   = DAA.toList
-             $ DAA.run
-             $ DAA.zip (DAA.use is) (DAA.use ss)
 -- TODO Code to be removed below
 -- TODO in the textflow we end up needing these indices , it might be
 -- better to compute them earlier and pass them around.

--- a/src/Gargantext/Core/Text/Metrics/CharByChar.hs
+++ b/src/Gargantext/Core/Text/Metrics/CharByChar.hs
-{-|
-Module      : Gargantext.Core.Text.Metrics.CharByChar
-Description : All parsers of Gargantext in one file.
-Copyright   : (c) CNRS, 2017 - present
-License     : AGPL + CECILL v3
-Maintainer  : team@gargantext.org
-Stability   : experimental
-Portability : POSIX
-Mainly reexport functions in @Data.Text.Metrics@
-}
-module Gargantext.Core.Text.Metrics.CharByChar (levenshtein
-                                      , levenshteinNorm
-                                      , damerauLevenshtein
-                                      , damerauLevenshteinNorm
-                                      , overlap
-                                      , jaccard
-                                      , hamming
-                                      ) where
-import Data.Text.Metrics qualified as DTM
-import Gargantext.Prelude
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m
-{- * Example de titre
-}
-- | This module provide metrics to compare Text
-- starting as an API rexporting main functions of the great lib
-- text-metrics of Mark Karpov
-- | Levenshtein Similarity
-- In information theory, Linguistics and computer science, 
-- the Levenshtein distance is a string metric for measuring 
-- the difference between two sequences.
-- See: https://en.wikipedia.org/wiki/Levenshtein_distance
--
-levenshtein :: Text -> Text -> Int
-levenshtein = DTM.levenshtein
-- | Return normalized Levenshtein distance between two 'Text' values.
-- Result is a non-negative rational number (represented as @'Ratio'
-- 'Data.Numeric.Natural'@), where 0 signifies no similarity between the
-- strings, while 1 means exact match.
--
-levenshteinNorm :: Text -> Text -> Ratio Int
-levenshteinNorm = DTM.levenshteinNorm
-- | Return Damerau-Levenshtein distance between two 'Text' values. The 
-- function works like 'levenshtein', but the collection of allowed     
-- operations also includes transposition of two /adjacent/ characters. 
-- See also:                                                            
-- <https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance> 
--
-damerauLevenshtein :: Text -> Text -> Int
-damerauLevenshtein = DTM.damerauLevenshtein
-- damerau-Levenshtein distance normalized
--
-damerauLevenshteinNorm :: Text -> Text -> Ratio Int
-damerauLevenshteinNorm = DTM.damerauLevenshteinNorm
-- Treating inputs like sets
-- | Return overlap coefficient for two 'Text' values. Returned value   
-- is in the range from 0 (no similarity) to 1 (exact match). Return 1  
-- if both 'Text' values are empty.                                     
--
-- See also: <https://en.wikipedia.org/wiki/Overlap_coefficient>.
-overlap :: Text -> Text -> Ratio Int
-overlap = DTM.overlap
-- | Jaccard distance
-- measures dissimilarity between sample sets
-jaccard :: Text -> Text -> Ratio Int
-jaccard = DTM.jaccard
-- | Hamming Similarity
-- In information theory, the Hamming distance between two strings of
-- equal length is the number of positions at which the corresponding
-- symbols are different. In other words, it measures the minimum number of
-- substitutions required to change one string into the other
-- See:  https://en.wikipedia.org/wiki/Hamming_distance
-hamming :: Text -> Text -> Maybe Int
-hamming = DTM.hamming
--- a/src/Gargantext/Core/Text/Metrics/Count.hs
+++ b/src/Gargantext/Core/Text/Metrics/Count.hs
@@ -27,12 +27,10 @@ Source : https://en.wikipedia.org/wiki/Type%E2%80%93token_distinction#Occurrence
 module Gargantext.Core.Text.Metrics.Count
  where
-import Control.Arrow (Arrow(..), (***))
 import Data.List qualified as List
-import Data.Map.Strict  ( empty, singleton , insertWith, unionWith, unionsWith , mapKeys )
+import Data.Map.Strict  ( empty, insertWith )
 import Data.Map.Strict qualified as DMS
-import Data.Text (pack)
+import Gargantext.Core.Types (Stems)
-import Gargantext.Core.Types (Terms(..), Stems)
 import Gargantext.Prelude hiding (empty)
 ------------------------------------------------------------------------
@@ -45,73 +43,10 @@ data Group = ByStem | ByOntology
 type Grouped = Stems
-{-
-- >> let testData = ["blue lagoon", "blues lagoon", "red lagoon"]
-- >> map occurrences <$> Prelude.mapM (terms Mono EN) 
-- [fromList [(fromList ["blue"],1),(fromList ["lagoon"],1)],fromList [(fromList ["blue"],1),(fromList ["lagoon"],1)],fromList [(fromList ["lagoon"],1),(fromList ["red"],1)]]
--λ:   cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),1)]
--λ:   cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon", "red lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)]
--λ:   cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon", "red lagoon red lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)]
--λ:   cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon blues lagoon", "red lagoon red lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)]
---- 
-           -}
 type Occs      = Int
 type Coocs     = Int
 type Threshold = Int
-removeApax :: Threshold -> Map ([Text], [Text]) Int -> Map ([Text], [Text]) Int
-removeApax t = DMS.filter (> t)
-cooc :: [[Terms]] -> Map ([Text], [Text]) Int
-cooc tss = coocOnWithLabel _terms_stem (useLabelPolicy label_policy) tss
-  where
-    terms_occs = occurrencesOn _terms_stem (List.concat tss)
-    label_policy = mkLabelPolicy terms_occs
-coocOnWithLabel :: (Ord label, Ord b) => (a -> b) -> (b -> label)
-                                      -> [[a]] -> Map (label, label) Coocs
-coocOnWithLabel on' policy tss = mapKeys (delta policy) $ coocOn on' tss
-  where
-    delta :: Arrow a => a b' c' -> a (b', b') (c', c')
-    delta f = f *** f
-mkLabelPolicy :: Map Grouped (Map Terms Occs) -> Map Grouped [Text]
-mkLabelPolicy = DMS.map f where
-  f = _terms_label . fst . maximumWith snd . DMS.toList
-     -- TODO use the Foldable instance of Map instead of building a list
-useLabelPolicy :: Map Grouped [Text] -> Grouped -> [Text]
-useLabelPolicy m g = case DMS.lookup g m of
-  Just label -> label
-  Nothing    -> panic $ "Label of Grouped not found: " <> (pack $ show g)
-                -- TODO: use a non-fatal error if this can happen in practice
-{-
-labelPolicy :: Map Grouped (Map Terms Occs) -> Grouped -> Label
-labelPolicy m g =  case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList <$> lookup g m of
-                     Just label -> label
-                     Nothing    -> panic $ "Label of Grouped not found: " <> (pack $ show g)
-}
-coocOn :: Ord b => (a -> b) -> [[a]] -> Map (b, b) Int
-coocOn f as = DMS.unionsWith (+) $ map (coocOn' f) as
-coocOn' :: Ord b => (a -> b) -> [a] -> Map (b, b) Int
-coocOn' fun ts = DMS.fromListWith (+) xs
-  where
-      ts' = List.nub $ map fun ts
-      xs = [ ((x, y), 1)
-           | x <- ts'
-           , y <- ts'
-           , x >= y
-           ]
 ------------------------------------------------------------------------
 coocOnContexts :: (a -> [Text]) -> [[a]] -> Map ([Text], [Text]) Int
@@ -127,23 +62,9 @@ coocOnSingleContext fun ts = xs
           , x >= y
           ]
 ------------------------------------------------------------------------
-- | Compute the grouped occurrences (occ)
-occurrences :: [Terms] -> Map Grouped (Map Terms Int)
-occurrences = occurrencesOn _terms_stem
-occurrencesOn :: (Ord a, Ord b) => (a -> b) -> [a] -> Map b (Map a Int)
-occurrencesOn f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a 1) m) empty
 occurrencesWith :: (Foldable list, Ord k, Num a, Show k, Show a, Show (list b)) => (b -> k) -> list b -> Map k a
 occurrencesWith f xs = trace (show (xs,m) :: Text)  m
  where
    m = foldl' (\x y -> insertWith (+) (f y) 1 x) empty xs
 -- TODO add groups and filter stops
-sumOcc :: Ord a => [Occ a] -> Occ a
-sumOcc xs = unionsWith (+) xs
--- a/src/Gargantext/Core/Text/Metrics/FrequentItemSet.hs
+++ b/src/Gargantext/Core/Text/Metrics/FrequentItemSet.hs
@@ -15,7 +15,6 @@ Domain Specific Language to manage Frequent Item Set (FIS)
 module Gargantext.Core.Text.Metrics.FrequentItemSet
  ( Fis, Size(..)
  , occ_hlcm, cooc_hlcm
-  , allFis, between
  , fisWithSize
  , fisWith
  , fisWithSizePoly
@@ -43,18 +42,6 @@ occ_hlcm = fisWithSize (Point 1)
 cooc_hlcm :: Frequency -> [[Item]] -> [Fis]
 cooc_hlcm = fisWithSize (Point 2)
-allFis :: Frequency -> [[Item]] -> [Fis]
-allFis = fisWith Nothing
------------------------------------------------------------------------
-between :: (Int, Int) -> Frequency -> [[Item]] -> [Fis]
-between (x,y) = fisWithSize (Segment x y)
--maximum :: Int -> Frequency -> [[Item]] -> [Fis]
--maximum m = between (0,m)
------------------------------------------------------------------------
 ------------------------------------------------------------------------
 -- | Data type to type the Frequent Item Set
 -- TODO replace List with Set in fisItemSet 

--- a/src/Gargantext/Core/Text/Metrics/TFICF.hs
+++ b/src/Gargantext/Core/Text/Metrics/TFICF.hs
@@ -20,14 +20,9 @@ module Gargantext.Core.Text.Metrics.TFICF
 , Total(..)
 , Count(..)
 , tficf
- , sortTficf
 )
 where
-import Data.List qualified as List
-import Data.Map.Strict (toList)
-import Data.Ord qualified as DO (Down(..))
-import Gargantext.Core.Types (Ordering(..))
 import Gargantext.Prelude hiding (Down, Ordering, toList)
 path :: Text
@@ -64,11 +59,3 @@ tficf (TficfInfra (Count ic) (Total it) )
                                               <> ", sc = " <> show sc
                                               <> ", st = " <> show st
 tficf _ _ = panicTrace $ "[ERR]" <> path <> "Undefined for these contexts"
-sortTficf :: Ordering
-          -> Map Text Double
-          -> [(Text, Double)]
-sortTficf Down = List.sortOn (DO.Down . snd) . toList
-sortTficf Up   = List.sortOn snd . toList
--- a/src/Gargantext/Core/Text/Metrics/Utils.hs
+++ b/src/Gargantext/Core/Text/Metrics/Utils.hs
-{-|
-Module      : Gargantext.Core.Text.Metrics.Utils
-Description : Some functions to count.
-Copyright   : (c) CNRS, 2017-Present
-License     : AGPL + CECILL v3
-Maintainer  : team@gargantext.org
-Stability   : experimental
-Portability : POSIX
-}
-module Gargantext.Core.Text.Metrics.Utils where
-import Data.List qualified as L
-import Data.Map.Strict qualified as DM
-import Gargantext.Prelude
-countElem :: (Ord k) => DM.Map k Int -> k -> DM.Map k Int
-countElem m e = DM.insertWith (+) e 1 m
-freq :: (Ord k) => [k] -> DM.Map k Int
-freq = foldl countElem DM.empty
-getMaxFromMap :: Ord a => Map a1 a -> [a1]
-getMaxFromMap m = go [] Nothing (DM.toList m)
-  where
-    go ks _        []           = ks
-    go ks Nothing  ((k,v):rest) = go (k:ks) (Just v) rest
-    go ks (Just u) ((k,v):rest)
-        | v < u     = go ks     (Just u) rest
-        | v > u     = go [k]    (Just v) rest
-        | otherwise = go (k:ks) (Just v) rest
-average :: [Double] -> Double
-average x = L.sum x / L.genericLength x
-average' :: [Int] -> Double
-average' x = (L.sum y) / (L.genericLength y) where
-    y = L.map fromIntegral x
--- a/src/Gargantext/Core/Text/Terms/Eleve.hs
+++ b/src/Gargantext/Core/Text/Terms/Eleve.hs
@@ -61,16 +61,8 @@ nan = 0 / 0
 noNaNs :: P.RealFloat e => [e] -> [e]
 noNaNs = filter (not . P.isNaN)
-updateIfDefined :: P.RealFloat e => e -> e -> e
-updateIfDefined e0 e | P.isNaN e = e0
-                     | otherwise = e
 sim :: Entropy e => e -> e -> Bool
 sim x y = x == y || (P.isNaN x && P.isNaN y)
-subst :: Entropy e => (e, e) -> e -> e
-subst (src, dst) x | sim src x = dst
-                   | otherwise = x
 ------------------------------------------------------------------------
 -- | TODO: Show Instance only used for debugging
@@ -169,14 +161,6 @@ toTree k (Node c e cs) = Tree.Node (k, c, Just e)  (map (uncurry toTree) $ Map.t
 normalizeLevel :: Entropy e => e -> e -> e -> e
 normalizeLevel m v e = (e - m) / v
-{- Unused
-nodeChildren :: Trie k e -> Map k (Trie k e)
-nodeChildren (Node _ _ cs) = cs
-nodeChildren (Leaf _)      = Map.empty
-}
 chunkAlongEleve :: Int -> [a] -> [[a]]
 chunkAlongEleve n xs = L.take n <$> L.tails xs
@@ -317,38 +301,6 @@ mayCons :: [a] -> [[a]] -> [[a]]
 mayCons [] xss = xss
 mayCons xs xss = xs : xss
-{-
-split :: (IsTrie trie, Entropy e) => Lens' i e -> trie Token i -> [Token] -> [[Token]]
-split _   _ [] = []
-split inE t (Terminal Start:xs) = split inE t xs
-split inE t (x0:xs0) = go [x0] xs0
-  where
-    go pref []                  = [pref]
-    go pref (Terminal Stop:_)   = [pref]
-    go _    (Terminal Start:_)  = panic "split impossible"
-    go pref (x:xs) =
-        -- trace (show (if acc then "ACC" else "CUT", (prefx, epxt), if acc then ">" else "<=", ((pref, ept), "+", ([x], ext)))) $
-        if acc
-          then go prefx xs
-          else mayCons pref $ go [x] xs
-      where
-        prefx = pref <> [x]
-        pt   = findTrie pref t
-        pxt  = findTrie prefx t
-        xt   = findTrie [x] t
-        ept  = ne pt
-    --  ^ entropy of the current prefix
-        ext  = ne xt
-    --  ^ entropy of [x]
-        epxt = ne pxt
-    --  ^ entropy of the current prefix plus x
-        acc  = P.isNaN ept || P.isNaN ext || not (P.isNaN epxt) -- && (epxt > mean [ept, ext])
-        -- aut(["in","this","paper"]) > aut(["in","this"]) + aut(["paper"])
-    ne = nodeEntropy inE
-}
 split :: Entropy e => Int -> Lens' i e -> Tries Token i -> [Token] -> [[Text]]
 split _ _   _ []  = []
 split _ _   _ [t] = pure <$> nonTerminals [t]
@@ -357,32 +309,8 @@ split n inE t ts  = nonTerminals pref `mayCons` split n inE t (drop (length pref
    pref = maximumWith (\ks -> nodeEntropy inE $ findTrie ks t)
                       (L.tail . L.inits . take n $ ts)
-{-
-split :: Entropy e => Lens' i e -> Tries Token i -> [Token] -> [[Token]]
-split inE t0 ts =
-  maximumWith (sum . map $ nodeAutonomy inE t0) (all the splits of ts)
-}
 ------------------------------------------------------------------------
-mainEleve :: Int -> [[Text]] -> [[[Text]]]
-mainEleve n x = mainEleve' n x x
-mainEleve' :: Int -> [[Text]] -> [[Text]] -> [[[Text]]]
-mainEleve' n x y = mainEleveWith x' n y
-  where
-    x' = buildTries n (fmap toToken x)
-  -- (fmap toToken i) is computed twice, since mainEleveWith is computing it too
-- | This function should take the longest possible chain of:
-- mainEleve'' n x y = maxChainSizeOf [ mainEleve' n x y
--                                    , mainEleve' n x x
--                                    , mainEleve' n y y
--                                    ]
-mainEleve'' :: Int -> [[Text]] -> [[Text]] -> [[[Text]]]
-mainEleve'' = undefined
 mainEleveWith :: Tries Token () -> Int -> [[Text]] -> [[[Text]]]
 mainEleveWith m n i = fmap (split n info_autonomy t) (fmap toToken i)
  where

--- a/src/Gargantext/Core/Text/Terms/Mono/Token/En.hs
+++ b/src/Gargantext/Core/Text/Terms/Mono/Token/En.hs
@@ -21,7 +21,6 @@ module Gargantext.Core.Text.Terms.Mono.Token.En
    , punctuation
    , finalPunctuation
    , initialPunctuation
-    , allPunctuation
    , contractions
    , negatives
    )
@@ -109,14 +108,6 @@ initialPunctuation x = E $ filter (not . T.null . unwrap) $
             | otherwise -> [ Right ps
                            , Right w ]
-- | Split tokens on transitions between punctuation and
-- non-punctuation characters. This tokenizer is not included in
-- defaultTokenizer pipeline because dealing with word-internal
-- punctuation is quite application specific.
-allPunctuation :: Tokenizer
-allPunctuation = E . map Right 
-                 . T.groupBy (\a b -> Char.isPunctuation a == Char.isPunctuation b) 
 -- | Split words ending in n't, and freeze n't 
 negatives :: Tokenizer
 negatives x | "n't" `T.isSuffixOf` x = E [ Right . T.reverse . T.drop 3 . T.reverse $ x

--- a/src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
+++ b/src/Gargantext/Core/Text/Terms/Multi/PosTagging.hs
@@ -21,9 +21,6 @@ Source: https://en.wikipedia.org/wiki/Part-of-speech_tagging
 {-# OPTIONS_GHC -fno-warn-deprecations #-}
-{-# LANGUAGE TemplateHaskell   #-}
-{-# LANGUAGE TypeOperators     #-}
 module Gargantext.Core.Text.Terms.Multi.PosTagging where
 import Data.Aeson
@@ -39,9 +36,7 @@ import Gargantext.Prelude hiding (ByteString, toLower)
 import Network.HTTP.Simple
 import Network.URI (URI(..))
-- import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
------------------------------------------------------------------------
 ------------------------------------------------------------------------
 tokens2tokensTags :: [Token] -> [TokenTag]
 tokens2tokensTags ts = filter' $ map tokenTag ts
@@ -132,23 +127,3 @@ corenlp :: URI -> Lang -> Text -> IO PosSentences
 corenlp uri lang txt = do
  response <- corenlp' uri lang txt
  pure (getResponseBody response)
-- | parseWith
-- Part Of Speech example
-- parseWith  _tokenPos    "Hello world."
-- == [[("``","``"),("Hello","UH"),("world","NN"),(".","."),("''","''")]]
-- Named Entity Recognition example
-- parseWith  _tokenNer     "Hello world of Peter."
-- [[("``","O"),("Hello","O"),("world","O"),("of","O"),("Peter","PERSON"),(".","O"),("''","O")]]
-tokenWith :: URI -> (Token -> t) -> Lang -> Text -> IO [[(Text, t)]]
-tokenWith uri f lang s = map (map (\t -> (_tokenWord t, f t)))
-                  <$> map _sentenceTokens
-                  <$> _sentences
-                  <$> corenlp uri lang s
----------------------------------------------------------------------------------
-- Here connect to the JohnSnow Server as it has been done above with the corenlp'
-- We need the PosTagging according to the language and the lems
-serverNLP :: Lang -> Text -> IO PosSentences
-serverNLP = undefined
--- a/src/Gargantext/Core/Text/Terms/Multi/RAKE.hs
+++ b/src/Gargantext/Core/Text/Terms/Multi/RAKE.hs
@@ -28,18 +28,13 @@ list quality in time.
 -}
-module Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake, select, hardStopList)
+module Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake, hardStopList)
  where
 import Gargantext.Core.Text.Samples.EN (stopList)
 import Gargantext.Prelude
 import NLP.RAKE.Text
-select :: Double -> [a] -> [a]
-select part ns = take n ns
-  where
-    n = round $ part * (fromIntegral $ length ns)
 multiterms_rake :: Text -> [WordScore]
 multiterms_rake = candidates hardStopList

--- a/src/Gargantext/Database/Action/Flow/Pairing.hs
+++ b/src/Gargantext/Database/Action/Flow/Pairing.hs
@@ -24,11 +24,11 @@ import Data.HashMap.Strict qualified as HashMap
 import Data.List qualified as List
 import Data.Set qualified as Set
 import Data.Text qualified as Text
+import Data.Text.Metrics (levenshtein)
 import Gargantext.API.Ngrams.Tools ( filterListWithRoot, getRepo, groupNodesByNgrams, mapTermListRoot )
 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
 import Gargantext.Core ( HasDBid(toDBid) )
 import Gargantext.Core.NodeStory.Types ( HasNodeStory )
-import Gargantext.Core.Text.Metrics.CharByChar (levenshtein)
 import Gargantext.Core.Text.Ngrams (NgramsType(..))
 import Gargantext.Core.Types (TableResult(..))
 import Gargantext.Core.Types.Main ( ListType(CandidateTerm, MapTerm) )

--- a/test/Test/Ngrams/Metrics.hs
+++ b/test/Test/Ngrams/Metrics.hs
-{-|
-Module      : Ngrams.Metrics
-Description : 
-Copyright   : Ngrams.Metrics (c)
-License     : AGPL + CECILL v3
-Maintainer  : team@gargantext.org
-Stability   : experimental
-Portability : POSIX
-Here is a longer description of this module, containing some
-commentary with @some markup@.
-}
-{-# OPTIONS_GHC -fno-warn-orphans #-}
-{-# LANGUAGE CPP                  #-}
--module Ngrams.Metrics (main) where
-module Test.Ngrams.Metrics  where
-{-
-import           Data.Text (Text)
-import qualified Data.Text as T
-import           Data.Ratio
-import Test.Hspec
-import Test.QuickCheck
-import Gargantext.Prelude
-import Gargantext.Text.Metrics
-#if !MIN_VERSION_base(4,8,0)
-import Control.Applicative
-#endif
-instance Arbitrary Text where
-  arbitrary = T.pack <$> arbitrary
-main :: IO ()
-main = hspec spec
-spec :: Spec
-spec = do
-  describe "levenshtein" $ do
-    testSwap levenshtein
-    context "with concrete examples" $ do
-      testPair levenshtein "kitten"   "sitting" 3
-      testPair levenshtein "cake"     "drake"   2
-      testPair levenshtein "saturday" "sunday"  3
-      testPair levenshtein "red"      "wax"     3
-#if __GLASGOW_HASKELL__ >= 710
-      testPair levenshtein "a😀c"     "abc"     1
-#endif
-      testPair levenshtein "lucky"    "lucky"   0
-      testPair levenshtein ""         ""        0
-  describe "levenshteinNorm" $ do
-    testSwap levenshteinNorm
-    testPair levenshteinNorm "kitten"   "sitting" (4 % 7)
-    testPair levenshteinNorm "cake"     "drake"   (3 % 5)
-    testPair levenshteinNorm "saturday" "sunday"  (5 % 8)
-    testPair levenshteinNorm "red"      "wax"     (0 % 1)
-#if __GLASGOW_HASKELL__ >= 710
-    testPair levenshteinNorm "a😀c"     "abc"     (2 % 3)
-#endif
-    testPair levenshteinNorm "lucky"    "lucky"   (1 % 1)
-    testPair levenshteinNorm ""         ""        (1 % 1)
-  describe "damerauLevenshtein" $ do
-    testSwap damerauLevenshtein
-    testPair damerauLevenshtein "veryvery long" "very long" 4
-    testPair damerauLevenshtein "thing"         "think"     1
-    testPair damerauLevenshtein "nose"          "ones"      2
-    testPair damerauLevenshtein "thing"         "sign"      3
-    testPair damerauLevenshtein "red"           "wax"       3
-#if __GLASGOW_HASKELL__ >= 710
-    testPair damerauLevenshtein "a😀c"          "abc"       1
-#endif
-    testPair damerauLevenshtein "lucky"         "lucky"     0
-    testPair damerauLevenshtein ""              ""          0
-  describe "damerauLevenshteinNorm" $ do
-    testSwap damerauLevenshteinNorm
-    testPair damerauLevenshteinNorm "veryvery long" "very long" (9 % 13)
-    testPair damerauLevenshteinNorm "thing"         "think"     (4 % 5)
-    testPair damerauLevenshteinNorm "nose"          "ones"      (1 % 2)
-    testPair damerauLevenshteinNorm "thing"         "sign"      (2 % 5)
-    testPair damerauLevenshteinNorm "red"           "wax"       (0 % 1)
-#if __GLASGOW_HASKELL__ >= 710
-    testPair damerauLevenshteinNorm "a😀c"          "abc"       (2 % 3)
-#endif
-    testPair damerauLevenshteinNorm "lucky"         "lucky"     (1 % 1)
-    testPair damerauLevenshteinNorm ""              ""          (1 % 1)
-  describe "hamming" $ do
-    testSwap hamming
-    testPair hamming "karolin" "kathrin" (Just 3)
-    testPair hamming "karolin" "kerstin" (Just 3)
-    testPair hamming "1011101" "1001001" (Just 2)
-    testPair hamming "2173896" "2233796" (Just 3)
-    testPair hamming "toned"   "roses"   (Just 3)
-    testPair hamming "red"     "wax"     (Just 3)
-#if __GLASGOW_HASKELL__ >= 710
-    testPair hamming "a😀c"    "abc"      (Just 1)
-#endif
-    testPair hamming "lucky"   "lucky"   (Just 0)
-    testPair hamming ""        ""        (Just 0)
-    testPair hamming "small"   "big"     Nothing
-  describe "overlap" $ do
-    testSwap overlap
-    testPair overlap "fly"     "butterfly" (1 % 1)
-    testPair overlap "night"   "nacht"     (3 % 5)
-    testPair overlap "context" "contact"   (5 % 7)
-    testPair overlap "red"     "wax"       (0 % 1)
-#if __GLASGOW_HASKELL__ >= 710
-    testPair overlap "a😀c"   "abc"   (2 % 3)
-#endif
-    testPair overlap "lucky" "lucky" (1 % 1)
-  describe "jaccard" $ do
-    testSwap jaccard
-    testPair jaccard "xxx"     "xyx"     (1 % 2)
-    testPair jaccard "night"   "nacht"   (3 % 7)
-    testPair jaccard "context" "contact" (5 % 9)
-#if __GLASGOW_HASKELL__ >= 710
-    testPair overlap "a😀c"     "abc"     (2 % 3)
-#endif
-    testPair jaccard "lucky"   "lucky"   (1 % 1)
-- | Test that given function returns the same results when order of
-- arguments is swapped.
-testSwap :: (Eq a, Show a) => (Text -> Text -> a) -> SpecWith ()
-testSwap f = context "if we swap the arguments" $
-  it "produces the same result" $
-    property $ \a b -> f a b === f b a
-- | Create spec for given metric function applying it to two 'Text' values
-- and comparing the result with expected one.
-testPair :: (Eq a, Show a)
-  => (Text -> Text -> a) -- ^ Function to test
-  -> Text              -- ^ First input
-  -> Text              -- ^ Second input
-  -> a                 -- ^ Expected result
-  -> SpecWith ()
-testPair f a b r = it ("‘" <> T.unpack a <> "’ and ‘" <> T.unpack b <> "’") $
-  f a b `shouldBe` r
-}