Commit e69e0599 authored by Grégoire Locqueville's avatar Grégoire Locqueville

Removed some more dead code and modules

parent 52110ac8
Pipeline #7143 canceled with stages
......@@ -217,7 +217,6 @@ library
Gargantext.Core.Text.List.Group.WithStem
Gargantext.Core.Text.List.Social
Gargantext.Core.Text.Metrics
Gargantext.Core.Text.Metrics.CharByChar
Gargantext.Core.Text.Metrics.Count
Gargantext.Core.Text.Metrics.TFICF
Gargantext.Core.Text.Ngrams
......@@ -376,7 +375,6 @@ library
Gargantext.Core.Text.List.Social.Prelude
Gargantext.Core.Text.Metrics.FrequentItemSet
Gargantext.Core.Text.Metrics.SpeGen.IncExc
Gargantext.Core.Text.Metrics.Utils
Gargantext.Core.Text.Samples.EN
Gargantext.Core.Text.Terms.Mono.Token.En
Gargantext.Core.Text.Terms.Multi.Group
......@@ -815,7 +813,6 @@ test-suite garg-test-tasty
Test.Ngrams.Lang.En
Test.Ngrams.Lang.Fr
Test.Ngrams.Lang.Occurrences
Test.Ngrams.Metrics
Test.Ngrams.NLP
Test.Ngrams.Query
Test.Ngrams.Query.PaginationCorpus
......
......@@ -25,7 +25,7 @@ import Gargantext.API.Ngrams.Types (QueryParamR, TabType, ngramsTypeFromTabType,
import Gargantext.API.Prelude (IsGargServer)
import Gargantext.API.Routes.Named.Metrics qualified as Named
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core.Text.Metrics (Scored(..), {-normalizeGlobal,-} normalizeLocal)
import Gargantext.Core.Text.Metrics (Scored(..), normalizeLocal)
import Gargantext.Core.Types (CorpusId, ListId, ListType(..))
import Gargantext.Core.Types.Query (Limit)
import Gargantext.Core.Viz.Chart (chartData, histoData, treeData)
......
......@@ -58,17 +58,6 @@ instance Semigroup a => Semigroup (Scored a) where
(b <> b')
(c <> c')
localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
(Map.toList fi)
scores
where
(ti, fi) = createIndices m
(is, ss) = incExcSpeGen $ cooc2mat Triangle ti m
scores = DAA.toList
$ DAA.run
$ DAA.zip (DAA.use is) (DAA.use ss)
-- TODO Code to be removed below
-- TODO in the textflow we end up needing these indices , it might be
-- better to compute them earlier and pass them around.
......
{-|
Module : Gargantext.Core.Text.Metrics.CharByChar
Description : All parsers of Gargantext in one file.
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Mainly reexport functions in @Data.Text.Metrics@
-}
module Gargantext.Core.Text.Metrics.CharByChar (levenshtein
, levenshteinNorm
, damerauLevenshtein
, damerauLevenshteinNorm
, overlap
, jaccard
, hamming
) where
import Data.Text.Metrics qualified as DTM
import Gargantext.Prelude
--noApax :: Ord a => Map a Occ -> Map a Occ
--noApax m = M.filter (>1) m
{- * Example de titre
-}
-- | This module provide metrics to compare Text
-- starting as an API rexporting main functions of the great lib
-- text-metrics of Mark Karpov
-- | Levenshtein Similarity
-- In information theory, Linguistics and computer science,
-- the Levenshtein distance is a string metric for measuring
-- the difference between two sequences.
-- See: https://en.wikipedia.org/wiki/Levenshtein_distance
--
levenshtein :: Text -> Text -> Int
levenshtein = DTM.levenshtein
-- | Return normalized Levenshtein distance between two 'Text' values.
-- Result is a non-negative rational number (represented as @'Ratio'
-- 'Data.Numeric.Natural'@), where 0 signifies no similarity between the
-- strings, while 1 means exact match.
--
levenshteinNorm :: Text -> Text -> Ratio Int
levenshteinNorm = DTM.levenshteinNorm
-- | Return Damerau-Levenshtein distance between two 'Text' values. The
-- function works like 'levenshtein', but the collection of allowed
-- operations also includes transposition of two /adjacent/ characters.
-- See also:
-- <https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance>
--
damerauLevenshtein :: Text -> Text -> Int
damerauLevenshtein = DTM.damerauLevenshtein
-- damerau-Levenshtein distance normalized
--
damerauLevenshteinNorm :: Text -> Text -> Ratio Int
damerauLevenshteinNorm = DTM.damerauLevenshteinNorm
-- Treating inputs like sets
-- | Return overlap coefficient for two 'Text' values. Returned value
-- is in the range from 0 (no similarity) to 1 (exact match). Return 1
-- if both 'Text' values are empty.
--
-- See also: <https://en.wikipedia.org/wiki/Overlap_coefficient>.
overlap :: Text -> Text -> Ratio Int
overlap = DTM.overlap
-- | Jaccard distance
-- measures dissimilarity between sample sets
jaccard :: Text -> Text -> Ratio Int
jaccard = DTM.jaccard
-- | Hamming Similarity
-- In information theory, the Hamming distance between two strings of
-- equal length is the number of positions at which the corresponding
-- symbols are different. In other words, it measures the minimum number of
-- substitutions required to change one string into the other
-- See: https://en.wikipedia.org/wiki/Hamming_distance
hamming :: Text -> Text -> Maybe Int
hamming = DTM.hamming
......@@ -27,12 +27,10 @@ Source : https://en.wikipedia.org/wiki/Type%E2%80%93token_distinction#Occurrence
module Gargantext.Core.Text.Metrics.Count
where
import Control.Arrow (Arrow(..), (***))
import Data.List qualified as List
import Data.Map.Strict ( empty, singleton , insertWith, unionWith, unionsWith , mapKeys )
import Data.Map.Strict ( empty, insertWith )
import Data.Map.Strict qualified as DMS
import Data.Text (pack)
import Gargantext.Core.Types (Terms(..), Stems)
import Gargantext.Core.Types (Stems)
import Gargantext.Prelude hiding (empty)
------------------------------------------------------------------------
......@@ -45,73 +43,10 @@ data Group = ByStem | ByOntology
type Grouped = Stems
{-
-- >> let testData = ["blue lagoon", "blues lagoon", "red lagoon"]
-- >> map occurrences <$> Prelude.mapM (terms Mono EN)
-- [fromList [(fromList ["blue"],1),(fromList ["lagoon"],1)],fromList [(fromList ["blue"],1),(fromList ["lagoon"],1)],fromList [(fromList ["lagoon"],1),(fromList ["red"],1)]]
--λ: cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),1)]
--λ: cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon", "red lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)]
--λ: cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon", "red lagoon red lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)]
--λ: cooc <$> Prelude.map occurrences <$> Prelude.mapM (terms Mono EN) ["blue lagoon", "blues lagoon blues lagoon", "red lagoon red lagoon", "red lagoon"]
--fromList [((fromList ["blue"],fromList ["lagoon"]),2),((fromList ["lagoon"],fromList ["red"]),2)]
----
-}
type Occs = Int
type Coocs = Int
type Threshold = Int
removeApax :: Threshold -> Map ([Text], [Text]) Int -> Map ([Text], [Text]) Int
removeApax t = DMS.filter (> t)
cooc :: [[Terms]] -> Map ([Text], [Text]) Int
cooc tss = coocOnWithLabel _terms_stem (useLabelPolicy label_policy) tss
where
terms_occs = occurrencesOn _terms_stem (List.concat tss)
label_policy = mkLabelPolicy terms_occs
coocOnWithLabel :: (Ord label, Ord b) => (a -> b) -> (b -> label)
-> [[a]] -> Map (label, label) Coocs
coocOnWithLabel on' policy tss = mapKeys (delta policy) $ coocOn on' tss
where
delta :: Arrow a => a b' c' -> a (b', b') (c', c')
delta f = f *** f
mkLabelPolicy :: Map Grouped (Map Terms Occs) -> Map Grouped [Text]
mkLabelPolicy = DMS.map f where
f = _terms_label . fst . maximumWith snd . DMS.toList
-- TODO use the Foldable instance of Map instead of building a list
useLabelPolicy :: Map Grouped [Text] -> Grouped -> [Text]
useLabelPolicy m g = case DMS.lookup g m of
Just label -> label
Nothing -> panic $ "Label of Grouped not found: " <> (pack $ show g)
-- TODO: use a non-fatal error if this can happen in practice
{-
labelPolicy :: Map Grouped (Map Terms Occs) -> Grouped -> Label
labelPolicy m g = case _terms_label <$> fst <$> maximumWith snd <$> DMS.toList <$> lookup g m of
Just label -> label
Nothing -> panic $ "Label of Grouped not found: " <> (pack $ show g)
-}
coocOn :: Ord b => (a -> b) -> [[a]] -> Map (b, b) Int
coocOn f as = DMS.unionsWith (+) $ map (coocOn' f) as
coocOn' :: Ord b => (a -> b) -> [a] -> Map (b, b) Int
coocOn' fun ts = DMS.fromListWith (+) xs
where
ts' = List.nub $ map fun ts
xs = [ ((x, y), 1)
| x <- ts'
, y <- ts'
, x >= y
]
------------------------------------------------------------------------
coocOnContexts :: (a -> [Text]) -> [[a]] -> Map ([Text], [Text]) Int
......@@ -127,23 +62,9 @@ coocOnSingleContext fun ts = xs
, x >= y
]
------------------------------------------------------------------------
-- | Compute the grouped occurrences (occ)
occurrences :: [Terms] -> Map Grouped (Map Terms Int)
occurrences = occurrencesOn _terms_stem
occurrencesOn :: (Ord a, Ord b) => (a -> b) -> [a] -> Map b (Map a Int)
occurrencesOn f = foldl' (\m a -> insertWith (unionWith (+)) (f a) (singleton a 1) m) empty
occurrencesWith :: (Foldable list, Ord k, Num a, Show k, Show a, Show (list b)) => (b -> k) -> list b -> Map k a
occurrencesWith f xs = trace (show (xs,m) :: Text) m
where
m = foldl' (\x y -> insertWith (+) (f y) 1 x) empty xs
-- TODO add groups and filter stops
sumOcc :: Ord a => [Occ a] -> Occ a
sumOcc xs = unionsWith (+) xs
......@@ -15,7 +15,6 @@ Domain Specific Language to manage Frequent Item Set (FIS)
module Gargantext.Core.Text.Metrics.FrequentItemSet
( Fis, Size(..)
, occ_hlcm, cooc_hlcm
, allFis, between
, fisWithSize
, fisWith
, fisWithSizePoly
......@@ -43,18 +42,6 @@ occ_hlcm = fisWithSize (Point 1)
cooc_hlcm :: Frequency -> [[Item]] -> [Fis]
cooc_hlcm = fisWithSize (Point 2)
allFis :: Frequency -> [[Item]] -> [Fis]
allFis = fisWith Nothing
------------------------------------------------------------------------
between :: (Int, Int) -> Frequency -> [[Item]] -> [Fis]
between (x,y) = fisWithSize (Segment x y)
--maximum :: Int -> Frequency -> [[Item]] -> [Fis]
--maximum m = between (0,m)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Data type to type the Frequent Item Set
-- TODO replace List with Set in fisItemSet
......
......@@ -20,14 +20,9 @@ module Gargantext.Core.Text.Metrics.TFICF
, Total(..)
, Count(..)
, tficf
, sortTficf
)
where
import Data.List qualified as List
import Data.Map.Strict (toList)
import Data.Ord qualified as DO (Down(..))
import Gargantext.Core.Types (Ordering(..))
import Gargantext.Prelude hiding (Down, Ordering, toList)
path :: Text
......@@ -64,11 +59,3 @@ tficf (TficfInfra (Count ic) (Total it) )
<> ", sc = " <> show sc
<> ", st = " <> show st
tficf _ _ = panicTrace $ "[ERR]" <> path <> "Undefined for these contexts"
sortTficf :: Ordering
-> Map Text Double
-> [(Text, Double)]
sortTficf Down = List.sortOn (DO.Down . snd) . toList
sortTficf Up = List.sortOn snd . toList
{-|
Module : Gargantext.Core.Text.Metrics.Utils
Description : Some functions to count.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Text.Metrics.Utils where
import Data.List qualified as L
import Data.Map.Strict qualified as DM
import Gargantext.Prelude
countElem :: (Ord k) => DM.Map k Int -> k -> DM.Map k Int
countElem m e = DM.insertWith (+) e 1 m
freq :: (Ord k) => [k] -> DM.Map k Int
freq = foldl countElem DM.empty
getMaxFromMap :: Ord a => Map a1 a -> [a1]
getMaxFromMap m = go [] Nothing (DM.toList m)
where
go ks _ [] = ks
go ks Nothing ((k,v):rest) = go (k:ks) (Just v) rest
go ks (Just u) ((k,v):rest)
| v < u = go ks (Just u) rest
| v > u = go [k] (Just v) rest
| otherwise = go (k:ks) (Just v) rest
average :: [Double] -> Double
average x = L.sum x / L.genericLength x
average' :: [Int] -> Double
average' x = (L.sum y) / (L.genericLength y) where
y = L.map fromIntegral x
......@@ -61,16 +61,8 @@ nan = 0 / 0
noNaNs :: P.RealFloat e => [e] -> [e]
noNaNs = filter (not . P.isNaN)
updateIfDefined :: P.RealFloat e => e -> e -> e
updateIfDefined e0 e | P.isNaN e = e0
| otherwise = e
sim :: Entropy e => e -> e -> Bool
sim x y = x == y || (P.isNaN x && P.isNaN y)
subst :: Entropy e => (e, e) -> e -> e
subst (src, dst) x | sim src x = dst
| otherwise = x
------------------------------------------------------------------------
-- | TODO: Show Instance only used for debugging
......@@ -169,14 +161,6 @@ toTree k (Node c e cs) = Tree.Node (k, c, Just e) (map (uncurry toTree) $ Map.t
normalizeLevel :: Entropy e => e -> e -> e -> e
normalizeLevel m v e = (e - m) / v
{- Unused
nodeChildren :: Trie k e -> Map k (Trie k e)
nodeChildren (Node _ _ cs) = cs
nodeChildren (Leaf _) = Map.empty
-}
chunkAlongEleve :: Int -> [a] -> [[a]]
chunkAlongEleve n xs = L.take n <$> L.tails xs
......@@ -317,38 +301,6 @@ mayCons :: [a] -> [[a]] -> [[a]]
mayCons [] xss = xss
mayCons xs xss = xs : xss
{-
split :: (IsTrie trie, Entropy e) => Lens' i e -> trie Token i -> [Token] -> [[Token]]
split _ _ [] = []
split inE t (Terminal Start:xs) = split inE t xs
split inE t (x0:xs0) = go [x0] xs0
where
go pref [] = [pref]
go pref (Terminal Stop:_) = [pref]
go _ (Terminal Start:_) = panic "split impossible"
go pref (x:xs) =
-- trace (show (if acc then "ACC" else "CUT", (prefx, epxt), if acc then ">" else "<=", ((pref, ept), "+", ([x], ext)))) $
if acc
then go prefx xs
else mayCons pref $ go [x] xs
where
prefx = pref <> [x]
pt = findTrie pref t
pxt = findTrie prefx t
xt = findTrie [x] t
ept = ne pt
-- ^ entropy of the current prefix
ext = ne xt
-- ^ entropy of [x]
epxt = ne pxt
-- ^ entropy of the current prefix plus x
acc = P.isNaN ept || P.isNaN ext || not (P.isNaN epxt) -- && (epxt > mean [ept, ext])
-- aut(["in","this","paper"]) > aut(["in","this"]) + aut(["paper"])
ne = nodeEntropy inE
-}
split :: Entropy e => Int -> Lens' i e -> Tries Token i -> [Token] -> [[Text]]
split _ _ _ [] = []
split _ _ _ [t] = pure <$> nonTerminals [t]
......@@ -357,32 +309,8 @@ split n inE t ts = nonTerminals pref `mayCons` split n inE t (drop (length pref
pref = maximumWith (\ks -> nodeEntropy inE $ findTrie ks t)
(L.tail . L.inits . take n $ ts)
{-
split :: Entropy e => Lens' i e -> Tries Token i -> [Token] -> [[Token]]
split inE t0 ts =
maximumWith (sum . map $ nodeAutonomy inE t0) (all the splits of ts)
-}
------------------------------------------------------------------------
mainEleve :: Int -> [[Text]] -> [[[Text]]]
mainEleve n x = mainEleve' n x x
mainEleve' :: Int -> [[Text]] -> [[Text]] -> [[[Text]]]
mainEleve' n x y = mainEleveWith x' n y
where
x' = buildTries n (fmap toToken x)
-- (fmap toToken i) is computed twice, since mainEleveWith is computing it too
-- | This function should take the longest possible chain of:
-- mainEleve'' n x y = maxChainSizeOf [ mainEleve' n x y
-- , mainEleve' n x x
-- , mainEleve' n y y
-- ]
mainEleve'' :: Int -> [[Text]] -> [[Text]] -> [[[Text]]]
mainEleve'' = undefined
mainEleveWith :: Tries Token () -> Int -> [[Text]] -> [[[Text]]]
mainEleveWith m n i = fmap (split n info_autonomy t) (fmap toToken i)
where
......
......@@ -21,7 +21,6 @@ module Gargantext.Core.Text.Terms.Mono.Token.En
, punctuation
, finalPunctuation
, initialPunctuation
, allPunctuation
, contractions
, negatives
)
......@@ -109,14 +108,6 @@ initialPunctuation x = E $ filter (not . T.null . unwrap) $
| otherwise -> [ Right ps
, Right w ]
-- | Split tokens on transitions between punctuation and
-- non-punctuation characters. This tokenizer is not included in
-- defaultTokenizer pipeline because dealing with word-internal
-- punctuation is quite application specific.
allPunctuation :: Tokenizer
allPunctuation = E . map Right
. T.groupBy (\a b -> Char.isPunctuation a == Char.isPunctuation b)
-- | Split words ending in n't, and freeze n't
negatives :: Tokenizer
negatives x | "n't" `T.isSuffixOf` x = E [ Right . T.reverse . T.drop 3 . T.reverse $ x
......
......@@ -21,9 +21,6 @@ Source: https://en.wikipedia.org/wiki/Part-of-speech_tagging
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.Core.Text.Terms.Multi.PosTagging where
import Data.Aeson
......@@ -39,9 +36,7 @@ import Gargantext.Prelude hiding (ByteString, toLower)
import Network.HTTP.Simple
import Network.URI (URI(..))
-- import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
------------------------------------------------------------------------
------------------------------------------------------------------------
tokens2tokensTags :: [Token] -> [TokenTag]
tokens2tokensTags ts = filter' $ map tokenTag ts
......@@ -132,23 +127,3 @@ corenlp :: URI -> Lang -> Text -> IO PosSentences
corenlp uri lang txt = do
response <- corenlp' uri lang txt
pure (getResponseBody response)
-- | parseWith
-- Part Of Speech example
-- parseWith _tokenPos "Hello world."
-- == [[("``","``"),("Hello","UH"),("world","NN"),(".","."),("''","''")]]
-- Named Entity Recognition example
-- parseWith _tokenNer "Hello world of Peter."
-- [[("``","O"),("Hello","O"),("world","O"),("of","O"),("Peter","PERSON"),(".","O"),("''","O")]]
tokenWith :: URI -> (Token -> t) -> Lang -> Text -> IO [[(Text, t)]]
tokenWith uri f lang s = map (map (\t -> (_tokenWord t, f t)))
<$> map _sentenceTokens
<$> _sentences
<$> corenlp uri lang s
----------------------------------------------------------------------------------
-- Here connect to the JohnSnow Server as it has been done above with the corenlp'
-- We need the PosTagging according to the language and the lems
serverNLP :: Lang -> Text -> IO PosSentences
serverNLP = undefined
......@@ -28,18 +28,13 @@ list quality in time.
-}
module Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake, select, hardStopList)
module Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake, hardStopList)
where
import Gargantext.Core.Text.Samples.EN (stopList)
import Gargantext.Prelude
import NLP.RAKE.Text
select :: Double -> [a] -> [a]
select part ns = take n ns
where
n = round $ part * (fromIntegral $ length ns)
multiterms_rake :: Text -> [WordScore]
multiterms_rake = candidates hardStopList
......
......@@ -24,11 +24,11 @@ import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Set qualified as Set
import Data.Text qualified as Text
import Data.Text.Metrics (levenshtein)
import Gargantext.API.Ngrams.Tools ( filterListWithRoot, getRepo, groupNodesByNgrams, mapTermListRoot )
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core ( HasDBid(toDBid) )
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Metrics.CharByChar (levenshtein)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types (TableResult(..))
import Gargantext.Core.Types.Main ( ListType(CandidateTerm, MapTerm) )
......
{-|
Module : Ngrams.Metrics
Description :
Copyright : Ngrams.Metrics (c)
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE CPP #-}
--module Ngrams.Metrics (main) where
module Test.Ngrams.Metrics where
{-
import Data.Text (Text)
import qualified Data.Text as T
import Data.Ratio
import Test.Hspec
import Test.QuickCheck
import Gargantext.Prelude
import Gargantext.Text.Metrics
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative
#endif
instance Arbitrary Text where
arbitrary = T.pack <$> arbitrary
main :: IO ()
main = hspec spec
spec :: Spec
spec = do
describe "levenshtein" $ do
testSwap levenshtein
context "with concrete examples" $ do
testPair levenshtein "kitten" "sitting" 3
testPair levenshtein "cake" "drake" 2
testPair levenshtein "saturday" "sunday" 3
testPair levenshtein "red" "wax" 3
#if __GLASGOW_HASKELL__ >= 710
testPair levenshtein "a😀c" "abc" 1
#endif
testPair levenshtein "lucky" "lucky" 0
testPair levenshtein "" "" 0
describe "levenshteinNorm" $ do
testSwap levenshteinNorm
testPair levenshteinNorm "kitten" "sitting" (4 % 7)
testPair levenshteinNorm "cake" "drake" (3 % 5)
testPair levenshteinNorm "saturday" "sunday" (5 % 8)
testPair levenshteinNorm "red" "wax" (0 % 1)
#if __GLASGOW_HASKELL__ >= 710
testPair levenshteinNorm "a😀c" "abc" (2 % 3)
#endif
testPair levenshteinNorm "lucky" "lucky" (1 % 1)
testPair levenshteinNorm "" "" (1 % 1)
describe "damerauLevenshtein" $ do
testSwap damerauLevenshtein
testPair damerauLevenshtein "veryvery long" "very long" 4
testPair damerauLevenshtein "thing" "think" 1
testPair damerauLevenshtein "nose" "ones" 2
testPair damerauLevenshtein "thing" "sign" 3
testPair damerauLevenshtein "red" "wax" 3
#if __GLASGOW_HASKELL__ >= 710
testPair damerauLevenshtein "a😀c" "abc" 1
#endif
testPair damerauLevenshtein "lucky" "lucky" 0
testPair damerauLevenshtein "" "" 0
describe "damerauLevenshteinNorm" $ do
testSwap damerauLevenshteinNorm
testPair damerauLevenshteinNorm "veryvery long" "very long" (9 % 13)
testPair damerauLevenshteinNorm "thing" "think" (4 % 5)
testPair damerauLevenshteinNorm "nose" "ones" (1 % 2)
testPair damerauLevenshteinNorm "thing" "sign" (2 % 5)
testPair damerauLevenshteinNorm "red" "wax" (0 % 1)
#if __GLASGOW_HASKELL__ >= 710
testPair damerauLevenshteinNorm "a😀c" "abc" (2 % 3)
#endif
testPair damerauLevenshteinNorm "lucky" "lucky" (1 % 1)
testPair damerauLevenshteinNorm "" "" (1 % 1)
describe "hamming" $ do
testSwap hamming
testPair hamming "karolin" "kathrin" (Just 3)
testPair hamming "karolin" "kerstin" (Just 3)
testPair hamming "1011101" "1001001" (Just 2)
testPair hamming "2173896" "2233796" (Just 3)
testPair hamming "toned" "roses" (Just 3)
testPair hamming "red" "wax" (Just 3)
#if __GLASGOW_HASKELL__ >= 710
testPair hamming "a😀c" "abc" (Just 1)
#endif
testPair hamming "lucky" "lucky" (Just 0)
testPair hamming "" "" (Just 0)
testPair hamming "small" "big" Nothing
describe "overlap" $ do
testSwap overlap
testPair overlap "fly" "butterfly" (1 % 1)
testPair overlap "night" "nacht" (3 % 5)
testPair overlap "context" "contact" (5 % 7)
testPair overlap "red" "wax" (0 % 1)
#if __GLASGOW_HASKELL__ >= 710
testPair overlap "a😀c" "abc" (2 % 3)
#endif
testPair overlap "lucky" "lucky" (1 % 1)
describe "jaccard" $ do
testSwap jaccard
testPair jaccard "xxx" "xyx" (1 % 2)
testPair jaccard "night" "nacht" (3 % 7)
testPair jaccard "context" "contact" (5 % 9)
#if __GLASGOW_HASKELL__ >= 710
testPair overlap "a😀c" "abc" (2 % 3)
#endif
testPair jaccard "lucky" "lucky" (1 % 1)
-- | Test that given function returns the same results when order of
-- arguments is swapped.
testSwap :: (Eq a, Show a) => (Text -> Text -> a) -> SpecWith ()
testSwap f = context "if we swap the arguments" $
it "produces the same result" $
property $ \a b -> f a b === f b a
-- | Create spec for given metric function applying it to two 'Text' values
-- and comparing the result with expected one.
testPair :: (Eq a, Show a)
=> (Text -> Text -> a) -- ^ Function to test
-> Text -- ^ First input
-> Text -- ^ Second input
-> a -- ^ Expected result
-> SpecWith ()
testPair f a b r = it ("‘" <> T.unpack a <> "’ and ‘" <> T.unpack b <> "’") $
f a b `shouldBe` r
-}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment