1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
{-|
Module : Ngrams.Metrics
Description :
Copyright : Ngrams.Metrics (c)
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Here is a longer description of this module, containing some
commentary with @some markup@.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
module Ngrams.Metrics (main) where
import Data.Text (Text)
import qualified Data.Text as T
import Data.Ratio
import Test.Hspec
import Test.QuickCheck
import Gargantext.Prelude
import Gargantext.Text.Metrics
#if !MIN_VERSION_base(4,8,0)
import Control.Applicative
#endif
instance Arbitrary Text where
arbitrary = T.pack <$> arbitrary
main :: IO ()
main = hspec spec
spec :: Spec
spec = do
describe "levenshtein" $ do
testSwap levenshtein
context "with concrete examples" $ do
testPair levenshtein "kitten" "sitting" 3
testPair levenshtein "cake" "drake" 2
testPair levenshtein "saturday" "sunday" 3
testPair levenshtein "red" "wax" 3
#if __GLASGOW_HASKELL__ >= 710
testPair levenshtein "a😀c" "abc" 1
#endif
testPair levenshtein "lucky" "lucky" 0
testPair levenshtein "" "" 0
describe "levenshteinNorm" $ do
testSwap levenshteinNorm
testPair levenshteinNorm "kitten" "sitting" (4 % 7)
testPair levenshteinNorm "cake" "drake" (3 % 5)
testPair levenshteinNorm "saturday" "sunday" (5 % 8)
testPair levenshteinNorm "red" "wax" (0 % 1)
#if __GLASGOW_HASKELL__ >= 710
testPair levenshteinNorm "a😀c" "abc" (2 % 3)
#endif
testPair levenshteinNorm "lucky" "lucky" (1 % 1)
testPair levenshteinNorm "" "" (1 % 1)
describe "damerauLevenshtein" $ do
testSwap damerauLevenshtein
testPair damerauLevenshtein "veryvery long" "very long" 4
testPair damerauLevenshtein "thing" "think" 1
testPair damerauLevenshtein "nose" "ones" 2
testPair damerauLevenshtein "thing" "sign" 3
testPair damerauLevenshtein "red" "wax" 3
#if __GLASGOW_HASKELL__ >= 710
testPair damerauLevenshtein "a😀c" "abc" 1
#endif
testPair damerauLevenshtein "lucky" "lucky" 0
testPair damerauLevenshtein "" "" 0
describe "damerauLevenshteinNorm" $ do
testSwap damerauLevenshteinNorm
testPair damerauLevenshteinNorm "veryvery long" "very long" (9 % 13)
testPair damerauLevenshteinNorm "thing" "think" (4 % 5)
testPair damerauLevenshteinNorm "nose" "ones" (1 % 2)
testPair damerauLevenshteinNorm "thing" "sign" (2 % 5)
testPair damerauLevenshteinNorm "red" "wax" (0 % 1)
#if __GLASGOW_HASKELL__ >= 710
testPair damerauLevenshteinNorm "a😀c" "abc" (2 % 3)
#endif
testPair damerauLevenshteinNorm "lucky" "lucky" (1 % 1)
testPair damerauLevenshteinNorm "" "" (1 % 1)
describe "hamming" $ do
testSwap hamming
testPair hamming "karolin" "kathrin" (Just 3)
testPair hamming "karolin" "kerstin" (Just 3)
testPair hamming "1011101" "1001001" (Just 2)
testPair hamming "2173896" "2233796" (Just 3)
testPair hamming "toned" "roses" (Just 3)
testPair hamming "red" "wax" (Just 3)
#if __GLASGOW_HASKELL__ >= 710
testPair hamming "a😀c" "abc" (Just 1)
#endif
testPair hamming "lucky" "lucky" (Just 0)
testPair hamming "" "" (Just 0)
testPair hamming "small" "big" Nothing
describe "overlap" $ do
testSwap overlap
testPair overlap "fly" "butterfly" (1 % 1)
testPair overlap "night" "nacht" (3 % 5)
testPair overlap "context" "contact" (5 % 7)
testPair overlap "red" "wax" (0 % 1)
#if __GLASGOW_HASKELL__ >= 710
testPair overlap "a😀c" "abc" (2 % 3)
#endif
testPair overlap "lucky" "lucky" (1 % 1)
describe "jaccard" $ do
testSwap jaccard
testPair jaccard "xxx" "xyx" (1 % 2)
testPair jaccard "night" "nacht" (3 % 7)
testPair jaccard "context" "contact" (5 % 9)
#if __GLASGOW_HASKELL__ >= 710
testPair overlap "a😀c" "abc" (2 % 3)
#endif
testPair jaccard "lucky" "lucky" (1 % 1)
-- | Test that given function returns the same results when order of
-- arguments is swapped.
testSwap :: (Eq a, Show a) => (Text -> Text -> a) -> SpecWith ()
testSwap f = context "if we swap the arguments" $
it "produces the same result" $
property $ \a b -> f a b === f b a
-- | Create spec for given metric function applying it to two 'Text' values
-- and comparing the result with expected one.
testPair :: (Eq a, Show a)
=> (Text -> Text -> a) -- ^ Function to test
-> Text -- ^ First input
-> Text -- ^ Second input
-> a -- ^ Expected result
-> SpecWith ()
testPair f a b r = it ("‘" <> T.unpack a <> "’ and ‘" <> T.unpack b <> "’") $
f a b `shouldBe` r