Code cleanup, changelog added

parent 934ed593
# Changelog
## [0.1.5] - 2023-11-09
### Changed
- Code cleanup
## [0.1.4] - 2023-11-09
### Fixed
- Hash was still incorrect because of overflow (`UInt64` range vs
`modulus`). I removed `modulus` and now rely on the overflow
(wrap-around) behaviour of `UInt64` instead.
## [0.1.3] - 2023-11-08
### Fixed
- Hash was computed incorrectly. I changed to `UInt64` and added tests
with `SmallInt`.
...@@ -25,15 +25,11 @@ module Data.String.Search.KarpRabin ( ...@@ -25,15 +25,11 @@ module Data.String.Search.KarpRabin (
, fromCodePoint , fromCodePoint
, Base , Base
, universalBase , universalBase
, Modulus
-- , universalModulus
, Hash , Hash
, HashStruct , HashStruct
, hashStruct , hashStruct
, RollingHash , RollingHash
, mkRollingHash , mkRollingHash
-- , hashRH
-- , rehashRH
, hashU64 , hashU64
, rehashU64 , rehashU64
) where ) where
...@@ -60,7 +56,6 @@ import Prelude ...@@ -60,7 +56,6 @@ import Prelude
type Base = UInt64 type Base = UInt64
type Modulus = UInt64
type Hash = UInt64 type Hash = UInt64
...@@ -68,12 +63,6 @@ type Hash = UInt64 ...@@ -68,12 +63,6 @@ type Hash = UInt64
universalBase :: Base universalBase :: Base
universalBase = unsafeFromInt 256 universalBase = unsafeFromInt 256
-- | Modulus that we will use in Karp-Rabin
-- https://www.wolframalpha.com/input?i=prime+number+greater+than+50000000
-- universalModulus :: Modulus
-- universalModulus = unsafeFromInt 1009
-- universalModulus = unsafeFromInt 50000017
fromCodePoint :: CodePoint -> Base fromCodePoint :: CodePoint -> Base
fromCodePoint c = unsafeFromInt (fromEnum c) fromCodePoint c = unsafeFromInt (fromEnum c)
...@@ -83,7 +72,6 @@ fromCodePoint c = unsafeFromInt (fromEnum c) ...@@ -83,7 +72,6 @@ fromCodePoint c = unsafeFromInt (fromEnum c)
-- https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm -- https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
newtype RollingHash = RollingHash { newtype RollingHash = RollingHash {
base :: Base base :: Base
-- , modulus :: Modulus -- in our case, the modulus is (top :: UInt64)
, len :: Int , len :: Int
, basePowLen :: Base -- pow base len % modulus (stored for performance reasons) , basePowLen :: Base -- pow base len % modulus (stored for performance reasons)
} }
...@@ -101,16 +89,6 @@ mkRollingHash base len = RollingHash { base ...@@ -101,16 +89,6 @@ mkRollingHash base len = RollingHash { base
-- basePowLen = foldl (\acc _l -> (acc*base) `mod` modulus) (unsafeFromInt 1) (1..(len - 1)) -- basePowLen = foldl (\acc _l -> (acc*base) `mod` modulus) (unsafeFromInt 1) (1..(len - 1))
basePowLen = foldl (\acc _l -> (acc*base)) (unsafeFromInt 1) (1..(len - 1)) basePowLen = foldl (\acc _l -> (acc*base)) (unsafeFromInt 1) (1..(len - 1))
-- -- | NOTE: xs must be of length RollingHash.len
-- hashRH :: RollingHash -> Array Base -> Hash
-- hashRH rh@(RollingHash { base, modulus }) xs =
-- -- foldl (\acc x -> (((acc*base) `mod` modulus) + x) `mod` modulus) (unsafeFromInt 0) xs
-- foldl (\acc x -> rehashRH rh acc (unsafeFromInt 0) x) (unsafeFromInt 0) xs
-- rehashRH :: RollingHash -> Hash -> Base -> Base -> Hash
-- rehashRH (RollingHash { base, basePowLen, modulus }) h old new =
-- ((h + (modulus - old)*basePowLen)*base + new) `mod` modulus
-- | NOTE: xs must be of length RollingHash.len -- | NOTE: xs must be of length RollingHash.len
hashU64 :: RollingHash -> Array Base -> Hash hashU64 :: RollingHash -> Array Base -> Hash
...@@ -160,23 +138,6 @@ hashStruct pats = { hash, hashMap, hLen, pats, rehash, rehashChar } ...@@ -160,23 +138,6 @@ hashStruct pats = { hash, hashMap, hLen, pats, rehash, rehashChar }
M.fromFoldableWith (flip (<>)) M.fromFoldableWith (flip (<>))
(mapWithIndex (\i a -> Tuple (hash a) [i]) pats) (mapWithIndex (\i a -> Tuple (hash a) [i]) pats)
-- hLen' = fromInt hLen
-- shDi = case 32 `quot` hLen of
-- q | q < 4 -> q
-- | otherwise -> 4
-- outS = fromInt (shDi * hLen)
-- rehash :: UInt -> CodePoint -> CodePoint -> UInt
-- rehash = case shDi of
-- 1 -> rehash' (fromInt 1) hLen'
-- 2 -> rehash' (fromInt 2) outS
-- 3 -> rehash' (fromInt 3) outS
-- _ -> rehash' (fromInt 4) outS
-- hash :: String -> UInt
-- hash = foldl (\h w -> (h `shl` fromInt shDi) + fromCodePoint w) (fromInt 0)
-- <<< S.toCodePointArray
-- <<< S.take hLen
-- $overview -- $overview
......
...@@ -9,5 +9,5 @@ import Test.Spec.Runner (runSpec) ...@@ -9,5 +9,5 @@ import Test.Spec.Runner (runSpec)
main :: Effect Unit main :: Effect Unit
main = launchAff_ do main = launchAff_ do
specs <- discover "Data\\.String\\.Search\\..*Spec" specs <- discover "Test\\.Data\\.String\\.Search\\..*Spec"
runSpec [consoleReporter] specs runSpec [consoleReporter] specs
module Data.String.Search.KarpRabin.Spec where module Test.Data.String.Search.KarpRabin.Spec where
import Prelude
import Data.Array (index) import Data.Array (index)
import Data.Bounded (class Bounded)
import Data.Bounded.Generic (genericTop, genericBottom)
import Data.Enum (class BoundedEnum, class Enum)
import Data.Enum.Generic (genericCardinality, genericToEnum, genericFromEnum, genericSucc, genericPred)
import Data.Eq (class Eq)
import Data.Eq.Generic (genericEq)
import Data.Foldable (all) import Data.Foldable (all)
import Data.Generic.Rep (class Generic)
import Data.Maybe (Maybe(..), isJust) import Data.Maybe (Maybe(..), isJust)
import Data.Ord (class Ord) import Data.String (drop, stripPrefix, Pattern(..))
import Data.Ord.Generic (genericCompare) import Data.String.Search.KarpRabin (indicesOfAny, mkRollingHash, hashU64, rehashU64, hashStruct, fromCodePoint)
import Data.String (drop, stripPrefix, Pattern(..), codePointFromChar, CodePoint)
import Data.String.Search.KarpRabin (indicesOfAny, mkRollingHash, hashU64, rehashU64, hashStruct, fromCodePoint) -- indicesOfAnyLegacy,
import Data.String.Search.Utils (slidingWindow) import Data.String.Search.Utils (slidingWindow)
import Data.Tuple (Tuple(..)) import Data.Tuple (Tuple(..))
import Data.UInt64 (unsafeFromInt, UInt64) import Data.UInt64 (unsafeFromInt)
import Prelude
import Test.Data.String.Search.Utils (CodePointA(..))
import Test.QuickCheck ((<?>)) import Test.QuickCheck ((<?>))
import Test.QuickCheck.Arbitrary
import Test.QuickCheck.Gen (enum)
import Test.Spec (Spec, describe, it) import Test.Spec (Spec, describe, it)
import Test.Spec.Assertions (shouldEqual) import Test.Spec.Assertions (shouldEqual)
import Test.Spec.QuickCheck (quickCheck') import Test.Spec.QuickCheck (quickCheck')
fromInt = unsafeFromInt
newtype CodePointA = CodePointA CodePoint
derive instance Generic CodePointA _
instance Eq CodePointA where
eq = genericEq
instance Ord CodePointA where
compare = genericCompare
instance Bounded CodePointA where
top = genericTop
bottom = genericBottom
instance Enum CodePointA where
succ = genericSucc
pred = genericPred
instance BoundedEnum CodePointA where
cardinality = genericCardinality
toEnum = genericToEnum
fromEnum = genericFromEnum
instance Arbitrary CodePointA where
arbitrary = enum
validIndices :: Array String -> String -> Boolean validIndices :: Array String -> String -> Boolean
validIndices pats input = all validIndex (indicesOfAny pats input) validIndices pats input = all validIndex (indicesOfAny pats input)
...@@ -62,9 +29,6 @@ validIndices pats input = all validIndex (indicesOfAny pats input) ...@@ -62,9 +29,6 @@ validIndices pats input = all validIndex (indicesOfAny pats input)
-- <?> (show input' <> " should start with " <> show pat) -- <?> (show input' <> " should start with " <> show pat)
Nothing -> false -- Failed "out of bounds pattern" Nothing -> false -- Failed "out of bounds pattern"
-- indicesOfAny :: Array String -> String -> Array (Tuple Int (Array Int))
-- indicesOfAny = indicesOfAnyLegacy
spec :: Spec Unit spec :: Spec Unit
spec = spec =
describe "KarpRabin" do describe "KarpRabin" do
...@@ -73,11 +37,11 @@ spec = ...@@ -73,11 +37,11 @@ spec =
[[1, 2], [2, 3], [3, 4]] [[1, 2], [2, 3], [3, 4]]
it "rolling hash works 1" do it "rolling hash works 1" do
let rh = mkRollingHash (fromInt 7) 3 let rh = mkRollingHash (unsafeFromInt 7) 3
let a = fromInt 1 let a = unsafeFromInt 1
let b = fromInt 2 let b = unsafeFromInt 2
let c = fromInt 3 let c = unsafeFromInt 3
let d = fromInt 4 let d = unsafeFromInt 4
let h1 = hashU64 rh [a, b, c] let h1 = hashU64 rh [a, b, c]
h2 = hashU64 rh [b, c, d] h2 = hashU64 rh [b, c, d]
...@@ -87,7 +51,7 @@ spec = ...@@ -87,7 +51,7 @@ spec =
h3 `shouldEqual` rehashU64 rh h2 b a h3 `shouldEqual` rehashU64 rh h2 b a
it "rolling hash works 2 (quickcheck)" $ do it "rolling hash works 2 (quickcheck)" $ do
let rh = mkRollingHash (fromInt 256) 3 let rh = mkRollingHash (unsafeFromInt 256) 3
quickCheck' 2000 \(CodePointA a') (CodePointA b') (CodePointA c') (CodePointA d') -> quickCheck' 2000 \(CodePointA a') (CodePointA b') (CodePointA c') (CodePointA d') ->
let a = fromCodePoint a' let a = fromCodePoint a'
......
module Test.Data.String.Search.Utils where
import Data.Bounded.Generic (genericTop, genericBottom)
import Data.Enum (class BoundedEnum, class Enum)
import Data.Enum.Generic (genericCardinality, genericToEnum, genericFromEnum, genericSucc, genericPred)
import Data.Eq.Generic (genericEq)
import Data.Generic.Rep (class Generic)
import Data.String (CodePoint)
import Prelude (class Bounded, class Eq, class Ord)
import Data.Ord.Generic (genericCompare)
import Test.QuickCheck.Arbitrary (class Arbitrary)
import Test.QuickCheck.Gen (enum)
-- | Test-only wrapper around `CodePoint`. It gives the test suite a type of
-- | its own on which to declare the instances below (in particular
-- | `Arbitrary`), so that specs can take `CodePointA` values as generated
-- | property arguments and unwrap them to plain `CodePoint`s.
newtype CodePointA = CodePointA CodePoint
-- Generic representation; every instance below except `Arbitrary` is
-- implemented by delegating to the corresponding `generic*` helper.
derive instance Generic CodePointA _
-- Equality via the generic implementation.
instance Eq CodePointA where
eq = genericEq
-- Ordering via the generic implementation.
instance Ord CodePointA where
compare = genericCompare
-- Bounds via the generic implementation.
instance Bounded CodePointA where
top = genericTop
bottom = genericBottom
-- Successor / predecessor via the generic implementation.
instance Enum CodePointA where
succ = genericSucc
pred = genericPred
-- Cardinality and Int conversions via the generic implementation.
instance BoundedEnum CodePointA where
cardinality = genericCardinality
toEnum = genericToEnum
fromEnum = genericFromEnum
-- Random generation uses `enum` from `Test.QuickCheck.Gen`.
-- NOTE(review): presumably this draws from the whole `BoundedEnum` range of
-- the type — confirm against the `Test.QuickCheck.Gen` documentation.
instance Arbitrary CodePointA where
arbitrary = enum
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment