Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
purescript-string-search
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
purescript-string-search
Commits
a97d7392
Verified
Commit
a97d7392
authored
Nov 09, 2023
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Code cleanup, changelog added
parent
934ed593
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
69 additions
and
88 deletions
+69
-88
CHANGELOG.md
CHANGELOG.md
+22
-0
KarpRabin.purs
src/Data/String/Search/KarpRabin.purs
+0
-39
Main.purs
test/Main.purs
+1
-1
Spec.purs
test/Test/Data/String/Search/Spec.purs
+12
-48
Utils.purs
test/Test/Data/String/Utils.purs
+34
-0
No files found.
CHANGELOG.md
0 → 100644
View file @
a97d7392
# Changelog
## [0.1.5] - 2023-11-09
### Changed
-
Code cleanup
## [0.1.4] - 2023-11-09
### Fixed
-
Hash was still incorrect because of overflow (
`UInt64`
range vs
`modulus`
). I removed
`modulus`
and now rely on the wrap-around overflow
behaviour of
`UInt64`
.
## [0.1.3] - 2023-11-08
### Fixed
-
Hash was computed incorrectly. I changed to
`UInt64`
and added tests
with
`SmallInt`
.
src/Data/String/Search/KarpRabin.purs
View file @
a97d7392
...
...
@@ -25,15 +25,11 @@ module Data.String.Search.KarpRabin (
, fromCodePoint
, Base
, universalBase
, Modulus
-- , universalModulus
, Hash
, HashStruct
, hashStruct
, RollingHash
, mkRollingHash
-- , hashRH
-- , rehashRH
, hashU64
, rehashU64
) where
...
...
@@ -60,7 +56,6 @@ import Prelude
type Base = UInt64
type Modulus = UInt64
type Hash = UInt64
...
...
@@ -68,12 +63,6 @@ type Hash = UInt64
universalBase :: Base
universalBase = unsafeFromInt 256
-- | Modulus that we will use in Karp-Rabin
-- https://www.wolframalpha.com/input?i=prime+number+greater+than+50000000
-- universalModulus :: Modulus
-- universalModulus = unsafeFromInt 1009
-- universalModulus = unsafeFromInt 50000017
fromCodePoint :: CodePoint -> Base
fromCodePoint c = unsafeFromInt (fromEnum c)
...
...
@@ -83,7 +72,6 @@ fromCodePoint c = unsafeFromInt (fromEnum c)
-- https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
newtype RollingHash = RollingHash {
base :: Base
-- , modulus :: Modulus -- in our case, the modulus is (top :: UInt64)
, len :: Int
, basePowLen :: Base -- pow base len % modulus (stored for performance reasons)
}
...
...
@@ -101,16 +89,6 @@ mkRollingHash base len = RollingHash { base
-- basePowLen = foldl (\acc _l -> (acc*base) `mod` modulus) (unsafeFromInt 1) (1..(len - 1))
basePowLen = foldl (\acc _l -> (acc*base)) (unsafeFromInt 1) (1..(len - 1))
-- -- | NOTE: xs must be of length RollingHash.len
-- hashRH :: RollingHash -> Array Base -> Hash
-- hashRH rh@(RollingHash { base, modulus }) xs =
-- -- foldl (\acc x -> (((acc*base) `mod` modulus) + x) `mod` modulus) (unsafeFromInt 0) xs
-- foldl (\acc x -> rehashRH rh acc (unsafeFromInt 0) x) (unsafeFromInt 0) xs
-- rehashRH :: RollingHash -> Hash -> Base -> Base -> Hash
-- rehashRH (RollingHash { base, basePowLen, modulus }) h old new =
-- ((h + (modulus - old)*basePowLen)*base + new) `mod` modulus
-- | NOTE: xs must be of length RollingHash.len
hashU64 :: RollingHash -> Array Base -> Hash
...
...
@@ -161,23 +139,6 @@ hashStruct pats = { hash, hashMap, hLen, pats, rehash, rehashChar }
(mapWithIndex (\i a -> Tuple (hash a) [i]) pats)
-- hLen' = fromInt hLen
-- shDi = case 32 `quot` hLen of
-- q | q < 4 -> q
-- | otherwise -> 4
-- outS = fromInt (shDi * hLen)
-- rehash :: UInt -> CodePoint -> CodePoint -> UInt
-- rehash = case shDi of
-- 1 -> rehash' (fromInt 1) hLen'
-- 2 -> rehash' (fromInt 2) outS
-- 3 -> rehash' (fromInt 3) outS
-- _ -> rehash' (fromInt 4) outS
-- hash :: String -> UInt
-- hash = foldl (\h w -> (h `shl` fromInt shDi) + fromCodePoint w) (fromInt 0)
-- <<< S.toCodePointArray
-- <<< S.take hLen
-- $overview
--
...
...
test/Main.purs
View file @
a97d7392
...
...
@@ -9,5 +9,5 @@ import Test.Spec.Runner (runSpec)
main :: Effect Unit
main = launchAff_ do
specs <- discover "Data\\.String\\.Search\\..*Spec"
specs <- discover "
Test\\.
Data\\.String\\.Search\\..*Spec"
runSpec [consoleReporter] specs
test/Data/String/Search/Spec.purs
→
test/
Test/
Data/String/Search/Spec.purs
View file @
a97d7392
module Data.String.Search.KarpRabin.Spec where
module
Test.
Data.String.Search.KarpRabin.Spec where
import Prelude
import Data.Array (index)
import Data.Bounded (class Bounded)
import Data.Bounded.Generic (genericTop, genericBottom)
import Data.Enum (class BoundedEnum, class Enum)
import Data.Enum.Generic (genericCardinality, genericToEnum, genericFromEnum, genericSucc, genericPred)
import Data.Eq (class Eq)
import Data.Eq.Generic (genericEq)
import Data.Foldable (all)
import Data.Generic.Rep (class Generic)
import Data.Maybe (Maybe(..), isJust)
import Data.Ord (class Ord)
import Data.Ord.Generic (genericCompare)
import Data.String (drop, stripPrefix, Pattern(..), codePointFromChar, CodePoint)
import Data.String.Search.KarpRabin (indicesOfAny, mkRollingHash, hashU64, rehashU64, hashStruct, fromCodePoint) -- indicesOfAnyLegacy,
import Data.String (drop, stripPrefix, Pattern(..))
import Data.String.Search.KarpRabin (indicesOfAny, mkRollingHash, hashU64, rehashU64, hashStruct, fromCodePoint)
import Data.String.Search.Utils (slidingWindow)
import Data.Tuple (Tuple(..))
import Data.UInt64 (unsafeFromInt, UInt64)
import Data.UInt64 (unsafeFromInt)
import Prelude
import Test.Data.String.Search.Utils (CodePointA(..))
import Test.QuickCheck ((<?>))
import Test.QuickCheck.Arbitrary
import Test.QuickCheck.Gen (enum)
import Test.Spec (Spec, describe, it)
import Test.Spec.Assertions (shouldEqual)
import Test.Spec.QuickCheck (quickCheck')
fromInt = unsafeFromInt
newtype CodePointA = CodePointA CodePoint
derive instance Generic CodePointA _
instance Eq CodePointA where
eq = genericEq
instance Ord CodePointA where
compare = genericCompare
instance Bounded CodePointA where
top = genericTop
bottom = genericBottom
instance Enum CodePointA where
succ = genericSucc
pred = genericPred
instance BoundedEnum CodePointA where
cardinality = genericCardinality
toEnum = genericToEnum
fromEnum = genericFromEnum
instance Arbitrary CodePointA where
arbitrary = enum
validIndices :: Array String -> String -> Boolean
validIndices pats input = all validIndex (indicesOfAny pats input)
...
...
@@ -62,9 +29,6 @@ validIndices pats input = all validIndex (indicesOfAny pats input)
-- <?> (show input' <> " should start with " <> show pat)
Nothing -> false -- Failed "out of bounds pattern"
-- indicesOfAny :: Array String -> String -> Array (Tuple Int (Array Int))
-- indicesOfAny = indicesOfAnyLegacy
spec :: Spec Unit
spec =
describe "KarpRabin" do
...
...
@@ -73,11 +37,11 @@ spec =
[[1, 2], [2, 3], [3, 4]]
it "rolling hash works 1" do
let rh = mkRollingHash (
f
romInt 7) 3
let a =
f
romInt 1
let b =
f
romInt 2
let c =
f
romInt 3
let d =
f
romInt 4
let rh = mkRollingHash (
unsafeF
romInt 7) 3
let a =
unsafeF
romInt 1
let b =
unsafeF
romInt 2
let c =
unsafeF
romInt 3
let d =
unsafeF
romInt 4
let h1 = hashU64 rh [a, b, c]
h2 = hashU64 rh [b, c, d]
...
...
@@ -87,7 +51,7 @@ spec =
h3 `shouldEqual` rehashU64 rh h2 b a
it "rolling hash works 2 (quickcheck)" $ do
let rh = mkRollingHash (
f
romInt 256) 3
let rh = mkRollingHash (
unsafeF
romInt 256) 3
quickCheck' 2000 \(CodePointA a') (CodePointA b') (CodePointA c') (CodePointA d') ->
let a = fromCodePoint a'
...
...
test/Test/Data/String/Utils.purs
0 → 100644
View file @
a97d7392
module Test.Data.String.Search.Utils where
import Data.Bounded.Generic (genericTop, genericBottom)
import Data.Enum (class BoundedEnum, class Enum)
import Data.Enum.Generic (genericCardinality, genericToEnum, genericFromEnum, genericSucc, genericPred)
import Data.Eq.Generic (genericEq)
import Data.Generic.Rep (class Generic)
import Data.String (CodePoint)
import Prelude (class Bounded, class Eq, class Ord)
import Data.Ord.Generic (genericCompare)
import Test.QuickCheck.Arbitrary (class Arbitrary)
import Test.QuickCheck.Gen (enum)
-- | Test-only wrapper around `CodePoint`. It exists to carry the `Arbitrary`
-- | instance defined at the end of this module, so that code points can be
-- | taken as QuickCheck property arguments via the `CodePointA` constructor.
newtype CodePointA = CodePointA CodePoint
-- The Eq, Ord, Bounded, Enum and BoundedEnum instances below all delegate to
-- the generic implementations, which need this `Generic` instance.
derive instance Generic CodePointA _
instance Eq CodePointA where
eq = genericEq
instance Ord CodePointA where
compare = genericCompare
instance Bounded CodePointA where
top = genericTop
bottom = genericBottom
instance Enum CodePointA where
succ = genericSucc
pred = genericPred
instance BoundedEnum CodePointA where
cardinality = genericCardinality
toEnum = genericToEnum
fromEnum = genericFromEnum
-- | Generator built with `enum` from `Test.QuickCheck.Gen`.
-- NOTE(review): presumably this draws values from the `BoundedEnum` range
-- defined above; confirm against the purescript-quickcheck documentation.
instance Arbitrary CodePointA where
arbitrary = enum
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment