Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
purescript-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Grégoire Locqueville
purescript-gargantext
Commits
be456f9c
Verified
Commit
be456f9c
authored
Feb 28, 2023
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[KarpRabin] fixes to the algorithm
No more recursion now, but this is still slow.
parent
d3218107
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
74 additions
and
19 deletions
+74
-19
Array.purs
src/Gargantext/Data/Array.purs
+14
-1
KarpRabin.purs
src/Gargantext/Utils/KarpRabin.purs
+58
-18
Spec.purs
test/Gargantext/Data/Spec.purs
+2
-0
No files found.
src/Gargantext/Data/Array.purs
View file @
be456f9c
...
@@ -2,11 +2,12 @@ module Gargantext.Data.Array
...
@@ -2,11 +2,12 @@ module Gargantext.Data.Array
where
where
import Data.Array as DA
import Data.Array as DA
import Data.Array ((..))
import Data.List as List
import Data.List as List
import Data.Maybe
import Data.Maybe
import Data.Sequence as Seq
import Data.Sequence as Seq
import Data.Tuple (Tuple(..))
import Data.Tuple (Tuple(..))
import Prelude (bind, flip, identity, (<<<), ($))
import Prelude (bind, flip, identity, (<<<), ($)
, (<), (-), (<$>)
)
----------------------------------------------------------------------
----------------------------------------------------------------------
-- | Split arrays tools
-- | Split arrays tools
...
@@ -65,3 +66,15 @@ swapList i j seq = List.fromFoldable $ swap i j $ List.toUnfoldable seq
...
@@ -65,3 +66,15 @@ swapList i j seq = List.fromFoldable $ swap i j $ List.toUnfoldable seq
swapSeq :: forall a. Int -> Int -> Seq.Seq a -> Seq.Seq a
swapSeq :: forall a. Int -> Int -> Seq.Seq a -> Seq.Seq a
swapSeq i j seq = Seq.fromFoldable $ swap i j $ Seq.toUnfoldable seq
swapSeq i j seq = Seq.fromFoldable $ swap i j $ Seq.toUnfoldable seq
slidingWindow :: forall a. Array a -> Int -> Array (Array a)
slidingWindow lst len =
let diff = DA.length lst - len
in
if diff < 0 then []
else (\idx -> DA.take len $ DA.drop idx lst) <$> 0 .. diff
-- slidingWindow lst len = DA.fromFoldable $ go lst
-- where
-- go l =
-- if (DA.length l) < len then List.Nil
-- else (DA.take len l) List.: (go $ DA.drop 1 l)
src/Gargantext/Utils/KarpRabin.purs
View file @
be456f9c
...
@@ -24,6 +24,7 @@ module Gargantext.Utils.KarpRabin ( -- * Overview
...
@@ -24,6 +24,7 @@ module Gargantext.Utils.KarpRabin ( -- * Overview
import Data.Array as A
import Data.Array as A
import Data.Array ((..))
import Data.Enum (fromEnum)
import Data.Enum (fromEnum)
import Data.Foldable (class Foldable, minimum, foldl)
import Data.Foldable (class Foldable, minimum, foldl)
import Data.FunctorWithIndex (mapWithIndex)
import Data.FunctorWithIndex (mapWithIndex)
...
@@ -33,10 +34,11 @@ import Data.Map as M
...
@@ -33,10 +34,11 @@ import Data.Map as M
import Data.Maybe (Maybe(..), isJust)
import Data.Maybe (Maybe(..), isJust)
import Data.String as S
import Data.String as S
import Data.String (CodePoint)
import Data.String (CodePoint)
import Data.Tuple (Tuple(..))
import Data.Tuple (Tuple(..)
, snd
)
import Data.UInt (UInt, shl, fromInt)
import Data.UInt (UInt, shl, fromInt)
import Partial.Unsafe (unsafePartial)
import Partial.Unsafe (unsafePartial)
import Gargantext.Data.Array as GDA
import Prelude
import Prelude
fromCodePoint :: CodePoint -> UInt
fromCodePoint :: CodePoint -> UInt
...
@@ -118,6 +120,7 @@ minimum1 a fa =
...
@@ -118,6 +120,7 @@ minimum1 a fa =
Nothing -> a
Nothing -> a
Just b -> min a b
Just b -> min a b
type GoAcc = Tuple UInt (Array (Maybe (Tuple Int (Array Int))))
strictMatcher :: Array String -> String -> Array (Tuple Int (Array Int))
strictMatcher :: Array String -> String -> Array (Tuple Int (Array Int))
strictMatcher pats = unsafePartial search
strictMatcher pats = unsafePartial search
where
where
...
@@ -138,31 +141,68 @@ strictMatcher pats = unsafePartial search
...
@@ -138,31 +141,68 @@ strictMatcher pats = unsafePartial search
hash = foldl (\h w -> (h `shl` fromInt shDi) + fromCodePoint w) (fromInt 0)
hash = foldl (\h w -> (h `shl` fromInt shDi) + fromCodePoint w) (fromInt 0)
<<< S.toCodePointArray
<<< S.toCodePointArray
<<< S.take hLen
<<< S.take hLen
hashMap :: M.Map UInt (Array Int)
hashMap =
hashMap =
M.fromFoldableWith (flip (<>))
M.fromFoldableWith (flip (<>))
(mapWithIndex (\i a -> Tuple (hash a) [i]) pats)
(mapWithIndex (\i a -> Tuple (hash a) [i]) pats)
search :: Partial => String -> Array (Tuple Int (Array Int))
search :: Partial => String -> Array (Tuple Int (Array Int))
search str = if strLen < hLen then []
search str = if strLen < hLen then []
else A.fromFoldable (go 0 shash)
else A.catMaybes $ snd <$> foldl go' (Tuple shash []) $ A.zip (0 .. maxIdx) (GDA.slidingWindow arr hLen)
-- else A.fromFoldable (go 0 shash)
where
where
strLen = S.length str
strLen = S.length str
maxIdx = strLen - hLen
maxIdx = strLen - hLen
arr = S.toCodePointArray str
arr = S.toCodePointArray str
strAt i = A.unsafeIndex arr i
strAt i = A.unsafeIndex arr i
shash :: UInt
shash = hash str
shash = hash str
go sI h =
-- NOTE: A new function, with a map over the range [0..maxIdx]
case M.lookup h hashMap of
go' :: GoAcc -> Tuple Int (Array CodePoint) -> GoAcc
Nothing ->
go' (Tuple h acc) (Tuple idx arr') =
if sI == maxIdx
let rehashed = rehash h (A.unsafeIndex arr' 0) (A.unsafeIndex arr' (-1))
then L.Nil
str' = S.fromCodePointArray arr'
else go (sI + 1) (rehash h (strAt sI) (strAt (sI + hLen)))
okay bs = isJust (S.stripPrefix (S.Pattern bs) str')
mPs = M.lookup h hashMap
acc' = case mPs of
Nothing -> Nothing
Just ps ->
Just ps ->
let rst = S.drop sI str
case A.filter (\x -> okay (A.unsafeIndex pats x)) ps of
hd = strAt sI
[] -> Nothing
more = if sI == maxIdx then L.Nil else
qs -> Just (Tuple idx qs)
go (sI + 1) (rehash h hd (strAt (sI + hLen)))
in
okay bs =
Tuple rehashed (A.snoc acc acc')
isJust (S.stripPrefix (S.Pattern bs) rst)
in case A.filter (\x -> okay (A.unsafeIndex pats x)) ps of
-- go :: Int -> UInt -> L.List (Tuple Int (Array Int))
[] -> more
-- go sI h =
qs -> Tuple sI qs L.: more
-- let rehashed = rehash h (strAt sI) (strAt (sI + hLen))
-- in
-- if sI == maxIdx then
-- L.Nil
-- else
-- case M.lookup h hashMap of
-- Nothing ->
-- go (sI + 1) rehashed
-- Just ps ->
-- let rst = S.drop sI str
-- okay bs =
-- isJust (S.stripPrefix (S.Pattern bs) rst)
-- in case A.filter (\x -> okay (A.unsafeIndex pats x)) ps of
-- [] -> go (sI + 1) rehashed
-- qs -> Tuple sI qs L.: (go (sI + 1) rehashed)
-- case M.lookup h hashMap of
-- Nothing ->
-- if sI == maxIdx
-- then L.Nil
-- else go (sI + 1) (rehash h (strAt sI) (strAt (sI + hLen)))
-- Just ps ->
-- let rst = S.drop sI str
-- hd = strAt sI
-- more = if sI == maxIdx then L.Nil else
-- go (sI + 1) (rehash h hd (strAt (sI + hLen)))
-- okay bs =
-- isJust (S.stripPrefix (S.Pattern bs) rst)
-- in case A.filter (\x -> okay (A.unsafeIndex pats x)) ps of
-- [] -> more
-- qs -> Tuple sI qs L.: more
test/Gargantext/Data/Spec.purs
View file @
be456f9c
...
@@ -18,3 +18,5 @@ spec =
...
@@ -18,3 +18,5 @@ spec =
it "swap works" do
it "swap works" do
GDA.swap 1 0 [0, 1, 2] `shouldEqual` [1, 0, 2]
GDA.swap 1 0 [0, 1, 2] `shouldEqual` [1, 0, 2]
GDA.swap 1 2 [0, 1, 2] `shouldEqual` [0, 2, 1]
GDA.swap 1 2 [0, 1, 2] `shouldEqual` [0, 2, 1]
it "slidingWindow works" do
GDA.slidingWindow [1, 2, 3, 4, 5] 2 `shouldEqual` [[1, 2], [2, 3], [3, 4], [4, 5]]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment