Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
purescript-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
131
Issues
131
List
Board
Labels
Milestones
Merge Requests
3
Merge Requests
3
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
purescript-gargantext
Commits
be456f9c
Verified
Commit
be456f9c
authored
Feb 28, 2023
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[KarpRabin] fixes to the algorithm
No more recursion now, but this is still slow.
parent
d3218107
Pipeline
#3702
failed with stage
in 0 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
74 additions
and
19 deletions
+74
-19
Array.purs
src/Gargantext/Data/Array.purs
+14
-1
KarpRabin.purs
src/Gargantext/Utils/KarpRabin.purs
+58
-18
Spec.purs
test/Gargantext/Data/Spec.purs
+2
-0
No files found.
src/Gargantext/Data/Array.purs
View file @
be456f9c
...
@@ -2,11 +2,12 @@ module Gargantext.Data.Array
...
@@ -2,11 +2,12 @@ module Gargantext.Data.Array
where
where
import Data.Array as DA
import Data.Array as DA
import Data.Array ((..))
import Data.List as List
import Data.List as List
import Data.Maybe
import Data.Maybe
import Data.Sequence as Seq
import Data.Sequence as Seq
import Data.Tuple (Tuple(..))
import Data.Tuple (Tuple(..))
import Prelude (bind, flip, identity, (<<<), ($))
import Prelude (bind, flip, identity, (<<<), ($)
, (<), (-), (<$>)
)
----------------------------------------------------------------------
----------------------------------------------------------------------
-- | Split arrays tools
-- | Split arrays tools
...
@@ -65,3 +66,15 @@ swapList i j seq = List.fromFoldable $ swap i j $ List.toUnfoldable seq
...
@@ -65,3 +66,15 @@ swapList i j seq = List.fromFoldable $ swap i j $ List.toUnfoldable seq
swapSeq :: forall a. Int -> Int -> Seq.Seq a -> Seq.Seq a
swapSeq :: forall a. Int -> Int -> Seq.Seq a -> Seq.Seq a
swapSeq i j seq = Seq.fromFoldable $ swap i j $ Seq.toUnfoldable seq
swapSeq i j seq = Seq.fromFoldable $ swap i j $ Seq.toUnfoldable seq
slidingWindow :: forall a. Array a -> Int -> Array (Array a)
slidingWindow lst len =
let diff = DA.length lst - len
in
if diff < 0 then []
else (\idx -> DA.take len $ DA.drop idx lst) <$> 0 .. diff
-- slidingWindow lst len = DA.fromFoldable $ go lst
-- where
-- go l =
-- if (DA.length l) < len then List.Nil
-- else (DA.take len l) List.: (go $ DA.drop 1 l)
src/Gargantext/Utils/KarpRabin.purs
View file @
be456f9c
...
@@ -24,6 +24,7 @@ module Gargantext.Utils.KarpRabin ( -- * Overview
...
@@ -24,6 +24,7 @@ module Gargantext.Utils.KarpRabin ( -- * Overview
import Data.Array as A
import Data.Array as A
import Data.Array ((..))
import Data.Enum (fromEnum)
import Data.Enum (fromEnum)
import Data.Foldable (class Foldable, minimum, foldl)
import Data.Foldable (class Foldable, minimum, foldl)
import Data.FunctorWithIndex (mapWithIndex)
import Data.FunctorWithIndex (mapWithIndex)
...
@@ -33,10 +34,11 @@ import Data.Map as M
...
@@ -33,10 +34,11 @@ import Data.Map as M
import Data.Maybe (Maybe(..), isJust)
import Data.Maybe (Maybe(..), isJust)
import Data.String as S
import Data.String as S
import Data.String (CodePoint)
import Data.String (CodePoint)
import Data.Tuple (Tuple(..))
import Data.Tuple (Tuple(..)
, snd
)
import Data.UInt (UInt, shl, fromInt)
import Data.UInt (UInt, shl, fromInt)
import Partial.Unsafe (unsafePartial)
import Partial.Unsafe (unsafePartial)
import Gargantext.Data.Array as GDA
import Prelude
import Prelude
fromCodePoint :: CodePoint -> UInt
fromCodePoint :: CodePoint -> UInt
...
@@ -118,6 +120,7 @@ minimum1 a fa =
...
@@ -118,6 +120,7 @@ minimum1 a fa =
Nothing -> a
Nothing -> a
Just b -> min a b
Just b -> min a b
type GoAcc = Tuple UInt (Array (Maybe (Tuple Int (Array Int))))
strictMatcher :: Array String -> String -> Array (Tuple Int (Array Int))
strictMatcher :: Array String -> String -> Array (Tuple Int (Array Int))
strictMatcher pats = unsafePartial search
strictMatcher pats = unsafePartial search
where
where
...
@@ -138,31 +141,68 @@ strictMatcher pats = unsafePartial search
...
@@ -138,31 +141,68 @@ strictMatcher pats = unsafePartial search
hash = foldl (\h w -> (h `shl` fromInt shDi) + fromCodePoint w) (fromInt 0)
hash = foldl (\h w -> (h `shl` fromInt shDi) + fromCodePoint w) (fromInt 0)
<<< S.toCodePointArray
<<< S.toCodePointArray
<<< S.take hLen
<<< S.take hLen
hashMap :: M.Map UInt (Array Int)
hashMap =
hashMap =
M.fromFoldableWith (flip (<>))
M.fromFoldableWith (flip (<>))
(mapWithIndex (\i a -> Tuple (hash a) [i]) pats)
(mapWithIndex (\i a -> Tuple (hash a) [i]) pats)
search :: Partial => String -> Array (Tuple Int (Array Int))
search :: Partial => String -> Array (Tuple Int (Array Int))
search str = if strLen < hLen then []
search str = if strLen < hLen then []
else A.fromFoldable (go 0 shash)
else A.catMaybes $ snd <$> foldl go' (Tuple shash []) $ A.zip (0 .. maxIdx) (GDA.slidingWindow arr hLen)
-- else A.fromFoldable (go 0 shash)
where
where
strLen = S.length str
strLen = S.length str
maxIdx = strLen - hLen
maxIdx = strLen - hLen
arr = S.toCodePointArray str
arr = S.toCodePointArray str
strAt i = A.unsafeIndex arr i
strAt i = A.unsafeIndex arr i
shash :: UInt
shash = hash str
shash = hash str
go sI h =
-- NOTE: A new function, with a map over the range [0..maxIdx]
case M.lookup h hashMap of
go' :: GoAcc -> Tuple Int (Array CodePoint) -> GoAcc
Nothing ->
go' (Tuple h acc) (Tuple idx arr') =
if sI == maxIdx
let rehashed = rehash h (A.unsafeIndex arr' 0) (A.unsafeIndex arr' (-1))
then L.Nil
str' = S.fromCodePointArray arr'
else go (sI + 1) (rehash h (strAt sI) (strAt (sI + hLen)))
okay bs = isJust (S.stripPrefix (S.Pattern bs) str')
Just ps ->
mPs = M.lookup h hashMap
let rst = S.drop sI str
acc' = case mPs of
hd = strAt sI
Nothing -> Nothing
more = if sI == maxIdx then L.Nil else
Just ps ->
go (sI + 1) (rehash h hd (strAt (sI + hLen)))
case A.filter (\x -> okay (A.unsafeIndex pats x)) ps of
okay bs =
[] -> Nothing
isJust (S.stripPrefix (S.Pattern bs) rst)
qs -> Just (Tuple idx qs)
in case A.filter (\x -> okay (A.unsafeIndex pats x)) ps of
in
[] -> more
Tuple rehashed (A.snoc acc acc')
qs -> Tuple sI qs L.: more
-- go :: Int -> UInt -> L.List (Tuple Int (Array Int))
-- go sI h =
-- let rehashed = rehash h (strAt sI) (strAt (sI + hLen))
-- in
-- if sI == maxIdx then
-- L.Nil
-- else
-- case M.lookup h hashMap of
-- Nothing ->
-- go (sI + 1) rehashed
-- Just ps ->
-- let rst = S.drop sI str
-- okay bs =
-- isJust (S.stripPrefix (S.Pattern bs) rst)
-- in case A.filter (\x -> okay (A.unsafeIndex pats x)) ps of
-- [] -> go (sI + 1) rehashed
-- qs -> Tuple sI qs L.: (go (sI + 1) rehashed)
-- case M.lookup h hashMap of
-- Nothing ->
-- if sI == maxIdx
-- then L.Nil
-- else go (sI + 1) (rehash h (strAt sI) (strAt (sI + hLen)))
-- Just ps ->
-- let rst = S.drop sI str
-- hd = strAt sI
-- more = if sI == maxIdx then L.Nil else
-- go (sI + 1) (rehash h hd (strAt (sI + hLen)))
-- okay bs =
-- isJust (S.stripPrefix (S.Pattern bs) rst)
-- in case A.filter (\x -> okay (A.unsafeIndex pats x)) ps of
-- [] -> more
-- qs -> Tuple sI qs L.: more
test/Gargantext/Data/Spec.purs
View file @
be456f9c
...
@@ -18,3 +18,5 @@ spec =
...
@@ -18,3 +18,5 @@ spec =
it "swap works" do
it "swap works" do
GDA.swap 1 0 [0, 1, 2] `shouldEqual` [1, 0, 2]
GDA.swap 1 0 [0, 1, 2] `shouldEqual` [1, 0, 2]
GDA.swap 1 2 [0, 1, 2] `shouldEqual` [0, 2, 1]
GDA.swap 1 2 [0, 1, 2] `shouldEqual` [0, 2, 1]
it "slidingWindow works" do
GDA.slidingWindow [1, 2, 3, 4, 5] 2 `shouldEqual` [[1, 2], [2, 3], [3, 4], [4, 5]]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment