[KarpRabin] fixes to the algorithm

No more recursion now, but this is still slow.
parent d3218107
Pipeline #3702 failed with stage
in 0 seconds
...@@ -2,11 +2,12 @@ module Gargantext.Data.Array ...@@ -2,11 +2,12 @@ module Gargantext.Data.Array
where where
import Data.Array as DA import Data.Array as DA
import Data.Array ((..))
import Data.List as List import Data.List as List
import Data.Maybe import Data.Maybe
import Data.Sequence as Seq import Data.Sequence as Seq
import Data.Tuple (Tuple(..)) import Data.Tuple (Tuple(..))
import Prelude (bind, flip, identity, (<<<), ($)) import Prelude (bind, flip, identity, (<<<), ($), (<), (-), (<$>))
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- | Split arrays tools -- | Split arrays tools
...@@ -65,3 +66,15 @@ swapList i j seq = List.fromFoldable $ swap i j $ List.toUnfoldable seq ...@@ -65,3 +66,15 @@ swapList i j seq = List.fromFoldable $ swap i j $ List.toUnfoldable seq
swapSeq :: forall a. Int -> Int -> Seq.Seq a -> Seq.Seq a swapSeq :: forall a. Int -> Int -> Seq.Seq a -> Seq.Seq a
swapSeq i j seq = Seq.fromFoldable $ swap i j $ Seq.toUnfoldable seq swapSeq i j seq = Seq.fromFoldable $ swap i j $ Seq.toUnfoldable seq
slidingWindow :: forall a. Array a -> Int -> Array (Array a)
slidingWindow lst len =
let diff = DA.length lst - len
in
if diff < 0 then []
else (\idx -> DA.take len $ DA.drop idx lst) <$> 0 .. diff
-- slidingWindow lst len = DA.fromFoldable $ go lst
-- where
-- go l =
-- if (DA.length l) < len then List.Nil
-- else (DA.take len l) List.: (go $ DA.drop 1 l)
...@@ -24,6 +24,7 @@ module Gargantext.Utils.KarpRabin ( -- * Overview ...@@ -24,6 +24,7 @@ module Gargantext.Utils.KarpRabin ( -- * Overview
import Data.Array as A import Data.Array as A
import Data.Array ((..))
import Data.Enum (fromEnum) import Data.Enum (fromEnum)
import Data.Foldable (class Foldable, minimum, foldl) import Data.Foldable (class Foldable, minimum, foldl)
import Data.FunctorWithIndex (mapWithIndex) import Data.FunctorWithIndex (mapWithIndex)
...@@ -33,10 +34,11 @@ import Data.Map as M ...@@ -33,10 +34,11 @@ import Data.Map as M
import Data.Maybe (Maybe(..), isJust) import Data.Maybe (Maybe(..), isJust)
import Data.String as S import Data.String as S
import Data.String (CodePoint) import Data.String (CodePoint)
import Data.Tuple (Tuple(..)) import Data.Tuple (Tuple(..), snd)
import Data.UInt (UInt, shl, fromInt) import Data.UInt (UInt, shl, fromInt)
import Partial.Unsafe (unsafePartial) import Partial.Unsafe (unsafePartial)
import Gargantext.Data.Array as GDA
import Prelude import Prelude
fromCodePoint :: CodePoint -> UInt fromCodePoint :: CodePoint -> UInt
...@@ -118,6 +120,7 @@ minimum1 a fa = ...@@ -118,6 +120,7 @@ minimum1 a fa =
Nothing -> a Nothing -> a
Just b -> min a b Just b -> min a b
type GoAcc = Tuple UInt (Array (Maybe (Tuple Int (Array Int))))
strictMatcher :: Array String -> String -> Array (Tuple Int (Array Int)) strictMatcher :: Array String -> String -> Array (Tuple Int (Array Int))
strictMatcher pats = unsafePartial search strictMatcher pats = unsafePartial search
where where
...@@ -138,31 +141,68 @@ strictMatcher pats = unsafePartial search ...@@ -138,31 +141,68 @@ strictMatcher pats = unsafePartial search
hash = foldl (\h w -> (h `shl` fromInt shDi) + fromCodePoint w) (fromInt 0) hash = foldl (\h w -> (h `shl` fromInt shDi) + fromCodePoint w) (fromInt 0)
<<< S.toCodePointArray <<< S.toCodePointArray
<<< S.take hLen <<< S.take hLen
hashMap :: M.Map UInt (Array Int)
hashMap = hashMap =
M.fromFoldableWith (flip (<>)) M.fromFoldableWith (flip (<>))
(mapWithIndex (\i a -> Tuple (hash a) [i]) pats) (mapWithIndex (\i a -> Tuple (hash a) [i]) pats)
search :: Partial => String -> Array (Tuple Int (Array Int)) search :: Partial => String -> Array (Tuple Int (Array Int))
search str = if strLen < hLen then [] search str = if strLen < hLen then []
else A.fromFoldable (go 0 shash) else A.catMaybes $ snd <$> foldl go' (Tuple shash []) $ A.zip (0 .. maxIdx) (GDA.slidingWindow arr hLen)
-- else A.fromFoldable (go 0 shash)
where where
strLen = S.length str strLen = S.length str
maxIdx = strLen - hLen maxIdx = strLen - hLen
arr = S.toCodePointArray str arr = S.toCodePointArray str
strAt i = A.unsafeIndex arr i strAt i = A.unsafeIndex arr i
shash :: UInt
shash = hash str shash = hash str
go sI h = -- NOTE: A new function, with a map over the range [0..maxIdx]
case M.lookup h hashMap of go' :: GoAcc -> Tuple Int (Array CodePoint) -> GoAcc
Nothing -> go' (Tuple h acc) (Tuple idx arr') =
if sI == maxIdx let rehashed = rehash h (A.unsafeIndex arr' 0) (A.unsafeIndex arr' (-1))
then L.Nil str' = S.fromCodePointArray arr'
else go (sI + 1) (rehash h (strAt sI) (strAt (sI + hLen))) okay bs = isJust (S.stripPrefix (S.Pattern bs) str')
Just ps -> mPs = M.lookup h hashMap
let rst = S.drop sI str acc' = case mPs of
hd = strAt sI Nothing -> Nothing
more = if sI == maxIdx then L.Nil else Just ps ->
go (sI + 1) (rehash h hd (strAt (sI + hLen))) case A.filter (\x -> okay (A.unsafeIndex pats x)) ps of
okay bs = [] -> Nothing
isJust (S.stripPrefix (S.Pattern bs) rst) qs -> Just (Tuple idx qs)
in case A.filter (\x -> okay (A.unsafeIndex pats x)) ps of in
[] -> more Tuple rehashed (A.snoc acc acc')
qs -> Tuple sI qs L.: more
-- go :: Int -> UInt -> L.List (Tuple Int (Array Int))
-- go sI h =
-- let rehashed = rehash h (strAt sI) (strAt (sI + hLen))
-- in
-- if sI == maxIdx then
-- L.Nil
-- else
-- case M.lookup h hashMap of
-- Nothing ->
-- go (sI + 1) rehashed
-- Just ps ->
-- let rst = S.drop sI str
-- okay bs =
-- isJust (S.stripPrefix (S.Pattern bs) rst)
-- in case A.filter (\x -> okay (A.unsafeIndex pats x)) ps of
-- [] -> go (sI + 1) rehashed
-- qs -> Tuple sI qs L.: (go (sI + 1) rehashed)
-- case M.lookup h hashMap of
-- Nothing ->
-- if sI == maxIdx
-- then L.Nil
-- else go (sI + 1) (rehash h (strAt sI) (strAt (sI + hLen)))
-- Just ps ->
-- let rst = S.drop sI str
-- hd = strAt sI
-- more = if sI == maxIdx then L.Nil else
-- go (sI + 1) (rehash h hd (strAt (sI + hLen)))
-- okay bs =
-- isJust (S.stripPrefix (S.Pattern bs) rst)
-- in case A.filter (\x -> okay (A.unsafeIndex pats x)) ps of
-- [] -> more
-- qs -> Tuple sI qs L.: more
...@@ -18,3 +18,5 @@ spec = ...@@ -18,3 +18,5 @@ spec =
it "swap works" do it "swap works" do
GDA.swap 1 0 [0, 1, 2] `shouldEqual` [1, 0, 2] GDA.swap 1 0 [0, 1, 2] `shouldEqual` [1, 0, 2]
GDA.swap 1 2 [0, 1, 2] `shouldEqual` [0, 2, 1] GDA.swap 1 2 [0, 1, 2] `shouldEqual` [0, 2, 1]
it "slidingWindow works" do
GDA.slidingWindow [1, 2, 3, 4, 5] 2 `shouldEqual` [[1, 2], [2, 3], [3, 4], [4, 5]]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment