[KarpRabin] fixes to the algorithm

No more recursion now, but this is still slow.
parent d3218107
...@@ -2,11 +2,12 @@ module Gargantext.Data.Array ...@@ -2,11 +2,12 @@ module Gargantext.Data.Array
where where
import Data.Array as DA import Data.Array as DA
import Data.Array ((..))
import Data.List as List import Data.List as List
import Data.Maybe import Data.Maybe
import Data.Sequence as Seq import Data.Sequence as Seq
import Data.Tuple (Tuple(..)) import Data.Tuple (Tuple(..))
import Prelude (bind, flip, identity, (<<<), ($)) import Prelude (bind, flip, identity, (<<<), ($), (<), (-), (<$>))
---------------------------------------------------------------------- ----------------------------------------------------------------------
-- | Split arrays tools -- | Split arrays tools
...@@ -65,3 +66,15 @@ swapList i j seq = List.fromFoldable $ swap i j $ List.toUnfoldable seq ...@@ -65,3 +66,15 @@ swapList i j seq = List.fromFoldable $ swap i j $ List.toUnfoldable seq
swapSeq :: forall a. Int -> Int -> Seq.Seq a -> Seq.Seq a swapSeq :: forall a. Int -> Int -> Seq.Seq a -> Seq.Seq a
swapSeq i j seq = Seq.fromFoldable $ swap i j $ Seq.toUnfoldable seq swapSeq i j seq = Seq.fromFoldable $ swap i j $ Seq.toUnfoldable seq
slidingWindow :: forall a. Array a -> Int -> Array (Array a)
slidingWindow lst len =
let diff = DA.length lst - len
in
if diff < 0 then []
else (\idx -> DA.take len $ DA.drop idx lst) <$> 0 .. diff
-- slidingWindow lst len = DA.fromFoldable $ go lst
-- where
-- go l =
-- if (DA.length l) < len then List.Nil
-- else (DA.take len l) List.: (go $ DA.drop 1 l)
...@@ -24,6 +24,7 @@ module Gargantext.Utils.KarpRabin ( -- * Overview ...@@ -24,6 +24,7 @@ module Gargantext.Utils.KarpRabin ( -- * Overview
import Data.Array as A import Data.Array as A
import Data.Array ((..))
import Data.Enum (fromEnum) import Data.Enum (fromEnum)
import Data.Foldable (class Foldable, minimum, foldl) import Data.Foldable (class Foldable, minimum, foldl)
import Data.FunctorWithIndex (mapWithIndex) import Data.FunctorWithIndex (mapWithIndex)
...@@ -33,10 +34,11 @@ import Data.Map as M ...@@ -33,10 +34,11 @@ import Data.Map as M
import Data.Maybe (Maybe(..), isJust) import Data.Maybe (Maybe(..), isJust)
import Data.String as S import Data.String as S
import Data.String (CodePoint) import Data.String (CodePoint)
import Data.Tuple (Tuple(..)) import Data.Tuple (Tuple(..), snd)
import Data.UInt (UInt, shl, fromInt) import Data.UInt (UInt, shl, fromInt)
import Partial.Unsafe (unsafePartial) import Partial.Unsafe (unsafePartial)
import Gargantext.Data.Array as GDA
import Prelude import Prelude
fromCodePoint :: CodePoint -> UInt fromCodePoint :: CodePoint -> UInt
...@@ -118,6 +120,7 @@ minimum1 a fa = ...@@ -118,6 +120,7 @@ minimum1 a fa =
Nothing -> a Nothing -> a
Just b -> min a b Just b -> min a b
type GoAcc = Tuple UInt (Array (Maybe (Tuple Int (Array Int))))
strictMatcher :: Array String -> String -> Array (Tuple Int (Array Int)) strictMatcher :: Array String -> String -> Array (Tuple Int (Array Int))
strictMatcher pats = unsafePartial search strictMatcher pats = unsafePartial search
where where
...@@ -138,31 +141,68 @@ strictMatcher pats = unsafePartial search ...@@ -138,31 +141,68 @@ strictMatcher pats = unsafePartial search
hash = foldl (\h w -> (h `shl` fromInt shDi) + fromCodePoint w) (fromInt 0) hash = foldl (\h w -> (h `shl` fromInt shDi) + fromCodePoint w) (fromInt 0)
<<< S.toCodePointArray <<< S.toCodePointArray
<<< S.take hLen <<< S.take hLen
hashMap :: M.Map UInt (Array Int)
hashMap = hashMap =
M.fromFoldableWith (flip (<>)) M.fromFoldableWith (flip (<>))
(mapWithIndex (\i a -> Tuple (hash a) [i]) pats) (mapWithIndex (\i a -> Tuple (hash a) [i]) pats)
search :: Partial => String -> Array (Tuple Int (Array Int)) search :: Partial => String -> Array (Tuple Int (Array Int))
search str = if strLen < hLen then [] search str = if strLen < hLen then []
else A.fromFoldable (go 0 shash) else A.catMaybes $ snd <$> foldl go' (Tuple shash []) $ A.zip (0 .. maxIdx) (GDA.slidingWindow arr hLen)
-- else A.fromFoldable (go 0 shash)
where where
strLen = S.length str strLen = S.length str
maxIdx = strLen - hLen maxIdx = strLen - hLen
arr = S.toCodePointArray str arr = S.toCodePointArray str
strAt i = A.unsafeIndex arr i strAt i = A.unsafeIndex arr i
shash :: UInt
shash = hash str shash = hash str
go sI h = -- NOTE: A new function, with a map over the range [0..maxIdx]
case M.lookup h hashMap of go' :: GoAcc -> Tuple Int (Array CodePoint) -> GoAcc
Nothing -> go' (Tuple h acc) (Tuple idx arr') =
if sI == maxIdx let rehashed = rehash h (A.unsafeIndex arr' 0) (A.unsafeIndex arr' (-1))
then L.Nil str' = S.fromCodePointArray arr'
else go (sI + 1) (rehash h (strAt sI) (strAt (sI + hLen))) okay bs = isJust (S.stripPrefix (S.Pattern bs) str')
mPs = M.lookup h hashMap
acc' = case mPs of
Nothing -> Nothing
Just ps -> Just ps ->
let rst = S.drop sI str case A.filter (\x -> okay (A.unsafeIndex pats x)) ps of
hd = strAt sI [] -> Nothing
more = if sI == maxIdx then L.Nil else qs -> Just (Tuple idx qs)
go (sI + 1) (rehash h hd (strAt (sI + hLen))) in
okay bs = Tuple rehashed (A.snoc acc acc')
isJust (S.stripPrefix (S.Pattern bs) rst)
in case A.filter (\x -> okay (A.unsafeIndex pats x)) ps of -- go :: Int -> UInt -> L.List (Tuple Int (Array Int))
[] -> more -- go sI h =
qs -> Tuple sI qs L.: more -- let rehashed = rehash h (strAt sI) (strAt (sI + hLen))
-- in
-- if sI == maxIdx then
-- L.Nil
-- else
-- case M.lookup h hashMap of
-- Nothing ->
-- go (sI + 1) rehashed
-- Just ps ->
-- let rst = S.drop sI str
-- okay bs =
-- isJust (S.stripPrefix (S.Pattern bs) rst)
-- in case A.filter (\x -> okay (A.unsafeIndex pats x)) ps of
-- [] -> go (sI + 1) rehashed
-- qs -> Tuple sI qs L.: (go (sI + 1) rehashed)
-- case M.lookup h hashMap of
-- Nothing ->
-- if sI == maxIdx
-- then L.Nil
-- else go (sI + 1) (rehash h (strAt sI) (strAt (sI + hLen)))
-- Just ps ->
-- let rst = S.drop sI str
-- hd = strAt sI
-- more = if sI == maxIdx then L.Nil else
-- go (sI + 1) (rehash h hd (strAt (sI + hLen)))
-- okay bs =
-- isJust (S.stripPrefix (S.Pattern bs) rst)
-- in case A.filter (\x -> okay (A.unsafeIndex pats x)) ps of
-- [] -> more
-- qs -> Tuple sI qs L.: more
...@@ -18,3 +18,5 @@ spec = ...@@ -18,3 +18,5 @@ spec =
it "swap works" do it "swap works" do
GDA.swap 1 0 [0, 1, 2] `shouldEqual` [1, 0, 2] GDA.swap 1 0 [0, 1, 2] `shouldEqual` [1, 0, 2]
GDA.swap 1 2 [0, 1, 2] `shouldEqual` [0, 2, 1] GDA.swap 1 2 [0, 1, 2] `shouldEqual` [0, 2, 1]
it "slidingWindow works" do
GDA.slidingWindow [1, 2, 3, 4, 5] 2 `shouldEqual` [[1, 2], [2, 3], [3, 4], [4, 5]]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment