diff --git a/src/Gargantext/Components/NgramsTable.purs b/src/Gargantext/Components/NgramsTable.purs index 510f71370cf75ec7a0a848dbe127471588656bf1..3527b24a0c875197f9f2ad9bfef9879baaa9dcaa 100644 --- a/src/Gargantext/Components/NgramsTable.purs +++ b/src/Gargantext/Components/NgramsTable.purs @@ -2,7 +2,9 @@ module Gargantext.Components.NgramsTable ( PageParams , PatchMap , NgramsPatch - , NgramsTable + , NgramsTable(..) + , NgramsElement(..) + , NgramsTerm , VersionedNgramsTable , Version , Versioned(..) @@ -15,12 +17,14 @@ module Gargantext.Components.NgramsTable , ngramsTableClass , MainNgramsTableProps , mainNgramsTableSpec + , highlightNgrams ) where import Control.Monad.State (class MonadState, execState) import Control.Monad.Cont.Trans (lift) import Data.Array (head) +import Data.Array as A import Data.Argonaut ( class DecodeJson, decodeJson, class EncodeJson, encodeJson , jsonEmptyObject, (:=), (~>), (.?), (.??) ) import Data.Either (Either(..)) @@ -36,6 +40,7 @@ import Data.Lens.Fold (folded, traverseOf_) import Data.Lens.Record (prop) import Data.Lens.Iso.Newtype (_Newtype) import Data.List as List +import Data.List ((:), List(Nil)) import Data.Map (Map) import Data.Map as Map import Data.Maybe (Maybe(..), maybe) @@ -44,6 +49,7 @@ import Data.Traversable (class Traversable, traverse, traverse_, sequence) import Data.TraversableWithIndex (class TraversableWithIndex, traverseWithIndex) import Data.Set (Set) import Data.Set as Set +import Data.String as S import Data.Symbol (SProxy(..)) import Data.Tuple (Tuple(..)) import Effect (Effect) @@ -56,7 +62,10 @@ import React.DOM.Props (_id, _type, checked, className, name, onChange, onClick, import React.DOM.Props as DOM import Thermite (PerformAction, Render, Spec, StateCoTransformer, defaultPerformAction, modifyState_, simpleSpec, createClass) import Unsafe.Coerce (unsafeCoerce) +import Partial (crashWith) +import Partial.Unsafe (unsafePartial) +import Gargantext.Utils.KarpRabin (indicesOfAny) import Gargantext.Types (TermList(..), TermSize, readTermList, readTermSize, termLists, termSizes) import Gargantext.Config (toUrl, End(..), Path(..), TabType(..), OrderBy(..)) import Gargantext.Config.REST (get, put) @@ -90,6 +99,7 @@ type Props' = Loader.InnerProps PageParams VersionedNgramsTable () type NgramsTerm = String +----------------------------------------------------------------------------------- newtype NgramsElement = NgramsElement { ngrams :: NgramsTerm , list :: TermList @@ -126,6 +136,7 @@ instance decodeJsonNgramsElement :: DecodeJson NgramsElement where let children = Set.fromFoldable (children' :: Array NgramsTerm) pure $ NgramsElement {ngrams, list, occurrences, parent, root, children} +----------------------------------------------------------------------------------- type Version = Int newtype Versioned a = Versioned @@ -169,9 +180,49 @@ instance decodeJsonNgramsTable :: DecodeJson NgramsTable where $ f <$> (elements :: Array NgramsElement) where f e@(NgramsElement e') = Tuple e'.ngrams e +----------------------------------------------------------------------------------- + +-- This initial version does not pay attention to word boundaries. +highlightNgrams :: NgramsTable -> String -> Array (Tuple String (Maybe TermList)) +highlightNgrams (NgramsTable table) input = + let sN = unsafePartial (foldl goFold {i0: 0, s: input, l: Nil} ixs) in + A.reverse (A.fromFoldable (consNonEmpty sN.s sN.l)) + where + pats = A.fromFoldable (Map.keys table) + ixs = indicesOfAny pats input + + consNonEmpty x xs + | S.null x = xs + | otherwise = Tuple x Nothing : xs + + -- NOTE that only the first matching pattern is used, the others are ignored! + goFold :: Partial => _ -> Tuple Int (Array Int) -> _ + goFold { i0, s, l } (Tuple i pis) = + case A.index pis 0 of + Nothing -> + { i0, s, l } + Just pi -> + case A.index pats pi of + Nothing -> + crashWith "highlightNgrams: out of bounds pattern" + Just pat -> + let lpat = S.length pat in + case Map.lookup pat table of + Nothing -> + crashWith "highlightNgrams: pattern missing from table" + Just (NgramsElement ne) -> + let s1 = S.splitAt (i - i0) s in + let s2 = S.splitAt lpat s1.after in + { i0: i + lpat + , s: s2.after + , l: Tuple s2.before (Just ne.list) : consNonEmpty s1.before l + } + +----------------------------------------------------------------------------------- type VersionedNgramsTable = Versioned NgramsTable +----------------------------------------------------------------------------------- data Replace a = Keep | Replace { old :: a, new :: a } @@ -395,6 +446,8 @@ applyNgramsTablePatch p (NgramsTable m) = execState (reParentNgramsTablePatch p) $ NgramsTable $ applyPatchMap applyNgramsPatch p m +----------------------------------------------------------------------------------- + type State = { ngramsTablePatch :: NgramsTablePatch , ngramsVersion :: Version diff --git a/test/Gargantext/Components/NgramsTable/Spec.purs b/test/Gargantext/Components/NgramsTable/Spec.purs new file mode 100644 index 0000000000000000000000000000000000000000..57ecabeb9c50467871135c4698e8e876c1fb4f7a --- /dev/null +++ b/test/Gargantext/Components/NgramsTable/Spec.purs @@ -0,0 +1,40 @@ +module Gargantext.Components.NgramsTable.Spec where + +import Prelude +import Data.Maybe (Maybe(..)) +import Data.Tuple (Tuple(..)) +import Gargantext.Components.NgramsTable (highlightNgrams, NgramsElement(..), NgramsTable(..)) +import Gargantext.Types (TermList(..)) +import Test.Spec (Spec, describe, it) +import Test.Spec.Assertions (shouldEqual) +-- import Test.Spec.QuickCheck (quickCheck') +import Data.Map as Map +import Data.Set as Set + +spec :: Spec Unit +spec = + describe "NgramsTable.highlightNgrams" do + it "partially works" do + let ne ngrams list = + NgramsElement + { ngrams + , list + , occurrences: 0 + , parent: Nothing + , root: Nothing + , children: Set.empty + } + tne ngrams list = Tuple ngrams (ne ngrams list) + table = NgramsTable + (Map.fromFoldable [tne "graph" GraphTerm + ,tne "stop" StopTerm + ,tne "candidate" CandidateTerm + ]) + input = "this is a biography which stops at every candidate" + output = [Tuple "this is a bio" Nothing + ,Tuple "graph" (Just GraphTerm) + ,Tuple "y which " Nothing + ,Tuple "stop" (Just StopTerm) + ,Tuple "s at every " Nothing + ,Tuple "candidate" (Just CandidateTerm)] + highlightNgrams table input `shouldEqual` output