Add highlightNgrams

parent 1bd1ca55
...@@ -2,7 +2,9 @@ module Gargantext.Components.NgramsTable ...@@ -2,7 +2,9 @@ module Gargantext.Components.NgramsTable
( PageParams ( PageParams
, PatchMap , PatchMap
, NgramsPatch , NgramsPatch
, NgramsTable , NgramsTable(..)
, NgramsElement(..)
, NgramsTerm
, VersionedNgramsTable , VersionedNgramsTable
, Version , Version
, Versioned(..) , Versioned(..)
...@@ -15,12 +17,14 @@ module Gargantext.Components.NgramsTable ...@@ -15,12 +17,14 @@ module Gargantext.Components.NgramsTable
, ngramsTableClass , ngramsTableClass
, MainNgramsTableProps , MainNgramsTableProps
, mainNgramsTableSpec , mainNgramsTableSpec
, highlightNgrams
) )
where where
import Control.Monad.State (class MonadState, execState) import Control.Monad.State (class MonadState, execState)
import Control.Monad.Cont.Trans (lift) import Control.Monad.Cont.Trans (lift)
import Data.Array (head) import Data.Array (head)
import Data.Array as A
import Data.Argonaut ( class DecodeJson, decodeJson, class EncodeJson, encodeJson import Data.Argonaut ( class DecodeJson, decodeJson, class EncodeJson, encodeJson
, jsonEmptyObject, (:=), (~>), (.?), (.??) ) , jsonEmptyObject, (:=), (~>), (.?), (.??) )
import Data.Either (Either(..)) import Data.Either (Either(..))
...@@ -36,6 +40,7 @@ import Data.Lens.Fold (folded, traverseOf_) ...@@ -36,6 +40,7 @@ import Data.Lens.Fold (folded, traverseOf_)
import Data.Lens.Record (prop) import Data.Lens.Record (prop)
import Data.Lens.Iso.Newtype (_Newtype) import Data.Lens.Iso.Newtype (_Newtype)
import Data.List as List import Data.List as List
import Data.List ((:), List(Nil))
import Data.Map (Map) import Data.Map (Map)
import Data.Map as Map import Data.Map as Map
import Data.Maybe (Maybe(..), maybe) import Data.Maybe (Maybe(..), maybe)
...@@ -44,6 +49,7 @@ import Data.Traversable (class Traversable, traverse, traverse_, sequence) ...@@ -44,6 +49,7 @@ import Data.Traversable (class Traversable, traverse, traverse_, sequence)
import Data.TraversableWithIndex (class TraversableWithIndex, traverseWithIndex) import Data.TraversableWithIndex (class TraversableWithIndex, traverseWithIndex)
import Data.Set (Set) import Data.Set (Set)
import Data.Set as Set import Data.Set as Set
import Data.String as S
import Data.Symbol (SProxy(..)) import Data.Symbol (SProxy(..))
import Data.Tuple (Tuple(..)) import Data.Tuple (Tuple(..))
import Effect (Effect) import Effect (Effect)
...@@ -56,7 +62,10 @@ import React.DOM.Props (_id, _type, checked, className, name, onChange, onClick, ...@@ -56,7 +62,10 @@ import React.DOM.Props (_id, _type, checked, className, name, onChange, onClick,
import React.DOM.Props as DOM import React.DOM.Props as DOM
import Thermite (PerformAction, Render, Spec, StateCoTransformer, defaultPerformAction, modifyState_, simpleSpec, createClass) import Thermite (PerformAction, Render, Spec, StateCoTransformer, defaultPerformAction, modifyState_, simpleSpec, createClass)
import Unsafe.Coerce (unsafeCoerce) import Unsafe.Coerce (unsafeCoerce)
import Partial (crashWith)
import Partial.Unsafe (unsafePartial)
import Gargantext.Utils.KarpRabin (indicesOfAny)
import Gargantext.Types (TermList(..), TermSize, readTermList, readTermSize, termLists, termSizes) import Gargantext.Types (TermList(..), TermSize, readTermList, readTermSize, termLists, termSizes)
import Gargantext.Config (toUrl, End(..), Path(..), TabType(..), OrderBy(..)) import Gargantext.Config (toUrl, End(..), Path(..), TabType(..), OrderBy(..))
import Gargantext.Config.REST (get, put) import Gargantext.Config.REST (get, put)
...@@ -90,6 +99,7 @@ type Props' = Loader.InnerProps PageParams VersionedNgramsTable () ...@@ -90,6 +99,7 @@ type Props' = Loader.InnerProps PageParams VersionedNgramsTable ()
type NgramsTerm = String type NgramsTerm = String
-----------------------------------------------------------------------------------
newtype NgramsElement = NgramsElement newtype NgramsElement = NgramsElement
{ ngrams :: NgramsTerm { ngrams :: NgramsTerm
, list :: TermList , list :: TermList
...@@ -126,6 +136,7 @@ instance decodeJsonNgramsElement :: DecodeJson NgramsElement where ...@@ -126,6 +136,7 @@ instance decodeJsonNgramsElement :: DecodeJson NgramsElement where
let children = Set.fromFoldable (children' :: Array NgramsTerm) let children = Set.fromFoldable (children' :: Array NgramsTerm)
pure $ NgramsElement {ngrams, list, occurrences, parent, root, children} pure $ NgramsElement {ngrams, list, occurrences, parent, root, children}
-----------------------------------------------------------------------------------
type Version = Int type Version = Int
newtype Versioned a = Versioned newtype Versioned a = Versioned
...@@ -169,9 +180,49 @@ instance decodeJsonNgramsTable :: DecodeJson NgramsTable where ...@@ -169,9 +180,49 @@ instance decodeJsonNgramsTable :: DecodeJson NgramsTable where
$ f <$> (elements :: Array NgramsElement) $ f <$> (elements :: Array NgramsElement)
where where
f e@(NgramsElement e') = Tuple e'.ngrams e f e@(NgramsElement e') = Tuple e'.ngrams e
-----------------------------------------------------------------------------------
-- This initial version does not pay attention to word boundaries.
highlightNgrams :: NgramsTable -> String -> Array (Tuple String (Maybe TermList))
highlightNgrams (NgramsTable table) input =
let sN = unsafePartial (foldl goFold {i0: 0, s: input, l: Nil} ixs) in
A.reverse (A.fromFoldable (consNonEmpty sN.s sN.l))
where
pats = A.fromFoldable (Map.keys table)
ixs = indicesOfAny pats input
consNonEmpty x xs
| S.null x = xs
| otherwise = Tuple x Nothing : xs
-- NOTE that only the first matching pattern is used, the others are ignored!
goFold :: Partial => _ -> Tuple Int (Array Int) -> _
goFold { i0, s, l } (Tuple i pis) =
case A.index pis 0 of
Nothing ->
{ i0, s, l }
Just pi ->
case A.index pats pi of
Nothing ->
crashWith "highlightNgrams: out of bounds pattern"
Just pat ->
let lpat = S.length pat in
case Map.lookup pat table of
Nothing ->
crashWith "highlightNgrams: pattern missing from table"
Just (NgramsElement ne) ->
let s1 = S.splitAt (i - i0) s in
let s2 = S.splitAt lpat s1.after in
{ i0: i + lpat
, s: s2.after
, l: Tuple s2.before (Just ne.list) : consNonEmpty s1.before l
}
-----------------------------------------------------------------------------------
type VersionedNgramsTable = Versioned NgramsTable type VersionedNgramsTable = Versioned NgramsTable
-----------------------------------------------------------------------------------
data Replace a data Replace a
= Keep = Keep
| Replace { old :: a, new :: a } | Replace { old :: a, new :: a }
...@@ -395,6 +446,8 @@ applyNgramsTablePatch p (NgramsTable m) = ...@@ -395,6 +446,8 @@ applyNgramsTablePatch p (NgramsTable m) =
execState (reParentNgramsTablePatch p) $ execState (reParentNgramsTablePatch p) $
NgramsTable $ applyPatchMap applyNgramsPatch p m NgramsTable $ applyPatchMap applyNgramsPatch p m
-----------------------------------------------------------------------------------
type State = type State =
{ ngramsTablePatch :: NgramsTablePatch { ngramsTablePatch :: NgramsTablePatch
, ngramsVersion :: Version , ngramsVersion :: Version
......
module Gargantext.Components.NgramsTable.Spec where
import Prelude
import Data.Maybe (Maybe(..))
import Data.Tuple (Tuple(..))
import Gargantext.Components.NgramsTable (highlightNgrams, NgramsElement(..), NgramsTable(..))
import Gargantext.Types (TermList(..))
import Test.Spec (Spec, describe, it)
import Test.Spec.Assertions (shouldEqual)
-- import Test.Spec.QuickCheck (quickCheck')
import Data.Map as Map
import Data.Set as Set
spec :: Spec Unit
spec =
describe "NgramsTable.highlightNgrams" do
it "partially works" do
let ne ngrams list =
NgramsElement
{ ngrams
, list
, occurrences: 0
, parent: Nothing
, root: Nothing
, children: Set.empty
}
tne ngrams list = Tuple ngrams (ne ngrams list)
table = NgramsTable
(Map.fromFoldable [tne "graph" GraphTerm
,tne "stop" StopTerm
,tne "candidate" CandidateTerm
])
input = "this is a biography which stops at every candidate"
output = [Tuple "this is a bio" Nothing
,Tuple "graph" (Just GraphTerm)
,Tuple "y which " Nothing
,Tuple "stop" (Just StopTerm)
,Tuple "s at every " Nothing
,Tuple "candidate" (Just CandidateTerm)]
highlightNgrams table input `shouldEqual` output
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment