Commit d9080405 authored by Alexandre Delanoë
parents 864e0235 2409fac8
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
"argonaut", "argonaut",
"console", "console",
"css", "css",
"debug",
"dom-filereader", "dom-filereader",
"dom-simple", "dom-simple",
"effect", "effect",
......
...@@ -69,6 +69,7 @@ import Data.String.Regex as R ...@@ -69,6 +69,7 @@ import Data.String.Regex as R
import Data.String.Regex.Flags as R import Data.String.Regex.Flags as R
import Data.Symbol (SProxy(..)) import Data.Symbol (SProxy(..))
import Data.Tuple (Tuple(..)) import Data.Tuple (Tuple(..))
-- import Debug.Trace
import Effect.Aff (Aff) import Effect.Aff (Aff)
import Foreign.Object as FO import Foreign.Object as FO
import React (ReactElement) import React (ReactElement)
...@@ -208,23 +209,30 @@ instance decodeJsonNgramsTable :: DecodeJson NgramsTable where ...@@ -208,23 +209,30 @@ instance decodeJsonNgramsTable :: DecodeJson NgramsTable where
-- it inserts too many spaces. -- it inserts too many spaces.
highlightNgrams :: NgramsTable -> String -> Array (Tuple String (Maybe TermList)) highlightNgrams :: NgramsTable -> String -> Array (Tuple String (Maybe TermList))
highlightNgrams (NgramsTable table) input0 = highlightNgrams (NgramsTable table) input0 =
-- trace {pats, input0, input, ixs} \_ ->
let sN = unsafePartial (foldl goFold {i0: 0, s: input, l: Nil} ixs) in let sN = unsafePartial (foldl goFold {i0: 0, s: input, l: Nil} ixs) in
map trimmer $ A.reverse (A.fromFoldable (consNonEmpty sN.s sN.l)) A.reverse (A.fromFoldable (consNonEmpty (undb (init sN.s)) sN.l))
where where
-- we need to trim so that the highlighting is without endings spR x = " " <> R.replace theRegex "$1$1" x <> " "
trimmer (Tuple t (Just l)) = Tuple (S.trim t) (Just l) reR = R.replace theRegex " "
trimmer x = x db = S.replace (S.Pattern " ") (S.Replacement " ")
sp x = " " <> S.replaceAll (S.Pattern " ") (S.Replacement " ") x <> " " sp x = " " <> db x <> " "
unsp x = undb = R.replace theRegex2 "$1"
case S.stripSuffix (S.Pattern " ") x of init x = S.take (S.length x - 1) x
Nothing -> x input = spR input0
Just x1 -> S.replaceAll (S.Pattern " ") (S.Replacement " ") (S.drop 1 x1)
input = sp input0
pats = A.fromFoldable (Map.keys table) pats = A.fromFoldable (Map.keys table)
theRegex = case R.regex "[.,;:!?'\\{}()]" (R.global <> R.multiline) of word_boundaries = "[ .,;:!?'\\{}()]"
theRegex = case R.regex ("(" <> word_boundaries <> ")") (R.global <> R.multiline) of
Left e -> unsafePartial $ crashWith e Left e -> unsafePartial $ crashWith e
Right r -> r Right r -> r
ixs = indicesOfAny (sp <$> pats) (S.toLower $ R.replace theRegex " " input) theRegex2 = case R.regex ("(" <> word_boundaries <> ")\\1") (R.global <> R.multiline) of
Left e -> unsafePartial $ crashWith e
Right r -> r
ixs = indicesOfAny (sp <$> pats) (S.toLower $ reR input)
consOnJustTail s xs@(Tuple _ (Just _) : _) =
Tuple s Nothing : xs
consOnJustTail _ xs = xs
consNonEmpty x xs consNonEmpty x xs
| S.null x = xs | S.null x = xs
...@@ -245,20 +253,25 @@ highlightNgrams (NgramsTable table) input0 = ...@@ -245,20 +253,25 @@ highlightNgrams (NgramsTable table) input0 =
Nothing -> Nothing ->
crashWith "highlightNgrams: out of bounds pattern" crashWith "highlightNgrams: out of bounds pattern"
Just pat -> Just pat ->
let lpat = S.length (sp pat) in let lpat = S.length (db pat) in
case Map.lookup pat table of case Map.lookup pat table of
Nothing -> Nothing ->
crashWith "highlightNgrams: pattern missing from table" crashWith "highlightNgrams: pattern missing from table"
Just (NgramsElement ne) -> Just (NgramsElement ne) ->
let s1 = S.splitAt (i - i0) s let
s2 = S.splitAt lpat s1.after in s1 = S.splitAt (i - i0) s
-- s2.before and pat might differ by casing only! s2 = S.splitAt lpat (S.drop 1 s1.after)
{ i0: i + lpat s3 = S.splitAt 1 s2.after
, s: s2.after unspB = if i0 == 0 then S.drop 1 else identity
, l: Tuple " " Nothing : s3b = s3.before
Tuple s2.before (Just ne.list) : in
Tuple " " Nothing : -- trace {s, i, i0, s1, s2, s3, pat, lpat, s3b} \_ ->
consNonEmpty (unsp s1.before) l -- `undb s2.before` and pat might differ by casing only!
{ i0: i + lpat + 2
, s: s3.after
, l: Tuple (undb s2.before) (Just ne.list) :
consOnJustTail s3b
(consNonEmpty (unspB (undb s1.before)) l)
} }
----------------------------------------------------------------------------------- -----------------------------------------------------------------------------------
......
...@@ -3,7 +3,7 @@ module Gargantext.Components.NgramsTable.Spec where ...@@ -3,7 +3,7 @@ module Gargantext.Components.NgramsTable.Spec where
import Prelude import Prelude
import Data.Maybe (Maybe(..)) import Data.Maybe (Maybe(..))
import Data.Tuple (Tuple(..)) import Data.Tuple (Tuple(..))
import Gargantext.Components.NgramsTable (highlightNgrams, NgramsElement(..), NgramsTable(..)) import Gargantext.Components.NgramsTable.Core (highlightNgrams, NgramsElement(..), NgramsTable(..))
import Gargantext.Types (TermList(..)) import Gargantext.Types (TermList(..))
import Test.Spec (Spec, describe, it) import Test.Spec (Spec, describe, it)
import Test.Spec.Assertions (shouldEqual) import Test.Spec.Assertions (shouldEqual)
...@@ -32,21 +32,14 @@ spec = do ...@@ -32,21 +32,14 @@ spec = do
,tne "candidate" CandidateTerm ,tne "candidate" CandidateTerm
]) ])
input = "this is a graph about a biography which stops at every candidate" input = "this is a graph about a biography which stops at every candidate"
output = [Tuple "this is a" Nothing output = [Tuple "this is a " Nothing
,Tuple " " Nothing
,Tuple "graph" (Just GraphTerm) ,Tuple "graph" (Just GraphTerm)
,Tuple " " Nothing ,Tuple " about a biography " Nothing
,Tuple "about a biography" Nothing
,Tuple " " Nothing
,Tuple "which" (Just StopTerm) ,Tuple "which" (Just StopTerm)
,Tuple " " Nothing ,Tuple " " Nothing
,Tuple " " Nothing
,Tuple "stops" (Just StopTerm) ,Tuple "stops" (Just StopTerm)
,Tuple " " Nothing ,Tuple " at every " Nothing
,Tuple "at every" Nothing
,Tuple " " Nothing
,Tuple "candidate" (Just CandidateTerm) ,Tuple "candidate" (Just CandidateTerm)
,Tuple " " Nothing
] ]
highlightNgrams table input `shouldEqual` output highlightNgrams table input `shouldEqual` output
...@@ -60,25 +53,18 @@ spec = do ...@@ -60,25 +53,18 @@ spec = do
,tne "state" GraphTerm ,tne "state" GraphTerm
]) ])
input = "This is a new state of the" input = "This is a new state of the"
output = [Tuple "This" Nothing output = [Tuple "This " Nothing
,Tuple " " Nothing
,Tuple "is" (Just StopTerm) ,Tuple "is" (Just StopTerm)
,Tuple " " Nothing ,Tuple " " Nothing
,Tuple " " Nothing
,Tuple "a" (Just StopTerm) ,Tuple "a" (Just StopTerm)
,Tuple " " Nothing ,Tuple " " Nothing
,Tuple " " Nothing
,Tuple "new" (Just GraphTerm) ,Tuple "new" (Just GraphTerm)
,Tuple " " Nothing ,Tuple " " Nothing
,Tuple " " Nothing
,Tuple "state" (Just GraphTerm) ,Tuple "state" (Just GraphTerm)
,Tuple " " Nothing ,Tuple " " Nothing
,Tuple " " Nothing
,Tuple "of" (Just StopTerm) ,Tuple "of" (Just StopTerm)
,Tuple " " Nothing ,Tuple " " Nothing
,Tuple " " Nothing
,Tuple "the" (Just GraphTerm) ,Tuple "the" (Just GraphTerm)
,Tuple " " Nothing
] ]
highlightNgrams table input `shouldEqual` output highlightNgrams table input `shouldEqual` output
...@@ -89,13 +75,19 @@ spec = do ...@@ -89,13 +75,19 @@ spec = do
,tne "images" GraphTerm ,tne "images" GraphTerm
]) ])
input = "This is from space images" input = "This is from space images"
output = [Tuple "This is" Nothing output = [Tuple "This is " Nothing
,Tuple " " Nothing
,Tuple "from" (Just GraphTerm) ,Tuple "from" (Just GraphTerm)
,Tuple " " Nothing ,Tuple " space " Nothing
,Tuple "space" Nothing
,Tuple " " Nothing
,Tuple "images" (Just GraphTerm) ,Tuple "images" (Just GraphTerm)
,Tuple " " Nothing ]
highlightNgrams table input `shouldEqual` output
it "works with punctuation" do
let table = NgramsTable
(Map.fromFoldable [tne "graph" GraphTerm])
input = "before graph, after"
output = [Tuple "before " Nothing
,Tuple "graph" (Just GraphTerm)
,Tuple ", after" Nothing
] ]
highlightNgrams table input `shouldEqual` output highlightNgrams table input `shouldEqual` output
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment