Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
purescript-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
142
Issues
142
List
Board
Labels
Milestones
Merge Requests
4
Merge Requests
4
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
purescript-gargantext
Commits
2e955f49
Unverified
Commit
2e955f49
authored
May 07, 2019
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[NGRAMS] Teach the highlighter about word boundaries
parent
838e5ac8
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
62 additions
and
15 deletions
+62
-15
NgramsTable.purs
src/Gargantext/Components/NgramsTable.purs
+16
-5
Spec.purs
test/Gargantext/Components/NgramsTable/Spec.purs
+46
-10
No files found.
src/Gargantext/Components/NgramsTable.purs
View file @
2e955f49
...
@@ -193,14 +193,21 @@ instance decodeJsonNgramsTable :: DecodeJson NgramsTable where
...
@@ -193,14 +193,21 @@ instance decodeJsonNgramsTable :: DecodeJson NgramsTable where
f e@(NgramsElement e') = Tuple e'.ngrams e
f e@(NgramsElement e') = Tuple e'.ngrams e
-----------------------------------------------------------------------------------
-----------------------------------------------------------------------------------
-- This initial version does not pay attention to word boundaries.
-- TODO: while this function works well with word boundaries,
-- it inserts too many spaces.
highlightNgrams :: NgramsTable -> String -> Array (Tuple String (Maybe TermList))
highlightNgrams :: NgramsTable -> String -> Array (Tuple String (Maybe TermList))
highlightNgrams (NgramsTable table) input =
highlightNgrams (NgramsTable table) input0 =
let sN = unsafePartial (foldl goFold {i0: 0, s: input, l: Nil} ixs) in
let sN = unsafePartial (foldl goFold {i0: 0, s: input, l: Nil} ixs) in
A.reverse (A.fromFoldable (consNonEmpty sN.s sN.l))
A.reverse (A.fromFoldable (consNonEmpty sN.s sN.l))
where
where
sp x = " " <> S.replaceAll (S.Pattern " ") (S.Replacement "  ") x <> " "
unsp x =
case S.stripSuffix (S.Pattern " ") x of
Nothing -> x
Just x1 -> S.replaceAll (S.Pattern "  ") (S.Replacement " ") (S.drop 1 x1)
input = sp input0
pats = A.fromFoldable (Map.keys table)
pats = A.fromFoldable (Map.keys table)
ixs = indicesOfAny pats input
ixs = indicesOfAny (sp <$> pats) input
consNonEmpty x xs
consNonEmpty x xs
| S.null x = xs
| S.null x = xs
...
@@ -210,6 +217,7 @@ highlightNgrams (NgramsTable table) input =
...
@@ -210,6 +217,7 @@ highlightNgrams (NgramsTable table) input =
goFold :: Partial => _ -> Tuple Int (Array Int) -> _
goFold :: Partial => _ -> Tuple Int (Array Int) -> _
goFold { i0, s, l } (Tuple i pis)
goFold { i0, s, l } (Tuple i pis)
| i < i0 =
| i < i0 =
-- Skip this pattern which is overlapping with a previous one.
{ i0, s, l }
{ i0, s, l }
| otherwise =
| otherwise =
case A.index pis 0 of
case A.index pis 0 of
...
@@ -220,7 +228,7 @@ highlightNgrams (NgramsTable table) input =
...
@@ -220,7 +228,7 @@ highlightNgrams (NgramsTable table) input =
Nothing ->
Nothing ->
crashWith "highlightNgrams: out of bounds pattern"
crashWith "highlightNgrams: out of bounds pattern"
Just pat ->
Just pat ->
let lpat = S.length pat in
let lpat = S.length (sp pat) in
case Map.lookup pat table of
case Map.lookup pat table of
Nothing ->
Nothing ->
crashWith "highlightNgrams: pattern missing from table"
crashWith "highlightNgrams: pattern missing from table"
...
@@ -228,7 +236,10 @@ highlightNgrams (NgramsTable table) input =
...
@@ -228,7 +236,10 @@ highlightNgrams (NgramsTable table) input =
let s1 = S.splitAt (i - i0) s in
let s1 = S.splitAt (i - i0) s in
{ i0: i + lpat
{ i0: i + lpat
, s: S.drop lpat s1.after
, s: S.drop lpat s1.after
, l: Tuple pat (Just ne.list) : consNonEmpty s1.before l
, l: Tuple " " Nothing :
Tuple pat (Just ne.list) :
Tuple " " Nothing :
consNonEmpty (unsp s1.before) l
}
}
-----------------------------------------------------------------------------------
-----------------------------------------------------------------------------------
...
...
test/Gargantext/Components/NgramsTable/Spec.purs
View file @
2e955f49
...
@@ -24,19 +24,30 @@ spec = do
...
@@ -24,19 +24,30 @@ spec = do
}
}
tne ngrams list = Tuple ngrams (ne ngrams list)
tne ngrams list = Tuple ngrams (ne ngrams list)
describe "NgramsTable.highlightNgrams" do
describe "NgramsTable.highlightNgrams" do
it "partially works" do
it "works on a simple example" do
let table = NgramsTable
let table = NgramsTable
(Map.fromFoldable [tne "graph" GraphTerm
(Map.fromFoldable [tne "graph" GraphTerm
,tne "stop" StopTerm
,tne "which" StopTerm
,tne "stops" StopTerm
,tne "candidate" CandidateTerm
,tne "candidate" CandidateTerm
])
])
input = "this is a biography which stops at every candidate"
input = "this is a graph about a biography which stops at every candidate"
output = [Tuple "this is a bio" Nothing
output = [Tuple "this is a" Nothing
,Tuple " " Nothing
,Tuple "graph" (Just GraphTerm)
,Tuple "graph" (Just GraphTerm)
,Tuple "y which " Nothing
,Tuple " " Nothing
,Tuple "stop" (Just StopTerm)
,Tuple "about a biography" Nothing
,Tuple "s at every " Nothing
,Tuple " " Nothing
,Tuple "candidate" (Just CandidateTerm)]
,Tuple "which" (Just StopTerm)
,Tuple " " Nothing
,Tuple " " Nothing
,Tuple "stops" (Just StopTerm)
,Tuple " " Nothing
,Tuple "at every" Nothing
,Tuple " " Nothing
,Tuple "candidate" (Just CandidateTerm)
,Tuple " " Nothing
]
highlightNgrams table input `shouldEqual` output
highlightNgrams table input `shouldEqual` output
it "works when pattern overlaps" do
it "works when pattern overlaps" do
...
@@ -48,18 +59,43 @@ spec = do
...
@@ -48,18 +59,43 @@ spec = do
,tne "the" GraphTerm
,tne "the" GraphTerm
,tne "state" GraphTerm
,tne "state" GraphTerm
])
])
input = "SCIPION is a new state of the"
input = "This is a new state of the"
output = [Tuple "SCIPION " Nothing
output = [Tuple "This" Nothing
,Tuple " " Nothing
,Tuple "is" (Just StopTerm)
,Tuple "is" (Just StopTerm)
,Tuple " " Nothing
,Tuple " " Nothing
,Tuple " " Nothing
,Tuple "a" (Just StopTerm)
,Tuple "a" (Just StopTerm)
,Tuple " " Nothing
,Tuple " " Nothing
,Tuple " " Nothing
,Tuple "new" (Just GraphTerm)
,Tuple "new" (Just GraphTerm)
,Tuple " " Nothing
,Tuple " " Nothing
,Tuple " " Nothing
,Tuple "state" (Just GraphTerm)
,Tuple "state" (Just GraphTerm)
,Tuple " " Nothing
,Tuple " " Nothing
,Tuple " " Nothing
,Tuple "of" (Just StopTerm)
,Tuple "of" (Just StopTerm)
,Tuple " " Nothing
,Tuple " " Nothing
,Tuple " " Nothing
,Tuple "the" (Just GraphTerm)
,Tuple "the" (Just GraphTerm)
,Tuple " " Nothing
]
highlightNgrams table input `shouldEqual` output
it "works when pattern overlaps 2" do
let table = NgramsTable
(Map.fromFoldable [tne "from" GraphTerm
,tne "i" StopTerm
,tne "images" GraphTerm
])
input = "This is from space images"
output = [Tuple "This is" Nothing
,Tuple " " Nothing
,Tuple "from" (Just GraphTerm)
,Tuple " " Nothing
,Tuple "space" Nothing
,Tuple " " Nothing
,Tuple "images" (Just GraphTerm)
,Tuple " " Nothing
]
]
highlightNgrams table input `shouldEqual` output
highlightNgrams table input `shouldEqual` output
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment