Commit 00cc08e7 authored by Romain Loth

FIX: remove SENT_BOUND tokens from melt tagger

parent 5407219a
......@@ -104,11 +104,12 @@ class MeltTagger(Tagger):
def tag_text(self, text, lemmatize=False):
tagged_tokens = self._tag(text)
# without lemmatization
if not lemmatize:
# without lemmatization
for form, tag in tagged_tokens:
if form != "SENT_BOUND":
yield (form, self._tag_replacements[tag])
return
else:
# with lemmatization
command_input = ' '.join(
'%s/%s' % (token, tag)
......@@ -118,6 +119,7 @@ class MeltTagger(Tagger):
for token in lemmatized.split():
if len(token):
values = token.split('/')
if values[0] != "SENT_BOUND":
yield (values[0], self._tag_replacements[values[1]], values[2].replace('*', ''))
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment