Commit 00cc08e7 authored by Romain Loth

FIX: remove SENT_BOUND tokens from melt tagger

parent 5407219a
...@@ -104,21 +104,23 @@ class MeltTagger(Tagger): ...@@ -104,21 +104,23 @@ class MeltTagger(Tagger):
def tag_text(self, text, lemmatize=False): def tag_text(self, text, lemmatize=False):
tagged_tokens = self._tag(text) tagged_tokens = self._tag(text)
# without lemmatization
if not lemmatize: if not lemmatize:
# without lemmatization
for form, tag in tagged_tokens: for form, tag in tagged_tokens:
yield (form, self._tag_replacements[tag]) if form != "SENT_BOUND":
return yield (form, self._tag_replacements[tag])
# with lemmatization else:
command_input = ' '.join( # with lemmatization
'%s/%s' % (token, tag) command_input = ' '.join(
for token, tag in tagged_tokens '%s/%s' % (token, tag)
) for token, tag in tagged_tokens
lemmatized = self._pipe(command_input, self._lemmatization_commands) )
for token in lemmatized.split(): lemmatized = self._pipe(command_input, self._lemmatization_commands)
if len(token): for token in lemmatized.split():
values = token.split('/') if len(token):
yield (values[0], self._tag_replacements[values[1]], values[2].replace('*', '')) values = token.split('/')
if values[0] != "SENT_BOUND":
yield (values[0], self._tag_replacements[values[1]], values[2].replace('*', ''))
def EnglishMeltTagger(*args, **kwargs): def EnglishMeltTagger(*args, **kwargs):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment