Commit dde756c1 authored by Mathieu Rodic

[DEBUG] trying to fit 'MElt FR' as a tagger

parent 3b565087
from .NgramsExtractor import NgramsExtractor
from ..Taggers import NltkTagger
from ..Taggers import NltkTagger, MeltTagger
class EnglishNgramsExtractor(NgramsExtractor):
def start(self):
self.tagger = NltkTagger()
# self.tagger = MeltTagger(language='en')
# self.tagger = NltkTagger()
self.tagger = MeltTagger(language='en')
\ No newline at end of file
@@ -29,9 +29,9 @@ class NgramsExtractor:
Returns a list of the ngrams found in the given text.
"""
def extract_ngrams(self, contents):
tagged_ngrams = list(self.tagger.tag_text(contents))
if len(tagged_ngrams):
grammar_parsed = self._grammar.parse(tagged_ngrams)
tagged_tokens = list(self.tagger.tag_text(contents))
if len(tagged_tokens):
grammar_parsed = self._grammar.parse(tagged_tokens)
for subtree in grammar_parsed.subtrees():
if subtree.label() == self._label:
yield subtree.leaves()
@@ -102,7 +102,7 @@ class MeltTagger(Tagger):
if len(token.string):
yield (token.string, token.label, )
def tag_text(self, text, lemmatize=True):
def tag_text(self, text, lemmatize=False):
tagged_tokens = self._tag(text)
# without lemmatization
if not lemmatize:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment