Commit 3b565087 authored by delanoe

[FEAT] Configuring Melt but still with bugs.

parent 88a82af8
from .NgramsExtractor import NgramsExtractor from .NgramsExtractor import NgramsExtractor
from ..Taggers import TreeTagger from ..Taggers import TreeTagger, MeltTagger
class FrenchNgramsExtractor(NgramsExtractor): class FrenchNgramsExtractor(NgramsExtractor):
def start(self): def start(self):
self.tagger = TreeTagger() #self.tagger = TreeTagger()
# self.tagger = MeltTagger(language='fr') self.tagger = MeltTagger(language='fr')
...@@ -14,22 +14,22 @@ class NgramsExtractor: ...@@ -14,22 +14,22 @@ class NgramsExtractor:
self._label = "NP" self._label = "NP"
self._rule = self._label + ": " + rule self._rule = self._label + ": " + rule
self._grammar = nltk.RegexpParser(self._rule) self._grammar = nltk.RegexpParser(self._rule)
def __del__(self): def __del__(self):
self.stop() self.stop()
def start(self): def start(self):
self.tagger = TurboTagger() self.tagger = TurboTagger()
def stop(self): def stop(self):
pass pass
"""Extracts a list of ngrams. """Extracts a list of ngrams.
Returns a list of the ngrams found in the given text. Returns a list of the ngrams found in the given text.
""" """
def extract_ngrams(self, contents): def extract_ngrams(self, contents):
tagged_ngrams = self.tagger.tag_text(contents) tagged_ngrams = list(self.tagger.tag_text(contents))
if len(tagged_ngrams): if len(tagged_ngrams):
grammar_parsed = self._grammar.parse(tagged_ngrams) grammar_parsed = self._grammar.parse(tagged_ngrams)
for subtree in grammar_parsed.subtrees(): for subtree in grammar_parsed.subtrees():
......
from .FrenchNgramsExtractor import FrenchNgramsExtractor from .FrenchNgramsExtractor import FrenchNgramsExtractor
from .TurboNgramsExtractor import TurboNgramsExtractor as EnglishNgramsExtractor #from .TurboNgramsExtractor import TurboNgramsExtractor as EnglishNgramsExtractor
# from .EnglishNgramsExtractor import EnglishNgramsExtractor from .EnglishNgramsExtractor import EnglishNgramsExtractor
from .NgramsExtractor import NgramsExtractor from .NgramsExtractor import NgramsExtractor
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment