Commit 95f90782 authored by Mathieu Rodic's avatar Mathieu Rodic

Slightly improved the generic class for ngrams extraction.

parent 0d98962c
...@@ -9,9 +9,10 @@ class NgramsExtractor: ...@@ -9,9 +9,10 @@ class NgramsExtractor:
"""Class instanciation. """Class instanciation.
This method can be overriden. This method can be overriden.
""" """
def __init__(self, rule="NP: {<JJ.*>*<NN.*|>+<JJ.*>*}"): def __init__(self, rule="{<JJ.*>*<NN.*|>+<JJ.*>*}"):
self.start() self.start()
self._rule = rule self._label = "NP"
self._rule = self._label + ": " + rule
def __del__(self): def __del__(self):
self.stop() self.stop()
...@@ -33,20 +34,11 @@ class NgramsExtractor: ...@@ -33,20 +34,11 @@ class NgramsExtractor:
result = [] result = []
try: try:
grammar_parsed = grammar.parse(tagged_ngrams) grammar_parsed = grammar.parse(tagged_ngrams)
grammar_parsed_iterator = grammar_parsed.subtrees() for subtree in grammar_parsed.subtrees():
if subtree.label() == self._label:
while True:
try:
subtree = next(grammar_parsed_iterator)
if subtree.label() == 'NP':
#print(subtree.label())
result.append(subtree.leaves()) result.append(subtree.leaves())
except Exception as e:
break
except Exception as e: except Exception as e:
print(e) print("Problem while parsing rule '%s'" % (self._rule, ))
pass pass
return iter(result) return iter(result)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment