Commit 62d0c17d authored by Romain Loth

taggers: fix old comments in _Tagger.py

parent 3ce75c48
...@@ -19,8 +19,7 @@ class Tagger: ...@@ -19,8 +19,7 @@ class Tagger:
| [][.,;"'?!():-_`] # these are separate tokens | [][.,;"'?!():-_`] # these are separate tokens
''', re.UNICODE | re.MULTILINE | re.DOTALL) ''', re.UNICODE | re.MULTILINE | re.DOTALL)
self.buffer = [] self.buffer = []
self.start()
#self.start()
def clean_text(self, text): def clean_text(self, text):
...@@ -33,6 +32,7 @@ class Tagger: ...@@ -33,6 +32,7 @@ class Tagger:
self.text = self.clean_text(text) self.text = self.clean_text(text)
grammar = nltk.RegexpParser(label + ': ' + rule) grammar = nltk.RegexpParser(label + ': ' + rule)
tagged_tokens = list(self.tag_text(self.text)) tagged_tokens = list(self.tag_text(self.text))
print("the tagged_tokens", tagged_tokens)
if len(tagged_tokens): if len(tagged_tokens):
grammar_parsed = grammar.parse(tagged_tokens) grammar_parsed = grammar.parse(tagged_tokens)
for subtree in grammar_parsed.subtrees(): for subtree in grammar_parsed.subtrees():
...@@ -46,12 +46,11 @@ class Tagger: ...@@ -46,12 +46,11 @@ class Tagger:
self.stop() self.stop()
def start(self): def start(self):
"""Initializes the tagger. """Initializes the tagger (once per corpus).
This method is called by the constructor, and can be overridden by This method is called by the constructor, and can be overridden by
inherited classes. inherited classes.
""" """
print("START") # print("START")
self.extract(self.text)
def stop(self): def stop(self):
"""Ends the tagger. """Ends the tagger.
...@@ -81,7 +80,7 @@ class Tagger: ...@@ -81,7 +80,7 @@ class Tagger:
return [] return []
# Not used right now # Main function for extract()
def tag_text(self, text): def tag_text(self, text):
"""Send a text to be tagged. """Send a text to be tagged.
""" """
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment