# TODO: have a look at "queue" instead of "list" (cf. http://stackoverflow.com/questions/17564804/in-python-how-to-wait-until-only-the-first-thread-is-finished)
class identity_dict(dict):
    """Dictionary whose missing keys map to themselves.

    Looking up a key that is not stored returns the key itself instead of
    raising ``KeyError``. The lookup does not insert the key, so ``in``,
    ``len()`` and ``get()`` behave exactly as on a plain ``dict``.
    """

    def __missing__(self, key):
        # Invoked by dict.__getitem__ when ``key`` is absent.
        return key
# Replacement table for tagger tags. Because it is an identity_dict, any tag
# without an entry below is looked up as itself (i.e. left unchanged).
# NOTE(review): ADJ -> NN and VER -> JJ look unusual (one might expect JJ and
# a verb tag) -- confirm they are intentional before changing them.
_tag_replacements = identity_dict(
    NOM="NN",
    NAM="NN",
    ADJ="NN",
    VER="JJ",
    PREP="PRP",
    KON="CC",
    DET="DT",
    PRO="DT",
    # Do we also have to take semicolons, commas and other punctuation marks
    # into account?
)
def _readOutput(output, buffer):
    """Read one ``<block>`` of tagger output and collect its (token, tag) pairs.

    Lines are read from ``output`` until the ``<block/>`` end marker is seen.
    Everything before the ``<block>`` start marker is ignored. Each line in
    between is decoded as UTF-8 and split on whitespace: the first field is
    the token, the second the tag. Only the part of the tag before the first
    ``':'`` is kept, and it is translated through ``_tag_replacements``.

    :param output: binary stream exposing ``readline()`` that returns bytes.
    :param buffer: list-like object; ``(token, tag)`` tuples are appended to
        it in reading order.
    :raises UnicodeDecodeError: if a line inside the block is not valid UTF-8.
    """
    hasStarted = False
    while True:
        line = output.readline()
        if not line:
            # Nothing to read right now: wait a little and poll again.
            # NOTE(review): an empty read at end-of-stream keeps this loop
            # polling forever -- confirm the caller handles a dead tagger.
            time.sleep(0.1)
            continue

        # Compare markers without the line terminator so that "\r\n" endings
        # and a final line lacking a newline are recognised too.
        marker = line.rstrip(b"\r\n")
        if marker == b"<block>":
            hasStarted = True
            continue
        if marker == b"<block/>":
            break
        if not hasStarted:
            continue

        fields = line.decode('utf8').split()
        if len(fields) < 2:
            # Blank or malformed line: skip it rather than fail on unpacking.
            continue
        token, tag = fields[:2]
        tag = _tag_replacements[tag.split(':')[0]]
        buffer.append((token, tag))
"""Use MElt for the tagging.
"""
classMelt(Tagger):
defstart(self,taggerPath="/usr/local/bin/"):
binaryFile="%s/MElt"%taggerPath
tagcmdlist=[
binaryFile,
"-l",
]
tagcmdlist=[]
self._popen=subprocess.Popen(
tagcmdlist,# Use a list of params in place of a string.
bufsize=0,# Not buffered to retrieve data asap from Tagger
executable=binaryFile,# As we have it, specify it
stdin=subprocess.PIPE,# Get a pipe to write input data to Tagger process
stdout=subprocess.PIPE,# Get a pipe to read processing results from Tagger
stderr=subprocess.PIPE,# Get a pipe to read processing results from Tagger