Commit d0e5b81d authored by Administrator

Merge branch 'testing' into prod-dev

parents b88b162e 32d6d3cb
@@ -34,17 +34,19 @@ class FileParser:
"""
# First, check the split dates...
# This part mainly deals with Zotero data but can be useful for other
# parts
date_string = hyperdata.get('publication_date_to_parse', None)
if date_string is not None:
date_string = re.sub(r'\/\/+', '', date_string)
date_string = re.sub(r'undefined', '', date_string)
date_string = re.sub(r'\/\/+(\w*|\d*)', '', date_string)
#date_string = re.sub(r'undefined', '', date_string)
try:
hyperdata['publication' + "_date"] = dateutil.parser.parse(
date_string,
default=DEFAULT_DATE
).strftime("%Y-%m-%d %H:%M:%S")
except:
print('Parser Zotero, Date not parsed for:', date_string)
except Exception as error:
print(error, 'Parser Zotero, Date not parsed for:', date_string)
hyperdata['publication_date'] = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
from .Tagger import Tagger
from .lib.melttagger.tagger import POSTagger, Token, DAGParser, DAGReader
import subprocess
import sys
import os
# references for tag equivalents:
# - http://cs.nyu.edu/grishman/jet/guide/PennPOS.html
# - http://www.lattice.cnrs.fr/sites/itellier/SEM.html
class identity_dict(dict):
def __missing__(self, key):
return key
_tag_replacements = identity_dict({
'DET': 'DT',
'NC': 'NN',
'NPP': 'NNP',
'ADJ': 'JJ',
'PONCT': '.',
'ADVWH': 'WRB',
'ADV': 'RB',
'DETWH': 'WDT',
'PROWH': 'WP',
'ET': 'FW',
'VINF': 'VB',
'I': 'UH',
'CS': 'IN',
# 'CLS': '',
# 'CLR': '',
# 'CLO': '',
# 'PRO': '',
# 'PROREL': '',
# 'P': '',
# 'P+D': '',
# 'P+PRO': '',
# 'V': '',
# 'VPR': '',
# 'VPP': '',
# 'VS': '',
# 'VIMP': '',
# 'PREF': '',
# 'ADJWH': '',
})
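# Any MElt tag without an explicit Penn Treebank equivalent above (e.g. 'V',
# 'PRO', 'CLS') is returned unchanged, thanks to identity_dict.__missing__:
# _tag_replacements['NC'] gives 'NN', while _tag_replacements['V'] gives 'V'.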
class MeltTagger(Tagger):
def start(self, language='fr', melt_data_path='lib/melttagger'):
basepath = os.path.dirname(os.path.realpath(__file__))
path = os.path.join(basepath, melt_data_path)
self._pos_tagger = POSTagger()
self._pos_tagger.load_tag_dictionary('%s/%s/tag_dict.json' % (path, language))
self._pos_tagger.load_lexicon('%s/%s/lexicon.json' % (path, language))
self._pos_tagger.load_model('%s/%s' % (path, language))
self._preprocessing_commands = (
# ('/usr/local/bin/clean_noisy_characters.sh', ),
('%s/MElt_normalizer.pl' % path, '-nc', '-c', '-d', '%s/%s' % (path, language), '-l', language, ),
('%s/segmenteur.pl' % path, '-a', '-ca', '-af=%s/pctabr' % path, '-p', 'r'),
)
self._lemmatization_commands = (
('%s/MElt_postprocess.pl' % path, '-npp', '-l', language),
('%s/MElt_lemmatizer.pl' % path, '-m', '%s/%s' % (path, language)),
)
def stop(self):
pass
def _pipe(self, text, commands, encoding='utf8'):
text = text.encode(encoding)
for command in commands:
process = subprocess.Popen(
command,
bufsize=0,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
text, err = process.communicate(text)
if len(err):
print(err.decode(encoding), file=sys.stderr)
return text.decode(encoding)
def _tag(self, text):
preprocessed = self._pipe(text, self._preprocessing_commands)
for sentence in preprocessed.split('\n'):
words = sentence.split(' ')
tokens = [Token(word) for word in words]
tagged_tokens = self._pos_tagger.tag_token_sequence(tokens)
for token in tagged_tokens:
if len(token.string):
yield (token.string, _tag_replacements[token.label], )
def tag_text(self, text, lemmatize=True):
tagged_tokens = self._tag(text)
if not lemmatize:
for tagged_token in tagged_tokens:
yield tagged_token
return
# lemmatization
command_input = ' '.join(
'%s/%s' % (token, tag)
for token, tag in tagged_tokens
)
lemmatized = self._pipe(command_input, self._lemmatization_commands)
for token in lemmatized.split():
if len(token):
values = token.split('/')
yield (values[0], values[1], values[2].replace('*', ''))
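# tag_text() yields (token, tag) pairs when lemmatize=False; with
# lemmatize=True it yields (token, tag, lemma) triples, with the '*'
# marker stripped from the lemma.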
This repository contains all the files for the Gargantext taggers.
Developers should reference them through the following path:
/srv/gargantext_lib/gargantext-taggers/
This repository should therefore be located in /srv/gargantext_lib.
from .Tagger import Tagger
from .nlpserver.client import NLPClient
from .lib.nlpserver.client import NLPClient
class TurboTagger:
def start(self):
self._nlpclient = NLPClient()
@@ -2,3 +2,4 @@ from .Tagger import Tagger
from .NltkTagger import NltkTagger
from .TreeTagger import TreeTagger
from .TurboTagger import TurboTagger
from .MeltTagger import MeltTagger
/srv/gargantext_lib/taggers
\ No newline at end of file
GETTING STARTED
===============
* Download the following files (if all you need is tagging, the second
archive is not necessary):
- http://www.ark.cs.cmu.edu/TurboParser/sample_models/english_proj_tagger.tar.gz
- http://www.ark.cs.cmu.edu/TurboParser/sample_models/english_proj_parser.tar.gz
* Expand them, and place the extracted files in the `data` directory (or use the snippet below)
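As an alternative to downloading the archives by hand, the following Python
snippet (a sketch; it fetches both archives and assumes it is run from this
directory) downloads and unpacks the models into `data`:

    import os
    import tarfile
    import urllib.request

    urls = [
        'http://www.ark.cs.cmu.edu/TurboParser/sample_models/english_proj_tagger.tar.gz',
        'http://www.ark.cs.cmu.edu/TurboParser/sample_models/english_proj_parser.tar.gz',
    ]
    os.makedirs('data', exist_ok=True)
    for url in urls:
        archive = os.path.join('data', os.path.basename(url))
        urllib.request.urlretrieve(url, archive)
        with tarfile.open(archive) as tar:
            tar.extractall('data')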
CONFIGURATION
=============
The settings for the server can be found in `settings.py`.
Please ensure the TCP port is not already in use on your machine, and that the paths to the models are correct.
START FOR TESTS
===============
python3 server.py
Press CTRL + C to shut it down
START/STOP THE SERVER
=====================
Simply run the following command to start: `./nlpserver start`
To stop: `./nlpserver stop`
If the server fails to start, have a look at the log in `nlpserver.log`.
import socket
import sys
import re
from .settings import server_type_client, server_host, server_port, server_buffer
from .settings import implemented_methods
class NLPClient:
def __init__(self):
self._socket = None
for method_name in dir(self):
if method_name[0] != '_':
if method_name.upper() not in implemented_methods:
setattr(self, method_name, self._notimplemented)
def __del__(self):
self._disconnect()
def _connect(self):
self._disconnect()
self._socket = socket.socket(*server_type_client)
self._socket.connect((server_host, server_port))
def _disconnect(self):
if self._socket is not None:
self._socket.close()
self._socket = None
def _notimplemented(self, *args, **kwargs):
raise NotImplementedError(
'Only the following methods are allowed: {}'.format(
', '.join(implemented_methods)
)
)
def _getline(self):
"""Get one line of text from the buffer
"""
buf = self._socket.recv(server_buffer).decode()
done = False
while not done:
if '\n' in buf:
line, buf = buf.split('\n', 1)
yield line
else:
more = self._socket.recv(server_buffer).decode()
if not more:
done = True
else:
buf += more
if buf:
yield buf
def _request(self, action, text, language, keys=None):
"""Generic method to request info from the server
"""
data = action + ' '
data += language + '\n'
data += re.sub(r'\n+', '\n', text)
data += '\n\n'
self._connect()
self._socket.sendall(data.encode())
sentence = []
if keys is None:
for line in self._getline():
if not line:
if not sentence:
break
yield sentence
sentence = []
continue
sentence.append(line.split('\t'))
else:
for line in self._getline():
if not line:
if not sentence:
break
yield sentence
sentence = []
continue
values = line.split('\t')
sentence.append(dict(zip(keys, values)))
def tokenize(self, text, language='english', asdict=False):
keys = ('token', ) if asdict else None
return self._request('TOKENIZE', text, language, keys)
def tag(self, text, language='english', asdict=False):
keys = ('token', 'tag', ) if asdict else None
return self._request('TAG', text, language, keys)
def lemmatize(self, text, language='english', asdict=False):
keys = ('token', 'tag', 'lemma') if asdict else None
return self._request('LEMMATIZE', text, language, keys)
def parse(self, text, language='english', asdict=False):
keys = ('token', 'tag', 'lemma', 'head', 'deprel', ) if asdict else None
return self._request('PARSE', text, language, keys)
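# Minimal usage sketch (assuming an nlpserver instance is listening on the
# host/port configured in settings.py):
#
#   client = NLPClient()
#   for sentence in client.tag('The cat sleeps. The dog barks.', asdict=True):
#       for token in sentence:
#           print(token['token'], token['tag'])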
# Benchmark when the script is called directly
if __name__ == '__main__':
from time import time
text = """Current therapeutics for schizophrenia, the typical and atypical antipsychotic class of drugs, derive their therapeutic benefit predominantly by antagonism of the dopamine D2 receptor subtype and have robust clinical benefit on positive symptoms of the disease with limited to no impact on negative symptoms and cognitive impairment. Driven by these therapeutic limitations of current treatments and the recognition that transmitter systems beyond the dopaminergic system in particular glutamatergic transmission contribute to the etiology of schizophrenia significant recent efforts have focused on the discovery and development of novel treatments for schizophrenia with mechanisms of action that are distinct from current drugs. Specifically, compounds selectively targeting the metabotropic glutamate receptor 2/3 subtype, phosphodiesterase subtype 10, glycine transporter subtype 1 and the alpha7 nicotinic acetylcholine receptor have been the subject of intense drug discovery and development efforts. Here we review recent clinical experience with the most advanced drug candidates targeting each of these novel mechanisms and discuss whether these new agents are living up to expectations."""
text = open('/home/mat/projects/parser/animal-farm.txt').read()
client = NLPClient()
iterations = int(sys.argv[1]) if len(sys.argv) > 1 else 1
for asdict in (False, True):
print()
print('Retrieving results as ' + (
'dict' if asdict else 'list'
) + 's')
print('---------------------------')
for method_name in dir(client):
if method_name[0] != '_':
method = getattr(client, method_name)
print('%-16s' % method_name, end='')
t0 = time()
n = 0.0
for i in range(0, iterations):
try:
for sentence in method(text, asdict=asdict):
n += 1.0
t = time() - t0
print('%8.2f s %8.2f ms per sentence' % (t, 1000*t/n if n else 0.0))
except NotImplementedError:
print('(not implemented)')
print()
# lemmatize 2.89 s 1.76 ms per sentence
# parse 25.21 s 15.37 ms per sentence
# tag 2.90 s 1.77 ms per sentence
# tokenize 0.19 s 0.12 ms per sentence
*.model
\ No newline at end of file
from nltk.stem import WordNetLemmatizer
from collections import defaultdict
lemmatizer = WordNetLemmatizer()
_lemmatize = lemmatizer.lemmatize
tags_translate = defaultdict(str)
tags_translate.update({
'J': 'a',
'N': 'n',
'V': 'v',
})
def lemmatize(token, tag):
tag_type = tags_translate[tag[0]]
return _lemmatize(token, tag_type) if tag_type else token
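# Examples: lemmatize('cats', 'NNS') -> 'cat', lemmatize('running', 'VBG') -> 'run';
# tokens whose tag does not start with J, N or V are returned unchanged.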
#!/bin/bash
# In case this bash file is placed in another directory (e.g., /etc/init.d),
# the following line should be changed to an absolute path
DAEMON_DIR=$( cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
DAEMON_SCRIPT=$DAEMON_DIR/server.py
DAEMON_NAME=nlpserver
DAEMON_ARGS=
# DAEMON_USER=root
# The process ID of the script when it runs is stored here:
DAEMON_PID=/tmp/$DAEMON_NAME.pid
. /lib/lsb/init-functions
do_start () {
log_daemon_msg "Starting system '$DAEMON_NAME' daemon..."
/sbin/start-stop-daemon --start --quiet \
--make-pidfile --pidfile $DAEMON_PID --background \
--startas /bin/bash -- -c "python3 $DAEMON_SCRIPT $DAEMON_ARGS > /tmp/$DAEMON_NAME.log 2>&1"
# --exec $DAEMON_SCRIPT \
# --user $DAEMON_USER --chuid $DAEMON_USER
log_end_msg $?
}
do_stop () {
log_daemon_msg "Stopping system '$DAEMON_NAME' daemon..."
/sbin/start-stop-daemon --stop --pidfile $DAEMON_PID --retry 10
log_end_msg $?
}
case "$1" in
start|stop)
do_${1}
;;
restart|reload|force-reload)
do_stop
do_start
;;
status)
status_of_proc -p "$DAEMON_PID" "$DAEMON_SCRIPT" "$DAEMON_NAME" && exit 0 || exit $?
;;
*)
echo "Usage: $DAEMON_NAME {start|stop|restart|status}"
exit 1
;;
esac
exit 0
from settings import *
from sys import stderr
def print(text):
stderr.write(text + '\n')
print('PREPARING TURBOPARSER')
import turboparser
turbo_interface = turboparser.PTurboParser()
print('LOADING TOKENIZERS')
import nltk
sentence_tokenizer = nltk.data.load(tokenizer_model)
word_tokenizer = nltk.TreebankWordTokenizer()
if 'TAG' in implemented_methods or 'LEMMATIZE' in implemented_methods:
print('LOADING TAGGER')
tagger = turbo_interface.create_tagger()
tagger.load_tagger_model(b_tagger_model)
if 'LEMMATIZE' in implemented_methods or 'TAG' in implemented_methods or 'PARSE' in implemented_methods:
print('LOADING LEMMATIZER')
from lemmatizer import lemmatize
if 'PARSE' in implemented_methods:
print('LOADING PARSER')
parser = turbo_interface.create_parser()
parser.load_parser_model(b_parser_model)
def split_sentences(text):
return sentence_tokenizer.tokenize(text)
def tokenize(sentence):
return word_tokenizer.tokenize(sentence)
def tag_sentence(sentence):
# Write tokens to input file
f_input = open(tmp_input_path, 'w')
for token in tokenize(sentence):
f_input.write(token + '\t_\n')
f_input.close()
# Tag tokens
tagger.tag(b_tmp_input_path, b_tmp_output_path)
# Iterate through tagged tokens
f_output = open(tmp_output_path)
for line in f_output:
line = line.rstrip('\n')
if line == '':
continue
token, tag = line.split('\t')
yield (token, tag)
f_output.close()
def tag_lemmatize_sentence(sentence):
# Write tokens to input file
f_input = open(tmp_input_path, 'w')
for token in tokenize(sentence):
f_input.write(token + '\t_\n')
f_input.close()
# Tag tokens
tagger.tag(b_tmp_input_path, b_tmp_output_path)
# Iterate through tagged tokens
f_output = open(tmp_output_path)
for line in f_output:
line = line.rstrip('\n')
if line == '':
continue
token, tag = line.split('\t')
lemma = lemmatize(token, tag)
yield (token, tag, lemma)
f_output.close()
def parse_sentence(sentence):
# Write tokens to input file
f_input = open(tmp_input_path, 'w')
# Iterate through tagged tokens, prepare input
i = 0
for token, tag, lemma in tag_lemmatize_sentence(sentence):
i += 1
f_input.write(
# position
str(i) + '\t' +
# token
token + '\t' +
# lemma
lemma + '\t' +
# tag (twice)
tag + '\t' +
tag + '\t' +
# filler
'_\t_\t_\n'
)
f_input.close()
# Parse sentence
parser.parse(b_tmp_input_path, b_tmp_output_path)
# Iterate through the parsed output
f_output = open(tmp_output_path)
for line in f_output:
line = line.rstrip('\n')
if line == '':
continue
fields = line.split('\t')
# CoNLL-style columns: id, form, lemma, coarse tag, tag, feats, head, deprel;
# the 1-based head index is converted to 0-based below
token = fields[1]
lemma = fields[2]
tag = fields[3]
head = str(int(fields[6]) - 1)
deprel = fields[7]
yield (token, tag, lemma, head, deprel)
#!python3
import pipeline
import socketserver
from settings import server_type_server, server_host, server_port, server_timeout
from settings import b_implemented_methods
actions = {
b'TAG': pipeline.tag_sentence,
b'LEMMATIZE': pipeline.tag_lemmatize_sentence,
b'PARSE': pipeline.parse_sentence,
}
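# TOKENIZE (or any other unlisted action) falls through to the plain
# tokenization branch of handle() below.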
class NLPServer(socketserver.StreamRequestHandler):
def handle(self):
# What kind of request are we handling?
firstline = self.rfile.readline()
parameters = firstline.split()
if len(parameters) != 2:
self.wfile.write(b'\n\n')
return
action, language = parameters
if action not in b_implemented_methods:
self.wfile.write(b'\n\n')
return
# Get the text data
text = ''
while True:
line = self.rfile.readline().decode()
if not line.strip():
break
text += line
text += '\n'
# Execute the action
method = actions.get(action, None)
if method is None:
for sentence in pipeline.split_sentences(text):
for token in pipeline.tokenize(sentence):
self.wfile.write(
token.encode() + b'\n'
)
self.wfile.write(b'\n')
self.wfile.write(b'\n')
else:
for sentence in pipeline.split_sentences(text):
for row in method(sentence):
self.wfile.write(
(
'\t'.join(row)
).encode() + b'\n'
)
self.wfile.write(b'\n')
self.wfile.write(b'\n')
def handle_timeout(self):
self.request.sendall(b'\n\n')
if __name__ == '__main__':
print('STARTING TCP SERVER')
server = server_type_server((server_host, server_port), NLPServer)
server.timeout = server_timeout
try:
server.serve_forever()
except (KeyboardInterrupt, SystemExit):
print('STOPPING TCP SERVER')
server.shutdown()
import os
import socket
import socketserver
# Server parameters
server_host = 'localhost'
server_port = 7777
server_type_server = socketserver.TCPServer
server_type_client = socket.AF_INET, socket.SOCK_STREAM
server_timeout = 2.0
server_buffer = 4096
# Implemented methods (others are treated as 'TOKENIZE')
implemented_methods = {'TOKENIZE', 'TAG', 'LEMMATIZE'}
# server_methods = {'TOKENIZE', 'TAG', 'LEMMATIZE', 'PARSE'}
b_implemented_methods = {name.encode() for name in implemented_methods}
# Models
data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data')
tokenizer_model = os.path.join(data_dir, 'english.pickle')
tagger_model = os.path.join(data_dir, 'english_proj_tagger.model')
# parser_model = 'data/210basic_sd330'
parser_model = os.path.join(data_dir, 'english_proj_parser_pruned-true_model-full.model')
b_tagger_model = tagger_model.encode()
b_parser_model = parser_model.encode()
# Temporary files access
tmp_input_path = '/tmp/nlpserver_input.tmp'
tmp_output_path = '/tmp/nlpserver_output.tmp'
b_tmp_input_path = tmp_input_path.encode()
b_tmp_output_path = tmp_output_path.encode()
/srv/gargantext_lib/treetagger
\ No newline at end of file
from parsing.Taggers import MeltTagger
# from parsing.Taggers.melttagger.tagger import POSTagger, Token, DAGParser, DAGReader
# # references:
# # - http://cs.nyu.edu/grishman/jet/guide/PennPOS.html
# # - http://www.lattice.cnrs.fr/sites/itellier/SEM.html
# class identity_dict(dict):
# def __missing__(self, key):
# return key
# _tag_replacements = identity_dict({
# 'DET': 'DT',
# 'NC': 'NN',
# 'NPP': 'NNP',
# 'ADJ': 'JJ',
# 'PONCT': '.',
# 'ADVWH': 'WRB',
# 'ADV': 'RB',
# 'DETWH': 'WDT',
# 'PROWH': 'WP',
# 'ET': 'FW',
# 'VINF': 'VB',
# 'I': 'UH',
# 'CS': 'IN',
# # 'CLS': '',
# # 'CLR': '',
# # 'CLO': '',
# # 'PRO': '',
# # 'PROREL': '',
# # 'P': '',
# # 'P+D': '',
# # 'P+PRO': '',
# # 'V': '',
# # 'VPR': '',
# # 'VPP': '',
# # 'VS': '',
# # 'VIMP': '',
# # 'PREF': '',
# # 'ADJWH': '',
# })
# import subprocess
# class MeltTagger:
# def __init__(self, language='fr', melt_data_path='./parsing/Taggers/melttagger'):
# path = '%s/%s' % (melt_data_path, language)
# self.pos_tagger = POSTagger()
# self.pos_tagger.load_tag_dictionary('%s/tag_dict.json' % path)
# self.pos_tagger.load_lexicon('%s/lexicon.json' % path)
# self.pos_tagger.load_model('%s' % path)
# self._preprocessing_commands = (
# # ('/usr/local/bin/clean_noisy_characters.sh', ),
# # ('/usr/local/bin/MElt_normalizer.pl', '-nc', '-c', '-d', '/usr/local/share/melt/normalization/%s' % language, '-l', language, ),
# ('/usr/local/share/melt/segmenteur.pl', '-a', '-ca', '-af=/usr/local/share/melt/pctabr', '-p', 'r'),
# )
# self._lemmatization_commands = (
# ('/usr/local/bin/MElt_postprocess.pl', '-npp', '-l', language),
# ('MElt_lemmatizer.pl', '-m', '/usr/local/share/melt/%s' % language),
# )
# def pipe(self, text, commands, encoding='utf8'):
# text = text.encode(encoding)
# # print(text.decode(encoding))
# for command in commands:
# # print(command)
# process = subprocess.Popen(
# command,
# bufsize=0,
# stdin=subprocess.PIPE,
# stdout=subprocess.PIPE,
# stderr=subprocess.PIPE,
# )
# text, err = process.communicate(text)
# # print()
# # print(text.decode(encoding))
# if len(err):
# print(err.decode(encoding))
# return text.decode(encoding)
# def tag(self, text, encoding='utf8', lemmatize=True):
# preprocessed = self.pipe(text, self._preprocessing_commands)
# if lemmatize:
# result = ''
# for sentence in preprocessed.split('\n'):
# words = sentence.split(' ')
# tokens = [Token(word) for word in words]
# tagged_tokens = self.pos_tagger.tag_token_sequence(tokens)
# # result += ' '.join(token.__str__() for token in tagged_tokens)
# for token in tagged_tokens:
# if len(token.string):
# result += '%s/%s ' % (token.string, token.label, )
# result += '\n'
# lemmatized = self.pipe(result, self._lemmatization_commands)
# for sentence in lemmatized.split('\n'):
# for token in sentence.split(' '):
# if len(token):
# yield tuple(token.split('/'))
# else:
# for sentence in preprocessed.split('\n'):
# words = sentence.split(' ')
# tokens = [Token(word) for word in words]
# tagged_tokens = self.pos_tagger.tag_token_sequence(tokens)
# for token in tagged_tokens:
# if len(token.string):
# yield (token.string, _tag_replacements[token.label], )
if __name__ == '__main__':
from time import time
t0 = time()
tagger = MeltTagger()
print(time() - t0)
print()
text = """Le vieil hôtel de ville, construit de 1608 à 1610 est le plus ancien bâtiment de la ville de Wiesbaden. Il se dresse sur la place centrale de la vieille ville, la Place du Palais, qui abrite aujourd'hui le Parlement de l'État de Hesse, l'église et l'hôtel de ville.
Il a été construit dans le style Renaissance. On a ajouté, en 1828, un étage de style romantique historié. Sur les bas-reliefs des cinq fenêtres de l'étage, en bois, étaient représentées les vertus de la force, la justice, la charité, de prudence et de modération, alors que la pierre a remplacé par des copies. Le pièces de chêne d'origine peut être visitées aujourd'hui au Musée de Wiesbaden. Aujourd'hui, le bâtiment sert de bureau de la ville de Wiesbaden.
Devant le porche, entre l'hôtel de Ville et l'Ancien hôtel de ville, se trouve la colonne centrale de Nassau, un lion couronné avec bouclier.
Il s'agit de construire progressivement, à partir des données initiales, un sous-graphe dans lequel sont classés les différents sommets par ordre croissant de leur distance minimale au sommet de départ. La distance correspond à la somme des poids des arêtes empruntées.
Au départ, on considère que les distances de chaque sommet au sommet de départ sont infinies. Au cours de chaque itération, on va mettre à jour les distances des sommets reliés par un arc au dernier du sous-graphe (en ajoutant le poids de l'arc à la distance séparant ce dernier sommet du sommet de départ ; si la distance obtenue ainsi est supérieure à celle qui précédait, la distance n'est cependant pas modifiée). Après cette mise à jour, on examine l'ensemble des sommets qui ne font pas partie du sous-graphe, et on choisit celui dont la distance est minimale pour l'ajouter au sous-graphe.
La première étape consiste à mettre de côté le sommet de départ et à lui attribuer une distance de 0. Les sommets qui lui sont adjacents sont mis à jour avec une valeur égale au poids de l'arc qui les relie au sommet de départ (ou à celui de poids le plus faible si plusieurs arcs les relient) et les autres sommets conservent leur distance infinie.
Le plus proche des sommets adjacents est alors ajouté au sous-graphe.
La seconde étape consiste à mettre à jour les distances des sommets adjacents à ce dernier. Encore une fois, on recherche alors le sommet doté de la distance la plus faible. Comme tous les sommets n'avaient plus une valeur infinie, il est donc possible que le sommet choisi ne soit pas un des derniers mis à jour.
On l'ajoute au sous-graphe, puis on continue ainsi à partir du dernier sommet ajouté, jusqu'à épuisement des sommets ou jusqu'à sélection du sommet d'arrivée.
"""
i = 0
t0 = time()
for x in tagger.tag_text(text, lemmatize=True):
print(x)
i += 1
t = time() - t0
print(t)
print(t / i)