Commit 286fa18a authored by Administrator

[FIX MERGE] Conflicts resolution on merge with unstable-mat.

parents f83c6c65 475431b6
......@@ -3,6 +3,11 @@ from django.core.exceptions import PermissionDenied, SuspiciousOperation
from django.core.exceptions import ValidationError
from django.core.urlresolvers import reverse
from django.db.models import Avg, Max, Min, Count, Sum
# from node.models import Language, ResourceType, Resource
# from node.models import Node, NodeType, Node_Resource, Project, Corpus
from sqlalchemy import text, distinct, or_
from sqlalchemy.sql import func
from sqlalchemy.orm import aliased
......@@ -64,7 +69,7 @@ _operators = {
">": lambda field, value: (field > value),
"<=": lambda field, value: (field <= value),
">=": lambda field, value: (field >= value),
"in": lambda field, value: (field.in_(value)),
"in": lambda field, value: (or_(*tuple(field == x for x in value))),
"contains": lambda field, value: (field.contains(value)),
"startswith": lambda field, value: (field.startswith(value)),
}
......@@ -481,10 +486,10 @@ class NodesChildrenQueries(APIView):
query = query.filter(
Node.id.in_(session
.query(Node_Ngram.node_id)
.filter(Node_Ngram.ngram_id == Ngram.id)
.join(Ngram, Ngram.id == Node_Ngram.ngram_id)
.filter(operator(
getattr(Ngram, field[1]),
value
map(lambda x: x.replace('-', ' '), value)
))
)
)
......
......@@ -58,7 +58,6 @@ class MeltTagger(Tagger):
self._pos_tagger.load_lexicon('%s/%s/lexicon.json' % (path, language))
self._pos_tagger.load_model('%s/%s' % (path, language))
self._preprocessing_commands = (
# ('/usr/local/bin/clean_noisy_characters.sh', ),
('%s/MElt_normalizer.pl' % path, '-nc', '-c', '-d', '%s/%s' % (path, language), '-l', language, ),
('%s/segmenteur.pl' % path, '-a', '-ca', '-af=%s/pctabr' % path, '-p', 'r'),
)
......@@ -93,15 +92,16 @@ class MeltTagger(Tagger):
tagged_tokens = self._pos_tagger.tag_token_sequence(tokens)
for token in tagged_tokens:
if len(token.string):
yield (token.string, _tag_replacements[token.label], )
yield (token.string, token.label, )
def tag_text(self, text, lemmatize=True):
tagged_tokens = self._tag(text)
# without lemmatization
if not lemmatize:
for tagged_token in tagged_tokens:
yield tagged_token
for form, tag in tagged_tokens:
yield (form, _tag_replacements[tag])
return
# lemmatization
# with lemmatization
command_input = ' '.join(
'%s/%s' % (token, tag)
for token, tag in tagged_tokens
......@@ -110,4 +110,4 @@ class MeltTagger(Tagger):
for token in lemmatized.split():
if len(token):
values = token.split('/')
yield (values[0], values[1], values[2].replace('*', ''))
yield (values[0], _tag_replacements[values[1]], values[2].replace('*', ''))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment