Commit 286fa18a authored by Administrator

[FIX MERGE] Conflicts resolution on merge with unstable-mat.

parents f83c6c65 475431b6
...@@ -3,6 +3,11 @@ from django.core.exceptions import PermissionDenied, SuspiciousOperation ...@@ -3,6 +3,11 @@ from django.core.exceptions import PermissionDenied, SuspiciousOperation
from django.core.exceptions import ValidationError from django.core.exceptions import ValidationError
from django.core.urlresolvers import reverse from django.core.urlresolvers import reverse
from django.db.models import Avg, Max, Min, Count, Sum
# from node.models import Language, ResourceType, Resource
# from node.models import Node, NodeType, Node_Resource, Project, Corpus
from sqlalchemy import text, distinct, or_
from sqlalchemy.sql import func from sqlalchemy.sql import func
from sqlalchemy.orm import aliased from sqlalchemy.orm import aliased
...@@ -64,7 +69,7 @@ _operators = { ...@@ -64,7 +69,7 @@ _operators = {
">": lambda field, value: (field > value), ">": lambda field, value: (field > value),
"<=": lambda field, value: (field <= value), "<=": lambda field, value: (field <= value),
">=": lambda field, value: (field >= value), ">=": lambda field, value: (field >= value),
"in": lambda field, value: (field.in_(value)), "in": lambda field, value: (or_(*tuple(field == x for x in value))),
"contains": lambda field, value: (field.contains(value)), "contains": lambda field, value: (field.contains(value)),
"startswith": lambda field, value: (field.startswith(value)), "startswith": lambda field, value: (field.startswith(value)),
} }
...@@ -481,10 +486,10 @@ class NodesChildrenQueries(APIView): ...@@ -481,10 +486,10 @@ class NodesChildrenQueries(APIView):
query = query.filter( query = query.filter(
Node.id.in_(session Node.id.in_(session
.query(Node_Ngram.node_id) .query(Node_Ngram.node_id)
.filter(Node_Ngram.ngram_id == Ngram.id) .join(Ngram, Ngram.id == Node_Ngram.ngram_id)
.filter(operator( .filter(operator(
getattr(Ngram, field[1]), getattr(Ngram, field[1]),
value map(lambda x: x.replace('-', ' '), value)
)) ))
) )
) )
......
...@@ -58,7 +58,6 @@ class MeltTagger(Tagger): ...@@ -58,7 +58,6 @@ class MeltTagger(Tagger):
self._pos_tagger.load_lexicon('%s/%s/lexicon.json' % (path, language)) self._pos_tagger.load_lexicon('%s/%s/lexicon.json' % (path, language))
self._pos_tagger.load_model('%s/%s' % (path, language)) self._pos_tagger.load_model('%s/%s' % (path, language))
self._preprocessing_commands = ( self._preprocessing_commands = (
# ('/usr/local/bin/clean_noisy_characters.sh', ),
('%s/MElt_normalizer.pl' % path, '-nc', '-c', '-d', '%s/%s' % (path, language), '-l', language, ), ('%s/MElt_normalizer.pl' % path, '-nc', '-c', '-d', '%s/%s' % (path, language), '-l', language, ),
('%s/segmenteur.pl' % path, '-a', '-ca', '-af=%s/pctabr' % path, '-p', 'r'), ('%s/segmenteur.pl' % path, '-a', '-ca', '-af=%s/pctabr' % path, '-p', 'r'),
) )
...@@ -93,15 +92,16 @@ class MeltTagger(Tagger): ...@@ -93,15 +92,16 @@ class MeltTagger(Tagger):
tagged_tokens = self._pos_tagger.tag_token_sequence(tokens) tagged_tokens = self._pos_tagger.tag_token_sequence(tokens)
for token in tagged_tokens: for token in tagged_tokens:
if len(token.string): if len(token.string):
yield (token.string, _tag_replacements[token.label], ) yield (token.string, token.label, )
def tag_text(self, text, lemmatize=True): def tag_text(self, text, lemmatize=True):
tagged_tokens = self._tag(text) tagged_tokens = self._tag(text)
# without lemmatization
if not lemmatize: if not lemmatize:
for tagged_token in tagged_tokens: for form, tag in tagged_tokens:
yield tagged_token yield (form, _tag_replacements[tag])
return return
# lemmatization # with lemmatization
command_input = ' '.join( command_input = ' '.join(
'%s/%s' % (token, tag) '%s/%s' % (token, tag)
for token, tag in tagged_tokens for token, tag in tagged_tokens
...@@ -110,4 +110,4 @@ class MeltTagger(Tagger): ...@@ -110,4 +110,4 @@ class MeltTagger(Tagger):
for token in lemmatized.split(): for token in lemmatized.split():
if len(token): if len(token):
values = token.split('/') values = token.split('/')
yield (values[0], values[1], values[2].replace('*', '')) yield (values[0], _tag_replacements[values[1]], values[2].replace('*', ''))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment