Commit 667c9218 authored by Alexandre Delanoë

Merge branch 'stable' into stable-imt

parents dbaeae04 be49e970
"""Optimize title_abstract indexation
Revision ID: 73112a361617
Revises: 1fb4405b59e1
Create Date: 2017-09-15 14:14:51.737963
"""
from alembic import op
import sqlalchemy as sa
from gargantext.util.alembic import ReplaceableObject
# Revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the ID of the
# migration it revises (the "Revision ID" / "Revises" fields of the module
# docstring carry the same two values).
revision = '73112a361617'
down_revision = '1fb4405b59e1'
# No branch labels and no dependencies on other revisions are declared.
branch_labels = None
depends_on = None
# Definition of the `title_abstract_insert` trigger, declared 'AFTER INSERT'
# against 'nodes'.
# NOTE(review): the positional arguments are presumably (trigger name, firing
# event, table, trigger body) -- confirm against
# gargantext.util.alembic.ReplaceableObject.
# The WHEN clause skips rows whose hyperdata, cast to text, equals '{}'; every
# other inserted row executes title_abstract_update_trigger().
title_abstract_insert = ReplaceableObject(
'title_abstract_insert',
'AFTER INSERT',
'nodes',
"""FOR EACH ROW
WHEN (NEW.hyperdata::text <> '{}'::text)
EXECUTE PROCEDURE title_abstract_update_trigger()"""
)
# Definition of the `title_abstract_update` trigger, declared
# 'AFTER UPDATE OF hyperdata' against 'nodes'.
# NOTE(review): the positional arguments are presumably (trigger name, firing
# event, table, trigger body) -- confirm against
# gargantext.util.alembic.ReplaceableObject.
# The WHEN clause compares the (title, abstract) pair extracted from the old
# and new hyperdata with IS DISTINCT FROM, so title_abstract_update_trigger()
# only runs when at least one of those two values differs.
title_abstract_update = ReplaceableObject(
'title_abstract_update',
'AFTER UPDATE OF hyperdata',
'nodes',
"""FOR EACH ROW
WHEN ((OLD.hyperdata ->> 'title', OLD.hyperdata ->> 'abstract')
IS DISTINCT FROM
(NEW.hyperdata ->> 'title', NEW.hyperdata ->> 'abstract'))
EXECUTE PROCEDURE title_abstract_update_trigger()"""
)
def upgrade():
    """Apply this revision.

    Swaps the ``title_abstract_update`` trigger for the definition declared
    in this module (replacing the one registered by revision 1fb4405b59e1),
    then creates the ``title_abstract_insert`` trigger.
    """
    previous_update = "1fb4405b59e1.title_abstract_update"
    op.replace_trigger(title_abstract_update, replaces=previous_update)
    op.create_trigger(title_abstract_insert)
def downgrade():
    """Revert this revision.

    Drops the ``title_abstract_insert`` trigger, then swaps the
    ``title_abstract_update`` trigger back to the definition registered by
    revision 1fb4405b59e1.
    """
    previous_update = "1fb4405b59e1.title_abstract_update"
    op.drop_trigger(title_abstract_insert)
    op.replace_trigger(title_abstract_update, replace_with=previous_update)
......@@ -56,18 +56,15 @@ def extract_ngrams(corpus, keys=DEFAULT_INDEX_FIELDS, do_subngrams = DEFAULT_IND
tagger_bots = {lang: load_tagger(lang) for lang in corpus.hyperdata["languages"] \
if lang != "__unknown__"}
tagger_bots["__unknown__"] = load_tagger("en")
# print("#TAGGERS LOADED: ", tagger_bots)
print("#TAGGERS LOADED: ", tagger_bots)
supported_taggers_lang = tagger_bots.keys()
# print("#SUPPORTED TAGGER LANGS", supported_taggers_lang)
print("#SUPPORTED TAGGER LANGS", list(supported_taggers_lang))
for documents_count, document in enumerate(corpus.children('DOCUMENT')):
#load only the docs that have passed the parsing without error
if document.id not in corpus.hyperdata["skipped_docs"]:
if 'language_iso2' in document.hyperdata:
language_iso2 = document.hyperdata['language_iso2']
else:
language_iso2 = "__unknown__"
language_iso2 = document.hyperdata.get('language_iso2', '__unknown__')
# debug
# print(language_iso2)
......
......@@ -55,17 +55,18 @@ def scan_hal(request):
def scan_gargantext(corpus_id, request):
    """Count the documents of a corpus whose title/abstract match a query.

    Args:
        corpus_id: id of the parent corpus; only ``DocumentNode`` rows whose
            ``parent_id`` equals it are considered.
        request: search expression handed to ``title_abstract.match()``.

    Returns:
        The number of matching rows, as returned by ``Query.count()``.
    """
    # Each condition is stated once, on DocumentNode (the entity being
    # queried), instead of being repeated through filter_by()/Node.
    return (session.query(DocumentNode)
                   .filter(DocumentNode.parent_id == corpus_id)
                   .filter(DocumentNode.title_abstract.match(request))
                   .count())
def scan_gargantext_and_delete(corpus_id, request):
    """Delete the documents of a corpus whose title/abstract match a query.

    Args:
        corpus_id: id of the parent corpus; only ``DocumentNode`` rows whose
            ``parent_id`` equals it are deleted.
        request: search expression handed to ``title_abstract.match()``.

    Returns:
        The value returned by ``Query.delete()`` (the count of rows matched
        by the delete).
    """
    # `==` builds the SQL comparison; a bare `=` inside filter() is a
    # SyntaxError.
    deleted = (session.query(DocumentNode)
                      .filter(DocumentNode.parent_id == corpus_id)
                      .filter(DocumentNode.title_abstract.match(request))
                      .delete(synchronize_session='fetch'))
    # Commit before returning; an early return would leave the deletion
    # uncommitted.
    session.commit()
    return deleted
def myProject_fromUrl(url):
"""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment