Commit cab96c0f authored by Administrator

nouveau fichier : admin/env.py

Has to be imported from /srv/gargantext/test*py
(Environment configuration of Django)

	nouveau fichier : admin/mergeuntable2prod.sh
Script to automatically merge main branches
Push all of them if needed

	nouveau fichier : admin/update_corpus.py
Script to update corpora (needs to be executed later to clean the database)
parent be968d2c
# Django environment bootstrap: the variables below are set before the
# project modules are imported, so keep this statement order.
# Without this, we couldn't use the Django environment
import os
# setdefault() only assigns when the variable is not already present, so a
# value exported by the calling shell takes precedence.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
# NOTE(review): presumably read by django-hstore to skip its global hstore
# registration -- confirm against that package's documentation.
os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
from admin.utils import PrintException
# database tools
from node import models
from gargantext_web.db import *
from parsing.corpustools import *
#!/bin/bash
# Cascade-merge the main branches (unstable -> testing -> prod-dev -> prod),
# return to unstable, then optionally push all four branches to origin.

# Abort at the first failing git command so that a failed checkout or a
# conflicting merge is never carried into the next branch or pushed.
set -e

# Checking out the source branch first makes the script stop right away
# (because of set -e) when that branch cannot be checked out.
git checkout unstable

git checkout testing
git merge unstable

git checkout prod-dev
git merge testing

git checkout prod
git merge prod-dev

# Go back to the branch the cascade started from.
git checkout unstable

echo "Push ? (yes)"
# -r keeps backslashes literal; "|| true" keeps an EOF on stdin from
# tripping set -e (the answer is then whatever was read, possibly empty).
read -r y || true
if [[ $y == "yes" ]]; then
    echo "je push"
    git push origin prod prod-dev testing unstable
fi
from env import *
from gargantext_web.db import *
from parsing.corpustools import *
from gargantext_web.views import move_to_trash, empty_trash
def do_empty():
    """Move every corpus without documents to the trash, then empty it.

    Queries the ids of all nodes whose type is 'Corpus', counts the nodes of
    type 'Document' parented to each of them, and calls ``move_to_trash``
    with the id of each corpus whose count is zero.  ``empty_trash`` is
    called once after the scan.
    """
    # Loop-invariant type ids, looked up once.
    corpus_type_id = cache.NodeType['Corpus'].id
    document_type_id = cache.NodeType['Document'].id

    corpus_rows = (session.query(Node.id)
        .filter(Node.type_id == corpus_type_id)
        .all()
    )
    # query(Node.id).all() yields one-column rows rather than bare ids;
    # unpack each row so a scalar id is compared against Node.parent_id and
    # handed to move_to_trash.
    for (corpus_id,) in corpus_rows:
        doc_count = (session.query(Node.id)
            .filter(Node.parent_id == corpus_id)
            .filter(Node.type_id == document_type_id)
            .count()
        )
        if doc_count == 0:
            move_to_trash(corpus_id)
    # NOTE(review): the original nesting of this call could not be recovered
    # from the flattened source; it is placed after the loop so the trash is
    # emptied once -- confirm this matches the intent.
    empty_trash()
# Module-level call: the cleanup runs as soon as this file is executed.
do_empty()
def extract_again():
    """Print the ids of corpus nodes linked to a Europress resource.

    Joins Node to Resource through Node_Resource, keeps the rows whose
    resource name is 'Europress (French)' or 'Europress (English)' and whose
    node type is 'Corpus', then prints the resulting list of id rows.
    Nothing is returned.
    """
    corpus_ids = (session.query(Node.id)
        .join(Node_Resource, Node_Resource.node_id == Node.id)
        .join(Resource, Node_Resource.resource_id == Resource.id)
        # The name condition is a row predicate, not a join target, so it
        # goes through filter() instead of join().
        .filter(or_(Resource.name == 'Europress (French)',
                    Resource.name == 'Europress (English)'))
        .filter(Node.type_id == cache.NodeType['Corpus'].id)
        # NOTE(review): this compares a resource id with the id of the
        # 'Corpus' node type, which looks unintended -- kept as written,
        # confirm or remove.
        .filter(Node.resource_id == cache.NodeType['Corpus'].id)
        .all()
    )
    print(corpus_ids)
# Module-level call: the Europress corpus ids are printed when this file is executed.
extract_again()
#add_resource(corpus,
# # file = './data_samples/pubmed_result.xml',
# file = '/srv/gargantext_lib/data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml',
# type_id = cache.ResourceType['pubmed'].id,
#)
#parse_resources(corpus)
#extract_ngrams(corpus, ('title', ))
#
#
#
## print(corpus)
## corpus = session.query(Node).filter(Node.id == 72771).first()
## corpus = session.query(Node).filter(Node.id == 73017).first()
# compute_tfidf(corpus)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment