nouveau fichier : admin/env.py

Has to be imported from /srv/gargantext/test*py (environment configuration of Django).
nouveau fichier : admin/mergeuntable2prod.sh
Script to automatically merge the main branches, and push all of them if needed.
nouveau fichier : admin/update_corpus.py
Script to update corpora (needs to be executed later to clean the database).

nouveau fichier : admin/env.py
Has to be imported from /srv/gargantext/test*py (environment configuration of Django).
nouveau fichier : admin/mergeuntable2prod.sh
Script to automatically merge the main branches, and push all of them if needed.
nouveau fichier : admin/update_corpus.py
Script to update corpora (needs to be executed later to clean the database).
cab96c0f · Administrator · be968d2c · cab96c0f · cab96c0f · cab96c0f
Commit cab96c0f authored Jun 04, 2015 by Administrator
Hide whitespace changes
Inline Side-by-side

Showing with 92 additions and 0 deletions

env.py admin/env.py +13 -0

mergeuntable2prod.sh admin/mergeuntable2prod.sh +23 -0

update_corpus.py admin/update_corpus.py +56 -0

No files found.
--- a/admin/env.py
+++ b/admin/env.py
+# Bootstrap module for the admin scripts: it sets the Django-related
+# environment variables, then imports the project's database and corpus
+# helpers so that a script can get all of them with `from env import *`.
+#
+# Without this, we couldn't use the Django environment
+import os
+# setdefault() only writes the variable when it is not already present in
+# the process environment, so an externally supplied value is preserved.
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
+os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
+
+# NOTE(review): these project imports come after the environment setup above;
+# presumably they rely on it, so keep this order.
+from admin.utils import PrintException
+
+# database tools
+from node import models
+# Wildcard imports: every public name of these modules is re-exported by
+# this module.
+from gargantext_web.db import *
+from parsing.corpustools import *
+
+
--- a/admin/mergeuntable2prod.sh
+++ b/admin/mergeuntable2prod.sh
+#!/bin/bash
+
+git checkout unstable
+
+git checkout testing
+git merge unstable
+
+git checkout prod-dev
+git merge testing
+
+git checkout prod
+git merge prod-dev
+
+git checkout unstable
+
+echo "Push ? (yes)"
+
+read y
+
+if [[ $y == "yes" ]]; then
+	echo "je push"
+	git push origin prod prod-dev testing unstable
+fi
--- a/admin/update_corpus.py
+++ b/admin/update_corpus.py
+
+from env import *
+from gargantext_web.db import *
+from parsing.corpustools import *
+
+from gargantext_web.views import move_to_trash, empty_trash
+
+def do_empty():
+    corpus_ids = (session.query(Node.id)
+            .filter(Node.type_id == cache.NodeType['Corpus'].id)
+            .all()
+            )
+
+    for corpus_id in corpus_ids :
+        doc_count = int()
+        doc_count = (session.query(Node.id)
+                .filter(Node.parent_id == corpus_id)
+                .filter(Node.type_id == cache.NodeType['Document'].id)
+                .count()
+                )
+        if doc_count == 0 :
+            move_to_trash(corpus_id)
+
+    empty_trash()
+
+do_empty()
+
+
+def extract_again():
+    corpus_ids = (session.query(Node.id)
+            .join(Node_Resource, Node_Resource.node_id == Node.id)
+            .join(Resource, Node_Resource.resource_id == Resource.id )
+            .join(or_(Resource.name == 'Europress (French)',
+                      Resource.name == 'Europress (English)'))
+            .filter(Node.type_id == cache.NodeType['Corpus'].id )
+            .filter(Node.resource_id == cache.NodeType['Corpus'].id)
+            .all()
+            )
+    print(corpus_ids)
+
+extract_again()
+
+#add_resource(corpus,
+#    # file = './data_samples/pubmed_result.xml',
+#    file = '/srv/gargantext_lib/data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml',
+#    type_id = cache.ResourceType['pubmed'].id,
+#)
+#parse_resources(corpus)
+#extract_ngrams(corpus, ('title', ))
+#
+#
+#
+## print(corpus)
+## corpus = session.query(Node).filter(Node.id == 72771).first()
+## corpus = session.query(Node).filter(Node.id == 73017).first()
+# compute_tfidf(corpus)