Commit 64ec1672 authored by Romain Loth

[WIP] début importExport

parent 9e0f9688
import re import re
from admin.utils import PrintException from admin.utils import PrintException
from gargantext_web.db import Node, Ngram, NodeNgram,NodeNodeNgram from gargantext_web.db import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNgramNgram
from gargantext_web.db import cache, session, get_or_create_node, bulk_insert from gargantext_web.db import cache, session, get_or_create_node, bulk_insert
import sqlalchemy as sa import sqlalchemy as sa
...@@ -13,44 +13,73 @@ from sqlalchemy.orm import aliased ...@@ -13,44 +13,73 @@ from sqlalchemy.orm import aliased
from ngram.tools import insert_ngrams from ngram.tools import insert_ngrams
from analysis.lists import WeightedList, UnweightedList from analysis.lists import WeightedList, UnweightedList
from collections import defaultdict
def exportNgramList(node,filename):
    """Export the stop, miam and map ngram lists of a corpus to a CSV file.

    One row is written per (list, ngram) membership, with three columns:
    the ngram id, the ids grouped under that ngram in the corpus' Group
    node (joined with "|", empty when there are none), and a weight that
    identifies the source list (0 = StopList, 1 = MiamList, 2 = MapList).
    An ngram that belongs to several lists yields one row per list.

    NOTE(review): the column layout is a reconstruction of the unfinished
    "to csv" step; confirm it against what importNgramList expects.

    Args:
        node: corpus node whose lists are exported; it is passed as
            ``corpus`` to ``get_or_create_node``.
        filename: path of the CSV file to create or overwrite.
    """
    # Local import so this function does not depend on a module-level change.
    import csv

    # Nodes covering each list, fetched in the same order as before.
    stop_node = get_or_create_node(nodetype='StopList', corpus=node)
    miam_node = get_or_create_node(nodetype='MiamList', corpus=node)
    map_node = get_or_create_node(nodetype='MapList', corpus=node)
    group_node = get_or_create_node(nodetype='Group', corpus=node)

    # The group list holds couples of ngram ids, e.g. [(3544, 2353), ...].
    group_couples = (session.query(NodeNgramNgram.ngramx_id, NodeNgramNgram.ngramy_id)
                     .filter(NodeNgramNgram.node_id == group_node.id)
                     .all()
                     )

    # Fold the couples into sets: [(a, x), (a, y)] => {a: {x, y}}.
    grouped = defaultdict(set)
    for ngramx_id, ngramy_id in group_couples:
        grouped[ngramx_id].add(ngramy_id)

    rows = []
    for list_node, weight in ((stop_node, 0), (miam_node, 1), (map_node, 2)):
        id_rows = (session.query(NodeNgram.ngram_id)
                   .filter(NodeNgram.node_id == list_node.id)
                   .all()
                   )
        # Each result is a one-column row, so unpack it to get the bare id.
        for (ngram_id,) in id_rows:
            # .get() keeps the defaultdict from growing empty entries.
            members = grouped.get(ngram_id, ())
            rows.append((ngram_id, "|".join(str(member) for member in sorted(members)), weight))

    # newline="" is what the csv module requires of files it writes to.
    with open(filename, "w", newline="", encoding="utf-8") as f:
        csv.writer(f).writerows(rows)
def importNgramList(node,filename): def importNgramList(node,filename):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment