Commit 64ec1672 authored by Romain Loth

[WIP] début importExport

parent 9e0f9688
import re
from admin.utils import PrintException
from gargantext_web.db import Node, Ngram, NodeNgram,NodeNodeNgram
from gargantext_web.db import Node, Ngram, NodeNgram, NodeNodeNgram, NodeNgramNgram
from gargantext_web.db import cache, session, get_or_create_node, bulk_insert
import sqlalchemy as sa
......@@ -13,44 +13,73 @@ from sqlalchemy.orm import aliased
from ngram.tools import insert_ngrams
from analysis.lists import WeightedList, UnweightedList
from collections import defaultdict
def exportNgramList(node, filename):
    """Export the stop, miam and map ngram lists of a corpus to a CSV file.

    One row is written per (list, ngram) membership, with three columns:

    * the ngram id;
    * the ids grouped under that ngram in the corpus' ``Group`` node,
      sorted and joined with ``|`` (empty when the ngram heads no group);
    * the list weight: 0 for the StopList, 1 for the MiamList and 2 for
      the MapList.

    An ngram that belongs to several lists gets one row per list.

    :param node: corpus node whose lists are exported.
    :param filename: path of the CSV file to create (or overwrite).
    """
    import csv

    def list_ngram_ids(nodetype):
        """Return the ngram ids attached to the corpus' *nodetype* list node."""
        list_node = get_or_create_node(nodetype=nodetype, corpus=node)
        rows = (session.query(NodeNgram.ngram_id)
                       .filter(NodeNgram.node_id == list_node.id)
                       .all())
        # Each result row is a 1-tuple: unpack it so that callers get bare
        # ids that can be compared with the keys of the group mapping.
        return [ngram_id for (ngram_id,) in rows]

    # ngram ids of each list, in increasing weight order
    # --------------------------------------------------
    # content: lists of ids, e.g. [2562, ...]
    weighted_lists = [
        (0, list_ngram_ids('StopList')),
        (1, list_ngram_ids('MiamList')),
        (2, list_ngram_ids('MapList')),
    ]

    # the group list holds couples of ngram ids
    # -----------------------------------------
    # e.g. [(3544, 2353), (2787, 4032), ...]
    group_node = get_or_create_node(nodetype='Group', corpus=node)
    group_couples = (session.query(NodeNgramNgram.ngramx_id,
                                   NodeNgramNgram.ngramy_id)
                            .filter(NodeNgramNgram.node_id == group_node.id)
                            .all())

    # fold the couples into sets: [(a => x), (a => y)] => {a: {x, y}}
    grouped = defaultdict(set)
    for ngramx_id, ngramy_id in group_couples:
        grouped[ngramx_id].add(ngramy_id)

    # TODO: export the terms as well; the Ngram objects can be fetched with
    #       session.query(Ngram).filter(Ngram.id.in_(ngram_ids)).all()

    # Build every row before opening the file, so that a database error
    # cannot leave a half-written export behind.
    rows = []
    for weight, ngram_ids in weighted_lists:
        for ngram_id in ngram_ids:
            # .get() avoids inserting empty sets into the defaultdict
            members = grouped.get(ngram_id, ())
            rows.append((
                ngram_id,
                "|".join(str(member) for member in sorted(members)),
                weight,
            ))

    # to csv (newline="" lets the csv module control line endings itself)
    with open(filename, "w", newline="") as f:
        csv.writer(f).writerows(rows)
def importNgramList(node,filename):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment