Commit 3f346c63 authored by Romain Loth

export terms table: minor corrections on group handling

parent 0fc9b0bc
...@@ -18,7 +18,7 @@ from io import StringIO # pseudo file to write CSV to memory ...@@ -18,7 +18,7 @@ from io import StringIO # pseudo file to write CSV to memory
def query_list(list_id, def query_list(list_id,
pagination_limit=None, pagination_offset=None, pagination_limit=None, pagination_offset=None,
details=False, scoring_metric_id=None details=False, scoring_metric_id=None, groupings_id=None
): ):
""" """
Paginated listing of ngram_ids in a NodeNgram lists. Paginated listing of ngram_ids in a NodeNgram lists.
...@@ -32,17 +32,47 @@ def query_list(list_id, ...@@ -32,17 +32,47 @@ def query_list(list_id,
if True and a scoring_id, send triples with (ngram_id, term, scoring) if True and a scoring_id, send triples with (ngram_id, term, scoring)
- scoring_metric_id: id of a scoring metric node (TFIDF or OCCS) - scoring_metric_id: id of a scoring metric node (TFIDF or OCCS)
(for details and sorting) (for details and sorting)
- groupings_id: optional id of a list of grouping relations (synonyms)
(each synonym will be added to the list if not already in there)
FIXME: subforms appended recently and not generalized enough
=> add a common part for all "if groupings_id"
=> provide the option also in combination with scoring
""" """
# simple contents # simple contents
if not details: if not details:
query = session.query(NodeNgram.ngram_id).filter(NodeNgram.node_id == list_id) query = session.query(NodeNgram.ngram_id).filter(NodeNgram.node_id == list_id)
if groupings_id:
subforms = (session.query(NodeNgramNgram.ngram2_id)
# subform ids...
.filter(NodeNgramNgram.node_id == groupings_id)
# .. that are connected to a mainform
.join(NodeNgram, NodeNgram.ngram_id == NodeNgramNgram.ngram1_id)
# .. which is in the list
.filter(NodeNgram.node_id == list_id)
)
# union with the main q
query = query.union(subforms)
# detailed contents (id + terms) # detailed contents (id + terms)
elif not scoring_metric_id: elif not scoring_metric_id:
query = (session.query(Ngram.id, Ngram.terms, Ngram.n) query = (session.query(Ngram.id, Ngram.terms, Ngram.n)
.join(NodeNgram, NodeNgram.ngram_id == Ngram.id) .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
.filter(NodeNgram.node_id == list_id) .filter(NodeNgram.node_id == list_id)
) )
if groupings_id:
subforms = (session.query(Ngram.id, Ngram.terms, Ngram.n)
.join(NodeNgramNgram, NodeNgramNgram.ngram2_id == Ngram.id)
# subform ids...
.filter(NodeNgramNgram.node_id == groupings_id)
# .. that are connected to a mainform
.join(NodeNgram, NodeNgram.ngram_id == NodeNgramNgram.ngram1_id)
# .. which is in the list
.filter(NodeNgram.node_id == list_id)
)
# union with the main q
query = query.union(subforms)
# detailed contents (id + terms) + score # detailed contents (id + terms) + score
else: else:
...@@ -175,9 +205,9 @@ def export_ngramlists(node,fname=None,delimiter="\t",titles=False): ...@@ -175,9 +205,9 @@ def export_ngramlists(node,fname=None,delimiter="\t",titles=False):
# listes de ngram_ids correspondantes # listes de ngram_ids correspondantes
# ------------------------------------ # ------------------------------------
# contenu: liste des objets ngrammes [(2562,"monterme",1),...] # contenu: liste des objets ngrammes [(2562,"monterme",1),...]
stop_ngrams = query_list(stoplist_node.id, details=True).all() stop_ngrams = query_list(stoplist_node.id, details=True, groupings_id=group_node.id).all()
main_ngrams = query_list(mainlist_node.id, details=True).all() main_ngrams = query_list(mainlist_node.id, details=True, groupings_id=group_node.id).all()
map_ngrams = query_list(maplist_node.id, details=True).all() map_ngrams = query_list(maplist_node.id, details=True, groupings_id=group_node.id).all()
# pour debug ---------->8 -------------------- # pour debug ---------->8 --------------------
...@@ -209,8 +239,8 @@ def export_ngramlists(node,fname=None,delimiter="\t",titles=False): ...@@ -209,8 +239,8 @@ def export_ngramlists(node,fname=None,delimiter="\t",titles=False):
list_type="stop") list_type="stop")
# miam contient map donc il y a un préalable ici # miam contient map donc il y a un préalable ici
map_ngram_ids = [ng.id for ng in map_ngrams] map_ngram_ids = {ng.id for ng in map_ngrams}
main_without_map = [ng for ng in main_ngrams if ng not in map_ngram_ids] main_without_map = [ng for ng in main_ngrams if ng.id not in map_ngram_ids]
miam_csv_rows = ngrams_to_csv_rows(main_without_map, miam_csv_rows = ngrams_to_csv_rows(main_without_map,
id_groupings=grouped, id_groupings=grouped,
list_type="main") list_type="main")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment