Commit 60ec5d88 authored by delanoe's avatar delanoe

[FIX][NGRAMS WORKFLOW] Default Map List improved in quality. Remark: map seems...

[FIX][NGRAMS WORKFLOW] Default Map List improved in quality. Remark: the map seems to be less impressive. We may need to increase the threshold of the map list selection.
parent b3faf308
...@@ -7,7 +7,7 @@ from gargantext.models.ngrams import Node, Ngram, NodeNgram, \ ...@@ -7,7 +7,7 @@ from gargantext.models.ngrams import Node, Ngram, NodeNgram, \
from gargantext.util.db import session, aliased, func from gargantext.util.db import session, aliased, func
from gargantext.util.db_cache import cache from gargantext.util.db_cache import cache
from gargantext.util.lists import UnweightedList from gargantext.util.lists import UnweightedList
from sqlalchemy import desc from sqlalchemy import desc, asc
from gargantext.constants import DEFAULT_MAPLIST_MAX,\ from gargantext.constants import DEFAULT_MAPLIST_MAX,\
DEFAULT_MAPLIST_MONOGRAMS_RATIO DEFAULT_MAPLIST_MONOGRAMS_RATIO
...@@ -70,7 +70,7 @@ def do_maplist(corpus, ...@@ -70,7 +70,7 @@ def do_maplist(corpus,
# TODO: move these 2 pools up to mainlist selection # TODO: move these 2 pools up to mainlist selection
top_monograms = (query top_monograms = (query
.filter(Ngram.n == 1) .filter(Ngram.n == 1)
.order_by(desc(ScoreSpec.weight)) .order_by(asc(ScoreSpec.weight))
.limit(monograms_limit) .limit(monograms_limit)
.all() .all()
) )
......
...@@ -7,6 +7,7 @@ from gargantext.util.db import session, aliased, func, bulk_insert ...@@ -7,6 +7,7 @@ from gargantext.util.db import session, aliased, func, bulk_insert
from gargantext.util.lists import WeightedList from gargantext.util.lists import WeightedList
from collections import defaultdict from collections import defaultdict
from pandas import DataFrame from pandas import DataFrame
import pandas as pd
def compute_specificity(corpus, cooc_id=None, overwrite_id = None): def compute_specificity(corpus, cooc_id=None, overwrite_id = None):
''' '''
...@@ -33,13 +34,23 @@ def compute_specificity(corpus, cooc_id=None, overwrite_id = None): ...@@ -33,13 +34,23 @@ def compute_specificity(corpus, cooc_id=None, overwrite_id = None):
print("SPECIFICITY: computing on %i ngrams" % nb_ngrams) print("SPECIFICITY: computing on %i ngrams" % nb_ngrams)
d = DataFrame(matrix).fillna(0) x = DataFrame(matrix).fillna(0)
# proba (x/y) ( <= on divise chaque colonne par son total) # proba (x/y) ( <= on divise chaque ligne par son total)
d = d / d.sum(axis=0) x = x / x.sum(axis=1)
# vectorisation
# d:Matrix => v: Vector (len = nb_ngrams) # d:Matrix => v: Vector (len = nb_ngrams)
v = d.sum(axis=1) # v = d.sum(axis=1) (- lui-même)
xs = x.sum(axis=1) - x
ys = x.sum(axis=0) - x
# top inclus ou exclus
#n = ( xs + ys) / (2 * (x.shape[0] - 1))
# top generic or specific (asc is spec, desc is generic)
v = ( xs - ys) / ( 2 * (x.shape[0] - 1))
## d ## ## d ##
####### #######
...@@ -66,7 +77,7 @@ def compute_specificity(corpus, cooc_id=None, overwrite_id = None): ...@@ -66,7 +77,7 @@ def compute_specificity(corpus, cooc_id=None, overwrite_id = None):
# pour l'instant on va utiliser les sommes en ligne comme ranking de spécificité # pour l'instant on va utiliser les sommes en ligne comme ranking de spécificité
# (**même** ordre qu'avec la formule d'avant le refactoring mais calcul + simple) # (**même** ordre qu'avec la formule d'avant le refactoring mais calcul + simple)
# TODO analyser la cohérence math ET sem de cet indicateur # TODO analyser la cohérence math ET sem de cet indicateur
v.sort_values(inplace=True) #v.sort_values(inplace=True)
# [ ('biodiversité' , 0.333 ), # [ ('biodiversité' , 0.333 ),
# ('Grenelle' , 0.5 ), # ('Grenelle' , 0.5 ),
...@@ -92,10 +103,11 @@ def compute_specificity(corpus, cooc_id=None, overwrite_id = None): ...@@ -92,10 +103,11 @@ def compute_specificity(corpus, cooc_id=None, overwrite_id = None):
the_id = specnode.id the_id = specnode.id
# print(v) # print(v)
pd.options.display.float_format = '${:,.2f}'.format
data = WeightedList( data = WeightedList(
zip( v.index.tolist() zip( v.index.tolist()
, v.values.tolist() , v.values.tolist()[0]
) )
) )
data.save(the_id) data.save(the_id)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment