Commit f7d58faf authored by delanoe

[MERGE] merge of Romain and Mathieu branches.

parents 309e6c69 eec89097
......@@ -9,29 +9,32 @@ LISTTYPES = {
'STOPLIST' : UnweightedList,
'MAINLIST' : UnweightedList,
'MAPLIST' : UnweightedList,
'OCCURRENCES' : WeightedList,
'SPECIFICITY' : WeightedList,
'OCCURRENCES' : WeightedContextIndex,
'COOCCURRENCES': WeightedMatrix,
'TFIDF-CORPUS' : WeightedContextIndex,
'TFIDF-GLOBAL' : WeightedContextIndex,
}
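# Usage sketch (illustrative, mirroring the toolchain modules below): each entry
# maps a node typename to the list structure used to build and save it, e.g.
#   matrix = LISTTYPES["COOCCURRENCES"](cooc_query)                     # WeightedMatrix
#   stop   = LISTTYPES["STOPLIST"]({ngram_id: -1 for ngram_id in ids})  # UnweightedList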
NODETYPES = [
None,
# documents hierarchy
'USER',
'PROJECT',
'CORPUS',
'DOCUMENT',
'USER', # 1
'PROJECT', # 2
'CORPUS', # 3
'DOCUMENT', # 4
# lists
'STOPLIST',
'GROUPLIST',
'MAINLIST',
'MAPLIST',
'COOCCURRENCES',
'STOPLIST', # 5
'GROUPLIST', # 6
'MAINLIST', # 7
'MAPLIST', # 8
'COOCCURRENCES', # 9
# scores
'OCCURRENCES',
'SPECIFICITY',
'CVALUE',
'TFIDF-CORPUS',
'TFIDF-GLOBAL',
'OCCURRENCES', # 10
'SPECIFICITY', # 11
'CVALUE', # 12
'TFIDF-CORPUS', # 13
'TFIDF-GLOBAL', # 14
]
import datetime
......@@ -108,6 +111,21 @@ RESOURCETYPES = [
# },
]
# linguistic extraction parameters ---------------------------------------------
DEFAULT_TFIDF_CUTOFF_RATIO = .45 # MAINLIST maximum terms (ratio of all terms)
DEFAULT_TFIDF_HARD_LIMIT = 750 # MAINLIST maximum terms (absolute cap)
# (makes COOCS larger ~ O(N²) /!\)
DEFAULT_COOC_THRESHOLD = 5 # inclusive minimum for COOCS coefs
# (makes COOCS more sparse)
DEFAULT_MAPLIST_MAX = 300 # MAPLIST maximum terms
DEFAULT_MAPLIST_MONOGRAMS_RATIO = .5 # part of monograms in MAPLIST
# (NB: used to be 0.005 !!)
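# Illustrative arithmetic for these cutoffs (hypothetical corpus of 2000 ngrams):
#   mainlist keeps min(DEFAULT_TFIDF_HARD_LIMIT, round(2000 * DEFAULT_TFIDF_CUTOFF_RATIO))
#                = min(750, 900) = 750 terms
#   maplist then splits its DEFAULT_MAPLIST_MAX = 300 terms into
#                round(300 * .5) = 150 monograms and 150 multigrams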
# ------------------------------------------------------------------------------
# other parameters
# default number of docs POSTed to scrappers.views.py
......
......@@ -19,7 +19,7 @@ class NodeNgram(Base):
weight = Column(Float)
class NodeNodeNgram(Base):
""" for instance for tfidf:
""" for instance for TFIDF
(
doc ::Node ,
corpus ::Node ,
......@@ -37,8 +37,16 @@ class NodeNodeNgram(Base):
# (cf. www.postgresql.org/docs/9.4/static/datatype-numeric.html#DATATYPE-FLOAT)
class NodeNgramNgram(Base):
""" for instance for COOCCURRENCES and GROUPLIST
(
cooc_node/group_node ::Node ,
term_A ::Ngram ,
term_B ::Ngram ,
weight ::Float (real)
)
"""
__tablename__ = 'nodes_ngrams_ngrams'
node_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'), primary_key=True)
ngram1_id = Column(Integer, ForeignKey(Ngram.id, ondelete='CASCADE'), primary_key=True)
ngram2_id = Column(Integer, ForeignKey(Ngram.id, ondelete='CASCADE'), primary_key=True)
weight = Column(Float)
weight = Column(Float(precision=24)) # see comment for NodeNodeNgram.score
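# Query sketch (illustrative, list_id being the id of a COOCCURRENCES or GROUPLIST
# node, same pattern as in the toolchain modules below):
#   session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id == list_id)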
from gargantext.util.db import *
from gargantext.util.db_cache import *
from gargantext.constants import *
from gargantext.models.nodes import Node
from gargantext.models.ngrams import Ngram, NodeNgram, NodeNgramNgram, \
NodeHyperdataNgram, NodeHyperdata, Hyperdata
from sqlalchemy import desc, asc, or_, and_, Date, cast, select
from sqlalchemy import literal_column
from sqlalchemy.orm import aliased
from sqlalchemy.sql import func
import datetime
import inspect
def do_cooc(corpus=None
, field1='ngrams', field2='ngrams'
, main_id=None, stop_id=None, group_id=None
, cvalue_id=None
, n_min=1, n_max=None
, start=None, end=None
, limit=1000
, isMonopartite=True
, hapax = 3
, session=None):
'''
Compute the cooccurrence matrix and save it, returning NodeNgramNgram.node_id
For the moment, lists of parameters are not supported because lists need to
be merged beforehand.
corpus :: Corpus
cvalue_id :: Int
main_id :: Int
stop_id :: Int
group_id :: Int
For the moment, start and end stay simple: only the year granularity is implemented.
start :: TimeStamp -- example: '2010-05-30 02:00:00+02'
end :: TimeStamp
limit :: Int
'''
# TODO : add hyperdata here
# Security test
field1,field2 = str(field1), str(field2)
# Get node
node_cooc = session.query(Node).filter(
Node.parent_id==corpus.id,
Node.typename == "COOCCURRENCES"
).first()
if node_cooc is None:
node_cooc = Node(
name="Cooccurrences node",
parent_id=corpus.id,
user_id=corpus.user_id,
typename="COOCCURRENCES")
session.add(node_cooc)
session.commit()
# BEGIN
# Saving the parameters of the analysis in the Node JSONB hyperdata field
args, _, _, parameters = inspect.getargvalues(inspect.currentframe())
# hyperdata = dict()
#
# for parameter in parameters.keys():
# if parameter != 'corpus' and parameter != 'node_cooc':
# hyperdata[parameter] = parameters[parameter]
#
# node_cooc.hyperdata = hyperdata
#
# session.add(node_cooc)
# session.commit()
# END
session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==node_cooc.id).delete()
session.commit()
NodeNgramX = aliased(NodeNgram)
cooc_score = func.count(NodeNgramX.node_id).label('cooc_score')
#cooc_score = func.sqrt(func.sum(NodeNgramX.weight * NodeNgramY.weight)).label('cooc_score')
#print([n for n in test_query])
if isMonopartite :
NodeNgramY = aliased(NodeNgram)
cooc_query = (session.query(NodeNgramX.ngram_id, NodeNgramY.ngram_id, cooc_score)
.join(Node, Node.id == NodeNgramX.node_id)
.join(NodeNgramY, NodeNgramY.node_id == Node.id)
.filter(Node.parent_id==corpus.id, Node.typename=="DOCUMENT")
)
else :
NodeNgramY = aliased(NodeNgram)
cooc_query = (session.query(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id, cooc_score)
.join(Node, Node.id == NodeHyperdataNgram.node_id)
.join(NodeNgramY, NodeNgramY.node_id == Node.id)
.join(Hyperdata, Hyperdata.id == NodeHyperdataNgram.hyperdata_id)
.filter(Node.parent_id == corpus.id, Node.typename == "DOCUMENT")
.filter(Hyperdata.name == field1)
)
#print(cooc_query)
# Size of the ngrams between n_min and n_max
if n_min is not None or n_max is not None:
if isMonopartite:
NgramX = aliased(Ngram)
cooc_query = cooc_query.join(NgramX, NgramX.id == NodeNgramX.ngram_id)
NgramY = aliased(Ngram)
cooc_query = (cooc_query
.join(NgramY, NgramY.id == NodeNgramY.ngram_id)
)
if n_min is not None:
cooc_query = (cooc_query
.filter(NgramY.n >= n_min)
)
if isMonopartite:
cooc_query = cooc_query.filter(NgramX.n >= n_min)
if n_max is not None:
cooc_query = (cooc_query
.filter(NgramY.n <= n_max)
)
if isMonopartite:
cooc_query = cooc_query.filter(NgramX.n <= n_max)
# Cooc between the dates start and end
if start is not None:
#date_start = datetime.datetime.strptime ("2001-2-3 10:11:12", "%Y-%m-%d %H:%M:%S")
# TODO : handle more complex date formats here.
date_start = datetime.datetime.strptime (str(start), "%Y-%m-%d")
date_start_utc = date_start.strftime("%Y-%m-%d %H:%M:%S")
Start=aliased(NodeHyperdata)
StartFormat = aliased(Hyperdata)
cooc_query = (cooc_query.join(Start, Start.node_id == Node.id)
.join(StartFormat, StartFormat.id == Start.hyperdata_id)
.filter(StartFormat.name == 'publication_date')
.filter(Start.value_datetime >= date_start_utc)
)
if end is not None:
# TODO : handle more complex date formats here.
date_end = datetime.datetime.strptime (str(end), "%Y-%m-%d")
date_end_utc = date_end.strftime("%Y-%m-%d %H:%M:%S")
End=aliased(NodeHyperdata)
EndFormat = aliased(Hyperdata)
cooc_query = (cooc_query.join(End, End.node_id == Node.id)
.join(EndFormat, EndFormat.id == End.hyperdata_id)
.filter(EndFormat.name == 'publication_date')
.filter(End.value_datetime <= date_end_utc)
)
if isMonopartite:
# Cooc is symmetric: keep only one triangle of the matrix and cut at the limit
cooc_query = cooc_query.filter(NodeNgramX.ngram_id < NodeNgramY.ngram_id)
cooc_query = cooc_query.having(cooc_score > hapax)
if isMonopartite:
cooc_query = cooc_query.group_by(NodeNgramX.ngram_id, NodeNgramY.ngram_id)
else:
cooc_query = cooc_query.group_by(NodeHyperdataNgram.ngram_id, NodeNgramY.ngram_id)
cooc_query = cooc_query.order_by(desc('cooc_score'))
# END of the query
matrix = LISTTYPES["COOCCURRENCES"](cooc_query)
#print(matrix)
if isMonopartite:
if main_id is not None :
main_list = LISTTYPES["MAINLIST"](main_id)
if stop_id is not None :
stop_list = LISTTYPES["STOPLIST"](stop_id)
if group_id is not None :
group_list = LISTTYPES["GROUPLIST"](group_id)
if main_id is not None and stop_id is None and group_id is None :
cooc = matrix & main_list
elif main_id is not None and stop_id is not None and group_id is None :
cooc = matrix & (main_list - stop_list)
elif main_id is not None and stop_id is not None and group_id is not None :
print("main_id is not None and stop_id is not None and group_id is not None")
cooc = matrix & (main_list * group_list - stop_list)
#cooc = matrix & (main_list - stop_list)
elif main_id is not None and stop_id is None and group_id is not None :
cooc = matrix & (main_list * group_list)
else :
cooc = matrix
else:
cooc = matrix
cooc.save(node_cooc.id)
return(node_cooc.id)
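# Usage sketch (hypothetical ids and session, not part of this commit):
#   cooc_id = do_cooc(corpus=corpus,
#                     main_id=main_id, stop_id=stop_id, group_id=group_id,
#                     start='2010-01-01', end='2015-12-31',
#                     session=session)
#   # => id of the COOCCURRENCES node whose NodeNgramNgram rows hold the matrix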
......@@ -2,7 +2,7 @@
"""
__all__ = ['Translations', 'WeightedMatrix', 'UnweightedList', 'WeightedList']
__all__ = ['Translations', 'WeightedMatrix', 'UnweightedList', 'WeightedList', 'WeightedContextIndex']
from gargantext.util.db import session, bulk_insert
......@@ -70,8 +70,10 @@ class _BaseClass:
class Translations(_BaseClass):
def __init__(self, source=None):
def __init__(self, source=None, just_items=False):
self.items = defaultdict(int)
# TODO lazyinit for groups
# (not necessary for save)
self.groups = defaultdict(set)
if source is None:
return
......@@ -83,15 +85,35 @@ class Translations(_BaseClass):
.filter(NodeNgramNgram.node_id == source)
)
self.items.update(query)
for key, value in self.items.items():
self.groups[value].add(key)
if not just_items:
for key, value in self.items.items():
self.groups[value].add(key)
elif isinstance(source, Translations):
self.items.update(source.items)
self.groups.update(source.groups)
if not just_items:
self.groups.update(source.groups)
elif hasattr(source, '__iter__'):
# not very intuitive with update here:
# /!\ source must be "reversed" (like self.items)
# bad example
# In > couples = [(1, 2), (1, 3)]
# In > tlko = Translations(couples)
# Out> Translations {1: 3}
# In > tlko.save()
# DB-- 3 -> 1
# good example
# In > reversed_couples = [(2, 1), (3, 1)]
# In > tlok = Translations(reversed_couples)
# Out> Translations {2: 1, 3: 1}
# In > tlok.save()
# DB-- 1 -> 2
# DB-- 1 -> 3
self.items.update(source)
for key, value in self.items.items():
self.groups[value].add(key)
if not just_items:
for key, value in self.items.items():
self.groups[value].add(key)
else:
raise TypeError
......@@ -138,11 +160,29 @@ class Translations(_BaseClass):
# insert new data
bulk_insert(
NodeNgramNgram,
('node_id', 'ngram2_id', 'ngram1_id', 'score'),
('node_id', 'ngram2_id', 'ngram1_id', 'weight'),
((node_id, key, value, 1.0) for key, value in self.items.items())
)
class WeightedContextIndex(_BaseClass):
"""
associated model : NodeNodeNgram
associated columns : node1_id | node2_id | ngram_id | score (float)
Tensor representing a contextual index or registry
(matrix of weighted ngrams *per* doc *per* context)
Example: tfidf by corpus
"""
def __init__(self, source=None):
self.items = defaultdict(float)
# £TODO
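# Possible save() sketch (an assumption, since the class body is still a TODO);
# it would mirror how compute_occs() and compute_tfidf() write their
# NodeNodeNgram rows via bulk_insert:
#
#   def save(self, node_id, context_id):
#       # one row per (scoring node, context node, ngram) triple
#       bulk_insert(
#           NodeNodeNgram,
#           ('node1_id', 'node2_id', 'ngram_id', 'score'),
#           ((node_id, context_id, ngram_id, score)
#            for ngram_id, score in self.items.items())
#       )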
class WeightedMatrix(_BaseClass):
def __init__(self, source=None):
......@@ -184,7 +224,7 @@ class WeightedMatrix(_BaseClass):
# insert new data
bulk_insert(
NodeNgramNgram,
('node_id', 'ngram1_id', 'ngram2_id', 'score'),
('node_id', 'ngram1_id', 'ngram2_id', 'weight'),
((node_id, key1, key2, value) for key1, key2, value in self)
)
......
from .parsing import parse
from .ngrams_extraction import extract_ngrams
from .hyperdata_indexing import index_hyperdata
# in usual run order
from .list_stop import do_stoplist
from .metric_tfidf import compute_occs, compute_tfidf
from .list_main import do_mainlist
from .ngram_coocs import compute_coocs
from .metric_specificity import compute_specificity
from .list_map import do_maplist # TEST
from .ngram_groups import compute_groups
from gargantext.util.db import session
from gargantext.models import Node
from datetime import datetime
def parse_extract(corpus):
# retrieve corpus from database from id
......@@ -18,6 +27,12 @@ def parse_extract(corpus):
# apply actions
print('CORPUS #%d' % (corpus.id))
parse(corpus)
# was there an error in the process ?
if corpus.status()['error']:
print("ERROR: aborting parse_extract for corpus #%i" % corpus_id)
return None
print('CORPUS #%d: parsed' % (corpus.id))
extract_ngrams(corpus)
print('CORPUS #%d: extracted ngrams' % (corpus.id))
......@@ -38,3 +53,55 @@ def parse_extract_indexhyperdata(corpus):
print('CORPUS #%d: extracted ngrams' % (corpus.id))
index_hyperdata(corpus)
print('CORPUS #%d: indexed hyperdata' % (corpus.id))
# -------------------------------
# temporary ngram lists workflow
# -------------------------------
print('CORPUS #%d: [%s] starting ngram lists computation' % (corpus.id, t()))
# -> stoplist: filter + write (to Node and NodeNgram)
stop_id = do_stoplist(corpus)
print('CORPUS #%d: [%s] new stoplist node #%i' % (corpus.id, t(), stop_id))
# -> write groups to Node and NodeNgramNgram
group_id = compute_groups(corpus, stoplist_id = None)
print('CORPUS #%d: [%s] new grouplist node #%i' % (corpus.id, t(), group_id))
# -> write occurrences to Node and NodeNodeNgram # possible: factorize with tfidf
occ_id = compute_occs(corpus)
print('CORPUS #%d: [%s] new occs node #%i' % (corpus.id, t(), occ_id))
# ------------
# -> write local tfidf to Node and NodeNodeNgram
ltfidf_id = compute_tfidf(corpus, scope="local")
print('CORPUS #%d: [%s] new localtfidf node #%i' % (corpus.id, t(), ltfidf_id))
# -> write global tfidf to Node and NodeNodeNgram
gtfidf_id = compute_tfidf(corpus, scope="global")
print('CORPUS #%d: [%s] new globaltfidf node #%i' % (corpus.id, t(), gtfidf_id))
# -> mainlist: filter + write (to Node and NodeNgram)
mainlist_id = do_mainlist(corpus,
tfidf_id = gtfidf_id,
stoplist_id = stop_id)
print('CORPUS #%d: [%s] new mainlist node #%i' % (corpus.id, t(), mainlist_id))
# ------------
# -> cooccurrences: compute + write (=> Node and NodeNgramNgram)
cooc_id = compute_coocs(corpus, mainlist_id = mainlist_id)
print('CORPUS #%d: [%s] new coocs node #%i' % (corpus.id, t(), cooc_id))
# -> specificity: compute + write (=> Node and NodeNgram)
spec_id = compute_specificity(corpus, cooc_id=cooc_id)
print('CORPUS #%d: [%s] new specificity node #%i' % (corpus.id, t(), spec_id))
# ?? maplist: compute + write (to Node and NodeNgram)
map_id = do_maplist(corpus,
mainlist_id = mainlist_id,
specificity_id=spec_id,
grouplist_id=group_id)
print('CORPUS #%d: [%s] new maplist node #%i' % (corpus.id, t(), map_id))
def t():
return datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
from gargantext.models import Node, NodeNgram, NodeNodeNgram
from gargantext.util.db import session
from gargantext.util.lists import UnweightedList
from sqlalchemy import desc
from gargantext.constants import DEFAULT_TFIDF_CUTOFF_RATIO, \
DEFAULT_TFIDF_HARD_LIMIT
def do_mainlist(corpus,
overwrite_id = None,
tfidf_id=None, stoplist_id=None,
hard_limit=DEFAULT_TFIDF_HARD_LIMIT,
ratio_limit=DEFAULT_TFIDF_CUTOFF_RATIO
):
"""
Select top n terms according to a global tfidf ranking and stoplist filter.
The number of selected terms will be:
min(hard_limit, number_of_terms * ratio_limit)
NB: We use a global tfidf node where the values are global but the ngrams
are already restricted (== only those within this corpus's documents).
TO DISCUSS: allow influence of the local tfidf scores too
Parameters:
- the corpus itself
- a tfidf score for ranking the ngrams
- a stoplist for filtering some ngrams
- overwrite_id: optional id of a pre-existing MAINLIST node for this corpus
(the Node and its previous NodeNgram rows will be replaced)
+ 2 limits to set the amount of picked terms:
- ratio_limit ∈ [0,1]: a ratio relative to the number of distinct ngrams
(default: 0.45, cf. DEFAULT_TFIDF_CUTOFF_RATIO)
- hard_limit: an absolute max value
(default: 750, cf. DEFAULT_TFIDF_HARD_LIMIT)
"""
# retrieve helper nodes if not provided
if not tfidf_id:
tfidf_id = session.query(Node.id).filter(
Node.typename == "TFIDF-GLOBAL",
Node.parent_id == corpus.id
).first()
if not tfidf_id:
raise ValueError("MAINLIST: TFIDF node needed for mainlist creation")
if not stoplist_id:
stoplist_id = session.query(Node.id).filter(
Node.typename == "STOPLIST",
Node.parent_id == corpus.id
).first()
if not stoplist_id:
raise ValueError("MAINLIST: STOPLIST node needed for mainlist creation")
# the ngrams we don't want
# NOTE: make sure we do this only once during the initial ngram workflow
stopterms_subquery = (session
.query(NodeNgram.ngram_id)
.filter(NodeNgram.node_id == stoplist_id)
.subquery()
)
# tfidf-ranked query
ordered_filtered_tfidf = (session
.query(NodeNodeNgram.ngram_id)
.filter(NodeNodeNgram.node1_id == tfidf_id)
.filter(~ NodeNodeNgram.ngram_id.in_(stopterms_subquery))
.order_by(desc(NodeNodeNgram.score))
)
# total count
nb_ngrams = ordered_filtered_tfidf.count()
# apply ratio to find smallest limit
our_limit = min(hard_limit, round(nb_ngrams * ratio_limit))
print("MAINLIST: keeping %i ngrams out of %i" % (our_limit,nb_ngrams))
# DB retrieve up to limit => MAINLIST
top_ngrams_ids = ordered_filtered_tfidf.limit(our_limit).all()
if overwrite_id:
# overwrite pre-existing id
the_id = overwrite_id
# mainlist = cache.Node[overwrite_id]
else:
# now create the new MAINLIST node
mainlist = corpus.add_child(
typename = "MAINLIST",
name = "Mainlist (in:%s)" % corpus.id
)
session.add(mainlist)
session.commit()
the_id = mainlist.id
# create UnweightedList object and save (=> new NodeNgram rows)
UnweightedList(top_ngrams_ids).save(the_id)
return the_id
"""
Selects a subset of corpus ngrams to use in the graph map.
"""
from gargantext.models.ngrams import Node, Ngram, NodeNgram, \
NodeNgramNgram, NodeNodeNgram
from gargantext.util.db import session, aliased, func
from gargantext.util.db_cache import cache
from gargantext.util.lists import UnweightedList
from sqlalchemy import desc
from gargantext.constants import DEFAULT_MAPLIST_MAX,\
DEFAULT_MAPLIST_MONOGRAMS_RATIO
def do_maplist(corpus,
overwrite_id = None,
mainlist_id = None,
specificity_id = None,
grouplist_id = None,
limit=DEFAULT_MAPLIST_MAX,
monograms_part=DEFAULT_MAPLIST_MONOGRAMS_RATIO
):
'''
Builds the maplist from the specificity ranking, constrained to the mainlist
Parameters:
- mainlist_id (starting point, already cleaned of stoplist terms)
- specificity_id (ranking factor)
- grouplist_id (filtering grouped ones)
- overwrite_id: optional id of a preexisting MAPLIST node to overwrite
+ 2 constants to modulate the terms choice
- limit for the amount of picked terms
- monograms_part: a ratio of terms with only one lexical unit to keep
'''
if not (mainlist_id and specificity_id and grouplist_id):
raise ValueError("Please provide mainlist_id, specificity_id and grouplist_id")
monograms_limit = round(limit * monograms_part)
multigrams_limit = limit - monograms_limit
print("MAPLIST: monograms_limit =", monograms_limit)
print("MAPLIST: multigrams_limit = ", multigrams_limit)
#dbg = DebugTime('Corpus #%d - computing Miam' % corpus.id)
mainterms_subquery = (session
# we want only terms within mainlist
.query(NodeNgram.ngram_id)
.filter(NodeNgram.node_id == mainlist_id)
.subquery()
)
primary_groupterms_subquery = (session
# we want only primary terms (ngram1)
.query(NodeNgramNgram.ngram1_id)
.filter(NodeNgramNgram.node_id == grouplist_id)
.subquery()
)
ScoreSpec=aliased(NodeNgram)
# specificity-ranked
query = (session.query(ScoreSpec.ngram_id)
.join(Ngram, Ngram.id == ScoreSpec.ngram_id)
.filter(ScoreSpec.node_id == specificity_id)
.filter(ScoreSpec.ngram_id.in_(mainterms_subquery))
.filter(ScoreSpec.ngram_id.in_(primary_groupterms_subquery))
)
# TODO: move these 2 pools up to mainlist selection
top_monograms = (query
.filter(Ngram.n == 1)
.order_by(desc(ScoreSpec.weight))
.limit(monograms_limit)
.all()
)
top_multigrams = (query
.filter(Ngram.n >= 2)
.order_by(desc(ScoreSpec.weight))
.limit(multigrams_limit)
.all()
)
print("MAPLIST: top_monograms =", len(top_monograms))
print("MAPLIST: top_multigrams = ", len(top_multigrams))
# NEW MAPLIST NODE
# -----------------
# saving the parameters of the analysis in the Node JSON
new_hyperdata = { 'corpus': corpus.id,
'limit' : limit,
'monograms_part' : monograms_part
}
if overwrite_id:
# overwrite pre-existing node
the_maplist = cache.Node[overwrite_id]
the_maplist.hyperdata = new_hyperdata
the_maplist.save_hyperdata()
session.commit()
the_id = overwrite_id
else:
# create a new maplist node
the_maplist = corpus.add_child(
name="Maplist (in %i)" % corpus.id,
typename="MAPLIST",
hyperdata = new_hyperdata
)
session.add(the_maplist)
session.commit()
the_id = the_maplist.id
# create UnweightedList object and save (=> new NodeNgram rows)
datalist = UnweightedList(
[res.ngram_id for res in top_monograms + top_multigrams]
)
# save
datalist.save(the_id)
# dbg.show('MapList computed')
return the_id
"""
Creates a filtering list for corpus ngrams.
(implementation: regexp + "master" stoplist)
"""
from gargantext.models import User, Node, Ngram, NodeNgram
from gargantext.util.db import session, func
from gargantext.constants import LISTTYPES
from re import compile
from sqlalchemy import desc
def is_stop_word(ngram, stop_words=None):
'''
ngram :: (Int, String) => (ngram_id, ngram_terms)
stop_words :: Set of String
(passed in as a parameter to avoid an SQL query each time is_stop_word is invoked)
'''
word = ngram[1]
if stop_words is not None and word in stop_words:
return(True)
compiled_regexes = [] # to compile them only once
for regex in [
"^.{1,2}$"
, "(.*)\d(.*)"
# , "(.*)(\.)(.*)" trop fort (enlève les sigles !)
, "(.*)(\,)(.*)"
, "(.*)(< ?/?p ?>)(.*)" # marques de paragraphes
, "(.*)(study)(.*)"
, "(.*)\b(xx|xi|xv)\b(.*)"
, "(.*)(result)(.*)"
, "(.*)(année|nombre|moitié)(.*)"
, "(.*)(temps)(.*)"
, "(.*)(%)(.*)"
, "(.*)(\{)(.*)"
, "(.*)(terme)(.*)"
, "(.*)(différent)(.*)"
, "(.*)(travers)(.*)"
, "(.*)(:|\|)(.*)"
] :
compiled_regexes.append(compile(regex))
for format_regex in compiled_regexes:
if format_regex.match(word):
# print("STOPLIST += '%s' (regex: %s)" % (word, format_regex.pattern))
return(True)
return False
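# Examples (illustrative values):
#   is_stop_word((42, "the"), stop_words={"the"})          # True (in the stop set)
#   is_stop_word((43, "xy"), stop_words=set())             # True (matches "^.{1,2}$")
#   is_stop_word((44, "biodiversity"), stop_words=set())   # False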
def create_gargantua_resources():
gargantua_id = session.query(User.id).filter(User.username=="gargantua").first()
project = Node(
name="Resources",
user_id=gargantua_id,
typename="PROJECT")
session.add(project)
session.commit() # commit first so that project.id is available
stopList = Node(name="STOPLIST", parent_id=project.id, user_id=gargantua_id, typename="STOPLIST")
session.add(stopList)
session.commit()
def do_stoplist(corpus, overwrite_id=None):
'''
Create the list of stop words.
TODO: write a function to get all stop words with social scores
Parameters:
- overwrite_id: optional preexisting STOPLIST node to overwrite
'''
# Get preexisting StopList if provided in overwrite_id param
if overwrite_id:
stoplist_id = overwrite_id
# At this step of development, a new StopList should be created
else:
stoplist = corpus.add_child(
name="Stoplist (in:%s)" % corpus.id,
typename="STOPLIST"
)
session.add(stoplist)
session.commit()
stoplist_id = stoplist.id
# Get common resources, all common StopWords on the platform
## First get the id of the StopList of Gargantua super user
gargantua_id = session.query(User.id).filter(User.username=="gargantua").first()
rootStopList_id = session.query(Node.id).filter(
Node.user_id == gargantua_id,
Node.typename == "STOPLIST"
).first()
## Then get all the stop words
## stop_words :: Set of String
stop_words = set(row[0] for row in session.query(Ngram.terms)
.join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
.filter(NodeNgram.node_id == rootStopList_id)
.all()
)
# print([n for n in stop_words])
## Get the ngrams
## ngrams :: [(Int, String, Int)]
ngrams = (session.query( Ngram.id, Ngram.terms)
.join( NodeNgram, NodeNgram.ngram_id == Ngram.id )
.join( Node, Node.id == NodeNgram.node_id )
.filter( Node.parent_id == corpus.id,
Node.typename == "DOCUMENT")
.group_by( Ngram.id )
#.limit(limit)
.all()
)
ngrams_to_stop = filter(
lambda x: is_stop_word(x,stop_words=stop_words), ngrams
)
# print([n for n in ngrams_to_stop])
stop = LISTTYPES["STOPLIST"]({ n[0] : -1 for n in ngrams_to_stop})
# stop = LISTTYPES["STOPLIST"]([n[0] for n in ngrams_to_stop])
stop.save(stoplist_id)
return stoplist_id
"""
Computes a specificity metric from the ngram cooccurrence matrix.
+ SAVE => WeightedList => NodeNgram
"""
from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram
from gargantext.util.db import session, aliased, func, bulk_insert
from gargantext.util.lists import WeightedList
from collections import defaultdict
from pandas import DataFrame
def compute_specificity(corpus, cooc_id=None, overwrite_id = None):
'''
Compute the specificity, simple calculus.
Parameters:
- cooc_id: mandatory id of a cooccurrences node to use as base
- overwrite_id: optional preexisting specificity node to overwrite
'''
cooccurrences = (session.query(NodeNgramNgram)
.filter(NodeNgramNgram.node_id==cooc_id)
)
# no filtering: by design the cooc matrix is already filtered on tfidf at creation time
matrix = defaultdict(lambda : defaultdict(float))
# £TODO re-rename weight => score
for cooccurrence in cooccurrences:
matrix[cooccurrence.ngram1_id][cooccurrence.ngram2_id] = cooccurrence.weight
matrix[cooccurrence.ngram2_id][cooccurrence.ngram1_id] = cooccurrence.weight
nb_ngrams = len(matrix)
print("SPECIFICITY: computing on %i ngrams" % nb_ngrams)
d = DataFrame(matrix).fillna(0)
# proba (x/y) (<= each column is divided by its total)
d = d / d.sum(axis=0)
# d:Matrix => v: Vector (len = nb_ngrams)
v = d.sum(axis=1)
## d ##
#######
# Grenelle biodiversité kilomètres site élus île
# Grenelle 0 0 4 0 0 0
# biodiversité 0 0 0 0 4 0
# kilomètres 4 0 0 0 4 0
# site 0 0 0 0 4 6
# élus 0 4 4 4 0 0
# île 0 0 0 6 0 0
## d.sum(axis=1) ##
###################
# Grenelle 4
# biodiversité 4
# kilomètres 8
# site 10
# élus 12
# île 6
# temporary result
# ----------------
# for now we use the row sums as the specificity ranking
# (**same** order as with the pre-refactoring formula, but simpler to compute)
# TODO check the mathematical AND semantic consistency of this indicator
v.sort_values(inplace=True)
# [ ('biodiversité' , 0.333 ),
# ('Grenelle' , 0.5 ),
# ('île' , 0.599 ),
# ('kilomètres' , 1.333 ),
# ('site' , 1.333 ),
# ('élus' , 1.899 ) ]
# ----------------
# specificity node
if overwrite_id:
# overwrite pre-existing id
the_id = overwrite_id
session.query(NodeNgram).filter(NodeNgram.node_id==the_id).delete()
session.commit()
else:
specnode = corpus.add_child(
typename = "SPECIFICITY",
name = "Specif (in:%s)" % corpus.id
)
session.add(specnode)
session.commit()
the_id = specnode.id
# print(v)
data = WeightedList(
zip( v.index.tolist()
, v.values.tolist()
)
)
data.save(the_id)
return(the_id)
"""
Computes ngram scores with 3 ranking functions:
- the simple sum of occurrences inside the corpus
- the tfidf inside the corpus
- the global tfidf for all corpora having same source
FIXME: "having the same source" means we need to select inside hyperdata
with a (perhaps costly) JSON query: WHERE hyperdata->'resources' @> ...
"""
from gargantext.models import Node, NodeNgram, NodeNodeNgram
from gargantext.util.db import session, bulk_insert, func # = sqlalchemy.func like sum() or count()
from sqlalchemy import text # for query from raw SQL statement
from math import log
# £TODO
# from gargantext.util.lists import WeightedContextIndex
def compute_occs(corpus, overwrite_id = None):
"""
Calculates sum of occs per ngram within corpus
(used as info in the ngrams table view)
TODO optimize? OCCS here could be calculated simultaneously within the TFIDF-CORPUS loop
Parameters:
- overwrite_id: optional id of a pre-existing OCCURRENCES node for this corpus
(the Node and its previous NodeNodeNgram rows will be replaced)
"""
# 1) all the doc_ids of our corpus (scope of counts for filter)
# slower alternative: [doc.id for doc in corpus.children('DOCUMENT').all()]
docids_subquery = (session
.query(Node.id)
.filter(Node.parent_id == corpus.id)
.filter(Node.typename == "DOCUMENT")
.subquery()
)
# 2) our sums per ngram_id
occ_sums = (session
.query(
NodeNgram.ngram_id,
func.sum(NodeNgram.weight)
)
.filter(NodeNgram.node_id.in_(docids_subquery))
.group_by(NodeNgram.ngram_id)
.all()
)
# example result = [(1970, 1.0), (2024, 2.0), (259, 2.0), (302, 1.0), ... ]
# ^^^^ ^^^
# ngram_id sum_wei
if overwrite_id:
# overwrite pre-existing id
the_id = overwrite_id
# occnode = cache.Node[overwrite_id]
else:
# create the new OCCURRENCES node
occnode = corpus.add_child(
typename = "OCCURRENCES",
name = "occ_sums (in:%s)" % corpus.id
)
session.add(occnode)
session.commit()
the_id = occnode.id
# reflect that in NodeNodeNgrams (could be NodeNgram, but kept as NodeNodeNgram for harmony with tfidf)
# £TODO replace bulk_insert by something like WeightedContextMatrix.save()
bulk_insert(
NodeNodeNgram,
('node1_id' , 'node2_id', 'ngram_id', 'score'),
((the_id, corpus.id, res[0], res[1]) for res in occ_sums)
)
return the_id
def compute_tfidf(corpus, scope="local", overwrite_id=None):
"""
Calculates tfidf within the current corpus
Parameters:
- the corpus itself
- scope: {"local" or "global"}
- overwrite_id: optional id of a pre-existing TFIDF-XXXX node for this corpus
(the Node and its previous NodeNodeNgram rows will be replaced)
"""
# local <=> within this corpus
if scope == "local":
# All docs of this corpus
docids_subquery = (session
.query(Node.id)
.filter(Node.parent_id == corpus.id)
.filter(Node.typename == "DOCUMENT")
.subquery()
)
# global <=> within all corpora of this source
elif scope == "global":
this_source_type = corpus.resources()[0]['type']
# all corpora with the same source type
# (we need raw SQL query for postgres JSON operators) (TODO test speed)
same_source_corpora_query = (session
.query(Node.id)
.from_statement(text(
"""
SELECT id FROM nodes
WHERE hyperdata->'resources' @> '[{\"type\"\:%s}]'
""" % this_source_type
))
)
# All docs **in all corpora of the same source**
docids_subquery = (session
.query(Node.id)
.filter(Node.parent_id.in_(same_source_corpora_query))
.filter(Node.typename == "DOCUMENT")
.subquery()
)
# N: total number of documents in the count scope
total_docs = session.query(docids_subquery).count()
# or perhaps at least do the occurrences right now at the same time
tf_nd = (session
.query(
NodeNgram.ngram_id,
func.sum(NodeNgram.weight), # tf: same as occnode
func.count(NodeNgram.node_id) # nd: n docs with term
)
.filter(NodeNgram.node_id.in_(docids_subquery))
.group_by(NodeNgram.ngram_id)
.all()
)
# -------------------------------------------------
tfidfs = {}
log_tot_docs = log(total_docs)
for (ngram_id, tf, nd) in tf_nd:
# tfidfs[ngram_id] = tf * log(total_docs/nd)
tfidfs[ngram_id] = tf * (log_tot_docs-log(nd))
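# e.g. (illustrative numbers): total_docs = 100, a term with tf = 7 over nd = 4 docs:
#   tfidf = 7 * (log(100) - log(4)) = 7 * log(25) ≈ 22.53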
# -------------------------------------------------
if overwrite_id:
the_id = overwrite_id
else:
# create the new TFIDF-XXXX node
tfidf_nd = corpus.add_child()
if scope == "local":
tfidf_nd.typename = "TFIDF-CORPUS"
tfidf_nd.name = "tfidf-c (in:%s)" % corpus.id
elif scope == "global":
tfidf_nd.typename = "TFIDF-GLOBAL"
tfidf_nd.name = "tfidf-g (in type:%s)" % this_source_type
session.add(tfidf_nd)
session.commit()
the_id = tfidf_nd.id
# reflect that in NodeNodeNgrams
# £TODO replace bulk_insert by something like WeightedContextMatrix.save()
bulk_insert(
NodeNodeNgram,
('node1_id', 'node2_id','ngram_id', 'score'),
((the_id, corpus.id, ng, tfidfs[ng]) for ng in tfidfs)
)
return the_id
from gargantext.models import Node, NodeNgram, NodeNgramNgram
from gargantext.util.lists import WeightedMatrix
from gargantext.util.db import session, aliased, func
from gargantext.util.db_cache import cache
from gargantext.constants import DEFAULT_COOC_THRESHOLD
def compute_coocs(corpus,
overwrite_id = None,
threshold = DEFAULT_COOC_THRESHOLD,
mainlist_id = None,
stoplist_id = None,
symmetry_filter = True):
"""
Count how often some extracted terms appear
together in a small context (document)
throughout a larger context (corpus).
[NodeNgram] [NodeNgramNgram]
node_id | ngram_id | weight ngram1_id | ngram2_id | score |
--------+----------+-------- ----------+-----------+-------+
MYDOCA | 487 | 1 => 487 | 294 | 2 |
MYDOCA | 294 | 3
MYDOCB | 487 | 1
MYDOCB | 294 | 4
Fill that info in DB:
- a *new* COOCCURRENCES node
- and all corresponding NodeNgramNgram rows
worst-case complexity ~ O(N²/2) with N = number of ngrams
If a mainlist is provided, we filter doc ngrams to those also in the list.
Parameters:
- the corpus node
- overwrite_id: id of a pre-existing COOCCURRENCES node for this corpus
(all hyperdata and previous NodeNgramNgram rows will be replaced)
- threshold: on output cooc count (previously called hapax)
- mainlist_id: mainlist to constrain the input ngrams
- stoplist_id: stoplist for filtering input ngrams
(normally unnecessary if a mainlist is provided)
(deprecated parameters)
- field1,2: allowed to count other things than ngrams (eg tags) but no use case at present
- isMonopartite: ?? used a nodes_hyperdata_ngrams table ???
basic idea for one doc
======================
each pair of ngrams sharing same doc (node_id)
SELECT idx1.ngram_id, idx2.ngram_id
FROM nodes_ngrams AS idx1, nodes_ngrams AS idx2
---------------------------------
WHERE idx1.node_id = idx2.node_id <== that's cooc
---------------------------------
AND idx1.ngram_id <> idx2.ngram_id
AND idx1.node_id = MY_DOC ;
on entire corpus
=================
coocs for each doc :
- each given pair like (termA, termB) will likely appear several times
=> we do GROUP BY (x1.ngram_id, x2.ngram_id)
- we count unique appearances of the pair (cooc)
"""
# - TODO cvalue_id: allow a metric as additional input filter
# - TODO n_min, n_max : filter on Ngram.n (aka length of ngram)
# - TODO start, end : filter on document date
# - TODO weighted: if False normal cooc to be saved as result
# if True weighted cooc (experimental)
# /!\ big combinatorial complexity /!\
# for 8439 rows in the nodes_ngrams index, of which 1442 have occ > 1:
# 1,859,408 rows for the simple cooc query
# 71,134 rows when restricting to ngrams with occ > 1 (weight)
# docs of our corpus
docids_subquery = (session
.query(Node.id)
.filter(Node.parent_id == corpus.id)
.filter(Node.typename == "DOCUMENT")
.subquery()
)
# 2 x the occurrence index table
x1 = aliased(NodeNgram)
x2 = aliased(NodeNgram)
# cooccurrences columns definition
ucooc = func.count(x1.ngram_id).label("ucooc")
# 1) MAIN DB QUERY
coocs_query = (
session.query(x1.ngram_id, x2.ngram_id, ucooc)
.filter(x1.node_id == x2.node_id) # <- by definition of cooc
.filter(x1.ngram_id != x2.ngram_id) # <- b/c not with itself
.filter(x1.node_id.in_(docids_subquery)) # <- b/c within corpus
.group_by(x1.ngram_id, x2.ngram_id)
)
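# each row of the result is (ngram1_id, ngram2_id, ucooc),
# e.g. (487, 294, 2) for the two docs pictured in the docstring above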
# 2) INPUT FILTERS (reduce N before O(N²))
if mainlist_id:
main_subquery = (
session.query(NodeNgram.ngram_id)
.filter(NodeNgram.node_id == mainlist_id)
.subquery()
)
coocs_query = ( coocs_query
.filter( x1.ngram_id.in_(main_subquery) )
.filter( x2.ngram_id.in_(main_subquery) )
)
if stoplist_id:
stop_subquery = (
session.query(NodeNgram.ngram_id)
.filter(NodeNgram.node_id == stoplist_id)
.subquery()
)
coocs_query = ( coocs_query
.filter( ~ x1.ngram_id.in_(stop_subquery) )
.filter( ~ x2.ngram_id.in_(stop_subquery) )
)
if symmetry_filter:
# 1 filter taking symmetry into account
#  -> halves the work !!
#  -> but prevents direct access to the cooccurrences of x2
#  -> they end up scattered: stored under the x1 that preceded x2
#  -> retrieval will be more costly, via OR queries like:
# WHERE ngram1 = my_ngram OR ngram2 = my_ngram
coocs_query = coocs_query.filter(x1.ngram_id < x2.ngram_id)
# ------------
# 2 possible upstream filters to reduce the combinatorics
# - e.g. 929k rows => 35k rows
# - here on weight, but it degrades the results
# => conceivable with another metric (cvalue or tfidf?)
# coocs_query = coocs_query.filter(x1.weight > 1)
# coocs_query = coocs_query.filter(x2.weight > 1)
# ------------
# 3) OUTPUT FILTERS
# ------------------
# threshold
# £TODO adjust COOC_THRESHOLD a posteriori:
# ex: sometimes 2 sometimes 4 depending on sparsity
coocs_query = coocs_query.having(ucooc >= threshold)
# 4) EXECUTE QUERY
# ----------------
# => storage in our matrix structure
matrix = WeightedMatrix(coocs_query.all())
# fyi
# shape_0 = len({pair[0] for pair in matrix.items})
# shape_1 = len({pair[1] for pair in matrix.items})
# print("COOCS: NEW matrix shape [%ix%i]" % (shape_0, shape_1))
# 5) SAVE
# --------
# saving the parameters of the analysis in the Node JSON
new_hyperdata = { 'corpus': corpus.id,
'threshold': threshold }
if overwrite_id:
# overwrite pre-existing id
the_cooc = cache.Node[overwrite_id]
the_cooc.hyperdata = new_hyperdata
the_cooc.save_hyperdata()
session.commit()
the_id = overwrite_id
else:
# create the new cooc node
the_cooc = corpus.add_child(
typename = "COOCCURRENCES",
name = "Coocs (in:%s)" % corpus.name[0:10],
hyperdata = new_hyperdata,
)
session.add(the_cooc)
session.commit()
the_id = the_cooc.id
# ==> save all NodeNgramNgram with link to new cooc node id
matrix.save(the_id)
return the_id
"""
For initial ngram groups via stemming
Example:
- groups['copper engrav'] = {'copper engraving':3, 'coppers engraver':1...}
- groups['post'] = {'poste':3, 'poster':5, 'postés':2...}
"""
from gargantext.models import Node, NodeNgramNgram
from gargantext.util.db import session
from gargantext.util.lists import Translations
# to convert fr => french :/
from gargantext.util.languages import languages
from re import split as resplit
from collections import defaultdict, Counter
from nltk.stem.snowball import SnowballStemmer
def prepare_stemmers(corpus):
"""
Returns *several* stemmers (one for each language in the corpus)
(as a dict of stemmers with key = language_iso2)
"""
stemmers_by_lg = {
# always get a generic stemmer in case language code unknown
'__unknown__' : SnowballStemmer("english")
}
for lgiso2 in corpus.hyperdata['languages'].keys():
lgname = languages[lgiso2].name.lower()
stemmers_by_lg[lgiso2] = SnowballStemmer(lgname)
return stemmers_by_lg
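# Usage sketch ('en'/'fr' assumed among the observed language codes):
#   stemmers = prepare_stemmers(corpus)
#   stemmers['en'].stem('engraving')   # -> 'engrav' (cf. module docstring)
#   stemmers['fr'].stem('postés')      # -> 'post'   (cf. module docstring)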
def compute_groups(corpus, stoplist_id = None, overwrite_id = None):
"""
1) Use a stemmer/lemmatizer to group forms if they have same stem/lemma
2) Create an empty GROUPLIST node (for a list of "synonym" ngrams)
3) Save the list to DB (list node + each grouping as listnode - ngram1 - ngram2)
"""
stop_ngrams_ids = {}
# we will need the ngrams of the stoplist to filter
if stoplist_id is not None:
for id in session.query(NodeNgram.ngram_id).filter(NodeNgram.node_id == stoplist_id).all():
stop_ngrams_ids[id[0]] = True
# 1) compute stems/lemmas
# and group if same stem/lemma
stemmers = prepare_stemmers(corpus)
# todo dict {lg => {ngrams_todo} }
todo_ngrams_per_lg = defaultdict(set)
# res dict { commonstem: {ngram_1:freq_1 ,ngram_2:freq_2 ,ngram_3:freq_3} }
my_groups = defaultdict(Counter)
# preloop per doc to sort ngrams by language
for doc in corpus.children():
if ('language_iso2' in doc.hyperdata):
lgid = doc.hyperdata['language_iso2']
else:
lgid = "__unknown__"
# doc.ngrams is an sql query (ugly but useful intermediate step)
# FIXME: move the counting and stoplist filtering up here
for ngram_pack in doc.ngrams.all():
todo_ngrams_per_lg[lgid].add(ngram_pack)
# --------------------
# long loop per ngrams
for (lgid,todo_ngs) in todo_ngrams_per_lg.items():
# fun: word::str => stem::str
stem_it = stemmers[lgid].stem
for ng in todo_ngs:
doc_wei = ng[0]
ngram = ng[1] # Ngram obj
# skip if in STOPLIST
if ngram.id in stop_ngrams_ids:
continue
lexforms = [lexunit for lexunit in resplit(r'\W+',ngram.terms)]
# STEM IT, and this term's stems will become a new grouping key...
stemseq = " ".join([stem_it(lexfo) for lexfo in lexforms])
# ex:
# groups['post'] = {'poste':3, 'poster':5, 'postés':2...}
# groups['copper engrav'] = {'copper engraving':3, 'coppers engraver':1...}
my_groups[stemseq][ngram.id] += doc_wei
del todo_ngrams_per_lg
# now serializing all groups to a list of couples
ng_couples = []
addcouple = ng_couples.append
for grped_ngramids in my_groups.values():
if len(grped_ngramids) > 1:
# first find most frequent term in the counter
winner_id = grped_ngramids.most_common(1)[0][0]
for ngram_id in grped_ngramids:
if ngram_id != winner_id:
addcouple((winner_id, ngram_id))
del my_groups
# 2) the list node
if overwrite_id:
# overwrite pre-existing id
the_id = overwrite_id
# or create the new id
else:
the_group = corpus.add_child(
typename = "GROUPLIST",
name = "Group (src:%s)" % corpus.name[0:10]
)
# and save the node
session.add(the_group)
session.commit()
the_id = the_group.id
# 3) Save each grouping couple to DB via Translations.save()
ndngng_list = Translations(
[(sec,prim) for (prim,sec) in ng_couples],
just_items=True
)
# ...referring to the list node we just got
ndngng_list.save(the_id)
return the_id
......@@ -2,11 +2,16 @@ from gargantext.util.db import *
from gargantext.models import *
from gargantext.constants import *
from collections import defaultdict
def parse(corpus):
try:
documents_count = 0
corpus.status('parsing', progress=0)
# will gather info about languages
observed_languages = defaultdict(int)
# retrieve resource information
for resource in corpus.resources():
# information about the resource
......@@ -22,6 +27,7 @@ def parse(corpus):
hyperdata = hyperdata,
)
session.add(document)
observed_languages[hyperdata["language_iso2"]] += 1
if documents_count % BATCH_PARSING_SIZE == 0:
corpus.status('parsing', progress=documents_count)
corpus.save_hyperdata()
......@@ -29,6 +35,8 @@ def parse(corpus):
documents_count += 1
# update info about the resource
resource['extracted'] = True
# add a corpus-level info about languages
corpus.hyperdata['languages'] = observed_languages
# commit all changes
corpus.status('parsing', progress=documents_count, complete=True)
corpus.save_hyperdata()
......
......@@ -94,23 +94,36 @@ def project(request, project_id):
)
session.add(corpus)
session.commit()
# parse_extract: fileparsing -> ngram extraction -> lists
scheduled(parse_extract_indexhyperdata)(corpus.id)
#scheduled(parse_extract)(corpus.id)
# corpora within this project
corpora = project.children('CORPUS').all()
sourcename2corpora = defaultdict(list)
for corpus in corpora:
# we only consider the first resource of the corpus to determine its type
resource = corpus.resources()[0]
resource_type_name = RESOURCETYPES[resource['type']]['name']
resources = corpus.resources()
if len(resources):
resource = resources[0]
resource_type_name = RESOURCETYPES[resource['type']]['name']
else:
print("(WARNING) PROJECT view: no listed resource")
# add some data for the viewer
corpus.count = corpus.children('DOCUMENT').count()
status = corpus.status()
if status is not None and not status['complete']:
corpus.status_message = '(in progress: %s, %d complete)' % (
status['action'].replace('_', ' '),
status['progress'],
)
if not status['error']:
corpus.status_message = '(in progress: %s, %d complete)' % (
status['action'].replace('_', ' '),
status['progress'],
)
else:
corpus.status_message = '(aborted: "%s" after %i docs)' % (
status['error'][-1],
status['progress']
)
else:
corpus.status_message = ''
# add
......