Commit 45b7f61e authored by delanoe

[FEAT] Hyperdata insertion

Please drop your table and update the database (dbmigrate.py).
Insertion is optimized according to the type of the data.

Modifications validées :
	modifié :         ../constants.py
	modifié :         hyperdata.py
	modifié :         ../util/toolchain/hyperdata_indexing.py
parent b9733eb2
......@@ -49,26 +49,64 @@ def convert_to_date(date):
# Registry of the hyperdata keys that get indexed.
#
# Each entry maps a hyperdata name to a dict with four fields:
#   'id'              -- numeric identifier of the key; must be unique
#                        within this registry
#   'type'            -- Python type of the value
#   'convert_to_db'   -- callable applied to a value before storing it
#   'convert_from_db' -- callable applied to a stored value when reading it
#
# NOTE: the ids were renumbered ('count' is now 1, 'publication_date' 2,
# 'title' 3), so rows indexed with the previous numbering are stale; the
# commit message asks to drop the table and re-run the migration.
INDEXED_HYPERDATA = {
    # TODO use properties during toolchain.hyperdata_indexing
    # (type, convert_to_db, convert_from_db)
    'count': {
        'id': 1,
        'type': int,
        'convert_to_db': int,
        'convert_from_db': int,
    },
    'publication_date': {
        'id': 2,
        'type': datetime.datetime,
        'convert_to_db': convert_to_date,
        'convert_from_db': datetime.datetime.fromtimestamp,
    },
    'title': {
        'id': 3,
        'type': str,
        'convert_to_db': str,
        'convert_from_db': str,
    },
    'authors': {
        'id': 4,
        'type': str,
        'convert_to_db': str,
        'convert_from_db': str,
    },
    'journal': {
        'id': 5,
        'type': str,
        'convert_to_db': str,
        'convert_from_db': str,
    },
    'abstract': {
        'id': 6,
        'type': str,
        'convert_to_db': str,
        'convert_from_db': str,
    },
    'text': {
        'id': 7,
        'type': str,
        'convert_to_db': str,
        'convert_from_db': str,
    },
    'page': {
        'id': 8,
        'type': int,
        'convert_to_db': int,
        'convert_from_db': int,
    },
}
......
......@@ -64,11 +64,15 @@ class NodeHyperdata(Base):
)
"""
__tablename__ = 'nodes_hyperdata'
id = Column(Integer, primary_key=True)
node_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE'))
key = Column(HyperdataKey)
value_flt = Column(Double(), index=True)
value_str = Column(String(255), index=True)
id = Column( Integer, primary_key=True )
node_id = Column( Integer, ForeignKey(Node.id, ondelete='CASCADE'))
key = Column( HyperdataKey )
value_int = Column( Integer , index=True )
value_flt = Column( Double() , index=True )
value_utc = Column( DateTime(timezone=True) , index=True )
value_str = Column( String(255) , index=True )
value_txt = Column( Text , index=True )
def __init__(self, node=None, key=None, value=None):
"""Custom constructor
......@@ -126,6 +130,13 @@ def HyperdataValueComparer_overrider(key):
return comparator
# ??
# Install, on HyperdataValueComparer, an override for every attribute name
# exposed by the `value_flt` and `value_str` columns. Each override is built
# by HyperdataValueComparer_overrider(key).
# NOTE(review): only `value_flt` and `value_str` are inspected here; confirm
# whether the other value_* columns of NodeHyperdata need the same treatment.
for key in set(dir(NodeHyperdata.value_flt) + dir(NodeHyperdata.value_str)):
    # Skip names belonging to the object machinery itself (instance dict,
    # weakref slot, repr/str) and anything related to attribute access,
    # class lookup or construction.
    if (
        key in ('__dict__', '__weakref__', '__repr__', '__str__')
        or 'attr' in key
        or 'class' in key
        or 'init' in key
        or 'new' in key
    ):
        continue
    setattr(HyperdataValueComparer, key, HyperdataValueComparer_overrider(key))
......@@ -16,28 +16,61 @@ def _nodes_hyperdata_generator(corpus):
if not isinstance(values, list):
values = [values]
for value in values:
if isinstance(value, (int, float, )):
if isinstance(value, (int, )):
yield (
document.id,
key['id'],
value,
None,
None,
None,
None,
)
elif isinstance(value, (str, )):
elif isinstance(value, (float, )):
yield (
document.id,
key['id'],
None,
value[:255],
value,
None,
None,
None,
)
elif isinstance(value, (datetime, )):
yield (
document.id,
key['id'],
None,
# value_str
value.strftime("%Y-%m-%d %H:%M:%S"),
None,
value.strftime("%Y-%m-%d %H:%M:%S"),
# FIXME check timestamp +%Z
None,
None,
)
elif isinstance(value, (str, )) :
if len(value) < 255 :
yield (
document.id,
key['id'],
None,
None,
None,
value[:255],
None,
)
else :
yield (
document.id,
key['id'],
None,
None,
None,
None,
value,
)
else:
print("WARNING: Couldn't insert an INDEXED_HYPERDATA value because of unknown type:", type(value))
......@@ -45,6 +78,11 @@ def _nodes_hyperdata_generator(corpus):
def index_hyperdata(corpus):
    """Bulk-insert the indexed hyperdata of ``corpus`` into NodeHyperdata.

    The rows come from ``_nodes_hyperdata_generator(corpus)``, which yields
    7-tuples. ``fields`` lists the target columns in the same positional
    order: the node id, the key id, then one slot per typed value column
    (int, float, datetime, short string, long text).

    :param corpus: the corpus passed through to the row generator.
    """
    bulk_insert(
        table=NodeHyperdata,
        # Order matters: it must mirror the tuples yielded by the generator.
        fields=(
            'node_id',
            'key',
            'value_int',
            'value_flt',
            'value_utc',
            'value_str',
            'value_txt',
        ),
        data=_nodes_hyperdata_generator(corpus),
    )
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment