Commit 9a58664f authored by delanoe

[FEAT] Hyperdata insertion

Please drop your table and update the database (dbmigrate.py).
Insertion is optimized according to the type of the data.

Modifications validées :
	modifié :         ../constants.py
	modifié :         hyperdata.py
	modifié :         ../util/toolchain/hyperdata_indexing.py
parent bf6fe987
...@@ -49,26 +49,64 @@ def convert_to_date(date): ...@@ -49,26 +49,64 @@ def convert_to_date(date):
INDEXED_HYPERDATA = { INDEXED_HYPERDATA = {
# TODO use properties during toolchain.hyperdata_indexing # TODO use properties during toolchain.hyperdata_indexing
# (type, convert_to_db, convert_from_db) # (type, convert_to_db, convert_from_db)
'publication_date':
'count':
{ 'id' : 1 { 'id' : 1
, 'type' : int
, 'convert_to_db' : int
, 'convert_from_db': int
},
'publication_date':
{ 'id' : 2
, 'type' : datetime.datetime , 'type' : datetime.datetime
, 'convert_to_db' : convert_to_date , 'convert_to_db' : convert_to_date
, 'convert_from_db': datetime.datetime.fromtimestamp , 'convert_from_db': datetime.datetime.fromtimestamp
}, },
'title': 'title':
{ 'id' : 2 { 'id' : 3
, 'type' : str , 'type' : str
, 'convert_to_db' : str , 'convert_to_db' : str
, 'convert_from_db': str , 'convert_from_db': str
}, },
'count':
{ 'id' : 3 'authors':
{ 'id' : 4
, 'type' : str
, 'convert_to_db' : str
, 'convert_from_db': str
},
'journal':
{ 'id' : 5
, 'type' : str
, 'convert_to_db' : str
, 'convert_from_db': str
},
'abstract':
{ 'id' : 6
, 'type' : str
, 'convert_to_db' : str
, 'convert_from_db': str
},
'text':
{ 'id' : 7
, 'type' : str
, 'convert_to_db' : str
, 'convert_from_db': str
},
'page':
{ 'id' : 8
, 'type' : int , 'type' : int
, 'convert_to_db' : float , 'convert_to_db' : int
, 'convert_from_db': int , 'convert_from_db': int
}, },
} }
......
...@@ -64,11 +64,15 @@ class NodeHyperdata(Base): ...@@ -64,11 +64,15 @@ class NodeHyperdata(Base):
) )
""" """
__tablename__ = 'nodes_hyperdata' __tablename__ = 'nodes_hyperdata'
id = Column(Integer, primary_key=True) id = Column( Integer, primary_key=True )
node_id = Column(Integer, ForeignKey(Node.id, ondelete='CASCADE')) node_id = Column( Integer, ForeignKey(Node.id, ondelete='CASCADE'))
key = Column(HyperdataKey) key = Column( HyperdataKey )
value_flt = Column(Double(), index=True) value_int = Column( Integer , index=True )
value_str = Column(String(255), index=True) value_flt = Column( Double() , index=True )
value_utc = Column( DateTime(timezone=True) , index=True )
value_str = Column( String(255) , index=True )
value_txt = Column( Text , index=True )
def __init__(self, node=None, key=None, value=None): def __init__(self, node=None, key=None, value=None):
"""Custom constructor """Custom constructor
...@@ -126,6 +130,13 @@ def HyperdataValueComparer_overrider(key): ...@@ -126,6 +130,13 @@ def HyperdataValueComparer_overrider(key):
return comparator return comparator
# ?? # ??
for key in set(dir(NodeHyperdata.value_flt) + dir(NodeHyperdata.value_str)): for key in set(dir(NodeHyperdata.value_flt) + dir(NodeHyperdata.value_str)):
if key in ('__dict__', '__weakref__', '__repr__', '__str__') or 'attr' in key or 'class' in key or 'init' in key or 'new' in key: if key in ( '__dict__'
, '__weakref__'
, '__repr__'
, '__str__') \
or 'attr' in key \
or 'class' in key \
or 'init' in key \
or 'new' in key :
continue continue
setattr(HyperdataValueComparer, key, HyperdataValueComparer_overrider(key)) setattr(HyperdataValueComparer, key, HyperdataValueComparer_overrider(key))
...@@ -16,28 +16,61 @@ def _nodes_hyperdata_generator(corpus): ...@@ -16,28 +16,61 @@ def _nodes_hyperdata_generator(corpus):
if not isinstance(values, list): if not isinstance(values, list):
values = [values] values = [values]
for value in values: for value in values:
if isinstance(value, (int, float, )): if isinstance(value, (int, )):
yield ( yield (
document.id, document.id,
key['id'], key['id'],
value, value,
None, None,
None,
None,
None,
) )
elif isinstance(value, (str, )): elif isinstance(value, (float, )):
yield ( yield (
document.id, document.id,
key['id'], key['id'],
None, None,
value[:255], value,
None,
None,
None,
) )
elif isinstance(value, (datetime, )): elif isinstance(value, (datetime, )):
yield ( yield (
document.id, document.id,
key['id'], key['id'],
None, None,
# value_str None,
value.strftime("%Y-%m-%d %H:%M:%S"), value.strftime("%Y-%m-%d %H:%M:%S"),
# FIXME check timestamp +%Z
None,
None,
)
elif isinstance(value, (str, )) :
if len(value) < 255 :
yield (
document.id,
key['id'],
None,
None,
None,
value[:255],
None,
)
else :
yield (
document.id,
key['id'],
None,
None,
None,
None,
value,
) )
else: else:
print("WARNING: Couldn't insert an INDEXED_HYPERDATA value because of unknown type:", type(value)) print("WARNING: Couldn't insert an INDEXED_HYPERDATA value because of unknown type:", type(value))
...@@ -45,6 +78,11 @@ def _nodes_hyperdata_generator(corpus): ...@@ -45,6 +78,11 @@ def _nodes_hyperdata_generator(corpus):
def index_hyperdata(corpus): def index_hyperdata(corpus):
bulk_insert( bulk_insert(
table = NodeHyperdata, table = NodeHyperdata,
fields = ('node_id', 'key', 'value_flt', 'value_str', ), fields = ( 'node_id', 'key'
, 'value_int'
, 'value_flt'
, 'value_utc'
, 'value_str'
, 'value_txt' ),
data = _nodes_hyperdata_generator(corpus), data = _nodes_hyperdata_generator(corpus),
) )
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment