Commit b90eb786 authored by Romain Loth

oops fix the fix ;-) bon we à tous

parent fef28b5b
...@@ -113,6 +113,7 @@ def parse(corpus): ...@@ -113,6 +113,7 @@ def parse(corpus):
#skipped_languages #skipped_languages
skipped_languages = [] skipped_languages = []
#skipped docs to remember for later processing #skipped docs to remember for later processing
pending_add_error_stats = False
skipped_docs = [] skipped_docs = []
documents_count = 0 documents_count = 0
...@@ -161,9 +162,7 @@ def parse(corpus): ...@@ -161,9 +162,7 @@ def parse(corpus):
'action':'Parsing', 'action':'Parsing',
'error': hyperdata['error'] 'error': hyperdata['error']
}) })
pending_add_error_stats = True
#adding skipped_docs for later processing if error in parsing
skipped_docs.append(document.id)
# ----------------------- # -----------------------
# save as corpus DB child # save as corpus DB child
...@@ -175,6 +174,12 @@ def parse(corpus): ...@@ -175,6 +174,12 @@ def parse(corpus):
) )
session.add(document) session.add(document)
session.commit() session.commit()
documents_count += 1
if pending_add_error_stats:
#adding skipped_docs for later processing if error in parsing
skipped_docs.append(document.id)
pending_add_error_stats = False
#BATCH_PARSING_SIZE #BATCH_PARSING_SIZE
if documents_count % BATCH_PARSING_SIZE == 0: if documents_count % BATCH_PARSING_SIZE == 0:
...@@ -183,8 +188,6 @@ def parse(corpus): ...@@ -183,8 +188,6 @@ def parse(corpus):
session.add(corpus) session.add(corpus)
session.commit() session.commit()
documents_count += 1
# update info about the resource # update info about the resource
resource['extracted'] = True resource['extracted'] = True
#print( "resource n°",i, ":", d, "docs inside this file") #print( "resource n°",i, ":", d, "docs inside this file")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment