Commit dbb66340 authored by c24b

Remove assignment of the corpus MAIN language to documents with unsupported languages #TODO

parent 9f117ac4
...@@ -175,12 +175,14 @@ def parse(corpus): ...@@ -175,12 +175,14 @@ def parse(corpus):
session.commit() session.commit()
if len(corpus.skipped_docs) > 0: if len(corpus.skipped_docs) > 0:
print (sum(languages["__skipped__"].values()), "docs with unsupported lang") print (sum(languages["__skipped__"].values()), "docs with unsupported lang")
#assign main lang to unsupported languages docs #assign main lang of the corpus to unsupported languages docs
for d_id in corpus.skipped_docs: # for d_id in corpus.skipped_docs:
document = session.query(Node).filter(Node.id == d_id, Node.typename == "DOCUMENT").first() # document = session.query(Node).filter(Node.id == d_id, Node.typename == "DOCUMENT").first()
document.hyperdata["language_iso2"] = corpus.language_id # if document.hyperdata["error"].startswith("Error: unsupported language"):
document.save_hyperdata() # print(document.hyperdata["language_iso2"])
session.commit() # document.hyperdata["language_iso2"] = corpus.language_id
# document.save_hyperdata()
# session.commit()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment