Commit d8ae5f6c authored by c24b's avatar c24b

M languages => detect_lang(text)

parent 188081f8
...@@ -25,7 +25,8 @@ class Languages(dict): ...@@ -25,7 +25,8 @@ class Languages(dict):
raise KeyError raise KeyError
languages = Languages() languages = Languages()
def detect_lang(self, text):
def detect_lang(text):
DetectorFactory.seed = 0 DetectorFactory.seed = 0
return languages[detect(text)].iso2 return languages[detect(text)].iso2
......
...@@ -75,10 +75,13 @@ def parse(corpus): ...@@ -75,10 +75,13 @@ def parse(corpus):
break break
if k in hyperdata.keys(): if k in hyperdata.keys():
try: try:
hyperdata["language_iso2"] = detect_lang(hyperdata[k]) if len(hyperdata[k]) > 10:
corpus.languages[lang] += 1 print("> detected on",k, ":", detect_lang(hyperdata[k]))
indexed = True hyperdata["language_iso2"] = detect_lang(hyperdata[k])
break
corpus.languages[lang] += 1
indexed = True
break
except KeyError: except KeyError:
hyperdata["error"] = "Error: unsupported language" hyperdata["error"] = "Error: unsupported language"
skipped_languages.append(hyperdata["language_iso2"]) skipped_languages.append(hyperdata["language_iso2"])
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment