Commit d8ae5f6c authored by c24b

M languages => detect_lang(text)

parent 188081f8
......@@ -25,7 +25,8 @@ class Languages(dict):
raise KeyError
languages = Languages()
def detect_lang(self, text):
def detect_lang(text):
    """Detect the language of ``text`` and return its ``iso2`` value.

    The code reported by ``detect`` is used as a key into the module-level
    ``languages`` registry, and the ``iso2`` attribute of the matching entry
    is returned.

    Nothing is caught here: any exception raised by ``detect`` or by the
    ``languages`` lookup propagates to the caller.
    """
    # Pin the detector seed before every detection.
    # NOTE(review): presumably this is langdetect's DetectorFactory, where a
    # fixed seed makes results reproducible -- confirm against the imports.
    DetectorFactory.seed = 0
    detected_code = detect(text)
    language = languages[detected_code]
    return language.iso2
......
......@@ -75,10 +75,13 @@ def parse(corpus):
break
if k in hyperdata.keys():
try:
hyperdata["language_iso2"] = detect_lang(hyperdata[k])
corpus.languages[lang] += 1
indexed = True
break
if len(hyperdata[k]) > 10:
print("> detected on",k, ":", detect_lang(hyperdata[k]))
hyperdata["language_iso2"] = detect_lang(hyperdata[k])
corpus.languages[lang] += 1
indexed = True
break
except KeyError:
hyperdata["error"] = "Error: unsupported language"
skipped_languages.append(hyperdata["language_iso2"])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment