Commit 188081f8 authored by c24b's avatar c24b

[TO TEST] lang_detection

parent ed967608
from gargantext.constants import *
from langdetect import detect
from langdetect import DetectorFactory
from langdetect import detect, DetectorFactory
class Language:
def __init__(self, iso2=None, iso3=None,full_name=None, name=None):
......@@ -18,9 +16,6 @@ class Language:
return result
__repr__ = __str__
def detect_lang(self, text):
    """Detect the language of ``text`` and return the matching ``iso2`` value.

    ``text`` is handed to ``langdetect.detect``; the code it returns is
    used as a key into the module-level ``languages`` mapping, and the
    ``iso2`` attribute of the entry found there is returned.

    Any exception raised by ``detect`` or by the ``languages`` lookup
    propagates to the caller unchanged.
    """
    # Pin langdetect's seed so repeated calls on the same text agree.
    DetectorFactory.seed = 0
    # Index the ``languages`` *instance*: subscripting the ``Languages``
    # class itself never consults the stored entries or ``__missing__``.
    return languages[detect(text)].iso2
class Languages(dict):
def __missing__(self, key):
......@@ -30,6 +25,9 @@ class Languages(dict):
raise KeyError
languages = Languages()
def detect_lang(self, text):
    """Return the ``iso2`` attribute of the language detected in ``text``.

    The first parameter is not used by the body; it is kept so existing
    callers keep working.
    """
    # A fixed seed keeps langdetect's answer stable between calls.
    DetectorFactory.seed = 0
    detected_code = detect(text)
    language = languages[detected_code]
    return language.iso2
import pycountry
pycountry_keys = (
......
......@@ -47,7 +47,7 @@ def parse(corpus):
indexed = False
# a simple census to raise language info at corpus level
for l in ["iso2", "iso3", "full_name"]:
if hyperdata["indexed"] is True:
if indexed is True:
break
lang_field = "language_"+l
if lang_field in hyperdata.keys():
......
......@@ -14,6 +14,7 @@ html5lib==0.9999999
python-igraph>=0.7.1
jdatetime==1.7.2
kombu==3.0.33 # messaging
langdetect==1.0.6 # language detection
nltk==3.1
numpy==1.10.4
psycopg2==2.6.1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment