Commit 712f8f0a authored by Mathieu Rodic's avatar Mathieu Rodic

Quelques retouches...

parent 45addafc
...@@ -5,16 +5,17 @@ import collections ...@@ -5,16 +5,17 @@ import collections
# from the cache instead of using the database for every call # from the cache instead of using the database for every call
class Ngram_Cache:
    """In-memory cache of n-gram records for one language.

    Each normalized term string is looked up (or created) once; later
    calls with the same terms are served from the cache instead of going
    back to the database.
    """

    def __init__(self, language):
        """Create an empty cache bound to ``language``.

        ``language`` is stored as-is and forwarded to ``NGram.get`` and to
        the ``NGram`` constructor.
        """
        self._cache = {}
        self._language = language

    def get(self, terms):
        """Return the n-gram for ``terms``, creating and saving it if needed.

        ``terms`` is stripped and lower-cased before being used as the
        cache key and as the lookup value.
        """
        terms = terms.strip().lower()
        if terms not in self._cache:
            try:
                ngram = NGram.get(terms=terms, language=self._language)
            except Exception:
                # Lookup failed, so create the record. `Exception` (rather
                # than a bare `except`) lets KeyboardInterrupt/SystemExit
                # propagate.
                # NOTE(review): narrow this to the "not found" exception
                # raised by NGram.get once that type is confirmed.
                # NOTE(review): n is the character count of `terms`; if it
                # is meant to be the number of words, this should be
                # len(terms.split()) -- confirm.
                ngram = NGram(terms=terms, n=len(terms), language=self._language)
                ngram.save()
            self._cache[terms] = ngram
        return self._cache[terms]
...@@ -73,6 +74,9 @@ class FileParser: ...@@ -73,6 +74,9 @@ class FileParser:
resource = Resource.get(guid=guid) resource = Resource.get(guid=guid)
except: except:
resource = Resource(guid=guid) resource = Resource(guid=guid)
# If the parent node already has a child with this resource, pass
# (is it a good thing?)
if parentNode.get_descendants().
# create the document itself # create the document itself
childNode = Node( childNode = Node(
user = parentNode.pk, user = parentNode.pk,
...@@ -87,15 +91,14 @@ class FileParser: ...@@ -87,15 +91,14 @@ class FileParser:
# parse it! # parse it!
ngrams = self.extract_ngrams(contents, language) ngrams = self.extract_ngrams(contents, language)
# we should already be in a transaction, so no use doing another one (or is there?) # we should already be in a transaction, so no use doing another one (or is there?)
# btw, this is not very good (the get/insert part)
ngram_cache = self._ngram_caches[language.iso3] ngram_cache = self._ngram_caches[language.iso3]
for ngram_text, count in ngrams.items(): for ngram_text, occurences in ngrams.items():
ngram = ngram_cache.get(ngram_text) ngram = ngram_cache.get(ngram_text)
Node_Ngram( Node_Ngram(
node = childNode, node = childNode,
ngram = ngram, ngram = ngram,
count = count occurences = occurences
) ).save()
# return the created document # return the created document
return document return document
...@@ -111,4 +114,5 @@ class FileParser: ...@@ -111,4 +114,5 @@ class FileParser:
This method shall be overridden by inherited classes. This method shall be overridden by inherited classes.
""" """
def parse(self):
    """Return the parsed items; this base implementation returns none.

    The note preceding this method states that inherited classes are
    expected to override it. Returning an empty list (instead of falling
    through and returning ``None``) gives callers something iterable.
    """
    return []
\ No newline at end of file
...@@ -29,35 +29,6 @@ class Ngram(models.Model): ...@@ -29,35 +29,6 @@ class Ngram(models.Model):
language = models.ForeignKey(Language, blank=True, null=True, on_delete=models.SET_NULL) language = models.ForeignKey(Language, blank=True, null=True, on_delete=models.SET_NULL)
n = models.IntegerField() n = models.IntegerField()
terms = models.CharField(max_length=255) terms = models.CharField(max_length=255)
class Ngram_Cache:
    """Cache of n-gram primary keys, keyed by language and then by terms."""

    def __init__(self, language):
        """Resolve ``language`` (a language code string) to a language id.

        Three-letter codes are looked up with ``Language.get(iso3=...)``
        and two-letter codes with ``Language.get(iso2=...)``. For any other
        length the language id stays ``None``.
        """
        # Per-language mapping {language: {terms: ngram pk}}; `get` reads
        # it, so it must exist before the first call.
        self._cache = {}
        self._ngram_ids = []
        self._language_id = None
        # get the language id
        language = language.lower()
        code_length = len(language)
        # Compare against integers: len() never equals the strings "3"/"2",
        # so the lookups below were previously unreachable.
        if code_length == 3:
            self._language_id = Language.get(iso3=language).id
        elif code_length == 2:
            self._language_id = Language.get(iso2=language).id
        else:
            # NOTE(review): unfinished fallback kept as found -- the result
            # of this hard-coded lookup is discarded; confirm the intent.
            import pycountry
            pycountry.languages.get(alpha2='an')

    def get(self, language, terms):
        """Return the primary key of the n-gram for ``terms``.

        ``terms`` is stripped and lower-cased first. On a cache miss the
        n-gram is fetched, or created and saved when the fetch fails, and
        its primary key is stored under ``language``.
        """
        # get the term id
        terms = terms.strip().lower()
        per_language = self._cache.setdefault(language, {})
        if terms not in per_language:
            try:
                ngram = NGram.get(terms=terms)
            except Exception:
                # `Exception` rather than a bare `except`, so that
                # KeyboardInterrupt/SystemExit are not swallowed.
                ngram = NGram(terms=terms, n=len(terms), language_id=self._language_id)
                ngram.save()
            per_language[terms] = ngram.pk
        # return the term id
        return per_language[terms]
class Resource(models.Model): class Resource(models.Model):
...@@ -96,7 +67,10 @@ class Node(MP_Node): ...@@ -96,7 +67,10 @@ class Node(MP_Node):
for noeud in Node.objects.filter(user=user): for noeud in Node.objects.filter(user=user):
print(noeud.depth * " " + "[%d] %d" % (noeud.pk, noeud.name)) print(noeud.depth * " " + "[%d] %d" % (noeud.pk, noeud.name))
class Node_Ngram(models.Model):
    """Association between a node and an n-gram, with an occurrence count."""

    # Node the n-gram is attached to; the row is deleted with the node.
    node = models.ForeignKey(Node, on_delete=models.CASCADE)
    # Attached n-gram; the row is deleted with the n-gram.
    ngram = models.ForeignKey(Ngram, on_delete=models.CASCADE)
    # NOTE(review): presumably the number of times the n-gram occurs in the
    # node's content -- confirm against the parser that fills it in.
    occurences = models.IntegerField()
class Project(Node): class Project(Node):
class Meta: class Meta:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment