Commit c50f2fff authored by Mathieu Rodic's avatar Mathieu Rodic

[FEAUTURE] Resource model implementation.

parent 96420c1b
......@@ -12,7 +12,7 @@ from django.contrib.auth.models import User
from collections import defaultdict
# Some usefull functions
# TODO: start the function name with an underscore (private)
def upload_to(instance, filename):
return 'corpora/%s/%s' % (instance.user.username, filename)
#return 'corpora/%s/%f/%s' % (instance.user.username, time(), filename)
......@@ -41,7 +41,8 @@ class Ngram(models.Model):
class Resource(models.Model):
guid = models.CharField(max_length=255)
bdd_type = models.ForeignKey(DatabaseType, blank=True, null=True)
#file = models.FileField(upload_to=upload_to, blank=True)
file = models.FileField(upload_to=upload_to, blank=True)
digest = models.CharField(max_length=32) # MD5 digest
class NodeType(models.Model):
name = models.CharField(max_length=200)
......@@ -59,7 +60,8 @@ class Node(CTENode):
date = models.DateField(default=timezone.now, blank=True)
metadata = hstore.DictionaryField(blank=True)
# TODO: remove the three following fields
fichier = models.FileField(upload_to=upload_to, blank=True)
#resource = models.ForeignKey(Resource, blank=True, null=True)
#ngrams = models.ManyToManyField(NGrams)
......@@ -67,15 +69,36 @@ class Node(CTENode):
def __str__(self):
return self.name
# TODO: voir à quoi sert cette méthode
def liste(self, user):
for noeud in Node.objects.filter(user=user):
print(noeud.depth * " " + "[%d] %d" % (noeud.pk, noeud.name))
def add_resource(self, **kwargs):
resource = Resource(**kwargs)
# TODO: vérifier si tous ces 'save' sont réellement utiles
resource.save()
node_resource = Node_Resource(
node = self,
resource = resource
)
node_resource.save()
return resource
def parse(self):
# TODO: that's not very pretty...
# can't we make a simple join in Django?
for node_resource in self.node_resource.filter(parsed=False):
# TODO: call parsers here
print(node_resource.resource.file)
def extract_ngrams(self, keys, cache):
# TODO: instanciate the ngrams extractors
# WHERE TO PUT THEIR CACHE?
extractor = cache.extractors[self.language.iso2]
# what do we want from the cache?
extractor = cache.extractors[self.language]
ngrams = cache.ngrams[self.language]
# find & count all the occurrences
associations = defaultdict(float) # float or int?
......@@ -94,11 +117,15 @@ class Node(CTENode):
ngram = ngrams[ngram_text],
weight = weight
)
class Node_Resource(models.Model):
node = models.ForeignKey(Node, related_name='node_resource')
resource = models.ForeignKey(Resource)
parsed = models.BooleanField(default=False)
class Node_Ngram(models.Model):
node = models.ForeignKey(Node, on_delete=models.CASCADE)
ngram = models.ForeignKey(Ngram, on_delete=models.CASCADE)
node = models.ForeignKey(Node)
ngram = models.ForeignKey(Ngram)
weight = models.IntegerField()
class Project(Node):
......
......@@ -38,6 +38,11 @@ except:
parent = corpus
).save()
corpus.add_resource(file='/path/to/file')
corpus.parse()
exit()
cache = Cache()
for child in corpus.children.all():
print(child.id)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment