Commit 5eddc2a1 authored by Mathieu Rodic's avatar Mathieu Rodic

Added language and ngrams in the database objects section

parent 4cdb309d
......@@ -19,4 +19,17 @@
* interface graphique / admin Django
* ok pour JS + Django dans un premier temps
* que font exactement les parsers (outre les metadata) ?
\ No newline at end of file
* que font exactement les parsers (outre les metadata) ?
1) on prend le raw (html, txt, csv, n'importe quoi) et on extrait
les informations pertinentes pour la création de documents
2) on sélectionne les noeuds/documents à analyser
3) on extrait les ngrams du titre ou de l'abstract ou du texte (corps de l'article)
a) découpage du texte en phrases
b) découpage des phrases en mots
c) POS-tagging des mots (détecter la langue auparavant)
d) repérage des séquences pertinentes (grammar_rules de nltk)
e) enregistrement des ngrams
\ No newline at end of file
from django.db import models
from django.utils import timezone
from django.contrib.auth.models import User
from django_hstore import hstore
from treebeard.mp_tree import MP_Node
from time import time
from django.contrib.auth.models import User
from language import Language
def upload_to(instance, filename):
    """Return the storage path for an uploaded file.

    The path has the form ``corpora/<username>/<timestamp>/<filename>``,
    where ``<timestamp>`` is the current Unix time rendered with ``%f``
    (six decimal places).

    Args:
        instance: Object owning the file; must expose ``user.username``.
        filename: Name of the uploaded file, appended unchanged.

    Returns:
        The relative path as a string.
    """
    return 'corpora/%s/%f/%s' % (instance.user.username, time(), filename)
class Language(models.Model):
    """A language, stored as two short codes plus its full name."""

    # Two-character code (presumably ISO 639-1 -- confirm against the data).
    iso2 = models.CharField(max_length=2)
    # Three-character code (presumably ISO 639-2/3 -- confirm against the data).
    iso3 = models.CharField(max_length=3)
    # Full name of the language, up to 255 characters.
    fullname = models.CharField(max_length=255)
class Ngram(models.Model):
    """An n-gram: an integer ``n`` together with its terms as one string."""

    # Integer field; presumably the number of terms in the n-gram -- confirm.
    n = models.IntegerField()
    # The terms, stored as a single string of up to 255 characters.
    terms = models.CharField(max_length=255)
class Resource(models.Model):
guid = models.CharField(max_length=255)
file = models.FileField(upload_to=upload_to, blank=True)
......@@ -26,6 +43,8 @@ class Node(MP_Node):
type = models.ForeignKey(NodeType)
name = models.CharField(max_length=200)
language = models.ForeignKey(Language, blank=True, null=True, on_delete=models.SET_NULL)
# Pass the callable, not its result: ``timezone.now()`` would be evaluated
# once at import time, so every new row would share that same stale default.
date = models.DateField(default=timezone.now, blank=True)
metadata = hstore.DictionaryField(blank=True)
......@@ -42,6 +61,8 @@ class Node(MP_Node):
for noeud in Node.objects.filter(user=user):
    # ``name`` is a CharField, so it is formatted with %s; the previous %d
    # raised TypeError as soon as a node was printed.
    print(noeud.depth * " " + "[%d] %s" % (noeud.pk, noeud.name))
class Project(Node):
    """Proxy model over ``Node``."""

    class Meta:
        # Proxy model: declared with ``proxy = True`` so that it reuses the
        # parent model's table rather than creating its own.
        proxy = True
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment