Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
0f25f4e9
Commit
0f25f4e9
authored
Nov 07, 2014
by
PkSM3
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[UPDATE] conflict resolver
parent
0871bb8b
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
362 additions
and
0 deletions
+362
-0
settings.py
gargantext_web/settings.py
+149
-0
models.py
node/models.py
+213
-0
No files found.
gargantext_web/settings.py
0 → 100644
View file @
0f25f4e9
"""
Django settings for gargantext_web project.

For more information on this file, see
https://docs.djangoproject.com/en/1.6/topics/settings/

For the full list of settings and their values, see
https://docs.djangoproject.com/en/1.6/ref/settings/

Deployment secrets can be overridden without editing this file by setting
the following environment variables (the values committed below are only
used as fallbacks when a variable is not set):

    GARGANTEXT_SECRET_KEY    -> SECRET_KEY
    GARGANTEXT_DB_USER       -> DATABASES['default']['USER']
    GARGANTEXT_DB_PASSWORD   -> DATABASES['default']['PASSWORD']
"""

# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
import os

BASE_DIR = os.path.dirname(os.path.dirname(__file__))
# Parent directory of BASE_DIR, normalised to an absolute path.
PROJECT_PATH = os.path.join(BASE_DIR, os.pardir)
PROJECT_PATH = os.path.abspath(PROJECT_PATH)


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.6/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# The literal below is committed to version control, so it must be treated
# as a development-only fallback: set GARGANTEXT_SECRET_KEY in production.
SECRET_KEY = os.environ.get(
    'GARGANTEXT_SECRET_KEY',
    'bt)3n9v&a02cu7^^=+u_t2tmn8ex5fvx8$x4r*j*pb1yawd+rz',
)

# SECURITY WARNING: don't run with debug turned on in production!
# NOTE(review): both flags are unconditionally True here.
DEBUG = True

TEMPLATE_DEBUG = True

TEMPLATE_DIRS = (
    # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates".
    # Always use forward slashes
    # Don't forget to use absolute paths, not relative paths.
    '/srv/gargantext/templates',

    #import os.path
    #
    #TEMPLATE_DIRS = (
    #    os.path.join(os.path.dirname(__file__), 'templates').replace('\\','/'),
    #)
)

#ALLOWED_HOSTS = ['*',]

ALLOWED_HOSTS = ['localhost', 'master.polemic.be', 'mat.polemic.be', 'alexandre.polemic.be']


# Application definition

INSTALLED_APPS = (
    'grappelli',
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'django_extensions',
    #'south',
    #'documents',
    'cte_tree',
    'node',
    'ngram',
    'django_hstore',
)

MIDDLEWARE_CLASSES = (
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
)

WSGI_APPLICATION = 'wsgi.application'


# Database
# https://docs.djangoproject.com/en/1.6/ref/settings/#databases

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.postgresql_psycopg2',
        'NAME': 'gargandb',
        # Credentials fall back to the committed development values when the
        # corresponding environment variable is not set.
        'USER': os.environ.get('GARGANTEXT_DB_USER', 'alexandre'),
        'PASSWORD': os.environ.get('GARGANTEXT_DB_PASSWORD', 'C8kdcUrAQy66U'),
        #'USER': 'gargantext',
        #'PASSWORD': 'C8krdcURAQy99U',
        'HOST': '127.0.0.1',
        'PORT': '5432',
    }
}


# Internationalization
# https://docs.djangoproject.com/en/1.6/topics/i18n/

LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'UTC'

USE_I18N = True

USE_L10N = True

USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.6/howto/static-files/

ROOT_URLCONF = 'gargantext_web.urls'

STATIC_ROOT = '/var/www/gargantext/static/'
STATIC_URL = '/static/'

MEDIA_ROOT = '/var/www/gargantext/media'
#MEDIA_ROOT = os.path.join(PROJECT_PATH, 'media')
MEDIA_URL = '/media/'

STATICFILES_FINDERS = (
    'django.contrib.staticfiles.finders.AppDirectoriesFinder',
    'django.contrib.staticfiles.finders.FileSystemFinder',
)

STATICFILES_DIRS = (
    #os.path.join(BASE_DIR, "static"),
    '/srv/gargantext/static',
    #'/var/www/www/alexandre/media',
    #'/var/www/alexandre.delanoe.org/',
)

TEMPLATE_CONTEXT_PROCESSORS = (
    "django.contrib.auth.context_processors.auth",
    "django.core.context_processors.request",
    "django.core.context_processors.static",
)

# grappelli custom
GRAPPELLI_ADMIN_TITLE = "Gargantext"
node/models.py
0 → 100644
View file @
0f25f4e9
from
django.db
import
models
from
django.utils
import
timezone
from
django.contrib.auth.models
import
User
from
django_hstore
import
hstore
from
cte_tree.models
import
CTENode
,
Manager
#from cte_tree.fields import DepthField, PathField, OrderingField
from
parsing.Caches
import
LanguagesCache
,
NgramsExtractorsCache
,
NgramsCaches
from
parsing.FileParsers
import
*
from
time
import
time
from
collections
import
defaultdict
# Some useful functions
# TODO: start the function name with an underscore (private)
def upload_to(instance, filename):
    """Return the relative storage path for an uploaded file.

    The path is ``corpora/<username>/<filename>``, where the username is
    read from ``instance.user.username``.
    """
    owner_name = instance.user.username
    path_parts = (owner_name, filename)
    return 'corpora/%s/%s' % path_parts
    #return 'corpora/%s/%f/%s' % (instance.user.username, time(), filename)
# All classes here
class Language(models.Model):
    """A language, stored with two short codes and a full name."""

    # Two-character code, unique across rows (presumably ISO 639-1 -- confirm).
    iso2 = models.CharField(max_length=2, unique=True)
    # Three-character code (presumably ISO 639-2/3 -- confirm).
    iso3 = models.CharField(max_length=3)
    # Human-readable name; this is what __str__ returns.
    fullname = models.CharField(max_length=255)
    # NOTE(review): presumably flags languages the ngram extractors support -- confirm.
    implemented = models.BooleanField(blank=True)

    def __str__(self):
        """Return the language's full name."""
        return self.fullname
class ResourceType(models.Model):
    """A named kind of resource, referenced by ``Resource.type``."""

    # Node.parse_resources looks this name up to choose a file parser
    # ('pubmed', 'isi', 'ris', 'europress'; anything else uses the default).
    name = models.CharField(max_length=255)

    def __str__(self):
        """Return the resource type's name."""
        return self.name
class Ngram(models.Model):
    """An n-gram, optionally tied to a language."""

    # Optional language; set to NULL when the referenced Language is deleted.
    language = models.ForeignKey(Language, blank=True, null=True, on_delete=models.SET_NULL)
    # NOTE(review): presumably the number of tokens in ``terms`` -- confirm.
    n = models.IntegerField()
    # Text of the n-gram (Node.extract_ngrams builds it by joining tokens with spaces).
    terms = models.CharField(max_length=255)
class Resource(models.Model):
    """A resource that can be attached to nodes through ``Node_Resource``."""

    # Identifier of the resource.
    guid = models.CharField(max_length=255)
    # Optional resource type; its name selects the parser in Node.parse_resources.
    type = models.ForeignKey(ResourceType, blank=True, null=True)
    # Optional uploaded file, stored under the path computed by upload_to().
    file = models.FileField(upload_to=upload_to, blank=True)
    digest = models.CharField(max_length=32)    # MD5 digest
class NodeType(models.Model):
    """A named kind of node, referenced by ``Node.type``."""

    # Node.parse_resources looks up the type named 'Document' for the nodes it creates.
    name = models.CharField(max_length=200)

    def __str__(self):
        """Return the node type's name."""
        return self.name
class NodeQuerySet(models.query.QuerySet):
    """Methods available from Node querysets."""

    def extract_ngrams(self, keys, ngramsextractorscache=None, ngramscaches=None):
        """Call ``extract_ngrams`` on every node of this queryset.

        ``keys`` is forwarded unchanged to each node. A cache that is not
        supplied is created once here, and the same cache objects are then
        passed to every node's ``extract_ngrams`` call.
        """
        extractors = (
            NgramsExtractorsCache()
            if ngramsextractorscache is None
            else ngramsextractorscache
        )
        caches = NgramsCaches() if ngramscaches is None else ngramscaches
        for current_node in self:
            current_node.extract_ngrams(keys, extractors, caches)
class NodeManager(models.Manager):
    """Methods available from Node.objects.

    Any public attribute that is not found on the manager itself is looked
    up on a fresh ``NodeQuerySet``, which makes the custom queryset methods
    (e.g. ``extract_ngrams``) callable directly on the manager.
    """

    def get_queryset(self):
        """Return a ``NodeQuerySet`` bound to this manager's model and database."""
        # Pass the manager's database alias along so that
        # ``Node.objects.using(...)`` / ``db_manager(...)`` are honoured.
        return NodeQuerySet(self.model, using=self._db)

    def __getattr__(self, name, *args):
        """Delegate unknown public attributes to the queryset.

        Raises:
            AttributeError: if ``name`` starts with an underscore (private
                and dunder names are never delegated), or if the queryset
                has no such attribute and no default was supplied in
                ``args``.
        """
        if name.startswith("_"):
            # Name the attribute so the error is diagnosable.
            raise AttributeError(name)
        return getattr(self.get_queryset(), name, *args)
class Node(CTENode):
    """The node.

    A node belongs to a user, has a type and a name, and may carry a
    language, a date and a free-form ``metadata`` dictionary. Resources are
    attached through ``Node_Resource`` and extracted n-grams through
    ``Node_Ngram``.
    """

    objects = NodeManager()

    user = models.ForeignKey(User)
    type = models.ForeignKey(NodeType)
    name = models.CharField(max_length=200)

    # Optional language; set to NULL when the referenced Language is deleted.
    language = models.ForeignKey(Language, blank=True, null=True, on_delete=models.SET_NULL)

    date = models.DateField(default=timezone.now, blank=True)
    metadata = hstore.DictionaryField(blank=True)

    # TODO: remove the three following fields
    #fichier = models.FileField(upload_to=upload_to, blank=True)
    #resource = models.ForeignKey(Resource, blank=True, null=True)
    #ngrams = models.ManyToManyField(NGrams)

    def __str__(self):
        """Return the node's name."""
        return self.name

    def add_resource(self, **kwargs):
        """Create a Resource, attach it to this node and return it.

        The resource's ``guid`` and ``digest`` are both filled with the
        current timestamp; ``kwargs`` are forwarded to the ``Resource``
        constructor.
        """
        # only for tests
        resource = Resource(guid=str(time()), digest=str(time()), **kwargs)
        # TODO: check whether all these 'save' calls are really needed
        resource.save()
        node_resource = Node_Resource(node=self, resource=resource)
        node_resource.save()
        return resource

    def parse_resources(self):
        """Parse this node's unparsed resources into child 'Document' nodes.

        Every resource attached to this node with ``parsed=False`` is read
        by the parser matching its type name; each metadata dictionary
        returned by the parsers becomes a new child node of type 'Document'
        owned by this node's user. Finally all of this node's
        ``Node_Resource`` rows are flagged as parsed.
        """
        # Parser class per resource type name; unknown names fall back to
        # the generic FileParser. Built once instead of once per resource.
        parsers = defaultdict(lambda: FileParser.FileParser, {
            'pubmed': PubmedFileParser,
            'isi': IsiFileParser,
            'ris': RisFileParser,
            'europress': EuropressFileParser,
        })
        # parse all resources into a list of metadata
        metadata_list = []
        for node_resource in self.node_resource.filter(parsed=False):
            resource = node_resource.resource
            parser = parsers[resource.type.name]()
            metadata_list += parser.parse(str(resource.file))
        # insert the new resources in the database!
        document_type = NodeType.objects.get(name='Document')
        languages_cache = LanguagesCache()
        Node.objects.bulk_create([
            Node(
                user=self.user,
                type=document_type,
                # Truncated so the title fits in the ``name`` column.
                name=metadata['title'][0:199] if 'title' in metadata else '',
                parent=self,
                language=languages_cache[metadata['language_iso2']] if 'language_iso2' in metadata else None,
                metadata=metadata,
            )
            for metadata in metadata_list
        ])
        # mark the resources as parsed for this node
        self.node_resource.update(parsed=True)

    def extract_ngrams(self, keys, ngramsextractorscache=None, ngramscaches=None):
        """Extract n-grams from this node's metadata and store their weights.

        Args:
            keys: either a dict mapping metadata keys to a weight (each
                occurrence of an n-gram found under that key adds the
                weight), or an iterable of metadata keys (each occurrence
                adds 1).
            ngramsextractorscache: cache indexed by language that yields
                the n-gram extractor; created when ``None``.
            ngramscaches: cache indexed by language, then by n-gram text,
                that yields the ``Ngram`` to link; created when ``None``.

        Raises:
            KeyError: if one of ``keys`` is missing from ``self.metadata``.
        """
        # if there is no cache...
        if ngramsextractorscache is None:
            ngramsextractorscache = NgramsExtractorsCache()
        if ngramscaches is None:
            ngramscaches = NgramsCaches()
        # what do we want from the cache?
        # The node's own language wins; otherwise inherit the parent's.
        language = self.language if self.language else self.parent.language
        extractor = ngramsextractorscache[language]
        ngrams = ngramscaches[language]
        # Normalise both accepted shapes of ``keys`` to (key, weight) pairs
        # so a single counting loop serves weighted and unweighted calls.
        if isinstance(keys, dict):
            weighted_keys = keys.items()
        else:
            weighted_keys = ((key, 1) for key in keys)
        # find & count all the occurrences
        associations = defaultdict(float)  # float or int?
        for key, weight in weighted_keys:
            for ngram in extractor.extract_ngrams(self.metadata[key]):
                # An extracted n-gram is a sequence of (token, tag) pairs;
                # the occurrences are counted per space-joined token text.
                terms = ' '.join([token for token, tag in ngram])
                associations[terms] += weight
        # insert the occurrences in the database
        Node_Ngram.objects.bulk_create([
            Node_Ngram(
                node=self,
                ngram=ngrams[ngram_text],
                weight=weight,
            )
            for ngram_text, weight in associations.items()
        ])
class Node_Resource(models.Model):
    """Link between a Node and a Resource, with a per-link parsing flag."""

    # Reachable from the node side as ``node.node_resource``.
    node = models.ForeignKey(Node, related_name='node_resource')
    resource = models.ForeignKey(Resource)
    # Set to True by Node.parse_resources once the node's resources were parsed.
    parsed = models.BooleanField(default=False)
class Node_Ngram(models.Model):
    """Weighted association between a Node and an Ngram."""

    node = models.ForeignKey(Node)
    ngram = models.ForeignKey(Ngram)
    # Accumulated weight of the n-gram for this node (see Node.extract_ngrams).
    weight = models.FloatField()

    def __str__(self):
        """Return ``"<node name>: <ngram terms>"``."""
        return "%s: %s" % (self.node.name, self.ngram.terms)
class Project(Node):
    """Proxy model over Node (``Meta.proxy = True``)."""

    class Meta:
        proxy = True
class Corpus(Node):
    """Proxy model over Node (``Meta.proxy = True``), pluralised as 'Corpora'."""

    class Meta:
        proxy = True
        # Irregular plural, spelled out explicitly.
        verbose_name_plural = 'Corpora'
class Document(Node):
    """Proxy model over Node (``Meta.proxy = True``)."""

    class Meta:
        proxy = True
class NodeNgramNgram(models.Model):
    """Score attached to a pair of n-grams (``ngramx``, ``ngramy``) for a node."""

    node = models.ForeignKey(Node)

    # Two references to Ngram need distinct related names; rows are removed
    # together with either n-gram (CASCADE).
    ngramx = models.ForeignKey(Ngram, related_name="nodengramngramx", on_delete=models.CASCADE)
    ngramy = models.ForeignKey(Ngram, related_name="nodengramngramy", on_delete=models.CASCADE)

    score = models.FloatField(default=0)

    def __str__(self):
        """Return ``"<node name>: <ngramx terms> / <ngramy terms>"``."""
        # The fields are declared in lower case (``ngramx`` / ``ngramy``);
        # reading ``ngramX`` / ``ngramY`` raised AttributeError.
        return "%s: %s / %s" % (self.node.name, self.ngramx.terms, self.ngramy.terms)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment