Commit 204bfc6d authored by Mathieu Rodic's avatar Mathieu Rodic

[CODE] removed the `NodeType` model

see hardcoded constants in `gargantext/constants.py`
(also, kept working on the single project page)
parent bf8001dc
# Names of the node types a `Node` can have.
# The `NodeType` column type stores `NODETYPES.index(name)` in the database
# and reads it back with `NODETYPES[index]`, so the position of each entry is
# part of the stored data: append new types at the end, never reorder or
# remove existing ones.
# `None` sits at index 0, so a node without a type is stored as 0.
NODETYPES = [
None,
'USER',
'PROJECT',
'CORPUS',
'DOCUMENT',
]
# Per-language settings, keyed by language code.
# Currently empty: the only entry (French, with its n-grams tagger) is
# commented out.
LANGUAGES = {
# 'fr': {
# 'tagger': FrenchNgramsTagger
# }
}
from gargantext.util.parsers import *
# Kinds of resources a corpus can be built from.
# Each entry is a dict with a display 'name', a 'default_language' and,
# when a parser is available, the 'parser' class to use.
# Entries whose 'parser' line is commented out have NO 'parser' key at all:
# look it up with `.get('parser')` rather than `['parser']`.
# `NewCorpusForm` uses the position of an entry in this list as the value of
# the matching <option>, so the order of the list is significant.
# NOTE(review): 'Web of Science (ISI format)' defaults to 'fr' while the
# other non-French sources default to 'en' -- confirm this is intended.
RESOURCETYPES = [
# { 'name': 'CSV',
# # 'parser': CSVParser,
# 'default_language': 'en',
# },
{ 'name': 'Europress (English)',
'parser': EuropressParser,
'default_language': 'en',
},
{ 'name': 'Europress (French)',
# 'parser': EuropressParser,
'default_language': 'fr',
},
# { 'name': 'ISTex',
# # 'parser': ISTexParser,
# 'default_language': 'en',
# },
{ 'name': 'Jstor (RIS format)',
# 'parser': RISParser,
'default_language': 'en',
},
{ 'name': 'Pubmed (XML format)',
'parser': PubmedParser,
'default_language': 'en',
},
{ 'name': 'Scopus (RIS format)',
# 'parser': RISParser,
'default_language': 'en',
},
{ 'name': 'Web of Science (ISI format)',
# 'parser': ISIParser,
'default_language': 'fr',
},
{ 'name': 'Zotero (RIS format)',
# 'parser': RISParser,
'default_language': 'en',
},
]
from gargantext.util.db import * from gargantext.util.db import *
from gargantext.constants import *
from datetime import datetime from datetime import datetime
from .users import User from .users import User
__all__ = ['NodeType', 'Node'] __all__ = ['Node']
class NodeType(Base): class NodeType(TypeDecorator):
__tablename__ = 'nodetypes' """Define a new type of column to describe a Node's type.
id = Column(Integer, primary_key=True) This column type is implemented as an SQL integer.
name = Column(String(255), unique=True) Values are detailed in `gargantext.constants.NODETYPES`.
"""
impl = Integer
def process_bind_param(self, typename, dialect):
return NODETYPES.index(typename)
def process_result_value(self, typeindex, dialect):
return NODETYPES[typeindex]
class Node(Base): class Node(Base):
__tablename__ = 'nodes' __tablename__ = 'nodes'
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
# foreign keys type = Column(NodeType, index=True)
user_id = Column(Integer, ForeignKey(User.id)) user_id = Column(Integer, ForeignKey(User.id))
type_id = Column(Integer, ForeignKey(NodeType.id))
# main data # main data
name = Column(String(255), unique=True) name = Column(String(255), unique=True)
date = Column(DateTime(), default=datetime.now) date = Column(DateTime(), default=datetime.now)
......
...@@ -32,7 +32,7 @@ class User(Base): ...@@ -32,7 +32,7 @@ class User(Base):
) )
return query.all() return query.all()
def get_nodes(self, nodetype=None): def get_nodes(self, type=None):
"""get all nodes belonging to the user""" """get all nodes belonging to the user"""
# ↓ this below is a workaround because of Python's lame import system # ↓ this below is a workaround because of Python's lame import system
from .nodes import Node from .nodes import Node
...@@ -41,8 +41,8 @@ class User(Base): ...@@ -41,8 +41,8 @@ class User(Base):
.filter(Node.user_id == self.id) .filter(Node.user_id == self.id)
.order_by(Node.date) .order_by(Node.date)
) )
if nodetype is not None: if type is not None:
query = query.filter(Node.type_id == nodetype.id) query = query.filter(Node.type == type)
return query.all() return query.all()
def owns(user, node): def owns(user, node):
......
...@@ -22,7 +22,8 @@ session = scoped_session(sessionmaker(bind=engine)) ...@@ -22,7 +22,8 @@ session = scoped_session(sessionmaker(bind=engine))
# tools to build models # tools to build models
from sqlalchemy.orm import aliased from sqlalchemy.orm import aliased, synonym
from sqlalchemy.types import * from sqlalchemy.types import *
from sqlalchemy.schema import Column, ForeignKey, UniqueConstraint from sqlalchemy.schema import Column, ForeignKey, UniqueConstraint
from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.ext.hybrid import hybrid_property
...@@ -2,6 +2,8 @@ from django.template.loader import get_template ...@@ -2,6 +2,8 @@ from django.template.loader import get_template
from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseForbidden from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseForbidden
from django.shortcuts import render, redirect from django.shortcuts import render, redirect
from django import forms
from urllib.parse import quote_plus as urlencode from urllib.parse import quote_plus as urlencode
from gargantext import settings from gargantext import settings
......
This diff is collapsed.
from lxml import etree
from ._Parser import Parser
from datetime import datetime
from io import BytesIO
class PubmedParser(Parser):
    """Parser for PubMed XML exports.

    Every ``PubmedArticle`` element found directly under the document root
    is converted into one flat ``hyperdata`` dictionary.
    """

    # hyperdata key -> path of the XML node holding its value, relative to a
    # ``PubmedArticle`` element.
    _HYPERDATA_PATHS = {
        "journal": 'MedlineCitation/Article/Journal/Title',
        "title": 'MedlineCitation/Article/ArticleTitle',
        "abstract": 'MedlineCitation/Article/Abstract/AbstractText',
        "title2": 'MedlineCitation/Article/VernacularTitle',
        "language_iso3": 'MedlineCitation/Article/Language',
        # PubMed identifies the kind of an ArticleId with the `IdType`
        # attribute, and ElementPath requires the value to be quoted; the
        # former `[@type=doi]` predicate could not select the DOI.
        "doi": 'PubmedData/ArticleIdList/ArticleId[@IdType="doi"]',
        "realdate_full_": 'MedlineCitation/Article/Journal/JournalIssue/PubDate/MedlineDate',
        "realdate_year_": 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Year',
        "realdate_month_": 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Month',
        "realdate_day_": 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Day',
        "publication_year": 'MedlineCitation/DateCreated/Year',
        "publication_month": 'MedlineCitation/DateCreated/Month',
        "publication_day": 'MedlineCitation/DateCreated/Day',
        "authors": 'MedlineCitation/Article/AuthorList',
    }

    # Keys only needed while choosing the title and the publication date;
    # they are removed from the final hyperdata.
    _TEMPORARY_KEYS = (
        "realdate_year_",
        "realdate_month_",
        "realdate_day_",
        "title2",
    )

    def _parse(self, file):
        """Return the list of hyperdata dicts found in `file`.

        `file` is either the raw XML as `bytes`, or anything accepted by
        `lxml.etree.parse` (a path or a file-like object).
        """
        # Entities are not resolved, and malformed XML is recovered from
        # rather than rejected.
        xml_parser = etree.XMLParser(resolve_entities=False, recover=True)
        if isinstance(file, bytes):
            file = BytesIO(file)
        xml = etree.parse(file, parser=xml_parser)
        return [
            self._parse_article(xml_article)
            for xml_article in xml.findall('PubmedArticle')
        ]

    def _parse_article(self, xml_article):
        """Build the hyperdata dict of a single ``PubmedArticle`` element."""
        hyperdata = self._extract_fields(xml_article)

        # Title decision: fall back on the vernacular title, then on an
        # empty string. `.get()` matters: the key is absent when the article
        # has no usable ArticleTitle.
        if not hyperdata.get("title"):
            hyperdata["title"] = hyperdata.get("title2", "")

        # Date decision, see forge.iscpif.fr/issues/1418
        decision = self._decide_date(hyperdata)
        if decision is not None:
            # Only overwrite the parts PubMed provided in the first place.
            for part in ("year", "month", "day"):
                key = "publication_" + part
                if key in hyperdata:
                    hyperdata[key] = str(getattr(decision, part))

        for key in self._TEMPORARY_KEYS:
            hyperdata.pop(key, None)
        return hyperdata

    def _extract_fields(self, xml_article):
        """Read every path of `_HYPERDATA_PATHS`, skipping what is missing.

        A key is left out when its node does not exist or carries no text,
        so that every stored value is a string.
        """
        hyperdata = {}
        for key, path in self._HYPERDATA_PATHS.items():
            xml_node = xml_article.find(path)
            if xml_node is None:
                continue
            if key == 'authors':
                value = self._format_authors(xml_node)
            else:
                value = xml_node.text
            if value is not None:
                hyperdata[key] = value
        return hyperdata

    @staticmethod
    def _format_authors(xml_author_list):
        """Join the authors of an ``AuthorList`` as "Fore Last, Fore Last".

        An author lacking one of the two name parts is rendered with the
        part that is present; an author with neither is skipped. A single
        incomplete author therefore no longer discards the whole list.
        """
        names = []
        for xml_author in xml_author_list:
            parts = [
                xml_author.findtext(tag)
                for tag in ('ForeName', 'LastName')
            ]
            name = ' '.join(part for part in parts if part)
            if name:
                names.append(name)
        return ', '.join(names)

    @staticmethod
    def _strptime_date(date_string, date_format):
        """Parse `date_string` with `date_format`; return None on mismatch."""
        try:
            return datetime.strptime(date_string, date_format).date()
        except ValueError:
            return None

    def _decide_date(self, hyperdata):
        """Choose the publication date of an article.

        The journal issue date is preferred; the PubMed record creation date
        is the fallback. As a side effect, `hyperdata["realdate_full_"]` is
        filled from the split issue date when PubMed gave no `MedlineDate`.
        Return a `datetime.date`, or None when nothing could be parsed.
        """
        if "realdate_full_" in hyperdata:
            real_date = hyperdata["realdate_full_"]
        else:
            real_date = hyperdata.get("realdate_year_", "")
            for key in ("realdate_month_", "realdate_day_"):
                if key in hyperdata:
                    real_date += " " + hyperdata[key]
            hyperdata["realdate_full_"] = real_date
        # For a range such as "2000 Nov-Dec", only keep what precedes the
        # first dash.
        real_date = real_date.split("-")[0]

        pubmed_date = hyperdata.get("publication_year", "")
        for key in ("publication_month", "publication_day"):
            if key in hyperdata:
                pubmed_date += " " + hyperdata[key]

        # The length of the issue date tells which parts it may contain.
        # NOTE: `%b` matches abbreviated month names of the current locale.
        if len(real_date) > 8:
            real_format = '%Y %b %d'
        elif len(real_date) > 4:
            real_format = '%Y %b'
        else:
            real_format = None

        decision = None
        if real_format is not None:
            decision = self._strptime_date(real_date, real_format)
        if decision is None:
            decision = self._strptime_date(pubmed_date, '%Y %m %d')
        return decision
import collections
import datetime
import dateutil.parser
import zipfile
import re
DEFAULT_DATE = datetime.datetime(datetime.MINYEAR, 1, 1)
class Parser:
    """Base class for performing files parsing depending on their type.

    Subclasses override `_parse()`, which extracts raw hyperdata from one
    file; callers use `parse()`, which also handles ZIP archives and
    normalizes the dates and languages of every hyperdata.
    """

    def __init__(self, language_cache=None):
        """Keep the language cache used by `format_hyperdata_languages()`.

        NOTE(review): `LanguagesCache` is neither defined nor imported in the
        visible part of this module; unless it is provided elsewhere,
        building a parser without `language_cache` raises `NameError`.
        """
        self._languages_cache = LanguagesCache() if language_cache is None else language_cache

    def detect_encoding(self, string):
        """Useful method to detect the encoding of a document.

        Return the encoding name guessed by `chardet`, or 'UTF-8' when
        `chardet` could not guess one.
        """
        import chardet
        encoding = chardet.detect(string)
        # `chardet` reports an undetected encoding as `{'encoding': None}`,
        # so the default of `.get()` alone would never apply.
        return encoding.get('encoding') or 'UTF-8'

    def format_hyperdata_dates(self, hyperdata):
        """Format the dates found in the hyperdata.

        Examples:
        {"publication_date": "2014-10-23 09:57:42"}
        -> {"publication_date": "2014-10-23 09:57:42", "publication_year": "2014", ...}
        {"publication_year": "2014"}
        -> {"publication_date": "2014-01-01 00:00:00", "publication_year": "2014", ...}

        The dictionary is modified in place and returned.
        """
        date_format = "%Y-%m-%d %H:%M:%S"

        # First, build the "*_date" fields...
        # This first case mainly deals with Zotero data, but can be useful
        # for other sources as well.
        date_string = hyperdata.get('publication_date_to_parse', None)
        if date_string is not None:
            date_string = re.sub(r'\/\/+(\w*|\d*)', '', date_string)
            try:
                hyperdata['publication_date'] = dateutil.parser.parse(
                    date_string,
                    default=DEFAULT_DATE
                ).strftime(date_format)
            except Exception as error:
                print(error, 'Parser Zotero, Date not parsed for:', date_string)
                hyperdata['publication_date'] = datetime.datetime.now().strftime(date_format)
        elif hyperdata.get('publication_year', None) is not None:
            # Dates given as split fields: "<prefix>_year", "<prefix>_month"...
            separators = (
                ("_month", " "),
                ("_day", " "),
                ("_hour", " "),
                ("_minute", ":"),
                ("_second", ":"),
            )
            prefixes = [key[:-5] for key in hyperdata if key[-5:] == "_year"]
            for prefix in prefixes:
                try:
                    date_string = hyperdata[prefix + "_year"]
                    for suffix, separator in separators:
                        key = prefix + suffix
                        if key in hyperdata:
                            date_string += separator + hyperdata[key]
                    # `default` provides the parts missing from the string;
                    # without it `dateutil` borrows them from today's date,
                    # turning "2014" into today's day and month of 2014.
                    hyperdata[prefix + "_date"] = dateutil.parser.parse(
                        date_string,
                        default=DEFAULT_DATE
                    ).strftime(date_format)
                except Exception as error:
                    # Best effort: the field is left out, but say so.
                    print(error, 'Parser, Date not parsed for prefix:', prefix)
        else:
            hyperdata['publication_date'] = datetime.datetime.now().strftime(date_format)

        # ...then split every "*_date" field into its separate elements.
        prefixes = [key[:-5] for key in hyperdata if key[-5:] == "_date"]
        for prefix in prefixes:
            date = dateutil.parser.parse(hyperdata[prefix + "_date"])
            hyperdata[prefix + "_year"] = date.strftime("%Y")
            hyperdata[prefix + "_month"] = date.strftime("%m")
            hyperdata[prefix + "_day"] = date.strftime("%d")
            hyperdata[prefix + "_hour"] = date.strftime("%H")
            hyperdata[prefix + "_minute"] = date.strftime("%M")
            hyperdata[prefix + "_second"] = date.strftime("%S")

        # finally, return the transformed result!
        return hyperdata

    def format_hyperdata_languages(self, hyperdata):
        """Format the languages found in the hyperdata.

        The first of "language_fullname", "language_iso3" and
        "language_iso2" that the language cache resolves is used to fill all
        three fields. The dictionary is modified in place and returned.
        """
        language = None
        for key in ["fullname", "iso3", "iso2"]:
            language_key = "language_" + key
            if language_key in hyperdata:
                language_symbol = hyperdata[language_key]
                language = self._languages_cache[language_symbol]
                if language:
                    break
        if language:
            hyperdata["language_iso2"] = language.iso2
            hyperdata["language_iso3"] = language.iso3
            hyperdata["language_fullname"] = language.fullname
        return hyperdata

    def format_hyperdata(self, hyperdata):
        """Format the hyperdata (dates first, then languages)."""
        hyperdata = self.format_hyperdata_dates(hyperdata)
        hyperdata = self.format_hyperdata_languages(hyperdata)
        return hyperdata

    def _parse(self, file):
        """This method shall be overridden by inherited classes."""
        return list()

    @staticmethod
    def _is_zipfile(file):
        """Tell whether `file` is a ZIP archive, without consuming it.

        `zipfile.is_zipfile()` moves the position of the file objects it
        inspects; the position is restored here so that the file can still
        be read from where the caller left it.
        """
        position = None
        if hasattr(file, 'tell') and hasattr(file, 'seek'):
            try:
                position = file.tell()
            except (OSError, ValueError):
                position = None
        try:
            return zipfile.is_zipfile(file)
        except ValueError:
            # `file` is not a file object and cannot be a path either
            # (e.g. bytes holding a NUL character): not an archive.
            return False
        finally:
            if position is not None:
                try:
                    file.seek(position)
                except (OSError, ValueError):
                    pass

    def parse(self, file):
        """Parse the file, and its children files found in the file.

        Return the list of formatted hyperdata. Errors raised while parsing
        a file are printed, and what was parsed before them is kept.
        """
        # initialize the list of hyperdata
        hyperdata_list = []
        if self._is_zipfile(file):
            # if the file is a ZIP archive, recurse on each of its files...
            with zipfile.ZipFile(file) as zip_archive:
                for filename in zip_archive.namelist():
                    try:
                        with zip_archive.open(filename, 'r') as member:
                            hyperdata_list += self.parse(member)
                    except Exception as error:
                        print(error)
        # ...otherwise, let's parse it directly!
        else:
            try:
                for hyperdata in self._parse(file):
                    hyperdata_list.append(self.format_hyperdata(hyperdata))
            except Exception as error:
                print(error)
            finally:
                # close the file even when parsing failed
                if hasattr(file, 'close'):
                    file.close()
        # return the list of formatted hyperdata
        return hyperdata_list
# from .Ris import RisParser
# from .Isi import IsiParser
# from .Jstor import JstorParser
# from .Zotero import ZoteroParser
from .Pubmed import PubmedParser
# # 2015-12-08: parser 2 en 1
from .Europress import EuropressParser
# from .ISTex import ISTexParser
# from .CSV import CSVParser
...@@ -2,6 +2,7 @@ from gargantext.util.http import * ...@@ -2,6 +2,7 @@ from gargantext.util.http import *
from gargantext.util.db import * from gargantext.util.db import *
from gargantext.util.db_cache import cache from gargantext.util.db_cache import cache
from gargantext.models import * from gargantext.models import *
from gargantext.constants import *
from datetime import datetime from datetime import datetime
...@@ -14,22 +15,21 @@ def overview(request): ...@@ -14,22 +15,21 @@ def overview(request):
''' '''
user = cache.User[request.user.username] user = cache.User[request.user.username]
project_type = cache.NodeType['Project']
# If POST method, creates a new project... # If POST method, creates a new project...
if request.method == 'POST': if request.method == 'POST':
name = str(request.POST['name']) name = str(request.POST['name'])
if name != '': if name != '':
new_project = Node( new_project = Node(
name = name,
type_id = project_type.id,
user_id = user.id, user_id = user.id,
type = 'PROJECT',
name = name,
) )
session.add(new_project) session.add(new_project)
session.commit() session.commit()
# list of projects created by the logged user # list of projects created by the logged user
user_projects = user.get_nodes(nodetype=project_type) user_projects = user.get_nodes(type='PROJECT')
# list of contacts of the logged user # list of contacts of the logged user
contacts = user.get_contacts() contacts = user.get_contacts()
...@@ -38,7 +38,7 @@ def overview(request): ...@@ -38,7 +38,7 @@ def overview(request):
contact_projects = (session contact_projects = (session
.query(Node) .query(Node)
.filter(Node.user_id == contact.id) .filter(Node.user_id == contact.id)
.filter(Node.type_id == project_type.id) .filter(Node.type == 'PROJECT')
.order_by(Node.date) .order_by(Node.date)
).all() ).all()
contacts_projects += contact_projects contacts_projects += contact_projects
...@@ -60,19 +60,37 @@ def overview(request): ...@@ -60,19 +60,37 @@ def overview(request):
) )
from django.utils.translation import ugettext_lazy
class NewCorpusForm(forms.Form):
    """Form used to create a corpus from an uploaded file.

    The value of each `type` option is the index of the matching entry of
    `RESOURCETYPES`; its label is the entry's name.
    """
    type = forms.ChoiceField(
        choices=[
            (index, resourcetype['name'])
            for index, resourcetype in enumerate(RESOURCETYPES)
        ],
        widget=forms.Select(
            attrs={'onchange': 'CustomForSelect( $("option:selected", this).text() );'},
        ),
    )
    name = forms.CharField(
        label='Name',
        max_length=199,
        widget=forms.TextInput(attrs={'required': 'true'}),
    )
    file = forms.FileField()

    def clean_file(self):
        """Reject uploads whose length exceeds 1GB; return the file otherwise."""
        uploaded = self.cleaned_data.get('file')
        size_limit = 1024 ** 3  # we don't accept more than 1GB
        if len(uploaded) <= size_limit:
            return uploaded
        raise forms.ValidationError(ugettext_lazy('File too heavy! (>1GB).'))
@requires_auth
def project(request, project_id):
    """Render the page of a single project.

    `project_id` is the identifier of the project's node. The template
    receives `project` as None when no node has this identifier.
    """
    # Match on the node's primary key. The filter used to compare the
    # `project_id` argument with itself, which is always true, so the first
    # node of the table was returned whatever identifier was requested.
    project = session.query(Node).filter(Node.id == project_id).first()
    return render(
        template_name = 'pages/projects/project.html',
        request = request,
        context = {
            'form': NewCorpusForm,
            'user': request.user,
            'date': datetime.now(),
            'project': project,
            # NOTE(review): `donut` is not defined in the visible part of
            # this module -- confirm it exists, or this raises NameError.
            'donut': donut,
            # 'list_corpora' : dict(corpora_by_resourcetype),
            'whitelists': [],
            'blacklists': [],
            'cooclists': [],
            # 'number' : corpora_count,
            # 'query_size' : QUERY_SIZE_N_DEFAULT,
        },
    )
.morris-hover{position:absolute;z-index:1000;}.morris-hover.morris-default-style{border-radius:10px;padding:6px;color:#666;background:rgba(255, 255, 255, 0.8);border:solid 2px rgba(230, 230, 230, 0.8);font-family:sans-serif;font-size:12px;text-align:center;}.morris-hover.morris-default-style .morris-hover-row-label{font-weight:bold;margin:0.25em 0;}
.morris-hover.morris-default-style .morris-hover-point{white-space:nowrap;margin:0.1em 0;}
This diff is collapsed.
...@@ -3,14 +3,14 @@ ...@@ -3,14 +3,14 @@
{% block css %} {% block css %}
{% load staticfiles %} {% load staticfiles %}
<link rel="stylesheet" href="{% static "css/bootstrap.css" %}"> <link rel="stylesheet" href="{% static "css/bootstrap.css" %}">
<link rel="stylesheet" type="text/css" href="{% static "css/morris.css" %}">
<link rel="stylesheet" type="text/css" href="{% static "css/jquery.easy-pie-chart.css"%}">
<script type="text/javascript" src="{% static "js/jquery/jquery.min.js" %}"></script> <script type="text/javascript" src="{% static "js/jquery/jquery.min.js" %}"></script>
<link rel="stylesheet" href="http://code.jquery.com/ui/1.11.2/themes/smoothness/jquery-ui.css">
<script type="text/javascript" src="{% static "js/morris.min.js" %}"></script>
<script type="text/javascript" src="{% static "js/morris.min.js" %}"></script>
<link rel="stylesheet" href="{% static "css/morris.css" %}">
<script src="{% static "js/raphael-min.js"%}"></script> <script src="{% static "js/raphael-min.js"%}"></script>
<script src="{% static "js/morris.min.js"%}"></script>
<link rel="stylesheet" href="http://code.jquery.com/ui/1.11.2/themes/smoothness/jquery-ui.css">
<style type="text/css"> <style type="text/css">
.ui-autocomplete { .ui-autocomplete {
z-index: 5000; z-index: 5000;
...@@ -182,7 +182,7 @@ ...@@ -182,7 +182,7 @@
<td> <td>
{{ field.errors }} {{ field.errors }}
{{ field }} {{ field }}
{% if field.name == "name" %} {% if field.name == 'name' %}
<span onclick="getGlobalResults(this);" id="scanpubmed"></span> <span onclick="getGlobalResults(this);" id="scanpubmed"></span>
<div id="theresults"></div> <div id="theresults"></div>
{% endif %} {% endif %}
...@@ -437,9 +437,12 @@ ...@@ -437,9 +437,12 @@
//CSS events for changing the Select element //CSS events for changing the Select element
function CustomForSelect( selected ) { function CustomForSelect( selected ) {
// show Radio-Inputs and trigger FileOrNotFile>@upload-file events // show Radio-Inputs and trigger FileOrNotFile>@upload-file events
if(selected=="Pubmed (xml format)" || selected=="ISTex") { selected = selected.toLowerCase()
var is_pubmed = (selected.indexOf('pubmed') != -1);
var is_istex = (selected.indexOf('istex') != -1);
if (is_pubmed || is_istex) {
// if(selected=="pubmed") { // if(selected=="pubmed") {
console.log("show the button for: "+selected) console.log("show the button for: " + selected)
$("#pubmedcrawl").css("visibility", "visible"); $("#pubmedcrawl").css("visibility", "visible");
$("#pubmedcrawl").show(); $("#pubmedcrawl").show();
$("#file_yes").click(); $("#file_yes").click();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment