Commit 204bfc6d authored by Mathieu Rodic's avatar Mathieu Rodic

[CODE] removed the `NodeType` model

see hardcoded constants in `gargantext/constants.py`
(also, kept working on the single project page)
parent bf8001dc
# Names of the known node types.
# The `NodeType` column type converts a name to its position in this list
# (`NODETYPES.index(name)`) and back (`NODETYPES[index]`), so the position of
# each entry is what identifies it once stored.
NODETYPES = [
None,
'USER',
'PROJECT',
'CORPUS',
'DOCUMENT',
]
# Per-language settings, keyed by language code.
# Currently empty: the only entry (a French tagger) is commented out.
LANGUAGES = {
# 'fr': {
# 'tagger': FrenchNgramsTagger
# }
}
from gargantext.util.parsers import *
# Resource types a corpus can be created from.
# Each entry is a dict with a display 'name' and a 'default_language'; entries
# whose parser is available also carry a 'parser' class. Entries whose
# 'parser' line is commented out currently have no 'parser' key at all.
RESOURCETYPES = [
# { 'name': 'CSV',
# # 'parser': CSVParser,
# 'default_language': 'en',
# },
{ 'name': 'Europress (English)',
'parser': EuropressParser,
'default_language': 'en',
},
{ 'name': 'Europress (French)',
# 'parser': EuropressParser,
'default_language': 'fr',
},
# { 'name': 'ISTex',
# # 'parser': ISTexParser,
# 'default_language': 'en',
# },
{ 'name': 'Jstor (RIS format)',
# 'parser': RISParser,
'default_language': 'en',
},
{ 'name': 'Pubmed (XML format)',
'parser': PubmedParser,
'default_language': 'en',
},
{ 'name': 'Scopus (RIS format)',
# 'parser': RISParser,
'default_language': 'en',
},
{ 'name': 'Web of Science (ISI format)',
# 'parser': ISIParser,
# NOTE(review): 'fr' differs from the other non-French sources -- confirm.
'default_language': 'fr',
},
{ 'name': 'Zotero (RIS format)',
# 'parser': RISParser,
'default_language': 'en',
},
]
from gargantext.util.db import *
from gargantext.constants import *
from datetime import datetime
from .users import User
__all__ = ['NodeType', 'Node']
__all__ = ['Node']
# ORM model mapped to the `nodetypes` table: an integer primary key and a
# unique name of at most 255 characters.
class NodeType(Base):
__tablename__ = 'nodetypes'
id = Column(Integer, primary_key=True)
name = Column(String(255), unique=True)
class NodeType(TypeDecorator):
    """Define a new type of column to describe a Node's type.

    This column type is implemented as an SQL integer.
    Values are detailed in `gargantext.constants.NODETYPES`.
    """

    impl = Integer

    def process_bind_param(self, typename, dialect):
        """Convert a type name into the integer stored in the database.

        Returns the position of `typename` in `NODETYPES`.
        Raises `ValueError` (from `list.index`) for a name that is not listed.
        """
        return NODETYPES.index(typename)

    def process_result_value(self, typeindex, dialect):
        """Convert the integer read from the database into a type name.

        A NULL column is passed in as `None`; it is returned unchanged rather
        than used as a list index, which would raise `TypeError`.
        """
        if typeindex is None:
            return None
        return NODETYPES[typeindex]
class Node(Base):
__tablename__ = 'nodes'
id = Column(Integer, primary_key=True)
# foreign keys
type = Column(NodeType, index=True)
user_id = Column(Integer, ForeignKey(User.id))
type_id = Column(Integer, ForeignKey(NodeType.id))
# main data
name = Column(String(255), unique=True)
date = Column(DateTime(), default=datetime.now)
......
......@@ -32,7 +32,7 @@ class User(Base):
)
return query.all()
def get_nodes(self, nodetype=None):
def get_nodes(self, type=None):
"""get all nodes belonging to the user"""
# ↓ this below is a workaround because of Python's lame import system
from .nodes import Node
......@@ -41,8 +41,8 @@ class User(Base):
.filter(Node.user_id == self.id)
.order_by(Node.date)
)
if nodetype is not None:
query = query.filter(Node.type_id == nodetype.id)
if type is not None:
query = query.filter(Node.type == type)
return query.all()
def owns(user, node):
......
......@@ -22,7 +22,8 @@ session = scoped_session(sessionmaker(bind=engine))
# tools to build models
from sqlalchemy.orm import aliased
from sqlalchemy.orm import aliased, synonym
from sqlalchemy.types import *
from sqlalchemy.schema import Column, ForeignKey, UniqueConstraint
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.ext.hybrid import hybrid_property
......@@ -2,6 +2,8 @@ from django.template.loader import get_template
from django.http import Http404, HttpResponse, HttpResponseRedirect, HttpResponseForbidden
from django.shortcuts import render, redirect
from django import forms
from urllib.parse import quote_plus as urlencode
from gargantext import settings
......
This diff is collapsed.
from lxml import etree
from ._Parser import Parser
from datetime import datetime
from io import BytesIO
class PubmedParser(Parser):
    """Parser extracting one hyperdata dict per `PubmedArticle` XML element."""

    # Hyperdata key -> path of the source element, relative to the article.
    _HYPERDATA_PATHS = {
        "journal": 'MedlineCitation/Article/Journal/Title',
        "title": 'MedlineCitation/Article/ArticleTitle',
        "abstract": 'MedlineCitation/Article/Abstract/AbstractText',
        "title2": 'MedlineCitation/Article/VernacularTitle',
        "language_iso3": 'MedlineCitation/Article/Language',
        # The predicate used to be `[@type=doi]`: an unquoted value is not a
        # valid path predicate, so the lookup always failed and the DOI was
        # never extracted. PubMed marks the identifier kind with `IdType`.
        "doi": 'PubmedData/ArticleIdList/ArticleId[@IdType="doi"]',
        "realdate_full_": 'MedlineCitation/Article/Journal/JournalIssue/PubDate/MedlineDate',
        "realdate_year_": 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Year',
        "realdate_month_": 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Month',
        "realdate_day_": 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Day',
        "publication_year": 'MedlineCitation/DateCreated/Year',
        "publication_month": 'MedlineCitation/DateCreated/Month',
        "publication_day": 'MedlineCitation/DateCreated/Day',
        "authors": 'MedlineCitation/Article/AuthorList',
    }

    def _parse(self, file):
        """Return the list of hyperdata dicts found in a PubMed XML document.

        `file` is either the raw XML as `bytes` or anything `etree.parse`
        accepts.
        """
        # entities are not resolved; malformed markup is recovered from
        xml_parser = etree.XMLParser(resolve_entities=False, recover=True)
        if isinstance(file, bytes):
            file = BytesIO(file)
        xml = etree.parse(file, parser=xml_parser)
        # parse all the articles, one by one
        return [
            self._parse_article(xml_article)
            for xml_article in xml.findall('PubmedArticle')
        ]

    def _parse_article(self, xml_article):
        """Extract the hyperdata of a single `PubmedArticle` element."""
        hyperdata = {}
        for key, path in self._HYPERDATA_PATHS.items():
            xml_node = xml_article.find(path)
            if xml_node is None:
                # missing element: the key is simply left out
                continue
            if key == 'authors':
                hyperdata[key] = self._format_authors(xml_node)
            else:
                hyperdata[key] = xml_node.text

        # Title decision: fall back on the vernacular title, then on "".
        # `.get` matters here: "title" is absent when the element is missing.
        if not hyperdata.get("title"):
            hyperdata["title"] = hyperdata.get("title2", "")

        # Date decision
        # forge.iscpif.fr/issues/1418
        real_date = hyperdata.get("realdate_full_")
        if real_date is None:
            real_date = self._concat_date_fields(
                hyperdata,
                ("realdate_year_", "realdate_month_", "realdate_day_"),
            )
        hyperdata["realdate_full_"] = real_date
        # only the part before the first "-" is considered
        real_date = real_date.split("-")[0]
        pubmed_date = self._concat_date_fields(
            hyperdata,
            ("publication_year", "publication_month", "publication_day"),
        )

        decision = self._decide_date(real_date, pubmed_date)
        if decision is not None:
            # only overwrite the publication fields that were extracted
            for field in ("year", "month", "day"):
                key = "publication_" + field
                if key in hyperdata:
                    hyperdata[key] = str(getattr(decision, field))

        # drop the intermediate keys
        for key in ("realdate_year_", "realdate_month_", "realdate_day_", "title2"):
            hyperdata.pop(key, None)
        return hyperdata

    @staticmethod
    def _format_authors(xml_author_list):
        """Join the authors as "ForeName LastName, ForeName LastName, ...".

        An author lacking one of the two elements keeps the part that exists;
        an author with neither is skipped instead of discarding the whole list.
        """
        names = []
        for xml_author in xml_author_list:
            parts = []
            for tag in ('ForeName', 'LastName'):
                xml_part = xml_author.find(tag)
                if xml_part is not None and xml_part.text:
                    parts.append(xml_part.text)
            if parts:
                names.append(' '.join(parts))
        return ', '.join(names)

    @staticmethod
    def _concat_date_fields(hyperdata, keys):
        """Concatenate the year, month and day found under `keys`.

        The first key is taken as is, each following one is prefixed with a
        space; keys that are missing or hold `None` are ignored.
        """
        text = hyperdata.get(keys[0]) or ""
        for key in keys[1:]:
            value = hyperdata.get(key)
            if value is not None:
                text += " " + value
        return text

    @staticmethod
    def _decide_date(real_date, pubmed_date):
        """Return the best `date` for the article, or `None` if none parses.

        The journal issue date is tried first ('%Y %b %d' when longer than
        8 characters, '%Y %b' when longer than 4), then the record creation
        date ('%Y %m %d').
        """
        candidates = []
        if len(real_date) > 8:
            candidates.append((real_date, '%Y %b %d'))
        elif len(real_date) > 4:
            candidates.append((real_date, '%Y %b'))
        candidates.append((pubmed_date, '%Y %m %d'))
        for text, date_format in candidates:
            try:
                return datetime.strptime(text, date_format).date()
            except ValueError:
                # not in this format: try the next candidate
                continue
        return None
import collections
import datetime
import dateutil.parser
import zipfile
import re
DEFAULT_DATE = datetime.datetime(datetime.MINYEAR, 1, 1)
class Parser:
    """Base class for performing files parsing depending on their type.
    """

    def __init__(self, language_cache=None):
        """Store the language cache used by `format_hyperdata_languages`."""
        # NOTE(review): `LanguagesCache` is neither defined nor imported in the
        # visible part of this module -- confirm it is in scope.
        self._languages_cache = LanguagesCache() if language_cache is None else language_cache

    def detect_encoding(self, string):
        """Useful method to detect the encoding of a document.

        Returns the encoding name guessed by `chardet`, or 'UTF-8' when no
        guess is available.
        """
        import chardet
        encoding = chardet.detect(string)
        # chardet reports a failed detection as {'encoding': None}; a plain
        # `.get('encoding', 'UTF-8')` would hand that None back to the caller
        return encoding.get('encoding') or 'UTF-8'

    def format_hyperdata_dates(self, hyperdata):
        """Format the dates found in the hyperdata.
        Examples:
        {"publication_date": "2014-10-23 09:57:42"}
        -> {"publication_date": "2014-10-23 09:57:42", "publication_year": "2014", ...}
        {"publication_year": "2014"}
        -> {"publication_date": "2014-01-01 00:00:00", "publication_year": "2014", ...}
        """
        date_format = "%Y-%m-%d %H:%M:%S"

        # First, check the split dates...
        # This part mainly deals with Zotero data but can be useful for other
        # parsers
        date_string = hyperdata.get('publication_date_to_parse', None)
        if date_string is not None:
            date_string = re.sub(r'\/\/+(\w*|\d*)', '', date_string)
            try:
                hyperdata['publication_date'] = dateutil.parser.parse(
                    date_string,
                    default=DEFAULT_DATE
                ).strftime(date_format)
            except Exception as error:
                print(error, 'Parser Zotero, Date not parsed for:', date_string)
                hyperdata['publication_date'] = datetime.datetime.now().strftime(date_format)

        elif hyperdata.get('publication_year', None) is not None:
            prefixes = [key[:-5] for key in hyperdata.keys() if key[-5:] == "_year"]
            for prefix in prefixes:
                try:
                    date_string = self._compose_date_string(hyperdata, prefix)
                    hyperdata[prefix + "_date"] = dateutil.parser.parse(date_string).strftime(date_format)
                except (TypeError, ValueError, OverflowError):
                    # best effort: a prefix whose fields cannot be assembled
                    # or parsed simply gets no "<prefix>_date" entry
                    continue

        else:
            hyperdata['publication_date'] = datetime.datetime.now().strftime(date_format)

        # ...then parse all the "date" fields, to parse it into separate elements
        prefixes = [key[:-5] for key in hyperdata.keys() if key[-5:] == "_date"]
        for prefix in prefixes:
            try:
                date = dateutil.parser.parse(hyperdata[prefix + "_date"])
            except (TypeError, ValueError, OverflowError) as error:
                # one unreadable date must not abort the whole document
                print(error, 'Parser, Date not parsed for:', hyperdata[prefix + "_date"])
                continue
            hyperdata[prefix + "_year"] = date.strftime("%Y")
            hyperdata[prefix + "_month"] = date.strftime("%m")
            hyperdata[prefix + "_day"] = date.strftime("%d")
            hyperdata[prefix + "_hour"] = date.strftime("%H")
            hyperdata[prefix + "_minute"] = date.strftime("%M")
            hyperdata[prefix + "_second"] = date.strftime("%S")

        # finally, return the transformed result!
        return hyperdata

    @staticmethod
    def _compose_date_string(hyperdata, prefix):
        """Assemble "<year> <month> <day> <hour>:<minute>:<second>".

        Only the fields present under `prefix` are used; the year is required.
        """
        date_string = hyperdata[prefix + "_year"]
        for suffix, separator in (
            ("_month", " "),
            ("_day", " "),
            ("_hour", " "),
            ("_minute", ":"),
            ("_second", ":"),
        ):
            key = prefix + suffix
            if key in hyperdata:
                date_string += separator + hyperdata[key]
        return date_string

    def format_hyperdata_languages(self, hyperdata):
        """format the languages found in the hyperdata."""
        language = None
        # look the language up by full name, then ISO3, then ISO2 code, and
        # stop at the first symbol the cache knows about
        for key in ["fullname", "iso3", "iso2"]:
            language_key = "language_" + key
            if language_key in hyperdata:
                language_symbol = hyperdata[language_key]
                language = self._languages_cache[language_symbol]
                if language:
                    break
        if language:
            hyperdata["language_iso2"] = language.iso2
            hyperdata["language_iso3"] = language.iso3
            hyperdata["language_fullname"] = language.fullname
        return hyperdata

    def format_hyperdata(self, hyperdata):
        """Format the hyperdata."""
        hyperdata = self.format_hyperdata_dates(hyperdata)
        hyperdata = self.format_hyperdata_languages(hyperdata)
        return hyperdata

    def _parse(self, file):
        """This method shall be overridden by inherited classes."""
        return list()

    def parse(self, file):
        """Parse the file, and its children files found in the file.

        A ZIP archive is parsed member by member (recursively); anything else
        goes through `_parse` and `format_hyperdata`. Errors raised while
        parsing are printed and the hyperdata gathered so far is returned.
        """
        # initialize the list of hyperdata
        hyperdata_list = []

        # `zipfile.is_zipfile` seeks inside file objects and does not restore
        # the position, so remember it and put it back before reading
        try:
            position = file.tell()
        except (AttributeError, OSError, ValueError):
            # not a file object, or not seekable: nothing to restore
            position = None
        is_zip = zipfile.is_zipfile(file)
        if position is not None:
            try:
                file.seek(position)
            except (OSError, ValueError):
                # cannot rewind: keep going from wherever the stream is
                pass

        if is_zip:
            # if the file is a ZIP archive, recurse on each of its files...
            with zipfile.ZipFile(file) as zip_archive:
                for filename in zip_archive.namelist():
                    try:
                        with zip_archive.open(filename, 'r') as member:
                            hyperdata_list += self.parse(member)
                    except Exception as error:
                        print(error)
        # ...otherwise, let's parse it directly!
        else:
            try:
                for hyperdata in self._parse(file):
                    hyperdata_list.append(self.format_hyperdata(hyperdata))
            except Exception as error:
                print(error)
            finally:
                # close the file even when parsing failed
                if hasattr(file, 'close'):
                    file.close()

        # return the list of formatted hyperdata
        return hyperdata_list
# from .Ris import RisParser
# from .Isi import IsiParser
# from .Jstor import JstorParser
# from .Zotero import ZoteroParser
from .Pubmed import PubmedParser
# # 2015-12-08: parser 2 en 1
from .Europress import EuropressParser
# from .ISTex import ISTexParser
# from .CSV import CSVParser
......@@ -2,6 +2,7 @@ from gargantext.util.http import *
from gargantext.util.db import *
from gargantext.util.db_cache import cache
from gargantext.models import *
from gargantext.constants import *
from datetime import datetime
......@@ -14,22 +15,21 @@ def overview(request):
'''
user = cache.User[request.user.username]
project_type = cache.NodeType['Project']
# If POST method, creates a new project...
if request.method == 'POST':
name = str(request.POST['name'])
if name != '':
new_project = Node(
name = name,
type_id = project_type.id,
user_id = user.id,
type = 'PROJECT',
name = name,
)
session.add(new_project)
session.commit()
# list of projects created by the logged user
user_projects = user.get_nodes(nodetype=project_type)
user_projects = user.get_nodes(type='PROJECT')
# list of contacts of the logged user
contacts = user.get_contacts()
......@@ -38,7 +38,7 @@ def overview(request):
contact_projects = (session
.query(Node)
.filter(Node.user_id == contact.id)
.filter(Node.type_id == project_type.id)
.filter(Node.type == 'PROJECT')
.order_by(Node.date)
).all()
contacts_projects += contact_projects
......@@ -60,19 +60,37 @@ def overview(request):
)
from django.utils.translation import ugettext_lazy
class NewCorpusForm(forms.Form):
    """Form asking for a resource type, a name and a file to upload."""

    # one choice per entry of RESOURCETYPES, valued by its position
    type = forms.ChoiceField(
        choices=[
            (index, resourcetype['name'])
            for index, resourcetype in enumerate(RESOURCETYPES)
        ],
        widget=forms.Select(
            attrs={'onchange': 'CustomForSelect( $("option:selected", this).text() );'},
        ),
    )
    name = forms.CharField(
        label='Name',
        max_length=199,
        widget=forms.TextInput(attrs={'required': 'true'}),
    )
    file = forms.FileField()

    def clean_file(self):
        """Return the uploaded file, rejecting anything above 1GB."""
        uploaded = self.cleaned_data.get('file')
        size_limit = 1024 ** 3  # we don't accept more than 1GB
        if len(uploaded) <= size_limit:
            return uploaded
        raise forms.ValidationError(ugettext_lazy('File too heavy! (>1GB).'))
@requires_auth
def project(request, project_id):
    """Render the page of the project identified by `project_id`.

    `project` is `None` in the template context when no node has that id.
    """
    # Filter on the node's id column. The previous `project_id == project_id`
    # compared the argument with itself, so the filter constrained nothing and
    # the query returned whichever node came first.
    project = session.query(Node).filter(Node.id == project_id).first()
    return render(
        template_name = 'pages/projects/project.html',
        request = request,
        context = {
            'form': NewCorpusForm,
            'user': request.user,
            'date': datetime.now(),
            'project': project,
            # NOTE(review): `donut` is not defined in the visible part of this
            # module -- confirm it is in scope before relying on this view.
            'donut': donut,
            'whitelists': [],
            'blacklists': [],
            'cooclists': [],
        },
    )
.morris-hover{position:absolute;z-index:1000;}.morris-hover.morris-default-style{border-radius:10px;padding:6px;color:#666;background:rgba(255, 255, 255, 0.8);border:solid 2px rgba(230, 230, 230, 0.8);font-family:sans-serif;font-size:12px;text-align:center;}.morris-hover.morris-default-style .morris-hover-row-label{font-weight:bold;margin:0.25em 0;}
.morris-hover.morris-default-style .morris-hover-point{white-space:nowrap;margin:0.1em 0;}
This diff is collapsed.
......@@ -3,14 +3,14 @@
{% block css %}
{% load staticfiles %}
<link rel="stylesheet" href="{% static "css/bootstrap.css" %}">
<link rel="stylesheet" type="text/css" href="{% static "css/morris.css" %}">
<link rel="stylesheet" type="text/css" href="{% static "css/jquery.easy-pie-chart.css"%}">
<script type="text/javascript" src="{% static "js/jquery/jquery.min.js" %}"></script>
<link rel="stylesheet" href="http://code.jquery.com/ui/1.11.2/themes/smoothness/jquery-ui.css">
<script type="text/javascript" src="{% static "js/morris.min.js" %}"></script>
<script type="text/javascript" src="{% static "js/morris.min.js" %}"></script>
<link rel="stylesheet" href="{% static "css/morris.css" %}">
<script src="{% static "js/raphael-min.js"%}"></script>
<script src="{% static "js/morris.min.js"%}"></script>
<link rel="stylesheet" href="http://code.jquery.com/ui/1.11.2/themes/smoothness/jquery-ui.css">
<style type="text/css">
.ui-autocomplete {
z-index: 5000;
......@@ -182,7 +182,7 @@
<td>
{{ field.errors }}
{{ field }}
{% if field.name == "name" %}
{% if field.name == 'name' %}
<span onclick="getGlobalResults(this);" id="scanpubmed"></span>
<div id="theresults"></div>
{% endif %}
......@@ -437,9 +437,12 @@
//CSS events for changing the Select element
function CustomForSelect( selected ) {
// show Radio-Inputs and trigger FileOrNotFile>@upload-file events
if(selected=="Pubmed (xml format)" || selected=="ISTex") {
selected = selected.toLowerCase()
var is_pubmed = (selected.indexOf('pubmed') != -1);
var is_istex = (selected.indexOf('istex') != -1);
if (is_pubmed || is_istex) {
// if(selected=="pubmed") {
console.log("show the button for: "+selected)
console.log("show the button for: " + selected)
$("#pubmedcrawl").css("visibility", "visible");
$("#pubmedcrawl").show();
$("#file_yes").click();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment