Commit 9d3682eb authored by Mathieu Rodic's avatar Mathieu Rodic

Merge branch 'master' of ssh://delanoe.org:1979/gargantext into mat-master

parents e80fdd78 d18065e7
...@@ -249,9 +249,10 @@ def tfidf(corpus, document, ngram): ...@@ -249,9 +249,10 @@ def tfidf(corpus, document, ngram):
xx = Node.objects.filter(parent=corpus, type=NodeType.objects.get(name="Document")).count() xx = Node.objects.filter(parent=corpus, type=NodeType.objects.get(name="Document")).count()
yy = Node_Ngram.objects.filter(ngram=ngram).count() yy = Node_Ngram.objects.filter(ngram=ngram).count()
idf= log(xx/yy) inverse_d_frequency= log(xx/yy)
result = tf * idf # result = tf * idf
result = term_frequency * inverse_d_frequency
except Exception as error: except Exception as error:
print(error) print(error)
result = 0 result = 0
......
from node.models import Node, NodeType, Node_Resource,\
Project, Corpus, Document,\
Ngram, Node_Ngram, NodeNgramNgram, NodeNodeNgram,\
User, Language, ResourceType, Resource
from math import log
# - tfidf per corpus, per type of corpus, across all corpora
# - tfidf per time scale
# - tfidf per source, author, etc.
# => list of lists
def tfidf(corpus, document, ngram):
    """Return the tf-idf score of `ngram` for `document` within `corpus`.

    tf  = weight of the (document, ngram) Node_Ngram row divided by the
          number of Node_Ngram rows attached to the document.
    idf = log(number of "Document" nodes whose parent is the corpus
              / number of Node_Ngram rows referring to the ngram).

    This is a best-effort computation: any exception raised along the way
    is printed and the score falls back to 0.
    """
    try:
        ngram_weight = Node_Ngram.objects.get(node=document, ngram=ngram).weight
        document_size = Node_Ngram.objects.filter(node=document).count()
        term_frequency = ngram_weight / document_size

        corpus_size = Node.objects.filter(
            parent=corpus,
            type=NodeType.objects.get(name="Document"),
        ).count()
        ngram_spread = Node_Ngram.objects.filter(ngram=ngram).count()
        inverse_document_frequency = log(corpus_size / ngram_spread)

        return term_frequency * inverse_document_frequency
    except Exception as error:
        print(error)
        return 0
def do_tfidf(corpus, reset=True):
    """Compute and store the tf-idf score of every (document, ngram) pair of a corpus.

    corpus -- expected to be a Node whose type is named "Corpus"; anything
              else only prints a message and stores nothing.
    reset  -- when true, the NodeNodeNgram rows whose nodex is the corpus are
              deleted first, so every score is recomputed.

    Pairs that already have a NodeNodeNgram row are left untouched; a new row
    holding the tfidf() score is saved for every other pair. Everything runs
    inside a single database transaction.
    """
    # Imported here because the module's import block does not bring
    # `transaction` into scope, which made the original call raise NameError.
    from django.db import transaction

    with transaction.atomic():
        if reset:
            NodeNodeNgram.objects.filter(nodex=corpus).delete()

        if not (isinstance(corpus, Node) and corpus.type.name == "Corpus"):
            print("Only implemented for corpus yet, whereas you put:", type(corpus))
            return

        # Looked up once instead of once per call site.
        document_type = NodeType.objects.get(name="Document")
        for document in Node.objects.filter(parent=corpus, type=document_type):
            for node_ngram in Node_Ngram.objects.filter(node=document):
                # Explicit existence test instead of a bare `except:` around
                # `.get()`, which also hid unrelated errors.
                already_scored = NodeNodeNgram.objects.filter(
                    nodex=corpus, nodey=document, ngram=node_ngram.ngram
                ).exists()
                if already_scored:
                    continue
                score = tfidf(corpus, document, node_ngram.ngram)
                # node_ngram rows were filtered on node=document, so
                # `document` is the node the original read via node_ngram.node.
                NodeNodeNgram(
                    nodex=corpus,
                    nodey=document,
                    ngram=node_ngram.ngram,
                    score=score,
                ).save()
...@@ -17,36 +17,43 @@ urlpatterns = patterns('', ...@@ -17,36 +17,43 @@ urlpatterns = patterns('',
url(r'^login/', include(admin.site.urls)), url(r'^login/', include(admin.site.urls)),
url(r'^grappelli/', include('grappelli.urls')), url(r'^grappelli/', include('grappelli.urls')),
# User views # User Home view
url(r'^$', views.home), url(r'^$', views.home),
# Project Management
url(r'^projects/$', views.projects), url(r'^projects/$', views.projects),
url(r'^project/(\d+)/delete/$', views.delete_project), url(r'^project/(\d+)/delete/$', views.delete_project),
url(r'^project/(\d+)/$', views.project), url(r'^project/(\d+)/$', views.project),
# Corpus management
url(r'^project/(\d+)/corpus/(\d+)/$', views.corpus), url(r'^project/(\d+)/corpus/(\d+)/$', views.corpus),
url(r'^project/(\d+)/corpus/(\d+)/delete/$', views.delete_corpus), url(r'^project/(\d+)/corpus/(\d+)/delete/$', views.delete_corpus),
url(r'^project/(\d+)/corpus/(\d+)/corpus.csv$', views.corpus_csv),
url(r'^project/(\d+)/corpus/(\d+)/timerange/(\d+)/(\d+)$', views.subcorpus),
# Visualizations # Visualizations
url(r'^corpus/(\d+)/explorer$', views.explorer_graph), url(r'^project/(\d+)/corpus/(\d+)/chart$', views.chart),
url(r'^corpus/(\d+)/matrix$', views.explorer_matrix), url(r'^corpus/(\d+)/explorer$', views.graph),
url(r'^corpus/(\d+)/matrix$', views.matrix),
# Getting data # Data management
url(r'^chart/corpus/(\d+)/data.csv$', views.send_csv), url(r'^chart/corpus/(\d+)/data.csv$', views.send_csv),
url(r'^corpus/(\d+)/node_link.json$', views.node_link), url(r'^corpus/(\d+)/node_link.json$', views.node_link),
url(r'^corpus/(\d+)/adjacency.json$', views.adjacency), url(r'^corpus/(\d+)/adjacency.json$', views.adjacency),
url(r'^api/tfidf/(\d+)/(\w+)$', views.tfidf), url(r'^api/tfidf/(\d+)/(\w+)$', views.tfidf),
# Data management
url(r'^api$', gargantext_web.api.Root), url(r'^api$', gargantext_web.api.Root),
url(r'^api/nodes/(\d+)/children/metadata$', gargantext_web.api.NodesChildrenMetatadata.as_view()), url(r'^api/nodes/(\d+)/children/metadata$', gargantext_web.api.NodesChildrenMetatadata.as_view()),
url(r'^api/nodes/(\d+)/children/queries$', gargantext_web.api.NodesChildrenQueries.as_view()), url(r'^api/nodes/(\d+)/children/queries$', gargantext_web.api.NodesChildrenQueries.as_view()),
url(r'^api/nodes/(\d+)$', gargantext_web.api.Nodes.as_view()), url(r'^api/nodes/(\d+)$', gargantext_web.api.Nodes.as_view()),
url(r'^api/nodes$', gargantext_web.api.NodesList.as_view()), url(r'^api/nodes$', gargantext_web.api.NodesList.as_view()),
url(r'^api/project/(\d+)/corpus/(\d+)/timerange/(\d+)/(\d+)$', views.subcorpusJSON),
url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams), url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
url(r'^graph-it$', views.graph_it),
url(r'^ngrams$', views.ngrams), url(r'^ngrams$', views.ngrams),
url(r'^nodeinfo/(\d+)$', views.nodeinfo), url(r'^nodeinfo/(\d+)$', views.nodeinfo),
url(r'^tests/mvc$', views.tests_mvc), url(r'^tests/mvc$', views.tests_mvc),
......
...@@ -285,8 +285,8 @@ def corpus(request, project_id, corpus_id): ...@@ -285,8 +285,8 @@ def corpus(request, project_id, corpus_id):
project = Node.objects.get(id=project_id) project = Node.objects.get(id=project_id)
corpus = Node.objects.get(id=corpus_id) corpus = Node.objects.get(id=corpus_id)
#documents = corpus.children.all() type_doc = NodeType.objects.get(name="Document")
#number = corpus.children.count() number = Node.objects.filter(parent=corpus, type=type_doc).count()
# try: # try:
# sources = defaultdict(int) # sources = defaultdict(int)
...@@ -357,12 +357,171 @@ def corpus(request, project_id, corpus_id): ...@@ -357,12 +357,171 @@ def corpus(request, project_id, corpus_id):
'project': project,\ 'project': project,\
'corpus' : corpus,\ 'corpus' : corpus,\
'documents': documents,\ 'documents': documents,\
# 'number' : number,\ 'number' : number,\
'dates' : chart,\ 'dates' : chart,\
})) }))
return HttpResponse(html) return HttpResponse(html)
from django.core.paginator import Paginator, EmptyPage, PageNotAnInteger
def subcorpus(request, project_id, corpus_id, start , end ):
    """Render 'subcorpus.html' with the corpus documents published between two dates.

    start, end -- bounds taken from the URL, formatted as YYYYMMDD
                  (e.g. 20150106 for 6 January 2015); both are inclusive.

    Anonymous users are redirected to the login page. Bounds that are not
    valid calendar dates produce a 404. Matching documents are sorted from
    most recent to oldest and paginated by 10, the page number being read
    from the `page` query parameter.
    """
    if not request.user.is_authenticated():
        return redirect('/login/?next=%s' % request.path)

    import datetime

    # The URL pattern only guarantees digits, so values such as 20151399 reach
    # this point. strptime is the call that can actually raise ValueError;
    # the original wrapped str() calls instead, which never fail, so the 404
    # was unreachable and bad dates surfaced as a server error.
    try:
        dateini = datetime.datetime.strptime(str(start), '%Y%m%d').date()
        datefin = datetime.datetime.strptime(str(end), '%Y%m%d').date()
    except ValueError:
        raise Http404()

    t = get_template('subcorpus.html')
    user = request.user
    date = datetime.datetime.now()

    project = Node.objects.get(id=project_id)
    corpus = Node.objects.get(id=corpus_id)

    # Keep only the documents whose publication date falls within the range.
    filtered_docs = []
    for doc in corpus.children.all():
        if "publication_date" not in doc.metadata:
            continue
        # Stored as "YYYY-MM-DD HH:MM:SS"; only the date part is compared.
        day = doc.metadata["publication_date"].split(" ")[0]
        realdate = datetime.datetime.strptime(str(day), '%Y-%m-%d').date()
        if dateini <= realdate <= datefin:
            doc.date = realdate
            filtered_docs.append(doc)

    # Most recent first.
    ordered = sorted(filtered_docs, key=lambda x: x.date, reverse=True)

    paginator = Paginator(ordered, 10)
    page = request.GET.get('page')
    try:
        results = paginator.page(page)
    except PageNotAnInteger:
        # If page is not an integer, deliver first page.
        results = paginator.page(1)
    except EmptyPage:
        # If page is out of range (e.g. 9999), deliver last page of results.
        results = paginator.page(paginator.num_pages)

    html = t.render(Context({
        'user': user,
        'date': date,
        'project': project,
        'corpus' : corpus,
        'documents': results,
    }))
    return HttpResponse(html)
import json
def subcorpusJSON(request, project_id, corpus_id, start , end ):
    """Return, as JSON, one page of the corpus documents published between two dates.

    start, end -- bounds taken from the URL, formatted as YYYYMMDD
                  (e.g. 20150106 for 6 January 2015); both are inclusive.

    Anonymous users are redirected to the login page. Bounds that are not
    valid calendar dates produce a 404. Matching documents are sorted from
    most recent to oldest and paginated by 10, the page number being read
    from the `page` query parameter. The selected page is run through
    rest_framework's PaginationSerializer and its data is sent as the body.
    """
    if not request.user.is_authenticated():
        return redirect('/login/?next=%s' % request.path)

    import datetime

    # strptime is the call that can actually raise ValueError on impossible
    # dates; the original guarded str() calls, which never fail.
    try:
        dateini = datetime.datetime.strptime(str(start), '%Y%m%d').date()
        datefin = datetime.datetime.strptime(str(end), '%Y%m%d').date()
    except ValueError:
        raise Http404()

    corpus = Node.objects.get(id=corpus_id)

    # Keep only the documents whose publication date falls within the range.
    filtered_docs = []
    for doc in corpus.children.all():
        if "publication_date" not in doc.metadata:
            continue
        # Stored as "YYYY-MM-DD HH:MM:SS"; only the date part is compared.
        day = doc.metadata["publication_date"].split(" ")[0]
        realdate = datetime.datetime.strptime(str(day), '%Y-%m-%d').date()
        if dateini <= realdate <= datefin:
            doc.date = realdate
            filtered_docs.append(doc)

    # Most recent first.
    ordered = sorted(filtered_docs, key=lambda x: x.date, reverse=True)

    paginator = Paginator(ordered, 10)
    page = request.GET.get('page')
    try:
        results = paginator.page(page)
    except PageNotAnInteger:
        # If page is not an integer, deliver first page.
        results = paginator.page(1)
    except EmptyPage:
        # If page is out of range (e.g. 9999), deliver last page of results.
        results = paginator.page(paginator.num_pages)

    from rest_framework.pagination import PaginationSerializer
    serializer = PaginationSerializer(instance=results)

    # Encode explicitly: handing the mapping itself to HttpResponse makes
    # Django iterate it, so the body was the concatenated keys, not JSON.
    # NOTE(review): default=str is a fallback for values the encoder does not
    # know (the serialized results may hold such values) - confirm the
    # wanted representation of a document.
    body = json.dumps(serializer.data, default=str)
    return HttpResponse(body, content_type='application/json')
# for pagexample.html
from django.core.paginator import Paginator, InvalidPage, EmptyPage
def get_pagination_page(page=1):
    """Return one page of a demo sequence (the integers 0..99, 10 per page).

    page -- requested page number; anything that cannot be converted to an
            integer (text, or None when no value was supplied) selects page 1.

    A page number outside the valid range yields the last page.
    """
    paginator = Paginator(range(0, 100), 10)
    try:
        page = int(page)
    except (TypeError, ValueError):
        # int(None) raises TypeError, int("abc") raises ValueError; the
        # original only caught the latter.
        page = 1

    try:
        return paginator.page(page)
    except (EmptyPage, InvalidPage):
        return paginator.page(paginator.num_pages)
def delete_project(request, node_id): def delete_project(request, node_id):
Node.objects.filter(id=node_id).all().delete() Node.objects.filter(id=node_id).all().delete()
return HttpResponseRedirect('/projects/') return HttpResponseRedirect('/projects/')
...@@ -371,8 +530,22 @@ def delete_corpus(request, project_id, corpus_id): ...@@ -371,8 +530,22 @@ def delete_corpus(request, project_id, corpus_id):
Node.objects.filter(id=corpus_id).all().delete() Node.objects.filter(id=corpus_id).all().delete()
return HttpResponseRedirect('/project/' + project_id) return HttpResponseRedirect('/project/' + project_id)
def explorer_graph(request, corpus_id):
t = get_template('explorer.html') def chart(request, project_id, corpus_id):
''' Charts to compare, filter, count'''
t = get_template('chart.html')
user = request.user
date = datetime.datetime.now()
project = Node.objects.get(id=project_id)
html = t.render(Context({
'user': user,
'date': date,
'project' : project,
}))
return HttpResponse(html)
def matrix(request, corpus_id):
t = get_template('matrix.html')
user = request.user user = request.user
date = datetime.datetime.now() date = datetime.datetime.now()
corpus = Node.objects.get(id=corpus_id) corpus = Node.objects.get(id=corpus_id)
...@@ -385,8 +558,8 @@ def explorer_graph(request, corpus_id): ...@@ -385,8 +558,8 @@ def explorer_graph(request, corpus_id):
return HttpResponse(html) return HttpResponse(html)
def explorer_matrix(request, corpus_id): def graph(request, corpus_id):
t = get_template('matrix.html') t = get_template('explorer.html')
user = request.user user = request.user
date = datetime.datetime.now() date = datetime.datetime.now()
corpus = Node.objects.get(id=corpus_id) corpus = Node.objects.get(id=corpus_id)
...@@ -399,6 +572,10 @@ def explorer_matrix(request, corpus_id): ...@@ -399,6 +572,10 @@ def explorer_matrix(request, corpus_id):
return HttpResponse(html) return HttpResponse(html)
def exploration(request): def exploration(request):
t = get_template('exploration.html') t = get_template('exploration.html')
user = request.user user = request.user
...@@ -426,6 +603,36 @@ def explorer_chart(request): ...@@ -426,6 +603,36 @@ def explorer_chart(request):
import csv import csv
from django.db import connection from django.db import connection
def corpus_csv(request, project_id, corpus_id):
    '''
    Export the metadata of every "Document" node of a corpus as a CSV download.

    The header row is made of the metadata keys of the first document; each
    following row holds one document, with an empty cell for every key that
    document lacks. A corpus without documents yields an empty CSV file
    instead of failing on the first-document lookup.
    '''
    response = HttpResponse(content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="corpus.csv"'
    writer = csv.writer(response)

    corpus = Node.objects.get(id=corpus_id)
    type_document = NodeType.objects.get(name="Document")
    documents = list(Node.objects.filter(parent=corpus, type=type_document))
    if not documents:
        # Nothing to describe: no header can be derived, send the empty file.
        return response

    keys = list(documents[0].metadata.keys())
    writer.writerow(keys)

    for doc in documents:
        # `or {}` keeps the "missing value -> empty cell" behaviour for a
        # document without metadata, which the bare `except:` used to cover.
        metadata = doc.metadata or {}
        writer.writerow([metadata.get(key, "") for key in keys])

    return response
def send_csv(request, corpus_id): def send_csv(request, corpus_id):
''' '''
Create the HttpResponse object with the appropriate CSV header. Create the HttpResponse object with the appropriate CSV header.
......
...@@ -2,6 +2,7 @@ import re ...@@ -2,6 +2,7 @@ import re
import locale import locale
from lxml import etree from lxml import etree
from datetime import datetime, date from datetime import datetime, date
from django.utils import timezone
from .FileParser import FileParser from .FileParser import FileParser
from ..NgramsExtractors import * from ..NgramsExtractors import *
...@@ -10,163 +11,165 @@ from ..NgramsExtractors import * ...@@ -10,163 +11,165 @@ from ..NgramsExtractors import *
class EuropressFileParser(FileParser): class EuropressFileParser(FileParser):
def _parse(self, file): def _parse(self, file):
localeEncoding = "fr_FR" localeEncoding = "fr_FR"
codif = "UTF-8" codif = "UTF-8"
count = 0 count = 0
if isinstance(file, str): if isinstance(file, str):
file = open(file, 'rb') file = open(file, 'rb')
print(file) #print(file)
contents = file.read() contents = file.read()
print(len(contents)) #print(len(contents))
#return [] #return []
encoding = self.detect_encoding(contents) encoding = self.detect_encoding(contents)
print(encoding)
try: if encoding != "utf-8":
html_parser = etree.HTMLParser(encoding=encoding) contents = contents.decode(encoding, errors='replace').encode(codif)
html = etree.fromstring(contents, html_parser)
html_articles = html.xpath('/html/body/table') try:
except: html_parser = etree.HTMLParser(encoding=codif)
return [] html = etree.fromstring(contents, html_parser)
html_articles = html.xpath('/html/body/table')
# initialize the list of metadata except:
metadata_list = [] return []
# parse all the articles, one by one
# initialize the list of metadata
for html_article in html_articles: metadata_list = []
# parse all the articles, one by one
metadata = {}
for html_article in html_articles:
if len(html_article):
for name in html_article.xpath("./tr/td/span[@class = 'DocPublicationName']"): metadata = {}
if name.text is not None:
format_journal = re.compile('(.*), (.*)', re.UNICODE) if len(html_article):
test_journal = format_journal.match(name.text) for name in html_article.xpath("./tr/td/span[@class = 'DocPublicationName']"):
if test_journal is not None: if name.text is not None:
metadata['source'] = test_journal.group(1) format_journal = re.compile('(.*), (.*)', re.UNICODE)
metadata['volume'] = test_journal.group(2) test_journal = format_journal.match(name.text)
else: if test_journal is not None:
metadata['source'] = name.text.encode(codif) metadata['source'] = test_journal.group(1)
metadata['volume'] = test_journal.group(2)
for header in html_article.xpath("./tr/td/span[@class = 'DocHeader']"): else:
text = header.text metadata['source'] = name.text.encode(codif)
if isinstance(text, bytes):
text = text.decode(encoding) for header in html_article.xpath("./tr/td/span[@class = 'DocHeader']"):
text = header.text
format_date_fr = re.compile('\d+\s*\w+\s+\d{4}', re.UNICODE) if isinstance(text, bytes):
test_date_fr = format_date_fr.match(text) text = text.decode(encoding)
format_date_en = re.compile('\w+\s+\d+,\s+\d{4}', re.UNICODE) format_date_fr = re.compile('\d*\s*\w+\s+\d{4}', re.UNICODE)
test_date_en = format_date_en.match(text) test_date_fr = format_date_fr.match(text)
format_sect = re.compile('(\D+),', re.UNICODE) format_date_en = re.compile('\w+\s+\d+,\s+\d{4}', re.UNICODE)
test_sect = format_sect.match(text) test_date_en = format_date_en.match(text)
format_page = re.compile(', p. (\w+)', re.UNICODE) format_sect = re.compile('(\D+),', re.UNICODE)
test_page = format_page.match(text) test_sect = format_sect.match(text)
if test_date_fr is not None: format_page = re.compile(', p. (\w+)', re.UNICODE)
self.localeEncoding = "fr_FR" test_page = format_page.match(text)
locale.setlocale(locale.LC_ALL, localeEncoding)
try : if test_date_fr is not None:
metadata['publication_date'] = datetime.strptime(text, '%d %B %Y') self.localeEncoding = "fr_FR"
except : locale.setlocale(locale.LC_ALL, localeEncoding)
try: if encoding != "utf-8":
metadata['publication_date'] = datetime.strptime(text, '%B %Y') text = text.replace('י', 'é')
except : text = text.replace('ű', 'û')
pass text = text.replace(' aot ', ' août ')
if test_date_en is not None: try :
localeEncoding = "en_GB.UTF-8" metadata['publication_date'] = datetime.strptime(text, '%d %B %Y')
locale.setlocale(locale.LC_ALL, localeEncoding) except :
try : try:
metadata['publication_date'] = datetime.strptime(text, '%B %d, %Y') metadata['publication_date'] = datetime.strptime(text, '%B %Y')
except : except :
try : print(text)
metadata['publication_date'] = datetime.strptime(text, '%B %Y') pass
except :
pass if test_date_en is not None:
localeEncoding = "en_GB.UTF-8"
if test_sect is not None: locale.setlocale(locale.LC_ALL, localeEncoding)
metadata['section'] = test_sect.group(1).encode(codif) try :
metadata['publication_date'] = datetime.strptime(text, '%B %d, %Y')
if test_page is not None: except :
metadata['page'] = test_page.group(1).encode(codif) try :
metadata['publication_date'] = datetime.strptime(text, '%B %Y')
metadata['title'] = html_article.xpath("string(./tr/td/span[@class = 'TitreArticleVisu'])").encode(codif) except :
metadata['text'] = html_article.xpath("./tr/td/descendant-or-self::*[not(self::span[@class='DocHeader'])]/text()") pass
line = 0 if test_sect is not None:
br_tag = 10 metadata['section'] = test_sect.group(1).encode(codif)
for i in html_articles[count].iter():
# print line, br, i, i.tag, i.attrib, i.tail if test_page is not None:
if i.tag == "span": metadata['page'] = test_page.group(1).encode(codif)
if "class" in i.attrib:
if i.attrib['class'] == 'TitreArticleVisu': metadata['title'] = html_article.xpath("string(./tr/td/span[@class = 'TitreArticleVisu'])").encode(codif)
line = 1 metadata['text'] = html_article.xpath("./tr/td/descendant-or-self::*[not(self::span[@class='DocHeader'])]/text()")
br_tag = 2
if line == 1 and i.tag == "br":
br_tag -= 1
if line == 1 and br_tag == 0:
try:
metadata['authors'] = str.title(etree.tostring(i, method="text", encoding=codif)).encode(codif)#.split(';')
except:
metadata['authors'] = 'not found'
line = 0
br_tag = 10
line = 0
try: br_tag = 10
if metadata['publication_date'] is not None or metadata['publication_date'] != '': for i in html_articles[count].iter():
try: # print line, br, i, i.tag, i.attrib, i.tail
back = metadata['publication_date'] if i.tag == "span":
except Exception as e: if "class" in i.attrib:
print(e) if i.attrib['class'] == 'TitreArticleVisu':
pass line = 1
else: br_tag = 2
try: if line == 1 and i.tag == "br":
metadata['publication_date'] = back br_tag -= 1
except Exception as e: if line == 1 and br_tag == 0:
print(e) try:
except : metadata['authors'] = str.title(etree.tostring(i, method="text", encoding=codif)).encode(codif)#.split(';')
metadata['publication_date'] = datetime.now() except:
metadata['authors'] = 'not found'
#if lang == 'fr': line = 0
#metadata['language_iso2'] = 'fr' br_tag = 10
#elif lang == 'en':
# metadata['language_iso2'] = 'en'
try:
if metadata['publication_date'] is not None or metadata['publication_date'] != '':
metadata['publication_year'] = metadata['publication_date'].strftime('%Y') try:
metadata['publication_month'] = metadata['publication_date'].strftime('%m') back = metadata['publication_date']
metadata['publication_day'] = metadata['publication_date'].strftime('%d') except Exception as e:
metadata['publication_date'] = "" #print(e)
pass
metadata['object_id'] = str(metadata['text'][-9]) else:
metadata['text'].pop() try:
metadata['text'] = str(' '.join(metadata['text'])) metadata['publication_date'] = back
metadata['text'] = str(re.sub('Tous droits réservés.*$', '', metadata['text'])) except Exception as e:
print(e)
metadata['bdd'] = u'europresse' except :
metadata['url'] = u'' metadata['publication_date'] = timezone.now()
#metadata_str = {} #if lang == 'fr':
for key, value in metadata.items(): #metadata['language_iso2'] = 'fr'
metadata[key] = value.decode() if isinstance(value, bytes) else value #elif lang == 'en':
metadata_list.append(metadata) # metadata['language_iso2'] = 'en'
count += 1
metadata['publication_year'] = metadata['publication_date'].strftime('%Y')
metadata['publication_month'] = metadata['publication_date'].strftime('%m')
metadata['publication_day'] = metadata['publication_date'].strftime('%d')
metadata['publication_date'] = ""
metadata['object_id'] = str(metadata['text'][-9])
metadata['text'].pop()
metadata['text'] = str(' '.join(metadata['text']))
metadata['text'] = str(re.sub('Tous droits réservés.*$', '', metadata['text']))
metadata['bdd'] = u'europresse'
metadata['url'] = u''
#metadata_str = {}
for key, value in metadata.items():
metadata[key] = value.decode() if isinstance(value, bytes) else value
metadata_list.append(metadata)
count += 1
# from pprint import pprint # from pprint import pprint
# pprint(metadata_list) # pprint(metadata_list)
# return [] # return []
return metadata_list return metadata_list
#
from NgramsExtractors import *
from Taggers import *
#texts = [
# "This is quite a simple test.",
# "Forman Brown (1901–1996) was one of the world's leaders in puppet theatre in his day, as well as an important early gay novelist. He was a member of the Yale Puppeteers and the driving force behind Turnabout Theatre. He was born in Otsego, Michigan, in 1901 and died in 1996, two days after his 95th birthday. Brown briefly taught at North Carolina State College, followed by an extensive tour of Europe.",
# "James Patrick (born c. 1940) is the pseudonym of a Scottish sociologist, which he used to publish a book A Glasgow Gang Observed. It attracted some attention in Scotland when it was published in 1973. It was based on research he had done in 1966, when he was aged 26. At that time he was working as a teacher in an Approved School, a Scottish reformatory. One gang member in the school, \"Tim Malloy\" (born 1950, also a pseudonym and a generic term for a Glasgow Catholic), agreed to infiltrate him into his gang in Maryhill in Glasgow. Patrick spent four months as a gang member, observing their behaviour.",
#]
#tagger = NltkTagger()
#extractor = EnglishNgramsExtractor()
#
# Sample French sentences used to exercise the tagger and the ngram extractor.
texts = [
    "La saison 1921-1922 du Foot-Ball Club Juventus est la vingtième de l'histoire du club, créé vingt-cinq ans plus tôt en 1897. La société turinoise qui fête cette année son 25e anniversaire prend part à l'édition du championnat dissident d'Italie de la CCI (appelé alors la Première division), la dernière édition d'une compétition annuelle de football avant l'ère fasciste de Mussolini.",
    "Le terme oblong désigne une forme qui est plus longue que large et dont les angles sont arrondis. En langage bibliographique, oblong signifie un format dont la largeur excède la hauteur. Ce qui correspond au format paysage en termes informatiques et \"à l'italienne\", pour l'imprimerie.",
    "Les sanglots longs des violons de l'automne bercent mon coeur d'une langueur monotone.",
]

tagger = TreeTagger()
extractor = FrenchNgramsExtractor()

# For every sample: print its tagging, a blank line, then one tab-indented
# line per extracted ngram, followed by a separator.
for text in texts:
    tagged = tagger.tag_text(text)
    print(tagged)
    print()
    ngrams = extractor.extract_ngrams(text)
    for ngram in ngrams:
        line = "\t" + str(ngram)
        print(line)
    print("\n")
{% extends "menu.html" %} {% extends "menu.html" %}
{% block css %} {% block css %}
{% load staticfiles %} <!-- {% load staticfiles %} -->
<link rel="stylesheet" href="{% static "css/bootstrap.css" %}"> <link rel="stylesheet" href="{% static "css/bootstrap.css" %}">
<link rel="stylesheet" href="{% static "css/bootstrap-theme.min.css" %}"> <link rel="stylesheet" href="{% static "css/bootstrap-theme.min.css" %}">
<title>GarganText: Analyze your data with graphs</title>
<link rel="stylesheet" type="text/css" href="http://jun9.github.io/dc.js/css/dc.css"/>
<script type="text/javascript" src="http://jun9.github.io/dc.js/js/d3.js"></script>
<script type="text/javascript" src="http://jun9.github.io/dc.js/js/crossfilter.js"></script>
<script type="text/javascript" src="http://jun9.github.io/dc.js/js/dc.js"></script>
{% endblock %} {% endblock %}
{% block content %} {% block content %}
<div class="container theme-showcase" role="main"> <div class="container theme-showcase" role="main">
<div class="jumbotron"> <div class="jumbotron">
<div class="container"> <h1>{{ project.name }}</h1>
<p>Advanced charts</p>
</div>
</div>
<div class="container">
<div id="test-container"></div>
</div>
<!-- All the templates used by the Javascript framework -->
{% verbatim %}
<!--
<script type="text/template" id="filter-template">
<span>...where</span>
<select name="entity">
<option value="metadata">metadata</option>
<option value="ngrams">ngrams</option>
</select>
<span class="entity metadata">
<select name="key"><% _.each(metadataList, function(metadata){ %>
<option><%= metadata.name %></option>
<% }); %></select>
</span>
<span class="entity ngrams">
<select name="key">
<option value="terms">terms</option>
<option value="n">terms count</option>
</select>
</span>
<select name="operator"></select>
<input name="value" />
<button class="remove">X</button>
</script>
<script type="text/template" id="filterlist-template">
<ul class="filters"></ul>
<button class="add">Add a filter...</button>
</script>
<script type="text/template" id="nodesquery-template">
<div ng-include="'filterlist-template'"></div>
</script>
-->
<!--
<script type="text/ejs" id="FilterView">
<li>
<span>...where the</span>
<select name="entity">
<option>metadata</option>
<option>ngrams</option>
</select>
<span class="entity metadata">
<select name="key"></select>
</span>
<span class="entity ngrams">
<select name="key">
<option value="terms">terms</option>
<option value="n">terms count</option>
</select>
</span>
</li>
</script>
<script type="text/ejs" id="FilterListView">
<div class="filters">
<ul class="filters"></ul>
<button class="create">Add a filter</button>
</div>
</script>
-->
<div class="row"> {% endverbatim %}
<div id="monthly-move-chart">
<strong>Title</strong> (Blue Line: Avg Index, Green Line: Index
Fluctuation)
<a class="reset" href="javascript:volumeChart.filterAll();dc.redrawAll();"
style="display: none;">reset</a>
<div class="clearfix"></div> <script type="text/javascript" src="{% static "js/jquery/jquery.min.js" %}"></script>
</div>
</div>
<div class="row"> {% verbatim %}
<div id="monthly-volume-chart"> <!--
</div> <div ng-app="Gargantext" ng-controller="FilterListController">
<p class="muted pull-left" style="margin-right: 15px;">Select a time range to zoom in</p> <ul>
</div> <li ng-repeat="(filterIndex, filter) in filters">
<span>...where the </span>
<select ng-model="filter.entity">
<option ng-repeat="(entityName, entityColumns) in entities" value="{{entityName}}">{{entityName}}</option>
</select>
<span ng-if="filter.entity">
<select ng-model="filter.column">
<option ng-repeat="column in entities[filter.entity] | orderBy:'key'" value="{{column.key}}">{{column.key}}</option>
</select>
<span ng-if="filter.column">
<span ng-repeat="column in entities[filter.entity] | filter : {'key':filter.column} : strict">
<select ng-model="filter.operator">
<option ng-repeat="operator in operators[column.type]" value="{{operator.key}}">{{operator.label}}</option>
</select>
<input type="text" ng-model="filter.value">
</span>
</span>
</span>
<button ng-click="removeFilter(filterIndex)" title="remove this filter">X</button>
</li>
</ul>
<button ng-click="addFilter()">Add a filter...</button>
</div>
-->
<style type="text/css">
div.corpus button:first-child+select {color:#FFF;}
div.list-results table {border-collapse: collapse;}
div.list-results th, div.list-results td {border: solid 1px #888; padding: 0.5em;}
div.list-results th {background: #444; color: #FFF}
div.list-results tr:nth-child(even) td {background: #FFF; color: #111}
div.list-results tr:nth-child(odd) td {background: #EEE; color: #000}
</style>
<!--
TODO: use inclusions
</div> SEE: http://stackoverflow.com/questions/17801988/dynamically-loading-controllers-and-ng-include
</div>
-->
</div> <div class="container">
<!--
<div ng-app="Gargantext" ng-controller="QueryController">
<div class="corpus">
Chosen corpus:
<select ng-model="corpusId" ng-change="updateEntities()">
<option ng-repeat="corpus in corpora" value="{{corpus.id}}">{{corpus.name}}</option>
</select>
</div>
<div class="filters" ng-if="corpusId">
<hr/>
<ul>
<li ng-repeat="(filterIndex, filter) in filters">
<span>...where the </span>
<select ng-model="filter.entity">
<option ng-repeat="(entityName, entityColumns) in entities" value="{{entityName}}">{{entityName}}</option>
</select>
<span ng-if="filter.entity">
<select ng-model="filter.column">
<option ng-repeat="column in entities[filter.entity] | orderBy:'key'" value="{{column.key}}">{{column.key}}</option>
</select>
<span ng-if="filter.column">
<span ng-repeat="column in entities[filter.entity] | filter : {'key':filter.column} : strict">
<select ng-model="filter.operator">
<option ng-repeat="operator in operators[column.type]" value="{{operator.key}}">{{operator.label}}</option>
</select>
<input type="text" ng-model="filter.value">
</span>
</span>
</span>
<button ng-click="removeFilter(filterIndex)" title="remove this filter">X</button>
</li>
</ul>
<button ng-click="addFilter()">Add a filter...</button>
</div>
<div class="results" ng-if="corpusId">
<hr/>
<button ng-if="corpusId" ng-click="postQuery()">Refresh results</button>
<div class="list">
<div class="list-pagination">
<select ng-model="pagination.limit">
<option ng-repeat="limit in [5, 10, 20, 50, 100]" value="{{limit}}">{{limit}}</option>
</select>
results per page
<span ng-if="resultsCount != undefined">
showing page
<select ng-model="pagination.offset">
<option ng-repeat="x in range(0, resultsCount+1, pagination.limit)" value="{{ x }}">{{ Math.round(1+x/pagination.limit) }}</option>
</select>
out of
{{ Math.ceil(resultsCount / pagination.limit) }}
<strong>({{resultsCount}}</strong> results found)
</span>
</div>
<div class="list-wait" ng-if="loading">
<em>Loading, please wait...</em>
</div>
<div class="list-results" ng-if="!loading &amp;&amp; resultsCount != undefined">
<div ng-if="!results.length">
<em>No results were found.</em>
</div>
<table ng-if="results.length">
<thead>
<tr>
<th ng-repeat="(key, value) in results[0]">{{key}}</th>
</tr>
</thead>
<tbody>
<tr ng-repeat="result in results">
<td ng-repeat="(key, value) in result">{{value}}</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
</div>
-->
<div ng-app="Gargantext" ng-controller="GraphController">
<ul class="datasets">
<button class="add" ng-click="addDataset()">Add a dataset...</button>
<li class="dataset" ng-controller="DatasetController" ng-repeat="dataset in datasets">
<hr/>
<div class="corpus">
<button ng-click="removeDataset($index)" title="remove this dataset">X</button>
<select ng-model="mesured" style="background-color:{{ getColor($index, datasets.length) }}" ng-options="value as key for (key, value) in {'Documents count': 'nodes.count', 'Ngrams count': 'ngrams.count'}" ng-change="updateQuery()"></select>
in the corpus
<select ng-model="corpusId" ng-change="updateEntities()">
<option ng-repeat="corpus in corpora" value="{{corpus.id}}">{{corpus.name}}</option>
</select>
</div>
<div class="filters" ng-if="entities">
<ul>
<li ng-repeat="filter in filters">
<button ng-click="removeFilter($index)" title="remove this filter">x</button>
<span>...where the </span>
<select ng-model="filter.entity">
<option ng-repeat="(entityName, entityColumns) in entities" value="{{entityName}}">{{entityName}}</option>
</select>
<span ng-if="filter.entity">
<select ng-model="filter.column">
<option ng-repeat="column in entities[filter.entity] | orderBy:'key'" value="{{column.key}}">{{column.key}}</option>
</select>
<span ng-if="filter.column" ng-repeat="column in entities[filter.entity]">
<span ng-if="column.key == filter.column">
<select ng-model="filter.operator">
<option ng-repeat="operator in operators[column.type]" value="{{operator.key}}">{{operator.label}}</option>
</select>
<input type="text" ng-model="filter.value" ng-change="updateQuery()">
</span>
</span>
</span>
</li>
</ul>
<button ng-click="addFilter()">Add a filter...</button>
</div>
</li>
</ul>
<hr/>
<script src="http://code.jquery.com/jquery-latest.js"></script> <button style="width:100%" class="refresh" ng-click="query()">Refresh results</button>
<script src="http://jun9.github.io/dc.js/js/bootstrap.min.js"></script> <div class="graph">
<script src="{% static "js/d3/DC.js"%}"></script> <linechart data="graph.data" options="graph.options"></linechart>
</div>
<div class="graph-parameters">
X-axis: groups the results by
<select ng-model="groupingKey" ng-options="key for key in ['day', 'month', 'year', 'decade', 'century']" ng-change="query()">
</select>
<br/>
Y-axis: use a
<select ng-model="graph.options.axes.y.type" ng-options="type for type in ['log', 'linear']"></select>
scale
<br/>
<hr/>
Represent data with
<select ng-model="seriesOptions.type" ng-options="type for type in ['line', 'area', 'column']" ng-change="query()"></select>
<span ng-show="seriesOptions.type == 'area'">
(<select ng-model="seriesOptions.striped" ng-options="value as key for (key, value) in {'with':true, 'without':false}" ng-change="query()"></select> stripes)
</span>
<span ng-show="seriesOptions.type == 'area' || seriesOptions.type == 'column'">
(<select ng-model="options.stacking" ng-options="value as key for (key, value) in {'with':true, 'without':false}" ng-change="query()"></select> stacking)
</span>
<br/>
<span ng-hide="seriesOptions.type == 'column'">
Line thickness:
<input ng-model="seriesOptions.thicknessNumber" type="range" min="1" max="8" ng-change="seriesOptions.thickness = seriesOptions.thicknessNumber + 'px'; query()" />
<br/>
Interpolation:
<select ng-model="graph.options.lineMode">
<option ng-repeat="mode in ['bundle', 'linear']" value="{{ mode }}">{{ mode }}</option>
</select>
<span ng-if="graph.options.lineMode != 'linear'">
with a tension of
<input type="text" disabled="disabled" ng-model="graph.options.tension" />
<input type="range" min="0" max="2" step=".1" ng-model="graph.options.tension" />
</span>
</span>
</div>
</div>
</div>
<!--
<div ng-app="Gargantext" ng-controller="FilterController">
<span>...where the </span>
<select ng-model="entityName">
<option ng-repeat="(entityName, entityColumns) in entities" value="{{entityName}}">{{entityName}}</option>
</select>
<span ng-if="entityName">
<select ng-model="entityColumn.key">
<option ng-repeat="entityColumn in entities[entityName] | orderBy:'key'" value="{{entityColumn.key}}">{{entityColumn.key}}</option>
</select>
<span ng-if="entityColumn.key">
<span ng-repeat="entityColumn in entities[entityName] | filter : entityColumn : strict">
<select ng-model="operator.key">
<option ng-repeat="operator in operators[entityColumn.type]" value="{{operator.key}}">{{operator.label}}</option>
</select>
<input type="text">
</span>
</span>
</span>
</div>
-->
{% endverbatim %}
<script type="text/javascript" src="{% static "js/angular.min.js" %}"></script>
<script type="text/javascript" src="{% static "js/angular-cookies.min.js" %}"></script>
<script type="text/javascript" src="{% static "js/d3/d3.v2.min.js" %}"></script>
<script type="text/javascript" src="{% static "js/d3/n3.line-chart.min.js" %}"></script>
<!-- <script type="text/javascript" src="{% static "js/d3/angular-charts.js" %}"></script> -->
<script type="text/javascript" src="{% static "js/gargantext.angular.js" %}"></script>
<!--
<script type="text/javascript" src="{% static "js/underscore-min.js" %}"></script>
<script type="text/javascript" src="{% static "js/backbone.js" %}"></script>
<script type="text/javascript" src="{% static "js/gargantext.backbone.js" %}"></script>
<script type="text/javascript">
var filterList = new FilterListView({nodeId: 39576});
filterList.render().$el.appendTo('#test-container');
</script>
-->
<!--
<script type="text/javascript" src="{% static "js/can.custom.js" %}"></script>
<script type="text/javascript" src="{% static "js/gargantext.can.js" %}"></script>
<script type="text/javascript">
var f = new FilterListController('#test-container', {parent: 39576});
</script>
-->
{% endblock %} {% endblock %}
...@@ -23,17 +23,19 @@ ...@@ -23,17 +23,19 @@
<div class="container theme-showcase" role="main"> <div class="container theme-showcase" role="main">
<div class="jumbotron"> <div class="jumbotron">
{% if project %} {% if project %}
<h1>{{ project.name }} </h1> <h1>{{ project.name }}, {{ corpus.name }}
</h1>
{% endif %} {% endif %}
{% if corpus %} {% if corpus %}
{{ corpus.name }} <p>
, Created on {{ corpus.date }} ({{ number}} docs)</p> {{ number}} docs, Created on {{ corpus.date }}
</p>
{% endif %} {% endif %}
<a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.pk }}/">Import</a> <!-- <a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.pk }}/">Add file</a> --!>
<a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.pk }}/">Export</a> <a class="btn btn-primary btn-lg" role="button" href="/project/{{project.pk}}/corpus/{{ corpus.pk }}/corpus.csv">Save as</a>
<a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.pk }}/">Delete</a></p> <a class="btn btn-primary btn-lg" role="button" href="/project/{{project.pk}}/corpus/{{ corpus.pk }}/delete">Delete</a></p>
{% if number == 0 %} {% if number == 0 %}
<a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.pk }}/">Add documents</a></p> <a class="btn btn-primary btn-lg" role="button" href="/admin/documents/corpus/{{ corpus.pk }}/">Add documents</a></p>
...@@ -48,7 +50,7 @@ ...@@ -48,7 +50,7 @@
<div class="row"> <div class="row">
<div id="monthly-move-chart"> <div id="monthly-move-chart">
<center> <center>
<strong>Title</strong> (Blue Line: Publications by months, Green Line: Zoomable publications) <strong>Title</strong> (Blue bars: all, Green line: zoom)
<a class="reset" href="javascript:volumeChart.filterAll();dc.redrawAll();" <a class="reset" href="javascript:volumeChart.filterAll();dc.redrawAll();"
style="display: none;">reset</a> style="display: none;">reset</a>
<div class="clearfix"></div> <div class="clearfix"></div>
...@@ -73,23 +75,16 @@ ...@@ -73,23 +75,16 @@
<div class="panel-heading"> <div class="panel-heading">
<h4 class="panel-title"> <h4 class="panel-title">
<a data-toggle="collapse" data-parent="#accordion" href="#collapseOne"> <a data-toggle="collapse" data-parent="#accordion" href="#collapseOne">
<p class="btn btn-primary btn-lg" align="right">Read documents</h2></p> <p onclick="updateDocuments();" class="btn btn-primary btn-lg" align="right">Read documents</h2></p>
</a> </a>
</h4> </h4>
</div> </div>
<div id="collapseOne" class="panel-collapse collapse" role="tabpanel"> <div id="collapseOne" class="panel-collapse collapse" role="tabpanel">
<div class="panel-body"> <div class="panel-body">
<p align="right"> <p align="right">
Page <a href="#">2</a> sur 100
</p> <!--{% include "subcorpus.html" %}-->
</center> <div id="subcorpusdiv"></div>
{% if documents %}
<ul>
{% for doc in documents %}
<li> <b>{{ doc.date }}</b>, <a href="/admin/node/document/{{doc.id}}">{{ doc.title}}</a></li>
{% endfor %}
</ul>
{% endif %}
</div> </div>
</div> </div>
...@@ -103,34 +98,37 @@ ...@@ -103,34 +98,37 @@
<div class="row"> <div class="row">
<div class="col-md-4"> <div class="col-md-4">
<div class="jumbotron"> <div class="jumbotron">
<h3><a href="/graph-it">Documents</a></h3> <h3><a href="/project/{{project.id}}/corpus/{{corpus.id}}/chart">Advanced charts</a></h3>
<ol> <ol>
<li>Read</li> <!-- write --> <li>Count</li> <!-- read, compute -->
<li>Count</li> <!-- compute --> <li>Filter</li> <!-- count, compute -->
<li>Select</li> <!-- cut --> <li>Compare</li> <!-- select, cut -->
</ol> </ol>
<h4><a href="/project/{{project.id}}/corpus/{{corpus.id}}/">Back to corpus</a></h3>
</div> </div>
</div> </div>
<div class="col-md-4"> <div class="col-md-4">
<div class="jumbotron"> <div class="jumbotron">
<h3><a href="/ngrams">Dictionaries</a></h3> <h3><a href="/corpus/{{corpus.id}}/matrix">Matrix</a></h3>
<ol> <ol>
<li>Synonyms</li> <li>Sort</li>
<li>Black Lists</li> <li>Group</li>
<li>White Lists</li> <li>Cluster</li>
</ol> </ol>
<h4><a href="/project/{{project.id}}/corpus/{{corpus.id}}/">Back to corpus</a></h3>
</div> </div>
</div> </div>
<div class="col-md-4"> <div class="col-md-4">
<div class="jumbotron"> <div class="jumbotron">
<h3><a href="/corpus/{{ corpus.id }}/explorer">Visualizations</a></h3> <h3><a href="/corpus/{{ corpus.id }}/explorer">Graph</a></h3>
<ol> <ol>
<li><a href="/corpus/{{ corpus.id }}/matrix">Adjacency matrix</a></li> <li>Visualize</li>
<li><a href="/corpus/{{ corpus.id }}/explorer">Static maps</a></li> <li>Explore</li>
<li>Dynamic maps</li> <li>Read</li>
</ol> </ol>
<h4><a href="/project/{{project.id}}/corpus/{{corpus.id}}/">Back to corpus</a></h3>
</div> </div>
</div> </div>
</div> </div>
...@@ -145,6 +143,64 @@ ...@@ -145,6 +143,64 @@
<script> <script>
// Time range currently selected on the volume chart. Starts as false (nothing
// selected yet), is overwritten with chart.filter() by the volume chart's
// renderlet, and is read by updateDocuments() as a [start, end] pair of dates.
var datesbuffer = false;
// Debug shortcut: forwards msg unchanged to the browser console.
function pr(msg) {
    console.log(msg);
}
/**
 * Format a Date as a compact "YYYYMMDD" string (despite the name, the result
 * is a string, not a number). Uses the date's local-time fields; month and
 * day are zero-padded to two digits, the year is emitted as-is.
 */
function dateToInt(todayTime) {
    // Prefix single-digit values with "0" so month/day always take two characters.
    var twoDigits = function (value) {
        return (value < 10 ? "0" : "") + value;
    };
    var pieces = [
        todayTime.getFullYear(),
        twoDigits(todayTime.getMonth() + 1), // getMonth() is zero-based
        twoDigits(todayTime.getDate())
    ];
    return pieces.join("");
}
// I've clicked "Read Documents" (or a previous/next link inside the loaded list):
// request one page of documents for the time range selected on the volume chart
// and inject the returned HTML into #subcorpusdiv.
//   pagenumber: page to request; defaults to 1 when omitted (or falsy).
//   pagenav:    pass true to force a reload (pagination links do this);
//               defaults to true only when the argument is omitted.
function updateDocuments(pagenumber, pagenav) {
    // Default only when the argument is really missing. The previous
    // `(pagenav)?pagenav:true` also turned an explicit `false` into `true`,
    // which made the "panel already open" check below unreachable.
    pagenav = (pagenav === undefined || pagenav === null) ? true : pagenav;
    pagenumber = (pagenumber) ? pagenumber : 1;
    pr("in here pagenav:"+pagenav+" - pagenumber:"+pagenumber)
    pr($( "#collapseOne" ).height())

    // "Read Documents" collapsible is already open and nobody asked for a
    // specific page: nothing to do.
    if (!pagenav && !($( "#collapseOne" ).height() < 50)) {
        return;
    }

    // No time range selected on the blue chart: nothing is requested for now.
    if (!datesbuffer) {
        console.log("nothing cause dont wanna")
        return;
    }

    // There's a time range selected in the blue chart, so ask the server for
    // the (paginated) pubs of that period.
    var dataini = dateToInt(datesbuffer[0]);
    var datafin = dateToInt(datesbuffer[1]);

    // Expected shape: /project/37525/corpus/37526/timerange/20040117/20040125?page=1
    // Build from the path only, so a query string or #fragment in the page URL
    // cannot end up in the middle of the request URL, and make sure there is
    // exactly one "/" before "timerange".
    var base = window.location.pathname;
    if (base.charAt(base.length - 1) !== "/") {
        base += "/";
    }
    var theurl = base + "timerange/" + dataini + "/" + datafin +
                 "?page=" + encodeURIComponent(pagenumber);
    pr("theurl: "+theurl)

    $.ajax({
        url: theurl,
        success: function (data) {
            $('#subcorpusdiv').html(data);
        },
        // Surface failures in the console instead of failing silently.
        error: function (xhr, status) {
            pr("updateDocuments: request failed (" + status + ") for " + theurl);
        }
    });
}
// var gainOrLossChart = dc.pieChart("#gain-loss-chart"); // var gainOrLossChart = dc.pieChart("#gain-loss-chart");
...@@ -177,44 +233,6 @@ d3.csv("/chart/corpus/{{ corpus.id }}/data.csv", function (data) { ...@@ -177,44 +233,6 @@ d3.csv("/chart/corpus/{{ corpus.id }}/data.csv", function (data) {
var ndx = crossfilter(data); var ndx = crossfilter(data);
var all = ndx.groupAll(); var all = ndx.groupAll();
/*
var yearlyDimension = ndx.dimension(function (d) {
return d3.time.year(d.dd);
});
var yearlyPerformanceGroup = yearlyDimension.group().reduce(
//add
function (p, v) {
++p.count;
p.absGain += +v.close - +v.open;
p.fluctuation += Math.abs(+v.close - +v.open);
p.sumIndex += (+v.open + +v.close) / 2;
p.avgIndex = p.sumIndex / p.count;
p.percentageGain = (p.absGain / p.avgIndex) * 100;
p.fluctuationPercentage = (p.fluctuation / p.avgIndex) * 100;
return p;
},
//remove
function (p, v) {
--p.count;
p.absGain -= +v.close - +v.open;
p.fluctuation -= Math.abs(+v.close - +v.open);
p.sumIndex -= (+v.open + +v.close) / 2;
p.avgIndex = p.sumIndex / p.count;
p.percentageGain = (p.absGain / p.avgIndex) * 100;
p.fluctuationPercentage = (p.fluctuation / p.avgIndex) * 100;
return p;
},
//init
function () {
return {count: 0, absGain: 0, fluctuation: 0, fluctuationPercentage: 0, sumIndex: 0, avgIndex: 0, percentageGain: 0};
}
);
var dateDimension = ndx.dimension(function (d) {
return d.dd;
});
*/
//volumeChart:(1) //volumeChart:(1)
//moveChart:(1) //moveChart:(1)
// monthly index avg fluctuation in percentage // monthly index avg fluctuation in percentage
...@@ -252,153 +270,6 @@ d3.csv("/chart/corpus/{{ corpus.id }}/data.csv", function (data) { ...@@ -252,153 +270,6 @@ d3.csv("/chart/corpus/{{ corpus.id }}/data.csv", function (data) {
} }
); );
/*
var gainOrLoss = ndx.dimension(function (d) {
return +d.open > +d.close ? "Loss" : "Gain";
});
var gainOrLossGroup = gainOrLoss.group();
var fluctuation = ndx.dimension(function (d) {
return Math.round((d.close - d.open) / d.open * 100);
});
var fluctuationGroup = fluctuation.group();
var quarter = ndx.dimension(function (d) {
var month = d.dd.getMonth();
if (month <= 3)
return "Q1";
else if (month > 3 && month <= 5)
return "Q2";
else if (month > 5 && month <= 7)
return "Q3";
else
return "Q4";
});
var quarterGroup = quarter.group().reduceSum(function (d) {
return d.volume;
});
var dayOfWeek = ndx.dimension(function (d) {
var day = d.dd.getDay();
switch (day) {
case 0:
return "0.Sun";
case 1:
return "1.Mon";
case 2:
return "2.Tue";
case 3:
return "3.Wed";
case 4:
return "4.Thu";
case 5:
return "5.Fri";
case 6:
return "6.Sat";
}
});
var dayOfWeekGroup = dayOfWeek.group();
*/
/*
yearlyBubbleChart.width(990)
.height(250)
.margins({top: 10, right: 50, bottom: 30, left: 40})
.dimension(yearlyDimension)
.group(yearlyPerformanceGroup)
.transitionDuration(1500)
.colors(["#a60000", "#ff0000", "#ff4040", "#ff7373", "#67e667", "#39e639", "#00cc00"])
.colorDomain([-12000, 12000])
.colorAccessor(function (d) {
return d.value.absGain;
})
.keyAccessor(function (p) {
return p.value.absGain;
})
.valueAccessor(function (p) {
return p.value.percentageGain;
})
.radiusValueAccessor(function (p) {
return p.value.fluctuationPercentage;
})
.maxBubbleRelativeSize(0.3)
.x(d3.scale.linear().domain([-2500, 2500]))
.y(d3.scale.linear().domain([-100, 100]))
.r(d3.scale.linear().domain([0, 4000]))
.elasticY(true)
.yAxisPadding(100)
.elasticX(true)
.xAxisPadding(500)
.renderHorizontalGridLines(true)
.renderVerticalGridLines(true)
.renderLabel(true)
.renderTitle(true)
.label(function (p) {
return p.key.getFullYear();
})
.title(function (p) {
return p.key.getFullYear()
+ "\n"
+ "Index Gain: " + numberFormat(p.value.absGain) + "\n"
+ "Index Gain in Percentage: " + numberFormat(p.value.percentageGain) + "%\n"
+ "Fluctuation / Index Ratio: " + numberFormat(p.value.fluctuationPercentage) + "%";
})
.yAxis().tickFormat(function (v) {
return v + "%";
});
*/
/*
gainOrLossChart.width(180)
.height(180)
.radius(80)
.dimension(gainOrLoss)
.group(gainOrLossGroup)
.label(function (d) {
return d.data.key + "(" + Math.floor(d.data.value / all.value() * 100) + "%)";
});
*/
/*
quarterChart.width(180)
.height(180)
.radius(80)
.innerRadius(30)
.dimension(quarter)
.group(quarterGroup);
*/
/*
dayOfWeekChart.width(180)
.height(180)
.margins({top: 20, left: 10, right: 10, bottom: 20})
.group(dayOfWeekGroup)
.dimension(dayOfWeek)
.colors(['#3182bd', '#6baed6', '#9ecae1', '#c6dbef', '#dadaeb'])
.label(function (d){
return d.key.split(".")[1];
})
.xAxis().ticks(4);
*/
/*
fluctuationChart.width(420)
.height(180)
.margins({top: 10, right: 50, bottom: 30, left: 40})
.dimension(fluctuation)
.group(fluctuationGroup)
.elasticY(true)
.centerBar(true)
.gap(1)
.round(dc.round.floor)
.x(d3.scale.linear().domain([-25, 25]))
.renderHorizontalGridLines(true)
.xAxis()
.tickFormat(function (v) {
return v + "%";
});
*/
moveChart.width(800) moveChart.width(800)
.height(150) .height(150)
...@@ -409,7 +280,7 @@ d3.csv("/chart/corpus/{{ corpus.id }}/data.csv", function (data) { ...@@ -409,7 +280,7 @@ d3.csv("/chart/corpus/{{ corpus.id }}/data.csv", function (data) {
.valueAccessor(function (d) { .valueAccessor(function (d) {
return d.value.avg; return d.value.avg;
}) })
.x(d3.time.scale().domain([new Date(1950,01,01), new Date(2014,12,31)])) .x(d3.time.scale().domain([new Date(1990,01,01), new Date(2015,01,31)]))
.round(d3.time.month.round) .round(d3.time.month.round)
.xUnits(d3.time.months) .xUnits(d3.time.months)
.elasticY(true) .elasticY(true)
...@@ -439,12 +310,13 @@ d3.csv("/chart/corpus/{{ corpus.id }}/data.csv", function (data) { ...@@ -439,12 +310,13 @@ d3.csv("/chart/corpus/{{ corpus.id }}/data.csv", function (data) {
.group(volumeByMonthGroup) .group(volumeByMonthGroup)
.centerBar(true) .centerBar(true)
.gap(0) .gap(0)
.x(d3.time.scale().domain([new Date(1950, 01, 01), new Date(2015, 01, 01)])) .x(d3.time.scale().domain([new Date(1990, 01, 01), new Date(2015, 01, 11)]))
.round(d3.time.month.round) .round(d3.time.month.round)
.xUnits(d3.time.months) .xUnits(d3.time.months)
.renderlet(function (chart) { .renderlet(function (chart) {
chart.select("g.y").style("display", "none"); chart.select("g.y").style("display", "none");
moveChart.filter(chart.filter()); moveChart.filter(chart.filter());
datesbuffer = chart.filter();
}) })
.on("filtered", function (chart) { .on("filtered", function (chart) {
dc.events.trigger(function () { dc.events.trigger(function () {
...@@ -452,47 +324,6 @@ d3.csv("/chart/corpus/{{ corpus.id }}/data.csv", function (data) { ...@@ -452,47 +324,6 @@ d3.csv("/chart/corpus/{{ corpus.id }}/data.csv", function (data) {
}); });
}); });
/*
dc.dataCount(".dc-data-count")
.dimension(ndx)
.group(all);
*/
/*
dc.dataTable(".dc-data-table")
.dimension(dateDimension)
.group(function (d) {
var format = d3.format("02d");
return d.dd.getFullYear() + "/" + format((d.dd.getMonth() + 1));
})
.size(10)
.columns([
function (d) {
return d.date;
},
function (d) {
return d.open;
},
function (d) {
return d.close;
},
function (d) {
return numberFormat(d.close - d.open);
},
function (d) {
return d.volume;
}
])
.sortBy(function (d) {
return d.dd;
})
.order(d3.ascending)
.renderlet(function (table) {
table.selectAll(".dc-table-group").classed("info", true);
});
*/
dc.renderAll(); dc.renderAll();
} }
); );
......
...@@ -6,6 +6,33 @@ ...@@ -6,6 +6,33 @@
<link rel="stylesheet" href="{% static "css/bootstrap-theme.min.css" %}"> <link rel="stylesheet" href="{% static "css/bootstrap-theme.min.css" %}">
<link rel="stylesheet" href="{% static "js/libs/jquery/jquery-ui.css" %}" media="screen"> <link rel="stylesheet" href="{% static "js/libs/jquery/jquery-ui.css" %}" media="screen">
<style>
.nodeinfo-container {
margin: 25px;
padding: 25px;
}
.nodeinfo-elem {
border-style: solid;
border-width: 1px;
margin: 5px;
padding: 5px;
}
.nodeinfo-head {
font-size: large;
margin: 5px;
padding: 5px;
}
.nodeinfo-content {
margin: 5px;
padding: 5px;
}
</style>
{% endblock %} {% endblock %}
...@@ -16,6 +43,7 @@ ...@@ -16,6 +43,7 @@
<div id="metadata"></div> <div id="metadata"></div>
{% endif %} {% endif %}
<script src="{% static "js/jquery/jquery.min.js" %}" type="text/javascript"></script> <script src="{% static "js/jquery/jquery.min.js" %}" type="text/javascript"></script>
<script src="{% static "js/libs/jquery/jquery-ui.js" %}" type="text/javascript"></script> <script src="{% static "js/libs/jquery/jquery-ui.js" %}" type="text/javascript"></script>
...@@ -44,11 +72,25 @@ ...@@ -44,11 +72,25 @@
jsondata = data.metadata jsondata = data.metadata
console.log(jsondata) console.log(jsondata)
var output = "" var output = ""
if(jsondata.title) output += "Title:<br>"+jsondata.title+"<br>"; // if(jsondata.title) output += "Title:<br>"+jsondata.title+"<br>";
if(jsondata.publication_date) output += jsondata.publication_date.split(" ")[0]+"<br><br>"; // if(jsondata.publication_date) output += jsondata.publication_date.split(" ")[0]+"<br><br>";
if(jsondata.authors) output += "Authors:<br>"+jsondata.authors+"<br><br>"; // if(jsondata.authors) output += "Authors:<br>"+jsondata.authors+"<br><br>";
if(jsondata.fields) output += "Keywords:<br>"+jsondata.fields+"<br><br>"; // if(jsondata.fields) output += "Keywords:<br>"+jsondata.fields+"<br><br>";
if(jsondata.abstract) output += "Abstract:<br>"+jsondata.abstract+"<br><br>"; // if(jsondata.abstract) output += "Abstract:<br>"+jsondata.abstract+"<br><br>";
// <div class='jumbotron'>
output += "<div class='nodeinfo-container'>"
if(jsondata.title) output += "<div class='nodeinfo-elem'> <div class='nodeinfo-head'>Title</div> <div class='nodeinfo-content'>"+jsondata.title+"</div> </div>";
if(jsondata.publication_date) output += "<div class='nodeinfo-elem'> <div class='nodeinfo-head'>Publication Date</div> <div class='nodeinfo-content'>"+jsondata.publication_date.split(" ")[0]+"</div> </div>";
if(jsondata.authors && jsondata.authors!="not found") output += "<div class='nodeinfo-elem'> <div class='nodeinfo-head'>Authors</div> <div class='nodeinfo-content'>"+jsondata.authors+"</div> </div>";
else {
if(jsondata.source) output += "<div class='nodeinfo-elem'> <div class='nodeinfo-head'>Published in</div> <div class='nodeinfo-content'>"+jsondata.source+"</div> </div>";
}
if(jsondata.fields) output += "<div class='nodeinfo-elem'> <div class='nodeinfo-head'>Keywords</div> <div class='nodeinfo-content'>"+jsondata.fields+"</div> </div>";
if(jsondata.abstract) output += "<div class='nodeinfo-elem'> <div class='nodeinfo-head'>Abstract</div> <div class='nodeinfo-content'>"+jsondata.abstract+"</div> </div>";
else {
if(jsondata.text) output += "<div class='nodeinfo-elem'> <div class='nodeinfo-head'>Abstract</div> <div class='nodeinfo-content'>"+jsondata.text+"</div> </div>";
}
output += "</div>"
$("#metadata").html(output); $("#metadata").html(output);
}, },
......
{% if date %}
<p>Today: {{date}}</p>
{% endif %}
<div class="pagination">
<span class="step-links">
{% if documents.has_previous %}
<a onclick="updateDocuments({{ documents.previous_page_number }},true);">previous</a>
{% endif %}
<span class="current">
Page {{ documents.number }} of {{ documents.paginator.num_pages }}.
</span>
{% if documents.has_next %}
<a onclick="updateDocuments({{ documents.next_page_number }},true);">next</a>
{% endif %}
</span>
</div>
{% if documents %}
<p>Paginator stuff</p>
<ul>
{% for doc in documents %}
{% if doc.date %}
<li> <b>{{ doc.date }}</b>, <a href="/admin/node/document/{{doc.id}}">id:{{ doc.id}} title:{{ doc.name}}</a></li>
{% endif %}
{% endfor %}
</ul>
{% endif %}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment