Commit de3fe8db authored by Mathieu Rodic's avatar Mathieu Rodic

[GIT] Resolved conflicts in url.py

[CODE] Import paths are now relative instead of absolute in parsing/
parent f5443d84
__pycache__/ __pycache__/
parsing/Taggers/treetagger/ parsing/Taggers/treetagger/
.ipynb_checkpoints/ .ipynb_checkpoints/
*.pyc
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -11,7 +11,7 @@ def create_blacklist(user, corpus): ...@@ -11,7 +11,7 @@ def create_blacklist(user, corpus):
def create_synonymes(user, corpus): def create_synonymes(user, corpus):
pass pass
def create_whitelist(user, corpus): def create_whitelist(user, corpus, size=100):
cursor = connection.cursor() cursor = connection.cursor()
try: try:
...@@ -51,7 +51,7 @@ def create_whitelist(user, corpus): ...@@ -51,7 +51,7 @@ def create_whitelist(user, corpus):
AND AND
n.type_id = %d n.type_id = %d
AND AND
ngX.n >= 1 ngX.n >= 2
GROUP BY GROUP BY
ngX.id ngX.id
...@@ -60,16 +60,16 @@ def create_whitelist(user, corpus): ...@@ -60,16 +60,16 @@ def create_whitelist(user, corpus):
ORDER BY ORDER BY
occurrences DESC occurrences DESC
LIMIT LIMIT
100 %d
; ;
""" % (white_list.id, corpus.id, type_document.id) """ % (white_list.id, corpus.id, type_document.id, size)
cursor.execute(query_whitelist) cursor.execute(query_whitelist)
return white_list return white_list
#def create_cooc(user, corpus, whitelist, blacklist, synonymes): #def create_cooc(user, corpus, whitelist, blacklist, synonymes):
def create_cooc(user=None, corpus=None, whitelist=None): def create_cooc(user=None, corpus=None, whitelist=None, size=150):
cursor = connection.cursor() cursor = connection.cursor()
try: try:
...@@ -127,11 +127,117 @@ def create_cooc(user=None, corpus=None, whitelist=None): ...@@ -127,11 +127,117 @@ def create_cooc(user=None, corpus=None, whitelist=None):
ORDER BY ORDER BY
score DESC score DESC
LIMIT LIMIT
150 %d
""" % (cooc.pk, corpus.id, whitelist.id, whitelist.id) """ % (cooc.pk, corpus.id, whitelist.id, whitelist.id, size)
cursor.execute(query_cooc) cursor.execute(query_cooc)
return cooc return cooc
def get_cooc(request=None, corpus_id=None, cooc_id=None, type="node_link"):
    """Build the cooccurrence graph of a corpus as node-link data.

    The cooccurrence node attached to the corpus is looked up and, when it
    does not exist yet, created from a fresh whitelist. Its scores are then
    turned into a thresholded 0/1 adjacency matrix, converted to a networkx
    graph, partitioned with ``best_partition`` and serialised with
    ``json_graph.node_link_data``.

    Parameters:
        request: object exposing ``.user``; only read when the cooccurrences
            have to be created.
        corpus_id: primary key of the corpus ``Node``.
        cooc_id: currently unused; kept for interface compatibility.
        type: ``"node_link"`` (nodes get label, name, size and group) or
            ``"adjacency"`` (nodes get name and group only).

    Returns:
        The dictionary produced by ``json_graph.node_link_data``.

    Raises:
        ValueError: if ``type`` is not one of the two supported formats.
            (Previously this surfaced as an ``UnboundLocalError`` after all
            the work had been done.)
    """
    import pandas as pd
    import numpy as np
    import networkx as nx
    from networkx.readwrite import json_graph
    from analysis.louvain import best_partition

    # Size passed to create_whitelist/create_cooc for each output format.
    sizes = {"node_link": 150, "adjacency": 50}
    if type not in sizes:
        raise ValueError(
            "Unknown cooccurrence output type %r, expected one of %s"
            % (type, sorted(sizes))
        )
    size = sizes[type]

    matrix = defaultdict(lambda: defaultdict(float))
    labels = dict()
    weight = dict()

    corpus = Node.objects.get(id=corpus_id)
    type_cooc = NodeType.objects.get(name="Cooccurrence")

    # Single lookup instead of querying once for the test and once more
    # for the value.
    cooccurrence_node = Node.objects.filter(type=type_cooc, parent=corpus).first()
    if cooccurrence_node is None:
        print("Coocurrences do not exist yet, create it.")
        whitelist = create_whitelist(request.user, corpus, size=size)
        cooccurrence_node = create_cooc(
            user=request.user, corpus=corpus, whitelist=whitelist, size=size
        )
        print(cooccurrence_node.id, "Cooc created")

    for cooccurrence in NodeNgramNgram.objects.filter(node=cooccurrence_node):
        ngram_x = cooccurrence.ngramx
        ngram_y = cooccurrence.ngramy
        score = cooccurrence.score

        labels[ngram_x.id] = ngram_x.terms
        labels[ngram_y.id] = ngram_y.terms

        # Store the score in both directions so the matrix is filled
        # for (x, y) and (y, x).
        matrix[ngram_x.id][ngram_y.id] = score
        matrix[ngram_y.id][ngram_x.id] = score

        # A term's weight is the sum of the scores of every pair it is in.
        weight[ngram_y.terms] = weight.get(ngram_y.terms, 0) + score
        weight[ngram_x.terms] = weight.get(ngram_x.terms, 0) + score

    if not matrix:
        # No cooccurrence at all: min() below would fail on an empty
        # sequence, so return an empty graph instead.
        return json_graph.node_link_data(nx.Graph())

    df = pd.DataFrame(matrix).T.fillna(0)
    # The division allocates a new array, so df.values is left untouched.
    # x.sum(axis=1) is broadcast along the last axis: x[i, j] is divided by
    # the sum of row j.
    x = df.values / df.values.sum(axis=1)

    # Removing unconnected nodes: the threshold is the smallest of the
    # per-row maxima, so every row keeps at least one link.
    threshold = min(x.max(axis=1))
    matrix_filtered = np.where(x >= threshold, 1, 0)

    G = nx.from_numpy_matrix(matrix_filtered)
    # Nodes are numbered by position; rename them with the ngram terms.
    G = nx.relabel_nodes(
        G, dict(enumerate([labels[label] for label in list(df.columns)]))
    )
    # TODO: remove too connected nodes (find an automatic way to do it).

    partition = best_partition(G)

    with_details = type == "node_link"
    for node in G.nodes():
        # Best effort: a node whose attributes cannot be computed is
        # reported and left partially annotated rather than failing the
        # whole request.
        try:
            attributes = G.node[node]
            if with_details:
                attributes['label'] = node
            attributes['name'] = node
            if with_details:
                attributes['size'] = weight[node]
            attributes['group'] = partition[node]
        except Exception as error:
            print(error)

    return json_graph.node_link_data(G)
...@@ -11,12 +11,13 @@ import gargantext_web.api ...@@ -11,12 +11,13 @@ import gargantext_web.api
admin.autodiscover() admin.autodiscover()
urlpatterns = patterns('', urlpatterns = patterns('',
# url(r'^$', 'gargantext_web.views.home', name='home'),
# url(r'^blog/', include('blog.urls')), # Admin views
url(r'^admin/', include(admin.site.urls)), url(r'^admin/', include(admin.site.urls)),
url(r'^login/', include(admin.site.urls)), url(r'^login/', include(admin.site.urls)),
url(r'^grappelli/', include('grappelli.urls')), url(r'^grappelli/', include('grappelli.urls')),
# User views
url(r'^$', views.home), url(r'^$', views.home),
url(r'^projects/$', views.projects), url(r'^projects/$', views.projects),
...@@ -27,26 +28,35 @@ urlpatterns = patterns('', ...@@ -27,26 +28,35 @@ urlpatterns = patterns('',
url(r'^project/(\d+)/corpus/(\d+)/$', views.corpus), url(r'^project/(\d+)/corpus/(\d+)/$', views.corpus),
url(r'^project/(\d+)/corpus/(\d+)/delete/$', views.delete_corpus), url(r'^project/(\d+)/corpus/(\d+)/delete/$', views.delete_corpus),
# Visualizations
url(r'^corpus/(\d+)/explorer$', views.explorer_graph), url(r'^corpus/(\d+)/explorer$', views.explorer_graph),
url(r'^chart$', views.explorer_chart), url(r'^corpus/(\d+)/matrix$', views.explorer_matrix),
url(r'^matrix$', views.explorer_matrix),
#url(r'^exploration$', views.exploration), # Getting data [which?]
url(r'^chart/corpus/(\d+)/data.csv$', views.send_csv), url(r'^chart/corpus/(\d+)/data.csv$', views.send_csv),
url(r'^corpus/(\d+)/node_link.json$', views.node_link), url(r'^corpus/(\d+)/node_link.json$', views.node_link),
url(r'^corpus/(\d+)/adjancy_matrix$', views.node_link), url(r'^corpus/(\d+)/adjancy_matrix$', views.node_link),
url(r'^corpus/(\d+)/adjacency.json$', views.adjacency),
"""RESTful API
These URLs allow operations on the database in a RESTful way.
"""
url(r'^api$', gargantext_web.api.Root), # retrieve all the metadata from a given node's children
url(r'^api/nodes/(\d+)/children/metadata$', gargantext_web.api.NodesChildrenMetatadata.as_view()), url(r'^api/nodes/(\d+)/children/metadata$', gargantext_web.api.NodesChildrenMetatadata.as_view()),
# retrieve the ngrams from a given node's children
url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
# perform a query on a given node's children
url(r'^api/nodes/(\d+)/children/queries$', gargantext_web.api.NodesChildrenQueries.as_view()), url(r'^api/nodes/(\d+)/children/queries$', gargantext_web.api.NodesChildrenQueries.as_view()),
# get all the nodes
url(r'^api/nodes$', gargantext_web.api.NodesController.get), url(r'^api/nodes$', gargantext_web.api.NodesController.get),
url(r'^api/nodes/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
url(r'^api/nodes/(\d+)/data$', gargantext_web.api.CorpusController.data),
url(r'^graph-it$', views.graph_it), # other (DEPRECATED, TO BE REMOVED)
url(r'^ngrams$', views.ngrams), url(r'^api/nodes$', gargantext_web.api.NodesController.get),
url(r'^api/corpus/(\d+)/ngrams$', gargantext_web.api.CorpusController.ngrams),
url(r'^api/corpus/(\d+)/metadata$', gargantext_web.api.CorpusController.metadata),
url(r'^api/corpus/(\d+)/data$', gargantext_web.api.CorpusController.data),
) )
......
...@@ -26,7 +26,6 @@ from collections import defaultdict ...@@ -26,7 +26,6 @@ from collections import defaultdict
from parsing.FileParsers import * from parsing.FileParsers import *
# SOME FUNCTIONS # SOME FUNCTIONS
def query_to_dicts(query_string, *query_args): def query_to_dicts(query_string, *query_args):
...@@ -243,6 +242,7 @@ def project(request, project_id): ...@@ -243,6 +242,7 @@ def project(request, project_id):
# async # async
corpus.children.filter(type_id=type_document.pk).extract_ngrams(keys=['title',]) corpus.children.filter(type_id=type_document.pk).extract_ngrams(keys=['title',])
#corpus.children.filter(type_id=type_document.pk).extract_ngrams(keys=['title',])
except Exception as error: except Exception as error:
print(error) print(error)
...@@ -367,7 +367,6 @@ def corpus(request, project_id, corpus_id): ...@@ -367,7 +367,6 @@ def corpus(request, project_id, corpus_id):
return HttpResponse(html) return HttpResponse(html)
def delete_project(request, node_id): def delete_project(request, node_id):
Node.objects.filter(id=node_id).all().delete() Node.objects.filter(id=node_id).all().delete()
return HttpResponseRedirect('/projects/') return HttpResponseRedirect('/projects/')
...@@ -390,14 +389,16 @@ def explorer_graph(request, corpus_id): ...@@ -390,14 +389,16 @@ def explorer_graph(request, corpus_id):
return HttpResponse(html) return HttpResponse(html)
def explorer_matrix(request): def explorer_matrix(request, corpus_id):
t = get_template('matrix.html') t = get_template('matrix.html')
user = request.user user = request.user
date = datetime.datetime.now() date = datetime.datetime.now()
corpus = Node.objects.get(id=corpus_id)
html = t.render(Context({\ html = t.render(Context({\
'user': user,\ 'user': user,\
'date': date,\ 'date': date,\
'corpus': corpus,\
})) }))
return HttpResponse(html) return HttpResponse(html)
...@@ -469,78 +470,27 @@ def send_csv(request, corpus_id): ...@@ -469,78 +470,27 @@ def send_csv(request, corpus_id):
return response return response
def node_link(request, corpus_id):
'''
Create the HttpResponse object with the graph dataset.
'''
import pandas as pd
from copy import copy
import numpy as np
import networkx as nx
from networkx.readwrite import json_graph
from gargantext_web.api import JsonHttpResponse
from analysis.louvain import best_partition
from analysis.functions import create_whitelist, create_cooc
matrix = defaultdict(lambda : defaultdict(float))
labels = dict()
corpus = Node.objects.get(id=corpus_id)
type_cooc = NodeType.objects.get(name="Cooccurrence")
if Node.objects.filter(type=type_cooc, parent=corpus).first() is None: # To get the data
print("Coocurrences do not exist yet, create it.") from gargantext_web.api import JsonHttpResponse
whitelist = create_whitelist(request.user, corpus) from analysis.functions import get_cooc
cooc = create_cooc(user=request.user, corpus=corpus, whitelist=whitelist)
print(cooc.id, "Cooc created")
else:
cooc = Node.objects.filter(type=type_cooc, parent=corpus).first()
for cooccurrence in NodeNgramNgram.objects.filter(node=cooc):
labels[cooccurrence.ngramx.id] = cooccurrence.ngramx.terms
labels[cooccurrence.ngramy.id] = cooccurrence.ngramy.terms
matrix[cooccurrence.ngramx.id][cooccurrence.ngramy.id] = cooccurrence.score
matrix[cooccurrence.ngramy.id][cooccurrence.ngramx.id] = cooccurrence.score
df = pd.DataFrame(matrix).T.fillna(0) def node_link(request, corpus_id):
x = copy(df.values) '''
x = x / x.sum(axis=1) Create the HttpResponse object with the node_link dataset.
'''
# Removing unconnected nodes
threshold = min(x.max(axis=1)) data = get_cooc(request=request, corpus_id=corpus_id, type="node_link")
matrix_filtered = np.where(x > threshold, 1, 0) return JsonHttpResponse(data)
#matrix_filtered = np.where(x > threshold, x, 0)
G = nx.from_numpy_matrix(matrix_filtered)
G = nx.relabel_nodes(G, dict(enumerate([ labels[x] for x in list(df.columns)])))
#G = nx.relabel_nodes(G, dict(enumerate(df.columns)))
# Removing too connected nodes (find automatic way to do it)
# outdeg = G.degree()
# to_remove = [n for n in outdeg if outdeg[n] >= 10]
# G.remove_nodes_from(to_remove)
partition = best_partition(G) def adjacency(request, corpus_id):
'''
for node in G.nodes(): Create the HttpResponse object with the adjacency dataset.
try: '''
#node,type(labels[node]) data = get_cooc(request=request, corpus_id=corpus_id, type="adjacency")
G.node[node]['label'] = node
# G.node[node]['color'] = '19,180,300'
except Exception as error:
print(error)
data = json_graph.node_link_data(G)
# data = json_graph.node_link_data(G, attrs={\
# 'source':'source',\
# 'target':'target',\
# 'weight':'weight',\
# #'label':'label',\
# #'color':'color',\
# 'id':'id',})
#print(data)
return JsonHttpResponse(data) return JsonHttpResponse(data)
...@@ -555,7 +505,6 @@ def graph_it(request): ...@@ -555,7 +505,6 @@ def graph_it(request):
})) }))
return HttpResponse(html) return HttpResponse(html)
def ngrams(request): def ngrams(request):
'''The ngrams list.''' '''The ngrams list.'''
t = get_template('ngrams.html') t = get_template('ngrams.html')
......
...@@ -4,23 +4,30 @@ Install the requirements ...@@ -4,23 +4,30 @@ Install the requirements
1) Install all the Debian packages listed in dependances.deb 1) Install all the Debian packages listed in dependances.deb
(also: sudo apt-get install postgresql-contrib) (also: sudo apt-get install postgresql-contrib)
2) Create a virtual environment with pyvenv: apt-get install python-virtualenv 2) Create a Python virtual environment
3) Type: source [your virtual environment directory]/bin/activate On Debian:
---------
4) Do your work! sudo apt-get install python3.4-venv
pyvenv3 /srv/gargantext_env
5) Type: deactivate On ubuntu:
---------
sudo apt-get install python-pip
sudo pip install -U pip
sudo pip install -U virtualenv
Then you can create your virtualenv in the working directory or in any
location you choose:
Configure stuff sudo virtualenv -p python3 /srv/gargantext_env
---------------
1) ln -s [the project folder] /srv/gargantext 3) Type: source [your virtual environment directory]/bin/activate
2) ln -s [your folder for tree tagger] [the project folder]/parsing/Tagger/treetagger 4) sudo chown -R user:user /srv/gargantext_env
pip install -r /srv/gargantext/init/requirements.txt
Warning: for ln, path has to be absolute! 5) Type: deactivate
In PostgreSQL In PostgreSQL
...@@ -52,6 +59,36 @@ Populate the database ...@@ -52,6 +59,36 @@ Populate the database
python manage.py syncdb python manage.py syncdb
Last steps of configuration:
----------------------------
1) If your project is not in /srv/gargantext:
ln -s [the project folder] /srv/gargantext
2) build gargantext_lib
wget http://docs.delanoe.org/gargantext_lib.tar.bz2
cd /srv/
sudo tar xvjf gargantext_lib.tar.bz2
sudo chown user:user /srv/gargantext_lib
3) Explorer:
cd /srv/gargantext_lib/js
git clone git@github.com:PkSM3/garg.git
4) Adapt all symlinks:
ln -s [your folder for tree tagger] [the project folder]/parsing/Tagger/treetagger
Warning: for ln, path has to be absolute!
5) patch CTE
patch /srv/gargantext_env/lib/python3.4/site-packages/cte_tree/models.py /srv/gargantext/init/cte_tree.models.diff
6) init nodetypes and main variables
/srv/gargantext/manage.py shell < /srv/gargantext/init/init.py
Extras:
======
Start the Python Notebook server Start the Python Notebook server
-------------------------------- --------------------------------
...@@ -65,4 +102,4 @@ Start the Django server ...@@ -65,4 +102,4 @@ Start the Django server
----------------------- -----------------------
In Pyvenv: In Pyvenv:
python manage.py runserver python manage.py runserver
\ No newline at end of file
sudo apt-get install postgresql
sudo apt-get install postgresql-contrib
sudo apt-get install python-virtualenv sudo apt-get install python-virtualenv
sudo apt-cache search libpng
sudo apt-get install libpng12-dev sudo apt-get install libpng12-dev
sudo apt-get install libpng-dev sudo apt-get install libpng-dev
apt-cache search freetype sudo apt-cache search freetype
apt-cache search freetype | grep dev
sudo apt-get install libfreetype6-dev sudo apt-get install libfreetype6-dev
sudo apt-cache search python-dev sudo apt-cache search python-dev
sudo apt-get install python-dev sudo apt-get install python-dev
sudo apt-get install libpq-dev sudo apt-get install libpq-dev
sudo apt-get postgresql-contrib
sudo apt-get install libpq-dev
# Pour avoir toutes les dependences de matpolotlib (c'est sale, trouver
sudo apt-get build-dep python-matplotlib
postgresql-contrib
libpq-dev
# rajouter david
#
#
#Paquets Debian a installer #Paquets Debian a installer
# easy_install -U distribute (matplotlib) # easy_install -U distribute (matplotlib)
#lxml #lxml
libffi-dev sudo apt-get install libffi-dev
libxml2-dev sudo apt-get install libxml2-dev
libxslt1-dev sudo apt-get install libxslt1-dev
# ipython readline # ipython readline
libncurses5-dev sudo apt-get install libncurses5-dev
pandoc sudo apt-get install pandoc
# scipy: # scipy:
gfortran sudo apt-get install gfortran
libopenblas-dev sudo apt-get install libopenblas-dev
liblapack-dev sudo apt-get install liblapack-dev
...@@ -82,6 +82,13 @@ except Exception as error: ...@@ -82,6 +82,13 @@ except Exception as error:
typeDoc = NodeType(name='Synonyme') typeDoc = NodeType(name='Synonyme')
typeDoc.save() typeDoc.save()
try:
typeDoc = NodeType.objects.get(name='Cooccurrence')
except Exception as error:
print(error)
typeDoc = NodeType(name='Cooccurrence')
typeDoc.save()
# In[33]: # In[33]:
......
...@@ -4,13 +4,16 @@ Jinja2==2.7.3 ...@@ -4,13 +4,16 @@ Jinja2==2.7.3
MarkupSafe==0.23 MarkupSafe==0.23
Pillow==2.5.3 Pillow==2.5.3
Pygments==1.6 Pygments==1.6
SQLAlchemy==0.9.8
South==1.0 South==1.0
aldjemy==0.3.51
amqp==1.4.6 amqp==1.4.6
anyjson==0.3.3 anyjson==0.3.3
billiard==3.3.0.18 billiard==3.3.0.18
celery==3.1.15 celery==3.1.15
certifi==14.05.14 certifi==14.05.14
cffi==0.8.6 cffi==0.8.6
chardet==2.3.0
cryptography==0.6 cryptography==0.6
decorator==3.4.0 decorator==3.4.0
django-autoslug==1.7.2 django-autoslug==1.7.2
...@@ -27,9 +30,9 @@ graphviz==0.4 ...@@ -27,9 +30,9 @@ graphviz==0.4
ipython==2.2.0 ipython==2.2.0
kombu==3.0.23 kombu==3.0.23
lxml==3.3.6 lxml==3.3.6
matplotlib==1.4.0 #matplotlib==1.4.0
networkx==1.9 networkx==1.9
nltk==3.0a4 #nltk==3.0a4
nose==1.3.4 nose==1.3.4
numpy==1.8.2 numpy==1.8.2
pandas==0.14.1 pandas==0.14.1
......
...@@ -200,6 +200,7 @@ class Node(CTENode): ...@@ -200,6 +200,7 @@ class Node(CTENode):
self.node_resource.update(parsed=True) self.node_resource.update(parsed=True)
@current_app.task(filter=task_method)
def extract_ngrams(self, keys, ngramsextractorscache=None, ngramscaches=None): def extract_ngrams(self, keys, ngramsextractorscache=None, ngramscaches=None):
# if there is no cache... # if there is no cache...
if ngramsextractorscache is None: if ngramsextractorscache is None:
......
import re import re
import locale import locale
from lxml import etree from lxml import etree
from datetime import datetime, date from datetime import datetime, date
from parsing.FileParsers.FileParser import FileParser from .FileParser import FileParser
from parsing.NgramsExtractors import * from ..NgramsExtractors import *
......
...@@ -3,7 +3,7 @@ import dateutil.parser ...@@ -3,7 +3,7 @@ import dateutil.parser
import zipfile import zipfile
import chardet import chardet
from parsing.Caches import LanguagesCache from ..Caches import LanguagesCache
class FileParser: class FileParser:
......
from parsing.FileParsers.RisFileParser import RisFileParser from .RisFileParser import RisFileParser
class IsiFileParser(RisFileParser): class IsiFileParser(RisFileParser):
......
from django.db import transaction from django.db import transaction
from lxml import etree from lxml import etree
from parsing.FileParsers.FileParser import FileParser from .FileParser import FileParser
from parsing.NgramsExtractors import * from ..NgramsExtractors import *
class PubmedFileParser(FileParser): class PubmedFileParser(FileParser):
......
from django.db import transaction from django.db import transaction
from parsing.FileParsers.FileParser import FileParser from .FileParser import FileParser
class RisFileParser(FileParser): class RisFileParser(FileParser):
......
from parsing.NgramsExtractors.NgramsExtractor import NgramsExtractor from .NgramsExtractor import NgramsExtractor
from parsing.Taggers import NltkTagger from ..Taggers import NltkTagger
class EnglishNgramsExtractor(NgramsExtractor): class EnglishNgramsExtractor(NgramsExtractor):
......
from parsing.Taggers.Tagger import Tagger from .Tagger import Tagger
import nltk import nltk
......
from parsing.Taggers.Tagger import Tagger from .Tagger import Tagger
import subprocess import subprocess
import threading import threading
......
# from NltkTagger import NltkTagger
# tagger = NltkTagger()
# text0 = "Forman Brown (1901–1996) was one of the world's leaders in puppet theatre in his day, as well as an important early gay novelist. He was a member of the Yale Puppeteers and the driving force behind Turnabout Theatre. He was born in Otsego, Michigan, in 1901 and died in 1996, two days after his 95th birthday. Brown briefly taught at North Carolina State College, followed by an extensive tour of Europe."
# text1 = "James Patrick (born c. 1940) is the pseudonym of a Scottish sociologist, which he used to publish a book A Glasgow Gang Observed. It attracted some attention in Scotland when it was published in 1973. It was based on research he had done in 1966, when he was aged 26. At that time he was working as a teacher in an Approved School, a Scottish reformatory. One gang member in the school, \"Tim Malloy\" (born 1950, also a pseudonym and a generic term for a Glasgow Catholic), agreed to infiltrate him into his gang in Maryhill in Glasgow. Patrick spent four months as a gang member, observing their behaviour."
from TreeTagger import TreeTagger
# Sample French texts used to exercise the tagger by hand.
text0 = "La saison 1921-1922 du Foot-Ball Club Juventus est la vingtième de l'histoire du club, créé vingt-cinq ans plus tôt en 1897. La société turinoise qui fête cette année son 25e anniversaire prend part à l'édition du championnat dissident d'Italie de la CCI (appelé alors la Première division), la dernière édition d'une compétition annuelle de football avant l'ère fasciste de Mussolini."
text1 = "Le terme oblong désigne une forme qui est plus longue que large et dont les angles sont arrondis. En langage bibliographique, oblong signifie un format dont la largeur excède la hauteur. Ce qui correspond au format paysage en termes informatiques et \"à l'italienne\", pour l'imprimerie."
text2 = "Les sanglots longs des violons de l'automne bercent mon coeur d'une langueur monotone."

tagger = TreeTagger()

# Print every tagging result preceded by an empty line, then close with
# one last empty line.
for sample in (text0, text1, text2):
    print()
    print(tagger.tag_text(sample))
print()
\ No newline at end of file
/* Copyright 2013 Michael Bostock. All rights reserved. Do not copy. */
/*@import url(http://fonts.googleapis.com/css?family=PT+Serif|PT+Serif:b|PT+Serif:i|PT+Sans|PT+Sans:b);*/
/* Page never gets narrower than 1040px. */
html {
min-width: 1040px;
}
/* Body under the .ocks-org scope: fixed 960px column, horizontally
   centred through the auto side margins, set in a serif face. */
.ocks-org body {
background: #fcfcfa;
color: #333;
font-family: "PT Serif", serif;
margin: 1em auto 4em auto;
position: relative;
width: 960px;
}
/* Structural elements and headings use the sans-serif face. */
.ocks-org header,
.ocks-org footer,
.ocks-org aside,
.ocks-org h1,
.ocks-org h2,
.ocks-org h3,
.ocks-org h4 {
font-family: "PT Sans", sans-serif;
}
/* Headings are pure black, darker than the #333 body text. */
.ocks-org h1,
.ocks-org h2,
.ocks-org h3,
.ocks-org h4 {
color: #000;
}
/* Header and footer text is greyed out. */
.ocks-org header,
.ocks-org footer {
color: #636363;
}
/* Large, light, tightly spaced main title. */
h1 {
font-size: 64px;
font-weight: 300;
letter-spacing: -2px;
margin: .3em 0 .1em 0;
}
h2 {
margin-top: 2em;
}
h1, h2 {
text-rendering: optimizeLegibility;
}
/* Anchors inside h2 that carry a name or id: light grey, absolutely
   positioned with right: 100%. */
h2 a[name],
h2 a[id] {
color: #ccc;
right: 100%;
padding: 0 .3em;
position: absolute;
}
header,
footer {
font-size: small;
}
/* Asides in the header/footer are floated left so they line up in a row. */
.ocks-org header aside,
.ocks-org footer aside {
float: left;
margin-right: .5em;
}
/* A "/" is appended after each of those asides as generated content. */
.ocks-org header aside:after,
.ocks-org footer aside:after {
padding-left: .5em;
content: "/";
}
footer {
margin-top: 8em;
}
/* Aside that follows the h1 as a sibling: small, 180px wide, absolutely
   positioned at right: 0. */
h1 ~ aside {
font-size: small;
right: 0;
position: absolute;
width: 180px;
}
.attribution {
font-size: small;
margin-bottom: 2em;
}
/* Paragraphs that are direct children of body or of a list item. */
body > p, li > p {
line-height: 1.5em;
}
/* Top-level paragraphs and blockquotes get explicit widths
   (720px and 640px). */
body > p {
width: 720px;
}
body > blockquote {
width: 640px;
}
/* NOTE: the same `blockquote q` selector is declared again further down
   in this stylesheet with `display: inline`; with equal specificity the
   later declaration wins, so only `font-style` from this rule is
   effective. */
blockquote q {
display: block;
font-style: oblique;
}
/* Lists: padding removed from the ul; each li is indented by a 30px
   left margin and given an explicit 690px width. */
ul {
padding: 0;
}
li {
width: 690px;
margin-left: 30px;
}
/* Links are steel blue and only underlined while hovered. */
a {
color: steelblue;
}
a:not(:hover) {
text-decoration: none;
}
/* Monospace face for code-like elements. */
pre, code, textarea {
font-family: "Menlo", monospace;
}
code {
line-height: 1em;
}
textarea {
font-size: 100%;
}
/* Top-level code listings: grey left rule, with a -20px left margin. */
body > pre {
border-left: solid 2px #ccc;
padding-left: 18px;
margin: 2em 0 2em -20px;
}
/* Syntax-highlighting colours, keyed by language class and token class. */
.html .value,
.javascript .string,
.javascript .regexp {
color: #756bb1;
}
.html .tag,
.css .tag,
.javascript .keyword {
color: #3182bd;
}
.comment {
color: #636363;
}
.html .doctype,
.javascript .number {
color: #31a354;
}
.html .attribute,
.css .attribute,
.javascript .class,
.javascript .special {
color: #e6550d;
}
/* Default font for text drawn inside SVG elements. */
svg {
font: 10px sans-serif;
}
/* Chart axes: unfilled paths and crisp black lines. */
.axis path, .axis line {
fill: none;
stroke: #000;
shape-rendering: crispEdges;
}
/* Zero line-height on superscripts and subscripts. */
sup, sub {
line-height: 0;
}
/* Inline quotations are wrapped in curly quotation marks. */
q:before {
content: "“";
}
q:after {
content: "”";
}
/* NOTE: `blockquote q` is also declared earlier in this stylesheet with
   `display: block`; this later `display: inline` overrides it. */
blockquote q {
line-height: 1.5em;
display: inline;
}
/* Quotations inside a blockquote do not get the generated quote marks. */
blockquote q:before,
blockquote q:after {
content: "";
}
...@@ -127,8 +127,8 @@ ...@@ -127,8 +127,8 @@
<div class="jumbotron"> <div class="jumbotron">
<h3><a href="/corpus/{{ corpus.id }}/explorer">Visualizations</a></h3> <h3><a href="/corpus/{{ corpus.id }}/explorer">Visualizations</a></h3>
<ol> <ol>
<li>Matrix</li> <li><a href="/corpus/{{ corpus.id }}/matrix">Adjacency matrix</a></li>
<li>Static maps</li> <li><a href="/corpus/{{ corpus.id }}/explorer">Static maps</a></li>
<li>Dynamics maps</li> <li>Dynamics maps</li>
</ol> </ol>
</div> </div>
......
...@@ -120,22 +120,19 @@ ...@@ -120,22 +120,19 @@
</ul> </ul>
<!--
<ul id="colorGraph" class="nav navbar-nav navbar-right"> <ul id="colorGraph" class="nav navbar-nav navbar-right">
<li class="dropdown"> <li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown"> <a href="#" class="dropdown-toggle" data-toggle="dropdown">
<img title="Set Colors" src="{% static "js/libs/img2/colors.png" %}" width="20px"><b class="caret"></b></img> <img title="Set Colors" src="{% static "js/libs/img2/colors.png" %}" width="20px"><b class="caret"></b></img>
</a> </a>
<ul class="dropdown-menu"> <ul class="dropdown-menu">
<li><a href="#" onclick='clustersBy("country")'>By Country</a></li> <li><a href="#" onclick='clustersBy("group")'>By Group</a></li>
<li><a href="#" onclick='clustersBy("acronym")'>By Acronym</a></li>
<li><a href="#" onclick='clustersBy("default")'>By Default</a></li> <li><a href="#" onclick='clustersBy("default")'>By Default</a></li>
<li class="divider"></li>
<li><a href="#"> <span class="glyphicon glyphicon-repeat"></span> <strike>Properties</strike></a></li>
</ul> </ul>
</li> </li>
</ul> </ul>
--> <!---->
...@@ -361,7 +358,7 @@ ...@@ -361,7 +358,7 @@
<script type="text/javascript" src="{% static "js/libs/jquery/jquery.easytabs.min.js" %}"></script> <script type="text/javascript" src="{% static "js/libs/jquery/jquery.easytabs.min.js" %}"></script>
<script src="{% static "js/libs/bootstrap/js/bootstrap.min.js" %}"></script> <!--<script src="{% static "js/libs/bootstrap/js/bootstrap.min.js" %}"></script>-->
<script src="{% static "js/libs/bootstrap/js/bootstrap-modal.js" %}" type="text/javascript"></script> <script src="{% static "js/libs/bootstrap/js/bootstrap-modal.js" %}" type="text/javascript"></script>
<script src="{% static "js/libs/bootstrap/js/bootstrap-hover-dropdown.min.js" %}" type="text/javascript"></script> <script src="{% static "js/libs/bootstrap/js/bootstrap-hover-dropdown.min.js" %}" type="text/javascript"></script>
......
...@@ -5,6 +5,22 @@ ...@@ -5,6 +5,22 @@
{% load staticfiles %} {% load staticfiles %}
<link rel="stylesheet" href="{% static "css/bootstrap.css" %}"> <link rel="stylesheet" href="{% static "css/bootstrap.css" %}">
<link rel="stylesheet" href="{% static "css/bootstrap-theme.min.css" %}"> <link rel="stylesheet" href="{% static "css/bootstrap-theme.min.css" %}">
<link rel="stylesheet" href="{% static "css/d3matrix.css" %}">
<style>
.background {
fill: #eee;
}
line {
stroke: #fff;
}
text.active {
fill: red;
}
</style>
{% endblock %} {% endblock %}
...@@ -17,7 +33,11 @@ ...@@ -17,7 +33,11 @@
</div> </div>
</div> </div>
<script src="{% static "js/d3/d3.v2.min.js" %}></script> <div id="graphid" style="visibility: hidden;">/corpus/{{ corpus.id }}/adjacency.json</div>
<script src="{% static "js/jquery/jquery.min.js" %}" type="text/javascript"></script>
<script src="{% static "js/d3/d3.v2.min.js" %}"></script>
<p>Order: <select id="order"> <p>Order: <select id="order">
<option value="name">by Name</option> <option value="name">by Name</option>
...@@ -39,11 +59,12 @@ var x = d3.scale.ordinal().rangeBands([0, width]), ...@@ -39,11 +59,12 @@ var x = d3.scale.ordinal().rangeBands([0, width]),
var svg = d3.select("body").append("svg") var svg = d3.select("body").append("svg")
.attr("width", width + margin.left + margin.right) .attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom) .attr("height", height + margin.top + margin.bottom)
.style("margin-left", -margin.left + "px") //.style("margin-left", -margin.left + "px")
.append("g") .append("g")
.attr("transform", "translate(" + margin.left + "," + margin.top + ")"); .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
d3.json("{% static "img/miserables.json" %}, function(miserables) { var filename = document.getElementById("graphid").innerHTML
d3.json(filename, function(miserables) {
var matrix = [], var matrix = [],
nodes = miserables.nodes, nodes = miserables.nodes,
n = nodes.length; n = nodes.length;
...@@ -57,12 +78,12 @@ var svg = d3.select("body").append("svg") ...@@ -57,12 +78,12 @@ var svg = d3.select("body").append("svg")
// Convert links to matrix; count character occurrences. // Convert links to matrix; count character occurrences.
miserables.links.forEach(function(link) { miserables.links.forEach(function(link) {
matrix[link.source][link.target].z += link.value; matrix[link.source][link.target].z += link.weight;
matrix[link.target][link.source].z += link.value; matrix[link.target][link.source].z += link.weight;
matrix[link.source][link.source].z += link.value; matrix[link.source][link.source].z += link.weight;
matrix[link.target][link.target].z += link.value; matrix[link.target][link.target].z += link.weight;
nodes[link.source].count += link.value; nodes[link.source].count += link.weight;
nodes[link.target].count += link.value; nodes[link.target].count += link.weight;
}); });
// Precompute the orders. // Precompute the orders.
......
...@@ -146,7 +146,7 @@ ...@@ -146,7 +146,7 @@
data: [ data: [
{% if donut %} {% if donut %}
{% for part in donut %} {% for part in donut %}
{label: '{{ part.source }}, {{part.count}} docs', value: {{ part.part }} }, {label: '{{ part.source }}', value: {{ part.part }} },
{% endfor %} {% endfor %}
{% endif %} {% endif %}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment