Commit 06a5ba47 authored by PkSM3

[MERGE] maybe this works?

parents f9a08e53 b57ae7fe
......@@ -82,7 +82,7 @@ class NodesChildrenNgrams(APIView):
def get(self, request, node_id):
# query ngrams
ParentNode = aliased(Node)
ngrams_query = (Ngram
ngrams_query = (session
.query(Ngram.terms, func.count().label('count'))
# .query(Ngram.id, Ngram.terms, func.count().label('count'))
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
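The recurring change in this commit: query construction moves from the Django-style entry point on the model class (Ngram.query(...), Metadata.query(...)) to an explicit SQLAlchemy session. A minimal sketch of the before/after pattern, assuming the session and mapped models exported by gargantext_web.db:

# hedged sketch; `session`, `Ngram` and `Node_Ngram` are assumed to come
# from gargantext_web.db (see the rewritten db.py further down)
from sqlalchemy import func

# before (broken): the model class is not a query factory
#   ngrams_query = Ngram.query(Ngram.terms, ...)
# after: the shared session builds the query
ngrams_query = (session
    .query(Ngram.terms, func.count().label('count'))
    .join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
)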
......@@ -128,7 +128,7 @@ class NodesChildrenDuplicates(APIView):
raise APIException('Missing GET parameter: "keys"', 400)
keys = request.GET['keys'].split(',')
# metadata retrieval
metadata_query = (Metadata
metadata_query = (session
.query(Metadata)
.filter(Metadata.name.in_(keys))
)
......@@ -213,7 +213,7 @@ class NodesChildrenMetatadata(APIView):
# query metadata keys
ParentNode = aliased(Node)
metadata_query = (Metadata
metadata_query = (session
.query(Metadata)
.join(Node_Metadata, Node_Metadata.metadata_id == Metadata.id)
.join(Node, Node.id == Node_Metadata.node_id)
......@@ -233,7 +233,7 @@ class NodesChildrenMetatadata(APIView):
values_to = None
if metadata.type != 'text':
value_column = getattr(Node_Metadata, 'value_' + metadata.type)
node_metadata_query = (Node_Metadata
node_metadata_query = (session
.query(value_column)
.join(Node, Node.id == Node_Metadata.node_id)
.filter(Node.parent_id == node_id)
......@@ -381,9 +381,9 @@ class NodesChildrenQueries(APIView):
for field_name in fields_names:
split_field_name = field_name.split('.')
if split_field_name[0] == 'metadata':
metadata = Metadata.query(Metadata).filter(Metadata.name == split_field_name[1]).first()
metadata = session.query(Metadata).filter(Metadata.name == split_field_name[1]).first()
if metadata is None:
metadata_query = Metadata.query(Metadata.name).order_by(Metadata.name)
metadata_query = session.query(Metadata.name).order_by(Metadata.name)
metadata_names = [metadata.name for metadata in metadata_query.all()]
raise APIException('Invalid key for "%s" in parameter "field", should be one of the following values: "%s". "%s" was found instead' % (split_field_name[0], '", "'.join(metadata_names), split_field_name[1]), 400)
# check or create Node_Metadata alias; join if necessary
......@@ -422,7 +422,7 @@ class NodesChildrenQueries(APIView):
)
# starting the query!
document_type_id = NodeType.query(NodeType.id).filter(NodeType.name == 'Document').scalar()
document_type_id = session.query(NodeType.id).filter(NodeType.name == 'Document').scalar()
query = (session
.query(*fields_list)
.select_from(Node)
......@@ -451,9 +451,9 @@ class NodesChildrenQueries(APIView):
#
if field[0] == 'metadata':
# which metadata?
metadata = Metadata.query(Metadata).filter(Metadata.name == field[1]).first()
metadata = session.query(Metadata).filter(Metadata.name == field[1]).first()
if metadata is None:
metadata_query = Metadata.query(Metadata.name).order_by(Metadata.name)
metadata_query = session.query(Metadata.name).order_by(Metadata.name)
metadata_names = [metadata.name for metadata in metadata_query.all()]
raise APIException('Invalid key for "%s" in parameter "field", should be one of the following values: "%s". "%s" was found instead' % (field[0], '", "'.join(metadata_names), field[1]), 400)
# check or create Node_Metadata alias; join if necessary
......@@ -475,7 +475,7 @@ class NodesChildrenQueries(APIView):
))
elif field[0] == 'ngrams':
query = query.filter(
Node.id.in_(Node_Metadata
Node.id.in_(session
.query(Node_Ngram.node_id)
.filter(Node_Ngram.ngram_id == Ngram.id)
.filter(operator(
......@@ -551,7 +551,7 @@ class NodesChildrenQueries(APIView):
class NodesList(APIView):
def get(self, request):
query = (Node
query = (session
.query(Node.id, Node.name, NodeType.name.label('type'))
.filter(Node.user_id == request.session._session_cache['_auth_user_id'])
.join(NodeType)
......@@ -626,7 +626,7 @@ class CorpusController:
# build query
ParentNode = aliased(Node)
query = (Ngram
query = (session
.query(Ngram.terms, func.count('*'))
.join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
.join(Node, Node.id == Node_Ngram.node_id)
......
from node import models
from gargantext_web import settings
from node import models
__all__ = ['literalquery', 'session', 'cache', 'Session', 'bulk_insert', 'engine', 'get_cursor']
__all__ = ['literalquery', 'session', 'cache']
# initialize sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
engine = create_engine('postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}/{NAME}'.format(
**settings.DATABASES['default']
))
Base = automap_base()
Base.prepare(engine, reflect=True)
# model representation
def model_repr(modelname):
def _repr(obj):
result = '<' + modelname
isfirst = True
for key, value in obj.__dict__.items():
if key[0] != '_':
value = repr(value)
if len(value) > 64:
value = value[:30] + '....' + value[-30:]
if isfirst:
isfirst = False
else:
result += ','
result += ' ' + key + '=' + value
result += '>'
return result
return _repr
# map the Django models found in node.models to SQLAlchemy models
for model_name, model in models.__dict__.items():
if hasattr(model, 'sa'):
globals()[model_name] = model.sa
__all__.append(model_name)
if hasattr(model, '_meta'):
table_name = model._meta.db_table
if hasattr(Base.classes, table_name):
sqla_model = getattr(Base.classes, table_name)
setattr(sqla_model, '__repr__', model_repr(model_name))
globals()[model_name] = sqla_model
__all__.append(model_name)
NodeNgram = Node_Ngram
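The rewritten db.py reflects the existing PostgreSQL schema with SQLAlchemy's automap and re-exports each Django model as its SQLAlchemy counterpart under the same name, with a generated __repr__. A hedged usage sketch, assuming the module is importable and the database is reachable:

# the automapped classes behave like ordinary SQLAlchemy models
from gargantext_web.db import session, Node   # assumed import path

first_node = session.query(Node).order_by(Node.id).first()
print(first_node)   # rendered by model_repr, e.g. <Node id=1, name='...'>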
......@@ -61,16 +97,17 @@ def literalquery(statement, dialect=None):
# SQLAlchemy session management
def get_sessionmaker():
from django.db import connections
from sqlalchemy.orm import sessionmaker
def get_engine():
from sqlalchemy import create_engine
alias = 'default'
connection = connections[alias]
url = 'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}/{NAME}'.format(
**settings.DATABASES['default']
)
engine = create_engine(url, use_native_hstore=True)
return create_engine(url, use_native_hstore=True)
engine = get_engine()
def get_sessionmaker():
from sqlalchemy.orm import sessionmaker
return sessionmaker(bind=engine)
Session = get_sessionmaker()
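Session management is split the same way: get_engine() builds one shared engine, and Session is a sessionmaker bound to it, so callers open short-lived sessions on demand. A minimal sketch, assuming the definitions above:

session = Session()          # new session bound to the shared engine
try:
    node_count = session.query(Node).count()   # Node from the mapping above
finally:
    session.close()          # always release the connection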
......@@ -84,7 +121,7 @@ from sqlalchemy import or_
class ModelCache(dict):
def __init__(self, model, preload=False):
self._model = model.sa
self._model = globals()[model.__name__]
self._columns_names = [column.name for column in model._meta.fields if column.unique]
self._columns = [getattr(self._model, column_name) for column_name in self._columns_names]
self._columns_validators = []
......@@ -92,20 +129,17 @@ class ModelCache(dict):
self.preload()
def __missing__(self, key):
for column in self._columns:
conditions = []
try:
formatted_key = column.type.python_type(key)
conditions.append(column == key)
except ValueError:
pass
if formatted_key in self:
self[key] = self[formatted_key]
else:
element = session.query(self._model).filter(or_(*conditions)).first()
if element is None:
raise KeyError
self[key] = element
conditions = [
(column == key)
for column in self._columns
if key.__class__ == column.type.python_type
]
if len(conditions) == 0:
raise KeyError
element = session.query(self._model).filter(or_(*conditions)).first()
if element is None:
raise KeyError
self[key] = element
return element
def preload(self):
......@@ -127,3 +161,48 @@ class Cache:
return modelcache
cache = Cache()
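ModelCache.__missing__ now builds its OR-conditions in one comprehension, matching the key's Python type against each unique column, instead of the try/except loop it replaces. A hedged example of the cache in use, assuming a NodeType row named 'Corpus' exists:

corpus_type = cache.NodeType['Corpus']            # first access queries the database
assert cache.NodeType['Corpus'] is corpus_type    # later accesses hit the dict
# an unknown key, or a key whose type matches no unique column, raises KeyError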
# Insert many elements at once
import psycopg2
def get_cursor():
db_settings = settings.DATABASES['default']
db = psycopg2.connect(**{
'database': db_settings['NAME'],
'user': db_settings['USER'],
'password': db_settings['PASSWORD'],
'host': db_settings['HOST'],
})
return db, db.cursor()
class bulk_insert:
def __init__(self, table, keys, data, cursor=None):
# prepare the iterator
self.iter = iter(data)
# template
self.template = '%s' + (len(keys) - 1) * '\t%s' + '\n'
# prepare the cursor
if cursor is None:
db, cursor = get_cursor()
mustcommit = True
else:
mustcommit = False
# insert data
if not isinstance(table, str):
table = table.__table__.name
cursor.copy_from(self, table, columns=keys)
# commit if necessary
if mustcommit:
db.commit()
def read(self, size=None):
try:
return self.template % next(self.iter)
except StopIteration:
return ''
readline = read
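bulk_insert implements just enough of the file protocol (read/readline) for psycopg2's copy_from to stream rows as tab-separated lines, committing only when it opened the connection itself. A hedged usage sketch; the table and column names here are illustrative:

rows = [(1, 42, 1.0), (1, 43, 2.0)]        # (node_id, ngram_id, weight)
bulk_insert(Node_Ngram, ('node_id', 'ngram_id', 'weight'), iter(rows))
# note: values are formatted with '%s', so strings containing tabs or
# newlines would corrupt the COPY stream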
......@@ -3,7 +3,7 @@ from django.conf.urls import patterns, include, url
from django.contrib import admin
from django.contrib.auth.views import login
from gargantext_web import views
from gargantext_web import views, views_optimized
import gargantext_web.api
import scrap_pubmed.views as pubmedscrapper
......@@ -31,7 +31,7 @@ urlpatterns = patterns('',
# Project Management
url(r'^projects/$', views.projects),
url(r'^project/(\d+)/delete/$', views.delete_project),
url(r'^project/(\d+)/$', views.project),
url(r'^project/(\d+)/$', views_optimized.project),
# Corpus management
url(r'^project/(\d+)/corpus/(\d+)/$', views.corpus),
......
from django.shortcuts import redirect
from django.shortcuts import render
from django.http import Http404, HttpResponse, HttpResponseForbidden, HttpResponseRedirect
from sqlalchemy import func
from sqlalchemy.orm import aliased
from collections import defaultdict
from datetime import datetime
from node.admin import CustomForm
from gargantext_web.db import *
from gargantext_web.settings import DEBUG, MEDIA_ROOT
from parsing.corpustools import add_resource, parse_resources, extract_ngrams, compute_tfidf
def project(request, project_id):
# SQLAlchemy session
session = Session()
# do we have a valid project id?
try:
project_id = int(project_id)
except ValueError:
raise Http404()
# do we have a valid project?
project = (session
.query(Node)
.filter(Node.id == project_id)
.filter(Node.type_id == cache.NodeType['Project'].id)
).first()
if project is None:
raise Http404()
# do we have a valid user?
user = request.user
if not user.is_authenticated():
return redirect('/login/?next=%s' % request.path)
if project.user_id != user.id:
return HttpResponseForbidden()
# Let's find out about the children nodes of the project
ChildrenNode = aliased(Node)
corpus_query = (session
.query(Node, Resource, func.count(ChildrenNode.id))
.outerjoin(ChildrenNode, ChildrenNode.parent_id == Node.id)
.outerjoin(Node_Resource, Node_Resource.node_id == Node.id)
.outerjoin(Resource, Resource.id == Node_Resource.resource_id)
.filter(Node.parent_id == project.id)
.group_by(Node, Resource)
.order_by(Node.name)
)
corpora_by_resourcetype = defaultdict(list)
documents_count_by_resourcetype = defaultdict(int)
corpora_count = 0
for corpus, resource, document_count in corpus_query:
if resource is None:
resourcetype_name = '(no resource)'
else:
resourcetype = cache.ResourceType[resource.type_id]
resourcetype_name = resourcetype.name
corpora_by_resourcetype[resourcetype_name].append({
'id': corpus.id,
'name': corpus.name,
'count': document_count,
})
documents_count_by_resourcetype[resourcetype_name] += document_count
corpora_count += 1
# do the donut
total_documents_count = sum(documents_count_by_resourcetype.values())
donut = [
{ 'source': key,
'count': value,
'part' : round(value * 100 / total_documents_count) if total_documents_count else 0,
}
for key, value in documents_count_by_resourcetype.items()
]
# deal with the form
if request.method == 'POST':
# form validation
form = CustomForm(request.POST, request.FILES)
if form.is_valid():
# extract information from the form
name = form.cleaned_data['name']
thefile = form.cleaned_data['file']
resourcetype = cache.ResourceType[form.cleaned_data['type']]
# which default language shall be used?
if resourcetype.name == "europress_french":
language_id = cache.Language['fr'].id
elif resourcetype.name == "europress_english":
language_id = cache.Language['en'].id
else:
language_id = None
# corpus node instantiation as an SQLAlchemy model
corpus = Node(
name = name,
user_id = request.user.id,
parent_id = project_id,
type_id = cache.NodeType['Corpus'].id,
language_id = language_id,
)
session.add(corpus)
session.commit()
# save the uploaded file
filepath = '%s/corpora/%s/%s' % (MEDIA_ROOT, request.user.username, thefile._name)
f = open(filepath, 'wb')
f.write(thefile.read())
f.close()
# add the uploaded resource to the corpus
add_resource(corpus,
user_id = request.user.id,
type_id = resourcetype.id,
file = filepath,
)
# let's start the workflow
try:
parse_resources(corpus)
extract_ngrams(corpus, ['title'])
compute_tfidf(corpus)
except Exception as error:
print('WORKFLOW ERROR')
print(error)
# redirect to the main project page
return HttpResponseRedirect('/project/' + str(project_id))
else:
print('ERROR: BAD FORM')
else:
form = CustomForm()
# HTML output
return render(request, 'project.html', {
'form' : form,
'user' : user,
'date' : datetime.now(),
'project' : project,
'donut' : donut,
'list_corpora' : dict(corpora_by_resourcetype),
'whitelists' : '',
'blacklists' : '',
'cooclists' : '',
'number' : corpora_count,
})
......@@ -70,7 +70,7 @@ class Resource(models.Model):
return self.file
class NodeType(models.Model):
name = models.CharField(max_length=200, unique=True)
name = models.CharField(max_length=255, unique=True)
def __str__(self):
return self.name
......@@ -111,7 +111,7 @@ class NodeManager(CTENodeManager):
return getattr(self.get_queryset(), name, *args)
class Metadata(models.Model):
name = models.CharField(max_length=32, db_index=True)
name = models.CharField(max_length=32, unique=True)
type = models.CharField(max_length=16, db_index=True)
class Node(CTENode):
......@@ -120,7 +120,7 @@ class Node(CTENode):
user = models.ForeignKey(User)
type = models.ForeignKey(NodeType)
name = models.CharField(max_length=200)
name = models.CharField(max_length=255)
language = models.ForeignKey(Language, blank=True, null=True, on_delete=models.SET_NULL)
......@@ -189,7 +189,7 @@ class Node(CTENode):
for i, metadata_values in enumerate(metadata_list):
if verbose:
print(i, end='\r', flush=True)
name = metadata_values.get('title', '')[:200]
name = metadata_values.get('title', '')[:255]
language = langages_cache[metadata_values['language_iso2']] if 'language_iso2' in metadata_values else None,
if isinstance(language, tuple):
language = language[0]
......
......@@ -48,11 +48,9 @@ class EuropressFileParser(FileParser):
print(error)
except:
return []
except Exception as error:
print(error)
# initialize the list of metadata
metadata_list = []
# parse all the articles, one by one
try:
for html_article in html_articles:
......@@ -201,16 +199,9 @@ class EuropressFileParser(FileParser):
#metadata_str = {}
for key, value in metadata.items():
metadata[key] = value.decode() if isinstance(value, bytes) else value
metadata_list.append(metadata)
yield metadata
count += 1
except Exception as error:
print(error)
pass
# from pprint import pprint
# pprint(metadata_list)
# return []
return metadata_list
......@@ -103,15 +103,21 @@ class FileParser:
zipArchive = zipfile.ZipFile(file)
for filename in zipArchive.namelist():
try:
metadata_list += self.parse(zipArchive.open(filename, "r"))
f = zipArchive.open(filename, 'r')
metadata_list += self.parse(f)
f.close()
except Exception as error:
print(error)
# ...otherwise, let's parse it directly!
else:
try:
metadata_list += self._parse(file)
for metadata in self._parse(file):
metadata_list.append(self.format_metadata(metadata))
if hasattr(file, 'close'):
file.close()
except Exception as error:
print(error)
# return the list of formatted metadata
return map(self.format_metadata, metadata_list)
return metadata_list
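The contract between FileParser.parse and its subclasses changes here: _parse is now expected to be a generator, and parse() formats each yielded dict as it arrives instead of mapping over a completed list. A hedged sketch of a minimal conforming subclass (illustrative, not part of the codebase):

class OneLinePerDocumentParser(FileParser):
    def _parse(self, file):
        # yield one metadata dict per line, lazily
        for line in file:
            yield {'title': line.decode().strip()}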
......@@ -2,22 +2,14 @@ from django.db import transaction
from lxml import etree
from .FileParser import FileParser
from ..NgramsExtractors import *
from datetime import datetime
from io import BytesIO
class PubmedFileParser(FileParser):
def _parse(self, file):
# open the file as XML
xml_parser = etree.XMLParser(resolve_entities=False, recover=True)
xml = ""
if type(file)==bytes: xml = etree.parse( BytesIO(file) , parser=xml_parser)
else: xml = etree.parse(file, parser=xml_parser)
xml = etree.parse(file, parser=xml_parser)
xml_articles = xml.findall('PubmedArticle')
# initialize the list of metadata
metadata_list = []
# parse all the articles, one by one
for xml_article in xml_articles:
# extract data from the document
......@@ -25,14 +17,9 @@ class PubmedFileParser(FileParser):
metadata_path = {
"journal" : 'MedlineCitation/Article/Journal/Title',
"title" : 'MedlineCitation/Article/ArticleTitle',
# "abstract" : 'MedlineCitation/Article/Abstract/AbstractText',
"title2" : 'MedlineCitation/Article/VernacularTitle',
"language_iso3" : 'MedlineCitation/Article/Language',
"doi" : 'PubmedData/ArticleIdList/ArticleId[@type=doi]',
"realdate_full_" : 'MedlineCitation/Article/Journal/JournalIssue/PubDate/MedlineDate',
"realdate_year_" : 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Year',
"realdate_month_" : 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Month',
"realdate_day_" : 'MedlineCitation/Article/Journal/JournalIssue/PubDate/Day',
"abstract" : 'MedlineCitation/Article/Abstract/AbstractText',
"publication_year" : 'MedlineCitation/DateCreated/Year',
"publication_month" : 'MedlineCitation/DateCreated/Month',
"publication_day" : 'MedlineCitation/DateCreated/Day',
......@@ -41,7 +28,6 @@ class PubmedFileParser(FileParser):
for key, path in metadata_path.items():
try:
xml_node = xml_article.find(path)
# Authors tag
if key == 'authors':
metadata[key] = ', '.join([
xml_author.find('ForeName').text + ' ' + xml_author.find('LastName').text
......@@ -49,61 +35,6 @@ class PubmedFileParser(FileParser):
])
else:
metadata[key] = xml_node.text
except:
pass
#Title-Decision
Title=""
if not metadata["title"] or metadata["title"]=="":
if "title2" in metadata:
metadata["title"] = metadata["title2"]
else: metadata["title"] = ""
# Date-Decision
# forge.iscpif.fr/issues/1418
RealDate = ""
if "realdate_full_" in metadata:
RealDate = metadata["realdate_full_"]
else:
if "realdate_year_" in metadata: RealDate+=metadata["realdate_year_"]
if "realdate_month_" in metadata: RealDate+=" "+metadata["realdate_month_"]
if "realdate_day_" in metadata: RealDate+=" "+metadata["realdate_day_"]
metadata["realdate_full_"] = RealDate
RealDate = RealDate.split("-")[0]
PubmedDate = ""
if "publication_year" in metadata: PubmedDate+=metadata["publication_year"]
if "publication_month" in metadata: PubmedDate+=" "+metadata["publication_month"]
if "publication_day" in metadata: PubmedDate+=" "+metadata["publication_day"]
Decision=""
if len(RealDate)>4:
if len(RealDate)>8:
try: Decision = datetime.strptime(RealDate, '%Y %b %d').date()
except:
try: Decision = datetime.strptime(PubmedDate, '%Y %m %d').date()
except: Decision=False
else:
try: Decision = datetime.strptime(RealDate, '%Y %b').date()
except:
try: Decision = datetime.strptime(PubmedDate, '%Y %m %d').date()
except: Decision=False
else:
try: Decision = datetime.strptime(PubmedDate, '%Y %m %d').date()
except: Decision=False
if Decision!=False:
if "publication_year" in metadata: metadata["publication_year"] = str(Decision.year)
if "publication_month" in metadata: metadata["publication_month"] = str(Decision.month)
if "publication_day" in metadata: metadata["publication_day"] = str(Decision.day)
if "realdate_year_" in metadata: metadata.pop("realdate_year_")
if "realdate_month_" in metadata: metadata.pop("realdate_month_")
if "realdate_day_" in metadata: metadata.pop("realdate_day_")
if "title2" in metadata: metadata.pop("title2")
# print(metadata)
# print("* * * * ** * * * * ")
metadata_list.append(metadata)
# return the list of metadata
return metadata_list
yield metadata
\ No newline at end of file
......@@ -17,42 +17,34 @@ class RisFileParser(FileParser):
}
def _parse(self, file):
metadata_list = []
metadata = {}
last_key = None
last_values = []
# browse every line of the file
for line in file:
if len(line) > 2:
# extract the parameter key
parameter_key = line[:2]
# print(parameter_key)
if parameter_key != b' ' and parameter_key != last_key:
if last_key in self._parameters:
# translate the parameter key
parameter = self._parameters[last_key]
if parameter["type"] == "metadata":
separator = parameter["separator"] if "separator" in parameter else ""
metadata[parameter["key"]] = separator.join(last_values)
elif parameter["type"] == "delimiter":
#language = self._languages_fullname[metadata["language"].lower()]
#print(metadata)
try:
#print("append")
if 'language_fullname' not in metadata.keys():
if 'language_iso3' not in metadata.keys():
if 'language_iso2' not in metadata.keys():
metadata['language_iso2'] = 'en'
metadata_list.append(metadata)
metadata = {}
#print("append succeeded")
except:
pass
if 'language_fullname' not in metadata.keys():
if 'language_iso3' not in metadata.keys():
if 'language_iso2' not in metadata.keys():
metadata['language_iso2'] = 'en'
yield metadata
metadata = {}
last_key = parameter_key
last_values = []
try:
last_values.append(line[3:-1].decode())
except Exception as error:
print(error)
pass
#print(len(metadata_list))
#print(metadata_list)
return metadata_list
# if a metadata object is left in memory, yield it as well
if metadata:
yield metadata
......@@ -13,6 +13,7 @@ class NgramsExtractor:
self.start()
self._label = "NP"
self._rule = self._label + ": " + rule
self._grammar = nltk.RegexpParser(self._rule)
def __del__(self):
self.stop()
......@@ -29,19 +30,8 @@ class NgramsExtractor:
"""
def extract_ngrams(self, contents):
tagged_ngrams = self.tagger.tag_text(contents)
if len(tagged_ngrams)==0: return []
grammar = nltk.RegexpParser(self._rule)
result = []
# try:
grammar_parsed = grammar.parse(tagged_ngrams)
for subtree in grammar_parsed.subtrees():
if subtree.label() == self._label:
result.append(subtree.leaves())
# except Exception as e:
# print("Problem while parsing rule '%s'" % (self._rule, ))
# print(e)
return result
if len(tagged_ngrams):
grammar_parsed = self._grammar.parse(tagged_ngrams)
for subtree in grammar_parsed.subtrees():
if subtree.label() == self._label:
yield subtree.leaves()
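Two changes here: the grammar is compiled once in __init__ instead of on every call, and extract_ngrams becomes a generator yielding the leaves of each matching subtree. A hedged usage sketch, assuming a tagger that returns nltk-style (token, tag) pairs:

extractor = EnglishNgramsExtractor()
for leaves in extractor.extract_ngrams('Honey bees pollinate flowering plants.'):
    # each item is a list of (token, tag) pairs forming one noun phrase
    print(' '.join(token for token, tag in leaves))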
from .FrenchNgramsExtractor import FrenchNgramsExtractor
from .TurboNgramsExtractor import TurboNgramsExtractor as EnglishNgramsExtractor
# from parsing.NgramsExtractors.EnglishNgramsExtractor import EnglishNgramsExtractor
# from .EnglishNgramsExtractor import EnglishNgramsExtractor
from .NgramsExtractor import NgramsExtractor
......@@ -71,4 +71,3 @@ class Tagger:
tokens_tags += self.tag_tokens(tokens, False)
self.tagging_end()
return tokens_tags
......@@ -9,15 +9,24 @@ from .settings import implemented_methods
class NLPClient:
def __init__(self):
self._socket = socket.socket(*server_type_client)
self._socket.connect((server_host, server_port))
self._socket = None
for method_name in dir(self):
if method_name[0] != '_':
if method_name.upper() not in implemented_methods:
setattr(self, method_name, self._notimplemented)
def __del__(self):
self._socket.close()
self._disconnect()
def _connect(self):
self._disconnect()
self._socket = socket.socket(*server_type_client)
self._socket.connect((server_host, server_port))
def _disconnect(self):
if self._socket is not None:
self._socket.close()
self._socket = None
def _notimplemented(self, *args, **kwargs):
raise NotImplementedError(
......@@ -51,7 +60,7 @@ class NLPClient:
data += language + '\n'
data += re.sub(r'\n+', '\n', text)
data += '\n\n'
self.__init__()
self._connect()
self._socket.sendall(data.encode())
sentence = []
if keys is None:
......@@ -73,7 +82,6 @@ class NLPClient:
continue
values = line.split('\t')
sentence.append(dict(zip(keys, line.split('\t'))))
self.__del__()
def tokenize(self, text, language='english', asdict=False):
keys = ('token', ) if asdict else None
......
......@@ -4,7 +4,7 @@ import socketserver
# Server parameters
server_host = 'localhost'
server_port = 1234
server_port = 7777
server_type_server = socketserver.TCPServer
server_type_client = socket.AF_INET, socket.SOCK_STREAM
server_timeout = 2.0
......
# Without this, we couldn't use the Django environment
import os
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext_web.settings")
os.environ.setdefault("DJANGO_HSTORE_GLOBAL_REGISTER", "False")
# database tools
from gargantext_web.db import *
from parsing.corpustools import *
user = session.query(User).first()
project = session.query(Node).filter(Node.name == 'A').first()
corpus = Node(
parent_id = project.id,
name = 'Test 456',
type_id = cache.NodeType['Corpus'].id,
user_id = user.id,
)
session.add(corpus)
session.commit()
add_resource(corpus,
# file = './data_samples/pubmed_result.xml',
file = './data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml',
type_id = cache.ResourceType['pubmed'].id,
)
parse_resources(corpus)
extract_ngrams(corpus, ('title', ))
# print(corpus)
# corpus = session.query(Node).filter(Node.id == 72771).first()
# corpus = session.query(Node).filter(Node.id == 73017).first()
compute_tfidf(corpus)