humanities / gargantext · Commits

Commit da2b3252, authored Jan 14, 2016 by delanoe
Parent: ea335122

[FIX] Remove session with session.remove() when each function is over.

Showing 24 changed files with 221 additions and 84 deletions (+221 / -84)
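The pattern applied throughout this commit: each function obtains its own session via get_session() and disposes of it with session.remove() once the function is over, instead of sharing one module-level or class-level session. Below is a minimal sketch of that pattern, assuming get_session() returns SQLAlchemy's scoped_session registry (the actual helper lives in gargantext_web.db and is not shown in this diff); the engine URL and the Node stand-in model are hypothetical simplifications.

    from sqlalchemy import create_engine, Column, Integer
    from sqlalchemy.orm import scoped_session, sessionmaker
    from sqlalchemy.ext.declarative import declarative_base

    Base = declarative_base()

    class Node(Base):                  # stand-in for the real gargantext Node model
        __tablename__ = 'nodes'
        id = Column(Integer, primary_key=True)
        type_id = Column(Integer)

    engine = create_engine('sqlite:///:memory:')   # hypothetical; the app uses PostgreSQL
    Base.metadata.create_all(engine)
    Session = scoped_session(sessionmaker(bind=engine))

    def get_session():
        # the scoped_session registry proxies query/add/commit to a
        # thread-local session; .remove() disposes of that session
        return Session

    def count_nodes(type_id):
        session = get_session()
        try:
            return session.query(Node).filter(Node.type_id == type_id).count()
        finally:
            session.remove()           # give the connection back when the function is over

Because scoped_session hands out one session per thread, calling .remove() at the end of each function returns the connection to the pool instead of leaking a session per request or per Celery task.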
analysis/cooccurrences.py            +1   -0
analysis/diachronic_specificity.py   +2   -1
analysis/lists.py                    +8   -0
annotations/views.py                 +9   -3
gargantext_web/celery.py             +3   -2
gargantext_web/db.py                 +9   -0
gargantext_web/views.py              +42  -22
gargantext_web/views_optimized.py    +8   -2
ngram/cvalue.py                      +4   -2
ngram/group.py                       +4   -1
ngram/importExport.py                +2   -4
ngram/lists.py                       +11  -2
ngram/mapList.py                     +4   -4
ngram/occurrences.py                 +5   -1
ngram/specificity.py                 +5   -2
ngram/stemLem.py                     +5   -1
ngram/stop.py                        +5   -2
ngram/tfidf.py                       +11  -3
ngram/tools.py                       +3   -2
parsing/corpustools.py               +9   -6
rest_v1_0/api.py                     +37  -10
rest_v1_0/graph.py                   +4   -1
rest_v1_0/ngrams.py                  +26  -6
scrappers/scrap_pubmed/views.py      +4   -7
analysis/cooccurrences.py
@@ -200,3 +200,4 @@ def do_cooc(corpus=None
     cooc = matrix
     cooc.save(node_cooc.id)
     return(node_cooc.id)
+    session.remove()
analysis/diachronic_specificity.py
@@ -9,7 +9,6 @@ import numpy as np
 import collections

-session = get_session()

 def result2dict(query):
     results = dict()
@@ -27,6 +26,7 @@ def diachronic_specificity(corpus_id, terms, order=True):
     Values are measure to indicate diachronic specificity.
     Nowadays, the measure is rather simple: distance of frequency of period from mean of frequency of all corpus.
     '''
+    session = get_session()
     ngram_frequency_query = (session
         .query(Node.hyperdata['publication_year'], func.count('*'))
         .join(NodeNgram, Node.id == NodeNgram.node_id)
@@ -64,6 +64,7 @@ def diachronic_specificity(corpus_id, terms, order=True):
     else:
         return relative_terms_count
+    session.remove()

 # For tests
 # diachronic_specificity(102750, "bayer", order=True)
analysis/lists.py
@@ -76,6 +76,7 @@ class Translations(BaseClass):
             self.groups = defaultdict(set)
             for key, value in self.items.items():
                 self.groups[value].add(key)
+            session.remove()
         elif isinstance(other, Translations):
             self.items = other.items.copy()
             self.groups = other.groups.copy()
@@ -128,6 +129,7 @@ class Translations(BaseClass):
             ('node_id', 'ngramy_id', 'ngramx_id', 'score'),
             ((node_id, key, value, 1.0) for key, value in self.items.items())
         )
+        session.remove()

 class WeightedMatrix(BaseClass):
@@ -144,6 +146,7 @@ class WeightedMatrix(BaseClass):
             self.items = defaultdict(lambda: defaultdict(float))
             for key1, key2, value in self.items.items():
                 self.items[key1][key2] = value
+            session.remove()
         elif isinstance(other, WeightedMatrix):
             self.items = defaultdict(lambda: defaultdict(float))
             for key1, key2, value in other:
@@ -171,6 +174,7 @@ class WeightedMatrix(BaseClass):
             ('node_id', 'ngramx_id', 'ngramy_id', 'score'),
             ((node_id, key1, key2, value) for key1, key2, value in self)
         )
+        session.remove()

     def __radd__(self, other):
         result = NotImplemented
@@ -253,6 +257,7 @@ class UnweightedList(BaseClass):
                 .filter(NodeNgram.node_id == other)
             )
             self.items = {row[0] for row in query}
+            session.remove()
         elif isinstance(other, WeightedList):
             self.items = set(other.items.keys())
         elif isinstance(other, UnweightedList):
@@ -337,6 +342,7 @@ class UnweightedList(BaseClass):
             ('node_id', 'ngram_id', 'weight'),
             ((node_id, key, 1.0) for key in self.items)
         )
+        session.remove()

 class WeightedList(BaseClass):
@@ -351,6 +357,7 @@ class WeightedList(BaseClass):
                 .filter(NodeNgram.node_id == other)
             )
             self.items = defaultdict(float, query)
+            session.remove()
         elif isinstance(other, WeightedList):
             self.items = other.items.copy()
         elif isinstance(other, UnweightedList):
@@ -451,6 +458,7 @@ class WeightedList(BaseClass):
             ('node_id', 'ngram_id', 'weight'),
             ((node_id, key, value) for key, value in self.items.items())
         )
+        session.remove()

 def test():
annotations/views.py
@@ -63,12 +63,12 @@ class NgramEdit(APIView):
     """
     renderer_classes = (JSONRenderer,)
     authentication_classes = (SessionAuthentication, BasicAuthentication)
-    session = get_session()

     def post(self, request, list_id, ngram_ids):
         """
         Edit an existing NGram in a given list
         """
+        session = get_session()
         list_id = int(list_id)
         list_node = session.query(Node).filter(Node.id==list_id).first()
         # TODO add 1 for MapList social score ?
@@ -90,6 +90,8 @@ class NgramEdit(APIView):
             'uuid': ngram_id,
             'list_id': list_id,
             } for ngram_id in ngram_ids)
+        session.remove()
+
     def put(self, request, list_id, ngram_ids):
         return Response(None, 204)
@@ -98,6 +100,7 @@ class NgramEdit(APIView):
         """
         Delete a ngram from a list
         """
+        session = get_session()
         print("to del", ngram_ids)
         for ngram_id in ngram_ids.split('+'):
             print('ngram_id', ngram_id)
@@ -128,6 +131,7 @@ class NgramEdit(APIView):
         # [ = = = = / del from map-list = = = = ]
         return Response(None, 204)
+        session.remove()

 class NgramCreate(APIView):
     """
@@ -135,7 +139,6 @@ class NgramCreate(APIView):
     """
     renderer_classes = (JSONRenderer,)
     authentication_classes = (SessionAuthentication, BasicAuthentication)
-    session = get_session()

     def post(self, request, list_id):
         """
@@ -143,6 +146,7 @@ class NgramCreate(APIView):
         example: request.data = {'text': 'phylogeny'}
         """
+        session = get_session()
         list_id = int(list_id)
         # format the ngram's text
         ngram_text = request.data.get('text', None)
@@ -177,6 +181,7 @@ class NgramCreate(APIView):
             'list_id': list_id,
             })
+        session.remove()

 class Document(APIView):
     """
@@ -186,6 +191,7 @@ class Document(APIView):
     def get(self, request, doc_id):
         """Document by ID"""
+        session = get_session()
         node = session.query(Node).filter(Node.id == doc_id).first()
         if node is None:
             raise APIException('This node does not exist', 404)
@@ -207,5 +213,5 @@ class Document(APIView):
             'id': node.id
         }
         return Response(data)
+        session.remove()
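A side note on the NgramEdit, NgramCreate and (further below) Keep hunks: besides adding per-method sessions, the diff deletes `session = get_session()` from the class bodies. A class attribute is evaluated once at import time, so a single session object would be shared by every request and every thread; fetching the session inside each method picks up the current thread-local one instead. A sketch of the difference, reusing get_session() and the stand-in Node model from the sketch above:

    class BadService:
        session = get_session()          # evaluated once at import: all requests share it

    class GoodService:
        def handle(self, node_id):
            session = get_session()      # resolved per call: the current thread's session
            try:
                return session.query(Node).get(node_id)
            finally:
                session.remove()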
gargantext_web/celery.py
@@ -20,7 +20,7 @@ def apply_sum(x, y):
     print(x+y)
+    session = get_session()
     print(session.query(Node.name).first())
+    session.remove()

 from parsing.corpustools import parse_resources, extract_ngrams #add_resource,
 from ngram.lists import ngrams2miam
@@ -52,7 +52,8 @@ def apply_workflow(corpus_id):
     print("End of the Workflow for corpus %d" % (corpus_id))
     update_state.processing_(corpus, "0")
+    session.remove()

 @shared_task
 def empty_trash(corpus_id):
gargantext_web/db.py
@@ -168,6 +168,7 @@ class ModelCache(dict):
             raise KeyError
         self[key] = element
         return element
+        session.remove()

     def preload(self):
         self.clear()
@@ -176,6 +177,7 @@ class ModelCache(dict):
             for column_name in self._columns_names:
                 key = getattr(element, column_name)
                 self[key] = element
+        session.remove()

 class Cache():
@@ -243,8 +245,11 @@ def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hy
     name_str :: String
     hyperdata :: Dict
     '''
+    sessionToRemove = False
+    if session is None:
+        session = get_session()
+        sessionToRemove = True

     if nodetype is None:
         print("Need to give a type node")
@@ -285,3 +290,7 @@ def get_or_create_node(nodetype=None,corpus=None,corpus_id=None,name_str=None,hy
         session.commit()
         #print(parent_id, n.parent_id, n.id, n.name)
     return(node)
+
+    if sessionToRemove:
+        session.remove()
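The get_or_create_node hunks introduce an ownership convention: the function only removes a session it created itself, so callers such as compute_specificity, compute_tfidf and insert_ngrams_to_list (below) can pass session=session and keep using it afterwards. Note that in the committed code the `if sessionToRemove: session.remove()` sits after `return(node)`, where it never executes; a try/finally, as in this simplified sketch (signature shortened, nodetype handling hypothetical), is one way to make the cleanup actually run:

    def get_or_create_node(nodetype=None, corpus=None, session=None):
        sessionToRemove = False
        if session is None:
            session = get_session()      # this call owns the session
            sessionToRemove = True
        try:
            node = session.query(Node).filter(Node.type_id == nodetype).first()
            if node is None:
                node = Node(type_id=nodetype)
                session.add(node)
                session.commit()
            return node
        finally:
            if sessionToRemove:
                session.remove()         # only remove what we created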
gargantext_web/views.py
@@ -220,6 +220,7 @@ def projects(request):
     Each project is described with hyperdata that are updateded on each following view.
     To each project, we can link a resource that can be an image.
     '''
+    session = get_session()
     if not request.user.is_authenticated():
         return redirect('/auth/')
@@ -230,7 +231,6 @@ def projects(request):
     date = datetime.datetime.now()

     # print(Logger.write("STATIC_ROOT"))
-    session = get_session()
     projects = session.query(Node).filter(Node.user_id == user_id, Node.type_id == project_type_id).order_by(Node.date).all()
     number = len(projects)
@@ -288,7 +288,7 @@ def projects(request):
         'common_projects': common_projects,
         'common_users': common_users,
         })
+    session.remove()

 def update_nodes(request, project_id, corpus_id, view=None):
     '''
@@ -297,10 +297,11 @@ def update_nodes(request, project_id, corpus_id, view=None):
     - permanent deletion of Trash
     '''
+    session = get_session()
     if not request.user.is_authenticated():
         return redirect('/login/?next=%s' % request.path)

-    session = get_session()
     try:
         offset = int(project_id)
         offset = int(corpus_id)
@@ -358,8 +359,12 @@ def update_nodes(request, project_id, corpus_id, view=None):
     #          context_instance=RequestContext(request)
     #     )
     #
+    session.remove()

 def corpus(request, project_id, corpus_id):
+    session = get_session()
     if not request.user.is_authenticated():
         return redirect('/login/?next=%s' % request.path)
@@ -378,7 +383,6 @@ def corpus(request, project_id, corpus_id):
     type_doc_id = cache.NodeType['Document'].id
-    session = get_session()
     number = session.query(func.count(Node.id)).filter(Node.parent_id==corpus_id, Node.type_id==type_doc_id).all()[0][0]
@@ -405,15 +409,15 @@ def corpus(request, project_id, corpus_id):
         }))
     return HttpResponse(html)
+    session.remove()

 def newpaginatorJSON(request , corpus_id):
     results = ["hola", "mundo"]
+    session = get_session()

     # t = get_template('tests/newpag/thetable.html')
     # project = session.query(Node).filter(Node.id==project_id).first()
-    session = get_session()
     corpus = session.query(Node).filter(Node.id==corpus_id).first()
     type_document_id = cache.NodeType['Document'].id
     user_id = request.user.id
@@ -464,11 +468,11 @@ def newpaginatorJSON(request , corpus_id):
         "totalRecordCount": len(results)
     }
     return JsonHttpResponse(finaldict)
+    session.remove()

 def move_to_trash(node_id):
+    session = get_session()
     try:
-        session = get_session()
         node = session.query(Node).filter(Node.id==node_id).first()
         previous_type_id = node.type_id
@@ -486,9 +490,14 @@ def move_to_trash(node_id):
         #return(previous_type_id)
     except Exception as error:
         print("can not move to trash Node" + str(node_id) + ":" + str(error))
+    session.remove()

 def move_to_trash_multiple(request):
+    session = get_session()
     user = request.user
     if not user.is_authenticated():
         return redirect('/login/?next=%s' % request.path)
@@ -498,7 +507,6 @@ def move_to_trash_multiple(request):
         nodes2trash = json.loads(request.POST["nodeids"])
         print("nodes to the trash:")
         print(nodes2trash)
-        session = get_session()
         nodes = session.query(Node).filter(Node.id.in_(nodes2trash)).all()
         for node in nodes:
             node.type_id = cache.NodeType['Trash'].id
@@ -509,13 +517,15 @@ def move_to_trash_multiple(request):
     results = ["tudo", "fixe"]
     return JsonHttpResponse(results)
+    session.remove()

 def delete_node(request, node_id):
+    session = get_session()

     # do we have a valid user?
     user = request.user
-    session = get_session()
     node = session.query(Node).filter(Node.id==node_id).first()

     if not user.is_authenticated():
@@ -531,7 +541,8 @@ def delete_node(request, node_id):
         return HttpResponseRedirect('/project/' + str(node_parent_id))
     else:
         return HttpResponseRedirect('/projects/')
+    session.remove()

 def delete_corpus(request, project_id, node_id):
     # ORM Django
@@ -553,11 +564,12 @@ def delete_corpus(request, project_id, node_id):
 def chart(request, project_id, corpus_id):
     ''' Charts to compare, filter, count'''
+    session = get_session()
     t = get_template('chart.html')
     user = request.user
     date = datetime.datetime.now()

-    session = get_session()
     project = session.query(Node).filter(Node.id==project_id).first()
     corpus = session.query(Node).filter(Node.id==corpus_id).first()
@@ -569,13 +581,15 @@ def chart(request, project_id, corpus_id):
         'corpus': corpus,
     }))
     return HttpResponse(html)
+    session.remove()

 def sankey(request, corpus_id):
+    session = get_session()
     t = get_template('sankey.html')
     user = request.user
     date = datetime.datetime.now()

-    session = get_session()
     corpus = session.query(Node).filter(Node.id==corpus_id).first()

     html = t.render(Context({
@@ -586,15 +600,15 @@ def sankey(request, corpus_id):
     }))
     return HttpResponse(html)
+    session.remove()

 def matrix(request, project_id, corpus_id):
+    session = get_session()
     t = get_template('matrix.html')
     user = request.user
     date = datetime.datetime.now()

-    session = get_session()
     project = session.query(Node).filter(Node.id==project_id).first()
     corpus = session.query(Node).filter(Node.id==corpus_id).first()
@@ -607,13 +621,15 @@ def matrix(request, project_id, corpus_id):
     }))
     return HttpResponse(html)
+    session.remove()

 def graph(request, project_id, corpus_id, generic=100, specific=100):
+    session = get_session()
     t = get_template('explorer.html')
     user = request.user
     date = datetime.datetime.now()

-    session = get_session()
     project = session.query(Node).filter(Node.id==project_id).first()
     corpus = session.query(Node).filter(Node.id==corpus_id).first()
@@ -638,6 +654,7 @@ def graph(request, project_id, corpus_id, generic=100, specific=100):
     }))
     return HttpResponse(html)
+    session.remove()

 def exploration(request):
     t = get_template('exploration.html')
@@ -672,12 +689,13 @@ def corpus_csv(request, project_id, corpus_id):
     '''
     Create the HttpResponse object with the appropriate CSV header.
     '''
+    session = get_session()
     response = HttpResponse(content_type='text/csv')
     response['Content-Disposition'] = 'attachment; filename="corpus.csv"'

     writer = csv.writer(response)

-    session = get_session()
     corpus_id = session.query(Node.id).filter(Node.id==corpus_id).first()
     type_document_id = cache.NodeType['Document'].id
     documents = session.query(Node).filter(Node.parent_id==corpus_id, Node.type_id==type_document_id).all()
@@ -700,6 +718,7 @@ def corpus_csv(request, project_id, corpus_id):
     return response
+    session.remove()

 def send_csv(request, corpus_id):
     '''
@@ -748,17 +767,17 @@ def node_link(request, corpus_id):
     '''
     Create the HttpResponse object with the node_link dataset.
     '''
-    data = []
+    session = get_session()
+    data = []
     corpus = session.query(Node).filter(Node.id==corpus_id).first()
     data = get_cooc(request=request, corpus=corpus, type="node_link")
     return JsonHttpResponse(data)
+    session.remove()

 def sankey_csv(request , corpus_id):
-    data = []
+    session = get_session()
+    data = []
     corpus = session.query(Node).filter(Node.id==corpus_id).first()
     data = [["source", "target", "value"]
@@ -775,6 +794,7 @@ def sankey_csv(request , corpus_id):
            , ["Theme_3", "Reco_par_5", 1]
            ]
     return(CsvHttpResponse(data))
+    session.remove()

 def adjacency(request, corpus_id):
     '''
gargantext_web/views_optimized.py
@@ -199,6 +199,8 @@ def project(request, project_id):
         'number': corpora_count,
     })
+    session.remove()
+
 def tfidf(request, corpus_id, ngram_ids):
     """Takes IDs of corpus and ngram and returns list of relevent documents in json format
     according to TFIDF score (order is decreasing).
@@ -254,11 +256,13 @@ def tfidf(request, corpus_id, ngram_ids):
         nodes_list.append(node_dict)

     return JsonHttpResponse(nodes_list)
+    session.remove()

 def getCorpusIntersection(request , corpuses_ids):
-    FinalDict = False
+    session = get_session()
+    FinalDict = False

     if request.method == 'POST' and "nodeids" in request.POST and len(request.POST["nodeids"]) > 0:
         import ast
@@ -303,15 +307,16 @@ def getCorpusIntersection(request , corpuses_ids):
     # Getting AVG-COOC of each ngram that exists in the cooc-matrix of the compared-corpus.
     return JsonHttpResponse(FinalDict)
+    session.remove()

 def getUserPortfolio(request , project_id):
+    session = get_session()
     user = request.user
     user_id = cache.User[request.user.username].id
     project_type_id = cache.NodeType['Project'].id
     corpus_type_id = cache.NodeType['Corpus'].id

     results = {}
-    session = get_session()
     projs = session.query(Node).filter(Node.user_id == user_id, Node.type_id == project_type_id).all()
@@ -349,3 +354,4 @@ def getUserPortfolio(request , project_id):
     return JsonHttpResponse(results)
+    session.remove()
ngram/cvalue.py
@@ -41,10 +41,11 @@ def getNgrams(corpus=None, limit=1000):
     '''
     getNgrams :: Corpus -> [(Int, String, String, Float)]
     '''
+    session = get_session()
     terms = dict()
     tfidf_node = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus)
-    session = get_session()
     #print(corpus.name)
     ngrams = (session.query(Ngram.id, Ngram.terms, func.sum(NodeNgram.weight), NodeNodeNgram.score)
                 .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
@@ -64,6 +65,7 @@ def getNgrams(corpus=None, limit=1000):
     except:
         PrintException()
     return(terms)
+    session.remove()

 def compute_cvalue(corpus=None, limit=1000):
     '''
@@ -130,7 +132,7 @@ def compute_cvalue(corpus=None, limit=1000):
     #bulk_insert(NodeNodeNgram, ['nodex_id', 'nodey_id', 'ngram_id', 'score'], [n for n in islice(result,0,100)])
     bulk_insert(NodeNodeNgram, ['nodex_id', 'nodey_id', 'ngram_id', 'score'], [n for n in result])
+    session.remove()

 # test
 #corpus=session.query(Node).filter(Node.id==244250).first()
 #computeCvalue(corpus)
ngram/group.py
@@ -52,10 +52,11 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem'):
     '''
     group ngrams according to a function (stemming or lemming)
     '''
+    session = get_session()
     dbg = DebugTime('Corpus #%d - group' % corpus.id)
     dbg.show('Group')

-    session = get_session()
     #spec,cvalue = getNgrams(corpus, limit_inf=limit_inf, limit_sup=limit_sup)
     #list_to_check=cvalue.union(spec)
@@ -138,3 +139,5 @@ def compute_groups(corpus, limit_inf=None, limit_sup=None, how='Stem'):
                 , [data for data in group_to_insert])
     bulk_insert(NodeNgram, ('node_id', 'ngram_id', 'weight'), [data for data in list(miam_to_insert)])
+
+    session.remove()
ngram/importExport.py
@@ -131,7 +131,7 @@ def exportNgramLists(node,filename,delimiter="\t"):
         # csv_rows = [[ligne1_a, ligne1_b..],[ligne2_a, ligne2_b..],..]
         return csv_rows
+        session.remove()

     # apply our ng_to_csv function to each list
     # ------------------------------------------------------
@@ -380,9 +380,7 @@ def importNgramLists(node,filename,delimiter="\t", del_lists=[]):
     print("INFO: added %i elements in the lists indices" % added_nd_ng)
     print("INFO: added %i new ngrams in the lexicon" % added_ng)
+    session.remove()

 # to be timed:
ngram/lists.py
@@ -59,6 +59,9 @@ def listIds(typeList=None, user_id=None, corpus_id=None):
     else:
         raise Exception("Usage (Warning): Need corpus_id and user_id")

+    session.remove()
+

 # Some functions to manage ngrams according to the lists
@@ -118,6 +121,8 @@ def listNgramIds(list_id=None, typeList=None,
         )
     return(query.all())
+    session.remove()
+
 def ngramList(do, list_id, ngram_ids=None) :
     '''
@@ -129,8 +134,9 @@ def ngramList(do, list_id, ngram_ids=None) :
     ngram_id = [Int] : list of Ngrams id (Ngrams.id)
     list_id = Int : list id (Node.id)
     '''
-    results = []
+    session = get_session()
+    results = []

     if do == 'create':
         terms = copy(ngram_ids)
@@ -163,6 +169,7 @@ def ngramList(do, list_id, ngram_ids=None) :
         session.commit()

     return(results)
+    session.remove()

 # Some functions to manage automatically the lists
 def doStopList(user_id=None, corpus_id=None, stop_id=None, reset=False, limit=None):
@@ -202,6 +209,7 @@ def ngrams2miam(user_id=None, corpus_id=None):
         .all()
     )
     bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], query)
+    session.remove()

 from gargantext_web.db import get_or_create_node
 from analysis.lists import Translations, UnweightedList
@@ -232,6 +240,7 @@ def ngrams2miamBis(corpus):
         .all()
     )
     bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], query)
+    session.remove()

 def doList(
            type_list='MiamList',
@@ -365,6 +374,6 @@ def doList(
     bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], query)

     return(list_dict[type_list]['id'])
+    session.remove()
ngram/mapList.py
@@ -87,10 +87,12 @@ def compute_mapList(corpus,limit=500,n=1):
     bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], [d for d in data])

     dbg.show('MapList computed')
+    session.remove()

 def insert_miam(corpus, ngrams=None, path_file_csv=None):
-    dbg = DebugTime('Corpus #%d - computing Miam' % corpus.id)
+    session = get_session()
+    dbg = DebugTime('Corpus #%d - computing Miam' % corpus.id)
     node_miam = get_or_create_node(nodetype='MiamList', corpus=corpus)
     session.query(NodeNgram).filter(NodeNgram.node_id==node_miam.id).delete()
     session.commit()
@@ -122,8 +124,6 @@ def insert_miam(corpus, ngrams=None, path_file_csv=None):
     bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], [d for d in data])
     file_csv.close()
     dbg.show('Miam computed')
+    session.remove()

 #corpus = session.query(Node).filter(Node.id==540420).first()
 #compute_mapList(corpus)
 #insert_miam(corpus=corpus, path_file_csv="Thesaurus_tag.csv")
ngram/occurrences.py
@@ -5,11 +5,12 @@ from gargantext_web.db import get_or_create_node
 from admin.utils import DebugTime

 def compute_occs(corpus):
+    session = get_session()
     dbg = DebugTime('Corpus #%d - OCCURRENCES' % corpus.id)
     dbg.show('Calculate occurrences')
     occs_node = get_or_create_node(nodetype='Occurrences', corpus=corpus)
-    session = get_session()
     #print(occs_node.id)

     (session.query(NodeNodeNgram)
@@ -47,5 +48,8 @@ def compute_occs(corpus):
         )
     )
     db.commit()
+
+    session.remove()

 #data = session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==occs_node.id).all()
 #print([n for n in data])
ngram/specificity.py
@@ -20,6 +20,7 @@ def specificity(cooc_id=None, corpus=None, limit=100):
     Compute the specificity, simple calculus.
     '''
+    session = get_session()
     cooccurrences = (session.query(NodeNgramNgram)
                     .filter(NodeNgramNgram.node_id==cooc_id)
                     .order_by(NodeNgramNgram.score)
@@ -54,6 +55,7 @@ def specificity(cooc_id=None, corpus=None, limit=100):
     bulk_insert(NodeNodeNgram, ['nodex_id', 'nodey_id', 'ngram_id', 'score'], [d for d in data])

     return(node.id)
+    session.remove()

 def compute_specificity(corpus,limit=100):
     '''
@@ -62,15 +64,16 @@ def compute_specificity(corpus,limit=100):
     1) Compute the cooc matrix
     2) Compute the specificity score, saving it in database, return its Node
     '''
+    session = get_session()
     dbg = DebugTime('Corpus #%d - specificity' % corpus.id)

-    session = get_session()
     list_cvalue = get_or_create_node(nodetype='Cvalue', corpus=corpus, session=session)
     cooc_id = do_cooc(corpus=corpus, cvalue_id=list_cvalue.id, limit=limit)

     specificity(cooc_id=cooc_id, corpus=corpus, limit=limit)
     dbg.show('specificity')
+    session.remove()

 #corpus=session.query(Node).filter(Node.id==244250).first()
 #compute_specificity(corpus)
ngram/stemLem.py
@@ -11,6 +11,7 @@ def get_ngramogram(corpus, limit=None):
     Ngram is a composition of ograms (ogram = 1gram)
     """
+    session = get_session()
     try:
         query = (session
             .query(Ngram.id, Ngram.terms)
@@ -34,6 +35,8 @@ def get_ngramogram(corpus, limit=None):
     except Exception as error:
         PrintException()
+    session.remove()
+

 def split_ngram(ngram):
     if isinstance(ngram, str):
@@ -329,6 +332,7 @@ def stem_corpus(corpus_id=None):
             PrintException()
     else:
         print('Usage: stem_corpus(corpus_id=corpus.id)')
+    session.remove()
ngram/stop.py
@@ -36,6 +36,7 @@ def importStopList(node,filename,language='fr'):
             )
     bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], [d for d in data])
+    session.remove()

 def isStopWord(ngram, stop_words=None):
     '''
@@ -78,10 +79,11 @@ def compute_stop(corpus,limit=2000,debug=False):
     '''
     do some statitics on all stop lists of database of the same type
     '''
+    session = get_session()
     stop_node_id = get_or_create_node(nodetype='StopList', corpus=corpus).id

     # TODO do a function to get all stop words with social scores
-    session = get_session()
     root = session.query(Node).filter(Node.type_id == cache.NodeType['Root'].id).first()
     root_stop_id = get_or_create_node(nodetype='StopList', corpus=root).id
@@ -112,4 +114,5 @@ def compute_stop(corpus,limit=2000,debug=False):
     stop = WeightedList({n[0] : -1 for n in ngrams_to_stop})
     stop.save(stop_node_id)
+    session.remove()
ngram/tfidf.py
@@ -7,9 +7,11 @@ from admin.utils import DebugTime

 def compute_tfidf(corpus):
     # compute terms frequency sum
+    session = get_session()
     dbg = DebugTime('Corpus #%d - TFIDF' % corpus.id)
     dbg.show('calculate terms frequencies sums')

-    tfidf_node = get_or_create_node(nodetype='Tfidf', corpus=corpus)
+    tfidf_node = get_or_create_node(nodetype='Tfidf', corpus=corpus, session=session)

     db, cursor = get_cursor()
     cursor.execute('''
@@ -119,16 +121,20 @@ def compute_tfidf(corpus):
     # the end!
     db.commit()
+    session.remove()

 def compute_tfidf_global(corpus):
     '''
     Maybe improve this with:
     #http://stackoverflow.com/questions/8674718/best-way-to-select-random-rows-postgresql
     '''
+    session = get_session()
     dbg = DebugTime('Corpus #%d - tfidf global' % corpus.id)
     dbg.show('calculate terms frequencies sums')

-    tfidf_node = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus)
+    tfidf_node = get_or_create_node(nodetype='Tfidf (global)', corpus=corpus, session=session)

-    session = get_session()
     # update would be better
     session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==tfidf_node.id).delete()
     session.commit()
@@ -258,6 +264,8 @@ def compute_tfidf_global(corpus):
     db.commit()
     dbg.show('insert tfidf')
+    session.remove()
+

 #corpus=session.query(Node).filter(Node.id==244250).first()
 #compute_tfidf_global(corpus)
ngram/tools.py
@@ -8,8 +8,8 @@ def insert_ngrams_to_list(list_of_ngrams, corpus, list_type='MapList', erase=Tru
     '''
+    session = get_session()
-    list_node = get_or_create_node(corpus=corpus, nodetype=list_type)
-    group_node = get_or_create_node(corpus=corpus, nodetype='GroupList')
+    list_node = get_or_create_node(corpus=corpus, nodetype=list_type, session=session)
+    group_node = get_or_create_node(corpus=corpus, nodetype='GroupList', session=session)
     group_list = (session.query(NodeNgramNgram.ngramy_id)
                         .filter(NodeNgramNgram.id==group_node.id)
                         .all()
@@ -35,6 +35,7 @@ def insert_ngrams_to_list(list_of_ngrams, corpus, list_type='MapList', erase=Tru
     #print(list_to_insert)
     db, cursor = get_cursor()
     bulk_insert(NodeNgram, ['node_id', 'ngram_id', 'weight'], [n for n in list_to_insert])
+    session.remove()

 def insert_ngrams(ngrams, get='terms-id'):
     '''
parsing/corpustools.py
@@ -30,8 +30,9 @@ parsers = Parsers()
 # resources management

 def add_resource(corpus, **kwargs):
-    # only for tests
+    session = get_session()
+    # only for tests
     resource = Resource(guid=str(random()), **kwargs)
     # User
     if 'user_id' not in kwargs:
@@ -64,11 +65,12 @@ def add_resource(corpus, **kwargs):
     session.commit()
     # return result
     return resource
+    session.remove()

 def parse_resources(corpus, user=None, user_id=None):
-    dbg = DebugTime('Corpus #%d - parsing' % corpus.id)
+    session = get_session()
+    dbg = DebugTime('Corpus #%d - parsing' % corpus.id)
     corpus_id = corpus.id
     type_id = cache.NodeType['Document'].id
@@ -176,7 +178,7 @@ def parse_resources(corpus, user=None, user_id=None):
     # mark the corpus as parsed
     corpus.parsed = True
+    session.remove()

 # ngrams extraction
 from .NgramsExtractors import EnglishNgramsExtractor, FrenchNgramsExtractor, NgramsExtractor
@@ -207,8 +209,9 @@ class NgramsExtractors(defaultdict):
 ngramsextractors = NgramsExtractors()

 def extract_ngrams(corpus, keys, nlp=True):
-    dbg = DebugTime('Corpus #%d - ngrams' % corpus.id)
+    session = get_session()
+    dbg = DebugTime('Corpus #%d - ngrams' % corpus.id)
     default_language_iso2 = None if corpus.language_id is None else cache.Language[corpus.language_id].iso2
     # query the hyperdata associated with the given keys
     columns = [Node.id, Node.language_id] + [Node.hyperdata[key] for key in keys]
@@ -289,4 +292,4 @@ def extract_ngrams(corpus, keys, nlp=True):
     dbg.message = 'insert %d associations' % len(node_ngram_data)
     # commit to database
     db.commit()
+    session.remove()
rest_v1_0/api.py
@@ -97,13 +97,16 @@ def Root(request, format=None):
         'snippets': reverse('snippet-list', request=request, format=format)
     })
+    session.remove()

 class NodesChildrenNgrams(APIView):

     def get(self, request, node_id):
+        session = get_session()
         # query ngrams
         ParentNode = aliased(Node)
-        session = get_session()
         ngrams_query = (session
             .query(Ngram.terms, func.sum(Node_Ngram.weight).label('count'))
             .join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
@@ -140,13 +143,16 @@ class NodesChildrenNgrams(APIView):
                 for ngram in ngrams_query[offset : offset+limit]
             ],
         })
+        session.remove()

 class NodesChildrenNgramsIds(APIView):

     def get(self, request, node_id):
+        session = get_session()
         # query ngrams
         ParentNode = aliased(Node)
-        session = get_session()
         ngrams_query = (session
             .query(Node.id, func.sum(Node_Ngram.weight).label('count'))
             .join(Node_Ngram, Node_Ngram.node_id == Node.id)
@@ -183,16 +189,18 @@ class NodesChildrenNgramsIds(APIView):
                 for node, count in ngrams_query[offset : offset+limit]
             ],
         })
+        session.remove()

 from gargantext_web.db import get_or_create_node

 class Ngrams(APIView):

     def get(self, request, node_id):
+        session = get_session()
         # query ngrams
         ParentNode = aliased(Node)
-        session = get_session()
         corpus = session.query(Node).filter(Node.id==node_id).first()
         group_by = []
         results = ['id', 'terms']
@@ -307,11 +315,13 @@ class Ngrams(APIView):
             ],
         })
+        session.remove()

 class NodesChildrenDuplicates(APIView):

     def _fetch_duplicates(self, request, node_id, extra_columns=None, min_count=1):
+        session = get_session()
         # input validation
         if extra_columns is None:
             extra_columns = []
@@ -319,7 +329,6 @@ class NodesChildrenDuplicates(APIView):
             raise APIException('Missing GET parameter: "keys"', 400)
         keys = request.GET['keys'].split(',')
         # hyperdata retrieval
-        session = get_session()
         hyperdata_query = (session
             .query(Hyperdata)
             .filter(Hyperdata.name.in_(keys))
@@ -351,6 +360,8 @@ class NodesChildrenDuplicates(APIView):
         duplicates_query = duplicates_query.having(func.count() > min_count)
         # and now, return it
         return duplicates_query
+        session.remove()
+
     def get(self, request, node_id):
         # data to be returned
@@ -400,10 +411,11 @@ class NodesChildrenDuplicates(APIView):
 # retrieve metadata from a given list of parent node
 def get_metadata(corpus_id_list):
+    session = get_session()
     # query hyperdata keys
     ParentNode = aliased(Node)
-    session = get_session()
     hyperdata_query = (session
         .query(Hyperdata)
         .join(Node_Hyperdata, Node_Hyperdata.hyperdata_id == Hyperdata.id)
@@ -455,6 +467,7 @@ def get_metadata(corpus_id_list):
     # give the result back
     return collection
+    session.remove()

 class ApiHyperdata(APIView):
@@ -520,6 +533,7 @@ class ApiNgrams(APIView):
 class NodesChildrenQueries(APIView):

     def _sql(self, input, node_id):
+        session = get_session()
         fields = dict()
         tables = set('nodes')
         hyperdata_aliases = dict()
@@ -602,6 +616,7 @@ class NodesChildrenQueries(APIView):
             else query[input['pagination']['offset']:]
         )
         return output
+        session.remove()

     def _haskell(self, input, node_id):
         output = copy.deepcopy(input)
@@ -702,8 +717,9 @@ class NodesList(APIView):
     authentication_classes = (SessionAuthentication, BasicAuthentication)
     def get(self, request):
-        print("user id : " + str(request.user))
+        session = get_session()
+        print("user id : " + str(request.user))
         query = (session
             .query(Node.id, Node.name, NodeType.name.label('type'))
             .filter(Node.user_id == int(request.user.id))
@@ -718,10 +734,13 @@ class NodesList(APIView):
             node._asdict() for node in query.all()
         ]})
+        session.remove()

 class Nodes(APIView):
     def get(self, request, node_id):
+        session = get_session()
         node = session.query(Node).filter(Node.id == node_id).first()
         if node is None:
             raise APIException('This node does not exist', 404)
@@ -734,6 +753,8 @@ class Nodes(APIView):
             #'hyperdata': dict(node.hyperdata),
             'hyperdata': node.hyperdata,
         })
+        session.remove()
+
     # deleting node by id
     # currently, very dangerous.
@@ -741,8 +762,9 @@ class Nodes(APIView):
     # for better constistency...
     def delete(self, request, node_id):
-        user = request.user
+        session = get_session()
+        user = request.user
         node = session.query(Node).filter(Node.id == node_id).first()
         msgres = str()
@@ -754,6 +776,8 @@ class Nodes(APIView):
         except Exception as error:
             msgres = "error deleting : " + node_id + str(error)
+        session.remove()
+

 class CorpusController:

     @classmethod
@@ -774,7 +798,7 @@ class CorpusController:
         # if corpus.user != request.user:
         #     raise Http403("Unauthorized access.")
         return corpus
+        session.remove()

     @classmethod
     def ngrams(cls, request, node_id):
@@ -785,6 +809,7 @@ class CorpusController:
         # build query
         ParentNode = aliased(Node)
+        session = get_session()
         query = (session
             .query(Ngram.terms, func.count('*'))
             .join(Node_Ngram, Node_Ngram.ngram_id == Ngram.id)
@@ -811,3 +836,5 @@ class CorpusController:
             )
         else:
             raise ValidationError('Unrecognized "format=%s", should be "csv" or "json"' % (format, ))
+
+        session.remove()
rest_v1_0/graph.py
@@ -6,7 +6,6 @@ from analysis.functions import get_cooc

 class Graph(APIView):
     authentication_classes = (SessionAuthentication, BasicAuthentication)
-    session = get_session()
     def get(self, request, corpus_id):
         '''
@@ -15,6 +14,8 @@ class Graph(APIView):
         graph?field1=ngrams&field2=ngrams&
         graph?field1=ngrams&field2=ngrams&start=''&end=''
         '''
+        session = get_session()
+
         field1 = request.GET.get('field1', 'ngrams')
         field2 = request.GET.get('field2', 'ngrams')
@@ -52,3 +53,5 @@ class Graph(APIView):
                     , 'field2': accepted_field2
                     , 'options': options
                     })
+
+        session.remove()
rest_v1_0/ngrams.py
@@ -82,9 +82,10 @@ class List(APIView):
     def get_metadata(self, ngram_ids, parent_id):
+        session = get_session()
         start_ = time.time()
-        session = get_session()
         nodes_ngrams = session.query(Ngram.id, Ngram.terms).filter(Ngram.id.in_(list(ngram_ids.keys()))).all()
         for node in nodes_ngrams:
             if node.id in ngram_ids:
@@ -120,7 +121,7 @@ class List(APIView):
         end_ = time.time()
         return { "data": ngram_ids, "secs": (end_ - start_) }
+        session.remove()

     def get(self, request, corpus_id, list_name):
@@ -154,6 +155,8 @@ class List(APIView):
             measurements["tfidf"] = { "s": ngrams_meta["secs"], "n": len(ngrams_meta["data"].keys()) }
         return JsonHttpResponse( {"data": ngram_ids, "time": measurements} )
+        session.remove()
+

 class Ngrams(APIView):
     '''
@@ -323,6 +326,8 @@ class Ngrams(APIView):
             'data': output,
             "time": measurements
         })
+        session.remove()
+
     def post(self, request, node_id):
         return JsonHttpResponse(["POST", "ok"])
@@ -343,11 +348,15 @@ class Group(APIView):
     '''
     def get_group_id(self, node_id, user_id):
         node_id = int(node_id)
+        session = get_session()
         corpus = session.query(Node).filter(Node.id==node_id).first()
         if corpus==None: return None
         group = get_or_create_node(corpus=corpus, nodetype='Group')
         return(group.id)
+        session.remove()

     def get(self, request, corpus_id):
         if not request.user.is_authenticated():
@@ -376,6 +385,7 @@ class Group(APIView):
         DG = nx.DiGraph()

+        session = get_session()
         ngrams_ngrams = (session
             .query(NodeNgramNgram)
             .filter(NodeNgramNgram.node_id==group_id)
@@ -415,7 +425,8 @@ class Group(APIView):
         # groups["nodes"] = get_occtfidf( ngrams , request.user.id , corpus_id , "Group")
         return JsonHttpResponse( {"data": groups} )
+        session.remove()

     def post(self, request, node_id):
         return JsonHttpResponse( ["hola", "mundo"] )
@@ -440,6 +451,8 @@ class Group(APIView):
             return JsonHttpResponse(True, 201)
         else:
             raise APIException('Missing parameter: "{\'data\' : [\'source\' : Int, \'target\' : [Int]}"', 400)
+        session.remove()

     def put(self, request, corpus_id):
+        session = get_session()
@@ -523,6 +536,7 @@ class Group(APIView):
                     nodengramngram = NodeNgramNgram(node_id=existing_group_id, ngramx_id=n1, ngramy_id=n2, score=1.0)
                     session.add(nodengramngram)
                     session.commit()
             # [ - - - / doing links of new clique and adding to DB - - - ] #
@@ -573,6 +587,7 @@ class Group(APIView):
         return JsonHttpResponse(True, 201)
+        session.remove()

 class Keep(APIView):
     """
@@ -580,9 +595,9 @@ class Keep(APIView):
     """
     renderer_classes = (JSONRenderer,)
     authentication_classes = (SessionAuthentication, BasicAuthentication)
-    session = get_session()

     def get(self, request, corpus_id):
+        session = get_session()
         # list_id = session.query(Node).filter(Node.id==list_id).first()
         corpus = session.query(Node).filter(Node.id==corpus_id).first()
         node_mapList = get_or_create_node(nodetype='MapList', corpus=corpus)
@@ -591,11 +606,13 @@ class Keep(APIView):
         for node in nodes_in_map:
             results[node.ngram_id] = True
         return JsonHttpResponse(results)
+        session.remove()

     def put(self, request, corpus_id):
         """
         Add ngrams to map list
         """
+        session = get_session()
         group_rawreq = dict(request.data)
         ngram_2add = [int(i) for i in list(group_rawreq.keys())]
         corpus = session.query(Node).filter(Node.id==corpus_id).first()
@@ -605,11 +622,14 @@ class Keep(APIView):
             session.add(map_node)
         session.commit()
         return JsonHttpResponse(True, 201)
+        session.remove()

     def delete(self, request, corpus_id):
         """
         Delete ngrams from the map list
         """
+        session = get_session()
         group_rawreq = dict(request.data)
         # print("group_rawreq:")
         # print(group_rawreq)
@@ -627,5 +647,5 @@ class Keep(APIView):
         return JsonHttpResponse(True, 201)
+        session.remove()
scrappers/scrap_pubmed/views.py
@@ -84,15 +84,12 @@ def getGlobalStatsISTEXT(request ):

 def doTheQuery(request, project_id):
     alist = ["hola", "mundo"]
-    makeSession = get_sessionmaker() # get_session()
-    session = makeSession()
+    session = get_session()

     # do we have a valid project id?
     try:
         project_id = int(project_id)
     except ValueError:
         raise Http404()

     # do we have a valid project?
     project = (session
         .query(Node)
@@ -184,14 +181,14 @@ def doTheQuery(request, project_id):
     data = alist
     return JsonHttpResponse(data)
+    session.remove()

 def testISTEX(request, project_id):
     print("testISTEX:")
     print(request.method)
     alist = ["bar", "foo"]
-    sessionMaker = get_sessionmaker() # get_session()
-    session = sessionMaker()
+    session = get_session()

     # do we have a valid project id?
     try:
         project_id = int(project_id)
@@ -292,4 +289,4 @@ def testISTEX(request, project_id):
     data = [query_string, query, N]
     return JsonHttpResponse(data)
+    session.remove()