Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
50cbc12d
Commit
50cbc12d
authored
Nov 29, 2014
by
Mathieu Rodic
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CODE] the route "/api/corpus/{id}/ngrams" is now doing the job with SQLAlchemy!
See:
https://github.com/mathieurodic/aldjemy
parent
542ddf49
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
63 additions
and
59 deletions
+63
-59
api.py
gargantext_web/api.py
+62
-59
settings.py
gargantext_web/settings.py
+1
-0
No files found.
gargantext_web/api.py
View file @
50cbc12d
...
@@ -3,33 +3,39 @@ from django.core.exceptions import PermissionDenied, SuspiciousOperation
...
@@ -3,33 +3,39 @@ from django.core.exceptions import PermissionDenied, SuspiciousOperation
from
django.core.exceptions
import
ValidationError
from
django.core.exceptions
import
ValidationError
from
django.db.models
import
Avg
,
Max
,
Min
,
Count
,
Sum
from
django.db.models
import
Avg
,
Max
,
Min
,
Count
,
Sum
from
node.models
import
NodeType
,
Node
,
Node_Ngram
,
Ngram
from
django.db
import
connection
# from node.models import Language, ResourceType, Resource
# from node.models import Language, ResourceType, Resource
# from node.models import Node, NodeType, Node_Resource, Project, Corpus
# from node.models import Node, NodeType, Node_Resource, Project, Corpus
# from node.admin import CorpusForm, ProjectForm, ResourceForm
_sql_cte
=
'''
from
sqlalchemy.sql
import
func
WITH RECURSIVE cte ("depth", "path", "ordering", "id") AS (
from
sqlalchemy.orm
import
aliased
SELECT 1 AS depth,
array[T."id"] AS path,
array[T."id"] AS ordering,
T."id"
FROM
%
s T
WHERE T."parent_id" IS NULL
UNION ALL
import
node.models
NodeType
=
node
.
models
.
NodeType
.
sa
Node
=
node
.
models
.
Node
.
sa
Node_Ngram
=
node
.
models
.
Node_Ngram
.
sa
Ngram
=
node
.
models
.
Ngram
.
sa
# _sql_cte = '''
# WITH RECURSIVE cte ("depth", "path", "ordering", "id") AS (
# SELECT 1 AS depth,
# array[T."id"] AS path,
# array[T."id"] AS ordering,
# T."id"
# FROM %s T
# WHERE T."parent_id" IS NULL
# UNION ALL
# SELECT cte.depth + 1 AS depth,
# cte.path || T."id",
# cte.ordering || array[T."id"],
# T."id"
# FROM %s T
# JOIN cte ON T."parent_id" = cte."id"
# )
# ''' % (Node._meta.db_table, Node._meta.db_table, )
SELECT cte.depth + 1 AS depth,
cte.path || T."id",
cte.ordering || array[T."id"],
T."id"
FROM
%
s T
JOIN cte ON T."parent_id" = cte."id"
)
'''
%
(
Node
.
_meta
.
db_table
,
Node
.
_meta
.
db_table
,
)
def
DebugHttpResponse
(
data
):
def
DebugHttpResponse
(
data
):
return
HttpResponse
(
'<html><body style="background:#000;color:#FFF"><pre>
%
s</pre></body></html>'
%
(
str
(
data
),
))
return
HttpResponse
(
'<html><body style="background:#000;color:#FFF"><pre>
%
s</pre></body></html>'
%
(
str
(
data
),
))
...
@@ -111,59 +117,56 @@ class CorpusController:
...
@@ -111,59 +117,56 @@ class CorpusController:
@
classmethod
@
classmethod
def
ngrams
(
cls
,
request
,
corpus_id
):
def
ngrams
(
cls
,
request
,
node_id
):
# parameters retrieval and validation
# parameters retrieval and validation
corpus
=
cls
.
get
(
corpus_id
)
startwith
=
request
.
GET
.
get
(
'startwith'
,
''
)
.
replace
(
"'"
,
"
\\
'"
)
order
=
request
.
GET
.
get
(
'order'
,
'frequency'
)
if
order
not
in
_ngrams_order_columns
:
# build query
raise
ValidationError
(
'The order parameter should take one of the following values: '
+
', '
.
join
(
_ngrams_order_columns
),
400
)
ParentNode
=
aliased
(
Node
)
order_column
=
_ngrams_order_columns
[
order
]
query
=
(
Ngram
# query building
.
query
(
Ngram
.
terms
,
func
.
count
(
'*'
))
cursor
=
connection
.
cursor
()
.
join
(
Node_Ngram
,
Node_Ngram
.
ngram_id
==
Ngram
.
id
)
cursor
.
execute
(
_sql_cte
+
'''
.
join
(
Node
,
Node
.
id
==
Node_Ngram
.
node_id
)
SELECT ngram.terms, COUNT(*) AS occurrences
.
join
(
ParentNode
,
ParentNode
.
id
==
Node
.
parent_id
)
FROM cte
.
filter
(
ParentNode
.
id
==
node_id
)
INNER JOIN
%
s AS node ON node.id = cte.id
.
filter
(
Ngram
.
terms
.
like
(
'
%
s
%%
'
%
(
startwith
,
)))
INNER JOIN
%
s AS nodetype ON nodetype.id = node.type_id
.
group_by
(
Ngram
.
terms
)
INNER JOIN
%
s AS node_ngram ON node_ngram.node_id = node.id
.
order_by
(
func
.
count
(
'*'
)
.
desc
())
INNER JOIN
%
s AS ngram ON ngram.id = node_ngram.ngram_id
)
WHERE (NOT cte.id =
\'
%
d
\'
) AND (
\'
%
d
\'
= ANY(cte."path"))
AND nodetype.name = 'Document'
AND ngram.terms LIKE '
%
s
%%
'
GROUP BY ngram.terms
ORDER BY occurrences DESC
'''
%
(
Node
.
_meta
.
db_table
,
NodeType
.
_meta
.
db_table
,
Node_Ngram
.
_meta
.
db_table
,
Ngram
.
_meta
.
db_table
,
corpus
.
id
,
corpus
.
id
,
request
.
GET
.
get
(
'startwith'
,
''
)
.
replace
(
"'"
,
"
\\
'"
),
))
# # response building
# return JsonHttpResponse({
# "list" : [row[0] for row in cursor.fetchall()],
# })
# response building
# response building
format
=
request
.
GET
.
get
(
'format'
,
'json'
)
format
=
request
.
GET
.
get
(
'format'
,
'json'
)
if
format
==
'json'
:
if
format
==
'json'
:
return
JsonHttpResponse
({
return
JsonHttpResponse
({
"
list
"
:
[{
"
collection
"
:
[{
'terms'
:
row
[
0
],
'terms'
:
row
[
0
],
'occurrences'
:
row
[
1
]
'occurrences'
:
row
[
1
]
}
for
row
in
cursor
.
fetch
all
()],
}
for
row
in
query
.
all
()],
})
})
elif
format
==
'csv'
:
elif
format
==
'csv'
:
return
CsvHttpResponse
(
return
CsvHttpResponse
(
[[
'terms'
,
'occurences'
]]
+
[
row
for
row
in
cursor
.
fetch
all
()]
[[
'terms'
,
'occurences'
]]
+
[
row
for
row
in
query
.
all
()]
)
)
else
:
else
:
raise
ValidationError
(
'Unrecognized "format=
%
s", should be "csv" or "json"'
%
(
format
,
))
raise
ValidationError
(
'Unrecognized "format=
%
s", should be "csv" or "json"'
%
(
format
,
))
@
classmethod
@
classmethod
def
metadata
(
cls
,
request
,
corpus_id
):
def
metadata
(
cls
,
request
,
node_id
):
ParentNode
=
aliased
(
Node
)
query
=
(
Ngram
.
query
(
Ngram
.
metadata
[
''
],
func
.
count
(
'*'
))
.
join
(
Node
,
Node
.
id
==
Node_Ngram
.
node_id
)
.
join
(
ParentNode
,
ParentNode
.
id
==
Node
.
parent_id
)
.
filter
(
ParentNode
.
id
==
node_id
)
.
group_by
(
Ngram
.
terms
)
.
order_by
(
func
.
count
(
'*'
)
.
desc
())
)
collection
=
query
.
all
()
return
JsonHttpResponse
(
collection
)
# parameters retrieval and validation
# parameters retrieval and validation
corpus
=
cls
.
get
(
corpus_id
)
corpus
=
cls
.
get
(
corpus_id
)
# query building
# query building
...
...
gargantext_web/settings.py
View file @
50cbc12d
...
@@ -68,6 +68,7 @@ INSTALLED_APPS = (
...
@@ -68,6 +68,7 @@ INSTALLED_APPS = (
'ngram'
,
'ngram'
,
'django_hstore'
,
'django_hstore'
,
'djcelery'
,
'djcelery'
,
'aldjemy'
,
)
)
MIDDLEWARE_CLASSES
=
(
MIDDLEWARE_CLASSES
=
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment