Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
5fc7c2d7
Commit
5fc7c2d7
authored
10 years ago
by
Mathieu Rodic
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CODE] The API is now wrapped in a controller.
NgramsCache: ngrams are now stored in lowercase
parent
281e712a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
131 additions
and
130 deletions
+131
-130
api.py
gargantext_web/api.py
+127
-127
urls.py
gargantext_web/urls.py
+3
-3
Caches.py
parsing/Caches.py
+1
-0
No files found.
gargantext_web/api.py
View file @
5fc7c2d7
...
...
@@ -27,135 +27,135 @@ _ngrams_order_columns = {
}
def corpus_ngrams(request, corpus_id):
    """Return, as JSON, the terms of the ngrams found under a corpus.

    The ngrams selected are those linked to nodes whose parent is the node
    identified by ``corpus_id``.

    Query-string parameters read from ``request.GET``:
      * ``order``: defaults to ``'frequency'``; must be one of the keys of
        the module-level ``_ngrams_order_columns``.
      * ``startswith``: optional prefix the ngram terms must start with
        (defaults to the empty string, i.e. no restriction).

    Returns a ``JsonHttpResponse`` wrapping ``{"list": [terms, ...]}``.

    Raises ``Http404`` when no node has this id or when the node's type is
    not named 'Corpus', and ``ValidationError`` when ``order`` is not
    recognized.
    """
    # parameters retrieval and control
    # One query is enough: first() yields None when nothing matches, so the
    # queryset does not need to be evaluated separately beforehand.
    corpus = Node.objects.filter(id=corpus_id).first()
    if corpus is None or corpus.type.name != 'Corpus':
        raise Http404("No such corpus.")
    order = request.GET.get('order', 'frequency')
    if order not in _ngrams_order_columns:
        raise ValidationError(
            'The order parameter should take one of the following values: '
            + ', '.join(_ngrams_order_columns),
            400,
        )

    # query building
    ngrams_query = Ngram.objects.filter(
        nodes__parent=corpus,
        terms__startswith=request.GET.get('startswith', ''),
    ).annotate(count=Count('id'))

    # how should we order this?
    # NOTE(review): the ordering column comes from this local mapping, not
    # from the values stored in _ngrams_order_columns; confirm which of the
    # two is meant to be authoritative.
    ordering = {
        "frequency": "-count",
        "alphabetical": "terms",
    }.get(order, '-count')
    ngrams_query = ngrams_query.order_by(ordering)

    # response building
    return JsonHttpResponse({
        "list": [ngram.terms for ngram in ngrams_query],
    })
class
CorpusController
:
def corpus_metadata(request, corpus_id):
    """Return, as JSON, the metadata keys stored in the node table.

    Keys are extracted with ``skeys(metadata)`` and listed from the most to
    the least frequent.

    Returns a ``JsonHttpResponse`` wrapping ``{"list": [key, ...]}``.

    Raises ``Http404`` when no node has the id ``corpus_id`` or when the
    node's type is not named 'Corpus'.
    """
    # parameters retrieval and control
    # One query is enough: first() yields None when nothing matches, so the
    # queryset does not need to be evaluated separately beforehand.
    corpus = Node.objects.filter(id=corpus_id).first()
    if corpus is None or corpus.type.name != 'Corpus':
        raise Http404("No such corpus.")

    # query building
    # NOTE(review): the statement below reads every row of the node table;
    # the corpus validated above is not used to restrict it. Confirm whether
    # the keys should be limited to the nodes of this corpus.
    # The only value interpolated is the model's own table name, never
    # request data.
    cursor = connection.cursor()
    cursor.execute('''
        SELECT
            key,
            COUNT(*) AS count
        FROM (
            SELECT skeys(metadata) AS key
            FROM %s
        ) AS keys
        GROUP BY
            key
        ORDER BY
            count DESC
    ''' % (Node._meta.db_table, ))

    # response building
    return JsonHttpResponse({
        "list": [row[0] for row in cursor.fetchall()],
    })
@staticmethod
def ngrams(request, corpus_id):
    """Return, as JSON, the terms of the ngrams found under a corpus.

    The ngrams selected are those linked to nodes whose parent is the node
    identified by ``corpus_id``.

    Query-string parameters read from ``request.GET``:
      * ``order``: defaults to ``'frequency'``; must be one of the keys of
        the module-level ``_ngrams_order_columns``.
      * ``startswith``: optional prefix the ngram terms must start with
        (defaults to the empty string, i.e. no restriction).

    Returns a ``JsonHttpResponse`` wrapping ``{"list": [terms, ...]}``.

    Raises ``Http404`` when no node has this id or when the node's type is
    not named 'Corpus', and ``ValidationError`` when ``order`` is not
    recognized.
    """
    # parameters retrieval and control
    # One query is enough: first() yields None when nothing matches, so the
    # queryset does not need to be evaluated separately beforehand.
    corpus = Node.objects.filter(id=corpus_id).first()
    if corpus is None or corpus.type.name != 'Corpus':
        raise Http404("No such corpus.")
    order = request.GET.get('order', 'frequency')
    if order not in _ngrams_order_columns:
        raise ValidationError(
            'The order parameter should take one of the following values: '
            + ', '.join(_ngrams_order_columns),
            400,
        )

    # query building
    ngrams_query = Ngram.objects.filter(
        nodes__parent=corpus,
        terms__startswith=request.GET.get('startswith', ''),
    ).annotate(count=Count('id'))

    # how should we order this?
    # NOTE(review): the ordering column comes from this local mapping, not
    # from the values stored in _ngrams_order_columns; confirm which of the
    # two is meant to be authoritative.
    ordering = {
        "frequency": "-count",
        "alphabetical": "terms",
    }.get(order, '-count')
    ngrams_query = ngrams_query.order_by(ordering)

    # response building
    return JsonHttpResponse({
        "list": [ngram.terms for ngram in ngrams_query],
    })
# ?mesured=documents.count
# &parameters[]=metadata.publication_year
# &filter[]=ngrams.in.test,ht,grf
def
corpus_data
(
request
,
corpus_id
):
# parameters retrieval and control
corpusQuery
=
Node
.
objects
.
filter
(
id
=
corpus_id
)
if
not
corpusQuery
:
raise
Http404
(
"No such corpus."
)
corpus
=
corpusQuery
.
first
()
if
corpus
.
type
.
name
!=
'Corpus'
:
raise
Http404
(
"No such corpus."
)
# query building: initialization
columns
=
[]
conditions
=
[]
group
=
[]
order
=
[]
join_ngrams
=
False
# query building: parameters
for
parameter
in
request
.
GET
.
getlist
(
'parameters[]'
):
@staticmethod
def metadata(request, corpus_id):
    """Return, as JSON, the metadata keys stored in the node table.

    Keys are extracted with ``skeys(metadata)`` and listed from the most to
    the least frequent.

    Returns a ``JsonHttpResponse`` wrapping ``{"list": [key, ...]}``.

    Raises ``Http404`` when no node has the id ``corpus_id`` or when the
    node's type is not named 'Corpus'.
    """
    # parameters retrieval and control
    # One query is enough: first() yields None when nothing matches, so the
    # queryset does not need to be evaluated separately beforehand.
    corpus = Node.objects.filter(id=corpus_id).first()
    if corpus is None or corpus.type.name != 'Corpus':
        raise Http404("No such corpus.")

    # query building
    # NOTE(review): the statement below reads every row of the node table;
    # the corpus validated above is not used to restrict it. Confirm whether
    # the keys should be limited to the nodes of this corpus.
    # The only value interpolated is the model's own table name, never
    # request data.
    cursor = connection.cursor()
    cursor.execute('''
        SELECT
            key,
            COUNT(*) AS count
        FROM (
            SELECT skeys(metadata) AS key
            FROM %s
        ) AS keys
        GROUP BY
            key
        ORDER BY
            count DESC
    ''' % (Node._meta.db_table, ))

    # response building
    return JsonHttpResponse({
        "list": [row[0] for row in cursor.fetchall()],
    })
@
staticmethod
def
data
(
request
,
corpus_id
):
# parameters retrieval and control
corpusQuery
=
Node
.
objects
.
filter
(
id
=
corpus_id
)
if
not
corpusQuery
:
raise
Http404
(
"No such corpus."
)
corpus
=
corpusQuery
.
first
()
if
corpus
.
type
.
name
!=
'Corpus'
:
raise
Http404
(
"No such corpus."
)
# query building: initialization
columns
=
[]
conditions
=
[]
group
=
[]
order
=
[]
join_ngrams
=
False
# query building: parameters
for
parameter
in
request
.
GET
.
getlist
(
'parameters[]'
):
c
=
len
(
columns
)
parameter_array
=
parameter
.
split
(
'.'
)
if
len
(
parameter_array
)
!=
2
:
raise
ValidationError
(
'Unrecognized "parameter[]=
%
s"'
%
(
parameter
,
))
origin
=
parameter_array
[
0
]
key
=
parameter_array
[
1
]
if
origin
==
"metadata"
:
key
=
key
.
replace
(
'
\'
'
,
'
\\\'
'
)
columns
.
append
(
"node.metadata->'
%
s' AS c
%
d"
%
(
key
,
c
,
))
conditions
.
append
(
"node.metadata ? '
%
s'"
%
(
key
,
))
group
.
append
(
"c
%
d"
%
(
c
,
))
order
.
append
(
"c
%
d"
%
(
c
,
))
else
:
raise
ValidationError
(
'Unrecognized type "
%
s" in "parameter[]=
%
s"'
%
(
origin
,
parameter
,
))
# query building: mesured value
mesured
=
request
.
GET
.
get
(
'mesured'
,
''
)
c
=
len
(
columns
)
parameter_array
=
parameter
.
split
(
'.'
)
if
len
(
parameter_array
)
!=
2
:
raise
ValidationError
(
'Unrecognized "parameter[]=
%
s"'
%
(
parameter
,
))
origin
=
parameter_array
[
0
]
key
=
parameter_array
[
1
]
if
origin
==
"metadata"
:
key
=
key
.
replace
(
'
\'
'
,
'
\\\'
'
)
columns
.
append
(
"node.metadata->'
%
s' AS c
%
d"
%
(
key
,
c
,
))
conditions
.
append
(
"node.metadata ? '
%
s'"
%
(
key
,
))
group
.
append
(
"c
%
d"
%
(
c
,
))
order
.
append
(
"c
%
d"
%
(
c
,
))
else
:
raise
ValidationError
(
'Unrecognized type "
%
s" in "parameter[]=
%
s"'
%
(
origin
,
parameter
,
))
# query building: mesured value
mesured
=
request
.
GET
.
get
(
'mesured'
,
''
)
c
=
len
(
columns
)
if
mesured
==
"documents.count"
:
columns
.
append
(
"COUNT(node.id) AS c
%
d "
%
(
c
,
))
elif
mesured
==
"ngrams.count"
:
columns
.
append
(
"COUNT(ngram.id) AS c
%
d "
%
(
c
,
))
join_ngrams
=
True
else
:
raise
ValidationError
(
'The "mesured" parameter should take one of the following values: "documents.count", "ngrams.count"'
)
# query building: filters
for
filter
in
request
.
GET
.
getlist
(
'filters[]'
,
''
):
if
'|'
in
filter
:
filter_array
=
filter
.
split
(
"|"
)
key
=
filter_array
[
0
]
values
=
filter_array
[
1
]
.
replace
(
"'"
,
"
\\
'"
)
.
split
(
","
)
if
key
==
'ngram.terms'
:
conditions
.
append
(
"ngram.terms IN ('
%
s')"
%
(
"', '"
.
join
(
values
),
))
join_ngrams
=
True
if
mesured
==
"documents.count"
:
columns
.
append
(
"COUNT(node.id) AS c
%
d "
%
(
c
,
))
elif
mesured
==
"ngrams.count"
:
columns
.
append
(
"COUNT(ngram.id) AS c
%
d "
%
(
c
,
))
join_ngrams
=
True
else
:
raise
ValidationError
(
'Unrecognized "filter[]=
%
s"'
%
(
filter
,
))
# query building: assembling
sql
=
"SELECT
%
s FROM
%
s AS node"
%
(
', '
.
join
(
columns
),
Node
.
_meta
.
db_table
,
)
if
join_ngrams
:
sql
+=
" INNER JOIN
%
s AS node_ngram ON node_ngram.node_id = node.id"
%
(
Node_Ngram
.
_meta
.
db_table
,
)
sql
+=
" INNER JOIN
%
s AS ngram ON ngram.id = node_ngram.ngram_id"
%
(
Ngram
.
_meta
.
db_table
,
)
if
conditions
:
sql
+=
" WHERE
%
s"
%
(
" AND "
.
join
(
conditions
),
)
if
group
:
sql
+=
" GROUP BY
%
s"
%
(
", "
.
join
(
group
),
)
if
order
:
sql
+=
" ORDER BY
%
s"
%
(
", "
.
join
(
order
),
)
# query execution
# return HttpResponse(sql)
cursor
=
connection
.
cursor
()
cursor
.
execute
(
sql
)
# response building
return
JsonHttpResponse
({
"list"
:
[
row
for
row
in
cursor
.
fetchall
()],
})
raise
ValidationError
(
'The "mesured" parameter should take one of the following values: "documents.count", "ngrams.count"'
)
# query building: filters
for
filter
in
request
.
GET
.
getlist
(
'filters[]'
,
''
):
if
'|'
in
filter
:
filter_array
=
filter
.
split
(
"|"
)
key
=
filter_array
[
0
]
values
=
filter_array
[
1
]
.
replace
(
"'"
,
"
\\
'"
)
.
split
(
","
)
if
key
==
'ngram.terms'
:
conditions
.
append
(
"ngram.terms IN ('
%
s')"
%
(
"', '"
.
join
(
values
),
))
join_ngrams
=
True
else
:
raise
ValidationError
(
'Unrecognized "filter[]=
%
s"'
%
(
filter
,
))
# query building: assembling
sql
=
"SELECT
%
s FROM
%
s AS node"
%
(
', '
.
join
(
columns
),
Node
.
_meta
.
db_table
,
)
if
join_ngrams
:
sql
+=
" INNER JOIN
%
s AS node_ngram ON node_ngram.node_id = node.id"
%
(
Node_Ngram
.
_meta
.
db_table
,
)
sql
+=
" INNER JOIN
%
s AS ngram ON ngram.id = node_ngram.ngram_id"
%
(
Ngram
.
_meta
.
db_table
,
)
if
conditions
:
sql
+=
" WHERE
%
s"
%
(
" AND "
.
join
(
conditions
),
)
if
group
:
sql
+=
" GROUP BY
%
s"
%
(
", "
.
join
(
group
),
)
if
order
:
sql
+=
" ORDER BY
%
s"
%
(
", "
.
join
(
order
),
)
# query execution
# return HttpResponse(sql)
cursor
=
connection
.
cursor
()
cursor
.
execute
(
sql
)
# response building
return
JsonHttpResponse
({
"list"
:
[
row
for
row
in
cursor
.
fetchall
()],
})
This diff is collapsed.
Click to expand it.
gargantext_web/urls.py
View file @
5fc7c2d7
...
...
@@ -40,9 +40,9 @@ urlpatterns = patterns('',
url
(
r'^chart/corpus/(\d+)/data.csv$'
,
send_csv
),
url
(
r'^graph.json$'
,
send_graph
),
url
(
r'^api/corpus/(\d+)/ngrams$'
,
gargantext_web
.
api
.
corpus_
ngrams
),
url
(
r'^api/corpus/(\d+)/metadata$'
,
gargantext_web
.
api
.
corpus_
metadata
),
url
(
r'^api/corpus/(\d+)/data$'
,
gargantext_web
.
api
.
corpus_
data
),
url
(
r'^api/corpus/(\d+)/ngrams$'
,
gargantext_web
.
api
.
CorpusController
.
ngrams
),
url
(
r'^api/corpus/(\d+)/metadata$'
,
gargantext_web
.
api
.
CorpusController
.
metadata
),
url
(
r'^api/corpus/(\d+)/data$'
,
gargantext_web
.
api
.
CorpusController
.
data
),
)
from
django.conf
import
settings
...
...
This diff is collapsed.
Click to expand it.
parsing/Caches.py
View file @
5fc7c2d7
...
...
@@ -16,6 +16,7 @@ class NgramsCache(defaultdict):
def
__missing__
(
self
,
terms
):
"""If the terms are not yet present in the dictionary,
retrieve it from the database or insert it."""
terms
=
terms
.
strip
()
.
lower
()
try
:
ngram
=
node
.
models
.
Ngram
.
get
(
terms
=
terms
,
language
=
self
.
language
)
except
:
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment