Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
5fc7c2d7
Commit
5fc7c2d7
authored
Nov 16, 2014
by
Mathieu Rodic
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CODE] The API is now wrapped in a controller.
NgramsCache: ngrams are now stored in lowercase
parent
281e712a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
131 additions
and
130 deletions
+131
-130
api.py
gargantext_web/api.py
+127
-127
urls.py
gargantext_web/urls.py
+3
-3
Caches.py
parsing/Caches.py
+1
-0
No files found.
gargantext_web/api.py
View file @
5fc7c2d7
...
@@ -27,135 +27,135 @@ _ngrams_order_columns = {
...
@@ -27,135 +27,135 @@ _ngrams_order_columns = {
}
}
def
corpus_ngrams
(
request
,
corpus_id
):
class
CorpusController
:
# parameters retrieval and control
corpusQuery
=
Node
.
objects
.
filter
(
id
=
corpus_id
)
if
not
corpusQuery
:
raise
Http404
(
"No such corpus."
)
corpus
=
corpusQuery
.
first
()
if
corpus
.
type
.
name
!=
'Corpus'
:
raise
Http404
(
"No such corpus."
)
order
=
request
.
GET
.
get
(
'order'
,
'frequency'
)
if
order
not
in
_ngrams_order_columns
:
raise
ValidationError
(
'The order parameter should take one of the following values: '
+
', '
.
join
(
_ngrams_order_columns
),
400
)
order_column
=
_ngrams_order_columns
[
order
]
# query building
ngramsQuery
=
Ngram
.
objects
.
filter
(
nodes__parent
=
corpus
,
terms__startswith
=
request
.
GET
.
get
(
'startswith'
,
''
)
)
.
annotate
(
count
=
Count
(
'id'
))
# how should we order this?
orderColumn
=
{
"frequency"
:
"-count"
,
"alphabetical"
:
"terms"
}
.
get
(
request
.
GET
.
get
(
'order'
,
'frequency'
),
'-count'
)
ngramsQuery
=
ngramsQuery
.
order_by
(
orderColumn
)
# response building
return
JsonHttpResponse
({
"list"
:
[
ngram
.
terms
for
ngram
in
ngramsQuery
],
})
def
corpus_metadata
(
request
,
corpus_id
):
@
staticmethod
# parameters retrieval and control
def
ngrams
(
request
,
corpus_id
):
corpusQuery
=
Node
.
objects
.
filter
(
id
=
corpus_id
)
# parameters retrieval and control
if
not
corpusQuery
:
corpusQuery
=
Node
.
objects
.
filter
(
id
=
corpus_id
)
raise
Http404
(
"No such corpus."
)
if
not
corpusQuery
:
corpus
=
corpusQuery
.
first
(
)
raise
Http404
(
"No such corpus."
)
if
corpus
.
type
.
name
!=
'Corpus'
:
corpus
=
corpusQuery
.
first
()
raise
Http404
(
"No such corpus."
)
if
corpus
.
type
.
name
!=
'Corpus'
:
# query building
raise
Http404
(
"No such corpus."
)
cursor
=
connection
.
cursor
(
)
order
=
request
.
GET
.
get
(
'order'
,
'frequency'
)
cursor
.
execute
(
if
order
not
in
_ngrams_order_columns
:
''' SELECT
raise
ValidationError
(
'The order parameter should take one of the following values: '
+
', '
.
join
(
_ngrams_order_columns
),
400
)
key,
order_column
=
_ngrams_order_columns
[
order
]
COUNT(*) AS count
# query building
FROM
(
ngramsQuery
=
Ngram
.
objects
.
filter
(
SELECT skeys(metadata) AS key
nodes__parent
=
corpus
,
FROM
%
s
terms__startswith
=
request
.
GET
.
get
(
'startswith'
,
''
)
)
AS keys
)
.
annotate
(
count
=
Count
(
'id'
))
GROUP BY
# how should we order this?
key
orderColumn
=
{
ORDER BY
"frequency"
:
"-count"
,
count DESC
"alphabetical"
:
"terms"
'''
%
(
Node
.
_meta
.
db_table
,
)
)
}
.
get
(
request
.
GET
.
get
(
'order'
,
'frequency'
),
'-count'
)
# response building
ngramsQuery
=
ngramsQuery
.
order_by
(
orderColumn
)
return
JsonHttpResponse
({
# response building
"list"
:
[
row
[
0
]
for
row
in
cursor
.
fetchall
()],
return
JsonHttpResponse
({
})
"list"
:
[
ngram
.
terms
for
ngram
in
ngramsQuery
],
})
# ?mesured=documents.count
@
staticmethod
# &parameters[]=metadata.publication_year
def
metadata
(
request
,
corpus_id
):
# &filter[]=ngrams.in.test,ht,grf
# parameters retrieval and control
corpusQuery
=
Node
.
objects
.
filter
(
id
=
corpus_id
)
def
corpus_data
(
request
,
corpus_id
):
if
not
corpusQuery
:
# parameters retrieval and control
raise
Http404
(
"No such corpus."
)
corpusQuery
=
Node
.
objects
.
filter
(
id
=
corpus_id
)
corpus
=
corpusQuery
.
first
()
if
not
corpusQuery
:
if
corpus
.
type
.
name
!=
'Corpus'
:
raise
Http404
(
"No such corpus."
)
raise
Http404
(
"No such corpus."
)
corpus
=
corpusQuery
.
first
()
# query building
if
corpus
.
type
.
name
!=
'Corpus'
:
cursor
=
connection
.
cursor
()
raise
Http404
(
"No such corpus."
)
cursor
.
execute
(
# query building: initialization
''' SELECT
columns
=
[]
key,
conditions
=
[]
COUNT(*) AS count
group
=
[]
FROM (
order
=
[]
SELECT skeys(metadata) AS key
join_ngrams
=
False
FROM
%
s
# query building: parameters
) AS keys
for
parameter
in
request
.
GET
.
getlist
(
'parameters[]'
):
GROUP BY
key
ORDER BY
count DESC
'''
%
(
Node
.
_meta
.
db_table
,
))
# response building
return
JsonHttpResponse
({
"list"
:
[
row
[
0
]
for
row
in
cursor
.
fetchall
()],
})
@
staticmethod
def
data
(
request
,
corpus_id
):
# parameters retrieval and control
corpusQuery
=
Node
.
objects
.
filter
(
id
=
corpus_id
)
if
not
corpusQuery
:
raise
Http404
(
"No such corpus."
)
corpus
=
corpusQuery
.
first
()
if
corpus
.
type
.
name
!=
'Corpus'
:
raise
Http404
(
"No such corpus."
)
# query building: initialization
columns
=
[]
conditions
=
[]
group
=
[]
order
=
[]
join_ngrams
=
False
# query building: parameters
for
parameter
in
request
.
GET
.
getlist
(
'parameters[]'
):
c
=
len
(
columns
)
parameter_array
=
parameter
.
split
(
'.'
)
if
len
(
parameter_array
)
!=
2
:
raise
ValidationError
(
'Unrecognized "parameter[]=
%
s"'
%
(
parameter
,
))
origin
=
parameter_array
[
0
]
key
=
parameter_array
[
1
]
if
origin
==
"metadata"
:
key
=
key
.
replace
(
'
\'
'
,
'
\\\'
'
)
columns
.
append
(
"node.metadata->'
%
s' AS c
%
d"
%
(
key
,
c
,
))
conditions
.
append
(
"node.metadata ? '
%
s'"
%
(
key
,
))
group
.
append
(
"c
%
d"
%
(
c
,
))
order
.
append
(
"c
%
d"
%
(
c
,
))
else
:
raise
ValidationError
(
'Unrecognized type "
%
s" in "parameter[]=
%
s"'
%
(
origin
,
parameter
,
))
# query building: mesured value
mesured
=
request
.
GET
.
get
(
'mesured'
,
''
)
c
=
len
(
columns
)
c
=
len
(
columns
)
parameter_array
=
parameter
.
split
(
'.'
)
if
mesured
==
"documents.count"
:
if
len
(
parameter_array
)
!=
2
:
columns
.
append
(
"COUNT(node.id) AS c
%
d "
%
(
c
,
))
raise
ValidationError
(
'Unrecognized "parameter[]=
%
s"'
%
(
parameter
,
))
elif
mesured
==
"ngrams.count"
:
origin
=
parameter_array
[
0
]
columns
.
append
(
"COUNT(ngram.id) AS c
%
d "
%
(
c
,
))
key
=
parameter_array
[
1
]
join_ngrams
=
True
if
origin
==
"metadata"
:
key
=
key
.
replace
(
'
\'
'
,
'
\\\'
'
)
columns
.
append
(
"node.metadata->'
%
s' AS c
%
d"
%
(
key
,
c
,
))
conditions
.
append
(
"node.metadata ? '
%
s'"
%
(
key
,
))
group
.
append
(
"c
%
d"
%
(
c
,
))
order
.
append
(
"c
%
d"
%
(
c
,
))
else
:
raise
ValidationError
(
'Unrecognized type "
%
s" in "parameter[]=
%
s"'
%
(
origin
,
parameter
,
))
# query building: mesured value
mesured
=
request
.
GET
.
get
(
'mesured'
,
''
)
c
=
len
(
columns
)
if
mesured
==
"documents.count"
:
columns
.
append
(
"COUNT(node.id) AS c
%
d "
%
(
c
,
))
elif
mesured
==
"ngrams.count"
:
columns
.
append
(
"COUNT(ngram.id) AS c
%
d "
%
(
c
,
))
join_ngrams
=
True
else
:
raise
ValidationError
(
'The "mesured" parameter should take one of the following values: "documents.count", "ngrams.count"'
)
# query building: filters
for
filter
in
request
.
GET
.
getlist
(
'filters[]'
,
''
):
if
'|'
in
filter
:
filter_array
=
filter
.
split
(
"|"
)
key
=
filter_array
[
0
]
values
=
filter_array
[
1
]
.
replace
(
"'"
,
"
\\
'"
)
.
split
(
","
)
if
key
==
'ngram.terms'
:
conditions
.
append
(
"ngram.terms IN ('
%
s')"
%
(
"', '"
.
join
(
values
),
))
join_ngrams
=
True
else
:
else
:
raise
ValidationError
(
'Unrecognized "filter[]=
%
s"'
%
(
filter
,
))
raise
ValidationError
(
'The "mesured" parameter should take one of the following values: "documents.count", "ngrams.count"'
)
# query building: assembling
# query building: filters
sql
=
"SELECT
%
s FROM
%
s AS node"
%
(
', '
.
join
(
columns
),
Node
.
_meta
.
db_table
,
)
for
filter
in
request
.
GET
.
getlist
(
'filters[]'
,
''
):
if
join_ngrams
:
if
'|'
in
filter
:
sql
+=
" INNER JOIN
%
s AS node_ngram ON node_ngram.node_id = node.id"
%
(
Node_Ngram
.
_meta
.
db_table
,
)
filter_array
=
filter
.
split
(
"|"
)
sql
+=
" INNER JOIN
%
s AS ngram ON ngram.id = node_ngram.ngram_id"
%
(
Ngram
.
_meta
.
db_table
,
)
key
=
filter_array
[
0
]
if
conditions
:
values
=
filter_array
[
1
]
.
replace
(
"'"
,
"
\\
'"
)
.
split
(
","
)
sql
+=
" WHERE
%
s"
%
(
" AND "
.
join
(
conditions
),
)
if
key
==
'ngram.terms'
:
if
group
:
conditions
.
append
(
"ngram.terms IN ('
%
s')"
%
(
"', '"
.
join
(
values
),
))
sql
+=
" GROUP BY
%
s"
%
(
", "
.
join
(
group
),
)
join_ngrams
=
True
if
order
:
else
:
sql
+=
" ORDER BY
%
s"
%
(
", "
.
join
(
order
),
)
raise
ValidationError
(
'Unrecognized "filter[]=
%
s"'
%
(
filter
,
))
# query execution
# query building: assembling
# return HttpResponse(sql)
sql
=
"SELECT
%
s FROM
%
s AS node"
%
(
', '
.
join
(
columns
),
Node
.
_meta
.
db_table
,
)
cursor
=
connection
.
cursor
()
if
join_ngrams
:
cursor
.
execute
(
sql
)
sql
+=
" INNER JOIN
%
s AS node_ngram ON node_ngram.node_id = node.id"
%
(
Node_Ngram
.
_meta
.
db_table
,
)
# response building
sql
+=
" INNER JOIN
%
s AS ngram ON ngram.id = node_ngram.ngram_id"
%
(
Ngram
.
_meta
.
db_table
,
)
return
JsonHttpResponse
({
if
conditions
:
"list"
:
[
row
for
row
in
cursor
.
fetchall
()],
sql
+=
" WHERE
%
s"
%
(
" AND "
.
join
(
conditions
),
)
})
if
group
:
sql
+=
" GROUP BY
%
s"
%
(
", "
.
join
(
group
),
)
if
order
:
sql
+=
" ORDER BY
%
s"
%
(
", "
.
join
(
order
),
)
# query execution
# return HttpResponse(sql)
cursor
=
connection
.
cursor
()
cursor
.
execute
(
sql
)
# response building
return
JsonHttpResponse
({
"list"
:
[
row
for
row
in
cursor
.
fetchall
()],
})
gargantext_web/urls.py
View file @
5fc7c2d7
...
@@ -40,9 +40,9 @@ urlpatterns = patterns('',
...
@@ -40,9 +40,9 @@ urlpatterns = patterns('',
url
(
r'^chart/corpus/(\d+)/data.csv$'
,
send_csv
),
url
(
r'^chart/corpus/(\d+)/data.csv$'
,
send_csv
),
url
(
r'^graph.json$'
,
send_graph
),
url
(
r'^graph.json$'
,
send_graph
),
url
(
r'^api/corpus/(\d+)/ngrams$'
,
gargantext_web
.
api
.
corpus_
ngrams
),
url
(
r'^api/corpus/(\d+)/ngrams$'
,
gargantext_web
.
api
.
CorpusController
.
ngrams
),
url
(
r'^api/corpus/(\d+)/metadata$'
,
gargantext_web
.
api
.
corpus_
metadata
),
url
(
r'^api/corpus/(\d+)/metadata$'
,
gargantext_web
.
api
.
CorpusController
.
metadata
),
url
(
r'^api/corpus/(\d+)/data$'
,
gargantext_web
.
api
.
corpus_
data
),
url
(
r'^api/corpus/(\d+)/data$'
,
gargantext_web
.
api
.
CorpusController
.
data
),
)
)
from
django.conf
import
settings
from
django.conf
import
settings
...
...
parsing/Caches.py
View file @
5fc7c2d7
...
@@ -16,6 +16,7 @@ class NgramsCache(defaultdict):
...
@@ -16,6 +16,7 @@ class NgramsCache(defaultdict):
def
__missing__
(
self
,
terms
):
def
__missing__
(
self
,
terms
):
"""If the terms are not yet present in the dictionary,
"""If the terms are not yet present in the dictionary,
retrieve it from the database or insert it."""
retrieve it from the database or insert it."""
terms
=
terms
.
strip
()
.
lower
()
try
:
try
:
ngram
=
node
.
models
.
Ngram
.
get
(
terms
=
terms
,
language
=
self
.
language
)
ngram
=
node
.
models
.
Ngram
.
get
(
terms
=
terms
,
language
=
self
.
language
)
except
:
except
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment