Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
1676bf93
Commit
1676bf93
authored
Oct 31, 2017
by
sim
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Remove old REST-API
parent
8d42b26a
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
0 additions
and
2881 deletions
+0
-2881
urls.py
gargantext/urls.py
+0
-4
analytics.py
gargantext/views/api/analytics.py
+0
-354
api.py
gargantext/views/api/api.py
+0
-119
corpora.py
gargantext/views/api/corpora.py
+0
-106
metrics.py
gargantext/views/api/metrics.py
+0
-43
ngramlists.py
gargantext/views/api/ngramlists.py
+0
-694
ngrams.py
gargantext/views/api/ngrams.py
+0
-209
nodes.py
gargantext/views/api/nodes.py
+0
-714
projects.py
gargantext/views/api/projects.py
+0
-503
urls.py
gargantext/views/api/urls.py
+0
-96
users.py
gargantext/views/api/users.py
+0
-39
No files found.
gargantext/urls.py
View file @
1676bf93
"""URL Configuration of GarganText
"""URL Configuration of GarganText
Views are shared between these modules:
Views are shared between these modules:
- `api`, for JSON and CSV interaction with data
- `contents`, for Python-generated contents
- `contents`, for Python-generated contents
"""
"""
...
@@ -10,11 +9,8 @@ from django.contrib import admin
...
@@ -10,11 +9,8 @@ from django.contrib import admin
from
django.views.generic.base
import
RedirectView
as
Redirect
from
django.views.generic.base
import
RedirectView
as
Redirect
from
django.contrib.staticfiles.storage
import
staticfiles_storage
as
static
from
django.contrib.staticfiles.storage
import
staticfiles_storage
as
static
import
gargantext.views.api.urls
urlpatterns
=
[
url
(
r'^admin/'
,
admin
.
site
.
urls
)
urlpatterns
=
[
url
(
r'^admin/'
,
admin
.
site
.
urls
)
,
url
(
r'^api/'
,
include
(
gargantext
.
views
.
api
.
urls
)
)
,
url
(
r'^favicon.ico$'
,
Redirect
.
as_view
(
url
=
static
.
url
(
'favicon.ico'
)
,
url
(
r'^favicon.ico$'
,
Redirect
.
as_view
(
url
=
static
.
url
(
'favicon.ico'
)
,
permanent
=
False
),
name
=
"favicon"
)
,
permanent
=
False
),
name
=
"favicon"
)
]
]
gargantext/views/api/analytics.py
deleted
100644 → 0
View file @
8d42b26a
from
gargantext.util.http
import
ValidationException
,
APIView
\
,
get_parameters
,
JsonHttpResponse
,
Http404
\
,
HttpResponse
from
gargantext.util.db
import
session
,
delete
,
func
,
bulk_insert
from
gargantext.models
import
Node
,
Ngram
,
NodeNgram
,
NodeNodeNgram
,
NodeNode
,
NodeHyperdata
,
HyperdataKey
from
gargantext.constants
import
INDEXED_HYPERDATA
from
django.core.exceptions
import
PermissionDenied
,
SuspiciousOperation
from
sqlalchemy
import
or_
,
not_
from
sqlalchemy.sql
import
func
from
sqlalchemy.orm
import
aliased
import
datetime
import
collections
from
gargantext.util.db
import
*
from
gargantext.util.validation
import
validate
from
rest_framework.authentication
import
SessionAuthentication
,
BasicAuthentication
from
rest_framework.views
import
APIView
from
rest_framework.response
import
Response
from
rest_framework.exceptions
import
APIException
as
_APIException
def DebugHttpResponse(data):
    """Return an HTML response showing ``str(data)`` in a ``<pre>`` block.

    The page uses a black background with white text. ``data`` is inserted
    as-is (no HTML escaping is performed here).
    """
    page_template = '<html><body style="background:#000;color:#FFF"><pre>%s</pre></body></html>'
    text = str(data)
    return HttpResponse(page_template % (text, ))
import
json
class JSONEncoder(json.JSONEncoder):
    """JSON encoder that additionally knows how to serialize datetimes.

    ``datetime.datetime`` values are rendered as the first 19 characters of
    their ISO-8601 representation (``YYYY-MM-DDTHH:MM:SS``) followed by a
    literal ``'Z'``. Every other unsupported type is delegated to the
    standard encoder, which raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serializable replacement for ``obj``.

        Raises:
            TypeError: if ``obj`` is not a ``datetime.datetime`` and the base
                encoder cannot serialize it either.
        """
        if isinstance(obj, datetime.datetime):
            # [:19] drops microseconds and any UTC offset; 'Z' is appended
            # unconditionally (no timezone conversion happens here).
            return obj.isoformat()[:19] + 'Z'
        # Name the class explicitly: super(self.__class__, self) resolves to
        # this very method again when the encoder is subclassed, which ends
        # in infinite recursion instead of the expected TypeError.
        return super(JSONEncoder, self).default(obj)


# shared encoder instance; output is pretty-printed with a 4-space indent
json_encoder = JSONEncoder(indent=4)
def JsonHttpResponse(data, status=200):
    """Serialize ``data`` with the module-level ``json_encoder`` and wrap it
    in an ``HttpResponse`` declared as UTF-8 JSON.

    ``status`` is passed through as the HTTP status code of the response.
    """
    body = json_encoder.encode(data)
    return HttpResponse(
        content=body,
        content_type='application/json; charset=utf-8',
        status=status,
    )
# Short aliases for Django's exception classes, named after the HTTP status
# code each one is meant to signal.
Http400 = SuspiciousOperation
Http403 = PermissionDenied
import
csv
def CsvHttpResponse(data, headers=None, status=200):
    """Build a ``text/csv`` response from an iterable of rows.

    ``headers``, when truthy, is written as the first row; every item of
    ``data`` is then written as one comma-delimited row. ``status`` becomes
    the HTTP status code of the response.
    """
    response = HttpResponse(content_type="text/csv", status=status)
    # the response object is used as the file-like target of the CSV writer
    csv_out = csv.writer(response, delimiter=',')
    if headers:
        csv_out.writerow(headers)
    csv_out.writerows(data)
    return response
class APIException(_APIException):
    """API error carrying an explicit HTTP status code.

    ``message`` is stored as ``detail`` and ``code`` (500 by default) as
    ``status_code``. The parent initializer is not called.
    """

    def __init__(self, message, code=500):
        self.detail = message
        self.status_code = code
class NodeNgramsQueries(APIView):
    """Time series of ngram / document counts for a project or some corpora.

    ``POST`` receives a JSON description of the series (x axis: date
    resolution, y axis: measured value and optional normalization, plus
    filters) and answers with ``(date, value)`` pairs as JSON or CSV.
    """

    # resolution name -> function giving the start of the following bucket
    _resolutions = {
        'second': lambda d: d + datetime.timedelta(seconds=1),
        'minute': lambda d: d + datetime.timedelta(minutes=1),
        'hour': lambda d: d + datetime.timedelta(hours=1),
        'day': lambda d: d + datetime.timedelta(days=1),
        'week': lambda d: d + datetime.timedelta(days=7),
        # overshoot the bucket length, then snap back to the bucket start
        'month': lambda d: (d + datetime.timedelta(days=32)).replace(day=1),
        'year': lambda d: (d + datetime.timedelta(days=367)).replace(day=1, month=1),
        'decade': lambda d: (d + datetime.timedelta(days=3660)).replace(day=1, month=1),
        'century': lambda d: (d + datetime.timedelta(days=36600)).replace(day=1, month=1),
    }

    # operator name -> function building the SQLAlchemy filter expression
    _operators = {
        '=': lambda field, value: (field == value),
        '!=': lambda field, value: (field != value),
        '<': lambda field, value: (field < value),
        '>': lambda field, value: (field > value),
        '<=': lambda field, value: (field <= value),
        '>=': lambda field, value: (field >= value),
        'in': lambda field, value: (or_(*tuple(field == x for x in value))),
        'contains': lambda field, value: (field.contains(value)),
        'doesnotcontain': lambda field, value: (not_(field.contains(value))),
        'startswith': lambda field, value: (field.startswith(value)),
        'endswith': lambda field, value: (field.endswith(value)),
    }

    # type name (as returned by type2string) -> converter for filter values
    _converters = {
        'float': float,
        'int': int,
        # type2string() names the int type "integer"; without this entry any
        # filter on an integer hyperdata failed with a KeyError
        'integer': int,
        # complete a partial date string with the tail of a default timestamp
        'datetime': lambda x: x + '2000-01-01 00:00:00Z'[len(x):],
        'text': str,
        'string': str,
    }

    def _validation_schema(self):
        """Return the schema handed to ``validate`` for the request body."""
        return {'type': dict, 'default': {}, 'items': {
            'x': {'type': dict, 'default': {}, 'items': {
                # which hyperdata to choose for the date
                'value': {'type': str, 'default': 'publication_date', 'range': {'publication_date', }},
                # time resolution
                'resolution': {'type': str, 'range': self._resolutions.keys(), 'default': 'month'},
                # should we add zeroes for empty values?
                'with_empty': {'type': bool, 'default': False},
            }},
            'y': {'type': dict, 'default': {}, 'items': {
                # measured value
                'value': {'type': str, 'default': 'ngrams_count', 'range': {'ngrams_count', 'documents_count', 'ngrams_tfidf'}},
                # value by which we should normalize
                'divided_by': {'type': str, 'range': {'total_documents_count', 'documents_count', 'total_ngrams_count'}},
            }},
            # filtering
            'filter': {'type': dict, 'default': {}, 'items': {
                # filter by metadata
                'hyperdata': {'type': list, 'default': [], 'items': {
                    'type': dict, 'items': {
                        # the key must be an indexed hyperdata and the operator
                        # a known operator (the two ranges used to be swapped)
                        'key': {'type': str, 'range': INDEXED_HYPERDATA.keys()},
                        'operator': {'type': str, 'range': self._operators.keys()},
                        'value': {'type': str},
                    }}},
                # filter by date
                'date': {'type': dict, 'items': {
                    'min': {'type': datetime.datetime},
                    'max': {'type': datetime.datetime},
                }, 'default': {}},
                # filter by corpora
                'corpora': {'type': list, 'default': [], 'items': {'type': int}},
                # filter by ngrams
                'ngrams': {'type': list, 'default': [], 'items': {'type': str}},
            }},
            # output format
            'format': {'type': str, 'default': 'json', 'range': {'json', 'csv'}},
        }}

    def post(self, request, project_id):
        """Compute the requested series and return it as JSON or CSV.

        Args:
            request: the incoming request; ``request.data`` holds the query.
                An empty body falls back to a built-in example query.
            project_id: project whose corpora are searched when the query
                does not list explicit corpora.

        Returns:
            A response with status 201 holding the sorted ``(date, value)``
            pairs (and, for JSON, the validated query).

        Raises:
            ValidationException: if the requested y value is accepted by the
                schema but has no query column implemented.
        """
        # example only
        params = request.data or {
            'x': {
                'with_empty': True,
                'resolution': 'decade',
                'value': 'publication_date',
            },
            'y': {
                # 'divided_by': 'total_ngrams_count',
                # 'divided_by': 'total_documents_count',
            },
            'filter': {
                # 'ngrams': ['bees', 'bee', 'honeybee', 'honeybees', 'honey bee', 'honey bees'],
                # 'ngrams': ['insecticide', 'pesticide'],
                # 'corpora': [52633],
                # 'date': {'min': '1995-12-31'}
            },
            # 'format': 'csv',
        }
        # input validation
        params = validate(params, self._validation_schema())
        filters = params['filter']

        # build query: prepare columns
        X = aliased(NodeHyperdata)
        column_x = func.date_trunc(params['x']['resolution'], X.value_utc)
        y_columns = {
            'documents_count': func.count(Node.id.distinct()),
            'ngrams_count': func.sum(NodeNgram.weight),
            # 'ngrams_tfidf': func.sum(NodeNodeNgram.weight),
        }
        y_name = params['y']['value']
        if y_name not in y_columns:
            # 'ngrams_tfidf' passes validation but has no column yet
            raise ValidationException('"%s" is not implemented' % y_name)
        column_y = y_columns[y_name]

        # build query: base
        query_base = (session
            .query(column_x)
            .select_from(Node)
            .join(NodeNgram, NodeNgram.node_id == Node.id)
            .join(X, X.node_id == NodeNgram.node_id)
            #.filter(X.key == params['x']['value'])
            .group_by(column_x)
            .order_by(column_x)
        )

        # build query: base, filter by corpora or project
        if filters.get('corpora'):
            query_base = query_base.filter(Node.parent_id.in_(filters['corpora']))
        else:
            ParentNode = aliased(Node)
            query_base = (query_base
                .join(ParentNode, ParentNode.id == Node.parent_id)
                .filter(ParentNode.parent_id == project_id)
            )

        # build query: base, filter by date
        # NOTE(review): the x column truncates X.value_utc while these bounds
        # compare X.value, as in the previous implementation -- confirm
        if 'date' in filters:
            if 'min' in filters['date']:
                query_base = query_base.filter(X.value >= filters['date']['min'])
            if 'max' in filters['date']:
                query_base = query_base.filter(X.value <= filters['date']['max'])

        # build query: filter by ngrams
        query_result = query_base.add_columns(column_y)
        if filters.get('ngrams'):
            query_result = (query_result
                .join(Ngram, Ngram.id == NodeNgram.ngram_id)
                .filter(Ngram.terms.in_(filters['ngrams']))
            )

        # build query: filter by metadata
        for hyperdata in filters.get('hyperdata', ()):
            operator = self._operators[hyperdata['operator']]
            type_string = type2string(INDEXED_HYPERDATA[hyperdata['key']]['type'])
            value = self._converters[type_string](hyperdata['value'])
            # one alias per filter: joining the same table several times
            # without an alias cannot express several independent filters
            H = aliased(NodeHyperdata)
            query_result = (query_result
                .join(H, H.node_id == NodeNgram.node_id)
                .filter(H.key == hyperdata['key'])
                .filter(operator(H.value, value))
            )

        # build result: prepare data
        date_value_list = query_result.all()
        # NOTE(review): as before, the last row returned by the query is left
        # out of the result -- reason not visible here, confirm
        data_rows = date_value_list[:-1]

        # build result: prepare interval
        result = collections.OrderedDict()
        if params['x']['with_empty'] and data_rows:
            # bounds come from the kept rows only, so a single-row answer no
            # longer fails with an IndexError
            date_min = data_rows[0][0].replace(tzinfo=None)
            date_max = data_rows[-1][0].replace(tzinfo=None)
            compute_next_date = self._resolutions[params['x']['resolution']]
            date = date_min
            while date <= date_max:
                result[date] = 0.0
                date = compute_next_date(date)

        # build result: integrate
        for date, value in data_rows:
            result[date.replace(tzinfo=None)] = value

        # build result: normalize
        query_normalize = None
        divided_by = params['y'].get('divided_by')
        if date_value_list and divided_by:
            if divided_by == 'total_documents_count':
                query_normalize = query_base.add_columns(func.count(Node.id.distinct()))
            elif divided_by == 'total_ngrams_count':
                query_normalize = query_base.add_columns(func.sum(NodeNgram.weight))
        if query_normalize is not None:
            for date, value in query_normalize.all()[:-1]:
                date = date.replace(tzinfo=None)
                # skip empty totals (0 or NULL) instead of failing on division
                if date in result and value:
                    result[date] /= value

        # return result with proper formatting
        if params['format'] == 'json':
            return JsonHttpResponse({
                'query': params,
                'result': sorted(result.items()),
            }, 201)
        elif params['format'] == 'csv':
            return CsvHttpResponse(sorted(result.items()), ('date', 'value'), 201)
# Module-level table: operator name -> function building the comparison
# expression from a field and a value ("in" ORs one equality per item).
_operators_dict = {
    "=": lambda field, value: (field == value),
    "!=": lambda field, value: (field != value),
    "<": lambda field, value: (field < value),
    ">": lambda field, value: (field > value),
    "<=": lambda field, value: (field <= value),
    ">=": lambda field, value: (field >= value),
    "in": lambda field, value: (or_(*tuple(field == x for x in value))),
    "contains": lambda field, value: (field.contains(value)),
    "doesnotcontain": lambda field, value: (not_(field.contains(value))),
    "startswith": lambda field, value: (field.startswith(value)),
}

# INDEXED_HYPERDATA entries ordered by key
od = collections.OrderedDict(sorted(INDEXED_HYPERDATA.items()))
# one single-entry dict per indexed hyperdata, in key order, 'abstract' excluded
_hyperdata_list = [{key: value} for key, value in od.items() if key != 'abstract']
def type2string(given_type):
    """Map a Python type to its API name.

    ``int`` -> "integer", ``str`` -> "string", ``datetime.datetime`` ->
    "datetime". Any other value yields ``None``.
    """
    known_types = (
        (int, "integer"),
        (str, "string"),
        (datetime.datetime, "datetime"),
    )
    for candidate, label in known_types:
        if given_type == candidate:
            return label
    return None
def get_metadata(corpus_id_list):
    """Describe every indexed hyperdata key.

    Args:
        corpus_id_list: ids of the corpora of interest. Currently unused:
            per-corpus value statistics are not computed, the parameter is
            kept for interface compatibility.

    Returns:
        A list with one dict per key of ``INDEXED_HYPERDATA`` holding its
        'key', its 'type' (as named by ``type2string``) and the placeholders
        'values', 'valuesFrom', 'valuesTo' and 'valuesCount', all ``None``.
    """
    # TODO: count the values of each key in the given corpora, determine
    # their span and list them when there are few. The previous body only
    # held unused query objects and an unreachable branch reading an
    # undefined query, so the placeholders below were always None.
    return [
        {
            'key': str(hyperdata),
            'type': type2string(INDEXED_HYPERDATA[hyperdata]['type']),
            'values': None,
            'valuesFrom': None,
            'valuesTo': None,
            'valuesCount': None,
        }
        for hyperdata in INDEXED_HYPERDATA.keys()
    ]
class ApiHyperdata(APIView):
    """Endpoint exposing the hyperdata description built by get_metadata."""

    def get(self, request):
        """Answer with ``{'data': get_metadata(ids)}`` as JSON.

        The ids are read from the comma-separated ``corpus_id`` GET
        parameter and converted to integers.
        """
        raw_ids = request.GET['corpus_id']
        corpus_id_list = [int(chunk) for chunk in raw_ids.split(',')]
        payload = {'data': get_metadata(corpus_id_list)}
        return JsonHttpResponse(payload)
gargantext/views/api/api.py
deleted
100644 → 0
View file @
8d42b26a
from
rest_framework.status
import
*
from
rest_framework.exceptions
import
APIException
from
rest_framework.response
import
Response
from
rest_framework.renderers
import
JSONRenderer
,
BrowsableAPIRenderer
from
rest_framework.views
import
APIView
from
rest_framework.authentication
import
SessionAuthentication
,
BasicAuthentication
from
rest_framework.permissions
import
IsAuthenticated
from
gargantext.constants
import
RESOURCETYPES
,
NODETYPES
,
get_resource
from
gargantext.models
import
Node
,
Ngram
,
NodeNgram
,
NodeNodeNgram
,
NodeNode
from
gargantext.util.db
import
session
,
delete
,
func
,
bulk_insert
from
gargantext.util.db_cache
import
cache
,
or_
from
gargantext.util.files
import
upload
from
gargantext.util.http
import
ValidationException
,
APIView
,
JsonHttpResponse
,
get_parameters
from
gargantext.util.scheduling
import
scheduled
from
gargantext.util.validation
import
validate
#import
#NODES format
# Attributes copied into the API representation of users, generic nodes and
# documents/corpora respectively.
_user_default_fields = ["is_staff", "is_superuser", "is_active", "username", "email", "first_name", "last_name", "id"]
_api_default_fields = ['id', 'parent_id', 'name', 'typename', 'date']
_doc_default_fields = ['id', 'parent_id', 'name', 'typename', 'date', "hyperdata"]
#_resource_default_fields = [['id', 'parent_id', 'name', 'typename', "hyperdata.method"]
#_corpus_default_fields = ['id', 'parent_id', 'name', 'typename', 'date', "hyperdata","resource"]


def _pick_fields(node, fields):
    '''copy the listed attributes of node into a dict'''
    return {field: getattr(node, field) for field in fields}


def _corpus_status_message(node):
    '''describe the processing state of a corpus ('' when idle or complete)'''
    status = node.status()
    if status is None or status['complete']:
        return ''
    if not status['error']:
        return '(in progress: %s, %d complete)' % (
            status['action'].replace('_', ' '),
            status['progress'],
        )
    return '(aborted: "%s" after %i docs)' % (
        status['error'][-1],
        status['progress'],
    )


def format_parent(node):
    '''format the parent

    Objects exposing a `username` attribute are treated as users, everything
    else is dispatched on `typename`:
      - user     -> the user fields (None when the username is empty,
                    as before)
      - DOCUMENT -> the document fields
      - CORPUS   -> the document fields plus "status_msg"
      - other    -> the generic node fields
    '''
    #USER
    try:
        username = node.username
    except AttributeError:
        # not a user: fall through to the typename dispatch. Only the
        # missing attribute is caught; a bare except used to hide any error.
        pass
    else:
        if username != "":
            return _pick_fields(node, _user_default_fields)
        return None
    #DOC
    if node.typename == "DOCUMENT":
        return _pick_fields(node, _doc_default_fields)
    if node.typename == "CORPUS":
        parent = _pick_fields(node, _doc_default_fields)
        #status: computed from the node itself (the message used to be read
        #from an undefined variable, so corpora always failed here)
        parent["status_msg"] = _corpus_status_message(node)
        return parent
    #PROJECT, RESOURCES?
    return _pick_fields(node, _api_default_fields)
def format_records(node_list):
    '''format the records list

    The typename of the first node decides how the whole list is rendered:
    USER and DOCUMENT lists get their respective default fields, CORPUS
    lists additionally get the ids of their resources and documents plus a
    status message, anything else gets the generic node fields.
    '''
    if len(node_list) == 0:
        return []
    kind = node_list[0].typename

    def pick(node, fields):
        return {field: getattr(node, field) for field in fields}

    #USER
    if kind == "USER":
        return [pick(node, _user_default_fields) for node in node_list]
    #DOCUMENT
    if kind == "DOCUMENT":
        return [pick(node, _doc_default_fields) for node in node_list]
    #PROJECT, RESOURCES?
    if kind != "CORPUS":
        return [pick(node, _api_default_fields) for node in node_list]
    #CORPUS
    records = []
    for node in node_list:
        record = pick(node, _doc_default_fields)
        record["resources"] = [child.id for child in node.children("RESOURCE")]
        record["documents"] = [child.id for child in node.children("DOCUMENT")]
        status = node.status()
        if status is None or status['complete']:
            status_message = ''
        elif status['error']:
            status_message = '(aborted: "%s" after %i docs)' % (
                status['error'][-1],
                status['progress']
            )
        else:
            status_message = '(in progress: %s, %d complete)' % (
                status['action'].replace('_', ' '),
                status['progress'],
            )
        record["status"] = status_message
        records.append(record)
    return records
def check_rights(request, node_id):
    '''check that the node belong to USER

    Returns the node when it exists and its `user_id` is the id of
    `request.user`.

    Raises rest_framework's PermissionDenied otherwise. It is a subclass of
    the APIException raised previously, so existing handlers still match,
    but it is rendered with status 403 instead of the generic 500. A missing
    node is reported the same way as a node owned by somebody else.
    '''
    # local import: keeps this function self-contained within the module
    from rest_framework.exceptions import PermissionDenied

    node = session.query(Node).filter(Node.id == node_id).first()
    if node is None or node.user_id != request.user.id:
        raise PermissionDenied("403 Unauthorized")
    return node
def format_response(parent, records):
    '''assemble the payload of a listing: the formatted parent, the
    formatted records and the number of records'''
    payload = {}
    payload["parent"] = format_parent(parent)
    payload["records"] = format_records(records)
    payload["count"] = len(records)
    return payload
gargantext/views/api/corpora.py
deleted
100644 → 0
View file @
8d42b26a
from
django.core.exceptions
import
*
from
.api
import
*
#APIView, APIException entre autres
from
gargantext.util.db
import
session
from
gargantext.models
import
Node
from
gargantext.util.http
import
*
class CorpusView(APIView):
    '''API endpoint that represent a corpus'''

    # accepted values of the "view" parameter -> typename of the children to
    # list. The historical spelling "RESSOURCE" is still accepted.
    # NOTE(review): resources appear to be stored as "RESOURCE" -- confirm
    _VIEW_TYPENAMES = {
        "DOCUMENT": "DOCUMENT",
        "JOURNAL": "JOURNAL",
        "TITLE": "TITLE",
        "ANALYTICS": "ANALYTICS",
        "RESOURCE": "RESOURCE",
        "RESSOURCE": "RESOURCE",
    }

    @staticmethod
    def _find_node(node_id, typename):
        '''first node with this id and typename, or None'''
        return (session.query(Node)
            .filter(Node.id == node_id, Node.typename == typename)
            .first()
        )

    @staticmethod
    def _not_found(typename, node_id):
        '''404 response for a missing node'''
        return Response(
            {'detail': "%s Node #%s not found" % (typename, node_id)},
            status=HTTP_404_NOT_FOUND,
        )

    def get(self, request, project_id, corpus_id, view="DOCUMENT"):
        '''GET corpus detail
        default view full documents

        The "view" request parameter, when it names a known view, selects
        the typename of the children that are listed.
        '''
        params = get_parameters(request)
        if "view" in params:
            # test the requested view (the default was tested before)
            requested = params["view"].upper()
            if requested in self._VIEW_TYPENAMES:
                view = self._VIEW_TYPENAMES[requested]
        project = self._find_node(project_id, "PROJECT")
        # the existence check must come before project.id is used
        if project is None:
            return self._not_found("PROJECT", project_id)
        check_rights(request, project.id)
        corpus = self._find_node(corpus_id, "CORPUS")
        if corpus is None:
            return self._not_found("CORPUS", corpus_id)
        documents = (session.query(Node)
            .filter(Node.parent_id == corpus_id, Node.typename == view)
            .all()
        )
        return Response(format_response(corpus, documents))

    def delete(self, request, project_id, corpus_id):
        '''DELETE corpus

        Removes the corpus and its direct children. Only the owner of the
        corpus may delete it (check_rights raises otherwise).
        '''
        corpus = self._find_node(corpus_id, "CORPUS")
        if corpus is None:
            return self._not_found("CORPUS", corpus_id)
        # deleting is destructive: require ownership of the corpus
        check_rights(request, corpus.id)
        children = session.query(Node).filter(Node.parent_id == corpus_id).all()
        # instances are deleted one by one, not as a list
        for child in children:
            session.delete(child)
        session.delete(corpus)
        session.commit()
        return Response(
            {'detail': "Deleted corpus #%s" % str(corpus_id)},
            status=HTTP_204_NO_CONTENT,
        )

    def put(self, request, project_id, corpus_id, view="DOCUMENT"):
        '''UPDATE corpus

        Only "name" and "date" are applied. "username" (change of owner) and
        "hyperdata" are accepted but not implemented; other keys are ignored.
        '''
        project = self._find_node(project_id, "PROJECT")
        if project is None:
            return self._not_found("PROJECT", project_id)
        check_rights(request, project.id)
        corpus = self._find_node(corpus_id, "CORPUS")
        if corpus is None:
            return self._not_found("CORPUS", corpus_id)
        changed = False
        for key, val in request.data.items():
            if key in ("name", "date"):
                # the corpus is the node being updated
                setattr(corpus, key, val)
                changed = True
        if changed:
            session.add(corpus)
            session.commit()
        return Response(
            {"detail": "Updated corpus #%s" % str(corpus.id)},
            status=HTTP_202_ACCEPTED,
        )

    def post(self, request, project_id, corpus_id):
        '''ADD a new RESOURCE to CORPUS

        Not implemented: after the usual checks a 501 is returned (the
        method used to return nothing at all).
        '''
        project = self._find_node(project_id, "PROJECT")
        if project is None:
            return self._not_found("PROJECT", project_id)
        check_rights(request, project.id)
        corpus = self._find_node(corpus_id, "CORPUS")
        if corpus is None:
            return self._not_found("CORPUS", corpus_id)
        return Response(
            {'detail': "Adding a resource to a corpus is not implemented"},
            status=HTTP_501_NOT_IMPLEMENTED,
        )
gargantext/views/api/metrics.py
deleted
100644 → 0
View file @
8d42b26a
from
gargantext.util.db_cache
import
cache
from
gargantext.util.http
import
ValidationException
,
APIView
\
,
HttpResponse
,
JsonHttpResponse
from
gargantext.util.toolchain.main
import
recount
from
gargantext.util.scheduling
import
scheduled
from
datetime
import
datetime
class CorpusMetrics(APIView):
    """Endpoint triggering a recount of the metrics of a corpus."""

    def patch(self, request, corpusnode_id):
        """
        PATCH triggers recount of metrics for the specified corpus.

        ex PATCH http://localhost:8000/api/metrics/14072
                                                   -----
                                                 corpus_id

        Answers 401 to unauthenticated users. Otherwise hands `recount` to
        `scheduled` and returns the corpus id together with the time spent
        in that call.

        Raises:
            ValidationException: if no corpus node matches `corpusnode_id`.
        """
        print("==> update metrics request on ", corpusnode_id)

        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        try:
            corpus = cache.Node[int(corpusnode_id)]
        except Exception:
            # any lookup/conversion failure means "no such corpus"; unlike a
            # bare except this lets SystemExit/KeyboardInterrupt through
            corpus = None

        if corpus is None:
            raise ValidationException("%s is not a valid corpus node id."
                                      % corpusnode_id)

        t_before = datetime.now()
        # =============
        scheduled(recount)(corpus.id)
        # =============
        t_after = datetime.now()

        return JsonHttpResponse({
            'corpus_id': corpusnode_id,
            'took': "%f s." % (t_after - t_before).total_seconds()
        })
gargantext/views/api/ngramlists.py
deleted
100644 → 0
View file @
8d42b26a
"""
API views for advanced operations on ngrams and ngramlists
-----------------------------------------------------------
- retrieve several lists together ("family")
- retrieve detailed list infos (ngram_id, term strings, scores...)
- modify NodeNgram lists (PUT/DEL an ngram to a MAINLIST OR MAPLIST...)
- modify NodeNgramNgram groups (PUT/DEL a list of groupings like {"767[]":[209,640],"779[]":[436,265,385]})
"""
from
gargantext.util.http
import
APIView
,
get_parameters
,
JsonHttpResponse
,
\
ValidationException
,
Http404
,
HttpResponse
from
gargantext.util.db
import
session
,
aliased
,
bulk_insert
from
gargantext.util.db_cache
import
cache
from
sqlalchemy
import
tuple_
from
gargantext.models
import
Ngram
,
NodeNgram
,
NodeNodeNgram
,
NodeNgramNgram
,
Node
from
gargantext.util.lists
import
UnweightedList
,
Translations
from
gargantext.util.scheduling
import
scheduled
# useful subroutines
from
gargantext.util.ngramlists_tools
import
query_list
,
export_ngramlists
,
\
import_ngramlists
,
merge_ngramlists
,
\
import_and_merge_ngramlists
from
gargantext.util.group_tools
import
query_grouped_ngrams
class List(APIView):
    """
    Placeholder view: it defines no handler of its own.

    see already available API query api/nodes/<list_id>?fields[]=ngrams
    """
    pass
class CSVLists(APIView):
    """
    GET   => CSV exports of all lists of a corpus
    POST  => CSV import into existing lists as "post"
    PATCH => internal import into existing lists (?POSSIBILITY put it in another class ?)
    """

    # short list name (as used in the "todo" parameter) -> list node typename
    _LIST_TYPES = {
        'map': 'MAPLIST',
        'main': 'MAINLIST',
        'stop': 'STOPLIST',
    }

    @staticmethod
    def _unauthorized():
        """Build the 401 response shared by POST and PATCH."""
        res = HttpResponse("Unauthorized")
        res.status_code = 401
        return res

    def get(self, request):
        """Export every list of the corpus named by the "corpus" parameter
        as a CSV attachment."""
        params = get_parameters(request)
        corpus_id = int(params.pop("corpus"))
        corpus_node = cache.Node[corpus_id]

        # response is file-like + headers
        response = HttpResponse(content_type='text/csv')
        response['Content-Disposition'] = 'attachment; filename="corpus-%i_gargantext_term_list.csv"' % corpus_id

        # fill the response with the data
        export_ngramlists(corpus_node, fname=response, titles=True)
        return response

    def post(self, request):
        """
        Merge the lists of a corpus with other lists from a CSV source
        or from another corpus

        params in request.GET:
            onto_corpus: the corpus whose lists are getting patched
            overwrite:   when present and non-empty, passed as overwrite=True

        params in request.data:
            csvfile: the csv file

        WARNING: We assume we checked the file size client-side before upload

        The import itself is handed to `scheduled`; the response only
        acknowledges the request.
        """
        if not request.user.is_authenticated():
            return self._unauthorized()

        # the corpus with the target lists to be patched
        params = get_parameters(request)
        corpus_id = int(params.pop("onto_corpus"))
        corpus_node = cache.Node[corpus_id]

        if request.user.id != corpus_node.user_id:
            return self._unauthorized()

        # request also contains the file
        # csv_file has type django.core.files.uploadedfile.InMemoryUploadedFile
        # ----------------------
        csv_file = request.data['csvfile']
        try:
            csv_contents = csv_file.read().decode("UTF-8").split("\n")
        finally:
            # release the upload even when reading or decoding fails
            csv_file.close()

        # import the csv
        log_msg = "Async generation"
        scheduled(import_and_merge_ngramlists)(
            csv_contents,
            corpus_node.id,
            overwrite=bool(params.get('overwrite')),
        )
        return JsonHttpResponse({
            'log': log_msg,
        }, 200)

    def patch(self, request):
        """
        A copy of POST (merging list) but with the source == just an internal corpus_id

        params in request.GET:
            onto_corpus:  the corpus whose lists are getting patched
            from_corpus:  the corpus from which we take the source lists to merge in
            todo:         comma-separated list types ("map", "main", "stop") to merge in
            overwrite:    when present and non-empty, the merged list types
                          replace the originals

        Answers 400 when a list type is unknown, when the source corpus
        lacks one of the requested lists, or when the merge itself fails.
        """
        if not request.user.is_authenticated():
            return self._unauthorized()

        params = get_parameters(request)

        # the corpus with the target lists to be patched
        corpus_id = int(params.pop("onto_corpus"))
        corpus_node = cache.Node[corpus_id]

        if request.user.id != corpus_node.user_id:
            return self._unauthorized()

        # internal DB retrieve source_lists
        source_corpus_id = int(params.pop("from_corpus"))
        source_node = cache.Node[source_corpus_id]

        todo_lists = params.pop("todo").split(',')   # ex: ['map', 'stop']
        unknown = [key for key in todo_lists if key not in self._LIST_TYPES]
        if unknown:
            # used to surface as an unhandled KeyError
            return JsonHttpResponse({
                'err': "unknown list type(s): %s" % ', '.join(unknown),
            }, 400)

        wanted = [(key, self._LIST_TYPES[key]) for key in todo_lists]
        # add the groupings too
        wanted.append(('groupings', "GROUPLIST"))

        source_lists = {}
        for key, typename in wanted:
            list_node = source_node.children(typename).first()
            if list_node is None:
                # used to surface as an unhandled AttributeError
                return JsonHttpResponse({
                    'err': "source corpus has no %s" % typename,
                }, 400)
            if key == 'groupings':
                source_lists[key] = Translations(list_node.id)
            else:
                source_lists[key] = UnweightedList(list_node.id)

        # attempt to merge and send response
        try:
            # merge the source_lists onto those of the target corpus
            # (a copy: the requested list must not be modified below)
            del_originals = list(todo_lists) if bool(params.get('overwrite')) else []
            if len(del_originals) == len(self._LIST_TYPES):
                del_originals.append('groupings')
            log_msg = merge_ngramlists(source_lists,
                                       onto_corpus=corpus_node,
                                       del_originals=del_originals)
            return JsonHttpResponse({
                'log': log_msg,
            }, 200)
        except Exception as e:
            return JsonHttpResponse({
                'err': str(e),
            }, 400)
class
GroupChange
(
APIView
):
"""
Modification of some groups
(typically new subform nodes under a mainform)
USAGE EXEMPLE:
HOST/api/ngramlists/groups?node=43
vvvvvv
group node
to modify
We use PUT HTTP method to send group data to DB and DELETE to remove them.
They both use same data format in the url (see links_to_couples).
No chained effects : simply adds or deletes rows of couples
NB: request.user is also checked for current authentication status
"""
def
initial
(
self
,
request
):
"""
Before dispatching to post() or delete()
Checks current user authentication to prevent remote DB manipulation
"""
if
not
request
.
user
.
is_authenticated
():
raise
Http404
()
# can't use return in initial() (although 401 maybe better than 404)
# can't use @requires_auth because of positional 'self' within class
def
links_to_couples
(
self
,
params
):
"""
IN (dict from url params)
---
params = {
"mainform_A": ["subform_A1"]
"mainform_B": ["subform_B1,subform_B2,subform_B3"]
...
}
OUT (for DB rows)
----
couples = [
(mainform_A , subform_A1),
(mainform_B , subform_B1),
(mainform_B , subform_B2),
(mainform_B , subform_B3),
...
]
"""
couples
=
[]
for
(
mainform_id
,
subforms_ids
)
in
params
.
items
():
for
subform_id
in
subforms_ids
[
0
]
.
split
(
','
):
# append the couple
couples
.
append
((
int
(
mainform_id
),
int
(
subform_id
)))
return
couples
def put(self, request):
    """
    Add some group elements to a group node
      => adds new couples from GroupsBuffer._to_add of terms view

    TODO see use of util.lists.Translations

    Parameters are all in the url (for symmetry with DELETE method)
       api/ngramlists/groups?node=783&1228[]=891,1639
         => creates 1228 - 891
                and 1228 - 1639

    general format is:   mainform_id[]=subform_id1,subform_id2 etc
         => creates mainform_id - subform_id1
                and mainform_id - subform_id2

    NB: also checks if the couples exist before because the ngram table
        will send the entire group (old existing links + new links)

    Responds with {'count_added': <number of couples inserted>}.
    """
    # everything comes from the url; 'node' is the group node,
    # all remaining params describe the links to create
    url_params = get_parameters(request)
    group_node = url_params.pop('node')
    couples = self.links_to_couples(url_params)

    # local version of "insert if not exists" -------------------->8--------
    # (1) which of the requested couples are already stored for this node?
    couple_columns = tuple_(NodeNgramNgram.ngram1_id, NodeNgramNgram.ngram2_id)
    stored_rows = (
        session.query(NodeNgramNgram)
               .filter(NodeNgramNgram.node_id == group_node)
               .filter(couple_columns.in_(couples))
               .all()
    )
    already_there = {(row.ngram1_id, row.ngram2_id) for row in stored_rows}

    # (2) keep only the couples that are not stored yet
    couples_to_add = [couple for couple in couples if couple not in already_there]

    # (3) add new groupings (every new link gets weight 1.0)
    new_rows = (
        (group_node, mainform, subform, 1.0)
        for (mainform, subform) in couples_to_add
    )
    bulk_insert(
        NodeNgramNgram,
        ('node_id', 'ngram1_id', 'ngram2_id', 'weight'),
        new_rows
    )
    # ------------------------------------------------------------>8--------

    return JsonHttpResponse({'count_added': len(couples_to_add)}, 200)
def delete(self, request):
    """
    Within a groupnode, deletes some group elements from some groups

    The data format is the same as for the PUT method: everything is
    in the url ('node' plus mainform_id[]=subform_id1,subform_id2 links).

    Responds with {'count_removed': <number of rows deleted>}.
    """
    # 'node' is the group node, the other url params are links to remove
    url_params = get_parameters(request)
    group_node = url_params.pop('node')
    couples_to_remove = self.links_to_couples(url_params)

    # remove selectively group_couples
    # using IN is correct in this case: list of ids is short and external
    # see stackoverflow.com/questions/444475/
    couple_columns = tuple_(NodeNgramNgram.ngram1_id, NodeNgramNgram.ngram2_id)
    doomed_rows = (
        session.query(NodeNgramNgram)
               .filter(NodeNgramNgram.node_id == group_node)
               .filter(couple_columns.in_(couples_to_remove))
    )

    n_removed = doomed_rows.delete(synchronize_session=False)
    session.commit()

    return JsonHttpResponse({'count_removed': n_removed}, 200)
class ListChange(APIView):
    """
    Any ngram action on standard NodeNgram lists (MAIN, MAP, STOP)

    USAGE EXAMPLE:
    HOST/api/ngramlists/change?list=42&ngrams=1,2,3,4,5
                               vvvvvv         ||||||
                               old list       vvvvvv
                               to modify      new list items
                                  |              |
                                  v              v
    2 x UnweightedLists:   self.base_list   self.change_list

    We use DEL/PUT HTTP methods to differentiate the 2 basic rm/add actions
    They rely only on inline parameters (no need for payload data)

    No chained effects: eg removing from MAPLIST will not remove
                        automatically from associated MAINLIST

    NB: request.user is also checked for current authentication status
    """

    def initial(self, request):
        """
        Before dispatching to put(), delete()...

        1) Checks current user authentication to prevent remote DB manipulation
        2) Prepares self.params, self.base_list and self.change_list

        When the url carries no usable "ngrams" parameter, the ngram ids
        are read from the request payload (request.data['ngrams']).

        Raises:
            Http404: the user is not authenticated.
            ValidationException: "list" is missing/invalid, or no ngram
                ids could be read from either the url or the payload.
        """
        if not request.user.is_authenticated():
            # can't use return in initial() (although 401 maybe better than 404)
            # can't use @requires_auth because of positional 'self' within class
            raise Http404()

        # get validated params
        self.params = get_parameters(request)

        (self.base_list, self.change_list) = ListChange._validate(self.params)

        if not len(self.change_list.items):
            # change_list can be in payload too
            #
            # NB: ''.split(',') is [''] (never empty) and int('') raises, so
            #     a missing or malformed payload has to be detected through
            #     the exceptions rather than by testing for an empty list.
            try:
                payload_ngrams = request.data['ngrams']
                change_ngram_ids = [int(n) for n in payload_ngrams.split(',')]
            except (KeyError, AttributeError, TypeError, ValueError):
                raise ValidationException(
                    'The "ngrams" parameter requires one or more ngram_ids separated by comma'
                )
            self.change_list = UnweightedList(change_ngram_ids)

    def put(self, request):
        """
        Adds one or more ngrams to a list.

        NB: we assume ngram_ids don't contain subforms !!
            (this assumption is not checked here because it would be
             slow: if you want to add a subform, send the mainform's id)

        Responds (201) with the parameters and 'count_added', the size
        difference between the new list and the base list.
        """
        # union of items ----------------------------
        new_list = self.base_list + self.change_list
        # -------------------------------------------

        # save under the id of the base list
        new_list.save(self.base_list.id)

        return JsonHttpResponse({
            'parameters': self.params,
            'count_added': len(new_list.items) - len(self.base_list.items),
        }, 201)

    def delete(self, request):
        """
        Removes one or more ngrams from a list.

        Responds (200) with the parameters and 'count_removed', the size
        difference between the base list and the new list.
        """
        # removal (set difference) ------------------
        new_list = self.base_list - self.change_list
        # -------------------------------------------

        # save under the id of the base list
        new_list.save(self.base_list.id)

        return JsonHttpResponse({
            'parameters': self.params,
            'count_removed': len(self.base_list.items) - len(new_list.items),
        }, 200)

    @staticmethod
    def _validate(params):
        """
        Checks the "list" and "ngrams" parameters.

        "list" is mandatory and must convert to an int; it is turned into
        the base UnweightedList.

        "ngrams" is optional here (it may come from the payload instead,
        see initial()): when absent or unparsable the returned change
        list is built from an empty list of ids.

        Returns:
            (base_list, change_list): two UnweightedList objects.

        Raises:
            ValidationException: "list" is missing or is not an integer.
        """
        if 'list' not in params:
            raise ValidationException(
                'The route /api/ngramlists/change requires a "list" '
                'parameter, for instance /api/ngramlists/change?list=42'
            )
        # if 'ngrams' not in params:
        #     raise ValidationException('The route /api/ngramlists/change requires an "ngrams"\
        #                                parameter, for instance /api/ngramlists/change?ngrams=1,2,3,4')

        # 2 x retrieval => 2 x UnweightedLists
        # ------------------------------------
        try:
            base_list_id = int(params['list'])
        except (TypeError, ValueError):
            raise ValidationException('The "list" parameter requires an existing list id.')
        # UnweightedList retrieved by id
        base_list = UnweightedList(base_list_id)

        try:
            change_ngram_ids = [int(n) for n in params['ngrams'].split(',')]
        except (KeyError, AttributeError, TypeError, ValueError):
            # ngrams no longer mandatory inline, see payload check afterwards
            change_ngram_ids = []
        # UnweightedList created from items
        change_list = UnweightedList(change_ngram_ids)

        return (base_list, change_list)
class MapListGlance(APIView):
    """
    Fast infos about the maplist only

    HOST/api/ngramlists/glance?corpus=2
    HOST/api/ngramlists/glance?maplist=92&scoring=94

    REST Parameters:
        "maplist=92"
            the maplist to retrieve (requires "scoring" as well)
        "corpus=ID"
            alternatively, the corpus to which the maplist belongs
        "scoring=ID"
            the scoring node; optional with "corpus" (defaults to the
            first OCCURRENCES child of the corpus)
    """

    def get(self, request):
        """
        Return the ngram infos and the member ids of one maplist.

        Raises:
            ValidationException: neither "corpus" nor the
                "maplist" + "scoring" pair was provided.
        """
        parameters = get_parameters(request)

        maplist_id = None
        scores_id = None

        if "corpus" in parameters:
            corpus_id = parameters['corpus']
            corpus = cache.Node[corpus_id]
            maplist_id = corpus.children('MAPLIST').first().id
            # with a corpus_id, the explicit scoring pointer is optional
            if "scoring" in parameters:
                scores_id = parameters['scoring']
            else:
                scores_id = corpus.children('OCCURRENCES').first().id

        elif "maplist" in parameters and "scoring" in parameters:
            # read the key that was actually tested above
            # (this branch used to read 'mainlist' and raised KeyError)
            maplist_id = int(parameters['maplist'])
            scores_id = int(parameters['scoring'])
        else:
            raise ValidationException(
                "A 'corpus' id or 'maplist' id is required, and a 'scoring' for occurences counts"
            )

        ngraminfo = {}                    # ngram details sorted per ngram id
        listmembers = {'maplist': []}     # ngram ids sorted per list name

        # infos for all ngrams from maplist
        map_ngrams = query_list(maplist_id, details=True,
                                scoring_metric_id=scores_id).all()

        # ex:  [(8805, 'mean age', 4.0),
        #        (1632, 'activity', 4.0),
        #        (8423, 'present', 2.0),
        #        (2928, 'objective', 2.0)]

        # shortcut to useful function during loop
        add_to_members = listmembers['maplist'].append

        for ng in map_ngrams:
            ng_id = ng[0]
            # everything after the id is kept as the ngram's details
            ngraminfo[ng_id] = ng[1:]

            # maplist ngrams will already be <=> ngraminfos
            # but the client side expects a membership lookup
            # as when there are multiple lists or some groupings
            add_to_members(ng_id)

        return JsonHttpResponse({
            'ngraminfos': ngraminfo,
            'listmembers': listmembers,
            'links': {},   # no grouping links sent during glance (for speed)
            'nodeids': {
                'mainlist': None,
                'maplist': maplist_id,
                'stoplist': None,
                'groups': None,
                'scores': None,
            }
        })
class ListFamily(APIView):
    """
    Compact combination of *multiple* list info
         custom made for the "terms" view
    ---
    Sends all JSON info of a collection of the 4 list types of a corpus
    (or for any combination of lists that go together):
      - a mainlist
      - an optional stoplist
      - an optional maplist
      - an optional grouplist

    USAGE EXAMPLES
    HOST/api/ngramlists/family?corpus=2
    HOST/api/ngramlists/family?corpus=2&head=10
    HOST/api/ngramlists/family?mainlist=91&scoring=94
    HOST/api/ngramlists/family?mainlist=91&scoring=94&head=10
    HOST/api/ngramlists/family?mainlist=91&stoplist=90&scoring=94
     etc.

    REST Parameters:
        "head=20"
            use pagination to only load the k top ngrams of the mainlist
            (useful for fast loading of terms view) [CURRENTLY NOT USED]
        "corpus=ID"
            the corpus id to retrieve all 4 lists
        "scoring=ID"
            the scoring node (defaults to the OCCURRENCES child of the corpus)
        "mainlist=ID&scoring=ID[&stoplist=ID&groups=ID&maplist=ID]"
            alternative call syntax without specifying a corpus
            (uses all explicit IDs of the lists => gives the possibility for custom term views)
    """

    def get(self, request):
        """
        Return ngram infos, list memberships, grouping links and the
        node ids of the lists that were used.

        Raises:
            ValidationException: neither "corpus" nor the
                "mainlist" + "scoring" pair was provided.
        """
        parameters = get_parameters(request)
        glance_limit = None
        mainlist_id = None
        scores_id = None
        groups_id = None
        other_list_ids = {'maplist': None, 'stoplist': None}

        # 1) retrieve a mainlist_id and other lists
        ##########################################

        # simple request: just refers to the parent corpus
        # ------------------------------------------------
        if "corpus" in parameters:
            corpus_id = parameters['corpus']
            corpus = cache.Node[corpus_id]
            # with a corpus_id, the explicit scoring pointer is optional
            if "scoring" in parameters:
                scores_id = parameters['scoring']
            else:
                scores_id = corpus.children('OCCURRENCES').first().id
            # retrieve the family of lists that have corpus as parent
            mainlist_id = corpus.children('MAINLIST').first().id
            groups_id = corpus.children('GROUPLIST').first().id
            other_list_ids['stoplist'] = corpus.children('STOPLIST').first().id
            other_list_ids['maplist'] = corpus.children('MAPLIST').first().id

        # custom request: refers to each list individually
        # -------------------------------------------------
        elif "mainlist" in parameters and "scoring" in parameters:
            mainlist_id = parameters['mainlist']
            scores_id = parameters['scoring']
            groups_id = None
            if 'groups' in parameters:
                # use the groups node itself
                # (this used to be read from 'scoring' by mistake)
                groups_id = parameters['groups']
            for k in ['stoplist', 'maplist']:
                if k in parameters:
                    other_list_ids[k] = parameters[k]

        # or request has an error
        # -----------------------
        else:
            raise ValidationException(
                "Either a 'corpus' parameter or 'mainlist' & 'scoring' params are required"
            )

        # 2) get the infos for each list
        ################################
        ngraminfo = {}     # ngram details sorted per ngram id
        linkinfo = {}      # ngram groups sorted per ngram id
        listmembers = {}   # ngram ids sorted per list name

        if "head" in parameters:
            # head <=> only mainlist AND only k top ngrams
            glance_limit = int(parameters['head'])
            mainlist_query = query_list(mainlist_id, details=True,
                                        pagination_limit=glance_limit,
                                        scoring_metric_id=scores_id)
        else:
            # infos for all ngrams from mainlist
            mainlist_query = query_list(mainlist_id, details=True,
                                        scoring_metric_id=scores_id)
            # infos for grouped ngrams, absent from mainlist
            hidden_ngrams_query = query_grouped_ngrams(groups_id, details=True)

            # infos for stoplist terms, absent from mainlist
            stop_ngrams_query = query_list(other_list_ids['stoplist'], details=True,
                                           scoring_metric_id=scores_id)

        # and for the other lists (stop and map)
        # no details needed here, just the member ids
        for li in other_list_ids:
            li_elts = query_list(other_list_ids[li], details=False).all()
            # simple array of ngram_ids
            listmembers[li] = [ng[0] for ng in li_elts]

        # and the groupings
        if groups_id:
            links = Translations(groups_id)
            linkinfo = links.groups

        # list of ngrams whose details go into the response
        ngrams_which_need_detailed_info = []
        if "head" in parameters:
            # head triggered simplified form: just the top of the mainlist
            # TODO add maplist membership
            ngrams_which_need_detailed_info = mainlist_query.all()
        else:
            ngrams_which_need_detailed_info = (
                mainlist_query.all()
                + hidden_ngrams_query.all()
                + stop_ngrams_query.all()
            )

        # the output form of details is:
        # ngraminfo[id] => [term, weight]
        for ng in ngrams_which_need_detailed_info:
            ng_id = ng[0]
            ngraminfo[ng_id] = ng[1:]

        # NB the client js will sort mainlist ngs from hidden ngs after ajax
        #    using linkinfo (otherwise needs redundant listmembers for main)

        return JsonHttpResponse({
            'ngraminfos': ngraminfo,
            'listmembers': listmembers,
            'links': linkinfo,
            'nodeids': {
                'mainlist': mainlist_id,
                'maplist': other_list_ids['maplist'],
                'stoplist': other_list_ids['stoplist'],
                'groups': groups_id,
                'scores': scores_id,
            }
        })
gargantext/views/api/ngrams.py
deleted
100644 → 0
View file @
8d42b26a
from
gargantext.util.http
import
ValidationException
,
APIView
\
,
get_parameters
,
JsonHttpResponse
\
,
HttpResponse
from
gargantext.util.db
import
session
,
func
from
gargantext.util.db_cache
import
cache
from
gargantext.models
import
Node
,
Ngram
,
NodeNgram
,
NodeNgramNgram
from
sqlalchemy.orm
import
aliased
from
re
import
findall
# ngrams put() will implement same text cleaning procedures as toolchain
from
gargantext.util.toolchain.parsing
import
normalize_chars
from
gargantext.util.toolchain.ngrams_extraction
import
normalize_forms
# for indexing
from
gargantext.util.toolchain.ngrams_addition
import
index_new_ngrams
class ApiNgrams(APIView):
    """Listing (GET) and creation (PUT) of ngrams."""

    def get(self, request):
        """
        Used for analytics
        ------------------
        Get ngram listing + counts in a given scope

        possible inline parameters
        --------------------------
        @param startwith=<prefix>     keep terms starting with the prefix
        @param contain=<substring>    keep terms containing the substring
        @param corpus_id=<id,id,...>  keep ngrams of nodes whose parent is
                                      one of these ids
        @param ngram_id=<id,id,...>   keep only these ngrams
        @param offset=<int>           pagination start (default 0)
        @param limit=<int>            page size (default 20)

        The response holds a 'pagination' dict (offset, limit, total) and
        a 'data' list of {'id', 'terms', 'count'} sorted by decreasing
        summed weight, then by terms.
        """
        # query ngrams
        ngrams_query = (session
            .query(
                Ngram.id,
                Ngram.terms,
                func.sum(NodeNgram.weight).label('count')
            )
            .join(NodeNgram, NodeNgram.ngram_id == Ngram.id)
            .join(Node, Node.id == NodeNgram.node_id)
            .group_by(Ngram.id, Ngram.terms)
            # .group_by(Ngram)
            .order_by(func.sum(NodeNgram.weight).desc(), Ngram.terms)
        )

        # filters
        if 'startwith' in request.GET:
            ngrams_query = ngrams_query.filter(
                Ngram.terms.startswith(request.GET['startwith'])
            )
        if 'contain' in request.GET:
            ngrams_query = ngrams_query.filter(
                Ngram.terms.contains(request.GET['contain'])
            )
        if 'corpus_id' in request.GET:
            corpus_id_list = list(map(int, request.GET.get('corpus_id', '').split(',')))
            if corpus_id_list and corpus_id_list[0]:
                ngrams_query = ngrams_query.filter(Node.parent_id.in_(corpus_id_list))
        if 'ngram_id' in request.GET:
            ngram_id_list = list(map(int, request.GET.get('ngram_id', '').split(',')))
            if ngram_id_list and ngram_id_list[0]:
                ngrams_query = ngrams_query.filter(Ngram.id.in_(ngram_id_list))

        # pagination
        offset = int(request.GET.get('offset', 0))
        limit = int(request.GET.get('limit', 20))
        total = ngrams_query.count()

        # return formatted result
        # (rows are unpacked positionally: a 'count' attribute lookup on a
        #  result row collides with the name of the tuple method count())
        return JsonHttpResponse({
            'pagination': {
                'offset': offset,
                'limit': limit,
                'total': total,
            },
            'data': [
                {
                    'id': ngram_id,
                    'terms': terms,
                    'count': count,
                }
                for ngram_id, terms, count in ngrams_query[offset:offset + limit]
            ],
        })

    def put(self, request):
        """
        Basic external access for *creating an ngram*
        ---------------------------------------------

         1 - checks user authentication before any changes

         2 - checks if ngram to Ngram table in DB
              if yes returns ngram_id and optionally mainform_id
              otherwise continues

         3 - adds the ngram to Ngram table in DB

         4 - (if corpus param is present)
             adds the ngram doc counts to NodeNgram table in DB
             (aka "index the ngram" throught the docs of the corpus)

         5 - returns json with:
             'msg'   => a success msg
             'text'  => the initial text content
             'term'  => the normalized text content
             'id'    => the new ngram_id
             'count' => the number of docs with the ngram in the corpus
                        (if corpus param is present)
             'group' => the mainform_id if applicable

        possible inline parameters
        --------------------------
        @param    text=<ngram_string>         [required]
        @param    corpus=<CORPUS_ID>          [optional]
        @param    testgroup (true if present) [optional, requires corpus]

        Any exception raised while looking up / inserting / indexing is
        reported as a 400 json response carrying the exception message.
        """
        # 1 - check user authentication
        if not request.user.is_authenticated():
            res = HttpResponse("Unauthorized")
            res.status_code = 401
            return res

        # the params
        params = get_parameters(request)

        if 'text' in params:
            original_text = str(params.pop('text'))
            ngram_str = normalize_forms(normalize_chars(original_text))
        else:
            raise ValidationException(
                'The route PUT /api/ngrams/ is used to create a new ngram. '
                'It requires a "text" parameter, '
                'for instance /api/ngrams?text=hydrometallurgy'
            )

        if ('testgroup' in params) and (not ('corpus' in params)):
            raise ValidationException("'testgroup' param requires 'corpus' param")

        # if we have a 'corpus' param (to do the indexing)...
        do_indexation = False
        if 'corpus' in params:
            # we retrieve the corpus...
            corpus_id = int(params.pop('corpus'))
            corpus_node = cache.Node[corpus_id]
            # and the user must also have rights on the corpus
            if request.user.id == corpus_node.user_id:
                do_indexation = True
            else:
                res = HttpResponse("Unauthorized")
                res.status_code = 401
                return res

        # number of "words" in the ngram
        # (number of runs of spaces in the normalized string, plus one)
        ngram_size = len(findall(r' +', ngram_str)) + 1

        # do the additions
        try:
            log_msg = ""
            ngram_id = None
            mainform_id = None

            preexisting = session.query(Ngram).filter(Ngram.terms == ngram_str).first()

            if preexisting is not None:
                ngram_id = preexisting.id
                log_msg += "ngram already existed (id %i)\n" % ngram_id

                # in the context of a corpus we can also check if has mainform
                # (useful for)
                if 'testgroup' in params:
                    # .first() yields a one-column row (or None when the
                    # corpus has no GROUPLIST child): the filter below needs
                    # the scalar id, not the row itself
                    grouplist_row = (session
                        .query(Node.id)
                        .filter(Node.parent_id == corpus_id)
                        .filter(Node.typename == 'GROUPLIST')
                        .first()
                    )
                    had_mainform = None
                    if grouplist_row is not None:
                        had_mainform = (session
                            .query(NodeNgramNgram.ngram1_id)
                            .filter(NodeNgramNgram.node_id == grouplist_row[0])
                            .filter(NodeNgramNgram.ngram2_id == preexisting.id)
                            .first()
                        )
                    if had_mainform:
                        mainform_id = had_mainform[0]
                        log_msg += "ngram had mainform (id %i) in this corpus" % mainform_id
                    else:
                        log_msg += "ngram was not in any group for this corpus"

            else:
                # 2 - insert into Ngrams
                new_ngram = Ngram(terms=ngram_str, n=ngram_size)
                session.add(new_ngram)
                session.commit()
                ngram_id = new_ngram.id
                log_msg += "ngram was added with new id %i\n" % ngram_id

            # 3 - index the term
            if do_indexation:
                n_added = index_new_ngrams([ngram_id], corpus_node)
                log_msg += 'ngram indexed in corpus %i\n' % corpus_id

            return JsonHttpResponse({
                'msg': log_msg,
                'text': original_text,
                'term': ngram_str,
                'id': ngram_id,
                'group': mainform_id,
                'count': n_added if do_indexation else 'no corpus provided for indexation'
            }, 200)

        # just in case
        except Exception as e:
            return JsonHttpResponse({
                'msg': str(e),
                'text': original_text
            }, 400)
gargantext/views/api/nodes.py
deleted
100644 → 0
View file @
8d42b26a
from
gargantext.models
import
Node
,
Ngram
,
NodeNgram
,
NodeNodeNgram
,
NodeNode
from
gargantext.constants
import
NODETYPES
,
DEFAULT_N_DOCS_HAVING_NGRAM
from
gargantext.util.db
import
session
,
delete
,
func
,
bulk_insert
from
gargantext.util.db_cache
import
cache
,
or_
from
gargantext.util.validation
import
validate
from
gargantext.util.http
import
ValidationException
,
APIView
\
,
get_parameters
,
JsonHttpResponse
,
Http404
\
,
HttpResponse
from
.api
import
*
from
collections
import
defaultdict
import
csv
# Node attributes a client may request through the 'fields' parameter
_node_available_fields = [
    'id',
    'parent_id',
    'name',
    'typename',
    'hyperdata',
    'ngrams',
    'date',
]

# Node attributes returned when no 'fields' parameter is given
_node_default_fields = [
    'id',
    'parent_id',
    'name',
    'typename',
]

# Accepted values for the 'types' filter
_node_available_types = NODETYPES

# Hyperdata keys a client may request through 'hyperdata_filter'
_hyperdata_available_fields = (
    ['title', 'source', 'abstract', 'statuses']
    + ['language_' + suffix
       for suffix in ('name', 'iso3', 'iso2', 'id')]
    + ['publication_' + unit
       for unit in ('date', 'year', 'month', 'day', 'hour', 'minute', 'second')]
)

#_node_available_formats = ['json', 'csv', 'bibex']
def _query_nodes(request, node_id=None):
    """
    Validate the request parameters and query the nodes of the user.

    The query starts from user.nodes() and is optionally restricted by
    node_id, by the 'types' parameter and by the 'parent_id' parameter.
    Results are ordered by hyperdata 'publication_date' then by id, and
    paginated with 'pagination_offset' / 'pagination_limit'
    (a limit of -1 means "no upper bound").

    Returns:
        (parameters, query, count) where parameters is the validated
        dict, query is the sliced (paginated) query result and count is
        the number of matching nodes *before* pagination.

    Raises:
        TypeError: request.user.id is None.
        ValidationException: 'hyperdata_filter' is used while
            'hyperdata' is not among the requested fields.
    """
    if request.user.id is None:
        raise TypeError("This API request must come from an authenticated user.")
    else:
        # we query among the nodes that belong to this user
        user = cache.User[request.user.id]

    # parameters validation
    # fixme: this validation does not allow custom keys in url (eg '?name=' for rename action)
    parameters = get_parameters(request)

    parameters = validate(parameters, {'type': dict, 'items': {
        'formated': {'type': str, 'required': False, 'default': 'json'},
        'pagination_limit': {'type': int, 'default': 10},
        'pagination_offset': {'type': int, 'default': 0},
        'fields': {'type': list, 'default': _node_default_fields, 'items': {
            'type': str, 'range': _node_available_fields,
        }},
        # choice of hyperdata fields
        'hyperdata_filter': {'type': list, 'required': False,
                             'items': {
                                 'type': str, 'range': _hyperdata_available_fields,
                             }},
        # optional filtering parameters
        'types': {'type': list, 'required': False, 'items': {
            'type': str, 'range': _node_available_types,
        }},
        'parent_id': {'type': int, 'required': False},
    }})

    # debug
    # print('PARAMS', parameters)

    # additional validation for hyperdata_filter
    if (('hyperdata_filter' in parameters)
            and (not ('hyperdata' in parameters['fields']))):
        raise ValidationException("Using the hyperdata_filter filter requires fields[]=hyperdata")

    # start the query
    query = user.nodes()

    # filter by id
    if node_id is not None:
        query = query.filter(Node.id == node_id)
    # filter by type
    if 'types' in parameters:
        query = query.filter(Node.typename.in_(parameters['types']))
    # filter by parent
    if 'parent_id' in parameters:
        query = query.filter(Node.parent_id == parameters['parent_id'])
    # count (taken before pagination on purpose)
    count = query.count()
    # order
    query = query.order_by(Node.hyperdata['publication_date'], Node.id)
    # paginate the query
    offset = parameters['pagination_offset']
    limit = parameters['pagination_limit']
    if limit == -1:
        query = query[offset:]
    else:
        # the slice end is a position, not a length: it must be shifted
        # by the offset or pages after the first come back short/empty
        query = query[offset:offset + limit]
    # return the result!
    # (the receiver function does the filtering of fields and hyperdata_filter)
    return parameters, query, count
def
_filter_node_fields
(
node
,
parameters
):
"""
Filters the properties of a Node object before sending them to response
@parameters: a dict comming from get_parameters
that must only contain a 'fields' key
Usually the dict looks like this :
{'fields': ['parent_id', 'id', 'name', 'typename', 'hyperdata'],
'hyperdata_filter': ['title'], 'parent_id': '55054',
'types': ['DOCUMENT'], 'pagination_limit': '15'}
History:
1) this used to be single line:
res = {field: getattr(node, field) for field in parameters['fields']}
2) it was in both NodeResource.get() and NodeListResource.get()
3) it's now expanded to add support for parameters['hyperdata_filter']
- if absent, entire hyperdata is considered as one field
(as before)
- if present, the hyperdata subfields are picked
(new)
"""
# FIXME all this filtering
# could be done in rawsql
# (in _query_nodes)
result
=
{}
for
field
in
parameters
[
'fields'
]:
# normal field or entire hyperdata
if
field
!=
'hyperdata'
or
(
not
'hyperdata_filter'
in
parameters
):
result
[
field
]
=
getattr
(
node
,
field
)
# hyperdata if needs to be filtered
else
:
this_filtered_hyp
=
{}
for
hfield
in
parameters
[
'hyperdata_filter'
]:
if
hfield
in
node
.
hyperdata
:
this_filtered_hyp
[
hfield
]
=
node
.
hyperdata
[
hfield
]
result
[
'hyperdata'
]
=
this_filtered_hyp
return
result
class Status(APIView):
    '''API endpoint that represent the current status of the node'''

    renderer_classes = (JSONRenderer, BrowsableAPIRenderer)

    def get(self, request, node_id):
        '''
        Return {"statuses": ...} read from the hyperdata of the node
        (None when the hyperdata has no "statuses" key).

        Responds 401 to anonymous users and 404 when no node with this
        id belongs to the user.
        '''
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        user = cache.User[request.user.id]
        # check_rights(request, node_id)
        # I commented check_rights because filter on user_id below does the job
        node = session.query(Node).filter(Node.id == node_id, Node.user_id == user.id).first()
        if node is None:
            return Response({"detail": "Node not Found for this user"}, status=HTTP_404_NOT_FOUND)
        else:
            # FIXME using the more generic strategy ---------------------------
            # context = format_response(node, [n for n in node.children()])
            # or perhaps ? context = format_response(None, [node])
            # -----------------------------------------------------------------

            # using a more direct strategy
            context = {}
            try:
                context["statuses"] = node.hyperdata["statuses"]
            except KeyError:
                context["statuses"] = None
            return Response(context)

    def post(self, request, data):
        '''create a new status for node (not implemented)'''
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        raise NotImplementedError

    def put(self, request, data):
        '''update status for node (not implemented)'''
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        # NOTE(review): a node lookup used to sit here but it referenced an
        # undefined name (node_id) and so failed with NameError before
        # reaching the NotImplementedError; restore it with the proper url
        # argument when the update is actually implemented.
        raise NotImplementedError

        #return Response({"detail":"Udpated status for NODE #%i " %node.id}, status=HTTP_202_ACCEPTED)

    def delete(self, request, node_id=None):
        '''
        delete status for node

        Resets the "statuses" entry of the node's hyperdata (the entry
        read by get()) to an empty list.

        node_id defaults to None only to stay compatible with the former
        signature, which lacked the argument and failed with NameError;
        without an id no node can match and the response is a 404.
        '''
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        user = cache.User[request.user.id]
        # check_rights(request, node_id)
        node = None
        if node_id is not None:
            node = session.query(Node).filter(Node.id == node_id, Node.user_id == user.id).first()
        if node is None:
            return Response({"detail": "Node not Found"}, status=HTTP_404_NOT_FOUND)

        # same key as in get(): "statuses" (this used to write "status")
        # NOTE(review): in-place mutation of hyperdata - presumably tracked
        # as a change by the model's column type; confirm
        node.hyperdata["statuses"] = []
        session.add(node)
        session.commit()
        return Response({"detail": "Deleted status for NODE #%i " % node.id}, status=HTTP_204_NO_CONTENT)
class NodeListResource(APIView):
    """Listing (json or csv) and bulk deletion of the user's nodes."""

    def get(self, request):
        """Displays the list of nodes corresponding to the query.

        The 'formated' parameter selects the output:
          - 'json': parameters, total count and the filtered records
          - 'csv' : tab-separated attachment with a fixed set of
                    hyperdata columns (a missing value gives "")

        Raises ValidationException for any other 'formated' value.
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        parameters, query, count = _query_nodes(request)

        if parameters['formated'] == 'json':
            # FIXME filter in rawsql in _query_nodes
            records_array = [
                _filter_node_fields(node, parameters) for node in query
            ]

            return JsonHttpResponse({
                'parameters': parameters,
                'count': count,
                'records': records_array
            })

        elif parameters['formated'] == 'csv':
            # TODO add support for fields and hyperdata_filter
            response = HttpResponse(content_type='text/csv')
            response['Content-Disposition'] = 'attachment; filename="Gargantext_Corpus.csv"'

            writer = csv.writer(response, delimiter='\t', quoting=csv.QUOTE_MINIMAL)

            keys = ['title', 'source',
                    'publication_year', 'publication_month', 'publication_day',
                    'abstract', 'authors']

            writer.writerow(keys)

            for node in query:
                data = list()
                for key in keys:
                    try:
                        data.append(node.hyperdata[key])
                    except (KeyError, TypeError):
                        # key absent (or no hyperdata at all): empty cell
                        data.append("")
                writer.writerow(data)

            return response

        # any other value used to fall through and return None
        raise ValidationException(
            "Unsupported 'formated' value: use 'json' or 'csv'"
        )

    def post(self, request):
        """Create a new node.
        NOT IMPLEMENTED
        """

    def delete(self, request):
        """Removes the list of nodes corresponding to the query.

        The node ids come from the 'ids' parameter (integers separated
        by comma). Only nodes whose user_id is the id of the requesting
        user are deleted; the response gives the number of deleted rows.

        TODO : Should be a delete method!
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        parameters = get_parameters(request)
        parameters = validate(parameters, {'ids': list})
        try:
            node_ids = [int(n) for n in parameters['ids'].split(',')]
        except (AttributeError, KeyError, TypeError, ValueError):
            raise ValidationException('"ids" needs integers separated by comma.')

        try:
            # the ids are client-supplied: restrict the deletion to the
            # nodes owned by the requesting user, as the single-node
            # delete does through _query_nodes
            result = session.execute(
                delete(Node)
                .where(Node.id.in_(node_ids))
                .where(Node.user_id == request.user.id)
            )
            session.commit()
        finally:
            session.close()

        return JsonHttpResponse({'deleted': result.rowcount})
class NodeListHaving(APIView):
    '''
    Gives a list of nodes according to its score which is related
    to some specific ngrams.
    TODO: implement other options (offset)

    Simple implementation:
    Takes IDs of corpus and ngram and returns list of relevent documents in json format
    according to TFIDF score (order is decreasing).

    2016-09: add total counts to output json
    '''

    def get(self, request, corpus_id):
        '''
        Return {'count': total matches, 'records': top documents}.

        Each record holds the document id, its summed score and the
        hyperdata keys among title, publication_date, source, authors
        and fields that the document has. At most
        DEFAULT_N_DOCS_HAVING_NGRAM records are returned.

        Raises:
            ValidationException: "ngram_ids" is not a comma-separated
                list of integers.
            Http404: the corpus does not exist or has no TFIDF-CORPUS
                child node.
        '''
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        parameters = get_parameters(request)
        parameters = validate(parameters, {'score': str, 'ngram_ids': list})

        try:
            ngram_ids = [int(n) for n in parameters['ngram_ids'].split(',')]
        except (AttributeError, KeyError, TypeError, ValueError):
            raise ValidationException('"ngram_ids" needs integers separated by comma.')

        limit = DEFAULT_N_DOCS_HAVING_NGRAM
        nodes_list = []

        corpus = session.query(Node).filter(Node.id == corpus_id).first()
        if corpus is None:
            # unknown corpus: answer 404 instead of failing on corpus.id
            raise Http404()

        tfidf_row = (session
            .query(Node.id)
            .filter(
                Node.typename == "TFIDF-CORPUS",
                Node.parent_id == corpus.id
            )
            .first()
        )
        if tfidf_row is None:
            # no scores computed for this corpus: nothing to rank with
            raise Http404()
        tfidf_id = tfidf_row[0]

        # request data
        nodes_query = (session
            .query(Node, func.sum(NodeNodeNgram.score))
            .join(NodeNodeNgram, NodeNodeNgram.node2_id == Node.id)
            .filter(NodeNodeNgram.node1_id == tfidf_id)
            .filter(Node.typename == 'DOCUMENT', Node.parent_id == corpus.id)
            .filter(or_(*[NodeNodeNgram.ngram_id == ngram_id for ngram_id in ngram_ids]))
            .group_by(Node)
        )

        # get the total count before applying limit
        nodes_count = nodes_query.count()

        # now the query with the limit
        nodes_results_query = (nodes_query
            .order_by(func.sum(NodeNodeNgram.score).desc())
            .limit(limit)
        )

        for node, score in nodes_results_query:
            node_dict = {
                'id': node.id,
                'score': score,
            }
            for key in ('title', 'publication_date', 'source', 'authors', 'fields'):
                if key in node.hyperdata:
                    node_dict[key] = node.hyperdata[key]
            nodes_list.append(node_dict)

        return JsonHttpResponse({
            'count': nodes_count,
            'records': nodes_list
        })
class NodeResource(APIView):
    """Read, delete or rename one node of the authenticated user."""

    # contains a check on user.id (within _query_nodes)
    def get(self, request, node_id):
        """
        Return the requested fields of the node.

        Responds 401 to anonymous users; raises Http404 when the node
        is not among the nodes of the user.
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        parameters, query, count = _query_nodes(request, node_id)
        if not len(query):
            raise Http404()
        node = query[0]

        return JsonHttpResponse(_filter_node_fields(node, parameters))

    # contains a check on user.id (within _query_nodes)
    def delete(self, request, node_id):
        """
        Delete the node and respond with the number of deleted rows.

        Responds 401 to anonymous users; raises Http404 when the node
        is not among the nodes of the user.
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        parameters, query, count = _query_nodes(request, node_id)
        if not len(query):
            raise Http404()

        try:
            result = session.execute(
                delete(Node).where(Node.id == node_id)
            )
            session.commit()
        finally:
            session.close()

        return JsonHttpResponse({'deleted': result.rowcount})

    def post(self, request, node_id):
        """
        For the moment, only used to rename a node

        params in request.GET:
            none (not allowed by _query_nodes validation)

        params in request.DATA:
            ["name": the_new_name_str]

        Responses:
            200 {'renamed': new_name}  on success
            400 when the payload has no 'name'
            401 for anonymous users
            409 when a node with this name already exists
        Raises Http404 when the node is not among the nodes of the user.

        TODO 1 factorize with .projects.ProjectView.put and .post (thx c24b)
        TODO 2 allow other changes than name
        """
        if not request.user.is_authenticated():
            # can't use @requires_auth because of positional 'self' within class
            return HttpResponse('Unauthorized', status=401)

        # contains a check on user.id (within _query_nodes)
        parameters, query, count = _query_nodes(request, node_id)

        # same behaviour as get() and delete() for an unknown node
        # (pop() on the empty result used to fail with IndexError)
        if not len(query):
            raise Http404()

        the_node = query.pop()

        # retrieve the name
        if 'name' in request.data:
            new_name = request.data['name']
        else:
            return JsonHttpResponse({
                "detail": "A 'name' parameter is required in data payload"
            }, 400)

        # check for conflicts
        # NOTE(review): the name is compared against every node, whoever
        # owns it and whatever its type - confirm this is intended
        other = session.query(Node).filter(Node.name == new_name).count()
        if other > 0:
            return JsonHttpResponse({
                "detail": "A node with this name already exists"
            }, 409)

        # normal case: do the renaming
        else:
            setattr(the_node, 'name', new_name)
            session.commit()
            return JsonHttpResponse({
                'renamed': new_name
            }, 200)
class
CorpusFavorites
(
APIView
):
"""Retrieve/update/delete one or several docs from a corpus associated favs
(url: GET /api/nodes/<corpus_id>/favorites)
=> lists all favorites
(url: GET /api/nodes/<corpus_id>/favorites?docs[]=doc1,doc2)
=> checks for each doc if it is in favorites
(url: DEL /api/nodes/<corpus_id>/favorites?docs[]=doc1,doc2)
=> removes each doc from favorites
(url: PUT /api/nodes/<corpus_id>/favorites?docs[]=doc1,doc2)
=> add each doc to favorites
"""
def _get_fav_node(self, corpus_id):
    """
    Look up the FAVORITES child of corpus ``corpus_id``.

    Stores the corpus node on ``self.corpus`` as a side effect and raises
    ``ValidationException`` when the node is not of type CORPUS.

    NB: the returned fav_node can be None if no node is defined

    this query could be faster if we didn't check that corpus_id is a CORPUS
    ie: session.query(Node)
        .filter(Node.parent_id==corpus_id)
        .filter(Node.typename =='FAVORITES')
    """
    candidate = cache.Node[corpus_id]

    if candidate.typename != 'CORPUS':
        # '%' binds tighter than '+': only the second literal is formatted
        raise ValidationException(
            "Only nodes of type CORPUS can accept favorites queries"
            + " (but this node has type %s)..." % candidate.typename
        )

    self.corpus = candidate
    return self.corpus.children('FAVORITES').first()
def get(self, request, corpus_id):
    """
    2 possibilities with/without param

    1) GET http://localhost:8000/api/nodes/2/favorites
    (returns the full list of fav docs within corpus 2)

    2) GET http://localhost:8000/api/nodes/2/favorites?docs=53,54
    (will test if docs 53 and 54 are among the favorites of corpus 2)
    (returns the intersection of fav docs with [53,54], plus the
     requested ids that are not favorites under 'missing')

    Answers 401 to unauthenticated requesters.
    """
    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    fav_node = self._get_fav_node(corpus_id)

    # parameters are validated before looking at fav_node, so a bad
    # request fails the same way whether or not favorites exist
    req_params = validate(
        get_parameters(request),
        {'docs': list, 'default': ""}
    )

    if fav_node is None:
        return JsonHttpResponse({
            'warning': 'No favorites node is defined for this corpus (\'%s\')'
                       % self.corpus.name,
            'favdocs': [],
        })

    if 'docs' not in req_params:
        # each docnode associated to the favnode of this corpusnode
        q = (session
             .query(NodeNode.node2_id)
             .filter(NodeNode.node1_id == fav_node.id))
        return JsonHttpResponse({
            'favdocs': [row.node2_id for row in q.all()],
        })

    nodeids_to_check = [int(did) for did in req_params['docs'].split(',')]

    # each docnode from the input list, if it is associated to the favnode
    q = (session
         .query(NodeNode.node2_id)
         .filter(NodeNode.node1_id == fav_node.id)
         .filter(NodeNode.node2_id.in_(nodeids_to_check)))
    present_doc_ids = [row.node2_id for row in q.all()]

    # set membership keeps the "missing" pass linear in the request size
    present_lookup = set(present_doc_ids)
    absent_doc_ids = [did for did in nodeids_to_check
                      if did not in present_lookup]

    return JsonHttpResponse({
        'favdocs': present_doc_ids,
        'missing': absent_doc_ids,
    })
def delete(self, request, corpus_id):
    """
    DELETE http://localhost:8000/api/nodes/2/favorites?docs=53,54
    (will delete docs 53 and 54 from the favorites of corpus 2)

    Only the NodeNode rows linking the favorites node to the documents are
    removed. Answers with {'count_removed': <rows deleted>}, or with a
    warning and a count of 0 when the corpus has no favorites node.
    """
    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    # user is ok
    fav_node = self._get_fav_node(corpus_id)

    if fav_node is None:
        return JsonHttpResponse({
            'warning': 'No favorites node is defined for this corpus (\'%s\')'
                       % self.corpus.name,
            'count_removed': 0,
        })

    req_params = validate(
        get_parameters(request),
        {'docs': list, 'default': ""}
    )
    nodeids_to_delete = [int(did) for did in req_params['docs'].split(',')]

    try:
        # it deletes from favourites but not from DB
        # (`delete` is the module-level name here, not this method)
        result = session.execute(
            delete(NodeNode)
            .where(NodeNode.node1_id == fav_node.id)
            .where(NodeNode.node2_id.in_(nodeids_to_delete))
        )
        session.commit()
        response = {'count_removed': result.rowcount}
    finally:
        # the session is closed whether or not the statement succeeded
        session.close()

    return JsonHttpResponse(response)
def put(self, request, corpus_id, check_each_doc=True):
    """
    PUT http://localhost:8000/api/nodes/2/favorites?docs=53,54
    (will add docs 53 and 54 to the favorites of corpus 2)

    When ``check_each_doc`` is true, every requested id must be a DOCUMENT
    child of ``corpus_id``; otherwise ``ValidationException`` is raised and
    nothing is inserted.

    Answers with {'count_added': <number of ids requested>}, or with a
    warning and a count of 0 when the corpus has no favorites node.
    """
    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    # user is ok
    fav_node = self._get_fav_node(corpus_id)

    if fav_node is None:
        return JsonHttpResponse({
            'warning': 'No favorites node is defined for this corpus (\'%s\')'
                       % self.corpus.name,
            'count_added': 0,
        })

    req_params = validate(
        get_parameters(request),
        {'docs': list, 'default': ""}
    )
    nodeids_to_add = [int(did) for did in req_params['docs'].split(',')]

    if check_each_doc:
        # check that these really are documents of the right corpus
        # (somewhat slow => disable by default?)
        known_docs_q = (session
                        .query(Node.id)
                        .filter(Node.parent_id == corpus_id)
                        .filter(Node.typename == 'DOCUMENT'))
        known_ids = {known_doc.id for known_doc in known_docs_q.all()}

        rejected_list = [doc_node_id for doc_node_id in nodeids_to_add
                         if doc_node_id not in known_ids]
        if rejected_list:
            raise ValidationException(
                "Error on some requested docs: %s (Only nodes of type 'doc' AND belonging to corpus %i can be added to favorites.)"
                % (str(rejected_list), int(corpus_id)))

    # add them
    bulk_insert(
        NodeNode,
        ('node1_id', 'node2_id', 'score'),
        ((fav_node.id, doc_node_id, 1.0) for doc_node_id in nodeids_to_add)
    )

    # todo count really added (here: counts input param not result)
    return JsonHttpResponse({'count_added': len(nodeids_to_add)})
class
CorpusFacet
(
APIView
):
"""Loop through a corpus node's docs => do counts by a hyperdata field
(url: /api/nodes/<node_id>/facets?hyperfield=<source>)
"""
# - old url: '^project/(\d+)/corpus/(\d+)/source/sources.json$',
# - old view: tests.ngramstable.views.get_sourcess_json()
# - now generalized for various hyperdata field:
# -> source
# -> publication_year
# -> rubrique
# -> language...
def get(self, request, node_id):
    """Count the documents of corpus ``node_id`` per value of a hyperdata field.

    The field comes from the ``hyperfield`` request parameter and must be one
    of the names listed below. Answers 401 to unauthenticated requesters and
    raises ``ValidationException`` when the node is not a CORPUS.
    """
    if not request.user.is_authenticated():
        # can't use @requires_auth because of positional 'self' within class
        return HttpResponse('Unauthorized', status=401)

    # check that the node is a corpus
    #   ? faster from cache than: corpus = session.query(Node)...
    candidate = cache.Node[node_id]
    if candidate.typename != 'CORPUS':
        # '%' binds tighter than '+': only the second literal is formatted
        raise ValidationException(
            "Only nodes of type CORPUS can accept facet queries"
            + " (but this node has type %s)..." % candidate.typename
        )
    self.corpus = candidate

    # check that the hyperfield parameter makes sense
    _facet_available_subfields = [
        'source', 'publication_year', 'rubrique',
        'language_iso2', 'language_iso3', 'language_name', 'authors'
    ]
    hyperfield_rule = {'type': str, 'range': _facet_available_subfields}

    # validate() triggers an info message if subfield not in range
    parameters = validate(
        get_parameters(request),
        {'type': dict, 'items': {'hyperfield': hyperfield_rule}}
    )
    subfield = parameters['hyperfield']

    # do the aggregated sum
    xcounts, total = self._ndocs_by_facet(subfield)

    # response
    return JsonHttpResponse({
        'doc_count': total,
        'by': {subfield: xcounts},
    })
def
_ndocs_by_facet
(
self
,
subfield
=
'source'
):
"""for example on 'source'
xcounts = {'j good sci' : 25, 'nature' : 32, 'j bla bla' : 1... }"""
xcounts
=
defaultdict
(
int
)
total
=
0
for
doc
in
self
.
corpus
.
children
(
typename
=
'DOCUMENT'
):
if
subfield
in
doc
.
hyperdata
:
xcounts
[
doc
.
hyperdata
[
subfield
]]
+=
1
else
:
xcounts
[
"_NA_"
]
+=
1
total
+=
1
# the counts below could also be memoized
# // if subfield not in corpus.aggs:
# // corpus.aggs[subfield] = xcounts
return
(
xcounts
,
total
)
gargantext/views/api/projects.py
deleted
100644 → 0
View file @
8d42b26a
from
.api
import
*
#notamment APIView, check_rights, format_response
from
gargantext.util.http
import
*
from
django.core.exceptions
import
*
from
collections
import
defaultdict
from
gargantext.util.toolchain
import
*
import
copy
from
gargantext.util.db
import
session
class
ProjectList
(
APIView
):
'''API endpoint that represent a list of projects owned by a user'''
renderer_classes
=
(
JSONRenderer
,
BrowsableAPIRenderer
)
def get(self, request):
    '''GET the projects of a given user

    Answers 404 when the requesting user owns no PROJECT node.
    '''
    owner = cache.User[request.user.id]

    owned_projects = (
        session.query(Node)
        .filter(Node.typename == "PROJECT", Node.user_id == owner.id)
        .all()
    )
    if len(owned_projects) == 0:
        return Response(
            {"detail": "No projects Found for this user"},
            status=HTTP_404_NOT_FOUND,
        )

    return Response(format_response(owner, owned_projects))
def post(self, request):
    '''CREATE a new project for a given user

    Expects a non-empty "name" in the request payload.

    Responses:
        406 when "name" is missing or empty
        409 when a PROJECT node with that name already exists
        201 with the url of the new project on success
    '''
    # kept from the original although unused: the lookup still runs
    user = cache.User[request.user.id]

    try:
        # project name
        name = request.data["name"]
    except (KeyError, AttributeError):
        # a missing key raises KeyError, which the original did not catch
        return Response(
            {"detail": "Invalid POST method: \"name\" field is required "},
            status=HTTP_406_NOT_ACCEPTABLE,
        )

    if name == "":
        return Response(
            {"detail": "Invalid POST method: \"name\" field is empty "},
            status=HTTP_406_NOT_ACCEPTABLE,
        )

    # the name is compared against every PROJECT node, whoever owns it
    project = (
        session.query(Node)
        .filter(Node.typename == "PROJECT", Node.name == name)
        .first()
    )
    if project is not None:
        return Response(
            {"detail": "Project with this name already exists",
             "url": "/projects/%s" % str(project.id)},
            status=HTTP_409_CONFLICT,
        )

    new_project = Node(
        user_id=request.user.id,
        typename='PROJECT',
        name=name,
    )
    session.add(new_project)
    session.commit()
    return Response(
        {"detail": "Created",
         "url": "/projects/%s" % str(new_project.id)},
        status=HTTP_201_CREATED,
    )
def delete(self, request):
    ''' DELETE the projects of a given user

    Every PROJECT node owned by the requester is deleted and committed one
    at a time; the answer reports how many were removed.
    '''
    owner = cache.User[request.user.id]
    doomed = (
        session.query(Node)
        .filter(Node.typename == "PROJECT", Node.user_id == owner.id)
        .all()
    )
    #for project in projects:
    #    project = check_rights(request, project)

    removed_ids = []
    for project_node in doomed:
        session.delete(project_node)
        session.commit()
        removed_ids.append(project_node.id)

    detail = "Deleted %i projects" % len(removed_ids)
    return Response({"detail": detail}, status=HTTP_204_NO_CONTENT)
def put(self, request):
    '''UPDATE EVERY projects of a given user

    Applies each accepted key of the payload ("name", "date", "username")
    to every PROJECT node owned by the requester. "username" requests a
    change of owner.

    Responses:
        404 when the node matching the requested username is not found
        202 with the number of updated projects otherwise
    '''
    # kept from the original although unused: the lookup still runs
    user = cache.User[request.user.id]
    query = (
        session.query(Node)
        .filter(Node.typename == "PROJECT", Node.user_id == request.user.id)
        .all()
    )
    accepted_keys = ("name", "date", "username")

    uids = []
    for node in query:
        for key, val in request.data.items():
            # only the accepted keys may be modified
            if key not in accepted_keys:
                continue
            if key == "username":
                # change of owner
                # NOTE(review): the lookup below is kept as written; it
                # searches a PROJECT node by `Node.username`, an attribute
                # not visible in this file - confirm the intended model.
                new_owner = (
                    session.query(Node)
                    .filter(Node.typename == "PROJECT", Node.username == val)
                    .first()
                )
                if new_owner is None:
                    return Response(
                        {"detail": "No owner found for username %s" % val},
                        status=HTTP_404_NOT_FOUND,
                    )
                setattr(node, "user_id", new_owner.id)
            else:
                setattr(node, key, val)
        #node.name = request.data["name"]
        session.add(node)
        session.commit()
        uids.append(node.id)

    return Response(
        {"detail": "Updated %s projects" % len(uids)},
        status=HTTP_202_ACCEPTED,
    )
class
ProjectView
(
APIView
):
'''API endpoint that represent project detail'''
renderer_classes
=
(
JSONRenderer
,
BrowsableAPIRenderer
)
def get(self, request, project_id):
    ''' GET /api/projects/<project_id> the list of corpora given a project

    Answers 404 when the project does not exist or has no CORPUS child.
    '''
    project = session.query(Node).filter(Node.id == project_id).first()
    if project is None:
        not_found = {'detail': "PROJECT Node #%s not found" % (project_id)}
        return Response(not_found, status=HTTP_404_NOT_FOUND)

    check_rights(request, project_id)

    corpus_list = project.children('CORPUS', order=True).all()
    if len(corpus_list) == 0:
        no_corpora = {
            'detail': "No corpora found for Project Node #%s" % (project_id)
        }
        return Response(no_corpora, status=HTTP_404_NOT_FOUND)

    # resource_list = [(n["name"], n["type"], n["id"]) for n in corpus_list[0].children('RESOURCE', order=True).all()]
    # print(resource_list)
    return Response(format_response(project, corpus_list))
def delete(self, request, project_id):
    '''DELETE project

    Responses:
        404 when no node has id ``project_id``
        403 when ``check_rights`` raises for this request
        204 once the node is deleted
    '''
    node = session.query(Node).filter(Node.id == project_id).first()
    if node is None:
        return Response(
            {'detail': "PROJECT Node #%s not found" % (project_id)},
            status=HTTP_404_NOT_FOUND,
        )

    try:
        check_rights(request, project_id)
    except Exception:
        # plain literal: the message has no placeholder, so applying
        # `% project_id` to it raised TypeError instead of answering 403
        return Response({'detail': "Unauthorized"}, status=403)

    session.delete(node)
    session.commit()
    return Response(
        {"detail": "Successfully deleted Node #%s" % project_id},
        status=204,
    )
def put(self, request, project_id):
    '''UPDATE project

    Applies the request parameters "name" and "date" to the project;
    "username" (change of ownership) is accepted but not implemented.

    Responses:
        404 when the project does not exist
        406 as soon as a parameter has an empty value
        409 when a node already bears the requested name
        206 once the update is committed
    '''
    project = session.query(Node).filter(Node.id == project_id).first()
    if project is None:
        return Response(
            {'detail': "PROJECT Node #%s not found" % (project_id)},
            status=HTTP_404_NOT_FOUND,
        )
    check_rights(request, project_id)

    params = get_parameters(request)
    # print(params)
    #u_project = deepcopy(project)
    for key, val in params.items():
        if len(val) == 0:
            return Response(
                {"detail": "Invalid POST method: \"%s\" field is empty " % key},
                status=HTTP_406_NOT_ACCEPTABLE,
            )

        if key not in ["name", "date", "username"]:
            continue

        if key == "username":
            #change ownership
            #find user
            #user = session.query(Node).filter(Node.username == username, Node.typename="USER").first()
            #if user.id
            continue

        if key == "name":
            # a rename is refused when any node already has that name
            other = session.query(Node).filter(Node.name == val).count()
            if other != 0:
                return Response(
                    {"detail": "Project with this name already exists"},
                    status=HTTP_409_CONFLICT,
                )

        setattr(project, key, val)

    session.add(project)
    session.commit()
    return Response(
        {"detail": "Updated PROJECT #%s" % str(project_id)},
        status=HTTP_206_PARTIAL_CONTENT,
    )
def post(self, request, project_id):
    '''CREATE corpus

    Payload fields (request.data): "name" and "source" are mandatory,
    "query" is needed by the scan method; the file to parse is read from
    request.FILES['file']. The "method" request parameter selects the
    action: "parse", "scan" or "copy".

    Responses:
        404 unknown project, or unknown source corpus for "copy"
        406 missing/empty "name" or "source", or unknown source type
        409 a CORPUS node with this name already exists
        405 missing/unsupported method, missing file/query, or a source
            that offers no parser/crawler for the requested method
        200 parse accepted, 201 scan corpus created
    '''
    project = session.query(Node).filter(Node.id == project_id).first()
    if project is None:
        return Response(
            {'detail': "PROJECT Node #%s not found" % (project_id)},
            status=HTTP_404_NOT_FOUND,
        )
    project = check_rights(request, project_id)

    # controlling form data
    if "name" not in request.data.keys():
        return Response(
            {'detail': "CORPUS Node: field name is mandatory"},
            status=HTTP_406_NOT_ACCEPTABLE,
        )
    if "source" not in request.data.keys():
        return Response(
            {'detail': "CORPUS Node: field source is mandatory"},
            status=HTTP_406_NOT_ACCEPTABLE,
        )

    corpus_name = request.data["name"]
    corpus_source = request.data["source"]
    if corpus_name == "":
        return Response(
            {'detail': "CORPUS Node name can't be empty"},
            status=HTTP_406_NOT_ACCEPTABLE,
        )

    corpus = (
        session.query(Node)
        .filter(Node.name == corpus_name, Node.typename == "CORPUS")
        .first()
    )
    if corpus is not None:
        return Response(
            {'detail': "CORPUS Node with name '%s' already exists" % (corpus_name)},
            status=HTTP_409_CONFLICT,
        )

    if corpus_source == "" or corpus_source == 0 or corpus_source is None:
        return Response(
            {'detail': "CORPUS Node source can't be empty"},
            status=HTTP_406_NOT_ACCEPTABLE,
        )

    params = get_parameters(request)
    if "method" not in params.keys():
        #if "method" not in request.data.keys():
        return Response(
            {'detail': "CORPUS Node has not 'method' parameter"},
            status=HTTP_405_METHOD_NOT_ALLOWED,
        )
    #method = request.data["method"]
    method = params["method"]
    if method not in ["parse", "scan", "copy"]:
        return Response(
            {'detail': "CORPUS Node only parse, scan and copy 'method' are allowed"},
            status=HTTP_405_METHOD_NOT_ALLOWED,
        )

    if method == "copy":
        corpus = (
            session.query(Node)
            .filter(Node.id == corpus_source, Node.typename == "CORPUS")
            .first()
        )
        if corpus is None:
            return Response(
                {'detail': "CORPUS Node #%s doesn't exist. Fail to copy" % (str(corpus_source))},
                status=HTTP_404_NOT_FOUND,
            )
        # NOTE(review): this branch is left as written because its intent is
        # unclear. `new_corpus` is bound nowhere in this method, so the loop
        # raises NameError as soon as the source corpus has a child; the
        # subscript writes target `cloned_corpus` inside the child loop; and
        # after the commit the flow falls through to the source lookup and
        # the final 405 below. Needs a decision before "copy" is relied on.
        #cloned_corpus = {k:v for k,v in corpus if k not in ["user_id", "id", "parent_id"]}
        cloned_corpus = copy.deepcopy(corpus)
        del cloned_corpus.id
        cloned_corpus.parent_id = project_id
        cloned_corpus.user_id = request.user.id
        for child in corpus.get_children():
            #{k:getattr(corpus, k) for k in ["name", "date", "source", "hyperdata"] }
            cloned_child = copy.deepcopy(child)
            del cloned_child["id"]
            cloned_child["parent_id"] = new_corpus.id
            cloned_corpus["user_id"] = request.user.id
            cloned_corpus.add_child(cloned_child)
        session.add(cloned_corpus)
        session.commit()

    # RESOURCE
    source = get_resource(int(corpus_source))
    if source is None:
        return Response(
            {'detail': "CORPUS Node sourcetype unknown"},
            status=HTTP_406_NOT_ACCEPTABLE,
        )

    if method == "parse":
        print('PARSING')
        if "file" not in request.FILES.keys():
            return Response(
                {'detail': "CORPUS Node need a file to parse"},
                status=HTTP_405_METHOD_NOT_ALLOWED,
            )
        corpus_file = request.FILES['file']

        if "parser" not in source.keys():
            # no corpus or resource node exists at this point, so the
            # message is built from the requested name and the source name
            return Response(
                {"detail": "No Parser found for this corpus #%s of type %s"
                           % (corpus_name, source["name"])},
                405,
            )

        corpus = project.add_child(
            name=request.data["name"],
            typename='CORPUS',
            #path = corpus_file,
        )
        print("CORPUS #", corpus.id)
        session.add(corpus)
        session.commit()

        resource = Node(
            name=source["name"],
            typename='RESOURCE',
            parent_id=corpus.id,
            hyperdata={"type": source["type"],
                       "method": method,
                       "file": upload(corpus_file),
                       "query": None}
        )
        session.add(resource)
        session.commit()
        return Response(
            {"detail": "Parsing corpus #%s of type #%s"
                       % (str(corpus.id), resource.name)},
            200,
        )

    if method == "scan":
        if "crawler" not in source.keys():
            # always answer: without this branch the view could end
            # without returning a response
            return Response(
                {"detail": "No Crawler found for this corpus #%s of type %s"
                           % (corpus_name, source["name"])},
                status=HTTP_405_METHOD_NOT_ALLOWED,
            )
        if "query" not in request.data.keys():
            #corpus_file = request.FILES['file']
            return Response(
                {'detail': "CORPUS Node need a query to scan"},
                status=HTTP_405_METHOD_NOT_ALLOWED,
            )
        query = request.data['query']

        # NOTE(review): unlike the parse branch, the corpus is not added and
        # committed before its id is used - confirm add_child() persists it
        corpus = project.add_child(
            name=request.data["name"],
            typename='CORPUS',
        )
        resource = Node(
            name=source["name"],
            typename='RESOURCE',
            parent_id=corpus.id,
            # the requester id lives on request.user (as used in the copy
            # branch above), not on the request itself
            user_id=request.user.id,
            hyperdata={"type": source["type"],
                       "method": method,
                       "file": None,
                       "query": query}
        )
        session.add(resource)
        session.commit()
        return Response(
            {'detail': "CORPUS #%s created" % corpus.id},
            status=HTTP_201_CREATED,
        )

    return Response(
        {'detail': "CORPUS Node only parse, scan and copy 'method' are allowed"},
        status=HTTP_405_METHOD_NOT_ALLOWED,
    )
def old_post(self, request, project_id):
    '''Former corpus-creation handler, dispatching to the `_<method>` helpers.

    The form is validated by `_validate_form`; for "parse", "scan" and
    "copy" a new CORPUS node is created and handed to the helper, for any
    other method an existing corpus and its RESOURCE node are looked up.

    NOTE(review): as written this method cannot run to completion. The
    names `project` and `user` are never bound here, `resource` is unbound
    in the first branch, and `request.user_id` is read where the rest of
    this file uses `request.user.id`. `project_id` is never used.
    '''
    form = self._validate_form(request)
    #get params
    method = form["method"]
    if method in ["parse", "scan", "copy"]:
        # the corpus and the resource do not exist yet
        # [HACK]
        # create a corpus
        corpus = Node(
            typename = 'CORPUS',
            user_id = request.user_id,
            parent_id = project.id,
            name = form["name"],
        )
        session.add(corpus)
        session.commit()
        # create a resource
        try:
            if method == "parse":
                form["file"] = request.FILES['file']
            # the helper is resolved by name: _parse, _scan or _copy
            action = getattr(self, "_"+method)
            # do all actions except scan delete the resource?
            # and push the info back up into the corpus
            if action(corpus, form):
                # transfer the resource info into the corpus
                documents = session.query(Node).filter(Node.typename=="DOCUMENT", Node.user_id==user.id, Node.parent_id==corpus.id).all()
                response_data = {
                    "records": format_records(documents),
                    "resource": format_records([resource]),
                    "parent": format_parent(project),
                    "count": len(documents)
                }
                return Response(response_data, 200)
            else:
                raise APIException("Error with ", method)
        except Exception as e:
            # any failure, including the APIException raised just above,
            # is re-wrapped in a new APIException
            raise APIException(e)
    else:
        # the corpus exists and the resource must be updated
        corpus = session.query(Node).filter(Node.typename=="CORPUS", Node.parent_id==project.id, Node.name==form["corpus_name"]).first()
        source = get_resource(form["source"])
        if corpus is None:
            return Response("CORPUS not found", 404)
        #[HACK] one corpus one resource by Resourcetype_name
        resource = session.query(Node).filter(
            Node.typename=="RESOURCE",
            Node.parent_id==corpus.id,
            Node.corpus_name==form["corpus_name"],
            Node.name==source["name"]
        ).first()
        # here the helper receives the resource only (e.g. _sample, _fetch)
        action = getattr(self, "_"+method)
        if action(resource):
            # transfer the resource info into the corpus
            if method == "fetch":
                corpus.sources[resource["name"]].append(resource)
                session.delete(resource)
                session.add(corpus)
                session.commit()
            else:
                session.add(resource)
                session.commit()
            return Response({"log": "Created", "uids":[corpus.id]}, 200)
        else:
            # the helper reported failure: drop both the resource and the corpus
            session.delete(resource)
            session.delete(corpus)
            session.commit()
            return Response({"log": method+": Error"}, 500)
def _check_method(self, request):
    '''Return the "method" request parameter once checked against METHODS.

    Raises APIException (412) when the parameter is absent and
    APIException (405) when its value is not one of the known methods.
    '''
    METHODS = ["scan", "parse", "sample", "fetch", "copy"]
    try:
        method = get_parameters(request)["method"]
    except (KeyError, AttributeError):
        # a missing key raises KeyError, which the original did not catch
        raise APIException("Precondition failed : You must specify a method", 412)
    if method not in METHODS:
        raise APIException("Method not allowed", 405)
    return method
def
_validate_form
(
self
,
request
):
'''basic validation of the step given each method
'''
params
=
{}
method
=
self
.
_check_method
(
request
)
#parsing a file
if
method
==
"parse"
:
fields
=
[
'source'
,
'name'
,
"file"
]
#scanning a query => results_nb
elif
method
==
"scan"
:
fields
=
[
'source'
,
'name'
,
"query"
]
#sampling checking results_nb => ids
#~ elif method == "sample":
#~ fields = ['source', 'name', "results_nb"]
#~ #fetching ids => NewParser
#~ elif method == "fetch":
#~ fields = ['source', 'name', "ids"]
#cloning a corpus_id => Corpus
elif
method
==
"copy"
:
fields
=
[
'source'
,
'name'
,
"corpus_id"
]
for
k
in
fields
:
try
:
if
request
.
data
[
k
]
!=
""
or
request
.
data
[
k
]
is
not
None
:
params
[
k
]
=
request
.
data
[
k
]
else
:
raise
APIException
(
"Mandatory value
%
s can't be empty "
%
str
(
k
),
400
)
except
AttributeError
:
raise
APIException
(
"Value
%
s is mandatory"
%
str
(
k
),
400
)
if
len
(
params
)
>
0
:
params
[
"method"
]
=
method
return
params
else
:
raise
APIException
(
"Form is empty:
%
s"
%
str
(
k
),
404
)
def _sample(self, resource):
    '''Sample records with the resource's crawler and store them on the resource.

    NOTE(review): `corpus` and `form` are neither parameters nor locals of
    this method, so the first statement raises NameError as written.
    `_find_resource_hyperdata` is not defined in this class either;
    presumably it was expected from elsewhere - confirm.
    '''
    # the `resource` argument is immediately replaced by this lookup
    resource = self._find_resource_hyperdata(corpus, form)
    # NOTE(review): eval() of a name stored on the resource - make sure this
    # value can never come from user input
    crawlbot = eval(resource.crawler)(resource)
    records = crawlbot.sample()
    #resource.status.insert(0,"sampled")
    resource.ids = records
    corpus.status(action="sample", progress=1, complete=True)
    session.add(corpus)
    session.commit()
    return Response({"uids": [corpus.id]}, status= HTTP_200_OK)
def _fetch(self, resource):
    '''internal method to fetch from a corpus the resource.urls >>> resource._parser(urls)

    NOTE(review): `corpus` and `form` are neither parameters nor locals of
    this method, so the first statement raises NameError as written.
    `_find_resource_hyperdata` is not defined in this class and `scheduled`
    is not imported by name in this file; presumably both were expected
    from elsewhere - confirm.
    '''
    # the `resource` argument is immediately replaced by this lookup
    resource = self._find_resource_hyperdata(corpus, form)
    resource.status(action="fetch", progress=1, complete=False)
    # NOTE(review): eval() of a name stored on the resource - make sure this
    # value can never come from user input
    crawlbot = eval(resource.typecrawler)(resource)
    #send job to celery
    # crawlbot.fetch() is called here and its result is what gets passed on
    scheduled(crawlbot.fetch())
    corpus.status(action="fetch", progress=1, complete=True)
    session.add(corpus)
    session.commit()
    return Response({"uids": [corpus.id]}, 200)
def _copy(self, corpus, form):
    '''Copy the corpus designated by form["corpus_id"] into `corpus`.

    `corpus` is the freshly created target; the source corpus is looked up
    from the form, re-parented, and its documents and their children are
    attached to `corpus`. Answers 202 with the id of `corpus`.

    NOTE(review): `_find_resource_hyperdata` is not defined in this class
    and `Node.corpus_id` is not visible in this file - confirm both exist.
    '''
    #find the target corpus
    new_corpus = (
        session.query(Node)
        .filter(Node.typename == "CORPUS", Node.corpus_id == form["corpus_id"])
        .first()
    )
    #get the resource of this corpus and copy it too
    new_resource = self._find_resource_hyperdata(new_corpus, form)
    #copy new_corpus to previously created corpus
    # (the original wrote to the misspelled, unbound name `new_resouce`)
    new_resource.method = "cloned CORPUS #%i" % (new_corpus.id)
    new_corpus.id = corpus.id
    # change new_corpus ownership
    new_corpus.parent_id = corpus.parent_id
    new_corpus.user_id = corpus.user_id
    #get the documents of the existing corpus
    for doc in new_corpus.get_children():
        doc.parent_id = new_corpus.parent_id
        # NOTE(review): a node id is stored as user_id here; kept as written,
        # but `new_corpus.user_id` looks like the intended value - confirm
        doc.user_id = new_corpus.id
        #store it into corpus
        new_doc = corpus.add_child(doc)
        for ngrams in doc.get_children():
            # the loop body referred to an unbound `new_ngrams`; the child
            # being iterated is the only candidate
            ngrams.parent_id = new_doc.id
            ngrams.user_id = new_corpus.user_id
            #store it into corpus
            new_doc.add_child(ngrams)
    #save the corpus
    corpus.status(action="copy", progress=1, complete=True)
    session.add(corpus)
    session.commit()
    return Response({"log": "Corpus created", "uids": [corpus.id]}, 202)
def _scan(self, corpus, form):
    '''internal method to scan a query >> add results_nb to resource as a corpus hyperdata

    The corpus status is committed before the crawler is built, again
    before the scan, and once more when the result count is stored.
    Answers 200 with the id of `corpus`.

    NOTE(review): `_find_resource_hyperdata` is not defined in this class;
    presumably it was expected from elsewhere - confirm.
    '''
    resource = self._find_resource_hyperdata(corpus, form)
    #corpus_query = check_query(form["query")
    # (the original assigned to the misspelled, unbound name `ressource`)
    resource.query = form["query"]
    corpus.status(action="scan", progress=1, complete=False)
    session.add(corpus)
    session.commit()

    # NOTE(review): eval() of a name stored on the resource - make sure this
    # value can never come from user input
    crawlbot = eval(resource.crawler)(corpus.id)
    corpus.status(action="scan", progress=2, complete=False)
    session.add(corpus)
    session.commit()

    results_nb = crawlbot.scan_results()
    resource.results_nb = results_nb
    corpus.status(action="scan", progress=2, complete=True)
    session.add(corpus)
    session.commit()
    return Response({"log": "Corpus created", "uids": [corpus.id]}, 200)
def _parse(self, corpus, form):
    '''internal method to parse a corpus >> resource >> corpus >> docs

    corpus >> resource (method + file params + parser )
        ^       >> docs (resource.defaultlang <--------| )
        |       >> ngrams
        |------- everything is brought back into corpus

    A RESOURCE node is created under `corpus`, filled from the form and
    from the defaults of the chosen source, committed, then handed to
    workflow(). A failure of workflow() is only printed: the method
    returns True in every case.
    '''
    # 1. creating a resource
    resource = Node(
        user_id=corpus.user_id,
        parent_id=corpus.id,
        typename="RESOURCE",
        #corpus_name = form["name"],
    )
    resource.method = form["method"]
    resource.path = upload(form['file'])

    # mapping the default attribute of a given source from constant RESOURCETYPE
    source_defaults = get_resource(int(form["source"]))
    for attribute, default in source_defaults.items():
        setattr(resource, attribute, default)

    resource.status(action="parse", progress=1, complete=False)
    session.add(resource)
    session.commit()

    try:
        workflow(resource)
    except Exception as error:
        from traceback import print_tb
        print("=======except dans _parse===========")
        print(error)
        print_tb(error.__traceback__)
        print("====================================")
    return True
gargantext/views/api/urls.py
deleted
100644 → 0
View file @
8d42b26a
from
django.conf.urls
import
url
from
rest_framework_jwt.views
import
obtain_jwt_token
from
.
import
nodes
from
.
import
projects
from
.
import
corpora
from
.
import
users
from
.
import
ngrams
from
.
import
metrics
from
.
import
ngramlists
from
.
import
analytics
# Routes of the REST API. Every parenthesised group of a pattern is passed
# to the view as one positional argument.
urlpatterns = [
    url(r'^nodes$', nodes.NodeListResource.as_view()),
    url(r'^nodes/(\d+)$', nodes.NodeResource.as_view()),
    url(r'^nodes/(\d+)/having$', nodes.NodeListHaving.as_view()),
    url(r'^nodes/(\d+)/status$', nodes.Status.as_view()),

    # Projects
    url(r'^projects$', projects.ProjectList.as_view()),
    url(r'^projects/(\d+)$', projects.ProjectView.as_view()),

    # Corpora
    url(r'^projects/(\d+)/corpora/(\d+)$', corpora.CorpusView.as_view()),

    # Sources
    #, url(r'^projects/(\d+)/corpora/(\d+)/sources$', corpora.CorpusSources.as_view())
    #, url(r'^projects/(\d+)/corpora/(\d+)/sources/(\d+)$ , corpora.CorpusSourceView.as_view())

    # Facets
    # only the corpus id is captured: the CorpusFacet handlers take a single
    # id, so capturing the project id as well made every request fail
    url(r'^projects/\d+/corpora/(\d+)/facets$', nodes.CorpusFacet.as_view()),

    # Favorites
    # only the corpus id is captured, for the same reason (with two groups
    # PUT even bound the project id to `corpus_id`)
    url(r'^projects/\d+/corpora/(\d+)/favorites$', nodes.CorpusFavorites.as_view()),

    # Metrics
    # NOTE(review): this route captures two ids while '^metrics/(\d+)$' below
    # captures one for the same view - confirm CorpusMetrics accepts both
    url(r'^projects/(\d+)/corpora/(\d+)/metrics$', metrics.CorpusMetrics.as_view()),

    # Ngrams
    url(r'^ngrams/?$', ngrams.ApiNgrams.as_view()),

    # Analytics
    url(r'^nodes/(\d+)/histories$', analytics.NodeNgramsQueries.as_view()),
    # NOTE(review): no leading '^', so this matches any path ending in
    # 'hyperdata' - kept as is
    url(r'hyperdata$', analytics.ApiHyperdata.as_view()),

    # get a list of ngram_ids or ngram_infos by list_id
    # url(r'^ngramlists/(\d+)$', ngramlists.List.as_view()),

    # in these two routes the node is supposed to be a *corpus* node
    url(r'^nodes/(\d+)/facets$', nodes.CorpusFacet.as_view()),
    url(r'^nodes/(\d+)/favorites$', nodes.CorpusFavorites.as_view()),

    # update all metrics for a corpus
    #  ex: PUT metrics/123
    #                     \
    #                   corpus id
    url(r'^metrics/(\d+)$', metrics.CorpusMetrics.as_view()),

    # get a CSV export of the ngramlists of a corpus
    #  ex: GET ngramlists/export?corpus=43
    #  TODO : unify to a /api/ngrams?formatted=csv
    #         (similar to /api/nodes?formatted=csv)
    url(r'^ngramlists/export$', ngramlists.CSVLists.as_view()),

    # same handling class as export (CSVLists)
    # but this route used only for POST + file
    #                           or PATCH + other corpus id
    url(r'^ngramlists/import$', ngramlists.CSVLists.as_view()),

    # add or remove ngram from a list
    #  ex: add <=> PUT ngramlists/change?list=42&ngrams=1,2
    #      rm  <=> DEL ngramlists/change?list=42&ngrams=1,2
    url(r'^ngramlists/change$', ngramlists.ListChange.as_view()),

    # modify grouping couples of a group node
    #  ex: PUT/DEL ngramlists/groups?node=43
    #      & group data also in url: 767[]=209,640 & 779[]=436,265,385
    url(r'^ngramlists/groups$', ngramlists.GroupChange.as_view()),

    # entire combination of lists from a corpus, dedicated to termtable
    # (or any combination of lists that go together :
    #   - a mainlist
    #   - an optional stoplist
    #   - an optional maplist
    #   - an optional grouplist
    url(r'^ngramlists/family$', ngramlists.ListFamily.as_view()),

    # fast access to maplist, similarly formatted for termtable
    url(r'^ngramlists/maplist$', ngramlists.MapListGlance.as_view()),

    url(r'^user/parameters/$', users.UserParameters.as_view()),
    url(r'^auth/token$', obtain_jwt_token),
]
gargantext/views/api/users.py
deleted
100644 → 0
View file @
8d42b26a
from
.api
import
*
#notamment APIView, check_rights, format_response
from
gargantext.util.http
import
*
from
django.core.exceptions
import
*
from
collections
import
defaultdict
from
gargantext.util.toolchain
import
*
import
copy
from
gargantext.util.db
import
session
class
UserParameters
(
APIView
):
'''API endpoint that represent the parameters of the user'''
def get(self, request):
    '''Return the hyperdata of the requester's USER node.

    Answers 404 when the requester has no USER node.
    '''
    node_user = (
        session.query(Node)
        .filter(Node.user_id == request.user.id, Node.typename == "USER")
        .first()
    )
    if node_user is None:
        # the constant is HTTP_404_NOT_FOUND (as in the project views);
        # `HTTP_404` is not a name the status constants provide
        return Response({"detail": "Not Found"}, status=HTTP_404_NOT_FOUND)

    #context = format_response(node_user, )
    return Response(node_user.hyperdata)
def put(self, request):
    '''Merge the request payload into the hyperdata of the requester's USER node.

    Raises TypeError when the request carries no user id, answers 401 when
    the requester has no USER node, and 202 with the stored hyperdata once
    the update is committed.
    '''
    if request.user.id is None:
        raise TypeError("This API request must come from an authenticated user.")

    # we query among the nodes that belong to this user
    user = cache.User[request.user.id]

    def load_user_node():
        # the USER node owned by this user, or None
        return (
            session.query(Node)
            .filter(Node.user_id == user.id, Node.typename == "USER")
            .first()
        )

    node_user = load_user_node()
    if node_user is None:
        return Response({"detail": "Not Allowed"}, status=HTTP_401_UNAUTHORIZED)

    for field, value in request.data.items():
        node_user.hyperdata[field] = value
        # setattr(node_user.hyperdata, k, v)
    # print(node_user.hyperdata)
    node_user.save_hyperdata()
    session.add(node_user)
    session.commit()

    # the node is queried again after the commit and its hyperdata echoed
    node_user = load_user_node()
    print(node_user.hyperdata)
    return Response(
        {"detail": "Updated user parameters",
         "hyperdata": node_user.hyperdata},
        status=HTTP_202_ACCEPTED,
    )
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment