Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
022f1c6f
Commit
022f1c6f
authored
Mar 29, 2016
by
delanoe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] Adding TFIDF filter for Explorer.
parent
e861900f
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
1974 additions
and
45 deletions
+1974
-45
nodes.py
gargantext/views/api/nodes.py
+81
-43
urls.py
gargantext/views/api/urls.py
+1
-0
extras_explorerjs.js
static/js/extras_explorerjs.js
+946
-1
extras_explorerjs.js
static/js/extras_explorerjs.js
+946
-1
No files found.
gargantext/views/api/nodes.py
View file @
022f1c6f
from
gargantext.models
import
Node
from
gargantext.models
import
Node
,
Ngram
,
NodeNgram
,
NodeNodeNgram
from
gargantext.constants
import
NODETYPES
from
gargantext.util.db
import
session
,
delete
from
gargantext.util.db_cache
import
cache
from
gargantext.util.db
import
session
,
delete
,
func
from
gargantext.util.db_cache
import
cache
,
or_
from
gargantext.util.validation
import
validate
from
gargantext.util.http
import
ValidationException
,
APIView
\
,
get_parameters
,
JsonHttpResponse
...
...
@@ -98,6 +98,84 @@ class NodeListResource(APIView):
return
JsonHttpResponse
({
'deleted'
:
result
.
rowcount
})
class
NodeListHaving
(
APIView
):
'''
Gives a list of nodes according to its score which is related
to some specific ngrams.
TODO: implement other options (offset)
Simple implementation:
Takes IDs of corpus and ngram and returns list of relevent documents in json format
according to TFIDF score (order is decreasing).
'''
def
get
(
self
,
request
,
corpus_id
):
parameters
=
get_parameters
(
request
)
parameters
=
validate
(
parameters
,
{
'score'
:
str
,
'ngram_ids'
:
list
}
)
try
:
ngram_ids
=
[
int
(
n
)
for
n
in
parameters
[
'ngram_ids'
]
.
split
(
','
)]
except
:
raise
ValidationException
(
'"ngram_ids" needs integers separated by comma.'
)
limit
=
5
nodes_list
=
[]
corpus
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
corpus_id
)
.
first
()
tfidf_id
=
(
session
.
query
(
Node
.
id
)
.
filter
(
Node
.
typename
==
"TFIDF-CORPUS"
,
Node
.
parent_id
==
corpus
.
id
)
.
first
()
)
tfidf_id
=
tfidf_id
[
0
]
print
(
tfidf_id
)
# request data
nodes_query
=
(
session
.
query
(
Node
,
func
.
sum
(
NodeNodeNgram
.
score
))
.
join
(
NodeNodeNgram
,
NodeNodeNgram
.
node2_id
==
Node
.
id
)
.
filter
(
NodeNodeNgram
.
node1_id
==
tfidf_id
)
.
filter
(
Node
.
typename
==
'DOCUMENT'
,
Node
.
parent_id
==
corpus
.
id
)
.
filter
(
or_
(
*
[
NodeNodeNgram
.
ngram_id
==
ngram_id
for
ngram_id
in
ngram_ids
]))
.
group_by
(
Node
)
.
order_by
(
func
.
sum
(
NodeNodeNgram
.
score
)
.
desc
())
.
limit
(
limit
)
)
# print("\n")
# print("in TFIDF:")
# print("\tcorpus_id:",corpus_id)
# convert query result to a list of dicts
# if nodes_query is None:
# print("TFIDF error, juste take sums")
# nodes_query = (session
# .query(Node, func.sum(NodeNgram.weight))
# .join(NodeNgram, NodeNgram.node_id == Node.id)
# .filter(Node.parent_id == corpus_id)
# .filter(Node.typename == 'DOCUMENT')
# .filter(or_(*[NodeNgram.ngram_id==ngram_id for ngram_id in ngram_ids]))
# .group_by(Node)
# .order_by(func.sum(NodeNgram.weight).desc())
# .limit(limit)
# )
for
node
,
score
in
nodes_query
:
print
(
node
,
score
)
print
(
"
\t
corpus:"
,
corpus_id
,
"
\t
"
,
node
.
name
)
node_dict
=
{
'id'
:
node
.
id
,
'score'
:
score
,
}
for
key
in
(
'title'
,
'publication_date'
,
'journal'
,
'authors'
,
'fields'
):
if
key
in
node
.
hyperdata
:
node_dict
[
key
]
=
node
.
hyperdata
[
key
]
nodes_list
.
append
(
node_dict
)
return
JsonHttpResponse
(
nodes_list
)
class
NodeResource
(
APIView
):
...
...
@@ -188,44 +266,4 @@ class CorpusFacet(APIView):
return
(
xcounts
,
total
)
class
CorpusGraph
(
APIView
):
'''
Generate a graph
'''
def
get
(
self
,
request
,
node_id
):
# check that the node is a corpus
# ? faster from cache than: corpus = session.query(Node)...
corpus
=
cache
.
Node
[
node_id
]
if
corpus
.
typename
!=
'CORPUS'
:
raise
ValidationException
(
"Only nodes of type CORPUS can accept facet queries"
+
" (but this node has type
%
s)..."
%
corpus
.
typename
)
else
:
self
.
corpus
=
corpus
# check that the hyperfield parameter makes sense
_facet_available_subfields
=
[
'journal'
,
'publication_year'
,
'rubrique'
,
'language_iso2'
,
'language_iso3'
,
'language_name'
]
parameters
=
get_parameters
(
request
)
# validate() triggers an info message if subfield not in range
parameters
=
validate
(
parameters
,
{
'type'
:
dict
,
'items'
:
{
'hyperfield'
:
{
'type'
:
str
,
'range'
:
_facet_available_subfields
}
}})
subfield
=
parameters
[
'hyperfield'
]
# do_cooc
# do_distance
# response
return
JsonHttpResponse
({
'doc_count'
:
total
,
'by'
:
{
subfield
:
xcounts
}
})
gargantext/views/api/urls.py
View file @
022f1c6f
...
...
@@ -9,6 +9,7 @@ urlpatterns = [
url
(
r'^nodes/(\d+)$'
,
nodes
.
NodeResource
.
as_view
()),
url
(
r'^nodes/(\d+)/facets$'
,
nodes
.
CorpusFacet
.
as_view
()),
url
(
r'^nodes/(\d+)/having$'
,
nodes
.
NodeListHaving
.
as_view
()),
# get a list of ngram_ids or ngram_infos by list_id
#
...
...
static/js/extras_explorerjs.js
deleted
120000 → 0
View file @
e861900f
/
srv
/
gargantext_lib
/
js
/
extras_explorerjs
.
js
\ No newline at end of file
static/js/extras_explorerjs.js
0 → 100644
View file @
022f1c6f
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment