Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
be75f405
Commit
be75f405
authored
Dec 16, 2014
by
Administrator
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] List of relevant documents according to their TF-IDF score.
parent
33429e65
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
61 additions
and
5 deletions
+61
-5
functions.py
analysis/functions.py
+38
-1
urls.py
gargantext_web/urls.py
+1
-0
views.py
gargantext_web/views.py
+20
-3
models.py
node/models.py
+2
-1
No files found.
analysis/functions.py
View file @
be75f405
from
node.models
import
Language
,
ResourceType
,
Resource
,
\
from
node.models
import
Language
,
ResourceType
,
Resource
,
\
Node
,
NodeType
,
Node_Resource
,
Project
,
Corpus
,
\
Node
,
NodeType
,
Node_Resource
,
Project
,
Corpus
,
\
Node_Ngram
,
NodeNgramNgram
Node_Ngram
,
NodeNgramNgram
,
NodeNodeNgram
from
collections
import
defaultdict
from
collections
import
defaultdict
from
django.db
import
connection
,
transaction
from
django.db
import
connection
,
transaction
from
math
import
log
def
create_blacklist
(
user
,
corpus
):
def
create_blacklist
(
user
,
corpus
):
pass
pass
...
@@ -233,6 +235,41 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150
...
@@ -233,6 +235,41 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', n=150
return
data
return
data
def tfidf(corpus, document, ngram):
    """Return the TF-IDF score of ``ngram`` for ``document`` inside ``corpus``.

    * tf  = weight of the (document, ngram) ``Node_Ngram`` row divided by the
      number of ``Node_Ngram`` rows attached to the document.
    * idf = log(number of "Document" children of the corpus / number of
      ``Node_Ngram`` rows linking the ngram to one of those documents).

    The document frequency is restricted to the documents of ``corpus`` so
    that it is measured over the same population as the numerator; counting
    rows from every corpus in the database could make the ratio drop below 1
    and the score negative.

    Parameters:
        corpus:   parent node of the documents being scored.
        document: node whose ngram is scored.
        ngram:    ngram to score.

    Returns:
        The score ``tf * idf``, or ``0`` when it cannot be computed (the ngram
        is not attached to the document, the "Document" node type is missing,
        or one of the counts is zero).

    Only the expected "missing row" errors are handled here. Database errors
    are left to propagate: the caller may run inside ``transaction.atomic()``,
    where swallowing them would leave the transaction in a broken state.
    """
    try:
        weight = Node_Ngram.objects.get(node=document, ngram=ngram).weight
        document_type = NodeType.objects.get(name="Document")
    except (Node_Ngram.DoesNotExist, NodeType.DoesNotExist) as error:
        print(error)
        return 0

    ngrams_in_document = Node_Ngram.objects.filter(node=document).count()
    documents_in_corpus = Node.objects.filter(
        parent=corpus, type=document_type,
    ).count()
    # NOTE(review): assumes at most one Node_Ngram row per (document, ngram)
    # pair, so that counting rows equals counting documents -- confirm against
    # the model's constraints.
    documents_with_ngram = Node_Ngram.objects.filter(
        ngram=ngram, node__parent=corpus, node__type=document_type,
    ).count()

    # Explicit guards replace the ZeroDivisionError / log(0) ValueError that
    # were previously caught by a blanket ``except``.
    if not (ngrams_in_document and documents_in_corpus and documents_with_ngram):
        return 0

    tf = weight / ngrams_in_document
    idf = log(documents_in_corpus / documents_with_ngram)
    return tf * idf
def do_tfidf(corpus, reset=True):
    """Compute and store the TF-IDF score of every (document, ngram) pair.

    For each "Document" child of ``corpus`` and each ``Node_Ngram`` row of
    that document, a ``NodeNodeNgram`` row (nodex=corpus, nodey=document,
    ngram, score) is created unless one already exists.

    Parameters:
        corpus: a ``Node`` whose type name is "Corpus". Anything else is
                reported on stdout and ignored.
        reset:  when true, the rows previously stored for this corpus are
                deleted first, so every score is recomputed.

    Returns:
        None.
    """
    # Validate first so that an unsupported argument never triggers the
    # destructive reset below.
    if not (isinstance(corpus, Node) and corpus.type.name == "Corpus"):
        print("Only corpus implemented yet, you put instead:", type(corpus))
        return

    document_type = NodeType.objects.get(name="Document")

    # One transaction for the delete and the inserts: either the whole set of
    # scores is replaced or nothing changes.
    with transaction.atomic():
        if reset:
            NodeNodeNgram.objects.filter(nodex=corpus).delete()

        for document in Node.objects.filter(parent=corpus, type=document_type):
            for node_ngram in Node_Ngram.objects.filter(node=document):
                # Explicit existence test instead of ``get`` wrapped in a bare
                # ``except``, which treated every error (including database
                # failures) as "row missing".
                already_scored = NodeNodeNgram.objects.filter(
                    nodex=corpus, nodey=document, ngram=node_ngram.ngram,
                ).exists()
                if already_scored:
                    continue

                NodeNodeNgram(
                    nodex=corpus,
                    nodey=document,
                    ngram=node_ngram.ngram,
                    score=tfidf(corpus, document, node_ngram.ngram),
                ).save()
...
...
gargantext_web/urls.py
View file @
be75f405
...
@@ -36,6 +36,7 @@ urlpatterns = patterns('',
...
@@ -36,6 +36,7 @@ urlpatterns = patterns('',
url
(
r'^chart/corpus/(\d+)/data.csv$'
,
views
.
send_csv
),
url
(
r'^chart/corpus/(\d+)/data.csv$'
,
views
.
send_csv
),
url
(
r'^corpus/(\d+)/node_link.json$'
,
views
.
node_link
),
url
(
r'^corpus/(\d+)/node_link.json$'
,
views
.
node_link
),
url
(
r'^corpus/(\d+)/adjacency.json$'
,
views
.
adjacency
),
url
(
r'^corpus/(\d+)/adjacency.json$'
,
views
.
adjacency
),
url
(
r'^api/tfidf/(\d+)/(\d+)$'
,
views
.
tfidf
),
url
(
r'^api$'
,
gargantext_web
.
api
.
Root
),
url
(
r'^api$'
,
gargantext_web
.
api
.
Root
),
url
(
r'^api/nodes/(\d+)/children/metadata$'
,
gargantext_web
.
api
.
NodesChildrenMetatadata
.
as_view
()),
url
(
r'^api/nodes/(\d+)/children/metadata$'
,
gargantext_web
.
api
.
NodesChildrenMetatadata
.
as_view
()),
...
...
gargantext_web/views.py
View file @
be75f405
...
@@ -7,7 +7,7 @@ from django.template import Context
...
@@ -7,7 +7,7 @@ from django.template import Context
from
node.models
import
Language
,
ResourceType
,
Resource
,
\
from
node.models
import
Language
,
ResourceType
,
Resource
,
\
Node
,
NodeType
,
Node_Resource
,
Project
,
Corpus
,
\
Node
,
NodeType
,
Node_Resource
,
Project
,
Corpus
,
\
N
ode_Ngram
,
NodeNgram
Ngram
N
gram
,
Node_Ngram
,
NodeNgramNgram
,
NodeNode
Ngram
from
node.admin
import
CorpusForm
,
ProjectForm
,
ResourceForm
from
node.admin
import
CorpusForm
,
ProjectForm
,
ResourceForm
...
@@ -470,7 +470,7 @@ def send_csv(request, corpus_id):
...
@@ -470,7 +470,7 @@ def send_csv(request, corpus_id):
# To get the data
# To get the data
from
gargantext_web.api
import
JsonHttpResponse
from
gargantext_web.api
import
JsonHttpResponse
from
analysis.functions
import
get_cooc
from
analysis.functions
import
get_cooc
import
json
def
node_link
(
request
,
corpus_id
):
def
node_link
(
request
,
corpus_id
):
'''
'''
...
@@ -488,7 +488,6 @@ def adjacency(request, corpus_id):
...
@@ -488,7 +488,6 @@ def adjacency(request, corpus_id):
data
=
get_cooc
(
request
=
request
,
corpus_id
=
corpus_id
,
type
=
"adjacency"
)
data
=
get_cooc
(
request
=
request
,
corpus_id
=
corpus_id
,
type
=
"adjacency"
)
return
JsonHttpResponse
(
data
)
return
JsonHttpResponse
(
data
)
def
graph_it
(
request
):
def
graph_it
(
request
):
'''The new multimodal graph.'''
'''The new multimodal graph.'''
t
=
get_template
(
'graph-it.html'
)
t
=
get_template
(
'graph-it.html'
)
...
@@ -528,3 +527,21 @@ def ngrams(request):
...
@@ -528,3 +527,21 @@ def ngrams(request):
}))
}))
return
HttpResponse
(
html
)
return
HttpResponse
(
html
)
def tfidf(request, corpus_id, ngram_id):
    """
    Takes the IDs of a corpus and of an ngram and returns, in JSON format, the
    list of relevant documents ordered by decreasing TF-IDF score.

    Each entry of the list is ``{"id": <document id>, "title": <document title>}``.
    An unknown corpus or ngram ID results in an HTTP 404 response instead of an
    unhandled ``DoesNotExist`` error.
    """
    # Imported locally because the module's import header is not fully
    # visible from this block.
    from django.shortcuts import get_object_or_404

    corpus = get_object_or_404(Node, id=corpus_id)
    ngram = get_object_or_404(Ngram, id=ngram_id)

    # select_related fetches the documents together with their scores, which
    # avoids one extra query per row when reading ``row.nodey`` below.
    scored_rows = (
        NodeNodeNgram.objects
        .filter(nodex=corpus, ngram=ngram)
        .select_related('nodey')
        .order_by('-score')
    )
    tfidf_list = [
        dict(id=row.nodey.id, title=row.nodey.metadata['title'])
        for row in scored_rows
    ]

    # NOTE(review): other views hand their data straight to JsonHttpResponse;
    # if that helper serializes its argument itself, this payload is encoded
    # twice. Kept unchanged because clients may rely on it -- confirm.
    data = json.dumps(tfidf_list)
    return JsonHttpResponse(data)
node/models.py
View file @
be75f405
...
@@ -239,7 +239,8 @@ class Node(CTENode):
...
@@ -239,7 +239,8 @@ class Node(CTENode):
self
.
parse_resources
()
self
.
parse_resources
()
type_document
=
NodeType
.
objects
.
get
(
name
=
'Document'
)
type_document
=
NodeType
.
objects
.
get
(
name
=
'Document'
)
self
.
children
.
filter
(
type_id
=
type_document
.
pk
)
.
extract_ngrams
(
keys
=
[
'title'
,])
self
.
children
.
filter
(
type_id
=
type_document
.
pk
)
.
extract_ngrams
(
keys
=
[
'title'
,])
from
analysis.functions
import
do_tfidf
do_tfidf
(
self
)
class
Node_Metadata
(
models
.
Model
):
class
Node_Metadata
(
models
.
Model
):
node
=
models
.
ForeignKey
(
Node
)
node
=
models
.
ForeignKey
(
Node
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment