Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
3629523d
Commit
3629523d
authored
Jun 15, 2015
by
PkSM3
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[UPDATE] send * to stoplist and apres show (mainlist-stoplist): OK
parent
08539c33
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
272 additions
and
69 deletions
+272
-69
urls.py
annotations/urls.py
+1
-0
views.py
annotations/views.py
+22
-0
urls.py
gargantext_web/urls.py
+2
-1
views.py
scrappers/scrap_pubmed/views.py
+2
-4
dyna_chart_and_table.js
static/js/dyna_chart_and_table.js
+72
-38
test_dyna_chart_and_table.js
static/js/test_dyna_chart_and_table.js
+43
-3
corpus.html
templates/corpus.html
+19
-1
project.html
templates/project.html
+2
-3
views.py
tests/ngramstable/views.py
+109
-19
No files found.
annotations/urls.py
View file @
3629523d
...
...
@@ -6,4 +6,5 @@ urlpatterns = patterns('',
url
(
r'^document/(?P<doc_id>[0-9]+)$'
,
views
.
Document
.
as_view
()),
# document view
url
(
r'^corpus/(?P<corpus_id>[0-9]+)/document/(?P<doc_id>[0-9]+)$'
,
views
.
NgramList
.
as_view
()),
# the list associated with an ngram
url
(
r'^lists/(?P<list_id>[0-9]+)/ngrams(?:/(?P<ngram_id>[0-9]+))?$'
,
views
.
NgramEdit
.
as_view
()),
#
url
(
r'^lists/(?P<list_id>[0-9]+)/multiple?$'
,
views
.
deleteMultiple
),
#
)
annotations/views.py
View file @
3629523d
...
...
@@ -16,6 +16,8 @@ from rest_framework.authentication import SessionAuthentication, BasicAuthentica
from
node.models
import
Node
from
gargantext_web.db
import
*
from
ngram.lists
import
listIds
,
listNgramIds
,
ngramList
from
gargantext_web.api
import
JsonHttpResponse
import
json
@
login_required
...
...
@@ -109,6 +111,26 @@ class NgramEdit(APIView):
session
.
query
(
Node_Ngram
)
.
filter
(
Node_Ngram
.
node_id
==
list_id
)
.
filter
(
Node_Ngram
.
ngram_id
==
ngram_id
)
.
delete
()
return
Response
(
None
,
204
)
def deleteMultiple(request, list_id):
    """Attach every ngram id posted in ``to_delete`` to the list ``list_id``.

    NOTE(review): despite its name this view deletes nothing. For each
    posted ngram id it creates a ``Node_Ngram`` row (weight 1.0) linking the
    ngram to ``list_id`` unless such a row already exists. Presumably
    ``list_id`` is a stop list, so "adding" there removes the ngram from
    what the user sees -- confirm against the caller.

    Parameters:
        request: the Django request; ``request.POST['to_delete']`` is
            expected to hold a JSON-encoded array of ngram ids.
        list_id: id of the list node, captured from the URL.

    Returns:
        A redirect to the login page for anonymous users, otherwise a
        ``JsonHttpResponse`` carrying a fixed placeholder payload.

    Raises:
        ValueError: if ``to_delete`` is present but is not valid JSON.
    """
    # Placeholder payload: the client only logs whatever comes back.
    results = ["hola", "mundo"]

    if not request.user.is_authenticated():
        return redirect('/login/?next=%s' % request.path)

    if request.POST:
        # A POST without 'to_delete' is treated as "nothing to do" instead
        # of failing on the missing key.
        todel_ids = json.loads(request.POST.get('to_delete', '[]'))
        for ngram_id in todel_ids:
            # add the ngram to the list if not already done
            existing = (
                session.query(Node_Ngram)
                .filter(Node_Ngram.node_id == list_id)
                .filter(Node_Ngram.ngram_id == ngram_id)
                .first()
            )
            if existing is None:
                session.add(
                    Node_Ngram(node_id=list_id, ngram_id=ngram_id, weight=1.0)
                )
        # One commit for the whole batch.
        session.commit()

    return JsonHttpResponse(results)
class
Document
(
APIView
):
"""
...
...
gargantext_web/urls.py
View file @
3629523d
...
...
@@ -85,7 +85,8 @@ urlpatterns = patterns('',
url
(
r'^tests/paginator/corpus/(\d+)/$'
,
views
.
newpaginatorJSON
),
url
(
r'^tests/move2trash/$'
,
views
.
move_to_trash_multiple
),
url
(
r'^project/(\d+)/corpus/(\d+)/ngrams/ngrams.json$'
,
samtest
.
test_ngrams
),
url
(
r'^project/(\d+)/corpus/(\d+)/ngrams$'
,
samtest
.
get_ngrams
)
url
(
r'^project/(\d+)/corpus/(\d+)/ngrams$'
,
samtest
.
get_ngrams
),
url
(
r'^corpus/(\d+)/document/(\d+)/testpage$'
,
samtest
.
test_test
)
)
...
...
scrappers/scrap_pubmed/views.py
View file @
3629523d
...
...
@@ -271,10 +271,8 @@ def testISTEX(request , project_id):
)
dwnldsOK
+=
1
if
dwnldsOK
==
0
:
return
JsonHttpResponse
([
"fail"
])
# print(urlreqs)
###########################
###########################
try
:
if
not
DEBUG
:
apply_workflow
.
apply_async
((
corpus
.
id
,),)
...
...
static/js/dyna_chart_and_table.js
View file @
3629523d
...
...
@@ -119,11 +119,15 @@ function Final_UpdateTable( action ) {
var
current_docs
=
{}
var
BIS_dict
=
{}
var
path
=
window
.
location
.
pathname
.
match
(
/
\/
project
\/(
.*
)\/
corpus
\/(
.*
)\/
/
);
var
projectid
=
path
[
1
]
var
corpusid
=
path
[
2
]
var
theurl
=
"/api/nodes/"
+
corpusid
+
"/children/duplicates?keys=title&limit=9999"
var
url_elems
=
window
.
location
.
href
.
split
(
"/"
)
var
url_mainIDs
=
{}
for
(
var
i
=
0
;
i
<
url_elems
.
length
;
i
++
)
{
// if this element is a number:
if
(
url_elems
[
i
]
!=
""
&&
!
isNaN
(
Number
(
url_elems
[
i
])))
{
url_mainIDs
[
url_elems
[
i
-
1
]]
=
Number
(
url_elems
[
i
]);
}
}
var
theurl
=
"/api/nodes/"
+
url_mainIDs
[
"corpus"
]
+
"/children/duplicates?keys=title&limit=9999"
// $.ajax({
// url: theurl,
// success: function(data) {
...
...
@@ -204,6 +208,38 @@ function transformContent(rec_id , header , content) {
}
else
return
content
;
}
// "Move to trash" button: gather the ids of the records referenced by the
// keys of Garbage (used as indexes into AjaxRecords), POST them to the trash
// endpoint and reload the page on success. The button is hidden right after
// the handler is attached.
$("#move2trash")
    .click(function () {
        var trashedIds = [];
        for (var recIndex in Garbage) {
            trashedIds.push(AjaxRecords[recIndex].id);
        }
        console.log("ids to the trash:");
        console.log(trashedIds);

        // Django's CSRF protection expects the token in this header.
        var attachCsrfToken = function (xhr) {
            xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
        };

        var onTrashed = function (data) {
            console.log("in #move2trash");
            console.log(data);
            location.reload();
        };

        var onFailure = function (result) {
            console.log("Data not found in #move2trash");
            console.log(result);
        };

        $.ajax({
            url: "/tests/move2trash/",
            data: "nodeids=" + JSON.stringify(trashedIds),
            type: 'POST',
            beforeSend: attachCsrfToken,
            success: onTrashed,
            error: onFailure
        });
    })
    .hide();
//generic enough
function
ulWriter
(
rowIndex
,
record
,
columns
,
cellWriter
)
{
// pr("\tulWriter: "+record.id)
...
...
@@ -224,9 +260,30 @@ function ulWriter(rowIndex, record, columns, cellWriter) {
return
'<tr>'
+
tr
+
'</tr>'
;
}
// var div__filter_for_search = ''
// div__filter_for_search += '<select data-width="100px" class="selectpicker" multiple data-max-options="1">';
// div__filter_for_search += ' <optgroup label="All" data-max-options="1" selected>';
// div__filter_for_search += ' <option>Title</option>';
// div__filter_for_search += ' <option>Date</option>';
// div__filter_for_search += ' </optgroup>';
// div__filter_for_search += ' <optgroup label="Category" data-max-options="1">';
// div__filter_for_search += ' <option>Title</option>';
// div__filter_for_search += ' <option>Date</option>';
// div__filter_for_search += ' </optgroup>';
// div__filter_for_search += ' <optgroup label="Duplicates" data-max-options="1">';
// div__filter_for_search += ' <option>by DOI</option>';
// div__filter_for_search += ' <option>by Title</option>';
// div__filter_for_search += ' </optgroup>';
// div__filter_for_search += '</select>';
// $("#supmofos").html(div__filter_for_search)
// (3) Get records and hyperdata for paginator
$
.
ajax
({
url
:
'/tests/paginator/corpus/'
+
corpusid
,
url
:
'/tests/paginator/corpus/'
+
url_mainIDs
[
"corpus"
]
,
success
:
function
(
data
){
console
.
log
(
data
)
...
...
@@ -235,7 +292,7 @@ function ulWriter(rowIndex, record, columns, cellWriter) {
var
orig_id
=
parseInt
(
data
.
records
[
i
].
id
)
var
arr_id
=
parseInt
(
i
)
RecDict
[
orig_id
]
=
arr_id
;
data
.
records
[
i
][
"name"
]
=
'<a target="_blank" href="/project/'
+
projectid
+
'/corpus/'
+
corpusid
+
'/document/'
+
orig_id
+
'">'
+
data
.
records
[
i
][
"name"
]
+
'</a>'
data
.
records
[
i
][
"name"
]
=
'<a target="_blank" href="/project/'
+
url_mainIDs
[
"project"
]
+
'/corpus/'
+
url_mainIDs
[
"corpus"
]
+
'/document/'
+
orig_id
+
'">'
+
data
.
records
[
i
][
"name"
]
+
'</a>'
data
.
records
[
i
][
"del"
]
=
false
var
date
=
data
.
records
[
i
][
"date"
];
...
...
@@ -247,36 +304,6 @@ function ulWriter(rowIndex, record, columns, cellWriter) {
// $("#move2trash").prop('disabled', true);
$
(
"#move2trash"
)
.
click
(
function
(){
var
ids2trash
=
[]
for
(
var
i
in
Garbage
)
{
ids2trash
.
push
(
AjaxRecords
[
i
].
id
);
}
console
.
log
(
"ids to the trash:"
)
console
.
log
(
ids2trash
)
$
.
ajax
({
url
:
"/tests/move2trash/"
,
data
:
"nodeids="
+
JSON
.
stringify
(
ids2trash
),
type
:
'POST'
,
beforeSend
:
function
(
xhr
)
{
xhr
.
setRequestHeader
(
"X-CSRFToken"
,
getCookie
(
"csrftoken"
));
},
success
:
function
(
data
)
{
console
.
log
(
"in #move2trash"
)
console
.
log
(
data
)
location
.
reload
();
},
error
:
function
(
result
)
{
console
.
log
(
"Data not found in #move2trash"
);
console
.
log
(
result
)
}
});
})
.
hide
();
var
t0
=
AjaxRecords
[
0
].
date
.
split
(
"-"
).
map
(
Number
)
...
...
@@ -429,6 +456,13 @@ function ulWriter(rowIndex, record, columns, cellWriter) {
$
(
'<br><br><div class="imadiv"></div>'
).
insertAfter
(
".dynatable-per-page"
)
$
(
".dynatable-record-count"
).
insertAfter
(
".imadiv"
)
$
(
".dynatable-pagination-links"
).
insertAfter
(
".imadiv"
)
// console.log(RecDict)
var
the_content
=
$
(
"#supmofos"
).
html
();
$
(
""
+
the_content
).
insertAfter
(
"#dynatable-query-search-my-ajax-table"
)
$
(
"#supmofos"
).
remove
()
// .insertAfter("#dynatable-query-search-my-ajax-table")
}
});
static/js/test_dyna_chart_and_table.js
View file @
3629523d
...
...
@@ -159,8 +159,16 @@ function Final_UpdateTable( action ) {
// Get all the duplicates using the Django-Garg API
var
current_docs
=
{}
var
BIS_dict
=
{}
var
corpusid
=
window
.
location
.
href
.
split
(
"corpus"
)[
1
].
replace
(
/
\/
/g
,
''
)
//replace all the slashes
var
theurl
=
"/api/nodes/"
+
corpusid
+
"/children/duplicates?keys=title&limit=9999"
var
url_elems
=
window
.
location
.
href
.
split
(
"/"
)
var
url_mainIDs
=
{}
for
(
var
i
=
0
;
i
<
url_elems
.
length
;
i
++
)
{
// if this element is a number:
if
(
url_elems
[
i
]
!=
""
&&
!
isNaN
(
Number
(
url_elems
[
i
])))
{
url_mainIDs
[
url_elems
[
i
-
1
]]
=
Number
(
url_elems
[
i
]);
}
}
var
theurl
=
"/api/nodes/"
+
url_mainIDs
[
"corpus"
]
+
"/children/duplicates?keys=title&limit=9999"
// $.ajax({
// url: theurl,
// success: function(data) {
...
...
@@ -364,14 +372,46 @@ $("#Clean_All").click(function(){
$
(
"#Save_All"
).
click
(
function
(){
var
sum__selected_elems
=
0
;
var
poubelle
=
[]
for
(
var
i
in
FlagsBuffer
)
if
(
Object
.
keys
(
FlagsBuffer
[
i
]).
length
==
0
)
poubelle
.
push
(
i
)
sum__selected_elems
+=
Object
.
keys
(
FlagsBuffer
[
i
]).
length
;
for
(
var
i
in
poubelle
)
delete
FlagsBuffer
[
poubelle
[
i
]];
if
(
sum__selected_elems
>
0
)
{
console
.
log
(
""
)
console
.
log
(
"Do the ajax conexion with API and send this array to be processed:"
)
for
(
var
i
in
FlagsBuffer
)
{
var
real_ids
=
[]
for
(
var
j
in
FlagsBuffer
[
i
])
real_ids
.
push
(
AjaxRecords
[
j
].
id
);
FlagsBuffer
[
i
]
=
real_ids
}
console
.
log
(
FlagsBuffer
)
console
.
log
(
""
)
var
list_id
=
$
(
"#list_id"
).
val
()
// '/annotations/lists/'+list_id+'/ngrams/108642'
console
.
log
(
window
.
location
.
origin
+
'/annotations/lists/'
+
list_id
+
"/multiple"
)
console
.
log
(
real_ids
)
$
.
ajax
({
method
:
"POST"
,
url
:
window
.
location
.
origin
+
'/annotations/lists/'
+
list_id
+
"/multiple"
,
data
:
"to_delete="
+
JSON
.
stringify
(
real_ids
),
beforeSend
:
function
(
xhr
)
{
xhr
.
setRequestHeader
(
"X-CSRFToken"
,
getCookie
(
"csrftoken"
));
},
success
:
function
(
data
){
console
.
log
(
data
)
},
error
:
function
(
result
)
{
console
.
log
(
"Data not found in #Save_All"
);
console
.
log
(
result
)
}
});
// console.log("")
}
});
...
...
templates/corpus.html
View file @
3629523d
...
...
@@ -5,6 +5,8 @@
{% load staticfiles %}
<link
rel=
"stylesheet"
type=
"text/css"
href=
"{% static "
css
/
bootstrap
.
css
"
%}"
>
<link
rel=
"stylesheet"
type=
"text/css"
href=
"{% static "
js
/
bootstrap
/
bootstrap-select
.
min
.
css
"
%}"
>
<link
rel=
"stylesheet"
type=
"text/css"
href=
"{% static "
css
/
morris
.
css
"
%}"
>
<link
rel=
"stylesheet"
type=
"text/css"
href=
"{% static "
css
/
jquery
.
easy-pie-chart
.
css
"%}"
>
...
...
@@ -132,7 +134,6 @@ th a {
</div>
</div>
<div
class=
"container"
>
<div
class=
"row"
>
<div
class=
"col-md-4"
>
...
...
@@ -178,8 +179,25 @@ th a {
</div>
</div>
<div
id=
"supmofos"
>
<select
data-width=
"100px"
dir=
"ltr"
class=
"selectpicker"
>
<option
selected
>
All
</option>
<option>
Title
</option>
<option>
Date
</option>
<optgroup
label=
"Duplicates"
>
<option>
by DOI
</option>
<option>
by Title
</option>
</optgroup>
</select>
</div>
<script
type=
"text/javascript"
src=
"{% static "
js
/
jquery
/
jquery
.
min
.
js
"
%}"
></script>
<script
src=
"{% static "
js
/
charts
/
bootstrap
.
min
.
js
"
%}"
></script>
<script
type=
"text/javascript"
src=
"{% static "
js
/
bootstrap
/
bootstrap-select
.
min
.
js
"
%}"
></script>
<script
type=
"text/javascript"
src=
"{% static "
js
/
jquery
/
jquery
.
dynatable
.
js
"
%}"
></script>
<!-- custom-lib for dynatable.js and dc.js -->
...
...
templates/project.html
View file @
3629523d
...
...
@@ -266,7 +266,7 @@
success
:
function
(
data
)
{
console
.
log
(
"in doTheQuery() Ajax.Success:"
)
console
.
log
(
data
)
//
location.reload();
location
.
reload
();
},
error
:
function
(
result
)
{
console
.
log
(
"in doTheQuery(). Data not found"
);
...
...
@@ -487,7 +487,6 @@
var
pubmedifiedQuery
=
{
query
:
query
,
string
:
query
}
// console.log(pubmedifiedQuery)
var
projectid
=
window
.
location
.
href
.
split
(
"project"
)[
1
].
replace
(
/
\/
/g
,
''
)
//replace all the slashes
...
...
@@ -502,7 +501,7 @@
success
:
function
(
data
)
{
console
.
log
(
"ajax_success: in testISTEX()"
)
console
.
log
(
data
)
//
location.reload();
location
.
reload
();
},
error
:
function
(
result
)
{
console
.
log
(
"in testISTEX(). Data not found"
);
...
...
tests/ngramstable/views.py
View file @
3629523d
...
...
@@ -50,6 +50,46 @@ from gargantext_web import about
from
gargantext_web.api
import
JsonHttpResponse
from
ngram.lists
import
listIds
,
listNgramIds
,
ngramList
,
doList
def test_page(request, project_id, corpus_id):
    """Render the 'tests/test_select-boostrap.html' page for one corpus.

    Parameters:
        request: the Django request; anonymous users are redirected to the
            login page.
        project_id: project node id as captured from the URL (string).
        corpus_id: corpus node id as captured from the URL (string).

    Returns:
        An ``HttpResponse`` with the rendered template, or a redirect to
        the login page.

    Raises:
        Http404: if either id is not an integer.
    """
    if not request.user.is_authenticated():
        return redirect('/login/?next=%s' % request.path)

    # Parse both ids once; everything below uses the integer values.
    try:
        project_id = int(project_id)
        corpus_id = int(corpus_id)
    except ValueError:
        raise Http404()

    t = get_template('tests/test_select-boostrap.html')

    user = cache.User[request.user.username].id
    date = datetime.datetime.now()
    project = cache.Node[project_id]
    corpus = cache.Node[corpus_id]

    # Number of 'Document' nodes that are direct children of the corpus.
    type_doc_id = cache.NodeType['Document'].id
    number = (
        session.query(func.count(Node.id))
        .filter(Node.parent_id == corpus_id, Node.type_id == type_doc_id)
        .scalar()
    )

    # Best effort: any failure to read the flag is printed and the flag
    # falls back to 0.
    try:
        processing = corpus.hyperdata['Processing']
    except Exception as error:
        print(error)
        processing = 0

    html = t.render(Context({
        'debug': settings.DEBUG,
        'user': user,
        'date': date,
        'project': project,
        'corpus': corpus,
        'processing': processing,
        'number': number,
    }))

    return HttpResponse(html)
def
get_ngrams
(
request
,
project_id
,
corpus_id
):
if
not
request
.
user
.
is_authenticated
():
return
redirect
(
'/login/?next=
%
s'
%
request
.
path
)
...
...
@@ -68,6 +108,13 @@ def get_ngrams(request , project_id , corpus_id ):
corpus
=
cache
.
Node
[
int
(
corpus_id
)]
type_doc_id
=
cache
.
NodeType
[
'Document'
]
.
id
number
=
session
.
query
(
func
.
count
(
Node
.
id
))
.
filter
(
Node
.
parent_id
==
corpus_id
,
Node
.
type_id
==
type_doc_id
)
.
all
()[
0
][
0
]
lists
=
dict
()
for
list_type
in
[
'MiamList'
,
'StopList'
]:
list_id
=
list
()
list_id
=
listIds
(
user_id
=
request
.
user
.
id
,
corpus_id
=
int
(
corpus_id
),
typeList
=
list_type
)
lists
[
"
%
s"
%
list_id
[
0
][
0
]]
=
list_type
try
:
processing
=
corpus
.
hyperdata
[
'Processing'
]
except
Exception
as
error
:
...
...
@@ -82,11 +129,40 @@ def get_ngrams(request , project_id , corpus_id ):
'corpus'
:
corpus
,
'processing'
:
processing
,
'number'
:
number
,
'list_id'
:
list_id
[
0
][
0
],
}))
return
HttpResponse
(
html
)
def test_test(request, corpus_id, doc_id):
    """Print the ngram ids found in the user's StopList for a corpus.

    Debug view: it looks up the 'StopList' of ``corpus_id`` for the current
    user, prints the list id and the ngram ids returned by ``listNgramIds``,
    and answers with a fixed placeholder payload.

    Parameters:
        request: the Django request; ``request.user.id`` selects the lists.
        corpus_id: corpus node id as captured from the URL.
        doc_id: document node id as captured from the URL; it is validated
            as an integer but not otherwise used (see the note below).

    Returns:
        ``JsonHttpResponse(["hola", "mundo"])``.

    Raises:
        ValueError: if ``corpus_id`` or ``doc_id`` is not an integer.
    """
    corpus_id = int(corpus_id)
    doc_id = int(doc_id)  # validation only

    list_id = listIds(
        user_id=request.user.id, corpus_id=corpus_id, typeList='StopList'
    )
    stop_list_id = list_id[0][0]
    print(stop_list_id)

    # ngrams of list_id of corpus_id
    # NOTE(review): the stop-list id is passed as ``doc_id`` here, not the
    # ``doc_id`` from the URL. Kept as-is; confirm whether this is intended.
    doc_ngram_list = listNgramIds(
        corpus_id=corpus_id,
        list_id=stop_list_id,
        doc_id=stop_list_id,
        user_id=request.user.id,
    )

    # First element of each returned row is used as the ngram id.
    to_del = {n[0]: True for n in doc_ngram_list}
    print(to_del.keys())

    results = ["hola", "mundo"]
    return JsonHttpResponse(results)
def
test_ngrams
(
request
,
project_id
,
corpus_id
):
results
=
[
"hola"
,
"mundo"
]
...
...
@@ -94,6 +170,17 @@ def test_ngrams(request , project_id, corpus_id ):
whitelist_type_id
=
cache
.
NodeType
[
'WhiteList'
]
.
id
document_type_id
=
cache
.
NodeType
[
'Document'
]
.
id
corpus_id
=
int
(
corpus_id
)
lists
=
dict
()
for
list_type
in
[
'StopList'
]:
list_id
=
list
()
list_id
=
listIds
(
user_id
=
request
.
user
.
id
,
corpus_id
=
int
(
corpus_id
),
typeList
=
list_type
)
lists
[
"
%
s"
%
list_id
[
0
][
0
]]
=
list_type
doc_ngram_list
=
listNgramIds
(
corpus_id
=
corpus_id
,
list_id
=
list_id
[
0
][
0
],
doc_id
=
list_id
[
0
][
0
],
user_id
=
request
.
user
.
id
)
StopList
=
{}
for
n
in
doc_ngram_list
:
StopList
[
n
[
0
]
]
=
True
# # 13099 clinical benefits
# # 7492 recent data
# # 14279 brain development
...
...
@@ -145,15 +232,16 @@ def test_ngrams(request , project_id, corpus_id ):
for
doc
in
documents
:
NgramOccs
=
session
.
query
(
Node_Ngram
)
.
filter
(
Node_Ngram
.
node_id
==
doc
.
id
)
.
all
()
for
ngram
in
NgramOccs
:
if
ngram
.
ngram_id
not
in
Ngrams_Scores
:
Ngrams_Scores
[
ngram
.
ngram_id
]
=
{}
Ngrams_Scores
[
ngram
.
ngram_id
][
"scores"
]
=
{
"occ_sum"
:
0.0
,
"occ_uniq"
:
0.0
,
"tfidf_sum"
:
0.0
}
Ngrams_Scores
[
ngram
.
ngram_id
][
"scores"
][
"occ_sum"
]
+=
ngram
.
weight
Ngrams_Scores
[
ngram
.
ngram_id
][
"scores"
][
"occ_uniq"
]
+=
1
if
ngram
.
ngram_id
not
in
StopList
:
if
ngram
.
ngram_id
not
in
Ngrams_Scores
:
Ngrams_Scores
[
ngram
.
ngram_id
]
=
{}
Ngrams_Scores
[
ngram
.
ngram_id
][
"scores"
]
=
{
"occ_sum"
:
0.0
,
"occ_uniq"
:
0.0
,
"tfidf_sum"
:
0.0
}
Ngrams_Scores
[
ngram
.
ngram_id
][
"scores"
][
"occ_sum"
]
+=
ngram
.
weight
Ngrams_Scores
[
ngram
.
ngram_id
][
"scores"
][
"occ_uniq"
]
+=
1
# print("\t" , ngram.ngram_id , "\t" , ngram.weight )
## Getting the Effective nro de OCCS / >##
...
...
@@ -177,9 +265,10 @@ def test_ngrams(request , project_id, corpus_id ):
Sum
=
0
NgramTFIDF
=
session
.
query
(
NodeNodeNgram
)
.
filter
(
NodeNodeNgram
.
nodex_id
==
corpus_id
)
.
all
()
for
ngram
in
NgramTFIDF
:
Ngrams_Scores
[
ngram
.
ngram_id
][
"scores"
][
"tfidf_sum"
]
+=
ngram
.
score
Sum
+=
Ngrams_Scores
[
ngram
.
ngram_id
][
"scores"
][
"occ_uniq"
]
# print( "docid:", ngram.nodey_id , ngram.ngram_id , ngram.score)
if
ngram
.
ngram_id
not
in
StopList
:
Ngrams_Scores
[
ngram
.
ngram_id
][
"scores"
][
"tfidf_sum"
]
+=
ngram
.
score
Sum
+=
Ngrams_Scores
[
ngram
.
ngram_id
][
"scores"
][
"occ_uniq"
]
# print( "docid:", ngram.nodey_id , ngram.ngram_id , ngram.score)
# import pprint
...
...
@@ -210,13 +299,14 @@ def test_ngrams(request , project_id, corpus_id ):
query
=
session
.
query
(
Ngram
)
.
filter
(
Ngram
.
id
.
in_
(
ngrams_ids
))
ngrams_data
=
query
.
all
()
for
ngram
in
ngrams_data
:
occ_uniq
=
Ngrams_Scores
[
ngram
.
id
][
"scores"
][
"occ_uniq"
]
if
occ_uniq
>
occs_threshold
:
Ngrams_Scores
[
ngram
.
id
][
"name"
]
=
ngram
.
terms
Ngrams_Scores
[
ngram
.
id
][
"id"
]
=
ngram
.
id
Ngrams_Scores
[
ngram
.
id
][
"scores"
][
"tfidf"
]
=
Ngrams_Scores
[
ngram
.
id
][
"scores"
][
"tfidf_sum"
]
/
occ_uniq
del
Ngrams_Scores
[
ngram
.
id
][
"scores"
][
"tfidf_sum"
]
Metrics
[
"ngrams"
]
.
append
(
Ngrams_Scores
[
ngram
.
id
]
)
if
ngram
.
id
not
in
StopList
:
occ_uniq
=
Ngrams_Scores
[
ngram
.
id
][
"scores"
][
"occ_uniq"
]
if
occ_uniq
>
occs_threshold
:
Ngrams_Scores
[
ngram
.
id
][
"name"
]
=
ngram
.
terms
Ngrams_Scores
[
ngram
.
id
][
"id"
]
=
ngram
.
id
Ngrams_Scores
[
ngram
.
id
][
"scores"
][
"tfidf"
]
=
Ngrams_Scores
[
ngram
.
id
][
"scores"
][
"tfidf_sum"
]
/
occ_uniq
del
Ngrams_Scores
[
ngram
.
id
][
"scores"
][
"tfidf_sum"
]
Metrics
[
"ngrams"
]
.
append
(
Ngrams_Scores
[
ngram
.
id
]
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment