humanities / gargantext / Commits

Commit 09b31df8, authored Nov 26, 2015 by delanoe

Merge branch 'samuel' into unstable

Parents: cc65af59, 8dc52be7

Showing 16 changed files with 147 additions and 269 deletions:
admin/utils.py                                       +1   -0
gargantext_web/celery.py                             +1   -1
gargantext_web/urls.py                               +3   -4
gargantext_web/views.py                              +11  -5
gargantext_web/views_optimized.py                    +1   -1
ngram/workflow.py                                    +3   -4
node/models.py                                       +36  -36
rest_v1_0/ngrams.py                                  +3   -4
scrappers/scrap_pubmed/MedlineFetcherDavid2015.py    +23  -12
scrappers/scrap_pubmed/views.py                      +2   -2
templates/corpus/menu.html                           +33  -2
templates/corpus/terms.html                          +1   -1
templates/explorer.html                              +1   -0
templates/project.html                               +5   -21
templates/projects.html                              +1   -1
tests/ngramstable/views.py                           +22  -175
admin/utils.py

@@ -57,6 +57,7 @@ class WorkflowTracking:
         cursor = connection.cursor()
         try:
             cursor.execute(the_query)
+            cursor.execute("COMMIT;")
         finally:
             connection.close()
     except:
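For orientation: WorkflowTracking.processing_ is the status helper this merge leans on everywhere below. A minimal sketch of the pattern, assuming a DB-API connection and the node_node table / hyperdata JSON column that the raw queries in this commit reference; the helper name and signature here are illustrative, not the project's exact code:

import json

def processing_(connection, corpus_id, status):
    # Persist a human-readable status string into the corpus node's
    # hyperdata JSON column, with the explicit COMMIT added in this hunk.
    # (Sketch only: table and column names taken from queries in this diff.)
    the_query = """ UPDATE node_node SET hyperdata = '%s' WHERE id = %d """ % (
        json.dumps({"Processing": status}), int(corpus_id))
    cursor = connection.cursor()
    try:
        cursor.execute(the_query)
        cursor.execute("COMMIT;")
    finally:
        connection.close()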
gargantext_web/celery.py

@@ -43,9 +43,9 @@ def apply_workflow(corpus_id):
     ngram_workflow(corpus)
     #ngrams2miam(user_id=corpus.user_id, corpus_id=corpus_id)
-    update_state.processing_(corpus, "0")
     print("End of the Workflow for corpus %d" % (corpus_id))
+    update_state.processing_(corpus, "0")

 @shared_task
gargantext_web/urls.py

@@ -91,11 +91,10 @@ urlpatterns = patterns('',
     ############################################################################
     url(r'^tests/', include('tests.urls')),

-    # TODO Samuel, lines below were on your tests, are they still used ?
-    # can we delete them ?
     url(r'^project/(\d+)/corpus/(\d+)/terms/ngrams.json$', samtest.get_ngrams_json),
     url(r'^project/(\d+)/corpus/(\d+)/terms$', samtest.get_ngrams),
-    url(r'^project/(\d+)/corpus/(\d+)/stop_list.json$', samtest.get_stoplist)
+    url(r'^project/(\d+)/corpus/(\d+)/stop_list.json$', samtest.get_stoplist),
+    url(r'^api/corpus/(\d+)$', samtest.get_corpus_state),
+    url(r'^test_cores$', samtest.get_cores)
 )
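The two new routes back the polling UI added in templates/corpus/menu.html further down. A hedged usage sketch with Django's test client (paths from this diff; the corpus id is illustrative):

from django.test import Client

client = Client()

# get_corpus_state returns the corpus node's hyperdata as JSON,
# e.g. {"Processing": "Parsing documents"} while running, "0" when done.
response = client.get('/api/corpus/1')
print(response.status_code, response.content)

# get_cores reports the server's CPU count.
response = client.get('/test_cores')
print(response.content)  # e.g. b'{"data": 8}'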
gargantext_web/views.py

@@ -345,12 +345,14 @@ def corpus(request, project_id, corpus_id):
     type_doc_id = cache.NodeType['Document'].id
     number = session.query(func.count(Node.id)).filter(Node.parent_id==corpus_id, Node.type_id==type_doc_id).all()[0][0]

+    the_query = """ SELECT hyperdata FROM node_node WHERE id=%d """ % (int(corpus_id))
+    cursor = connection.cursor()
     try:
-        processing = corpus.hyperdata['Processing']
-    except Exception as error:
-        print(error)
-        processing = 0
-    print('corpus', corpus_id, ' , processing', processing)
+        cursor.execute(the_query)
+        processing = cursor.fetchone()[0]["Processing"]
+    except:
+        processing = "Error"

     html = t.render(Context({
         'debug': settings.DEBUG,

@@ -569,6 +571,9 @@ def graph(request, project_id, corpus_id, generic=100, specific=100):
     project_type_id = cache.NodeType['Project'].id
     corpus_type_id = cache.NodeType['Corpus'].id
     miamlist_type_id = cache.NodeType['MiamList'].id
+    miamlist = session.query(Node).filter(Node.user_id == request.user.id, Node.parent_id==corpus_id, Node.type_id == cache.NodeType['MiamList'].id).first()
+
     graphurl = "corpus/" + str(corpus_id) + "/node_link.json"
     html = t.render(Context({ \

@@ -576,6 +581,7 @@ def graph(request, project_id, corpus_id, generic=100, specific=100):
         'user': request.user, \
         'date': date, \
         'corpus': corpus, \
+        'list_id': miamlist.id, \
         'project': project, \
         'graphfile': graphurl, \
         }))
gargantext_web/views_optimized.py

@@ -140,7 +140,7 @@ def project(request, project_id):
             parent_id = project_id,
             type_id = cache.NodeType['Corpus'].id,
             language_id = language_id,
-            hyperdata = {'Processing' : 1,}
+            hyperdata = {'Processing' : "Parsing documents",}
             )
         session.add(corpus)
         session.commit()
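The corpus node is now seeded with a human-readable stage label instead of the old integer flag; each workflow step overwrites it, and "0" marks completion, which is exactly what templates/corpus/menu.html below keys off. A minimal sketch of that contract (helper name illustrative; the status values are taken from this diff):

def is_finished(hyperdata):
    # "0" (or the legacy integer 0) means the workflow is done;
    # any other value is the label of the stage currently running.
    return str(hyperdata.get('Processing')) == "0"

assert not is_finished({'Processing': "Parsing documents"})   # just created
assert not is_finished({'Processing': "TF-IDF local score"})  # mid-workflow
assert is_finished({'Processing': "0"})                       # finished
assert is_finished({'Processing': 0})                         # legacy integer form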
ngram/workflow.py

@@ -6,7 +6,7 @@ from ngram.stop import compute_stop
 from ngram.group import compute_groups
 from gargantext_web.db import get_or_create_node
 from ngram.mapList import compute_mapList
-from ngram.occurrences import compute_occs
+# from ngram.occurrences import compute_occs
 from gargantext_web.db import session, Node, NodeNgram
 from admin.utils import WorkflowTracking

@@ -47,9 +47,8 @@ def ngram_workflow(corpus, n=5000):
     update_state.processing_(corpus, "TF-IDF local score")
     compute_tfidf(corpus)
-    update_state.processing_(corpus, "OCCS local score")
-    compute_occs(corpus)
+    # update_state.processing_(corpus, "OCCS local score")
+    # compute_occs(corpus)

 #corpus=session.query(Node).filter(Node.id==540420).first()
 #corpus=session.query(Node).filter(Node.id==559637).first()
node/models.py

@@ -269,42 +269,42 @@ class Node(CTENode):
             for ngram_text, weight in associations.items()
         ])

-    @current_app.task(filter=task_method)
-    def workflow(self, keys=None, ngramsextractorscache=None, ngramscaches=None, verbose=False):
-        import time
-        total = 0
-        print("LOG::TIME: In workflow() parse_resources()")
-        start = time.time()
-        self.hyperdata['Processing'] = 1
-        self.save()
-        self.parse_resources()
-        end = time.time()
-        total += (end - start)
-        print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" parse_resources() [s]",(end - start))
-        print("LOG::TIME: In workflow() / parse_resources()")
-        start = time.time()
-        print("LOG::TIME: In workflow() extract_ngrams()")
-        print("\n- - - - - - - - - -")
-        type_document = NodeType.objects.get(name='Document')
-        self.children.filter(type_id=type_document.pk).extract_ngrams(keys=['title',])
-        end = time.time()
-        print("- - - - - - - - - - \n")
-        total += (end - start)
-        print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" extract_ngrams() [s]",(end - start))
-        print("LOG::TIME: In workflow() / extract_ngrams()")
-        start = time.time()
-        print("In workflow() do_tfidf()")
-        from analysis.functions import do_tfidf
-        do_tfidf(self)
-        end = time.time()
-        total += (end - start)
-        print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" do_tfidf() [s]",(end - start))
-        print("LOG::TIME: In workflow() / do_tfidf()")
-        print("In workflow() END")
-        self.hyperdata['Processing'] = 0
-        self.save()
+    # @current_app.task(filter=task_method)
+    # def workflow(self, keys=None, ngramsextractorscache=None, ngramscaches=None, verbose=False):
+    #     import time
+    #     total = 0
+    #     print("LOG::TIME: In workflow() parse_resources()")
+    #     start = time.time()
+    #     self.hyperdata['Processing'] = 1
+    #     self.save()
+    #     self.parse_resources()
+    #     end = time.time()
+    #     total += (end - start)
+    #     print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" parse_resources() [s]",(end - start))
+    #     print("LOG::TIME: In workflow() / parse_resources()")
+    #     start = time.time()
+    #     print("LOG::TIME: In workflow() extract_ngrams()")
+    #     print("\n- - - - - - - - - -")
+    #     type_document = NodeType.objects.get(name='Document')
+    #     self.children.filter(type_id=type_document.pk).extract_ngrams(keys=['title',])
+    #     end = time.time()
+    #     print("- - - - - - - - - - \n")
+    #     total += (end - start)
+    #     print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" extract_ngrams() [s]",(end - start))
+    #     print("LOG::TIME: In workflow() / extract_ngrams()")
+    #     start = time.time()
+    #     print("In workflow() do_tfidf()")
+    #     from analysis.functions import do_tfidf
+    #     do_tfidf(self)
+    #     end = time.time()
+    #     total += (end - start)
+    #     print ("LOG::TIME:_ "+datetime.datetime.now().isoformat()+" do_tfidf() [s]",(end - start))
+    #     print("LOG::TIME: In workflow() / do_tfidf()")
+    #     print("In workflow() END")
+    #     self.hyperdata['Processing'] = 0
+    #     self.save()

 class Node_Hyperdata(models.Model):
     node = models.ForeignKey(Node, on_delete=models.CASCADE)
rest_v1_0/ngrams.py

@@ -132,10 +132,7 @@ class List(APIView):
         if request.GET.get('custom', False) != False:
             ngrams_meta = self.get_metadata(ngram_ids, corpus_id)
             ngram_ids = ngrams_meta["data"]
-            measurements["tfidf"] = {
-                "s": ngrams_meta["secs"],
-                "n": len(ngrams_meta["data"].keys())
-            }
+            measurements["tfidf"] = { "s": ngrams_meta["secs"], "n": len(ngrams_meta["data"].keys()) }

         return JsonHttpResponse({ "data": ngram_ids, "time": measurements })

@@ -575,6 +572,8 @@ class Keep(APIView):
         Delete ngrams from the map list
         """
         group_rawreq = dict(request.data)
+        # print("group_rawreq:")
+        # print(group_rawreq)
         from django.utils.html import escape
         ngram_2del = [int(i) for i in list(group_rawreq.keys())]
         corpus = session.query(Node).filter(Node.id==corpus_id).first()
scrappers/scrap_pubmed/MedlineFetcherDavid2015.py

@@ -45,19 +45,22 @@ class MedlineFetcher:
         query = query.replace(' ', '%20')
         eSearch = '%s/esearch.fcgi?db=%s&retmax=1&usehistory=y&term=%s' % (self.pubMedEutilsURL, self.pubMedDB, query)
-        eSearchResult = urlopen(eSearch)
-        data = eSearchResult.read()
-        root = etree.XML(data)
-        findcount = etree.XPath("/eSearchResult/Count/text()")
-        count = findcount(root)[0]
-        findquerykey = etree.XPath("/eSearchResult/QueryKey/text()")
-        queryKey = findquerykey(root)[0]
-        findwebenv = etree.XPath("/eSearchResult/WebEnv/text()")
-        webEnv = findwebenv(root)[0]
+        try:
+            eSearchResult = urlopen(eSearch)
+            data = eSearchResult.read()
+            root = etree.XML(data)
+            findcount = etree.XPath("/eSearchResult/Count/text()")
+            count = findcount(root)[0]
+            findquerykey = etree.XPath("/eSearchResult/QueryKey/text()")
+            queryKey = findquerykey(root)[0]
+            findwebenv = etree.XPath("/eSearchResult/WebEnv/text()")
+            webEnv = findwebenv(root)[0]
+        except:
+            count = 0
+            queryKey = False
+            webEnv = False
+            origQuery = False

         values = { "query": origQuery, "count": int(str(count)), "queryKey": queryKey, "webEnv": webEnv }
         return values

@@ -173,8 +176,13 @@ class MedlineFetcher:
         self.q.join()
         print('time:', time.perf_counter() - start)

+        Total = 0
+        Fails = 0
         for globalresults in self.firstResults:
             # globalresults = self.medlineEsearch(pubmedquery)
+            Total += 1
+            if globalresults["queryKey"] == False:
+                Fails += 1
             if globalresults["count"] > 0:
                 N += globalresults["count"]
                 queryhyperdata = {

@@ -198,4 +206,7 @@ class MedlineFetcher:
             if query["retmax"] == 0:
                 query["retmax"] += 1
             print(query["string"], "\t[", k, ">", query["retmax"], "]")
+        if ((Fails+1)/(Total+1)) == 1:
+            # for identifying the epic fail or connection error
+            thequeries = [False]
         return thequeries
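The (Fails + 1) / (Total + 1) ratio equals 1 exactly when every eSearch call failed, which is what lets a single [False] sentinel signal a connection-level failure to the caller; a quick check of the arithmetic:

def all_failed(fails, total):
    # Equals 1 only when fails == total; the +1 on both sides also
    # keeps the expression defined when no query ran at all.
    return ((fails + 1) / (total + 1)) == 1

assert all_failed(0, 0)       # nothing ran: treated as a failure
assert all_failed(5, 5)       # every query failed
assert not all_failed(4, 5)   # at least one query succeeded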
scrappers/scrap_pubmed/views.py

@@ -130,7 +130,7 @@ def doTheQuery(request , project_id):
             parent_id = project_id,
             type_id = cache.NodeType['Corpus'].id,
             language_id = None,
-            hyperdata = {'Processing' : 1,}
+            hyperdata = {'Processing' : "Parsing documents",}
             )
         session.add(corpus)
         session.commit()

@@ -243,7 +243,7 @@ def testISTEX(request , project_id):
             parent_id = project_id,
             type_id = cache.NodeType['Corpus'].id,
             language_id = None,
-            hyperdata = {'Processing' : 1,}
+            hyperdata = {'Processing' : "Parsing documents",}
             )
         session.add(corpus)
         session.commit()
templates/corpus/menu.html

@@ -85,7 +85,8 @@
       </div>
     </div>

     <span style="display:none;" id="process_state">{{processing}}</span>
+    <span style="display:none;" id="corpus_id">{{corpus.id}}</span>

     <div class="col-md-6">
       <div class="jumbotron">
       {% if processing == 0 or processing == "0" %}

@@ -96,8 +97,9 @@
           <li>Authors and Terms</li>
         </ol>
       {% else %}
         <h3><img width="20px" src="{% static "js/libs/img2/loading-bar.gif" %}"></img> Networks</h3>
-        <h6>(Updating: <i>{{processing}}</i>)</h6>
+        <h6>(Updating: <i id="process_id" data-since="date">{{processing}}</i>)</h6>
         <ol>
           <li>Terms</li>
           <li>Journals and Terms</li>

@@ -134,6 +136,35 @@
         return window.open(url_,'_blank');
     }

+    var refresh_time = 3000 //ms
+    function corpus_monitorer() {
+      var url_ = "/api/corpus/" + $("#corpus_id").text()
+      $.ajax({
+        type: "GET",
+        url: url_,
+        dataType: "json",
+        success: function(data, textStatus, jqXHR) {
+          if (data["Processing"] == "0") {
+            window.location.reload()
+          } else {
+            $("#process_id").html(data["Processing"] + "...")
+          }
+        },
+        error: function(exception) {
+          console.log("exception!:" + exception.status)
+        }
+      });
+    }
+
+    if ($("#process_state").text() == "0") {
+      // workflow : finished!
+    } else {
+      setInterval(corpus_monitorer, refresh_time);
+    }
 </script>
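For readers following the server side, this is the same loop corpus_monitorer() runs in the browser, sketched in Python against the /api/corpus/(id) route added in gargantext_web/urls.py above (base URL and timing are illustrative):

import json
import time
import urllib.request

def wait_until_done(corpus_id, refresh_time=3.0, base="http://localhost:8000"):
    # Poll the corpus state endpoint until hyperdata["Processing"] == "0";
    # the JavaScript version reloads the page at that point.
    while True:
        with urllib.request.urlopen("%s/api/corpus/%d" % (base, corpus_id)) as resp:
            data = json.loads(resp.read().decode())
        if data.get("Processing") == "0":
            return
        print("still running:", data.get("Processing"))
        time.sleep(refresh_time)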
templates/corpus/terms.html

@@ -192,7 +192,7 @@ input[type=radio]:checked + label {
       </table>
     </p> -->
     <p align="right">
-      <button id="Clean_All" class="btn btn-warning">Clean</button>
+      <!-- <button id="Clean_All" class="btn btn-warning">Clean</button> -->
       <button id="Save_All" class="btn btn-primary">Save</button>
     </p>
templates/explorer.html

@@ -136,6 +136,7 @@
   <li>
     <a>
       <div id="graphid" style="visibility: hidden;">{{graphfile}}</div>
+      <input type="hidden" id="list_id" value="{{ list_id }}"></input>
       <div id="jquerytemplatenb" style="visibility: hidden;">{{user.id}}</div>
     </a>
   </li>
templates/project.html

@@ -24,26 +24,6 @@
   <script type="text/javascript">
-    var refresh_time = 5000 //ms
-    function corpus_monitorer() {
-      console.log("hola")
-      // $.ajax({
-      //   type: "GET",
-      //   url: "https://dl.dropboxusercontent.com/u/9975992/climat/ajax_file.json",
-      //   dataType: "json",
-      //   success : function(data, textStatus, jqXHR) {
-      //     if( data.command ) {
-      //       eval( data.command )
-      //     }
-      //   },
-      //   error: function(exception) {
-      //     console.log("exception!:"+exception.status)
-      //   }
-      // });
-    }
-    setInterval(corpus_monitorer, refresh_time);
   </script>

@@ -363,6 +343,7 @@
       xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
     },
     success: function(data) {
       console.log("SUCCESS")
       console.log("in getGlobalResults")
+      console.log(data)
       console.log("enabling " + "#" + value.id)

@@ -379,12 +360,15 @@
           $('#submit_thing').prop('disabled', false);
         } else {
           $("#theresults").html("<i> <b>" + pubmedquery + "</b>: No results!.</i><br>")
+          if (data[0] == false)
+            $("#theresults").html("Pubmed connection error!</i><br>")
           $('#submit_thing').prop('disabled', true);
         }
       },
       error: function(result) {
         console.log("Data not found");
+        $("#theresults").html("Pubmed connection error!</i><br>")
+        $('#submit_thing').prop('disabled', true);
       }
     });
   }
templates/projects.html

@@ -44,7 +44,7 @@
   data-content='
     <ul>
       <li> Rename </li>
-      <li>Add new corpus</li>
+      <li><a href="/project/{{ project.id }}">Add new corpus</a></li>
       <li><a href="/delete/{{ project.id }}">Delete</a></li>
     </ul>
   '>Manage</button>
tests/ngramstable/views.py

@@ -56,43 +56,6 @@ from rest_v1_0.api import JsonHttpResponse
 from ngram.lists import listIds, listNgramIds, ngramList, doList

-def test_page(request, project_id, corpus_id):
-    if not request.user.is_authenticated():
-        return redirect('/login/?next=%s' % request.path)
-    try:
-        offset = int(project_id)
-        offset = int(corpus_id)
-    except ValueError:
-        raise Http404()
-    t = get_template('tests/test_select-boostrap.html')
-    user = cache.User[request.user.username].id
-    date = datetime.datetime.now()
-    project = cache.Node[int(project_id)]
-    corpus = cache.Node[int(corpus_id)]
-    type_doc_id = cache.NodeType['Document'].id
-    number = session.query(func.count(Node.id)).filter(Node.parent_id==corpus_id, Node.type_id==type_doc_id).all()[0][0]
-    try:
-        processing = corpus.hyperdata['Processing']
-    except Exception as error:
-        print(error)
-        processing = 0
-    html = t.render(Context({
-        'debug': settings.DEBUG,
-        'user': request.user.username,
-        'date': date,
-        'project': project,
-        'corpus': corpus,
-        'processing': processing,
-        'number': number,
-    }))
-    return HttpResponse(html)

 def get_ngrams(request, project_id, corpus_id):
     if not request.user.is_authenticated():
         return redirect('/login/?next=%s' % request.path)

@@ -133,32 +96,6 @@ def get_ngrams(request , project_id , corpus_id ):
     return HttpResponse(html)

-def get_stoplist(request, corpus_id, doc_id):
-    """Get All for a doc id"""
-    user_id = request.user.id
-    whitelist_type_id = cache.NodeType['WhiteList'].id
-    document_type_id = cache.NodeType['Document'].id
-    miam_id = listIds(typeList='MiamList', user_id=request.user.id, corpus_id=corpus_id)[0][0]
-    count_min = 2
-    size = 1000
-    corpus_id = int(corpus_id)
-    lists = dict()
-    for list_type in ['StopList']:
-        list_id = list()
-        list_id = listIds(user_id=request.user.id, corpus_id=int(corpus_id), typeList=list_type)
-        lists["%s" % list_id[0][0]] = list_type
-    doc_ngram_list = listNgramIds(corpus_id=corpus_id, list_id=list_id[0][0], doc_id=list_id[0][0], user_id=request.user.id)
-    StopList = {}
-    for n in doc_ngram_list:
-        StopList[n[0]] = True
-    results = StopList.keys() #[ "hola" , "mundo" ]
-    return JsonHttpResponse(StopList)

 def get_journals(request, project_id, corpus_id):
     if not request.user.is_authenticated():

@@ -216,122 +153,32 @@ def get_journals_json(request , project_id, corpus_id ):
 from gargantext_web.db import session, cache, Node, NodeNgram
 from sqlalchemy import or_, func
 from sqlalchemy.orm import aliased

 def get_ngrams_json(request, project_id, corpus_id):
     results = ["holaaaa", "mundo"]
     user_id = request.user.id
     whitelist_type_id = cache.NodeType['WhiteList'].id
     document_type_id = cache.NodeType['Document'].id
     miam_id = listIds(typeList='MiamList', user_id=request.user.id, corpus_id=corpus_id)[0][0]
     count_min = 2
     size = 1000
     corpus_id = int(corpus_id)
     lists = dict()
     for list_type in ['StopList']:
         list_id = list()
         list_id = listIds(user_id=request.user.id, corpus_id=int(corpus_id), typeList=list_type)
         lists["%s" % list_id[0][0]] = list_type
     doc_ngram_list = listNgramIds(corpus_id=corpus_id, list_id=list_id[0][0], doc_id=list_id[0][0], user_id=request.user.id)
     StopList = {}
     for n in doc_ngram_list:
         StopList[n[0]] = True

     # [ Get Uniq_Occs ]
     myamlist_type_id = cache.NodeType['MiamList'].id
     myamlist = session.query(Node).filter(Node.user_id == user_id, Node.parent_id==corpus_id, Node.type_id == myamlist_type_id).first()
     myamlists = session.query(Node).filter(Node.user_id == user_id, Node.parent_id==corpus_id, Node.type_id == myamlist_type_id).all()
     # sql_average = """SELECT avg(weight) as Average FROM node_node_ngram WHERE node_node_ngram.node_id=%d""" % (myamlist.id)
     # cursor = connection.cursor()
     # cursor.execute(sql_average)
     # avg_result = cursor.fetchone()[0]
     # threshold = min (10 , math.sqrt(avg_result) )
     # OCCs = session.query(Node_Ngram).filter( Node_Ngram.node_id==myamlist.id , Node_Ngram.weight >= threshold ).all()
     # [ / Get Uniq_Occs ]
     Miam = aliased(NodeNgram)
     sql_average = (session.query(NodeNgram.ngram_id, func.sum(NodeNgram.weight))
                           .join(Node, Node.id == NodeNgram.node_id)
                           .join(Miam, Miam.ngram_id == NodeNgram.ngram_id)
                           .filter(Node.parent_id == corpus_id, Node.type_id == cache.NodeType['Document'].id)
                           .filter(Miam.node_id == myamlist.id)
                           .group_by(NodeNgram.ngram_id)
                           .all())
     # print([n for n in sql_average])
     OCCs = {}
     for ngram in sql_average:
         OCCs[ngram[0]] = ngram[1]

     # [ Initializing Ngrams_Scores with occ_uniq ]
     Ngrams_Scores = {}
     for ngram in OCCs:
         if ngram not in StopList:
             if ngram not in Ngrams_Scores:
                 Ngrams_Scores[ngram] = {}
             Ngrams_Scores[ngram]["scores"] = { "occ_uniq": round(OCCs[ngram]), "tfidf_sum": 0.0 }
     # [ / Initializing Ngrams_Scores with occ_uniq ]

     # [ Getting TF-IDF scores (sum per each ngram) ]
     NgramTFIDF = session.query(NodeNodeNgram).filter(NodeNodeNgram.nodex_id==corpus_id).all()
     for ngram in NgramTFIDF:
         if ngram.ngram_id not in StopList:
             if ngram.ngram_id in Ngrams_Scores:
                 Ngrams_Scores[ngram.ngram_id]["scores"]["tfidf_sum"] += ngram.score
     # [ / Getting TF-IDF scores ]

     # [ Preparing JSON-Array full of Scores! ]
     Metrics = { "ngrams": [], "scores": {} }
     ngrams_ids = Ngrams_Scores.keys()
     query = session.query(Ngram).filter(Ngram.id.in_(ngrams_ids))
     ngrams_data = query.all()
     for ngram in ngrams_data:
         if ngram.id not in StopList:
             occ_uniq = Ngrams_Scores[ngram.id]["scores"]["occ_uniq"]
             Ngrams_Scores[ngram.id]["name"] = ngram.terms
             Ngrams_Scores[ngram.id]["id"] = ngram.id
             Ngrams_Scores[ngram.id]["scores"]["tfidf"] = Ngrams_Scores[ngram.id]["scores"]["tfidf_sum"] / occ_uniq
             del Ngrams_Scores[ngram.id]["scores"]["tfidf_sum"]
             Metrics["ngrams"].append(Ngrams_Scores[ngram.id])
     Metrics["scores"] = {
         "initial": "occ_uniq",
         "nb_docs": 1,
         "orig_nb_ngrams": 1,
         "nb_ngrams": len(Metrics["ngrams"]),
         # "occs_threshold":threshold
     }
     # [ / Preparing JSON-Array full of Scores! ]
     # print("miamlist:",myamlist.id)
     # print("sql avg:",sql_average)
     # print (avg_result)
     # print ("LALALALALALALALLLALALALALA")
     return JsonHttpResponse(Metrics)

 def get_corpuses(request, node_ids):
     ngrams = [int(i) for i in node_ids.split("+")]
     results = session.query(Node.id, Node.hyperdata).filter(Node.id.in_(ngrams)).all()
     for r in results:
         print(r)
-    return JsonHttpResponse(["tudo", "bem"])
\ No newline at end of file
+    return JsonHttpResponse(["tudo", "bem"])
+
+def get_cores(request):
+    import multiprocessing
+    cpus = multiprocessing.cpu_count()
+    return JsonHttpResponse({"data": cpus})
+
+def get_corpus_state(request, corpus_id):
+    if not request.user.is_authenticated():
+        return JsonHttpResponse({"request": "forbidden"})
+    processing = ["Waiting"]
+    the_query = """ SELECT hyperdata FROM node_node WHERE id=%d """ % (int(corpus_id))
+    cursor = connection.cursor()
+    try:
+        cursor.execute(the_query)
+        processing = cursor.fetchone()[0]
+    finally:
+        connection.close()
+    # processing = corpus.hyperdata['Processing']
+    return JsonHttpResponse(processing)
\ No newline at end of file
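In get_ngrams_json above, an ngram's final tfidf is its TF-IDF summed over the corpus documents divided by its unique-occurrence count; a toy reproduction of that normalization (numbers invented for illustration):

# tfidf = tfidf_sum / occ_uniq, with tfidf_sum dropped once consumed.
Ngrams_Scores = {
    42: {"scores": {"occ_uniq": 4, "tfidf_sum": 2.0}},
    43: {"scores": {"occ_uniq": 2, "tfidf_sum": 3.0}},
}
for ngram_id, entry in Ngrams_Scores.items():
    scores = entry["scores"]
    scores["tfidf"] = scores["tfidf_sum"] / scores["occ_uniq"]
    del scores["tfidf_sum"]

assert Ngrams_Scores[42]["scores"]["tfidf"] == 0.5
assert Ngrams_Scores[43]["scores"]["tfidf"] == 1.5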