Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
ca64cc01
Commit
ca64cc01
authored
Jun 22, 2016
by
delanoe
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'romain-goodies' into unstable
parents
82a44f48
3924b0fd
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
258 additions
and
55 deletions
+258
-55
ngramlists_tools.py
gargantext/util/ngramlists_tools.py
+15
-14
ngramlists.py
gargantext/views/api/ngramlists.py
+69
-12
urls.py
gargantext/views/api/urls.py
+1
-0
terms.py
gargantext/views/pages/terms.py
+12
-1
terms.html
templates/pages/corpora/terms.html
+161
-28
No files found.
gargantext/util/ngramlists_tools.py
View file @
ca64cc01
...
...
@@ -616,6 +616,8 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
'map': UnweightedList,
'groupings': Translations }
if any of those lists is absent it is considered empty
@param onto_corpus: a corpus node to get the *old* lists
@param del_originals: an array of original wordlists to ignore
...
...
@@ -694,17 +696,19 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
for
list_set
in
[
old_lists
,
new_lists
]:
for
lid
,
info
in
enumerate
(
linfos
):
list_type
=
info
[
'key'
]
# we use the fact that lids are ordered ints...
for
ng_id
in
list_set
[
list_type
]
.
items
:
if
ng_id
not
in
resolved_memberships
:
resolved_memberships
[
ng_id
]
=
lid
else
:
# ...now resolving is simply taking the max
# stop < main < map
resolved_memberships
[
ng_id
]
=
max
(
lid
,
resolved_memberships
[
ng_id
]
)
# if you don't want to merge one list just don't put it in new_lists
if
list_type
in
list_set
:
# we use the fact that lids are ordered ints...
for
ng_id
in
list_set
[
list_type
]
.
items
:
if
ng_id
not
in
resolved_memberships
:
resolved_memberships
[
ng_id
]
=
lid
else
:
# ...now resolving is simply taking the max
# stop < main < map
resolved_memberships
[
ng_id
]
=
max
(
lid
,
resolved_memberships
[
ng_id
]
)
# now each ngram is only in its most important list
# -------------------------------------------------
# NB temporarily map items are not in main anymore
...
...
@@ -714,9 +718,6 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
# after we merge the groups
del
old_lists
del
new_lists
[
'stop'
]
del
new_lists
[
'main'
]
del
new_lists
[
'map'
]
# ======== Merging old and new groups =========
# get the arcs already in the target DB (directed couples)
...
...
gargantext/views/api/ngramlists.py
View file @
ca64cc01
...
...
@@ -30,9 +30,10 @@ class List(APIView):
class
CSVLists
(
APIView
):
"""
For
CSV exports of all lists of a corpus
GET =>
CSV exports of all lists of a corpus
Or CSV import into existing lists as "patch"
POST => CSV import into existing lists as "post"
PATCH => internal import into existing lists (?POSSIBILITY put it in another class ?)
"""
def
get
(
self
,
request
):
params
=
get_parameters
(
request
)
...
...
@@ -47,23 +48,17 @@ class CSVLists(APIView):
export_ngramlists
(
corpus_node
,
fname
=
response
,
titles
=
True
)
return
response
def
post
(
self
,
request
):
"""
Merge the lists of a corpus with other lists from a CSV source
or from another corpus
params in request.GET:
corpus:
the corpus whose lists are getting patched
onto_corpus:
the corpus whose lists are getting patched
params in request.FILES:
csvsource: the csv file
or in get
dbsource: another corpus instead of the csvfile
(? this last option should perhaps not be in CSVLists ?)
NB: not using PATCH because we'll need POST file upload
csvfile: the csv file
/!
\
We assume we checked the file size client-side before upload
"""
...
...
@@ -72,7 +67,7 @@ class CSVLists(APIView):
res
.
status_code
=
401
return
res
# th
is time the corpus param is the one
with the target lists to be patched
# th
e corpus
with the target lists to be patched
params
=
get_parameters
(
request
)
corpus_id
=
int
(
params
.
pop
(
"onto_corpus"
))
corpus_node
=
cache
.
Node
[
corpus_id
]
...
...
@@ -90,6 +85,8 @@ class CSVLists(APIView):
# import the csv
try
:
new_lists
=
import_ngramlists
(
csv_file
)
print
(
"===============================!!!"
)
print
(
new_lists
)
del
csv_file
# merge the new_lists onto those of the target corpus
...
...
@@ -103,6 +100,66 @@ class CSVLists(APIView):
'err'
:
str
(
e
),
},
400
)
def patch(self, request):
    """
    Merge the lists of another corpus into the lists of a target corpus.

    Same merge as POST, but the source is an internal corpus id instead
    of an uploaded CSV file.

    params in request.GET:
        onto_corpus: the corpus whose lists are getting patched
        from_corpus: the corpus from which we take the source lists to merge in
        todo:        comma-separated list types ("map", "main", "stop") to merge in
                     ex: todo=map,stop

    Responses:
        401 "Unauthorized"   if the user is not authenticated or does not
                             own the target corpus
        200 {'log': ...}     with the message returned by merge_ngramlists
        400 {'err': ...}     if the source lists can't be read or merged
    """
    if not request.user.is_authenticated():
        res = HttpResponse("Unauthorized")
        res.status_code = 401
        return res

    params = get_parameters(request)

    # the corpus with the target lists to be patched
    corpus_id = int(params.pop("onto_corpus"))
    corpus_node = cache.Node[corpus_id]

    if request.user.id != corpus_node.user_id:
        res = HttpResponse("Unauthorized")
        res.status_code = 401
        return res

    # url keyword => typename of the corresponding list node
    list_types = {
        'map': 'MAPLIST',
        'main': 'MAINLIST',
        'stop': 'STOPLIST',
    }

    def first_child_id(parent_node, typename):
        # presumably first() yields None when the corpus has no such
        # child node; fail with a readable message instead of an
        # AttributeError on None
        child = parent_node.children(typename).first()
        if child is None:
            raise ValueError("source corpus has no %s node" % typename)
        return child.id

    # everything that depends on user-provided source params stays inside
    # the try, so that a bad request is answered with a JSON error (400)
    try:
        # internal DB retrieve source_lists
        # NOTE(review): ownership of the *source* corpus is not checked,
        # only that of the target corpus -- confirm this is intended
        source_corpus_id = int(params.pop("from_corpus"))
        source_node = cache.Node[source_corpus_id]

        todo_lists = params.pop("todo").split(',')   # ex: ['map', 'stop']

        source_lists = {}
        for key in todo_lists:
            if key not in list_types:
                raise ValueError("unknown list type '%s' in param 'todo'" % key)
            source_lists[key] = UnweightedList(
                first_child_id(source_node, list_types[key])
            )

        # add the groupings too
        source_lists['groupings'] = Translations(
            first_child_id(source_node, "GROUPLIST")
        )

        # merge the source_lists onto those of the target corpus
        log_msg = merge_ngramlists(source_lists, onto_corpus=corpus_node)
        return JsonHttpResponse({
            'log': log_msg,
            }, 200)

    except Exception as e:
        return JsonHttpResponse({
            'err': str(e),
            }, 400)
class
GroupChange
(
APIView
):
...
...
gargantext/views/api/urls.py
View file @
ca64cc01
...
...
@@ -36,6 +36,7 @@ urlpatterns = [ url(r'^nodes$' , nodes.NodeListResource.as_view()
,
url
(
r'^ngramlists/import$'
,
ngramlists
.
CSVLists
.
as_view
()
)
# same handling class as export (CSVLists)
# but this route used only for POST + file
# or PATCH + other corpus id
,
url
(
r'^ngramlists/change$'
,
ngramlists
.
ListChange
.
as_view
()
)
# add or remove ngram from a list
...
...
gargantext/views/pages/terms.py
View file @
ca64cc01
...
...
@@ -22,6 +22,16 @@ def ngramtable(request, project_id, corpus_id):
# and the project just for project.id in corpusBannerTop
project
=
cache
.
Node
[
project_id
]
# retrieve all corpora of this user for list import option
# POSSIBILITY: could do same task in ajax "only if needed"
# (use api for that when merged)
corpora_infos_q
=
(
session
.
query
(
Node
.
id
,
Node
.
name
)
.
filter
(
Node
.
typename
==
"CORPUS"
)
.
filter
(
Node
.
user_id
==
project
.
user_id
))
# .filter(Node.id != corpus_id)
corpora_infos
=
corpora_infos_q
.
all
()
# rendered page : terms.html
return
render
(
template_name
=
'pages/corpora/terms.html'
,
...
...
@@ -36,6 +46,7 @@ def ngramtable(request, project_id, corpus_id):
'view'
:
'terms'
,
# for the CSV import modal
'csvimportroute'
:
"/api/ngramlists/import?onto_corpus=
%
i"
%
corpus
.
id
'importroute'
:
"/api/ngramlists/import?onto_corpus=
%
i"
%
corpus
.
id
,
'corporainfos'
:
corpora_infos
},
)
templates/pages/corpora/terms.html
View file @
ca64cc01
...
...
@@ -9,11 +9,25 @@
<style>
#formatinfos-announce
{
#corpuschoose
{
max-width
:
75%
;
}
input
[
type
=
"checkbox"
]
.importcheck
{
margin-right
:
1em
;
}
.announce
{
font-size
:
12px
;
padding-top
:
.5em
;
}
#importsubmit
{
margin-left
:
25%
;
padding
:
.3em
.5em
;
font-size
:
120%
;
}
#formatinfos
{
background-color
:
#CCC
;
font-size
:
12px
;
...
...
@@ -47,7 +61,7 @@
font-size
:
14px
;
}
#formatinfos
em
{
em
{
font-weight
:
bold
;
}
...
...
@@ -122,7 +136,7 @@
<br/>
<br/>
<!-- import icon -->
<span
class=
"
needsaveicon
glyphicon glyphicon-import"
></span>
<span
class=
"glyphicon glyphicon-import"
></span>
<button
id=
"ImportList"
class=
"btn btn-warning"
style=
"font-size:120%"
onclick=
"$('#csvimport').modal('show');"
>
...
...
@@ -176,20 +190,43 @@
<h3
id=
"myModalLabel"
>
Import a Termlist
</h3>
</div>
<div
class=
"modal-body"
id=
"uploadform"
>
<form
id=
"
csv
importform"
onsubmit=
"return
postCSV(event
)"
<form
id=
"importform"
onsubmit=
"return
submitImport(event, this
)"
enctype=
"multipart/form-data"
method=
"post"
>
{% csrf_token %}
<label>
From another corpus:
</label>
<select
id=
"corpuschoose"
name=
"corpuschoose"
>
<option
selected
value
>
-- select a corpus --
</option>
<!-- lists all corpora of this user -->
{% for corpusinfo in corporainfos %}
<option
value=
"{{corpusinfo.id}}"
>
{{corpusinfo.name}}
</option>
{% endfor %}
</select>
<br/>
<p
class=
"announce"
onclick=
"toggleAdvancedCheckboxes()"
>
<span
id=
"corpusadvanced-icon"
class=
"glyphicon glyphicon-triangle-right"
></span>
Advanced options
</p>
<div
id=
"corpusadvanced"
style=
"display:none;"
class=
"input-group"
>
<input
type=
"checkbox"
class=
"importcheck"
name=
"listtypes"
id=
"listtypes-map"
value=
"map"
checked
>
import map terms
</input>
<br/>
<input
type=
"checkbox"
class=
"importcheck"
name=
"listtypes"
id=
"listtypes-main"
value=
"main"
>
import normal terms
</input>
<br/>
<input
type=
"checkbox"
class=
"importcheck"
name=
"listtypes"
id=
"listtypes-stop"
value=
"stop"
>
import stoplist terms
</input>
</div>
<br/>
<br/>
<label>
From a CSV on your disk:
</label>
<input
type=
"file"
id=
"csvfile"
accept=
"text/csv"
>
<input
type=
"file"
id=
"csvfile"
name=
"csvfile"
accept=
"text/csv"
>
<p
id=
"formatinfos-announce
"
>
<p
class=
"announce"
onclick=
"toggleFormatInfos()
"
>
<span
id=
"formatinfos-icon"
class=
"glyphicon glyphicon-triangle-right"
onclick=
"toggleFormatInfos()"
></span>
More infos about CSV expected format
class=
"glyphicon glyphicon-triangle-right"
></span>
More info about the expected CSV format
</p>
<div
id=
"formatinfos"
style=
"display:none;"
>
<h4>
Example table
</h4>
...
...
@@ -210,12 +247,9 @@
</ul>
</div>
<br/>
<label>
From another corpus:
</label>
<p>
TODO
</p>
<br/>
<input
type=
"submit"
class=
"btn btn-xs btn-info"
id=
"csvsubmit"
value=
"Submit"
/>
<input
type=
"submit"
class=
"btn btn-xs btn-info"
id=
"importsubmit"
value=
"Import and merge with current table"
/>
</form>
</div>
<div
class=
"modal-footer"
id=
"formanswer"
></div>
...
...
@@ -227,8 +261,34 @@
<!-- custom-lib for dynatable.js and dc.js -->
<script
type=
"text/javascript"
src=
"{% static "
lib
/
gargantext
/
NGrams_dyna_chart_and_table
.
js
"
%}"
></script>
<!-- import modal controllers -->
<script
type=
"text/javascript"
>
var
formatInfosOpen
=
false
;
var
corpusAdvancedOpen
=
false
;
// declared here to enable inspection
var
myFormData
;
function toggleAdvancedCheckboxes() {
    // Opens or closes the "Advanced options" div of the import modal and
    // flips its triangle icon (right = closed, bottom = open).
    // The current state is remembered in the page-level flag corpusAdvancedOpen.
    var wasOpen = corpusAdvancedOpen
    var advancedPanel = $('#corpusadvanced')

    // show/hide the div
    if (wasOpen) {
        advancedPanel.hide()
    }
    else {
        advancedPanel.show()
    }

    // change icon
    var iconClasses = $('#corpusadvanced-icon')[0].classList
    iconClasses.remove(wasOpen ? 'glyphicon-triangle-bottom' : 'glyphicon-triangle-right')
    iconClasses.add(wasOpen ? 'glyphicon-triangle-right' : 'glyphicon-triangle-bottom')

    // toggle flag
    corpusAdvancedOpen = !wasOpen;
}
function
toggleFormatInfos
()
{
// when already open => we close
...
...
@@ -252,29 +312,103 @@ function toggleFormatInfos() {
/* merci c24b !
* Uses
csv
importroute variable from the django template
* Uses importroute variable from the django template
* Ex: /api/ngramlists/import?onto_corpus=corpus_id
*
* Uses input#csvfile as source data.
*/
function
postCSV
(
e
){
function submitImport(e, formElt){
    // Submit handler of the import form: dispatches to the CSV upload
    // (listmergeCsvPost) or to the internal corpus merge (listmergeUpdate)
    // depending on which source the user filled in.
    //
    //   e:       the submit event
    //   formElt: the <form> element being submitted

    // don't do page reload of usual submits
    e.preventDefault()

    // parse the form (allows iteration like so: for (kv of myFormData))
    // (stored in the page-level variable myFormData to enable inspection)
    myFormData = new FormData(formElt);

    // user had 2 possibilities
    var theCorpus = myFormData.get("corpuschoose")
    var theFile = myFormData.get("csvfile")

    // an empty file input still yields a File object, but without a name
    var hasFile = Boolean(theFile && theFile.name)

    function announce(msg) {
        $('#formanswer').html(
            '<p style="color:#777;font-style:italic">' + msg + '</p>'
        );
    }

    // NB: the "in progress" message is written *before* starting the import,
    //     otherwise it would overwrite an error that the import function
    //     reports immediately (ex: file too big)
    if (theCorpus && hasFile) {
        // can't select both!
        announce("Please select a source corpus <em>or</em> choose a source file (not both!).")
    }
    else if (hasFile) {
        announce("CSV import in progress...")
        listmergeCsvPost(theFile)
    }
    else if (theCorpus) {
        announce("Internal list import in progress...")
        listmergeUpdate(myFormData)
    }
    else {
        announce("Please provide an input source!")
        console.warn('Ignoring "submit": no provided input')
    }

    // for the onsubmit="return submitImport(...)" attribute
    return false
}
function listmergeUpdate(aFormData){
    // Merges the lists of another corpus into the current one, via a PATCH
    // request on the import route, then shows the outcome in #formanswer.
    //
    //   aFormData: the FormData of the import form
    //              (reads "corpuschoose" and the "listtypes" checkboxes)

    // get the selected source corpus
    // ex: "13308"
    var sourceCorpusId = aFormData.get("corpuschoose")

    // get checkbox entries into an array
    // ex: ["map", "stop"]
    var todoLists = aFormData.getAll("listtypes")

    // base url ex: /api/ngramlists/import?onto_corpus=123
    var theUrl = "{{importroute | safe}}"

    // all params are added in the url like a GET
    theUrl += "&from_corpus=" + sourceCorpusId
    theUrl += "&todo=" + todoLists.join(',')

    // result url looks like this : /api/ngramlists/import?onto_corpus=2&from_corpus=13308&todo=map,stop

    // Update request
    $.ajax({
        url: theUrl,
        type: 'PATCH',
        async: true,
        beforeSend: function(xhr) {
            xhr.setRequestHeader("X-CSRFToken", getCookie("csrftoken"));
        },
        success: function(response) {
            var my_html = '<h3 style="color:green">IMPORT OK</h3>'
            my_html += "<p class='note'>" + response['log'].replace(/\n/g, '<br/>') + "</p>"
            my_html += "<p>(this page will reload in 3s)</p>"
            $('#formanswer').html(my_html);
            console.log(response) ;

            // reload after 3s
            setTimeout(function() { location.reload(true) }, 3000);
        },
        error: function(result) {
            // responseJSON is only there when the server answered with JSON
            // (the "Unauthorized" answer for instance is plain text)
            var errDetail = (result.responseJSON && result.responseJSON['err'])
                            || result.statusText

            var my_html = '<h3 style="color:red">Error</h3>'
            my_html += "<p>" + errDetail + "</p>"
            $('#formanswer').html(my_html);
            console.error(result);
        },
    });
};
function
listmergeCsvPost
(
theFile
){
// 2MB ≈ 70000 ngrams
var
max_size
=
2097152
// we take it straight from the input element
theFile
=
$
(
'input#csvfile'
)[
0
].
files
[
0
]
// debug
// console.log(theFile.name, "size", theFile.size, theFile.lastModifiedDate)
if
(
!
theFile
)
{
console
.
warn
(
'Ignoring "submit": no provided file'
)
return
false
}
else
if
(
theFile
.
size
>
max_size
)
{
if
(
theFile
.
size
>
max_size
)
{
console
.
warn
(
'Ignoring "submit": file is too big'
)
$
(
'#formanswer'
).
html
(
'The import failed: your file is too big ('
+
max_size
/
1024
+
'kB max).'
...
...
@@ -283,13 +417,13 @@ function postCSV(e){
}
// normal case
else
{
// append into an empty form (
or fixme: initialize it using form element
)
// append into an empty form (
all other infos from old form were not for us
)
var
myFileFormData
=
new
FormData
();
myFileFormData
.
append
(
"csvfile"
,
theFile
)
//postCorpusFile
$
.
ajax
({
url
:
"{{
csv
importroute | safe}}"
,
url
:
"{{importroute | safe}}"
,
type
:
'POST'
,
async
:
true
,
contentType
:
false
,
...
...
@@ -315,7 +449,6 @@ function postCSV(e){
console
.
error
(
result
);
},
});
$
(
'#formanswer'
).
html
(
'CSV import in Progress'
);
}
};
</script>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment