Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
c03f17e2
Commit
c03f17e2
authored
Oct 03, 2016
by
Romain Loth
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] import termlist: was blocking on empty columns (BUG-7)
parent
d1460066
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
2 deletions
+10
-2
ngramlists_tools.py
gargantext/util/ngramlists_tools.py
+10
-2
No files found.
gargantext/util/ngramlists_tools.py
View file @
c03f17e2
...
@@ -462,6 +462,9 @@ def import_ngramlists(fname, delimiter=DEFAULT_CSV_DELIM,
...
@@ -462,6 +462,9 @@ def import_ngramlists(fname, delimiter=DEFAULT_CSV_DELIM,
for
j
,
colname
in
enumerate
(
csv_row
):
for
j
,
colname
in
enumerate
(
csv_row
):
if
colname
in
[
'label'
,
'status'
,
'forms'
]:
if
colname
in
[
'label'
,
'status'
,
'forms'
]:
columns
[
colname
]
=
j
columns
[
colname
]
=
j
# skip empty columns
elif
match
(
r'^\s*$'
,
colname
):
pass
else
:
else
:
raise
ValueError
(
'Wrong header "
%
s" on line
%
i (only possible headers are "label", "forms" and "status")'
%
(
colname
,
n_read_lines
))
raise
ValueError
(
'Wrong header "
%
s" on line
%
i (only possible headers are "label", "forms" and "status")'
%
(
colname
,
n_read_lines
))
if
'label'
not
in
columns
:
if
'label'
not
in
columns
:
...
@@ -548,7 +551,9 @@ def import_ngramlists(fname, delimiter=DEFAULT_CSV_DELIM,
...
@@ -548,7 +551,9 @@ def import_ngramlists(fname, delimiter=DEFAULT_CSV_DELIM,
imported_ngrams_dbdata
.
append
((
ngram_str
,
n_words
))
imported_ngrams_dbdata
.
append
((
ngram_str
,
n_words
))
# returns a dict {term => id} and a count of inserted ones
# returns a dict {term => id} and a count of inserted ones
# -------------------------
(
new_ngrams_ids
,
n_added_ng
)
=
bulk_insert_ifnotexists
(
(
new_ngrams_ids
,
n_added_ng
)
=
bulk_insert_ifnotexists
(
# -------------------------
model
=
Ngram
,
model
=
Ngram
,
uniquekey
=
'terms'
,
uniquekey
=
'terms'
,
fields
=
(
'terms'
,
'n'
),
fields
=
(
'terms'
,
'n'
),
...
@@ -612,7 +617,7 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
...
@@ -612,7 +617,7 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
- resolves conflicts if terms belong in different lists
- resolves conflicts if terms belong in different lists
> map wins over both other types
> map wins over both other types
> main wins over stop
> main wins over stop
> stop never wins
> stop never wins
£TODO STOP wins over candidates from main
@param new_lists: a dict of *new* imported lists with format:
@param new_lists: a dict of *new* imported lists with format:
{'stop': UnweightedList,
{'stop': UnweightedList,
...
@@ -667,7 +672,10 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
...
@@ -667,7 +672,10 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
for
ng_id
in
new_lists
[
list_type
]
.
items
:
for
ng_id
in
new_lists
[
list_type
]
.
items
:
collect
(
ng_id
)
collect
(
ng_id
)
from
gargantext.util.toolchain.main
import
t
print
(
"MERGE DEBUG: starting index_new_ngrams"
,
t
())
n_added
=
index_new_ngrams
(
all_possibly_new_ngram_ids
,
onto_corpus
)
n_added
=
index_new_ngrams
(
all_possibly_new_ngram_ids
,
onto_corpus
)
print
(
"MERGE DEBUG: finished index_new_ngrams"
,
t
())
my_log
.
append
(
"MERGE: added
%
i new ngram occurrences in docs"
%
n_added
)
my_log
.
append
(
"MERGE: added
%
i new ngram occurrences in docs"
%
n_added
)
...
@@ -677,7 +685,7 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
...
@@ -677,7 +685,7 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
# DB nodes stored with same indices 0,1,2 (resp. stop, miam and map)
# DB nodes stored with same indices 0,1,2 (resp. stop, miam and map)
# find target ids of the list node objects
# find target ids of the list node objects
tgt_nodeids
=
[
tgt_nodeids
=
[
onto_corpus
.
children
(
"STOPLIST"
)
.
first
()
.
id
,
onto_corpus
.
children
(
"STOPLIST"
)
.
first
()
.
id
,
# £todo via parent project?
onto_corpus
.
children
(
"MAINLIST"
)
.
first
()
.
id
,
onto_corpus
.
children
(
"MAINLIST"
)
.
first
()
.
id
,
onto_corpus
.
children
(
"MAPLIST"
)
.
first
()
.
id
onto_corpus
.
children
(
"MAPLIST"
)
.
first
()
.
id
]
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment