Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
C
clinicaltrials
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
david Chavalarias
clinicaltrials
Commits
b79f438f
Commit
b79f438f
authored
Jan 19, 2017
by
Romain Loth
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
WIP2 comex_shared DB (luid and hashtags in create/update user)
parent
ae898647
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
199 additions
and
129 deletions
+199
-129
table_specifications.md
doc/table_specifications.md
+1
-1
db.py
services/db.py
+145
-86
main.py
services/main.py
+52
-41
init_comex_shared.sql
setup/dockers/comex2_mysql_server/init_comex_shared.sql
+1
-1
No files found.
doc/table_specifications.md
View file @
b79f438f
...
@@ -11,7 +11,7 @@ CREATE TABLE scholars (
...
@@ -11,7 +11,7 @@ CREATE TABLE scholars (
-- local uid necessary for users who still have no doors_uid
-- local uid necessary for users who still have no doors_uid
luid int(15) not null auto_increment unique primary key,
luid int(15) not null auto_increment unique primary key,
-- doors uid common to all lab's services
-- doors uid common to all lab's services
doors_uid char(36)
not null
unique,
doors_uid char(36) unique,
last_modified_date char(24) not null,
last_modified_date char(24) not null,
email varchar(255) not null unique,
email varchar(255) not null unique,
country varchar(60) not null,
country varchar(60) not null,
...
...
services/db.py
View file @
b79f438f
...
@@ -19,7 +19,8 @@ else:
...
@@ -19,7 +19,8 @@ else:
# sorted columns as declared in DB, as a tuple
# sorted columns as declared in DB, as a tuple
USER_COLS
=
[
USER_COLS
=
[
# NAME, NOT NULL, N or MAXCHARS (if applicable)
# NAME, NOT NULL, N or MAXCHARS (if applicable)
(
"doors_uid"
,
True
,
36
),
(
"luid"
,
True
,
15
),
(
"doors_uid"
,
False
,
36
),
(
"last_modified_date"
,
True
,
24
),
(
"last_modified_date"
,
True
,
24
),
(
"email"
,
True
,
255
),
(
"email"
,
True
,
255
),
(
"country"
,
True
,
60
),
(
"country"
,
True
,
60
),
...
@@ -31,7 +32,6 @@ USER_COLS = [
...
@@ -31,7 +32,6 @@ USER_COLS = [
(
"position"
,
False
,
30
),
(
"position"
,
False
,
30
),
(
"hon_title"
,
False
,
30
),
(
"hon_title"
,
False
,
30
),
(
"interests_text"
,
False
,
1200
),
(
"interests_text"
,
False
,
1200
),
(
"community_hashtags"
,
False
,
350
),
(
"gender"
,
False
,
1
),
(
"gender"
,
False
,
1
),
(
"job_looking_date"
,
False
,
24
),
(
"job_looking_date"
,
False
,
24
),
(
"home_url"
,
False
,
120
),
(
"home_url"
,
False
,
120
),
...
@@ -49,14 +49,15 @@ ORG_COLS = [
...
@@ -49,14 +49,15 @@ ORG_COLS = [
FIELDS_FRONTEND_TO_SQL
=
{
FIELDS_FRONTEND_TO_SQL
=
{
"keywords"
:
"keywords.kwstr"
,
"keywords"
:
"keywords.kwstr"
,
"tags"
:
"hashtags.htstr"
,
"countries"
:
"scholars.country"
,
"countries"
:
"scholars.country"
,
"gender"
:
"scholars.gender"
,
"organizations"
:
"affiliations.org"
,
"organizations"
:
"affiliations.org"
,
"laboratories"
:
"affiliations.team_lab"
,
"laboratories"
:
"affiliations.team_lab"
,
"tags"
:
"scholars.community_hashtags"
,
# new
"gender"
:
"scholars.gender"
,
"cities"
:
"affiliations.org_city"
,
"cities"
:
"affiliations.org_city"
,
"linked"
:
"linked_ids.ext_id_type"
"linked"
:
"linked_ids.ext_id_type"
}
}
...
@@ -139,7 +140,7 @@ def get_field_aggs(a_field, hapax_threshold=int(REALCONFIG['HAPAX_THRESHOLD'])):
...
@@ -139,7 +140,7 @@ def get_field_aggs(a_field, hapax_threshold=int(REALCONFIG['HAPAX_THRESHOLD'])):
FROM scholars
FROM scholars
-- 0 or 1
-- 0 or 1
LEFT JOIN linked_ids
LEFT JOIN linked_ids
ON scholars.
doors_
uid = linked_ids.uid
ON scholars.
l
uid = linked_ids.uid
GROUP BY
%(col)
s
GROUP BY
%(col)
s
) AS allcounts
) AS allcounts
%(filter)
s
%(filter)
s
...
@@ -153,8 +154,8 @@ def get_field_aggs(a_field, hapax_threshold=int(REALCONFIG['HAPAX_THRESHOLD'])):
...
@@ -153,8 +154,8 @@ def get_field_aggs(a_field, hapax_threshold=int(REALCONFIG['HAPAX_THRESHOLD'])):
FROM scholars
FROM scholars
-- 0 or many
-- 0 or many
LEFT JOIN sch_kw
LEFT JOIN sch_kw
ON scholars.
doors_
uid = sch_kw.uid
ON scholars.
l
uid = sch_kw.uid
JOIN keywords
LEFT
JOIN keywords
ON sch_kw.kwid = keywords.kwid
ON sch_kw.kwid = keywords.kwid
GROUP BY
%(col)
s
GROUP BY
%(col)
s
) AS allcounts
) AS allcounts
...
@@ -162,6 +163,21 @@ def get_field_aggs(a_field, hapax_threshold=int(REALCONFIG['HAPAX_THRESHOLD'])):
...
@@ -162,6 +163,21 @@ def get_field_aggs(a_field, hapax_threshold=int(REALCONFIG['HAPAX_THRESHOLD'])):
ORDER BY occs DESC
ORDER BY occs DESC
"""
%
{
'col'
:
sql_col
,
'filter'
:
where_clause
}
"""
%
{
'col'
:
sql_col
,
'filter'
:
where_clause
}
elif
sql_tab
==
'hashtags'
:
stmt
=
"""
SELECT * FROM (
SELECT
%(col)
s AS x, COUNT(*) AS occs
FROM scholars
-- 0 or many
LEFT JOIN sch_ht
ON scholars.luid = sch_ht.uid
LEFT JOIN hashtags
ON sch_ht.htid = hashtags.htid
GROUP BY
%(col)
s
) AS allcounts
%(filter)
s
ORDER BY occs DESC
"""
%
{
'col'
:
sql_col
,
'filter'
:
where_clause
}
mlog
(
"DEBUGSQL"
,
"get_field_aggs STATEMENT:
\n
-- SQL
\n
%
s
\n
-- /SQL"
%
stmt
)
mlog
(
"DEBUGSQL"
,
"get_field_aggs STATEMENT:
\n
-- SQL
\n
%
s
\n
-- /SQL"
%
stmt
)
...
@@ -179,6 +195,9 @@ def get_field_aggs(a_field, hapax_threshold=int(REALCONFIG['HAPAX_THRESHOLD'])):
...
@@ -179,6 +195,9 @@ def get_field_aggs(a_field, hapax_threshold=int(REALCONFIG['HAPAX_THRESHOLD'])):
def
get_full_scholar
(
uid
):
def
get_full_scholar
(
uid
):
"""
"""
uid : str
local user id aka luid
Autonomous function to be used by User class
Autonomous function to be used by User class
=> Retrieves one line from *scholars* table, with joined optional concatenated *affiliations*, *keywords* and *linked_ids*
=> Retrieves one line from *scholars* table, with joined optional concatenated *affiliations*, *keywords* and *linked_ids*
=> Parse it all into a structured python user info dict
=> Parse it all into a structured python user info dict
...
@@ -195,7 +214,7 @@ def get_full_scholar(uid):
...
@@ -195,7 +214,7 @@ def get_full_scholar(uid):
one_usr_stmt
=
"""
one_usr_stmt
=
"""
SELECT
SELECT
sch_n_aff_n_kws.*,
sch_n_aff_n_kws
_n_hts
.*,
-- linked_ids info condensed
-- linked_ids info condensed
-- (format : "type1:ID1,type2:ID2,...")
-- (format : "type1:ID1,type2:ID2,...")
...
@@ -204,6 +223,15 @@ def get_full_scholar(uid):
...
@@ -204,6 +223,15 @@ def get_full_scholar(uid):
) AS linked_ids,
) AS linked_ids,
COUNT(linked_ids.ext_id) AS linked_ids_nb
COUNT(linked_ids.ext_id) AS linked_ids_nb
FROM (
SELECT
sch_n_aff_n_kws.*,
-- hts info condensed
COUNT(hashtags.htid) AS hashtags_nb,
-- GROUP_CONCAT(hashtags.htid) AS htids,
GROUP_CONCAT(hashtags.htstr) AS hashtags
FROM (
FROM (
SELECT
SELECT
sch_n_aff.*,
sch_n_aff.*,
...
@@ -215,10 +243,11 @@ def get_full_scholar(uid):
...
@@ -215,10 +243,11 @@ def get_full_scholar(uid):
FROM (
FROM (
SELECT
SELECT
scholars.*,
--
scholars.*,
-- for debug replace scholars.* by
-- for debug replace scholars.* by
-- scholars.doors_uid,
scholars.luid,
-- scholars.email,
scholars.doors_uid,
scholars.email,
-- scholars.last_modified_date,
-- scholars.last_modified_date,
-- scholars.initials,
-- scholars.initials,
...
@@ -229,25 +258,34 @@ def get_full_scholar(uid):
...
@@ -229,25 +258,34 @@ def get_full_scholar(uid):
LEFT JOIN affiliations
LEFT JOIN affiliations
ON scholars.affiliation_id = affiliations.affid
ON scholars.affiliation_id = affiliations.affid
GROUP BY doors_
uid
GROUP BY l
uid
) AS sch_n_aff
) AS sch_n_aff
-- two step JOIN for keywords
-- two step JOIN for keywords
LEFT JOIN sch_kw
LEFT JOIN sch_kw
ON sch_n_aff.doors_uid = sch_kw.
uid
ON sch_kw.uid = l
uid
LEFT JOIN keywords
LEFT JOIN keywords
ON sch_kw.kwid = keywords.kwid
ON sch_kw.kwid = keywords.kwid
GROUP BY doors_
uid
GROUP BY l
uid
) AS sch_n_aff_n_kws
) AS sch_n_aff_n_kws
-- also two step JOIN for hashtags
LEFT JOIN sch_ht
ON sch_ht.uid = luid
LEFT JOIN hashtags
ON sch_ht.htid = hashtags.htid
GROUP BY luid
) AS sch_n_aff_n_kws_n_hts
LEFT JOIN linked_ids
LEFT JOIN linked_ids
ON linked_ids.uid =
sch_n_aff_n_kws.doors_
uid
ON linked_ids.uid =
l
uid
-- WHERE our user UID
-- WHERE our user UID
WHERE
doors_
uid = "
%
s"
WHERE
l
uid = "
%
s"
GROUP BY
doors_
uid
GROUP BY
l
uid
"""
%
str
(
uid
)
"""
%
str
(
uid
)
mlog
(
"DEBUGSQL"
,
"DB get_full_scholar STATEMENT:
\n
-- SQL
\n
%
s
\n
-- /SQL"
%
one_usr_stmt
)
mlog
(
"DEBUGSQL"
,
"DB get_full_scholar STATEMENT:
\n
-- SQL
\n
%
s
\n
-- /SQL"
%
one_usr_stmt
)
...
@@ -289,16 +327,17 @@ def get_full_scholar(uid):
...
@@ -289,16 +327,17 @@ def get_full_scholar(uid):
# post-treatments
# post-treatments
# ---------------
# ---------------
# 1/ split concatenated kw lists and check correct length
# 1/ split concatenated kw an ht lists and check correct length
if
urow_dict
[
'keywords_nb'
]
==
0
:
for
toktype
in
[
'keywords'
,
'hashtags'
]:
urow_dict
[
'keywords'
]
=
[]
if
urow_dict
[
toktype
+
'_nb'
]
==
0
:
urow_dict
[
toktype
]
=
[]
else
:
else
:
kws_array
=
urow_dict
[
'keywords'
]
.
split
(
','
)
tokarray
=
urow_dict
[
toktype
]
.
split
(
','
)
if
len
(
kws_array
)
!=
urow_dict
[
'keywords
_nb'
]:
if
len
(
tokarray
)
!=
urow_dict
[
toktype
+
'
_nb'
]:
raise
ValueError
(
"Can't correctly split keywords for user
%
s"
%
uid
)
raise
ValueError
(
"Can't correctly split
%
s for user
%
s"
%
(
toktype
,
uid
)
)
else
:
else
:
urow_dict
[
'keywords'
]
=
kws_
array
urow_dict
[
toktype
]
=
tok
array
# 2/ also split and parse all linked_ids
# 2/ also split and parse all linked_ids
if
urow_dict
[
'linked_ids_nb'
]
==
0
:
if
urow_dict
[
'linked_ids_nb'
]
==
0
:
...
@@ -326,21 +365,21 @@ def get_full_scholar(uid):
...
@@ -326,21 +365,21 @@ def get_full_scholar(uid):
return
urow_dict
return
urow_dict
def
save_scholar
(
uid
,
date
,
safe_recs
,
reg_db
,
uactive
=
True
,
update_flag
=
Fals
e
):
def
save_scholar
(
safe_recs
,
reg_db
,
uactive
=
True
,
update_luid
=
Non
e
):
"""
"""
For new registration:
For new registration:
-> add to *scholars* table
-> add to *scholars* table
, return new local uid
For profile change (just
toggle update_flag to True
)
For profile change (just
pass previous local uid in update_luid
)
-> *update* scholars table
-> *update* scholars table
see also COLS variable and doc/table_specifications.md
see also COLS variable and doc/table_specifications.md
"""
"""
#
we already have the first two column
s
#
column names and column quoted value
s
db_tgtcols
=
[
'doors_uid'
,
'last_modified_date'
]
db_tgtcols
=
[]
db_qstrvals
=
[
"'"
+
str
(
uid
)
+
"'"
,
"'"
+
str
(
date
)
+
"'"
]
db_qstrvals
=
[]
actual_len_dbg
=
2
actual_len_dbg
=
0
# REMARK:
# REMARK:
# => In theory should be possible to execute(statment, values) to insert all
# => In theory should be possible to execute(statment, values) to insert all
...
@@ -353,8 +392,7 @@ def save_scholar(uid, date, safe_recs, reg_db, uactive=True, update_flag=False):
...
@@ -353,8 +392,7 @@ def save_scholar(uid, date, safe_recs, reg_db, uactive=True, update_flag=False):
# ------------- -----------
# ------------- -----------
# and then we execute(full_statmt) :-)
# and then we execute(full_statmt) :-)
for
colinfo
in
USER_COLS
:
for
colinfo
in
USER_COLS
[
2
:]:
colname
=
colinfo
[
0
]
colname
=
colinfo
[
0
]
# NB: each val already contains no quotes because of sanitize()
# NB: each val already contains no quotes because of sanitize()
...
@@ -385,7 +423,7 @@ def save_scholar(uid, date, safe_recs, reg_db, uactive=True, update_flag=False):
...
@@ -385,7 +423,7 @@ def save_scholar(uid, date, safe_recs, reg_db, uactive=True, update_flag=False):
reg_db_c
=
reg_db
.
cursor
()
reg_db_c
=
reg_db
.
cursor
()
if
not
update_
flag
:
if
not
update_
luid
:
# expected colnames "(doors_uid, last_modified_date, email, ...)"
# expected colnames "(doors_uid, last_modified_date, email, ...)"
db_tgtcols_str
=
','
.
join
(
db_tgtcols
)
db_tgtcols_str
=
','
.
join
(
db_tgtcols
)
...
@@ -404,63 +442,82 @@ def save_scholar(uid, date, safe_recs, reg_db, uactive=True, update_flag=False):
...
@@ -404,63 +442,82 @@ def save_scholar(uid, date, safe_recs, reg_db, uactive=True, update_flag=False):
set_full_str
=
','
.
join
([
db_tgtcols
[
i
]
+
'='
+
db_qstrvals
[
i
]
for
i
in
range
(
len
(
db_tgtcols
))])
set_full_str
=
','
.
join
([
db_tgtcols
[
i
]
+
'='
+
db_qstrvals
[
i
]
for
i
in
range
(
len
(
db_tgtcols
))])
# UPDATE: full_statement with formated values
# UPDATE: full_statement with formated values
full_statmt
=
'UPDATE scholars SET
%
s WHERE
doors_
uid = "
%
s"'
%
(
full_statmt
=
'UPDATE scholars SET
%
s WHERE
l
uid = "
%
s"'
%
(
set_full_str
,
set_full_str
,
uid
u
pdate_lu
id
)
)
mlog
(
"DEBUG"
,
"UPDATE"
if
update_
flag
else
"INSERT"
,
"SQL statement:"
,
full_statmt
)
mlog
(
"DEBUG"
,
"UPDATE"
if
update_
luid
else
"INSERT"
,
"SQL statement:"
,
full_statmt
)
reg_db_c
.
execute
(
full_statmt
)
reg_db_c
.
execute
(
full_statmt
)
if
not
update_luid
:
luid
=
reg_db_c
.
lastrowid
else
:
luid
=
update_luid
reg_db
.
commit
()
reg_db
.
commit
()
return
luid
def
save_pairs_sch_
kw
(
pairings_list
,
comex_db
):
def
save_pairs_sch_
tok
(
pairings_list
,
comex_db
,
map_table
=
'sch_kw'
):
"""
"""
Simply save all pairings (
uid, kw
id) in the list
Simply save all pairings (
luid, kwid) or (luid, ht
id) in the list
"""
"""
db_cursor
=
comex_db
.
cursor
()
db_cursor
=
comex_db
.
cursor
()
for
id_pair
in
pairings_list
:
for
id_pair
in
pairings_list
:
db_cursor
.
execute
(
'INSERT INTO
sch_kw VALUES
%
s'
%
str
(
id_pair
))
db_cursor
.
execute
(
'INSERT INTO
%
s VALUES
%
s'
%
(
map_table
,
str
(
id_pair
)
))
comex_db
.
commit
()
comex_db
.
commit
()
mlog
(
"DEBUG"
,
"
Keywords: saved
%
s pair"
%
str
(
id_pair
))
mlog
(
"DEBUG"
,
"
%
s: saved
%
s pair"
%
(
map_table
,
str
(
id_pair
)
))
def
delete_pairs_sch_
kw
(
uid
,
comex_db
):
def
delete_pairs_sch_
tok
(
uid
,
comex_db
,
map_table
=
'sch_kw'
):
"""
"""
Simply deletes all pairings (uid, *) in the table
Simply deletes all pairings (
l
uid, *) in the table
"""
"""
if
map_table
not
in
[
'sch_kw'
,
'sch_ht'
]:
raise
TypeError
(
'ERROR: Unknown map_table'
)
db_cursor
=
comex_db
.
cursor
()
db_cursor
=
comex_db
.
cursor
()
n
=
db_cursor
.
execute
(
'DELETE FROM
sch_kw WHERE uid = "
%
s"'
%
uid
)
n
=
db_cursor
.
execute
(
'DELETE FROM
%
s WHERE uid = "
%
s"'
%
(
map_table
,
uid
)
)
comex_db
.
commit
()
comex_db
.
commit
()
mlog
(
"DEBUG"
,
"
Keywords: DELETED
%
i pairings for
%
s"
%
(
n
,
str
(
uid
)))
mlog
(
"DEBUG"
,
"
%
s: DELETED
%
i pairings for
%
s"
%
(
map_table
,
n
,
str
(
uid
)))
def
get_or_create_
keywords
(
kw_list
,
comex_db
):
def
get_or_create_
tokitems
(
tok_list
,
comex_db
,
tok_table
=
'keywords'
):
"""
"""
kw_str -> lookup/add to *keywords* table -> kw_id
kw_str -> lookup/add to *keywords* table -> kw_id
ht_str -> lookup/add to *hashtags* table -> ht_id
-------------------------------------------------
-------------------------------------------------
kw
_list is an array of strings
tok
_list is an array of strings
NB keywords are mandatory: each registration should provide at least MIN_KW
NB keywords are mandatory: each registration should provide at least MIN_KW
hashtags aren't
for loop
for loop
1) query to *keywords* table (exact match)
1) query to *keywords*
or *hashtags*
table (exact match)
2) return id
2) return id
=> if a keyword
matches return kw
id
=> if a keyword
/tag matches return kwid/ht
id
=> if no keyword
matches create new and return kw
id
=> if no keyword
/tag matches create new and return kwid/ht
id
"""
"""
# sql names
fill
=
{
'tb'
:
tok_table
}
if
tok_table
==
'keywords'
:
fill
[
'idc'
]
=
'kwid'
fill
[
'strc'
]
=
'kwstr'
elif
tok_table
==
'hashtags'
:
fill
[
'idc'
]
=
'htid'
fill
[
'strc'
]
=
'htstr'
db_cursor
=
comex_db
.
cursor
()
db_cursor
=
comex_db
.
cursor
()
found_ids
=
[]
found_ids
=
[]
for
kw_str
in
kw
_list
:
for
tok_str
in
tok
_list
:
# TODO better string normalization here or in read_record
# TODO better string normalization here or in read_record
kw_str
=
kw_str
.
lower
()
tok_str
=
tok_str
.
lower
()
fill
[
'q'
]
=
tok_str
n_matched
=
db_cursor
.
execute
(
'SELECT kwid FROM keywords WHERE kwstr = "
%
s"'
%
kw_str
)
# ex: SELECT kwid FROM keywords WHERE kwstr = "complexity"
n_matched
=
db_cursor
.
execute
(
'SELECT
%(idc)
s FROM
%(tb)
s WHERE
%(strc)
s = "
%(q)
s"'
%
fill
)
# ok existing keyword => row id
# ok existing keyword => row id
if
n_matched
==
1
:
if
n_matched
==
1
:
...
@@ -468,15 +525,17 @@ def get_or_create_keywords(kw_list, comex_db):
...
@@ -468,15 +525,17 @@ def get_or_create_keywords(kw_list, comex_db):
# no matching keyword => add => row id
# no matching keyword => add => row id
elif
n_matched
==
0
:
elif
n_matched
==
0
:
db_cursor
.
execute
(
'INSERT INTO keywords(kwstr) VALUES ("
%
s")'
%
kw_str
)
# ex: INSERT INTO keywords(kwstr) VALUES ("complexity")
db_cursor
.
execute
(
'INSERT INTO
%(tb)
s(
%(strc)
s) VALUES ("
%(q)
s")'
%
fill
)
comex_db
.
commit
()
comex_db
.
commit
()
mlog
(
"INFO"
,
"Added
keyword '
%
s'"
%
kw_str
)
mlog
(
"INFO"
,
"Added
'
%
s' to
%
s table"
%
(
tok_str
,
tok_table
)
)
found_ids
.
append
(
db_cursor
.
lastrowid
)
found_ids
.
append
(
db_cursor
.
lastrowid
)
else
:
else
:
raise
Exception
(
"ERROR: non-unique
keyword '
%
s'"
%
kw
_str
)
raise
Exception
(
"ERROR: non-unique
token '
%
s'"
%
tok
_str
)
return
found_ids
return
found_ids
...
@@ -489,9 +548,11 @@ def get_or_create_affiliation(org_info, comex_db):
...
@@ -489,9 +548,11 @@ def get_or_create_affiliation(org_info, comex_db):
1) query to *affiliations* table
1) query to *affiliations* table
2) return id
2) return id
=> TODO if institution almost matches send suggestion
=> TODO if institution almost matches send suggestion
=>
TODO unicity constraint on institution + lab
=>
unicity constraint on institution + lab + org_type
=> if an institution matches return affid
=> if an institution matches return affid
=> if no institution matches create new and return affid
=> if no institution matches create new and return affid
TODO test more
"""
"""
the_aff_id
=
None
the_aff_id
=
None
...
@@ -512,10 +573,8 @@ def get_or_create_affiliation(org_info, comex_db):
...
@@ -512,10 +573,8 @@ def get_or_create_affiliation(org_info, comex_db):
db_qstrvals
.
append
(
quotedstrval
)
db_qstrvals
.
append
(
quotedstrval
)
# for select
# for select
if
colname
!=
'org_type'
:
db_constraints
.
append
(
"
%
s =
%
s"
%
(
colname
,
quotedstrval
))
db_constraints
.
append
(
"
%
s =
%
s"
%
(
colname
,
quotedstrval
))
else
:
else
:
if
colname
!=
'org_type'
:
db_constraints
.
append
(
"
%
s IS NULL"
%
colname
)
db_constraints
.
append
(
"
%
s IS NULL"
%
colname
)
db_cursor
=
comex_db
.
cursor
()
db_cursor
=
comex_db
.
cursor
()
...
...
services/main.py
View file @
b79f438f
...
@@ -43,7 +43,7 @@ if __package__ == 'services':
...
@@ -43,7 +43,7 @@ if __package__ == 'services':
from
services.user
import
User
,
login_manager
,
doors_login
,
UCACHE
from
services.user
import
User
,
login_manager
,
doors_login
,
UCACHE
from
services.text
import
keywords
from
services.text
import
keywords
from
services.tools
import
restparse
,
mlog
,
re_hash
,
REALCONFIG
from
services.tools
import
restparse
,
mlog
,
re_hash
,
REALCONFIG
from
services.db
import
connect_db
,
get_or_create_
keywords
,
save_pairs_sch_kw
,
delete_pairs_sch_kw
,
get_or_create_affiliation
,
save_scholar
,
get_field_aggs
from
services.db
import
connect_db
,
get_or_create_
tokitems
,
save_pairs_sch_tok
,
delete_pairs_sch_tok
,
get_or_create_affiliation
,
save_scholar
,
get_field_aggs
from
services.db_to_tina_api.extractDataCustom
import
MyExtractor
as
MySQL
from
services.db_to_tina_api.extractDataCustom
import
MyExtractor
as
MySQL
else
:
else
:
# when this script is run directly
# when this script is run directly
...
@@ -51,7 +51,7 @@ else:
...
@@ -51,7 +51,7 @@ else:
from
user
import
User
,
login_manager
,
doors_login
,
UCACHE
from
user
import
User
,
login_manager
,
doors_login
,
UCACHE
from
text
import
keywords
from
text
import
keywords
from
tools
import
restparse
,
mlog
,
re_hash
,
REALCONFIG
from
tools
import
restparse
,
mlog
,
re_hash
,
REALCONFIG
from
db
import
connect_db
,
get_or_create_
keywords
,
save_pairs_sch_kw
,
delete_pairs_sch_kw
,
get_or_create_affiliation
,
save_scholar
,
get_field_aggs
from
db
import
connect_db
,
get_or_create_
tokitems
,
save_pairs_sch_tok
,
delete_pairs_sch_tok
,
get_or_create_affiliation
,
save_scholar
,
get_field_aggs
from
db_to_tina_api.extractDataCustom
import
MyExtractor
as
MySQL
from
db_to_tina_api.extractDataCustom
import
MyExtractor
as
MySQL
# ============= app creation ============
# ============= app creation ============
...
@@ -61,7 +61,7 @@ app = Flask("services",
...
@@ -61,7 +61,7 @@ app = Flask("services",
static_folder
=
path
.
join
(
config
[
'HOME'
],
"static"
),
static_folder
=
path
.
join
(
config
[
'HOME'
],
"static"
),
template_folder
=
path
.
join
(
config
[
'HOME'
],
"templates"
))
template_folder
=
path
.
join
(
config
[
'HOME'
],
"templates"
))
app
.
config
[
'DEBUG'
]
=
(
config
[
'LOG_LEVEL'
]
==
"DEBUG"
)
app
.
config
[
'DEBUG'
]
=
(
config
[
'LOG_LEVEL'
]
in
[
"DEBUG"
,
"DEBUGSQL"
]
)
app
.
config
[
'SECRET_KEY'
]
=
'TODO fill secret key for sessions for login'
app
.
config
[
'SECRET_KEY'
]
=
'TODO fill secret key for sessions for login'
# for SSL
# for SSL
...
@@ -82,6 +82,7 @@ login_manager.init_app(app)
...
@@ -82,6 +82,7 @@ login_manager.init_app(app)
# all inputs as they are declared in form, as a couple
# all inputs as they are declared in form, as a couple
SOURCE_FIELDS
=
[
SOURCE_FIELDS
=
[
# NAME, SANITIZE?
# NAME, SANITIZE?
(
"luid"
,
False
),
(
"doors_uid"
,
False
),
(
"doors_uid"
,
False
),
(
"last_modified_date"
,
False
),
# ex 2016-11-16T17:47:07.308Z
(
"last_modified_date"
,
False
),
# ex 2016-11-16T17:47:07.308Z
(
"email"
,
True
),
(
"email"
,
True
),
...
@@ -95,8 +96,7 @@ SOURCE_FIELDS = [
...
@@ -95,8 +96,7 @@ SOURCE_FIELDS = [
(
"position"
,
True
),
(
"position"
,
True
),
(
"hon_title"
,
True
),
(
"hon_title"
,
True
),
(
"interests_text"
,
True
),
(
"interests_text"
,
True
),
(
"community_hashtags"
,
True
),
(
"gender"
,
False
),
# M|F
(
"gender"
,
True
),
# M|F
(
"job_looking_date"
,
True
),
# def null: not looking for a job
(
"job_looking_date"
,
True
),
# def null: not looking for a job
(
"home_url"
,
True
),
# scholar's homepage
(
"home_url"
,
True
),
# scholar's homepage
(
"pic_url"
,
True
),
(
"pic_url"
,
True
),
...
@@ -110,8 +110,11 @@ SOURCE_FIELDS = [
...
@@ -110,8 +110,11 @@ SOURCE_FIELDS = [
(
"org_city"
,
True
),
(
"org_city"
,
True
),
# => for *affiliations* table
# => for *affiliations* table
(
"keywords"
,
True
)
(
"keywords"
,
True
)
,
# => for *keywords* table (after split str)
# => for *keywords* table (after split str)
(
"community_hashtags"
,
True
)
# => for *hashtags* table (after split str)
]
]
# NB password values have already been sent by ajax to Doors
# NB password values have already been sent by ajax to Doors
...
@@ -442,10 +445,11 @@ def save_form(request_form, request_files, update_flag=False):
...
@@ -442,10 +445,11 @@ def save_form(request_form, request_files, update_flag=False):
"""
"""
# only safe values
# only safe values
clean_records
=
{}
clean_records
=
{}
kw_array
=
[]
# 1) handles all the inputs from form, no matter what target table
# 1) handles all the inputs from form, no matter what target table
(
duuid
,
rdate
,
kw_array
,
clean_records
)
=
read_record
(
request_form
)
clean_records
=
read_record
(
request_form
)
mlog
(
"DEBUG"
,
"===== clean_records ====="
,
clean_records
)
# 2) handles the pic_file if present
# 2) handles the pic_file if present
if
'pic_file'
in
request_files
:
if
'pic_file'
in
request_files
:
...
@@ -461,27 +465,41 @@ def save_form(request_form, request_files, update_flag=False):
...
@@ -461,27 +465,41 @@ def save_form(request_form, request_files, update_flag=False):
# B) read/fill the affiliation table to get associated id
# B) read/fill the affiliation table to get associated id
clean_records
[
'affiliation_id'
]
=
get_or_create_affiliation
(
clean_records
,
reg_db
)
clean_records
[
'affiliation_id'
]
=
get_or_create_affiliation
(
clean_records
,
reg_db
)
# C) create record into the primary user table
# C) create
/update
record into the primary user table
# ---------------------------------------------
# ---------------------------------------------
-------
# TODO class User method !!
# TODO class User method !!
save_scholar
(
duuid
,
rdate
,
clean_records
,
reg_db
,
update_flag
=
update_flag
)
luid
=
None
if
update_flag
:
luid
=
clean_records
[
'luid'
]
save_scholar
(
clean_records
,
reg_db
,
update_luid
=
luid
)
else
:
luid
=
save_scholar
(
clean_records
,
reg_db
)
# D) read/fill each keyword and save the (uid <=> kwid) pairings
# D) read/fill each keyword and save the (uid <=> kwid) pairings
kwids
=
get_or_create_keywords
(
kw_array
,
reg_db
)
# read/fill each hashtag and save the (uid <=> htid) pairings
for
intables
in
[[
'keywords'
,
'keywords'
,
'sch_kw'
],
[
'community_hashtags'
,
'hashtags'
,
'sch_ht'
]]:
tok_field
=
intables
[
0
]
if
tok_field
in
clean_records
:
tok_table
=
intables
[
1
]
map_table
=
intables
[
2
]
tokids
=
get_or_create_tokitems
(
clean_records
[
tok_field
],
reg_db
,
tok_table
)
# TODO class User method !!
# TODO class User method !!
# POSS selective delete ?
# POSS selective delete ?
if
update_flag
:
if
update_flag
:
delete_pairs_sch_kw
(
duuid
,
reg_db
)
delete_pairs_sch_tok
(
luid
,
reg_db
,
map_table
)
save_pairs_sch_kw
([(
duuid
,
kwid
)
for
kwid
in
kwids
],
reg_db
)
save_pairs_sch_tok
([(
luid
,
tokid
)
for
tokid
in
tokids
],
reg_db
,
map_table
)
# F) end connection
reg_db
.
close
()
# clear cache concerning this scholar
# clear cache concerning this scholar
# TODO class User method !!
# TODO class User method !!
if
duuid
in
UCACHE
:
UCACHE
.
pop
(
duuid
)
if
luid
in
UCACHE
:
UCACHE
.
pop
(
luid
)
# E) end connection
reg_db
.
close
()
return
clean_records
return
clean_records
...
@@ -492,14 +510,9 @@ def read_record(incoming_data):
...
@@ -492,14 +510,9 @@ def read_record(incoming_data):
- custom made for regcomex/templates/base_form
- custom made for regcomex/templates/base_form
- uses SOURCE_FIELDS
- uses SOURCE_FIELDS
"""
"""
# init var
# init var
clean_records
=
{}
clean_records
=
{}
# read in + sanitize values
duuid
=
None
rdate
=
None
# we should have all the mandatory fields (checked in client-side js)
# we should have all the mandatory fields (checked in client-side js)
# TODO recheck b/c if post comes from elsewhere
# TODO recheck b/c if post comes from elsewhere
for
field_info
in
SOURCE_FIELDS
:
for
field_info
in
SOURCE_FIELDS
:
...
@@ -513,31 +526,29 @@ def read_record(incoming_data):
...
@@ -513,31 +526,29 @@ def read_record(incoming_data):
else
:
else
:
# mysql will want None instead of ''
# mysql will want None instead of ''
val
=
None
val
=
None
# these 2 fields already validated and useful separately
elif
field
==
'doors_uid'
:
duuid
=
incoming_data
[
field
]
elif
field
==
'last_modified_date'
:
rdate
=
incoming_data
[
field
]
# any other fields that don't need sanitization (ex: menu options)
# any other fields that don't need sanitization (ex: menu options)
else
:
else
:
clean_records
[
field
]
=
incoming_data
[
field
]
clean_records
[
field
]
=
incoming_data
[
field
]
# special treatment for "other" subquestions
# special treatment for "other" subquestions
if
'org_type'
in
clean_records
:
if
'org_type'
in
clean_records
:
if
clean_records
[
'org_type'
]
==
'other'
and
'other_org_type'
in
clean_records
:
if
clean_records
[
'org_type'
]
==
'other'
and
'other_org_type'
in
clean_records
:
clean_records
[
'org_type'
]
=
clean_records
[
'other_org_type'
]
clean_records
[
'org_type'
]
=
clean_records
[
'other_org_type'
]
# split for kw_array
# splits for kw_array and ht_array
kw_array
=
[]
for
tok_field
in
[
'keywords'
,
'community_hashtags'
]:
if
'keywords'
in
clean_records
:
if
tok_field
in
clean_records
:
for
kw
in
clean_records
[
'keywords'
]
.
split
(
','
):
print
(
tok_field
,
"in clean_records"
)
kw
=
sanitize
(
kw
)
temp_array
=
[]
if
kw
!=
''
:
for
tok
in
clean_records
[
tok_field
]
.
split
(
','
):
kw_array
.
append
(
kw
)
tok
=
sanitize
(
tok
)
if
tok
!=
''
:
temp_array
.
append
(
tok
)
# replace str by array
clean_records
[
tok_field
]
=
temp_array
return
(
duuid
,
rdate
,
kw_array
,
clean_records
)
return
clean_records
# TODO move to text submodules
# TODO move to text submodules
...
...
setup/dockers/comex2_mysql_server/init_comex_shared.sql
View file @
b79f438f
...
@@ -11,7 +11,7 @@ CREATE TABLE scholars (
...
@@ -11,7 +11,7 @@ CREATE TABLE scholars (
-- local uid necessary for users who still have no doors_uid
-- local uid necessary for users who still have no doors_uid
luid
int
(
15
)
not
null
auto_increment
unique
primary
key
,
luid
int
(
15
)
not
null
auto_increment
unique
primary
key
,
-- doors uid common to all lab's services
-- doors uid common to all lab's services
doors_uid
char
(
36
)
not
null
unique
,
doors_uid
char
(
36
)
unique
,
last_modified_date
char
(
24
)
not
null
,
last_modified_date
char
(
24
)
not
null
,
email
varchar
(
255
)
not
null
unique
,
email
varchar
(
255
)
not
null
unique
,
country
varchar
(
60
)
not
null
,
country
varchar
(
60
)
not
null
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment