Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
C
clinicaltrials
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
david Chavalarias
clinicaltrials
Commits
990a5aed
Commit
990a5aed
authored
Jan 05, 2017
by
Romain Loth
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add a simple aggregations API
parent
ed8ea3da
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
156 additions
and
17 deletions
+156
-17
parametres_comex.dev.ini.default
config/parametres_comex.dev.ini.default
+4
-0
parametres_comex.ini
config/parametres_comex.ini
+4
-0
parametres_comex.prod.ini.default
config/parametres_comex.prod.ini.default
+4
-0
db.py
services/db.py
+118
-0
extractDataCustom.py
services/db_to_tina_api/extractDataCustom.py
+4
-10
main.py
services/main.py
+15
-2
tools.py
services/tools.py
+4
-1
comex_page_reg_controllers.js
static/js/comex_page_reg_controllers.js
+3
-4
No files found.
config/parametres_comex.dev.ini.default
View file @
990a5aed
...
...
@@ -26,3 +26,7 @@ SQL_PORT=3306
#; (used only in ajax context of sending credentials) #; TODO SSL
DOORS_HOST=172.18.0.3
DOORS_PORT=8989
[content]
# used for all aggregation queries: values whose count is not above this threshold are filtered out (0 = no filtering)
HAPAX_THRESHOLD = 0
config/parametres_comex.ini
View file @
990a5aed
...
...
@@ -26,3 +26,7 @@ SQL_PORT=3306
#; (used only in ajax context of sending credentials) #; TODO SSL
DOORS_HOST
=
134.158.75.71
DOORS_PORT
=
80
[content]
# used for all aggregation queries: values whose count is not above this threshold are filtered out (0 = no filtering)
HAPAX_THRESHOLD
=
0
config/parametres_comex.prod.ini.default
View file @
990a5aed
...
...
@@ -26,3 +26,7 @@ SQL_PORT=3306
#; (used only in ajax context of sending credentials) #; TODO SSL
DOORS_HOST=134.158.75.71
DOORS_PORT=80
[content]
# used for all aggregation queries: values whose count is not above this threshold are filtered out (0 = no filtering)
HAPAX_THRESHOLD = 0
services/db.py
View file @
990a5aed
...
...
@@ -46,6 +46,21 @@ ORG_COLS = [
(
"org_city"
,
False
,
50
)
]
# Maps the field names used by the frontend (filters, aggregation API)
# to their fully qualified "table.column" name in the SQL database.
FIELDS_FRONTEND_TO_SQL = {
    "keywords":      "keywords.kwstr",
    "countries":     "scholars.country",
    "organizations": "affiliations.org",
    "laboratories":  "affiliations.team_lab",
    "tags":          "scholars.community_hashtags",

    # new
    "gender":        "scholars.gender",
    "cities":        "affiliations.org_city",
    "linked":        "linked_ids.ext_id_type",
}
def
connect_db
(
config
=
REALCONFIG
):
"""
Simple connection
...
...
@@ -60,6 +75,109 @@ def connect_db(config=REALCONFIG):
db
=
"comex_shared"
)
def get_field_aggs(a_field, hapax_threshold=int(REALCONFIG['HAPAX_THRESHOLD'])):
    """
    Use case: api/aggs?field=a_field
       => Retrieves distinct field values and count having it

       => about *n* vs *occs*:
           - for tables != keywords count is scholar count
           - for table keywords count is occurrences count

    Params:
       a_field:          frontend field name, must be a key of
                         FIELDS_FRONTEND_TO_SQL (otherwise the result
                         is an empty list and no query is run)
       hapax_threshold:  when it is an int > 0, only the values whose
                         count is strictly above it are returned

    Returns:
       the rows fetched with a DictCursor, each having the value under
       key 'x' and its count under key 'n' (or 'occs' for keywords),
       by decreasing count; an empty list if there is nothing to return

    Raises:
       ValueError if the mapped SQL table has no known join to scholars

    NB relies on FIELDS_FRONTEND_TO_SQL mapping

    POSS: allow other fields than those in the mapping
          if they are already in sql table.col format?
    """
    # unknown fields give an empty result without opening a connection
    if a_field not in FIELDS_FRONTEND_TO_SQL:
        return []

    sql_col = FIELDS_FRONTEND_TO_SQL[a_field]
    sql_tab = sql_col.split('.')[0]

    mlog('DEBUG', "AGG API sql_col", sql_col)
    mlog('DEBUG', "AGG API sql_tab", sql_tab)

    # how to reach each supported table starting from scholars
    joins_by_table = {
        'scholars': "",
        'affiliations': """-- 0 or 1
                LEFT JOIN affiliations
                    ON scholars.affiliation_id = affiliations.affid""",
        'linked_ids': """-- 0 or 1
                LEFT JOIN linked_ids
                    ON scholars.doors_uid = linked_ids.uid""",
        'keywords': """-- 0 or many
                LEFT JOIN sch_kw
                    ON scholars.doors_uid = sch_kw.uid
                JOIN keywords
                    ON sch_kw.kwid = keywords.kwid""",
    }

    # checked before connecting so that a mapping error can't leak a connection
    if sql_tab not in joins_by_table:
        raise ValueError(
            "get_field_aggs: no aggregation query for table '%s'" % sql_tab
        )

    # keywords are counted as occurrences, everything else as scholars
    count_col = 'occs' if sql_tab == 'keywords' else 'n'

    if isinstance(hapax_threshold, int) and hapax_threshold > 0:
        where_clause = "WHERE %s > %i" % (count_col, hapax_threshold)
    else:
        where_clause = ""

    # string interpolation is safe here: column and table names come from
    # the FIELDS_FRONTEND_TO_SQL whitelist and the threshold is an int
    stmt = """
        SELECT * FROM (
            SELECT %(col)s AS x, COUNT(*) AS %(count)s
            FROM scholars
                %(joins)s
            GROUP BY %(col)s
        ) AS allcounts
        %(filter)s
        ORDER BY %(count)s DESC
    """ % {
        'col': sql_col,
        'count': count_col,
        'joins': joins_by_table[sql_tab],
        'filter': where_clause,
    }

    mlog("DEBUG", "get_field_aggs STATEMENT:\n-- SQL\n%s\n-- /SQL" % stmt)

    agg_rows = []
    db = connect_db()
    try:
        db_c = db.cursor(DictCursor)

        # do it
        n_rows = db_c.execute(stmt)

        if n_rows > 0:
            agg_rows = db_c.fetchall()
    finally:
        # always release the connection, even if the query failed
        db.close()

    mlog('INFO', agg_rows)

    return agg_rows
def
get_full_scholar
(
uid
):
"""
Autonomous function to be used by User class
...
...
services/db_to_tina_api/extractDataCustom.py
View file @
990a5aed
...
...
@@ -10,18 +10,12 @@ from .converter import CountryConverter
if
__package__
==
"services.db_to_tina_api"
:
from
services.tools
import
mlog
from
services.db
import
FIELDS_FRONTEND_TO_SQL
else
:
from
tools
import
mlog
from
db
import
FIELDS_FRONTEND_TO_SQL
whoswhofilters_to_sqlnames
=
{
"keywords"
:
"keywords.kwstr"
,
"countries"
:
"scholars.country"
,
"organizations"
:
"affiliations.org"
,
"laboratories"
:
"affiliations.team_lab"
,
"tags"
:
"scholars.community_hashtags"
}
class
MyExtractor
:
def
__init__
(
self
,
dbhost
):
...
...
@@ -147,11 +141,11 @@ class MyExtractor:
known_filter
=
None
sql_column
=
None
if
key
not
in
whoswhofilters_to_sqlnames
:
if
key
not
in
FIELDS_FRONTEND_TO_SQL
:
continue
else
:
known_filter
=
key
sql_column
=
whoswhofilters_to_sqlnames
[
key
]
sql_column
=
FIELDS_FRONTEND_TO_SQL
[
key
]
val
=
filter_dict
[
known_filter
]
...
...
services/main.py
View file @
990a5aed
...
...
@@ -36,7 +36,7 @@ if __package__ == 'services':
from
services.user
import
User
,
login_manager
,
doors_login
from
services.text
import
keywords
from
services.tools
import
restparse
,
mlog
,
re_hash
,
REALCONFIG
from
services.db
import
connect_db
,
get_or_create_keywords
,
save_pairs_sch_kw
,
get_or_create_affiliation
,
save_scholar
from
services.db
import
connect_db
,
get_or_create_keywords
,
save_pairs_sch_kw
,
get_or_create_affiliation
,
save_scholar
,
get_field_aggs
from
services.db_to_tina_api.extractDataCustom
import
MyExtractor
as
MySQL
else
:
# when this script is run directly
...
...
@@ -44,7 +44,7 @@ else:
from
user
import
User
,
login_manager
,
doors_login
from
text
import
keywords
from
tools
import
restparse
,
mlog
,
re_hash
,
REALCONFIG
from
db
import
connect_db
,
get_or_create_keywords
,
save_pairs_sch_kw
,
get_or_create_affiliation
,
save_scholar
from
db
import
connect_db
,
get_or_create_keywords
,
save_pairs_sch_kw
,
get_or_create_affiliation
,
save_scholar
,
get_field_aggs
from
db_to_tina_api.extractDataCustom
import
MyExtractor
as
MySQL
# ============= read config ============
...
...
@@ -113,6 +113,19 @@ MIN_KW = 5
def
services
():
return
redirect
(
url_for
(
'login'
,
_external
=
True
))
# /services/api/aggs
@app.route(config['PREFIX'] + config['API_ROUTE'] + '/aggs')
def aggs_api():
    """
    API to read DB aggregation data (ex: for autocompletes)

    Expects a 'field' argument in the query string and returns the
    serialized result of get_field_aggs() for that field.

    Raises TypeError if the 'field' argument is absent.
    """
    if 'field' not in request.args:
        raise TypeError("aggs API query is missing 'field' argument")

    # field name itself is tested by db module
    requested_field = request.args['field']
    result = get_field_aggs(requested_field)
    return dumps(result)
# /services/api/graph
@
app
.
route
(
config
[
'PREFIX'
]
+
config
[
'API_ROUTE'
]
+
'/graph'
)
...
...
services/tools.py
View file @
990a5aed
...
...
@@ -38,7 +38,10 @@ CONFIGMENU = [
{
"sec"
:
'backends'
,
"var"
:
'SQL_HOST'
,
"def"
:
'172.17.0.2'
},
{
"sec"
:
'backends'
,
"var"
:
'SQL_PORT'
,
"def"
:
'3306'
},
{
"sec"
:
'backends'
,
"var"
:
'DOORS_HOST'
,
"def"
:
'0.0.0.0'
},
{
"sec"
:
'backends'
,
"var"
:
'DOORS_PORT'
,
"def"
:
'8989'
}
{
"sec"
:
'backends'
,
"var"
:
'DOORS_PORT'
,
"def"
:
'8989'
},
# data processing
{
"sec"
:
'content'
,
"var"
:
'HAPAX_THRESHOLD'
,
"def"
:
'1 '
}
]
def
home_path
():
...
...
static/js/comex_page_reg_controllers.js
View file @
990a5aed
...
...
@@ -416,10 +416,9 @@ function checkJobDateStatus() {
// £TODO1 move autocomp data to an autocomplete module
// -> local data for countries, jobtitles
// -> ajax fetcher for the scholars, kws and labs
// £TODO2 add a fetcher API on services side
// £TODO move autocomp data to an autocomplete module
// -> local data for countries, jobtitles
// -> use ajax aggs api for the scholars, kws and labs
// autocomplete countries
$
(
function
()
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment