Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
C
clinicaltrials
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
david Chavalarias
clinicaltrials
Commits
123c5b92
Commit
123c5b92
authored
Jan 26, 2017
by
Romain Loth
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
custom 'users_status' param for api/aggs
parent
791106e9
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
81 additions
and
29 deletions
+81
-29
db.py
services/db.py
+81
-29
No files found.
services/db.py
View file @
123c5b92
...
@@ -101,18 +101,38 @@ def doors_uid_to_luid(doors_uid):
...
@@ -101,18 +101,38 @@ def doors_uid_to_luid(doors_uid):
return
luid
return
luid
def
get_field_aggs
(
a_field
,
hapax_threshold
=
int
(
REALCONFIG
[
'HAPAX_THRESHOLD'
])):
def
get_field_aggs
(
a_field
,
hapax_threshold
=
int
(
REALCONFIG
[
'HAPAX_THRESHOLD'
]),
users_status
=
"active"
):
"""
"""
Use case: api/aggs?field=a_field
Use case: api/aggs?field=a_field
---------------------------------
=> Retrieves distinct field values and count having it
=> Retrieves distinct field values and count having it
=> about *n* vs *occs*:
=> about *n* vs *occs*:
- for tables != keywords count is scholar count
- for tables != keywords count is scholar count
- for table keywords count is occurrences count
- for table keywords count is occurrences count
NB relies on FIELDS_FRONTEND_TO_SQL mapping
Parameters
POSS: allow other fields than those in the mapping
----------
if they are already in sql table.col format?
a_field: str
a front-end fieldname to aggregate, like "keywords" "countries"
(allowed values cf. FIELDS_FRONTEND_TO_SQL)
POSS: allow other fields than those in the mapping
if they are already in sql table.col format?
hapax_threshold: int
for all data_types, categories with a total equal or below this will be excluded from results
TODO: put them in an 'others' category
POSS: have a different threshold by type
users_status: str
defines the perimeter (set of scholars over which we work),
(allowed values are ['active', 'test', 'legacy', 'ALL'])
NB: if the param is 'legacy' here, set is indifferent to call_date
(because aggs useful for *entire* legacy group)
"""
"""
agg_rows
=
[]
agg_rows
=
[]
...
@@ -127,82 +147,113 @@ def get_field_aggs(a_field, hapax_threshold=int(REALCONFIG['HAPAX_THRESHOLD'])):
...
@@ -127,82 +147,113 @@ def get_field_aggs(a_field, hapax_threshold=int(REALCONFIG['HAPAX_THRESHOLD'])):
db
=
connect_db
()
db
=
connect_db
()
db_c
=
db
.
cursor
(
DictCursor
)
db_c
=
db
.
cursor
(
DictCursor
)
if
type
(
hapax_threshold
)
==
int
and
hapax_threshold
>
0
:
# constraints 1, if any
count_col
=
'occs'
if
sql_tab
==
'keywords'
else
'n'
prefilters
=
[]
where_clause
=
"WHERE
%
s >
%
i"
%
(
count_col
,
hapax_threshold
)
if
users_status
!=
'ALL'
:
prefilters
.
append
(
"scholars.record_status =
\"
%
s
\"
"
%
users_status
)
if
len
(
prefilters
):
pre_where
=
"WHERE "
+
" AND "
.
join
(
[
'('
+
f
+
')'
for
f
in
prefilters
]
)
else
:
pre_where
=
""
# constraints 2, if any
postfilters
=
[]
if
hapax_threshold
>
0
:
count_col
=
'occs'
if
sql_tab
in
[
'keywords'
,
'hashtags'
]
else
'n'
postfilters
.
append
(
"
%
s >
%
i"
%
(
count_col
,
hapax_threshold
)
)
if
len
(
postfilters
):
post_where
=
"WHERE "
+
" AND "
.
join
(
[
'('
+
f
+
')'
for
f
in
postfilters
]
)
else
:
else
:
where_claus
e
=
""
post_wher
e
=
""
# retrieval cases
if
sql_tab
==
'scholars'
:
if
sql_tab
==
'scholars'
:
stmt
=
"""
stmt
=
"""
SELECT
*
FROM (
SELECT
x, n
FROM (
SELECT
%(col)
s AS x, COUNT(*) AS n
SELECT
%(col)
s AS x, COUNT(*) AS n
, record_status
FROM scholars
FROM scholars
%(pre_filter)
s
GROUP BY
%(col)
s
GROUP BY
%(col)
s
) AS allcounts
) AS allcounts
%(filter)
s
%(
post_
filter)
s
ORDER BY n DESC
ORDER BY n DESC
"""
%
{
'col'
:
sql_col
,
'filter'
:
where_clause
}
"""
%
{
'col'
:
sql_col
,
'pre_filter'
:
pre_where
,
'post_filter'
:
post_where
}
elif
sql_tab
==
'affiliations'
:
elif
sql_tab
==
'affiliations'
:
stmt
=
"""
stmt
=
"""
SELECT
*
FROM (
SELECT
x, n
FROM (
SELECT
%(col)
s AS x, COUNT(*) AS n
SELECT
%(col)
s AS x, COUNT(*) AS n
, record_status
FROM scholars
FROM scholars
-- 0 or 1
-- 0 or 1
LEFT JOIN affiliations
LEFT JOIN affiliations
ON scholars.affiliation_id = affiliations.affid
ON scholars.affiliation_id = affiliations.affid
%(pre_filter)
s
GROUP BY
%(col)
s
GROUP BY
%(col)
s
) AS allcounts
) AS allcounts
%(filter)
s
%(
post_
filter)
s
ORDER BY n DESC
ORDER BY n DESC
"""
%
{
'col'
:
sql_col
,
'filter'
:
where_clause
}
"""
%
{
'col'
:
sql_col
,
'pre_filter'
:
pre_where
,
'post_filter'
:
post_where
}
elif
sql_tab
==
'linked_ids'
:
elif
sql_tab
==
'linked_ids'
:
stmt
=
"""
stmt
=
"""
SELECT
*
FROM (
SELECT
x, n
FROM (
SELECT
%(col)
s AS x, COUNT(*) AS n
SELECT
%(col)
s AS x, COUNT(*) AS n
, record_status
FROM scholars
FROM scholars
-- 0 or 1
-- 0 or 1
LEFT JOIN linked_ids
LEFT JOIN linked_ids
ON scholars.luid = linked_ids.uid
ON scholars.luid = linked_ids.uid
%(pre_filter)
s
GROUP BY
%(col)
s
GROUP BY
%(col)
s
) AS allcounts
) AS allcounts
%(filter)
s
%(
post_
filter)
s
ORDER BY n DESC
ORDER BY n DESC
"""
%
{
'col'
:
sql_col
,
'filter'
:
where_clause
}
"""
%
{
'col'
:
sql_col
,
'pre_filter'
:
pre_where
,
'post_filter'
:
post_where
}
elif
sql_tab
==
'keywords'
:
elif
sql_tab
==
'keywords'
:
stmt
=
"""
stmt
=
"""
SELECT
*
FROM (
SELECT
x, occs
FROM (
SELECT
%(col)
s AS x, COUNT(*) AS occs
SELECT
%(col)
s AS x, COUNT(*) AS occs
, record_status
FROM scholars
FROM scholars
-- 0 or many
-- 0 or many
LEFT JOIN sch_kw
LEFT JOIN sch_kw
ON scholars.luid = sch_kw.uid
ON scholars.luid = sch_kw.uid
LEFT JOIN keywords
LEFT JOIN keywords
ON sch_kw.kwid = keywords.kwid
ON sch_kw.kwid = keywords.kwid
%(pre_filter)
s
GROUP BY
%(col)
s
GROUP BY
%(col)
s
) AS allcounts
) AS allcounts
%(filter)
s
%(
post_
filter)
s
ORDER BY occs DESC
ORDER BY occs DESC
"""
%
{
'col'
:
sql_col
,
'filter'
:
where_clause
}
"""
%
{
'col'
:
sql_col
,
'pre_filter'
:
pre_where
,
'post_filter'
:
post_where
}
elif
sql_tab
==
'hashtags'
:
elif
sql_tab
==
'hashtags'
:
stmt
=
"""
stmt
=
"""
SELECT
*
FROM (
SELECT
x, occs
FROM (
SELECT
%(col)
s AS x, COUNT(*) AS occs
SELECT
%(col)
s AS x, COUNT(*) AS occs
, record_status
FROM scholars
FROM scholars
-- 0 or many
-- 0 or many
LEFT JOIN sch_ht
LEFT JOIN sch_ht
ON scholars.luid = sch_ht.uid
ON scholars.luid = sch_ht.uid
LEFT JOIN hashtags
LEFT JOIN hashtags
ON sch_ht.htid = hashtags.htid
ON sch_ht.htid = hashtags.htid
%(pre_filter)
s
GROUP BY
%(col)
s
GROUP BY
%(col)
s
) AS allcounts
) AS allcounts
%(filter)
s
%(
post_
filter)
s
ORDER BY occs DESC
ORDER BY occs DESC
"""
%
{
'col'
:
sql_col
,
'filter'
:
where_clause
}
"""
%
{
'col'
:
sql_col
,
'pre_filter'
:
pre_where
,
'post_filter'
:
post_where
}
mlog
(
"DEBUGSQL"
,
"get_field_aggs STATEMENT:
\n
-- SQL
\n
%
s
\n
-- /SQL"
%
stmt
)
mlog
(
"DEBUGSQL"
,
"get_field_aggs STATEMENT:
\n
-- SQL
\n
%
s
\n
-- /SQL"
%
stmt
)
...
@@ -214,7 +265,8 @@ def get_field_aggs(a_field, hapax_threshold=int(REALCONFIG['HAPAX_THRESHOLD'])):
...
@@ -214,7 +265,8 @@ def get_field_aggs(a_field, hapax_threshold=int(REALCONFIG['HAPAX_THRESHOLD'])):
db
.
close
()
db
.
close
()
mlog
(
'INFO'
,
agg_rows
)
# mlog('DEBUG', "aggregation over %s: result rows =" % a_field, agg_rows)
return
agg_rows
return
agg_rows
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment