Commit 7e7aefbb authored by Romain Loth's avatar Romain Loth

Merge branch 'db_modules_refacto'

parents 976179a3 f7716c7d
...@@ -160,6 +160,13 @@ More info in `doc/` directory ...@@ -160,6 +160,13 @@ More info in `doc/` directory
- transform `affiliations` table into 2 tables (differentiate labs and orgs) - transform `affiliations` table into 2 tables (differentiate labs and orgs)
### Project History Overview
This project was developed over several years with 3 main development phases.
- Scholars statistics and directory structure originally developed by D. Chavalarias (scholars data exploration in the `php_library` folder).
- Graph extraction logic originally developed by [S. Castillo](https://github.com/PkSM3/) (the SubsetExtractor in the `dbdatapi` module).
- Ports to Python 3 + MySQL, user registration, and the merge of the various layers into one deployable Docker image by [R. Loth](https://github.com/rloth/).
##### Copyright ##### Copyright
###### Authors ###### Authors
- Researchers and engineers of the ISC-PIF - Researchers and engineers of the ISC-PIF
......
...@@ -50,21 +50,6 @@ ORG_COLS = [ ...@@ -50,21 +50,6 @@ ORG_COLS = [
] ]
# Mapping from front-end filter names to their SQL column and match semantics.
# 'col'  : "table.column" — the table prefix also selects the JOIN strategy
#          used by the aggregation queries (cf. get_field_aggs)
# 'type' : "EQ_relation"   => exact match (WHERE col = val)
#          "LIKE_relation" => substring match (WHERE col LIKE val)
FIELDS_FRONTEND_TO_SQL = {
    "keywords": {'col':"keywords.kwstr", "type": "LIKE_relation"},
    "tags": {'col':"hashtags.htstr", 'type': "LIKE_relation"},
    "countries": {'col':"scholars.country", 'type': "EQ_relation"},
    "gender": {'col':"scholars.gender", 'type': "EQ_relation"},
    "organizations": {'col':"affiliations.org", 'type': "LIKE_relation"},
    "laboratories": {'col':"affiliations.team_lab", 'type': "LIKE_relation"},
    "cities": {'col':"affiliations.org_city", 'type': "EQ_relation"},
    "linked": {'col':"linked_ids.ext_id_type", 'type': "EQ_relation"}
}
def connect_db(config=REALCONFIG): def connect_db(config=REALCONFIG):
""" """
Simple connection Simple connection
...@@ -84,20 +69,16 @@ def doors_uid_to_luid(doors_uid, cmx_db = None): ...@@ -84,20 +69,16 @@ def doors_uid_to_luid(doors_uid, cmx_db = None):
""" """
Find corresponding luid Find corresponding luid
""" """
if cmx_db: if cmx_db:
db = cmx_db db = cmx_db
else: else:
db = connect_db() db = connect_db()
db_c = db.cursor() db_c = db.cursor()
stmt = """ stmt = """
SELECT luid FROM scholars SELECT luid FROM scholars
WHERE doors_uid = "%s" WHERE doors_uid = "%s"
""" % doors_uid """ % doors_uid
n_rows = db_c.execute(stmt) n_rows = db_c.execute(stmt)
luid = None luid = None
if n_rows > 1: if n_rows > 1:
if not cmx_db: if not cmx_db:
...@@ -107,7 +88,6 @@ def doors_uid_to_luid(doors_uid, cmx_db = None): ...@@ -107,7 +88,6 @@ def doors_uid_to_luid(doors_uid, cmx_db = None):
luid = db_c.fetchone()[0] luid = db_c.fetchone()[0]
if not cmx_db: if not cmx_db:
db.close() db.close()
return luid return luid
...@@ -135,152 +115,6 @@ def email_exists(email, cmx_db = None): ...@@ -135,152 +115,6 @@ def email_exists(email, cmx_db = None):
return exi_bool return exi_bool
def get_field_aggs(a_field,
                   hapax_threshold=int(REALCONFIG['HAPAX_THRESHOLD']),
                   users_status = "ALL"):
    """
    Use case: api/aggs?field=a_field
    ---------------------------------
    => Retrieves distinct field values and the count of scholars having each
    => about *n* vs *occs*:
        - for tables != keywords count is scholar count
        - for table keywords count is occurrences count

    Parameters
    ----------
    a_field: str
        a front-end fieldname to aggregate, like "keywords" "countries"
        (allowed values cf. FIELDS_FRONTEND_TO_SQL)
        POSS: allow other fields than those in the mapping
              if they are already in sql table.col format?
    hapax_threshold: int
        for all data_types, categories with a total equal or below this
        will be excluded from results
        TODO: put them in an 'others' category
        POSS: have a different threshold by type
    users_status: str
        defines the perimeter (set of scholars over which we work),
        (allowed values are ['active', 'test', 'legacy', 'ALL'])
        NB: if the param is 'legacy' here, set is indifferent to call_date
            (because aggs useful for *entire* legacy group)

    Returns
    -------
    list of dict rows like {'x': <value>, 'n'|'occs': <count>}
    (empty list when the field is unknown or there are no results)
    """
    agg_rows = []

    if a_field in FIELDS_FRONTEND_TO_SQL:

        sql_col = FIELDS_FRONTEND_TO_SQL[a_field]['col']
        sql_tab = sql_col.split('.')[0]

        mlog('INFO', "AGG API sql_col", sql_col)

        db = connect_db()
        db_c = db.cursor(DictCursor)

        # keyword-ish tables count occurrences, the others count scholars
        count_col = 'occs' if sql_tab in ('keywords', 'hashtags') else 'n'

        # constraints 2, if any (applied on the aggregated counts)
        postfilters = []
        if hapax_threshold > 0:
            postfilters.append( "%s > %i" % (count_col, hapax_threshold) )

        if len(postfilters):
            post_where = "WHERE "+" AND ".join(
                ['('+f+')' for f in postfilters]
            )
        else:
            post_where = ""

        # each table needs its own path from scholars to the aggregated column
        # (single template below: fixes the bug where the affiliations case
        #  silently dropped the hapax post-filter)
        join_clauses = {
            'scholars': "",
            # 0 or 1
            'affiliations': """LEFT JOIN affiliations
                               ON scholars.affiliation_id = affiliations.affid""",
            # 0 or 1
            'linked_ids': """LEFT JOIN linked_ids
                             ON scholars.luid = linked_ids.uid""",
            # 0 or many
            'keywords': """LEFT JOIN sch_kw
                           ON scholars.luid = sch_kw.uid
                           LEFT JOIN keywords
                           ON sch_kw.kwid = keywords.kwid""",
            # 0 or many
            'hashtags': """LEFT JOIN sch_ht
                           ON scholars.luid = sch_ht.uid
                           LEFT JOIN hashtags
                           ON sch_ht.htid = hashtags.htid"""
        }

        stmt = """
            SELECT x, %(cnt)s FROM (
                SELECT %(col)s AS x, COUNT(*) AS %(cnt)s
                FROM scholars
                %(join)s
                GROUP BY %(col)s
            ) AS allcounts
            %(post_filter)s
            ORDER BY %(cnt)s DESC
        """ % {'col': sql_col,
               'cnt': count_col,
               'join': join_clauses[sql_tab],
               'post_filter': post_where}

        mlog("DEBUGSQL", "get_field_aggs STATEMENT:\n-- SQL\n%s\n-- /SQL" % stmt)

        # do it
        n_rows = db_c.execute(stmt)

        if n_rows > 0:
            agg_rows = db_c.fetchall()

        db.close()

    # mlog('DEBUG', "aggregation over %s: result rows =" % a_field, agg_rows)

    return agg_rows
def rm_scholar(luid, cmx_db = None): def rm_scholar(luid, cmx_db = None):
""" """
...@@ -483,38 +317,6 @@ def get_full_scholar(uid, cmx_db = None): ...@@ -483,38 +317,6 @@ def get_full_scholar(uid, cmx_db = None):
return urow_dict return urow_dict
def find_scholar(some_key, some_str_value, cmx_db = None):
    """
    Get the luid of a scholar based on some str value.

    To make sense, the key should be a unique one
    but this function doesn't check it !

    Parameters
    ----------
    some_key: str
        a column name of the scholars table
        NB: interpolated into the SQL as an identifier => must come from
            trusted code, never directly from user input
    some_str_value: str
        the value to match (passed as a bound parameter, injection-safe)
    cmx_db:
        an optional already-open connection (left open if provided)

    Returns
    -------
    int luid or None if not found (or if the query failed)
    """
    luid = None
    if cmx_db:
        db = cmx_db
    else:
        db = connect_db()
    db_c = db.cursor(DictCursor)
    try:
        # the column name can't be a bound parameter => string-formatted,
        # but the value IS bound (fixes the previous injection-prone '"%s"')
        db_c.execute(
            'SELECT luid FROM scholars WHERE %s = %%s' % some_key,
            (some_str_value,)
        )
        first_row = db_c.fetchone()
        if first_row:
            luid = first_row['luid']
    except Exception:
        mlog('WARNING', 'unsuccessful attempt to identify a scholar on key %s' % some_key)
    finally:
        # only close connections we opened ourselves
        if not cmx_db:
            db.close()
    return luid
def save_full_scholar(safe_recs, cmx_db, uactive=True, update_user=None): def save_full_scholar(safe_recs, cmx_db, uactive=True, update_user=None):
""" """
For new registration: For new registration:
......
from MySQLdb import connect, cursors """
DB data querying (mostly aggs + subset selections orginally made by Samuel)
"""
__author__ = "CNRS"
__copyright__ = "Copyright 2016 ISCPIF-CNRS"
__email__ = "romain.loth@iscpif.fr"
from MySQLdb import connect, cursors
from MySQLdb.cursors import DictCursor
from networkx import Graph, DiGraph from networkx import Graph, DiGraph
from random import randint from random import randint
from math import floor, log, log1p from math import floor, log, log1p
...@@ -6,17 +15,211 @@ from cgi import escape ...@@ -6,17 +15,211 @@ from cgi import escape
from re import sub from re import sub
from traceback import format_tb from traceback import format_tb
from .converter import CountryConverter if __package__ == 'services':
from services.tools import mlog, REALCONFIG
if __package__ == "services.db_to_tina_api": from services.dbcrud import connect_db
from services.tools import mlog from services.text.converter import CountryConverter
from services.db import FIELDS_FRONTEND_TO_SQL
else: else:
from tools import mlog from tools import mlog, REALCONFIG
from db import FIELDS_FRONTEND_TO_SQL from dbcrud import connect_db
from text.converter import CountryConverter
# Mapping from front-end filter names to their SQL column and match semantics.
# 'col'  : "table.column" — the table prefix also selects the JOIN strategy
#          used by the aggregation queries (cf. get_field_aggs)
# 'type' : "EQ_relation"   => exact match (WHERE col = val)
#          "LIKE_relation" => substring match (WHERE col LIKE val)
FIELDS_FRONTEND_TO_SQL = {
    "keywords": {'col':"keywords.kwstr", "type": "LIKE_relation"},
    "tags": {'col':"hashtags.htstr", 'type': "LIKE_relation"},
    "countries": {'col':"scholars.country", 'type': "EQ_relation"},
    "gender": {'col':"scholars.gender", 'type': "EQ_relation"},
    "organizations": {'col':"affiliations.org", 'type': "LIKE_relation"},
    "laboratories": {'col':"affiliations.team_lab", 'type': "LIKE_relation"},
    "cities": {'col':"affiliations.org_city", 'type': "EQ_relation"},
    "linked": {'col':"linked_ids.ext_id_type", 'type': "EQ_relation"}
}
def get_field_aggs(a_field,
                   hapax_threshold=int(REALCONFIG['HAPAX_THRESHOLD']),
                   users_status = "ALL"):
    """
    Use case: /services/api/aggs?field=a_field
    ---------------------------------
    => Retrieves distinct field values and the count of scholars having each
    => about *n* vs *occs*:
        - for tables != keywords count is scholar count
        - for table keywords count is occurrences count

    Parameters
    ----------
    a_field: str
        a front-end fieldname to aggregate, like "keywords" "countries"
        (allowed values cf. FIELDS_FRONTEND_TO_SQL)
        POSS: allow other fields than those in the mapping
              if they are already in sql table.col format?
    hapax_threshold: int
        for all data_types, categories with a total equal or below this
        will be excluded from results
        TODO: put them in an 'others' category
        POSS: have a different threshold by type
    users_status: str
        POSSible pre-filter:
        should define the perimeter (set of scholars over which we work)

    Returns
    -------
    list of dict rows like {'x': <value>, 'n'|'occs': <count>}
    (empty list when the field is unknown or there are no results)
    """
    agg_rows = []

    if a_field in FIELDS_FRONTEND_TO_SQL:

        sql_col = FIELDS_FRONTEND_TO_SQL[a_field]['col']
        sql_tab = sql_col.split('.')[0]

        mlog('INFO', "AGG API sql_col", sql_col)

        db = connect_db()
        db_c = db.cursor(DictCursor)

        # keyword-ish tables count occurrences, the others count scholars
        count_col = 'occs' if sql_tab in ('keywords', 'hashtags') else 'n'

        # constraints 2, if any (applied on the aggregated counts)
        postfilters = []
        if hapax_threshold > 0:
            postfilters.append( "%s > %i" % (count_col, hapax_threshold) )

        if len(postfilters):
            post_where = "WHERE "+" AND ".join(
                ['('+f+')' for f in postfilters]
            )
        else:
            post_where = ""

        # each table needs its own path from scholars to the aggregated column
        # (single template below: fixes the bug where the affiliations case
        #  silently dropped the hapax post-filter)
        join_clauses = {
            'scholars': "",
            # 0 or 1
            'affiliations': """LEFT JOIN affiliations
                               ON scholars.affiliation_id = affiliations.affid""",
            # 0 or 1
            'linked_ids': """LEFT JOIN linked_ids
                             ON scholars.luid = linked_ids.uid""",
            # 0 or many
            'keywords': """LEFT JOIN sch_kw
                           ON scholars.luid = sch_kw.uid
                           LEFT JOIN keywords
                           ON sch_kw.kwid = keywords.kwid""",
            # 0 or many
            'hashtags': """LEFT JOIN sch_ht
                           ON scholars.luid = sch_ht.uid
                           LEFT JOIN hashtags
                           ON sch_ht.htid = hashtags.htid"""
        }

        stmt = """
            SELECT x, %(cnt)s FROM (
                SELECT %(col)s AS x, COUNT(*) AS %(cnt)s
                FROM scholars
                %(join)s
                GROUP BY %(col)s
            ) AS allcounts
            %(post_filter)s
            ORDER BY %(cnt)s DESC
        """ % {'col': sql_col,
               'cnt': count_col,
               'join': join_clauses[sql_tab],
               'post_filter': post_where}

        mlog("DEBUGSQL", "get_field_aggs STATEMENT:\n-- SQL\n%s\n-- /SQL" % stmt)

        # do it
        n_rows = db_c.execute(stmt)

        if n_rows > 0:
            agg_rows = db_c.fetchall()

        db.close()

    # mlog('DEBUG', "aggregation over %s: result rows =" % a_field, agg_rows)

    return agg_rows
def find_scholar(some_key, some_str_value, cmx_db = None):
    """
    Get the luid of a scholar based on some str value.

    To make sense, the key should be a unique one
    but this function doesn't check it !

    Parameters
    ----------
    some_key: str
        a column name of the scholars table
        NB: interpolated into the SQL as an identifier => must come from
            trusted code, never directly from user input
    some_str_value: str
        the value to match (passed as a bound parameter, injection-safe)
    cmx_db:
        an optional already-open connection (left open if provided)

    Returns
    -------
    int luid or None if not found (or if the query failed)
    """
    luid = None
    if cmx_db:
        db = cmx_db
    else:
        db = connect_db()
    db_c = db.cursor(DictCursor)
    try:
        # the column name can't be a bound parameter => string-formatted,
        # but the value IS bound (fixes the previous injection-prone '"%s"')
        db_c.execute(
            'SELECT luid FROM scholars WHERE %s = %%s' % some_key,
            (some_str_value,)
        )
        first_row = db_c.fetchone()
        if first_row:
            luid = first_row['luid']
    except Exception:
        mlog('WARNING', 'unsuccessful attempt to identify a scholar on key %s' % some_key)
    finally:
        # only close connections we opened ourselves
        if not cmx_db:
            db.close()
    return luid
class MyExtractor: class SubsetExtractor:
def __init__(self,dbhost): def __init__(self,dbhost):
self.connection=connect( self.connection=connect(
...@@ -358,6 +561,7 @@ class MyExtractor: ...@@ -358,6 +561,7 @@ class MyExtractor:
# info['affiliation2'] = res3['affiliation2']; # info['affiliation2'] = res3['affiliation2'];
info['hon_title'] = res3['hon_title'] if res3['hon_title'] else "" info['hon_title'] = res3['hon_title'] if res3['hon_title'] else ""
info['position'] = res3['position']; info['position'] = res3['position'];
info['job_looking'] = res3['job_looking'];
info['job_looking_date'] = res3['job_looking_date']; info['job_looking_date'] = res3['job_looking_date'];
info['email'] = res3['email']; info['email'] = res3['email'];
if info['keywords_nb']>0: if info['keywords_nb']>0:
...@@ -572,11 +776,11 @@ class MyExtractor: ...@@ -572,11 +776,11 @@ class MyExtractor:
return escaped return escaped
def buildJSON_sansfa2(self,graph,coordsRAW=None): def buildJSON(self,graph,coordsRAW=None):
inst = CountryConverter("","","","") inst = CountryConverter("","","","")
ISO=inst.getCountries("services/db_to_tina_api/countries_ISO3166.txt") ISO=inst.getCountries("services/text/countries_ISO3166.txt")
Alternatives=inst.getCountries("services/db_to_tina_api/countries_alternatives.txt") Alternatives=inst.getCountries("services/text/countries_alternatives.txt")
inst.createInvertedDicts(ISO,Alternatives) inst.createInvertedDicts(ISO,Alternatives)
nodesA=0 nodesA=0
...@@ -636,8 +840,7 @@ class MyExtractor: ...@@ -636,8 +840,7 @@ class MyExtractor:
if self.scholars_colors[self.scholars[idNode]['email']]==1: if self.scholars_colors[self.scholars[idNode]['email']]==1:
color='243,183,19' color='243,183,19'
# TODO test the date elif self.scholars[idNode]['job_looking']:
elif self.scholars[idNode]['job_looking_date'] is not None:
color = '139,28,28' color = '139,28,28'
else: else:
color = '78,193,127' color = '78,193,127'
...@@ -798,7 +1001,7 @@ class MyExtractor: ...@@ -798,7 +1001,7 @@ class MyExtractor:
mlog("INFO", graph["stats"]) mlog("INFO", graph["stats"])
# mlog("DEBUG", "scholars",nodesA) # mlog("DEBUG", "scholars",nodesA)
# mlog("DEBUG", "concepts",nodesB) # mlog("DEBUG", "concept_tags",nodesB)
# mlog("DEBUG", "nodes1",edgesA) # mlog("DEBUG", "nodes1",edgesA)
# mlog("DEBUG", "nodes2",edgesB) # mlog("DEBUG", "nodes2",edgesB)
# mlog("DEBUG", "bipartite",edgesAB) # mlog("DEBUG", "bipartite",edgesAB)
......
...@@ -38,20 +38,20 @@ from flask_login import fresh_login_required, login_required, \ ...@@ -38,20 +38,20 @@ from flask_login import fresh_login_required, login_required, \
if __package__ == 'services': if __package__ == 'services':
# when we're run via import # when we're run via import
print("*** comex services ***") print("*** comex services ***")
from services import db from services.tools import mlog
from services import tools from services import tools, dbcrud, dbdatapi
from services.tools import mlog from services.user import User, login_manager, \
from services.user import User, login_manager, doors_login, doors_register doors_login, doors_register
from services.db_to_tina_api.extractDataCustom import MyExtractor from services.dbdatapi import SubsetExtractor
# TODO move sanitize there # TODO move sanitize there
# from services.text import keywords, sanitize # from services.text import keywords, sanitize
else: else:
# when this script is run directly # when this script is run directly
print("*** comex services (dev server mode) ***") print("*** comex services (dev server mode) ***")
import db
import tools
from tools import mlog from tools import mlog
from user import User, login_manager, doors_login, doors_register import tools, dbcrud, dbdatapi
from user import User, login_manager, \
doors_login, doors_register
from db_to_tina_api.extractDataCustom import MyExtractor from db_to_tina_api.extractDataCustom import MyExtractor
# from text import keywords, sanitize # from text import keywords, sanitize
...@@ -209,9 +209,9 @@ def aggs_api(): ...@@ -209,9 +209,9 @@ def aggs_api():
if hap_thresh is not None: if hap_thresh is not None:
# field name itself is tested by db module # field name itself is tested by db module
result = db.get_field_aggs(request.args['field'], hapax_threshold=hap_thresh) result = dbdatapi.get_field_aggs(request.args['field'], hapax_threshold=hap_thresh)
else: else:
result = db.get_field_aggs(request.args['field']) result = dbdatapi.get_field_aggs(request.args['field'])
return dumps(result) return dumps(result)
else: else:
...@@ -226,8 +226,13 @@ def graph_api(): ...@@ -226,8 +226,13 @@ def graph_api():
(original author S. Castillo) (original author S. Castillo)
""" """
if 'qtype' in request.args: if 'qtype' in request.args:
graphdb = MyExtractor(config['SQL_HOST']) graphdb = SubsetExtractor(config['SQL_HOST'])
scholars = graphdb.getScholarsList(request.args['qtype'], tools.restparse(request.query_string.decode())) scholars = graphdb.getScholarsList(
request.args['qtype'],
tools.restparse(
request.query_string.decode()
)
)
if scholars and len(scholars): if scholars and len(scholars):
# Data Extraction # Data Extraction
# (getting details for selected scholars into graph object) # (getting details for selected scholars into graph object)
...@@ -235,8 +240,7 @@ def graph_api(): ...@@ -235,8 +240,7 @@ def graph_api():
# (less modular but a lot faster) # (less modular but a lot faster)
graphdb.extract(scholars) graphdb.extract(scholars)
graphArray = graphdb.buildJSON_sansfa2(graphdb.Graph) return dumps(graphdb.buildJSON(graphdb.Graph))
return dumps(graphArray)
else: else:
raise TypeError("graph API query is missing qtype (should be 'filters' or 'uid')") raise TypeError("graph API query is missing qtype (should be 'filters' or 'uid')")
...@@ -256,7 +260,7 @@ def user_api(): ...@@ -256,7 +260,7 @@ def user_api():
if request.args['op'] == "exists": if request.args['op'] == "exists":
if 'email' in request.args: if 'email' in request.args:
email = sanitize(request.args['email']) email = sanitize(request.args['email'])
return(dumps({'exists':db.email_exists(email)})) return(dumps({'exists':dbcrud.email_exists(email)}))
else: else:
raise TypeError("user API query is missing the operation to perform (eg op=exists)") raise TypeError("user API query is missing the operation to perform (eg op=exists)")
...@@ -327,7 +331,7 @@ def login(): ...@@ -327,7 +331,7 @@ def login():
message = nologin_message message = nologin_message
) )
luid = db.doors_uid_to_luid(doors_uid) luid = dbcrud.doors_uid_to_luid(doors_uid)
if luid: if luid:
# normal user # normal user
...@@ -335,7 +339,7 @@ def login(): ...@@ -335,7 +339,7 @@ def login():
else: else:
mlog("DEBUG", "LOGIN: encountered new doors id (%s), switching to empty user profile" % doors_uid) mlog("DEBUG", "LOGIN: encountered new doors id (%s), switching to empty user profile" % doors_uid)
# user exists in doors but has no comex profile nor luid yet # user exists in doors but has no comex profile nor luid yet
db.save_doors_temp_user(doors_uid, email) # preserve the email dbcrud.save_doors_temp_user(doors_uid, email) # preserve the email
user = User(None, doors_uid=doors_uid) # get a user.empty user = User(None, doors_uid=doors_uid) # get a user.empty
# ========================= # =========================
...@@ -388,7 +392,7 @@ def login(): ...@@ -388,7 +392,7 @@ def login():
else: else:
next_url = unquote(next_url) next_url = unquote(next_url)
mlog("DEBUG", "login with next_url:", next_url) mlog("DEBUG", "login with next_url:", next_url)
safe_flag = is_safe_url(next_url, request.host_url) safe_flag = tools.is_safe_url(next_url, request.host_url)
# normal next_url # normal next_url
if safe_flag: if safe_flag:
# if relative # if relative
...@@ -453,7 +457,7 @@ def profile(): ...@@ -453,7 +457,7 @@ def profile():
mlog("INFO", mlog("INFO",
"executing DELETE scholar's data at the request of user %s" % str(the_id_to_delete)) "executing DELETE scholar's data at the request of user %s" % str(the_id_to_delete))
logout_user() logout_user()
db.rm_scholar(the_id_to_delete) dbcrud.rm_scholar(the_id_to_delete)
return(redirect(url_for('rootindex', _external=True))) return(redirect(url_for('rootindex', _external=True)))
...@@ -483,7 +487,7 @@ def profile(): ...@@ -483,7 +487,7 @@ def profile():
) )
# if all went well we can remove the temporary doors user data # if all went well we can remove the temporary doors user data
db.rm_doors_temp_user(current_user.doors_uid) dbcrud.rm_doors_temp_user(current_user.doors_uid)
logout_user() logout_user()
# .. and login the user in his new mode # .. and login the user in his new mode
login_user(User(luid)) login_user(User(luid))
...@@ -536,7 +540,7 @@ def claim_profile(): ...@@ -536,7 +540,7 @@ def claim_profile():
if (return_token if (return_token
and type(return_token) == str and type(return_token) == str
and len(return_token) == 36): and len(return_token) == 36):
luid = db.get_legacy_user(return_token) luid = dbcrud.get_legacy_user(return_token)
if luid is not None: if luid is not None:
try: try:
...@@ -605,8 +609,8 @@ def claim_profile(): ...@@ -605,8 +609,8 @@ def claim_profile():
else: else:
try: try:
db_connection = db.connect_db(config) db_connection = dbcrud.connect_db(config)
db.update_scholar_cols({ dbcrud.update_scholar_cols({
'doors_uid':doors_uid, 'doors_uid':doors_uid,
'record_status': 'active', 'record_status': 'active',
'valid_date': None 'valid_date': None
...@@ -616,7 +620,7 @@ def claim_profile(): ...@@ -616,7 +620,7 @@ def claim_profile():
db_connection.close() db_connection.close()
# the user is not a legacy user anymore # the user is not a legacy user anymore
# POSS: do this on first login instead # POSS: do this on first login instead
db.rm_legacy_user_rettoken(luid) dbcrud.rm_legacy_user_rettoken(luid)
except Exception as perr: except Exception as perr:
return render_template( return render_template(
...@@ -741,10 +745,10 @@ def save_form(clean_records, update_flag=False, previous_user_info=None): ...@@ -741,10 +745,10 @@ def save_form(clean_records, update_flag=False, previous_user_info=None):
""" """
# A) a new DB connection # A) a new DB connection
reg_db = db.connect_db(config) reg_db = dbcrud.connect_db(config)
# B) read/fill the affiliation table to get associated id # B) read/fill the affiliation table to get associated id
clean_records['affiliation_id'] = db.get_or_create_affiliation( clean_records['affiliation_id'] = dbcrud.get_or_create_affiliation(
clean_records, clean_records,
reg_db reg_db
) )
...@@ -760,9 +764,9 @@ def save_form(clean_records, update_flag=False, previous_user_info=None): ...@@ -760,9 +764,9 @@ def save_form(clean_records, update_flag=False, previous_user_info=None):
mlog("WARNING", "User %i attempted to modify the data of another user (%i)!... Aborting update" % (luid, sent_luid)) mlog("WARNING", "User %i attempted to modify the data of another user (%i)!... Aborting update" % (luid, sent_luid))
return None return None
else: else:
db.save_full_scholar(clean_records, reg_db, update_user=previous_user_info) dbcrud.save_full_scholar(clean_records, reg_db, update_user=previous_user_info)
else: else:
luid = int(db.save_full_scholar(clean_records, reg_db)) luid = int(dbcrud.save_full_scholar(clean_records, reg_db))
# D) read/fill each keyword and save the (uid <=> kwid) pairings # D) read/fill each keyword and save the (uid <=> kwid) pairings
...@@ -773,18 +777,18 @@ def save_form(clean_records, update_flag=False, previous_user_info=None): ...@@ -773,18 +777,18 @@ def save_form(clean_records, update_flag=False, previous_user_info=None):
tok_table = tok_field tok_table = tok_field
map_table = "sch_" + ('kw' if intable == 'keywords' else 'ht') map_table = "sch_" + ('kw' if intable == 'keywords' else 'ht')
tokids = db.get_or_create_tokitems( tokids = dbcrud.get_or_create_tokitems(
clean_records[tok_field], clean_records[tok_field],
reg_db, reg_db,
tok_table tok_table
) )
# TODO class User method !! # TODO class User method !!
# POSS selective delete ? # POSS selective delete ?
if update_flag: if update_flag:
db.delete_pairs_sch_tok(luid, reg_db, map_table) dbcrud.delete_pairs_sch_tok(luid, reg_db, map_table)
db.save_pairs_sch_tok( dbcrud.save_pairs_sch_tok(
[(luid, tokid) for tokid in tokids], [(luid, tokid) for tokid in tokids],
reg_db, reg_db,
map_table map_table
...@@ -906,19 +910,8 @@ def sanitize(value, specific_type=None): ...@@ -906,19 +910,8 @@ def sanitize(value, specific_type=None):
return san_typed_val return san_typed_val
def is_safe_url(target, host_url):
"""
Checks if url is ok for redirects
cf. http://flask.pocoo.org/snippets/62/
"""
ref_url = urlparse(host_url)
test_url = urlparse(urljoin(host_url, target))
return (test_url.scheme in ('http', 'https')
and ref_url.netloc == test_url.netloc)
########### MAIN ########### ########### MAIN ###########
# this only uses the dev server (in prod we're run by unicorn and not as main) # this can only be used for debug
# (in general use comex-run.sh to run the app)
if __name__ == "__main__": if __name__ == "__main__":
# our app should be bound to an ip (cf stackoverflow.com/a/30329547/2489184) app.run(host='0.0.0.0', port=8989)
app.run(host=config['COMEX_HOST'], port=int(config['COMEX_PORT']))
from sqlite3 import connect, Row from sqlite3 import connect, Row
if __package__ == "services.db_to_tina_api": if __package__ == "services.text":
from services.tools import mlog from services.tools import mlog
else: else:
from tools import mlog from tools import mlog
......
...@@ -8,6 +8,7 @@ __email__ = "romain.loth@iscpif.fr" ...@@ -8,6 +8,7 @@ __email__ = "romain.loth@iscpif.fr"
# for reading config # for reading config
from configparser import ConfigParser from configparser import ConfigParser
from os import environ, path from os import environ, path
from sys import stdout
from urllib.parse import unquote from urllib.parse import unquote
from ctypes import c_int32 from ctypes import c_int32
from traceback import format_tb from traceback import format_tb
...@@ -46,7 +47,7 @@ CONFIGMENU = [ ...@@ -46,7 +47,7 @@ CONFIGMENU = [
def home_path(): def home_path():
""" """
returns ./../.. returns ./../.. in any OS
""" """
return path.dirname(path.dirname(path.realpath(__file__))) return path.dirname(path.dirname(path.realpath(__file__)))
...@@ -55,7 +56,7 @@ def read_config(): ...@@ -55,7 +56,7 @@ def read_config():
""" """
reads all global config vars trying in order: reads all global config vars trying in order:
1) env variables of the same name 1) env variables of the same name
2) the config file $HOME/parametres_comex.ini 2) the config file $HOME/config/parametres_comex.ini
3) hard-coded default values 3) hard-coded default values
Effect: fills the var REALCONFIG Effect: fills the var REALCONFIG
...@@ -63,6 +64,8 @@ def read_config(): ...@@ -63,6 +64,8 @@ def read_config():
""" """
our_home = home_path() our_home = home_path()
print('_^_'+our_home)
ini = ConfigParser() ini = ConfigParser()
inipath = path.join(our_home, "config", "parametres_comex.ini") inipath = path.join(our_home, "config", "parametres_comex.ini")
ini.read(inipath) ini.read(inipath)
...@@ -195,7 +198,11 @@ def mlog(loglvl, *args): ...@@ -195,7 +198,11 @@ def mlog(loglvl, *args):
levels = {"DEBUGSQL":-1, "DEBUG":0, "INFO":1, "WARNING":2, "ERROR":3} levels = {"DEBUGSQL":-1, "DEBUG":0, "INFO":1, "WARNING":2, "ERROR":3}
if 'LOG_FILE' in REALCONFIG: if 'LOG_FILE' in REALCONFIG:
logfile = open(REALCONFIG["LOG_FILE"], "a") # a <=> append try:
logfile = open(REALCONFIG["LOG_FILE"], "a") # a <=> append
except:
print("can't open the logfile indicated in "+REALCONFIG["HOME"]+"/config/parametres_comex.ini, so using STDOUT instead" )
logfile = stdout
if loglvl in levels: if loglvl in levels:
if levels[loglvl] >= levels[REALCONFIG["LOG_LEVEL"]]: if levels[loglvl] >= levels[REALCONFIG["LOG_LEVEL"]]:
......
...@@ -14,10 +14,12 @@ from flask_login import LoginManager ...@@ -14,10 +14,12 @@ from flask_login import LoginManager
from re import match from re import match
if __package__ == 'services': if __package__ == 'services':
from services.db import connect_db, get_full_scholar, get_doors_temp_user from services.dbcrud import connect_db, get_full_scholar, \
get_doors_temp_user
from services.tools import mlog, REALCONFIG from services.tools import mlog, REALCONFIG
else: else:
from db import connect_db, get_full_scholar, get_doors_temp_user from dbcrud import connect_db, get_full_scholar, \
get_doors_temp_user
from tools import mlog, REALCONFIG from tools import mlog, REALCONFIG
# will be exported to main for initialization with app # will be exported to main for initialization with app
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment