• delanoe's avatar
    [FEAT] Growth on graph · fb87c813
    delanoe authored
           First select a period on Documents view.
    
           Then compute a graph chosing one distance (distributional or
           conditional).
    
           Then in Graph Explorer view (TinaWebJS), user can get colors of
           node with growth attributes.
    
           Red nodes are increasing.
           Yellow nodes are decreasing.
    
           Growth computed with previous period which is equal to the
           selected period in duration (but before).
    
           TODO: make some tests with Users to validate the fact that
           previous period is not all the corpus but the selected period.
    fb87c813
growth.py 1.94 KB
"""
Computes ngram growth on periods
"""

from gargantext.models   import Node, NodeNgram, NodeNodeNgram, NodeNgramNgram
from gargantext.util.db_cache  import cache
from gargantext.util.db  import session, bulk_insert, aliased, \
                                func, get_engine # = sqlalchemy.func like sum() or count()
from datetime             import datetime


def timeframes(start, end):
    """
    timeframes :: String -> String -> (UTCTime, UTCTime, UTCTime)
    """
    
    start = datetime.strptime (str(start), "%Y-%m-%d")
    end   = datetime.strptime (str(end), "%Y-%m-%d")

    date_0 = start - (end - start)
    date_1 = start
    date_2 = end

    return (date_0, date_1, date_2)



def compute_growth(corpus_id, groupList_id, mapList_id, start, end):
    """
    compute_graph :: Int -> UTCTime -> UTCTime -> Int -> Int 
                   -> [(Int, Numeric)]
    
    this function uses SQL function in 
    /srv/gargantext/install/gargamelle/sqlFunctions.sql

    First compute occurrences of ngrams in mapList (with groups) on the first
    period, then on the second and finally returns growth.

    Directly computed with Postgres Database (C) for optimization.
    """
    connection = get_engine()
    
    (date_0, date_1, date_2) = timeframes(start, end)
    
    query = """SELECT * FROM OCC_HIST( {corpus_id}
                                     , {groupList_id}
                                     , {mapList_id}
                                     , '{date_0}'
                                     , '{date_1}'
                                     , '{date_2}'
                                     )
            """.format( corpus_id    = corpus_id
                      , groupList_id = groupList_id
                      , mapList_id   = mapList_id
                      , date_0       = date_0
                      , date_1       = date_1
                      , date_2       = date_2
                      )
    return(connection.execute(query))