1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
"""
Computes ngram growth on periods
"""
from gargantext.models import Node, NodeNgram, NodeNodeNgram, NodeNgramNgram
from gargantext.util.db_cache import cache
from gargantext.util.db import session, bulk_insert, aliased, \
func, get_engine # = sqlalchemy.func like sum() or count()
from datetime import datetime
def timeframes(start, end):
    """
    timeframes :: String -> String -> (datetime, datetime, datetime)

    Parse ``start`` and ``end`` (both rendered with ``str()`` and read as
    "YYYY-MM-DD") and return the three boundaries of two consecutive periods
    of equal length:

        (start - (end - start), start, end)

    i.e. the period immediately preceding ``start`` followed by the period
    running from ``start`` to ``end``.

    Raises ValueError (from ``datetime.strptime``) when either bound does not
    match the "%Y-%m-%d" format.
    """
    day_format = "%Y-%m-%d"

    # Bounds are parsed in order (start first, then end), so an invalid
    # ``start`` is reported before ``end`` is even looked at.
    period_start, period_end = (
        datetime.strptime(str(bound), day_format) for bound in (start, end)
    )

    # The earlier period has the same length as [start, end].
    period_length = period_end - period_start

    return (period_start - period_length, period_start, period_end)
def compute_growth(corpus_id, groupList_id, mapList_id, start, end):
    """
    compute_growth :: Int -> Int -> Int -> String -> String
                   -> result of engine.execute()

    Runs the SQL function OCC_HIST, defined in
        /srv/gargantext/install/gargamelle/sqlFunctions.sql

    According to the original notes, OCC_HIST first computes the occurrences
    of the mapList ngrams (with groups) on the first period, then on the
    second one, and finally returns the growth; the whole computation is done
    inside the Postgres database for speed.
    NOTE(review): the row shape is presumably (ngram_id, growth) -- confirm
    against the SQL function definition.

    Parameters:
        corpus_id, groupList_id, mapList_id : node ids, anything ``int()``
            accepts (ints or digit strings).
        start, end : "YYYY-MM-DD" bounds of the second period; the first
            period is the span of equal length just before ``start``
            (see ``timeframes``).

    Returns whatever ``get_engine().execute(query)`` returns.

    Raises:
        ValueError / TypeError when an id cannot be converted to ``int`` or
        when a date does not match "%Y-%m-%d".
    """
    # The ids are interpolated unquoted into the SQL text: force them to be
    # integers so that a crafted value (e.g. "1); DROP TABLE ...") can never
    # reach the database as SQL.
    corpus_id = int(corpus_id)
    groupList_id = int(groupList_id)
    mapList_id = int(mapList_id)

    # timeframes() only returns datetime objects built by strptime, so their
    # text rendering is safe to embed as quoted literals below.
    (date_0, date_1, date_2) = timeframes(start, end)

    query = """SELECT * FROM OCC_HIST( {corpus_id}
                                     , {groupList_id}
                                     , {mapList_id}
                                     , '{date_0}'
                                     , '{date_1}'
                                     , '{date_2}'
                                     )
            """.format( corpus_id    = corpus_id
                      , groupList_id = groupList_id
                      , mapList_id   = mapList_id
                      , date_0       = date_0
                      , date_1       = date_1
                      , date_2       = date_2
                      )

    # Acquire the engine only once the inputs are known to be valid.
    engine = get_engine()
    return engine.execute(query)