Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
04afe46a
Commit
04afe46a
authored
Apr 04, 2017
by
delanoe
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'testing' into testing-graph-public
parents
39ab3eaf
c3a055a7
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
157 additions
and
18 deletions
+157
-18
db.py
gargantext/util/db.py
+1
-0
ngram_coocs.py
gargantext/util/toolchain/ngram_coocs.py
+3
-8
graph.py
graph/graph.py
+18
-9
growth.py
graph/growth.py
+61
-0
utils.py
graph/utils.py
+2
-0
psqlFunctions.sql
install/gargamelle/psqlFunctions.sql
+71
-0
menu.html
templates/pages/menu.html
+1
-1
No files found.
gargantext/util/db.py
View file @
04afe46a
...
@@ -173,3 +173,4 @@ def bulk_insert_ifnotexists(model, uniquekey, fields, data, cursor=None, do_stat
...
@@ -173,3 +173,4 @@ def bulk_insert_ifnotexists(model, uniquekey, fields, data, cursor=None, do_stat
cursor
.
close
()
cursor
.
close
()
gargantext/util/toolchain/ngram_coocs.py
View file @
04afe46a
...
@@ -3,9 +3,9 @@ COOCS
...
@@ -3,9 +3,9 @@ COOCS
(this is the full SQL version, should be more reliable on outerjoin)
(this is the full SQL version, should be more reliable on outerjoin)
"""
"""
from
gargantext
import
settings
from
gargantext
import
settings
from
sqlalchemy
import
create_engine
,
exc
from
sqlalchemy
import
exc
from
gargantext.util.lists
import
WeightedMatrix
from
gargantext.util.lists
import
WeightedMatrix
# from gargantext.util.db import session, aliased, func
from
gargantext.util.db
import
get_engine
from
gargantext.util.db_cache
import
cache
from
gargantext.util.db_cache
import
cache
from
gargantext.constants
import
DEFAULT_COOC_THRESHOLD
,
NODETYPES
from
gargantext.constants
import
DEFAULT_COOC_THRESHOLD
,
NODETYPES
from
gargantext.constants
import
INDEXED_HYPERDATA
from
gargantext.constants
import
INDEXED_HYPERDATA
...
@@ -64,12 +64,7 @@ def compute_coocs( corpus,
...
@@ -64,12 +64,7 @@ def compute_coocs( corpus,
"""
"""
# 1) prepare direct connection to the DB
# 1) prepare direct connection to the DB
url
=
'postgresql+psycopg2://{USER}:{PASSWORD}@{HOST}:{PORT}/{NAME}'
.
format
(
connection
=
get_engine
()
.
connect
()
**
settings
.
DATABASES
[
'default'
]
)
engine
=
create_engine
(
url
)
connection
=
engine
.
connect
()
# string vars for our SQL query
# string vars for our SQL query
# setting work memory high to improve cache perf.
# setting work memory high to improve cache perf.
...
...
graph/graph.py
View file @
04afe46a
...
@@ -8,6 +8,7 @@ from graph.cooccurrences import countCooccurrences
...
@@ -8,6 +8,7 @@ from graph.cooccurrences import countCooccurrences
from
graph.distances
import
clusterByDistances
from
graph.distances
import
clusterByDistances
from
graph.bridgeness
import
filterByBridgeness
from
graph.bridgeness
import
filterByBridgeness
from
graph.mail_notification
import
notify_owner
from
graph.mail_notification
import
notify_owner
from
graph.growth
import
compute_growth
from
gargantext.util.scheduling
import
scheduled
from
gargantext.util.scheduling
import
scheduled
from
gargantext.constants
import
graph_constraints
from
gargantext.constants
import
graph_constraints
...
@@ -64,7 +65,15 @@ def compute_graph( corpus_id=None , cooc_id=None
...
@@ -64,7 +65,15 @@ def compute_graph( corpus_id=None , cooc_id=None
print
(
"GRAPH #
%
d ... Filtering by bridgeness
%
d."
%
(
cooc_id
,
bridgeness
))
print
(
"GRAPH #
%
d ... Filtering by bridgeness
%
d."
%
(
cooc_id
,
bridgeness
))
data
=
filterByBridgeness
(
G
,
partition
,
ids
,
weight
,
bridgeness
,
"node_link"
,
field1
,
field2
)
data
=
filterByBridgeness
(
G
,
partition
,
ids
,
weight
,
bridgeness
,
"node_link"
,
field1
,
field2
)
if
start
is
not
None
and
end
is
not
None
:
growth
=
dict
()
for
(
ng_id
,
score
)
in
compute_growth
(
corpus_id
,
groupList_id
,
mapList_id
,
start
,
end
):
growth
[
ng_id
]
=
float
(
score
)
+
100
# for the normalization, should not be negativ
for
node
in
data
[
'nodes'
]:
node
[
'attributes'
][
'growth'
]
=
growth
[
node
[
'id'
]]
print
(
"GRAPH #
%
d ... Saving Graph in hyperdata as json."
%
cooc_id
)
print
(
"GRAPH #
%
d ... Saving Graph in hyperdata as json."
%
cooc_id
)
node
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
cooc_id
)
.
first
()
node
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
cooc_id
)
.
first
()
...
@@ -187,7 +196,7 @@ def get_graph( request=None , corpus=None
...
@@ -187,7 +196,7 @@ def get_graph( request=None , corpus=None
)
)
.
filter
(
Start
.
key
==
'publication_date'
)
.
filter
(
Start
.
key
==
'publication_date'
)
.
filter
(
Start
.
value_utc
>=
date_start_utc
)
.
filter
(
Start
.
value_utc
>=
date_start_utc
)
)
)
# Filter corpus by date if any end date
# Filter corpus by date if any end date
...
@@ -203,8 +212,7 @@ def get_graph( request=None , corpus=None
...
@@ -203,8 +212,7 @@ def get_graph( request=None , corpus=None
)
)
.
filter
(
End
.
key
==
'publication_date'
)
.
filter
(
End
.
key
==
'publication_date'
)
.
filter
(
End
.
value_utc
<=
date_end_utc
)
.
filter
(
End
.
value_utc
<=
date_end_utc
)
)
)
# Finally test if the size of the corpora is big enough
# Finally test if the size of the corpora is big enough
# --------------------------------
# --------------------------------
...
@@ -221,10 +229,11 @@ def get_graph( request=None , corpus=None
...
@@ -221,10 +229,11 @@ def get_graph( request=None , corpus=None
#, limit=size
#, limit=size
)
)
return
{
"state"
:
"saveOnly"
,
return
{
"state"
:
"saveOnly"
"target_id"
:
cooc_id
,
,
"target_id"
:
cooc_id
"target_name"
:
cooc_name
,
,
"target_name"
:
cooc_name
"target_date"
:
cooc_date
}
,
"target_date"
:
cooc_date
}
elif
corpus_size
>
graph_constraints
[
'corpusMax'
]:
elif
corpus_size
>
graph_constraints
[
'corpusMax'
]:
# Then compute cooc asynchronously with celery
# Then compute cooc asynchronously with celery
...
@@ -262,5 +271,5 @@ def get_graph( request=None , corpus=None
...
@@ -262,5 +271,5 @@ def get_graph( request=None , corpus=None
if
len
(
data
)
==
0
:
if
len
(
data
)
==
0
:
print
(
"GRAPH # ... GET_GRAPH: 0 coocs in matrix"
)
print
(
"GRAPH # ... GET_GRAPH: 0 coocs in matrix"
)
data
=
{
'nodes'
:[],
'links'
:[]}
# empty data
data
=
{
'nodes'
:[],
'links'
:[]}
# empty data
return
data
return
data
graph/growth.py
0 → 100644
View file @
04afe46a
"""
Computes ngram growth on periods
"""
from
gargantext.models
import
Node
,
NodeNgram
,
NodeNodeNgram
,
NodeNgramNgram
from
gargantext.util.db_cache
import
cache
from
gargantext.util.db
import
session
,
bulk_insert
,
aliased
,
\
func
,
get_engine
# = sqlalchemy.func like sum() or count()
from
datetime
import
datetime
def timeframes(start, end):
    """
    timeframes :: String -> String -> (UTCTime, UTCTime, UTCTime)

    Parse the two "%Y-%m-%d" bounds and return three datetimes:
    a bound mirroring the requested period backwards (same length,
    just before `start`), then `start` and `end` themselves.
    """
    fmt = "%Y-%m-%d"
    date_1 = datetime.strptime(str(start), fmt)
    date_2 = datetime.strptime(str(end), fmt)
    # the "before" period has the same duration as [start, end]
    date_0 = date_1 - (date_2 - date_1)
    return (date_0, date_1, date_2)
def compute_growth(corpus_id, groupList_id, mapList_id, start, end):
    """
    compute_growth :: Int -> Int -> Int -> String -> String
                   -> [(Int, Numeric)]

    This function uses the SQL function OCC_HIST defined in
    /srv/gargantext/install/gargamelle/psqlFunctions.sql

    First computes occurrences of ngrams in mapList (with groups) on the
    first period, then on the second, and finally returns the growth.
    Directly computed by the PostgreSQL database (C) for optimization.
    """
    # get_engine() returns an SQLAlchemy engine (not a raw connection);
    # Engine.execute acquires/releases a connection per statement.
    engine = get_engine()

    (date_0, date_1, date_2) = timeframes(start, end)

    # ids are forced to int so the interpolated SQL cannot carry injected
    # text; the dates come from timeframes() and are datetime objects.
    query = """SELECT * FROM OCC_HIST( {corpus_id}
                                     , {groupList_id}
                                     , {mapList_id}
                                     , '{date_0}'
                                     , '{date_1}'
                                     , '{date_2}'
                                     )
            """.format( corpus_id    = int(corpus_id)
                      , groupList_id = int(groupList_id)
                      , mapList_id   = int(mapList_id)
                      , date_0       = date_0
                      , date_1       = date_1
                      , date_2       = date_2
                      )

    return (engine.execute(query))
graph/utils.py
View file @
04afe46a
...
@@ -19,6 +19,8 @@ def compress_graph(graphdata):
...
@@ -19,6 +19,8 @@ def compress_graph(graphdata):
for
node
in
graphdata
[
'nodes'
]:
for
node
in
graphdata
[
'nodes'
]:
node
[
'lb'
]
=
node
[
'label'
]
node
[
'lb'
]
=
node
[
'label'
]
del
node
[
'label'
]
del
node
[
'label'
]
#node['attributes']['growth'] = 0.8
node
[
'at'
]
=
node
[
'attributes'
]
node
[
'at'
]
=
node
[
'attributes'
]
del
node
[
'attributes'
]
del
node
[
'attributes'
]
...
...
install/gargamelle/psqlFunctions.sql
0 → 100644
View file @
04afe46a
-- CNRS Copyrights 2017
-- See Gargantext Licence for details
-- Maintainers: team@gargantext.org
-- USAGE
-- psql gargandb < occ_growth.sql
-- OCC_HIST :: Corpus.id -> GroupList.id -> MapList.id -> Start -> EndFirst -> EndLast
-- EXAMPLE USAGE
-- SELECT * FROM OCC_HIST(182856, 183859, 183866, '1800-03-15 17:00:00+01', '2000-03-15 17:00:00+01', '2017-03-15 17:00:00+01')
-- OCC_HIST_PART :: Corpus.id -> GroupList.id -> Start -> End
-- Remove any previous version so the signature can be redefined below.
DROP FUNCTION OCC_HIST_PART (integer, integer, timestamp without time zone, timestamp without time zone);
-- DROP for tests

-- OCC_HIST_PART(corpus_id, groupList_id, start, end):
-- sum of ngram occurrence weights in the documents of one corpus,
-- restricted to the half-open time window ]start, end],
-- with ngrams mapped to their group head when a grouping exists.
CREATE OR REPLACE FUNCTION OCC_HIST_PART (int, int, timestamp, timestamp)
RETURNS TABLE (ng_id int, score float8)
AS $$
-- EXPLAIN ANALYZE
    -- COALESCE: use the group head (ngram1_id) when the ngram is grouped,
    -- otherwise the ngram itself.
    SELECT COALESCE(gr.ngram1_id, ng1.ngram_id) as ng_id
         , SUM(ng1.weight) as score
    from nodes n
    -- BEFORE
    INNER JOIN nodes as n1          ON n1.id = n.id
    INNER JOIN nodes_ngrams ng1     ON ng1.node_id = n1.id
    -- Limit with timestamps: ]start, end]
    INNER JOIN nodes_hyperdata nh1  ON nh1.node_id = n1.id
                                   AND nh1.value_utc >  $3
                                   AND nh1.value_utc <= $4
    -- Group List: attach the grouping row (if any) from list $2
    LEFT JOIN nodes_ngrams_ngrams gr ON ng1.ngram_id = gr.ngram2_id
                                    AND gr.node_id   = $2
    -- typename = 4 presumably selects document nodes -- TODO confirm
    WHERE n.typename  = 4
      AND n.parent_id = $1
    GROUP BY 1
$$
LANGUAGE SQL;
-- Remove any previous version so the signature can be redefined below.
DROP FUNCTION OCC_HIST (integer, integer, integer, timestamp without time zone, timestamp without time zone, timestamp without time zone);

-- OCC_HIST :: Corpus.id -> GroupList.id -> MapList.id -> Start -> EndFirst -> EndLast
-- Growth score per mapList ngram between two consecutive periods:
-- ]Start, EndFirst] and ]EndFirst, EndLast].
CREATE OR REPLACE FUNCTION OCC_HIST (int, int, int, timestamp, timestamp, timestamp)
RETURNS TABLE (ng_id int, score numeric)
AS $$
-- occurrences on the first period ]$4, $5]
WITH OCC1 as (SELECT * from OCC_HIST_PART($1, $2, $4, $5))
-- occurrences on the second period ]$5, $6]
   , OCC2 as (SELECT * from OCC_HIST_PART($1, $2, $5, $6))
-- keep only ngrams of the mapList ($3), pairing the two period scores
-- (missing scores stay NULL so the final COALESCE can default to 0)
   , GROWTH as (SELECT ml.ngram_id as ngram_id
                     , COALESCE(OCC1.score, null) as score1
                     , COALESCE(OCC2.score, null) as score2
                FROM nodes_ngrams ml
                LEFT JOIN OCC1 ON OCC1.ng_id = ml.ngram_id
                LEFT JOIN OCC2 ON OCC2.ng_id = ml.ngram_id
                WHERE ml.node_id = $3
                ORDER by score2 DESC)
-- relative growth in percent, rounded to 2 decimals; 0 when undefined
-- (the inner COALESCE guards the denominator with 1 when both are NULL)
SELECT ngram_id
     , COALESCE(ROUND(CAST((100 * (score2 - score1) / COALESCE((score2 + score1), 1)) as numeric), 2), 0)
from GROWTH
$$
LANGUAGE SQL;
-- BEHAVIORAL TEST (should be equal to occ in terms table)
-- WITH OCC as (SELECT * from OCC_HIST(182856, 183859, '1800-03-15 17:00:00+01', '2300-03-15 17:00:00+01'))
-- SELECT ng_id, score from OCC
-- INNER JOIN nodes_ngrams ml on ml.ngram_id = ng_id
-- AND ml.node_id = 183866
-- ORDER BY score DESC;
templates/pages/menu.html
View file @
04afe46a
...
@@ -367,7 +367,7 @@
...
@@ -367,7 +367,7 @@
<p>
<p>
Gargantext
Gargantext
<span
class=
"glyphicon glyphicon-registration-mark"
aria-hidden=
"true"
></span>
<span
class=
"glyphicon glyphicon-registration-mark"
aria-hidden=
"true"
></span>
, version 3.0.6.
6
,
, version 3.0.6.
7
,
<a
href=
"http://www.cnrs.fr"
target=
"blank"
title=
"Institution that enables this project."
>
<a
href=
"http://www.cnrs.fr"
target=
"blank"
title=
"Institution that enables this project."
>
Copyrights
Copyrights
<span
class=
"glyphicon glyphicon-copyright-mark"
aria-hidden=
"true"
></span>
<span
class=
"glyphicon glyphicon-copyright-mark"
aria-hidden=
"true"
></span>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment