Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
d071736a
Commit
d071736a
authored
Jul 18, 2016
by
delanoe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] Async graph working (needs to know the parameters in myGraph + need refact)
parent
3ce5f459
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
69 additions
and
18 deletions
+69
-18
cooccurrences.py
graph/cooccurrences.py
+7
-1
graph.py
graph/graph.py
+46
-14
rest.py
graph/rest.py
+16
-3
No files found.
graph/cooccurrences.py
View file @
d071736a
...
...
@@ -9,6 +9,7 @@ from sqlalchemy import desc, asc, or_, and_
#import inspect
import
datetime
from
celery
import
shared_task
def
filterMatrix
(
matrix
,
mapList_id
,
groupList_id
):
mapList
=
UnweightedList
(
mapList_id
)
...
...
@@ -17,7 +18,8 @@ def filterMatrix(matrix, mapList_id, groupList_id):
return
cooc
def
countCooccurrences
(
corpus
=
None
,
test
=
False
@
shared_task
def
countCooccurrences
(
corpus_id
=
None
,
test
=
False
,
field1
=
'ngrams'
,
field2
=
'ngrams'
,
start
=
None
,
end
=
None
,
mapList_id
=
None
,
groupList_id
=
None
...
...
@@ -47,8 +49,12 @@ def countCooccurrences( corpus=None , test= False
# Security test
field1
,
field2
=
str
(
field1
),
str
(
field2
)
# Get corpus as Python object
corpus
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
corpus_id
)
.
first
()
# Get node
if
not
coocNode_id
:
coocNode_id0
=
(
session
.
query
(
Node
.
id
)
.
filter
(
Node
.
typename
==
"COOCCURRENCES"
,
Node
.
name
==
"GRAPH EXPLORER"
...
...
graph/graph.py
View file @
d071736a
# Gargantext lib
from
gargantext.util.db
import
session
from
gargantext.util.db
import
session
,
aliased
from
gargantext.util.lists
import
WeightedMatrix
,
UnweightedList
,
Translations
from
gargantext.util.http
import
JsonHttpResponse
from
gargantext.models
import
Node
,
Ngram
,
NodeNgram
,
NodeNgramNgram
from
gargantext.models
import
Node
,
Ngram
,
NodeNgram
,
NodeNgramNgram
,
NodeHyperdata
#from gargantext.util.toolchain.ngram_coocs import compute_coocs
from
graph.cooccurrences
import
countCooccurrences
,
filterMatrix
from
graph.distances
import
clusterByDistances
from
graph.bridgeness
import
filterByBridgeness
from
gargantext.util.scheduling
import
scheduled
from
datetime
import
datetime
def
get_graph
(
request
=
None
,
corpus
=
None
,
test
=
False
,
field1
=
'ngrams'
,
field2
=
'ngrams'
,
mapList_id
=
None
,
groupList_id
=
None
,
cooc_id
=
None
,
type
=
'node_link'
...
...
@@ -37,18 +40,46 @@ def get_graph( request=None , corpus=None
'''
from
datetime
import
datetime
before_cooc
=
datetime
.
now
()
# TODO change test here (always true)
# to something like "if cooc.status threshold == required_threshold
# and group.creation_time < cooc.creation_time"
# if False => read and give to clusterByDistances
# if True => compute and give to clusterByDistances <==
if
cooc_id
==
None
:
if
test
=
True
:
cooc_matrix
=
countCooccurrences
(
corpus
=
corpus
,
test
=
test
# Case where the cooccurrences have not been computed yet
corpus_size_query
=
(
session
.
query
(
Node
)
.
filter
(
Node
.
typename
==
"DOCUMENT"
)
.
filter
(
Node
.
parent_id
==
corpus
.
id
)
)
if
start
is
not
None
:
#date_start = datetime.datetime.strptime ("2001-2-3 10:11:12", "%Y-%m-%d %H:%M:%S")
date_start
=
datetime
.
strptime
(
str
(
start
),
"
%
Y-
%
m-
%
d"
)
date_start_utc
=
date_start
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
Start
=
aliased
(
NodeHyperdata
)
corpus_size_query
=
(
corpus_size_query
.
join
(
Start
,
Start
.
node_id
==
Node
.
id
)
.
filter
(
Start
.
key
==
'publication_date'
)
.
filter
(
Start
.
value_utc
>=
date_start_utc
)
)
if
end
is
not
None
:
date_end
=
datetime
.
strptime
(
str
(
end
),
"
%
Y-
%
m-
%
d"
)
date_end_utc
=
date_end
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
End
=
aliased
(
NodeHyperdata
)
corpus_size_query
=
(
corpus_size_query
.
join
(
End
,
End
.
node_id
==
Node
.
id
)
.
filter
(
End
.
key
==
'publication_date'
)
.
filter
(
End
.
value_utc
<=
date_end_utc
)
)
if
corpus_size_query
.
count
()
>
400
:
scheduled
(
countCooccurrences
)(
corpus_id
=
corpus
.
id
#, field1="ngrams", field2="ngrams"
,
start
=
start
,
end
=
end
,
mapList_id
=
mapList_id
,
groupList_id
=
groupList_id
...
...
@@ -56,20 +87,21 @@ def get_graph( request=None , corpus=None
,
save_on_db
=
True
#, limit=size
)
return
{
'nodes'
:[],
'links'
:[
1
]}
# Dict trick: this placeholder payload informs the user that the graph is being computed asynchronously
else
:
cooc_matrix
=
countCooccurrences
(
corpus
=
corpus
,
test
=
test
cooc_matrix
=
countCooccurrences
(
corpus
_id
=
corpus
.
id
#, field1="ngrams", field2="ngrams"
,
start
=
start
,
end
=
end
,
mapList_id
=
mapList_id
,
groupList_id
=
groupList_id
,
isMonopartite
=
True
,
threshold
=
threshold
,
save_on_db
=
Tru
e
,
save_on_db
=
Fals
e
#, limit=size
)
else
:
print
(
"Getting data for matrix
%
d"
,
int
(
cooc_id
))
matrix
=
WeightedMatrix
(
int
(
cooc_id
))
print
(
matrix
)
#
print(matrix)
cooc_matrix
=
filterMatrix
(
matrix
,
mapList_id
,
groupList_id
)
...
...
graph/rest.py
View file @
d071736a
...
...
@@ -139,8 +139,10 @@ class Graph(APIView):
,
distance
=
distance
,
bridgeness
=
bridgeness
)
# Test data length
if
len
(
data
[
'nodes'
])
>
0
and
len
(
data
[
'links'
])
>
0
:
if
len
(
data
[
'nodes'
])
>
1
and
len
(
data
[
'links'
])
>
1
:
# normal case --------------------------------
if
format_
==
'json'
:
return
JsonHttpResponse
(
...
...
@@ -148,6 +150,15 @@ class Graph(APIView):
status
=
200
)
# --------------------------------------------
elif
len
(
data
[
'nodes'
])
==
0
and
len
(
data
[
'links'
])
==
1
:
# async data case
return
JsonHttpResponse
({
'msg'
:
'''Async graph generation
Wait a while and discover your graph
http://
%
sgargantext.org/projects/
%
d/corpora/
%
d/myGraph
'''
%
(
"dev."
,
corpus
.
parent_id
,
corpus
.
id
),
},
status
=
400
)
else
:
# empty data case
return
JsonHttpResponse
({
...
...
@@ -155,6 +166,7 @@ class Graph(APIView):
No cooccurences found in this corpus for the words of this maplist
(maybe add more terms to the maplist?)'''
,
},
status
=
400
)
else
:
# parameters error case
return
JsonHttpResponse
({
...
...
@@ -166,7 +178,8 @@ class Graph(APIView):
},
status
=
400
)
# for any other errors that we forgot to test
except
Exception
as
e
:
except
Exception
as
error
:
print
(
error
)
return
JsonHttpResponse
({
'msg'
:
'Unknown error (showing the trace):
\n
%
s'
%
"
\n
"
.
join
(
format_tb
(
e
.
__traceback__
))
'msg'
:
'Unknown error (showing the trace):
\n
%
s'
%
"
\n
"
.
join
(
format_tb
(
e
rror
.
__traceback__
))
},
status
=
400
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment