Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
4afb0468
Commit
4afb0468
authored
8 years ago
by
delanoe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] Graph explorer, options required.
parent
f6e65087
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
59 additions
and
5 deletions
+59
-5
constants.py
gargantext/constants.py
+11
-0
README.md
graph/README.md
+15
-1
graph.py
graph/graph.py
+33
-4
No files found.
gargantext/constants.py
View file @
4afb0468
...
...
@@ -293,3 +293,14 @@ RULE_NPN = "{<JJ.*>*<NN.*>+((<P|IN> <DT>? <JJ.*>* <NN.*>+ <JJ.*>*)|(<JJ.*>))*
RULE_TINA
=
"^((VBD,|VBG,|VBN,|CD.?,|JJ.?,|
\
?,){0,2}?(N.?.?,|
\
?,)+?(CD.,)??)
\
+?((PREP.?|DET.?,|IN.?,|CC.?,|
\
?,)((VBD,|VBG,|VBN,|CD.?,|JJ.?,|
\
?
\
,){0,2}?(N.?.?,|
\
?,)+?)+?)*?$"
# ------------------------------------------------------------------------------
# Graph constraints to compute the graph:
# Modes: live graph generation, graph asynchronously computed or errors detected
# 'corpus' is the maximum corpus size for live computation (above it the graph is computed asynchronously); 'mapList' is the minimum maplist size required to compute the graph
graph_constraints
=
{
'corpus'
:
400
,
'mapList'
:
50
}
This diff is collapsed.
Click to expand it.
graph/README.md
View file @
4afb0468
Module Graph Explorer: from text to graph.
Module Graph Explorer: from text to graph
=========================================
Maintainer: If you see bugs, please report to team@gargantext.org
0) All urls.py of the Graph Explorer
1) Main view of the graph explorer: views.py
2) Data are retrieved as REST: rest.py
3) Graph is generated (graph.py) through different steps
a) check the constraints (graph_constraints) in gargantext/constants.py
b) Cooccurrences are computed (live or asynchronously): cooccurrences.py
c) Threshold and distances: distances.py
d) clustering: louvain.py
e) links between communities: bridgeness.py
4) Additional features:
a) intersection of graphs: intersection.py
This diff is collapsed.
Click to expand it.
graph/graph.py
View file @
4afb0468
...
...
@@ -10,6 +10,7 @@ from graph.distances import clusterByDistances
from
graph.bridgeness
import
filterByBridgeness
from
gargantext.util.scheduling
import
scheduled
from
gargantext.constants
import
graph_constraints
from
datetime
import
datetime
...
...
@@ -42,15 +43,33 @@ def get_graph( request=None , corpus=None
before_cooc
=
datetime
.
now
()
if
cooc_id
==
None
:
# case of Cooccurrences have not been computed already
if
cooc_id
==
None
:
# case of mapList not big enough
# ==============================
# if we do not have any mapList_id already
if
mapList_id
is
None
:
mapList_id
=
session
.
query
(
Node
.
id
)
.
filter
(
Node
.
typename
==
"MAPLIST"
)
.
first
()[
0
]
mapList_size
=
session
.
query
(
NodeNgram
)
.
filter
(
NodeNgram
.
node_id
==
mapList_id
)
if
mapList_size
.
count
()
<
graph_constraints
[
'mapList'
]:
# Do not compute the graph if mapList is not big enough
return
{
'nodes'
:[],
'links'
:[]}
# case of corpus not big enough
# ==============================
corpus_size_query
=
(
session
.
query
(
Node
)
.
filter
(
Node
.
typename
==
"DOCUMENT"
)
.
filter
(
Node
.
parent_id
==
corpus
.
id
)
)
# filter by date if any start date
# --------------------------------
if
start
is
not
None
:
#date_start = datetime.datetime.strptime ("2001-2-3 10:11:12", "%Y-%m-%d %H:%M:%S")
date_start
=
datetime
.
strptime
(
str
(
start
),
"
%
Y-
%
m-
%
d"
)
...
...
@@ -65,6 +84,8 @@ def get_graph( request=None , corpus=None
)
# filter by date if any end date
# --------------------------------
if
end
is
not
None
:
date_end
=
datetime
.
strptime
(
str
(
end
),
"
%
Y-
%
m-
%
d"
)
date_end_utc
=
date_end
.
strftime
(
"
%
Y-
%
m-
%
d
%
H:
%
M:
%
S"
)
...
...
@@ -77,8 +98,13 @@ def get_graph( request=None , corpus=None
.
filter
(
End
.
key
==
'publication_date'
)
.
filter
(
End
.
value_utc
<=
date_end_utc
)
)
if
corpus_size_query
.
count
()
>
400
:
# Finally test if the size of the corpora is big enough
# --------------------------------
if
corpus_size_query
.
count
()
>
graph_constraints
[
'corpus'
]:
# Then compute cooc asynchronously with celery
scheduled
(
countCooccurrences
)(
corpus_id
=
corpus
.
id
#, field1="ngrams", field2="ngrams"
,
start
=
start
,
end
=
end
...
...
@@ -87,9 +113,12 @@ def get_graph( request=None , corpus=None
,
save_on_db
=
True
#, limit=size
)
return
{
'nodes'
:[],
'links'
:[
1
]}
# Dic trick to inform user that graph is computed asynchronously
# Dic hack to inform user that graph is computed asynchronously
# (Impossible graph: no nodes with one link)
return
{
'nodes'
:[],
'links'
:[
1
]}
else
:
# If graph_constraints are ok then compute the graph in live
cooc_matrix
=
countCooccurrences
(
corpus_id
=
corpus
.
id
#, field1="ngrams", field2="ngrams"
,
start
=
start
,
end
=
end
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment