Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
8a5dcf17
Commit
8a5dcf17
authored
8 years ago
by
delanoe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEAT] Nodes of graphs need some label but clustering is ok (need to be done on map list only)
parent
05f9197a
Changes
4
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
725 additions
and
9 deletions
+725
-9
cooccurrences.py
graphExplorer/cooccurrences.py
+214
-0
rest.py
graphExplorer/rest.py
+7
-7
explorer.html
templates/graphExplorer/explorer.html
+502
-0
menu.html
templates/pages/menu.html
+2
-2
No files found.
graphExplorer/cooccurrences.py
0 → 100644
View file @
8a5dcf17
from
gargantext.models
import
Node
,
Ngram
,
NodeNgram
,
NodeNgramNgram
,
\
NodeHyperdata
from
gargantext.util.db
import
session
,
aliased
,
bulk_insert
,
func
from
gargantext.util.lists
import
WeightedMatrix
,
UnweightedList
,
Translations
from
sqlalchemy
import
desc
,
asc
,
or_
,
and_
#import inspect
import
datetime
def do_cooc(corpus=None,
            field1='ngrams', field2='ngrams',
            mainList_id=None, stopList_id=None, groupList_id=None,
            coocNode_id=None, cvalue_id=None,
            n_min=1, n_max=None,
            start=None, end=None,
            limit=1000,
            isMonopartite=True,
            threshold=3):
    '''
    Compute the cooccurrence matrix of a corpus, save it, and return the id
    of the COOCCURRENCES node it was saved under.

    For the moment lists of parameters are not supported because the lists
    need to be merged before.

    corpus        :: Corpus  -- its id is used as parent of documents/cooc node
    field1        :: String  -- hyperdata name used in the bipartite branch
    field2        :: String  -- only cast to str here, otherwise unused
    mainList_id   :: Int     -- ngrams to keep (monopartite only)
    stopList_id   :: Int     -- ngrams to remove (monopartite only)
    groupList_id  :: Int     -- ngram groupings (monopartite only)
    coocNode_id   :: Int     -- node to save into; looked up/created if falsy
    cvalue_id     :: Int
    n_min, n_max  :: Int     -- inclusive bounds on Ngram.n
    start, end    :: String  -- parsed with the "%Y-%m-%d" format
    limit         :: Int     -- accepted but not used by this function
    isMonopartite :: Bool    -- ngram x ngram when True
    threshold     :: Int     -- only scores strictly above it are kept
    '''
    # TODO : add hyperdata here (save the analysis parameters on the node)

    # Security test
    field1, field2 = str(field1), str(field2)

    # Get node: reuse the corpus' existing cooccurrence node when there is
    # one, otherwise create it.
    if not coocNode_id:
        # The same name is used for the lookup and for the creation, so that
        # the node created by a previous call can actually be found again.
        cooc_name = "GRAPH EXPLORER COOC (in:%s)" % corpus.id
        existing = (session.query(Node.id)
                    .filter(Node.typename == "COOCCURRENCES",
                            Node.name == cooc_name,
                            Node.parent_id == corpus.id)
                    .first())
        if existing is None:
            coocNode = corpus.add_child(typename="COOCCURRENCES",
                                        name=cooc_name)
            session.add(coocNode)
            session.commit()
            coocNode_id = coocNode.id
        else:
            # .first() returns a one-column row: unpack the id
            coocNode_id = existing[0]

    # For tests only: drop whatever was previously saved under this node
    (session.query(NodeNgramNgram)
            .filter(NodeNgramNgram.node_id == coocNode_id)
            .delete())
    session.commit()

    NodeNgramX = aliased(NodeNgram)
    # Score = number of NodeNgramX rows per (ngram, ngram) group
    cooc_score = func.count(NodeNgramX.node_id).label('cooc_score')

    if isMonopartite:
        NodeNgramY = aliased(NodeNgram)
        cooc_query = (session.query(NodeNgramX.ngram_id,
                                    NodeNgramY.ngram_id,
                                    cooc_score)
                      .join(Node, Node.id == NodeNgramX.node_id)
                      .join(NodeNgramY, NodeNgramY.node_id == Node.id)
                      .filter(Node.parent_id == corpus.id,
                              Node.typename == "DOCUMENT"))
    else:
        # NOTE(review): NodeHyperdataNgram and Hyperdata are not among the
        # imports visible at the top of this file -- this branch presumably
        # raises NameError unless they are provided elsewhere; confirm.
        NodeNgramY = aliased(NodeNgram)
        cooc_query = (session.query(NodeHyperdataNgram.ngram_id,
                                    NodeNgramY.ngram_id,
                                    cooc_score)
                      .join(Node, Node.id == NodeHyperdataNgram.node_id)
                      .join(NodeNgramY, NodeNgramY.node_id == Node.id)
                      .join(Hyperdata,
                            Hyperdata.id == NodeHyperdataNgram.hyperdata_id)
                      .filter(Node.parent_id == corpus.id,
                              Node.typename == "DOCUMENT")
                      .filter(Hyperdata.name == field1))

    # Size of the ngrams between n_min and n_max
    if n_min is not None or n_max is not None:
        if isMonopartite:
            NgramX = aliased(Ngram)
            cooc_query = cooc_query.join(NgramX,
                                         NgramX.id == NodeNgramX.ngram_id)
        NgramY = aliased(Ngram)
        cooc_query = cooc_query.join(NgramY,
                                     NgramY.id == NodeNgramY.ngram_id)

    if n_min is not None:
        cooc_query = cooc_query.filter(NgramY.n >= n_min)
        if isMonopartite:
            cooc_query = cooc_query.filter(NgramX.n >= n_min)

    if n_max is not None:
        # Upper bound: compare against n_max (not n_min)
        cooc_query = cooc_query.filter(NgramY.n <= n_max)
        if isMonopartite:
            cooc_query = cooc_query.filter(NgramX.n <= n_max)

    # Cooc between the dates start and end
    # NOTE(review): Hyperdata (used below) is not among the visible imports.
    if start is not None:
        # TODO : more complex date format here.
        date_start = datetime.datetime.strptime(str(start), "%Y-%m-%d")
        date_start_utc = date_start.strftime("%Y-%m-%d %H:%M:%S")

        Start = aliased(NodeHyperdata)
        StartFormat = aliased(Hyperdata)
        cooc_query = (cooc_query
                      .join(Start, Start.node_id == Node.id)
                      .join(StartFormat, StartFormat.id == Start.hyperdata_id)
                      .filter(StartFormat.name == 'publication_date')
                      .filter(Start.value_datetime >= date_start_utc))

    if end is not None:
        # TODO : more complex date format here.
        date_end = datetime.datetime.strptime(str(end), "%Y-%m-%d")
        date_end_utc = date_end.strftime("%Y-%m-%d %H:%M:%S")

        End = aliased(NodeHyperdata)
        EndFormat = aliased(Hyperdata)
        cooc_query = (cooc_query
                      .join(End, End.node_id == Node.id)
                      .join(EndFormat, EndFormat.id == End.hyperdata_id)
                      .filter(EndFormat.name == 'publication_date')
                      .filter(End.value_datetime <= date_end_utc))

    if isMonopartite:
        # Cooc is symmetric: keep each unordered pair once (and drop the
        # diagonal) by requiring x < y
        cooc_query = cooc_query.filter(
            NodeNgramX.ngram_id < NodeNgramY.ngram_id)

    cooc_query = cooc_query.having(cooc_score > threshold)

    if isMonopartite:
        cooc_query = cooc_query.group_by(NodeNgramX.ngram_id,
                                         NodeNgramY.ngram_id)
    else:
        cooc_query = cooc_query.group_by(NodeHyperdataNgram.ngram_id,
                                         NodeNgramY.ngram_id)

    cooc_query = cooc_query.order_by(desc('cooc_score'))
    # END of the query

    matrix = WeightedMatrix(cooc_query)

    # Select according some scores
    if cvalue_id is not None:
        # NOTE(review): NodeNodeNgram is not among the visible imports, and
        # cvalue_list is not used afterwards in this function.
        cvalue_list = UnweightedList(
            session.query(NodeNodeNgram.ngram_id)
                   .filter(NodeNodeNgram.nodex_id == cvalue_id)
                   .all())

    if isMonopartite:
        if mainList_id is not None:
            miam_list = UnweightedList(mainList_id)
        if stopList_id is not None:
            stop_list = UnweightedList(stopList_id)
        if groupList_id is not None:
            group_list = Translations(groupList_id)

        # The lists only apply when a main list is given; otherwise the raw
        # matrix is saved.
        if mainList_id is not None and stopList_id is None and groupList_id is None:
            cooc = matrix & miam_list
        elif mainList_id is not None and stopList_id is not None and groupList_id is None:
            cooc = matrix & (miam_list - stop_list)
        elif mainList_id is not None and stopList_id is not None and groupList_id is not None:
            print("mainList_id is not None and stopList_id is not None and groupList_id is not None")
            cooc = matrix & (miam_list * group_list - stop_list)
        elif mainList_id is not None and stopList_id is None and groupList_id is not None:
            cooc = matrix & (miam_list * group_list)
        else:
            cooc = matrix
    else:
        cooc = matrix

    cooc.save(coocNode_id)
    return coocNode_id
This diff is collapsed.
Click to expand it.
graphExplorer/rest.py
View file @
8a5dcf17
...
...
@@ -19,17 +19,17 @@ class Graph(APIView):
'''
# implicit global session
field1
=
request
.
GET
.
get
(
'field1'
,
'ngrams'
)
field2
=
request
.
GET
.
get
(
'field2'
,
'ngrams'
)
field1
=
str
(
request
.
GET
.
get
(
'field1'
,
'ngrams'
)
)
field2
=
str
(
request
.
GET
.
get
(
'field2'
,
'ngrams'
)
)
start
=
request
.
GET
.
get
(
'start'
,
None
)
end
=
request
.
GET
.
get
(
'end'
,
None
)
threshold
=
request
.
GET
.
get
(
'threshold'
,
1
)
bridgeness
=
request
.
GET
.
get
(
'bridgeness'
,
-
1
)
format_
=
request
.
GET
.
get
(
'format'
,
'json'
)
type_
=
request
.
GET
.
get
(
'type'
,
'node_link'
)
distance
=
request
.
GET
.
get
(
'distance'
,
'conditional'
)
threshold
=
int
(
request
.
GET
.
get
(
'threshold'
,
1
)
)
bridgeness
=
int
(
request
.
GET
.
get
(
'bridgeness'
,
-
1
)
)
format_
=
str
(
request
.
GET
.
get
(
'format'
,
'json'
)
)
type_
=
str
(
request
.
GET
.
get
(
'type'
,
'node_link'
)
)
distance
=
str
(
request
.
GET
.
get
(
'distance'
,
'conditional'
)
)
corpus
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
corpus_id
)
.
first
()
...
...
This diff is collapsed.
Click to expand it.
templates/graphExplorer/explorer.html
0 → 100644
View file @
8a5dcf17
This diff is collapsed.
Click to expand it.
templates/pages/menu.html
View file @
8a5dcf17
...
...
@@ -94,10 +94,10 @@
<!-- FIXME a pop up for advanced mode of graphs --!>
<a type="button" class="btn btn-default
{% if view == "conditional" %}active{%endif%}"
href="/projects/{{project.id}}/corpora/{{ corpus.id }}/
graph
">Graphs (Conditional)</a>
href="/projects/{{project.id}}/corpora/{{ corpus.id }}/
explorer?field1=ngrams&field2=ngrams&distance=conditional&bridgeness=5
">Graphs (Conditional)</a>
<a type="button" class="btn btn-default
{% if view == "distributional" %}active{%endif%}"
href="/projects/{{project.id}}/corpora/{{ corpus.id }}/
graph
">Graphs (Distributional)</a>
href="/projects/{{project.id}}/corpora/{{ corpus.id }}/
explorer?field1=ngrams&field2=ngrams&distance=distributional&bridgeness=5
">Graphs (Distributional)</a>
<!--
<a type="button" class="btn btn-default
{% if view == "journalTerms" %}active{%endif%}"
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment