humanities / gargantext · Commits · a40f95bb

Commit a40f95bb, authored Mar 30, 2016 by delanoe

[FEAT GRAPH] Commit before factoring.

parent 0564f787

Showing 3 changed files with 14 additions and 202 deletions (+14, -202):
graphExplorer/distances.py   +3   -1
graphExplorer/functions.py   +9   -199
graphExplorer/rest.py        +2   -2
graphExplorer/distances.py  (view file @ a40f95bb)

@@ -14,7 +14,9 @@ import numpy as np
 import pandas as pd
 import networkx as nx

-def do_distance(cooc_id, field1=None, field2=None, isMonopartite=True, distance='conditional'):
+def do_distance(cooc_id, field1=None, field2=None,
+                isMonopartite=True, distance='conditional'):
+
     '''
     do_distance :: Int -> (Graph, Partition, {ids}, {weight})
     '''
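The Haskell-style type comment in the docstring says the function takes a cooccurrence-node id and returns a four-tuple (graph, partition, ids, weights). A minimal usage sketch, assuming only the signature shown above; the unpacked names on the left are illustrative, not from this commit:

# Sketch only: calling do_distance as its docstring signature suggests.
# The names G, partition, ids, weights are hypothetical.
from graphExplorer.distances import do_distance

G, partition, ids, weights = do_distance(cooc_id=42,
                                         isMonopartite=True,
                                         distance='conditional')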
graphExplorer/functions.py  (view file @ a40f95bb)

@@ -4,7 +4,7 @@ from gargantext.util.http import JsonHttpResponse
 from gargantext.models import Node, Ngram, NodeNgram, NodeNgramNgram
 #from gargantext.util.toolchain.ngram_coocs import compute_coocs
-from graphExplorer.distance  import do_distance
+from graphExplorer.distances import do_distance
 from graphExplorer.cooccurrences import do_cooc
 # Prelude lib
@@ -14,10 +14,10 @@ from sqlalchemy.orm import aliased
 # Math/Graph lib
 import math
-import pandas as pd
-import numpy as np
+import pandas as pd
+import numpy as np
-import networkx as nx
+import networkx as nx
 from networkx.readwrite import json_graph
@@ -27,6 +27,7 @@ def get_cooc( request=None, corpus=None
             , start=None, end=None
             , threshold=1
             , distance='conditional'
             , isMonopartite=True    # By default, we compute terms/terms graph
             , size=1000
+            , bridgeness=5
             , mapList_id=None, groupList_id=None
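The new bridgeness=5 parameter is only declared in this hunk; its use is not shown anywhere in the diff. As a hedged illustration of the usual meaning of bridgeness in graph explorers (cap the number of inter-cluster links each node keeps, strongest first), here is a hypothetical filter; every name in it is an assumption, not code from this commit:

# Hypothetical bridgeness filter (not from this diff): keep all intra-cluster
# links, and at most `bridgeness` inter-cluster links per node, by weight.
def filter_bridgeness(links, partition, bridgeness=5):
    # links: [{"s": source, "t": target, "w": weight}]
    # partition: {node_id: cluster_id}
    kept, used = [], {}
    for link in sorted(links, key=lambda l: l["w"], reverse=True):
        s, t = link["s"], link["t"]
        if partition[s] == partition[t]:
            kept.append(link)               # intra-cluster links always pass
        elif used.get(s, 0) < bridgeness and used.get(t, 0) < bridgeness:
            kept.append(link)               # both endpoints still have budget
            used[s] = used.get(s, 0) + 1
            used[t] = used.get(t, 0) + 1
    return kept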
@@ -35,8 +36,6 @@ def get_cooc( request=None, corpus=None
     get_ccoc : to compute the graph.
     '''
     data = {}
-    if mapList_id == None:
-        mapList_id = (session.query(Node.id)
@@ -56,24 +55,11 @@ def get_cooc( request=None, corpus=None
                      )
                      .first()
                     )
     if groupList_id == None:
         raise ValueError("GROUPLIST node needed for cooccurrences")
-
-    # compute_cooc needs group, fields etc.
-    # group_id = 3
-    SamuelFlag = False
-    # if field1 == field2 == 'ngrams' :
-    #     isMonopartite = True
-    #     SamuelFlag = True
-    # else:
-    #     isMonopartite = False
-    isMonopartite = True # Always. So, calcule the graph B and from these B-nodes, build the graph-A
-
     # data deleted each time
     #cooc_id = get_or_create_node(nodetype='Cooccurrence', corpus=corpus).id
     if corpus is None:
         corpus = session.query(Node).filter(Node.id == corpus_id).first()
@@ -90,6 +76,9 @@ def get_cooc( request=None, corpus=None
                          , isMonopartite=True
                          , distance=distance)

+    # Data are stored in a dict(), (== hashmap by default for Python)
     data = dict()
+
     if type == "node_link":
+        nodesB_dict = {}
         for node_id in G.nodes():
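The node_link branch above serializes the graph with NetworkX's json_graph, which this file already imports. A minimal, self-contained example of the format (toy graph, unrelated to the corpus data):

# json_graph.node_link_data turns a graph into the {"nodes": [...], "links": [...]}
# dict that a node-link front-end consumes.
import networkx as nx
from networkx.readwrite import json_graph

G = nx.Graph()
G.add_edge('ngram_1', 'ngram_2', weight=0.5)
data = json_graph.node_link_data(G)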
@@ -192,183 +181,4 @@ def get_cooc( request=None, corpus=None
     return(data)

-def get_graphA(nodeA_type, NodesB, links, corpus):
-    from analysis.InterUnion import Utils
-    print(" = = = == = = = ")
-    print("In get_graphA(), corpus id:", corpus.id)
-    nodeA_type_id = cache.Hyperdata[nodeA_type].id
-    threshold_cotainf = 0.02
-    max_nodeid = -1
-    for nodeid in NodesB:
-        if nodeid > max_nodeid:
-            max_nodeid = nodeid
-
-    # = = = = [ 01. Getting ALL documents of the Corpus c ] = = = = #
-    Docs = {}
-    document_type_id = cache.NodeType['Document'].id
-    sql_query = 'select id from node_node where parent_id=' + str(corpus.id) + ' and type_id=' + str(document_type_id)
-    cursor = connection.cursor()
-    cursor.execute(sql_query)
-    results = cursor.fetchall()
-    for i in results:
-        Docs[i[0]] = True
-    print("docs:", len(Docs.keys()))
-    # = = = = [ / 01. Getting ALL documents of the Corpus c ] = = = = #
-
-    # = = = = [ 02. Getting ALL Documents related with Ngrams of the carte semantic ] = = = = #
-    sql_query = 'select nodey_id,ngram_id from node_nodenodengram where ngram_id IN (' + ','.join(map(str, NodesB.keys())) + ")"
-    cursor = connection.cursor()
-    cursor.execute(sql_query)
-    results = cursor.fetchall()
-    # = = = = [ / 02. Getting ALL Documents related with Ngrams of the carte semantic ] = = = = #
-
-    # = = = = [ 03. Now we limit the retrieved Documents(step 02) to those belonging to the Corpus c ] = = = = ]
-    Docs_and_ = { "nodesA":{} , "nodesB":{} }
-    NodesB_and_Docs = {}
-    for i in results:
-        doc_id = i[0]
-        ngram_id = i[1]
-        if ngram_id in NodesB and doc_id in Docs:
-            if doc_id not in Docs_and_["nodesB"]:
-                Docs_and_["nodesB"][doc_id] = []
-            Docs_and_["nodesB"][doc_id].append(ngram_id)
-            if ngram_id not in NodesB_and_Docs:
-                NodesB_and_Docs[ngram_id] = []
-            NodesB_and_Docs[ngram_id].append(doc_id)
-    # = = = = [ / 03. Now we limit the retrieved Documents(step 02) to those belonging to the Corpus c ] = = = = ]
-
-    # # = = = = [ Getting Authors ] = = = = ]
-    # Authors = {}
-    # sql_query = 'select node_id,value_string from node_node_hyperdata where node_id IN (' + ','.join(map(str, Docs_and_["nodesB"].keys())) + ")"+' and hyperdata_id=10'# 10 -> authors
-    # cursor = connection.cursor()
-    # cursor.execute(sql_query)
-    # results = cursor.fetchall()
-    # for i in results:
-    #     doc_id = i[0]
-    #     authors = i[1].split(",")
-    #     for a in authors:
-    #         if a not in Authors:
-    #             Authors[a] = 0
-    #         Authors[a] += 1
-    # print("")
-    # print("#authors:")
-    # import pprint
-    # pprint.pprint(Authors)
-    # print("")
-    # # = = = = [ / Getting Authors ] = = = = ]
-
-    # = = = = [ 04. Getting A-elems and making the dictionaries] = = = = ]
-    sql_query = 'select node_id,value_string from node_node_hyperdata where node_id IN (' + \
-                ','.join(map(str, Docs_and_["nodesB"].keys())) + ")" + ' and hyperdata_id=' + str(nodeA_type_id)
-    cursor = connection.cursor()
-    cursor.execute(sql_query)
-    results = cursor.fetchall()
-    A_Freq = {}
-    A_int2str = {}
-    A_str2int = {}
-    counter = max_nodeid + 1
-    for i in results:
-        doc_id = i[0]
-        a = i[1]
-        if a not in A_str2int:
-            A_str2int[a] = counter
-            A_int2str[counter] = a
-            counter += 1
-    for i in results:
-        doc_id = i[0]
-        a = A_str2int[i[1]]
-        Docs_and_["nodesA"][doc_id] = a
-        if a not in A_Freq:
-            A_Freq[a] = 0
-        A_Freq[a] += 1
-    # = = = = [ / 04. Getting A-elems and making the dictionaries ] = = = = ]
-
-    # = = = = [ Filling graph-A ] = = = = ]
-    Graph_A = Utils()
-    for i in NodesB_and_Docs:
-        ngram = i
-        docs = NodesB_and_Docs[i]
-        k_A_clique = {}
-        for doc in docs:
-            k_A = Docs_and_["nodesA"][doc]
-            k_A_clique[k_A] = True
-        if len(k_A_clique.keys()) > 1:
-            Graph_A.addCompleteSubGraph(k_A_clique.keys())
-    # = = = = [ / Filling graph-A ] = = = = ]
-
-    # = = = = [ graph-A to JSON ] = = = = ]
-    A = Graph_A.G
-    for node_id in A.nodes():
-        A.node[node_id]['label'] = A_int2str[node_id]
-        A.node[node_id]['size'] = A_Freq[node_id]
-        A.node[node_id]['type'] = nodeA_type
-        A.node[node_id]['attributes'] = { "clust_default": 1 }
-
-    A_links = []
-    min_weight = 999999
-    max_weight = -1
-    Weights_Dist = {}
-    for e in A.edges_iter():
-        s = e[0]
-        t = e[1]
-        w = A[s][t]["weight"]
-        if w not in Weights_Dist:
-            Weights_Dist[w] = { "freq": 0 , "deleted": 0 }
-        Weights_Dist[w]["freq"] += 1
-        if min_weight > w:
-            min_weight = w
-        if max_weight < w:
-            max_weight = w
-
-    edges2remove = []
-    for e in A.edges_iter():
-        s = e[0]
-        t = e[1]
-        w = A[s][t]["weight"]
-        if Weights_Dist[w]["freq"] < (len(A) * 3):  # weight-threshold
-            info = {
-                "s": s,
-                "t": t,
-                "w": w / max_weight  # normalization
-            }
-            A_links.append(info)
-        else:
-            # if Weights_Dist[w]["deleted"] < round(Weights_Dist[w]["freq"]*0.95):
-            atuple = (s, t)
-            edges2remove.append(atuple)
-            Weights_Dist[w]["deleted"] += 1
-    A.remove_edges_from(edges2remove)
-    A.remove_nodes_from(nx.isolates(A))
-    data = json_graph.node_link_data(A)  # saving nodesA
-
-    AB = nx.Graph()
-    for i in NodesB_and_Docs:
-        b = i
-        docs = NodesB_and_Docs[i]
-        for doc in docs:
-            a = Docs_and_["nodesA"][doc]
-            if A.has_node(a):
-                AB.add_edge(a, b)
-    AB_links = []
-    for e in AB.edges_iter():
-        info = { "s": e[0], "t": e[1], "w": 1 }
-        AB_links.append(info)
-    data["links"] = A_links + AB_links  # saving AA-links and AB-links
-    # = = = = [ / graph-A to JSON ] = = = = ]
-    return data
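The deleted get_graphA leans on NetworkX 1.x APIs (edges_iter(), the G.node attribute dict, nx.isolates() returning a list) plus the project's Utils.addCompleteSubGraph helper. A minimal sketch of the same A-side projection idea under NetworkX 2.x+, where nodesA_per_doc and docs_per_ngram are hypothetical stand-ins for Docs_and_["nodesA"] and NodesB_and_Docs:

# Sketch only: project B-nodes (ngrams) onto A-nodes through shared documents,
# replacing the removed NetworkX 1.x calls with their 2.x equivalents.
import itertools
import networkx as nx

def project_graphA(nodesA_per_doc, docs_per_ngram):
    # nodesA_per_doc: {doc_id: a_node_id}, docs_per_ngram: {ngram_id: [doc_id]}
    A = nx.Graph()
    for docs in docs_per_ngram.values():
        clique = {nodesA_per_doc[d] for d in docs if d in nodesA_per_doc}
        for s, t in itertools.combinations(clique, 2):
            # complete subgraph over the A-nodes sharing this ngram,
            # accumulating co-occurrence counts as edge weights
            w = A[s][t]['weight'] + 1 if A.has_edge(s, t) else 1
            A.add_edge(s, t, weight=w)
    A.remove_nodes_from(list(nx.isolates(A)))  # isolates() yields an iterator in 2.x
    return A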
graphExplorer/rest.py  (view file @ a40f95bb)

@@ -44,8 +44,8 @@ class Graph(APIView):
         if field2 in accepted_field2:
             if start is not None and end is not None:
                 data = get_cooc(corpus=corpus
-                               #, field1=field1 , field2=field2
-                               , start=start, end=end
+                               #, field1=field1
+                               , field2=field2 , start=start , end=end
                                , threshold=threshold
                                , distance=distance)
             else: