Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
9074c57b
Commit
9074c57b
authored
May 25, 2016
by
Romain Loth
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
FASTER git add graphExplorer/distances.py ! remove writing each new cooc node at graph open
parent
8ba25bfd
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
49 additions
and
23 deletions
+49
-23
cooccurrences.py
graphExplorer/cooccurrences.py
+9
-3
distances.py
graphExplorer/distances.py
+12
-13
graph.py
graphExplorer/graph.py
+28
-7
No files found.
graphExplorer/cooccurrences.py
View file @
9074c57b
...
@@ -15,7 +15,10 @@ def countCooccurrences( corpus=None
...
@@ -15,7 +15,10 @@ def countCooccurrences( corpus=None
,
mapList_id
=
None
,
groupList_id
=
None
,
mapList_id
=
None
,
groupList_id
=
None
,
n_min
=
1
,
n_max
=
None
,
limit
=
1000
,
n_min
=
1
,
n_max
=
None
,
limit
=
1000
,
coocNode_id
=
None
,
reset
=
True
,
coocNode_id
=
None
,
reset
=
True
,
isMonopartite
=
True
,
threshold
=
3
):
,
isMonopartite
=
True
,
threshold
=
3
,
just_pass_result
=
True
,
# just return the WeightedMatrix,
# (don't write to DB)
):
'''
'''
Compute the cooccurence matrix and save it, returning NodeNgramNgram.node_id
Compute the cooccurence matrix and save it, returning NodeNgramNgram.node_id
For the moment list of paramters are not supported because, lists need to
For the moment list of paramters are not supported because, lists need to
...
@@ -186,5 +189,8 @@ def countCooccurrences( corpus=None
...
@@ -186,5 +189,8 @@ def countCooccurrences( corpus=None
group_list
=
Translations
(
groupList_id
)
group_list
=
Translations
(
groupList_id
)
cooc
=
matrix
&
(
mapList
*
group_list
)
cooc
=
matrix
&
(
mapList
*
group_list
)
cooc
.
save
(
coocNode_id
)
if
just_pass_result
:
return
(
coocNode_id
)
return
cooc
else
:
cooc
.
save
(
coocNode_id
)
return
(
coocNode_id
)
graphExplorer/distances.py
View file @
9074c57b
...
@@ -14,11 +14,11 @@ import numpy as np
...
@@ -14,11 +14,11 @@ import numpy as np
import
pandas
as
pd
import
pandas
as
pd
import
networkx
as
nx
import
networkx
as
nx
def
clusterByDistances
(
cooc_
id
def
clusterByDistances
(
cooc_
matrix
,
field1
=
None
,
field2
=
None
,
field1
=
None
,
field2
=
None
,
distance
=
'conditional'
):
,
distance
=
'conditional'
):
'''
'''
do_distance ::
Int
-> (Graph, Partition, {ids}, {weight})
do_distance ::
Coocs[nga, ngb => ccweight]
-> (Graph, Partition, {ids}, {weight})
'''
'''
# implicit global session
# implicit global session
...
@@ -32,19 +32,19 @@ def clusterByDistances( cooc_id
...
@@ -32,19 +32,19 @@ def clusterByDistances( cooc_id
labels
=
dict
()
labels
=
dict
()
weight
=
dict
()
weight
=
dict
()
Cooc
=
aliased
(
NodeNgramNgram
)
for
cooc
in
cooc_matrix
.
items
:
ngram1_id
=
cooc
[
0
]
ngram2_id
=
cooc
[
1
]
ccweight
=
cooc_matrix
.
items
[
cooc
]
query
=
session
.
query
(
Cooc
)
.
filter
(
Cooc
.
node_id
==
cooc_id
)
.
all
()
matrix
[
ngram1_id
][
ngram2_id
]
=
ccweight
matrix
[
ngram2_id
][
ngram1_id
]
=
ccweight
for
cooc
in
query
:
ids
[
ngram1_id
]
=
(
field1
,
ngram1_id
)
matrix
[
cooc
.
ngram1_id
][
cooc
.
ngram2_id
]
=
cooc
.
weight
ids
[
ngram2_id
]
=
(
field2
,
ngram2_id
)
matrix
[
cooc
.
ngram2_id
][
cooc
.
ngram1_id
]
=
cooc
.
weight
ids
[
cooc
.
ngram1_id
]
=
(
field1
,
cooc
.
ngram1_id
)
weight
[
ngram1_id
]
=
weight
.
get
(
ngram1_id
,
0
)
+
ccweight
ids
[
cooc
.
ngram2_id
]
=
(
field2
,
cooc
.
ngram2_id
)
weight
[
ngram2_id
]
=
weight
.
get
(
ngram2_id
,
0
)
+
ccweight
weight
[
cooc
.
ngram1_id
]
=
weight
.
get
(
cooc
.
ngram1_id
,
0
)
+
cooc
.
weight
weight
[
cooc
.
ngram2_id
]
=
weight
.
get
(
cooc
.
ngram2_id
,
0
)
+
cooc
.
weight
x
=
pd
.
DataFrame
(
matrix
)
.
fillna
(
0
)
x
=
pd
.
DataFrame
(
matrix
)
.
fillna
(
0
)
...
@@ -217,4 +217,3 @@ def clusterByDistances( cooc_id
...
@@ -217,4 +217,3 @@ def clusterByDistances( cooc_id
partition
=
best_partition
(
G
.
to_undirected
())
partition
=
best_partition
(
G
.
to_undirected
())
return
(
G
,
partition
,
ids
,
weight
)
return
(
G
,
partition
,
ids
,
weight
)
graphExplorer/graph.py
View file @
9074c57b
...
@@ -42,27 +42,48 @@ def get_graph( request=None , corpus=None
...
@@ -42,27 +42,48 @@ def get_graph( request=None , corpus=None
3) filter By Bridgeness (filter By Bridgeness)
3) filter By Bridgeness (filter By Bridgeness)
main parameter: bridgness
main parameter: bridgness
4) format the graph (formatGraph)
4) format the graph (formatGraph)
main parameter: format_
main parameter: format_
'''
'''
from
datetime
import
datetime
before_cooc
=
datetime
.
now
()
# TODO change test here (always true)
# to something like "if cooc.status threshold == required_threshold
# and group.creation_time < cooc.creation_time"
# if False => read and give to clusterByDistances
# if True => compute and give to clusterByDistances <==
if
cooc_id
==
None
:
if
cooc_id
==
None
:
cooc_
id
=
countCooccurrences
(
corpus
=
corpus
cooc_
matrix
=
countCooccurrences
(
corpus
=
corpus
#, field1="ngrams", field2="ngrams"
#, field1="ngrams", field2="ngrams"
,
start
=
start
,
end
=
end
,
start
=
start
,
end
=
end
,
mapList_id
=
mapList_id
,
groupList_id
=
groupList_id
,
mapList_id
=
mapList_id
,
groupList_id
=
groupList_id
,
isMonopartite
=
True
,
threshold
=
threshold
,
isMonopartite
=
True
,
threshold
=
threshold
,
just_pass_result
=
True
#, limit=size
#, limit=size
)
)
else
:
G
,
partition
,
ids
,
weight
=
clusterByDistances
(
cooc_id
cooc_matrix
=
WeightedMatrix
(
cooc_id
)
# fyi
after_cooc
=
datetime
.
now
()
print
(
"... Cooccurrences took
%
f s."
%
(
after_cooc
-
before_cooc
)
.
total_seconds
())
G
,
partition
,
ids
,
weight
=
clusterByDistances
(
cooc_matrix
,
field1
=
"ngrams"
,
field2
=
"ngrams"
,
field1
=
"ngrams"
,
field2
=
"ngrams"
,
distance
=
distance
,
distance
=
distance
)
)
after_cluster
=
datetime
.
now
()
print
(
"... Clustering took
%
f s."
%
(
after_cluster
-
after_cooc
)
.
total_seconds
())
data
=
filterByBridgeness
(
G
,
partition
,
ids
,
weight
,
bridgeness
,
type
,
field1
,
field2
)
data
=
filterByBridgeness
(
G
,
partition
,
ids
,
weight
,
bridgeness
,
type
,
field1
,
field2
)
return
data
after_filter
=
datetime
.
now
()
print
(
"... Filtering took
%
f s."
%
(
after_filter
-
after_cluster
)
.
total_seconds
())
return
data
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment