Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
b1ac2efb
Commit
b1ac2efb
authored
Mar 30, 2016
by
delanoe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FACTO] split graph function into 4 main functions.
parent
a40f95bb
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
149 additions
and
118 deletions
+149
-118
constants.py
gargantext/constants.py
+3
-3
README.md
graphExplorer/README.md
+3
-0
bridgeness.py
graphExplorer/bridgeness.py
+10
-86
cooccurrences.py
graphExplorer/cooccurrences.py
+7
-7
distances.py
graphExplorer/distances.py
+2
-2
graph.py
graphExplorer/graph.py
+68
-0
rest.py
graphExplorer/rest.py
+56
-20
No files found.
gargantext/constants.py
View file @
b1ac2efb
...
...
@@ -114,9 +114,9 @@ from gargantext.util.taggers import *
LANGUAGES
=
{
'en'
:
{
'tagger'
:
TurboTagger
,
#
'tagger': EnglishMeltTagger,
#
'tagger': NltkTagger,
#
'tagger': TurboTagger,
'tagger'
:
EnglishMeltTagger
,
#'tagger': NltkTagger,
},
'fr'
:
{
'tagger'
:
FrenchMeltTagger
,
...
...
graphExplorer/README.md
0 → 100644
View file @
b1ac2efb
Module Graph Explorer: from text to graph.
Maintainer: If you see bugs, please report to team@gargantext.org
graphExplorer/
function
s.py
→
graphExplorer/
bridgenes
s.py
View file @
b1ac2efb
# Gargantext lib
from
gargantext.util.db
import
session
from
gargantext.util.http
import
JsonHttpResponse
from
gargantext.models
import
Node
,
Ngram
,
NodeNgram
,
NodeNgramNgram
# Article coming soon
#from gargantext.util.toolchain.ngram_coocs import compute_coocs
from
g
raphExplorer.distances
import
do_distance
from
graphExplorer.cooccurrences
import
do_cooc
from
gargantext.util.db
import
session
from
g
argantext.models.ngrams
import
Ngram
from
collections
import
defaultdict
# Prelude lib
from
copy
import
copy
,
deepcopy
from
collections
import
defaultdict
from
sqlalchemy.orm
import
aliased
# Math/Graph lib
import
math
import
pandas
as
pd
import
numpy
as
np
import
networkx
as
nx
from
networkx.readwrite
import
json_graph
def
get_cooc
(
request
=
None
,
corpus
=
None
,
field1
=
'ngrams'
,
field2
=
'ngrams'
,
cooc_id
=
None
,
type
=
'node_link'
,
start
=
None
,
end
=
None
,
threshold
=
1
,
distance
=
'conditional'
,
isMonopartite
=
True
# By default, we compute terms/terms graph
,
size
=
1000
,
bridgeness
=
5
,
mapList_id
=
None
,
groupList_id
=
None
):
'''
get_cooc : to compute the graph.
'''
if
mapList_id
==
None
:
mapList_id
=
(
session
.
query
(
Node
.
id
)
.
filter
(
Node
.
typename
==
"MAPLIST"
,
Node
.
parent_id
==
corpus
.
id
)
.
first
()
)
if
mapList_id
==
None
:
raise
ValueError
(
"MAPLIST node needed for cooccurrences"
)
if
groupList_id
==
None
:
groupList_id
=
(
session
.
query
(
Node
.
id
)
.
filter
(
Node
.
typename
==
"GROUPLIST"
,
Node
.
parent_id
==
corpus
.
id
)
.
first
()
)
if
groupList_id
==
None
:
raise
ValueError
(
"GROUPLIST node needed for cooccurrences"
)
if
corpus
is
None
:
corpus
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
corpus_id
)
.
first
()
cooc_id
=
do_cooc
(
corpus
=
corpus
#, field1="ngrams", field2="ngrams"
,
mapList_id
=
int
(
mapList_id
[
0
]),
groupList_id
=
int
(
groupList_id
[
0
])
#, isMonopartite=True
,
start
=
start
,
end
=
end
,
threshold
=
threshold
#, limit=size
)
G
,
partition
,
ids
,
weight
=
do_distance
(
cooc_id
,
field1
=
"ngrams"
,
field2
=
"ngrams"
,
isMonopartite
=
True
,
distance
=
distance
)
def
filterByBridgeness
(
G
,
partition
,
ids
,
weight
,
bridgeness
,
type
,
field1
,
field2
):
# Data are stored in a dict(), (== hashmap by default for Python)
data
=
dict
()
if
type
==
"node_link"
:
nodesB_dict
=
{}
for
node_id
in
G
.
nodes
():
#node,type(labels[node])
G
.
node
[
node_id
][
'pk'
]
=
ids
[
node_id
][
1
]
G
.
node
[
node_id
][
'pk'
]
=
ids
[
node_id
][
1
]
nodesB_dict
[
ids
[
node_id
][
1
]
]
=
True
# TODO the query below is not optimized (do it do_distance).
the_label
=
session
.
query
(
Ngram
.
terms
)
.
filter
(
Ngram
.
id
==
node_id
)
.
first
()
the_label
=
", "
.
join
(
the_label
)
G
.
node
[
node_id
][
'label'
]
=
the_label
G
.
node
[
node_id
][
'label'
]
=
the_label
G
.
node
[
node_id
][
'size'
]
=
weight
[
node_id
]
G
.
node
[
node_id
][
'type'
]
=
ids
[
node_id
][
0
]
.
replace
(
"ngrams"
,
"terms"
)
G
.
node
[
node_id
][
'attributes'
]
=
{
"clust_default"
:
partition
[
node_id
]}
# new format
G
.
node
[
node_id
][
'size'
]
=
weight
[
node_id
]
G
.
node
[
node_id
][
'type'
]
=
ids
[
node_id
][
0
]
.
replace
(
"ngrams"
,
"terms"
)
G
.
node
[
node_id
][
'attributes'
]
=
{
"clust_default"
:
partition
[
node_id
]}
# new format
# G.add_edge(node, "cluster " + str(partition[node]), weight=3)
links
=
[]
i
=
1
if
bridgeness
>
0
:
com_link
=
defaultdict
(
lambda
:
defaultdict
(
list
))
...
...
@@ -107,7 +34,6 @@ def get_cooc( request=None, corpus=None
for
k
,
v
in
partition
.
items
():
com_ids
[
v
]
.
append
(
k
)
for
e
in
G
.
edges_iter
():
s
=
e
[
0
]
...
...
@@ -180,5 +106,3 @@ def get_cooc( request=None, corpus=None
return
(
partition
)
return
(
data
)
graphExplorer/cooccurrences.py
View file @
b1ac2efb
...
...
@@ -9,13 +9,13 @@ from sqlalchemy import desc, asc, or_, and_
#import inspect
import
datetime
def
do_cooc
(
corpus
=
None
,
field1
=
'ngrams'
,
field2
=
'ngrams'
,
start
=
None
,
end
=
None
,
mapList_id
=
None
,
groupList_id
=
None
,
n_min
=
1
,
n_max
=
None
,
limit
=
1000
,
coocNode_id
=
None
,
reset
=
True
,
isMonopartite
=
True
,
threshold
=
3
):
def
countCooccurrences
(
corpus
=
None
,
field1
=
'ngrams'
,
field2
=
'ngrams'
,
start
=
None
,
end
=
None
,
mapList_id
=
None
,
groupList_id
=
None
,
n_min
=
1
,
n_max
=
None
,
limit
=
1000
,
coocNode_id
=
None
,
reset
=
True
,
isMonopartite
=
True
,
threshold
=
3
):
'''
Compute the cooccurrence matrix and save it, returning NodeNgramNgram.node_id
For the moment, lists of parameters are not supported because lists need to
...
...
graphExplorer/distances.py
View file @
b1ac2efb
...
...
@@ -14,9 +14,9 @@ import numpy as np
import
pandas
as
pd
import
networkx
as
nx
def
do_distance
(
cooc_id
def
clusterByDistances
(
cooc_id
,
field1
=
None
,
field2
=
None
,
isMonopartite
=
True
,
distance
=
'conditional'
):
,
distance
=
'conditional'
):
'''
do_distance :: Int -> (Graph, Partition, {ids}, {weight})
'''
...
...
graphExplorer/graph.py
0 → 100644
View file @
b1ac2efb
# Gargantext lib
from
gargantext.util.db
import
session
from
gargantext.util.http
import
JsonHttpResponse
from
gargantext.models
import
Node
,
Ngram
,
NodeNgram
,
NodeNgramNgram
#from gargantext.util.toolchain.ngram_coocs import compute_coocs
from
graphExplorer.cooccurrences
import
countCooccurrences
from
graphExplorer.distances
import
clusterByDistances
from
graphExplorer.bridgeness
import
filterByBridgeness
# Prelude lib
from
copy
import
copy
,
deepcopy
from
collections
import
defaultdict
from
sqlalchemy.orm
import
aliased
# Math/Graph lib
import
math
import
pandas
as
pd
import
numpy
as
np
import
networkx
as
nx
def get_graph(request=None, corpus=None,
              field1='ngrams', field2='ngrams',
              mapList_id=None, groupList_id=None,
              cooc_id=None, type='node_link',
              start=None, end=None,
              threshold=1,
              distance='conditional',
              isMonopartite=True,  # By default, we compute terms/terms graph
              bridgeness=5,
              # size=1000
              ):
    '''
    get_graph : chain the graph-building steps and return the result.

    Main steps:
    1) count cooccurrences (function countCooccurrences)
            main parameter: threshold
            skipped when the caller already supplies a cooc_id
    2) filter and cluster by distances (function clusterByDistances)
            main parameter: distance
    3) filter by bridgeness (function filterByBridgeness)
            main parameter: bridgeness

    A fourth step, "format the graph (formatGraph)", was announced in the
    first version of this docstring, but no such call is made here: the
    value returned by filterByBridgeness is returned unchanged.

    Parameters:
        request       : accepted for interface compatibility; not used here.
        corpus        : forwarded to countCooccurrences.
        field1, field2: forwarded to filterByBridgeness only;
                        clusterByDistances always receives "ngrams".
        mapList_id, groupList_id, start, end, threshold:
                        forwarded to countCooccurrences.
        cooc_id       : identifier of an existing cooccurrence computation;
                        when None it is computed with countCooccurrences.
        type          : forwarded to filterByBridgeness (the name shadows the
                        builtin but is part of the public signature).
        distance      : forwarded to clusterByDistances.
        isMonopartite : currently ignored, True is always passed down.
                        NOTE(review): confirm whether it should be forwarded.
        bridgeness    : forwarded to filterByBridgeness.

    Returns:
        Whatever filterByBridgeness returns for the clustered graph.
    '''
    # Identity test: None is a singleton, "== None" may be overridden by
    # objects defining __eq__.
    if cooc_id is None:
        cooc_id = countCooccurrences(
            corpus=corpus,
            # field1="ngrams", field2="ngrams",
            start=start, end=end,
            mapList_id=mapList_id, groupList_id=groupList_id,
            isMonopartite=True, threshold=threshold,
            # limit=size,
        )

    # Runs for both a computed and a caller-supplied cooc_id.
    G, partition, ids, weight = clusterByDistances(
        cooc_id,
        field1="ngrams", field2="ngrams",
        distance=distance,
    )

    data = filterByBridgeness(G, partition, ids, weight,
                              bridgeness, type, field1, field2)

    return data
graphExplorer/rest.py
View file @
b1ac2efb
...
...
@@ -2,7 +2,7 @@
from
gargantext.util.db
import
session
from
gargantext.models.nodes
import
Node
from
graphExplorer.
functions
import
get_cooc
from
graphExplorer.
graph
import
get_graph
from
gargantext.util.http
import
APIView
,
APIException
\
,
JsonHttpResponse
,
requires_auth
...
...
@@ -19,38 +19,74 @@ class Graph(APIView):
graph?field1=ngrams&field2=ngrams&
graph?field1=ngrams&field2=ngrams&start=''&end=''
'''
# implicit global session
field1
=
str
(
request
.
GET
.
get
(
'field1'
,
'ngrams'
))
field2
=
str
(
request
.
GET
.
get
(
'field2'
,
'ngrams'
)
)
# Get the node we are working with
corpus
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
corpus_id
)
.
first
(
)
start
=
request
.
GET
.
get
(
'start'
,
None
)
end
=
request
.
GET
.
get
(
'end'
,
None
)
# Get all the parameters in the URL
field1
=
str
(
request
.
GET
.
get
(
'field1'
,
'ngrams'
))
field2
=
str
(
request
.
GET
.
get
(
'field2'
,
'ngrams'
))
threshold
=
int
(
request
.
GET
.
get
(
'threshold'
,
1
))
bridgeness
=
int
(
request
.
GET
.
get
(
'bridgeness'
,
-
1
))
format_
=
str
(
request
.
GET
.
get
(
'format'
,
'json'
))
type_
=
str
(
request
.
GET
.
get
(
'type'
,
'node_link'
))
distance
=
str
(
request
.
GET
.
get
(
'distance'
,
'conditional'
))
start
=
request
.
GET
.
get
(
'start'
,
None
)
end
=
request
.
GET
.
get
(
'end'
,
None
)
corpus
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
corpus_id
)
.
first
()
mapList_id
=
int
(
request
.
GET
.
get
(
'mapList'
,
0
))
groupList_id
=
int
(
request
.
GET
.
get
(
'groupList'
,
0
))
threshold
=
int
(
request
.
GET
.
get
(
'threshold'
,
1
))
bridgeness
=
int
(
request
.
GET
.
get
(
'bridgeness'
,
-
1
))
format_
=
str
(
request
.
GET
.
get
(
'format'
,
'json'
))
type_
=
str
(
request
.
GET
.
get
(
'type'
,
'node_link'
))
distance
=
str
(
request
.
GET
.
get
(
'distance'
,
'conditional'
))
# Get default value if no map list
if
mapList_id
==
0
:
mapList_id
=
(
session
.
query
(
Node
.
id
)
.
filter
(
Node
.
typename
==
"MAPLIST"
,
Node
.
parent_id
==
corpus
.
id
)
.
first
()
)
mapList_id
=
mapList_id
[
0
]
if
mapList_id
==
None
:
raise
ValueError
(
"MAPLIST node needed for cooccurrences"
)
# Get default value if no group list
if
groupList_id
==
0
:
groupList_id
=
(
session
.
query
(
Node
.
id
)
.
filter
(
Node
.
typename
==
"GROUPLIST"
,
Node
.
parent_id
==
corpus
.
id
)
.
first
()
)
groupList_id
=
groupList_id
[
0
]
if
groupList_id
==
None
:
raise
ValueError
(
"GROUPLIST node needed for cooccurrences"
)
# Check the options
accepted_field1
=
[
'ngrams'
,
'journal'
,
'source'
,
'authors'
]
accepted_field2
=
[
'ngrams'
,]
options
=
[
'start'
,
'end'
,
'threshold'
,
'distance'
]
accepted_field2
=
[
'ngrams'
,
]
options
=
[
'start'
,
'end'
,
'threshold'
,
'distance'
]
if
field1
in
accepted_field1
:
if
field2
in
accepted_field2
:
if
start
is
not
None
and
end
is
not
None
:
data
=
get_cooc
(
corpus
=
corpus
#, field1=field1 , field2=field2
,
start
=
start
,
end
=
end
,
threshold
=
threshold
,
distance
=
distance
data
=
get_graph
(
corpus
=
corpus
#, field1=field1 , field2=field2
,
mapList_id
=
mapList_id
,
groupList_id
=
groupList_id
,
start
=
start
,
end
=
end
,
threshold
=
threshold
,
distance
=
distance
)
else
:
data
=
get_
cooc
(
corpus
=
corpus
data
=
get_
graph
(
corpus
=
corpus
#, field1=field1, field2=field2
,
mapList_id
=
mapList_id
,
groupList_id
=
groupList_id
,
threshold
=
threshold
,
distance
=
distance
,
bridgeness
=
bridgeness
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment