Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
dbedae3c
Commit
dbedae3c
authored
Mar 28, 2016
by
delanoe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FACTO/FIX] Facto graphs, removing all the tests we made, cleaning code.
parent
c874d397
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
98 additions
and
98 deletions
+98
-98
cooccurrences.py
graphExplorer/cooccurrences.py
+54
-62
functions.py
graphExplorer/functions.py
+33
-25
rest.py
graphExplorer/rest.py
+11
-11
No files found.
graphExplorer/cooccurrences.py
View file @
dbedae3c
...
...
@@ -11,12 +11,10 @@ import datetime
def
do_cooc
(
corpus
=
None
,
field1
=
'ngrams'
,
field2
=
'ngrams'
,
mainList_id
=
None
,
stopList_id
=
None
,
groupList_id
=
None
,
mainList_id
=
None
,
groupList_id
=
None
,
coocNode_id
=
None
,
cvalue_id
=
None
,
n_min
=
1
,
n_max
=
None
,
start
=
None
,
end
=
None
,
limit
=
1000
,
n_min
=
1
,
n_max
=
None
,
limit
=
1000
,
isMonopartite
=
True
,
threshold
=
3
):
'''
...
...
@@ -24,9 +22,8 @@ def do_cooc(corpus=None
For the moment list of paramters are not supported because, lists need to
be merged before.
corpus :: Corpus
cvalue_id :: Int
mainList_id :: Int
stopList_id :: Int
groupList_id :: Int
For the moment, start and end are simple, only year is implemented yet
...
...
@@ -42,10 +39,13 @@ def do_cooc(corpus=None
# Get node
if
not
coocNode_id
:
coocNode_id0
=
(
session
.
query
(
Node
.
id
)
.
filter
(
Node
.
typename
==
"COOCCURRENCES"
,
Node
.
name
==
"GRAPH EXPLORER"
,
Node
.
parent_id
==
corpus
.
id
)
.
first
())
coocNode_id0
=
(
session
.
query
(
Node
.
id
)
.
filter
(
Node
.
typename
==
"COOCCURRENCES"
,
Node
.
name
==
"GRAPH EXPLORER"
,
Node
.
parent_id
==
corpus
.
id
)
.
first
()
)
if
not
coocNode_id
:
coocNode
=
corpus
.
add_child
(
typename
=
"COOCCURRENCES"
,
...
...
@@ -76,8 +76,8 @@ def do_cooc(corpus=None
#
# node_cooc.hyperdata = hyperdata
#
# For tests only
session
.
query
(
NodeNgramNgram
)
.
filter
(
NodeNgramNgram
.
node_id
==
coocNode_id
)
.
delete
()
# For tests only
: delete previous cooccurrences
session
.
query
(
NodeNgramNgram
)
.
filter
(
NodeNgramNgram
.
node_id
==
coocNode_id
)
.
delete
()
session
.
commit
()
...
...
@@ -85,37 +85,57 @@ def do_cooc(corpus=None
cooc_score
=
func
.
count
(
NodeNgramX
.
node_id
)
.
label
(
'cooc_score'
)
#cooc_score = func.sqrt(func.sum(NodeNgramX.weight * NodeNgramY.weight)).label('cooc_score')
#print([n for n in test_query])
if
isMonopartite
:
NodeNgramY
=
aliased
(
NodeNgram
)
cooc_query
=
(
session
.
query
(
NodeNgramX
.
ngram_id
,
NodeNgramY
.
ngram_id
,
cooc_score
)
.
join
(
Node
,
Node
.
id
==
NodeNgramX
.
node_id
)
.
join
(
NodeNgramY
,
NodeNgramY
.
node_id
==
Node
.
id
)
.
filter
(
Node
.
parent_id
==
corpus
.
id
,
Node
.
typename
==
"DOCUMENT"
)
)
cooc_query
=
(
session
.
query
(
NodeNgramX
.
ngram_id
,
NodeNgramY
.
ngram_id
,
cooc_score
)
.
join
(
Node
,
Node
.
id
==
NodeNgramX
.
node_id
)
.
join
(
NodeNgramY
,
NodeNgramY
.
node_id
==
Node
.
id
)
.
filter
(
Node
.
parent_id
==
corpus
.
id
,
Node
.
typename
==
"DOCUMENT"
)
)
else
:
NodeNgramY
=
aliased
(
NodeNgram
)
cooc_query
=
(
session
.
query
(
NodeHyperdataNgram
.
ngram_id
,
NodeNgramY
.
ngram_id
,
cooc_score
)
.
join
(
Node
,
Node
.
id
==
NodeHyperdataNgram
.
node_id
)
.
join
(
NodeNgramY
,
NodeNgramY
.
node_id
==
Node
.
id
)
.
join
(
Hyperdata
,
Hyperdata
.
id
==
NodeHyperdataNgram
.
hyperdata_id
)
.
filter
(
Node
.
parent_id
==
corpus
.
id
,
Node
.
typename
==
"DOCUMENT"
)
.
filter
(
Hyperdata
.
name
==
field1
)
)
#print(cooc_query)
cooc_query
=
(
session
.
query
(
NodeHyperdataNgram
.
ngram_id
,
NodeNgramY
.
ngram_id
,
cooc_score
)
.
join
(
Node
,
Node
.
id
==
NodeHyperdataNgram
.
node_id
)
.
join
(
NodeNgramY
,
NodeNgramY
.
node_id
==
Node
.
id
)
.
join
(
Hyperdata
,
Hyperdata
.
id
==
NodeHyperdataNgram
.
hyperdata_id
)
.
filter
(
Node
.
parent_id
==
corpus
.
id
,
Node
.
typename
==
"DOCUMENT"
)
.
filter
(
Hyperdata
.
name
==
field1
)
)
# Size of the ngrams between n_min and n_max
if
n_min
is
not
None
or
n_max
is
not
None
:
if
isMonopartite
:
NgramX
=
aliased
(
Ngram
)
cooc_query
=
cooc_query
.
join
(
NgramX
,
NgramX
.
id
==
NodeNgramX
.
ngram_id
)
cooc_query
=
cooc_query
.
join
(
NgramX
,
NgramX
.
id
==
NodeNgramX
.
ngram_id
)
NgramY
=
aliased
(
Ngram
)
cooc_query
=
(
cooc_query
.
join
(
NgramY
,
NgramY
.
id
==
NodeNgramY
.
ngram_id
)
)
cooc_query
=
cooc_query
.
join
(
NgramY
,
NgramY
.
id
==
NodeNgramY
.
ngram_id
)
if
n_min
is
not
None
:
cooc_query
=
(
cooc_query
...
...
@@ -173,42 +193,14 @@ def do_cooc(corpus=None
cooc_query
=
cooc_query
.
group_by
(
NodeHyperdataNgram
.
ngram_id
,
NodeNgramY
.
ngram_id
)
cooc_query
=
cooc_query
.
order_by
(
desc
(
'cooc_score'
))
# END of the query
matrix
=
WeightedMatrix
(
cooc_query
)
#print(matrix)
# Select according some scores
if
cvalue_id
is
not
None
:
#miam = get_or_create_node(nodetype='Cvalue', corpus=corpus)
cvalue_list
=
UnweightedList
(
session
.
query
(
NodeNodeNgram
.
ngram_id
)
.
filter
(
NodeNodeNgram
.
nodex_id
==
cvalue_id
)
.
all
()
)
if
isMonopartite
:
if
mainList_id
is
not
None
:
miam_list
=
UnweightedList
(
mainList_id
)
if
stopList_id
is
not
None
:
stop_list
=
UnweightedList
(
stopList_id
)
if
groupList_id
is
not
None
:
group_list
=
Translations
(
groupList_id
)
if
mainList_id
is
not
None
and
stopList_id
is
None
and
groupList_id
is
None
:
cooc
=
matrix
&
miam_list
elif
mainList_id
is
not
None
and
stopList_id
is
not
None
and
groupList_id
is
None
:
cooc
=
matrix
&
(
miam_list
-
stop_list
)
elif
mainList_id
is
not
None
and
stopList_id
is
not
None
and
groupList_id
is
not
None
:
print
(
"mainList_id is not None and stopList_id is not None and groupList_id is not None"
)
cooc
=
matrix
&
(
miam_list
*
group_list
-
stop_list
)
#cooc = matrix & (miam_list - stop_list)
elif
mainList_id
is
not
None
and
stopList_id
is
None
and
groupList_id
is
not
None
:
cooc
=
matrix
&
(
miam_list
*
group_list
)
else
:
cooc
=
matrix
else
:
cooc
=
matrix
mainList
=
UnweightedList
(
mainList_id
)
group_list
=
Translations
(
groupList_id
)
cooc
=
matrix
&
(
mainList
*
group_list
)
cooc
.
save
(
coocNode_id
)
return
(
coocNode_id
)
graphExplorer/functions.py
View file @
dbedae3c
...
...
@@ -29,30 +29,36 @@ def get_cooc( request=None, corpus=None
,
distance
=
'conditional'
,
size
=
1000
,
bridgeness
=
5
,
mainList_id
=
None
,
sto
pList_id
=
None
,
mainList_id
=
None
,
grou
pList_id
=
None
):
'''
get_ccoc : to compute the graph.
'''
data
=
{}
#if session.query(Node).filter(Node.type_id==type_cooc_id, Node.parent_id==corpus_id).first() is None:
print
(
"Cooccurrences do not exist yet, creating it."
)
if
stopList_id
==
None
:
stopList_id
=
(
session
.
query
(
Node
.
id
)
.
filter
(
Node
.
typename
==
"STOPLIST"
,
Node
.
parent_id
==
corpus
.
id
)
.
first
())
if
stopList_id
==
None
:
raise
ValueError
(
"STOPLIST node needed for mainlist creation"
)
if
mainList_id
==
None
:
stopList_id
=
(
session
.
query
(
Node
.
id
)
.
filter
(
Node
.
typename
==
"STOPLIST"
,
Node
.
parent_id
==
corpus
.
id
)
.
first
())
if
not
mainList_id
==
None
:
raise
ValueError
(
"STOPLIST node needed for mainlist creation"
)
if
mainList_id
==
None
:
mainList_id
=
(
session
.
query
(
Node
.
id
)
.
filter
(
Node
.
typename
==
"MAINLIST"
,
Node
.
parent_id
==
corpus
.
id
)
.
first
()
)
if
mainList_id
==
None
:
raise
ValueError
(
"MAINLIST node needed for cooccurrences"
)
if
groupList_id
==
None
:
groupList_id
=
(
session
.
query
(
Node
.
id
)
.
filter
(
Node
.
typename
==
"GROUPLIST"
,
Node
.
parent_id
==
corpus
.
id
)
.
first
()
)
if
groupList_id
==
None
:
raise
ValueError
(
"GROUPLIST node needed for cooccurrences"
)
# compute_cooc needs group, fields etc.
...
...
@@ -72,16 +78,18 @@ def get_cooc( request=None, corpus=None
corpus
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
corpus_id
)
.
first
()
cooc_id
=
do_cooc
(
corpus
=
corpus
#, field1="ngrams", field2="ngrams"
,
mainList_id
=
mainList_id
,
stopList_id
=
stopList_id
#, group_id=group_id
#, isMonopartite=True
,
start
=
start
,
end
=
end
,
threshold
=
threshold
#, limit=size
)
#, field1="ngrams", field2="ngrams"
,
mainList_id
=
int
(
mainList_id
[
0
]),
groupList_id
=
int
(
groupList_id
[
0
])
#, isMonopartite=True
,
start
=
start
,
end
=
end
,
threshold
=
threshold
#, limit=size
)
G
,
partition
,
ids
,
weight
=
do_distance
(
cooc_id
,
field1
=
"ngrams"
,
field2
=
"ngrams"
,
isMonopartite
=
True
,
distance
=
distance
)
G
,
partition
,
ids
,
weight
=
do_distance
(
cooc_id
,
field1
=
"ngrams"
,
field2
=
"ngrams"
,
isMonopartite
=
True
,
distance
=
distance
)
if
type
==
"node_link"
:
nodesB_dict
=
{}
for
node_id
in
G
.
nodes
():
...
...
graphExplorer/rest.py
View file @
dbedae3c
...
...
@@ -8,9 +8,9 @@ from graphExplorer.functions import get_cooc
# TODO check authentication
class
Graph
(
APIView
):
#authentication_classes = (SessionAuthentication, BasicAuthentication)
#@requires_auth
'''
REST part for graphs.
'''
def
get
(
self
,
request
,
project_id
,
corpus_id
):
'''
Graph.get :: Get graph data as REST api.
...
...
@@ -23,8 +23,8 @@ class Graph(APIView):
field1
=
str
(
request
.
GET
.
get
(
'field1'
,
'ngrams'
))
field2
=
str
(
request
.
GET
.
get
(
'field2'
,
'ngrams'
))
start
=
request
.
GET
.
get
(
'start'
,
None
)
end
=
request
.
GET
.
get
(
'end'
,
None
)
start
=
request
.
GET
.
get
(
'start'
,
None
)
end
=
request
.
GET
.
get
(
'end'
,
None
)
threshold
=
int
(
request
.
GET
.
get
(
'threshold'
,
1
))
bridgeness
=
int
(
request
.
GET
.
get
(
'bridgeness'
,
-
1
))
...
...
@@ -43,17 +43,17 @@ class Graph(APIView):
if
field2
in
accepted_field2
:
if
start
is
not
None
and
end
is
not
None
:
data
=
get_cooc
(
corpus
=
corpus
#, field1=field1 , field2=field2
#, field1=field1 , field2=field2
,
start
=
start
,
end
=
end
,
threshold
=
threshold
,
distance
=
distance
)
else
:
data
=
get_cooc
(
corpus
=
corpus
#, field1=field1, field2=field2
,
threshold
=
threshold
,
distance
=
distance
,
bridgeness
=
bridgeness
)
#, field1=field1, field2=field2
,
threshold
=
threshold
,
distance
=
distance
,
bridgeness
=
bridgeness
)
if
format_
==
'json'
:
return
JsonHttpResponse
(
data
)
else
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment