Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
5582c35f
Commit
5582c35f
authored
Oct 08, 2015
by
delanoe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] labels for nodes.
parent
94929700
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
37 additions
and
13 deletions
+37
-13
cooccurrences.py
analysis/cooccurrences.py
+31
-5
functions.py
analysis/functions.py
+6
-8
No files found.
analysis/cooccurrences.py
View file @
5582c35f
...
@@ -3,7 +3,8 @@ from sqlalchemy import literal_column
...
@@ -3,7 +3,8 @@ from sqlalchemy import literal_column
from
sqlalchemy.orm
import
aliased
from
sqlalchemy.orm
import
aliased
from
sqlalchemy.sql
import
func
from
sqlalchemy.sql
import
func
from
gargantext_web.db
import
Node
,
NodeNgram
,
NodeNgramNgram
,
NodeNodeNgram
,
NodeHyperdata
,
Hyperdata
from
gargantext_web.db
import
Node
,
Ngram
,
NodeNgram
,
NodeNgramNgram
,
\
NodeNodeNgram
,
NodeHyperdata
,
Hyperdata
from
gargantext_web.db
import
session
,
cache
,
get_or_create_node
,
bulk_insert
from
gargantext_web.db
import
session
,
cache
,
get_or_create_node
,
bulk_insert
from
analysis.lists
import
WeightedMatrix
,
UnweightedList
,
Translations
from
analysis.lists
import
WeightedMatrix
,
UnweightedList
,
Translations
...
@@ -11,6 +12,7 @@ def cooc(corpus=None
...
@@ -11,6 +12,7 @@ def cooc(corpus=None
,
field_X
=
None
,
field_Y
=
None
,
field_X
=
None
,
field_Y
=
None
,
miam_id
=
None
,
stop_id
=
None
,
group_id
=
None
,
miam_id
=
None
,
stop_id
=
None
,
group_id
=
None
,
cvalue_id
=
None
,
cvalue_id
=
None
,
n_min
=
2
,
n_max
=
None
,
start
=
None
,
end
=
None
,
start
=
None
,
end
=
None
,
limit
=
1000
):
,
limit
=
1000
):
'''
'''
...
@@ -57,8 +59,30 @@ def cooc(corpus=None
...
@@ -57,8 +59,30 @@ def cooc(corpus=None
.
join
(
NodeNgramY
,
NodeNgramY
.
node_id
==
Node
.
id
)
.
join
(
NodeNgramY
,
NodeNgramY
.
node_id
==
Node
.
id
)
.
filter
(
Node
.
parent_id
==
corpus
.
id
,
Node
.
type_id
==
doc_id
)
.
filter
(
Node
.
parent_id
==
corpus
.
id
,
Node
.
type_id
==
doc_id
)
)
)
# Size of the ngrams between n_min and n_max
if
n_min
is
not
None
or
n_max
is
not
None
:
NgramX
=
aliased
(
Ngram
)
NgramY
=
aliased
(
Ngram
)
cooc_query
=
(
cooc_query
.
join
(
NgramX
,
NgramX
.
id
==
NodeNgramX
.
ngram_id
)
.
join
(
NgramY
,
NgramY
.
id
==
NodeNgramY
.
ngram_id
)
)
if
n_min
is
not
None
:
cooc_query
=
(
cooc_query
.
filter
(
NgramX
.
n
>=
n_min
)
.
filter
(
NgramY
.
n
>=
n_min
)
)
if
n_max
is
not
None
:
cooc_query
=
(
cooc_query
.
filter
(
NgramX
.
n
>=
n_min
)
.
filter
(
NgramY
.
n
>=
n_min
)
)
# Cooc between the dates start and end
if
start
is
not
None
:
if
start
is
not
None
:
Start
=
aliased
(
NodeHyperdata
)
Start
=
aliased
(
NodeHyperdata
)
StartFormat
=
aliased
(
Hyperdata
)
StartFormat
=
aliased
(
Hyperdata
)
...
@@ -79,11 +103,12 @@ def cooc(corpus=None
...
@@ -79,11 +103,12 @@ def cooc(corpus=None
)
)
# Cooc is symmetric, take only the main cooccurrences and cut at the limit
cooc_query
=
(
cooc_query
.
filter
(
Node
.
parent_id
==
corpus
.
id
,
Node
.
type_id
==
doc_id
)
cooc_query
=
(
cooc_query
.
filter
(
Node
.
parent_id
==
corpus
.
id
,
Node
.
type_id
==
doc_id
)
.
filter
(
NodeNgramX
.
ngram_id
<
NodeNgramY
.
ngram_id
)
.
filter
(
NodeNgramX
.
ngram_id
<
NodeNgramY
.
ngram_id
)
.
group_by
(
Node
.
id
,
Node
NgramX
.
ngram_id
,
NodeNgramY
.
ngram_id
)
.
group_by
(
NodeNgramX
.
ngram_id
,
NodeNgramY
.
ngram_id
)
.
order_by
(
func
.
count
(
))
.
order_by
(
desc
(
func
.
count
()
))
.
limit
(
limit
)
.
limit
(
limit
)
)
)
...
@@ -91,6 +116,7 @@ def cooc(corpus=None
...
@@ -91,6 +116,7 @@ def cooc(corpus=None
matrix
=
WeightedMatrix
(
cooc_query
)
matrix
=
WeightedMatrix
(
cooc_query
)
#print(matrix)
#print(matrix)
# Select according to some scores
if
cvalue_id
is
not
None
:
if
cvalue_id
is
not
None
:
#miam = get_or_create_node(nodetype='Cvalue', corpus=corpus)
#miam = get_or_create_node(nodetype='Cvalue', corpus=corpus)
cvalue_list
=
UnweightedList
(
session
.
query
(
NodeNodeNgram
.
ngram_id
)
cvalue_list
=
UnweightedList
(
session
.
query
(
NodeNodeNgram
.
ngram_id
)
...
...
analysis/functions.py
View file @
5582c35f
...
@@ -34,7 +34,6 @@ def create_blacklist(user, corpus):
...
@@ -34,7 +34,6 @@ def create_blacklist(user, corpus):
def
create_synonymes
(
user
,
corpus
):
def
create_synonymes
(
user
,
corpus
):
pass
pass
size
=
1000
size
=
1000
def
create_whitelist
(
user
,
corpus_id
,
size
=
size
,
count_min
=
2
,
miam_id
=
None
):
def
create_whitelist
(
user
,
corpus_id
,
size
=
size
,
count_min
=
2
,
miam_id
=
None
):
...
@@ -170,7 +169,9 @@ def create_cooc(user=None, corpus_id=None, whitelist=None, size=size, year_start
...
@@ -170,7 +169,9 @@ def create_cooc(user=None, corpus_id=None, whitelist=None, size=size, year_start
return
cooc
.
id
return
cooc
.
id
def
get_cooc
(
request
=
None
,
corpus
=
None
,
cooc_id
=
None
,
type
=
'node_link'
,
size
=
size
):
def
get_cooc
(
request
=
None
,
corpus
=
None
,
cooc_id
=
None
,
type
=
'node_link'
,
size
=
size
):
'''
get_cooc : to compute the graph.
'''
matrix
=
defaultdict
(
lambda
:
defaultdict
(
float
))
matrix
=
defaultdict
(
lambda
:
defaultdict
(
float
))
ids
=
dict
()
ids
=
dict
()
labels
=
dict
()
labels
=
dict
()
...
@@ -185,16 +186,15 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
...
@@ -185,16 +186,15 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
# data deleted each time
# data deleted each time
session
.
query
(
NodeNgramNgram
)
.
filter
(
NodeNgramNgram
.
node_id
==
cooc_id
)
.
delete
()
session
.
query
(
NodeNgramNgram
)
.
filter
(
NodeNgramNgram
.
node_id
==
cooc_id
)
.
delete
()
#cooc_id = cooc(corpus=corpus, miam_id=miam_id, stop_id=stop_id, limit=size)
cooc_id
=
cooc
(
corpus
=
corpus
,
miam_id
=
miam_id
,
group_id
=
group_id
,
stop_id
=
stop_id
,
limit
=
size
)
cooc_id
=
cooc
(
corpus
=
corpus
,
miam_id
=
miam_id
,
group_id
=
group_id
,
stop_id
=
stop_id
,
limit
=
size
)
#cooc_id = cooc(corpus=corpus, miam_id=miam_id, limit=size)
#print([n for n in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all()])
#print([n for n in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all()])
for
cooccurrence
in
session
.
query
(
NodeNgramNgram
)
.
filter
(
NodeNgramNgram
.
node_id
==
cooc_id
)
.
all
():
for
cooccurrence
in
session
.
query
(
NodeNgramNgram
)
.
filter
(
NodeNgramNgram
.
node_id
==
cooc_id
)
.
all
():
#print(cooccurrence)
#print(cooccurrence)
# print(cooccurrence.ngramx.terms," <=> ",cooccurrence.ngramy.terms,"\t",cooccurrence.score)
# print(cooccurrence.ngramx.terms," <=> ",cooccurrence.ngramy.terms,"\t",cooccurrence.score)
labels
[
cooccurrence
.
ngramx_id
]
=
session
.
query
(
Ngram
.
id
)
.
filter
(
Ngram
.
id
==
cooccurrence
.
ngramx_id
)
.
first
()[
0
]
# TODO clean this part, useless
labels
[
cooccurrence
.
ngramy_id
]
=
session
.
query
(
Ngram
.
id
)
.
filter
(
Ngram
.
id
==
cooccurrence
.
ngramy_id
)
.
first
()[
0
]
labels
[
cooccurrence
.
ngramx_id
]
=
cooccurrence
.
ngramx_id
#session.query(Ngram.id).filter(Ngram.id == cooccurrence.ngramx_id).first()[0]
labels
[
cooccurrence
.
ngramy_id
]
=
cooccurrence
.
ngramy_id
#session.query(Ngram.id).filter(Ngram.id == cooccurrence.ngramy_id).first()[0]
matrix
[
cooccurrence
.
ngramx_id
][
cooccurrence
.
ngramy_id
]
=
cooccurrence
.
score
matrix
[
cooccurrence
.
ngramx_id
][
cooccurrence
.
ngramy_id
]
=
cooccurrence
.
score
matrix
[
cooccurrence
.
ngramy_id
][
cooccurrence
.
ngramx_id
]
=
cooccurrence
.
score
matrix
[
cooccurrence
.
ngramy_id
][
cooccurrence
.
ngramx_id
]
=
cooccurrence
.
score
...
@@ -205,7 +205,6 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
...
@@ -205,7 +205,6 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
weight
[
cooccurrence
.
ngramx_id
]
=
weight
.
get
(
cooccurrence
.
ngramx_id
,
0
)
+
cooccurrence
.
score
weight
[
cooccurrence
.
ngramx_id
]
=
weight
.
get
(
cooccurrence
.
ngramx_id
,
0
)
+
cooccurrence
.
score
weight
[
cooccurrence
.
ngramy_id
]
=
weight
.
get
(
cooccurrence
.
ngramy_id
,
0
)
+
cooccurrence
.
score
weight
[
cooccurrence
.
ngramy_id
]
=
weight
.
get
(
cooccurrence
.
ngramy_id
,
0
)
+
cooccurrence
.
score
x
=
pd
.
DataFrame
(
matrix
)
.
fillna
(
0
)
x
=
pd
.
DataFrame
(
matrix
)
.
fillna
(
0
)
y
=
pd
.
DataFrame
(
matrix
)
.
fillna
(
0
)
y
=
pd
.
DataFrame
(
matrix
)
.
fillna
(
0
)
...
@@ -280,7 +279,6 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
...
@@ -280,7 +279,6 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
#node,type(labels[node])
#node,type(labels[node])
G
.
node
[
node
][
'pk'
]
=
ids
[
node
]
G
.
node
[
node
][
'pk'
]
=
ids
[
node
]
G
.
node
[
node
][
'label'
]
=
session
.
query
(
Ngram
.
terms
)
.
filter
(
Ngram
.
id
==
node
)
.
first
()
G
.
node
[
node
][
'label'
]
=
session
.
query
(
Ngram
.
terms
)
.
filter
(
Ngram
.
id
==
node
)
.
first
()
# G.node[node]['pk'] = ids[str(node)]
G
.
node
[
node
][
'size'
]
=
weight
[
ids
[
node
]]
G
.
node
[
node
][
'size'
]
=
weight
[
ids
[
node
]]
G
.
node
[
node
][
'group'
]
=
partition
[
node
]
G
.
node
[
node
][
'group'
]
=
partition
[
node
]
# G.add_edge(node, "cluster " + str(partition[node]), weight=3)
# G.add_edge(node, "cluster " + str(partition[node]), weight=3)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment