Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
db229347
Commit
db229347
authored
Oct 15, 2015
by
delanoe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CLEAN] removing old raw SQL queries.
parent
72238540
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
154 deletions
+14
-154
functions.py
analysis/functions.py
+14
-154
No files found.
analysis/functions.py
View file @
db229347
...
@@ -29,145 +29,9 @@ from ngram.lists import listIds
...
@@ -29,145 +29,9 @@ from ngram.lists import listIds
def
diag_null
(
x
):
def
diag_null
(
x
):
return
x
-
x
*
scipy
.
eye
(
x
.
shape
[
0
])
return
x
-
x
*
scipy
.
eye
(
x
.
shape
[
0
])
def
create_blacklist
(
user
,
corpus
):
pass
def
create_synonymes
(
user
,
corpus
):
pass
size
=
1000
size
=
1000
def
create_whitelist
(
user
,
corpus_id
,
size
=
size
,
count_min
=
2
,
miam_id
=
None
):
if
miam_id
is
None
:
PrintException
()
cursor
=
connection
.
cursor
()
whitelist_type_id
=
cache
.
NodeType
[
'WhiteList'
]
.
id
blacklist_type_id
=
cache
.
NodeType
[
'BlackList'
]
.
id
type_document_id
=
cache
.
NodeType
[
'Document'
]
.
id
white_list
=
Node
(
name
=
'WhiteList Corpus '
+
str
(
corpus_id
),
user_id
=
user
.
id
,
parent_id
=
corpus_id
,
type_id
=
whitelist_type_id
)
black_list
=
Node
(
name
=
'BlackList Corpus '
+
str
(
corpus_id
),
user_id
=
user
.
id
,
parent_id
=
corpus_id
,
type_id
=
blacklist_type_id
)
session
.
add
(
white_list
)
session
.
add
(
black_list
)
session
.
commit
()
# delete avant pour éviter les doublons
# try:
# Node_Ngram.objects.filter(node=white_list).all().delete()
# except:
# print('First time we compute cooc')
#
query_whitelist
=
"""
INSERT INTO node_node_ngram (node_id, ngram_id, weight)
SELECT
%
d,
ngX.id,
COUNT(*) AS occurrences
FROM
node_node AS n
INNER JOIN
node_node_ngram AS nngX ON nngX.node_id = n.id
INNER JOIN
node_ngram AS ngX ON ngX.id = nngX.ngram_id
INNER JOIN
node_node_ngram AS miam ON ngX.id = miam.ngram_id
WHERE
n.parent_id =
%
d
AND
n.type_id =
%
d
AND
miam.node_id =
%
d
AND
ngX.n >= 2
AND
ngX.n <= 3
GROUP BY
ngX.id
Having
COUNT(*) >=
%
d
ORDER BY
occurrences DESC
LIMIT
%
d
;
"""
%
(
white_list
.
id
,
int
(
corpus_id
),
int
(
type_document_id
),
int
(
miam_id
),
count_min
,
size
)
# print("PRINTING QYERY OF WHITELIST:")
# print(query_whitelist)
cursor
.
execute
(
query_whitelist
)
return
white_list
#def create_cooc(user, corpus, whitelist, blacklist, synonymes):
def
create_cooc
(
user
=
None
,
corpus_id
=
None
,
whitelist
=
None
,
size
=
size
,
year_start
=
None
,
year_end
=
None
):
cursor
=
connection
.
cursor
()
cooc_type_id
=
cache
.
NodeType
[
'Cooccurrence'
]
.
id
# pour les tests on supprime les cooc
#session.Node.objects.filter(type=cooc_type, parent=corpus).delete()
cooc
=
Node
(
user_id
=
user
.
id
,
\
parent_id
=
corpus_id
,
\
type_id
=
cooc_type_id
,
\
name
=
"Cooccurrences corpus "
+
str
(
corpus_id
))
session
.
add
(
cooc
)
session
.
commit
()
query_cooc
=
"""
INSERT INTO node_nodengramngram (node_id, "ngramx_id", "ngramy_id", score)
SELECT
%
d as node_id,
ngX.id,
ngY.id,
COUNT(*) AS score
FROM
node_node AS n -- the nodes who are direct children of the corpus
INNER JOIN
node_node_ngram AS nngX ON nngX.node_id = n.id -- list of ngrams contained in the node
INNER JOIN
node_node_ngram AS whitelistX ON whitelistX.ngram_id = nngX.ngram_id -- list of ngrams contained in the whitelist and in the node
INNER JOIN
node_ngram AS ngX ON ngX.id = whitelistX.ngram_id -- ngrams which are in both
INNER JOIN
node_node_ngram AS nngY ON nngY.node_id = n.id
INNER JOIN
node_node_ngram AS whitelistY ON whitelistY.ngram_id = nngY.ngram_id
INNER JOIN
node_ngram AS ngY ON ngY.id = whitelistY.ngram_id
WHERE
n.parent_id =
%
s
AND
whitelistX.node_id =
%
s
AND
whitelistY.node_id =
%
s
AND
nngX.ngram_id < nngY.ngram_id -- so we only get distinct pairs of ngrams
GROUP BY
ngX.id,
ngX.terms,
ngY.id,
ngY.terms
ORDER BY
score DESC
LIMIT
%
d
"""
%
(
cooc
.
id
,
corpus_id
,
whitelist
.
id
,
whitelist
.
id
,
size
)
# print(query_cooc)
cursor
.
execute
(
query_cooc
)
return
cooc
.
id
def
get_cooc
(
request
=
None
,
corpus
=
None
,
cooc_id
=
None
,
type
=
'node_link'
,
size
=
size
):
def
get_cooc
(
request
=
None
,
corpus
=
None
,
cooc_id
=
None
,
type
=
'node_link'
,
size
=
size
):
'''
'''
...
@@ -255,7 +119,6 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
...
@@ -255,7 +119,6 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
matrix_filtered
=
np
.
where
(
xxx
>=
threshold
,
xxx
,
0
)
matrix_filtered
=
np
.
where
(
xxx
>=
threshold
,
xxx
,
0
)
#matrix_filtered = matrix_filtered.resize((90,90))
#matrix_filtered = matrix_filtered.resize((90,90))
try
:
G
=
nx
.
from_numpy_matrix
(
np
.
matrix
(
matrix_filtered
))
G
=
nx
.
from_numpy_matrix
(
np
.
matrix
(
matrix_filtered
))
#G = nx.from_numpy_matrix(matrix_filtered, create_using=nx.MultiDiGraph())
#G = nx.from_numpy_matrix(matrix_filtered, create_using=nx.MultiDiGraph())
...
@@ -270,9 +133,6 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
...
@@ -270,9 +133,6 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
partition
=
best_partition
(
uG
)
partition
=
best_partition
(
uG
)
print
(
partition
)
print
(
partition
)
print
(
"Density of the graph:"
,
nx
.
density
(
G
))
print
(
"Density of the graph:"
,
nx
.
density
(
G
))
except
:
print
(
"-"
*
30
)
PrintException
()
if
type
==
"node_link"
:
if
type
==
"node_link"
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment