Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
48579d8d
Commit
48579d8d
authored
Nov 26, 2014
by
Administrator
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FEATURE] Functions to create whitelist and cooccurrence objects.
parent
cfa0fabb
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
141 additions
and
4 deletions
+141
-4
functions.py
functions.py
+133
-0
urls.py
gargantext_web/urls.py
+0
-1
views.py
gargantext_web/views.py
+8
-3
No files found.
functions.py
0 → 100644
View file @
48579d8d
from
node.models
import
Language
,
ResourceType
,
Resource
,
\
Node
,
NodeType
,
Node_Resource
,
Project
,
Corpus
,
\
Node_Ngram
,
NodeNgramNgram
from
collections
import
defaultdict
from
django.db
import
connection
,
transaction
def create_blacklist(user, corpus):
    """Placeholder for blacklist creation.

    Not implemented yet: both arguments are accepted and ignored, no work
    is performed, and ``None`` is returned.
    """
    return None
def create_synonymes(user, corpus):
    """Placeholder for synonym-list creation.

    Not implemented yet: both arguments are accepted and ignored, no work
    is performed, and ``None`` is returned.
    """
    return None
def create_whitelist(user, corpus):
    """Create whitelist and blacklist nodes under ``corpus`` and fill the whitelist.

    Two new ``Node`` rows are created as children of ``corpus`` and owned by
    ``user``: one of type 'WhiteList' and one of type 'BlackList'. The two
    ``NodeType`` rows are created on demand when they do not exist yet.

    The whitelist node is then populated by a single ``INSERT ... SELECT``:
    for every ngram attached (through ``node_node_ngram``) to a child node of
    the corpus whose ``type_id`` is 4, the number of matching rows is counted
    and stored as ``weight``; only the 100 highest counts are kept.

    Args:
        user: owner assigned to both new nodes.
        corpus: parent node; its ``id`` is used in the node names and as the
            ``parent_id`` filter of the query.

    Returns:
        The newly created whitelist ``Node``. The blacklist node is created
        as a side effect but is not returned.
    """
    # Resolve each type independently: a shared try/except would re-create
    # 'WhiteList' whenever only 'BlackList' is missing, and a bare except
    # would also hide unrelated database errors.
    whitelist_type, _ = NodeType.objects.get_or_create(name='WhiteList')
    blacklist_type, _ = NodeType.objects.get_or_create(name='BlackList')

    white_list = Node.objects.create(
        name='WhiteList Corpus' + str(corpus.id),
        user=user,
        parent=corpus,
        type=whitelist_type,
    )
    # The blacklist node is only created here; nothing fills it yet.
    Node.objects.create(
        name='BlackList Corpus' + str(corpus.id),
        user=user,
        parent=corpus,
        type=blacklist_type,
    )

    # No prior cleanup is needed to avoid duplicates: the rows inserted below
    # are attached to the whitelist node that was created just above.
    #
    # NOTE(review): ``n.type_id = 4`` is a hard-coded primary key, presumably
    # the document node type -- confirm, and consider looking it up by name.
    query_whitelist = """
        INSERT INTO node_node_ngram (node_id, ngram_id, weight)
        SELECT
            %s,
            ngX.id,
            COUNT(*) AS occurrences
        FROM
            node_node AS n
        INNER JOIN
            node_node_ngram AS nngX ON nngX.node_id = n.id
        INNER JOIN
            node_ngram AS ngX ON ngX.id = nngX.ngram_id
        WHERE
            n.parent_id = %s
        AND
            n.type_id = 4
        AND
            ngX.n >= 1
        GROUP BY
            ngX.id
        HAVING
            COUNT(*) >= 1
        ORDER BY
            occurrences DESC
        LIMIT
            100
        ;
    """

    # Values are bound by the database driver instead of being interpolated
    # into the SQL text, and the cursor is released even if execution fails.
    cursor = connection.cursor()
    try:
        cursor.execute(query_whitelist, [white_list.id, corpus.id])
    finally:
        cursor.close()

    return white_list
#def create_cooc(user, corpus, whitelist, blacklist, synonymes):
def create_cooc(user=None, corpus=None, whitelist=None):
    """Create a cooccurrence node for ``corpus`` and compute its ngram pairs.

    Any existing 'Cooccurrence' node that is a child of ``corpus`` is deleted
    first, then a fresh one is created (the 'Cooccurrence' ``NodeType`` is
    created on demand). A single ``INSERT ... SELECT`` then fills
    ``node_nodengramngram``: for every child node of the corpus, pairs of
    ngrams that are both attached to that node and both present in
    ``whitelist`` are counted; the 150 highest-scoring pairs are stored with
    the count as ``score``.

    Args:
        user: owner assigned to the new cooccurrence node.
        corpus: parent node whose children are scanned. Required despite the
            ``None`` default.
        whitelist: node whose ngrams restrict which pairs are counted.
            Required despite the ``None`` default.

    Returns:
        The newly created cooccurrence ``Node``.

    Raises:
        ValueError: if ``corpus`` or ``whitelist`` is ``None``.
    """
    # Fail before touching the database: with ``corpus=None`` the cleanup
    # filter below would match (and delete) every parentless cooccurrence
    # node before the function eventually crashed on ``corpus.pk``.
    if corpus is None:
        raise ValueError("create_cooc() requires a corpus")
    if whitelist is None:
        raise ValueError("create_cooc() requires a whitelist")

    # get_or_create only creates the type when it is truly missing, instead
    # of treating any exception as "not found".
    cooc_type, _ = NodeType.objects.get_or_create(name='Cooccurrence')

    # For testing purposes, previous cooccurrence nodes of this corpus are
    # removed before a new one is computed.
    Node.objects.filter(type=cooc_type, parent=corpus).delete()

    cooc = Node.objects.create(
        user=user,
        parent=corpus,
        type=cooc_type,
        name="Cooccurrences corpus " + str(corpus.pk),
    )

    query_cooc = """
        INSERT INTO node_nodengramngram (node_id, "ngramx_id", "ngramy_id", score)
        SELECT
            %s as node_id,
            ngX.id,
            ngY.id,
            COUNT(*) AS score
        FROM
            node_node AS n  -- the nodes who are direct children of the corpus
        INNER JOIN
            node_node_ngram AS nngX ON nngX.node_id = n.id  -- list of ngrams contained in the node
        INNER JOIN
            node_node_ngram AS whitelistX ON whitelistX.ngram_id = nngX.ngram_id  -- list of ngrams contained in the whitelist and in the node
        INNER JOIN
            node_ngram AS ngX ON ngX.id = whitelistX.ngram_id  -- ngrams which are in both
        INNER JOIN
            node_node_ngram AS nngY ON nngY.node_id = n.id
        INNER JOIN
            node_node_ngram AS whitelistY ON whitelistY.ngram_id = nngY.ngram_id
        INNER JOIN
            node_ngram AS ngY ON ngY.id = whitelistY.ngram_id
        WHERE
            n.parent_id = %s
        AND
            whitelistX.node_id = %s
        AND
            whitelistY.node_id = %s
        AND
            nngX.ngram_id < nngY.ngram_id  -- so we only get distinct pairs of ngrams
        GROUP BY
            ngX.id,
            ngX.terms,
            ngY.id,
            ngY.terms
        ORDER BY
            score DESC
        LIMIT
            150
    """

    # Values are bound by the database driver instead of being interpolated
    # into the SQL text, and the cursor is released even if execution fails.
    cursor = connection.cursor()
    try:
        cursor.execute(
            query_cooc,
            [cooc.pk, corpus.id, whitelist.id, whitelist.id],
        )
    finally:
        cursor.close()

    return cooc
gargantext_web/urls.py
View file @
48579d8d
...
...
@@ -26,7 +26,6 @@ urlpatterns = patterns('',
url
(
r'^project/(\d+)/corpus/(\d+)/$'
,
views
.
corpus
),
url
(
r'^project/(\d+)/corpus/(\d+)/delete/$'
,
views
.
delete_corpus
),
url
(
r'^project/(\d+)/corpus/(\d+)/data.csv$'
,
views
.
send_csv
),
url
(
r'^graph$'
,
views
.
explorer_graph
),
url
(
r'^chart$'
,
views
.
explorer_chart
),
...
...
gargantext_web/views.py
View file @
48579d8d
...
...
@@ -7,8 +7,9 @@ from django.template import Context
#from documents.models import Project, Corpus, Document
from
node.models
import
Language
,
ResourceType
,
Resource
from
node.models
import
Node
,
NodeType
,
Node_Resource
,
Project
,
Corpus
,
NodeNgramNgram
from
node.models
import
Language
,
ResourceType
,
Resource
,
\
Node
,
NodeType
,
Node_Resource
,
Project
,
Corpus
,
\
Node_Ngram
,
NodeNgramNgram
from
node.admin
import
CorpusForm
,
ProjectForm
,
ResourceForm
from
django.contrib.auth.models
import
User
...
...
@@ -20,6 +21,7 @@ from dateutil.parser import parse
from
django.db
import
connection
from
django
import
forms
from
collections
import
defaultdict
from
parsing.FileParsers
import
*
...
...
@@ -483,7 +485,7 @@ def json_node_link(request):
matrix
=
defaultdict
(
lambda
:
defaultdict
(
float
))
labels
=
dict
()
cooc
=
Node
.
objects
.
get
(
id
=
61314
)
cooc
=
Node
.
objects
.
get
(
id
=
81249
)
for
cooccurrence
in
NodeNgramNgram
.
objects
.
filter
(
node
=
cooc
):
labels
[
cooccurrence
.
ngramx
.
id
]
=
cooccurrence
.
ngramx
.
terms
...
...
@@ -544,3 +546,6 @@ def graph_it(request):
'date'
:
date
,
}))
return
HttpResponse
(
html
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment