Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
5ca9085b
Commit
5ca9085b
authored
Oct 15, 2015
by
delanoe
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[RENAME] cooc function > do_cooc.
parent
db229347
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
18 additions
and
22 deletions
+18
-22
cooccurrences.py
analysis/cooccurrences.py
+5
-5
functions.py
analysis/functions.py
+11
-15
specificity.py
ngram/specificity.py
+2
-2
No files found.
analysis/cooccurrences.py
View file @
5ca9085b
...
@@ -11,7 +11,7 @@ import inspect
...
@@ -11,7 +11,7 @@ import inspect
# keep list
# keep list
def
cooc
(
corpus
=
None
def
do_
cooc
(
corpus
=
None
,
field_X
=
None
,
field_Y
=
None
,
field_X
=
None
,
field_Y
=
None
,
miam_id
=
None
,
stop_id
=
None
,
group_id
=
None
,
miam_id
=
None
,
stop_id
=
None
,
group_id
=
None
,
cvalue_id
=
None
,
cvalue_id
=
None
...
@@ -110,7 +110,7 @@ def cooc(corpus=None
...
@@ -110,7 +110,7 @@ def cooc(corpus=None
# Cooc is symetric, take only the main cooccurrences and cut at the limit
# Cooc is symetric, take only the main cooccurrences and cut at the limit
cooc_query
=
(
cooc_query
cooc_query
=
(
cooc_query
.
filter
(
NodeNgramX
.
ngram_id
<
NodeNgramY
.
ngram_id
)
.
filter
(
NodeNgramX
.
ngram_id
<
NodeNgramY
.
ngram_id
)
.
having
(
cooc_score
>
2
)
.
having
(
cooc_score
>
4
)
#.having(cooc_score > 1)
#.having(cooc_score > 1)
.
group_by
(
NodeNgramX
.
ngram_id
,
NodeNgramY
.
ngram_id
)
.
group_by
(
NodeNgramX
.
ngram_id
,
NodeNgramY
.
ngram_id
)
...
@@ -130,11 +130,9 @@ def cooc(corpus=None
...
@@ -130,11 +130,9 @@ def cooc(corpus=None
)
)
if
miam_id
is
not
None
:
if
miam_id
is
not
None
:
#miam = get_or_create_node(nodetype='Cvalue', corpus=corpus)
miam_list
=
UnweightedList
(
miam_id
)
miam_list
=
UnweightedList
(
miam_id
)
if
stop_id
is
not
None
:
if
stop_id
is
not
None
:
#stop = get_or_create_node(nodetype='StopList', corpus=corpus)
stop_list
=
UnweightedList
(
stop_id
)
stop_list
=
UnweightedList
(
stop_id
)
if
group_id
is
not
None
:
if
group_id
is
not
None
:
...
@@ -145,7 +143,9 @@ def cooc(corpus=None
...
@@ -145,7 +143,9 @@ def cooc(corpus=None
elif
miam_id
is
not
None
and
stop_id
is
not
None
and
group_id
is
None
:
elif
miam_id
is
not
None
and
stop_id
is
not
None
and
group_id
is
None
:
cooc
=
matrix
&
(
miam_list
-
stop_list
)
cooc
=
matrix
&
(
miam_list
-
stop_list
)
elif
miam_id
is
not
None
and
stop_id
is
not
None
and
group_id
is
not
None
:
elif
miam_id
is
not
None
and
stop_id
is
not
None
and
group_id
is
not
None
:
cooc
=
matrix
&
(
miam_list
*
group_list
-
stop_list
)
print
(
"miam_id is not None and stop_id is not None and group_id is not None"
)
#cooc = matrix & (miam_list * group_list - stop_list)
cooc
=
matrix
&
(
miam_list
-
stop_list
)
elif
miam_id
is
not
None
and
stop_id
is
None
and
group_id
is
not
None
:
elif
miam_id
is
not
None
and
stop_id
is
None
and
group_id
is
not
None
:
cooc
=
matrix
&
(
miam_list
*
group_list
)
cooc
=
matrix
&
(
miam_list
*
group_list
)
else
:
else
:
...
...
analysis/functions.py
View file @
5ca9085b
...
@@ -11,7 +11,7 @@ import scipy
...
@@ -11,7 +11,7 @@ import scipy
from
gargantext_web.db
import
get_or_create_node
from
gargantext_web.db
import
get_or_create_node
from
analysis.cooccurrences
import
cooc
from
analysis.cooccurrences
import
do_
cooc
import
pandas
as
pd
import
pandas
as
pd
from
copy
import
copy
from
copy
import
copy
...
@@ -29,7 +29,6 @@ from ngram.lists import listIds
...
@@ -29,7 +29,6 @@ from ngram.lists import listIds
def
diag_null
(
x
):
def
diag_null
(
x
):
return
x
-
x
*
scipy
.
eye
(
x
.
shape
[
0
])
return
x
-
x
*
scipy
.
eye
(
x
.
shape
[
0
])
size
=
1000
size
=
1000
...
@@ -51,24 +50,21 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
...
@@ -51,24 +50,21 @@ def get_cooc(request=None, corpus=None, cooc_id=None, type='node_link', size=siz
# data deleted each time
# data deleted each time
session
.
query
(
NodeNgramNgram
)
.
filter
(
NodeNgramNgram
.
node_id
==
cooc_id
)
.
delete
()
session
.
query
(
NodeNgramNgram
)
.
filter
(
NodeNgramNgram
.
node_id
==
cooc_id
)
.
delete
()
cooc_id
=
cooc
(
corpus
=
corpus
,
miam_id
=
miam_id
,
group_id
=
group_id
,
stop_id
=
stop_id
,
limit
=
size
)
cooc_id
=
do_
cooc
(
corpus
=
corpus
,
miam_id
=
miam_id
,
group_id
=
group_id
,
stop_id
=
stop_id
,
limit
=
size
)
#print([n for n in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all()])
#print([n for n in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooc_id).all()])
for
cooccurrence
in
session
.
query
(
NodeNgramNgram
)
.
filter
(
NodeNgramNgram
.
node_id
==
cooc_id
)
.
all
():
for
cooc
in
session
.
query
(
NodeNgramNgram
)
.
filter
(
NodeNgramNgram
.
node_id
==
cooc_id
)
.
all
():
#print(cooccurrence)
labels
[
cooc
.
ngramx_id
]
=
cooc
.
ngramx_id
# print(cooccurrence.ngramx.terms," <=> ",cooccurrence.ngramy.terms,"\t",cooccurrence.score)
labels
[
cooc
.
ngramy_id
]
=
cooc
.
ngramy_id
# TODO clean this part, unuseful
labels
[
cooccurrence
.
ngramx_id
]
=
cooccurrence
.
ngramx_id
#session.query(Ngram.id).filter(Ngram.id == cooccurrence.ngramx_id).first()[0]
labels
[
cooccurrence
.
ngramy_id
]
=
cooccurrence
.
ngramy_id
#session.query(Ngram.id).filter(Ngram.id == cooccurrence.ngramy_id).first()[0]
matrix
[
cooc
currence
.
ngramx_id
][
cooccurrence
.
ngramy_id
]
=
cooccurrence
.
score
matrix
[
cooc
.
ngramx_id
][
cooc
.
ngramy_id
]
=
cooc
.
score
matrix
[
cooc
currence
.
ngramy_id
][
cooccurrence
.
ngramx_id
]
=
cooccurrence
.
score
matrix
[
cooc
.
ngramy_id
][
cooc
.
ngramx_id
]
=
cooc
.
score
ids
[
labels
[
cooc
currence
.
ngramx_id
]]
=
cooccurrence
.
ngramx_id
ids
[
labels
[
cooc
.
ngramx_id
]]
=
cooc
.
ngramx_id
ids
[
labels
[
cooc
currence
.
ngramy_id
]]
=
cooccurrence
.
ngramy_id
ids
[
labels
[
cooc
.
ngramy_id
]]
=
cooc
.
ngramy_id
weight
[
cooc
currence
.
ngramx_id
]
=
weight
.
get
(
cooccurrence
.
ngramx_id
,
0
)
+
cooccurrence
.
score
weight
[
cooc
.
ngramx_id
]
=
weight
.
get
(
cooc
.
ngramx_id
,
0
)
+
cooc
.
score
weight
[
cooc
currence
.
ngramy_id
]
=
weight
.
get
(
cooccurrence
.
ngramy_id
,
0
)
+
cooccurrence
.
score
weight
[
cooc
.
ngramy_id
]
=
weight
.
get
(
cooc
.
ngramy_id
,
0
)
+
cooc
.
score
x
=
pd
.
DataFrame
(
matrix
)
.
fillna
(
0
)
x
=
pd
.
DataFrame
(
matrix
)
.
fillna
(
0
)
y
=
pd
.
DataFrame
(
matrix
)
.
fillna
(
0
)
y
=
pd
.
DataFrame
(
matrix
)
.
fillna
(
0
)
...
...
ngram/specificity.py
View file @
5ca9085b
...
@@ -9,7 +9,7 @@ from collections import defaultdict
...
@@ -9,7 +9,7 @@ from collections import defaultdict
import
numpy
as
np
import
numpy
as
np
import
pandas
as
pd
import
pandas
as
pd
from
analysis.cooccurrences
import
cooc
from
analysis.cooccurrences
import
do_
cooc
from
gargantext_web.db
import
session
,
cache
,
get_or_create_node
,
bulk_insert
from
gargantext_web.db
import
session
,
cache
,
get_or_create_node
,
bulk_insert
from
gargantext_web.db
import
NodeNgramNgram
,
NodeNodeNgram
from
gargantext_web.db
import
NodeNgramNgram
,
NodeNodeNgram
...
@@ -65,7 +65,7 @@ def compute_specificity(corpus,limit=100):
...
@@ -65,7 +65,7 @@ def compute_specificity(corpus,limit=100):
dbg
=
DebugTime
(
'Corpus #
%
d - specificity'
%
corpus
.
id
)
dbg
=
DebugTime
(
'Corpus #
%
d - specificity'
%
corpus
.
id
)
list_cvalue
=
get_or_create_node
(
nodetype
=
'Cvalue'
,
corpus
=
corpus
)
list_cvalue
=
get_or_create_node
(
nodetype
=
'Cvalue'
,
corpus
=
corpus
)
cooc_id
=
cooc
(
corpus
=
corpus
,
cvalue_id
=
list_cvalue
.
id
,
limit
=
limit
)
cooc_id
=
do_
cooc
(
corpus
=
corpus
,
cvalue_id
=
list_cvalue
.
id
,
limit
=
limit
)
specificity
(
cooc_id
=
cooc_id
,
corpus
=
corpus
,
limit
=
limit
)
specificity
(
cooc_id
=
cooc_id
,
corpus
=
corpus
,
limit
=
limit
)
dbg
.
show
(
'specificity'
)
dbg
.
show
(
'specificity'
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment