Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
a2df227c
Commit
a2df227c
authored
Jan 21, 2016
by
Romain Loth
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[session] quasi fix (import pour tout sauf le workflow)
parent
3476d4a2
Changes
19
Hide whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
89 additions
and
141 deletions
+89
-141
diachronic_specificity.py
analysis/diachronic_specificity.py
+1
-3
distance.py
analysis/distance.py
+2
-3
functions.py
analysis/functions.py
+2
-3
lists.py
analysis/lists.py
+9
-17
periods.py
analysis/periods.py
+4
-8
views.py
annotations/views.py
+5
-11
celery.py
gargantext_web/celery.py
+4
-2
db.py
gargantext_web/db.py
+15
-6
views.py
gargantext_web/views.py
+3
-3
cvalue.py
ngram/cvalue.py
+2
-3
importExport.py
ngram/importExport.py
+3
-6
lists.py
ngram/lists.py
+7
-15
stemLem.py
ngram/stemLem.py
+3
-7
stop.py
ngram/stop.py
+2
-3
tools.py
ngram/tools.py
+2
-3
graph.py
rest_v1_0/graph.py
+2
-4
ngrams.py
rest_v1_0/ngrams.py
+12
-27
views.py
scrappers/scrap_pubmed/views.py
+3
-5
views.py
tests/ngramstable/views.py
+8
-12
No files found.
analysis/diachronic_specificity.py
View file @
a2df227c
...
...
@@ -26,7 +26,7 @@ def diachronic_specificity(corpus_id, terms, order=True):
Values are measure to indicate diachronic specificity.
Nowadays, the measure is rather simple: distance of frequency of period from mean of frequency of all corpus.
'''
session
=
get_session
()
# implicit global session
ngram_frequency_query
=
(
session
.
query
(
Node
.
hyperdata
[
'publication_year'
],
func
.
count
(
'*'
))
.
join
(
NodeNgram
,
Node
.
id
==
NodeNgram
.
node_id
)
...
...
@@ -64,8 +64,6 @@ def diachronic_specificity(corpus_id, terms, order=True):
else
:
return
relative_terms_count
session
.
remove
()
# For tests
# diachronic_specificity(102750, "bayer", order=True)
# diachronic_specificity(26128, "bee", order=True)
analysis/distance.py
View file @
a2df227c
from
admin.utils
import
PrintException
from
gargantext_web.db
import
*
from
gargantext_web.db
import
get_or_create_node
,
get_session
from
gargantext_web.db
import
get_or_create_node
,
session
,
get_session
from
collections
import
defaultdict
from
operator
import
itemgetter
...
...
@@ -31,7 +31,7 @@ def do_distance(cooc_id, field1=None, field2=None, isMonopartite=True, distance=
do_distance :: Int -> (Graph, Partition, {ids}, {weight})
'''
session
=
get_session
()
# implicit global session
authorized
=
[
'conditional'
,
'distributional'
,
'cosine'
]
if
distance
not
in
authorized
:
...
...
@@ -227,5 +227,4 @@ def do_distance(cooc_id, field1=None, field2=None, isMonopartite=True, distance=
partition
=
best_partition
(
G
.
to_undirected
())
return
(
G
,
partition
,
ids
,
weight
)
session
.
remove
()
analysis/functions.py
View file @
a2df227c
...
...
@@ -9,7 +9,7 @@ from math import log
import
scipy
from
gargantext_web.db
import
get_session
,
get_or_create_node
from
gargantext_web.db
import
session
,
get_session
,
get_or_create_node
,
session
from
analysis.cooccurrences
import
do_cooc
from
analysis.distance
import
do_distance
...
...
@@ -39,7 +39,7 @@ def get_cooc(request=None, corpus=None
'''
get_ccoc : to compute the graph.
'''
session
=
get_session
()
# implicit global session
data
=
{}
#if session.query(Node).filter(Node.type_id==type_cooc_id, Node.parent_id==corpus_id).first() is None:
...
...
@@ -170,7 +170,6 @@ def get_cooc(request=None, corpus=None
return
(
partition
)
return
(
data
)
session
.
remove
()
def
get_graphA
(
nodeA_type
,
NodesB
,
links
,
corpus
):
from
analysis.InterUnion
import
Utils
...
...
analysis/lists.py
View file @
a2df227c
from
collections
import
defaultdict
from
math
import
sqrt
from
gargantext_web.db
import
get_session
,
NodeNgram
,
NodeNgramNgram
,
bulk_insert
from
gargantext_web.db
import
session
,
get_session
,
NodeNgram
,
NodeNgramNgram
,
bulk_insert
class
BaseClass
:
...
...
@@ -67,7 +67,7 @@ class Translations(BaseClass):
self
.
items
=
defaultdict
(
int
)
self
.
groups
=
defaultdict
(
set
)
elif
isinstance
(
other
,
int
):
session
=
get_session
()
# implicit global session
query
=
(
session
.
query
(
NodeNgramNgram
.
ngramy_id
,
NodeNgramNgram
.
ngramx_id
)
.
filter
(
NodeNgramNgram
.
node_id
==
other
)
...
...
@@ -76,7 +76,6 @@ class Translations(BaseClass):
self
.
groups
=
defaultdict
(
set
)
for
key
,
value
in
self
.
items
.
items
():
self
.
groups
[
value
]
.
add
(
key
)
session
.
remove
()
elif
isinstance
(
other
,
Translations
):
self
.
items
=
other
.
items
.
copy
()
self
.
groups
=
other
.
groups
.
copy
()
...
...
@@ -120,7 +119,7 @@ class Translations(BaseClass):
def
save
(
self
,
node_id
):
# delete previous data
session
=
get_session
()
# implicit global session
session
.
query
(
NodeNgramNgram
)
.
filter
(
NodeNgramNgram
.
node_id
==
node_id
)
.
delete
()
session
.
commit
()
# insert new data
...
...
@@ -129,7 +128,6 @@ class Translations(BaseClass):
(
'node_id'
,
'ngramy_id'
,
'ngramx_id'
,
'score'
),
((
node_id
,
key
,
value
,
1.0
)
for
key
,
value
in
self
.
items
.
items
())
)
session
.
remove
()
class
WeightedMatrix
(
BaseClass
):
...
...
@@ -138,7 +136,7 @@ class WeightedMatrix(BaseClass):
if
other
is
None
:
self
.
items
=
defaultdict
(
lambda
:
defaultdict
(
float
))
elif
isinstance
(
other
,
int
):
session
=
get_session
()
# implicit global session
query
=
(
session
.
query
(
NodeNgramNgram
.
ngramx_id
,
NodeNgramNgram
.
ngramy_id
,
NodeNgramNgram
.
score
)
.
filter
(
NodeNgramNgram
.
node_id
==
other
)
...
...
@@ -146,7 +144,6 @@ class WeightedMatrix(BaseClass):
self
.
items
=
defaultdict
(
lambda
:
defaultdict
(
float
))
for
key1
,
key2
,
value
in
self
.
items
.
items
():
self
.
items
[
key1
][
key2
]
=
value
session
.
remove
()
elif
isinstance
(
other
,
WeightedMatrix
):
self
.
items
=
defaultdict
(
lambda
:
defaultdict
(
float
))
for
key1
,
key2
,
value
in
other
:
...
...
@@ -165,7 +162,7 @@ class WeightedMatrix(BaseClass):
def
save
(
self
,
node_id
):
# delete previous data
session
=
get_session
()
# implicit global session
session
.
query
(
NodeNgramNgram
)
.
filter
(
NodeNgramNgram
.
node_id
==
node_id
)
.
delete
()
session
.
commit
()
# insert new data
...
...
@@ -174,7 +171,6 @@ class WeightedMatrix(BaseClass):
(
'node_id'
,
'ngramx_id'
,
'ngramy_id'
,
'score'
),
((
node_id
,
key1
,
key2
,
value
)
for
key1
,
key2
,
value
in
self
)
)
session
.
remove
()
def
__radd__
(
self
,
other
):
result
=
NotImplemented
...
...
@@ -251,13 +247,12 @@ class UnweightedList(BaseClass):
if
other
is
None
:
self
.
items
=
set
()
elif
isinstance
(
other
,
int
):
session
=
get_session
()
# implicit global session
query
=
(
session
.
query
(
NodeNgram
.
ngram_id
)
.
filter
(
NodeNgram
.
node_id
==
other
)
)
self
.
items
=
{
row
[
0
]
for
row
in
query
}
session
.
remove
()
elif
isinstance
(
other
,
WeightedList
):
self
.
items
=
set
(
other
.
items
.
keys
())
elif
isinstance
(
other
,
UnweightedList
):
...
...
@@ -333,7 +328,7 @@ class UnweightedList(BaseClass):
def
save
(
self
,
node_id
):
# delete previous data
session
=
get_session
()
# implicit global session
session
.
query
(
NodeNgram
)
.
filter
(
NodeNgram
.
node_id
==
node_id
)
.
delete
()
session
.
commit
()
# insert new data
...
...
@@ -342,7 +337,6 @@ class UnweightedList(BaseClass):
(
'node_id'
,
'ngram_id'
,
'weight'
),
((
node_id
,
key
,
1.0
)
for
key
in
self
.
items
)
)
session
.
remove
()
class
WeightedList
(
BaseClass
):
...
...
@@ -351,13 +345,12 @@ class WeightedList(BaseClass):
if
other
is
None
:
self
.
items
=
defaultdict
(
float
)
elif
isinstance
(
other
,
int
):
session
=
get_session
()
# implicit global session
query
=
(
session
.
query
(
NodeNgram
.
ngram_id
,
NodeNgram
.
weight
)
.
filter
(
NodeNgram
.
node_id
==
other
)
)
self
.
items
=
defaultdict
(
float
,
query
)
session
.
remove
()
elif
isinstance
(
other
,
WeightedList
):
self
.
items
=
other
.
items
.
copy
()
elif
isinstance
(
other
,
UnweightedList
):
...
...
@@ -449,7 +442,7 @@ class WeightedList(BaseClass):
def
save
(
self
,
node_id
):
# delete previous data
session
=
get_session
()
# implicit global session
session
.
query
(
NodeNgram
)
.
filter
(
NodeNgram
.
node_id
==
node_id
)
.
delete
()
session
.
commit
()
# insert new data
...
...
@@ -458,7 +451,6 @@ class WeightedList(BaseClass):
(
'node_id'
,
'ngram_id'
,
'weight'
),
((
node_id
,
key
,
value
)
for
key
,
value
in
self
.
items
.
items
())
)
session
.
remove
()
def
test
():
...
...
analysis/periods.py
View file @
a2df227c
from
collections
import
defaultdict
from
gargantext_web.db
import
get_or_create_node
,
get_session
,
Node
,
NodeHyperdata
,
Hyperdata
,
Ngram
from
gargantext_web.db
import
get_or_create_node
,
session
,
get_session
,
Node
,
NodeHyperdata
,
Hyperdata
,
Ngram
import
pandas
as
pd
import
numpy
as
np
...
...
@@ -29,7 +29,7 @@ def periods(corpus, start=None, end=None):
# distance.cosine(a,b)
# search for min and split
'''
session
=
get_session
()
# implicit global session
Doc
=
aliased
(
Node
)
Corpus
=
aliased
(
Node
)
...
...
@@ -57,8 +57,6 @@ def periods(corpus, start=None, end=None):
for
t
in
times
:
for
ngram
in
miam_list
:
result_list
.
add
(
temporal_tfidf
(
ngram
,
time
))
session
.
remove
()
def
tfidf_temporal
(
corpus
,
start
=
None
,
end
=
None
):
pass
...
...
@@ -84,7 +82,7 @@ def jacquard(period1, period2):
comNode
[
x
[
1
]]
=
comNode
.
get
(
x
[
1
],
set
())
.
union
({
x
[
0
]})
def
get_partition
(
corpus
,
start
=
None
,
end
=
None
,
distance
=
distance
):
session
=
get_session
()
# implicit global session
miam_id
=
get_or_create_node
(
corpus
=
corpus
,
nodetype
=
'MapList'
,
mysession
=
session
)
.
id
print
(
"get Partition
%
s -
%
s"
%
(
start
,
end
))
...
...
@@ -100,14 +98,13 @@ def get_partition(corpus, start=None, end=None, distance=distance):
,
isMonopartite
=
True
,
distance
=
distance
)
return
(
partition
,
weight
)
session
.
remove
()
def
phylo_clusters
(
corpus
,
years
):
'''
corpus :: Node Corpus
years :: [Year]
'''
session
=
get_session
()
# implicit global session
clusters
=
dict
()
nodes_weight
=
dict
()
...
...
@@ -214,5 +211,4 @@ def phylo_clusters(corpus, years):
data
.
append
(
flow
)
return
(
data
)
session
.
remove
()
annotations/views.py
View file @
a2df227c
...
...
@@ -13,7 +13,7 @@ from rest_framework.exceptions import APIException
from
rest_framework.authentication
import
SessionAuthentication
,
BasicAuthentication
from
node.models
import
Node
from
gargantext_web.db
import
get_session
,
cache
,
Node
,
NodeNgram
,
Ngram
from
gargantext_web.db
import
session
,
get_session
,
cache
,
Node
,
NodeNgram
,
Ngram
from
ngram.lists
import
listIds
,
listNgramIds
from
gargantext_web.db
import
get_or_create_node
...
...
@@ -68,7 +68,7 @@ class NgramEdit(APIView):
"""
Edit an existing NGram in a given list
"""
session
=
get_session
()
# implicit global session
list_id
=
int
(
list_id
)
list_node
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
list_id
)
.
first
()
# TODO add 1 for MapList social score ?
...
...
@@ -90,8 +90,6 @@ class NgramEdit(APIView):
'uuid'
:
ngram_id
,
'list_id'
:
list_id
,
}
for
ngram_id
in
ngram_ids
)
session
.
remove
()
def
put
(
self
,
request
,
list_id
,
ngram_ids
):
return
Response
(
None
,
204
)
...
...
@@ -100,7 +98,7 @@ class NgramEdit(APIView):
"""
Delete a ngram from a list
"""
session
=
get_session
()
# implicit global session
print
(
"to del"
,
ngram_ids
)
for
ngram_id
in
ngram_ids
.
split
(
'+'
):
print
(
'ngram_id'
,
ngram_id
)
...
...
@@ -131,7 +129,6 @@ class NgramEdit(APIView):
# [ = = = = / del from map-list = = = = ]
return
Response
(
None
,
204
)
session
.
remove
()
class
NgramCreate
(
APIView
):
"""
...
...
@@ -146,7 +143,7 @@ class NgramCreate(APIView):
example: request.data = {'text': 'phylogeny'}
"""
session
=
get_session
()
# implicit global session
list_id
=
int
(
list_id
)
# format the ngram's text
ngram_text
=
request
.
data
.
get
(
'text'
,
None
)
...
...
@@ -181,8 +178,6 @@ class NgramCreate(APIView):
'list_id'
:
list_id
,
})
session
.
remove
()
class
Document
(
APIView
):
"""
Read-only Document view, similar to /api/nodes/
...
...
@@ -191,7 +186,7 @@ class Document(APIView):
def
get
(
self
,
request
,
doc_id
):
"""Document by ID"""
session
=
get_session
()
# implicit global session
node
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
doc_id
)
.
first
()
if
node
is
None
:
raise
APIException
(
'This node does not exist'
,
404
)
...
...
@@ -213,5 +208,4 @@ class Document(APIView):
'id'
:
node
.
id
}
return
Response
(
data
)
session
.
remove
()
gargantext_web/celery.py
View file @
a2df227c
...
...
@@ -47,11 +47,13 @@ def apply_workflow(corpus_id):
print
(
"End of the Workflow for corpus
%
d"
%
(
corpus_id
))
update_state
.
processing_
(
int
(
corpus_id
),
"0"
)
mysession
.
remove
()
mysession
.
close
()
get_session
.
remove
()
except
Exception
as
error
:
print
(
error
)
PrintException
()
mysession
.
remove
()
mysession
.
close
()
get_session
.
remove
()
@
shared_task
def
empty_trash
(
corpus_id
):
...
...
gargantext_web/db.py
View file @
a2df227c
...
...
@@ -136,14 +136,23 @@ def get_sessionmaker():
from
sqlalchemy.orm
import
sessionmaker
return
sessionmaker
(
bind
=
engine
)
def
get_session
():
Session
=
get_sessionmaker
()
return
scoped_session
(
Session
)
#get_ = scoped_session(Session)
#return get_()
#def get_session():
# session_factory = get_sessionmaker()
# return scoped_session(session_factory)
# get_session à importer, plus pratique pour les remove
session_factory
=
get_sessionmaker
()
get_session
=
scoped_session
(
session_factory
)
# the global session ------------
# pour les modules qui importent
# directement session
session
=
get_session
()
# -------------------------------
# SQLAlchemy model objects caching
from
sqlalchemy
import
or_
...
...
gargantext_web/views.py
View file @
a2df227c
...
...
@@ -228,6 +228,8 @@ def projects(request):
date
=
datetime
.
datetime
.
now
()
# print(Logger.write("STATIC_ROOT"))
# implicit global session
projects
=
session
.
query
(
Node
)
.
filter
(
Node
.
user_id
==
user_id
,
Node
.
type_id
==
project_type_id
)
.
order_by
(
Node
.
date
)
.
all
()
number
=
len
(
projects
)
...
...
@@ -274,8 +276,6 @@ def projects(request):
return
HttpResponseRedirect
(
'/projects/'
)
else
:
form
=
ProjectForm
()
session
.
remove
()
return
render
(
request
,
'projects.html'
,
{
'debug'
:
settings
.
DEBUG
,
...
...
@@ -377,6 +377,7 @@ def corpus(request, project_id, corpus_id):
type_doc_id
=
cache
.
NodeType
[
'Document'
]
.
id
# implicit global session
number
=
session
.
query
(
func
.
count
(
Node
.
id
))
.
filter
(
Node
.
parent_id
==
corpus_id
,
Node
.
type_id
==
type_doc_id
)
.
all
()[
0
][
0
]
...
...
@@ -402,7 +403,6 @@ def corpus(request, project_id, corpus_id):
'view'
:
"documents"
}))
session
.
remove
()
return
HttpResponse
(
html
)
def
newpaginatorJSON
(
request
,
corpus_id
):
...
...
ngram/cvalue.py
View file @
a2df227c
...
...
@@ -6,7 +6,7 @@ from admin.utils import PrintException,DebugTime
from
gargantext_web.db
import
NodeNgram
,
NodeNodeNgram
from
gargantext_web.db
import
*
from
gargantext_web.db
import
get_or_create_node
,
get_session
from
gargantext_web.db
import
get_or_create_node
,
session
,
get_session
from
parsing.corpustools
import
*
...
...
@@ -41,7 +41,7 @@ def getNgrams(corpus=None, limit=1000):
'''
getNgrams :: Corpus -> [(Int, String, String, Float)]
'''
session
=
get_session
()
# implicit global session
terms
=
dict
()
tfidf_node
=
get_or_create_node
(
nodetype
=
'Tfidf (global)'
...
...
@@ -65,7 +65,6 @@ def getNgrams(corpus=None, limit=1000):
except
:
PrintException
()
return
(
terms
)
session
.
remove
()
def
compute_cvalue
(
corpus
=
None
,
limit
=
1000
,
mysession
=
None
):
'''
...
...
ngram/importExport.py
View file @
a2df227c
...
...
@@ -12,7 +12,7 @@ TODO : REFACTOR 2) improvements in ngram creation (?bulk like node_ngram links)
"""
from
gargantext_web.db
import
Ngram
,
NodeNgram
,
NodeNodeNgram
,
NodeNgramNgram
from
gargantext_web.db
import
cache
,
get_session
,
get_or_create_node
,
bulk_insert
from
gargantext_web.db
import
cache
,
session
,
get_session
,
get_or_create_node
,
bulk_insert
# import sqlalchemy as sa
from
sqlalchemy.sql
import
func
,
exists
...
...
@@ -105,7 +105,7 @@ def exportNgramLists(node,filename,delimiter="\t"):
2 <=> mapList
"""
# récupérer d'un coup les objets Ngram (avec terme)
session
=
get_session
()
# implicit global session
if
len
(
ngram_ids
):
ng_objs
=
session
.
query
(
Ngram
)
.
filter
(
Ngram
.
id
.
in_
(
ngram_ids
))
.
all
()
else
:
...
...
@@ -131,7 +131,6 @@ def exportNgramLists(node,filename,delimiter="\t"):
# csv_rows = [[ligne1_a, ligne1_b..],[ligne2_a, ligne2_b..],..]
return
csv_rows
session
.
remove
()
# on applique notre fonction ng_to_csv sur chaque liste
# ------------------------------------------------------
...
...
@@ -188,7 +187,7 @@ def importNgramLists(node,filename,delimiter="\t", del_lists=[]):
(and ideally add its logic to analysis.lists.Translations)
'''
session
=
get_session
()
# implicit global session
# the node arg has to be a corpus here
if
not
hasattr
(
node
,
"type_id"
)
or
node
.
type_id
!=
4
:
raise
TypeError
(
"IMPORT: node argument must be a Corpus Node"
)
...
...
@@ -379,8 +378,6 @@ def importNgramLists(node,filename,delimiter="\t", del_lists=[]):
print
(
"INFO: added
%
i elements in the lists indices"
%
added_nd_ng
)
print
(
"INFO: added
%
i new ngrams in the lexicon"
%
added_ng
)
session
.
remove
()
# à chronométrer:
...
...
ngram/lists.py
View file @
a2df227c
from
admin.utils
import
PrintException
from
gargantext_web.db
import
NodeNgram
,
get_session
from
gargantext_web.db
import
NodeNgram
,
session
,
get_session
from
gargantext_web.db
import
*
from
parsing.corpustools
import
*
...
...
@@ -21,7 +21,7 @@ def listIds(typeList=None, user_id=None, corpus_id=None):
[Node] :: List of Int, returned or created by the function
'''
session
=
get_session
()
# implicit global session
if
typeList
is
None
:
typeList
=
'MiamList'
...
...
@@ -59,8 +59,6 @@ def listIds(typeList=None, user_id=None, corpus_id=None):
else
:
raise
Exception
(
"Usage (Warning): Need corpus_id and user_id"
)
session
.
remove
()
# Some functions to manage ngrams according to the lists
...
...
@@ -81,7 +79,7 @@ def listNgramIds(list_id=None, typeList=None,
user_id : needed to create list if it does not exist
'''
session
=
get_session
()
# implicit global session
if
typeList
is
None
:
typeList
=
[
'MiamList'
,
'StopList'
]
...
...
@@ -121,8 +119,6 @@ def listNgramIds(list_id=None, typeList=None,
)
return
(
query
.
all
())
session
.
remove
()
def
ngramList
(
do
,
list_id
,
ngram_ids
=
None
)
:
'''
...
...
@@ -134,7 +130,7 @@ def ngramList(do, list_id, ngram_ids=None) :
ngram_id = [Int] : list of Ngrams id (Ngrams.id)
list_id = Int : list id (Node.id)
'''
session
=
get_session
()
# implicit global session
results
=
[]
...
...
@@ -169,7 +165,6 @@ def ngramList(do, list_id, ngram_ids=None) :
session
.
commit
()
return
(
results
)
session
.
remove
()
# Some functions to manage automatically the lists
def
doStopList
(
user_id
=
None
,
corpus_id
=
None
,
stop_id
=
None
,
reset
=
False
,
limit
=
None
):
...
...
@@ -188,7 +183,7 @@ def ngrams2miam(user_id=None, corpus_id=None):
'''
Create a Miam List only
'''
session
=
get_session
()
# implicit global session
miam_id
=
listIds
(
typeList
=
'MiamList'
,
user_id
=
user_id
,
corpus_id
=
corpus_id
)[
0
][
0
]
print
(
miam_id
)
...
...
@@ -209,7 +204,6 @@ def ngrams2miam(user_id=None, corpus_id=None):
.
all
()
)
bulk_insert
(
NodeNgram
,
[
'node_id'
,
'ngram_id'
,
'weight'
],
query
)
session
.
remove
()
from
gargantext_web.db
import
get_or_create_node
from
analysis.lists
import
Translations
,
UnweightedList
...
...
@@ -222,7 +216,7 @@ def ngrams2miamBis(corpus):
miam_id
=
get_or_create_node
(
corpus
=
corpus
,
nodetype
=
'MiamList'
)
stop_id
=
get_or_create_node
(
corpus
=
corpus
,
nodetype
=
'StopList'
)
session
=
get_session
()
# implicit global session
query
=
(
session
.
query
(
literal_column
(
str
(
miam_id
))
.
label
(
"node_id"
),
...
...
@@ -240,7 +234,6 @@ def ngrams2miamBis(corpus):
.
all
()
)
bulk_insert
(
NodeNgram
,
[
'node_id'
,
'ngram_id'
,
'weight'
],
query
)
session
.
remove
()
def
doList
(
type_list
=
'MiamList'
,
...
...
@@ -266,7 +259,7 @@ def doList(
lem = equivalent Words which are lemmatized (but the main form)
cvalue = equivalent N-Words according to C-Value (but the main form)
'''
session
=
get_session
()
# implicit global session
if
type_list
not
in
[
'MiamList'
,
'MainList'
]:
raise
Exception
(
"Type List (
%
s) not supported, try:
\'
MiamList
\'
or
\'
MainList
\'
"
%
type_list
)
...
...
@@ -374,6 +367,5 @@ def doList(
bulk_insert
(
NodeNgram
,
[
'node_id'
,
'ngram_id'
,
'weight'
],
query
)
return
(
list_dict
[
type_list
][
'id'
])
session
.
remove
()
ngram/stemLem.py
View file @
a2df227c
...
...
@@ -4,13 +4,13 @@ from parsing.corpustools import *
from
gargantext_web.db
import
NodeNgram
from
sqlalchemy
import
desc
,
asc
,
or_
,
and_
,
Date
,
cast
,
select
from
gargantext_web.db
import
get_cursor
,
bulk_insert
,
get_session
from
gargantext_web.db
import
get_cursor
,
bulk_insert
,
session
,
get_session
def
get_ngramogram
(
corpus
,
limit
=
None
):
"""
Ngram is a composition of ograms (ogram = 1gram)
"""
session
=
get_session
()
# implicit global session
try
:
query
=
(
session
...
...
@@ -35,8 +35,6 @@ def get_ngramogram(corpus, limit=None):
except
Exception
as
error
:
PrintException
()
session
.
remove
()
def
split_ngram
(
ngram
):
if
isinstance
(
ngram
,
str
):
...
...
@@ -307,7 +305,7 @@ def stem_corpus(corpus_id=None):
Returns Int as id of the Stem Node
stem_corpus :: Int
'''
session
=
get_session
()
# implicit global session
corpus
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
corpus_id
)
.
first
()
#print('Number of new ngrams to stem:',
...
...
@@ -332,7 +330,5 @@ def stem_corpus(corpus_id=None):
PrintException
()
else
:
print
(
'Usage: stem_corpus(corpus_id=corpus.id)'
)
session
.
remove
()
ngram/stop.py
View file @
a2df227c
...
...
@@ -2,7 +2,7 @@ import re
from
admin.utils
import
PrintException
from
gargantext_web.db
import
Node
,
Ngram
,
NodeNgram
,
NodeNodeNgram
from
gargantext_web.db
import
cache
,
get_session
,
get_or_create_node
,
bulk_insert
from
gargantext_web.db
import
cache
,
session
,
get_session
,
get_or_create_node
,
bulk_insert
import
sqlalchemy
as
sa
from
sqlalchemy.sql
import
func
...
...
@@ -14,7 +14,7 @@ from ngram.tools import insert_ngrams
from
analysis.lists
import
WeightedList
,
UnweightedList
def
importStopList
(
node
,
filename
,
language
=
'fr'
):
session
=
get_session
()
# implicit global session
with
open
(
filename
,
"r"
)
as
f
:
stop_list
=
f
.
read
()
.
splitlines
()
...
...
@@ -36,7 +36,6 @@ def importStopList(node,filename,language='fr'):
)
bulk_insert
(
NodeNgram
,
[
'node_id'
,
'ngram_id'
,
'weight'
],
[
d
for
d
in
data
])
session
.
remove
()
def
isStopWord
(
ngram
,
stop_words
=
None
):
'''
...
...
ngram/tools.py
View file @
a2df227c
from
gargantext_web.db
import
Ngram
,
NodeNgram
,
NodeNgramNgram
from
gargantext_web.db
import
get_cursor
,
bulk_insert
,
get_or_create_node
,
get_session
from
gargantext_web.db
import
get_cursor
,
bulk_insert
,
get_or_create_node
,
session
,
get_session
def
insert_ngrams_to_list
(
list_of_ngrams
,
corpus
,
list_type
=
'MapList'
,
erase
=
True
):
'''
Works only for Stop and Map
'''
session
=
get_session
()
# implicit global session
list_node
=
get_or_create_node
(
corpus
=
corpus
,
nodetype
=
list_type
,
mysession
=
session
)
group_node
=
get_or_create_node
(
corpus
=
corpus
,
nodetype
=
'GroupList'
,
mysession
=
session
)
...
...
@@ -35,7 +35,6 @@ def insert_ngrams_to_list(list_of_ngrams, corpus, list_type='MapList', erase=Tru
#print(list_to_insert)
db
,
cursor
=
get_cursor
()
bulk_insert
(
NodeNgram
,
[
'node_id'
,
'ngram_id'
,
'weight'
],
[
n
for
n
in
list_to_insert
])
session
.
remove
()
def
insert_ngrams
(
ngrams
,
get
=
'terms-id'
):
'''
...
...
rest_v1_0/graph.py
View file @
a2df227c
from
rest_v1_0.api
import
APIView
,
APIException
,
JsonHttpResponse
,
CsvHttpResponse
from
rest_framework.authentication
import
SessionAuthentication
,
BasicAuthentication
from
gargantext_web.db
import
get_session
,
Node
from
gargantext_web.db
import
session
,
get_session
,
Node
from
analysis.functions
import
get_cooc
class
Graph
(
APIView
):
...
...
@@ -14,7 +14,7 @@ class Graph(APIView):
graph?field1=ngrams&field2=ngrams&
graph?field1=ngrams&field2=ngrams&start=''&end=''
'''
session
=
get_session
()
# implicit global session
field1
=
request
.
GET
.
get
(
'field1'
,
'ngrams'
)
field2
=
request
.
GET
.
get
(
'field2'
,
'ngrams'
)
...
...
@@ -53,5 +53,3 @@ class Graph(APIView):
,
'field2'
:
accepted_field2
,
'options'
:
options
})
session
.
remove
()
rest_v1_0/ngrams.py
View file @
a2df227c
...
...
@@ -16,8 +16,7 @@ from gargantext_web.db import cache
from
gargantext_web.validation
import
validate
,
ValidationException
from
gargantext_web.db
import
get_session
,
Node
,
NodeNgram
,
NodeNgramNgram
\
,
NodeNodeNgram
,
Ngram
,
Hyperdata
,
Node_Ngram
,
get_or_create_node
from
gargantext_web.db
import
session
,
get_session
,
Node
,
NodeNgram
,
NodeNgramNgram
,
NodeNodeNgram
,
Ngram
,
Hyperdata
,
Node_Ngram
,
get_or_create_node
def
DebugHttpResponse
(
data
):
...
...
@@ -82,7 +81,7 @@ class List(APIView):
def
get_metadata
(
self
,
ngram_ids
,
parent_id
):
session
=
get_session
()
# implicit global session
start_
=
time
.
time
()
...
...
@@ -121,13 +120,12 @@ class List(APIView):
end_
=
time
.
time
()
return
{
"data"
:
ngram_ids
,
"secs"
:(
end_
-
start_
)
}
session
.
remove
()
def
get
(
self
,
request
,
corpus_id
,
list_name
):
if
not
request
.
user
.
is_authenticated
():
return
JsonHttpResponse
(
{
"request"
:
"forbidden"
}
)
session
=
get_session
()
# implicit global session
corpus
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
corpus_id
)
.
first
()
# if corpus==None:
# return JsonHttpResponse( {"request" : "forbidden"} )
...
...
@@ -155,8 +153,6 @@ class List(APIView):
measurements
[
"tfidf"
]
=
{
"s"
:
ngrams_meta
[
"secs"
],
"n"
:
len
(
ngrams_meta
[
"data"
]
.
keys
())
}
return
JsonHttpResponse
(
{
"data"
:
ngram_ids
,
"time"
:
measurements
}
)
session
.
remove
()
class
Ngrams
(
APIView
):
'''
...
...
@@ -167,7 +163,7 @@ class Ngrams(APIView):
def
get
(
self
,
request
,
node_id
):
if
not
request
.
user
.
is_authenticated
():
return
JsonHttpResponse
(
{
"request"
:
"forbidden"
}
)
session
=
get_session
()
# implicit global session
corpus
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
node_id
)
.
first
()
# if corpus==None:
# return JsonHttpResponse( {"request" : "forbidden"} )
...
...
@@ -326,8 +322,6 @@ class Ngrams(APIView):
'data'
:
output
,
"time"
:
measurements
})
session
.
remove
()
def
post
(
self
,
request
,
node_id
):
return
JsonHttpResponse
([
"POST"
,
"ok"
])
...
...
@@ -349,14 +343,12 @@ class Group(APIView):
def
get_group_id
(
self
,
node_id
,
user_id
):
node_id
=
int
(
node_id
)
session
=
get_session
()
# implicit global session
corpus
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
node_id
)
.
first
()
if
corpus
==
None
:
return
None
group
=
get_or_create_node
(
corpus
=
corpus
,
nodetype
=
'Group'
)
return
(
group
.
id
)
session
.
remove
()
def
get
(
self
,
request
,
corpus_id
):
if
not
request
.
user
.
is_authenticated
():
...
...
@@ -384,7 +376,7 @@ class Group(APIView):
G
=
nx
.
Graph
()
DG
=
nx
.
DiGraph
()
session
=
get_session
()
# implicit global session
ngrams_ngrams
=
(
session
.
query
(
NodeNgramNgram
)
...
...
@@ -425,7 +417,6 @@ class Group(APIView):
# groups["nodes"] = get_occtfidf( ngrams , request.user.id , corpus_id , "Group")
return
JsonHttpResponse
(
{
"data"
:
groups
}
)
session
.
remove
()
def
post
(
self
,
request
,
node_id
):
return
JsonHttpResponse
(
[
"hola"
,
"mundo"
]
)
...
...
@@ -434,7 +425,7 @@ class Group(APIView):
# input validation
session
=
get_session
()
# implicit global session
input
=
validate
(
request
.
DATA
,
{
'data'
:
{
'source'
:
int
,
'target'
:
list
}})
group_id
=
get_group_id
(
corpus_id
,
request
.
user
.
id
)
...
...
@@ -451,11 +442,9 @@ class Group(APIView):
return
JsonHttpResponse
(
True
,
201
)
else
:
raise
APIException
(
'Missing parameter: "{
\'
data
\'
: [
\'
source
\'
: Int,
\'
target
\'
: [Int]}"'
,
400
)
session
.
remove
()
def
put
(
self
,
request
,
corpus_id
):
session
=
get_session
()
# implicit global session
group_rawreq
=
dict
(
request
.
data
)
...
...
@@ -471,7 +460,7 @@ class Group(APIView):
GDict
.
append
(
gdict
)
existing_group_id
=
self
.
get_group_id
(
corpus_id
,
request
.
user
.
id
)
session
=
get_session
()
# implicit global session
grouped_ngrams
=
(
session
.
query
(
NodeNgramNgram
)
.
filter
(
NodeNgramNgram
.
node_id
==
existing_group_id
)
...
...
@@ -587,7 +576,6 @@ class Group(APIView):
return
JsonHttpResponse
(
True
,
201
)
session
.
remove
()
class
Keep
(
APIView
):
"""
...
...
@@ -597,7 +585,7 @@ class Keep(APIView):
authentication_classes
=
(
SessionAuthentication
,
BasicAuthentication
)
def
get
(
self
,
request
,
corpus_id
):
session
=
get_session
()
# implicit global session
# list_id = session.query(Node).filter(Node.id==list_id).first()
corpus
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
corpus_id
)
.
first
()
node_mapList
=
get_or_create_node
(
nodetype
=
'MapList'
,
corpus
=
corpus
)
...
...
@@ -606,13 +594,12 @@ class Keep(APIView):
for
node
in
nodes_in_map
:
results
[
node
.
ngram_id
]
=
True
return
JsonHttpResponse
(
results
)
session
.
remove
()
def
put
(
self
,
request
,
corpus_id
):
"""
Add ngrams to map list
"""
session
=
get_session
()
# implicit global session
group_rawreq
=
dict
(
request
.
data
)
ngram_2add
=
[
int
(
i
)
for
i
in
list
(
group_rawreq
.
keys
())]
corpus
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
corpus_id
)
.
first
()
...
...
@@ -622,13 +609,12 @@ class Keep(APIView):
session
.
add
(
map_node
)
session
.
commit
()
return
JsonHttpResponse
(
True
,
201
)
session
.
remove
()
def
delete
(
self
,
request
,
corpus_id
):
"""
Delete ngrams from the map list
"""
session
=
get_session
()
# implicit global session
group_rawreq
=
dict
(
request
.
data
)
# print("group_rawreq:")
...
...
@@ -647,5 +633,4 @@ class Keep(APIView):
return
JsonHttpResponse
(
True
,
201
)
session
.
remove
()
scrappers/scrap_pubmed/views.py
View file @
a2df227c
...
...
@@ -29,7 +29,7 @@ import threading
from
node.admin
import
CustomForm
from
gargantext_web.db
import
*
from
gargantext_web.db
import
get_sessionmaker
,
get_session
from
gargantext_web.db
import
get_sessionmaker
,
session
,
get_session
from
gargantext_web.settings
import
DEBUG
,
MEDIA_ROOT
from
rest_v1_0.api
import
JsonHttpResponse
...
...
@@ -84,7 +84,7 @@ def getGlobalStatsISTEXT(request ):
def
doTheQuery
(
request
,
project_id
):
session
=
get_session
()
# implicit global session
# do we have a valid project id?
try
:
project_id
=
int
(
project_id
)
...
...
@@ -181,14 +181,13 @@ def doTheQuery(request , project_id):
data
=
alist
return
JsonHttpResponse
(
data
)
session
.
remove
()
def
testISTEX
(
request
,
project_id
):
print
(
"testISTEX:"
)
print
(
request
.
method
)
alist
=
[
"bar"
,
"foo"
]
session
=
get_session
()
# implicit global session
# do we have a valid project id?
try
:
project_id
=
int
(
project_id
)
...
...
@@ -289,4 +288,3 @@ def testISTEX(request , project_id):
data
=
[
query_string
,
query
,
N
]
return
JsonHttpResponse
(
data
)
session
.
remove
()
tests/ngramstable/views.py
View file @
a2df227c
...
...
@@ -42,7 +42,7 @@ from gargantext_web import settings
# from gargantext_web.db import *
from
gargantext_web.db
import
get_session
,
cache
,
Node
,
NodeNgram
from
gargantext_web.db
import
session
,
get_session
,
cache
,
Node
,
NodeNgram
from
sqlalchemy
import
func
from
rest_v1_0.api
import
JsonHttpResponse
...
...
@@ -66,7 +66,7 @@ def get_ngrams(request , project_id , corpus_id ):
corpus
=
cache
.
Node
[
int
(
corpus_id
)]
type_doc_id
=
cache
.
NodeType
[
'Document'
]
.
id
session
=
get_session
()
# implicit global session
number
=
session
.
query
(
func
.
count
(
Node
.
id
))
.
filter
(
Node
.
parent_id
==
corpus_id
,
Node
.
type_id
==
type_doc_id
)
.
all
()[
0
][
0
]
myamlist_type_id
=
cache
.
NodeType
[
'MiamList'
]
.
id
miamlist
=
session
.
query
(
Node
)
.
filter
(
Node
.
parent_id
==
corpus_id
,
Node
.
type_id
==
myamlist_type_id
)
.
first
()
...
...
@@ -104,7 +104,6 @@ def get_ngrams(request , project_id , corpus_id ):
}))
return
HttpResponse
(
html
)
session
.
remove
()
def
get_journals
(
request
,
project_id
,
corpus_id
):
...
...
@@ -125,7 +124,7 @@ def get_journals(request , project_id , corpus_id ):
corpus
=
cache
.
Node
[
int
(
corpus_id
)]
type_doc_id
=
cache
.
NodeType
[
'Document'
]
.
id
session
=
get_session
()
# implicit global session
number
=
session
.
query
(
func
.
count
(
Node
.
id
))
.
filter
(
Node
.
parent_id
==
corpus_id
,
Node
.
type_id
==
type_doc_id
)
.
all
()[
0
][
0
]
the_query
=
""" SELECT hyperdata FROM node_node WHERE id=
%
d """
%
(
int
(
corpus_id
)
)
...
...
@@ -147,7 +146,6 @@ def get_journals(request , project_id , corpus_id ):
}))
return
HttpResponse
(
html
)
session
.
remove
()
def
get_journals_json
(
request
,
project_id
,
corpus_id
):
results
=
[
"hola"
,
"mundo"
]
...
...
@@ -157,7 +155,7 @@ def get_journals_json(request , project_id, corpus_id ):
user_id
=
request
.
user
.
id
document_type_id
=
cache
.
NodeType
[
'Document'
]
.
id
session
=
get_session
()
# implicit global session
documents
=
session
.
query
(
Node
)
.
filter
(
Node
.
parent_id
==
corpus_id
,
Node
.
type_id
==
document_type_id
)
.
all
()
for
doc
in
documents
:
...
...
@@ -167,12 +165,11 @@ def get_journals_json(request , project_id, corpus_id ):
JournalsDict
[
journal
]
=
0
JournalsDict
[
journal
]
+=
1
return
JsonHttpResponse
(
JournalsDict
)
session
.
remove
()
def
get_corpuses
(
request
,
node_ids
):
ngrams
=
[
int
(
i
)
for
i
in
node_ids
.
split
(
"+"
)
]
session
=
get_session
()
# implicit global session
results
=
session
.
query
(
Node
.
id
,
Node
.
hyperdata
)
.
filter
(
Node
.
id
.
in_
(
ngrams
)
)
.
all
()
for
r
in
results
:
print
(
r
)
...
...
@@ -237,7 +234,7 @@ def graph_share(request, generic=100, specific=100):
# corpus = session.query(Node).filter( Node.type_id==resource_id , Node.user_id==user_id , Node.id==corpus_id , Node.type_id == cache.NodeType['Corpus'].id ).first()
# if corpus==None: return JsonHttpResponse( {"request" : "forbidden"} )
session
=
get_session
()
# implicit global session
miamlist
=
session
.
query
(
Node
)
.
filter
(
Node
.
user_id
==
user_id
,
Node
.
parent_id
==
corpus_id
,
Node
.
type_id
==
cache
.
NodeType
[
'MiamList'
]
.
id
)
.
first
()
if
miamlist
==
None
:
return
JsonHttpResponse
(
{
"request"
:
"forbidden"
}
)
...
...
@@ -252,7 +249,6 @@ def graph_share(request, generic=100, specific=100):
'graphfile'
:
graphurl
,
\
}))
return
HttpResponse
(
html
)
session
.
remove
()
return
JsonHttpResponse
(
request
.
GET
[
"token"
])
...
...
@@ -272,7 +268,7 @@ def node_link_share(request):
from
analysis.functions
import
get_cooc
data
=
[]
session
=
get_session
()
# implicit global session
corpus
=
session
.
query
(
Node
)
.
filter
(
Node
.
user_id
==
user_id
,
Node
.
id
==
corpus_id
)
.
first
()
data
=
get_cooc
(
request
=
request
,
corpus
=
corpus
,
type
=
"node_link"
)
...
...
@@ -307,7 +303,7 @@ def share_resource(request , resource_id , group_id) :
# [ getting all childs ids of this project ]
ids2changeowner
=
[
project2share
.
id
]
session
=
get_session
()
# implicit global session
corpuses
=
session
.
query
(
Node
.
id
)
.
filter
(
Node
.
user_id
==
request
.
user
.
id
,
Node
.
parent_id
==
resource_id
,
Node
.
type_id
==
cache
.
NodeType
[
"Corpus"
]
.
id
)
.
all
()
for
corpus
in
corpuses
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment