Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
5b150ca3
Commit
5b150ca3
authored
Jul 20, 2017
by
sim
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
WIP
parent
226dc26e
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
179 additions
and
62 deletions
+179
-62
0b9b69af66c3_put_resourcetypes_in_database.py
...ic/versions/0b9b69af66c3_put_resourcetypes_in_database.py
+103
-0
constants.py
gargantext/constants.py
+1
-1
nodes.py
gargantext/models/nodes.py
+56
-9
nodes_constants.py
gargantext/models/nodes_constants.py
+0
-32
ngram_coocs.py
gargantext/util/toolchain/ngram_coocs.py
+3
-2
api.py
gargantext/views/api/api.py
+1
-1
nodes.py
gargantext/views/api/nodes.py
+12
-14
tests_070_routes.py
unittests/tests_070_routes.py
+2
-2
tests_090_toolchain.py
unittests/tests_090_toolchain.py
+1
-1
No files found.
alembic/versions/0b9b69af66c3_put_resourcetypes_in_database.py
0 → 100644
View file @
5b150ca3
"""Put resourcetypes in database
Revision ID: 0b9b69af66c3
Revises: bedce47c9e34
Create Date: 2017-07-18 16:54:07.581421
"""
from
alembic
import
op
import
sqlalchemy
as
sa
import
gargantext
# Revision identifiers, used by Alembic.
revision = '0b9b69af66c3'
down_revision = 'bedce47c9e34'
branch_labels = None
depends_on = None

# Resource type records inserted by upgrade(), one database row per entry.
# Every entry has the same four keys:
#   "name"    -- upper-case identifier of the resource type
#   "label"   -- display label
#   "parser"  -- parser name as a string (presumably a class name -- confirm)
#   "crawler" -- crawler name as a string, or None when the entry has none
RESOURCETYPES = [
    {
        "name": "EUROPRESSE",
        "label": "Europresse",
        "parser": "EuropresseParser",
        "crawler": None,
    },
    {
        "name": "JSTOR",
        "label": "Jstor [RIS]",
        "parser": "RISParser",
        "crawler": None,
    },
    {
        "name": "PUBMED",
        "label": "Pubmed [XML]",
        "parser": "PubmedParser",
        "crawler": "PubmedCrawler",
    },
    {
        "name": "SCOPUS",
        "label": "Scopus [RIS]",
        "parser": "RISParser",
        "crawler": None,
    },
    {
        "name": "WOS",
        "label": "Web of Science [ISI]",
        "parser": "ISIParser",
        "crawler": None,
    },
    {
        "name": "ZOTERO",
        "label": "Zotero [RIS]",
        "parser": "RISParser",
        "crawler": None,
    },
    {
        "name": "CSV",
        "label": "CSV",
        "parser": "CSVParser",
        "crawler": None,
    },
    {
        "name": "ISTEX",
        "label": "ISTex",
        "parser": "ISTexParser",
        "crawler": None,
    },
    {
        "name": "SCOAP",
        "label": "SCOAP [API/XML]",
        "parser": "CernParser",
        "crawler": "CernCrawler",
    },
    {
        "name": "REPEC",
        "label": "REPEC [MULTIVAC API]",
        "parser": "MultivacParser",
        "crawler": "MultivacCrawler",
    },
    {
        "name": "HAL",
        "label": "HAL [API]",
        "parser": "HalParser",
        "crawler": "HalCrawler",
    },
    {
        "name": "ISIDORE",
        # The backslash is escaped explicitly: a lone "\" before a space is
        # an invalid escape sequence and makes Python emit a warning, while
        # "\\" yields the very same single-backslash label.
        "label": "ISIDORE [SPARQLE API /!\\ BETA]",
        "parser": "IsidoreParser",
        "crawler": "IsidoreCrawler",
    },
]
def upgrade():
    """Bulk-insert one row per RESOURCETYPES entry.

    Each inserted row stores the entry dict under "hyperdata" and the value
    of ``gargantua`` under "parent_id".

    NOTE(review): ``ResourceTypeNode`` and ``gargantua`` are neither defined
    nor imported in the part of this module visible here (only ``op``,
    ``sa`` and ``gargantext`` are imported), so this looks like it raises
    NameError as written -- confirm the intended imports/definitions.
    NOTE(review): Alembic documents the first argument of ``op.bulk_insert``
    as a Table object -- confirm that ``ResourceTypeNode`` is acceptable.
    """
    op.bulk_insert(ResourceTypeNode, [{"parent_id": gargantua, "hyperdata": x}
                                      for x in RESOURCETYPES])
def downgrade():
    """Do nothing: rows inserted by upgrade() are left in place."""
    # NOTE(review): placeholder -- downgrading does not undo the bulk insert.
    pass
gargantext/constants.py
View file @
5b150ca3
...
...
@@ -30,7 +30,7 @@ from .settings import BASE_DIR
# XXX Originally defined here, imported here for backward-compatibility,
# should be removed later.
from
.models.nodes_constants
import
NODETYPES
,
LISTTYPES
,
INDEXED_HYPERDATA
,
\
from
.models.nodes_constants
import
LISTTYPES
,
INDEXED_HYPERDATA
,
\
RESOURCETYPES
,
get_resource
,
get_resource_by_name
,
\
load_parser
,
load_crawler
...
...
gargantext/models/nodes.py
View file @
5b150ca3
...
...
@@ -6,10 +6,10 @@ from .base import Base, Column, ForeignKey, relationship, TypeDecorator, Index,
Integer
,
Float
,
String
,
DateTime
,
JSONB
,
\
MutableList
,
MutableDict
from
.users
import
User
from
.nodes_constants
import
NODETYPES
,
LISTTYPES
,
INDEXED_HYPERDATA
from
.nodes_constants
import
LISTTYPES
,
INDEXED_HYPERDATA
__all__
=
[
'
NODETYPES'
,
'
LISTTYPES'
,
'INDEXED_HYPERDATA'
,
'Node'
,
'NodeNode'
,
'CorpusNode'
]
__all__
=
[
'LISTTYPES'
,
'INDEXED_HYPERDATA'
,
'Node'
,
'Node
Type'
,
'Node
Node'
,
'CorpusNode'
]
class
NodeType
(
TypeDecorator
):
...
...
@@ -19,11 +19,49 @@ class NodeType(TypeDecorator):
"""
impl
=
Integer
def
process_bind_param
(
self
,
typename
,
dialect
):
return
NODETYPES
.
index
(
typename
)
def
process_result_value
(
self
,
typeindex
,
dialect
):
return
NODETYPES
[
typeindex
]
# Integer id -> node type name. The ids are what the converters below hand
# to / read back from the Integer column, so each name keeps an explicit id
# instead of relying on its position in a list.
NODETYPES = {
    # Documents hierarchy
    1: 'USER',
    2: 'PROJECT',
    3: 'CORPUS',
    4: 'DOCUMENT',
    19: 'RESOURCE',
    20: 'RESOURCETYPE',
    # Lists
    5: 'STOPLIST',
    6: 'GROUPLIST',
    7: 'MAINLIST',
    8: 'MAPLIST',
    9: 'COOCCURRENCES',
    # Scores
    10: 'OCCURRENCES',
    11: 'SPECCLUSION',
    18: 'GENCLUSION',
    12: 'CVALUE',
    13: 'TFIDF-CORPUS',
    14: 'TFIDF-GLOBAL',
    # more scores (sorry!)
    16: 'TIRANK-LOCAL',
    17: 'TIRANK-GLOBAL',
    # Docs subset
    15: 'FAVORITES',
}

# Reverse lookup: node type name -> integer id.
NODETYPES_BY_NAME = {name: type_id for type_id, name in NODETYPES.items()}
def process_bind_param(self, typename, dialect=None):
    """Return the integer id registered for ``typename``.

    Looks ``typename`` up in ``NODETYPES_BY_NAME`` and yields ``None`` when
    the name is not registered. ``dialect`` is accepted but not used.
    """
    ids_by_name = self.NODETYPES_BY_NAME
    return ids_by_name.get(typename)
def process_result_value(self, typeindex, dialect=None):
    """Return the node type name registered for the id ``typeindex``.

    Looks ``typeindex`` up in ``NODETYPES`` and yields ``None`` when the id
    is not registered. ``dialect`` is accepted but not used.
    """
    names_by_id = self.NODETYPES
    return names_by_id.get(typeindex)
@classmethod
def all(cls):
    """Return a view of every node type name held in ``NODETYPES``."""
    names_by_id = cls.NODETYPES
    return names_by_id.values()
@classmethod
def get(cls, x):
    """Convert between a node type id and its name, in either direction.

    An ``int`` argument is passed to ``process_result_value`` (id -> name);
    anything else is passed to ``process_bind_param`` (name -> id).
    """
    # Both converters are invoked through the class, with the class itself
    # standing in for ``self``.
    if isinstance(x, int):
        return cls.process_result_value(cls, x)
    return cls.process_bind_param(cls, x)
class
Node
(
Base
):
...
...
@@ -244,6 +282,15 @@ class CorpusNode(Node):
))
class ResourceTypeNode(Node):
    """Node subclass mapped with the 'RESOURCETYPE' polymorphic identity."""
    __mapper_args__ = {
        'polymorphic_identity': 'RESOURCETYPE'
    }

    # NOTE(review): empty placeholder method; it does nothing as written --
    # confirm whether it is still needed.
    def test(self):
        pass
class
NodeNode
(
Base
):
__tablename__
=
'nodes_nodes'
__table_args__
=
(
...
...
@@ -272,7 +319,7 @@ class NodeNode(Base):
_ALREADY_IMPLEMENTED_NODE_TYPES
=
\
set
(
cls
.
__mapper_args__
.
get
(
'polymorphic_identity'
)
for
cls
in
Node
.
__subclasses__
())
for
nodetype
in
N
ODETYPES
:
for
nodetype
in
N
odeType
.
all
()
:
if
nodetype
and
nodetype
not
in
_ALREADY_IMPLEMENTED_NODE_TYPES
:
# Convert nodetype to a CamelCase class name, assuming it's possible...
class_name
=
''
.
join
(
nodetype
.
title
()
.
split
(
"-"
))
+
'Node'
...
...
gargantext/models/nodes_constants.py
View file @
5b150ca3
...
...
@@ -30,38 +30,6 @@ from ..util.dates import datetime, convert_to_datetime
# types & models (nodes, lists, hyperdata, resource) ---------------------------------------------
NODETYPES
=
[
# TODO separate id not array index, read by models.node
None
,
# 0
# documents hierarchy
'USER'
,
# 1
'PROJECT'
,
# 2
#RESOURCE should be here but last
'CORPUS'
,
# 3
'DOCUMENT'
,
# 4
# lists
'STOPLIST'
,
# 5
'GROUPLIST'
,
# 6
'MAINLIST'
,
# 7
'MAPLIST'
,
# 8
'COOCCURRENCES'
,
# 9
# scores
'OCCURRENCES'
,
# 10
'SPECCLUSION'
,
# 11
'CVALUE'
,
# 12
'TFIDF-CORPUS'
,
# 13
'TFIDF-GLOBAL'
,
# 14
# docs subset
'FAVORITES'
,
# 15
# more scores (sorry!)
'TIRANK-LOCAL'
,
# 16
'TIRANK-GLOBAL'
,
# 17
'GENCLUSION'
,
# 18
'RESOURCE'
,
# 19
]
LISTTYPES
=
{
'DOCUMENT'
:
WeightedList
,
'GROUPLIST'
:
Translations
,
# todo remove "LIST" from name
...
...
gargantext/util/toolchain/ngram_coocs.py
View file @
5b150ca3
...
...
@@ -7,8 +7,9 @@ from sqlalchemy import exc
from
gargantext.util.lists
import
WeightedMatrix
from
gargantext.util.db
import
get_engine
from
gargantext.util.db_cache
import
cache
from
gargantext.constants
import
DEFAULT_COOC_THRESHOLD
,
NODETYPES
from
gargantext.constants
import
DEFAULT_COOC_THRESHOLD
from
gargantext.constants
import
INDEXED_HYPERDATA
from
gargantext.models
import
NodeType
def
compute_coocs
(
corpus
,
overwrite_id
=
None
,
...
...
@@ -101,7 +102,7 @@ def compute_coocs( corpus,
-- ==
-- GROUP BY ngA, ngB
)
"""
.
format
(
nodetype_id
=
N
ODETYPES
.
index
(
'DOCUMENT'
)
"""
.
format
(
nodetype_id
=
N
odeType
.
get
(
'DOCUMENT'
)
,
corpus_id
=
corpus
.
id
)
...
...
gargantext/views/api/api.py
View file @
5b150ca3
...
...
@@ -6,7 +6,7 @@ from rest_framework.views import APIView
from
rest_framework.authentication
import
SessionAuthentication
,
BasicAuthentication
from
rest_framework.permissions
import
IsAuthenticated
from
gargantext.constants
import
RESOURCETYPES
,
NODETYPES
,
get_resource
from
gargantext.constants
import
RESOURCETYPES
,
get_resource
from
gargantext.models
import
Node
,
Ngram
,
NodeNgram
,
NodeNodeNgram
,
NodeNode
from
gargantext.util.db
import
session
,
delete
,
func
,
bulk_insert
from
gargantext.util.db_cache
import
cache
,
or_
...
...
gargantext/views/api/nodes.py
View file @
5b150ca3
from
gargantext.models
import
Node
,
Ngram
,
NodeNgram
,
NodeNodeNgram
,
NodeNode
from
gargantext.constants
import
NODETYPES
,
DEFAULT_N_DOCS_HAVING_NGRAM
from
gargantext.models
import
Node
,
N
odeType
,
N
gram
,
NodeNgram
,
NodeNodeNgram
,
NodeNode
from
gargantext.constants
import
DEFAULT_N_DOCS_HAVING_NGRAM
from
gargantext.util.db
import
session
,
delete
,
func
,
bulk_insert
from
gargantext.util.db_cache
import
cache
,
or_
from
gargantext.util.validation
import
validate
...
...
@@ -13,19 +13,17 @@ from collections import defaultdict
import
csv
_node_available_fields
=
[
'id'
,
'parent_id'
,
'name'
,
'typename'
,
'hyperdata'
,
'ngrams'
,
'date'
]
_node_default_fields
=
[
'id'
,
'parent_id'
,
'name'
,
'typename'
]
_node_available_types
=
NODETYPES
_hyperdata_available_fields
=
[
'title'
,
'source'
,
'abstract'
,
'statuses'
,
'language_name'
,
'language_iso3'
,
'language_iso2'
,
'language_id'
,
'publication_date'
,
'publication_year'
,
'publication_month'
,
'publication_day'
,
'publication_hour'
,
'publication_minute'
,
'publication_second'
]
#_node_available_formats = ['json', 'csv', 'bibex']
def
_query_nodes
(
request
,
node_id
=
None
):
_node_available_fields
=
[
'id'
,
'parent_id'
,
'name'
,
'typename'
,
'hyperdata'
,
'ngrams'
,
'date'
]
_node_default_fields
=
[
'id'
,
'parent_id'
,
'name'
,
'typename'
]
_hyperdata_available_fields
=
[
'title'
,
'source'
,
'abstract'
,
'statuses'
,
'language_name'
,
'language_iso3'
,
'language_iso2'
,
'language_id'
,
'publication_date'
,
'publication_year'
,
'publication_month'
,
'publication_day'
,
'publication_hour'
,
'publication_minute'
,
'publication_second'
]
#_node_available_formats = ['json', 'csv', 'bibex']
if
request
.
user
.
id
is
None
:
raise
TypeError
(
"This API request must come from an authenticated user."
)
...
...
@@ -52,7 +50,7 @@ def _query_nodes(request, node_id=None):
}},
# optional filtering parameters
'types'
:
{
'type'
:
list
,
'required'
:
False
,
'items'
:
{
'type'
:
str
,
'range'
:
_node_available_types
,
'type'
:
str
,
'range'
:
NodeType
.
all
()
,
}},
'parent_id'
:
{
'type'
:
int
,
'required'
:
False
},
}})
...
...
unittests/tests_070_routes.py
View file @
5b150ca3
...
...
@@ -9,7 +9,7 @@ from django.test import Client
from
gargantext.models
import
Node
# to be able to compare in test_073_get_api_one_node()
from
gargantext.
constants
import
NODETYPES
from
gargantext.
models
import
NodeType
from
gargantext.util.db
import
session
...
...
@@ -80,7 +80,7 @@ class RoutesChecker(TestCase):
nodename
=
json_content
[
'name'
]
print
(
"
\n
testing nodename:"
,
nodename
)
print
(
"
\n
testing nodetype:"
,
nodetype
)
self
.
assert
In
(
nodetype
,
NODETYPES
)
self
.
assert
NotNone
(
NodeType
.
get
(
nodetype
)
)
self
.
assertEqual
(
nodename
,
"hello i'm a project"
)
# TODO http://localhost:8000/api/nodes?types[]=CORPUS
...
...
unittests/tests_090_toolchain.py
View file @
5b150ca3
...
...
@@ -8,7 +8,7 @@ from django.test import TestCase, Client, RequestFactory
from
gargantext.models
import
Node
,
User
from
gargantext.util.db
import
session
from
gargantext.constants
import
RESOURCETYPES
,
NODETYPES
,
get_resource
from
gargantext.constants
import
RESOURCETYPES
,
get_resource
from
gargantext.util.toolchain.main
import
*
DATA_SAMPLE_DIR
=
"/srv/gargantext/unittests/mini_test_samples/"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment