humanities / gargantext

Commit fe23f25f, authored Sep 12, 2017 by Alexandre Delanoë

Merge branch 'testing' into stable

parents a3e8e25d c12a0dae

Showing 31 changed files with 3172 additions and 401 deletions (+3172 / -401)
docs/contribution.md                                           +1    -1
gargantext/constants.py                                        +1    -1
gargantext/models/base.py                                      +21   -1
gargantext/models/nodes.py                                     +6    -2
gargantext/util/crawlers/HAL.py                                +30   -19
gargantext/util/crawlers/sparql/bool2sparql.py                 +2    -1
gargantext/util/db.py                                          +0    -14
gargantext/util/group_tools.py                                 +17   -14
gargantext/util/http.py                                        +2    -1
gargantext/util/lists.py                                       +3    -0
gargantext/util/ngramlists_tools.py                            +47   -33
gargantext/util/parsers/CSV.py                                 +51   -112
gargantext/util/parsers/HAL.py                                 +50   -36
gargantext/util/toolchain/ngrams_extraction.py                 +38   -37
gargantext/views/api/ngramlists.py                             +5    -0
gargantext/views/api/urls.py                                   +46   -38
install/gargamelle/requirements.txt                            +1    -0
install/notebook.run                                           +1    -1
install/notebook/Dockerfile                                    +29   -24
install/notebook/gargantext_notebook.py                        +142  -32
moissonneurs/cern.py                                           +4    -1
moissonneurs/hal.py                                            +4    -1
moissonneurs/isidore.py                                        +4    -1
moissonneurs/istex.py                                          +4    -1
moissonneurs/multivac.py                                       +4    -1
moissonneurs/pubmed.py                                         +5    -1
...ooks/.ipynb_checkpoints/AdvancedTutorial-checkpoint.ipynb   +31   -2
notebooks/AdvancedTutorial.ipynb                               +759  -0
notebooks/gargantext_core_tutorial.ipynb                       +1820 -0
templates/pages/projects/overview.html                         +1    -0
templates/pages/projects/project.html                          +43   -26
docs/contribution.md

```diff
@@ -2,7 +2,7 @@
 ## Community
 * [http://gargantext.org/about](http://gargantext.org/about)
-* IRC Chat: (OFTC/FreeNode) #gargantex
+* IRC Chat: (OFTC/FreeNode) #gargantext
 ##Tools
 * gogs
```
gargantext/constants.py

```diff
@@ -263,7 +263,7 @@ RESOURCETYPES = [
     },
     {   "type": 11,
-        "name": 'HAL [API]',
+        "name": 'HAL (english) [API]',
         "parser": "HalParser",
         "format": 'JSON',
         'file_formats': ["zip", "json"],
```
gargantext/models/base.py

```diff
 from sqlalchemy.schema import Column, ForeignKey, UniqueConstraint, Index
-from sqlalchemy.orm import relationship
+from sqlalchemy.orm import relationship, validates
 from sqlalchemy.types import TypeDecorator, \
                              Integer, Float, Boolean, DateTime, String, Text
 from sqlalchemy.dialects.postgresql import JSONB, DOUBLE_PRECISION as Double
@@ -7,6 +7,7 @@ from sqlalchemy.ext.mutable import MutableDict, MutableList
 from sqlalchemy.ext.declarative import declarative_base

 __all__ = ["Column", "ForeignKey", "UniqueConstraint", "relationship",
+           "validates", "ValidatorMixin",
            "Integer", "Float", "Boolean", "DateTime", "String", "Text",
            "TypeDecorator",
            "JSONB", "Double",
@@ -18,6 +19,25 @@ __all__ = ["Column", "ForeignKey", "UniqueConstraint", "relationship",
 # all tables handled by Alembic migration scripts.
 Base = declarative_base()

 # To be used by tables already handled by Django ORM, such as User model. We
 # separate them in order to keep those out of Alembic sight.
 DjangoBase = declarative_base()
+
+
+class ValidatorMixin(object):
+    def enforce_length(self, key, value):
+        """Truncate a string according to its column length
+
+        Usage example:
+
+        .. code-block:: python
+
+            @validates('some_column')
+            def validate_some_column(self, key, value):
+                self.enforce_length(key, value)
+        """
+        max_len = getattr(self.__class__, key).prop.columns[0].type.length
+        if value and len(value) > max_len:
+            return value[:max_len]
+        return value
```
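A minimal sketch of the new mixin in action, outside Gargantext (the Book model and its 10-character column are hypothetical, for illustration only):

```python
from sqlalchemy.orm import validates
from sqlalchemy.schema import Column
from sqlalchemy.types import Integer, String
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class ValidatorMixin(object):
    def enforce_length(self, key, value):
        # Read the length limit declared on the mapped column
        max_len = getattr(self.__class__, key).prop.columns[0].type.length
        if value and len(value) > max_len:
            return value[:max_len]
        return value

class Book(ValidatorMixin, Base):  # hypothetical model
    __tablename__ = 'books'
    id = Column(Integer, primary_key=True)
    name = Column(String(10))

    @validates('name')
    def validate_name(self, key, value):
        return self.enforce_length(key, value)

b = Book(name="a very long title indeed")
print(b.name)  # 'a very lon' -- silently truncated to the 10-char limit
```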
gargantext/models/nodes.py

```diff
@@ -9,7 +9,7 @@ from datetime import datetime
 from .base import Base, Column, ForeignKey, relationship, TypeDecorator, Index, \
                   Integer, Float, String, DateTime, JSONB, \
-                  MutableList, MutableDict
+                  MutableList, MutableDict, validates, ValidatorMixin
 from .users import User

 __all__ = ['Node', 'NodeNode', 'CorpusNode']
@@ -26,7 +26,7 @@ class NodeType(TypeDecorator):
         return NODETYPES[typeindex]


-class Node(Base):
+class Node(ValidatorMixin, Base):
     """This model can fit many purposes:

         myFirstCorpus = session.query(CorpusNode).first()
@@ -112,6 +112,10 @@ class Node(Base):
                'user_id={0.user_id}, parent_id={0.parent_id}, ' \
                'name={0.name!r}, date={0.date})>'.format(self)

+    @validates('name')
+    def validate_name(self, key, value):
+        return self.enforce_length(key, value)
+
     @property
     def ngrams(self):
         """Pseudo-attribute allowing to retrieve a node's ngrams.
```
gargantext/util/crawlers/HAL.py

```diff
@@ -14,12 +14,12 @@ from gargantext.util.files import save
 class HalCrawler(Crawler):
     ''' HAL API CLIENT'''

     def __init__(self):
         # Main EndPoints
         self.BASE_URL = "https://api.archives-ouvertes.fr"
         self.API_URL  = "search"

         # Final EndPoints
         # TODO : Change endpoint according type of database
         self.URL = self.BASE_URL + "/" + self.API_URL
@@ -29,28 +29,39 @@ class HalCrawler(Crawler):
         '''formating the query'''
         #search_field="title_t"
-        search_field = "abstract_t"
-        return (search_field + ":" + "(" + query + ")")
+        #search_field="abstract_t"
+        #return (search_field + ":" + "(" + query + ")")
+        return "(" + query + ")"

     def _get(self, query, fromPage=1, count=10, lang=None):
         # Parameters
-        fl = """ title_s
+        fl = """ docid
+               , title_s
                , abstract_s
+               , en_title_s
+               , en_abstract_s
                , submittedDate_s
                , journalDate_s
                , authFullName_s
                , uri_s
                , isbn_s
                , issue_s
+               , journalTitle_s
+               , language_s
+               , doiId_s
+               , authId_i
+               , instStructId_i
+               , deptStructId_i
+               , labStructId_i
+               , rteamStructId_i
                , docType_s
-               , journalPublisher_s
            """
                #, authUrl_s
                #, type_s

         wt = "json"

         querystring = { "q"  : query
@@ -59,18 +70,18 @@ class HalCrawler(Crawler):
                       , "fl" : fl
                       , "wt" : wt
                       }

         # Specify Headers
         headers = { "cache-control" : "no-cache" }

         # Do Request and get response
         response = requests.request( "GET"
                                    , self.URL
                                    , headers = headers
                                    , params  = querystring
                                    )
         #print(querystring)
         # Validation : 200 if ok else raise Value
         if response.status_code == 200:
@@ -81,27 +92,27 @@ class HalCrawler(Crawler):
             return (json.loads(response.content.decode(charset)))
         else:
             raise ValueError(response.status_code, response.reason)

     def scan_results(self, query):
         '''
         scan_results : Returns the number of results
         Query String -> Int
         '''
         self.results_nb = 0
         total = ( self._get(query)
                       .get("response", {})
                       .get("numFound", 0)
                 )
         self.results_nb = total
         return self.results_nb

     def download(self, query):
         downloaded = False
         self.status.append("fetching results")
         corpus = []
@@ -113,9 +124,9 @@ class HalCrawler(Crawler):
             msg = "Invalid sample size N = %i (max = %i)" % ( self.query_max
                                                             , QUERY_SIZE_N_MAX
                                                             )
-            print("ERROR (scrap: Multivac d/l ): " , msg)
+            print("ERROR (scrap: HAL d/l ): " , msg)
             self.query_max = QUERY_SIZE_N_MAX

         #for page in range(1, trunc(self.query_max / 100) + 2):
         for page in range(0, self.query_max, paging):
             print("Downloading page %s to %s results" % (page, paging))
@@ -132,5 +143,5 @@ class HalCrawler(Crawler):
                          , basedir=UPLOAD_DIRECTORY
                          )
             downloaded = True

         return downloaded
```
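For reference, the same public endpoint can be queried standalone. A minimal sketch (the field list is shortened, and the Solr-style `start`/`rows` paging parameters are an assumption; only `q`, `fl` and `wt` are confirmed by the diff above):

```python
import requests

url = "https://api.archives-ouvertes.fr/search"
params = {
    "q": "(graph theory)",   # query already wrapped in parentheses, as in _format_query
    "fl": "docid,title_s,en_title_s,submittedDate_s",
    "wt": "json",
    "start": 0,              # assumed paging parameters
    "rows": 10,
}
response = requests.get(url, params=params, headers={"cache-control": "no-cache"})
response.raise_for_status()

# Same response shape that scan_results() reads: response -> numFound
print(response.json()["response"]["numFound"])
```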
gargantext/util/crawlers/sparql/bool2sparql.py

```diff
@@ -2,6 +2,7 @@
 import subprocess
 import re
 from .sparql import Service
+from gargantext.settings import BOOL_TOOLS_PATH

 #from sparql import Service

 def bool2sparql(rawQuery, count=False, offset=None, limit=None):
@@ -12,7 +13,7 @@ def bool2sparql(rawQuery, count=False, offset=None, limit=None):
     See: https://github.com/delanoe/bool2sparql
     """
     query = re.sub("\"", "\'", rawQuery)
-    bashCommand = ["/srv/gargantext/gargantext/util/crawlers/sparql/bool2sparql-exe", "-q", query]
+    bashCommand = [BOOL_TOOLS_PATH + "/bool2sparql-exe", "-q", query]

     if count is True:
         bashCommand.append("-c")
```
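The hardcoded binary path moves into Django settings. A sketch of the corresponding entry (the exact value is an assumption, taken from the directory that was removed above):

```python
# gargantext/settings.py -- sketch, not the actual file
BOOL_TOOLS_PATH = "/srv/gargantext/gargantext/util/crawlers/sparql"
```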
gargantext/util/db.py

```diff
@@ -5,15 +5,9 @@ from gargantext.util.json import json_dumps
 ########################################################################
 # get engine, session, etc.
 ########################################################################
-import sqlalchemy as sa
 from sqlalchemy.orm import sessionmaker, scoped_session
-from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy import delete

-# To make Full Text search possible, uncomment lines below
-# (and install it with pip before)
-#from sqlalchemy_searchable import make_searchable

 def get_engine():
     from sqlalchemy import create_engine
     return create_engine(settings.DATABASES['default']['URL']
@@ -24,16 +18,8 @@ def get_engine():
 engine = get_engine()

-# To make Full Text search possible, uncomment lines below
-# https://sqlalchemy-searchable.readthedocs.io/
-#sa.orm.configure_mappers()
-Base = declarative_base()
-#Base.metadata.create_all(engine)
-#make_searchable()

 session = scoped_session(sessionmaker(bind=engine))

 ########################################################################
 # useful for queries
 ########################################################################
```
gargantext/util/group_tools.py

```diff
@@ -7,7 +7,7 @@ from gargantext.util.db import session, aliased
 from gargantext.models import Ngram, NodeNgramNgram

 from igraph import Graph  # for group_union

-def query_groups(groupings_id, details=False):
+def query_groups(groupings_id, details=False, sort=False):
     """
     Listing of couples (mainform, subform)
       aka (ngram1_id, ngram2_id)
@@ -15,24 +15,27 @@ def query_groups(groupings_id, details=False):
     Parameter:
       - details: if False, just send the array of couples
                  if True, send quadruplets with (ngram1_id, term1, ngram2_id, term2)
+      - sort: order results by terms of ngram1 then ngram2
     """
+    if details or sort:
+        Ngram1, Ngram2 = Ngram, aliased(Ngram)
+
     if not details:
         # simple contents
-        query = session.query(NodeNgramNgram.ngram1_id, NodeNgramNgram.ngram2_id)
+        columns = (NodeNgramNgram.ngram1_id, NodeNgramNgram.ngram2_id)
     else:
         # detailed contents (id + terms)
-        Ngram1 = aliased(Ngram)
-        Ngram2 = aliased(Ngram)
-        query = (session
-                    .query(
-                        NodeNgramNgram.ngram1_id,
-                        Ngram1.terms,
-                        NodeNgramNgram.ngram2_id,
-                        Ngram2.terms,
-                     )
-                    .join(Ngram1, NodeNgramNgram.ngram1_id == Ngram1.id)
-                    .join(Ngram2, NodeNgramNgram.ngram2_id == Ngram2.id)
-                )
+        columns = (Ngram1.id, Ngram1.terms,
+                   Ngram2.id, Ngram2.terms)
+
+    query = session.query(*columns)
+
+    if details or sort:
+        query = (query.join(Ngram1, NodeNgramNgram.ngram1_id == Ngram1.id)
+                      .join(Ngram2, NodeNgramNgram.ngram2_id == Ngram2.id))
+
+    if sort:
+        query = query.order_by(Ngram1.terms, Ngram2.terms)

     # main filter
     # -----------
```
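A usage sketch of the new keyword, as the CSV export now calls it (the grouping-node id 42 is made up):

```python
from gargantext.util.group_tools import query_groups

# couples (ngram1_id, ngram2_id), unsorted -- same behavior as before
couples = query_groups(42).all()

# quadruplets (ngram1_id, term1, ngram2_id, term2), ordered by term1 then term2
quadruplets = query_groups(42, details=True, sort=True).all()
```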
gargantext/util/http.py

```diff
@@ -73,7 +73,8 @@ from rest_framework.views import APIView
 from gargantext.util.json import json_encoder

 def JsonHttpResponse(data, status=200):
     return HttpResponse(
-        content      = json_encoder.encode(data),
+        content      = data.encode('utf-8') if isinstance(data, str) else \
+                       json_encoder.encode(data),
         content_type = 'application/json; charset=utf-8',
         status       = status
     )
```
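The change lets callers pass an already-serialized JSON string straight through instead of double-encoding it. A standalone sketch of the two paths (the encoder stub stands in for gargantext's json_encoder):

```python
import json

class json_encoder_stub:
    # stand-in for gargantext.util.json.json_encoder, for illustration only
    @staticmethod
    def encode(data):
        return json.dumps(data)

def content_for(data):
    # mirrors the new JsonHttpResponse logic
    return data.encode('utf-8') if isinstance(data, str) else \
           json_encoder_stub.encode(data)

print(content_for('{"already": "serialized"}'))  # bytes, passed through as-is
print(content_for({"log": ["ok"]}))              # dict, encoded to JSON
```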
gargantext/util/lists.py

```diff
@@ -50,6 +50,9 @@ class _BaseClass:
         else:
             return NotImplemented

+    def __len__(self):
+        return len(self.items)
+
     def __repr__(self):
         items = self.items
         if isinstance(items, defaultdict):
```
gargantext/util/ngramlists_tools.py

```diff
@@ -8,8 +8,7 @@ Tools to work with ngramlists (MAINLIST, MAPLIST, STOPLIST)
 """
 from gargantext.util.group_tools import query_groups, group_union
-from gargantext.util.db         import session, desc, func, \
-                                       bulk_insert_ifnotexists
+from gargantext.util.db         import session, bulk_insert_ifnotexists
 from gargantext.models          import Ngram, NodeNgram, NodeNodeNgram, \
                                        NodeNgramNgram, Node
@@ -25,7 +24,6 @@ from gargantext.util.toolchain.ngrams_extraction import normalize_forms
 # merge will also index the new ngrams in the docs of the corpus
 from gargantext.util.toolchain.ngrams_addition import index_new_ngrams

-from sqlalchemy.sql    import exists
 from os                import path
 from csv               import writer, reader, QUOTE_MINIMAL
 from collections       import defaultdict
@@ -35,8 +33,8 @@ from celery import shared_task
 def query_list(list_id,
                pagination_limit=None, pagination_offset=None,
-               details=False, scoring_metric_id=None, groupings_id=None
-               ):
+               details=False, scoring_metric_id=None, groupings_id=None,
+               sort=False):
     """
     Paginated listing of ngram_ids in a NodeNgram lists.
@@ -51,6 +49,7 @@ def query_list(list_id,
                  (for details and sorting)
       - groupings_id: optional id of a list of grouping relations (synonyms)
                       (each synonym will be added to the list if not already in there)
+      - sort: order by Ngram.terms (not possible if details is False)

     FIXME: subforms appended recently and not generalized enough
            => add a common part for all "if groupings_id"
@@ -125,7 +124,10 @@ def query_list(list_id,
         query = query.limit(pagination_limit)

     if pagination_offset:
-        query = query.offset(pagination_offsets)
+        query = query.offset(pagination_offset)
+
+    if details and sort:
+        query = query.order_by(Ngram.terms)

     return query
@@ -186,9 +188,7 @@ def ngrams_to_csv_rows(ngram_objs, ngram_dico={}, group_infos={},
         # 3 columns = |status,      | mainform,  | forms
         #   (type_of_list)   ( term )    ( subterm1|&|subterm2 )
-        csv_rows.append(
-                [list_type, ng_obj.terms, this_grouped_terms]
-                )
+        csv_rows.append([list_type, ng_obj.terms, this_grouped_terms])

     return csv_rows
@@ -231,9 +231,10 @@ def export_ngramlists(node,fname=None,delimiter=DEFAULT_CSV_DELIM,titles=True):
     #    listes de ngram_ids correspondantes
     # ------------------------------------
     #  contenu: liste des objets ngrammes [(2562,"monterme",1),...]
-    stop_ngrams  = query_list(stoplist_node.id, details=True, groupings_id=group_node.id).all()
-    main_ngrams  = query_list(mainlist_node.id, details=True, groupings_id=group_node.id).all()
-    map_ngrams   = query_list(maplist_node.id,  details=True, groupings_id=group_node.id).all()
+    stop_ngrams, main_ngrams, map_ngrams = (
+        query_list(n.id, details=True, groupings_id=group_node.id, sort=True).all()
+        for n in (stoplist_node, mainlist_node, maplist_node)
+    )

     # pour debug ---------->8 --------------------
     #~ stop_ngrams = stop_ngrams[0:10]
@@ -250,7 +251,7 @@ def export_ngramlists(node,fname=None,delimiter=DEFAULT_CSV_DELIM,titles=True):
     #  for the groups we got couples of ids in the DB
     # -------------------
     # ex: [(3544, 2353), (2787, 4032), ...]
-    group_ngram_id_couples = query_groups(group_node.id).all()
+    group_ngram_id_couples = query_groups(group_node.id, sort=True)

     # we expend this to double structure for groups lookup
     #  1) g['links'] = k couples (x,y_i) as a set [x => {y1,y2}]
@@ -397,6 +398,9 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM,
     NB: To merge the imported lists into a corpus node's lists,
         chain this function with merge_ngramlists()
     '''
+    list_types = ['stop','main','map']
+
     # ---------------
     #  ngram storage
     # ---------------
@@ -461,7 +465,6 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM,
         # headers
         if i == 0:
-            n_cols = len(csv_row)
             for j, colname in enumerate(csv_row):
                 if colname in ['label', 'status', 'forms']:
                     columns[colname] = j
@@ -508,31 +511,30 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM,
             continue

         # --- check correct list type
-        if not this_list_type in ['stop','main','map']:
+        if not this_list_type in list_types:
             print("IMPORT WARN: (skip line) wrong list type at CSV %s:l.%i" % (fname, i))
             continue

         # subforms can be duplicated (in forms and another label)
         # but we must take care of unwanted other duplicates too
-        if this_row_label in imported_unique_ngramstrs:
-            print("TODO IMPORT DUPL: (skip line) term appears more than once at CSV %s:l.%i"
-                    % (fname, i))
+        if imported_unique_ngramstrs.get(this_row_label) == 1:
+            print("TODO IMPORT DUPL: (skip line) term %r appears more than once at CSV %s:l.%i"
+                    % (this_row_label, fname, i))

         # ================= Store the data ====================
         # the ngram census
-        imported_unique_ngramstrs[this_row_label] = True
+        imported_unique_ngramstrs[this_row_label] = 1

         # and the "list to ngram" relation
         imported_nodes_ngrams[this_list_type].append(this_row_label)

         # ====== Store synonyms from the import (if any) ======
         if len(this_row_forms) != 0:
-            other_terms = []
             for raw_term_str in this_row_forms.split(group_delimiter):

                 # each subform is also like an ngram declaration
                 term_str = normalize_forms(normalize_chars(raw_term_str))
-                imported_unique_ngramstrs[term_str] = True
+                imported_unique_ngramstrs[term_str] = 2
                 imported_nodes_ngrams[this_list_type].append(term_str)

                 # the optional repeated mainform doesn't interest us
@@ -610,7 +612,10 @@ def import_ngramlists(the_file, delimiter=DEFAULT_CSV_DELIM,
            % (n_total_ng, n_added_ng, n_total_ng-n_added_ng) )
     print("IMPORT: read %i grouping relations" % n_group_relations)

+    # print("IMPORT RESULT", result)
+    list_counts = [(typ, len(result.get(typ))) for typ in list_types]
+    list_counts.append(('total', sum(x[1] for x in list_counts)))
+    print("IMPORT: " + '; '.join('%s %s' % stats for stats in list_counts))

     return result
@@ -718,9 +723,11 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
     # ======== Merging all involved ngrams =========

-    # all memberships with resolved conflicts of interfering memberships
+    # all ngram memberships with resolved conflicts of interfering memberships
+    # (associates ngram ids with list types -- see linfos definition above)
     resolved_memberships = {}

+    # iterates over each ngram of each list type for both old and new lists
     for list_set in [old_lists, new_lists]:
         for lid, info in enumerate(linfos):
             list_type = info['key']
@@ -749,12 +756,15 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
     # ======== Merging old and new groups =========
     # get the arcs already in the target DB (directed couples)
-    previous_links = session.query(
-       NodeNgramNgram.ngram1_id,
-       NodeNgramNgram.ngram2_id
-    ).filter(
-       NodeNgramNgram.node_id == old_group_id
-    ).all()
+    if 'groupings' in del_originals:
+        previous_links = []
+    else:
+        previous_links = session.query(
+           NodeNgramNgram.ngram1_id,
+           NodeNgramNgram.ngram2_id
+        ).filter(
+           NodeNgramNgram.node_id == old_group_id
+        ).all()

     n_links_previous = len(previous_links)
@@ -822,7 +832,7 @@ def merge_ngramlists(new_lists={}, onto_corpus=None, del_originals=[]):
             list_type = linfos[lid]['key']
             merged_results[list_type].items.add(ng_id)

-    # print("IMPORT: added %i elements in the lists indices" % added_nd_ng)
+    print("IMPORT: added %i elements in the lists indices" % added_nd_ng)

     # ======== Overwrite old data with new =========
     for lid, info in enumerate(linfos):
@@ -845,13 +855,17 @@ def import_and_merge_ngramlists(file_contents, onto_corpus_id, overwrite=False):
     """
     A single function to run import_ngramlists and merge_ngramlists together
     """
-    print("import list")
+    print("IMPORT CSV termlists file with %s lines in corpus %s (%s)" % (
+        len(file_contents), onto_corpus_id, 'overwrite' if overwrite else 'merge'))
+
     new_lists = import_ngramlists(file_contents)

-    corpus_node = session.query(Node).filter(Node.id == onto_corpus_id).first()
+    corpus_node = session.query(Node).get(onto_corpus_id)

     # merge the new_lists onto those of the target corpus
-    del_originals = ['stop', 'main', 'map'] if overwrite else []
+    del_originals = ['stop', 'main', 'map', 'groupings'] if overwrite else []
     log_msg = merge_ngramlists(new_lists, onto_corpus=corpus_node, del_originals=del_originals)
     return log_msg
```
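The census dict switches from booleans to small ints so the importer can tell how a term was first seen (1 = as a row label, 2 = as a subform) and only warn on label/label duplicates. A standalone sketch of that rule:

```python
imported_unique_ngramstrs = {}

def see_label(term):
    # warn only when the same *label* was already seen as a label
    if imported_unique_ngramstrs.get(term) == 1:
        print("duplicate label: %r" % term)
    imported_unique_ngramstrs[term] = 1

def see_subform(term):
    imported_unique_ngramstrs[term] = 2

see_label("banana")
see_subform("bananas")
see_label("bananas")   # no warning: first seen as a subform (2)
see_label("banana")    # warning: already seen as a label (1)
```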
gargantext/util/parsers/CSV.py

```diff
@@ -4,128 +4,67 @@ import sys
 import csv
 csv.field_size_limit(sys.maxsize)
 import numpy as np
-import os


 class CSVParser(Parser):
+    DELIMITERS = ", \t;|:"

-    def CSVsample(self, small_contents, delim):
-        reader = csv.reader(small_contents, delimiter=delim)
-
-        Freqs = []
-        for row in reader:
-            Freqs.append(len(row))
-
-        return Freqs
+    def detect_delimiter(self, lines, sample_size=10):
+        sample = lines[:sample_size]
+
+        # Compute frequency of each delimiter on each input line
+        delimiters_freqs = {
+            d: [line.count(d) for line in sample]
+            for d in self.DELIMITERS
+        }
+
+        # Select delimiters with a standard deviation of zero, ie. delimiters
+        # for which we have the same number of fields on each line
+        selected_delimiters = [
+            (d, np.sum(freqs))
+            for d, freqs in delimiters_freqs.items()
+            if any(freqs) and np.std(freqs) == 0
+        ]
+
+        if selected_delimiters:
+            # Choose the delimiter with highest frequency amongst selected ones
+            sorted_delimiters = sorted(selected_delimiters, key=lambda x: x[1])
+            return sorted_delimiters[-1][0]

     def parse(self, filebuf):
         print("CSV: parsing (assuming UTF-8 and LF line endings)")
         contents = filebuf.read().decode("UTF-8").split("\n")
-        sample_size = 10
-        sample_contents = contents[0:sample_size]
-        hyperdata_list = []
-
-        # # = = = = [ Getting delimiters frequency ] = = = = #
-        PossibleDelimiters = [ ',',' ','\t', ';', '|', ':' ]
-        AllDelimiters = {}
-        for delim in PossibleDelimiters:
-            AllDelimiters[delim] = self.CSVsample(sample_contents, delim)
-        # # = = = = [ / Getting delimiters frequency ] = = = = #
-        # # OUTPUT example:
-        # #  AllDelimiters = {
-        # #     '\t': [1, 1, 1, 1, 1],
-        # #     ' ': [1, 13, 261, 348, 330],
-        # #     ',': [15, 15, 15, 15, 15],
-        # #     ';': [1, 1, 1, 1, 1],
-        # #     '|': [1, 1, 1, 1, 1]
-        # #  }
-
-        # # = = = = [ Stand.Dev=0 & Sum of delimiters ] = = = = #
-        Delimiters = []
-        for d in AllDelimiters:
-            freqs = AllDelimiters[d]
-            suma = np.sum(freqs)
-            if suma > 0:
-                std = np.std(freqs)
-                # print [ d , suma , len(freqs) , std]
-                if std == 0:
-                    Delimiters.append([d, suma, len(freqs), std])
-        # # = = = = [ / Stand.Dev=0 & Sum of delimiters ] = = = = #
-        # # OUTPUT example:
-        # #  Delimiters = [
-        # #     ['\t', 5, 5, 0.0],
-        # #     [',', 75, 5, 0.0],
-        # #     ['|', 5, 5, 0.0]
-        # #  ]
-
-        # # = = = = [ Delimiter selection ] = = = = #
-        Sorted_Delims = sorted(Delimiters, key=lambda x: x[1], reverse=True)
-        HighestDelim = Sorted_Delims[0][0]
-        # HighestDelim = ","
-        print("CSV selected delimiter:",[HighestDelim])
-        # # = = = = [ / Delimiter selection ] = = = = #
-
-        # # = = = = [ First data coordinate ] = = = = #
-        Coords = {"row": -1, "column": -1}
-        reader = csv.reader(contents, delimiter=HighestDelim)
-        for rownum, tokens in enumerate(reader):
-            if rownum % 250 == 0:
-                print("CSV row: ", rownum)
-            joined_tokens = "".join(tokens)
-            if Coords["row"] < 0 and len(joined_tokens) > 0:
-                Coords["row"] = rownum
-                for columnum in range(len(tokens)):
-                    t = tokens[columnum]
-                    if len(t) > 0:
-                        Coords["column"] = columnum
-                        break
-        # # = = = = [ / First data coordinate ] = = = = #
-
-        # # = = = = [ Setting Headers ] = = = = #
-        Headers_Int2Str = {}
-        reader = csv.reader(contents, delimiter=HighestDelim)
-        for rownum, tokens in enumerate(reader):
-            if rownum >= Coords["row"]:
-                for columnum in range(Coords["column"], len(tokens)):
-                    t = tokens[columnum]
-                    Headers_Int2Str[columnum] = t
-                break
-        # print("Headers_Int2Str")
-        # print(Headers_Int2Str)
-        # # = = = = [ / Setting Headers ] = = = = #
-        # # OUTPUT example:
-        # #  Headers_Int2Str = {
-        # #     0: 'publication_date',
-        # #     1: 'publication_month',
-        # #     2: 'publication_second',
-        # #     3: 'abstract'
-        # #  }
-
-        # # = = = = [ Reading the whole CSV and saving ] = = = = #
-        hyperdata_list = []
-        reader = csv.reader(contents, delimiter=HighestDelim)
-        for rownum, tokens in enumerate(reader):
-            if rownum > Coords["row"]:
-                RecordDict = {}
-                for columnum in range(Coords["column"], len(tokens)):
-                    data = tokens[columnum]
-                    RecordDict[Headers_Int2Str[columnum]] = data
-                if len(RecordDict.keys()) > 0:
-                    hyperdata_list.append(RecordDict)
-        # # = = = = [ / Reading the whole CSV and saving ] = = = = #
-
-        return hyperdata_list
+
+        # Filter out empty lines
+        contents = [line for line in contents if line.strip()]
+
+        # Delimiter auto-detection
+        delimiter = self.detect_delimiter(contents, sample_size=10)
+
+        if delimiter is None:
+            raise ValueError("CSV: couldn't detect delimiter, bug or malformed data")
+
+        print("CSV: selected delimiter: %r" % delimiter)
+
+        # Parse CSV
+        reader = csv.reader(contents, delimiter=delimiter)
+
+        # Get first not empty row and its fields (ie. header row), or (0, [])
+        first_row, headers = \
+            next(((i, fields) for i, fields in enumerate(reader) if any(fields)),
+                 (0, []))
+
+        # Get first not empty column of the first row, or 0
+        first_col = next((i for i, field in enumerate(headers) if field), 0)
+
+        # Strip out potential empty fields in headers
+        headers = headers[first_col:]
+
+        # Return a generator of dictionaries with column labels as keys,
+        # filtering out empty rows
+        for i, fields in enumerate(reader):
+            if i % 500 == 0:
+                print("CSV: parsing row #%s..." % (i+1))
+            if any(fields):
+                yield dict(zip(headers, fields[first_col:]))
```
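A quick check of the detection rule on a toy sample (a standalone re-implementation of the same logic; the sample lines are made up):

```python
import numpy as np

DELIMITERS = ", \t;|:"

def detect_delimiter(lines, sample_size=10):
    sample = lines[:sample_size]
    freqs = {d: [line.count(d) for line in sample] for d in DELIMITERS}
    # keep delimiters appearing the same number of times on every line
    candidates = [(d, np.sum(f)) for d, f in freqs.items()
                  if any(f) and np.std(f) == 0]
    return max(candidates, key=lambda x: x[1])[0] if candidates else None

lines = ["title;year;abstract",
         "A;2001;some text, with commas",
         "B;2002;more text"]
print(detect_delimiter(lines))  # ';' -- comma counts differ across lines, so it loses
```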
gargantext/util/parsers/HAL.py

```diff
@@ -11,25 +11,26 @@ from datetime import datetime
 import json

 class HalParser(Parser):

-    def parse(self, filebuf):
-        '''
-        parse :: FileBuff -> [Hyperdata]
-        '''
-        contents = filebuf.read().decode("UTF-8")
-        data = json.loads(contents)
-        filebuf.close()
-        json_docs = data
+    def _parse(self, json_docs):
         hyperdata_list = []

-        hyperdata_path = { "id"       : "isbn_s"
-                         , "title"    : "title_s"
-                         , "abstract" : "abstract_s"
-                         , "source"   : "journalPublisher_s"
+        hyperdata_path = { "id"       : "docid"
+                         , "title"    : ["en_title_s", "title_s"]
+                         , "abstract" : ["en_abstract_s", "abstract_s"]
+                         , "source"   : "journalTitle_s"
                          , "url"      : "uri_s"
                          , "authors"  : "authFullName_s"
+                         , "isbn_s"   : "isbn_s"
+                         , "issue_s"  : "issue_s"
+                         , "language_s"      : "language_s"
+                         , "doiId_s"  : "doiId_s"
+                         , "authId_i" : "authId_i"
+                         , "instStructId_i"  : "instStructId_i"
+                         , "deptStructId_i"  : "deptStructId_i"
+                         , "labStructId_i"   : "labStructId_i"
+                         , "rteamStructId_i" : "rteamStructId_i"
+                         , "docType_s" : "docType_s"
                          }

         uris = set()
@@ -37,29 +38,32 @@ class HalParser(Parser):
         for doc in json_docs:
             hyperdata = {}

             for key, path in hyperdata_path.items():
-                field = doc.get(path, "NOT FOUND")
-                if isinstance(field, list):
-                    hyperdata[key] = ", ".join(field)
+                # A path can be a field name or a sequence of field names
+                if isinstance(path, (list, tuple)):
+                    # Get first non-empty value of fields in path sequence, or None
+                    field = next((x for x in (doc.get(p) for p in path) if x), None)
                 else:
-                    hyperdata[key] = field
+                    # Get field value
+                    field = doc.get(path)
+
+                if field is None:
+                    field = "NOT FOUND"
+
+                if isinstance(field, list):
+                    hyperdata[key] = ", ".join(map(str, field))
+                else:
+                    hyperdata[key] = str(field)

             if hyperdata["url"] in uris:
                 print("Document already parsed")
             else:
                 uris.add(hyperdata["url"])

-                # hyperdata["authors"] = ", ".join(
-                #             [ p.get("person", {})
-                #                .get("name"  , "")
-                #
-                #               for p in doc.get("hasauthor", [])
-                #             ]
-                #            )
-                #
                 maybeDate = doc.get("submittedDate_s", None)

                 if maybeDate is not None:
                     date = datetime.strptime(maybeDate, "%Y-%m-%d %H:%M:%S")
                 else:
@@ -69,7 +73,17 @@ class HalParser(Parser):
                 hyperdata["publication_year"]  = str(date.year)
                 hyperdata["publication_month"] = str(date.month)
                 hyperdata["publication_day"]   = str(date.day)

                 hyperdata_list.append(hyperdata)

         return hyperdata_list
+
+    def parse(self, filebuf):
+        '''
+        parse :: FileBuff -> [Hyperdata]
+        '''
+        contents = filebuf.read().decode("UTF-8")
+        data = json.loads(contents)
+        return self._parse(data)
```
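A standalone sketch of the new list-valued path lookup, which lets the parser prefer the English title/abstract and fall back to the default one (the sample doc is made up):

```python
doc = {"title_s": ["Un titre"], "en_title_s": None, "docid": 12345}

def lookup(doc, path):
    # Sequence of names: first non-empty value wins; single name: plain get
    if isinstance(path, (list, tuple)):
        return next((x for x in (doc.get(p) for p in path) if x), None)
    return doc.get(path)

print(lookup(doc, ["en_title_s", "title_s"]))  # ['Un titre'] -- fell back to title_s
print(lookup(doc, "docid"))                    # 12345
```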
gargantext/util/toolchain/ngrams_extraction.py

```diff
@@ -81,44 +81,45 @@ def extract_ngrams(corpus, keys=DEFAULT_INDEX_FIELDS, do_subngrams = DEFAULT_IND
                 corpus.hyperdata["skipped_docs"].append(document.id)
                 corpus.save_hyperdata()
                 continue
-            else:
-                # ready !
-                tagger = tagger_bots[language_iso2]
+
+            # ready !
+            tagger = tagger_bots[language_iso2]

             # to do verify if document has no KEYS to index
             # eg: use set intersect (+ loop becomes direct! with no continue)
             for key in keys:
                 try:
                     value = document.hyperdata[str(key)]
                     if not isinstance(value, str):
                         #print("DBG wrong content in doc for key", key)
                         continue
                     # get ngrams
                     for ngram in tagger.extract(value):
-                        tokens = tuple(normalize_forms(token[0]) for token in ngram)
+                        normal_forms = (normalize_forms(t[0]) for t in ngram)
+                        tokens = tuple(nf for nf in normal_forms if nf)
                         if do_subngrams:
                             # ex tokens = ["very", "cool", "exemple"]
                             #    subterms = [['very', 'cool'],...]
                             subterms = subsequences(tokens)
                         else:
                             subterms = [tokens]

                         for seqterm in subterms:
                             ngram = ' '.join(seqterm)
                             nbwords = len(seqterm)
                             nbchars = len(ngram)
                             if nbchars > 1:
                                 if nbchars > 255:
                                     # max ngram length (DB constraint)
                                     ngram = ngram[:255]
                                 # doc <=> ngram index
                                 nodes_ngrams_count[(document.id, ngram)] += 1
                                 # add fields : terms n
                                 ngrams_data.add((ngram, nbwords,))
                 except:
                     #value not in doc
                     continue

         # integrate ngrams and nodes-ngrams
         if len(nodes_ngrams_count) >= BATCH_NGRAMSEXTRACTION_SIZE:
```
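The rewritten token line drops forms that normalize to the empty string, instead of letting them slip into the ngram. A standalone sketch (the normalize_forms stand-in is hypothetical; the real one lives in this module):

```python
def normalize_forms(form):
    # hypothetical stand-in: strip punctuation-only forms down to ''
    return form.strip(".,;:!?")

ngram = [("very", "ADV"), ("...", "PUNCT"), ("cool", "ADJ")]

normal_forms = (normalize_forms(t[0]) for t in ngram)
tokens = tuple(nf for nf in normal_forms if nf)
print(tokens)  # ('very', 'cool') -- empty normalizations are skipped
```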
gargantext/views/api/ngramlists.py

```diff
@@ -155,7 +155,12 @@ class CSVLists(APIView):
             try:
                 # merge the source_lists onto those of the target corpus
                 delete = todo_lists if bool(params.get('overwrite')) else []
+
+                if len(delete) == len(list_types):
+                    delete.append('groupings')
+
                 log_msg = merge_ngramlists(source_lists, onto_corpus=corpus_node, del_originals=delete)
+
                 return JsonHttpResponse({
                     'log': log_msg,
                     }, 200)
```
gargantext/views/api/urls.py

```diff
 from django.conf.urls import url
+from rest_framework_jwt.views import obtain_jwt_token

 from . import nodes
 from . import projects
 from . import corpora
@@ -10,78 +12,81 @@ from . import ngramlists
 from . import analytics
 from graph.rest import Graph
```

The body of `urlpatterns` is realigned in this hunk: closing parentheses are tightened from `()  )` to `())`, section comments are normalized from `#Projects` style to `# Projects`, annotations such as `#?view=resource` and `#?view=docs` are added next to the project routes, and the commented-out Sources routes are kept as comments. The existing routes (`^nodes$`, `^nodes/(\d+)$`, `having`, `status`, projects, corpora, facets, favorites, metrics, explorer, `^ngrams/?$`, histories, `hyperdata$`, the ngramlists export/import/change/groups/family/maplist routes and `^user/parameters/$`) are otherwise unchanged. The one functional addition is a JWT route at the end of the list:

```diff
               , url(r'^user/parameters/$', users.UserParameters.as_view())
+
+              , url('^auth/token$', obtain_jwt_token)
               ]
```
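With djangorestframework-jwt wired in, a client can obtain a token from the new route and reuse it. A sketch (the host, mount point, username and password are made up; `obtain_jwt_token` returning `{"token": ...}` and the `JWT` header prefix are that library's defaults):

```python
import requests

base = "http://localhost:8000/api"  # assumed mount point of these urlpatterns

# obtain_jwt_token expects credentials and returns {"token": "..."}
resp = requests.post(base + "/auth/token",
                     data={"username": "alice", "password": "secret"})
token = resp.json()["token"]

# subsequent API calls carry the token in the Authorization header
nodes = requests.get(base + "/nodes",
                     headers={"Authorization": "JWT " + token})
print(nodes.status_code)
```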
install/gargamelle/requirements.txt

```diff
@@ -11,6 +11,7 @@ django-celery==3.2.1
 django-pgfields==1.4.4
 django-pgjsonb==0.0.23
 djangorestframework==3.5.3
+djangorestframework-jwt==1.9.0
 html5lib==0.9999999
 python-igraph>=0.7.1
 jdatetime==1.7.2
```
install/notebook.run

```diff
@@ -16,7 +16,7 @@ sudo docker run \
     --env POSTGRES_HOST=localhost \
     -v /srv/gargantext:/srv/gargantext \
     -it garg-notebook:latest \
-    /bin/bash -c "/bin/su notebooks -c 'source /env_3-5/bin/activate && cd /srv/gargantext/ && jupyter notebook --port=8899 --ip=0.0.0.0 --no-browser'"
+    /bin/bash -c "/bin/su notebooks -c 'source /env_3-5/bin/activate && cd /home/notebooks && jupyter notebook --port=8899 --ip=0.0.0.0 --no-browser'"

 #    #&& jupyter nbextension enable --py widgetsnbextension --sys-prefix
 #/bin/bash -c "/bin/su notebooks -c 'source /env_3-5/bin/activate && cd /srv/gargantext/ && jupyter notebook --port=8899 --ip=0.0.0.0 --no-browser --notebook-dir=/home/notebooks/'"
```
install/notebook/Dockerfile

```diff
@@ -78,32 +78,8 @@ RUN . /env_3-5/bin/activate && pip3 install -r requirements.txt
 #RUN ./psql_configure.sh
 #RUN ./django_configure.sh

 RUN chown notebooks:notebooks -R /env_3-5

-########################################################################
-### Notebook IHaskell and IPYTHON ENVIRONNEMENT
-########################################################################
-#RUN apt-get update && apt-get install -y \
-#    libtinfo-dev \
-#    libzmq3-dev \
-#    libcairo2-dev \
-#    libpango1.0-dev \
-#    libmagic-dev \
-#    libblas-dev \
-#    liblapack-dev
-#RUN curl -sSL https://get.haskellstack.org/ | sh
-#RUN stack setup
-#RUN git clone https://github.com/gibiansky/IHaskell
-#RUN . /env_3-5/bin/activate \
-#    && cd IHaskell \
-#    && stack install gtk2hs-buildtools \
-#    && stack install --fast \
-#    && /root/.local/bin/ihaskell install --stack
-#
-#
 ########################################################################
 ### POSTGRESQL DATA (as ROOT)
 ########################################################################
@@ -115,3 +91,32 @@ RUN chown notebooks:notebooks -R /env_3-5
 EXPOSE 5432 8899

 VOLUME ["/srv/","/home/notebooks/"]
+
+########################################################################
+### Notebook IHaskell and IPYTHON ENVIRONNEMENT
+########################################################################
+RUN apt-get update && apt-get install -y \
+    libtinfo-dev \
+    libzmq3-dev \
+    libcairo2-dev \
+    libpango1.0-dev \
+    libmagic-dev \
+    libblas-dev \
+    liblapack-dev
+
+USER notebooks
+
+RUN cd /home/notebooks \
+    && curl -sSL https://get.haskellstack.org/ | sh \
+    && stack setup \
+    && git clone https://github.com/gibiansky/IHaskell \
+    && . /env_3-5/bin/activate \
+    && cd IHaskell \
+    && stack install gtk2hs-buildtools \
+    && stack install --fast \
+    && /root/.local/bin/ihaskell install --stack
```
install/notebook/gargantext_notebook.py
View file @
fe23f25f
+#!/usr/bin/env python
 """
 Gargantext Software Copyright (c) 2016-2017 CNRS ISC-PIF -
 http://iscpif.fr
...
@@ -6,45 +7,33 @@ http://gitlab.iscpif.fr/humanities/gargantext/blob/stable/LICENSE )
 - In France : a CECILL variant affero compliant
 - GNU aGPLV3 for all other countries
 """
-#!/usr/bin/env python
-import sys
 import os
+import django
 
+# Django settings
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'gargantext.settings')
+django.setup()
-dirname = os.path.dirname(os.path.realpath(__file__))
-os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext.settings")
-
-# initialize Django application
-from django.core.wsgi import get_wsgi_application
-application = get_wsgi_application()
-
-from gargantext.util.toolchain.main import parse_extract_indexhyperdata
-from gargantext.util.db import *
-from gargantext.models import Node
-from gargantext.models import *
-from nltk.tokenize import wordpunct_tokenize
-from nltk.tokenize import word_tokenize
-import nltk as nltk
-from statistics import mean
-from math import log
-from collections import defaultdict
-import matplotlib.pyplot as plt
-import numpy as np
-import datetime
-from collections import Counter
-from langdetect import detect as detect_lang
+from gargantext.constants import QUERY_SIZE_N_MAX, get_resource, get_resource_by_name
+from gargantext.models import ProjectNode, DocumentNode
+from gargantext.util.db import session, get_engine
+from collections import Counter
+import importlib
+from django.http import Http404
 
+# Import those to be available by notebook user
+from langdetect import detect as detect_lang
+from gargantext.models import UserNode, User
 
+class NotebookError(Exception):
+    pass
 
 def documents(corpus_id):
-    return (session.query(Node).filter(Node.parent_id == corpus_id
-                                       , Node.typename == "DOCUMENT")
-            #.order_by(Node.hyperdata['publication_date'])
-            .all())
+    return (session.query(DocumentNode).filter_by(parent_id=corpus_id)
+            # .order_by(Node.hyperdata['publication_date'])
+            .all()
+            )
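For orientation, a tiny hedged sketch of the reworked helper in use; the corpus id and the 'title' hyperdata key are illustrative, not part of the commit:

    docs = documents(corpus_id=123)          # DocumentNode rows of one corpus
    for doc in docs[:3]:
        print(doc.id, doc.hyperdata.get('title'))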
 #import seaborn as sns
...
@@ -63,13 +52,134 @@ def scan_hal(request):
     hal = HalCrawler()
     return hal.scan_results(request)
 
 def scan_gargantext(corpus_id, lang, request):
     connection = get_engine().connect()
     # TODO add some sugar to the request (ideally request should be the same for hal and garg)
     query = """select count(n.id) from nodes n
                where to_tsvector('%s', hyperdata ->> 'abstract' || 'title')
                @@ to_tsquery('%s')
                AND n.parent_id = %s;""" % (lang, request, corpus_id)
     return [i for i in connection.execute(query)][0][0]
     connection.close()
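Two caveats in the committed function: the values are interpolated straight into the SQL string, and connection.close() sits after the return, so it never runs. A hedged rework (the function name and the use of bound parameters are ours, not the commit's; psycopg2/SQLAlchemy accept %s placeholders with a parameter tuple):

    def scan_gargantext_safe(corpus_id, lang, request):
        # Same count query, with driver-side parameter binding and a
        # close() that actually executes (try/finally instead of dead code).
        connection = get_engine().connect()
        try:
            query = """SELECT count(n.id) FROM nodes n
                       WHERE to_tsvector(%s, hyperdata ->> 'abstract' || 'title')
                             @@ to_tsquery(%s)
                         AND n.parent_id = %s;"""
            return connection.execute(query, (lang, request, corpus_id)).scalar()
        finally:
            connection.close()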
+
+def myProject_fromUrl(url):
+    """
+    myProject :: String -> Project
+    """
+    project_id = url.split("/")[4]
+    project = session.query(ProjectNode).get(project_id)
+    return project
+
+def newCorpus(project, source, name=None, query=None):
+    error = False
+
+    if name is None:
+        name = query
+
+    if not isinstance(project, ProjectNode):
+        error = "a valid project"
+    if not isinstance(source, int) and not isinstance(source, str):
+        error = "a valid source identifier: id or name"
+    elif not isinstance(query, str):
+        error = "a valid query"
+    elif not isinstance(name, str):
+        error = "a valid name"
+
+    if error:
+        raise NotebookError("Please provide %s." % error)
+
+    resource = get_resource(source) if isinstance(source, int) else \
+               get_resource_by_name(source)
+    moissonneur_name = get_moissonneur_name(resource) if resource else \
+                       source.lower()
+
+    try:
+        moissonneur = get_moissonneur(moissonneur_name)
+    except ImportError:
+        raise NotebookError("Invalid source identifier: %r" % source)
+
+    return run_moissonneur(moissonneur, project, name, query)
+
+def get_moissonneur_name(ident):
+    """ Return moissonneur module name from RESOURCETYPE or crawler name """
+    # Does it quack like a RESOURCETYPE?
+    if hasattr(ident, 'get'):
+        ident = ident.get('crawler')
+    # Extract name from crawler class name, otherwise assume ident is already
+    # a moissonneur name.
+    if isinstance(ident, str) and ident.endswith('Crawler'):
+        return ident[:-len('Crawler')].lower()
+
+def get_moissonneur(name):
+    """ Return moissonneur module from its name """
+    if not isinstance(name, str) or not name.islower():
+        raise NotebookError("Invalid moissonneur name: %r" % name)
+    module = importlib.import_module('moissonneurs.%s' % name)
+    module.name = name
+    return module
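The two helpers above resolve a source to a crawler module. A small hedged sketch of the round trip; the dict stands in for a RESOURCETYPE entry and assumes moissonneurs.hal is importable:

    resource = {'crawler': 'HalCrawler'}    # stand-in RESOURCETYPE-like dict
    name = get_moissonneur_name(resource)   # 'HalCrawler' -> 'hal'
    hal = get_moissonneur(name)             # imports moissonneurs.hal
    print(hal.name)                         # 'hal', tagged on by get_moissonneur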
+
+def run_moissonneur(moissonneur, project, name, query):
+    """ Run moissonneur and return resulting corpus """
+    # XXX Uber-kludge with gory details. Spaghetti rulezzzzz!
+    class Dummy(object):
+        pass
+
+    request = Dummy()
+    request.method = 'POST'
+    request.path = 'nowhere'
+    request.META = {}
+    # XXX 'string' only has an effect on moissonneurs.pubmed; its value is
+    # added when processing the request client-side, take a deep breath and
+    # see templates/projects/project.html for more details.
+    request.POST = {'string': name,
+                    'query': query,
+                    'N': QUERY_SIZE_N_MAX}
+    request.user = Dummy()
+    request.user.id = project.user_id
+    request.user.is_authenticated = lambda: True
+
+    if moissonneur.name == 'istex':
+        # Replace ALL spaces by plus signs
+        request.POST['query'] = '+'.join(filter(None, query.split(' ')))
+
+    try:
+        import json
+        r = moissonneur.query(request)
+        raw_json = r.content.decode('utf-8')
+        data = json.loads(raw_json)
+
+        if moissonneur.name == 'pubmed':
+            count = sum(x['count'] for x in data)
+            request.POST['query'] = raw_json
+        elif moissonneur.name == 'istex':
+            count = data.get('total', 0)
+        else:
+            count = data.get('results_nb', 0)
+
+        if count > 0:
+            corpus = moissonneur.save(request, project.id, return_corpus=True)
+        else:
+            return None
+    except (ValueError, Http404) as e:
+        raise e
+
+    # Sometimes strange things happen...
+    if corpus.name != name:
+        corpus.name = name
+        session.commit()
+
+    return corpus
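Together these helpers give notebooks a small scripting API around the web moissonneurs. A minimal hedged usage sketch; the URL, project id and query are illustrative:

    project = myProject_fromUrl("http://localhost:8000/projects/42/")
    corpus = newCorpus(project, "hal", name="complexity", query="complex systems")
    if corpus is None:
        print("query returned no results")
    else:
        print(corpus.name, "->", len(documents(corpus.id)), "documents")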
moissonneurs/cern.py
...
@@ -30,7 +30,7 @@ def query( request):
     #ids = crawlerbot.get_ids(query)
     return JsonHttpResponse({"results_nb": crawlerbot.results_nb})
 
-def save(request, project_id):
+def save(request, project_id, return_corpus=False):
     '''save'''
     if request.method == "POST":
...
@@ -101,6 +101,9 @@ def save(request, project_id):
             session.rollback()
         # --------------------------------------------
 
+    if return_corpus:
+        return corpus
+
     return render(
         template_name = 'pages/projects/wait.html',
         request = request,
...
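The same return_corpus flag is threaded through every moissonneur below (hal, isidore, istex, multivac, pubmed). A short sketch of the two calling modes it enables, with request as built by run_moissonneur above:

    response = save(request, project_id)                    # web path: renders wait.html
    corpus = save(request, project_id, return_corpus=True)  # notebook path: corpus node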
moissonneurs/hal.py
...
@@ -33,7 +33,7 @@ def query( request):
     print(results)
     return JsonHttpResponse({"results_nb": crawlerbot.results_nb})
 
-def save(request, project_id):
+def save(request, project_id, return_corpus=False):
     '''save'''
     if request.method == "POST":
...
@@ -103,6 +103,9 @@ def save(request, project_id):
             session.rollback()
         # --------------------------------------------
 
+    if return_corpus:
+        return corpus
+
     return render(
         template_name = 'pages/projects/wait.html',
         request = request,
...
moissonneurs/isidore.py
...
@@ -29,7 +29,7 @@ def query( request):
     #ids = crawlerbot.get_ids(query)
     return JsonHttpResponse({"results_nb": crawlerbot.results_nb})
 
-def save(request, project_id):
+def save(request, project_id, return_corpus=False):
     '''save'''
     if request.method == "POST":
...
@@ -100,6 +100,9 @@ def save(request, project_id):
             session.rollback()
         # --------------------------------------------
 
+    if return_corpus:
+        return corpus
+
     return render(
         template_name = 'pages/projects/wait.html',
         request = request,
...
moissonneurs/istex.py
...
@@ -52,7 +52,7 @@ def query( request ):
-def save(request , project_id):
+def save(request , project_id, return_corpus=False):
     print("testISTEX:")
     print(request.method)
     alist = ["bar", "foo"]
...
@@ -171,6 +171,9 @@ def save(request , project_id):
             session.rollback()
         # --------------------------------------------
 
+    if return_corpus:
+        return corpus
+
     return render(
         template_name = 'pages/projects/wait.html',
         request = request,
...
moissonneurs/multivac.py
...
@@ -33,7 +33,7 @@ def query( request):
     print(results)
     return JsonHttpResponse({"results_nb": crawlerbot.results_nb})
 
-def save(request, project_id):
+def save(request, project_id, return_corpus=False):
     '''save'''
     if request.method == "POST":
...
@@ -104,6 +104,9 @@ def save(request, project_id):
             session.rollback()
         # --------------------------------------------
 
+    if return_corpus:
+        return corpus
+
     return render(
         template_name = 'pages/projects/wait.html',
         request = request,
...
moissonneurs/pubmed.py
...
@@ -69,7 +69,7 @@ def query( request ):
     return JsonHttpResponse(data)
 
-def save( request , project_id ) :
+def save( request , project_id , return_corpus=False ) :
     # implicit global session
     # do we have a valid project id?
     try:
...
@@ -164,6 +164,10 @@ def save( request , project_id ) :
             session.rollback()
         # --------------------------------------------
     sleep(1)
 
+    if return_corpus:
+        return corpus
+
     return HttpResponseRedirect('/projects/' + str(project_id))
 
     data = alist
...
AdvancedTutorial.ipynb → notebooks/.ipynb_checkpoints/AdvancedTutorial-checkpoint.ipynb
...
@@ -2,11 +2,38 @@
"cells": [
"cells": [
{
{
"cell_type": "markdown",
"cell_type": "markdown",
"metadata": {},
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"source": [
"# Advanced Gargantext Tutorial (Python)"
"# Advanced Gargantext Tutorial (Python)"
]
]
},
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"ename": "TypeError",
"evalue": "'list' object is not callable",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-3-a8e3501c9a54>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0msys\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0msys\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'/srv/gargantext'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m: 'list' object is not callable"
]
}
],
"source": [
"import sys\n",
"sys.pa"
]
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 1,
"execution_count": 1,
...
@@ -28,7 +55,9 @@
...
@@ -28,7 +55,9 @@
"cell_type": "code",
"cell_type": "code",
"execution_count": 8,
"execution_count": 8,
"metadata": {
"metadata": {
"collapsed": false
"collapsed": false,
"deletable": true,
"editable": true
},
},
"outputs": [
"outputs": [
{
{
...
...
notebooks/AdvancedTutorial.ipynb
0 → 100644 (new file; diff collapsed, not shown)
notebooks/gargantext_core_tutorial.ipynb
0 → 100644 (new file; diff collapsed, not shown)
templates/pages/projects/overview.html
...
@@ -203,6 +203,7 @@
      // do something…
      resetStatusForm("#createForm");
     })
+    return false;
   })
...
templates/pages/projects/project.html
...
@@ -57,7 +57,7 @@
<center
id=
"corpus"
class=
"help"
>
<center
id=
"corpus"
class=
"help"
>
<a
data-toggle=
"modal"
href=
"#addcorpus"
>
<a
data-toggle=
"modal"
href=
"#addcorpus"
>
<button
<button
type=
"button"
type=
"button"
...
@@ -440,11 +440,12 @@
...
@@ -440,11 +440,12 @@
// in the form "Add a corpus"
// in the form "Add a corpus"
var
type
=
$
(
"#id_type"
).
val
()
var
type
=
$
(
"#id_type"
).
val
()
var
file
=
$
(
"#id_file"
).
val
()
// 5 booleans
// 5 booleans
var
nameField
=
$
(
"#id_name"
).
val
()
!=
""
var
nameField
=
$
(
"#id_name"
).
val
()
!=
""
var
typeField
=
(
type
!=
""
)
&&
(
type
!=
"0"
)
var
typeField
=
(
type
!=
""
)
&&
(
type
!=
"0"
)
var
fileField
=
$
(
"#id_file"
).
val
()
!=
""
var
fileField
=
file
!=
""
var
wantfileField
=
$
(
"#file_yes"
).
prop
(
"checked"
)
var
wantfileField
=
$
(
"#file_yes"
).
prop
(
"checked"
)
var
crawling
=
((
type
==
3
)
||
(
type
==
8
)
||
(
type
==
9
))
&&
!
wantfileField
var
crawling
=
((
type
==
3
)
||
(
type
==
8
)
||
(
type
==
9
))
&&
!
wantfileField
...
@@ -457,6 +458,23 @@
...
@@ -457,6 +458,23 @@
     if (!crawling) {
         $("#submit_thing").prop('disabled', !(nameField && typeField && fileField))
     }
+
+    // Automatically select CSV when type is undefined
+    // and we have a .csv file
+    if (!typeField && file && file.match(/.csv$/i)) {
+        // Get CSV type id
+        var csv = $('#id_type > option')
+            .filter(function () { return $(this).text() === 'CSV' })
+            .attr('value')
+
+        // Select CSV type
+        $('#id_type').val(csv)
+
+        // Focus on name field
+        setTimeout(function () { $("#id_name").focus() })
+    }
 }
 
 function bringDaNoise() {
...
@@ -532,7 +550,7 @@
             $("#submit_thing").html("Process a {{ query_size }} sample!")
 
             thequeries = data
-            var N = 0, k = 0;
+            var N = 0;
             for (var i in thequeries) N += thequeries[i].count
             if (N > 0) {
...
@@ -571,12 +589,11 @@
             $("#submit_thing").html("Process a {{ query_size }} sample!")
 
             thequeries = data
-            var N = data.length, k = 0;
+            var N = data.total;
+            // for(var i in thequeries) N += thequeries[i].count
-            if (N > 1) {
-                var total = JSON.parse(data).total
-                console.log("N: " + total)
-                $("#theresults").html("<i> <b>" + pubmedquery + "</b>: " + total + " publications.</i><br>")
+            if (N > 0) {
+                console.log("N: " + N)
+                $("#theresults").html("<i> <b>" + pubmedquery + "</b>: " + N + " publications.</i><br>")
                 $('#submit_thing').prop('disabled', false);
             } else {
                 $("#theresults").html("<i> <b>" + data[0] + "</b></i><br>")
...
@@ -661,7 +678,7 @@
.
log
(
data
)
console
.
log
(
data
)
console
.
log
(
"SUCCESS"
)
console
.
log
(
"SUCCESS"
)
console
.
log
(
"enabling "
+
"#"
+
value
.
id
)
console
.
log
(
"enabling "
+
"#"
+
value
.
id
)
// $("#"+value.id).attr('onclick','getGlobalResults(this);');
// $("#"+value.id).attr('onclick','getGlobalResults(this);');
$
(
"#submit_thing"
).
prop
(
'disabled'
,
false
)
$
(
"#submit_thing"
).
prop
(
'disabled'
,
false
)
//$("#submit_thing").html("Process a {{ query_size }} sample!")
//$("#submit_thing").html("Process a {{ query_size }} sample!")
...
@@ -721,7 +738,7 @@
...
@@ -721,7 +738,7 @@
             console.log(data)
             console.log("SUCCESS")
             console.log("enabling " + "#" + value.id)
             // $("#"+value.id).attr('onclick','getGlobalResults(this);');
             $("#submit_thing").prop('disabled', false)
             //$("#submit_thing").html("Process a {{ query_size }} sample!")
...
@@ -781,7 +798,7 @@
             console.log(data)
             console.log("SUCCESS")
             console.log("enabling " + "#" + value.id)
             // $("#"+value.id).attr('onclick','getGlobalResults(this);');
             $("#submit_thing").prop('disabled', false)
             //$("#submit_thing").html("Process a {{ query_size }} sample!")
...
@@ -876,12 +893,12 @@
         console.log("selected:", selectedId);
         // by typeID: 3 = PUBMED, 8 = ISTEX, 9 = CERN
         if (selectedId == "3"
             || selectedId == "8"
             || selectedId == "9"
             || selectedId == "10"
             || selectedId == "11"
             || selectedId == "12"
            ) {
             console.log("show the button for: " + selectedId)
             $("#div-fileornot").css("visibility", "visible");
...
@@ -1019,16 +1036,16 @@
 function saveMultivac(query, N) {
     console.log("In Multivac")
     if (!query || query == "") return;
     console.log(query)
     //var origQuery = query
     var data = {"query": query, "N": N};
     // Replace all the slashes
     var projectid = window.location.href.split("projects")[1].replace(/\//g, '')
     console.log(data)
     $.ajax({
         dataType: 'json',
...
@@ -1066,16 +1083,16 @@
 function save(query, N, urlGarg) {
     console.log("In Gargantext")
     if (!query || query == "") return;
     console.log(query)
     //var origQuery = query
     var data = {"query": query, "N": N};
     // Replace all the slashes
     var projectid = window.location.href.split("projects")[1].replace(/\//g, '')
     console.log(data)
     $.ajax({
         dataType: 'json',
...