Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
b7d18edb
Commit
b7d18edb
authored
Jan 04, 2016
by
PkSM3
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'unstable' of
ssh://delanoe.org:1979/gargantext
into samuel
parents
00afa364
daf4e9d3
Changes
13
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
502 additions
and
200 deletions
+502
-200
document.js
annotations/static/annotations/document.js
+6
-1
main.html
annotations/templates/annotations/main.html
+10
-0
urls.py
annotations/urls.py
+6
-0
views.py
annotations/views.py
+4
-1
exec.py
exec.py
+11
-5
en.txt
init/stop_lists/en.txt
+13
-1
init_accounts.py
init_accounts.py
+6
-4
importExport.py
ngram/importExport.py
+301
-87
EuropressFileParser.py
parsing/FileParsers/EuropressFileParser.py
+128
-100
IsiFileParser.py
parsing/FileParsers/IsiFileParser.py
+1
-0
RisFileParser.py
parsing/FileParsers/RisFileParser.py
+1
-0
ZoteroFileParser.py
parsing/FileParsers/ZoteroFileParser.py
+1
-0
NgramsExtractor.py
parsing/NgramsExtractors/NgramsExtractor.py
+14
-1
No files found.
annotations/static/annotations/document.js
View file @
b7d18edb
...
@@ -2,10 +2,13 @@
...
@@ -2,10 +2,13 @@
'use strict'
;
'use strict'
;
var
annotationsAppDocument
=
angular
.
module
(
'annotationsAppDocument'
,
[
'annotationsAppHttp'
]);
var
annotationsAppDocument
=
angular
.
module
(
'annotationsAppDocument'
,
[
'annotationsAppHttp'
]);
annotationsAppDocument
.
controller
(
'DocController'
,
annotationsAppDocument
.
controller
(
'DocController'
,
[
'$scope'
,
'$rootScope'
,
'$timeout'
,
'NgramListHttpService'
,
'DocumentHttpService'
,
[
'$scope'
,
'$rootScope'
,
'$timeout'
,
'NgramListHttpService'
,
'DocumentHttpService'
,
function
(
$scope
,
$rootScope
,
$timeout
,
NgramListHttpService
,
DocumentHttpService
)
{
function
(
$scope
,
$rootScope
,
$timeout
,
NgramListHttpService
,
DocumentHttpService
)
{
// dataLoading = signal pour afficher wait
$scope
.
dataLoading
=
true
;
$rootScope
.
documentResource
=
DocumentHttpService
.
get
(
$rootScope
.
documentResource
=
DocumentHttpService
.
get
(
{
'docId'
:
$rootScope
.
docId
},
{
'docId'
:
$rootScope
.
docId
},
function
(
data
,
responseHeaders
)
{
function
(
data
,
responseHeaders
)
{
...
@@ -27,6 +30,7 @@
...
@@ -27,6 +30,7 @@
function
(
data
)
{
function
(
data
)
{
$rootScope
.
annotations
=
data
[
$rootScope
.
corpusId
.
toString
()][
$rootScope
.
docId
.
toString
()];
$rootScope
.
annotations
=
data
[
$rootScope
.
corpusId
.
toString
()][
$rootScope
.
docId
.
toString
()];
$rootScope
.
lists
=
data
[
$rootScope
.
corpusId
.
toString
()].
lists
;
$rootScope
.
lists
=
data
[
$rootScope
.
corpusId
.
toString
()].
lists
;
$scope
.
dataLoading
=
false
;
},
},
function
(
data
)
{
function
(
data
)
{
console
.
error
(
"unable to get the list of ngrams"
);
console
.
error
(
"unable to get the list of ngrams"
);
...
@@ -34,6 +38,7 @@
...
@@ -34,6 +38,7 @@
);
);
});
});
// TODO setup article pagination
// TODO setup article pagination
$scope
.
onPreviousClick
=
function
()
{
$scope
.
onPreviousClick
=
function
()
{
DocumentHttpService
.
get
(
$scope
.
docId
-
1
);
DocumentHttpService
.
get
(
$scope
.
docId
-
1
);
...
...
annotations/templates/annotations/main.html
View file @
b7d18edb
...
@@ -86,6 +86,16 @@
...
@@ -86,6 +86,16 @@
<li
class=
"list-group-item small"
><span
class=
"badge"
>
date
</span>
{[{publication_date}]}
</li>
<li
class=
"list-group-item small"
><span
class=
"badge"
>
date
</span>
{[{publication_date}]}
</li>
</ul>
</ul>
</div>
</div>
<div
ng-if=
"dataLoading"
>
Loading text...
<br>
<center>
<img
width=
"10%"
src=
"{% static 'img/ajax-loader.gif'%}"
></img>
</center>
<br>
</div>
<div
ng-if=
"abstract_text != null"
>
<div
ng-if=
"abstract_text != null"
>
<span
class=
"badge"
>
abstract
</span>
<span
class=
"badge"
>
abstract
</span>
</div>
</div>
...
...
annotations/urls.py
View file @
b7d18edb
...
@@ -2,9 +2,15 @@ from django.conf.urls import patterns, url
...
@@ -2,9 +2,15 @@ from django.conf.urls import patterns, url
from
annotations
import
views
from
annotations
import
views
# /!\ urls patterns here are *without* the trailing slash
urlpatterns
=
patterns
(
''
,
urlpatterns
=
patterns
(
''
,
# json:title,id,authors,journal,
# publication_date
# abstract_text,full_text
url
(
r'^document/(?P<doc_id>[0-9]+)$'
,
views
.
Document
.
as_view
()),
# document view
url
(
r'^document/(?P<doc_id>[0-9]+)$'
,
views
.
Document
.
as_view
()),
# document view
url
(
r'^corpus/(?P<corpus_id>[0-9]+)/document/(?P<doc_id>[0-9]+)$'
,
views
.
NgramList
.
as_view
()),
# the list associated with an ngram
url
(
r'^corpus/(?P<corpus_id>[0-9]+)/document/(?P<doc_id>[0-9]+)$'
,
views
.
NgramList
.
as_view
()),
# the list associated with an ngram
url
(
r'^lists/(?P<list_id>[0-9]+)/ngrams/(?P<ngram_ids>[0-9,\+]+)+$'
,
views
.
NgramEdit
.
as_view
()),
url
(
r'^lists/(?P<list_id>[0-9]+)/ngrams/(?P<ngram_ids>[0-9,\+]+)+$'
,
views
.
NgramEdit
.
as_view
()),
# POST (fixed 2015-12-16)
url
(
r'^lists/(?P<list_id>[0-9]+)/ngrams/create$'
,
views
.
NgramCreate
.
as_view
()),
#
url
(
r'^lists/(?P<list_id>[0-9]+)/ngrams/create$'
,
views
.
NgramCreate
.
as_view
()),
#
)
)
annotations/views.py
View file @
b7d18edb
...
@@ -13,7 +13,7 @@ from rest_framework.exceptions import APIException
...
@@ -13,7 +13,7 @@ from rest_framework.exceptions import APIException
from
rest_framework.authentication
import
SessionAuthentication
,
BasicAuthentication
from
rest_framework.authentication
import
SessionAuthentication
,
BasicAuthentication
from
node.models
import
Node
from
node.models
import
Node
from
gargantext_web.db
import
session
,
cache
,
Node
,
NodeNgram
from
gargantext_web.db
import
session
,
cache
,
Node
,
NodeNgram
,
Ngram
from
ngram.lists
import
listIds
,
listNgramIds
from
ngram.lists
import
listIds
,
listNgramIds
from
gargantext_web.db
import
get_or_create_node
from
gargantext_web.db
import
get_or_create_node
...
@@ -138,6 +138,8 @@ class NgramCreate(APIView):
...
@@ -138,6 +138,8 @@ class NgramCreate(APIView):
def
post
(
self
,
request
,
list_id
):
def
post
(
self
,
request
,
list_id
):
"""
"""
create NGram in a given list
create NGram in a given list
example: request.data = {'text': 'phylogeny'}
"""
"""
list_id
=
int
(
list_id
)
list_id
=
int
(
list_id
)
# format the ngram's text
# format the ngram's text
...
@@ -161,6 +163,7 @@ class NgramCreate(APIView):
...
@@ -161,6 +163,7 @@ class NgramCreate(APIView):
ngram_id
=
ngram
.
id
ngram_id
=
ngram
.
id
# create the new node_ngram relation
# create the new node_ngram relation
# TODO check existing Node_Ngram ?
# TODO check existing Node_Ngram ?
# £TODO ici indexation
node_ngram
=
NodeNgram
(
node_id
=
list_id
,
ngram_id
=
ngram_id
,
weight
=
1.0
)
node_ngram
=
NodeNgram
(
node_id
=
list_id
,
ngram_id
=
ngram_id
,
weight
=
1.0
)
session
.
add
(
node_ngram
)
session
.
add
(
node_ngram
)
session
.
commit
()
session
.
commit
()
...
...
exec.py
View file @
b7d18edb
...
@@ -11,8 +11,10 @@ from ngram.mapList import compute_mapList
...
@@ -11,8 +11,10 @@ from ngram.mapList import compute_mapList
from
gargantext_web.db
import
NodeNgram
from
gargantext_web.db
import
NodeNgram
from
admin.utils
import
WorkflowTracking
from
admin.utils
import
WorkflowTracking
from
ngram.importExport
import
exportNgramList
,
importNgramList
from
ngram.importExport
import
exportNgramList
,
importNgramList
from
analysis.periods
import
phylo_clusters
from
ngram.occurrences
import
compute_occs
def
ngram_workflow
(
corpus
,
n
=
5000
):
def
ngram_workflow
(
corpus
,
n
=
5000
):
'''
'''
...
@@ -50,13 +52,17 @@ def ngram_workflow(corpus, n=5000):
...
@@ -50,13 +52,17 @@ def ngram_workflow(corpus, n=5000):
# update_state.processing_(corpus, "TF-IDF local score")
# update_state.processing_(corpus, "TF-IDF local score")
# compute_tfidf(corpus)
# compute_tfidf(corpus)
# update_state.processing_(corpus, "OCCS local score")
# update_state.processing_(corpus, "OCCS local score")
#
compute_occs(corpus)
compute_occs
(
corpus
)
update_state
.
processing_
(
corpus
,
"0"
)
#
update_state.processing_(corpus, "0")
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
node_id
=
sys
.
argv
[
1
]
node_id
=
sys
.
argv
[
1
]
corpus
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
node_id
)
.
first
()
corpus
=
session
.
query
(
Node
)
.
filter
(
Node
.
id
==
node_id
)
.
first
()
exportNgramList
(
corpus
,
"list.csv"
)
#importNgramList(corpus, "list.csv")
#ngram_workflow(corpus)
#ngram_workflow(corpus)
#exportNgramList(corpus, "list.csv")
#importNgramList(corpus, "list.csv")
phylo_clusters
(
corpus
,
range
(
2012
,
2016
))
init/stop_lists/en.txt
View file @
b7d18edb
-
-
...
@@ -462,3 +461,16 @@ your
...
@@ -462,3 +461,16 @@ your
yours
yours
yourself
yourself
yourselves
yourselves
Francis group
© taylor
copyright taylor
copyright © taylor
copyright ©
springer science+business media
sage publications
oxford university press
© springer international publishing switzerland
john wiley
elsevier ltd
© fpi
elsevier inc
init_accounts.py
View file @
b7d18edb
...
@@ -14,16 +14,18 @@ def notify_user(username, email, password):
...
@@ -14,16 +14,18 @@ def notify_user(username, email, password):
Votre login est:
%
s
Votre login est:
%
s
Votre mot de passe est :
%
s
Votre mot de passe est :
%
s
Nous restons votre disposition pour tout complément d'information.
En janvier prochain, il y aura une formation Gargantext (gratuite).
Inscription obligatoire pour les dernière places:
http://iscpif.fr/event/formation-gargantext/
Nous restons à votre disposition pour tout complément d'information.
Cordialement
Cordialement
--
--
L'équipe de Gargantext (CNRS)
L'équipe de Gargantext (CNRS)
'''
%
(
username
,
password
)
'''
%
(
username
,
password
)
send_mail
(
'[Gargantext] Votre compte'
,
message
,
'alexandre.delanoe@mines-paristech.fr'
,
[
email
],
fail_silently
=
False
)
send_mail
(
'[Gargantext] Votre accès à la plateforme'
,
message
,
'alexandre.delanoe@iscpif.fr'
,
[
email
],
fail_silently
=
False
)
#send_mail('[Gargantext] Votre compte', message, 'alexandre.delanoe@mines-paristech.fr', [email], ['alexandre@delanoe.org'] )
# add option for mass sending email
# add option for mass sending email
...
...
ngram/importExport.py
View file @
b7d18edb
This diff is collapsed.
Click to expand it.
parsing/FileParsers/EuropressFileParser.py
View file @
b7d18edb
This diff is collapsed.
Click to expand it.
parsing/FileParsers/IsiFileParser.py
View file @
b7d18edb
...
@@ -15,6 +15,7 @@ class IsiFileParser(RisFileParser):
...
@@ -15,6 +15,7 @@ class IsiFileParser(RisFileParser):
b
"TI"
:
{
"type"
:
"hyperdata"
,
"key"
:
"title"
,
"separator"
:
" "
},
b
"TI"
:
{
"type"
:
"hyperdata"
,
"key"
:
"title"
,
"separator"
:
" "
},
b
"AU"
:
{
"type"
:
"hyperdata"
,
"key"
:
"authors"
,
"separator"
:
", "
},
b
"AU"
:
{
"type"
:
"hyperdata"
,
"key"
:
"authors"
,
"separator"
:
", "
},
b
"DI"
:
{
"type"
:
"hyperdata"
,
"key"
:
"doi"
},
b
"DI"
:
{
"type"
:
"hyperdata"
,
"key"
:
"doi"
},
b
"SO"
:
{
"type"
:
"hyperdata"
,
"key"
:
"journal"
},
b
"PY"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_year"
},
b
"PY"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_year"
},
b
"PD"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_month"
},
b
"PD"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_month"
},
b
"LA"
:
{
"type"
:
"hyperdata"
,
"key"
:
"language_fullname"
},
b
"LA"
:
{
"type"
:
"hyperdata"
,
"key"
:
"language_fullname"
},
...
...
parsing/FileParsers/RisFileParser.py
View file @
b7d18edb
...
@@ -19,6 +19,7 @@ class RisFileParser(FileParser):
...
@@ -19,6 +19,7 @@ class RisFileParser(FileParser):
b
"TI"
:
{
"type"
:
"hyperdata"
,
"key"
:
"title"
,
"separator"
:
" "
},
b
"TI"
:
{
"type"
:
"hyperdata"
,
"key"
:
"title"
,
"separator"
:
" "
},
b
"ST"
:
{
"type"
:
"hyperdata"
,
"key"
:
"subtitle"
,
"separator"
:
" "
},
b
"ST"
:
{
"type"
:
"hyperdata"
,
"key"
:
"subtitle"
,
"separator"
:
" "
},
b
"AU"
:
{
"type"
:
"hyperdata"
,
"key"
:
"authors"
,
"separator"
:
", "
},
b
"AU"
:
{
"type"
:
"hyperdata"
,
"key"
:
"authors"
,
"separator"
:
", "
},
b
"T2"
:
{
"type"
:
"hyperdata"
,
"key"
:
"journal"
},
b
"UR"
:
{
"type"
:
"hyperdata"
,
"key"
:
"doi"
},
b
"UR"
:
{
"type"
:
"hyperdata"
,
"key"
:
"doi"
},
b
"PY"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_year"
},
b
"PY"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_year"
},
b
"PD"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_month"
},
b
"PD"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_month"
},
...
...
parsing/FileParsers/ZoteroFileParser.py
View file @
b7d18edb
...
@@ -12,6 +12,7 @@ class ZoteroFileParser(RisFileParser):
...
@@ -12,6 +12,7 @@ class ZoteroFileParser(RisFileParser):
b
"ER"
:
{
"type"
:
"delimiter"
},
b
"ER"
:
{
"type"
:
"delimiter"
},
b
"TI"
:
{
"type"
:
"hyperdata"
,
"key"
:
"title"
,
"separator"
:
" "
},
b
"TI"
:
{
"type"
:
"hyperdata"
,
"key"
:
"title"
,
"separator"
:
" "
},
b
"AU"
:
{
"type"
:
"hyperdata"
,
"key"
:
"authors"
,
"separator"
:
", "
},
b
"AU"
:
{
"type"
:
"hyperdata"
,
"key"
:
"authors"
,
"separator"
:
", "
},
b
"T2"
:
{
"type"
:
"hyperdata"
,
"key"
:
"journal"
},
b
"UR"
:
{
"type"
:
"hyperdata"
,
"key"
:
"doi"
},
b
"UR"
:
{
"type"
:
"hyperdata"
,
"key"
:
"doi"
},
b
"DA"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_date_to_parse"
},
b
"DA"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_date_to_parse"
},
b
"PY"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_year"
},
b
"PY"
:
{
"type"
:
"hyperdata"
,
"key"
:
"publication_year"
},
...
...
parsing/NgramsExtractors/NgramsExtractor.py
View file @
b7d18edb
# from ..Taggers import NltkTagger
# from ..Taggers import NltkTagger
from
..Taggers
import
TurboTagger
from
..Taggers
import
TurboTagger
import
nltk
import
nltk
from
re
import
sub
"""Base class for all ngrams extractors.
"""Base class for all ngrams extractors.
...
@@ -33,9 +34,21 @@ class NgramsExtractor:
...
@@ -33,9 +34,21 @@ class NgramsExtractor:
Returns a list of the ngrams found in the given text.
Returns a list of the ngrams found in the given text.
"""
"""
def
extract_ngrams
(
self
,
contents
):
def
extract_ngrams
(
self
,
contents
):
tagged_tokens
=
list
(
self
.
tagger
.
tag_text
(
contents
))
clean_contents
=
self
.
_prepare_text
(
contents
)
# ici tagging
tagged_tokens
=
list
(
self
.
tagger
.
tag_text
(
clean_contents
))
if
len
(
tagged_tokens
):
if
len
(
tagged_tokens
):
grammar_parsed
=
self
.
_grammar
.
parse
(
tagged_tokens
)
grammar_parsed
=
self
.
_grammar
.
parse
(
tagged_tokens
)
for
subtree
in
grammar_parsed
.
subtrees
():
for
subtree
in
grammar_parsed
.
subtrees
():
if
subtree
.
label
()
==
self
.
_label
:
if
subtree
.
label
()
==
self
.
_label
:
yield
subtree
.
leaves
()
yield
subtree
.
leaves
()
@
staticmethod
def
_prepare_text
(
text_contents
):
"""
Clean the text for better POS tagging
"""
# strip xml tags
return
sub
(
r"<[^>]{0,45}>"
,
""
,
text_contents
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment