Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
2b82c054
Commit
2b82c054
authored
May 11, 2016
by
Romain Loth
Browse files
Options
Browse Files
Download
Plain Diff
merge in refactoring
parents
8499ab9a
5e7c5603
Changes
15
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
109 additions
and
64 deletions
+109
-64
constants.py
gargantext/constants.py
+4
-4
CSV.py
gargantext/util/parsers/CSV.py
+2
-1
list_map.py
gargantext/util/toolchain/list_map.py
+5
-5
list_stop.py
gargantext/util/toolchain/list_stop.py
+2
-2
metric_specificity.py
gargantext/util/toolchain/metric_specificity.py
+18
-6
metric_tfidf.py
gargantext/util/toolchain/metric_tfidf.py
+9
-1
nodes.py
gargantext/views/api/nodes.py
+3
-2
Docs_dyna_chart_and_table.js
static/lib/gargantext/Docs_dyna_chart_and_table.js
+2
-2
Journals_dyna_chart_and_table.js
static/lib/gargantext/Journals_dyna_chart_and_table.js
+1
-1
journals.html
templates/pages/corpora/journals.html
+6
-3
terms.html
templates/pages/corpora/terms.html
+5
-2
titles.html
templates/pages/corpora/titles.html
+6
-3
about.html
templates/pages/main/about.html
+1
-1
menu.html
templates/pages/menu.html
+41
-29
project.html
templates/pages/projects/project.html
+4
-2
No files found.
gargantext/constants.py
View file @
2b82c054
...
...
@@ -195,17 +195,17 @@ RESOURCETYPES = [
]
# linguistic extraction parameters ---------------------------------------------
DEFAULT_TFIDF_CUTOFF_RATIO
=
.
4
5
# MAINLIST maximum terms in %
DEFAULT_TFIDF_CUTOFF_RATIO
=
.
7
5
# MAINLIST maximum terms in %
DEFAULT_TFIDF_HARD_LIMIT
=
750
# MAINLIST maximum terms abs
DEFAULT_TFIDF_HARD_LIMIT
=
5000
# MAINLIST maximum terms abs
# (makes COOCS larger ~ O(N²) /!\)
DEFAULT_COOC_THRESHOLD
=
2
# inclusive minimum for COOCS coefs
# (makes COOCS more sparse)
DEFAULT_MAPLIST_MAX
=
3
0
0
# MAPLIST maximum terms
DEFAULT_MAPLIST_MAX
=
3
5
0
# MAPLIST maximum terms
DEFAULT_MAPLIST_MONOGRAMS_RATIO
=
.5
# part of monograms in MAPLIST
DEFAULT_MAPLIST_MONOGRAMS_RATIO
=
.
1
5
# part of monograms in MAPLIST
DEFAULT_MAX_NGRAM_LEN
=
7
# limit used after POStagging rule
# (initial ngrams number is a power law of this /!\)
...
...
gargantext/util/parsers/CSV.py
View file @
2b82c054
...
...
@@ -124,7 +124,8 @@ class CSVParser(Parser):
for
columnum
in
range
(
Coords
[
"column"
],
len
(
tokens
)
):
data
=
tokens
[
columnum
]
RecordDict
[
Headers_Int2Str
[
columnum
]
]
=
data
hyperdata_list
.
append
(
RecordDict
)
if
len
(
RecordDict
.
keys
())
>
0
:
hyperdata_list
.
append
(
RecordDict
)
# # = = = = [ / Reading the whole CSV and saving ] = = = = #
return
hyperdata_list
gargantext/util/toolchain/list_map.py
View file @
2b82c054
...
...
@@ -7,7 +7,7 @@ from gargantext.models.ngrams import Node, Ngram, NodeNgram, \
from
gargantext.util.db
import
session
,
aliased
,
func
from
gargantext.util.db_cache
import
cache
from
gargantext.util.lists
import
UnweightedList
from
sqlalchemy
import
desc
from
sqlalchemy
import
desc
,
asc
from
gargantext.constants
import
DEFAULT_MAPLIST_MAX
,
\
DEFAULT_MAPLIST_MONOGRAMS_RATIO
...
...
@@ -52,7 +52,7 @@ def do_maplist(corpus,
primary_groupterms_subquery
=
(
session
# we want only primary terms (ngram1)
.
query
(
NodeNgramNgram
.
ngram
1
_id
)
.
query
(
NodeNgramNgram
.
ngram
2
_id
)
.
filter
(
NodeNgramNgram
.
node_id
==
grouplist_id
)
.
subquery
()
)
...
...
@@ -64,13 +64,13 @@ def do_maplist(corpus,
.
join
(
Ngram
,
Ngram
.
id
==
ScoreSpec
.
ngram_id
)
.
filter
(
ScoreSpec
.
node_id
==
specificity_id
)
.
filter
(
ScoreSpec
.
ngram_id
.
in_
(
mainterms_subquery
))
.
filter
(
ScoreSpec
.
ngram_id
.
in_
(
primary_groupterms_subquery
))
.
filter
(
ScoreSpec
.
ngram_id
.
not
in_
(
primary_groupterms_subquery
))
)
# TODO: move these 2 pools up to mainlist selection
top_monograms
=
(
query
.
filter
(
Ngram
.
n
==
1
)
.
order_by
(
de
sc
(
ScoreSpec
.
weight
))
.
order_by
(
a
sc
(
ScoreSpec
.
weight
))
.
limit
(
monograms_limit
)
.
all
()
)
...
...
@@ -81,7 +81,7 @@ def do_maplist(corpus,
.
limit
(
multigrams_limit
)
.
all
()
)
obtained_mono
=
len
(
top_monograms
)
obtained_mono
=
len
(
top_monograms
)
obtained_multi
=
len
(
top_multigrams
)
obtained_total
=
obtained_mono
+
obtained_multi
# print("MAPLIST: top_monograms =", obtained_mono)
...
...
gargantext/util/toolchain/list_stop.py
View file @
2b82c054
...
...
@@ -27,10 +27,10 @@ def is_stop_word(ngram, stop_words=None):
# , "(.*)(\.)(.*)" trop fort (enlève les sigles !)
,
"(.*)(
\
,)(.*)"
,
"(.*)(< ?/?p ?>)(.*)"
# marques de paragraphes
,
"(.*)(study)(.*)"
,
"(.*)(study
|elsevier
)(.*)"
,
"(.*)
\b
(xx|xi|xv)
\b
(.*)"
,
"(.*)(result)(.*)"
,
"(.*)(année|nombre|moitié)(.*)"
,
"(.*)(
year|
année|nombre|moitié)(.*)"
,
"(.*)(temps)(.*)"
,
"(.*)(
%
)(.*)"
,
"(.*)(
\
{)(.*)"
...
...
gargantext/util/toolchain/metric_specificity.py
View file @
2b82c054
...
...
@@ -7,6 +7,7 @@ from gargantext.util.db import session, aliased, func, bulk_insert
from
gargantext.util.lists
import
WeightedList
from
collections
import
defaultdict
from
pandas
import
DataFrame
import
pandas
as
pd
def
compute_specificity
(
corpus
,
cooc_id
=
None
,
overwrite_id
=
None
):
'''
...
...
@@ -33,13 +34,23 @@ def compute_specificity(corpus, cooc_id=None, overwrite_id = None):
print
(
"SPECIFICITY: computing on
%
i ngrams"
%
nb_ngrams
)
d
=
DataFrame
(
matrix
)
.
fillna
(
0
)
x
=
DataFrame
(
matrix
)
.
fillna
(
0
)
# proba (x/y) ( <= on divise chaque
colon
ne par son total)
d
=
d
/
d
.
sum
(
axis
=
0
)
# proba (x/y) ( <= on divise chaque
lig
ne par son total)
x
=
x
/
x
.
sum
(
axis
=
1
)
# vectorisation
# d:Matrix => v: Vector (len = nb_ngrams)
v
=
d
.
sum
(
axis
=
1
)
# v = d.sum(axis=1) (- lui-même)
xs
=
x
.
sum
(
axis
=
1
)
-
x
ys
=
x
.
sum
(
axis
=
0
)
-
x
# top inclus ou exclus
#n = ( xs + ys) / (2 * (x.shape[0] - 1))
# top generic or specific (asc is spec, desc is generic)
v
=
(
xs
-
ys
)
/
(
2
*
(
x
.
shape
[
0
]
-
1
))
## d ##
#######
...
...
@@ -66,7 +77,7 @@ def compute_specificity(corpus, cooc_id=None, overwrite_id = None):
# pour l'instant on va utiliser les sommes en ligne comme ranking de spécificité
# (**même** ordre qu'avec la formule d'avant le refactoring mais calcul + simple)
# TODO analyser la cohérence math ET sem de cet indicateur
v
.
sort_values
(
inplace
=
True
)
#
v.sort_values(inplace=True)
# [ ('biodiversité' , 0.333 ),
# ('Grenelle' , 0.5 ),
...
...
@@ -92,10 +103,11 @@ def compute_specificity(corpus, cooc_id=None, overwrite_id = None):
the_id
=
specnode
.
id
# print(v)
pd
.
options
.
display
.
float_format
=
'${:,.2f}'
.
format
data
=
WeightedList
(
zip
(
v
.
index
.
tolist
()
,
v
.
values
.
tolist
()
,
v
.
values
.
tolist
()
[
0
]
)
)
data
.
save
(
the_id
)
...
...
gargantext/util/toolchain/metric_tfidf.py
View file @
2b82c054
...
...
@@ -8,7 +8,7 @@ FIXME: "having the same source" means we need to select inside hyperdata
with a (perhaps costly) JSON query: WHERE hyperdata->'resources' @> ...
"""
from
gargantext.models
import
Node
,
NodeNgram
,
NodeNodeNgram
from
gargantext.models
import
Node
,
NodeNgram
,
NodeNodeNgram
,
NodeNgramNgram
from
gargantext.util.db
import
session
,
bulk_insert
,
func
# = sqlalchemy.func like sum() or count()
from
sqlalchemy
import
text
# for query from raw SQL statement
from
math
import
log
...
...
@@ -29,6 +29,13 @@ def compute_occs(corpus, overwrite_id = None):
- overwrite_id: optional id of a pre-existing OCCURRENCES node for this corpus
(the Node and its previous NodeNodeNgram rows will be replaced)
"""
# 0) Get the groups
group_id
=
(
session
.
query
(
Node
.
id
)
.
filter
(
Node
.
parent_id
==
corpus
.
id
)
.
filter
(
Node
.
typename
==
"GROUPLIST"
)
.
first
()
)
# 1) all the doc_ids of our corpus (scope of counts for filter)
# slower alternative: [doc.id for doc in corpus.children('DOCUMENT').all()]
...
...
@@ -45,6 +52,7 @@ def compute_occs(corpus, overwrite_id = None):
NodeNgram
.
ngram_id
,
func
.
sum
(
NodeNgram
.
weight
)
)
#.join(NodeNgramNgram, NodeNgramNgram.node_id == group_id)
.
filter
(
NodeNgram
.
node_id
.
in_
(
docids_subquery
))
.
group_by
(
NodeNgram
.
ngram_id
)
.
all
()
...
...
gargantext/views/api/nodes.py
View file @
2b82c054
...
...
@@ -84,9 +84,10 @@ class NodeListResource(APIView):
response
=
HttpResponse
(
content_type
=
'text/csv'
)
response
[
'Content-Disposition'
]
=
'attachment; filename="Gargantext_Corpus.csv"'
writer
=
csv
.
writer
(
response
)
writer
=
csv
.
writer
(
response
,
delimiter
=
'
\t
'
)
keys
=
[
'title'
,
'journal'
,
'publication_date'
keys
=
[
'title'
,
'journal'
,
'publication_year'
,
'publication_month'
,
'publication_day'
,
'abstract'
,
'authors'
]
writer
.
writerow
(
keys
)
...
...
static/lib/gargantext/Docs_dyna_chart_and_table.js
View file @
2b82c054
...
...
@@ -170,7 +170,7 @@ function toggleFavstatus (rec_id) {
var
myHttpAction
=
statusBefore
?
'DELETE'
:
'PUT'
$
.
ajax
({
url
:
'http://localhost:8000
/api/nodes/'
+
corpus_id
+
'/favorites?docs='
+
doc_id
,
url
:
window
.
location
.
origin
+
'
/api/nodes/'
+
corpus_id
+
'/favorites?docs='
+
doc_id
,
type
:
myHttpAction
,
beforeSend
:
function
(
xhr
)
{
xhr
.
setRequestHeader
(
"X-CSRFToken"
,
getCookie
(
"csrftoken"
));
...
...
@@ -602,7 +602,7 @@ $.ajax({
success
:
function
(
maindata
){
// unfortunately favorites info is in a separate request (other nodes)
$
.
ajax
({
url
:
'http://localhost:8000
/api/nodes/'
+
corpus_id
+
'/favorites'
,
url
:
window
.
location
.
origin
+
'
/api/nodes/'
+
corpus_id
+
'/favorites'
,
success
:
function
(
favdata
){
// initialize favs lookup
for
(
var
i
in
favdata
[
'favdocs'
])
{
...
...
static/lib/gargantext/Journals_dyna_chart_and_table.js
View file @
2b82c054
...
...
@@ -290,7 +290,7 @@ function Main_test( data , initial) {
var
div_table
=
'<p align="right">'
+
"
\n
"
div_table
+=
'<table id="my-ajax-table" class="table table-bordered table-hover">'
+
"
\n
"
div_table
+=
"
\
t"
+
'<thead>'
+
"
\n
"
div_table
+=
"
\
t"
+
"
\
t"
+
'<th data-dynatable-column="name">Title</th>'
+
"
\n
"
div_table
+=
"
\
t"
+
"
\
t"
+
'<th data-dynatable-column="name">
<span class="glyphicon glyphicon-text-size"></span>
Title</th>'
+
"
\n
"
div_table
+=
"
\
t"
+
"
\
t"
+
'<th data-dynatable-column="score" data-dynatable-sorts="score">No. Pubs</th>'
+
"
\n
"
// div_table += "\t"+"\t"+'<th id="score_column_id" data-dynatable-sorts="score" data-dynatable-column="score">Score</th>'+"\n"
div_table
+=
"
\
t"
+
"
\
t"
+
'</th>'
+
"
\n
"
...
...
templates/pages/corpora/journals.html
View file @
2b82c054
...
...
@@ -59,9 +59,12 @@
<div
class=
"panel panel-default"
>
<div
class=
"panel-heading"
>
<h4
class=
"panel-title"
>
Publications by source
</h4>
<h2
class=
"panel-title"
>
<center>
<span
class=
"glyphicon glyphicon-hand-down"
aria-hidden=
"true"
></span>
Publications by source
</center>
</h2>
</div>
...
...
templates/pages/corpora/terms.html
View file @
2b82c054
...
...
@@ -55,12 +55,15 @@
<div
class=
"panel panel-default"
>
<div
class=
"panel-heading"
>
<h4
class=
"panel-title"
>
<h2
class=
"panel-title"
>
<center>
<span
class=
"glyphicon glyphicon-hand-down"
aria-hidden=
"true"
></span>
Extracted terms
<!-- <button title='run test function' onclick="doATest()">
TEST
</button> -->
</a>
</center>
</h2>
<!-- see in javascript function queries.functions['my_state_filter'] -->
<div
class=
"pull-left"
style=
"margin-top:1.85em;"
>
...
...
templates/pages/corpora/titles.html
View file @
2b82c054
...
...
@@ -54,9 +54,12 @@
<div
class=
"jumbotron"
>
<div
class=
"panel panel-default"
>
<div
class=
"panel-heading"
>
<h4
class=
"panel-title"
>
Publications by title
</h4>
<h2
class=
"panel-title"
>
<center>
<span
class=
"glyphicon glyphicon-hand-down"
aria-hidden=
"true"
></span>
Publications by title
</center>
</h2>
<!-- search box with custom function in Docs_dyna_chart_and_tables.js -->
<div
class=
"pull-left"
style=
"margin-top:1.85em; font-size: 16px;"
>
<span
class=
"glyphicon glyphicon-search"
aria-hidden=
"true"
></span>
...
...
templates/pages/main/about.html
View file @
2b82c054
...
...
@@ -41,7 +41,7 @@
<li>
Version 3.0.0
<ul>
<li>
[NAME] Blue Jasmin
e
</li>
<li>
[NAME] Blue Jasmin
</li>
<li>
[CODE] Refactored
</li>
<li>
[DATABASE] New schema
</li>
</ul>
...
...
templates/pages/menu.html
View file @
2b82c054
...
...
@@ -111,14 +111,26 @@
</a>
<i
class=
"caret"
></i>
<ul
class=
"dropdown-menu"
>
<li>
<a
tabindex=
"-1"
data-url=
"/projects/{{project.id}}/corpora/{{ corpus.id }}/explorer?field1=ngrams&field2=ngrams&distance=conditional&bridgeness=5"
onclick=
'gotoexplorer(this)'
>
With conditional distance
</a>
</li>
<li>
<a
tabindex=
"-1"
data-url=
"/projects/{{project.id}}/corpora/{{ corpus.id }}/explorer?field1=ngrams&field2=ngrams&distance=distributional&bridgeness=5"
onclick=
'gotoexplorer(this)'
>
With distributional distance
</a>
</li>
{% if view != "graph" %}
<li>
<a
tabindex=
"-1"
data-url=
"/projects/{{project.id}}/corpora/{{ corpus.id }}/explorer?field1=ngrams&field2=ngrams&distance=conditional&bridgeness=5"
onclick=
'gotoexplorer(this)'
>
With conditional distance
</a>
</li>
<li>
<a
tabindex=
"-1"
data-url=
"/projects/{{project.id}}/corpora/{{ corpus.id }}/explorer?field1=ngrams&field2=ngrams&distance=distributional&bridgeness=5"
onclick=
'gotoexplorer(this)'
>
With distributional distance
</a>
</li>
{% else %}
<li>
<a
tabindex=
"-1"
href=
"/projects/{{project.id}}/corpora/{{ corpus.id }}/explorer?field1=ngrams&field2=ngrams&distance=conditional&bridgeness=5"
>
With conditional distance
</a>
</li>
<li>
<a
tabindex=
"-1"
href=
"/projects/{{project.id}}/corpora/{{ corpus.id }}/explorer?field1=ngrams&field2=ngrams&distance=distributional&bridgeness=5"
>
With distributional distance
</a>
</li>
{% endif %}
</ul>
</li>
{% endif %}
...
...
@@ -138,30 +150,30 @@
<div
class=
"jumbotron"
style=
"margin-bottom:0"
>
<br>
<br>
<!--
<a type="button" class="btn btn-default
href="/projects/{{project.id}}/corpora/{{ corpus.id }}/">Export corpus</a>
--!>
<!-- <li class="divider"></li> --!>
<div
class=
"row"
>
<div class="col-md-5">
{% if project %}
<h3><a href="/projects/{{project.id}}">
<span class="glyphicon glyphicon-book" aria-hidden="true"></span>
{{ project.name }}
<h3>
<a
href=
"/projects/{{project.id}}"
>
<span
class=
"glyphicon glyphicon-book"
aria-hidden=
"true"
></span>
{{ project.name | truncatechars:50}}
</a>
</h3>
</div>
<div
class=
"row"
>
<div
class=
"col-md-1"
>
</div>
<div
class=
"col-md-6"
>
<h3>
<span
class=
"glyphicon glyphicon-cd"
aria-hidden=
"true"
></span>
{{ resourcename | truncatechars:20 }}
</h3>
<h3>
<span
class=
"glyphicon glyphicon-file"
aria-hidden=
"true"
></span>
{{ corpus.name | truncatechars:20 }}
<a
class=
"btn btn-primary"
role=
"button"
href=
"/api/nodes?parent_id={{corpus.id}}&types[]=DOCUMENT&pagination_limit=100000&formated=csv"
>
<span
class=
"glyphicon glyphicon-download"
aria-hidden=
"true"
></span>
</a>
<br>
<span class="glyphicon glyphicon-cd" aria-hidden="true"></span>
{{ resourcename | truncatechars:20 }}
<br>
<span class="glyphicon glyphicon-file" aria-hidden="true"></span>
{{ corpus.name }}
<br>
<span class="glyphicon glyphicon-calendar" aria-hidden="true"></span>
{{ corpus.date }}
</h3>
{% endif %}
</div>
<div
class=
"col-md-5"
>
<h3>
...
...
templates/pages/projects/project.html
View file @
2b82c054
...
...
@@ -73,6 +73,7 @@
{% for key, corpora in list_corpora.items %}
<h2>
<div
class=
"row"
>
<div
class=
"col-md-1 content"
></div>
<span
class=
"glyphicon glyphicon-cd"
aria-hidden=
"true"
></span>
{{ key }}
</h2>
...
...
@@ -80,6 +81,7 @@
<div
id=
"corpus_{{corpus.id}}"
>
<div
class=
"row"
>
<h4>
<div
class=
"col-md-1 content"
></div>
<div
class=
"col-md-5 content"
>
<a
href=
"/projects/{{project.id}}/corpora/{{corpus.id}}"
>
<span
class=
"glyphicon glyphicon-file"
aria-hidden=
"true"
></span>
...
...
@@ -108,8 +110,7 @@
<span
class=
"glyphicon glyphicon-trash"
aria-hidden=
"true"
></span>
</button>
</div>
<div
class=
"col-md-5 content"
>
<div
class=
"col-md-3 content"
>
{% for state in corpus.hyperdata.statuses %}
{% ifequal state.action "ngrams_extraction" %}
{% if state.complete %}
...
...
@@ -169,6 +170,7 @@
{% endifequal %}
{% endfor %}
</div>
<div
class=
"col-md-1 content"
></div>
</h4>
</div>
</div>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment