Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
c7526ad7
Commit
c7526ad7
authored
Apr 01, 2016
by
delanoe
Browse files
Options
Browse Files
Download
Plain Diff
[FIX] merge url conflicts.
parents
8be7e5a7
a77ea0cf
Changes
10
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
348 additions
and
137 deletions
+348
-137
ngram_parsing_flow.dot
doc/schemas/ngram_parsing_flow.dot
+20
-0
ngram_parsing_flow.png
doc/schemas/ngram_parsing_flow.png
+0
-0
constants.py
gargantext/constants.py
+4
-4
db.py
gargantext/util/db.py
+1
-2
CSV.py
gargantext/util/parsers/CSV.py
+20
-27
__init__.py
gargantext/util/parsers/__init__.py
+1
-1
ngramlists.py
gargantext/views/api/ngramlists.py
+148
-27
urls.py
gargantext/views/api/urls.py
+8
-7
NGrams_dyna_chart_and_table.js
static/js/gargantext/NGrams_dyna_chart_and_table.js
+139
-67
terms.html
templates/pages/corpora/terms.html
+7
-2
No files found.
doc/schemas/ngram_parsing_flow.dot
0 → 100644
View file @
c7526ad7
// dot ngram_parsing_flow.dot -Tpng -o ngram_parsing_flow.png
digraph
ngramflow
{
edge
[
fontsize
=
10
]
;
label
=<
<B><U>
gargantext.util.toolchain
</U></B><BR/>
(ngram extraction flow)
>;
labelloc
=
"t"
;
"extracted_ngrams"
->
"grouplist"
;
"extracted_ngrams"
->
"occs+tfidfs"
;
"main_user_stoplist"
->
"stoplist"
;
"stoplist"
->
"mainlist"
;
"occs+tfidfs"
->
"mainlist"
[
label
=
" TFIDF_LIMIT"
]
;
"mainlist"
->
"coocs"
[
label
=
" COOCS_THRESHOLD"
]
;
"coocs"
->
"specificity"
;
"specificity"
->
"maplist"
[
label
=
"MAPLIST_LIMIT\nMONOGRAM_PART"
]
;
"maplist"
->
"explore"
;
"grouplist"
->
"maplist"
;
}
doc/ngram_parsing_flow.png
→
doc/
schemas/
ngram_parsing_flow.png
View file @
c7526ad7
File moved
gargantext/constants.py
View file @
c7526ad7
...
...
@@ -156,10 +156,10 @@ RESOURCETYPES = [
'parser'
:
RISParser
,
'default_language'
:
'en'
,
},
#
{ 'name': 'CSV',
# #
'parser': CSVParser,
#
'default_language': 'en',
#
},
{
'name'
:
'CSV'
,
'parser'
:
CSVParser
,
'default_language'
:
'en'
,
},
# { 'name': 'ISTex',
# # 'parser': ISTexParser,
# 'default_language': 'en',
...
...
gargantext/util/db.py
View file @
c7526ad7
...
...
@@ -35,8 +35,7 @@ Double = DOUBLE_PRECISION
# useful for queries
from
sqlalchemy.orm
import
aliased
from
sqlalchemy
import
func
from
sqlalchemy
import
func
,
desc
# bulk insertions
...
...
gargantext/util/parsers/CSV
Parser
.py
→
gargantext/util/parsers/CSV.py
View file @
c7526ad7
...
...
@@ -8,34 +8,32 @@ import os
class
CSVParser
(
Parser
):
def
CSVsample
(
self
,
filename
,
delim
)
:
ifile
=
open
(
filename
,
"r"
)
reader
=
csv
.
reader
(
ifile
,
delimiter
=
delim
)
def
CSVsample
(
self
,
small_contents
,
delim
)
:
reader
=
csv
.
reader
(
small_contents
,
delimiter
=
delim
)
Freqs
=
[]
for
row
in
reader
:
Freqs
.
append
(
len
(
row
))
ifile
.
close
()
return
Freqs
def
parse
(
self
,
filename
):
print
(
"CSV: parsing (assuming UTF-8 and LF line endings)"
)
contents
=
filename
.
read
()
.
decode
(
"UTF-8"
)
.
split
(
"
\n
"
)
sample_size
=
10
sample_
file
=
filename
.
replace
(
".csv"
,
"_sample.csv"
)
sample_
contents
=
contents
[
0
:
sample_size
]
hyperdata_list
=
[]
command_for_sample
=
"cat '"
+
filename
+
"' | head -n "
+
str
(
sample_size
)
+
" > '"
+
sample_file
+
"'"
os
.
system
(
command_for_sample
)
# you just created a *_sample.csv
# # = = = = [ Getting delimiters frequency ] = = = = #
PossibleDelimiters
=
[
','
,
' '
,
'
\t
'
,
';'
,
'|'
,
':'
]
AllDelimiters
=
{}
for
delim
in
PossibleDelimiters
:
AllDelimiters
[
delim
]
=
self
.
CSVsample
(
sample_
file
,
delim
)
AllDelimiters
[
delim
]
=
self
.
CSVsample
(
sample_
contents
,
delim
)
# # = = = = [ / Getting delimiters frequency ] = = = = #
# # OUTPUT example:
# # AllDelimiters = {
...
...
@@ -59,8 +57,8 @@ class CSVParser(Parser):
# # = = = = [ / Stand.Dev=0 & Sum of delimiters ] = = = = #
# # OUTPUT example:
# # Delimiters = [
# # ['\t', 5, 5, 0.0],
# # [',', 75, 5, 0.0],
# # ['\t', 5, 5, 0.0],
# # [',', 75, 5, 0.0],
# # ['|', 5, 5, 0.0]
# # ]
...
...
@@ -68,23 +66,22 @@ class CSVParser(Parser):
# # = = = = [ Delimiter selection ] = = = = #
Sorted_Delims
=
sorted
(
Delimiters
,
key
=
lambda
x
:
x
[
1
],
reverse
=
True
)
HighestDelim
=
Sorted_Delims
[
0
][
0
]
#
print("selected delimiter:",[HighestDelim]
# print
#
HighestDelim = ","
print
(
"CSV selected delimiter:"
,[
HighestDelim
])
# # = = = = [ / Delimiter selection ] = = = = #
# # = = = = [ First data coordinate ] = = = = #
Coords
=
{
"row"
:
-
1
,
"column"
:
-
1
}
ifile
=
open
(
sample_file
,
"r"
)
reader
=
csv
.
reader
(
ifile
,
delimiter
=
HighestDelim
)
reader
=
csv
.
reader
(
contents
,
delimiter
=
HighestDelim
)
for
rownum
,
tokens
in
enumerate
(
reader
):
if
rownum
%
250
==
0
:
print
(
"CSV row: "
,
rownum
)
joined_tokens
=
""
.
join
(
tokens
)
if
Coords
[
"row"
]
<
0
and
len
(
joined_tokens
)
>
0
:
Coords
[
"row"
]
=
rownum
...
...
@@ -93,22 +90,21 @@ class CSVParser(Parser):
if
len
(
t
)
>
0
:
Coords
[
"column"
]
=
columnum
break
ifile
.
close
()
# # = = = = [ / First data coordinate ] = = = = #
# # = = = = [ Setting Headers ] = = = = #
Headers_Int2Str
=
{}
ifile
=
open
(
sample_file
,
"r"
)
reader
=
csv
.
reader
(
ifile
,
delimiter
=
HighestDelim
)
reader
=
csv
.
reader
(
contents
,
delimiter
=
HighestDelim
)
for
rownum
,
tokens
in
enumerate
(
reader
):
if
rownum
>=
Coords
[
"row"
]:
for
columnum
in
range
(
Coords
[
"column"
],
len
(
tokens
)
):
t
=
tokens
[
columnum
]
Headers_Int2Str
[
columnum
]
=
t
break
ifile
.
close
()
# print("Headers_Int2Str")
# print(Headers_Int2Str)
# # = = = = [ / Setting Headers ] = = = = #
# # OUTPUT example:
# # Headers_Int2Str = {
...
...
@@ -119,11 +115,9 @@ class CSVParser(Parser):
# # }
# # = = = = [ Reading the whole CSV and saving ] = = = = #
hyperdata_list
=
[]
ifile
=
open
(
filename
,
"r"
)
reader
=
csv
.
reader
(
ifile
,
delimiter
=
HighestDelim
)
reader
=
csv
.
reader
(
contents
,
delimiter
=
HighestDelim
)
for
rownum
,
tokens
in
enumerate
(
reader
):
if
rownum
>
Coords
[
"row"
]:
RecordDict
=
{}
...
...
@@ -131,7 +125,6 @@ class CSVParser(Parser):
data
=
tokens
[
columnum
]
RecordDict
[
Headers_Int2Str
[
columnum
]
]
=
data
hyperdata_list
.
append
(
RecordDict
)
ifile
.
close
()
# # = = = = [ / Reading the whole CSV and saving ] = = = = #
return
hyperdata_list
gargantext/util/parsers/__init__.py
View file @
c7526ad7
...
...
@@ -8,4 +8,4 @@ from .Pubmed import PubmedParser
from
.Europress
import
EuropressParser
# from .ISTex import ISTexParser
#
from .CSV import CSVParser
from
.CSV
import
CSVParser
gargantext/views/api/ngramlists.py
View file @
c7526ad7
This diff is collapsed.
Click to expand it.
gargantext/views/api/urls.py
View file @
c7526ad7
...
...
@@ -11,17 +11,18 @@ urlpatterns = [
url
(
r'^nodes/(\d+)/facets$'
,
nodes
.
CorpusFacet
.
as_view
()),
url
(
r'^nodes/(\d+)/having$'
,
nodes
.
NodeListHaving
.
as_view
()),
# get a list of ngram_ids or ngram_infos by list_id
#
# url(r'^ngramlists/(\d+)$', ngramlists.List.as_view()),
# add or remove ngram from a list
# ex: add <=> PUT ngramlists/change?list=42&ngrams=1,2
# rm <=> DEL ngramlists/change?list=42&ngrams=1,2
url
(
r'^ngramlists/change$'
,
ngramlists
.
ListChange
.
as_view
()),
# entire combination of lists from a corpus
#
get
entire combination of lists from a corpus
# (or any combination of lists that go together :
# - a mainlist
# - an optional stoplist
# - an optional maplist
# - an optional grouplist
# aka lexical model
url
(
r'^ngramlists/family$'
,
ngramlists
.
ListFamily
.
as_view
()),
# - an optional grouplist
)
url
(
r'^ngramlists/family$'
,
ngramlists
.
ListFamily
.
as_view
()),
]
static/js/gargantext/NGrams_dyna_chart_and_table.js
View file @
c7526ad7
This diff is collapsed.
Click to expand it.
templates/pages/corpora/terms.html
View file @
c7526ad7
...
...
@@ -22,7 +22,7 @@
<div
class=
"row"
>
<div
id=
"monthly-move-chart"
>
<center>
Select a
time
range in the chart with blue bars to zoom in
Select a
score/frequency
range in the chart with blue bars to zoom in
<p
align=
"center"
>
<a
class=
"btn btn-xs btn-default"
role=
"button"
href=
"/chart/corpus/{{ corpus.id }}/data.csv"
>
Save
</a>
<a
class=
"btn btn-xs btn-default"
href=
"javascript:volumeChart.filterAll();dc.redrawAll();"
>
Reset
</a></p>
...
...
@@ -41,7 +41,12 @@
<br>
</div>
<input
type=
"hidden"
id=
"list_id"
value=
"{{ list_id }}"
></input>
<!-- (values set by js) caching our DB ids (handy for list update commands) -->
<input
type=
"hidden"
id=
"mainlist_id"
value=
""
></input>
<input
type=
"hidden"
id=
"maplist_id"
value=
""
></input>
<input
type=
"hidden"
id=
"stoplist_id"
value=
""
></input>
<input
type=
"hidden"
id=
"groups_id"
value=
""
></input>
<input
type=
"hidden"
id=
"scores_id"
value=
""
></input>
<div
class=
"row"
>
<div
class=
"panel panel-default"
>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment