Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
77db4880
Commit
77db4880
authored
Nov 27, 2014
by
Administrator
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'alex'
Fix
parents
2bebc82d
b85dd6b8
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
68 additions
and
49 deletions
+68
-49
EuropressFileParser.py
parsing/FileParsers/EuropressFileParser.py
+16
-5
FileParser.py
parsing/FileParsers/FileParser.py
+3
-1
graph-it.js
static/js/graph-it.js
+35
-35
corpus.html
templates/corpus.html
+3
-3
graph-it.html
templates/graph-it.html
+2
-3
home.html
templates/home.html
+1
-1
project.html
templates/project.html
+8
-1
No files found.
parsing/FileParsers/EuropressFileParser.py
View file @
77db4880
...
@@ -17,9 +17,20 @@ class EuropressFileParser(FileParser):
...
@@ -17,9 +17,20 @@ class EuropressFileParser(FileParser):
codif
=
"UTF-8"
codif
=
"UTF-8"
count
=
0
count
=
0
html_parser
=
etree
.
HTMLParser
(
encoding
=
codif
)
if
isinstance
(
file
,
str
):
html
=
etree
.
parse
(
file
,
html_parser
)
file
=
open
(
file
,
'rb'
)
print
(
file
)
contents
=
file
.
read
()
print
(
len
(
contents
))
#return []
encoding
=
self
.
detect_encoding
(
contents
)
try
:
html_parser
=
etree
.
HTMLParser
(
encoding
=
encoding
)
html
=
etree
.
fromstring
(
contents
,
html_parser
)
html_articles
=
html
.
xpath
(
'/html/body/table'
)
html_articles
=
html
.
xpath
(
'/html/body/table'
)
except
:
return
[]
# initialize the list of metadata
# initialize the list of metadata
metadata_list
=
[]
metadata_list
=
[]
...
@@ -43,7 +54,7 @@ class EuropressFileParser(FileParser):
...
@@ -43,7 +54,7 @@ class EuropressFileParser(FileParser):
for
header
in
html_article
.
xpath
(
"./tr/td/span[@class = 'DocHeader']"
):
for
header
in
html_article
.
xpath
(
"./tr/td/span[@class = 'DocHeader']"
):
text
=
header
.
text
text
=
header
.
text
if
isinstance
(
text
,
bytes
):
if
isinstance
(
text
,
bytes
):
text
=
text
.
decode
()
text
=
text
.
decode
(
encoding
)
format_date_fr
=
re
.
compile
(
'
\
d+
\
s*
\
w+
\
s+
\
d{4}'
,
re
.
UNICODE
)
format_date_fr
=
re
.
compile
(
'
\
d+
\
s*
\
w+
\
s+
\
d{4}'
,
re
.
UNICODE
)
test_date_fr
=
format_date_fr
.
match
(
text
)
test_date_fr
=
format_date_fr
.
match
(
text
)
...
...
parsing/FileParsers/FileParser.py
View file @
77db4880
import
collections
import
collections
import
dateutil.parser
import
dateutil.parser
import
zipfile
import
zipfile
import
chardet
from
parsing.Caches
import
LanguagesCache
from
parsing.Caches
import
LanguagesCache
...
@@ -14,7 +15,8 @@ class FileParser:
...
@@ -14,7 +15,8 @@ class FileParser:
def
detect_encoding
(
self
,
string
):
def
detect_encoding
(
self
,
string
):
"""Useful method to detect the document encoding.
"""Useful method to detect the document encoding.
"""
"""
pass
encoding
=
chardet
.
detect
(
string
)
return
encoding
.
get
(
'encoding'
,
'UTF-8'
)
def
format_metadata_dates
(
self
,
metadata
):
def
format_metadata_dates
(
self
,
metadata
):
...
...
static/js/graph-it.js
View file @
77db4880
...
@@ -379,38 +379,38 @@ buttonAddDataset.click(function() {
...
@@ -379,38 +379,38 @@ buttonAddDataset.click(function() {
//
$('.tree').jstree({
$
(
'.tree'
).
jstree
({
//
'core' : {
'core'
:
{
//
'data' : {
'data'
:
{
//
'url' : function(node) {
'url'
:
function
(
node
)
{
//
var url = '/api/nodes?' + ((node.id === '#')
var
url
=
'/api/nodes?'
+
((
node
.
id
===
'#'
)
//
? 'type=Project'
?
'type=Project'
//
: ('parent=' + node.id)
:
(
'parent='
+
node
.
id
)
//
);
);
//
console.log(url);
console
.
log
(
url
);
//
return url;
return
url
;
//
},
},
//
},
},
//
},
},
//
"plugins" : ["types"],
"plugins"
:
[
"types"
],
//
"types" : {
"types"
:
{
//
"#" : {
"#"
:
{
//
"max_children" : 1,
"max_children"
:
1
,
//
"max_depth" : 4,
"max_depth"
:
4
,
//
"valid_children" : ["root"]
"valid_children"
:
[
"root"
]
//
},
},
//
"Project" : {
"Project"
:
{
//
"icon" : "http://www.jstree.com/static/3.0.8/assets/images/tree_icon.png",
"icon"
:
"http://www.jstree.com/static/3.0.8/assets/images/tree_icon.png"
,
//
"valid_children" : ["default"]
"valid_children"
:
[
"default"
]
//
},
},
//
"Corpus" : {
"Corpus"
:
{
//
"valid_children" : ["default","file"]
"valid_children"
:
[
"default"
,
"file"
]
//
},
},
//
"Document" : {
"Document"
:
{
//
"icon" : "glyphicon glyphicon-file",
"icon"
:
"glyphicon glyphicon-file"
,
//
"valid_children" : []
"valid_children"
:
[]
//
}
}
//
},
},
//
});
});
// var graph = $('.graph-it').graphIt(640, 480);
// var graph = $('.graph-it').graphIt(640, 480);
templates/corpus.html
View file @
77db4880
...
@@ -103,7 +103,7 @@
...
@@ -103,7 +103,7 @@
<div
class=
"row"
>
<div
class=
"row"
>
<div
class=
"col-md-4"
>
<div
class=
"col-md-4"
>
<div
class=
"jumbotron"
>
<div
class=
"jumbotron"
>
<h3><a
href=
"/graph-it"
>
1)
Documents
</a></h3>
<h3><a
href=
"/graph-it"
>
Documents
</a></h3>
<ol>
<ol>
<li>
Read
</li>
<!-- write -->
<li>
Read
</li>
<!-- write -->
<li>
Count
</li>
<!-- compute -->
<li>
Count
</li>
<!-- compute -->
...
@@ -114,7 +114,7 @@
...
@@ -114,7 +114,7 @@
<div
class=
"col-md-4"
>
<div
class=
"col-md-4"
>
<div
class=
"jumbotron"
>
<div
class=
"jumbotron"
>
<h3><a
href=
"/ngrams"
>
2) Ngram
s
</a></h3>
<h3><a
href=
"/ngrams"
>
Dictionarie
s
</a></h3>
<ol>
<ol>
<li>
White Lists
</li>
<li>
White Lists
</li>
<li>
Black Lists
</li>
<li>
Black Lists
</li>
...
@@ -125,7 +125,7 @@
...
@@ -125,7 +125,7 @@
<div
class=
"col-md-4"
>
<div
class=
"col-md-4"
>
<div
class=
"jumbotron"
>
<div
class=
"jumbotron"
>
<h3><a
href=
"/graph"
>
3)
Visualizations
</a></h3>
<h3><a
href=
"/graph"
>
Visualizations
</a></h3>
<ol>
<ol>
<li>
Matrix
</li>
<li>
Matrix
</li>
<li>
Static maps
</li>
<li>
Static maps
</li>
...
...
templates/graph-it.html
View file @
77db4880
...
@@ -77,10 +77,9 @@
...
@@ -77,10 +77,9 @@
<script
type=
"text/javascript"
src=
"{% static "
js
/
jquery
/
jquery
.
min
.
js
"
%}"
></script>
<script
type=
"text/javascript"
src=
"{% static "
js
/
jquery
/
jquery
.
min
.
js
"
%}"
></script>
<script
type=
"text/javascript"
src=
"{% static "
js
/
jquery
/
jquery-ui
.
js
"
%}"
></script>
<script
type=
"text/javascript"
src=
"{% static "
js
/
jquery
/
jquery-ui
.
js
"
%}"
></script>
<!--
<link
rel=
"stylesheet"
href=
"//cdnjs.cloudflare.com/ajax/libs/jstree/3.0.4/themes/default/style.min.css"
/>
<link
rel=
"stylesheet"
href=
"//cdnjs.cloudflare.com/ajax/libs/jstree/3.0.4/themes/default/style.min.css"
/>
<script
src=
"//cdnjs.cloudflare.com/ajax/libs/jstree/3.0.4/jstree.min.js"
></script>
<script
src=
"//cdnjs.cloudflare.com/ajax/libs/jstree/3.0.4/jstree.min.js"
></script>
-->
<script
type=
"text/javascript"
src=
"{% static "
js
/
charts
/
dygraph-combined
.
js
"
%}"
></script>
<script
type=
"text/javascript"
src=
"{% static "
js
/
charts
/
dygraph-combined
.
js
"
%}"
></script>
<script
type=
"text/javascript"
src=
"{% static "
js
/
graph-it
.
js
"
%}"
></script>
<script
type=
"text/javascript"
src=
"{% static "
js
/
graph-it
.
js
"
%}"
></script>
...
...
templates/home.html
View file @
77db4880
...
@@ -17,7 +17,7 @@
...
@@ -17,7 +17,7 @@
<div
class=
"jumbotron"
>
<div
class=
"jumbotron"
>
<h1>
Gargantext
</h1>
<h1>
Gargantext
</h1>
<p>
A web platform to explore text-mining
</p>
<p>
A web platform to explore text-mining
</p>
<a
class=
"btn btn-primary btn-lg"
href=
"/projects"
>
Explore a corpus
</a>
<a
class=
"btn btn-primary btn-lg"
href=
"/projects"
>
Test Gargantext
</a>
</div>
</div>
<div
class=
"container"
>
<div
class=
"container"
>
...
...
templates/project.html
View file @
77db4880
...
@@ -79,7 +79,14 @@
...
@@ -79,7 +79,14 @@
<li>
<li>
<a href="/project/{{project.id}}/corpus/{{corpus.id}}">{{corpus.name}}</a>
<a href="/project/{{project.id}}/corpus/{{corpus.id}}">{{corpus.name}}</a>
, {{ corpus.count }} Documents
, {{ corpus.count }} Documents
<button type="button" class="btn btn-xs btn-default" data-container="body" data-toggle="popover" data-placement="bottom" data-content='<a href="/project/{{ project.id }}/corpus/{{ corpus.id}}/delete">Yes, I am sure!</a>'>Delete</button>
<button type="button" class="btn btn-xs btn-default" data-container="body" data-toggle="popover" data-placement="bottom"
data-content='
<ul>
<li> Add documents </li>
<li> Rename </li>
<li><a href="/project/{{ project.id }}/corpus/{{ corpus.id}}/delete">Delete</a></li>
</ul>
'>Manage</button>
</li>
</li>
{% endfor %}
{% endfor %}
</ul>
</ul>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment