Commit 77db4880 authored by Administrator

Merge branch 'alex'

Fix
parents 2bebc82d b85dd6b8
......@@ -17,9 +17,20 @@ class EuropressFileParser(FileParser):
codif = "UTF-8"
count = 0
html_parser = etree.HTMLParser(encoding=codif)
html = etree.parse(file, html_parser)
if isinstance(file, str):
file = open(file, 'rb')
print(file)
contents = file.read()
print(len(contents))
#return []
encoding = self.detect_encoding(contents)
try:
html_parser = etree.HTMLParser(encoding=encoding)
html = etree.fromstring(contents, html_parser)
html_articles = html.xpath('/html/body/table')
except:
return []
# initialize the list of metadata
metadata_list = []
......@@ -43,7 +54,7 @@ class EuropressFileParser(FileParser):
for header in html_article.xpath("./tr/td/span[@class = 'DocHeader']"):
text = header.text
if isinstance(text, bytes):
text = text.decode()
text = text.decode(encoding)
format_date_fr = re.compile('\d+\s*\w+\s+\d{4}', re.UNICODE)
test_date_fr = format_date_fr.match(text)
......
import collections
import dateutil.parser
import zipfile
import chardet
from parsing.Caches import LanguagesCache
......@@ -14,7 +15,8 @@ class FileParser:
def detect_encoding(self, string):
    """Guess the character encoding of a document.

    Parameters:
        string: the raw document contents, handed as-is to
            ``chardet.detect`` (callers in this file pass the bytes
            read from the input file).

    Returns:
        The encoding name reported by chardet, or ``'UTF-8'`` when
        chardet does not report one.
    """
    detection = chardet.detect(string)
    # chardet reports an undetermined encoding as None under the
    # 'encoding' key, so a ``.get(..., default)`` would still yield None;
    # fall back explicitly so callers always receive a usable codec name.
    return detection.get('encoding') or 'UTF-8'
def format_metadata_dates(self, metadata):
......
......@@ -379,38 +379,38 @@ buttonAddDataset.click(function() {
// $('.tree').jstree({
// 'core' : {
// 'data' : {
// 'url' : function(node) {
// var url = '/api/nodes?' + ((node.id === '#')
// ? 'type=Project'
// : ('parent=' + node.id)
// );
// console.log(url);
// return url;
// },
// },
// },
// "plugins" : ["types"],
// "types" : {
// "#" : {
// "max_children" : 1,
// "max_depth" : 4,
// "valid_children" : ["root"]
// },
// "Project" : {
// "icon" : "http://www.jstree.com/static/3.0.8/assets/images/tree_icon.png",
// "valid_children" : ["default"]
// },
// "Corpus" : {
// "valid_children" : ["default","file"]
// },
// "Document" : {
// "icon" : "glyphicon glyphicon-file",
// "valid_children" : []
// }
// },
// });
// Initialise the jsTree widget on the element(s) matching `.tree`.
// Node data is requested from the `/api/nodes` endpoint; the "types"
// plugin is enabled with one entry per node type used below.
$('.tree').jstree({
'core' : {
'data' : {
// Build the URL used to fetch nodes for `node`:
//   - node.id === '#' (presumably jsTree's root id — confirm) -> all nodes of type Project
//   - any other node                                           -> nodes whose parent is node.id
'url' : function(node) {
var url = '/api/nodes?' + ((node.id === '#')
? 'type=Project'
: ('parent=' + node.id)
);
// Debug trace of every URL requested.
console.log(url);
return url;
},
},
},
"plugins" : ["types"],
// Per-type settings consumed by the "types" plugin.
"types" : {
// Entry keyed "#": at most one child, depth limited to 4.
"#" : {
"max_children" : 1,
"max_depth" : 4,
"valid_children" : ["root"]
},
"Project" : {
"icon" : "http://www.jstree.com/static/3.0.8/assets/images/tree_icon.png",
"valid_children" : ["default"]
},
"Corpus" : {
"valid_children" : ["default","file"]
},
// Documents are declared with an empty list of valid children.
"Document" : {
"icon" : "glyphicon glyphicon-file",
"valid_children" : []
}
},
});
// var graph = $('.graph-it').graphIt(640, 480);
......@@ -103,7 +103,7 @@
<div class="row">
<div class="col-md-4">
<div class="jumbotron">
<h3><a href="/graph-it">1) Documents</a></h3>
<h3><a href="/graph-it">Documents</a></h3>
<ol>
<li>Read</li> <!-- write -->
<li>Count</li> <!-- compute -->
......@@ -114,7 +114,7 @@
<div class="col-md-4">
<div class="jumbotron">
<h3><a href="/ngrams">2) Ngrams</a></h3>
<h3><a href="/ngrams">Dictionaries</a></h3>
<ol>
<li>White Lists</li>
<li>Black Lists</li>
......@@ -125,7 +125,7 @@
<div class="col-md-4">
<div class="jumbotron">
<h3><a href="/graph">3) Visualizations</a></h3>
<h3><a href="/graph">Visualizations</a></h3>
<ol>
<li>Matrix</li>
<li>Static maps</li>
......
......@@ -77,10 +77,9 @@
<script type="text/javascript" src="{% static "js/jquery/jquery.min.js" %}"></script>
<script type="text/javascript" src="{% static "js/jquery/jquery-ui.js" %}"></script>
<!--
<link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/jstree/3.0.4/themes/default/style.min.css" />
<script src="//cdnjs.cloudflare.com/ajax/libs/jstree/3.0.4/jstree.min.js"></script>
-->
<script type="text/javascript" src="{% static "js/charts/dygraph-combined.js" %}"></script>
<script type="text/javascript" src="{% static "js/graph-it.js" %}"></script>
......
......@@ -17,7 +17,7 @@
<div class="jumbotron">
<h1>Gargantext</h1>
<p>A web platform to explore text-mining</p>
<a class="btn btn-primary btn-lg" href="/projects">Explore a corpus</a>
<a class="btn btn-primary btn-lg" href="/projects">Test Gargantext</a>
</div>
<div class="container">
......
......@@ -79,7 +79,14 @@
<li>
<a href="/project/{{project.id}}/corpus/{{corpus.id}}">{{corpus.name}}</a>
, {{ corpus.count }} Documents
<button type="button" class="btn btn-xs btn-default" data-container="body" data-toggle="popover" data-placement="bottom" data-content='<a href="/project/{{ project.id }}/corpus/{{ corpus.id}}/delete">Yes, I am sure!</a>'>Delete</button>
<button type="button" class="btn btn-xs btn-default" data-container="body" data-toggle="popover" data-placement="bottom"
data-content='
<ul>
<li> Add documents </li>
<li> Rename </li>
<li><a href="/project/{{ project.id }}/corpus/{{ corpus.id}}/delete">Delete</a></li>
</ul>
'>Manage</button>
</li>
{% endfor %}
</ul>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment