Commit 9fcf90b5 authored by c24b

load corresponding parser

parent 1ed99fcc
......@@ -50,12 +50,12 @@ def extract_ngrams(corpus, keys=('title', 'abstract', ), do_subngrams = DEFAULT_
resource_type_index = corpus.resources()[0]['type']
documents_count = 0
resource_type = RESOURCETYPES[resource_type_index]
default_language_iso2 = resource_type['default_language']
default_language_iso2 = resource_type['default_languages']
for documents_count, document in enumerate(corpus.children('DOCUMENT')):
# get ngrams extractor for the current document
language_iso2 = document.hyperdata.get('language_iso2', default_language_iso2)
try:
# this looks for a parser in constants.LANGUAGES
# this looks for a tagger in constants.LANGUAGES
ngramsextractor = ngramsextractors[language_iso2]
except KeyError:
# skip document
......
from gargantext.util.db import *
from gargantext.models import *
from gargantext.constants import *
from gargantext.util.parsers import *
from collections import defaultdict
from re import sub
......@@ -19,7 +19,9 @@ def parse(corpus):
# information about the resource
if resource['extracted']:
continue
resource_parser = RESOURCETYPES[resource['type']]['parser']
#source store available module for a resource
source = get_resource(resource["type"])
resource_parser = load_parser(source)
resource_path = resource['path']
# extract and insert documents from corpus resource into database
for hyperdata in resource_parser(resource_path):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment