Commit be8928f9 authored by Mathieu Rodic's avatar Mathieu Rodic

Minor corrections...

parent 712f8f0a
...@@ -3,13 +3,13 @@ import collections ...@@ -3,13 +3,13 @@ import collections
# This allows the fast retrieval of ngram ids # This allows the fast retrieval of ngram ids
# from the cache instead of using the database for every call # from the cache instead of using the database for every call
class Ngram_Cache: class NgramCache:
def __init__(self, language): def __init__(self, language):
self._cache = {} self._cache = {}
self._language = language self._language = language
def get(self, terms): def __getitem__(self, terms):
terms = terms.strip().lower() terms = terms.strip().lower()
if terms not in self._cache: if terms not in self._cache:
try: try:
...@@ -32,7 +32,7 @@ class FileParser: ...@@ -32,7 +32,7 @@ class FileParser:
else: else:
self._file = file self._file = file
# cache for ngrams # cache for ngrams
self._ngram_caches = collections.defaultdicts(Ngram_Cache) self._ngramcaches = collections.defaultdicts(NgramCache)
# extractors # extractors
self._extractors = {} self._extractors = {}
self._document_nodetype = NodeType.get(label='document') self._document_nodetype = NodeType.get(label='document')
...@@ -91,9 +91,10 @@ class FileParser: ...@@ -91,9 +91,10 @@ class FileParser:
# parse it! # parse it!
ngrams = self.extract_ngrams(contents, language) ngrams = self.extract_ngrams(contents, language)
# we should already be in a transaction, so no use doing another one (or is there?) # we should already be in a transaction, so no use doing another one (or is there?)
ngram_cache = self._ngram_caches[language.iso3] ngramcache = self._ngramcaches[language.iso3]
for ngram_text, occurences in ngrams.items(): for terms, occurences in ngrams.items():
ngram = ngram_cache.get(ngram_text) ngram_text = ' '.join([term[0] for term in terms])
ngram = ngramcache[ngram_text]
Node_Ngram( Node_Ngram(
node = childNode, node = childNode,
ngram = ngram, ngram = ngram,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment