Commit d086dfd5 authored by Administrator

Merge branch 'resource_type' into unstable

parents 4fd5dec5 f4d4ea24
...@@ -104,34 +104,15 @@ except Exception as error: ...@@ -104,34 +104,15 @@ except Exception as error:
# In[33]: # In[33]:
try: from parsing.parsers_config import parsers
typePubmed = ResourceType.objects.get(name='pubmed')
typeIsi = ResourceType.objects.get(name='isi')
typeRis = ResourceType.objects.get(name='ris')
typeJstor = ResourceType.objects.get(name='RIS (Jstor)')
typePresseFrench = ResourceType.objects.get(name='europress_french')
typePresseEnglish = ResourceType.objects.get(name='europress_english')
except Exception as error:
print(error)
typePubmed = ResourceType(name='pubmed')
typePubmed.save()
typeIsi = ResourceType(name='isi') ResourceType.objects.all().delete()
typeIsi.save()
typeRis = ResourceType(name='ris') for key in parsers.keys():
typeRis.save() try:
ResourceType.objects.get_or_create(name=key)
typeJstor = ResourceType(name='RIS (Jstor)') except Exception as error:
typeJstor.save() print("Ressource Error: ", error)
typePresseFrench = ResourceType(name='europress_french')
typePresseFrench.save()
typePresseEnglish = ResourceType(name='europress_english')
typePresseEnglish.save()
# In[34]: # In[34]:
......
...@@ -54,16 +54,13 @@ import pycountry ...@@ -54,16 +54,13 @@ import pycountry
Language.objects.all().delete() Language.objects.all().delete()
for language in pycountry.languages: for language in pycountry.languages:
if 'alpha2' in language.__dict__: if 'alpha2' in language.__dict__:
Language( models.Language(
iso2 = language.alpha2, iso2 = language.alpha2,
iso3 = language.bibliographic, iso3 = language.bibliographic,
fullname = language.name, fullname = language.name,
implemented = 1 if language.alpha2 in ['en', 'fr'] else 0, implemented = 1 if language.alpha2 in ['en', 'fr'] else 0,
).save() ).save()
english = Language.objects.get(iso2='en')
french = Language.objects.get(iso2='fr')
# Integration: users # Integration: users
...@@ -99,56 +96,10 @@ for node_type in node_types: ...@@ -99,56 +96,10 @@ for node_type in node_types:
print('Initialize resource...') print('Initialize resource...')
resources = [ from parsing.parsers_config import parsers
'pubmed', 'isi', 'ris', 'europress_french', 'europress_english']
for resource in resources:
models.ResourceType.objects.get_or_create(name=resource)
# TODO
# here some tests
# add a new project and some corpora to test it
# Integration: project
#
#print('Initialize project...')
#try:
# project = Node.objects.get(name='Bees project')
#except:
# project = Node(name='Bees project', type=typeProject, user=me)
# project.save()
#
# Integration: corpus
#print('Initialize corpus...')
#try:
# corpus_pubmed = Node.objects.get(name='PubMed corpus')
#except:
# corpus_pubmed = Node(parent=project, name='PubMed corpus', type=typeCorpus, user=me)
# corpus_pubmed.save()
#
#print('Initialize resource...')
#corpus_pubmed.add_resource(
# # file='./data_samples/pubmed.zip',
# #file='./data_samples/pubmed_2013-04-01_HoneyBeesBeeBees.xml',
# file='/srv/gargantext_lib/data_samples/pubmed.xml',
# type=typePubmed,
# user=me
#)
#
#for resource in corpus_pubmed.get_resources():
# print('Resource #%d - %s - %s' % (resource.id, resource.digest, resource.file))
#
## print('Parse corpus #%d...' % (corpus_pubmed.id, ))
# corpus_pubmed.parse_resources(verbose=True)
# print('Extract corpus #%d...' % (corpus_pubmed.id, ))
# corpus_pubmed.children.all().extract_ngrams(['title',])
# print('Parsed corpus #%d.' % (corpus_pubmed.id, ))
for parser in parsers.keys():
models.ResourceType.objects.get_or_create(name=parser)
......
...@@ -29,6 +29,7 @@ from celery import current_app ...@@ -29,6 +29,7 @@ from celery import current_app
import os import os
import subprocess import subprocess
from parsing.parsers_config import parsers
# Some useful functions # Some useful functions
# TODO: start the function name with an underscore (private) # TODO: start the function name with an underscore (private)
...@@ -194,16 +195,19 @@ class Node(CTENode): ...@@ -194,16 +195,19 @@ class Node(CTENode):
print("= = = = = = = = = = =\n") print("= = = = = = = = = = =\n")
for node_resource in self.node_resource.filter(parsed=False): for node_resource in self.node_resource.filter(parsed=False):
resource = node_resource.resource resource = node_resource.resource
parser = defaultdict(lambda:FileParser.FileParser, { parser = defaultdict(lambda:FileParser.FileParser, parsers
'istext' : ISText, # {
'pubmed' : PubmedFileParser, # 'istext' : ISText,
'isi' : IsiFileParser, # 'pubmed' : PubmedFileParser,
'ris' : RisFileParser, # 'isi' : IsiFileParser,
'RIS (Jstor)' : JstorFileParser, # 'ris' : RisFileParser,
'europress' : EuropressFileParser, # 'RIS (Jstor)' : JstorFileParser,
'europress_french' : EuropressFileParser, # 'europress' : EuropressFileParser,
'europress_english' : EuropressFileParser, # 'europress_french' : EuropressFileParser,
})[resource.type.name]() # 'europress_english' : EuropressFileParser,
# }
)[resource.type.name]()
metadata_list += parser.parse(str(resource.file)) metadata_list += parser.parse(str(resource.file))
type_id = NodeType.objects.get(name='Document').id type_id = NodeType.objects.get(name='Document').id
langages_cache = LanguagesCache() langages_cache = LanguagesCache()
......
...@@ -7,7 +7,6 @@ from math import log ...@@ -7,7 +7,6 @@ from math import log
from gargantext_web.db import * from gargantext_web.db import *
from .FileParsers import *
...@@ -30,17 +29,7 @@ class DebugTime: ...@@ -30,17 +29,7 @@ class DebugTime:
# keep all the parsers in a cache # keep all the parsers in a cache
class Parsers(defaultdict): class Parsers(defaultdict):
from .parsers_config import parsers as _parsers
_parsers = {
'pubmed' : PubmedFileParser,
'isi' : IsiFileParser,
'ris' : RisFileParser,
'RIS (Jstor)' : JstorFileParser,
'europress' : EuropressFileParser,
'europress_french' : EuropressFileParser,
'europress_english' : EuropressFileParser,
}
def __missing__(self, key): def __missing__(self, key):
if key not in self._parsers: if key not in self._parsers:
......
from .FileParsers import *
# Registry of the available file parsers, keyed by resource type name.
#
# The keys double as the ResourceType names: the initialisation code iterates
# over parsers.keys() and calls ResourceType.objects.get_or_create(name=key),
# and the parsing code picks a parser by looking up resource.type.name in this
# mapping. A key must therefore match the stored ResourceType name exactly.
#
# The values are parser classes, not instances; the lookup site instantiates
# the selected class before calling .parse() on it.
parsers = {
'Pubmed (xml format)' : PubmedFileParser,
'Web of Science (ISI format)' : IsiFileParser,
'Scopus ou Zotero (RIS format)' : RisFileParser,
'Jstor (RIS format)' : JstorFileParser,
# The generic 'Europress' entry is disabled; only the per-language entries
# below are registered.
#'Europress' : EuropressFileParser,
# Both Europress variants are handled by the same parser class.
'Europress (French)' : EuropressFileParser,
'Europress (English)' : EuropressFileParser,
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment