Trailing spaces

2d703388 · sim · 7e1e26a2 · 2d703388 · 2d703388
Commit 2d703388 authored Sep 11, 2017 by sim
Hide whitespace changes
Inline Side-by-side

Showing with 25 additions and 25 deletions

HAL.py gargantext/util/crawlers/HAL.py +14 -14

HAL.py gargantext/util/parsers/HAL.py +11 -11

No files found.
--- a/gargantext/util/crawlers/HAL.py
+++ b/gargantext/util/crawlers/HAL.py
@@ -14,12 +14,12 @@ from gargantext.util.files import save
 class HalCrawler(Crawler):
    ''' HAL API CLIENT'''
    def __init__(self):
        # Main EndPoints
        self.BASE_URL = "https://api.archives-ouvertes.fr"
        self.API_URL  = "search"
        # Final EndPoints
        # TODO : Change endpoint according to the type of database
        self.URL   = self.BASE_URL + "/" + self.API_URL
@@ -59,7 +59,7 @@ class HalCrawler(Crawler):
             """
               #, authUrl_s
               #, type_s
        wt = "json"
        querystring = { "q"       : query
@@ -68,18 +68,18 @@ class HalCrawler(Crawler):
                      , "fl"      : fl
                      , "wt"      : wt
                      }
        # Specify Headers
        headers = { "cache-control" : "no-cache" }
        # Do Request and get response
        response = requests.request( "GET"
                                   , self.URL
                                   , headers = headers
                                   , params  = querystring
                                   )
        #print(querystring)
        # Validation : return the decoded JSON if status is 200, else raise ValueError
        if response.status_code == 200:
@@ -90,27 +90,27 @@ class HalCrawler(Crawler):
            return (json.loads(response.content.decode(charset)))
        else:
            raise ValueError(response.status_code, response.reason)
    def scan_results(self, query):
        '''
        scan_results : count the documents matching a query.

        Calls self._get(query) and reads "numFound" from the "response"
        entry of the returned mapping; either key falls back to a default
        (empty dict, then 0) when absent. The count is stored on
        self.results_nb and returned.

        Query String -> Int
        '''
        # Reset before the request so results_nb is 0 if _get raises.
        self.results_nb = 0
        payload = self._get(query)
        summary = payload.get("response", {})
        self.results_nb = summary.get("numFound", 0)
        return self.results_nb
    def download(self, query):
        downloaded = False
        self.status.append("fetching results")
        corpus = []
@@ -124,7 +124,7 @@ class HalCrawler(Crawler):
                                                            )
            print("ERROR (scrap: HAL d/l ): " , msg)
            self.query_max = QUERY_SIZE_N_MAX
        #for page in range(1, trunc(self.query_max / 100) + 2):
        for page in range(0, self.query_max, paging):
            print("Downloading page %s to %s results" % (page, paging))
@@ -141,5 +141,5 @@ class HalCrawler(Crawler):
                        , basedir=UPLOAD_DIRECTORY
                        )
        downloaded = True
        return downloaded
--- a/gargantext/util/parsers/HAL.py
+++ b/gargantext/util/parsers/HAL.py
@@ -12,9 +12,9 @@ import json
 class HalParser(Parser):
    def _parse(self, json_docs):
        hyperdata_list = []
        hyperdata_path = { "id"              : "isbn_s"
                         , "title"           : "en_title_s"
                         , "abstract"        : "en_abstract_s"
@@ -29,8 +29,8 @@ class HalParser(Parser):
                         , "instStructId_i"  : "instStructId_i"
                         , "deptStructId_i"  : "deptStructId_i"
                         , "labStructId_i"   : "labStructId_i"
-                         , "rteamStructId_i" : "rteamStructId_i" 
+                         , "rteamStructId_i" : "rteamStructId_i"
-                         , "docType_s"       : "docType_s" 
+                         , "docType_s"       : "docType_s"
                         }
        uris = set()
@@ -38,15 +38,15 @@ class HalParser(Parser):
        for doc in json_docs:
            hyperdata = {}
            for key, path in hyperdata_path.items():
                    field = doc.get(path, "NOT FOUND")
                    if isinstance(field, list):
                        hyperdata[key] = ", ".join(map(lambda x: str(x), field))
                    else:
                        hyperdata[key] = str(field)
            if hyperdata["url"] in uris:
                print("Document already parsed")
            else:
@@ -54,11 +54,11 @@ class HalParser(Parser):
 #            hyperdata["authors"] = ", ".join(
 #                                             [ p.get("person", {})
 #                                                .get("name"  , "")
-#                          
+#
 #                                               for p in doc.get("hasauthor", [])
 #                                             ]
 #                                            )
-#            
+#
                maybeDate = doc.get("submittedDate_s", None)
                if maybeDate is not None:
@@ -70,9 +70,9 @@ class HalParser(Parser):
                hyperdata["publication_year"]  = str(date.year)
                hyperdata["publication_month"] = str(date.month)
                hyperdata["publication_day"]   = str(date.day)
                hyperdata_list.append(hyperdata)
        return hyperdata_list
    def parse(self, filebuf):