Commit a3195d15 authored by Romain Loth's avatar Romain Loth

port from python2 to python3 and clean up

parent 24a897c7
Flask #### Structure
simplejson - `main.py`: exposes the web app and routes to the correct functions according to request's GET parameters
gunicorn - `extractDataCustom.py`: starting from a query or a scholar's id, retrieves all related keyword and neighbor data from the DB, and builds the graph's JSON, with the metadata and custom HTML stored in each node's `content` property
networkx - `converter.py`: normalizes country names and codes
#### Requirements
- flask
- networkx
#### History
Several modules related to graph community/region extraction and to the ForceAtlas2 (FA2) graph layout have been moved to the `graph_manipulation_unused` directory, because they have been replaced by a client-side FA2 implementation in tinawebJS.
Extraction logic originally developed by [S. Castillo](https://github.com/PkSM3/)
python3 port by [R. Loth](https://github.com/rloth/)
Copyright 2014-2016 ISCPIF/CNRS - UPS 3611
import math
class Region:
    """Quadtree cell grouping nodes for approximate pairwise forces.

    A region stores a list of nodes, their total mass, their center of mass
    and a size (twice the largest node-to-center distance).  Regions holding
    more than one node can be split recursively into up to four quadrants
    around the center of mass (Barnes-Hut style approximation).

    Each node is a dict exposing ``node['x']``, ``node['y']`` and
    ``node['fa2']['mass']``.
    """

    def __init__(self, nodes, depth):
        """Create a region over ``nodes`` located at level ``depth`` of the tree."""
        self.depthLimit = 20  # deepest level at which a region is still split
        self.size = 0
        self.nodes = nodes
        self.subregions = []
        self.depth = depth
        self.p = {"mass": 0, "massCenterX": 0, "massCenterY": 0}
        self.updateMassAndGeometry()

    def updateMassAndGeometry(self):
        """Recompute mass, center of mass and size from the current nodes.

        Regions with fewer than two nodes are left untouched (zero mass,
        zero size), as ``applyForce`` addresses their single node directly.
        """
        nds = self.nodes
        if len(nds) <= 1:
            return

        mass = 0
        massSumX = 0
        massSumY = 0
        for node in nds:
            nodeMass = node['fa2']['mass']
            mass += nodeMass
            massSumX += node['x'] * nodeMass
            # The y coordinate weights the vertical sum (was mistakenly 'x').
            massSumY += node['y'] * nodeMass

        massCenterX = massSumX / mass
        massCenterY = massSumY / mass

        # The size is the diameter of the smallest circle centered on the
        # center of mass that contains every node, i.e. twice the LARGEST
        # distance (previously only the last node's distance was kept).
        size = max(
            2 * math.hypot(node['x'] - massCenterX, node['y'] - massCenterY)
            for node in nds
        )

        self.p['mass'] = mass
        self.p['massCenterX'] = massCenterX
        self.p['massCenterY'] = massCenterY
        self.size = size

    def buildSubRegions(self):
        """Recursively split this region into quadrants around its mass center.

        Sub-regions are stored in ``self.subregions`` in the order top-left,
        bottom-left, bottom-right, top-right.  A quadrant that would contain
        every node of its parent, or that lies beyond ``depthLimit``, is
        broken into one single-node region per node so recursion terminates.
        """
        nds = self.nodes
        if len(nds) <= 1:
            return

        massCenterX = self.p['massCenterX']
        massCenterY = self.p['massCenterY']
        nextDepth = self.depth + 1

        # First split by column ...
        leftNodes = []
        rightNodes = []
        for node in nds:
            column = leftNodes if node['x'] < massCenterX else rightNodes
            column.append(node)

        # ... then split EACH column by line, so that every node ends up in
        # exactly one quadrant (the columns were previously ignored and every
        # node was filed twice).
        topleftNodes = []
        bottomleftNodes = []
        for node in leftNodes:
            line = topleftNodes if node['y'] < massCenterY else bottomleftNodes
            line.append(node)

        toprightNodes = []
        bottomrightNodes = []
        for node in rightNodes:
            line = toprightNodes if node['y'] < massCenterY else bottomrightNodes
            line.append(node)

        subregions = []
        quadrants = (topleftNodes, bottomleftNodes, bottomrightNodes, toprightNodes)
        for quadrant in quadrants:
            if not quadrant:
                continue
            if len(quadrant) < len(nds) and nextDepth <= self.depthLimit:
                subregions.append(Region(quadrant, nextDepth))
            else:
                # No progress possible (or too deep): one leaf per node.
                subregions.extend(Region([node], nextDepth) for node in quadrant)

        self.subregions = subregions
        for subregion in subregions:
            subregion.buildSubRegions()

    def applyForce(self, n, Force, theta):
        """Apply to node ``n`` the force exerted by this region.

        ``Force`` must expose ``apply_nn(node, otherNode)`` and
        ``apply_nr(node, region)``.  A single-node region acts through
        ``apply_nn``; a region that is far enough from ``n``
        (``distance * theta > size``) acts as a whole through ``apply_nr``;
        otherwise the call is delegated to the sub-regions.
        An empty region exerts no force.
        """
        if not self.nodes:
            return
        if len(self.nodes) < 2:
            Force.apply_nn(n, self.nodes[0])
            return

        distance = math.hypot(
            n["x"] - self.p["massCenterX"],
            n["y"] - self.p["massCenterY"],
        )
        if distance * theta > self.size:
            Force.apply_nr(n, self)
        else:
            for subregion in self.subregions:
                subregion.applyForce(n, Force, theta)
# -*- encoding: utf-8 -*- from sqlite3 import connect, Row
import pprint as p
import sqlite3
class CountryConverter:
    """Normalize the free-text country values of one SQLite column.

    The converter is configured with a database file, a table, the name of
    its id column and the name of the column holding country names.  Lookup
    dictionaries are filled with ``createInvertedDicts`` (typically from the
    output of ``getCountries``) and ``convertAll`` then maps every row to a
    country code.
    """

    def __init__(self, dbname, dbtable, dbcolumnID, dbcolumnName):
        """Open ``dbname`` and remember which table/columns to work on."""
        self.connDBLP = sqlite3.connect(dbname)
        # Row factory giving access to the columns by name (row["col"]).
        self.connDBLP.row_factory = sqlite3.Row
        self.cursorDBLP = self.connDBLP.cursor()
        self.dbname = dbname
        self.dbtable = dbtable
        self.dbcolumnID = dbcolumnID
        self.dbcolumnName = dbcolumnName
        self.ISO = {}      # code -> list of names (first one is the main name)
        self.dictISO = {}  # upper-cased main name -> code
        self.dictAlt = {}  # upper-cased alternative name -> code

    def searchCode(self, c):
        """Return the country code matching ``c`` or ``None`` if unknown.

        The lookup is case-insensitive and tries, in order: the codes
        themselves, the main names, then the alternative names.
        """
        C = c.upper()
        if C in self.ISO:
            return C
        if C in self.dictISO:
            return self.dictISO[C]
        return self.dictAlt.get(C)

    def getCountries(self, filename):
        """Parse a tab-separated file into ``{code: [name, ...]}``.

        The first field of each line is the code; the remaining non-empty
        fields are its names.  A line without any name maps to ``[]``.
        """
        dadict = {}
        # utf-8 matches the encoding this module always assumed for its data
        # (source encoding declaration, UTF-8 handling of the DB values).
        with open(filename, 'r', encoding='utf-8') as f:
            for d in f:
                a = d.rstrip("\n").split("\t")
                # `!=` rather than `is not`: identity of string literals is
                # an implementation detail and must not be relied upon.
                dadict[a[0]] = [field for field in a[1:] if field != ""]
        return dadict

    def createInvertedDicts(self, ISO, Alternatives):
        """Build the name -> code lookup tables.

        ``ISO`` maps each code to its list of names (only the first one is
        indexed); ``Alternatives`` maps each code to a list of alternative
        spellings (all of them are indexed).  Codes without any name are
        skipped instead of raising ``IndexError``.
        """
        self.ISO = ISO
        for code, names in ISO.items():
            if names:
                self.dictISO[names[0].upper()] = code

        for code, alternatives in Alternatives.items():
            for alternative in alternatives:
                self.dictAlt[alternative.upper()] = code

    def convertAll(self, write):
        """Look up the country code of every row of the configured table.

        When ``write`` is true a ``norm_<column>`` column is added to the
        table (``sqlite3.OperationalError`` is raised by SQLite if it already
        exists) and filled with the codes found.  Progress is printed for
        each row.  The connection is closed before returning, so the
        instance cannot be used for another conversion afterwards.

        Returns a dict ``{row id: original value}`` of the rows for which no
        code was found.
        """
        dbtable = self.dbtable
        dbcolumnName = self.dbcolumnName
        dbcolumnID = self.dbcolumnID
        normColumn = "norm_" + dbcolumnName
        fails = {}

        try:
            if write:
                # Identifiers cannot be bound as SQL parameters; they come
                # from the constructor arguments, not from the data.
                self.cursorDBLP.execute(
                    "ALTER TABLE " + dbtable + " ADD COLUMN " + normColumn + " char(250)"
                )
                self.connDBLP.commit()

            self.cursorDBLP.execute(
                "SELECT " + dbcolumnID + "," + dbcolumnName + " FROM " + dbtable
            )
            rows = self.cursorDBLP.fetchall()
            total = len(rows)

            # Values, on the other hand, are bound with placeholders.
            update = (
                "UPDATE " + dbtable + " SET " + normColumn + "=? WHERE " + dbcolumnID + "=?"
            )
            for row in rows:
                # Keep the value as str: the lookup tables hold str keys, so
                # an encoded (bytes) value could never match in Python 3.
                rawName = row[dbcolumnName]
                code = self.searchCode(rawName) if rawName is not None else None
                if code:
                    if write:
                        self.cursorDBLP.execute(update, (code, row[dbcolumnID]))
                else:
                    fails[row[dbcolumnID]] = rawName
                print(str(row[dbcolumnID]) + " / " + str(total))

            if write:
                self.connDBLP.commit()
        finally:
            self.connDBLP.close()
        return fails
This diff is collapsed.
from math import sqrt
class Region:
    """Quadtree cell grouping nodes for approximate pairwise forces.

    A region keeps its nodes, their total mass, their center of mass and a
    size equal to twice the largest node-to-center distance.  A region with
    several nodes can be split recursively into up to four quadrants around
    its center of mass (Barnes-Hut style approximation).

    Nodes are dicts providing ``node['x']``, ``node['y']`` and
    ``node['fa2']['mass']``.
    """

    def __init__(self, nodes, depth):
        """Build the region for ``nodes`` at tree level ``depth``."""
        self.depthLimit = 20  # regions deeper than this are not split further
        self.size = 0
        self.nodes = nodes
        self.subregions = []
        self.depth = depth
        self.p = {"mass": 0, "massCenterX": 0, "massCenterY": 0}
        self.updateMassAndGeometry()

    def updateMassAndGeometry(self):
        """Refresh ``self.p`` (mass and mass center) and ``self.size``.

        Nothing is computed for regions holding fewer than two nodes; they
        keep a zero mass and a zero size.
        """
        nds = self.nodes
        if len(nds) <= 1:
            return

        mass = 0
        massSumX = 0
        massSumY = 0
        for node in nds:
            weight = node['fa2']['mass']
            mass += weight
            massSumX += node['x'] * weight
            # Vertical sum must use the y coordinate (it used 'x' before).
            massSumY += node['y'] * weight

        massCenterX = massSumX / mass
        massCenterY = massSumY / mass

        # Twice the distance of the farthest node; the previous code kept
        # the value of whichever node happened to be last in the list.
        size = 0
        for node in nds:
            dx = node['x'] - massCenterX
            dy = node['y'] - massCenterY
            size = max(size, 2 * sqrt(dx * dx + dy * dy))

        self.p['mass'] = mass
        self.p['massCenterX'] = massCenterX
        self.p['massCenterY'] = massCenterY
        self.size = size

    def _leavesOrRegion(self, quadrant, nextDepth):
        """Return the sub-region(s) representing one non-empty quadrant."""
        if len(quadrant) < len(self.nodes) and nextDepth <= self.depthLimit:
            return [Region(quadrant, nextDepth)]
        # The quadrant did not shrink (or is too deep): stop subdividing and
        # emit one single-node region per node.
        return [Region([node], nextDepth) for node in quadrant]

    def buildSubRegions(self):
        """Split the region into quadrants, recursively.

        The resulting ``self.subregions`` lists the top-left, bottom-left,
        bottom-right and top-right quadrants, in that order, skipping the
        empty ones.
        """
        nds = self.nodes
        if len(nds) <= 1:
            return

        massCenterX = self.p['massCenterX']
        massCenterY = self.p['massCenterY']
        nextDepth = self.depth + 1

        topleftNodes = []
        bottomleftNodes = []
        toprightNodes = []
        bottomrightNodes = []
        # Every node goes to exactly one quadrant: column first, then line.
        # (Both line passes used to run over all nodes, duplicating them.)
        for node in nds:
            isTop = node['y'] < massCenterY
            if node['x'] < massCenterX:
                target = topleftNodes if isTop else bottomleftNodes
            else:
                target = toprightNodes if isTop else bottomrightNodes
            target.append(node)

        subregions = []
        for quadrant in (topleftNodes, bottomleftNodes, bottomrightNodes, toprightNodes):
            if quadrant:
                subregions.extend(self._leavesOrRegion(quadrant, nextDepth))

        self.subregions = subregions
        for subregion in subregions:
            subregion.buildSubRegions()

    def applyForce(self, n, Force, theta):
        """Apply to node ``n`` the force exerted by this region.

        ``Force`` provides ``apply_nn(node, otherNode)`` and
        ``apply_nr(node, region)``.  Single-node regions use ``apply_nn``;
        regions satisfying ``distance * theta > size`` are treated as one
        body through ``apply_nr``; closer regions delegate to their
        sub-regions.  An empty region exerts no force.
        """
        if not self.nodes:
            return
        if len(self.nodes) < 2:
            Force.apply_nn(n, self.nodes[0])
            return

        dx = n["x"] - self.p["massCenterX"]
        dy = n["y"] - self.p["massCenterY"]
        distance = sqrt(dx * dx + dy * dy)
        if distance * theta > self.size:
            Force.apply_nr(n, self)
        else:
            for subregion in self.subregions:
                subregion.applyForce(n, Force, theta)
# -*- coding: utf-8 -*- """
comex helper backend to create json graphs from sqlite3 db
# from FA2 import ForceAtlas2 TODO integrate with new regcomex server
"""
from extractDataCustom import extract as SQLite from extractDataCustom import extract as SQLite
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
from flask import Flask from flask import Flask
from flask import request from flask import request
import simplejson as json from json import dumps
app = Flask(__name__)


# @app.route("/getJSON")  # route renamed
@app.route("/comexAPI")
def main():
    """Serve the graph of the scholars matching the request, as JSON text.

    GET parameters:
      - ``query``: filter passed to ``getScholarsList("filter", ...)``;
      - ``unique_id``: used when ``query`` is absent, passed to
        ``getScholarsList("unique_id", ...)``.

    The matching scholars (if any) are extracted, then the graph held by the
    extractor is serialized with ``buildJSON_sansfa2``.
    """
    # Local import so that the handler does not depend on which JSON module
    # the top of the file binds (simplejson in the python2 version).
    from json import dumps

    db = SQLite('../community.db')

    # `in` replaces the python2-only `has_key`.
    if 'query' in request.args:
        scholars = db.getScholarsList("filter", request.args['query'])
    else:
        scholars = db.getScholarsList("unique_id", request.args['unique_id'])

    if scholars:
        db.extract(scholars)
    # < / Data Extraction > #

    graphArray = db.buildJSON_sansfa2(db.Graph)
    return dumps(graphArray)


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8484)
class ReverseProxied(object):
    """WSGI middleware adjusting the environ from reverse-proxy headers.

    - ``HTTP_X_SCRIPT_NAME``: when non-empty, becomes ``SCRIPT_NAME`` and is
      stripped from the beginning of ``PATH_INFO`` if present there.
    - ``HTTP_X_SCHEME``: when non-empty, becomes ``wsgi.url_scheme``.
    """

    def __init__(self, app):
        """Wrap the WSGI application ``app``."""
        self.app = app

    def __call__(self, environ, start_response):
        """Patch ``environ`` in place, then delegate to the wrapped app."""
        prefix = environ.get('HTTP_X_SCRIPT_NAME', '')
        if prefix:
            environ['SCRIPT_NAME'] = prefix
            requested_path = environ['PATH_INFO']
            # Drop the mount prefix only when the path really begins with it.
            if requested_path[:len(prefix)] == prefix:
                environ['PATH_INFO'] = requested_path[len(prefix):]

        forwarded_scheme = environ.get('HTTP_X_SCHEME', '')
        if forwarded_scheme:
            environ['wsgi.url_scheme'] = forwarded_scheme

        return self.app(environ, start_response)
class ReverseProxied(object):
    """Middleware that lets a reverse proxy relocate a WSGI application.

    The proxy communicates through two request headers, read from the WSGI
    environ as ``HTTP_X_SCRIPT_NAME`` (mount point of the application) and
    ``HTTP_X_SCHEME`` (URL scheme to report).
    """

    def __init__(self, app):
        """Store the wrapped WSGI callable."""
        self.app = app

    @staticmethod
    def _use_script_name(environ):
        """Set SCRIPT_NAME from the proxy header and trim PATH_INFO."""
        mount_point = environ.get('HTTP_X_SCRIPT_NAME', '')
        if not mount_point:
            return
        environ['SCRIPT_NAME'] = mount_point
        full_path = environ['PATH_INFO']
        if full_path.startswith(mount_point):
            environ['PATH_INFO'] = full_path[len(mount_point):]

    @staticmethod
    def _use_scheme(environ):
        """Set wsgi.url_scheme from the proxy header, when provided."""
        proxy_scheme = environ.get('HTTP_X_SCHEME', '')
        if proxy_scheme:
            environ['wsgi.url_scheme'] = proxy_scheme

    def __call__(self, environ, start_response):
        """Rewrite ``environ`` in place and call the wrapped application."""
        self._use_script_name(environ)
        self._use_scheme(environ)
        return self.app(environ, start_response)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment