Commit ac109d37 authored by Yannick Chudy's avatar Yannick Chudy

refactor to

parent 2ba7aea7
# Provisioning script: installs system packages, creates the 'botapad' and
# 'foldr' service users, builds both applications, registers the botapad
# systemd unit, installs the nginx site files and requests a certificate.

# Refresh the package index and upgrade installed packages non-interactively.
sudo apt update --assume-yes
sudo apt upgrade --assume-yes
# System packages used by the steps below (Python libraries, nginx, git, certbot, ...).
sudo apt install gunicorn python-igraph python-pip nginx git xz-utils python-wheel python-future python-numpy python-scipy python-sklearn certbot --assume-yes
# Create a password-less 'botapad' user, then, as that user, clone botapadd
# and install its Python requirements plus botapi (without its dependencies).
sudo adduser botapad --gecos "" --disabled-password
sudo su botapad -c "cd /home/botapad;
git clone https://github.com/padagraph/botapadd.git;
cd botapadd ;
pip install -r requirements-prod.txt;
pip install https://github.com/padagraph/botapi/archive/master.zip --no-deps"
# Create a password-less 'foldr' user, then, as that user, unpack a local
# Node.js 6.11.3, clone hackfoldr-2.0-forkme, install its npm packages and
# run its gulp build.
sudo adduser foldr --gecos "" --disabled-password
sudo su foldr -c " cd /home/foldr;
wget https://nodejs.org/dist/v6.11.3/node-v6.11.3-linux-x64.tar.xz ;
tar -xf node-v6.11.3-linux-x64.tar.xz ;
export PATH=$PATH:/home/foldr/node-v6.11.3-linux-x64/bin ;
git clone https://github.com/padagraph/hackfoldr-2.0-forkme.git ;
cd hackfoldr-2.0-forkme ;
npm i;
./node_modules/gulp/bin/gulp.js build
"
# Register the botapad systemd unit (expects botapad.service in the current
# directory), enable it at boot and start it.
sudo cp botapad.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl enable botapad
sudo service botapad start
# Install the nginx site files (expects an ./nginx directory in the current directory).
sudo cp nginx/* /etc/nginx/sites-enabled/
# nginx is stopped before certbot runs in --standalone mode and restarted after;
# presumably so certbot can bind the HTTP(S) ports itself -- TODO confirm.
sudo service nginx stop
# NOTE(review): certbot is invoked without sudo, unlike the other privileged
# steps -- confirm this script is meant to run as root.
certbot certonly --standalone -d botapad.padagraph.io
sudo service nginx restart
This diff is collapsed.
This diff is collapsed.
import igraph
import datetime
import requests
from collections import Counter
from reliure.pipeline import Optionable, Composable
from botapi import BotaIgraph
from botapad import Botapad
from cello.graphs import pedigree
@Composable
def empty_graph(gid, headers, **kwargs):
    """Build a prepared igraph graph from the csv rows in `headers`.

    The rows are parsed by `Botapad.parse_csvrows` into a directed
    `BotaIgraph`; the resulting igraph graph is passed through
    `prepare_graph` and given empty `starred` / `queries` lists and a
    fresh `meta` dictionary.

    :param gid: graph identifier handed to Botapad
    :param headers: csv rows handed to `Botapad.parse_csvrows`
    :returns: the prepared graph
    """
    builder = BotaIgraph(directed=True)
    pad = Botapad(builder, gid, "", delete=False, verbose=True, debug=False)
    pad.parse_csvrows(headers, separator='auto', debug=False)

    graph = prepare_graph(builder.get_igraph(weight_prop="weight"))

    graph['starred'] = []
    graph['queries'] = []

    # owner and date are left unset here; callers fill them in if needed.
    meta = {
        'owner': None,
        'date': None,
        'node_count': graph.vcount(),
        'edge_count': graph.ecount(),
        'star_count': len(graph['starred']),
        'stats': {},
    }
    graph['meta'] = meta

    return graph
@Composable
def calc2igraph(gid, url, description="", verbose=True, debug=False):
    """Parse the pad found at `url` with Botapad and return it as an igraph graph.

    :param gid: graph identifier handed to Botapad
    :param url: location handed to `Botapad.parse`
    :param description: graph description handed to Botapad
    :param verbose: forwarded to Botapad
    :param debug: forwarded to the Botapad constructor (parsing itself
        always runs with debug=False)
    :returns: the graph produced by `BotaIgraph.get_igraph`
    """
    builder = BotaIgraph(directed=True)
    pad = Botapad(builder, gid, description, delete=False, verbose=verbose, debug=debug)
    pad.parse(url, separator='auto', debug=False)
    return builder.get_igraph(weight_prop="weight")
@Composable
def merge(gid, graph, g, index=None, vid=None, **kwargs):
    """Merge `g` into `graph` and return `graph`.

    Node types, edge types, vertices and edges of `g` that are not yet
    known to `graph` are added; `graph['meta']` is then rebuilt, with
    pedigree and per-type statistics recomputed.

    :param gid: graph identifier, passed to `index` and `vid`
    :param graph: target graph, modified in place
    :param g: graph whose content is merged into `graph`
    :param index: mapping from vertex key to vertex of `graph`, or a
        callable `index(gid, graph)` returning such a mapping; the mapping
        is updated with the vertices added here
    :param vid: callable `vid(gid, vertex)` returning the key of a vertex
        of `g`; defaults to the vertex index
    :raises ValueError: if one of gid, graph, g or the resolved index is None
    """
    idx = index(gid, graph) if callable(index) else index

    if vid is None:
        # The key function is always called as vid(gid, vertex), so the
        # default must accept both arguments.
        vid = lambda gid, v: v.index

    if any(item is None for item in (gid, graph, g, idx)):
        raise ValueError('One of (gid, graph, g, index) for graph `%s` is none' % gid )

    # -- node types: add the ones `graph` does not know by name
    known_names = [e['name'] for e in graph['nodetypes']]
    for k in g['nodetypes']:
        if k['name'] not in known_names:
            graph['nodetypes'].append(k)
    nodetypes = {e['uuid']: e for e in graph['nodetypes']}

    # -- vertices: add the ones whose key is not in the index yet
    for v in g.vs:
        _vid = vid(gid, v)
        if _vid not in idx:
            attrs = v.attributes()
            attrs['uuid'] = "%s" % graph.vcount()

            # fill missing properties with the defaults of the node type
            properties = nodetypes[attrs['nodetype']]['properties']
            for k in properties:
                if k not in attrs['properties']:
                    attrs['properties'][k] = properties[k]['default']

            graph.add_vertex(**attrs)
            idx[_vid] = graph.vs[graph.vcount() - 1]

    # -- edge types: add the ones `graph` does not know by name
    known_names = [e['name'] for e in graph['edgetypes']]
    for k in g['edgetypes']:
        if k['name'] not in known_names:
            graph['edgetypes'].append(k)
    edgetypes = {e['uuid']: e for e in graph['edgetypes']}

    # -- edges: add the ones that do not exist yet between the mapped vertices
    for e in g.es:
        v1 = idx[vid(gid, g.vs[e.source])]
        v2 = idx[vid(gid, g.vs[e.target])]
        eid = graph.get_eid(v1, v2, directed=True, error=False)
        if eid == -1:
            # NOTE: the uuid is written on the edge of `g` itself before its
            # attributes are copied, so `g` is modified too.
            e['uuid'] = graph.ecount()
            attrs = e.attributes()

            # fill missing properties with the defaults of the edge type
            properties = edgetypes[attrs['edgetype']]['properties']
            for k in properties:
                if k not in attrs['properties']:
                    attrs['properties'][k] = properties[k]['default']

            graph.add_edge(v1, v2, **attrs)

    graph['queries'].append(g['query'])

    graph['meta'] = {
        'node_count': graph.vcount(),
        'edge_count': graph.ecount(),
        'star_count': len(graph['starred']),
        'owner': None,
        'date': None,
    }
    graph['meta']['pedigree'] = pedigree.compute(graph)
    graph = graph_stats(graph)

    return graph
@Composable
def compute_pedigree(graph, **kwargs):
    """Store the result of `pedigree.compute(graph)` under graph['meta']['pedigree'].

    :returns: the same graph
    """
    computed = pedigree.compute(graph)
    graph['meta']['pedigree'] = computed
    return graph
@Composable
def graph_stats(graph, **kwargs):
    """Count vertices per node type and edges per edge type.

    The counts are stored in graph['meta']['stats'] (keys 'nodetypes' and
    'edgetypes') and written in the 'count' entry of every item of
    graph['nodetypes'] / graph['edgetypes'] (0 when a type is unused).

    :returns: the same graph
    """
    graph['meta']['stats'] = {}

    # occurrences of each node type uuid among the vertices
    node_counts = dict(Counter(graph.vs['nodetype']))
    for nodetype in graph['nodetypes']:
        nodetype['count'] = node_counts.get(nodetype['uuid'], 0)
    graph['meta']['stats']['nodetypes'] = node_counts

    # occurrences of each edge type uuid among the edges
    edge_counts = dict(Counter(graph.es['edgetype']))
    for edgetype in graph['edgetypes']:
        edgetype['count'] = edge_counts.get(edgetype['uuid'], 0)
    graph['meta']['stats']['edgetypes'] = edge_counts

    return graph
@Composable
def prepare_graph(graph):
    """Give `graph` the attributes the rest of the module relies on.

    Missing attributes are created, existing ones are left untouched:
    a graph-level 'meta' dictionary; a type ('nodetype' / 'edgetype',
    default "T"), a 'uuid' (position in the sequence) and a 'properties'
    dictionary on every vertex and edge; a 'weight' of 1. on every edge.

    :returns: the same graph
    """

    def _fill_defaults(seq, type_attr):
        # Shared logic for graph.vs and graph.es; only the name of the
        # type attribute differs between the two.
        if type_attr not in seq.attribute_names():
            seq[type_attr] = ["T"] * len(seq)
        if 'uuid' not in seq.attribute_names():
            seq['uuid'] = range(len(seq))
        if 'properties' not in seq.attribute_names():
            names = seq.attribute_names()
            reserved = (type_attr, 'uuid', 'properties')
            bundles = []
            for item in seq:
                # every other attribute is copied into the properties dict
                bundle = {name: item[name] for name in names if name not in reserved}
                if 'label' not in names:
                    bundle['label'] = item.index
                bundles.append(bundle)
            seq['properties'] = bundles

    if 'meta' not in graph.attributes():
        graph['meta'] = {'edge_count': 0, 'node_count': 0}

    _fill_defaults(graph.vs, 'nodetype')
    _fill_defaults(graph.es, 'edgetype')

    if 'weight' not in graph.es.attribute_names():
        graph.es['weight'] = [1.] * len(graph.es)

    return graph
def igraph2dict(graph, exclude_gattrs=[], exclude_vattrs=[], exclude_eattrs=[], id_attribute=None):
    """ Transform a graph (igraph graph) to a dictionary
    to send it to template (or json)

    The result has the keys 'vs' (list of vertex attribute dicts), 'es'
    (list of edge attribute dicts with 'source' and 'target'), 'meta', and
    'nodetypes' / 'edgetypes' / 'properties' when the graph has them.

    :param graph: the graph to transform
    :type graph: :class:`igraph.Graph`
    :param exclude_gattrs: graph attributes to exclude (TODO, currently unused)
    :param exclude_vattrs: vertex attributes to exclude (TODO, currently unused)
    :param exclude_eattrs: edges attributes to exclude (TODO, currently unused)
    :param id_attribute: name of the vertex attribute used as edge
        source/target; when None the vertex index is used and also stored
        under the 'id' key of each vertex
    """
    # some check
    assert isinstance(graph, igraph.Graph)

    # graph-level attributes
    attrs = {k: graph[k] for k in graph.attributes()}

    d = {'vs': [], 'es': []}
    for key in ('nodetypes', 'edgetypes', 'properties'):
        if key in attrs:
            d[key] = attrs.pop(key)

    # A graph without a 'meta' attribute still gets a meta section.
    # NOTE: when the graph has one, that same dict object is updated in place.
    d['meta'] = attrs.pop('meta') if 'meta' in attrs else {}

    v_attr_names = graph.vs.attribute_names()
    d['meta'].update({
        'directed': graph.is_directed(),
        # membership must be tested on the attribute names: `in graph.vs`
        # would compare the string against the vertices themselves.
        'bipartite': 'type' in v_attr_names and graph.is_bipartite(),
        'e_attrs': sorted(graph.es.attribute_names()),
        'v_attrs': sorted(attr for attr in v_attr_names if not attr.startswith('_')),
    })

    # vertices
    v_idx = {}
    for vid, vtx in enumerate(graph.vs):
        vertex = vtx.attributes()
        if id_attribute is not None:
            v_idx[vid] = vertex[id_attribute]
        else:
            v_idx[vid] = vid
            vertex["id"] = vid
        d['vs'].append(vertex)

    # edges
    for edg in graph.es:
        edge = edg.attributes()  # copies every attribute of the edge
        edge["source"] = v_idx[edg.source]  # matches the vertex identifier chosen above
        edge["target"] = v_idx[edg.target]
        # TODO check that the attributes do not already hold 'source' / 'target'
        d['es'].append(edge)

    return d
@Composable
def export_graph(graph, exclude_gattrs=[], exclude_vattrs=[], exclude_eattrs=[], id_attribute=None):
    """Composable wrapper around `igraph2dict`; every argument is forwarded as is."""
    return igraph2dict(
        graph,
        exclude_gattrs=exclude_gattrs,
        exclude_vattrs=exclude_vattrs,
        exclude_eattrs=exclude_eattrs,
        id_attribute=id_attribute,
    )
\ No newline at end of file
......@@ -22,15 +22,15 @@ from cello.graphs.prox import ProxSubgraph, ProxExtract, pure_prox, sortcut
from cello.layout import export_layout
from cello.clustering import export_clustering
from pdgapi.explor import ComplexQuery, AdditiveNodes, NodeExpandQuery, export_graph, layout_api, clustering_api
from pdgapi.explor import ComplexQuery, AdditiveNodes, NodeExpandQuery, layout_api, clustering_api
from botapad.utils import export_graph
def db_graph(graphdb, query):
    """Return the graph that `graphdb.get_graph` gives for the id stored in query['graph']."""
    return graphdb.get_graph(query['graph'])
def pad2pdg(gid, url, host, key, delete, debug=False):
description = "imported from %s" % url
bot = Botagraph()
......@@ -43,36 +43,7 @@ def pad2igraph(gid, url, format="csv"):
graph['meta']['owner'] = None
graph['meta']['date'] = datetime.datetime.now().strftime("%Y-%m-%d %Hh%M")
return graph
def types_stats( items , opt={}):
    """Return a plain dict mapping each distinct item to its number of occurrences.

    :param items: iterable of hashable values
    :param opt: unused
    """
    # The former trailing print sat after the return and could never run.
    return dict(Counter(items))
@Composable
def graph_stats(graph, **kwargs):
    """Count vertices per node type and edges per edge type.

    The counts are stored in graph['meta']['stats'] (keys 'nodetypes' and
    'edgetypes') and written in the 'count' entry of every item of
    graph['nodetypes'] / graph['edgetypes'] (0 when a type is unused).

    :returns: the same graph
    """
    graph['meta']['stats'] = {}

    # No debug output here: the counts are only stored on the graph.
    stats = types_stats(graph.vs['nodetype'])
    for e in graph['nodetypes']:
        e['count'] = stats.get(e['uuid'], 0)
    graph['meta']['stats']['nodetypes'] = stats

    stats = types_stats(graph.es['edgetype'])
    for e in graph['edgetypes']:
        e['count'] = stats.get(e['uuid'], 0)
    graph['meta']['stats']['edgetypes'] = stats

    return graph
from cello.graphs import pedigree
@Composable
def compute_pedigree(graph, **kwargs):
    """Store the result of `pedigree.compute(graph)` under graph['meta']['pedigree'].

    :returns: the same graph
    """
    computed = pedigree.compute(graph)
    graph['meta']['pedigree'] = computed
    return graph
from botapad import Botapad, BotapadError, BotapadParseError, BotapadURLError, BotapadCsvError
from botapi import BotApiError, Botagraph, BotaIgraph, BotLoginError
......
......@@ -16,9 +16,10 @@ from functools import wraps
from flask import Flask, Response, make_response, g, current_app, request
from flask import render_template, render_template_string, abort, redirect, url_for, jsonify
from botapadapi import pad2igraph, pad2pdg
from botapi import BotApiError, BotLoginError
from botapad import Botapad, BotapadError, BotapadParseError, BotapadURLError, BotapadCsvError
from botapadapi import pad2igraph, pad2pdg, compute_pedigree, graph_stats
from botapad.utils import export_graph, prepare_graph, compute_pedigree, graph_stats
from cello.graphs import IN, OUT, ALL
from cello.graphs.prox import ProxSubgraph
......@@ -102,7 +103,7 @@ import igraph
from igraph.utils import named_temporary_file
import cPickle as pickle
import StringIO
from pdgapi.explor import export_graph, prepare_graph, igraph2dict, EdgeList
from pdgapi.explor import EdgeList
from pdglib.graphdb_ig import IGraphDB, engines
......
import sys
import argparse
from botapi import Botagraph, BotApiError
from reliure.types import Text
from collections import namedtuple
import codecs
import requests
import re
import csv
from botapad import *
# Assumes that the vertex data are separated from the links,
# that the graph is undirected,
# and that, within an edgetype, the links always list the same 2 types in the same position
# (e.g. person -- infraction for all the links, or infraction -- person for all the links of an edgetype).
class Histograph(object):
    """Reads padagraph pad files and writes a 'histograph' pad.

    Parsed data are kept in:
      * vertices: {nodetype label: {first column: second column}}
      * edges:    {edgetype label: [[source, target], ...]}
      * urls:     {type label: path the type was read from}
      * vtype:    {vertex id: nodetype label}
      * evtype:   {edgetype label: {(type, type): number of links}}
    """

    def __init__(self, links_url):
        """Parse `links_url` (file path or http url) and everything it imports.

        :param links_url: path or url handed to `parse`
        """
        self.vertices = {}
        self.edges = {}
        self.urls = {}
        self.vtype = {}
        self.evtype = {}
        self.histodata = {}
        self.distribdata = {}
        self.parse(links_url)

    def read(self, path, separator='auto'):
        """Download or open `path` and return its non-empty csv rows.

        :param path: http(s) url or local file path
        :param separator: csv delimiter; with 'auto' a first line equal to
            '!;' or '!,' selects the delimiter, otherwise ',' is used
        :returns: list of rows, each a list of stripped unicode cells
        :raises BotapadURLError: if the url cannot be downloaded
        :raises BotapadError: if the file cannot be read
        :raises BotapadCsvError: if the csv parsing fails
        """
        if path[0:4] == 'http':
            # keep a usable value for the error message if convert_url fails
            url = path
            try:
                url = convert_url(path)
                log(" * Downloading %s \n" % url)
                content = requests.get(url).text
                lines = content.split('\n')
            except Exception:
                raise BotapadURLError("Can't download %s" % url, url)
        else:
            log(" * Opening %s \n" % path)
            try:
                with codecs.open(path, 'r', encoding='utf8') as fin:
                    lines = [line for line in fin]
            except Exception:
                raise BotapadError("Can't read file %s" % path)

        lines = [line.strip() for line in lines]
        lines = [line.encode('utf8') for line in lines if len(line)]

        if separator == u'auto':
            # an empty input has no first line to inspect
            line = lines[0].strip() if lines else ''
            if line in ('!;', '!,'):
                separator = line[1:]
            else:
                separator = ','

        log(" * Reading %s (%s) lines with delimiter '%s'" % (path, len(lines), separator))

        try:
            reader = csv.reader(lines, delimiter=separator)
            rows = [r for r in reader]
            rows = [[e.strip().decode('utf8') for e in r] for r in rows if len(r) and not all([len(e) == 0 for e in r])]
        except Exception:
            raise BotapadCsvError(path, separator, "Error while parsing data %s lines with separator %s" % (len(lines), separator))

        return rows

    def store(self, current, rows, path):
        """Record the rows collected for the type described by `current`.

        :param current: tuple (kind, label, props) built by `parse`
        :param rows: data rows collected since the type header
        :param path: path the rows were read from
        """
        label = current[1]
        # kind is compared with the literal 0; presumably that is the value
        # of the EDGE constant imported from botapad -- TODO confirm.
        if current[0] == 0:
            pairs = [x[0].split(' -- ') for x in rows]
            self.edges[label] = [[x[0].strip(), x[1].strip()] for x in pairs]
        else:
            self.vertices[label] = dict((x[0].strip(), x[1].strip()) for x in rows)
            for x in rows:
                self.vtype[x[0].strip()] = label
        self.urls[label] = path

    def parse(self, path):
        """ :param path : txt file path

        handles special lines starting with [! & @ _]
        for comments, imports, node type and edge type definitions
        """
        table = self.read(path)
        rows = []
        current = ()  # (VERTEX | EDGE, label, props)

        for row in table:
            cell = row[0]
            # ! comment
            if cell[:1] == "!":
                continue

            # & import of an external resource
            if cell[:1] == "&":
                url = cell[1:].strip()
                self.parse(url)

            # @ Nodetypes, _ Edgetypes
            elif cell[:1] in ("@", "_"):
                # a new header closes the type that was being collected
                if len(current) > 0:
                    self.store(current, rows, path)

                # processing directive
                line = ";".join(row)
                cols = re.sub(' ', '', line[1:])  # no space
                # @Politic: %Chamber; #First Name; #Last Name;%Party;%State;%Stance;Statement;
                cols = [e for e in re.split("[:;,]", "%s" % cols, flags=re.UNICODE) if len(e)]
                label = cols[0]  # @Something

                props = [Prop(norm_key(e), Text(multi="+" in e), "@" in e, "#" in e, "+" in e, "%" in e, "+" in e and "=" in e) for e in cols[1:]]

                if cell[:1] == "@":  # nodetype def
                    rows = []
                    current = (VERTEX, label, props)
                elif cell[:1] == "_":  # edgetype def
                    rows = []
                    current = (EDGE, label, props)

            else:  # table data
                if current and current[2]:
                    for i, v in enumerate(row):
                        if i >= len(props):
                            break
                        if props[i].ismulti:
                            row[i] = [e.strip() for e in re.split("[_,;]", v.strip())]
                rows.append(row)

        # A file holding only comments / imports never defines a type:
        # there is nothing left to store in that case.
        if current:
            self.store(current, rows, path)

    def EdgesToVertices(self):
        """Fill `evtype` with, per edgetype, the number of links for each
        (sorted) pair of vertex types."""
        for label in self.edges:
            self.evtype[label] = {}
            for edge in self.edges[label]:
                pair = tuple(sorted([self.vtype[edge[0]], self.vtype[edge[1]]]))
                self.evtype[label][pair] = self.evtype[label].get(pair, 0) + 1

    def show(self):
        """Print the parsed node types and edge types with their source path."""
        print('Vertices:')
        for x in self.vertices.keys():
            print('%s \t %s' % (x, self.urls[x]))
        print('\nEdges:')
        self.EdgesToVertices()
        for x in self.edges.keys():
            print('%s \t %s \t %s' % (x, self.urls[x], self.evtype[x]))

    def checkOrder(self, type1, type2, links):
        """Set `self.order` to the positions of `type1` and `type2` in the
        links of edgetype `links`, based on its first link.

        Exits the program when the first link does not connect the two types.
        """
        first = self.edges[links][0]
        left, right = self.vtype[first[0]], self.vtype[first[1]]
        if left == type1 and right == type2:
            self.order = [0, 1]
        elif right == type1 and left == type2:
            self.order = [1, 0]
        else:
            print('oups, vertices and edges do not correspond')
            sys.exit()

    def histo(self, type1, type2, links):
        """Write `<links>_histograph.txt`, a pad linking every `type2`
        vertex to a node of a percentage scale.

        For each `type2` vertex the share of the `links` edges pointing to
        it is computed; the scale is cut in steps of one tenth of the range
        of those shares (at least 1) and each vertex is attached to the step
        whose lower bound is below its share.

        :param type1: nodetype on one side of the links
        :param type2: nodetype whose distribution is drawn
        :param links: edgetype label
        """
        from bisect import bisect_left

        fname = links + '_histograph.txt'

        # Count the variable distribution
        self.checkOrder(type1, type2, links)
        target = self.order[1]
        totals = {}
        for edge in self.edges[links]:
            totals[edge[target]] = totals.get(edge[target], 0) + 1
        n_links = float(len(self.edges[links]))
        counts = {}
        for key in totals:
            counts[key] = [totals[key], round(totals[key] * 100 / n_links, 2)]

        # Set the scale
        percents = [c[1] for c in counts.values()]
        lowest, highest = min(percents), max(percents)
        # A narrow range rounds to a step of 0, which would never advance
        # along the scale: fall back to a step of 1.
        step = round((highest - lowest) / 10.0, 0) or 1.0
        print(step)

        # Create the scale; the first step always exists so that every
        # value has a step to land on.
        bounds = []
        buckets = {}
        low = int(lowest)
        while True:
            bounds.append(low)
            buckets[(low, low + step)] = []
            low += step
            if low >= highest:
                break

        # Put each node on the scale: last lower bound strictly below the
        # value, or the first step when the value sits on the lowest bound.
        for key in counts:
            pos = max(bisect_left(bounds, counts[key][1]) - 1, 0)
            low = bounds[pos]
            buckets[(low, low + step)].append(key)

        with open(fname, 'w') as out:
            # write the imports of data file and set the separator
            out.write('!;\n\n& ' + self.urls[type2] + '\n\n')

            # Write the list of the scale nodes (non-empty steps only)
            out.write('\n\n@ Percent: #label, shape\n\n')
            labels = {}
            for low in bounds:
                if len(buckets[(low, low + step)]) > 0:
                    idnode = str(low) + '_to_' + str(low + step)
                    out.write(idnode + '; circle\n')
                    labels[low] = idnode

            # Link consecutive 'percent' nodes to create a visual scale;
            # ordering is numeric on the lower bound, not on the label text.
            out.write('\n\n_ Scale\n\n')
            ordered = [labels[low] for low in sorted(labels)]
            for left, right in zip(ordered, ordered[1:]):
                out.write(left + ' -- ' + right + '\n')

            # Write the data links of the histograph
            out.write('\n\n_ Distribution, percentage\n\n')
            for low in bounds:
                for y in buckets[(low, low + step)]:
                    out.write(str(low) + '_to_' + str(low + step) + ' -- ' + y + '; ' + str(counts[y][0]) + ' items and ' + str(counts[y][1]) + ' pct\n')

        print('The file ' + fname + ' is ready to be imported in framadap!')
#def distrib(self,type1,type2,links):
# s1 = open(links+'_distrib_'+type1+'_to_'+type2+'.txt','w')
#s2to1 = open(links+'_distrib_'+type2+'_to_'+type1+'.txt','w')
# s1to2.write('!;\n\n& '+self.urls[type1]+'\n& '+self.urls[type2]+'\n')
#s2to1.write('!;\n\n& '+self.urls[type1]+'\n& '+self.urls[type2]+'\n\n')
# LinkType = '_ Distribution\n\n'
# self.checkOrder(type1,type2,links)
# s1.close()
#s2tos1.close()
if __name__ == '__main__':
    # usage: script LINKS [TYPE1 TYPE2 EDGETYPE]
    # The three optional arguments default to the previously hard-coded values.
    if len(sys.argv) < 2:
        sys.exit('usage: %s LINKS_PATH_OR_URL [TYPE1 TYPE2 EDGETYPE]' % sys.argv[0])
    h = Histograph(sys.argv[1])
    h.show()
    if len(sys.argv) >= 5:
        type1, type2, links = sys.argv[2:5]
    else:
        type1, type2, links = 'Personne', 'Infraction', 'PersonInfraction'
    h.histo(type1, type2, links)
\ No newline at end of file
#gunicorn
redis
flask
flask-login
flask-runner
Flask-Markdown
flask_cors
......
......@@ -4,6 +4,7 @@ from setuptools import setup, find_packages
"""
0.7 moved __init__ botapad.py
0.6.1 log formating
visualisation url parameters
0.6 added edge direction with reification > or < where % === %<
......@@ -20,7 +21,7 @@ required = []
setup(
name='botapad',
version='0.6.1',
version='0.6.9',
description='padagraph pad parser',
author='ynnk, a-tsioh',
author_email='contact@padagraph.io',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment