refactor to

ac109d37 · Yannick Chudy · 2ba7aea7 · ac109d37 · ac109d37 · ac109d37
Commit ac109d37 authored Apr 03, 2018 by Yannick Chudy
9 changed files
--- a/bin/install.sh
+++ b/bin/install.sh
+
+sudo apt update --assume-yes
+sudo apt upgrade --assume-yes
+sudo apt install gunicorn python-igraph python-pip nginx git xz-utils python-wheel python-future python-numpy python-scipy python-sklearn certbot --assume-yes
+
+sudo adduser botapad --gecos "" --disabled-password 
+
+sudo su botapad -c "cd /home/botapad;
+git clone https://github.com/padagraph/botapadd.git;
+
+cd botapadd ;
+pip install -r requirements-prod.txt;
+pip install https://github.com/padagraph/botapi/archive/master.zip --no-deps"
+
+sudo adduser foldr --gecos "" --disabled-password
+
+sudo su foldr -c " cd /home/foldr;
+wget https://nodejs.org/dist/v6.11.3/node-v6.11.3-linux-x64.tar.xz ;
+tar -xf node-v6.11.3-linux-x64.tar.xz ;
+export PATH=$PATH:/home/foldr/node-v6.11.3-linux-x64/bin ;
+git clone https://github.com/padagraph/hackfoldr-2.0-forkme.git ;
+cd hackfoldr-2.0-forkme ;
+npm i;
+./node_modules/gulp/bin/gulp.js build
+"
+
+sudo cp botapad.service /etc/systemd/system/
+sudo systemctl daemon-reload
+sudo systemctl enable botapad
+sudo service botapad start
+
+sudo cp nginx/* /etc/nginx/sites-enabled/
+sudo service nginx stop
+certbot certonly --standalone -d botapad.padagraph.io
+sudo service nginx restart
--- a/botapad/__init__.py
+++ b/botapad/__init__.py
--- a/botapad/botapad.py
+++ b/botapad/botapad.py
--- a/botapad/utils.py
+++ b/botapad/utils.py
+
+
+import igraph
+import datetime
+import requests
+from collections import Counter
+
+from reliure.pipeline import Optionable, Composable
+from botapi import BotaIgraph
+from botapad import Botapad
+
+from cello.graphs import pedigree
+
+
+
+
+@Composable
+def empty_graph(gid, headers, **kwargs):
+
+    bot = BotaIgraph(directed=True)
+    botapad = Botapad(bot , gid, "", delete=False, verbose=True, debug=False)
+    botapad.parse_csvrows( headers, separator='auto', debug=False)
+
+    graph = bot.get_igraph(weight_prop="weight")
+    graph = prepare_graph(graph)
+    graph['starred'] = []
+    graph['queries'] = []
+    graph['meta'] = {  
+            'owner': None,
+            'date': None,
+            #'date' : datetime.datetime.now().strftime("%Y-%m-%d %Hh%M")
+            'node_count': graph.vcount(),
+            'edge_count': graph.ecount(),
+            'star_count': len( graph['starred'] ),
+            'stats' : {}
+        }
+    #graph['meta']['pedigree'] = pedigree.compute(graph)
+
+    return graph
+
+
+@Composable
+def calc2igraph(gid, url, description="", verbose=True, debug=False):
+    bot = BotaIgraph(directed=True)
+    botapad = Botapad(bot , gid, description, delete=False, verbose=verbose, debug=debug)
+    botapad.parse(url, separator='auto', debug=False)
+    graph = bot.get_igraph(weight_prop="weight")
+    return graph
+
+@Composable
+def merge(gid, graph, g, index=None, vid=None, **kwargs):
+    """ merge g into graph, returns graph """
+    if callable(index):
+        idx = index(gid, graph)
+    else : idx = index
+
+    if vid == None :
+        vid = lambda v : v.index
+    
+    if None in (gid, graph, g, idx) :
+        raise ValueError('One of (gid, graph, g, index)  for graph `%s` is none'  % gid )
+    
+    nodetypes = [ e['name'] for e in graph['nodetypes'] ]
+    for k in g['nodetypes']:
+        if k['name'] not in nodetypes:
+            graph['nodetypes'].append(k)
+
+    nodetypes = { e['uuid']: e  for e in graph['nodetypes'] }
+    for v in g.vs:
+        _vid = vid(gid,v)
+        if _vid not in idx:
+                uuid = "%s" % graph.vcount()
+                attrs = v.attributes()
+                attrs['uuid'] = uuid
+
+                nodetype = nodetypes[attrs['nodetype']]
+                properties = nodetype['properties']
+                for k in properties:
+                    if k not in attrs['properties']:
+                        attrs['properties'][k] = properties[k]['default']
+                
+                graph.add_vertex( **attrs )
+                idx[ _vid ] = graph.vs[graph.vcount()-1]
+                
+                            
+    edgetypes = [ e['name'] for e in graph['edgetypes'] ]
+    for k in g['edgetypes']:
+        if k['name'] not in edgetypes:
+            graph['edgetypes'].append(k)
+
+    edgetypes = { e['uuid']: e  for e in graph['edgetypes'] }
+    for e in g.es:
+        v1, v2 = (vid(gid, g.vs[e.source] ), vid(gid, g.vs[e.target]) )
+        #if v1 in idx 
+        v1, v2 = ( idx[v1], idx[v2] )
+        eid = graph.get_eid( v1, v2 , directed=True, error=False )
+        if eid == -1:
+            e['uuid'] = graph.ecount()
+            attrs = e.attributes()
+            edgetype = edgetypes[attrs['edgetype']]
+            properties = edgetype['properties']
+            for k in properties:
+                if k not in attrs['properties']:
+                    attrs['properties'][k] = properties[k]['default']
+            
+            graph.add_edge( v1, v2, **attrs )
+
+    graph['queries'].append(g['query'])
+    graph['meta'] = {
+            'node_count': graph.vcount(),
+            'edge_count': graph.ecount(),
+            'star_count': len( graph['starred'] ),
+            'owner': None,
+            'date': None,
+            #'date' : datetime.datetime.now().strftime("%Y-%m-%d %Hh%M")
+        }
+    graph['meta']['pedigree'] = pedigree.compute(graph)
+    graph = graph_stats(graph)    
+    return graph
+
+
+@Composable
+def compute_pedigree(graph, **kwargs):
+    graph['meta']['pedigree'] = pedigree.compute(graph)
+    return graph
+    
+
+@Composable
+def graph_stats(graph, **kwargs):
+
+    def _types_stats( items , opt={}):
+        counter = dict(Counter(items))
+        return counter
+
+    graph['meta']['stats'] = {}
+
+    stats = _types_stats(graph.vs['nodetype'])
+    for e in graph['nodetypes']:
+        e['count'] = stats.get(e['uuid'], 0)
+    graph['meta']['stats']['nodetypes'] = stats
+    
+    stats = _types_stats(graph.es['edgetype'])
+    for e in graph['edgetypes']:
+        e['count'] = stats.get(e['uuid'], 0)
+    graph['meta']['stats']['edgetypes'] = stats
+
+    return graph
+
+    
+@Composable
+def prepare_graph(graph):
+
+    if not 'meta' in graph.attributes():
+        graph['meta'] = { 'edge_count':0,'node_count':0, }
+    if 'nodetype' not in graph.vs.attribute_names():
+        graph.vs['nodetype'] = [ "T" for e in graph.vs ]
+    if 'uuid' not in graph.vs.attribute_names():
+        graph.vs['uuid'] = range(len(graph.vs))
+    if 'properties' not in graph.vs.attribute_names():
+        props = [ {  }  for i in range(len(graph.vs))]
+        attrs = graph.vs.attribute_names()
+        
+        for p,v  in zip(props, graph.vs):
+            for e in attrs:
+                if e not in ['nodetype', 'uuid', 'properties' ]  :
+                    p[e] = v[e]
+            if 'label' not in attrs:
+                p['label']  = v.index
+                
+        graph.vs['properties'] = props
+           
+    if 'edgetype' not in graph.es.attribute_names():
+        graph.es['edgetype'] = [ "T" for e in graph.es ]
+    if 'uuid' not in graph.es.attribute_names():
+        graph.es['uuid'] = range(len(graph.es))
+    if 'properties' not in graph.es.attribute_names():
+        props = [ {  }  for i in range(len(graph.es))]
+        attrs = graph.es.attribute_names()
+        
+        for p,v  in zip(props, graph.es):
+            for e in attrs:
+                if e not in ['edgetype', 'uuid', 'properties' ]  :
+                    p[e] = v[e]
+            if 'label' not in attrs:
+                p['label']  = v.index
+                
+        graph.es['properties'] = props
+
+    if 'weight' not in graph.es.attribute_names():
+        graph.es['weight'] = [1. for e in graph.es ]
+
+    return graph
+
+
+def igraph2dict(graph, exclude_gattrs=[], exclude_vattrs=[], exclude_eattrs=[], id_attribute=None):
+    """ Transform a graph (igraph graph) to a dictionary
+    to send it to template (or json)
+    
+    :param graph: the graph to transform
+    :type graph: :class:`igraph.Graph`
+    :param exclude_gattrs: graph attributes to exclude (TODO)
+    :param exclude_vattrs: vertex attributes to exclude (TODO)
+    :param exclude_eattrs: edges attributes to exclude (TODO)
+    """
+    
+    # some check
+    assert isinstance(graph, igraph.Graph)
+    #if 'id' in graph.vs.attributes():
+        #raise Warning("The graph already have a vertex attribute 'id'")
+
+    # create the graph dict
+    attrs = { k : graph[k] for k in graph.attributes()}
+    d = {}
+    d['vs'] = []
+    d['es'] = []
+    
+    # attributs of the graph
+    if 'nodetypes' in attrs : 
+        d['nodetypes']  = attrs.pop('nodetypes')
+    if 'edgetypes' in attrs : 
+        d['edgetypes']  = attrs.pop('edgetypes')
+    
+    if 'properties' in attrs:
+        d['properties'] = attrs.pop('properties', {})
+
+    if 'meta' in attrs:
+        d['meta'] = attrs.pop('meta', {})
+        d['meta'].update( {
+            'directed' : graph.is_directed(), 
+            'bipartite' : 'type' in graph.vs and graph.is_bipartite(),
+            'e_attrs' : sorted(graph.es.attribute_names()),
+            'v_attrs' : sorted( [ attr for attr in graph.vs.attribute_names() if not attr.startswith('_')])
+            })
+
+    # vertices
+    v_idx = { }
+    for vid, vtx in enumerate(graph.vs):
+        vertex = vtx.attributes()
+        if id_attribute is not None:
+            v_idx[vid] = vertex[id_attribute]
+        else:
+            v_idx[vid] = vid
+            vertex["id"] = vid
+
+        d['vs'].append(vertex)
+
+    # edges
+    _getvid = lambda vtxid : v_idx[vtxid] if id_attribute else vtxid 
+
+    for edg in graph.es:
+        edge = edg.attributes() # recopie tous les attributs
+        edge["source"] = v_idx[edg.source] # match with 'id' vertex attributs
+        edge["target"] = v_idx[edg.target]
+        #TODO check il n'y a pas de 's' 't' dans attr
+        d['es'].append(edge)
+
+    return d
+    
+@Composable
+def export_graph(graph, exclude_gattrs=[], exclude_vattrs=[], exclude_eattrs=[], id_attribute=None):
+    return  igraph2dict(graph, exclude_gattrs, exclude_vattrs, exclude_eattrs, id_attribute)    
+
+    
\ No newline at end of file
--- a/botapadapi.py
+++ b/botapadapi.py
@@ -22,15 +22,15 @@ from cello.graphs.prox import ProxSubgraph, ProxExtract, pure_prox, sortcut
 from cello.layout import export_layout
 from cello.clustering import export_clustering

-from pdgapi.explor import ComplexQuery, AdditiveNodes, NodeExpandQuery, export_graph, layout_api, clustering_api
+from pdgapi.explor import ComplexQuery, AdditiveNodes, NodeExpandQuery, layout_api, clustering_api

+from botapad.utils import export_graph

 def db_graph(graphdb, query ):
    gid = query['graph']
    graph = graphdb.get_graph(gid)
    return graph

-   
 def pad2pdg(gid, url, host, key, delete, debug=False):
    description = "imported from %s" % url
    bot = Botagraph()
@@ -43,36 +43,7 @@ def pad2igraph(gid, url, format="csv"):
    graph['meta']['owner'] = None
    graph['meta']['date'] = datetime.datetime.now().strftime("%Y-%m-%d %Hh%M")
    return graph
-
-def types_stats( items , opt={}):
-    counter = Counter(items)
-    return dict(counter)  
-    print counter
-
-@Composable
-def graph_stats(graph, **kwargs):
-    graph['meta']['stats'] = {}
-
-    stats = types_stats(graph.vs['nodetype'])
-    print stats
-    for e in graph['nodetypes']:
-        e['count'] = stats.get(e['uuid'], 0)
-    graph['meta']['stats']['nodetypes'] = stats
-    
-    stats = types_stats(graph.es['edgetype'])
-    for e in graph['edgetypes']:
-        e['count'] = stats.get(e['uuid'], 0)
-    graph['meta']['stats']['edgetypes'] = stats
-    return graph
-
-from cello.graphs import pedigree
-
-@Composable
-def compute_pedigree(graph, **kwargs):
-    graph['meta']['pedigree'] = pedigree.compute(graph)
-    return graph
    
-
 from botapad import Botapad, BotapadError, BotapadParseError, BotapadURLError, BotapadCsvError
 from botapi import BotApiError, Botagraph,  BotaIgraph, BotLoginError


--- a/botapadapp.py
+++ b/botapadapp.py
@@ -16,9 +16,10 @@ from functools import wraps
 from flask import Flask, Response, make_response, g, current_app, request
 from flask import render_template, render_template_string, abort, redirect, url_for,  jsonify

+from botapadapi import pad2igraph, pad2pdg
 from botapi import BotApiError, BotLoginError
 from botapad import Botapad, BotapadError, BotapadParseError, BotapadURLError, BotapadCsvError
-from botapadapi import pad2igraph, pad2pdg, compute_pedigree, graph_stats
+from botapad.utils import export_graph, prepare_graph, compute_pedigree, graph_stats

 from cello.graphs import IN, OUT, ALL
 from cello.graphs.prox import ProxSubgraph
@@ -102,7 +103,7 @@ import igraph
 from igraph.utils import named_temporary_file 
 import cPickle as pickle
 import StringIO
-from pdgapi.explor import export_graph, prepare_graph, igraph2dict, EdgeList
+from pdgapi.explor import EdgeList
 from pdglib.graphdb_ig import IGraphDB, engines



--- a/histograph.py
+++ b/histograph.py
+import sys
+import argparse
+from botapi import Botagraph, BotApiError
+from reliure.types import Text 
+
+from collections import namedtuple
+import codecs
+import requests
+import re
+import csv
+
+from botapad import *
+
+# Assumes that the vertex data are separated from the links,
+# that the graph is undirected,
+# and that, within an edgetype, the links always list the same 2 types in the same order
+# (e.g. person -- infraction for all the links, or infraction -- person for all the links of an edgetype).
+
+
+class Histograph(object):
+
+    def __init__(self, links_url):
+        """ Function doc
+        :param : 
+        """
+        self.vertices = {}
+        self.edges = {}
+        self.urls = {}
+        self.vtype = {}
+        self.evtype = {}
+        self.histodata = {}
+        self.distribdata = {}
+        self.parse(links_url)
+
+    def read(self, path, separator='auto'):
+
+        if path[0:4] == 'http':
+            try : 
+                url = convert_url(path)
+                log( " * Downloading %s \n" % url)
+                content = requests.get(url).text
+                lines = content.split('\n')
+            except :
+                raise BotapadURLError("Can't download %s" % url, url)
+        else:
+            log( " * Opening %s \n" % path)
+            try : 
+                with codecs.open(path, 'r', encoding='utf8' ) as fin:
+                    lines = [ line for line in fin]
+            except :
+                raise BotapadError("Can't read file %s" % path)
+
+        lines = [ line.strip() for line in lines ]
+        lines = [ line.encode('utf8') for line in lines if len(line)]
+        
+        if separator == u'auto':
+            line = lines[0].strip()
+            if line in ( '!;','!,'):
+                separator = line[1:]
+            else: separator = ','
+
+        log(" * Reading %s (%s) lines with delimiter '%s'" % (path, len(lines), separator))
+
+        try : 
+            reader = csv.reader(lines, delimiter=separator)
+            rows = [ r for r in reader]
+            rows = [ [ e.strip().decode('utf8')  for e in r ] for r in rows if len(r) and not all([ len(e) == 0 for e in r]) ]
+        except :
+            raise BotapadCsvError(path, separator, "Error while parsing data %s lines with separator %s" % (len(lines), separator )  )
+
+        return rows
+                    
+    def store(self,current,rows,path):
+        if current[0]==0:
+            rows = [x[0].split(' -- ') for x in rows]
+            self.edges[current[1]] = [[x[0].strip(),x[1].strip()] for x in rows]
+        else:
+            self.vertices[current[1]] = dict([[x[0].strip(),x[1].strip()] for x in rows])
+            for x in rows:
+                self.vtype[x[0].strip()]=current[1]
+        self.urls[current[1]]=path
+
+
+
+    def parse(self, path):
+        """ :param path : txt file path
+
+        handles special lines starting with [# @ _]
+        for comments, node type, property names
+        
+        """
+        csv = self.read(path)
+        
+        rows = []
+        current = () # (VERTEX | EDGE, label, names, index_prop)
+        
+        
+        for row in csv:
+            cell = row[0]
+            # ! comment
+            if cell[:1] == "!":
+                continue
+
+            # IMPORT external ressource
+            if cell[:1] == "&":
+                url = cell[1:].strip()
+                self.parse(url)
+                    
+            # @ Nodetypes, _ Edgetypes
+            elif cell[:1] in ("@", "_"):
+                if len(current)>0:
+                    self.store(current,rows,path)
+                # processing directiv
+                line = ";".join(row)
+                cols = re.sub(' ', '', line[1:]) # no space
+                # @Politic: %Chamber; #First Name; #Last Name;%Party;%State;%Stance;Statement;
+                cols = [e for e in re.split("[:;,]" , "%s" % cols, flags=re.UNICODE) if len(e)]
+                label = cols[0] # @Something
+                
+                # ( name, type indexed, projection )
+                props = [ Prop( norm_key(e), Text(multi="+" in e), "@" in e, "#" in e, "+" in e,  "%" in e, "+" in e and "=" in e ) for e in  cols[1:]]
+                    
+                if cell[:1] == "@": # nodetype def
+                    rows = []
+                    current = (VERTEX, label, props)
+                        
+                elif cell[:1] == "_": # edgetype def
+                    rows = []
+                    current = (EDGE, label, props)
+            else: # table data
+                if current and current[2]:
+                    for i, v in enumerate(row):
+                        if i >= len(props): break
+                        if props[i].ismulti :
+                            row[i] = [  e.strip() for e in re.split("[_,;]", v.strip(), ) ] 
+                            
+                rows.append(row)
+
+        self.store(current,rows,path)
+
+    def EdgesToVertices(self):
+        for x in self.edges:
+                self.evtype[x]={}
+                for edge in self.edges[x]:
+                    pair =[self.vtype[edge[0]],self.vtype[edge[1]]]
+                    pair.sort()
+                    self.evtype[x][tuple(pair)] =self.evtype[x].get(tuple(pair), 0) + 1
+
+    def show(self):
+        print 'Vertices:'
+        for x in self.vertices.keys():
+            print x,'\t', self.urls[x]
+        print '\nEdges:'
+        self.EdgesToVertices()
+        for x in self.edges.keys():
+            print x,'\t', self.urls[x],'\t',self.evtype[x]
+
+
+    def checkOrder(self,type1,type2,links):
+        if self.vtype[self.edges[links][0][0]]==type1 and self.vtype[self.edges[links][0][1]]==type2:
+            self.order = [0,1]
+        elif self.vtype[self.edges[links][0][1]]==type1 and self.vtype[self.edges[links][0][0]]==type2:
+            self.order = [1,0]
+        else:
+            print 'oups, vertices and edges do not correspond'
+            sys.exit()
+        
+
+    def histo(self,type1,type2,links):
+
+    	# writes a framapad padagraph format file
+    	fname = links+'_histograph.txt'
+        s = open(fname,'w')
+
+        # write the imports of  data file and set the separator
+        s.write('!;\n\n& '+self.urls[type2]+'\n\n')
+       
+        # Count the variable distribution
+        self.checkOrder(type1,type2,links)
+        counts = {}
+        for x in self.edges[links]:    
+            counts[x[self.order[1]]]= counts.get(x[self.order[1]],0) + 1
+        for x in counts:
+            counts[x]=[counts[x],round(counts[x]*100/float(len(self.edges[links])),2)]
+     
+
+        # Set the scale
+        percents = [x[1] for x in counts.values()]
+        rangep =  max(percents) - min(percents)
+        step = round(rangep/10.0,0)
+        print step
+        pnode = int(min(percents))
+        pnodes = {}
+        p = []
+
+        # Create a scale 
+        while pnode<max(percents):
+            pnodes[(pnode,pnode+step)]=[]
+            p.append(pnode)
+            pnode+=step
+
+        # Put eache node on the scale
+        for x in counts:
+            p.append(counts[x][1])
+            p.sort()
+            i = p[p.index(counts[x][1])-1]
+            pnodes[(i,i+step)].append(x)
+            p.remove(counts[x][1])
+
+        # Write the list of the scale nodes
+        s.write('\n\n@ Percent: #label, shape\n\n')
+        sci = {}
+        for x in pnodes:
+            if len(pnodes[x])>0:
+            	idnode = str(x[0])+'_to_'+str(x[1])
+                s.write(idnode+'; circle\n')
+                sci[x[0]]=idnode
+
+        # Makes links between 'percent' nodes to create a visual scale
+        s.write('\n\n_ Scale\n\n')
+        sci = sci.values()
+        sci.sort(key = lambda w:w[0])
+        for n in range(len(sci[:-1])):
+            s.write(sci[n]+' -- '+sci[n+1]+'\n')
+
+        # Write the data links of the histograph
+        s.write('\n\n_ Distribution, percentage\n\n')
+        for x in pnodes:
+            for y in pnodes[x]:
+                s.write(str(x[0])+'_to_'+str(x[1])+' -- '+y+'; '+str(counts[y][0])+' items and '+str(counts[y][1])+' pct\n')
+
+        s.close()
+        print 'The file '+fname+' is ready to be imported in framadap!'
+
+
+
+    #def distrib(self,type1,type2,links):
+    #	s1 = open(links+'_distrib_'+type1+'_to_'+type2+'.txt','w')
+       #s2to1 = open(links+'_distrib_'+type2+'_to_'+type1+'.txt','w')
+    #    s1to2.write('!;\n\n& '+self.urls[type1]+'\n& '+self.urls[type2]+'\n')
+        #s2to1.write('!;\n\n& '+self.urls[type1]+'\n& '+self.urls[type2]+'\n\n')
+    #    LinkType = '_ Distribution\n\n'
+    #    self.checkOrder(type1,type2,links)
+    #    s1.close()
+        #s2tos1.close()
+
+if __name__ == '__main__':
+    h = Histograph(sys.argv[1])
+    h.show()
+    h.histo('Personne','Infraction','PersonInfraction')
+    
\ No newline at end of file
--- a/requirements-prod.txt
+++ b/requirements-prod.txt
 #gunicorn

+redis
+
 flask
+flask-login
 flask-runner
 Flask-Markdown
 flask_cors

--- a/setup.py
+++ b/setup.py
@@ -4,6 +4,7 @@ from setuptools import setup, find_packages


 """
+0.7   moved __init__ botapad.py
 0.6.1 log formating
      visualisation url parameters 
 0.6   added edge direction with reification > or < where % === %<
@@ -20,7 +21,7 @@ required = []

 setup(
    name='botapad',
-    version='0.6.1',
+    version='0.6.9',
    description='padagraph pad parser',
    author='ynnk, a-tsioh',
    author_email='contact@padagraph.io',