[FIX] fix conflicts.

be968d2c · Administrator · 04b49e8a · be968d2c · be968d2c · be968d2c
Commit be968d2c authored Jun 04, 2015 by Administrator
Hide whitespace changes
Inline Side-by-side

Showing with 54 additions and 39 deletions

utils.py admin/utils.py +1 -0

functions.py analysis/functions.py +37 -35

init_gargantext.py init/init_gargantext.py +16 -3

test_db.py init/test_db.py +0 -1

No files found.
--- a/admin/utils.py
+++ b/admin/utils.py
@@ -28,3 +28,4 @@ def PrintException():
    line = linecache.getline(filename, lineno, f.f_globals)
    print('EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), exc_obj))
--- a/analysis/functions.py
+++ b/analysis/functions.py
@@ -17,23 +17,23 @@ def create_blacklist(user, corpus):
 def create_synonymes(user, corpus):
    pass
-size = 1000 
+size = 1000
 def create_whitelist(user, corpus_id, size=size, count_min=2):
    cursor = connection.cursor()
    whitelist_type_id = cache.NodeType['WhiteList'].id
    blacklist_type_id = cache.NodeType['BlackList'].id
    type_document_id  = cache.NodeType['Document'].id
    white_list = Node(name='WhiteList Corpus ' + str(corpus_id), user_id=user.id, parent_id=corpus_id, type_id=whitelist_type_id)
    black_list = Node(name='BlackList Corpus ' + str(corpus_id), user_id=user.id, parent_id=corpus_id, type_id=blacklist_type_id)
    session.add(white_list)
    session.add(black_list)
    session.commit()
    # delete avant pour éviter les doublons
    #    try:
@@ -105,21 +105,21 @@ def create_cooc(user=None, corpus_id=None, whitelist=None, size=size, year_start
        COUNT(*) AS score
    FROM
        node_node AS n  -- the nodes who are direct children of the corpus
    INNER JOIN
        node_node_ngram AS nngX ON nngX.node_id = n.id  --  list of ngrams contained in the node
    INNER JOIN
        node_node_ngram AS whitelistX ON whitelistX.ngram_id = nngX.ngram_id -- list of ngrams contained in the whitelist and in the node
    INNER JOIN
        node_ngram AS ngX ON ngX.id = whitelistX.ngram_id -- ngrams which are in both
    INNER JOIN
        node_node_ngram AS nngY ON nngY.node_id = n.id
    INNER JOIN
        node_node_ngram AS whitelistY ON whitelistY.ngram_id = nngY.ngram_id
    INNER JOIN
        node_ngram AS ngY ON ngY.id = whitelistY.ngram_id
    WHERE
        n.parent_id = %s
    AND
@@ -128,13 +128,13 @@ def create_cooc(user=None, corpus_id=None, whitelist=None, size=size, year_start
        whitelistY.node_id = %s
    AND
        nngX.ngram_id < nngY.ngram_id   --  so we only get distinct pairs of ngrams
    GROUP BY
        ngX.id,
        ngX.terms,
        ngY.id,
        ngY.terms
    ORDER BY
        score DESC
    LIMIT
@@ -153,9 +153,9 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', size=
    import networkx as nx
    from networkx.readwrite import json_graph
    from gargantext_web.api import JsonHttpResponse
    from analysis.louvain import best_partition
    #print(corpus_id, cooc_id)
    try:
@@ -172,7 +172,7 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', size=
            cooccurrence_node_id = create_cooc(user=request.user, corpus_id=corpus_id, whitelist=whitelist, size=size)
        else:
            cooccurrence_node_id = session.query(Node.id).filter(Node.type_id==type_cooc_id, Node.parent_id==corpus_id).first()
        for cooccurrence in session.query(NodeNgramNgram).filter(NodeNgramNgram.node_id==cooccurrence_node_id).all():
@@ -192,41 +192,41 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', size=
        x = pd.DataFrame(matrix).fillna(0)
        y = pd.DataFrame(matrix).fillna(0)
        #xo = diag_null(x)
        #y = diag_null(y)
        x = x / x.sum(axis=1)
        y = y / y.sum(axis=0)
        #print(x)
        xs = x.sum(axis=1) - x
        ys = x.sum(axis=0) - x
        # top inclus ou exclus
        n = ( xs + ys) / (2 * (x.shape[0] - 1))
        # top generic or specific
        m = ( xs - ys) / (2 * (x.shape[0] - 1))
        n = n.sort(inplace=False)
        m = m.sort(inplace=False)
        print(n)
        print(m)
        nodes_included = 300 #int(round(size/20,0))
        #nodes_excluded = int(round(size/10,0))
        nodes_specific = 300 #int(round(size/10,0))
        #nodes_generic = int(round(size/10,0))
        # TODO user the included score for the node size
        n_index = pd.Index.intersection(x.index, n.index[:nodes_included])
-        # Generic: 
+        # Generic:
        #m_index = pd.Index.intersection(x.index, m.index[:nodes_generic])
-        # Specific: 
+        # Specific:
        m_index = pd.Index.intersection(x.index, m.index[-nodes_specific:])
        x_index = pd.Index.union(n_index, m_index)
        xx = x[list(x_index)].T[list(x_index)]
@@ -236,26 +236,28 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', size=
        # Removing unconnected nodes
        xxx = xx.values
        threshold = min(xxx.max(axis=1))
-        matrix_filtered = np.where(xxx > threshold, xxx, 0)
+        matrix_filtered = np.where(xxx >= threshold, xxx, 0)
        #matrix_filtered = matrix_filtered.resize((90,90))
    except:
        PrintException()
    try:
-        G = nx.from_numpy_matrix(matrix_filtered)
+        G = nx.from_numpy_matrix(matrix_filtered, create_using=nx.MultiDiGraph())
        G = nx.relabel_nodes(G, dict(enumerate([ labels[label] for label in list(xx.columns)])))
        #print(G)
        # Removing too connected nodes (find automatic way to do it)
+        #edges_to_remove = [ e for e in G.edges_iter() if
        degree = G.degree()
-        to_remove = [n for n in degree if degree[n] <= 1]
+        nodes_to_remove = [n for n in degree if degree[n] <= 1]
-        G.remove_nodes_from(to_remove)
+        G.remove_nodes_from(nodes_to_remove)
        partition = best_partition(G)
    except:
        PrintException()
    if type == "node_link":
@@ -270,7 +272,7 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', size=
                # G.add_edge(node, "cluster " + str(partition[node]), weight=3)
            except Exception as error:
                print("error01: ",error)
        data = json_graph.node_link_data(G)
        links = []
@@ -285,7 +287,7 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', size=
        # print(data)
        data["links"] = []
        data["links"] = links
    elif type == "adjacency":
        for node in G.nodes():
            try:
@@ -298,7 +300,7 @@ def get_cooc(request=None, corpus_id=None, cooc_id=None, type='node_link', size=
            except Exception as error:
                print("error02: ",error)
        data = json_graph.node_link_data(G)
    #    data = json_graph.node_link_data(G, attrs={\
    #            'source':'source',\

--- a/init/init_gargantext.py
+++ b/init/init_gargantext.py
@@ -82,9 +82,8 @@ print('Initialize node types...')
 node_types = [
        'Root', 'Trash',
        'Project', 'Corpus', 'Document',
-        'Stem', 'Lem', 'Tfidf',
+        'MiamList', 'StopList', 'MainList',
-        'Synonym',
+        'Stem', 'Lem', 'Group', 'Tfidf',
-        'MiamList', 'StopList',
        'Cooccurrence', 'WhiteList', 'BlackList'
        ]
@@ -93,6 +92,20 @@ for node_type in node_types:
 # Integration: resource types
+print('Initialize users...')
+me = session.query(User).filter(User.username=='alexandre').first()
+gargantua = session.query(User).filter(User.username=='gargantua').first()
+node_root = Node(user_id=gargantua.id, type_id=cache.NodeType['Root'].id, name='Root')
+node_stem = Node(user_id=gargantua.id, type_id=cache.NodeType['Stem'].id, name='Stem', parent_id=node_root.id)
+node_lem = Node(user_id=gargantua.id, type_id=cache.NodeType['Lem'].id, name='Lem', parent_id=node_root.id)
+session.add(node_root)
+session.add(node_stem)
+session.add(node_lem)
+session.commit()
 print('Initialize resource...')
 from parsing.parsers_config import parsers

--- a/init/test_db.py
+++ b/init/test_db.py
@@ -35,7 +35,6 @@ extract_ngrams(corpus, ('title', ))
-# print(corpus)
 # corpus = session.query(Node).filter(Node.id == 72771).first()
 # corpus = session.query(Node).filter(Node.id == 73017).first()
 compute_tfidf(corpus)