[FIX] merge url conflicts.

53f4d9b9 · delanoe · 95bbf414 · ba2517a4 · 53f4d9b9 · 53f4d9b9
Commit 53f4d9b9 authored Apr 01, 2016 by delanoe
10 changed files
--- a/doc/schemas/ngram_parsing_flow.dot
+++ b/doc/schemas/ngram_parsing_flow.dot
+// dot ngram_parsing_flow.dot -Tpng -o ngram_parsing_flow.png
+digraph ngramflow {
+
+    edge [fontsize=10] ;
+    label=<<B><U>gargantext.util.toolchain</U></B><BR/>(ngram extraction flow)>;
+    labelloc="t" ;
+
+    "extracted_ngrams" -> "grouplist" ;
+    "extracted_ngrams" -> "occs+tfidfs" ;
+    "main_user_stoplist" -> "stoplist" ;
+    "stoplist" -> "mainlist" ;
+    "occs+tfidfs" -> "mainlist" [label="  TFIDF_LIMIT"];
+    "mainlist" -> "coocs" [label="  COOCS_THRESHOLD"] ;
+    "coocs" -> "specificity" ;
+
+    "specificity" -> "maplist" [label="MAPLIST_LIMIT\nMONOGRAM_PART"];
+
+    "maplist" -> "explore" ;
+    "grouplist" -> "maplist" ;
+}
--- a/doc/ngram_parsing_flow.png
+++ b/doc/ngram_parsing_flow.png
--- a/gargantext/constants.py
+++ b/gargantext/constants.py
@@ -156,10 +156,10 @@ RESOURCETYPES = [
        'parser': RISParser,
        'default_language': 'en',
    },
-    # {   'name': 'CSV',
-    #     # 'parser': CSVParser,
-    #     'default_language': 'en',
-    # },
+    {   'name': 'CSV',
+        'parser': CSVParser,
+        'default_language': 'en',
+    },
    # {   'name': 'ISTex',
    #     # 'parser': ISTexParser,
    #     'default_language': 'en',

--- a/gargantext/util/db.py
+++ b/gargantext/util/db.py
@@ -35,8 +35,7 @@ Double = DOUBLE_PRECISION
 # useful for queries

 from sqlalchemy.orm import aliased
-from sqlalchemy import func
-
+from sqlalchemy import func, desc

 # bulk insertions


--- a/gargantext/util/parsers/CSVParser.py
+++ b/gargantext/util/parsers/CSVParser.py
@@ -8,34 +8,32 @@ import os

 class CSVParser(Parser):

-    def CSVsample( self, filename , delim) :
-        ifile  = open( filename, "r" )
-        reader = csv.reader(ifile, delimiter=delim)
+    def CSVsample( self, small_contents , delim) :
+        reader = csv.reader(small_contents, delimiter=delim)

        Freqs = []
        for row in reader:
            Freqs.append(len(row))

-
-        ifile.close()
        return Freqs

-    
+
    def parse(self, filename):

+        print("CSV: parsing (assuming UTF-8 and LF line endings)")
+
+        contents = filename.read().decode("UTF-8").split("\n")
+
        sample_size = 10
-        sample_file = filename.replace(".csv","_sample.csv")
+        sample_contents = contents[0:sample_size]

        hyperdata_list = []

-        command_for_sample = "cat '"+filename+"' | head -n "+str(sample_size)+" > '"+sample_file+"'"
-        os.system(command_for_sample) # you just created a  *_sample.csv
-
        # # = = = = [ Getting delimiters frequency ] = = = = #
        PossibleDelimiters = [ ',',' ','\t', ';', '|', ':' ]
        AllDelimiters = {}
        for delim in PossibleDelimiters:
-            AllDelimiters[delim] = self.CSVsample( sample_file , delim ) 
+            AllDelimiters[delim] = self.CSVsample( sample_contents , delim )
        # # = = = = [ / Getting delimiters frequency ] = = = = #
        # # OUTPUT example:
        # #  AllDelimiters = {
@@ -59,8 +57,8 @@ class CSVParser(Parser):
        # # = = = = [ / Stand.Dev=0 & Sum of delimiters ] = = = = #
        # # OUTPUT example:
        # #  Delimiters = [
-        # #     ['\t', 5, 5, 0.0], 
-        # #     [',', 75, 5, 0.0], 
+        # #     ['\t', 5, 5, 0.0],
+        # #     [',', 75, 5, 0.0],
        # #     ['|', 5, 5, 0.0]
        # #  ]

@@ -68,23 +66,22 @@ class CSVParser(Parser):
        # # = = = = [ Delimiter selection ] = = = = #
        Sorted_Delims = sorted(Delimiters, key=lambda x: x[1], reverse=True)
        HighestDelim = Sorted_Delims[0][0]
-        # print("selected delimiter:",[HighestDelim]
-        # print
+        # HighestDelim = ","
+        print("CSV selected delimiter:",[HighestDelim])
        # # = = = = [ / Delimiter selection ] = = = = #


-
-
        # # = = = = [ First data coordinate ] = = = = #
        Coords = {
            "row": -1,
            "column": -1
        }

-        ifile  = open( sample_file, "r" )
-        reader = csv.reader(ifile, delimiter=HighestDelim)
+        reader = csv.reader(contents, delimiter=HighestDelim)

        for rownum, tokens in enumerate(reader):
+            if rownum % 250 == 0:
+                print("CSV row: ", rownum)
            joined_tokens = "".join (tokens)
            if Coords["row"]<0 and len( joined_tokens )>0 :
                Coords["row"] = rownum
@@ -93,22 +90,21 @@ class CSVParser(Parser):
                    if len(t)>0:
                        Coords["column"] = columnum
                        break
-        ifile.close()
        # # = = = = [ / First data coordinate ] = = = = #



        # # = = = = [ Setting Headers ] = = = = #
        Headers_Int2Str = {}
-        ifile  = open( sample_file, "r" )
-        reader = csv.reader(ifile, delimiter=HighestDelim)
+        reader = csv.reader(contents, delimiter=HighestDelim)
        for rownum, tokens in enumerate(reader):
            if rownum>=Coords["row"]:
                for columnum in range( Coords["column"],len(tokens) ):
                    t = tokens[columnum]
                    Headers_Int2Str[columnum] = t
                break
-        ifile.close()
+        # print("Headers_Int2Str")
+        # print(Headers_Int2Str)
        # # = = = = [ / Setting Headers ] = = = = #
        # # OUTPUT example:
        # #  Headers_Int2Str = {
@@ -119,11 +115,9 @@ class CSVParser(Parser):
        # #  }


-
        # # = = = = [ Reading the whole CSV and saving ] = = = = #
        hyperdata_list = []
-        ifile  = open( filename, "r" )
-        reader = csv.reader(ifile, delimiter=HighestDelim)
+        reader = csv.reader(contents, delimiter=HighestDelim)
        for rownum, tokens in enumerate(reader):
            if rownum>Coords["row"]:
                RecordDict = {}
@@ -131,7 +125,6 @@ class CSVParser(Parser):
                    data = tokens[columnum]
                    RecordDict[ Headers_Int2Str[columnum] ] = data
                hyperdata_list.append( RecordDict )
-        ifile.close()
        # # = = = = [ / Reading the whole CSV and saving ] = = = = #

        return hyperdata_list
--- a/gargantext/util/parsers/__init__.py
+++ b/gargantext/util/parsers/__init__.py
@@ -8,4 +8,4 @@ from .Pubmed import PubmedParser
 from .Europress import EuropressParser

 # from .ISTex import ISTexParser
-# from .CSV import CSVParser
+from .CSV import CSVParser
--- a/gargantext/views/api/ngramlists.py
+++ b/gargantext/views/api/ngramlists.py
--- a/gargantext/views/api/urls.py
+++ b/gargantext/views/api/urls.py
@@ -11,17 +11,18 @@ urlpatterns = [
    url(r'^nodes/(\d+)/facets$' , nodes.CorpusFacet.as_view()),
    url(r'^nodes/(\d+)/having$' , nodes.NodeListHaving.as_view()),

-    # get a list of ngram_ids or ngram_infos by list_id
-    #
-    # url(r'^ngramlists/(\d+)$', ngramlists.List.as_view()),
+    # add or remove ngrams from a list
+    #  ex: add <=> PUT    ngramlists/change?list=42&ngrams=1,2
+    #       rm <=> DELETE ngramlists/change?list=42&ngrams=1,2
+    url(r'^ngramlists/change$', ngramlists.ListChange.as_view()),

-    # entire combination of lists from a corpus
+    # get entire combination of lists from a corpus
    # (or any combination of lists that go together :
    #   - a mainlist
    #   - an optional stoplist
    #   - an optional maplist
-    #   - an optional grouplist
-    # aka lexical model
-    url(r'^ngramlists/family$'  , ngramlists.ListFamily.as_view()),
+    #   - an optional grouplist)
+    url(r'^ngramlists/family$', ngramlists.ListFamily.as_view()),
+

 ]
--- a/static/js/gargantext/NGrams_dyna_chart_and_table.js
+++ b/static/js/gargantext/NGrams_dyna_chart_and_table.js
--- a/templates/pages/corpora/terms.html
+++ b/templates/pages/corpora/terms.html
@@ -22,7 +22,7 @@
        <div class="row">
            <div id="monthly-move-chart">
                <center>
-                        Select a time range in the chart with blue bars to zoom in
+                        Select a score/frequency range in the chart with blue bars to zoom in
                <p align="center">
                <a class="btn btn-xs btn-default" role="button" href="/chart/corpus/{{ corpus.id }}/data.csv">Save</a>
                <a class="btn btn-xs btn-default" href="javascript:volumeChart.filterAll();dc.redrawAll();">Reset</a></p>
@@ -41,7 +41,12 @@
          <br>
        </div>

-        <input type="hidden" id="list_id" value="{{ list_id }}"></input>
+        <!-- hidden inputs caching our DB ids (values are set by js; handy for list update commands) -->
+        <input type="hidden" id="mainlist_id" value=""></input>
+        <input type="hidden" id="maplist_id" value=""></input>
+        <input type="hidden" id="stoplist_id" value=""></input>
+        <input type="hidden" id="groups_id" value=""></input>
+        <input type="hidden" id="scores_id" value=""></input>

        <div class="row">
            <div class="panel panel-default">