Commit 281b52e7 authored by sim's avatar sim

Merge branch 'unstable' into simon-unstable

parents 6783a786 c5ad749c
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Advanced Gargantext Tutorial (Python)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# First import the library Gargantext Notebook\n",
"from gargantext_notebook import *\n",
"\n",
"# This enables to draw graphics later\n",
"%matplotlib inline "
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"L'identifiant du corpus est : 254749\n"
]
}
],
"source": [
"# Copy/paste the URL of the corpus to work on (including http://)\n",
"corpus_url = \"http://gargantext.org/projects/251737/corpora/254749\"\n",
"\n",
"corpus_id = corpus_url.split(\"/\")[6]\n",
"\n",
"print(\"L\\'identifiant du corpus est : %s\" % corpus_id)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# To get all the documents:\n",
"docs = documents(corpus_id)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"'Towards big data science in the decade ahead from ten years of InCoB and the 1st ISCB-Asia Joint Conference.'"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# To get the title of the first document \n",
"# [0] indicates the index of the first document\n",
"docs[0].hyperdata['title']"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"\"The 2011 International Conference on Bioinformatics (InCoB) conference, which is the annual scientific conference of the Asia-Pacific Bioinformatics Network (APBioNet), is hosted by Kuala Lumpur, Malaysia, is co-organized with the first ISCB-Asia conference of the International Society for Computational Biology (ISCB). InCoB and the sequencing of the human genome are both celebrating their tenth anniversaries and InCoB's goalposts for the next decade, implementing standards in bioinformatics and globally distributed computational networks, will be discussed and adopted at this conference. Of the 49 manuscripts (selected from 104 submissions) accepted to BMC Genomics and BMC Bioinformatics conference supplements, 24 are featured in this issue, covering software tools, genome/proteome analysis, systems biology (networks, pathways, bioimaging) and drug discovery and design.\""
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# To get the abstract of the first document (0)\n",
"docs[0].hyperdata['abstract']"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"'Shoba Ranganathan, Christian Schönbach, Janet Kelso, Burkhard Rost, Sheila Nathan, Tin Wee Tan'"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# To get the authors of the first document (0)\n",
"docs[0].hyperdata['authors']"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"'BMC bioinformatics'"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# To get the source of the first document (0)\n",
"docs[0].hyperdata['source']"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# If I want to count:\n",
"myChart = chart(docs, \"publication_year\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7fc48a3da128>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEZCAYAAACZwO5kAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGjxJREFUeJzt3X2QVfWd5/H3hwfFqAOKTRdFo40JGs2KpNMxWI55Ij5b\ngUrUgWwJWsyw2Ug0OpuxZ3drU1a5U6Q2JaNx1g0JiWhNRMNEYRNNIEYnGV1UJO0jUTuKoSmFFpEo\nPvHw3T/uD3JlgHsvfS+n74/Pq6rrnvM7v3Pu9946/enTv3vOPYoIzMwsX4OKLsDMzBrLQW9mljkH\nvZlZ5hz0ZmaZc9CbmWXOQW9mlrmqgl7S1ZKekfS0pDskDZM0TtIjknok3SnpkNT30DTfk5a3N/IF\nmJnZvlUMekljgCuBzoj4D8BgYBrwbWBeRHwE2ATMSqvMAjal9nmpn5mZFaTaoZshwGGShgAfAl4B\nPg8sTssXAlPT9JQ0T1o+WZLqU66ZmdVqSKUOEbFO0neAPwLvAMuAx4E3ImJb6tYLjEnTY4C1ad1t\nkjYDI4HX9vYcxxxzTLS3t+/vazAzOyg9/vjjr0VES6V+FYNe0lGUjtLHAW8APwHO7W+BkmYDswGO\nPfZYVq5c2d9NmpkdVCS9XE2/aoZuvgC8FBF9EbEV+ClwBjAiDeUAtAHr0vQ6YGwqYggwHNi4+0Yj\nYn5EdEZEZ0tLxT9IZma2n6oJ+j8CkyR9KI21TwaeBR4ALkp9ZgJL0vTSNE9a/uvwN6eZmRWmYtBH\nxCOUPlRdBTyV1pkPXAtcI6mH0hj8grTKAmBkar8G6GpA3WZmViUNhIPtzs7O2H2MfuvWrfT29vLu\nu+8WVFW+hg0bRltbG0OHDi26FDPrB0mPR0RnpX4VP4wtSm9vL0ceeSTt7e347Mz6iQg2btxIb28v\n48aNK7ocMzsABuxXILz77ruMHDnSIV9nkhg5cqT/UzI7iAzYoAcc8g3i99Xs4DKgg97MzPpvwI7R\n76696+d13d6auRdU7DN48GBOOeUUtm7dypAhQ5gxYwZXX301gwbt/e/jmjVrePjhh/nKV76yz20f\nf/zx3HfffZx44om72r7xjW8wevRorr322r1u+8ILL+Tpp5+uWLvZQFTv3+PdVfN7fTDyEf0+HHbY\nYXR3d/PMM8+wfPly7rvvPq677rp9rrNmzRp+/OMfV9z2tGnTWLRo0a75HTt2sHjxYqZNm9bvus3M\nyjnoqzRq1Cjmz5/PzTffTESwZs0azjzzTDo6Oujo6ODhhx8GoKuri9/+9rdMnDiRefPmsX37dr75\nzW/yyU9+kgkTJvC9730PgOnTp3PnnXfu2v5vfvMbjjvuOI477ri9brvcrbfeypw5c3bNX3jhhTz4\n4IMALFu2jNNPP52Ojg4uvvhi3nrrrQa+M2Y20Dnoa3D88cezfft2NmzYwKhRo1i+fDmrVq3izjvv\n5MorrwRg7ty5nHnmmXR3d3P11VezYMEChg8fzmOPPcZjjz3G97//fV566SVOOeUUBg0axBNPPAHA\nokWLmD59OsBet12N1157jeuvv55f/epXrFq1is7OTm644Yb6vxlm1jSaZox+oNm6dStz5syhu7ub\nwYMH8/zzz++x37Jly3jyySdZvLj0jc6bN2/mhRdeYNy4cUyfPp1FixbxsY99jHvuuWfXsFC1296T\nFStW8Oyzz3LGGWcA8P7773P66af389WaWTNz0NfgxRdfZPDgwYwaNYrrrruO1tZWnnjiCXbs2MGw\nYcP2uE5E8N3vfpdzzjnn3y2bNm0aZ599Np/5zGeYMGECra2tAMybN6/itocMGcKOHTt2ze88Lz4i\nOOuss7jjjjvq8ZLNLAMeuqlSX18fX/3qV5kzZw6S2Lx5M6NHj2bQoEHcfvvtbN++HYAjjzySN998\nc9d655xzDrfccgtbt24F4Pnnn2fLli0AfPjD
H+aYY46hq6tr17ANsNdtl2tvb6e7u5sdO3awdu1a\nHn30UQAmTZrEQw89RE9PDwBbtmyp6T8CM8tP0xzRF3Ha1DvvvMPEiRN3nV556aWXcs011wDwta99\njS9/+cvcdtttnHvuuRx++OEATJgwgcGDB3Pqqady2WWXcdVVV7FmzRo6OjqICFpaWrjnnnt2Pcf0\n6dPp6uriS1/60q62vW273BlnnMG4ceM4+eSTOemkk+jo6ACgpaWFW2+9lenTp/Pee+8BcP3113PC\nCSc07H0ys4FtwH6p2erVqznppJMKqih/fn+tCD6Pvr6q/VIzD92YmWXOQW9mlrkBHfQDYVgpR35f\nzQ4uAzbohw0bxsaNGx1Kdbbz++j3djqomeVnwJ5109bWRm9vL319fUWXkp2dd5gys4NDxaCXdCJw\nZ1nT8cD/AG5L7e3AGuCSiNiUbiB+I3A+8DZwWUSsqrWwoUOH+g5IZmZ1UM3NwZ+LiIkRMRH4BKXw\nvpvSTb/vj4jxwP38+Sbg5wHj089s4JZGFG5mZtWpdYx+MvCHiHgZmAIsTO0LgalpegpwW5SsAEZI\nGl2Xas3MrGa1Bv00YOeXqLRGxCtp+lWgNU2PAdaWrdOb2szMrABVB72kQ4AvAj/ZfVmUTo2p6fQY\nSbMlrZS00h+4mpk1Ti1H9OcBqyJifZpfv3NIJj1uSO3rgLFl67Wltg+IiPkR0RkRnS0tLbVXbmZm\nVakl6Kfz52EbgKXAzDQ9E1hS1j5DJZOAzWVDPGZmdoBVdR69pMOBs4D/VNY8F7hL0izgZeCS1H4v\npVMreyidoXN53ao1M7OaVRX0EbEFGLlb20ZKZ+Hs3jeAK+pSnZmZ9duA/QoEMzOrDwe9mVnmHPRm\nZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9\nmVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZpmrKugljZC0WNLvJa2WdLqkoyUtl/RCejwq9ZWk\nmyT1SHpSUkdjX4KZme1LtUf0NwK/iIiPAqcCq4Eu4P6IGA/cn+YBzgPGp5/ZwC11rdjMzGpSMegl\nDQc+DSwAiIj3I+INYAqwMHVbCExN01OA26JkBTBC0ui6V25mZlWp5oh+HNAH/EjS7yT9QNLhQGtE\nvJL6vAq0pukxwNqy9XtT2wdImi1ppaSVfX19+/8KzMxsn6oJ+iFAB3BLRHwc2MKfh2kAiIgAopYn\njoj5EdEZEZ0tLS21rGpmZjWoJuh7gd6IeCTNL6YU/Ot3Dsmkxw1p+TpgbNn6banNzMwKUDHoI+JV\nYK2kE1PTZOBZYCkwM7XNBJak6aXAjHT2zSRgc9kQj5mZHWBDquz3deCfJR0CvAhcTumPxF2SZgEv\nA5ekvvcC5wM9wNupr5mZFaSqoI+IbqBzD4sm76FvAFf0sy4zM6sTXxlrZpY5B72ZWeYc9GZmmXPQ\nm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc\n9GZmmXPQm5llzkFvZpY5B72ZWeaqCnpJayQ9Jalb0srUdrSk5ZJeSI9HpXZJuklSj6QnJXU08gWY\nmdm+1XJE/7mImBgRO+8d2wXcHxHjgfvTPMB5wPj0Mxu4pV7FmplZ7fozdDMFWJimFwJTy9pvi5IV\nwAhJo/vxPGZm1g/VBn0AyyQ9Lml2amuNiFfS9KtAa5oeA6wtW7c3tX2ApNmSVkpa2dfXtx+lm5lZ\nNYZU2e8vI2KdpFHAckm/L18YESEpanniiJgPzAfo7OysaV0zM6teVUf0EbEuPW4A7gZOA9bvHJJJ\njxtS93XA2LLV21KbmZkVoGLQSzpc0pE7p4GzgaeBpcDM1G0msCRNLwVmpLNvJgGby4Z4zMzsAKtm\n6KYVuFvS
zv4/johfSHoMuEvSLOBl4JLU/17gfKAHeBu4vO5Vm5lZ1SoGfUS8CJy6h/aNwOQ9tAdw\nRV2qMzOzfvOVsWZmmXPQm5llzkFvZpa5as+jN8tGe9fPG7btNXMvaNi2zfaXj+jNzDLnoDczy5yD\n3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLn\noDczy1zVQS9psKTfSfpZmh8n6RFJPZLulHRIaj80zfek5e2NKd3MzKpRyxH9VcDqsvlvA/Mi4iPA\nJmBWap8FbErt81I/MzMrSFVBL6kNuAD4QZoX8HlgceqyEJiapqekedLyyam/mZkVoNoj+n8E/g7Y\nkeZHAm9ExLY03wuMSdNjgLUAafnm1P8DJM2WtFLSyr6+vv0s38zMKqkY9JIuBDZExOP1fOKImB8R\nnRHR2dLSUs9Nm5lZmWruGXsG8EVJ5wPDgL8AbgRGSBqSjtrbgHWp/zpgLNAraQgwHNhY98rNzKwq\nFY/oI+LvI6ItItqBacCvI+I/Ag8AF6VuM4ElaXppmict/3VERF2rNjOzqvXnPPprgWsk9VAag1+Q\n2hcAI1P7NUBX/0o0M7P+qGboZpeIeBB4ME2/CJy2hz7vAhfXoTYzM6sDXxlrZpY5B72ZWeYc9GZm\nmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72Z\nWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYqBr2kYZIelfSEpGckXZfax0l6RFKPpDslHZLaD03zPWl5\ne2NfgpmZ7Us1R/TvAZ+PiFOBicC5kiYB3wbmRcRHgE3ArNR/FrAptc9L/czMrCAVgz5K3kqzQ9NP\nAJ8HFqf2hcDUND0lzZOWT5akulVsZmY1qWqMXtJgSd3ABmA58AfgjYjYlrr0AmPS9BhgLUBavhkY\nuYdtzpa0UtLKvr6+/r0KMzPbq6qCPiK2R8REoA04Dfhof584IuZHRGdEdLa0tPR3c2Zmthc1nXUT\nEW8ADwCnAyMkDUmL2oB1aXodMBYgLR8ObKxLtWZmVrNqzrppkTQiTR8GnAWsphT4F6VuM4ElaXpp\nmict/3VERD2LNjOz6g2p3IXRwEJJgyn9YbgrIn4m6VlgkaTrgd8BC1L/BcDtknqA14FpDajbzMyq\nVDHoI+JJ4ON7aH+R0nj97u3vAhfXpTozM+s3XxlrZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72Z\nWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFv\nZpY5B72ZWeaquTn4WEkPSHpW0jOSrkrtR0taLumF9HhUapekmyT1SHpSUkejX4SZme1dNUf024C/\njYiTgUnAFZJOBrqA+yNiPHB/mgc4DxiffmYDt9S9ajMzq1rFoI+IVyJiVZp+E1gNjAGmAAtTt4XA\n1DQ9BbgtSlYAIySNrnvlZmZWlSG1dJbUDnwceARojYhX0qJXgdY0PQZYW7Zab2p7pawNSbMpHfFz\n7LHH1lR0e9fPa+pfqzVzL2jo9s3MDqSqP4yVdATwL8A3IuJP5csiIoCo5YkjYn5EdEZEZ0tLSy2r\nmplZDaoKeklDKYX8P0fET1Pz+p1DMulxQ2pfB4wtW70ttZmZWQGqOetGwAJgdUTcULZoKTAzTc8E\nlpS1z0hn30wCNpcN8ZiZ2QFWzRj9GcClwFOSulPbfwXmAndJmgW8DFySlt0LnA/0AG8Dl9e1YjMz\nq0nFoI+IfwO0l8WT99A/gCv6WZeZmdWJr4w1M8ucg97MLHMOejOzzDnozcwy56A3M8ucg97MLHMO\nejOzzDnozcwy56A3M8ucg97MLHMOejOzzDnozcwy56A3M8ucg97MLHMOej
OzzNV0c3Az8M3ZzZqN\nj+jNzDLnoDczy1w1Nwf/oaQNkp4uazta0nJJL6THo1K7JN0kqUfSk5I6Glm8mZlVVs0R/a3Aubu1\ndQH3R8R44P40D3AeMD79zAZuqU+ZZma2vyoGfUT8Bnh9t+YpwMI0vRCYWtZ+W5SsAEZIGl2vYs3M\nrHb7O0bfGhGvpOlXgdY0PQZYW9avN7X9O5JmS1opaWVfX99+lmFmZpX0+8PYiAgg9mO9+RHRGRGd\nLS0t/S3DzMz2Yn+Dfv3OIZn0uCG1rwPGlvVrS21mZlaQ/Q36pcDMND0TWFLWPiOdfTMJ2Fw2xGNm\nZgWoeGWspDuAzwLHSOoFvgXMBe6SNAt4Gbgkdb8XOB/oAd4GLm9AzWZmVoOKQR8R0/eyaPIe+gZw\nRX+LMjOz+vGVsWZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQ\nm5llzkFvZpa5it91Y/XX3vXzhm5/zdwLGrp9M2suPqI3M8ucg97MLHMOejOzzDnozcwy56A3M8uc\ng97MLHMNCXpJ50p6TlKPpK5GPIeZmVWn7kEvaTDwT8B5wMnAdEkn1/t5zMysOo24YOo0oCciXgSQ\ntAiYAjzbgOcyO6j4YrtiNev7r4io7wali4BzI+Kv0/ylwKciYs5u/WYDs9PsicBzdS3kg44BXmvg\n9hvN9RenmWsH11+0Rtd/XES0VOpU2FcgRMR8YP6BeC5JKyOi80A8VyO4/uI0c+3g+os2UOpvxIex\n64CxZfNtqc3MzArQiKB/DBgvaZykQ4BpwNIGPI+ZmVWh7kM3EbFN0hzgl8Bg4IcR8Uy9n6dGB2SI\nqIFcf3GauXZw/UUbEPXX/cNYMzMbWHxlrJlZ5hz0ZmaZc9CbmWXOQW9mlrksg17SaZI+maZPlnSN\npPOLrsuaj6Tbiq7BrL+yuzm4pG9R+kK1IZKWA58CHgC6JH08Iv5noQVWQdJHgTHAIxHxVln7uRHx\ni+Iq2zdJnwJWR8SfJB0GdAEdlL7n6B8iYnOhBVYgaffrPQR8TtIIgIj44oGvqj4kXR4RPyq6jlpI\n+ktK3531dEQsK7qeSiRdCdwdEWuLrmV32Z1eKekpYCJwKPAq0FYWPI9ExIRCC6wg7SxXAKspvY6r\nImJJWrYqIjqKrG9fJD0DnJqupZgPvA0sBian9i8VWmAFklZR+qP0AyAoBf0dlC76IyL+tbjq+kfS\nHyPi2KLr2BdJj0bEaWn6byj9HtwNnA3834iYW2R9lUjaDGwB/kBpv/lJRPQVW1VJdkf0wLaI2A68\nLekPEfEngIh4R9KOgmurxt8An4iItyS1A4sltUfEjZSCZyAbFBHb0nRn2R+lf5PUXVRRNegErgL+\nG/DNiOiW9E6zBLykJ/e2CGg9kLXsp6Fl07OBsyKiT9J3gBXAgA564EXgE8AXgL8CrpP0OKXQ/2lE\nvFlUYTkG/fuSPhQRb1N60wGQNBxohqAftHO4JiLWSPospbA/joEf9E+XDRE8IakzIlZKOgHYWnRx\nlUTEDmCepJ+kx/U01+9IK3AOsGm3dgEPH/hyajZI0lGUPjvUzqPhiNgiadu+Vx0QIu1Dy4BlkoZS\nGkaeDnwHqPgtk43STDtxtT4dEe/Brl/cnYYCM4spqSbrJU2MiG6AdGR/IfBD4JRiS6vor4EbJf13\nSl/N+v8krQXWpmVNISJ6gYslXQD8qeh6avAz4Iid+045SQ8e+HJqNhx4nNIfppA0OiJekXQEA/8g\nB3arMSK2Uvqer6WSPlRMSSXZjdHvi6Qjyj/cHIgktVEafnp1D8vOiIiHCiirJpL+AhhH6UCiNyLW\nF1xSvzXDvpOrFJKtEfFS0bXsi6QTIuL5ouvYk4Mt6Af8B1L70sxh08y1g/edorn+/slu6EbSNXtb\nBBxxIGtpgGeBZg2bAV+7950BzfX3Q3
ZBD/wD8L+APX14M+AvEGvmsGnm2hPvOwVy/Y2TY9CvAu6J\niMd3XyCpGT4QbOawaebawftO0Vx/g2Q3Ri/pROD1PV2oIKl1oH8wKOlh4Ot7CZu1ETF2D6sNCM1c\nO3jfKZrrb5zsgr7ZNXPYNHPtOWj299/1N052QZ8ujPp7YCowitKl7BuAJcDciHijwPJsAPO+Y7lq\nhnGvWt1F6crAz0bE0RExEvhcarur0MqqIGm4pLmSfi/pdUkbJa1ObSOKrm9fmrn2xPtOgVx/4+QY\n9O0R8e3yC44i4tWI+DZwXIF1VauZw6aZawfvO0Vz/Q2S49DNMuBXwMKdY2KSWoHLKH1J0hcKLK8i\nSc9FxIm1LhsImrl28L5TNNffODke0f8VMBL4V0mbJL0OPAgcDVxSZGFVelnS36WAAUphI+laSt8Z\nM5A1c+3gfadorr9Bsgv6iNgE/AiYA4xN/0KdFBHXUrqJwUDXzGHTzLV73yme62+QHIdumvbGHTup\ndIepNmBFNNEdpqDpa/e+UzDX3yARkdUP8BSlr2oFaAdWUvqFBfhd0fVVUf+VwHPAPcAaYErZslVF\n15dr7d53iv9x/Y37yfErEJr5xh3Q3HeYaubawftO0Vx/g+QY9M184w5o7rBp5trB+07RXH+jCivy\nyRtkBqWbgu8SEdsiYgbw6WJKqsl6SRN3zqQd50LgGAZ+2DRz7eB9p2iuv0Gy+zC22amJ7zDVzLXn\noNnff9ffOA56M7PM5Th0Y2ZmZRz0ZmaZc9DbQUnSdkndkp6R9ISkv5W0z98HSe2SvnKgajSrFwe9\nHazeiYiJEfEx4CzgPOBbFdZpBxz01nT8YawdlCS9FRFHlM0fDzxG6VS444DbgcPT4jkR8bCkFcBJ\nwEvAQuAmYC7wWeBQ4J8i4nsH7EWYVclBbwel3YM+tb0BnAi8CeyIiHcljQfuiIjOdAHMf4mIC1P/\n2cCoiLhe0qHAQ8DFEfHSAX0xZhXkeGWsWX8NBW5OF79sB07YS7+zgQmSLkrzw4HxlI74zQYMB70Z\nu4ZutlO6R+y3gPXAqZQ+x3p3b6sBX4+IXx6QIs32kz+MtYOepBbg/wA3R2ksczjwSkTsAC4FBqeu\nbwJHlq36S+A/SxqatnOCpMMxG2B8RG8Hq8MkdVMaptlG6cPXG9Ky/w38i6QZwC+ALan9SWC7pCeA\nW4EbKZ2Js0qSgD5g6oF6AWbV8oexZmaZ89CNmVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz\n0JuZZe7/A6+cHY7zduzoAAAAAElFTkSuQmCC\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7fc48a441a58>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"myChart.plot.bar()"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"## Title\n",
"\n",
"Here I can add some comments on the chart.\n",
"1. First point\n",
"2. Second point"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Lang Cleaning tools"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"'fr'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"detect_lang(\"Ceci est une phrase en français.\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"'en'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"detect_lang(\"This is an english sentence.\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7fc487e01e80>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAENCAYAAAAG6bK5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGuhJREFUeJzt3X+8VXWd7/HXm4OKmqMoB64Beg4NOiGBnTkqXMfJMkHL\nG95u9uDYD6YsKiX7dSuamcf12tTjYc0oZRYTJoFeA31YKbecUTQbTa8KElAq6rmIcbgax1+EFsmP\nz/1jfQ9ujucXe5+9t/B9Px+P82Cv7/qu9fluOOz3Xt+19l6KCMzMLD9D6j0AMzOrDweAmVmmHABm\nZplyAJiZZcoBYGaWKQeAmVmmHABmZplyAJiZZcoBYGaWqX4DQNJCSZsl/bZb+6clrZP0sKRvlrR/\nRVK7pMckTS9pPyu1tUuaO7hPw8zM9pb6+yoISX8LvARcGxETU9vbgX8A3h0Rf5Y0MiI2S5oALAFO\nBt4I3AEcl3b1OHAm0AGsANoi4pG+ao8YMSKamprKfW5mZll66KGHno2Ixv76De2vQ0TcLampW/On\ngMsi4s+pz+bUPgNYmtqflNROEQYA7RGxHkDS0tS3zwBoampi5cqV/Q3RzMxKSHpqIP3KPQdwHHCa\npAck/Yekk1L7aGBjSb+O1NZbu5mZ1Um/RwB9bHckMAU4CbhR0rjBGJCk2cBsgGOOOWYwdmlmZj0o\n9wigA/hJFB4EdgEjgE3A2JJ+Y1Jbb+2vERELIqI1IlobG/udwjIzszKVewRwM/B24C5JxwEHAs8C\ny4AfSbqC4iTweOBBQMB4Sc0UL/wzgfMrHLuZ7QO2b99OR0cH27Ztq/dQ9jvDhg1jzJgxHHDAAWVt\n328ASFoCnA6MkNQBXAIsBBamS0NfAWZFcTnRw5JupDi5uwO4KCJ2pv3MAW4DGoCFEfFwWSM2s31K\nR0cHhx12GE1NTUiq93D2GxHBc889R0dHB83NzWXtYyBXAbX1suqDvfT/OvD1HtpvBW7dq9GZ2T5v\n27ZtfvGvAkkcddRRdHZ2lr0PfxLYzKrOL/7VUenfqwPAzPZ7DQ0NnHjiiZxwwglMnjyZyy+/nF27\ndvW5zYYNG/jRj37U777HjRvHY489tkfbZz/7Wb7xjW/0ue+JEycObPBVVO5JYKuTprk/L3vbDZe9\nexBHYlaeSn6HezKQ3+uDDz6Y1atXA7B582bOP/98/vCHP3DppZf2vt8UAOef3/f1KjNnzmTp0qVc\ncsklAOzatYubbrqJe++9dy+eRX34CMDMsjJy5EgWLFjAVVddRUSwYcMGTjvtNFpaWmhpaeG+++4D\nYO7cudxzzz2ceOKJzJs3j507d/LFL36Rk046iUmTJvH9738fgLa2Nm644Ybd+7/77rs59thjOfbY\nY3vdd6lFixYxZ86c3cvnnHMOv/zlLwG4/fbbmTp1Ki0tLZx33nm89NJLg/p34QAws+yMGzeOnTt3\nsnnzZkaOHMny5ctZtWoVN9xwAxdffDEAl112GaeddhqrV6/mc5/7HNdccw2HH344K1asYMWKFVx9\n9dU8+eSTvOUtb2HIkCGsWbMGgKVLl9LWVlw709u+B+LZZ5/la1/7GnfccQerVq2itbWVK664YlD/\nHjwFZGZZ2759O3PmzGH16tU0NDTw+OOP99jv9ttvZ+3atdx0000AbNmyhSeeeILm5mba2tpYunQp\nJ5xwAjfffPPuqaWB7rsn999/P4888ginnnoqAK+88gpTp06t8NnuyQFgZtlZv349DQ0NjBw5kksv\nvZRRo0axZs0adu3axbBhw3rcJiL4zne+w/Tp01+zbubMmUybNo23ve1tTJo0iVGjRgEwb968fvc9\ndOjQPU5Id31gLiI488wzWbJkyWA85R55CsjMstLZ2cknP/lJ5syZgyS2bNnC0UcfzZAhQ7juuuvY\nuXMnAIcddhhbt27dvd306dOZP38+27dvB+Dx
xx/n5ZdfBuBNb3oTI0aMYO7cubunf4Be912qqamJ\n1atXs2vXLjZu3MiDDz4IwJQpU7j33ntpb28H4OWXX96rI4iBcACY2X7vT3/60+7LQN/5zncybdq0\n3VftXHjhhSxevJjJkyezbt06Dj30UAAmTZpEQ0MDkydPZt68eXzsYx9jwoQJtLS0MHHiRD7xiU+w\nY8eO3TXa2tpYt24d733ve3e39bbvUqeeeirNzc1MmDCBiy++mJaWFgAaGxtZtGgRbW1tTJo0ialT\np7Ju3bpB/Xvp94Yw9dTa2hq+H8CefBmo7WseffRR3vzmN9d7GPutnv5+JT0UEa39besjADOzTDkA\nzMwy5QAwM8uUA8DMqu71fK5xX1bp36sDwMyqatiwYTz33HMOgUHWdT+A3j63MBD+IJiZVdWYMWPo\n6Oio6HvrrWdddwQrlwPAzKrqgAMOKPuOVVZdngIyM8tUvwEgaaGkzen+v93XfUFSSBqRliXpSknt\nktZKainpO0vSE+ln1uA+DTMz21sDOQJYBJzVvVHSWGAa8LuS5rOB8elnNjA/9T2S4mbypwAnA5dI\nGl7JwM3MrDL9BkBE3A0838OqecCXgNJT+zOAa6NwP3CEpKOB6cDyiHg+Il4AltNDqJiZWe2UdQ5A\n0gxgU0Ss6bZqNLCxZLkjtfXWbmZmdbLXVwFJOgT4e4rpn0EnaTbF9BHHHHNMNUqYmRnlHQG8CWgG\n1kjaAIwBVkn6T8AmYGxJ3zGprbf214iIBRHRGhGtjY2NZQzPzMwGYq8DICJ+ExEjI6IpIpoopnNa\nIuIZYBnw4XQ10BRgS0Q8DdwGTJM0PJ38nZbazMysTgZyGegS4P8Ax0vqkHRBH91vBdYD7cDVwIUA\nEfE88E/AivTz1dRmZmZ10u85gIho62d9U8njAC7qpd9CYOFejs/MzKrEnwQ2M8uUA8DMLFMOADOz\nTDkAzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uUA8DM\nLFMOADOzTDkAzMwy5QAwM8uUA8DMLFMDuSfwQkmbJf22pO2fJa2TtFbSTyUdUbLuK5LaJT0maXpJ\n+1mprV3S3MF/KmZmtjcGcgSwCDirW9tyYGJETAIeB74CIGkCMBM4IW3zPUkNkhqA7wJnAxOAttTX\nzMzqpN8AiIi7gee7td0eETvS4v3AmPR4BrA0Iv4cEU8C7cDJ6ac9ItZHxCvA0tTXzMzqZDDOAXwU\n+Lf0eDSwsWRdR2rrrf01JM2WtFLSys7OzkEYnpmZ9aSiAJD0D8AO4PrBGQ5ExIKIaI2I1sbGxsHa\nrZmZdTO03A0l/R1wDnBGRERq3gSMLek2JrXRR7uZmdVBWUcAks4CvgS8JyL+WLJqGTBT0kGSmoHx\nwIPACmC8pGZJB1KcKF5W2dDNzKwS/R4BSFoCnA6MkNQBXEJx1c9BwHJJAPdHxCcj4mFJNwKPUEwN\nXRQRO9N+5gC3AQ3Awoh4uArPx8zMBqjfAIiIth6ar+mj/9eBr/fQfitw616NzszMqsafBDYzy5QD\nwMwsUw4AM7NMOQDMzDLlADAzy5QDwMwsUw4AM7NMOQDMzDLlADAzy5QDwMwsUw4AM7NMOQDMzDLl\nADAzy5QDwMwsUw4AM7NMOQDMzDLlADAzy1S/ASBpoaTNkn5b0nakpOWSnkh/Dk/tknSlpHZJayW1\nlGwzK/V/QtKs6jwdMzMbqIEcASwCzurWNhe4MyLGA3emZYCzKW4EPx6YDcyHIjAo7iV8CnAycElX\naJiZWX30GwARcTfwfLfmGcDi9HgxcG5J+7VRuB84QtLRwHRgeUQ8HxEvAMt5baiYmVkNlXsOYFRE\nPJ0ePwOMSo9HAxtL+nWktt7azcysTio+CRwRAcQgjAUASbMlrZS0srOzc7B2a2Zm3ZQbAL9PUzuk\nPzen9k3A
2JJ+Y1Jbb+2vERELIqI1IlobGxvLHJ6ZmfWn3ABYBnRdyTMLuKWk/cPpaqApwJY0VXQb\nME3S8HTyd1pqMzOzOhnaXwdJS4DTgRGSOiiu5rkMuFHSBcBTwPtT91uBdwHtwB+BjwBExPOS/glY\nkfp9NSK6n1g2M7Ma6jcAIqKtl1Vn9NA3gIt62c9CYOFejc7MzKrGnwQ2M8uUA8DMLFMOADOzTDkA\nzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uUA8DMLFMO\nADOzTDkAzMwy5QAwM8uUA8DMLFMVBYCkz0l6WNJvJS2RNExSs6QHJLVLukHSganvQWm5Pa1vGown\nYGZm5Sk7ACSNBi4GWiNiItAAzAS+AcyLiL8EXgAuSJtcALyQ2uelfmZmVieVTgENBQ6WNBQ4BHga\neAdwU1q/GDg3PZ6Rlknrz5CkCuubmVmZyg6AiNgE/AvwO4oX/i3AQ8CLEbEjdesARqfHo4GNadsd\nqf9R5dY3M7PKVDIFNJziXX0z8EbgUOCsSgckabaklZJWdnZ2Vro7MzPrRSVTQO8EnoyIzojYDvwE\nOBU4Ik0JAYwBNqXHm4CxAGn94cBz3XcaEQsiojUiWhsbGysYnpmZ9aWSAPgdMEXSIWku/wzgEeAu\n4H2pzyzglvR4WVomrf9FREQF9c3MrAKVnAN4gOJk7irgN2lfC4AvA5+X1E4xx39N2uQa4KjU/nlg\nbgXjNjOzCg3tv0vvIuIS4JJuzeuBk3vouw04r5J6ZmY2ePxJYDOzTDkAzMwy5QAwM8uUA8DMLFMO\nADOzTDkAzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uU\nA8DMLFMOADOzTDkAzMwyVVEASDpC0k2S1kl6VNJUSUdKWi7pifTn8NRXkq6U1C5praSWwXkKZmZW\njkqPAL4N/HtE/BUwGXiU4mbvd0bEeOBOXr35+9nA+PQzG5hfYW0zM6tA2QEg6XDgb4FrACLilYh4\nEZgBLE7dFgPnpsczgGujcD9whKSjyx65mZlVpJIjgGagE/ihpF9L+oGkQ4FREfF06vMMMCo9Hg1s\nLNm+I7WZmVkdVBIAQ4EWYH5EvBV4mVenewCIiABib3YqabaklZJWdnZ2VjA8MzPrSyUB0AF0RMQD\nafkmikD4fdfUTvpzc1q/CRhbsv2Y1LaHiFgQEa0R0drY2FjB8MzMrC9lB0BEPANslHR8ajoDeARY\nBsxKbbOAW9LjZcCH09VAU4AtJVNFZmZWY0Mr3P7TwPWSDgTWAx+hCJUbJV0APAW8P/W9FXgX0A78\nMfU1M7M6qSgAImI10NrDqjN66BvARZXUMzOzweNPApuZZcoBYGaWKQeAmVmmHABmZplyAJiZZcoB\nYGaWKQeAmVmmHABmZplyAJiZZcoBYGaWKQeAmVmmHABmZplyAJiZZcoBYGaWKQeAmVmmHABmZply\nAJiZZcoBYGaWqYoDQFKDpF9L+llabpb0gKR2STek+wUj6aC03J7WN1Va28zMyjcYRwCfAR4tWf4G\nMC8i/hJ4AbggtV8AvJDa56V+ZmZWJxUFgKQxwLuBH6RlAe8AbkpdFgPnpscz0jJp/Rmpv5mZ1UGl\nRwDfAr4E7ErLRwEvRsSOtNwBjE6PRwMbAdL6Lam/mZnVQdkBIOkcYHNEPDSI40HSbEkrJa3s7Owc\nzF2bmVmJSo4ATgXeI2kDsJRi6ufbwBGShqY+Y4BN6fEmYCxAWn848Fz3nUbEgohojYjWxsbGCoZn\nZmZ9KTsAIuIrETEmIpqAmcAvIuIDwF3A+1K3WcAt6fGytExa/4uIiHLrm5lZZarxOYAvA5+X1E4x\nx39Nar8GOCq1fx6YW4XaZmY2QEP779K/iPgl8Mv0eD1wcg99tgHnDUY9Mz
OrnD8JbGaWKQeAmVmm\nHABmZplyAJiZZcoBYGaWKQeAmVmmHABmZplyAJiZZcoBYGaWKQeAmVmmHABmZplyAJiZZcoBYGaW\nKQeAmVmmHABmZplyAJiZZcoBYGaWqbIDQNJYSXdJekTSw5I+k9qPlLRc0hPpz+GpXZKulNQuaa2k\nlsF6EmZmtvcqOQLYAXwhIiYAU4CLJE2guNfvnRExHriTV+/9ezYwPv3MBuZXUNvMzCpUdgBExNMR\nsSo93go8CowGZgCLU7fFwLnp8Qzg2ijcDxwh6eiyR25mZhUZlHMAkpqAtwIPAKMi4um06hlgVHo8\nGthYsllHajMzszqoOAAkvQH4MfDZiPhD6bqICCD2cn+zJa2UtLKzs7PS4ZmZWS8qCgBJB1C8+F8f\nET9Jzb/vmtpJf25O7ZuAsSWbj0lte4iIBRHRGhGtjY2NlQzPzMz6UMlVQAKuAR6NiCtKVi0DZqXH\ns4BbSto/nK4GmgJsKZkqMjOzGhtawbanAh8CfiNpdWr7e+Ay4EZJFwBPAe9P624F3gW0A38EPlJB\nbTMzq1DZARARvwLUy+ozeugfwEXl1jMzs8HlTwKbmWXKAWBmlikHgJlZphwAZmaZcgCYmWXKAWBm\nlikHgJlZphwAZmaZcgCYmWXKAWBmlikHgJlZphwAZmaZcgCYmWXKAWBmlikHgJlZphwAZmaZcgCY\nmWXKAWBmlqmaB4CksyQ9Jqld0txa1zczs0IlN4Xfa5IagO8CZwIdwApJyyLikVqOw/Ze09yfV7T9\nhsvePUgjMbPBUtMAAE4G2iNiPYCkpcAMwAFgZnVRyZubff2NTa0DYDSwsWS5AzilxmOomN8N11bO\n/0HNqkkRUbti0vuAsyLiY2n5Q8ApETGnpM9sYHZaPB54rIKSI4BnK9h+X6tbz9q51a1nbT/nPGpX\nUvfYiGjsr1OtjwA2AWNLlsektt0iYgGwYDCKSVoZEa2Dsa99oW49a+dWt561/ZzzqF2LurW+CmgF\nMF5Ss6QDgZnAshqPwczMqPERQETskDQHuA1oABZGxMO1HIOZmRVqPQVERNwK3FqjcoMylbQP1a1n\n7dzq1rO2n3Metatet6Yngc3M7PXDXwVhZpYpB4CZWaZqfg5gfyZpJDCsazkiflfH4ZiZ9clHAINA\n0nskPQE8CfwHsAH4t7oOyvY7kg6UNDH9HFDDup8ZSNsg1muQdH219m+v2i9OAkv6UkR8U9J3gNc8\noYi4uMr11wDvAO6IiLdKejvwwYi4oJp1U+1G4ONAEyVHdBHx0f2xbqr9GeCHwFbgB8BbgbkRcXsN\naj9Jz79j46pc93RgMcWbC1F8oHJWRNxdzbqp9qqIaOnW9uuIeGsVa/4KeEdEvFKtGn3UPg6YD4yK\niImSJgHviYiv1aD2D+n596sq/6/2lymgLwPfBP4v8EId6m+PiOckDZE0JCLukvStGtW+BbgHuAPY\nWaOa9awL8NGI+Lak6cBw4EPAdUDVAwAo/WTmMOA84Mga1L0cmBYRj8HuF6klwF9Xq6CkNuB8oFlS\n6Qc2DwOer1bdZD1wb6r7cldjRFxR5boAVwNfBL6faq6V9COg6gEA/Kzk8TDgvwL/r1rF9pcA+L2k\nNwIfAU6neIdUSy9KegPFC+L1kjZT8ktbZYdExJdrVOv1UBde/fd9N3BdRDwsqSb/5hHxXLemb0l6\nCPgfVS59QNeLfxrH4zWYBroPeJriO2kuL2nfCqytRkFJ10XEh4D3APMopqkPq0atPhwSEQ92+5Xa\nUYvCEfHj0mVJS4BfVave/hIA84E7gXHAQyXtojicqurhOcUv6zbgM8AHgb8ALq1yzS4/k/Su9AG7\nWqpXXYCHJN1G8e86V9JhwK5aFJZUOhUyhOKIoBb/j1ZK+gHwv9LyBym+WqVqIuIp4ClgajXrdPPX\n6c3c74Dv1LBuqWclvYk0FZO+xPLpOo
1lPDCyWjvfL84BdJE0PyI+VcN6v4qIv5G0lVfn7breNuyi\nOEz+54j4XhXHsBU4BHgF2J7qR0T8RRXrdT3XN6S6r1S7brcxDAH+ERgeEZ+TdAzFtx/eU4Pad/Hq\n899BMSf/LxHxeJXr/g3F/TT+JjXdAzwRET/rfauKa/b0+w1V/LeWdDHwKaCZPac+umpW+80cksZR\nfAr3P1NMKT8JfCAFYjXrimI69aWS5meAr3Q/Mhi0mvtTALzeSDoKuC8ijq9ijSHAB4DmiPhqejE8\nOiIeqFbNVPcXwOUR8fOStqsj4uPVrJvqzKcI2HdExJslDQduj4iTalB7GPDf2PPkd0TEV6tcdxXF\nSd/fpOU24LMRsc/dT2Mgav1mLtX8fLemgymO8l6G2px/kPTbiJhY7TpdfBloFaX54tOrXOa7wBSg\nLS1vBa6qck0oXgC/JKl07rtqJyS7OSUiLqKYdiMiXgAOrFHtm4H/QnG09VL6qcX5nvcBiyQdL+nj\nwIXAtBrUrYtav/gnh6WfVoqjkOHAEcAngZY+thtMD0mq+huZLvvLOYDXrYio9tzhKRHRIunXqd4L\n6au2q+1F4AzgSkn/m2JOula2p/tLd83RNlKjcwDAmIg4q0a1douI9eld/80U8+PTIuJPtR7H/iwi\nLgWQdDfQEhFb0/L/BCq7DeDAnQJ8QNJTFG8suqa+JlWjmANg31evF0NFxA7gQkl/R3GlwvAa1AW4\nEvgpMFLS1yneHf9jjWrfJ+ktXVMx1SbpN+w5/34kxVepPyCJar0wZG4UxXmtLq+ktlqYXqM6gANg\nf1CvF8N/7XoQEYvSC9VFNahLRFyfLr08g+Id0rkR8Wg1a5a8EA8FPiJpPfBnqvwODTinSvu13l0L\nPCjpp2n5XGBRLQpX+0Rzdz4JvB+Q9Fe8+mJ4Z7VfDHMk6di+1tf6P65VV7rc97S0eHdE/Lqe46kW\nB4CZWaZ8FZCZWaYcAGZmmXIAmJWQtFPSakkPS1oj6Qvpw3Z9bdMk6fxajdFssDgAzPb0p4g4MSJO\nAM4EzgYu6WebJopvzTTbp/gksFkJSS9FxBtKlsdRfOnaCOBYiq+dPjStnhMR90m6H3gzxXfGLKa4\nNPcyik+BHwR8NyK+X7MnYTZADgCzEt0DILW9CBxP8TUbuyJim6TxwJKIaE03a/nvEXFO6j8bGBkR\nX5N0EHAvcF5EPFnTJ2PWD38QzGzgDgCuknQixbc2HtdLv2nApPQ1wgCHU3ytrwPAXlccAGZ9SFNA\nO4HNFOcCfg9Mpjh/tq23zYBPR8RtNRmkWZl8EtisF+l7lf4VuCqKudLDgacjYhfFbSgbUtet7HnX\nqtuAT3XdsUvScZIOxex1xkcAZns6WNJqiumeHRQnfbu+B/57wI8lfRj4d179Gui1wE5Jayi+M+bb\nFFcGrUo3+eik+D4Zs9cVnwQ2M8uUp4DMzDLlADAzy5QDwMwsUw4AM7NMOQDMzDLlADAzy5QDwMws\nUw4AM7NM/X+WgGyX8gJ3yQAAAABJRU5ErkJggg==\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7fc4cc6ea6d8>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"chart(docs, \"language_iso2\").plot.bar()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"Counter({'de': 13,\n",
" 'en': 1547,\n",
" 'es': 5,\n",
" 'fi': 1,\n",
" 'fr': 4,\n",
" 'hu': 1,\n",
" 'it': 1,\n",
" 'ja': 5,\n",
" 'ko': 1,\n",
" 'ru': 3,\n",
" 'zh': 23})"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Counter([doc.hyperdata[\"language_iso2\"] for doc in docs])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# Count the corpus nodes whose language differs from `lang`; swap .count() for the commented-out .delete() to remove them\n",
"def cleanCorpusWithLang(corpus_id, lang):\n",
" return (session.query(Node.id).filter(Node.parent_id == corpus_id)\n",
" .filter(Node.hyperdata[\"language_iso2\"].astext != lang)\n",
" .count()\n",
" #.delete()\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"57"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cleanCorpusWithLang(corpus_id, 'en')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"[(True, 'This is an english paragraph.\\n '),\n",
" (False, '\"This is an english paragraph.\\n\\nThis is an english paragraph.\\n ')]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"abstract0 = \"\"\"\"Ceci est un paragraphe en français.\n",
"\n",
"This is an english paragraph.\n",
" \"\"\"\n",
"\n",
"abstract1 = \"\"\"\"This is an english paragraph.\n",
"\n",
"This is an english paragraph.\n",
" \"\"\"\n",
"\n",
"def clean_lang_inText(lang, text):\n",
" \n",
" texts_before = nltk.tokenize.blankline_tokenize(text)\n",
" texts_after = '\\n\\n'.join([sentence \n",
" for sentence in texts_before\n",
" if detect_lang(sentence) == lang\n",
" ])\n",
" \n",
" return (len(texts_before) != len(nltk.tokenize.blankline_tokenize(texts_after)), texts_after)\n",
"\n",
"[clean_lang_inText('en', abstract) for abstract in [abstract0, abstract1]]\n",
"\n",
"# TODO update each document accordingly"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# TODO: update all the abstracts with that function"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Measures IMT Tools"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"154"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scan_hal(\"machine learning AND deep\")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"90"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Request syntax\n",
"# \"network analysis\" = network <-> analysis\n",
"# \"network OR analysis\" = network | analysis\n",
"# \"network AND analysis\" = network & analysis\n",
"\n",
"scan_gargantext(corpus_id, 'english', \"machine | learning & deep\")\n",
"\n",
"# \"network NOT analysis\" = @@ to_tsquery('network') !! to_tsquery('analysis')\n",
"# (the function needs to be changed if NOT has to be supported)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"[('network analysis', 'network <-> analysis'),\n",
" ('big data AND something', '(big <-> data) & something')]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Strengths / weaknesses of IMT\n",
"# Hal Query Gargantext Query\n",
"queries = [ (\"network analysis\" , \"network <-> analysis\" )\n",
" , (\"big data AND something\" , \"(big <-> data) & something\")\n",
" ]\n",
"[(query[0], query[1]) for query in queries]"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"def imt_vs_hal(corpus_id, queryHal, queryGarg):\n",
" return((scan_gargantext(corpus_id, 'english', queryGarg), scan_hal(queryHal)))\n",
" #return((scan_gargantext(corpus_id, 'english', queryGarg) *100 / scan_hal(queryHal)))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"[(5, 10649), (0, 5)]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Then chart it to see your strengths and weaknesses!\n",
"[imt_vs_hal(corpus_id, query[0], query[1]) for query in queries]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Graph generation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# TODO Cooccurrences optimization"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# TODO optimize the distributional distance"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# List Management"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# Front end: add a checkbox to choose between merging with or overwriting the previous list"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# optimize the list merge"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3rc1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
* Guided Tour * Guided Tour
* Sources form highlighting crawlers * Sources form highlighting crawlers
## Version 3.0.7
* Alembic implemented to manage database migrations
## Version 3.0.6.8 ## Version 3.0.6.8
* REPEC Crawler (connection with https://multivac.iscpif.fr) * REPEC Crawler (connection with https://multivac.iscpif.fr)
......
tools/manual_install.md
\ No newline at end of file
* Create user gargantua
Main user of Gargantext is Gargantua (role of Pantagruel soon)!
``` bash
sudo adduser --disabled-password --gecos "" gargantua
```
* Create the directories you need
In this example, the gargantext package is installed under /srv/.
``` bash
# Create every directory Gargantext needs and hand it over to the gargantua user.
# The word list is continued with backslashes: without them bash stops reading the
# list at the first newline and fails with a syntax error before reaching `do`.
for dir in "/srv/gargantext" \
           "/srv/gargantext_lib" \
           "/srv/gargantext_static" \
           "/srv/gargantext_media" \
           "/srv/env_3-5"; do
    sudo mkdir -p "$dir"
    sudo chown gargantua:gargantua "$dir"
done
```
You should see:
```bash
$ tree /srv
/srv
├── env_3-5
├── gargantext
├── gargantext_lib
├── gargantext_media
└── gargantext_static
```
* Get the main libraries
Download and uncompress them, then give the main user access to them.
Please be patient: because of the size of the library package (27 GB),
this step can take a long time.
``` bash
# Download the library bundle, unpack it into /srv/gargantext_lib and give it to gargantua.
# -C tells tar which directory to extract into (the previous -o is not an output-directory flag,
# so the path was being read as the name of an archive member to extract).
# NOTE(review): if the archive already contains a top-level gargantext_lib/ directory,
# extract with -C /srv instead — confirm against the archive layout.
wget http://dl.gargantext.org/gargantext_lib.tar.bz2 \
&& tar xvjf gargantext_lib.tar.bz2 -C /srv/gargantext_lib \
&& sudo chown -R gargantua:gargantua /srv/gargantext_lib \
&& echo "Libs installed"
```
* Get the source code of Gargantext
by cloning the repository of gargantext
``` bash
# Clone the repository into /srv/gargantext and switch to the stable branch.
# The last command carries no trailing backslash, so the chain ends here when pasted into a shell.
git clone ssh://gitolite@delanoe.org:1979/gargantext /srv/gargantext \
&& cd /srv/gargantext \
&& git fetch origin stable \
&& git checkout stable
```
TODO(soon): git clone https://gogs.iscpif.fr/gargantext.git
* Install and configure the virtual environment
``` bash
cd /srv/
# Install the virtualenv tool, then create the Python 3.5 environment in /srv/env_3-5.
pip3 install virtualenv
virtualenv /srv/env_3-5 -p /usr/bin/python3.5
# NOTE(review): the environment is not activated at this point, so this `pip` is whichever one is
# first on PATH; also, -r expects a requirements file while /srv/gargantext/install looks like a
# directory — confirm the intended pip and requirements path.
pip install -r /srv/gargantext/install
# Put /srv/gargantext on the environment's import path through a .pth file.
echo '/srv/gargantext' > /srv/env_3-5/lib/python3.5/site-packages/gargantext.pth
# Add a `venv` shell alias that activates the environment.
echo 'alias venv="source /srv/env_3-5/bin/activate"' >> ~/.bashrc
```
See the [next steps of installation procedure](install.md#Install)
See the [next manual steps of installation procedure](Debian.sh)
...@@ -59,25 +59,25 @@ LISTTYPES = { ...@@ -59,25 +59,25 @@ LISTTYPES = {
NODETYPES = [ NODETYPES = [
# TODO separate id not array index, read by models.node # TODO separate id not array index, read by models.node
None, # 0 None, # 0
# documents hierarchy # node/file hierarchy
'USER', # 1 'USER', # 1
'PROJECT', # 2 'PROJECT', # 2
#RESOURCE should be here but last #RESOURCE should be here but last
'CORPUS', # 3 'CORPUS', # 3
'DOCUMENT', # 4 'DOCUMENT', # 4
# lists # lists of ngrams
'STOPLIST', # 5 'STOPLIST', # 5
'GROUPLIST', # 6 'GROUPLIST', # 6
'MAINLIST', # 7 'MAINLIST', # 7
'MAPLIST', # 8 'MAPLIST', # 8
'COOCCURRENCES', # 9 'COOCCURRENCES', # 9
# scores # scores for ngrams
'OCCURRENCES', # 10 'OCCURRENCES', # 10
'SPECCLUSION', # 11 'SPECCLUSION', # 11
'CVALUE', # 12 'CVALUE', # 12
'TFIDF-CORPUS', # 13 'TFIDF-CORPUS', # 13
'TFIDF-GLOBAL', # 14 'TFIDF-GLOBAL', # 14
# docs subset # node subset
'FAVORITES', # 15 'FAVORITES', # 15
# more scores (sorry!) # more scores (sorry!)
......
...@@ -2,6 +2,9 @@ from gargantext.util.db import session ...@@ -2,6 +2,9 @@ from gargantext.util.db import session
from gargantext.util.files import upload from gargantext.util.files import upload
from gargantext.constants import * from gargantext.constants import *
# Uncomment to make column full text searchable
#from sqlalchemy_utils.types import TSVectorType
from datetime import datetime from datetime import datetime
from .base import Base, Column, ForeignKey, relationship, TypeDecorator, Index, \ from .base import Base, Column, ForeignKey, relationship, TypeDecorator, Index, \
...@@ -57,23 +60,28 @@ class Node(Base): ...@@ -57,23 +60,28 @@ class Node(Base):
Index('nodes_user_id_typename_parent_id_idx', 'user_id', 'typename', 'parent_id'), Index('nodes_user_id_typename_parent_id_idx', 'user_id', 'typename', 'parent_id'),
Index('nodes_hyperdata_idx', 'hyperdata', postgresql_using='gin')) Index('nodes_hyperdata_idx', 'hyperdata', postgresql_using='gin'))
# TODO
# create INDEX full_text_idx on nodes using gin(to_tsvector('english', hyperdata ->> 'abstract' || 'title'));
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
typename = Column(NodeType, index=True) typename = Column(NodeType, index=True)
__mapper_args__ = { 'polymorphic_on': typename }
# foreign keys # foreign keys
user_id = Column(Integer, ForeignKey(User.id, ondelete='CASCADE')) user_id = Column(Integer, ForeignKey(User.id, ondelete='CASCADE'))
parent_id = Column(Integer, ForeignKey('nodes.id', ondelete='CASCADE')) user = relationship(User)
# main data
parent_id = Column(Integer, ForeignKey('nodes.id', ondelete='CASCADE'))
parent = relationship('Node', remote_side=[id])
name = Column(String(255)) name = Column(String(255))
date = Column(DateTime(timezone=True), default=datetime.now) date = Column(DateTime(timezone=True), default=datetime.now)
# metadata (see https://bashelton.com/2014/03/updating-postgresql-json-fields-via-sqlalchemy/)
hyperdata = Column(JSONB, default=dict)
user = relationship(User)
parent = relationship('Node', remote_side=[id])
__mapper_args__ = { hyperdata = Column(JSONB, default=dict)
'polymorphic_on': typename # metadata (see https://bashelton.com/2014/03/updating-postgresql-json-fields-via-sqlalchemy/)
} # To make search possible uncomment the line below
#search_vector = Column(TSVectorType('hyperdata'))
def __new__(cls, *args, **kwargs): def __new__(cls, *args, **kwargs):
if cls is Node and kwargs.get('typename'): if cls is Node and kwargs.get('typename'):
......
...@@ -45,6 +45,7 @@ class HalCrawler(Crawler): ...@@ -45,6 +45,7 @@ class HalCrawler(Crawler):
, uri_s , uri_s
, isbn_s , isbn_s
, issue_s , issue_s
, docType_s
, journalPublisher_s , journalPublisher_s
""" """
#, authUrl_s #, authUrl_s
......
...@@ -5,9 +5,15 @@ from gargantext.util.json import json_dumps ...@@ -5,9 +5,15 @@ from gargantext.util.json import json_dumps
######################################################################## ########################################################################
# get engine, session, etc. # get engine, session, etc.
######################################################################## ########################################################################
import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker, scoped_session from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import delete from sqlalchemy import delete
# To make Full Text search possible, uncomment lines below
# (and install it with pip before)
#from sqlalchemy_searchable import make_searchable
def get_engine(): def get_engine():
from sqlalchemy import create_engine from sqlalchemy import create_engine
return create_engine( settings.DATABASES['default']['URL'] return create_engine( settings.DATABASES['default']['URL']
...@@ -18,6 +24,13 @@ def get_engine(): ...@@ -18,6 +24,13 @@ def get_engine():
engine = get_engine() engine = get_engine()
# To make Full Text search possible, uncomment lines below
# https://sqlalchemy-searchable.readthedocs.io/
#sa.orm.configure_mappers()
Base = declarative_base()
#Base.metadata.create_all(engine)
#make_searchable()
session = scoped_session(sessionmaker(bind=engine)) session = scoped_session(sessionmaker(bind=engine))
......
install/notebook/gargantext_notebook.py
\ No newline at end of file
#!/bin/bash
# Install Docker CE on Debian from Docker's own apt repository, then run the
# hello-world image as a smoke test.

DOCKER_DEBIAN_URL="https://download.docker.com/linux/debian"

# Packages needed to fetch over HTTPS and to manage extra apt repositories.
sudo apt-get update
sudo apt-get install apt-transport-https ca-certificates curl gnupg2 software-properties-common

# Trust Docker's signing key and print its fingerprint for a manual check.
curl -fsSL "${DOCKER_DEBIAN_URL}/gpg" | sudo apt-key add -
sudo apt-key fingerprint 0EBFCD88
echo "Should be: Key fingerprint = 9DC8 5822 9FC7 DD38 854A E2D8 8D81 803C 0EBF CD88"

# Register the stable channel for the Debian release reported by lsb_release.
sudo add-apt-repository "deb [arch=amd64] ${DOCKER_DEBIAN_URL} $(lsb_release -cs) stable"

# Install the engine and verify that containers can run.
sudo apt-get update
sudo apt-get install docker-ce
sudo docker run hello-world
...@@ -15,9 +15,9 @@ RUN apt-get update && \ ...@@ -15,9 +15,9 @@ RUN apt-get update && \
apt-utils ca-certificates locales \ apt-utils ca-certificates locales \
sudo aptitude gcc g++ wget git vim \ sudo aptitude gcc g++ wget git vim \
build-essential make \ build-essential make \
postgresql-9.5 postgresql-client-9.5 postgresql-contrib-9.5 \ postgresql-9.6 postgresql-client-9.6 postgresql-contrib-9.6 \
postgresql-server-dev-9.5 libpq-dev libxml2 \ postgresql-server-dev-9.6 libpq-dev libxml2 \
postgresql-9.5 postgresql-client-9.5 postgresql-contrib-9.5 postgresql-9.6 postgresql-client-9.6 postgresql-contrib-9.6
### Configure timezone and locale ### Configure timezone and locale
...@@ -37,7 +37,7 @@ ENV LC_ALL fr_FR.UTF-8 ...@@ -37,7 +37,7 @@ ENV LC_ALL fr_FR.UTF-8
### Install main dependencies and python packages based on Debian distrib ### Install main dependencies and python packages based on Debian distrib
RUN echo "############# PYTHON DEPENDENCIES ###############" RUN echo "############# PYTHON DEPENDENCIES ###############"
RUN apt-get update && apt-get install -y \ RUN apt-get update && apt-get install -y \
libxml2-dev xml-core libgfortran-5-dev \ libxml2-dev xml-core libgfortran-6-dev \
libpq-dev \ libpq-dev \
python3.5 \ python3.5 \
python3-dev \ python3-dev \
...@@ -47,8 +47,8 @@ RUN apt-get update && apt-get install -y \ ...@@ -47,8 +47,8 @@ RUN apt-get update && apt-get install -y \
# python dependencies # python dependencies
python3-pip \ python3-pip \
# for lxml # for lxml
libxml2-dev libxslt-dev libxml2-dev libxslt-dev \
#libxslt1-dev zlib1g-dev libxslt1-dev zlib1g-dev
# UPDATE AND CLEAN # UPDATE AND CLEAN
RUN apt-get update && apt-get autoclean &&\ RUN apt-get update && apt-get autoclean &&\
......
...@@ -17,7 +17,7 @@ jdatetime==1.7.2 ...@@ -17,7 +17,7 @@ jdatetime==1.7.2
kombu==3.0.37 # messaging kombu==3.0.37 # messaging
langdetect==1.0.6 #detectinglanguage langdetect==1.0.6 #detectinglanguage
nltk==3.1 nltk==3.1
numpy==1.10.4 numpy==1.13.1
psycopg2==2.6.2 psycopg2==2.6.2
pycountry==1.20 pycountry==1.20
python-dateutil==2.4.2 python-dateutil==2.4.2
...@@ -34,3 +34,4 @@ requests-futures==0.9.7 ...@@ -34,3 +34,4 @@ requests-futures==0.9.7
bs4==0.0.1 bs4==0.0.1
requests==2.10.0 requests==2.10.0
alembic>=0.9.2 alembic>=0.9.2
# SQLAlchemy-Searchable==0.10.4
#!/bin/bash
# Build the Gargantext notebook image (tagged garg-notebook:latest) from the
# ./notebook build context.

# Create the "notebooks" account only when it is missing, so re-running the
# script does not fail on adduser.
if ! id notebooks >/dev/null 2>&1; then
    sudo adduser --disabled-password --gecos "" notebooks
fi

# Remove leftover containers whose `docker ps -a` line contains "sh".
# `docker rm` fails when called without arguments, so skip it when nothing
# matched.
stale_containers=$(sudo docker ps -a | grep sh | awk '{print $1}')
if [ -n "$stale_containers" ]; then
    # Intentionally unquoted: one container ID per word.
    sudo docker rm $stale_containers
fi

sudo docker build -t garg-notebook:latest ./notebook
#!/bin/bash
# Start the Jupyter notebook server from the garg-notebook:latest image, with
# /srv/gargantext mounted from the host.

#-v /srv/gargandata:/srv/gargandata \
#-v /srv/gargantext_lib:/srv/gargantext_lib \

# Remove previous notebook containers (lines of `docker ps -a` containing both
# "notebook" and "sh"). `docker rm` fails when called without arguments, so
# skip it when nothing matched.
old_containers=$(sudo docker ps -a | grep notebook | grep sh | awk '{print $1}')
if [ -n "$old_containers" ]; then
    # Intentionally unquoted: one container ID per word.
    sudo docker rm $old_containers
fi

#HOSTIP=$(ip route show 0.0.0.0/0 | awk '{print $3}')
#--add-host=localhost:${HOSTIP} \

# NOTE(review): with --net=host the container shares the host network stack,
# so Docker discards the -p 8899:8899 mapping; it is kept here unchanged.
# Jupyter runs as the "notebooks" user inside the /env_3-5 virtualenv.
sudo docker run \
    --name=garg-notebook \
    --net=host \
    -p 8899:8899 \
    --env POSTGRES_HOST=localhost \
    -v /srv/gargantext:/srv/gargantext \
    -it garg-notebook:latest \
    /bin/bash -c "/bin/su notebooks -c 'source /env_3-5/bin/activate && cd /srv/gargantext/ && jupyter notebook --port=8899 --ip=0.0.0.0 --no-browser'"
# #&& jupyter nbextension enable --py widgetsnbextension --sys-prefix
#/bin/bash -c "/bin/su notebooks -c 'source /env_3-5/bin/activate && cd /srv/gargantext/ && jupyter notebook --port=8899 --ip=0.0.0.0 --no-browser --notebook-dir=/home/notebooks/'"
###########################################################
# Gargamelle WEB
###########################################################
# Build an image, starting from the debian:stretch image,
# which contains all the source code of the app
# Base image: Debian stretch.
FROM debian:stretch
MAINTAINER ISCPIF <gargantext@iscpif.fr>
# The build steps below run as root.
USER root
### Update and install base dependencies
RUN echo "############ DEBIAN LIBS ###############"
# Base tooling: apt helpers, CA certificates, locales, compilers and the
# wget/git/curl/vim command-line tools.
RUN apt-get update && \
apt-get install -y \
apt-utils ca-certificates locales \
sudo aptitude gcc g++ wget git vim \
build-essential make \
curl
# The PostgreSQL packages below are commented out, so this image does not
# install a database server:
# postgresql-9.6 postgresql-client-9.6 postgresql-contrib-9.6 \
# postgresql-server-dev-9.6 libpq-dev libxml2 \
# postgresql-9.6 postgresql-client-9.6 postgresql-contrib-9.6
# Install Stack
### Configure timezone and locale
RUN echo "########### LOCALES & TZ #################"
# Write Europe/Paris to /etc/timezone and export it as TZ.
RUN echo "Europe/Paris" > /etc/timezone
ENV TZ "Europe/Paris"
# Uncomment the en_GB and fr_FR UTF-8 entries in /etc/locale.gen, regenerate
# the locales non-interactively, and write fr_FR.UTF-8 as LANG in
# /etc/default/locale.
RUN sed -i -e 's/# en_GB.UTF-8 UTF-8/en_GB.UTF-8 UTF-8/' /etc/locale.gen && \
sed -i -e 's/# fr_FR.UTF-8 UTF-8/fr_FR.UTF-8 UTF-8/' /etc/locale.gen && \
dpkg-reconfigure --frontend=noninteractive locales && \
echo 'LANG="fr_FR.UTF-8"' > /etc/default/locale
# Make French UTF-8 the locale for every later build step and for containers.
ENV LANG fr_FR.UTF-8
ENV LANGUAGE fr_FR.UTF-8
ENV LC_ALL fr_FR.UTF-8
### Install main dependencies and python packages based on Debian distrib
RUN echo "############# PYTHON DEPENDENCIES ###############"
# Debian packages for Python 3.5, pip, the distribution builds of
# six/numpy/setuptools/numexpr, and the development headers (libpq, libxml2,
# libxslt, zlib) listed below.
RUN apt-get update && apt-get install -y \
libxml2-dev xml-core libgfortran-6-dev \
libpq-dev \
python3.5 \
python3-dev \
# for numpy, pandas and numpyperf \
python3-six python3-numpy python3-setuptools \
python3-numexpr \
# python dependencies \
python3-pip \
# for lxml
libxml2-dev libxslt-dev libxslt1-dev zlib1g-dev
# UPDATE AND CLEAN
# Refresh the package index, clear obsolete cached packages, then delete the
# downloaded apt package lists.
RUN apt-get update && apt-get autoclean \
&& rm -rf /var/lib/apt/lists/*
# NB: removing /var/lib/apt/lists/* avoids significantly filling up the /var/ folder on your native system
########################################################################
### PYTHON ENVIRONNEMENT (as ROOT)
########################################################################
# Create the "notebooks" account with password login disabled.
RUN adduser --disabled-password --gecos "" notebooks
# Create the /env_3-5 virtualenv and add a `venv` alias to root's .bashrc that
# activates it.
RUN pip3 install virtualenv
RUN virtualenv /env_3-5
RUN echo 'alias venv="source /env_3-5/bin/activate"' >> ~/.bashrc
# CONFIG FILES
# Copy the pinned requirements and the Django configuration script to /.
ADD requirements.txt /
#ADD psql_configure.sh /
ADD django_configure.sh /
# Inside the virtualenv: install the pinned requirements, then SQLAlchemy from
# its rel_1_1 git branch, then download the NLTK averaged_perceptron_tagger
# data into /usr/local/share/nltk_data.
RUN . /env_3-5/bin/activate && pip3 install -r requirements.txt && \
pip3 install git+https://github.com/zzzeek/sqlalchemy.git@rel_1_1 && \
python3 -m nltk.downloader averaged_perceptron_tagger -d /usr/local/share/nltk_data
# Both configuration scripts are disabled: django_configure.sh is copied into
# the image above but never executed during the build.
#RUN ./psql_configure.sh
#RUN ./django_configure.sh
# Hand the virtualenv over to the notebooks user.
RUN chown notebooks:notebooks -R /env_3-5
########################################################################
### Notebook IHaskell and IPYTHON ENVIRONNEMENT
########################################################################
#RUN apt-get update && apt-get install -y \
# libtinfo-dev \
# libzmq3-dev \
# libcairo2-dev \
# libpango1.0-dev \
# libmagic-dev \
# libblas-dev \
# liblapack-dev
#RUN curl -sSL https://get.haskellstack.org/ | sh
#RUN stack setup
#RUN git clone https://github.com/gibiansky/IHaskell
#RUN . /env_3-5/bin/activate \
# && cd IHaskell \
# && stack install gtk2hs-buildtools \
# && stack install --fast \
# && /root/.local/bin/ihaskell install --stack
#
#
########################################################################
### POSTGRESQL DATA (as ROOT)
########################################################################
#RUN sed -iP "s%^data_directory.*%data_directory = \'\/srv\/gargandata\'%" /etc/postgresql/9.5/main/postgresql.conf
#RUN echo "host all all 0.0.0.0/0 md5" >> /etc/postgresql/9.5/main/pg_hba.conf
#RUN echo "listen_addresses='*'" >> /etc/postgresql/9.5/main/postgresql.conf
EXPOSE 5432 8899
VOLUME ["/srv/","/home/notebooks/"]
#!/bin/bash
##################################################
# __| |(_) __ _ _ __ __ _ ___
# / _` || |/ _` | '_ \ / _` |/ _ \
# | (_| || | (_| | | | | (_| | (_) |
# \__,_|/ |\__,_|_| |_|\__, |\___/
# |__/ |___/
##################################################
#configure django migrations
##################################################
echo "::::: DJANGO :::::"
#echo "Starting Postgres"
#/usr/sbin/service postgresql start
# As the gargantua user, inside the /srv/env_3-5 virtualenv: generate and apply
# the Django migrations, run dbmigrate.py, then prompt for a superuser.
# Every step is chained with &&, so the first failure aborts the rest.
# NOTE(review): dbmigrate.py is invoked three times in a row; the reason is not
# visible here -- confirm it is intentional.
su gargantua -c 'source /srv/env_3-5/bin/activate &&\
echo "Activated env" &&\
/srv/gargantext/manage.py makemigrations &&\
/srv/gargantext/manage.py migrate && \
echo "migrations ok" &&\
/srv/gargantext/dbmigrate.py && \
/srv/gargantext/dbmigrate.py && \
/srv/gargantext/dbmigrate.py && \
/srv/gargantext/manage.py createsuperuser'
# NOTE(review): PostgreSQL is stopped here although the matching start above is
# commented out -- confirm the server is expected to be running beforehand.
service postgresql stop
"""
Gargantext Software Copyright (c) 2016-2017 CNRS ISC-PIF -
http://iscpif.fr
Licence (see :
http://gitlab.iscpif.fr/humanities/gargantext/blob/stable/LICENSE )
- In France : a CECILL variant affero compliant
- GNU aGPLV3 for all other countries
"""
#!/usr/bin/env python
import sys
import os
# Django settings
dirname = os.path.dirname(os.path.realpath(__file__))
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "gargantext.settings")
# initialize Django application
from django.core.wsgi import get_wsgi_application
application = get_wsgi_application()
from gargantext.util.toolchain.main import parse_extract_indexhyperdata
from gargantext.util.db import *
from gargantext.models import Node
from nltk.tokenize import wordpunct_tokenize
from gargantext.models import *
from nltk.tokenize import word_tokenize
import nltk as nltk
from statistics import mean
from math import log
from collections import defaultdict
import matplotlib.pyplot as plt
import numpy as np
import datetime
from collections import Counter
from langdetect import detect as detect_lang
def documents(corpus_id):
    """Return every DOCUMENT node whose parent is the given corpus.

    :param corpus_id: id of the corpus node, compared with ``Node.parent_id``.
    :returns: the list produced by ``Query.all()``; no ordering is applied.
    """
    is_child_of_corpus = Node.parent_id == corpus_id
    is_document = Node.typename == "DOCUMENT"
    document_query = session.query(Node).filter(is_child_of_corpus, is_document)
    # Sorting by publication date is currently disabled:
    #   .order_by(Node.hyperdata['publication_date'])
    return document_query.all()
#import seaborn as sns
import pandas as pd
def chart(docs, field):
    """Count how many documents share each value of a hyperdata field.

    :param docs: iterable of objects exposing a ``hyperdata`` mapping.
    :param field: key read from every document's ``hyperdata``; a document
        without that key raises ``KeyError``.
    :returns: a ``pandas.DataFrame`` with columns ``Date`` (the distinct
        values, in first-seen order) and ``DateValue`` (their counts),
        indexed by the ``Date`` values.
    """
    column_names = ['Date', 'DateValue']
    occurrences = Counter(doc.hyperdata[field] for doc in docs)
    value_count_pairs = list(occurrences.items())
    # A first frame is built only to obtain the ``Date`` column used as index.
    date_index = pd.DataFrame(value_count_pairs, columns=column_names).Date
    return pd.DataFrame(value_count_pairs, columns=column_names, index=date_index)
from gargantext.util.crawlers.HAL import HalCrawler
def scan_hal(request):
    """Pass ``request`` to a fresh ``HalCrawler`` and return its ``scan_results``.

    :param request: query forwarded unchanged to ``HalCrawler.scan_results``.
    :returns: whatever ``HalCrawler.scan_results`` returns.
    """
    return HalCrawler().scan_results(request)
def scan_gargantext(corpus_id, lang, request):
    """Count the nodes of a corpus that match a PostgreSQL full-text query.

    :param corpus_id: id of the parent corpus (``nodes.parent_id``).
    :param lang: text-search configuration passed to ``to_tsvector``
        (for example ``'english'``).
    :param request: query string passed to ``to_tsquery``.
    :returns: the number of matching nodes, as returned by ``count(n.id)``.
    """
    # Imported locally so this function does not depend on what the module's
    # star imports happen to expose.
    from sqlalchemy import text

    # TODO add some sugar the request (ideally request should be the same for hal and garg)
    # NOTE(review): `hyperdata ->> 'abstract' || 'title'` appends the literal
    # string 'title' to the abstract; the title field itself was probably
    # intended -- confirm before changing the expression.
    # Values are sent as bound parameters rather than interpolated into the
    # SQL text, so caller-supplied strings cannot alter the statement.
    query = text("""select count(n.id) from nodes n
                    where to_tsvector(:lang, hyperdata ->> 'abstract' || 'title')
                    @@ to_tsquery(:request)
                    AND n.parent_id = :corpus_id;""")
    parameters = {"lang": lang, "request": request, "corpus_id": corpus_id}

    connection = get_engine().connect()
    try:
        # The query yields exactly one row with one column: the count.
        return connection.execute(query, parameters).scalar()
    finally:
        # Previously placed after `return` and therefore never executed,
        # which leaked one connection per call.
        connection.close()
#!/bin/bash
#######################################################################
## ____ _
## | _ \ ___ ___| |_ __ _ _ __ ___ ___
## | |_) / _ \/ __| __/ _` | '__/ _ \/ __|
## | __/ (_) \__ \ || (_| | | | __/\__ \
## |_| \___/|___/\__\__, |_| \___||___/
## |___/
#######################################################################
echo "::::: POSTGRESQL :::::"
# Stop and drop the 9.4 "main" cluster.
# NOTE(review): every other command in this script targets 9.6 -- confirm that
# 9.4 is still the version that needs dropping.
su postgres -c 'pg_dropcluster 9.4 main --stop'
#done in docker but redoing it
# Recreate an empty /srv/gargandata owned by postgres; anything already stored
# in that directory is deleted.
rm -rf /srv/gargandata && mkdir /srv/gargandata && chown postgres:postgres /srv/gargandata
# Initialise a 9.6 data directory there and start a server on it, logging to
# /srv/gargandata/journal_applicatif.
su postgres -c '/usr/lib/postgresql/9.6/bin/initdb -D /srv/gargandata/'
su postgres -c '/usr/lib/postgresql/9.6/bin/pg_ctl -D /srv/gargandata/ -l /srv/gargandata/journal_applicatif start'
# Register the same directory as the 9.6 "main" cluster and start it.
# NOTE(review): the server is started several ways in a row (pg_ctl,
# pg_ctlcluster twice, service) -- confirm which of these are needed.
su postgres -c 'pg_createcluster -D /srv/gargandata 9.6 main '
su postgres -c 'pg_ctlcluster -D /srv/gargandata 9.6 main start '
su postgres -c 'pg_ctlcluster 9.6 main start'
service postgresql start
# Create the gargantua role and the gargandb database it owns.
# NOTE(review): the role's password is hardcoded on the next line; consider
# supplying it through the environment instead of committing it.
su postgres -c "psql -c \"CREATE user gargantua WITH PASSWORD 'C8kdcUrAQy66U'\""
su postgres -c "createdb -O gargantua gargandb"
echo "Postgres configured"
#service postgresql stop
# try bottleneck
eventlet==0.20.1
amqp==1.4.9
anyjson==0.3.3
billiard==3.3.0.23
celery==3.1.25
chardet==2.3.0
dateparser==0.3.5
Django==1.10.5
django-celery==3.2.1
django-pgfields==1.4.4
django-pgjsonb==0.0.23
djangorestframework==3.5.3
html5lib==0.9999999
#python-igraph>=0.7.1
jdatetime==1.7.2
kombu==3.0.37 # messaging
langdetect==1.0.6 #detectinglanguage
nltk==3.1
numpy==1.13.1
psycopg2==2.6.2
pycountry==1.20
python-dateutil==2.4.2
pytz==2016.10 # timezones
PyYAML==3.11
RandomWords==0.1.12
ujson==1.35
umalqurra==0.2 # arabic calendars (?? why use ??)
networkx==1.11
pandas==0.18.0
six==1.10.0
lxml==3.5.0
requests-futures==0.9.7
bs4==0.0.1
requests==2.10.0
djangorestframework-jwt==1.9.0
jupyter==1.0.0
jupyter-client==5.0.0
jupyter-console==5.1.0
jupyter-core==4.3.0
ipython==5.2.0
ipython-genutils==0.1.0
ipywidgets
matplotlib==2.0.2
...@@ -367,7 +367,7 @@ ...@@ -367,7 +367,7 @@
<p> <p>
Gargantext Gargantext
<span class="glyphicon glyphicon-registration-mark" aria-hidden="true"></span> <span class="glyphicon glyphicon-registration-mark" aria-hidden="true"></span>
, version 3.0.6.9.4, , version 3.0.7,
<a href="http://www.cnrs.fr" target="blank" title="Institution that enables this project."> <a href="http://www.cnrs.fr" target="blank" title="Institution that enables this project.">
Copyrights Copyrights
<span class="glyphicon glyphicon-copyright-mark" aria-hidden="true"></span> <span class="glyphicon glyphicon-copyright-mark" aria-hidden="true"></span>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment