Commit c5ad749c authored by Alexandre Delanoë's avatar Alexandre Delanoë

[DOC] Advanced tutorial.

parent b11d72c3
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Advanced Gargantext Tutorial (Python)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# First import the Gargantext Notebook library\n",
"from gargantext_notebook import *\n",
"\n",
"# This enables inline rendering of the charts drawn later\n",
"%matplotlib inline "
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"L'identifiant du corpus est : 254749\n"
]
}
],
"source": [
"# Copy/paste the URL of the corpus to work on (including http://)\n",
"corpus_url = \"http://gargantext.org/projects/251737/corpora/254749\"\n",
"\n",
"corpus_id = corpus_url.split(\"/\")[6]\n",
"\n",
"print(\"L\\'identifiant du corpus est : %s\" % corpus_id)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# To get all the documents:\n",
"docs = documents(corpus_id)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"'Towards big data science in the decade ahead from ten years of InCoB and the 1st ISCB-Asia Joint Conference.'"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# To get the title of the first document \n",
"# [0] indicates the index of the first document\n",
"docs[0].hyperdata['title']"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"\"The 2011 International Conference on Bioinformatics (InCoB) conference, which is the annual scientific conference of the Asia-Pacific Bioinformatics Network (APBioNet), is hosted by Kuala Lumpur, Malaysia, is co-organized with the first ISCB-Asia conference of the International Society for Computational Biology (ISCB). InCoB and the sequencing of the human genome are both celebrating their tenth anniversaries and InCoB's goalposts for the next decade, implementing standards in bioinformatics and globally distributed computational networks, will be discussed and adopted at this conference. Of the 49 manuscripts (selected from 104 submissions) accepted to BMC Genomics and BMC Bioinformatics conference supplements, 24 are featured in this issue, covering software tools, genome/proteome analysis, systems biology (networks, pathways, bioimaging) and drug discovery and design.\""
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# To get the abstract of the first document (0)\n",
"docs[0].hyperdata['abstract']"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"'Shoba Ranganathan, Christian Schönbach, Janet Kelso, Burkhard Rost, Sheila Nathan, Tin Wee Tan'"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# To get the authors of the first document (0)\n",
"docs[0].hyperdata['authors']"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"'BMC bioinformatics'"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# To get the source of the first document (0)\n",
"docs[0].hyperdata['source']"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# To count the documents per value of a field (here the publication year):\n",
"myChart = chart(docs, \"publication_year\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7fc48a3da128>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEZCAYAAACZwO5kAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGjxJREFUeJzt3X2QVfWd5/H3hwfFqAOKTRdFo40JGs2KpNMxWI55Ij5b\ngUrUgWwJWsyw2Ug0OpuxZ3drU1a5U6Q2JaNx1g0JiWhNRMNEYRNNIEYnGV1UJO0jUTuKoSmFFpEo\nPvHw3T/uD3JlgHsvfS+n74/Pq6rrnvM7v3Pu9946/enTv3vOPYoIzMwsX4OKLsDMzBrLQW9mljkH\nvZlZ5hz0ZmaZc9CbmWXOQW9mlrmqgl7S1ZKekfS0pDskDZM0TtIjknok3SnpkNT30DTfk5a3N/IF\nmJnZvlUMekljgCuBzoj4D8BgYBrwbWBeRHwE2ATMSqvMAjal9nmpn5mZFaTaoZshwGGShgAfAl4B\nPg8sTssXAlPT9JQ0T1o+WZLqU66ZmdVqSKUOEbFO0neAPwLvAMuAx4E3ImJb6tYLjEnTY4C1ad1t\nkjYDI4HX9vYcxxxzTLS3t+/vazAzOyg9/vjjr0VES6V+FYNe0lGUjtLHAW8APwHO7W+BkmYDswGO\nPfZYVq5c2d9NmpkdVCS9XE2/aoZuvgC8FBF9EbEV+ClwBjAiDeUAtAHr0vQ6YGwqYggwHNi4+0Yj\nYn5EdEZEZ0tLxT9IZma2n6oJ+j8CkyR9KI21TwaeBR4ALkp9ZgJL0vTSNE9a/uvwN6eZmRWmYtBH\nxCOUPlRdBTyV1pkPXAtcI6mH0hj8grTKAmBkar8G6GpA3WZmViUNhIPtzs7O2H2MfuvWrfT29vLu\nu+8WVFW+hg0bRltbG0OHDi26FDPrB0mPR0RnpX4VP4wtSm9vL0ceeSTt7e347Mz6iQg2btxIb28v\n48aNK7ocMzsABuxXILz77ruMHDnSIV9nkhg5cqT/UzI7iAzYoAcc8g3i99Xs4DKgg97MzPpvwI7R\n76696+d13d6auRdU7DN48GBOOeUUtm7dypAhQ5gxYwZXX301gwbt/e/jmjVrePjhh/nKV76yz20f\nf/zx3HfffZx44om72r7xjW8wevRorr322r1u+8ILL+Tpp5+uWLvZQFTv3+PdVfN7fTDyEf0+HHbY\nYXR3d/PMM8+wfPly7rvvPq677rp9rrNmzRp+/OMfV9z2tGnTWLRo0a75HTt2sHjxYqZNm9bvus3M\nyjnoqzRq1Cjmz5/PzTffTESwZs0azjzzTDo6Oujo6ODhhx8GoKuri9/+9rdMnDiRefPmsX37dr75\nzW/yyU9+kgkTJvC9730PgOnTp3PnnXfu2v5vfvMbjjvuOI477ri9brvcrbfeypw5c3bNX3jhhTz4\n4IMALFu2jNNPP52Ojg4uvvhi3nrrrQa+M2Y20Dnoa3D88cezfft2NmzYwKhRo1i+fDmrVq3izjvv\n5MorrwRg7ty5nHnmmXR3d3P11VezYMEChg8fzmOPPcZjjz3G97//fV566SVOOeUUBg0axBNPPAHA\nokWLmD59OsBet12N1157jeuvv55f/epXrFq1is7OTm644Yb6vxlm1jSaZox+oNm6dStz5syhu7ub\nwYMH8/zzz++x37Jly3jyySdZvLj0jc6bN2/mhRdeYNy4cUyfPp1FixbxsY99jHvuuWfXsFC1296T\nFStW8Oyzz3LGGWcA8P7773P66af389WaWTNz0NfgxRdfZPDgwYwaNYrrrruO1tZWnnjiCXbs2MGw\nYcP2uE5E8N3vfpdzzjnn3y2bNm0aZ599Np/5zGeYMGECra2tAMybN6/itocMGcKOHTt2ze88Lz4i\nOOuss7jjjjvq8ZLNLAMeuqlSX18fX/3qV5kzZw6S2Lx5M6NHj2bQoEHcfvvtbN++HYAjjzySN998\nc9d655xzDrfccgtbt24F4Pnnn2fLli0AfPjD
H+aYY46hq6tr17ANsNdtl2tvb6e7u5sdO3awdu1a\nHn30UQAmTZrEQw89RE9PDwBbtmyp6T8CM8tP0xzRF3Ha1DvvvMPEiRN3nV556aWXcs011wDwta99\njS9/+cvcdtttnHvuuRx++OEATJgwgcGDB3Pqqady2WWXcdVVV7FmzRo6OjqICFpaWrjnnnt2Pcf0\n6dPp6uriS1/60q62vW273BlnnMG4ceM4+eSTOemkk+jo6ACgpaWFW2+9lenTp/Pee+8BcP3113PC\nCSc07H0ys4FtwH6p2erVqznppJMKqih/fn+tCD6Pvr6q/VIzD92YmWXOQW9mlrkBHfQDYVgpR35f\nzQ4uAzbohw0bxsaNGx1Kdbbz++j3djqomeVnwJ5109bWRm9vL319fUWXkp2dd5gys4NDxaCXdCJw\nZ1nT8cD/AG5L7e3AGuCSiNiUbiB+I3A+8DZwWUSsqrWwoUOH+g5IZmZ1UM3NwZ+LiIkRMRH4BKXw\nvpvSTb/vj4jxwP38+Sbg5wHj089s4JZGFG5mZtWpdYx+MvCHiHgZmAIsTO0LgalpegpwW5SsAEZI\nGl2Xas3MrGa1Bv00YOeXqLRGxCtp+lWgNU2PAdaWrdOb2szMrABVB72kQ4AvAj/ZfVmUTo2p6fQY\nSbMlrZS00h+4mpk1Ti1H9OcBqyJifZpfv3NIJj1uSO3rgLFl67Wltg+IiPkR0RkRnS0tLbVXbmZm\nVakl6Kfz52EbgKXAzDQ9E1hS1j5DJZOAzWVDPGZmdoBVdR69pMOBs4D/VNY8F7hL0izgZeCS1H4v\npVMreyidoXN53ao1M7OaVRX0EbEFGLlb20ZKZ+Hs3jeAK+pSnZmZ9duA/QoEMzOrDwe9mVnmHPRm\nZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9\nmVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZpmrKugljZC0WNLvJa2WdLqkoyUtl/RCejwq9ZWk\nmyT1SHpSUkdjX4KZme1LtUf0NwK/iIiPAqcCq4Eu4P6IGA/cn+YBzgPGp5/ZwC11rdjMzGpSMegl\nDQc+DSwAiIj3I+INYAqwMHVbCExN01OA26JkBTBC0ui6V25mZlWp5oh+HNAH/EjS7yT9QNLhQGtE\nvJL6vAq0pukxwNqy9XtT2wdImi1ppaSVfX19+/8KzMxsn6oJ+iFAB3BLRHwc2MKfh2kAiIgAopYn\njoj5EdEZEZ0tLS21rGpmZjWoJuh7gd6IeCTNL6YU/Ot3Dsmkxw1p+TpgbNn6banNzMwKUDHoI+JV\nYK2kE1PTZOBZYCkwM7XNBJak6aXAjHT2zSRgc9kQj5mZHWBDquz3deCfJR0CvAhcTumPxF2SZgEv\nA5ekvvcC5wM9wNupr5mZFaSqoI+IbqBzD4sm76FvAFf0sy4zM6sTXxlrZpY5B72ZWeYc9GZmmXPQ\nm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc\n9GZmmXPQm5llzkFvZpY5B72ZWeaqCnpJayQ9Jalb0srUdrSk5ZJeSI9HpXZJuklSj6QnJXU08gWY\nmdm+1XJE/7mImBgRO+8d2wXcHxHjgfvTPMB5wPj0Mxu4pV7FmplZ7fozdDMFWJimFwJTy9pvi5IV\nwAhJo/vxPGZm1g/VBn0AyyQ9Lml2amuNiFfS9KtAa5oeA6wtW7c3tX2ApNmSVkpa2dfXtx+lm5lZ\nNYZU2e8vI2KdpFHAckm/L18YESEpanniiJgPzAfo7OysaV0zM6teVUf0EbEuPW4A7gZOA9bvHJJJ\njxtS93XA2LLV21KbmZkVoGLQSzpc0pE7p4GzgaeBpcDM1G0msCRNLwVmpLNvJgGby4Z4zMzsAKtm\n6KYVuFvS
zv4/johfSHoMuEvSLOBl4JLU/17gfKAHeBu4vO5Vm5lZ1SoGfUS8CJy6h/aNwOQ9tAdw\nRV2qMzOzfvOVsWZmmXPQm5llzkFvZpa5as+jN8tGe9fPG7btNXMvaNi2zfaXj+jNzDLnoDczy5yD\n3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLn\noDczy1zVQS9psKTfSfpZmh8n6RFJPZLulHRIaj80zfek5e2NKd3MzKpRyxH9VcDqsvlvA/Mi4iPA\nJmBWap8FbErt81I/MzMrSFVBL6kNuAD4QZoX8HlgceqyEJiapqekedLyyam/mZkVoNoj+n8E/g7Y\nkeZHAm9ExLY03wuMSdNjgLUAafnm1P8DJM2WtFLSyr6+vv0s38zMKqkY9JIuBDZExOP1fOKImB8R\nnRHR2dLSUs9Nm5lZmWruGXsG8EVJ5wPDgL8AbgRGSBqSjtrbgHWp/zpgLNAraQgwHNhY98rNzKwq\nFY/oI+LvI6ItItqBacCvI+I/Ag8AF6VuM4ElaXppmict/3VERF2rNjOzqvXnPPprgWsk9VAag1+Q\n2hcAI1P7NUBX/0o0M7P+qGboZpeIeBB4ME2/CJy2hz7vAhfXoTYzM6sDXxlrZpY5B72ZWeYc9GZm\nmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72Z\nWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYqBr2kYZIelfSEpGckXZfax0l6RFKPpDslHZLaD03zPWl5\ne2NfgpmZ7Us1R/TvAZ+PiFOBicC5kiYB3wbmRcRHgE3ArNR/FrAptc9L/czMrCAVgz5K3kqzQ9NP\nAJ8HFqf2hcDUND0lzZOWT5akulVsZmY1qWqMXtJgSd3ABmA58AfgjYjYlrr0AmPS9BhgLUBavhkY\nuYdtzpa0UtLKvr6+/r0KMzPbq6qCPiK2R8REoA04Dfhof584IuZHRGdEdLa0tPR3c2Zmthc1nXUT\nEW8ADwCnAyMkDUmL2oB1aXodMBYgLR8ObKxLtWZmVrNqzrppkTQiTR8GnAWsphT4F6VuM4ElaXpp\nmict/3VERD2LNjOz6g2p3IXRwEJJgyn9YbgrIn4m6VlgkaTrgd8BC1L/BcDtknqA14FpDajbzMyq\nVDHoI+JJ4ON7aH+R0nj97u3vAhfXpTozM+s3XxlrZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72Z\nWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFv\nZpY5B72ZWeaquTn4WEkPSHpW0jOSrkrtR0taLumF9HhUapekmyT1SHpSUkejX4SZme1dNUf024C/\njYiTgUnAFZJOBrqA+yNiPHB/mgc4DxiffmYDt9S9ajMzq1rFoI+IVyJiVZp+E1gNjAGmAAtTt4XA\n1DQ9BbgtSlYAIySNrnvlZmZWlSG1dJbUDnwceARojYhX0qJXgdY0PQZYW7Zab2p7pawNSbMpHfFz\n7LHH1lR0e9fPa+pfqzVzL2jo9s3MDqSqP4yVdATwL8A3IuJP5csiIoCo5YkjYn5EdEZEZ0tLSy2r\nmplZDaoKeklDKYX8P0fET1Pz+p1DMulxQ2pfB4wtW70ttZmZWQGqOetGwAJgdUTcULZoKTAzTc8E\nlpS1z0hn30wCNpcN8ZiZ2QFWzRj9GcClwFOSulPbfwXmAndJmgW8DFySlt0LnA/0AG8Dl9e1YjMz\nq0nFoI+IfwO0l8WT99A/gCv6WZeZmdWJr4w1M8ucg97MLHMOejOzzDnozcwy56A3M8ucg97MLHMO\nejOzzDnozcwy56A3M8ucg97MLHMOejOzzDnozcwy56A3M8ucg97MLHMOej
OzzNV0c3Az8M3ZzZqN\nj+jNzDLnoDczy1w1Nwf/oaQNkp4uazta0nJJL6THo1K7JN0kqUfSk5I6Glm8mZlVVs0R/a3Aubu1\ndQH3R8R44P40D3AeMD79zAZuqU+ZZma2vyoGfUT8Bnh9t+YpwMI0vRCYWtZ+W5SsAEZIGl2vYs3M\nrHb7O0bfGhGvpOlXgdY0PQZYW9avN7X9O5JmS1opaWVfX99+lmFmZpX0+8PYiAgg9mO9+RHRGRGd\nLS0t/S3DzMz2Yn+Dfv3OIZn0uCG1rwPGlvVrS21mZlaQ/Q36pcDMND0TWFLWPiOdfTMJ2Fw2xGNm\nZgWoeGWspDuAzwLHSOoFvgXMBe6SNAt4Gbgkdb8XOB/oAd4GLm9AzWZmVoOKQR8R0/eyaPIe+gZw\nRX+LMjOz+vGVsWZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQ\nm5llzkFvZpa5it91Y/XX3vXzhm5/zdwLGrp9M2suPqI3M8ucg97MLHMOejOzzDnozcwy56A3M8uc\ng97MLHMNCXpJ50p6TlKPpK5GPIeZmVWn7kEvaTDwT8B5wMnAdEkn1/t5zMysOo24YOo0oCciXgSQ\ntAiYAjzbgOcyO6j4YrtiNev7r4io7wali4BzI+Kv0/ylwKciYs5u/WYDs9PsicBzdS3kg44BXmvg\n9hvN9RenmWsH11+0Rtd/XES0VOpU2FcgRMR8YP6BeC5JKyOi80A8VyO4/uI0c+3g+os2UOpvxIex\n64CxZfNtqc3MzArQiKB/DBgvaZykQ4BpwNIGPI+ZmVWh7kM3EbFN0hzgl8Bg4IcR8Uy9n6dGB2SI\nqIFcf3GauXZw/UUbEPXX/cNYMzMbWHxlrJlZ5hz0ZmaZc9CbmWXOQW9mlrksg17SaZI+maZPlnSN\npPOLrsuaj6Tbiq7BrL+yuzm4pG9R+kK1IZKWA58CHgC6JH08Iv5noQVWQdJHgTHAIxHxVln7uRHx\ni+Iq2zdJnwJWR8SfJB0GdAEdlL7n6B8iYnOhBVYgaffrPQR8TtIIgIj44oGvqj4kXR4RPyq6jlpI\n+ktK3531dEQsK7qeSiRdCdwdEWuLrmV32Z1eKekpYCJwKPAq0FYWPI9ExIRCC6wg7SxXAKspvY6r\nImJJWrYqIjqKrG9fJD0DnJqupZgPvA0sBian9i8VWmAFklZR+qP0AyAoBf0dlC76IyL+tbjq+kfS\nHyPi2KLr2BdJj0bEaWn6byj9HtwNnA3834iYW2R9lUjaDGwB/kBpv/lJRPQVW1VJdkf0wLaI2A68\nLekPEfEngIh4R9KOgmurxt8An4iItyS1A4sltUfEjZSCZyAbFBHb0nRn2R+lf5PUXVRRNegErgL+\nG/DNiOiW9E6zBLykJ/e2CGg9kLXsp6Fl07OBsyKiT9J3gBXAgA564EXgE8AXgL8CrpP0OKXQ/2lE\nvFlUYTkG/fuSPhQRb1N60wGQNBxohqAftHO4JiLWSPospbA/joEf9E+XDRE8IakzIlZKOgHYWnRx\nlUTEDmCepJ+kx/U01+9IK3AOsGm3dgEPH/hyajZI0lGUPjvUzqPhiNgiadu+Vx0QIu1Dy4BlkoZS\nGkaeDnwHqPgtk43STDtxtT4dEe/Brl/cnYYCM4spqSbrJU2MiG6AdGR/IfBD4JRiS6vor4EbJf13\nSl/N+v8krQXWpmVNISJ6gYslXQD8qeh6avAz4Iid+045SQ8e+HJqNhx4nNIfppA0OiJekXQEA/8g\nB3arMSK2Uvqer6WSPlRMSSXZjdHvi6Qjyj/cHIgktVEafnp1D8vOiIiHCiirJpL+AhhH6UCiNyLW\nF1xSvzXDvpOrFJKtEfFS0bXsi6QTIuL5ouvYk4Mt6Af8B1L70sxh08y1g/edorn+/slu6EbSNXtb\nBBxxIGtpgGeBZg2bAV+7950BzfX3Q3
ZBD/wD8L+APX14M+AvEGvmsGnm2hPvOwVy/Y2TY9CvAu6J\niMd3XyCpGT4QbOawaebawftO0Vx/g2Q3Ri/pROD1PV2oIKl1oH8wKOlh4Ot7CZu1ETF2D6sNCM1c\nO3jfKZrrb5zsgr7ZNXPYNHPtOWj299/1N052QZ8ujPp7YCowitKl7BuAJcDciHijwPJsAPO+Y7lq\nhnGvWt1F6crAz0bE0RExEvhcarur0MqqIGm4pLmSfi/pdUkbJa1ObSOKrm9fmrn2xPtOgVx/4+QY\n9O0R8e3yC44i4tWI+DZwXIF1VauZw6aZawfvO0Vz/Q2S49DNMuBXwMKdY2KSWoHLKH1J0hcKLK8i\nSc9FxIm1LhsImrl28L5TNNffODke0f8VMBL4V0mbJL0OPAgcDVxSZGFVelnS36WAAUphI+laSt8Z\nM5A1c+3gfadorr9Bsgv6iNgE/AiYA4xN/0KdFBHXUrqJwUDXzGHTzLV73yme62+QHIdumvbGHTup\ndIepNmBFNNEdpqDpa/e+UzDX3yARkdUP8BSlr2oFaAdWUvqFBfhd0fVVUf+VwHPAPcAaYErZslVF\n15dr7d53iv9x/Y37yfErEJr5xh3Q3HeYaubawftO0Vx/g+QY9M184w5o7rBp5trB+07RXH+jCivy\nyRtkBqWbgu8SEdsiYgbw6WJKqsl6SRN3zqQd50LgGAZ+2DRz7eB9p2iuv0Gy+zC22amJ7zDVzLXn\noNnff9ffOA56M7PM5Th0Y2ZmZRz0ZmaZc9DbQUnSdkndkp6R9ISkv5W0z98HSe2SvnKgajSrFwe9\nHazeiYiJEfEx4CzgPOBbFdZpBxz01nT8YawdlCS9FRFHlM0fDzxG6VS444DbgcPT4jkR8bCkFcBJ\nwEvAQuAmYC7wWeBQ4J8i4nsH7EWYVclBbwel3YM+tb0BnAi8CeyIiHcljQfuiIjOdAHMf4mIC1P/\n2cCoiLhe0qHAQ8DFEfHSAX0xZhXkeGWsWX8NBW5OF79sB07YS7+zgQmSLkrzw4HxlI74zQYMB70Z\nu4ZutlO6R+y3gPXAqZQ+x3p3b6sBX4+IXx6QIs32kz+MtYOepBbg/wA3R2ksczjwSkTsAC4FBqeu\nbwJHlq36S+A/SxqatnOCpMMxG2B8RG8Hq8MkdVMaptlG6cPXG9Ky/w38i6QZwC+ALan9SWC7pCeA\nW4EbKZ2Js0qSgD5g6oF6AWbV8oexZmaZ89CNmVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz\n0JuZZe7/A6+cHY7zduzoAAAAAElFTkSuQmCC\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7fc48a441a58>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"myChart.plot.bar()"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"## Title\n",
"\n",
"Here I can add some comments on the chart.\n",
"1. First point\n",
"2. Second point"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Language cleaning tools"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"'fr'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"detect_lang(\"Ceci est une phrase en français.\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"'en'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"detect_lang(\"This is an english sentence.\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7fc487e01e80>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAENCAYAAAAG6bK5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGuhJREFUeJzt3X+8VXWd7/HXm4OKmqMoB64Beg4NOiGBnTkqXMfJMkHL\nG95u9uDYD6YsKiX7dSuamcf12tTjYc0oZRYTJoFeA31YKbecUTQbTa8KElAq6rmIcbgax1+EFsmP\nz/1jfQ9ujucXe5+9t/B9Px+P82Cv7/qu9fluOOz3Xt+19l6KCMzMLD9D6j0AMzOrDweAmVmmHABm\nZplyAJiZZcoBYGaWKQeAmVmmHABmZplyAJiZZcoBYGaWqX4DQNJCSZsl/bZb+6clrZP0sKRvlrR/\nRVK7pMckTS9pPyu1tUuaO7hPw8zM9pb6+yoISX8LvARcGxETU9vbgX8A3h0Rf5Y0MiI2S5oALAFO\nBt4I3AEcl3b1OHAm0AGsANoi4pG+ao8YMSKamprKfW5mZll66KGHno2Ixv76De2vQ0TcLampW/On\ngMsi4s+pz+bUPgNYmtqflNROEQYA7RGxHkDS0tS3zwBoampi5cqV/Q3RzMxKSHpqIP3KPQdwHHCa\npAck/Yekk1L7aGBjSb+O1NZbu5mZ1Um/RwB9bHckMAU4CbhR0rjBGJCk2cBsgGOOOWYwdmlmZj0o\n9wigA/hJFB4EdgEjgE3A2JJ+Y1Jbb+2vERELIqI1IlobG/udwjIzszKVewRwM/B24C5JxwEHAs8C\ny4AfSbqC4iTweOBBQMB4Sc0UL/wzgfMrHLuZ7QO2b99OR0cH27Ztq/dQ9jvDhg1jzJgxHHDAAWVt\n328ASFoCnA6MkNQBXAIsBBamS0NfAWZFcTnRw5JupDi5uwO4KCJ2pv3MAW4DGoCFEfFwWSM2s31K\nR0cHhx12GE1NTUiq93D2GxHBc889R0dHB83NzWXtYyBXAbX1suqDvfT/OvD1HtpvBW7dq9GZ2T5v\n27ZtfvGvAkkcddRRdHZ2lr0PfxLYzKrOL/7VUenfqwPAzPZ7DQ0NnHjiiZxwwglMnjyZyy+/nF27\ndvW5zYYNG/jRj37U777HjRvHY489tkfbZz/7Wb7xjW/0ue+JEycObPBVVO5JYKuTprk/L3vbDZe9\nexBHYlaeSn6HezKQ3+uDDz6Y1atXA7B582bOP/98/vCHP3DppZf2vt8UAOef3/f1KjNnzmTp0qVc\ncsklAOzatYubbrqJe++9dy+eRX34CMDMsjJy5EgWLFjAVVddRUSwYcMGTjvtNFpaWmhpaeG+++4D\nYO7cudxzzz2ceOKJzJs3j507d/LFL36Rk046iUmTJvH9738fgLa2Nm644Ybd+7/77rs59thjOfbY\nY3vdd6lFixYxZ86c3cvnnHMOv/zlLwG4/fbbmTp1Ki0tLZx33nm89NJLg/p34QAws+yMGzeOnTt3\nsnnzZkaOHMny5ctZtWoVN9xwAxdffDEAl112GaeddhqrV6/mc5/7HNdccw2HH344K1asYMWKFVx9\n9dU8+eSTvOUtb2HIkCGsWbMGgKVLl9LWVlw709u+B+LZZ5/la1/7GnfccQerVq2itbWVK664YlD/\nHjwFZGZZ2759O3PmzGH16tU0NDTw+OOP99jv9ttvZ+3atdx0000AbNmyhSeeeILm5mba2tpYunQp\nJ5xwAjfffPPuqaWB7rsn999/P4888ginnnoqAK+88gpTp06t8NnuyQFgZtlZv349DQ0NjBw5kksv\nvZRRo0axZs0adu3axbBhw3rcJiL4zne+w/Tp01+zbubMmUybNo23ve1tTJo0iVGjRgEwb968fvc9\ndOjQPU5Id31gLiI488wzWbJkyWA85R55CsjMstLZ2cknP/lJ5syZgyS2bNnC0UcfzZAhQ7juuuvY\nuXMnAIcddhhbt27dvd306dOZP38+27dvB+Dx
xx/n5ZdfBuBNb3oTI0aMYO7cubunf4Be912qqamJ\n1atXs2vXLjZu3MiDDz4IwJQpU7j33ntpb28H4OWXX96rI4iBcACY2X7vT3/60+7LQN/5zncybdq0\n3VftXHjhhSxevJjJkyezbt06Dj30UAAmTZpEQ0MDkydPZt68eXzsYx9jwoQJtLS0MHHiRD7xiU+w\nY8eO3TXa2tpYt24d733ve3e39bbvUqeeeirNzc1MmDCBiy++mJaWFgAaGxtZtGgRbW1tTJo0ialT\np7Ju3bpB/Xvp94Yw9dTa2hq+H8CefBmo7WseffRR3vzmN9d7GPutnv5+JT0UEa39besjADOzTDkA\nzMwy5QAwM8uUA8DMqu71fK5xX1bp36sDwMyqatiwYTz33HMOgUHWdT+A3j63MBD+IJiZVdWYMWPo\n6Oio6HvrrWdddwQrlwPAzKrqgAMOKPuOVVZdngIyM8tUvwEgaaGkzen+v93XfUFSSBqRliXpSknt\nktZKainpO0vSE+ln1uA+DTMz21sDOQJYBJzVvVHSWGAa8LuS5rOB8elnNjA/9T2S4mbypwAnA5dI\nGl7JwM3MrDL9BkBE3A0838OqecCXgNJT+zOAa6NwP3CEpKOB6cDyiHg+Il4AltNDqJiZWe2UdQ5A\n0gxgU0Ss6bZqNLCxZLkjtfXWbmZmdbLXVwFJOgT4e4rpn0EnaTbF9BHHHHNMNUqYmRnlHQG8CWgG\n1kjaAIwBVkn6T8AmYGxJ3zGprbf214iIBRHRGhGtjY2NZQzPzMwGYq8DICJ+ExEjI6IpIpoopnNa\nIuIZYBnw4XQ10BRgS0Q8DdwGTJM0PJ38nZbazMysTgZyGegS4P8Ax0vqkHRBH91vBdYD7cDVwIUA\nEfE88E/AivTz1dRmZmZ10u85gIho62d9U8njAC7qpd9CYOFejs/MzKrEnwQ2M8uUA8DMLFMOADOz\nTDkAzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uUA8DM\nLFMOADOzTDkAzMwy5QAwM8uUA8DMLFMDuSfwQkmbJf22pO2fJa2TtFbSTyUdUbLuK5LaJT0maXpJ\n+1mprV3S3MF/KmZmtjcGcgSwCDirW9tyYGJETAIeB74CIGkCMBM4IW3zPUkNkhqA7wJnAxOAttTX\nzMzqpN8AiIi7gee7td0eETvS4v3AmPR4BrA0Iv4cEU8C7cDJ6ac9ItZHxCvA0tTXzMzqZDDOAXwU\n+Lf0eDSwsWRdR2rrrf01JM2WtFLSys7OzkEYnpmZ9aSiAJD0D8AO4PrBGQ5ExIKIaI2I1sbGxsHa\nrZmZdTO03A0l/R1wDnBGRERq3gSMLek2JrXRR7uZmdVBWUcAks4CvgS8JyL+WLJqGTBT0kGSmoHx\nwIPACmC8pGZJB1KcKF5W2dDNzKwS/R4BSFoCnA6MkNQBXEJx1c9BwHJJAPdHxCcj4mFJNwKPUEwN\nXRQRO9N+5gC3AQ3Awoh4uArPx8zMBqjfAIiIth6ar+mj/9eBr/fQfitw616NzszMqsafBDYzy5QD\nwMwsUw4AM7NMOQDMzDLlADAzy5QDwMwsUw4AM7NMOQDMzDLlADAzy5QDwMwsUw4AM7NMOQDMzDLl\nADAzy5QDwMwsUw4AM7NMOQDMzDLlADAzy1S/ASBpoaTNkn5b0nakpOWSnkh/Dk/tknSlpHZJayW1\nlGwzK/V/QtKs6jwdMzMbqIEcASwCzurWNhe4MyLGA3emZYCzKW4EPx6YDcyHIjAo7iV8CnAycElX\naJiZWX30GwARcTfwfLfmGcDi9HgxcG5J+7VRuB84QtLRwHRgeUQ8HxEvAMt5baiYmVkNlXsOYFRE\nPJ0ePwOMSo9HAxtL+nWktt7azcysTio+CRwRAcQgjAUASbMlrZS0srOzc7B2a2Zm3ZQbAL9PUzuk\nPzen9k3A
2JJ+Y1Jbb+2vERELIqI1IlobGxvLHJ6ZmfWn3ABYBnRdyTMLuKWk/cPpaqApwJY0VXQb\nME3S8HTyd1pqMzOzOhnaXwdJS4DTgRGSOiiu5rkMuFHSBcBTwPtT91uBdwHtwB+BjwBExPOS/glY\nkfp9NSK6n1g2M7Ma6jcAIqKtl1Vn9NA3gIt62c9CYOFejc7MzKrGnwQ2M8uUA8DMLFMOADOzTDkA\nzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uUA8DMLFMO\nADOzTDkAzMwy5QAwM8uUA8DMLFMVBYCkz0l6WNJvJS2RNExSs6QHJLVLukHSganvQWm5Pa1vGown\nYGZm5Sk7ACSNBi4GWiNiItAAzAS+AcyLiL8EXgAuSJtcALyQ2uelfmZmVieVTgENBQ6WNBQ4BHga\neAdwU1q/GDg3PZ6Rlknrz5CkCuubmVmZyg6AiNgE/AvwO4oX/i3AQ8CLEbEjdesARqfHo4GNadsd\nqf9R5dY3M7PKVDIFNJziXX0z8EbgUOCsSgckabaklZJWdnZ2Vro7MzPrRSVTQO8EnoyIzojYDvwE\nOBU4Ik0JAYwBNqXHm4CxAGn94cBz3XcaEQsiojUiWhsbGysYnpmZ9aWSAPgdMEXSIWku/wzgEeAu\n4H2pzyzglvR4WVomrf9FREQF9c3MrAKVnAN4gOJk7irgN2lfC4AvA5+X1E4xx39N2uQa4KjU/nlg\nbgXjNjOzCg3tv0vvIuIS4JJuzeuBk3vouw04r5J6ZmY2ePxJYDOzTDkAzMwy5QAwM8uUA8DMLFMO\nADOzTDkAzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uUA8DMLFMOADOzTDkAzMwy5QAwM8uU\nA8DMLFMOADOzTDkAzMwyVVEASDpC0k2S1kl6VNJUSUdKWi7pifTn8NRXkq6U1C5praSWwXkKZmZW\njkqPAL4N/HtE/BUwGXiU4mbvd0bEeOBOXr35+9nA+PQzG5hfYW0zM6tA2QEg6XDgb4FrACLilYh4\nEZgBLE7dFgPnpsczgGujcD9whKSjyx65mZlVpJIjgGagE/ihpF9L+oGkQ4FREfF06vMMMCo9Hg1s\nLNm+I7WZmVkdVBIAQ4EWYH5EvBV4mVenewCIiABib3YqabaklZJWdnZ2VjA8MzPrSyUB0AF0RMQD\nafkmikD4fdfUTvpzc1q/CRhbsv2Y1LaHiFgQEa0R0drY2FjB8MzMrC9lB0BEPANslHR8ajoDeARY\nBsxKbbOAW9LjZcCH09VAU4AtJVNFZmZWY0Mr3P7TwPWSDgTWAx+hCJUbJV0APAW8P/W9FXgX0A78\nMfU1M7M6qSgAImI10NrDqjN66BvARZXUMzOzweNPApuZZcoBYGaWKQeAmVmmHABmZplyAJiZZcoB\nYGaWKQeAmVmmHABmZplyAJiZZcoBYGaWKQeAmVmmHABmZplyAJiZZcoBYGaWKQeAmVmmHABmZply\nAJiZZcoBYGaWqYoDQFKDpF9L+llabpb0gKR2STek+wUj6aC03J7WN1Va28zMyjcYRwCfAR4tWf4G\nMC8i/hJ4AbggtV8AvJDa56V+ZmZWJxUFgKQxwLuBH6RlAe8AbkpdFgPnpscz0jJp/Rmpv5mZ1UGl\nRwDfAr4E7ErLRwEvRsSOtNwBjE6PRwMbAdL6Lam/mZnVQdkBIOkcYHNEPDSI40HSbEkrJa3s7Owc\nzF2bmVmJSo4ATgXeI2kDsJRi6ufbwBGShqY+Y4BN6fEmYCxAWn848Fz3nUbEgohojYjWxsbGCoZn\nZmZ9KTsAIuIrETEmIpqAmcAvIuIDwF3A+1K3WcAt6fGytExa/4uIiHLrm5lZZarxOYAvA5+X1E4x\nx39Nar8GOCq1fx6YW4XaZmY2QEP779K/iPgl8Mv0eD1wcg99tgHnDUY9Mz
OrnD8JbGaWKQeAmVmm\nHABmZplyAJiZZcoBYGaWKQeAmVmmHABmZplyAJiZZcoBYGaWKQeAmVmmHABmZplyAJiZZcoBYGaW\nKQeAmVmmHABmZplyAJiZZcoBYGaWqbIDQNJYSXdJekTSw5I+k9qPlLRc0hPpz+GpXZKulNQuaa2k\nlsF6EmZmtvcqOQLYAXwhIiYAU4CLJE2guNfvnRExHriTV+/9ezYwPv3MBuZXUNvMzCpUdgBExNMR\nsSo93go8CowGZgCLU7fFwLnp8Qzg2ijcDxwh6eiyR25mZhUZlHMAkpqAtwIPAKMi4um06hlgVHo8\nGthYsllHajMzszqoOAAkvQH4MfDZiPhD6bqICCD2cn+zJa2UtLKzs7PS4ZmZWS8qCgBJB1C8+F8f\nET9Jzb/vmtpJf25O7ZuAsSWbj0lte4iIBRHRGhGtjY2NlQzPzMz6UMlVQAKuAR6NiCtKVi0DZqXH\ns4BbSto/nK4GmgJsKZkqMjOzGhtawbanAh8CfiNpdWr7e+Ay4EZJFwBPAe9P624F3gW0A38EPlJB\nbTMzq1DZARARvwLUy+ozeugfwEXl1jMzs8HlTwKbmWXKAWBmlikHgJlZphwAZmaZcgCYmWXKAWBm\nlikHgJlZphwAZmaZcgCYmWXKAWBmlikHgJlZphwAZmaZcgCYmWXKAWBmlikHgJlZphwAZmaZcgCY\nmWXKAWBmlqmaB4CksyQ9Jqld0txa1zczs0IlN4Xfa5IagO8CZwIdwApJyyLikVqOw/Ze09yfV7T9\nhsvePUgjMbPBUtMAAE4G2iNiPYCkpcAMwAFgZnVRyZubff2NTa0DYDSwsWS5AzilxmOomN8N11bO\n/0HNqkkRUbti0vuAsyLiY2n5Q8ApETGnpM9sYHZaPB54rIKSI4BnK9h+X6tbz9q51a1nbT/nPGpX\nUvfYiGjsr1OtjwA2AWNLlsektt0iYgGwYDCKSVoZEa2Dsa99oW49a+dWt561/ZzzqF2LurW+CmgF\nMF5Ss6QDgZnAshqPwczMqPERQETskDQHuA1oABZGxMO1HIOZmRVqPQVERNwK3FqjcoMylbQP1a1n\n7dzq1rO2n3Metatet6Yngc3M7PXDXwVhZpYpB4CZWaZqfg5gfyZpJDCsazkiflfH4ZiZ9clHAINA\n0nskPQE8CfwHsAH4t7oOyvY7kg6UNDH9HFDDup8ZSNsg1muQdH219m+v2i9OAkv6UkR8U9J3gNc8\noYi4uMr11wDvAO6IiLdKejvwwYi4oJp1U+1G4ONAEyVHdBHx0f2xbqr9GeCHwFbgB8BbgbkRcXsN\naj9Jz79j46pc93RgMcWbC1F8oHJWRNxdzbqp9qqIaOnW9uuIeGsVa/4KeEdEvFKtGn3UPg6YD4yK\niImSJgHviYiv1aD2D+n596sq/6/2lymgLwPfBP4v8EId6m+PiOckDZE0JCLukvStGtW+BbgHuAPY\nWaOa9awL8NGI+Lak6cBw4EPAdUDVAwAo/WTmMOA84Mga1L0cmBYRj8HuF6klwF9Xq6CkNuB8oFlS\n6Qc2DwOer1bdZD1wb6r7cldjRFxR5boAVwNfBL6faq6V9COg6gEA/Kzk8TDgvwL/r1rF9pcA+L2k\nNwIfAU6neIdUSy9KegPFC+L1kjZT8ktbZYdExJdrVOv1UBde/fd9N3BdRDwsqSb/5hHxXLemb0l6\nCPgfVS59QNeLfxrH4zWYBroPeJriO2kuL2nfCqytRkFJ10XEh4D3APMopqkPq0atPhwSEQ92+5Xa\nUYvCEfHj0mVJS4BfVave/hIA84E7gXHAQyXtojicqurhOcUv6zbgM8AHgb8ALq1yzS4/k/Su9AG7\nWqpXXYCHJN1G8e86V9JhwK5aFJZUOhUyhOKIoBb/j1ZK+gHwv9LyBym+WqVqIuIp4ClgajXrdPPX\n6c3c74Dv1LBuqWclvYk0FZO+xPLpOo
1lPDCyWjvfL84BdJE0PyI+VcN6v4qIv5G0lVfn7breNuyi\nOEz+54j4XhXHsBU4BHgF2J7qR0T8RRXrdT3XN6S6r1S7brcxDAH+ERgeEZ+TdAzFtx/eU4Pad/Hq\n899BMSf/LxHxeJXr/g3F/TT+JjXdAzwRET/rfauKa/b0+w1V/LeWdDHwKaCZPac+umpW+80cksZR\nfAr3P1NMKT8JfCAFYjXrimI69aWS5meAr3Q/Mhi0mvtTALzeSDoKuC8ijq9ijSHAB4DmiPhqejE8\nOiIeqFbNVPcXwOUR8fOStqsj4uPVrJvqzKcI2HdExJslDQduj4iTalB7GPDf2PPkd0TEV6tcdxXF\nSd/fpOU24LMRsc/dT2Mgav1mLtX8fLemgymO8l6G2px/kPTbiJhY7TpdfBloFaX54tOrXOa7wBSg\nLS1vBa6qck0oXgC/JKl07rtqJyS7OSUiLqKYdiMiXgAOrFHtm4H/QnG09VL6qcX5nvcBiyQdL+nj\nwIXAtBrUrYtav/gnh6WfVoqjkOHAEcAngZY+thtMD0mq+huZLvvLOYDXrYio9tzhKRHRIunXqd4L\n6au2q+1F4AzgSkn/m2JOula2p/tLd83RNlKjcwDAmIg4q0a1douI9eld/80U8+PTIuJPtR7H/iwi\nLgWQdDfQEhFb0/L/BCq7DeDAnQJ8QNJTFG8suqa+JlWjmANg31evF0NFxA7gQkl/R3GlwvAa1AW4\nEvgpMFLS1yneHf9jjWrfJ+ktXVMx1SbpN+w5/34kxVepPyCJar0wZG4UxXmtLq+ktlqYXqM6gANg\nf1CvF8N/7XoQEYvSC9VFNahLRFyfLr08g+Id0rkR8Wg1a5a8EA8FPiJpPfBnqvwODTinSvu13l0L\nPCjpp2n5XGBRLQpX+0Rzdz4JvB+Q9Fe8+mJ4Z7VfDHMk6di+1tf6P65VV7rc97S0eHdE/Lqe46kW\nB4CZWaZ8FZCZWaYcAGZmmXIAmJWQtFPSakkPS1oj6Qvpw3Z9bdMk6fxajdFssDgAzPb0p4g4MSJO\nAM4EzgYu6WebJopvzTTbp/gksFkJSS9FxBtKlsdRfOnaCOBYiq+dPjStnhMR90m6H3gzxXfGLKa4\nNPcyik+BHwR8NyK+X7MnYTZADgCzEt0DILW9CBxP8TUbuyJim6TxwJKIaE03a/nvEXFO6j8bGBkR\nX5N0EHAvcF5EPFnTJ2PWD38QzGzgDgCuknQixbc2HtdLv2nApPQ1wgCHU3ytrwPAXlccAGZ9SFNA\nO4HNFOcCfg9Mpjh/tq23zYBPR8RtNRmkWZl8EtisF+l7lf4VuCqKudLDgacjYhfFbSgbUtet7HnX\nqtuAT3XdsUvScZIOxex1xkcAZns6WNJqiumeHRQnfbu+B/57wI8lfRj4d179Gui1wE5Jayi+M+bb\nFFcGrUo3+eik+D4Zs9cVnwQ2M8uUp4DMzDLlADAzy5QDwMwsUw4AM7NMOQDMzDLlADAzy5QDwMws\nUw4AM7NM/X+WgGyX8gJ3yQAAAABJRU5ErkJggg==\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7fc4cc6ea6d8>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"chart(docs, \"language_iso2\").plot.bar()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"Counter({'de': 13,\n",
" 'en': 1547,\n",
" 'es': 5,\n",
" 'fi': 1,\n",
" 'fr': 4,\n",
" 'hu': 1,\n",
" 'it': 1,\n",
" 'ja': 5,\n",
" 'ko': 1,\n",
" 'ru': 3,\n",
" 'zh': 23})"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Counter([doc.hyperdata[\"language_iso2\"] for doc in docs])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# Count the documents of the corpus whose language is not the given one;\n",
"# the actual deletion is left commented out below.\n",
"def cleanCorpusWithLang(corpus_id, lang):\n",
" return (session.query(Node.id).filter(Node.parent_id == corpus_id)\n",
" .filter(Node.hyperdata[\"language_iso2\"].astext != lang)\n",
" .count()\n",
" #.delete()\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"57"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cleanCorpusWithLang(corpus_id, 'en')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"[(True, 'This is an english paragraph.\\n '),\n",
" (False, '\"This is an english paragraph.\\n\\nThis is an english paragraph.\\n ')]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"abstract0 = \"\"\"\"Ceci est un paragraphe en français.\n",
"\n",
"This is an english paragraph.\n",
" \"\"\"\n",
"\n",
"abstract1 = \"\"\"\"This is an english paragraph.\n",
"\n",
"This is an english paragraph.\n",
" \"\"\"\n",
"\n",
"def clean_lang_inText(lang, text):\n",
" \n",
" texts_before = nltk.tokenize.blankline_tokenize(text)\n",
" texts_after = '\\n\\n'.join([sentence \n",
" for sentence in texts_before\n",
" if detect_lang(sentence) == lang\n",
" ])\n",
" \n",
" return (len(texts_before) != len(nltk.tokenize.blankline_tokenize(texts_after)), texts_after)\n",
"\n",
"[clean_lang_inText('en', abstract) for abstract in [abstract0, abstract1]]\n",
"\n",
"# TODO update each document accordingly"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# TODO: update all the abstracts with the function above (clean_lang_inText)"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Measures IMT Tools"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"154"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scan_hal(\"machine learning AND deep\")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"90"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Query syntax (HAL-style query = Gargantext query)\n",
"# \"network analysis\" = network <-> analysis\n",
"# \"network OR analysis\" = network | analysis\n",
"# \"network AND analysis\" = network & analysis\n",
"\n",
"scan_gargantext(corpus_id, 'english', \"machine | learning & deep\")\n",
"\n",
"# \"network NOT analysis\" = @@ to_tsquery('network') !! to_tsquery('analysis')\n",
"# (the function needs to be changed if NOT has to be supported)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"[('network analysis', 'network <-> analysis'),\n",
" ('big data AND something', '(big <-> data) & something')]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Strengths / weaknesses of the IMT\n",
"# Each pair is (HAL query, Gargantext query)\n",
"queries = [ (\"network analysis\" , \"network <-> analysis\" )\n",
" , (\"big data AND something\" , \"(big <-> data) & something\")\n",
" ]\n",
"[(query[0], query[1]) for query in queries]"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"def imt_vs_hal(corpus_id, queryHal, queryGarg):\n",
" return((scan_gargantext(corpus_id, 'english', queryGarg), scan_hal(queryHal)))\n",
" #return((scan_gargantext(corpus_id, 'english', queryGarg) *100 / scan_hal(queryHal)))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"[(5, 10649), (0, 5)]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Then chart it to see your strengths and weaknesses!\n",
"[imt_vs_hal(corpus_id, query[0], query[1]) for query in queries]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Graph generation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# TODO Cooccurrences optimization"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# TODO optimize the distributional distance"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# List Management"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# TODO (front end): add a checkbox to choose between merging with and overwriting the previous list"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# optimize the list merge"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3rc1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment