"\u001b[0;32m/srv/gargantext/gargantext/util/toolchain/ngrams_extraction.py\u001b[0m in \u001b[0;36mextract_ngrams\u001b[0;34m(corpus, keys, do_subngrams)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[0;31m#load available taggers for default langage of plateform\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[0;31m#print(LANGUAGES.keys())\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 56\u001b[0;31m tagger_bots = {lang: load_tagger(lang) for lang in corpus.hyperdata[\"languages\"] \\\n\u001b[0m\u001b[1;32m 57\u001b[0m if lang != \"__unknown__\"}\n\u001b[1;32m 58\u001b[0m \u001b[0mtagger_bots\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"__unknown__\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_tagger\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"en\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"print(\"L\\'identifiant du corpus est : %s\" % corpus_id)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# To get all the documents:\n",
"docs = documents(corpus_id)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"'Towards big data science in the decade ahead from ten years of InCoB and the 1st ISCB-Asia Joint Conference.'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# To get the title of the first document \n",
"# [0] indicates the index of the first document\n",
"docs[0].hyperdata['title']"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"\"The 2011 International Conference on Bioinformatics (InCoB) conference, which is the annual scientific conference of the Asia-Pacific Bioinformatics Network (APBioNet), is hosted by Kuala Lumpur, Malaysia, is co-organized with the first ISCB-Asia conference of the International Society for Computational Biology (ISCB). InCoB and the sequencing of the human genome are both celebrating their tenth anniversaries and InCoB's goalposts for the next decade, implementing standards in bioinformatics and globally distributed computational networks, will be discussed and adopted at this conference. Of the 49 manuscripts (selected from 104 submissions) accepted to BMC Genomics and BMC Bioinformatics conference supplements, 24 are featured in this issue, covering software tools, genome/proteome analysis, systems biology (networks, pathways, bioimaging) and drug discovery and design.\""
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# To get the abstract of the first document (0)\n",
"docs[0].hyperdata['abstract']"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"'Shoba Ranganathan, Christian Schönbach, Janet Kelso, Burkhard Rost, Sheila Nathan, Tin Wee Tan'"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# To get the authors of the first document (0)\n",
"docs[0].hyperdata['authors']"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"'BMC bioinformatics'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# To get the source of the first document (0)\n",
"docs[0].hyperdata['source']"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# If I want to count:\n",
"myChart = chart(docs, \"publication_year\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7f48069c5208>"
"[clean_lang_inText('en', abstract) for abstract in [abstract0, abstract1]]\n",
"\n",
"# TODO update each document accordingly"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"# TODO update all the abstract with That function"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# Measures IMT Tools"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"ename": "ConnectionError",
"evalue": "HTTPSConnectionPool(host='api.archives-ouvertes.fr', port=443): Max retries exceeded with url: /search?wt=json&q=machine+learning+AND+deep&fl=+title_s%0A+++++++++++++++%2C+abstract_s%0A+++++++++++++++%2C+submittedDate_s%0A+++++++++++++++%2C+journalDate_s%0A+++++++++++++++%2C+authFullName_s%0A+++++++++++++++%2C+uri_s%0A+++++++++++++++%2C+isbn_s%0A+++++++++++++++%2C+issue_s%0A+++++++++++++++%2C+docType_s%0A+++++++++++++++%2C+journalPublisher_s%0A+++++++++++++&start=1&rows=10 (Caused by NewConnectionError('<requests.packages.urllib3.connection.VerifiedHTTPSConnection object at 0x7f48069d1f98>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution',))",
"\u001b[0;32m/home/alexandre/local/logiciels/python/env/3.5_20170123/lib/python3.5/site-packages/requests/packages/urllib3/connection.py\u001b[0m in \u001b[0;36m_new_conn\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 150\u001b[0m raise NewConnectionError(\n\u001b[0;32m--> 151\u001b[0;31m self, \"Failed to establish a new connection: %s\" % e)\n\u001b[0m\u001b[1;32m 152\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNewConnectionError\u001b[0m: <requests.packages.urllib3.connection.VerifiedHTTPSConnection object at 0x7f48069d1f98>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mMaxRetryError\u001b[0m: HTTPSConnectionPool(host='api.archives-ouvertes.fr', port=443): Max retries exceeded with url: /search?wt=json&q=machine+learning+AND+deep&fl=+title_s%0A+++++++++++++++%2C+abstract_s%0A+++++++++++++++%2C+submittedDate_s%0A+++++++++++++++%2C+journalDate_s%0A+++++++++++++++%2C+authFullName_s%0A+++++++++++++++%2C+uri_s%0A+++++++++++++++%2C+isbn_s%0A+++++++++++++++%2C+issue_s%0A+++++++++++++++%2C+docType_s%0A+++++++++++++++%2C+journalPublisher_s%0A+++++++++++++&start=1&rows=10 (Caused by NewConnectionError('<requests.packages.urllib3.connection.VerifiedHTTPSConnection object at 0x7f48069d1f98>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution',))",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;32m<ipython-input-16-b220cbbc8ecc>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mscan_hal\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"machine learning AND deep\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/home/alexandre/local/logiciels/python/env/3.5_20170123/lib/python3.5/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 583\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 584\u001b[0m \u001b[0;31m# Send the request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 585\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 586\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 587\u001b[0m \u001b[0;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mConnectionError\u001b[0m: HTTPSConnectionPool(host='api.archives-ouvertes.fr', port=443): Max retries exceeded with url: /search?wt=json&q=machine+learning+AND+deep&fl=+title_s%0A+++++++++++++++%2C+abstract_s%0A+++++++++++++++%2C+submittedDate_s%0A+++++++++++++++%2C+journalDate_s%0A+++++++++++++++%2C+authFullName_s%0A+++++++++++++++%2C+uri_s%0A+++++++++++++++%2C+isbn_s%0A+++++++++++++++%2C+issue_s%0A+++++++++++++++%2C+docType_s%0A+++++++++++++++%2C+journalPublisher_s%0A+++++++++++++&start=1&rows=10 (Caused by NewConnectionError('<requests.packages.urllib3.connection.VerifiedHTTPSConnection object at 0x7f48069d1f98>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution',))"