Merge branch 'mat'

Intégration des modifs de Mat (dev parsing, bug fix for ngrams)

Merge branch 'mat'
Intégration des modifs de Mat (dev parsing, bug fix for ngrams)
0b35b6a7 · Administrator · 0aa95012 · 610ee647 · 0b35b6a7 · 0b35b6a7
Commit 0b35b6a7 authored Oct 28, 2014 by Administrator
Hide whitespace changes
Inline Side-by-side

Showing with 421 additions and 0 deletions

Test extractor cache-checkpoint.ipynb .ipynb_checkpoints/Test extractor cache-checkpoint.ipynb +198 -0

Test extractor cache.ipynb Test extractor cache.ipynb +223 -0

No files found.
--- a/.ipynb_checkpoints/Test extractor cache-checkpoint.ipynb
+++ b/.ipynb_checkpoints/Test extractor cache-checkpoint.ipynb
+{
+ "metadata": {
+  "name": "",
+  "signature": "sha256:d03d3f5dbf9a1dbfc43deb947718f31529d3d67b0901f8e743b23ce28a9f3205"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "from parsing.NgramsExtractors import NgramsExtractorsCache"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 1
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c = NgramsExtractorsCache()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 2
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c[\"en\"]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 3,
+       "text": [
+        "<parsing.NgramsExtractors.EnglishNgramsExtractor.EnglishNgramsExtractor at 0x7fc3aa431f98>"
+       ]
+      }
+     ],
+     "prompt_number": 3
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c[\"fre\"]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 5,
+       "text": [
+        "<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
+       ]
+      }
+     ],
+     "prompt_number": 5
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "p = c[\"fr\"]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 6
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "p.extract_ngrams(\"En voil\u00e0 un beau parseur !\")"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 8,
+       "text": [
+        "[[('beau', 'NN'), ('parseur', 'NN')]]"
+       ]
+      }
+     ],
+     "prompt_number": 8
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c[\"fr\"]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 9,
+       "text": [
+        "<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
+       ]
+      }
+     ],
+     "prompt_number": 9
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c[\"french\"]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 10,
+       "text": [
+        "<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
+       ]
+      }
+     ],
+     "prompt_number": 10
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c[\"german\"]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 11,
+       "text": [
+        "<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
+       ]
+      }
+     ],
+     "prompt_number": 11
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c[\"dutch\"]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 12,
+       "text": [
+        "<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
+       ]
+      }
+     ],
+     "prompt_number": 12
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c[\"]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
\ No newline at end of file
--- a/Test extractor cache.ipynb
+++ b/Test extractor cache.ipynb
+{
+ "metadata": {
+  "name": "",
+  "signature": "sha256:cb74945a57bed4d2ec124c7c05411b9346c7601e8339e613ddbc37fb950c4d86"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "from parsing.NgramsExtractors import NgramsExtractorsCache"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 1
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c = NgramsExtractorsCache()"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 2
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c[\"en\"]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 3,
+       "text": [
+        "<parsing.NgramsExtractors.EnglishNgramsExtractor.EnglishNgramsExtractor at 0x7f8d14947c88>"
+       ]
+      }
+     ],
+     "prompt_number": 3
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c[\"fre\"]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 4,
+       "text": [
+        "<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f8d24a97d30>"
+       ]
+      }
+     ],
+     "prompt_number": 4
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "p = c[\"fr\"]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 5
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "p.extract_ngrams(\"En voil\u00e0 un beau parseur !\")"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 6,
+       "text": [
+        "[[('beau', 'NN'), ('parseur', 'NN')]]"
+       ]
+      }
+     ],
+     "prompt_number": 6
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c[\"fr\"]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 7,
+       "text": [
+        "<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f8d24a97d30>"
+       ]
+      }
+     ],
+     "prompt_number": 7
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c[\"french\"]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 8,
+       "text": [
+        "<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f8d24a97d30>"
+       ]
+      }
+     ],
+     "prompt_number": 8
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c[\"german\"]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 9,
+       "text": [
+        "<parsing.NgramsExtractors.NgramsExtractor.NgramsExtractor at 0x7f8d24a979e8>"
+       ]
+      }
+     ],
+     "prompt_number": 9
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c[\"dutch\"]"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 10,
+       "text": [
+        "<parsing.NgramsExtractors.NgramsExtractor.NgramsExtractor at 0x7f8d24a979e8>"
+       ]
+      }
+     ],
+     "prompt_number": 10
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "c[\"italian\"].extract_ngrams(\"Est-ce un texte ?\")"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Warning: parsing empty text\n"
+       ]
+      },
+      {
+       "metadata": {},
+       "output_type": "pyout",
+       "prompt_number": 14,
+       "text": [
+        "[]"
+       ]
+      }
+     ],
+     "prompt_number": 14
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
\ No newline at end of file