Commit 0b35b6a7 authored by Administrator

Merge branch 'mat'

Intégration des modifs de Mat (dev parsing, bug fix for ngrams)
parents 0aa95012 610ee647
{
"metadata": {
"name": "",
"signature": "sha256:d03d3f5dbf9a1dbfc43deb947718f31529d3d67b0901f8e743b23ce28a9f3205"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from parsing.NgramsExtractors import NgramsExtractorsCache"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c = NgramsExtractorsCache()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"en\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"<parsing.NgramsExtractors.EnglishNgramsExtractor.EnglishNgramsExtractor at 0x7fc3aa431f98>"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"fre\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 5,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"p = c[\"fr\"]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"p.extract_ngrams(\"En voil\u00e0 un beau parseur !\")"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": [
"[[('beau', 'NN'), ('parseur', 'NN')]]"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"fr\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"french\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"german\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 11,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"dutch\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 12,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f0e1fb0c978>"
]
}
],
"prompt_number": 12
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"]"
],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
{
"metadata": {
"name": "",
"signature": "sha256:cb74945a57bed4d2ec124c7c05411b9346c7601e8339e613ddbc37fb950c4d86"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from parsing.NgramsExtractors import NgramsExtractorsCache"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c = NgramsExtractorsCache()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"en\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 3,
"text": [
"<parsing.NgramsExtractors.EnglishNgramsExtractor.EnglishNgramsExtractor at 0x7f8d14947c88>"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"fre\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 4,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f8d24a97d30>"
]
}
],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"p = c[\"fr\"]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"p.extract_ngrams(\"En voil\u00e0 un beau parseur !\")"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 6,
"text": [
"[[('beau', 'NN'), ('parseur', 'NN')]]"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"fr\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f8d24a97d30>"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"french\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 8,
"text": [
"<parsing.NgramsExtractors.FrenchNgramsExtractor.FrenchNgramsExtractor at 0x7f8d24a97d30>"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"german\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"<parsing.NgramsExtractors.NgramsExtractor.NgramsExtractor at 0x7f8d24a979e8>"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"dutch\"]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 10,
"text": [
"<parsing.NgramsExtractors.NgramsExtractor.NgramsExtractor at 0x7f8d24a979e8>"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"c[\"italian\"].extract_ngrams(\"Est-ce un texte ?\")"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Warning: parsing empty text\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 14,
"text": [
"[]"
]
}
],
"prompt_number": 14
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment