Commit 556ec1c4 authored by delanoe

[FIX] Adding some default stop words.

parent 869bd484
......@@ -27,10 +27,10 @@ def is_stop_word(ngram, stop_words=None):
# , "(.*)(\.)(.*)" trop fort (enlève les sigles !)
, "(.*)(\,)(.*)"
, "(.*)(< ?/?p ?>)(.*)" # marques de paragraphes
, "(.*)(study)(.*)"
, "(.*)(study|elsevier)(.*)"
, "(.*)\b(xx|xi|xv)\b(.*)"
, "(.*)(result)(.*)"
, "(.*)(année|nombre|moitié)(.*)"
, "(.*)(year|année|nombre|moitié)(.*)"
, "(.*)(temps)(.*)"
, "(.*)(%)(.*)"
, "(.*)(\{)(.*)"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment