Commit 556ec1c4 authored by delanoe

[FIX] Adding some default stop words.

parent 869bd484
...@@ -27,10 +27,10 @@ def is_stop_word(ngram, stop_words=None): ...@@ -27,10 +27,10 @@ def is_stop_word(ngram, stop_words=None):
# , "(.*)(\.)(.*)" trop fort (enlève les sigles !) # , "(.*)(\.)(.*)" trop fort (enlève les sigles !)
, "(.*)(\,)(.*)" , "(.*)(\,)(.*)"
, "(.*)(< ?/?p ?>)(.*)" # marques de paragraphes , "(.*)(< ?/?p ?>)(.*)" # marques de paragraphes
, "(.*)(study)(.*)" , "(.*)(study|elsevier)(.*)"
, "(.*)\b(xx|xi|xv)\b(.*)" , "(.*)\b(xx|xi|xv)\b(.*)"
, "(.*)(result)(.*)" , "(.*)(result)(.*)"
, "(.*)(année|nombre|moitié)(.*)" , "(.*)(year|année|nombre|moitié)(.*)"
, "(.*)(temps)(.*)" , "(.*)(temps)(.*)"
, "(.*)(%)(.*)" , "(.*)(%)(.*)"
, "(.*)(\{)(.*)" , "(.*)(\{)(.*)"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment