Commit 5149e7ce authored by Romain Loth

<p> dans la stoplist via regex

parent ca537f58
...@@ -56,6 +56,7 @@ def isStopWord(ngram, stop_words=None): ...@@ -56,6 +56,7 @@ def isStopWord(ngram, stop_words=None):
, "(.*)\d(.*)" , "(.*)\d(.*)"
, "(.*)(\.)(.*)" , "(.*)(\.)(.*)"
, "(.*)(\,)(.*)" , "(.*)(\,)(.*)"
, "(.*)(< ?/?p ?>)(.*)" # marques de paragraphes
, "(.*)(study)(.*)" , "(.*)(study)(.*)"
, "(.*)(xx|xi|xv)(.*)" , "(.*)(xx|xi|xv)(.*)"
, "(.*)(result)(.*)" , "(.*)(result)(.*)"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment