Commit a982e36a authored by Romain Loth

[fix] stoplist: 4 regex corrected

parent 2c5405a7
...@@ -37,11 +37,10 @@ def is_stop_word(ngram, stop_words=None): ...@@ -37,11 +37,10 @@ def is_stop_word(ngram, stop_words=None):
, "(.*)(différent)(.*)" , "(.*)(différent)(.*)"
, "(.*)(travers)(.*)" , "(.*)(travers)(.*)"
# academic stamps # academic stamps
, ".*\belsevier\b.*" , ".*elsevier.*"
, ".*\bwiley\b.*" , ".*wiley.*"
, ".*\bspringer\b.*" , ".*springer.*"
, ".*university press\b.*" , ".*university press.*"
, ".*\bstudy\b.*"
# academic terms when alone ~~> usually not informative # academic terms when alone ~~> usually not informative
, "hypothes[ie]s$" , "hypothes[ie]s$"
, "analys[ie]s$" , "analys[ie]s$"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment