Commit 0723ca24 authored by Romain Loth

[fix] stoplist: 4 regex corrected

parent 67ffa91d
......@@ -37,11 +37,10 @@ def is_stop_word(ngram, stop_words=None):
, "(.*)(différent)(.*)"
, "(.*)(travers)(.*)"
# academic stamps
, ".*\belsevier\b.*"
, ".*\bwiley\b.*"
, ".*\bspringer\b.*"
, ".*university press\b.*"
, ".*\bstudy\b.*"
, ".*elsevier.*"
, ".*wiley.*"
, ".*springer.*"
, ".*university press.*"
# academic terms when alone ~~> usually not informative
, "hypothes[ie]s$"
, "analys[ie]s$"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment