From 5149e7ced7cfb4284e5ea1c13840b559256c12bb Mon Sep 17 00:00:00 2001
From: rloth <romain.loth@iscpif.fr>
Date: Tue, 8 Dec 2015 10:00:22 +0100
Subject: [PATCH] <p> dans la stoplist via regex

---
 ngram/stop.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ngram/stop.py b/ngram/stop.py
index 26be40d0..cb4e28b3 100644
--- a/ngram/stop.py
+++ b/ngram/stop.py
@@ -56,6 +56,7 @@ def isStopWord(ngram, stop_words=None):
             , "(.*)\d(.*)"
             , "(.*)(\.)(.*)"
             , "(.*)(\,)(.*)"
+            , "(.*)(< ?/?p ?>)(.*)"       # marques de paragraphes
             , "(.*)(study)(.*)"
             , "(.*)(xx|xi|xv)(.*)"
             , "(.*)(result)(.*)"
-- 
2.21.0