Commit 4f9f1044 authored by delanoe

[NGRAMS WORKFLOW] Changing default parameters for tests.

parent 0b2cc7bf
......@@ -197,7 +197,7 @@ RESOURCETYPES = [
# linguistic extraction parameters ---------------------------------------------
DEFAULT_TFIDF_CUTOFF_RATIO = .75 # MAINLIST maximum terms in %
-DEFAULT_TFIDF_HARD_LIMIT = 3000 # MAINLIST maximum terms abs
+DEFAULT_TFIDF_HARD_LIMIT = 5000 # MAINLIST maximum terms abs
# (makes COOCS larger ~ O(N²) /!\)
DEFAULT_COOC_THRESHOLD = 2 # inclusive minimum for COOCS coefs
......@@ -205,7 +205,7 @@ DEFAULT_COOC_THRESHOLD = 2 # inclusive minimum for COOCS coefs
DEFAULT_MAPLIST_MAX = 350 # MAPLIST maximum terms
-DEFAULT_MAPLIST_MONOGRAMS_RATIO = .05 # part of monograms in MAPLIST
+DEFAULT_MAPLIST_MONOGRAMS_RATIO = .15 # part of monograms in MAPLIST
DEFAULT_MAX_NGRAM_LEN = 7 # limit used after POStagging rule
# (initial ngrams number is a power law of this /!\)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment