Commit 777cf4cc authored by Alexandre Delanoë

[FIX] clean groups + handling duplicates (TODO inside)

parent e55c6044
......@@ -127,6 +127,7 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text }
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable)
instance IsHashable NgramsTerm where
hash (NgramsTerm t) = hash t
......
......@@ -88,8 +88,10 @@ groupWith (GroupParams l _m _n _) t =
-- | This lemmatization group done with CoreNLP algo (or others)
groupWith (GroupWithPosTag _ _ m) t =
case HashMap.lookup (unNgramsTerm t) m of
Nothing -> t
Just t' -> NgramsTerm t'
Nothing -> clean t
Just t' -> clean $ NgramsTerm t'
where
clean (NgramsTerm t) = NgramsTerm $ Text.replace "-" " " t
--------------------------------------------------------------------
stemPatches :: GroupParams
......
......@@ -144,7 +144,8 @@ queryInsertNgramsPostag = [sql|
-- ORDER BY s DESC
-- LIMIT 1
ON CONFLICT (lang_id,algo_id,postag,ngrams_id,lemm_id)
DO UPDATE SET score = ngrams_postag.score + 1
DO NOTHING -- acceptable for now since we are using NP mainly
-- DO UPDATE SET score = ngrams_postag.score + 1
)
SELECT terms,id FROM ins_form_ret
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment