Commit 777cf4cc authored by Alexandre Delanoë

[FIX] clean groups + handling duplicates (TODO inside)

parent e55c6044
...@@ -127,6 +127,7 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where ...@@ -127,6 +127,7 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text } newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text }
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable) deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable)
instance IsHashable NgramsTerm where instance IsHashable NgramsTerm where
hash (NgramsTerm t) = hash t hash (NgramsTerm t) = hash t
......
...@@ -88,8 +88,10 @@ groupWith (GroupParams l _m _n _) t = ...@@ -88,8 +88,10 @@ groupWith (GroupParams l _m _n _) t =
-- | This lemmatization group done with CoreNLP algo (or others) -- | This lemmatization group done with CoreNLP algo (or others)
groupWith (GroupWithPosTag _ _ m) t = groupWith (GroupWithPosTag _ _ m) t =
case HashMap.lookup (unNgramsTerm t) m of case HashMap.lookup (unNgramsTerm t) m of
Nothing -> t Nothing -> clean t
Just t' -> NgramsTerm t' Just t' -> clean $ NgramsTerm t'
where
clean (NgramsTerm t) = NgramsTerm $ Text.replace "-" " " t
-------------------------------------------------------------------- --------------------------------------------------------------------
stemPatches :: GroupParams stemPatches :: GroupParams
......
...@@ -144,7 +144,8 @@ queryInsertNgramsPostag = [sql| ...@@ -144,7 +144,8 @@ queryInsertNgramsPostag = [sql|
-- ORDER BY s DESC -- ORDER BY s DESC
-- LIMIT 1 -- LIMIT 1
ON CONFLICT (lang_id,algo_id,postag,ngrams_id,lemm_id) ON CONFLICT (lang_id,algo_id,postag,ngrams_id,lemm_id)
DO UPDATE SET score = ngrams_postag.score + 1 DO NOTHING -- acceptable for now since we are using NP mainly
-- DO UPDATE SET score = ngrams_postag.score + 1
) )
SELECT terms,id FROM ins_form_ret SELECT terms,id FROM ins_form_ret
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment