Commit 777cf4cc authored by Alexandre Delanoë

[FIX] clean groups + handling duplicates (TODO inside)

parent e55c6044
Pipeline #1360 failed with stage
...@@ -59,7 +59,7 @@ CREATE TABLE public.ngrams_postag ( ...@@ -59,7 +59,7 @@ CREATE TABLE public.ngrams_postag (
lemm_id INTEGER NOT NULL, lemm_id INTEGER NOT NULL,
score INTEGER DEFAULT 1 ::integer NOT NULL, score INTEGER DEFAULT 1 ::integer NOT NULL,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE, FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE,
FOREIGN KEY (lemm_id) REFERENCES public.ngrams(id) ON DELETE CASCADE FOREIGN KEY (lemm_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
); );
ALTER TABLE public.ngrams_postag OWNER TO gargantua; ALTER TABLE public.ngrams_postag OWNER TO gargantua;
......
...@@ -127,6 +127,7 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where ...@@ -127,6 +127,7 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text } newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text }
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable) deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable)
instance IsHashable NgramsTerm where instance IsHashable NgramsTerm where
hash (NgramsTerm t) = hash t hash (NgramsTerm t) = hash t
......
...@@ -88,8 +88,10 @@ groupWith (GroupParams l _m _n _) t = ...@@ -88,8 +88,10 @@ groupWith (GroupParams l _m _n _) t =
-- | This lemmatization group done with CoreNLP algo (or others) -- | This lemmatization group done with CoreNLP algo (or others)
groupWith (GroupWithPosTag _ _ m) t = groupWith (GroupWithPosTag _ _ m) t =
case HashMap.lookup (unNgramsTerm t) m of case HashMap.lookup (unNgramsTerm t) m of
Nothing -> t Nothing -> clean t
Just t' -> NgramsTerm t' Just t' -> clean $ NgramsTerm t'
where
clean (NgramsTerm t) = NgramsTerm $ Text.replace "-" " " t
-------------------------------------------------------------------- --------------------------------------------------------------------
stemPatches :: GroupParams stemPatches :: GroupParams
......
...@@ -140,11 +140,12 @@ queryInsertNgramsPostag = [sql| ...@@ -140,11 +140,12 @@ queryInsertNgramsPostag = [sql|
FROM input_rows ir FROM input_rows ir
JOIN ins_form_ret form ON form.terms = ir.form JOIN ins_form_ret form ON form.terms = ir.form
JOIN ins_lem_ret lem ON lem.terms = ir.lem JOIN ins_lem_ret lem ON lem.terms = ir.lem
-- GROUP BY ir.lang_id, ir.algo_id, ir.postag, form.id, lem.id -- GROUP BY ir.lang_id, ir.algo_id, ir.postag, form.id, lem.id
-- ORDER BY s DESC -- ORDER BY s DESC
-- LIMIT 1 -- LIMIT 1
ON CONFLICT (lang_id,algo_id,postag,ngrams_id,lemm_id) ON CONFLICT (lang_id,algo_id,postag,ngrams_id,lemm_id)
DO UPDATE SET score = ngrams_postag.score + 1 DO NOTHING -- acceptable for now since we are using NP mainly
-- DO UPDATE SET score = ngrams_postag.score + 1
) )
SELECT terms,id FROM ins_form_ret SELECT terms,id FROM ins_form_ret
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment