[ngrams] selectLems query function refactoring

It's faster now, makes less joins, doesn't need VALUES.
parent fdacee80
Pipeline #5817 passed with stages
in 148 minutes and 10 seconds
...@@ -24,7 +24,7 @@ import Data.HashMap.Strict qualified as HashMap ...@@ -24,7 +24,7 @@ import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List import Data.List qualified as List
import Database.PostgreSQL.Simple qualified as PGS import Database.PostgreSQL.Simple qualified as PGS
import Gargantext.Core import Gargantext.Core
import Gargantext.Core.Text.Ngrams (Ngrams, ngramsSize, ngramsTerms) import Gargantext.Core.Text.Ngrams (Ngrams(..), ngramsSize, ngramsTerms)
import Gargantext.Core.Types ( POS ) import Gargantext.Core.Types ( POS )
import Gargantext.Database.Prelude (runPGSQuery, runPGSQuery_, DBCmd) import Gargantext.Database.Prelude (runPGSQuery, runPGSQuery_, DBCmd)
import Gargantext.Database.Query.Table.Ngrams ( NgramsId, insertNgrams ) import Gargantext.Database.Query.Table.Ngrams ( NgramsId, insertNgrams )
...@@ -154,14 +154,40 @@ SELECT terms,id FROM ins_form_ret ...@@ -154,14 +154,40 @@ SELECT terms,id FROM ins_form_ret
-- TODO add lang and postag algo -- TODO add lang and postag algo
-- TODO remove when form == lem in insert -- TODO remove when form == lem in insert
selectLems :: Lang -> NLPServerConfig -> [Ngrams] -> DBCmd err [(Form, Lem)] selectLems :: Lang -> NLPServerConfig -> [Ngrams] -> DBCmd err [(Form, Lem)]
selectLems l (NLPServerConfig { server }) ns = runPGSQuery querySelectLems (PGS.Only $ Values fields datas) selectLems l (NLPServerConfig { server }) ns = runPGSQuery querySelectLems (PGS.In (map _ngramsTerms ns), toDBid l, toDBid server)
----------------------
querySelectLems :: PGS.Query
querySelectLems = [sql|
WITH
trms
AS (SELECT id, terms, n
FROM ngrams
WHERE terms IN ?)
, input_rows(lang_id, algo_id, terms,n)
AS (SELECT ? as lang_id, ? as algo_id, terms, n, id
FROM trms)
, lems AS ( select ir.terms as t1, n2.terms as t2, sum(np.score) as score from input_rows ir
JOIN ngrams_postag np ON np.ngrams_id = ir.id
JOIN ngrams n2 ON n2.id = np.lemm_id
WHERE np.lang_id = ir.lang_id
AND np.algo_id = ir.algo_id
GROUP BY ir.terms, n2.terms
ORDER BY score DESC
)
SELECT t1,t2 from lems
|]
-- | This is the same as 'selectLems', but slower.
selectLems' :: Lang -> NLPServerConfig -> [Ngrams] -> DBCmd err [(Form, Lem)]
selectLems' l (NLPServerConfig { server }) ns = runPGSQuery querySelectLems' (PGS.Only $ Values fields datas)
where where
fields = map (QualifiedIdentifier Nothing) ["int4","int4","text", "int4"] fields = map (QualifiedIdentifier Nothing) ["int4","int4","text", "int4"]
datas = map (\d -> [toField $ toDBid l, toField $ toDBid server] <> toRow d) ns datas = map (\d -> [toField $ toDBid l, toField $ toDBid server] <> toRow d) ns
----------------------
querySelectLems :: PGS.Query querySelectLems' :: PGS.Query
querySelectLems = [sql| querySelectLems' = [sql|
WITH input_rows(lang_id, algo_id, terms,n) WITH input_rows(lang_id, algo_id, terms,n)
AS (?) -- ((VALUES ('automata' :: "text"))) AS (?) -- ((VALUES ('automata' :: "text")))
, lems AS ( select n1.terms as t1 ,n2.terms as t2 ,sum(np.score) as score from input_rows ir , lems AS ( select n1.terms as t1 ,n2.terms as t2 ,sum(np.score) as score from input_rows ir
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment