1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
{-|
Module : Gargantext.Database.Query.Table.Ngrams
Description : Deal with in Gargantext Database.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Query.Table.Ngrams
( module Gargantext.Database.Schema.Ngrams
, queryNgramsTable
, selectNgramsByDoc
, insertNgrams
, selectNgramsId
)
where
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Database.PostgreSQL.Simple qualified as PGS
import Gargantext.Core.Text.Ngrams (Ngrams(..), NgramsType)
import Gargantext.Database.Admin.Types.Node ( pgNodeId, CorpusId, ListId, DocId )
import Gargantext.Database.Prelude (runOpaQuery, formatPGSQuery, runPGSQuery, DBCmd)
import Gargantext.Database.Query.Join (leftJoin3)
import Gargantext.Database.Query.Table.ContextNodeNgrams2
import Gargantext.Database.Query.Table.NodeNgrams (queryNodeNgramsTable)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.NodeNgrams
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Types ( Indexed(Indexed) )
import Gargantext.Prelude
queryNgramsTable :: Select NgramsRead
queryNgramsTable = selectTable ngramsTable
selectNgramsByDoc :: [ListId] -> DocId -> NgramsType -> DBCmd err [Text]
selectNgramsByDoc lIds dId nt = runOpaQuery (query lIds dId nt)
where
join' :: Select (NgramsRead, NodeNgramsRead, ContextNodeNgrams2Read)
join' = leftJoin3 queryNgramsTable queryNodeNgramsTable queryContextNodeNgrams2Table on1 -- on2
where
on1 :: (NgramsRead, NodeNgramsRead, ContextNodeNgrams2Read) -> Column SqlBool
on1 (ng, nng, cnng) = (.&&)
(ng^.ngrams_id .== nng^.nng_ngrams_id)
(nng^.nng_id .== cnng^.cnng2_nodengrams_id)
query lIds' dId' nt' = proc () -> do
(ng,nng,cnng) <- join' -< ()
restrict -< foldl (\b lId -> ((pgNodeId lId) .== nng^.nng_node_id) .|| b) (sqlBool True) lIds'
restrict -< (pgNodeId dId') .== cnng^.cnng2_context_id
restrict -< (pgNgramsType nt') .== nng^.nng_ngrams_type
returnA -< ng^.ngrams_terms
_postNgrams :: CorpusId -> DocId -> [Text] -> DBCmd err Int
_postNgrams = undefined
_dbGetNgramsDb :: DBCmd err [NgramsDB]
_dbGetNgramsDb = runOpaQuery queryNgramsTable
-- TODO-ACCESS: access must not be checked here but when insertNgrams is called.
insertNgrams :: [Ngrams] -> DBCmd err (HashMap Text NgramsId)
insertNgrams ns =
if List.null ns
then pure HashMap.empty
else HashMap.fromList <$> map (\(Indexed i t) -> (t, i)) <$> (insertNgrams' ns)
-- TODO-ACCESS: access must not be checked here but when insertNgrams' is called.
insertNgrams' :: [Ngrams] -> DBCmd err [Indexed Int Text]
insertNgrams' ns = runPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns')
where
ns' = (\n -> (_ngramsTerms n, _ngramsSize n)) <$> ns
fields = map (QualifiedIdentifier Nothing) ["text", "int4"]
_insertNgrams_Debug :: [(Text, Size)] -> DBCmd err ByteString
_insertNgrams_Debug ns = formatPGSQuery queryInsertNgrams (PGS.Only $ Values fields ns)
where
fields = map (QualifiedIdentifier Nothing) ["text", "int4"]
----------------------
queryInsertNgrams :: PGS.Query
queryInsertNgrams = [sql|
WITH input_rows(terms,n) AS (?)
, ins AS (
INSERT INTO ngrams (terms,n)
SELECT * FROM input_rows
ON CONFLICT (terms) DO NOTHING -- unique index created here
RETURNING id,terms
)
SELECT id, terms
FROM ins
UNION ALL
SELECT c.id, terms
FROM input_rows
JOIN ngrams c USING (terms); -- columns of unique index
|]
--------------------------------------------------------------------------
selectNgramsId :: [Text] -> DBCmd err (Map NgramsId Text)
selectNgramsId ns =
if List.null ns
then pure Map.empty
else Map.fromList <$> map (\(Indexed i t) -> (i, t)) <$> (selectNgramsId' ns)
selectNgramsId' :: [Text] -> DBCmd err [Indexed Int Text]
selectNgramsId' ns = runPGSQuery querySelectNgramsId ( PGS.Only
$ Values fields ns
)
where
fields = map (\t -> QualifiedIdentifier Nothing t) ["text"]
querySelectNgramsId :: PGS.Query
querySelectNgramsId = [sql|
WITH input_rows(terms) AS (?)
SELECT n.id, n.terms
FROM ngrams n
JOIN input_rows ir ON ir.terms = n.terms
GROUP BY n.terms, n.id
|]