1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
{-|
Module : Gargantext.Database.Schema.NodeNgramsNgrams
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
NodeNgramsNgrams table is used to group Ngrams
- NodeId :: List Id
- NgramId_1, NgramId_2 where all NgramId_2 will be added to NgramId_1
- weight: probability of the relation (TODO, fixed to 1 for simple stemming)
Next Step benchmark:
- recursive queries of postgres
- group with: https://en.wikipedia.org/wiki/Nested_set_model
-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Gargantext.Database.Schema.NodeNgramsNgrams
where
import Control.Lens (view)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Control.Monad (void)
import Control.Monad.IO.Class (liftIO)
import Data.ByteString (ByteString)
import Data.Text (Text)
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Debug.Trace (trace)
import Gargantext.Database.Utils (Cmd, runOpaQuery, execPGSQuery, connection, formatPGSQuery)
import Gargantext.Database.Types.Node (ListId)
import Gargantext.Database.Schema.Node (pgNodeId)
import Gargantext.Prelude
import Opaleye
import qualified Database.PostgreSQL.Simple as PGS
data NodeNgramsNgramsPoly node_id ngram1_id ngram2_id weight =
NodeNgramsNgrams { _nng_NodeId :: node_id
, _nng_Ngram1Id :: ngram1_id
, _nng_Ngram2Id :: ngram2_id
, _nng_Weight :: weight
} deriving (Show)
type NodeNgramsNgramsWrite =
NodeNgramsNgramsPoly (Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Maybe (Column PGFloat8))
type NodeNgramsNgramsRead =
NodeNgramsNgramsPoly (Column PGInt4 )
(Column PGInt4 )
(Column PGInt4 )
(Column PGFloat8)
type NodeNgramsNgrams =
NodeNgramsNgramsPoly ListId
Int
Int
(Maybe Double)
$(makeAdaptorAndInstance "pNodeNgramsNgrams"
''NodeNgramsNgramsPoly)
$(makeLensesWith abbreviatedFields
''NodeNgramsNgramsPoly)
nodeNgramsNgramsTable :: Table NodeNgramsNgramsWrite NodeNgramsNgramsRead
nodeNgramsNgramsTable =
Table "nodes_ngrams_ngrams"
( pNodeNgramsNgrams NodeNgramsNgrams
{ _nng_NodeId = required "node_id"
, _nng_Ngram1Id = required "ngram1_id"
, _nng_Ngram2Id = required "ngram2_id"
, _nng_Weight = optional "weight"
}
)
queryNodeNgramsNgramsTable :: Query NodeNgramsNgramsRead
queryNodeNgramsNgramsTable = queryTable nodeNgramsNgramsTable
-- | Select NodeNgramsNgrams
-- TODO not optimized (get all ngrams without filters)
nodeNgramsNgrams :: Cmd err [NodeNgramsNgrams]
nodeNgramsNgrams = runOpaQuery queryNodeNgramsNgramsTable
instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
queryRunnerColumnDefault = fieldQueryRunnerColumn
-- TODO: Add option on conflict
insertNodeNgramsNgramsNew :: [NodeNgramsNgrams] -> Cmd err Int
insertNodeNgramsNgramsNew = insertNodeNgramsNgramsW
. map (\(NodeNgramsNgrams n ng1 ng2 maybeWeight) ->
NodeNgramsNgrams (pgNodeId n )
(pgInt4 ng1)
(pgInt4 ng2)
(pgDouble <$> maybeWeight)
)
insertNodeNgramsNgramsW :: [NodeNgramsNgramsWrite] -> Cmd err Int
insertNodeNgramsNgramsW ns = do
c <- view connection
liftIO $ fromIntegral <$> runInsertMany c nodeNgramsNgramsTable ns
------------------------------------------------------------------------
data Action = Del | Add
type NgramsParent = Text
type NgramsChild = Text
ngramsGroup :: Action -> ListId -> [(NgramsParent, NgramsChild, Maybe Double)]
-> Cmd err ()
ngramsGroup _ _ [] = pure ()
ngramsGroup action listId ngs = trace (show ngs) $ runNodeNgramsNgrams q listId ngs
where
q = case action of
Del -> queryDelNodeNgramsNgrams
Add -> queryInsertNodeNgramsNgrams
runNodeNgramsNgrams :: PGS.Query -> ListId -> [(NgramsParent, NgramsChild, Maybe Double)] -> Cmd err ()
runNodeNgramsNgrams q listId ngs = void $ execPGSQuery q (listId, Values fields ngs')
where
ngs' = map (\(ng1,ng2,w) -> (ng1,ng2,maybe 0 identity w)) ngs
fields = map (\t -> QualifiedIdentifier Nothing t)
["int4","text","text","float8"]
runNodeNgramsNgramsDebug :: PGS.Query -> ListId -> [(NgramsParent, NgramsChild, Maybe Double)] -> Cmd err ByteString
runNodeNgramsNgramsDebug q listId ngs = formatPGSQuery q (listId, Values fields ngs')
where
ngs' = map (\(ng1,ng2,w) -> (ng1,ng2,maybe 0 identity w)) ngs
fields = map (\t -> QualifiedIdentifier Nothing t)
["int4","text","text","float8"]
--------------------------------------------------------------------
-- TODO: on conflict update weight
queryInsertNodeNgramsNgrams :: PGS.Query
queryInsertNodeNgramsNgrams = [sql|
WITH nId AS ?
WITH input_rows(ng1,ng2,w) AS (?)
INSERT INTO nodes_ngrams_ngrams (node_id,ngram1_id,ngram2_id,weight)
SELECT nId,ngrams1.id,ngrams2.id,w FROM input_rows
JOIN ngrams ngrams1 ON ngrams1.terms = ng1
JOIN ngrams ngrams2 ON ngrams2.terms = ng2
ON CONFLICT (node_id,ngram1_id,ngram2_id) DO NOTHING -- unique index created here
|]
queryDelNodeNgramsNgrams :: PGS.Query
queryDelNodeNgramsNgrams = [sql|
WITH nId AS ?
WITH input(ng1,ng2,w) AS (?)
DELETE FROM nodes_ngrams_ngrams AS nnn
USING ngrams AS ngrams1,
ngrams AS ngrams2,
input AS input
WHERE
ngrams1.terms = input.ng1
AND ngrams2.terms = input.ng2
AND nnn.node_id = input.nId
AND nnn.ngram1_id = ngrams1.id
AND nnn.ngram2_id = ngrams2.id
;
|]