Search.hs 10.6 KB
Newer Older
1 2
{-|
Module      : Gargantext.Database.TextSearch
Alexandre Delanoë's avatar
Alexandre Delanoë committed
3
Description : Postgres text search experimentation
4 5 6 7 8 9 10
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX
-}

11
{-# LANGUAGE Arrows            #-}
12
{-# LANGUAGE FlexibleContexts  #-}
13 14
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
15
{-# LANGUAGE RankNTypes        #-}
16

17
module Gargantext.Database.Action.Search where
18

19 20
import Control.Arrow (returnA)
import Control.Lens ((^.))
21
import Data.Aeson
22
import Data.List (intersperse, take, drop)
23
import Data.Map.Strict hiding (map, drop, take)
24
import Data.Maybe
25
import Data.String (IsString(..))
26
import Data.Text (Text, words, unpack, intercalate)
27
import Data.Time (UTCTime)
28
import Database.PostgreSQL.Simple (Query)
29
import Database.PostgreSQL.Simple.ToField
30
import Gargantext.Core.Types
31
import Gargantext.Database.Action.Query.Facet
32
import Gargantext.Database.Action.Query.Join (leftJoin6)
33
import Gargantext.Database.Action.Query.Node
34 35 36
import Gargantext.Database.Admin.Config (nodeTypeId)
import Gargantext.Database.Admin.Types.Node (NodeType(..))
import Gargantext.Database.Admin.Utils (Cmd, runPGSQuery, runOpaQuery, runCountOpaQuery)
37
import Gargantext.Database.Schema.Ngrams
38
import Gargantext.Database.Schema.Node
39
import Gargantext.Database.Schema.NodeNode hiding (joinInCorpus)
40
import Gargantext.Database.Schema.NodeNodeNgrams
41
import Gargantext.Prelude
42
import Gargantext.Text.Terms.Mono.Stem.En (stemIt)
43
import Opaleye hiding (Query, Order)
44
import qualified Opaleye as O hiding (Order)
45

46
------------------------------------------------------------------------
47 48 49
searchInDatabase :: ParentId
                 -> Text
                 -> Cmd err [(NodeId, HyperdataDocument)]
50
searchInDatabase p t = runOpaQuery (queryInDatabase p t)
51 52 53 54 55 56 57 58
  where
    -- | Global search query where ParentId is Master Node Corpus Id 
    queryInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
    queryInDatabase _ q = proc () -> do
        row <- queryNodeSearchTable -< ()
        restrict -< (_ns_search row)    @@ (pgTSQuery (unpack q))
        restrict -< (_ns_typename row) .== (pgInt4 $ nodeTypeId NodeDocument)
        returnA  -< (_ns_id row, _ns_hyperdata row)
59 60 61

------------------------------------------------------------------------
-- | todo add limit and offset and order
62 63 64 65 66 67 68
searchInCorpus :: CorpusId
               -> IsTrash
               -> [Text]
               -> Maybe Offset
               -> Maybe Limit
               -> Maybe OrderBy
               -> Cmd err [FacetDoc]
69 70 71 72 73
searchInCorpus cId t q o l order = runOpaQuery
                                 $ filterWith o l order
                                 $ queryInCorpus cId t
                                 $ intercalate " | "
                                 $ map stemIt q
74

75 76 77 78 79 80 81 82 83
searchCountInCorpus :: CorpusId
                    -> IsTrash
                    -> [Text]
                    -> Cmd err Int
searchCountInCorpus cId t q = runCountOpaQuery
                            $ queryInCorpus cId t
                            $ intercalate " | "
                            $ map stemIt q

84 85 86 87
queryInCorpus :: CorpusId
              -> IsTrash
              -> Text
              -> O.Query FacetDocRead
88
queryInCorpus cId t q = proc () -> do
89
  (n, nn) <- joinInCorpus -< ()
90
  restrict -< (nn^.nn_node1_id) .== (toNullable $ pgNodeId cId)
91
  restrict -< if t
92 93
                 then (nn^.nn_category) .== (toNullable $ pgInt4 0)
                 else (nn^.nn_category) .>= (toNullable $ pgInt4 1)
94
  restrict -< (n ^. ns_search)           @@ (pgTSQuery (unpack q))
95
  restrict -< (n ^. ns_typename )       .== (pgInt4 $ nodeTypeId NodeDocument)
96 97 98 99 100 101
  returnA  -< FacetDoc (n^.ns_id       )
                       (n^.ns_date     )
                       (n^.ns_name     )
                       (n^.ns_hyperdata)
                       (nn^.nn_category)
                       (nn^.nn_score   )
102

103 104
joinInCorpus :: O.Query (NodeSearchRead, NodeNodeReadNull)
joinInCorpus = leftJoin queryNodeSearchTable queryNodeNodeTable cond
105
  where
106
    cond :: (NodeSearchRead, NodeNodeRead) -> Column PGBool
107
    cond (n, nn) = nn^.nn_node2_id .== _ns_id n
108

109
------------------------------------------------------------------------
110
type AuthorName = Text
111

112
-- | TODO Optim: Offset and Limit in the Query
113
-- TODO-SECURITY check
114 115 116 117 118 119 120 121 122 123 124 125
searchInCorpusWithContacts
  :: CorpusId
  -> ListId
  -> [Text]
  -> Maybe Offset
  -> Maybe Limit
  -> Maybe OrderBy
  -> Cmd err [FacetPaired Int UTCTime HyperdataDocument Int [Pair Int Text]]
searchInCorpusWithContacts cId lId q o l order =
      take (maybe 10 identity l)
  <$> drop (maybe 0 identity o)
  <$> map (\((i,u,h,s), ps) -> FacetPaired i u h s ps)
126
  <$> toList <$> fromListWith (<>)
127 128 129 130 131
  <$> map (\(FacetPaired i u h s (p1,p2)) -> ( (i,u,h,s)
                                             , catMaybes [Pair <$> p1 <*> p2]
                                             )
          )
  <$> searchInCorpusWithContacts' cId lId q o l order
132

133
-- TODO-SECURITY check
134 135 136 137 138 139 140 141 142
searchInCorpusWithContacts'
  :: CorpusId
  -> ListId
  -> [Text]
  -> Maybe Offset
  -> Maybe Limit
  -> Maybe OrderBy
  -> Cmd err [(FacetPaired Int UTCTime HyperdataDocument Int (Maybe Int, Maybe Text))]
searchInCorpusWithContacts' cId lId q o l order =
143 144 145
  runOpaQuery $ queryInCorpusWithContacts cId lId o l order
              $ intercalate " | "
              $ map stemIt q
146 147 148 149 150 151 152 153


queryInCorpusWithContacts
  :: CorpusId
  -> ListId
  -> Maybe Offset
  -> Maybe Limit
  -> Maybe OrderBy
154
  -> Text
155
  -> O.Query FacetPairedRead
156
queryInCorpusWithContacts cId _lId _ _ _ q = proc () -> do
157 158 159 160 161 162 163 164 165 166 167 168 169
  (n, (nn, (_nng, (ngrams', (_, contacts))))) <- joinInCorpusWithContacts -< ()
  restrict -< (n^.ns_search)        @@ (pgTSQuery  $ unpack q  )
  restrict -< (n^.ns_typename)     .== (pgInt4 $ nodeTypeId NodeDocument)
--  restrict -< (nng^.nnng_node1_id) .== (toNullable $ pgNodeId lId)
  restrict -< (nn^.nn_node1_id)    .== (toNullable $ pgNodeId cId)
--   -- restrict -< (nng_listType nng)      .== (toNullable $ pgNgramsType Authors)
--  restrict -< (contacts^.node_typename) .== (toNullable $ pgInt4 $ nodeTypeId NodeContact)
--   -- let contact_id    = ifThenElse (isNull $ _node_id contacts) (toNullable $ pgInt4 0) (_node_id contacts)
  returnA  -< FacetPaired (n^.ns_id)
                          (n^.ns_date)
                          (n^.ns_hyperdata)
                          (pgInt4 0)
                          (contacts^.node_id, ngrams'^.ngrams_terms)
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194

joinInCorpusWithContacts :: O.Query ( NodeSearchRead
                                    , ( NodeNodeReadNull
                                      , ( NodeNodeNgramsReadNull
                                        , ( NgramsReadNull
                                          , ( NodeNodeNgramsReadNull
                                            , NodeReadNull
                                            )
                                          )
                                        )
                                      )
                                    )
joinInCorpusWithContacts =
  leftJoin6
  queryNodeTable
  queryNodeNodeNgramsTable
  queryNgramsTable
  queryNodeNodeNgramsTable
  queryNodeNodeTable
  queryNodeSearchTable
  cond12
  cond23
  cond34
  cond45
  cond56
195
    where
196
      cond12 :: (NodeNodeNgramsRead, NodeRead) -> Column PGBool
197
      cond12 (nnng, n2) = n2^.node_id .== nnng^.nnng_node1_id
198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234

      cond23 :: (NgramsRead, (NodeNodeNgramsRead, NodeReadNull)) -> Column PGBool
      cond23 (ng2, (nnng2, _)) = nnng2^.nnng_ngrams_id .== ng2^.ngrams_id

      cond34 :: ( NodeNodeNgramsRead
                , ( NgramsRead
                  , ( NodeNodeNgramsReadNull
                    , NodeReadNull
                    )
                  )
                ) -> Column PGBool
      cond34 (nng, (ng, (_,_))) = ng^.ngrams_id .== nng^.nnng_ngrams_id
 
      cond45 :: ( NodeNodeRead
                , ( NodeNodeNgramsRead
                  , ( NgramsReadNull
                    , ( NodeNodeNgramsReadNull
                      , NodeReadNull
                      )
                    )
                  )
                ) -> Column PGBool
      cond45 (nn, (nng, (_,(_,_)))) = nng^.nnng_node1_id .== nn^.nn_node2_id
 
      cond56 :: ( NodeSearchRead
                , ( NodeNodeRead
                  , ( NodeNodeNgramsReadNull
                    , ( NgramsReadNull
                      , ( NodeNodeNgramsReadNull
                        , NodeReadNull
                        )
                      )
                    )
                  )
                ) -> Column PGBool
      cond56 (n, (nn, (_,(_,(_,_))))) = _ns_id n .== nn^.nn_node2_id
 
235

236
newtype TSQuery = UnsafeTSQuery [Text]
237

238
-- | TODO [""] -> panic "error"
239
toTSQuery :: [Text] -> TSQuery
240
toTSQuery txt = UnsafeTSQuery $ map stemIt txt
241

242

243 244 245 246 247 248 249 250 251 252 253 254 255 256
instance IsString TSQuery
  where
    fromString = UnsafeTSQuery . words . cs


instance ToField TSQuery
  where
    toField (UnsafeTSQuery xs)
      = Many  $ intersperse (Plain " && ")
              $ map (\q -> Many [ Plain "plainto_tsquery("
                                , Escape (cs q)
                                , Plain ")"
                                ]
                    ) xs
257

258
data Order    = Asc | Desc
259

260 261 262 263
instance ToField Order
  where
    toField Asc  = Plain "ASC"
    toField Desc = Plain "DESC"
264 265 266 267 268

-- TODO
-- FIX fav
-- ADD ngrams count
-- TESTS
269
textSearchQuery :: Query
270
textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year'     \
271 272
\                   , n.hyperdata->'title'                          \
\                   , n.hyperdata->'source'                         \
273 274 275 276 277
\                   , n.hyperdata->'authors'                        \
\                   , COALESCE(nn.score,null)                       \
\                      FROM nodes n                                 \
\            LEFT JOIN nodes_nodes nn  ON nn.node2_id = n.id        \
\              WHERE                                                \
278
\                n.search @@ (?::tsquery)                           \
279 280
\                AND (n.parent_id = ? OR nn.node1_id = ?)           \
\                AND n.typename  = ?                                \
281
\                ORDER BY n.hyperdata -> 'publication_date' ?       \
282
\            offset ? limit ?;"
283

284 285 286
-- | Text Search Function for Master Corpus
-- TODO : text search for user corpus
-- Example:
287 288 289
-- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
-- textSearchTest pId q = textSearch q pId 5 0 Asc
textSearch :: TSQuery -> ParentId
290
           -> Limit -> Offset -> Order
291 292
           -> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
293 294
  where
    typeId = nodeTypeId NodeDocument
295

296