NodeNgramsNgrams.hs 6.55 KB
{-|
Module      : Gargantext.Database.Schema.NodeNgramsNgrams
Description : 
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

NodeNgramsNgrams table is used to group Ngrams
- NodeId :: List Id
- NgramId_1, NgramId_2 where all NgramId_2 will be added to NgramId_1
- weight: probability of the relation (TODO, fixed to 1 for simple stemming)

Next Step benchmark:
- recursive queries of postgres
- group with: https://en.wikipedia.org/wiki/Nested_set_model

-}

{-# LANGUAGE Arrows                 #-}
{-# LANGUAGE FlexibleInstances      #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE MultiParamTypeClasses  #-}
{-# LANGUAGE NoImplicitPrelude      #-}
{-# LANGUAGE OverloadedStrings      #-}
{-# LANGUAGE QuasiQuotes            #-}
{-# LANGUAGE RankNTypes             #-}
{-# LANGUAGE TemplateHaskell        #-}
{-# OPTIONS_GHC -fno-warn-orphans   #-}

module Gargantext.Database.Schema.NodeNgramsNgrams
  where

import Control.Lens (view)
import Control.Lens.TH (makeLensesWith, abbreviatedFields)
import Control.Monad (void)
import Control.Monad.IO.Class (liftIO)
import Data.ByteString (ByteString)
import Data.Text (Text)
import Data.Maybe (Maybe)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Debug.Trace (trace)
import Gargantext.Database.Utils (Cmd, runOpaQuery, execPGSQuery, connection, formatPGSQuery)
import Gargantext.Database.Types.Node (ListId)
import Gargantext.Database.Schema.Node (pgNodeId)
import Gargantext.Prelude
import Opaleye
import qualified Database.PostgreSQL.Simple as PGS

data NodeNgramsNgramsPoly node_id ngram1_id ngram2_id weight =
  NodeNgramsNgrams { _nng_NodeId   :: node_id
                   , _nng_Ngram1Id :: ngram1_id
                   , _nng_Ngram2Id :: ngram2_id
                   , _nng_Weight   :: weight
                   } deriving (Show)

type NodeNgramsNgramsWrite =
  NodeNgramsNgramsPoly (Column PGInt4          )
                     (Column PGInt4          )
                     (Column PGInt4          )
                     (Maybe (Column PGFloat8))

type NodeNgramsNgramsRead  =
  NodeNgramsNgramsPoly (Column PGInt4  )
                     (Column PGInt4  )
                     (Column PGInt4  )
                     (Column PGFloat8)

type NodeNgramsNgrams =
  NodeNgramsNgramsPoly ListId
                     Int
                     Int
                    (Maybe Double)

$(makeAdaptorAndInstance "pNodeNgramsNgrams"
                         ''NodeNgramsNgramsPoly)
$(makeLensesWith abbreviatedFields
                         ''NodeNgramsNgramsPoly)


nodeNgramsNgramsTable :: Table NodeNgramsNgramsWrite NodeNgramsNgramsRead
nodeNgramsNgramsTable  =
  Table "nodes_ngrams_ngrams"
       ( pNodeNgramsNgrams NodeNgramsNgrams
                       { _nng_NodeId   = required "node_id"
                       , _nng_Ngram1Id = required "ngram1_id"
                       , _nng_Ngram2Id = required "ngram2_id"
                       , _nng_Weight   = optional "weight"
                       }
       )

queryNodeNgramsNgramsTable :: Query NodeNgramsNgramsRead
queryNodeNgramsNgramsTable = queryTable nodeNgramsNgramsTable

-- | Select NodeNgramsNgrams
-- TODO not optimized (get all ngrams without filters)
nodeNgramsNgrams :: Cmd err [NodeNgramsNgrams]
nodeNgramsNgrams = runOpaQuery queryNodeNgramsNgramsTable

instance QueryRunnerColumnDefault PGInt4 (Maybe Int) where
    queryRunnerColumnDefault = fieldQueryRunnerColumn

instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
    queryRunnerColumnDefault = fieldQueryRunnerColumn


-- TODO: Add option on conflict
insertNodeNgramsNgramsNew :: [NodeNgramsNgrams] -> Cmd err Int
insertNodeNgramsNgramsNew = insertNodeNgramsNgramsW
                 . map (\(NodeNgramsNgrams n ng1 ng2 maybeWeight) ->
                          NodeNgramsNgrams (pgNodeId n  )
                                           (pgInt4 ng1)
                                           (pgInt4 ng2)
                                           (pgDouble <$> maybeWeight)
                        )

insertNodeNgramsNgramsW :: [NodeNgramsNgramsWrite] -> Cmd err Int
insertNodeNgramsNgramsW ns = do
  c <- view connection
  liftIO $ fromIntegral <$> runInsertMany c nodeNgramsNgramsTable ns

------------------------------------------------------------------------
data Action   = Del | Add

type NgramsParent = Text
type NgramsChild  = Text


ngramsGroup :: Action -> ListId -> [(NgramsParent, NgramsChild, Maybe Double)]
             -> Cmd err ()
ngramsGroup _ _ [] = pure ()
ngramsGroup action listId ngs = trace (show ngs) $ runNodeNgramsNgrams q listId ngs
  where
    q = case action of
          Del -> queryDelNodeNgramsNgrams
          Add -> queryInsertNodeNgramsNgrams


runNodeNgramsNgrams :: PGS.Query -> ListId -> [(NgramsParent, NgramsChild, Maybe Double)] -> Cmd err ()
runNodeNgramsNgrams q listId ngs = void $ execPGSQuery q (listId, Values fields ngs')
  where
    ngs'   = map (\(ng1,ng2,w) -> (ng1,ng2,maybe 0 identity w)) ngs
    fields = map (\t -> QualifiedIdentifier Nothing t)
                 ["int4","text","text","float8"]

runNodeNgramsNgramsDebug :: PGS.Query -> ListId -> [(NgramsParent, NgramsChild, Maybe Double)] -> Cmd err ByteString
runNodeNgramsNgramsDebug q listId ngs = formatPGSQuery q (listId, Values fields ngs')
  where
    ngs'   = map (\(ng1,ng2,w) -> (ng1,ng2,maybe 0 identity w)) ngs
    fields = map (\t -> QualifiedIdentifier Nothing t)
                 ["int4","text","text","float8"]


--------------------------------------------------------------------
-- TODO: on conflict update weight
queryInsertNodeNgramsNgrams :: PGS.Query
queryInsertNodeNgramsNgrams = [sql|
    WITH nId AS ?
    WITH input_rows(ng1,ng2,w) AS (?)
    INSERT INTO nodes_ngrams_ngrams (node_id,ngram1_id,ngram2_id,weight)
    SELECT nId,ngrams1.id,ngrams2.id,w FROM input_rows
    JOIN ngrams ngrams1 ON ngrams1.terms = ng1
    JOIN ngrams ngrams2 ON ngrams2.terms = ng2
    ON CONFLICT (node_id,ngram1_id,ngram2_id) DO NOTHING -- unique index created here
           |]

queryDelNodeNgramsNgrams :: PGS.Query
queryDelNodeNgramsNgrams = [sql|
    WITH nId AS ?
    WITH input(ng1,ng2,w) AS (?)
    DELETE FROM nodes_ngrams_ngrams AS nnn
    USING ngrams AS ngrams1,
          ngrams AS ngrams2,
          input  AS input
    WHERE
          ngrams1.terms = input.ng1
      AND ngrams2.terms = input.ng2
      AND nnn.node_id   = input.nId
      AND nnn.ngram1_id = ngrams1.id
      AND nnn.ngram2_id = ngrams2.id
       ;
           |]