Commit f3720b35 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[postgres] attempt to fix the tsvector issue

However, the code doesn't work currently.
parent 29ee1972
Pipeline #1854 failed with stage
in 7 minutes and 37 seconds
......@@ -40,7 +40,7 @@ CREATE TABLE public.nodes (
ALTER TABLE public.nodes OWNER TO gargantua;
ALTER TABLE nodes
ADD COLUMN search_title tsvector
GENERATED ALWAYS AS (to_tsvector('english', coalesce("name", '') || ' ' || coalesce("hyperdata"->>'abstract', ''))) STORED;
GENERATED ALWAYS AS (to_tsvector('english', coalesce("hyperdata"->>'title', '') || ' ' || coalesce("hyperdata"->>'abstract', ''))) STORED;
--------------------------------------------------------------
-- | Ngrams
......
......@@ -43,11 +43,11 @@ searchDocInDatabase :: HasDBid NodeType
=> ParentId
-> Text
-> Cmd err [(NodeId, HyperdataDocument)]
searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t)
searchDocInDatabase _p t = runOpaQuery (queryDocInDatabase t)
where
-- | Global search query where ParentId is Master Node Corpus Id
queryDocInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb)
queryDocInDatabase _ q = proc () -> do
queryDocInDatabase :: Text -> O.Query (Column PGInt4, Column PGJsonb)
queryDocInDatabase q = proc () -> do
row <- queryNodeSearchTable -< ()
restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
restrict -< (_ns_typename row) .== (pgInt4 $ toDBid NodeDocument)
......@@ -213,66 +213,3 @@ queryContactViaDoc =
------------------------------------------------------------------------
newtype TSQuery = UnsafeTSQuery [Text]
-- | TODO [""] -> panic "error"
toTSQuery :: [Text] -> TSQuery
toTSQuery txt = UnsafeTSQuery $ map stemIt txt
instance IsString TSQuery
where
fromString = UnsafeTSQuery . words . cs
instance ToField TSQuery
where
toField (UnsafeTSQuery xs)
= Many $ intersperse (Plain " && ")
$ map (\q -> Many [ Plain "plainto_tsquery("
, Escape (cs q)
, Plain ")"
]
) xs
data Order = Asc | Desc
instance ToField Order
where
toField Asc = Plain "ASC"
toField Desc = Plain "DESC"
-- TODO
-- FIX fav
-- ADD ngrams count
-- TESTS
textSearchQuery :: Query
textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
\ , n.hyperdata->'title' \
\ , n.hyperdata->'source' \
\ , n.hyperdata->'authors' \
\ , COALESCE(nn.score,null) \
\ FROM nodes n \
\ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
\ WHERE \
\ n.search @@ (?::tsquery) \
\ AND (n.parent_id = ? OR nn.node1_id = ?) \
\ AND n.typename = ? \
\ ORDER BY n.hyperdata -> 'publication_date' ? \
\ offset ? limit ?;"
-- | Text Search Function for Master Corpus
-- TODO : text search for user corpus
-- Example:
-- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
-- textSearchTest pId q = textSearch q pId 5 0 Asc
textSearch :: HasDBid NodeType
=> TSQuery -> ParentId
-> Limit -> Offset -> Order
-> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
where
typeId = toDBid NodeDocument
module Gargantext.Database.Action.TSQuery where
import Data.Aeson
import Data.List (intersperse)
import Data.Maybe
import Data.String (IsString(..))
import Data.Text (Text, words)
import Database.PostgreSQL.Simple (Query)
import Database.PostgreSQL.Simple.ToField
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Prelude
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
newtype TSQuery = UnsafeTSQuery [Text]
-- | TODO [""] -> panic "error"
toTSQuery :: [Text] -> TSQuery
toTSQuery txt = UnsafeTSQuery $ map stemIt txt
instance IsString TSQuery
where
fromString = UnsafeTSQuery . words . cs
instance ToField TSQuery
where
toField (UnsafeTSQuery xs)
= Many $ intersperse (Plain " && ")
$ map (\q -> Many [ Plain "plainto_tsquery("
, Escape (cs q)
, Plain ")"
]
) xs
data Order = Asc | Desc
instance ToField Order
where
toField Asc = Plain "ASC"
toField Desc = Plain "DESC"
-- TODO
-- FIX fav
-- ADD ngrams count
-- TESTS
textSearchQuery :: Query
textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
\ , n.hyperdata->'title' \
\ , n.hyperdata->'source' \
\ , n.hyperdata->'authors' \
\ , COALESCE(nn.score,null) \
\ FROM nodes n \
\ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
\ WHERE \
\ n.search @@ (?::tsquery) \
\ AND (n.parent_id = ? OR nn.node1_id = ?) \
\ AND n.typename = ? \
\ ORDER BY n.hyperdata -> 'publication_date' ? \
\ offset ? limit ?;"
-- | Text Search Function for Master Corpus
-- TODO : text search for user corpus
-- Example:
-- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
-- textSearchTest pId q = textSearch q pId 5 0 Asc
textSearch :: HasDBid NodeType
=> TSQuery -> ParentId
-> Limit -> Offset -> Order
-> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
where
typeId = toDBid NodeDocument
......@@ -82,18 +82,26 @@ instance (Typeable hyperdata, ToSchema hyperdata) =>
declareNamedSchema = wellNamedSchema "_node_"
instance (Typeable hyperdata, ToSchema hyperdata) =>
ToSchema (NodePolySearch NodeId NodeTypeId
ToSchema (NodePolySearch NodeId
NodeTypeId
(Maybe UserId)
ParentId NodeName
UTCTime hyperdata (Maybe TSVector)
ParentId
NodeName
UTCTime
hyperdata
(Maybe TSVector)
) where
declareNamedSchema = wellNamedSchema "_ns_"
instance (Typeable hyperdata, ToSchema hyperdata) =>
ToSchema (NodePolySearch NodeId NodeTypeId
ToSchema (NodePolySearch NodeId
NodeTypeId
UserId
(Maybe ParentId) NodeName
UTCTime hyperdata (Maybe TSVector)
(Maybe ParentId)
NodeName
UTCTime
hyperdata
(Maybe TSVector)
) where
declareNamedSchema = wellNamedSchema "_ns_"
......@@ -115,12 +123,25 @@ instance (Arbitrary hyperdata
,Arbitrary toDBid
,Arbitrary userId
,Arbitrary nodeParentId
) => Arbitrary (NodePolySearch nodeId toDBid userId nodeParentId
NodeName UTCTime hyperdata (Maybe TSVector)) where
) => Arbitrary (NodePolySearch nodeId
toDBid
userId
nodeParentId
NodeName
UTCTime
hyperdata
(Maybe TSVector)
) where
--arbitrary = Node 1 1 (Just 1) 1 "name" (jour 2018 01 01) (arbitrary) (Just "")
arbitrary = NodeSearch <$> arbitrary <*> arbitrary <*> arbitrary
<*> arbitrary <*> arbitrary <*> arbitrary
<*> arbitrary <*> arbitrary
arbitrary = NodeSearch <$> arbitrary
<*> arbitrary
<*> arbitrary
<*> arbitrary
<*> arbitrary
<*> arbitrary
<*> arbitrary
<*> arbitrary
<*> arbitrary
------------------------------------------------------------------------
pgNodeId :: NodeId -> O.Column O.PGInt4
......
......@@ -44,8 +44,8 @@ import Control.Arrow (returnA)
import Control.Lens ((^.))
import Data.Aeson (FromJSON, ToJSON)
import Data.Aeson.TH (deriveJSON)
import qualified Database.PostgreSQL.Simple as DPS
import Database.PostgreSQL.Simple.SqlQQ (sql)
--import qualified Database.PostgreSQL.Simple as DPS
--import Database.PostgreSQL.Simple.SqlQQ (sql)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Data.Swagger
import qualified Data.Text as T
......@@ -61,14 +61,17 @@ import qualified Opaleye.Internal.Unpackspec()
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Action.TSQuery (toTSQuery)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Query.Filter
import Gargantext.Database.Query.Join (leftJoin5)
import Gargantext.Database.Query.Table.Ngrams
import Gargantext.Database.Query.Table.Node (queryNodeSearchTable)
import Gargantext.Database.Query.Table.NodeNode
import Gargantext.Database.Query.Table.NodeNodeNgrams
import Gargantext.Database.Prelude
import Gargantext.Database.Schema.Node
import Gargantext.Prelude (printDebug)
--import qualified Opaleye.Internal.Column as C
--import qualified Opaleye.Internal.HaskellDB.PrimQuery as HPQ
......@@ -307,30 +310,31 @@ runViewDocuments :: HasDBid NodeType
-> Maybe OrderBy
-> Maybe Text
-> Cmd err [FacetDoc]
runViewDocuments cId t _o _l _order query = do
docs <- runPGSQuery viewDocuments'
( cId
, ntId
, (if t then 0 else 1) :: Int
, fromMaybe "" query
, fromMaybe "" query)
pure $ (\(id, date, name', hyperdata, category, score) -> FacetDoc id date name' hyperdata category score score) <$> docs
-- runOpaQuery $ filterWith o l order sqlQuery
runViewDocuments cId t o l order query = do
-- docs <- runPGSQuery viewDocuments'
-- ( cId
-- , ntId
-- , (if t then 0 else 1) :: Int
-- , fromMaybe "" query
-- , fromMaybe "" query)
-- pure $ (\(id, date, name', hyperdata, category, score) -> FacetDoc id date name' hyperdata category score score) <$> docs
printDebug "[runViewDocuments] sqlQuery" $ showSql sqlQuery
runOpaQuery $ filterWith o l order sqlQuery
where
ntId :: Int
ntId = toDBid NodeDocument
-- sqlQuery = viewDocuments cId t ntId query
viewDocuments' :: DPS.Query
viewDocuments' = [sql|
SELECT n.id, n.date, n.name, n.hyperdata, nn.category, nn.score
FROM nodes AS n
JOIN nodes_nodes AS nn
ON n.id = nn.node2_id
WHERE nn.node1_id = ? -- corpusId
AND n.typename = ? -- NodeTypeId
AND nn.category = ? -- isTrash or not
AND (n.search_title @@ to_tsquery(?) OR ? = '') -- query with an OR hack for empty to_tsquery('') results
|]
sqlQuery = viewDocuments cId t ntId query
-- viewDocuments' :: DPS.Query
-- viewDocuments' = [sql|
-- SELECT n.id, n.date, n.name, n.hyperdata, nn.category, nn.score
-- FROM nodes AS n
-- JOIN nodes_nodes AS nn
-- ON n.id = nn.node2_id
-- WHERE nn.node1_id = ? -- corpusId
-- AND n.typename = ? -- NodeTypeId
-- AND nn.category = ? -- isTrash or not
-- AND (n.search_title @@ to_tsquery(?) OR ? = '') -- query with an OR hack for empty to_tsquery('') results
-- |]
runCountDocuments :: HasDBid NodeType => CorpusId -> IsTrash -> Maybe Text -> Cmd err Int
runCountDocuments cId t mQuery = do
......@@ -345,22 +349,27 @@ viewDocuments :: CorpusId
-> Maybe Text
-> Query FacetDocRead
viewDocuments cId t ntId mQuery = proc () -> do
n <- queryNodeTable -< ()
--n <- queryNodeTable -< ()
n <- queryNodeSearchTable -< ()
nn <- queryNodeNodeTable -< ()
restrict -< n^.node_id .== nn^.nn_node2_id
restrict -< n^.ns_id .== nn^.nn_node2_id
restrict -< nn^.nn_node1_id .== (pgNodeId cId)
restrict -< n^.node_typename .== (pgInt4 ntId)
restrict -< n^.ns_typename .== (pgInt4 ntId)
restrict -< if t then nn^.nn_category .== (pgInt4 0)
else nn^.nn_category .>= (pgInt4 1)
let query = (fromMaybe "" mQuery)
iLikeQuery = T.intercalate "" ["%", query, "%"]
restrict -< (n^.node_name) `ilike` (pgStrictText iLikeQuery)
returnA -< FacetDoc (_node_id n)
(_node_date n)
(_node_name n)
(_node_hyperdata n)
-- iLikeQuery = T.intercalate "" ["%", query, "%"]
-- restrict -< (n^.node_name) `ilike` (pgStrictText iLikeQuery)
restrict -< if query == ""
then pgBool True
else (n^.ns_search_title) @@ (pgTSQuery (T.unpack query))
--else (n^.ns_search_title) @@ (toTSQuery [query])
returnA -< FacetDoc (_ns_id n)
(_ns_date n)
(_ns_name n)
(_ns_hyperdata n)
(toNullable $ nn^.nn_category)
(toNullable $ nn^.nn_score)
(toNullable $ nn^.nn_score)
......
......@@ -154,6 +154,7 @@ data NodePolySearch id
, _ns_hyperdata :: hyperdata
, _ns_search :: search
, _ns_search_title :: search
} deriving (Show, Generic)
$(makeAdaptorAndInstance "pNodeSearch" ''NodePolySearch)
......@@ -173,6 +174,7 @@ nodeTableSearch = Table "nodes" ( pNodeSearch
, _ns_hyperdata = required "hyperdata"
, _ns_search = optional "search"
, _ns_search_title = optional "search_title"
}
)
------------------------------------------------------------------------
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment