Commit f3720b35 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[postgres] attempt to fix the tsvector issue

However, the code doesn't work currently.
parent 29ee1972
Pipeline #1854 failed with stage
in 7 minutes and 37 seconds
...@@ -40,7 +40,7 @@ CREATE TABLE public.nodes ( ...@@ -40,7 +40,7 @@ CREATE TABLE public.nodes (
ALTER TABLE public.nodes OWNER TO gargantua; ALTER TABLE public.nodes OWNER TO gargantua;
ALTER TABLE nodes ALTER TABLE nodes
ADD COLUMN search_title tsvector ADD COLUMN search_title tsvector
GENERATED ALWAYS AS (to_tsvector('english', coalesce("name", '') || ' ' || coalesce("hyperdata"->>'abstract', ''))) STORED; GENERATED ALWAYS AS (to_tsvector('english', coalesce("hyperdata"->>'title', '') || ' ' || coalesce("hyperdata"->>'abstract', ''))) STORED;
-------------------------------------------------------------- --------------------------------------------------------------
-- | Ngrams -- | Ngrams
......
...@@ -43,11 +43,11 @@ searchDocInDatabase :: HasDBid NodeType ...@@ -43,11 +43,11 @@ searchDocInDatabase :: HasDBid NodeType
=> ParentId => ParentId
-> Text -> Text
-> Cmd err [(NodeId, HyperdataDocument)] -> Cmd err [(NodeId, HyperdataDocument)]
searchDocInDatabase p t = runOpaQuery (queryDocInDatabase p t) searchDocInDatabase _p t = runOpaQuery (queryDocInDatabase t)
where where
-- | Global search query where ParentId is Master Node Corpus Id -- | Global search query where ParentId is Master Node Corpus Id
queryDocInDatabase :: ParentId -> Text -> O.Query (Column PGInt4, Column PGJsonb) queryDocInDatabase :: Text -> O.Query (Column PGInt4, Column PGJsonb)
queryDocInDatabase _ q = proc () -> do queryDocInDatabase q = proc () -> do
row <- queryNodeSearchTable -< () row <- queryNodeSearchTable -< ()
restrict -< (_ns_search row) @@ (pgTSQuery (unpack q)) restrict -< (_ns_search row) @@ (pgTSQuery (unpack q))
restrict -< (_ns_typename row) .== (pgInt4 $ toDBid NodeDocument) restrict -< (_ns_typename row) .== (pgInt4 $ toDBid NodeDocument)
...@@ -213,66 +213,3 @@ queryContactViaDoc = ...@@ -213,66 +213,3 @@ queryContactViaDoc =
------------------------------------------------------------------------ ------------------------------------------------------------------------
newtype TSQuery = UnsafeTSQuery [Text]
-- | TODO [""] -> panic "error"
toTSQuery :: [Text] -> TSQuery
toTSQuery txt = UnsafeTSQuery $ map stemIt txt
instance IsString TSQuery
where
fromString = UnsafeTSQuery . words . cs
instance ToField TSQuery
where
toField (UnsafeTSQuery xs)
= Many $ intersperse (Plain " && ")
$ map (\q -> Many [ Plain "plainto_tsquery("
, Escape (cs q)
, Plain ")"
]
) xs
data Order = Asc | Desc
instance ToField Order
where
toField Asc = Plain "ASC"
toField Desc = Plain "DESC"
-- TODO
-- FIX fav
-- ADD ngrams count
-- TESTS
textSearchQuery :: Query
textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
\ , n.hyperdata->'title' \
\ , n.hyperdata->'source' \
\ , n.hyperdata->'authors' \
\ , COALESCE(nn.score,null) \
\ FROM nodes n \
\ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
\ WHERE \
\ n.search @@ (?::tsquery) \
\ AND (n.parent_id = ? OR nn.node1_id = ?) \
\ AND n.typename = ? \
\ ORDER BY n.hyperdata -> 'publication_date' ? \
\ offset ? limit ?;"
-- | Text Search Function for Master Corpus
-- TODO : text search for user corpus
-- Example:
-- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
-- textSearchTest pId q = textSearch q pId 5 0 Asc
textSearch :: HasDBid NodeType
=> TSQuery -> ParentId
-> Limit -> Offset -> Order
-> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
where
typeId = toDBid NodeDocument
module Gargantext.Database.Action.TSQuery where
import Data.Aeson
import Data.List (intersperse)
import Data.Maybe
import Data.String (IsString(..))
import Data.Text (Text, words)
import Database.PostgreSQL.Simple (Query)
import Database.PostgreSQL.Simple.ToField
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Prelude
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
newtype TSQuery = UnsafeTSQuery [Text]
-- | TODO [""] -> panic "error"
toTSQuery :: [Text] -> TSQuery
toTSQuery txt = UnsafeTSQuery $ map stemIt txt
instance IsString TSQuery
where
fromString = UnsafeTSQuery . words . cs
instance ToField TSQuery
where
toField (UnsafeTSQuery xs)
= Many $ intersperse (Plain " && ")
$ map (\q -> Many [ Plain "plainto_tsquery("
, Escape (cs q)
, Plain ")"
]
) xs
data Order = Asc | Desc
instance ToField Order
where
toField Asc = Plain "ASC"
toField Desc = Plain "DESC"
-- TODO
-- FIX fav
-- ADD ngrams count
-- TESTS
textSearchQuery :: Query
textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year' \
\ , n.hyperdata->'title' \
\ , n.hyperdata->'source' \
\ , n.hyperdata->'authors' \
\ , COALESCE(nn.score,null) \
\ FROM nodes n \
\ LEFT JOIN nodes_nodes nn ON nn.node2_id = n.id \
\ WHERE \
\ n.search @@ (?::tsquery) \
\ AND (n.parent_id = ? OR nn.node1_id = ?) \
\ AND n.typename = ? \
\ ORDER BY n.hyperdata -> 'publication_date' ? \
\ offset ? limit ?;"
-- | Text Search Function for Master Corpus
-- TODO : text search for user corpus
-- Example:
-- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
-- textSearchTest pId q = textSearch q pId 5 0 Asc
textSearch :: HasDBid NodeType
=> TSQuery -> ParentId
-> Limit -> Offset -> Order
-> Cmd err [(Int,Value,Value,Value, Value, Maybe Int)]
textSearch q p l o ord = runPGSQuery textSearchQuery (q,p,p,typeId,ord,o,l)
where
typeId = toDBid NodeDocument
...@@ -82,18 +82,26 @@ instance (Typeable hyperdata, ToSchema hyperdata) => ...@@ -82,18 +82,26 @@ instance (Typeable hyperdata, ToSchema hyperdata) =>
declareNamedSchema = wellNamedSchema "_node_" declareNamedSchema = wellNamedSchema "_node_"
instance (Typeable hyperdata, ToSchema hyperdata) => instance (Typeable hyperdata, ToSchema hyperdata) =>
ToSchema (NodePolySearch NodeId NodeTypeId ToSchema (NodePolySearch NodeId
(Maybe UserId) NodeTypeId
ParentId NodeName (Maybe UserId)
UTCTime hyperdata (Maybe TSVector) ParentId
NodeName
UTCTime
hyperdata
(Maybe TSVector)
) where ) where
declareNamedSchema = wellNamedSchema "_ns_" declareNamedSchema = wellNamedSchema "_ns_"
instance (Typeable hyperdata, ToSchema hyperdata) => instance (Typeable hyperdata, ToSchema hyperdata) =>
ToSchema (NodePolySearch NodeId NodeTypeId ToSchema (NodePolySearch NodeId
UserId NodeTypeId
(Maybe ParentId) NodeName UserId
UTCTime hyperdata (Maybe TSVector) (Maybe ParentId)
NodeName
UTCTime
hyperdata
(Maybe TSVector)
) where ) where
declareNamedSchema = wellNamedSchema "_ns_" declareNamedSchema = wellNamedSchema "_ns_"
...@@ -115,12 +123,25 @@ instance (Arbitrary hyperdata ...@@ -115,12 +123,25 @@ instance (Arbitrary hyperdata
,Arbitrary toDBid ,Arbitrary toDBid
,Arbitrary userId ,Arbitrary userId
,Arbitrary nodeParentId ,Arbitrary nodeParentId
) => Arbitrary (NodePolySearch nodeId toDBid userId nodeParentId ) => Arbitrary (NodePolySearch nodeId
NodeName UTCTime hyperdata (Maybe TSVector)) where toDBid
userId
nodeParentId
NodeName
UTCTime
hyperdata
(Maybe TSVector)
) where
--arbitrary = Node 1 1 (Just 1) 1 "name" (jour 2018 01 01) (arbitrary) (Just "") --arbitrary = Node 1 1 (Just 1) 1 "name" (jour 2018 01 01) (arbitrary) (Just "")
arbitrary = NodeSearch <$> arbitrary <*> arbitrary <*> arbitrary arbitrary = NodeSearch <$> arbitrary
<*> arbitrary <*> arbitrary <*> arbitrary <*> arbitrary
<*> arbitrary <*> arbitrary <*> arbitrary
<*> arbitrary
<*> arbitrary
<*> arbitrary
<*> arbitrary
<*> arbitrary
<*> arbitrary
------------------------------------------------------------------------ ------------------------------------------------------------------------
pgNodeId :: NodeId -> O.Column O.PGInt4 pgNodeId :: NodeId -> O.Column O.PGInt4
......
...@@ -44,8 +44,8 @@ import Control.Arrow (returnA) ...@@ -44,8 +44,8 @@ import Control.Arrow (returnA)
import Control.Lens ((^.)) import Control.Lens ((^.))
import Data.Aeson (FromJSON, ToJSON) import Data.Aeson (FromJSON, ToJSON)
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import qualified Database.PostgreSQL.Simple as DPS --import qualified Database.PostgreSQL.Simple as DPS
import Database.PostgreSQL.Simple.SqlQQ (sql) --import Database.PostgreSQL.Simple.SqlQQ (sql)
import Data.Profunctor.Product.TH (makeAdaptorAndInstance) import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Data.Swagger import Data.Swagger
import qualified Data.Text as T import qualified Data.Text as T
...@@ -61,14 +61,17 @@ import qualified Opaleye.Internal.Unpackspec() ...@@ -61,14 +61,17 @@ import qualified Opaleye.Internal.Unpackspec()
import Gargantext.Core import Gargantext.Core
import Gargantext.Core.Types import Gargantext.Core.Types
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Action.TSQuery (toTSQuery)
import Gargantext.Database.Admin.Types.Hyperdata import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Query.Filter import Gargantext.Database.Query.Filter
import Gargantext.Database.Query.Join (leftJoin5) import Gargantext.Database.Query.Join (leftJoin5)
import Gargantext.Database.Query.Table.Ngrams import Gargantext.Database.Query.Table.Ngrams
import Gargantext.Database.Query.Table.Node (queryNodeSearchTable)
import Gargantext.Database.Query.Table.NodeNode import Gargantext.Database.Query.Table.NodeNode
import Gargantext.Database.Query.Table.NodeNodeNgrams import Gargantext.Database.Query.Table.NodeNodeNgrams
import Gargantext.Database.Prelude import Gargantext.Database.Prelude
import Gargantext.Database.Schema.Node import Gargantext.Database.Schema.Node
import Gargantext.Prelude (printDebug)
--import qualified Opaleye.Internal.Column as C --import qualified Opaleye.Internal.Column as C
--import qualified Opaleye.Internal.HaskellDB.PrimQuery as HPQ --import qualified Opaleye.Internal.HaskellDB.PrimQuery as HPQ
...@@ -307,30 +310,31 @@ runViewDocuments :: HasDBid NodeType ...@@ -307,30 +310,31 @@ runViewDocuments :: HasDBid NodeType
-> Maybe OrderBy -> Maybe OrderBy
-> Maybe Text -> Maybe Text
-> Cmd err [FacetDoc] -> Cmd err [FacetDoc]
runViewDocuments cId t _o _l _order query = do runViewDocuments cId t o l order query = do
docs <- runPGSQuery viewDocuments' -- docs <- runPGSQuery viewDocuments'
( cId -- ( cId
, ntId -- , ntId
, (if t then 0 else 1) :: Int -- , (if t then 0 else 1) :: Int
, fromMaybe "" query -- , fromMaybe "" query
, fromMaybe "" query) -- , fromMaybe "" query)
pure $ (\(id, date, name', hyperdata, category, score) -> FacetDoc id date name' hyperdata category score score) <$> docs -- pure $ (\(id, date, name', hyperdata, category, score) -> FacetDoc id date name' hyperdata category score score) <$> docs
-- runOpaQuery $ filterWith o l order sqlQuery printDebug "[runViewDocuments] sqlQuery" $ showSql sqlQuery
runOpaQuery $ filterWith o l order sqlQuery
where where
ntId :: Int ntId :: Int
ntId = toDBid NodeDocument ntId = toDBid NodeDocument
-- sqlQuery = viewDocuments cId t ntId query sqlQuery = viewDocuments cId t ntId query
viewDocuments' :: DPS.Query -- viewDocuments' :: DPS.Query
viewDocuments' = [sql| -- viewDocuments' = [sql|
SELECT n.id, n.date, n.name, n.hyperdata, nn.category, nn.score -- SELECT n.id, n.date, n.name, n.hyperdata, nn.category, nn.score
FROM nodes AS n -- FROM nodes AS n
JOIN nodes_nodes AS nn -- JOIN nodes_nodes AS nn
ON n.id = nn.node2_id -- ON n.id = nn.node2_id
WHERE nn.node1_id = ? -- corpusId -- WHERE nn.node1_id = ? -- corpusId
AND n.typename = ? -- NodeTypeId -- AND n.typename = ? -- NodeTypeId
AND nn.category = ? -- isTrash or not -- AND nn.category = ? -- isTrash or not
AND (n.search_title @@ to_tsquery(?) OR ? = '') -- query with an OR hack for empty to_tsquery('') results -- AND (n.search_title @@ to_tsquery(?) OR ? = '') -- query with an OR hack for empty to_tsquery('') results
|] -- |]
runCountDocuments :: HasDBid NodeType => CorpusId -> IsTrash -> Maybe Text -> Cmd err Int runCountDocuments :: HasDBid NodeType => CorpusId -> IsTrash -> Maybe Text -> Cmd err Int
runCountDocuments cId t mQuery = do runCountDocuments cId t mQuery = do
...@@ -345,22 +349,27 @@ viewDocuments :: CorpusId ...@@ -345,22 +349,27 @@ viewDocuments :: CorpusId
-> Maybe Text -> Maybe Text
-> Query FacetDocRead -> Query FacetDocRead
viewDocuments cId t ntId mQuery = proc () -> do viewDocuments cId t ntId mQuery = proc () -> do
n <- queryNodeTable -< () --n <- queryNodeTable -< ()
n <- queryNodeSearchTable -< ()
nn <- queryNodeNodeTable -< () nn <- queryNodeNodeTable -< ()
restrict -< n^.node_id .== nn^.nn_node2_id restrict -< n^.ns_id .== nn^.nn_node2_id
restrict -< nn^.nn_node1_id .== (pgNodeId cId) restrict -< nn^.nn_node1_id .== (pgNodeId cId)
restrict -< n^.node_typename .== (pgInt4 ntId) restrict -< n^.ns_typename .== (pgInt4 ntId)
restrict -< if t then nn^.nn_category .== (pgInt4 0) restrict -< if t then nn^.nn_category .== (pgInt4 0)
else nn^.nn_category .>= (pgInt4 1) else nn^.nn_category .>= (pgInt4 1)
let query = (fromMaybe "" mQuery) let query = (fromMaybe "" mQuery)
iLikeQuery = T.intercalate "" ["%", query, "%"] -- iLikeQuery = T.intercalate "" ["%", query, "%"]
restrict -< (n^.node_name) `ilike` (pgStrictText iLikeQuery) -- restrict -< (n^.node_name) `ilike` (pgStrictText iLikeQuery)
restrict -< if query == ""
then pgBool True
else (n^.ns_search_title) @@ (pgTSQuery (T.unpack query))
--else (n^.ns_search_title) @@ (toTSQuery [query])
returnA -< FacetDoc (_node_id n) returnA -< FacetDoc (_ns_id n)
(_node_date n) (_ns_date n)
(_node_name n) (_ns_name n)
(_node_hyperdata n) (_ns_hyperdata n)
(toNullable $ nn^.nn_category) (toNullable $ nn^.nn_category)
(toNullable $ nn^.nn_score) (toNullable $ nn^.nn_score)
(toNullable $ nn^.nn_score) (toNullable $ nn^.nn_score)
......
...@@ -144,16 +144,17 @@ data NodePolySearch id ...@@ -144,16 +144,17 @@ data NodePolySearch id
date date
hyperdata hyperdata
search = search =
NodeSearch { _ns_id :: id NodeSearch { _ns_id :: id
, _ns_typename :: typename , _ns_typename :: typename
, _ns_user_id :: user_id , _ns_user_id :: user_id
-- , nodeUniqId :: shaId -- , nodeUniqId :: shaId
, _ns_parent_id :: parent_id , _ns_parent_id :: parent_id
, _ns_name :: name , _ns_name :: name
, _ns_date :: date , _ns_date :: date
, _ns_hyperdata :: hyperdata , _ns_hyperdata :: hyperdata
, _ns_search :: search , _ns_search :: search
, _ns_search_title :: search
} deriving (Show, Generic) } deriving (Show, Generic)
$(makeAdaptorAndInstance "pNodeSearch" ''NodePolySearch) $(makeAdaptorAndInstance "pNodeSearch" ''NodePolySearch)
...@@ -163,16 +164,17 @@ $(makeLenses ''NodePolySearch) ...@@ -163,16 +164,17 @@ $(makeLenses ''NodePolySearch)
nodeTableSearch :: Table NodeSearchWrite NodeSearchRead nodeTableSearch :: Table NodeSearchWrite NodeSearchRead
nodeTableSearch = Table "nodes" ( pNodeSearch nodeTableSearch = Table "nodes" ( pNodeSearch
NodeSearch { _ns_id = optional "id" NodeSearch { _ns_id = optional "id"
, _ns_typename = required "typename" , _ns_typename = required "typename"
, _ns_user_id = required "user_id" , _ns_user_id = required "user_id"
, _ns_parent_id = required "parent_id" , _ns_parent_id = required "parent_id"
, _ns_name = required "name" , _ns_name = required "name"
, _ns_date = optional "date" , _ns_date = optional "date"
, _ns_hyperdata = required "hyperdata" , _ns_hyperdata = required "hyperdata"
, _ns_search = optional "search" , _ns_search = optional "search"
, _ns_search_title = optional "search_title"
} }
) )
------------------------------------------------------------------------ ------------------------------------------------------------------------
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment