• Alfredo Di Napoli's avatar
    Add unified stemming interface · 2941ac6e
    Alfredo Di Napoli authored
    This commit refactors ever so slightly the '.Stem' modules to create a
    unified interface; now Gargantext.Core.Text.Terms.Mono.Stem exports a
    single 'stem' function which can be used with different
    `StemmingAlgorithm`s, and now the other algorithms' functions have been
    shielded behind internal modules.
    
    This makes possible to see at glance which part of Garg are using which
    stemming algorithm, as it wasn't always clearer before.
    
    This also allows us to have a better control over the langugages, as not
    all the stemming algorithms supports all the languages that Garg
    supports.
    2941ac6e
TSQuery.hs 2.88 KB
{-|
Module      : Gargantext.Database.Action.TSQuery
Description :
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Here is a longer description of this module, containing some
commentary with @some markup@.
-}

module Gargantext.Database.Action.TSQuery where

import Data.Aeson
import Data.Maybe
import Data.String (IsString(..))
import Database.PostgreSQL.Simple (Query)
import Database.PostgreSQL.Simple.ToField
import Gargantext.Core
import Gargantext.Core.Text.Terms.Mono.Stem (stem, StemmingAlgorithm(..))
import Gargantext.Core.Types
import Gargantext.Core.Types.Query (Limit, Offset)
import Gargantext.Database.Prelude (DBCmd, runPGSQuery)
import Gargantext.Prelude


newtype TSQuery = UnsafeTSQuery [Text]

-- | TODO [""] -> panic "error"
toTSQuery :: [Text] -> TSQuery
toTSQuery txt = UnsafeTSQuery $ map (stem EN GargPorterAlgorithm) txt


instance IsString TSQuery
  where
    fromString = UnsafeTSQuery . words . cs


instance ToField TSQuery
  where
    toField (UnsafeTSQuery xs)
      = Many  $ intersperse (Plain " && ")
              $ map (\q -> Many [ Plain "plainto_tsquery("
                                , Escape (cs q)
                                , Plain ")"
                                ]
                    ) xs

data Order    = Asc | Desc

instance ToField Order
  where
    toField Asc  = Plain "ASC"
    toField Desc = Plain "DESC"

-- TODO
-- FIX fav
-- ADD ngrams count
-- TESTS
textSearchQuery :: Query
textSearchQuery = "SELECT n.id, n.hyperdata->'publication_year'     \
\                   , n.hyperdata->'title'                          \
\                   , n.hyperdata->'source'                         \
\                   , n.hyperdata->'authors'                        \
\                   , COALESCE(nn.score,null)                       \
\                      FROM nodes n                                 \
\            LEFT JOIN nodes_nodes nn  ON nn.node2_id = n.id        \
\              WHERE                                                \
\                n.search @@ (?::tsquery)                           \
\                AND (n.parent_id = ? OR nn.node1_id = ?)           \
\                AND n.typename  = ?                                \
\                ORDER BY n.hyperdata -> 'publication_date' ?       \
\            offset ? limit ?;"

-- | Text Search Function for Master Corpus
-- TODO : text search for user corpus
-- Example:
-- textSearchTest :: ParentId -> TSQuery -> Cmd err [(Int, Value, Value, Value, Value, Maybe Int)]
-- textSearchTest pId q = textSearch q pId 5 0 Asc
textSearch :: HasDBid NodeType
           => TSQuery -> ParentId
           -> Limit -> Offset -> Order
           -> DBCmd err [(Int,Value,Value,Value, Value, Maybe Int)]
textSearch q p l o ord' = runPGSQuery textSearchQuery (q,p,p,typeId,ord',o,l)
  where
    typeId = toDBid NodeDocument