Searx.hs 3.53 KB
Newer Older
1
{-# LANGUAGE NamedFieldPuns #-}
2 3 4 5 6 7 8 9 10 11 12 13
{-# LANGUAGE TemplateHaskell #-}

module Gargantext.API.Node.Corpus.Searx where

import Control.Lens (view)
import qualified Data.Aeson as Aeson
import Data.Aeson.TH (deriveJSON)
import qualified Data.Text as T
import GHC.Generics (Generic)
import Network.HTTP.Client
import Network.HTTP.Client.TLS

14 15
import qualified Prelude as Prelude
import Protolude (encodeUtf8, Text, Either)
16 17 18 19 20 21 22 23 24 25 26 27 28 29
import Gargantext.Prelude
import Gargantext.Prelude.Config

import Gargantext.Core (Lang(..))
import qualified Gargantext.Core.Text.Corpus.API as API
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow (FlowCmdM)
import Gargantext.Database.Admin.Types.Node (CorpusId)
import Gargantext.Database.Prelude (hasConfig)


data SearxResult = SearxResult
  { _sr_url        :: Text
  , _sr_title      :: Text
30
  , _sr_content    :: Maybe Text
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
  , _sr_engine     :: Text
  , _sr_score      :: Double
  , _sr_category   :: Text
  , _sr_pretty_url :: Text }
  deriving (Show, Eq, Generic)
--  , _sr_parsed_url
--  , _sr_engines
--  , _sr_positions

$(deriveJSON (unPrefix "_sr_") ''SearxResult)

data SearxResponse = SearxResponse
  { _srs_query                :: Text
  , _srs_number_of_results    :: Int
  , _srs_results              :: [SearxResult] }
  deriving (Show, Eq, Generic)
-- , _srs_answers
-- , _srs_corrections
-- , _srs_infoboxes
--  , _srs_suggestions          :: [Text]
--  , _srs_unresponsive_engines :: [Text] }

$(deriveJSON (unPrefix "_srs_") ''SearxResponse)

55
data FetchSearxParams = FetchSearxParams
56 57
  { _fsp_language :: Lang
  , _fsp_manager :: Manager
58 59 60 61 62 63
  , _fsp_pageno  :: Int
  , _fsp_query   :: Text
  , _fsp_url     :: Text
  }

fetchSearxPage :: FetchSearxParams -> IO (Either Prelude.String SearxResponse)
64 65
fetchSearxPage (FetchSearxParams { _fsp_language
                                 , _fsp_manager
66 67 68 69 70 71 72 73 74
                                 , _fsp_pageno
                                 , _fsp_query
                                 , _fsp_url }) = do
  -- searx search API:
  -- https://searx.github.io/searx/dev/search_api.html?highlight=json
  req <- parseRequest $ T.unpack _fsp_url
  let request = urlEncodedBody
        [ --("category_general", "1")
          ("q", encodeUtf8 _fsp_query)
75
        , ("categories", "news")  -- https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/70#note_3976
76 77
        , ("pageno", encodeUtf8 $ T.pack $ show _fsp_pageno)
          --, ("time_range", "None")
78
        , ("language", encodeUtf8 $ T.pack $ show _fsp_language)
79 80 81 82 83 84
        , ("format", "json")
        ] req
  res <- httpLbs request _fsp_manager
  let dec = Aeson.eitherDecode $ responseBody res :: (Either Prelude.String SearxResponse)
  pure dec

85 86 87 88 89 90 91 92 93 94 95 96 97 98
triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m)
            => CorpusId
            -> API.Query
            -> Lang
            -> m ()

triggerSearxSearch cid q l = do
  printDebug "[triggerSearxSearch] cid" cid
  printDebug "[triggerSearxSearch] q" q
  printDebug "[triggerSearxSearch] l" l
  cfg <- view hasConfig
  let surl = _gc_frame_searx_url cfg
  printDebug "[triggerSearxSearch] surl" surl

99
  manager <- liftBase $ newManager tlsManagerSettings
100 101
  res <- liftBase $ fetchSearxPage $ FetchSearxParams { _fsp_language = l
                                                      , _fsp_manager = manager
102 103 104 105 106
                                                      , _fsp_pageno = 1
                                                      , _fsp_query = q
                                                      , _fsp_url = surl }
   
  printDebug "[triggerSearxSearch] res" res
107 108

  pure ()