Commit 729e004f authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[searx] some more searx parsing work

parent fc1084bf
......@@ -200,7 +200,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
Just Web -> do
printDebug "[addToCorpusWithQuery] processing web request" datafield
_ <- triggerSearxSearch cid q l
_ <- triggerSearxSearch user cid q l logStatus
pure JobLog { _scst_succeeded = Just 3
, _scst_failed = Just 0
......
......@@ -17,19 +17,25 @@ import Network.HTTP.Client
import Network.HTTP.Client.TLS
import qualified Prelude as Prelude
import Protolude (encodeUtf8, Text)
import Protolude (catMaybes, encodeUtf8, rightToMaybe, Text)
import Gargantext.Prelude
import Gargantext.Prelude.Config
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..))
--import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
import Gargantext.Core (Lang(..))
import qualified Gargantext.Core.Text.Corpus.API as API
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node (CorpusId)
import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Node (defaultList)
import Gargantext.Database.Query.Table.Node (defaultListMaybe)
langToSearx :: Lang -> Text
......@@ -100,36 +106,55 @@ fetchSearxPage (FetchSearxParams { _fsp_language
-- TODO Make an async task out of this?
triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m)
=> CorpusId
=> User
-> CorpusId
-> API.Query
-> Lang
-> m ()
triggerSearxSearch cId q l = do
-> (JobLog -> m ())
-> m JobLog
triggerSearxSearch user cId q l logStatus = do
let jobLog = JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
logStatus jobLog
printDebug "[triggerSearxSearch] cId" cId
printDebug "[triggerSearxSearch] q" q
printDebug "[triggerSearxSearch] l" l
cfg <- view hasConfig
let surl = _gc_frame_searx_url cfg
printDebug "[triggerSearxSearch] surl" surl
listId <- defaultList cId
printDebug "[triggerSearxSearch] listId" listId
manager <- liftBase $ newManager tlsManagerSettings
res <- liftBase $ fetchSearxPage $ FetchSearxParams { _fsp_language = l
, _fsp_manager = manager
, _fsp_pageno = 1
, _fsp_query = q
, _fsp_url = surl }
mListId <- defaultListMaybe cId
case mListId of
Nothing -> do
let failedJobLog = jobLogFailTotalWithMessage "[triggerSearxSearch] no list id" jobLog
logStatus failedJobLog
pure failedJobLog
Just listId -> do
printDebug "[triggerSearxSearch] listId" listId
manager <- liftBase $ newManager tlsManagerSettings
res <- liftBase $ fetchSearxPage $ FetchSearxParams { _fsp_language = l
, _fsp_manager = manager
, _fsp_pageno = 1
, _fsp_query = q
, _fsp_url = surl }
printDebug "[triggerSearxSearch] res" res
_ <- case res of
Left _ -> pure ()
Right (SearxResponse { _srs_results }) -> do
let docs = hyperdataDocumentFromSearxResult <$> _srs_results
printDebug "[triggerSearxSearch] docs" docs
pure ()
printDebug "[triggerSearxSearch] res" res
case res of
Left _ -> pure ()
Right (SearxResponse { _srs_results }) -> do
let docs = hyperdataDocumentFromSearxResult <$> _srs_results
printDebug "[triggerSearxSearch] docs" docs
-- docs :: [Either Text HyperdataDocument]
let docs' = catMaybes $ rightToMaybe <$> docs
_ <- flowDataText user (DataNew [docs']) (Multi EN) cId Nothing logStatus
pure ()
pure $ jobLogSuccess jobLog
hyperdataDocumentFromSearxResult :: SearxResult -> Either T.Text HyperdataDocument
hyperdataDocumentFromSearxResult (SearxResult { _sr_content, _sr_engine, _sr_pubdate, _sr_title }) = do
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment