Commit 729e004f authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[searx] some more searx parsing work

parent fc1084bf
...@@ -200,7 +200,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q ...@@ -200,7 +200,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
Just Web -> do Just Web -> do
printDebug "[addToCorpusWithQuery] processing web request" datafield printDebug "[addToCorpusWithQuery] processing web request" datafield
_ <- triggerSearxSearch cid q l _ <- triggerSearxSearch user cid q l logStatus
pure JobLog { _scst_succeeded = Just 3 pure JobLog { _scst_succeeded = Just 3
, _scst_failed = Just 0 , _scst_failed = Just 0
......
...@@ -17,19 +17,25 @@ import Network.HTTP.Client ...@@ -17,19 +17,25 @@ import Network.HTTP.Client
import Network.HTTP.Client.TLS import Network.HTTP.Client.TLS
import qualified Prelude as Prelude import qualified Prelude as Prelude
import Protolude (encodeUtf8, Text) import Protolude (catMaybes, encodeUtf8, rightToMaybe, Text)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Config import Gargantext.Prelude.Config
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..))
--import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..))
import qualified Gargantext.Core.Text.Corpus.API as API import qualified Gargantext.Core.Text.Corpus.API as API
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
import Gargantext.Database.Action.Flow.Types (FlowCmdM) import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Config () import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node (CorpusId) import Gargantext.Database.Admin.Types.Node (CorpusId)
import Gargantext.Database.Prelude (hasConfig) import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Node (defaultList) import Gargantext.Database.Query.Table.Node (defaultListMaybe)
langToSearx :: Lang -> Text langToSearx :: Lang -> Text
...@@ -100,18 +106,33 @@ fetchSearxPage (FetchSearxParams { _fsp_language ...@@ -100,18 +106,33 @@ fetchSearxPage (FetchSearxParams { _fsp_language
-- TODO Make an async task out of this? -- TODO Make an async task out of this?
triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m) triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m)
=> CorpusId => User
-> CorpusId
-> API.Query -> API.Query
-> Lang -> Lang
-> m () -> (JobLog -> m ())
triggerSearxSearch cId q l = do -> m JobLog
triggerSearxSearch user cId q l logStatus = do
let jobLog = JobLog { _scst_succeeded = Just 1
, _scst_failed = Just 0
, _scst_remaining = Just 1
, _scst_events = Just []
}
logStatus jobLog
printDebug "[triggerSearxSearch] cId" cId printDebug "[triggerSearxSearch] cId" cId
printDebug "[triggerSearxSearch] q" q printDebug "[triggerSearxSearch] q" q
printDebug "[triggerSearxSearch] l" l printDebug "[triggerSearxSearch] l" l
cfg <- view hasConfig cfg <- view hasConfig
let surl = _gc_frame_searx_url cfg let surl = _gc_frame_searx_url cfg
printDebug "[triggerSearxSearch] surl" surl printDebug "[triggerSearxSearch] surl" surl
listId <- defaultList cId mListId <- defaultListMaybe cId
case mListId of
Nothing -> do
let failedJobLog = jobLogFailTotalWithMessage "[triggerSearxSearch] no list id" jobLog
logStatus failedJobLog
pure failedJobLog
Just listId -> do
printDebug "[triggerSearxSearch] listId" listId printDebug "[triggerSearxSearch] listId" listId
manager <- liftBase $ newManager tlsManagerSettings manager <- liftBase $ newManager tlsManagerSettings
...@@ -123,14 +144,18 @@ triggerSearxSearch cId q l = do ...@@ -123,14 +144,18 @@ triggerSearxSearch cId q l = do
printDebug "[triggerSearxSearch] res" res printDebug "[triggerSearxSearch] res" res
_ <- case res of case res of
Left _ -> pure () Left _ -> pure ()
Right (SearxResponse { _srs_results }) -> do Right (SearxResponse { _srs_results }) -> do
let docs = hyperdataDocumentFromSearxResult <$> _srs_results let docs = hyperdataDocumentFromSearxResult <$> _srs_results
printDebug "[triggerSearxSearch] docs" docs printDebug "[triggerSearxSearch] docs" docs
-- docs :: [Either Text HyperdataDocument]
let docs' = catMaybes $ rightToMaybe <$> docs
_ <- flowDataText user (DataNew [docs']) (Multi EN) cId Nothing logStatus
pure () pure ()
pure $ jobLogSuccess jobLog
hyperdataDocumentFromSearxResult :: SearxResult -> Either T.Text HyperdataDocument hyperdataDocumentFromSearxResult :: SearxResult -> Either T.Text HyperdataDocument
hyperdataDocumentFromSearxResult (SearxResult { _sr_content, _sr_engine, _sr_pubdate, _sr_title }) = do hyperdataDocumentFromSearxResult (SearxResult { _sr_content, _sr_engine, _sr_pubdate, _sr_title }) = do
let mDate = parseTimeM False defaultTimeLocale "%Y-%m-%d %H:%M:%S" (T.unpack _sr_pubdate) :: Maybe Day let mDate = parseTimeM False defaultTimeLocale "%Y-%m-%d %H:%M:%S" (T.unpack _sr_pubdate) :: Maybe Day
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment