Commit 8f2b5a0e authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[searx] refactoring, fetch 10 pages of results

parent f9b8cd6e
Pipeline #2240 failed with stage
in 36 minutes and 55 seconds
...@@ -39,7 +39,7 @@ import Gargantext.Database.Action.User (getUserId) ...@@ -39,7 +39,7 @@ import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Config (userMaster) import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus) import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node (CorpusId) import Gargantext.Database.Admin.Types.Node (CorpusId, ListId)
import Gargantext.Database.Prelude (hasConfig) import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Node (defaultListMaybe, getOrMkList) import Gargantext.Database.Query.Table.Node (defaultListMaybe, getOrMkList)
import Gargantext.Database.Query.Tree.Root (getOrMk_RootWithCorpus) import Gargantext.Database.Query.Tree.Root (getOrMk_RootWithCorpus)
...@@ -111,6 +111,37 @@ fetchSearxPage (FetchSearxParams { _fsp_language ...@@ -111,6 +111,37 @@ fetchSearxPage (FetchSearxParams { _fsp_language
let dec = Aeson.eitherDecode $ responseBody res :: (Either Prelude.String SearxResponse) let dec = Aeson.eitherDecode $ responseBody res :: (Either Prelude.String SearxResponse)
pure dec pure dec
insertSearxResponse :: (MonadBase IO m, FlowCmdM env err m)
=> User
-> CorpusId
-> ListId
-> Lang
-> Either Prelude.String SearxResponse
-> m ()
insertSearxResponse _ _ _ _ (Left _) = pure ()
insertSearxResponse user cId listId l (Right (SearxResponse { _srs_results })) = do
let docs = hyperdataDocumentFromSearxResult <$> _srs_results
--printDebug "[triggerSearxSearch] docs" docs
-- docs :: [Either Text HyperdataDocument]
let docs' = catMaybes $ rightToMaybe <$> docs
Prelude.mapM_ (\(HyperdataDocument { _hd_title, _hd_publication_year, _hd_publication_date }) -> do
printDebug "[triggerSearxSearch] doc time" $
"[title] " <> (show _hd_title) <>
" :: [publication_year] " <> (show _hd_publication_year) <>
" :: [publication_date] " <> (show _hd_publication_date)
) docs'
--_ <- flowDataText user (DataNew [docs']) (Multi EN) cId Nothing logStatus
let mCorpus = Nothing :: Maybe HyperdataCorpus
ids <- insertMasterDocs mCorpus (Multi EN) docs'
_ <- Doc.add cId ids
(_masterUserId, _masterRootId, masterCorpusId)
<- getOrMk_RootWithCorpus (UserName userMaster) (Left "") mCorpus
let gp = GroupWithPosTag l CoreNLP HashMap.empty
ngs <- buildNgramsLists user cId masterCorpusId Nothing gp
_userListId <- flowList_DbRepo listId ngs
pure ()
-- TODO Make an async task out of this? -- TODO Make an async task out of this?
triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m) triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m)
=> User => User
...@@ -120,9 +151,10 @@ triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m) ...@@ -120,9 +151,10 @@ triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m)
-> (JobLog -> m ()) -> (JobLog -> m ())
-> m JobLog -> m JobLog
triggerSearxSearch user cId q l logStatus = do triggerSearxSearch user cId q l logStatus = do
let jobLog = JobLog { _scst_succeeded = Just 1 let numPages = 10
let jobLog = JobLog { _scst_succeeded = Just 0
, _scst_failed = Just 0 , _scst_failed = Just 0
, _scst_remaining = Just 1 , _scst_remaining = Just numPages
, _scst_events = Just [] , _scst_events = Just []
} }
logStatus jobLog logStatus jobLog
...@@ -147,39 +179,22 @@ triggerSearxSearch user cId q l logStatus = do ...@@ -147,39 +179,22 @@ triggerSearxSearch user cId q l logStatus = do
printDebug "[triggerSearxSearch] listId" listId printDebug "[triggerSearxSearch] listId" listId
manager <- liftBase $ newManager tlsManagerSettings manager <- liftBase $ newManager tlsManagerSettings
res <- liftBase $ fetchSearxPage $ FetchSearxParams { _fsp_language = l _ <- mapM (\page -> do
, _fsp_manager = manager res <- liftBase $ fetchSearxPage $ FetchSearxParams { _fsp_language = l
, _fsp_pageno = 1 , _fsp_manager = manager
, _fsp_query = q , _fsp_pageno = page
, _fsp_url = surl } , _fsp_query = q
, _fsp_url = surl }
insertSearxResponse user cId listId l res
logStatus $ JobLog { _scst_succeeded = Just page
, _scst_failed = Just 0
, _scst_remaining = Just (numPages - page)
, _scst_events = Just [] }
) [1..numPages]
--printDebug "[triggerSearxSearch] res" res --printDebug "[triggerSearxSearch] res" res
case res of
Left _ -> pure ()
Right (SearxResponse { _srs_results }) -> do
let docs = hyperdataDocumentFromSearxResult <$> _srs_results
--printDebug "[triggerSearxSearch] docs" docs
-- docs :: [Either Text HyperdataDocument]
let docs' = catMaybes $ rightToMaybe <$> docs
Prelude.mapM_ (\(HyperdataDocument { _hd_title, _hd_publication_year, _hd_publication_date }) -> do
printDebug "[triggerSearxSearch] doc time" $
"[title] " <> (show _hd_title) <>
" :: [publication_year] " <> (show _hd_publication_year) <>
" :: [publication_date] " <> (show _hd_publication_date)
) docs'
--_ <- flowDataText user (DataNew [docs']) (Multi EN) cId Nothing logStatus
let mCorpus = Nothing :: Maybe HyperdataCorpus
ids <- insertMasterDocs mCorpus (Multi EN) docs'
_ <- Doc.add cId ids
(_masterUserId, _masterRootId, masterCorpusId)
<- getOrMk_RootWithCorpus (UserName userMaster) (Left "") mCorpus
let gp = GroupWithPosTag l CoreNLP HashMap.empty
ngs <- buildNgramsLists user cId masterCorpusId Nothing gp
_userListId <- flowList_DbRepo listId ngs
pure ()
pure $ jobLogSuccess jobLog pure $ jobLogSuccess jobLog
hyperdataDocumentFromSearxResult :: SearxResult -> Either T.Text HyperdataDocument hyperdataDocumentFromSearxResult :: SearxResult -> Either T.Text HyperdataDocument
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment