Commit 91e68737 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

[searx] custom flow for searx docs insert

parent 0e384787
Pipeline #2236 failed with stage
in 10 minutes and 22 seconds
...@@ -8,6 +8,7 @@ import Control.Lens (view) ...@@ -8,6 +8,7 @@ import Control.Lens (view)
import qualified Data.Aeson as Aeson import qualified Data.Aeson as Aeson
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Either (Either(..)) import Data.Either (Either(..))
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as T import qualified Data.Text as T
import Data.Time.Calendar (Day, toGregorian) import Data.Time.Calendar (Day, toGregorian)
import Data.Time.Format (defaultTimeLocale, formatTime, parseTimeM) import Data.Time.Format (defaultTimeLocale, formatTime, parseTimeM)
...@@ -23,20 +24,26 @@ import Gargantext.Prelude.Config ...@@ -23,20 +24,26 @@ import Gargantext.Prelude.Config
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..)) import Gargantext.API.Admin.Orchestrator.Types (JobLog(..))
--import Gargantext.API.Admin.Types (HasSettings) --import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Job (jobLogSuccess, jobLogFailTotalWithMessage) import Gargantext.API.Job (jobLogSuccess)
import Gargantext.Core (Lang(..)) import Gargantext.Core (Lang(..), PosTagAlgo(..))
import qualified Gargantext.Core.Text.Corpus.API as API import qualified Gargantext.Core.Text.Corpus.API as API
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
import Gargantext.Core.Text.Terms (TermType(..)) import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..)) import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Utils.Prefix (unPrefix) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow (flowDataText, DataText(..)) import Gargantext.Database.Action.Flow (insertMasterDocs) --, DataText(..))
import Gargantext.Database.Action.Flow.List (flowList_DbRepo)
import Gargantext.Database.Action.Flow.Types (FlowCmdM) import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Config () import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..)) import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node (CorpusId) import Gargantext.Database.Admin.Types.Node (CorpusId)
import Gargantext.Database.Prelude (hasConfig) import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Node (defaultListMaybe) import Gargantext.Database.Query.Table.Node (defaultListMaybe, getOrMkList)
import Gargantext.Database.Query.Tree.Root (getOrMk_RootWithCorpus)
import qualified Gargantext.Database.Query.Table.Node.Document.Add as Doc (add)
langToSearx :: Lang -> Text langToSearx :: Lang -> Text
langToSearx EN = "en-US" langToSearx EN = "en-US"
...@@ -124,43 +131,56 @@ triggerSearxSearch user cId q l logStatus = do ...@@ -124,43 +131,56 @@ triggerSearxSearch user cId q l logStatus = do
printDebug "[triggerSearxSearch] q" q printDebug "[triggerSearxSearch] q" q
printDebug "[triggerSearxSearch] l" l printDebug "[triggerSearxSearch] l" l
cfg <- view hasConfig cfg <- view hasConfig
uId <- getUserId user
let surl = _gc_frame_searx_url cfg let surl = _gc_frame_searx_url cfg
printDebug "[triggerSearxSearch] surl" surl printDebug "[triggerSearxSearch] surl" surl
mListId <- defaultListMaybe cId mListId <- defaultListMaybe cId
case mListId of listId <- case mListId of
Nothing -> do Nothing -> do
let failedJobLog = jobLogFailTotalWithMessage "[triggerSearxSearch] no list id" jobLog --let failedJobLog = jobLogFailTotalWithMessage "[triggerSearxSearch] no list id" jobLog
logStatus failedJobLog --logStatus failedJobLog
pure failedJobLog --pure failedJobLog
Just listId -> do listId <- getOrMkList cId uId
printDebug "[triggerSearxSearch] listId" listId pure listId
Just listId -> pure listId
manager <- liftBase $ newManager tlsManagerSettings
res <- liftBase $ fetchSearxPage $ FetchSearxParams { _fsp_language = l printDebug "[triggerSearxSearch] listId" listId
, _fsp_manager = manager
, _fsp_pageno = 1 manager <- liftBase $ newManager tlsManagerSettings
, _fsp_query = q res <- liftBase $ fetchSearxPage $ FetchSearxParams { _fsp_language = l
, _fsp_url = surl } , _fsp_manager = manager
, _fsp_pageno = 1
--printDebug "[triggerSearxSearch] res" res , _fsp_query = q
, _fsp_url = surl }
case res of
Left _ -> pure () --printDebug "[triggerSearxSearch] res" res
Right (SearxResponse { _srs_results }) -> do
let docs = hyperdataDocumentFromSearxResult <$> _srs_results case res of
--printDebug "[triggerSearxSearch] docs" docs Left _ -> pure ()
-- docs :: [Either Text HyperdataDocument] Right (SearxResponse { _srs_results }) -> do
let docs' = catMaybes $ rightToMaybe <$> docs let docs = hyperdataDocumentFromSearxResult <$> _srs_results
Prelude.mapM_ (\(HyperdataDocument { _hd_title, _hd_publication_year, _hd_publication_date }) -> do --printDebug "[triggerSearxSearch] docs" docs
printDebug "[triggerSearxSearch] doc time" $ -- docs :: [Either Text HyperdataDocument]
"[title] " <> (show _hd_title) <> let docs' = catMaybes $ rightToMaybe <$> docs
" :: [publication_year] " <> (show _hd_publication_year) <> Prelude.mapM_ (\(HyperdataDocument { _hd_title, _hd_publication_year, _hd_publication_date }) -> do
" :: [publication_date] " <> (show _hd_publication_date) printDebug "[triggerSearxSearch] doc time" $
) docs' "[title] " <> (show _hd_title) <>
_ <- flowDataText user (DataNew [docs']) (Multi EN) cId Nothing logStatus " :: [publication_year] " <> (show _hd_publication_year) <>
pure () " :: [publication_date] " <> (show _hd_publication_date)
) docs'
pure $ jobLogSuccess jobLog --_ <- flowDataText user (DataNew [docs']) (Multi EN) cId Nothing logStatus
let mCorpus = Nothing :: Maybe HyperdataCorpus
ids <- insertMasterDocs mCorpus (Multi EN) docs'
_ <- Doc.add cId ids
(_masterUserId, _masterRootId, masterCorpusId)
<- getOrMk_RootWithCorpus (UserName userMaster) (Left "") mCorpus
let gp = GroupWithPosTag l CoreNLP HashMap.empty
ngs <- buildNgramsLists user cId masterCorpusId Nothing gp
_userListId <- flowList_DbRepo listId ngs
pure ()
pure $ jobLogSuccess jobLog
hyperdataDocumentFromSearxResult :: SearxResult -> Either T.Text HyperdataDocument hyperdataDocumentFromSearxResult :: SearxResult -> Either T.Text HyperdataDocument
hyperdataDocumentFromSearxResult (SearxResult { _sr_content, _sr_engine, _sr_pubdate, _sr_title }) = do hyperdataDocumentFromSearxResult (SearxResult { _sr_content, _sr_engine, _sr_pubdate, _sr_title }) = do
......
...@@ -71,9 +71,9 @@ import Gargantext.Core.Ext.IMT (toSchoolName) ...@@ -71,9 +71,9 @@ import Gargantext.Core.Ext.IMT (toSchoolName)
import Gargantext.Core.Ext.IMTUser (readFile_Annuaire) import Gargantext.Core.Ext.IMTUser (readFile_Annuaire)
import Gargantext.Core.Flow.Types import Gargantext.Core.Flow.Types
import Gargantext.Core.Text import Gargantext.Core.Text
import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat) import Gargantext.Core.Text.Corpus.Parsers (parseFile, FileFormat)
import Gargantext.Core.Text.List (buildNgramsLists) import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
import Gargantext.Core.Text.List.Social (FlowSocialListWith) import Gargantext.Core.Text.List.Social (FlowSocialListWith)
import Gargantext.Core.Text.Terms import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt) import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
......
...@@ -279,6 +279,9 @@ instance ToNode HyperdataDocument where ...@@ -279,6 +279,9 @@ instance ToNode HyperdataDocument where
where where
n = maybe "No Title" (DT.take 255) (_hd_title h) n = maybe "No Title" (DT.take 255) (_hd_title h)
date = jour y m d date = jour y m d
-- NOTE: There is no year '0' in postgres, there is year 1 AD and beofre that year 1 BC:
-- select '0001-01-01'::date, '0001-01-01'::date - '1 day'::interval;
-- 0001-01-01 0001-12-31 00:00:00 BC
y = maybe 1 fromIntegral $ _hd_publication_year h y = maybe 1 fromIntegral $ _hd_publication_year h
m = fromMaybe 1 $ _hd_publication_month h m = fromMaybe 1 $ _hd_publication_month h
d = fromMaybe 1 $ _hd_publication_day h d = fromMaybe 1 $ _hd_publication_day h
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment