Commit 966c3ed0 authored by Przemyslaw Kaminski

[searx] add HyperdataDocument generation to triggerSearxSearch

parent 25fc4df9
dev 102-dev-graph-refresh 104-dev-john-snow-nlp 106-dev-ngrams-score-fix 111-dev-refactor-text-corpus-api-with-conduit 111-dev-refactor-text-corpus-api-with-conduit-alp 117-dev-gql-tree-parent-resolver 118-dev-gql-security 118-dev-gql-security-fix 119-dev-hal-notebook 121-dev-arxiv 124-dev-corpus-export-csv 131-dev-ngrams-table-db-connection 131-dev-ngrams-table-db-connection-2 141-dev-node-stories-db-optimization 143-dev-ngrams-repo-conflicts 144-NgramsTable_Optim 145-dev-graph-explorer-search-tfidf 145-graphExplorerSearch 149-dev-ngrams-table-sorting-fix 151-dev-pubmed-api-key 155-dev-export-list-as-csv 158-dev-ngrams-table-patch-sync 159-dev-node-story-versions-fix 161-dev-conduit-insert-db-fix 162-dev-ghc-9.4 162-dev-ghc-9.x 162-dev-haskell-9.2 163-dev-patch-fix 164-dev-node-write-analysis 175-dev-doc-table-count 175-dev-doc-table-count-temp 177-welcome-door-to-enter-the-project 184-dev-add-nlp-to-gargantext-ini 184-dev-add-support-for-multiple-languages-in-ini-file 186-dev-ngrams-score-fixes 190-dev-framecalc-async-fix 191-dev-list-upload-fixes 193-dev-api-query-dev-fix 199-dev-materialized-view-refresh-issue 201-dev-user-pubmed-api-key 203-dev-corpus-json-import 205-dev-ethercalc-codimd-doc-upload 206-dev-phylo 213-dev-implement-json-errors 217-dev-ngrams-patch 233-dev-terms-dates 239-readme-test-reinstall-project-with-cabal-and-update-the-readme-file 239-readme-test-reinstall-project-with-cabal-and-update-the-readme-file-2 244-dev-cabal-dependencies-versions 244-dev-hal-language-fix-for-abstracts 244-dev-stack2cabal 250-dev-fix-corpus-hyperdata-update 259-dev-security-fixes 260-readme-add-note-about-libraries-devlopment 263-node-content-breadcrumb-display-the-full-path-of-a-node-as-a-breadcrumb-with-each-clickable-parent 272-dev-fixes-for-node-score 274-dev-more-protolude-in-prelude 275-dev-date-split-fix 280-dev-xml-code-cleanup 281-dev-ngrams-fixes 374-dev-document-fix 376-dev-annuaire-contacts-page 376-dev-annuaire-fields 396-dev-team-management 
398-dev-ngram-type-parameter 405-dev-lost-password-design 415-dev-user-empty-field 428-dev-profile-img-upload 428-dev-profile-img-upload-2 430-dev-docs-date-filter 438-dev-team-node-creator 443-dev-doc-upload-improvement 445-dev-doc-upload-lang 452-dev-teams-security 465-dev-hyperdata-graph-angle 469-dev-email-change 475-dev-node-team-invite 476-dev-fix-node-story-versions 481-dev-node-calc-upload 497-dev-node-write-selection 506-dev-search-in-tree 506-dev-tree-search-fix 513-dev-pin-tree 548-dev-reverse-url-search 551-dev-graphql-contexts-ngrams 561-dev-document-date 571-dev-node-corpus-api-search-fixes 571-dev-node-corpus-api-search-fixes-take-2 582-dev-node-phylo-init 592-dev-document-upload-abstract 594-dev-ngrams-click-fixes 596-dev-ngrams-initial-sync-fix 603-dev-istex-zip-file-upload 67-dev-ci 70-dev-searx-parser 81-dev-fix-parsers-conduit-length 81-dev-zip-upload 90-dev-async-jobs-with-push-event 90-dev-hal-box-fix 90-dev-hal-fixes 90-dev-max-docs-scrapers-parsers 97-dev-istex-search adinapoli/developer-guidelines adinapoli/disable-policy-checks adinapoli/drop-servant-static-th adinapoli/fix-datafield-instance adinapoli/fix-phylo-types adinapoli/fix-scheduler adinapoli/hotfix-for-datafield-json-instance adinapoli/improve-cabal adinapoli/improve-cabal-ci-caching adinapoli/improve-cabal-old adinapoli/investigate-issue-192 adinapoli/issue-180-deps-audit adinapoli/issue-182 adinapoli/issue-185-job-api adinapoli/issue-185-job-api-part-2 adinapoli/issue-187-update-haskell-igraph adinapoli/issue-188 adinapoli/issue-198 adinapoli/issue-198-part-2 adinapoli/issue-217 adinapoli/issue-226 adinapoli/issue-250 adinapoli/issue-252 adinapoli/issue-258 adinapoli/issue-258-part-2 adinapoli/issue-259 adinapoli/issue-259-design-1 adinapoli/issue-261 adinapoli/issue-267 adinapoli/issue-267-part-2 adinapoli/issue-267-part-3 adinapoli/issue-273 adinapoli/issue-273-part-2 adinapoli/issue-276 adinapoli/issue-279 adinapoli/issue-284 adinapoli/issue-incorrect-pagination 
adinapoli/mac-fixes adinapoli/more-db-test-coverage adinapoli/raw-query-to-pubmed adinapoli/restore-ci adinapoli/switch-hsvm adinapoli/try-stack2cabal client-executable client-graphql-endpoint dbg-perf-order2-graph debug-nans dev-104-adding-language dev-177-DoorWelcome dev-177-DoorWelcome-v2 dev-250-addChineze dev-593-pubmed-api-key dev-9.0-upgrade dev-addDoc dev-bdd dev-bipartite dev-contributing dev-demo dev-docs-chart-update dev-docs-download dev-epo-integration dev-epo-patents dev-fix-broken-pure-refactor dev-forgot-password dev-gargantext-ini-fix dev-ghc9.2-acc1.3-etc dev-gql-tree-api dev-graph-multipartite dev-graph-search-in-groups-not-in-labels dev-hackathon-fixes dev-ihaskell-codebook dev-infomap dev-iramuteq dev-merge dev-merge-nix dev-merge-nix-2 dev-no-freeze dev-notebook dev-openalex dev-order2 dev-phylo dev-phyloDebug dev-pubmed dev-scores dev-sources-chart-sort dev-tree-gql-improvements dev-treedebug dev-typeclasses-refactoring dev-userid-in-auth developer-guidelines fix-confluence-indices-bugs fix-haddock-parse-error flexible-job-queue garg-init-errror-msg jobqueue-fairness jobqueue-test-delay mailmap nix revert-0b51636b stable testing 0.0.6.9.9.9.3.1 0.0.6.9.9.9.3 0.0.6.9.9.9.2 0.0.6.9.9.9.1 0.0.6.9.9.9 0.0.6.9.9.8.3.4 0.0.6.9.9.8.3.3 0.0.6.9.9.8.3.2 0.0.6.9.9.8.3.1 0.0.6.9.9.8.3 0.0.6.9.9.8.2 0.0.6.9.9.8.1 0.0.6.9.9.8 0.0.6.9.9.7.9 0.0.6.9.9.7.8 0.0.6.9.9.7.7 0.0.6.9.9.7.6.4 0.0.6.9.9.7.6.3 0.0.6.9.9.7.6.2 0.0.6.9.9.7.6.1 0.0.6.9.9.7.6 0.0.6.9.9.7.5.1 0.0.6.9.9.7.5 0.0.6.9.9.7.4 0.0.6.9.9.7.3 0.0.6.9.9.7.2 0.0.6.9.9.7.1 0.0.6.9.9.7 0.0.6.9.9.6.9 0.0.6.9.9.6.8 0.0.6.9.9.6.7 0.0.6.9.9.6.6 0.0.6.9.9.6.5 0.0.6.9.9.6.4 0.0.6.9.9.6.3 0.0.6.9.9.6.2 0.0.6.9.9.6.1 0.0.6.9.9.6 0.0.6.9.9.5.8 0.0.6.9.9.5.7 0.0.6.9.9.5.6 0.0.6.9.9.5.5 0.0.6.9.9.5.4 0.0.6.9.9.5.3 0.0.6.9.9.5.2 0.0.6.9.9.5.1 0.0.6.9.9.5 0.0.6.9.9.4.6 0.0.6.9.9.4.5 0.0.6.9.9.4.4 0.0.6.9.9.4.3 0.0.6.9.9.4.2 0.0.6.9.9.4.1 0.0.6.9.9.4 0.0.6.9.9.3.3 0.0.6.9.9.3.2 0.0.6.9.9.3.1 0.0.6.9.9.3 0.0.6.9.9.2 
0.0.6.9.9.1 0.0.6.9.9 0.0.6.9.8.7.1 0.0.6.9.8.7 0.0.6.9.8.6.2 0.0.6.9.8.6.1 0.0.6.9.8.6 0.0.6.9.8.5.1 0.0.6.9.8.5 0.0.6.9.8.4 0.0.6.9.8.3 0.0.6.9.8.2.2 0.0.6.9.8.2.1 0.0.6.9.8.2 0.0.6.9.8.1 0.0.6.9.8 0.0.6.9.7.6 0.0.6.9.7.5 0.0.6.9.7.4 0.0.6.9.7.3 0.0.6.9.7.2 0.0.6.9.7.1 0.0.6.9.7 0.0.6.9.6 0.0.6.9.5 0.0.6.9.4.9 0.0.6.9.4.8 0.0.6.9.4.7 0.0.6.9.4.6 0.0.6.9.4.5 0.0.6.9.4.4 0.0.6.9.4.3 0.0.6.9.4.2 0.0.6.9.4.1 0.0.6.9.4 0.0.6.9.3 0.0.6.9.2 0.0.6.9.1 0.0.6.8.6 0.0.6.8.5.9.4 0.0.6.8.5.9.3 0.0.6.8.5.9.2 0.0.6.8.5.9.1 0.0.6.8.5.9 0.0.6.8.5.8 0.0.6.8.5.7 0.0.6.8.5.6 0.0.6.8.5.5 0.0.6.8.5.4 0.0.6.8.5.3 0.0.6.8.5.2 0.0.6.8.5.1 0.0.6.8.5 0.0.6.8.4 0.0.6.8.3 0.0.6.8.2 0.0.6.8.1 0.0.6.8 0.0.6.7.2 0.0.6.7.1 0.0.6.7 0.0.6.6 0.0.6.5.1 0.0.6.5 0.0.6.4 0.0.6.3 0.0.6.2 0.0.6.1 0.0.6 0.0.5.9.6 0.0.5.9.5 0.0.5.9.4 0.0.5.9.3 0.0.5.9.2 0.0.5.9.1 0.0.5.9 0.0.5.8.9.9 0.0.5.8.9.8 0.0.5.8.9.7 0.0.5.8.9.6 0.0.5.8.9.5 0.0.5.8.9.4 0.0.5.8.9.3 0.0.5.8.9.2 0.0.5.8.9.1 0.0.5.8.9 0.0.5.8.8.2 0.0.5.8.8.1 0.0.5.8.8 0.0.5.8.7.2 0.0.5.8.7.1 0.0.5.8.7 0.0.5.8.6 0.0.5.8.5.1 0.0.5.8.5 0.0.5.8.4 0.0.5.8.3 0.0.5.8.2 0.0.5.8.1 0.0.5.8 0.0.5.7.9.1 0.0.5.7.9 0.0.5.7.8 0.0.5.7.7 0.0.5.7.6 0.0.5.7.5 0.0.5.7.4 0.0.5.7.3 0.0.5.7.2 0.0.5.7.1 0.0.5.7 0.0.5.6.7 0.0.5.6.6 0.0.5.6.5 0.0.5.6.4 0.0.5.6.3 0.0.5.6.2 0.0.5.6.1 0.0.5.6 0.0.5.5.7 0.0.5.5.6 0.0.5.5.5 0.0.5.5.4 0.0.5.5.3 0.0.5.5.2 0.0.5.5.1 0.0.5.5 0.0.5.3 0.0.5.2 0.0.5.1 0.0.5 0.0.4.9.9.6 0.0.4.9.9.5 0.0.4.9.9.4 0.0.4.9.9.3 0.0.4.9.9.2 0.0.4.9.9.1 0.0.4.9.9 0.0.4.9.8 0.0.4.9.7
No related merge requests found
......@@ -246,6 +246,7 @@ library:
- timezone-series
- transformers
- transformers-base
- tuple
- unordered-containers
- utf8-string
- uuid
......
......@@ -7,13 +7,17 @@ module Gargantext.API.Node.Corpus.Searx where
import Control.Lens (view)
import qualified Data.Aeson as Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Either (Either(..))
import qualified Data.Text as T
import Data.Time.Calendar (Day, toGregorian)
import Data.Time.Format (defaultTimeLocale, parseTimeM)
import Data.Tuple.Select (sel1, sel2, sel3)
import GHC.Generics (Generic)
import Network.HTTP.Client
import Network.HTTP.Client.TLS
import qualified Prelude as Prelude
import Protolude (encodeUtf8, Text, Either)
import Protolude (encodeUtf8, Text)
import Gargantext.Prelude
import Gargantext.Prelude.Config
......@@ -21,6 +25,8 @@ import Gargantext.Core (Lang(..))
import qualified Gargantext.Core.Text.Corpus.API as API
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node (CorpusId)
import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Node (defaultList)
......@@ -32,13 +38,16 @@ langToSearx FR = "fr-FR"
langToSearx All = "en-US"
-- | One entry of the result list carried by a Searx response (the elements
-- of '_srs_results'); each entry is turned into a 'HyperdataDocument' by
-- 'hyperdataDocumentFromSearxResult'.
-- NOTE(review): the field names presumably mirror the Searx JSON keys once
-- the @_sr_@ prefix is dropped — confirm against the JSON instance derivation.
data SearxResult = SearxResult
{ _sr_url :: Text
, _sr_title :: Text
, _sr_content :: Maybe Text
, _sr_engine :: Text
, _sr_score :: Double
, _sr_category :: Text
, _sr_pretty_url :: Text }
{ _sr_url :: Text
, _sr_title :: Text
, _sr_content :: Maybe Text
, _sr_engine :: Text
, _sr_score :: Double
, _sr_category :: Text
, _sr_pretty_url :: Text
, _sr_publishedDate :: Text -- human-readable date, e.g. "Nov 19, 2021"
, _sr_pubdate :: Text -- timestamp with numeric UTC offset, e.g. "2021-11-19 02:12:00+0000"
}
deriving (Show, Eq, Generic)
-- , _sr_parsed_url
-- , _sr_engines
......@@ -114,4 +123,35 @@ triggerSearxSearch cId q l = do
printDebug "[triggerSearxSearch] res" res
_ <- case res of
Left _ -> pure ()
Right (SearxResponse { _srs_results }) -> do
let docs = hyperdataDocumentFromSearxResult <$> _srs_results
printDebug "[triggerSearxSearch] docs" docs
pure ()
-- | Convert a single Searx search result into a 'HyperdataDocument'.
--
-- The result's title, engine (stored as the source) and content (stored as
-- the abstract) are copied over, and the raw @pubdate@ string is kept
-- verbatim as the publication date. Year, month and day are filled in only
-- when @pubdate@ parses as a timestamp such as
-- @"2021-11-19 02:12:00+0000"@ (or the same without the offset); otherwise
-- they are 'Nothing'. The language is currently hard-coded to 'EN'.
--
-- This function presently always returns 'Right'; the 'Either' in the
-- signature leaves room for reporting conversion errors.
hyperdataDocumentFromSearxResult :: SearxResult -> Either T.Text HyperdataDocument
hyperdataDocumentFromSearxResult (SearxResult { _sr_content, _sr_engine, _sr_pubdate, _sr_title }) = do
  -- 'parseTimeM' has to consume the whole input. Searx appends a numeric UTC
  -- offset ("+0000") to pubdate, so the format must account for it with %z;
  -- without it the parse always fails and the date fields stay empty.
  -- The offset-less format is kept as a fallback for inputs lacking it.
  let parseDay fmt = parseTimeM False defaultTimeLocale fmt (T.unpack _sr_pubdate) :: Maybe Day
  let mDate = case parseDay "%Y-%m-%d %H:%M:%S%z" of
        Just day -> Just day
        Nothing  -> parseDay "%Y-%m-%d %H:%M:%S"
  -- (year, month, day) triple, when a date could be parsed.
  let mGregorian = toGregorian <$> mDate
  Right HyperdataDocument { _hd_bdd = Just "Searx"
                          , _hd_doi = Nothing
                          , _hd_url = Nothing
                          , _hd_uniqId = Nothing
                          , _hd_uniqIdBdd = Nothing
                          , _hd_page = Nothing
                          , _hd_title = Just _sr_title
                          , _hd_authors = Nothing
                          , _hd_institutes = Nothing
                          , _hd_source = Just _sr_engine
                          , _hd_abstract = _sr_content
                          , _hd_publication_date = Just _sr_pubdate
                          -- toGregorian yields the year as Integer; narrow it to the field's type.
                          , _hd_publication_year = fromIntegral <$> sel1 <$> mGregorian
                          , _hd_publication_month = sel2 <$> mGregorian
                          , _hd_publication_day = sel3 <$> mGregorian
                          , _hd_publication_hour = Nothing
                          , _hd_publication_minute = Nothing
                          , _hd_publication_second = Nothing
                          , _hd_language_iso2 = Just $ T.pack $ show EN }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment