Merge branch 'dev' into julm/nix

parents b0a50fb5 cd0fea68
use_nix
#use_flake
export LANG=C.UTF-8
# Optimising CI speed by using tips from https://blog.nimbleways.com/let-s-make-faster-gitlab-ci-cd-pipelines/
image: adinapoli/gargantext:v3.3
image: adinapoli/gargantext:v3.4
variables:
STACK_ROOT: "${CI_PROJECT_DIR}/.stack-root"
......@@ -37,7 +37,7 @@ cabal:
- .cabal/
policy: pull-push
script:
- nix-shell --run "./bin/update-project-dependencies $CABAL_STORE_DIR && cabal --store-dir=$CABAL_STORE_DIR v2-build --flags test-crypto --ghc-options='-O0 -fclear-plugins'"
- nix-shell --run "./bin/update-project-dependencies $CABAL_STORE_DIR && cabal --store-dir=$CABAL_STORE_DIR v2-build --flags 'test-crypto no-phylo-debug-logs' --ghc-options='-O0 -fclear-plugins'"
allow_failure: false
bench:
......
Alexandre Delanoë <alexandre.delanoe@iscpif.fr> <alexandre.delanoe@iscpif.fr>
Alexandre Delanoë <alexandre.delanoe@iscpif.fr> <devel+git@delanoe.org>
Alfredo Di Napoli <alfredo@well-typed.com> <alfredo.dinapoli@gmail.com>
Alfredo Di Napoli <alfredo@well-typed.com> <alfredo@well-typed.com>
Alp Mestanogullari <alp@well-typed.com> <alp@well-typed.com>
Christian Merten <christian@merten.dev> <christian@merten.dev>
David Chavalarias <david.chavalarias@iscpif.fr> <david.chavalarias@iscpif.fr>
Fabien Manière <fabien@cnrs.iscpif.fr> <fabien.maniere@cnrs.fr>
Fabien Manière <fabien@cnrs.iscpif.fr> <fabien@cnrs.iscpif.fr>
Fabien Manière <fabien@cnrs.iscpif.fr> <fmaniere.pro@gmail.com>
Gargamelle <gargamelle@gargantext.org>
Guillaume Chérel <guillaume.cherel@iscpif.fr> <guillaume.cherel@iscpif.fr>
Justin Woo <moomoowoo@gmail.com> <moomoowoo@gmail.com>
Karen Konou <konoukaren@gmail.com> <konoukaren@gmail.com>
Maël Nicolas <mael.nicolas@imt.fr> <mael.nicolas@imt.fr>
Nicolas Pouillard <nicolas.pouillard@gmail.com> <nicolas.pouillard@gmail.com>
Nicolas Pouillard <nicolas.pouillard@gmail.com> <np.t0@nicolaspouillard.fr>
Przemysław Kaminski <pk@intrepidus.pl> <pk@intrepidus.pl>
Quentin Lobbé <quentin.lobbe@iscpif.fr> <quentin.lobbe@gmail.com>
Quentin Lobbé <quentin.lobbe@iscpif.fr> <quentin.lobbe@iscpif.fr>
Sudhir Kumar <s@atomicits.com> <s@atomicits.com>
## Version 0.0.7
- [BACK/FRONT][RELEASE] OO7 Version
## Version 0.0.6.9.9.9.9.1 [RELEASE CANDIDATE 007]
* [FRONT][FIX][In Document View: show Institute field of the document (#629)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/629)
* [BACK][FIX] Cabal optim
* [BACK][FIX][duckling fork (#319)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/319)
* [BACK][FIX][haskell-opaleye fork (#317)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/317)
* [BACK][FIX][[Node type/API GQL] Extend a little the node GQL query to have an extra &quot;node_type&quot; (or similar) so that we can extend the Purescript Node type with the value form the backend (#336)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/336)
* [BACK][DOC][Welcome: Door To enter the project (#177)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/177)
## Version 0.0.6.9.9.9.9 [RELEASE CANDIDATE 007]
* [FRONT][FIX][[Node Documents] In the settings popin, remove the upload button (and also delete button) (#634)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/634)
* [FRONT][FIX] NoList Serialization
* [FRONT][FIX][[Fonts &amp; CSS] Internalise external calls (initiated by alternative themes) (#626)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/626)
* [BACK][FIX] Dev Prelude Refactoring
* [BACK][FIX][Test, file missing (#338)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/338)
* [BACK][FIX][boolexpr has been fixed upstream (#315)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/315)
* [BACK][FIX][[Node Corpus] Creating a corpus from an empty Notes node make a big document with HTML code instead of simple text (#333)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/333)
* [BACK][FIX][Export Data as zip for all exports (#312)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/312)
* [BACK][FIX][Sort by terms is not language-aware (#331)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/331)
* [BACK][FIX][[API search] When an external service is down (HAL or other), display a message with a more explicit text (#335)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/335)
## Version 0.0.6.9.9.9.8.1 [RELEASE CANDIDATE 007]
* [FRONT][FEAT][Make `esc` key close current popup window (#640)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/640)
* [FRONT][FIX][Inconsistency in naming nodes (#641)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/641)
* [FRONT][FIX][Remove All language](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/merge_requests/261)
## Version 0.0.6.9.9.9.8 [RELEASE CANDIDATE 007]
* [BACK][DEPS][wikiparsec fork (#320)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/320)
* [BACK][TEST][Output of `toPhylo` &amp; co non-deterministic? (#329)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/329)
## Version 0.0.6.9.9.9.7.3 [RELEASE CANDIDATE 007]
* [FRONT][FIX][[Forest] Tree node focus: find a way to focus on the current node in the forest layout layout (#556)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/556)
* [FRONT][DOC] Source code url
## Version 0.0.6.9.9.9.7.2.1 [RELEASE CANDIDATE 007]
* [BACK][WIP][Singulars and plurals not grouped anymore (#169)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/169)
## Version 0.0.6.9.9.9.7.2 [RELEASE CANDIDATE 007]
* [BACK][FIX][Error at graph O2 generation (#321)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/321)
## Version 0.0.6.9.9.9.7.1 [RELEASE CANDIDATE 007]
* [BACK][FIX][Singulars and plurals not grouped anymore (#169)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/169)
## Version 0.0.6.9.9.9.7 [RELEASE CANDIDATE 007]
* [BACK][FIX][[API search HAL] On the HAL api, launching the exact same request several times can give different results (#327)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/327)
* [FRONT][FIX][[Node phylo] Phylomemy displays terms with broken accented words (#632)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/632)
## Version 0.0.6.9.9.9.6.7.1 [RELEASE CANDIDATE 007]
* [BACK][FIX] Adding .mailmap file
* [FRONT][FIX][[Tree search] Enrich search results with the path of the node (#638)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/638)
## Version 0.0.6.9.9.9.6.7 [RELEASE CANDIDATE 007]
* [BACK][RELATED][Singulars and plurals not grouped anymore (#169)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/169)
* [FRONT][RELATED][Machting Documents are not displayed anymore in graph (#636)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/636)
## Version 0.0.6.9.9.9.6.6 [RELEASE CANDIDATE 007]
* [BACK][FIX][[Terms] Importing JSON or CSV seems to add new terms to the old ones, rather than overwriting and replacing them all (#313)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/313)
* [BACK][FIX][Coherent Stemming interface (#324)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/324)
* [FRONT][RELATED][[Node phylo] Phylomemy displays terms with broken accented words (#632)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/632)
## Version 0.0.6.9.9.9.6.5.1 [RELEASE CANDIDATE 007]
* [FRONT][FIX][Machting Documents are not displayed anymore in graph (#636)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/636)
## Version 0.0.6.9.9.9.6.5 [RELEASE CANDIDATE 007]
* [BACK][WIP][Singulars and plurals not grouped anymore (#169)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/169)
* [BACK][FEAT][Coherent Stemming interface (#324)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/324)
* [BACK][FIX] Order 1 aligned with first implementation with tests
## Version 0.0.6.9.9.9.6.4 [RELEASE CANDIDATE 007]
* [BACK][FEAT][[Node Phylo] Change the default value of findAncestors to False (#314)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/314)
* [BACK][OPTIM][Export Data as zip for all exports (#312)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/312)
* [BACK][METRICS] SQL queries to follow the community (export to CSV)
## Version 0.0.6.9.9.9.6.3 [Release Candidate for 007]
* [BACK][OPTIM] Option to enable GHC buld with O2 option
......
......@@ -25,7 +25,7 @@ phyloConfig = PhyloConfig {
, similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2}
, seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1}
, defaultMode = True
, findAncestors = True
, findAncestors = False
, phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups}
, phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3}
, timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5}
......
......@@ -11,15 +11,12 @@ Main specifications to index a corpus with a term list
-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-}
module Main where
import Control.Concurrent.Async as CCA (mapConcurrently)
import Control.Monad.IO.Class
import Data.Aeson
import Data.Aeson ( ToJSON, encode )
import Data.List.Split (chunksOf)
import Data.Map.Strict qualified as DM
import Data.Text (pack)
......@@ -28,14 +25,12 @@ import Data.Text.Lazy qualified as DTL
import Data.Text.Lazy.Encoding qualified as TLE
import Data.Tuple.Extra (both)
import Data.Vector qualified as DV
import GHC.Generics
import Gargantext.Core.Text.Context
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers.CSV (readCSVFile, csv_title, csv_abstract, csv_publication_year, fromMIntOrDec, defaultYear)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Prelude hiding (show)
import Protolude
import Gargantext.Core.Text.Terms.WithList ( Patterns, buildPatterns, extractTermsWithList )
import Gargantext.Prelude
import System.IO (hFlush)
------------------------------------------------------------------------
......
module Main where
import Prelude
import Data.TreeDiff.Class
import Data.TreeDiff.Pretty
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import System.Environment (getArgs)
import System.Exit (exitFailure)
import Control.Monad (unless)
import qualified Data.List as L
-- | Renders in a pretty way the content of two golden files. The
-- first file should contain the expected output, the second the
-- actual data generated by the test suite.
main :: IO ()
main = do
(refPath:newPath:_) <- getArgs
ref <- T.lines <$> TIO.readFile refPath
new <- T.lines <$> TIO.readFile newPath
let differences = filter (\(r,n) -> r /= n) $ zip ref new
unless (L.null differences) $ do
putStrLn $ show $ ansiWlEditExpr $ ediff' (map fst differences) (map snd differences)
exitFailure
......@@ -15,22 +15,22 @@ Import a corpus binary.
module Main where
import Data.Either
import qualified Data.Text as Text
import Gargantext.API.Dev (withDevEnv, runCmdGargDev)
import Data.Text qualified as Text
import Gargantext.API.Admin.EnvTypes (DevEnv(..), DevJobHandle(..))
import Gargantext.API.Errors.Types
import Gargantext.API.Dev (withDevEnv, runCmdGargDev)
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Node () -- instances
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Query (Limit)
import Gargantext.Database.Action.Flow (flowCorpusFile, flowAnnuaire, TermType(..))
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Node (CorpusId)
import Gargantext.Database.Query.Tree.Root (MkCorpusUser(MkCorpusUserNormalCorpusName))
import Gargantext.Prelude
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..))
import Gargantext.Utils.Jobs (MonadJobStatus, JobHandle)
import Gargantext.Utils.Jobs.Monad ( MonadJobStatus, JobHandle )
main :: IO ()
main = do
......@@ -46,13 +46,14 @@ main = do
Nothing -> panicTrace $ "Cannot read limit: " <> (Text.pack limit)
Just l -> l
corpus :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
corpus = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) limit' tt format Plain corpusPath Nothing DevJobHandle
mkCorpusUser = MkCorpusUserNormalCorpusName (UserName $ cs user) (cs name :: Text)
corpus = flowCorpusFile mkCorpusUser limit' tt format Plain corpusPath Nothing DevJobHandle
corpusCsvHal :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
corpusCsvHal = flowCorpusFile (UserName $ cs user) (Left (cs name :: Text)) limit' tt CsvHal Plain corpusPath Nothing DevJobHandle
corpusCsvHal = flowCorpusFile mkCorpusUser limit' tt CsvHal Plain corpusPath Nothing DevJobHandle
annuaire :: forall m. (FlowCmdM DevEnv BackendInternalError m, MonadJobStatus m, JobHandle m ~ DevJobHandle) => m CorpusId
annuaire = flowAnnuaire (UserName $ cs user) (Left "Annuaire") (Multi EN) corpusPath DevJobHandle
annuaire = flowAnnuaire (MkCorpusUserNormalCorpusName (UserName $ cs user) "Annuaire") (Multi EN) corpusPath DevJobHandle
{-
let debatCorpus :: forall m. FlowCmdM DevEnv BackendInternalError m => m CorpusId
......
......@@ -15,21 +15,21 @@ Import a corpus binary.
module Main where
import Data.List.NonEmpty qualified as NE
import Gargantext.API.Dev (withDevEnv, runCmdDev)
import Gargantext.API.Errors.Types
import Gargantext.API.Node () -- instances only
import Gargantext.Core.Types.Individu (User(..), arbitraryNewUsers, NewUser(..), arbitraryUsername, GargPassword(..))
import Gargantext.Database.Action.Flow (getOrMkRoot, getOrMk_RootWithCorpus)
import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
import Gargantext.Database.Action.Flow (getOrMkRoot, getOrMkRootWithCorpus)
import Gargantext.Database.Admin.Trigger.Init (initFirstTriggers, initLastTriggers)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd, DBCmd)
import Gargantext.Database.Query.Table.Node (getOrMkList)
import Gargantext.Database.Query.Table.User (insertNewUsers, )
import Gargantext.Database.Query.Tree.Root (MkCorpusUser(MkCorpusUserMaster))
import Gargantext.Prelude
import Gargantext.Prelude.Config (GargConfig(..), readConfig)
import qualified Data.List.NonEmpty as NE
main :: IO ()
......@@ -63,8 +63,7 @@ main = do
initMaster :: Cmd BackendInternalError (UserId, RootId, CorpusId, ListId)
initMaster = do
(masterUserId, masterRootId, masterCorpusId)
<- getOrMk_RootWithCorpus (UserName userMaster)
(Left corpusMasterName)
<- getOrMkRootWithCorpus MkCorpusUserMaster
(Nothing :: Maybe HyperdataCorpus)
masterListId <- getOrMkList masterCorpusId masterUserId
_triggers <- initLastTriggers masterListId
......
......@@ -33,7 +33,7 @@ phyloConfig outdir = PhyloConfig {
, similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2}
, seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1}
, defaultMode = True
, findAncestors = True
, findAncestors = False
, phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups}
, phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3}
, timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5}
......
......@@ -34,6 +34,7 @@ import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseF
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day, csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
import Gargantext.Core.Text.Corpus.Parsers.CSV qualified as Csv
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Phylo
......@@ -42,7 +43,6 @@ import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig, toPeriods, getTimePeriod, getTimeStep)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (hash, replace)
import Prelude qualified
import System.Directory (listDirectory,doesFileExist)
......
......@@ -18,13 +18,13 @@ import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), parseF
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day, csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
import Gargantext.Core.Text.Corpus.Parsers.CSV qualified as Csv
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.PhyloTools (toPeriods, getTimePeriod, getTimeStep)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (hash, replace)
import Prelude qualified
import System.Directory (listDirectory)
......
select count(*), date_trunc('month', n.date) from nodes n where n.typename = 30 group by 2 ORDER BY 2;
\COPY (SELECT count(*), date_trunc('month', n.date) FROM nodes n WHERE n.typename = 30 GROUP BY 2 ORDER BY 2) TO '/tmp/corpora.csv' (FORMAT csv);
select count(*), date_trunc('month', n.date) from nodes n where n.typename = 9 group by 2 ORDER BY 2;
\COPY (SELECT count(*), date_trunc('month', n.date) from nodes n where n.typename = 9 group by 2 ORDER BY 2) TO '/tmp/graphs.csv' (FORMAT csv);
select count(*), date_trunc('month', n.date) from nodes n where n.typename = 90 group by 2 ORDER BY 2;
\COPY (select count(*), date_trunc('month', n.date) from nodes n where n.typename = 90 group by 2 ORDER BY 2) TO '/tmp/phylos.csv' (FORMAT csv);
select count(*), date_trunc('month', n.date) from nodes n where n.typename = 210 group by 2 ORDER BY 2;
\COPY (select count(*), date_trunc('month', n.date) from nodes n where n.typename = 210 group by 2 ORDER BY 2) TO '/tmp/teams.csv' (FORMAT csv);
......
select count(*), date_trunc('month', a.date_joined) from auth_user a group by 2;
\COPY (select count(*), date_trunc('month', a.date_joined) from auth_user a group by 2) TO '/tmp/users.csv' (FORMAT csv);
......
......@@ -18,8 +18,8 @@ fi
# with the `sha256sum` result calculated on the `cabal.project` and
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# cache can kick in.
expected_cabal_project_hash="c9fe39301e8b60bfd183e60e7e25a14cd1c9c66d8739bf9041ca3f4db89db7c6"
expected_cabal_project_freeze_hash="db24c7d3006167102532e3101e2b49bae13d478003459c7d3f1d66590e57740a"
expected_cabal_project_hash="0d3f7f5beed88c1afe95e0df8a91080440ba59049f3610bf2343132635038d22"
expected_cabal_project_freeze_hash="9b2cac3a02e9b129bd80253fc407782bf10c7ed62ed21be41c720d30ed17ef53"
cabal --store-dir=$STORE_DIR v2-build --dry-run
cabal2stack --system-ghc --allow-newer --resolver lts-21.17 --resolver-file devops/stack/lts-21.17.yaml -o stack.yaml
......
......@@ -3,6 +3,7 @@
index-state: 2023-12-10T10:34:46Z
with-compiler: ghc-9.4.7
optimization: 2
packages:
./
......@@ -19,20 +20,16 @@ source-repository-package
subdir: accelerate-llvm-native/
accelerate-llvm/
-- Patch for "Allow NOT to backtrack"
source-repository-package
type: git
location: https://github.com/adinapoli/boolexpr.git
tag: 91928b5d7f9342e9865dde0d94862792d2b88779
location: https://github.com/boolexpr/boolexpr.git
tag: bcd7cb20a1b1bc3b58c4ba1b6ae1bccfe62f67ae
source-repository-package
type: git
location: https://github.com/adinapoli/duckling.git
tag: 23603a832117e5352d5b0fb9bb1110228324b35a
source-repository-package
type: git
location: https://github.com/garganscript/haskell-opaleye.git
tag: 6cf1bcfe215143efac17919cfd0abdd60e0f717c
location: https://gitlab.iscpif.fr/gargantext/opaleye-textsearch.git
tag: cb07b604bfb7a22aa21dd8918de5cb65c8a4bdf1
source-repository-package
type: git
......@@ -41,16 +38,6 @@ source-repository-package
subdir: llvm-hs
llvm-hs-pure
source-repository-package
type: git
location: https://github.com/adinapoli/text16-compat.git
tag: 85533b5d597e6fc5498411b4bcfc76380ec80d71
source-repository-package
type: git
location: https://github.com/adinapoli/wikiparsec.git
tag: b3519a0351ae9515497680571f76200c24dedb53
source-repository-package
type: git
location: https://github.com/alpmestan/accelerate-arithmetic.git
......@@ -106,7 +93,7 @@ source-repository-package
source-repository-package
type: git
location: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
tag: bfa9069b4ff70f341ca3244e8aff9e83eb4b8b73
tag: b99b9e568c8bdc73af2b8016ed03ba5ee83c2030
source-repository-package
type: git
......@@ -121,7 +108,7 @@ source-repository-package
source-repository-package
type: git
location: https://gitlab.iscpif.fr/gargantext/crawlers/openalex.git
tag: cd179f6dda15d77a085c0176284c921b7bc50c46
tag: ceb8f2cebd4890b6d9d151ab01ee14e925bc0499
source-repository-package
type: git
......@@ -136,7 +123,8 @@ source-repository-package
source-repository-package
type: git
location: https://gitlab.iscpif.fr/gargantext/haskell-gargantext-prelude
tag: 618f711a530df56caefbb1577c4bf3d5ff45e214
-- tag: 618f711a530df56caefbb1577c4bf3d5ff45e214
tag: d4f9bee483d41bbdf8ab4e09efb5a5a8216edff4
source-repository-package
type: git
......@@ -177,7 +165,7 @@ source-repository-package
type: git
location: https://github.com/robstewart57/rdf4h.git
tag: 4fd2edf30c141600ffad6d730cc4c1c08a6dbce4
allow-older: *
allow-newer: *
......
This diff is collapsed.
......@@ -29,7 +29,7 @@ USER 1000
RUN stack install aeson aeson-lens aeson-pretty array \
blaze-html blaze-markup bytestring \
conduit conduit-extra containers \
deepseq directory duckling \
deepseq directory \
ekg-core ekg-json exceptions \
fgl filepath formatting \
hashable hsparql http-api-data http-client http-client-tls http-conduit \
......
This diff is collapsed.
......@@ -14,7 +14,16 @@ rec {
})
else pkgs.haskell.compiler.ghc947;
cabal_install_3_10_1_0 = pkgs.haskell.lib.compose.justStaticExecutables pkgs.haskell.packages.ghc947.cabal-install;
graphviz = pkgs.graphviz.overrideAttrs (finalAttrs: previousAttrs: {
graphviz_dev = pkgs.graphviz.overrideAttrs (finalAttrs: previousAttrs: {
version = "11.0.0~dev";
src = pkgs.fetchFromGitLab {
owner = "graphviz";
repo = "graphviz";
rev = "f3ec849249ef9cb824feb7f97449d7159e1dcb4e"; # head as of 2024-03-25, see gargantext#329
hash = "sha256-s86IqWz6zeKbcRqpV3cVQBVviHbhUSX1U8GVuJBfjC4=";
};
});
graphviz = graphviz_dev.overrideAttrs (finalAttrs: previousAttrs: {
# Increase the YY_BUF_SIZE, see https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/290#note_9015
patches = [
(pkgs.fetchpatch {
......
......@@ -7,4 +7,4 @@ LOGFILE=$FOLDER"/"$FILE
mkdir -p $FOLDER
~/.cabal/bin/gargantext-server --ini gargantext.ini --run Prod +RTS > $LOGFILE 2>&1 & tail -F $LOGFILE
nix-shell --run "~/.cabal/bin/gargantext-server --ini gargantext.ini --run Prod +RTS > $LOGFILE 2>&1 & tail -F $LOGFILE"
......@@ -85,9 +85,13 @@ startGargantext mode port file = withLoggerHoisted mode $ \logger -> do
portRouteInfo :: PortNumber -> IO ()
portRouteInfo port = do
putStrLn " ----Main Routes----- "
putStrLn $ "http://localhost:" <> toUrlPiece port <> "/index.html"
putStrLn $ "http://localhost:" <> toUrlPiece port <> "/swagger-ui"
putStrLn "=========================================================================================================="
putStrLn " GarganText Main Routes"
putStrLn "=========================================================================================================="
putStrLn $ " - Web GarganText Frontend..................: " <> "http://localhost:" <> toUrlPiece port <> "/index.html"
putStrLn $ " - Swagger UI (API documentation)...........: " <> "http://localhost:" <> toUrlPiece port <> "/swagger-ui"
putStrLn $ " - Playground GraphQL (API documentation)...: " <> "http://localhost:" <> toUrlPiece port <> "/gql"
putStrLn "=========================================================================================================="
-- | Stops the gargantext server and cancels all the periodic actions
-- scheduled to run up to that point.
......
......@@ -13,11 +13,9 @@ Portability : POSIX
module Gargantext.API.Admin.Auth.Types
where
import Control.Lens hiding (elements, to)
import Data.Aeson.TH (deriveJSON)
import qualified Data.Aeson.TH as JSON
import Data.List (tail)
import Data.Swagger
import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema )
import Gargantext.Core.Types.Individu (Username, GargPassword(..), arbitraryUsername, arbitraryPassword)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Node (NodeId(..), ListId, DocId, UserId (..))
......
......@@ -19,16 +19,13 @@ Count API part of Gargantext.
module Gargantext.API.Count
where
import Data.Aeson hiding (Error)
import Data.Aeson.TH (deriveJSON)
import Data.Either
import Data.Swagger
import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema )
import Data.Text (pack)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Prelude
import Servant
import Servant (JSON, Post)
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Test.QuickCheck.Arbitrary (Arbitrary(..))
-----------------------------------------------------------------------
-- TODO-ACCESS: CanCount
......
......@@ -12,20 +12,23 @@ Portability : POSIX
-- Use only for dev/repl
module Gargantext.API.Dev where
import Control.Lens (view)
import Control.Monad (fail)
import Gargantext.API.Admin.EnvTypes
import Gargantext.API.Admin.Settings
import Gargantext.API.Errors.Types
import Gargantext.API.Prelude
import Data.Pool (withResource)
import Database.PostgreSQL.Simple qualified as PGS
import Gargantext.API.Admin.EnvTypes ( DevEnv(..), Mode(Dev) )
import Gargantext.API.Admin.Settings ( devJwkFile, devSettings, newPool )
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Prelude ( GargM )
import Gargantext.Core.NLP (nlpServerMap)
import Gargantext.Core.NodeStory
import Gargantext.Database.Prelude (Cmd', Cmd'', databaseParameters, runCmd)
import Gargantext.Core.NodeStory (fromDBNodeStoryEnv)
import Gargantext.Database.Prelude (Cmd', Cmd'', connPool, databaseParameters, runCmd)
import Gargantext.Prelude
import Gargantext.Prelude.Config (readConfig)
import Gargantext.Prelude.Mail qualified as Mail
import Gargantext.Prelude.NLP qualified as NLP
import Gargantext.System.Logging
import Servant
import Gargantext.System.Logging ( withLoggerHoisted )
import Servant ( ServerError )
type IniPath = FilePath
-------------------------------------------------------------------
......@@ -67,7 +70,7 @@ runCmdReplServantErr = runCmdRepl
-- using HasConnectionPool and HasRepoVar.
runCmdDev :: Show err => DevEnv -> Cmd'' DevEnv err a -> IO a
runCmdDev env f =
(either (fail . show) pure =<< runCmd env f)
either (fail . show) pure =<< runCmd env f
runCmdGargDev :: DevEnv -> GargM DevEnv BackendInternalError a -> IO a
runCmdGargDev env cmd =
......@@ -81,3 +84,9 @@ runCmdDevServantErr = runCmdDev
runCmdReplEasy :: Cmd'' DevEnv BackendInternalError a -> IO a
runCmdReplEasy f = withDevEnv "gargantext.ini" $ \env -> runCmdDev env f
-- | Execute a function that takes PSQL.Connection from the DB pool as
-- first parameter.
-- e.g.: runCmdReplEasyDB $ \c -> getNodeStory' c
runCmdReplEasyDB :: (PGS.Connection -> IO a) -> IO a
runCmdReplEasyDB f = runCmdReplEasy $ view connPool >>= (\p -> liftBase $ withResource p f)
{-|
Module : Gargantext.API.Errors.Types
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE PolyKinds #-}
{-# LANGUAGE RecordWildCards #-}
......@@ -8,7 +18,6 @@
{-# LANGUAGE StandaloneKindSignatures #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeFamilyDependencies #-}
{-# OPTIONS_GHC -Wno-unused-top-binds #-}
{-# OPTIONS_GHC -Wno-orphans #-} -- instance IsFrontendErrorData and stage restriction
......@@ -36,32 +45,28 @@ module Gargantext.API.Errors.Types (
, genFrontendErr
) where
import Control.Exception
import Control.Lens (makePrisms)
import Control.Monad.Fail (fail)
import Data.Aeson (FromJSON(..), ToJSON(..), Value(..), (.:), (.=), object, withObject, toJSON)
import Data.Aeson (Value(..), (.:), (.=), object, withObject)
import Data.Aeson.Types (typeMismatch, emptyArray)
import Data.List.NonEmpty qualified as NE
import Data.Singletons.TH
import Data.Singletons.TH ( SingI(sing), SingKind(fromSing) )
import Data.Text qualified as T
import Data.Typeable
import Data.Validity (Validation(..), ValidationChain (..), prettyValidation)
import GHC.Generics
import GHC.Stack
import Gargantext.API.Admin.Auth.Types (AuthenticationError)
import Gargantext.API.Errors.Class
import Gargantext.API.Errors.TH
import Gargantext.API.Errors.Class (HasAuthenticationError(..))
import Gargantext.API.Errors.TH ( deriveIsFrontendErrorData )
import Gargantext.API.Errors.Types.Backend
import Gargantext.Core.Types (HasValidationError(..))
import Gargantext.Core.Types.Individu
import Gargantext.Core.Types.Individu (Username)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Query.Tree.Error
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..), NodeError)
import Gargantext.Database.Query.Tree.Error (HasTreeError(..), TreeError)
import Gargantext.Prelude hiding (Location, WithStacktrace)
import Gargantext.Utils.Dict
import Gargantext.Utils.Dict (Dict(..))
import Gargantext.Utils.Jobs.Monad qualified as Jobs
import Servant (ServerError)
import Servant.Job.Core
import Servant.Job.Core ( HasServerError(..) )
import Servant.Job.Types qualified as SJ
import Test.QuickCheck
import Test.QuickCheck.Instances.Text ()
......
......@@ -25,10 +25,10 @@ import Data.Morpheus.Types
import Data.Text (pack)
import Data.Time.Format.ISO8601 (iso8601Show)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Prelude (GargM)
import Gargantext.Core.Types.Search (HyperdataRow(..), toHyperdataRow)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument )
import Gargantext.Database.Admin.Types.Node (ContextTitle, NodeId(..), NodeTypeId, UserId, unNodeId, ContextId (..))
import Gargantext.Database.Prelude (CmdCommon)
import Gargantext.Database.Query.Table.NodeContext (getNodeContext, getContextsForNgramsTerms, ContextForNgramsTerms(..), {- getContextNgrams, -} getContextNgramsMatchingFTS)
......@@ -71,8 +71,6 @@ data HyperdataRowDocumentGQL =
, hrd_source :: Text
, hrd_title :: Text
, hrd_url :: Text
, hrd_uniqId :: Text
, hrd_uniqIdBdd :: Text
} deriving (Generic, GQLType, Show)
data NodeContextGQL = NodeContextGQL
......@@ -216,8 +214,6 @@ toHyperdataRowDocumentGQL hyperdata =
, hrd_source = _hr_source
, hrd_title = _hr_title
, hrd_url = _hr_url
, hrd_uniqId = _hr_uniqId
, hrd_uniqIdBdd = _hr_uniqIdBdd
}
HyperdataRowContact { } -> Nothing
......
......@@ -22,6 +22,7 @@ import Gargantext.API.Admin.Auth.Types
import Gargantext.API.Auth.PolicyCheck
import Gargantext.API.GraphQL.PolicyCheck (withPolicy)
import Gargantext.API.GraphQL.Types
import Gargantext.Core
import Gargantext.Database.Admin.Types.Node (NodeType)
import Gargantext.Database.Admin.Types.Node qualified as NN
import Gargantext.Database.Prelude (CmdCommon) -- , JSONB)
......@@ -43,6 +44,7 @@ data Node = Node
, name :: Text
, parent_id :: Maybe Int
, type_id :: Int
, node_type :: Maybe NodeType
} deriving (Show, Generic, GQLType)
data CorpusArgs
......@@ -113,10 +115,14 @@ dbParentNodes node_id parent_type = do
pure [toNode node]
toNode :: NN.Node json -> Node
toNode N.Node { .. } = Node { id = NN.unNodeId _node_id
toNode N.Node { .. } = Node { id = nid
, name = _node_name
, parent_id = NN.unNodeId <$> _node_parent_id
, type_id = _node_typename }
, type_id = _node_typename
, node_type = lookupDBid _node_typename
}
where
nid = NN.unNodeId _node_id
toCorpus :: NN.Node Value -> Corpus
toCorpus N.Node { .. } = Corpus { id = NN.unNodeId _node_id
......
......@@ -12,8 +12,7 @@ Portability : POSIX
module Gargantext.API.GraphQL.Utils where
import Control.Lens ((^.))
import Control.Lens.Getter (view)
import Control.Lens (view)
import Data.Morpheus.Types (GQLTypeOptions, fieldLabelModifier)
import Data.Text qualified as T
import Gargantext.API.Admin.Auth.Types (AuthenticatedUser (..), auth_node_id)
......
......@@ -9,12 +9,24 @@ Portability : POSIX
-}
module Gargantext.API.Job where
module Gargantext.API.Job (
jobLogStart
, jobLogProgress
, jobLogComplete
, jobLogAddMore
, jobLogFailures
, jobLogFailTotal
, jobLogEvt
, jobLogFailTotalWithMessage
, RemainingSteps(..)
, addErrorEvent
) where
import Control.Lens (over, _Just)
import Data.Text qualified as T
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.Prelude
import Gargantext.Utils.Jobs.Error
newtype RemainingSteps = RemainingSteps { _RemainingSteps :: Int }
deriving (Show, Eq, Num)
......@@ -34,8 +46,8 @@ addEvent level message (JobLog { _scst_events = mEvts, .. }) = JobLog { _scst_ev
, _scev_level = Just level
, _scev_date = Nothing }
addErrorEvent :: T.Text -> JobLog -> JobLog
addErrorEvent message = addEvent "ERROR" message
addErrorEvent :: ToHumanFriendlyError e => e -> JobLog -> JobLog
addErrorEvent message = addEvent "ERROR" (mkHumanFriendly message)
jobLogProgress :: Int -> JobLog -> JobLog
jobLogProgress n jl = over (scst_succeeded . _Just) (+ n) $
......@@ -70,7 +82,7 @@ jobLogFailTotal (JobLog { _scst_succeeded = mSucc
Nothing -> (Nothing, mFail)
Just rem' -> (Just 0, (+ rem') <$> mFail)
jobLogFailTotalWithMessage :: T.Text -> JobLog -> JobLog
jobLogFailTotalWithMessage :: ToHumanFriendlyError e => e -> JobLog -> JobLog
jobLogFailTotalWithMessage message jl = addErrorEvent message $ jobLogFailTotal jl
jobLogEvt :: JobLog -> ScraperEvent -> JobLog
......
This diff is collapsed.
......@@ -10,7 +10,6 @@ Portability : POSIX
-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE ViewPatterns #-}
......@@ -29,21 +28,21 @@ import Data.Text (concat, pack, splitOn)
import Data.Vector (Vector)
import Data.Vector qualified as Vec
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types
import Gargantext.API.Errors.Types
import Gargantext.API.Admin.Orchestrator.Types ( AsyncJobs, JobLog )
import Gargantext.API.Errors.Types (BackendInternalError)
import Gargantext.API.Ngrams (setListNgrams)
import Gargantext.API.Ngrams.List.Types
import Gargantext.API.Ngrams.Prelude (getNgramsList)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Prelude (GargServer, GargM, serverError, HasServerError)
import Gargantext.API.Types
import Gargantext.Core.NodeStory
import Gargantext.API.Types (HTML)
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType(NgramsTerms))
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Database.Action.Flow (reIndexWith)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Database.Admin.Types.Node ( NodeId(_NodeId), ListId )
import Gargantext.Database.Query.Table.Node (getNode)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.Ngrams ( text2ngrams, NgramsId )
import Gargantext.Database.Schema.Node (_node_parent_id)
import Gargantext.Database.Types (Indexed(..))
import Gargantext.Prelude hiding (concat, toList)
......@@ -55,16 +54,18 @@ import Servant
------------------------------------------------------------------------
type GETAPI = Summary "Get List"
:> "lists"
:> Capture "listId" ListId
:> "json"
:> Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
:<|> "lists"
:> "lists"
:> Capture "listId" ListId
:> "csv"
:> Get '[GUS.CSV] (Headers '[Header "Content-Disposition" Text] NgramsTableMap)
:> ( "json"
:> Get '[JSON, HTML] (Headers '[Header "Content-Disposition" Text] NgramsList)
:<|> "json.zip"
:> Get '[GUS.ZIP] (Headers '[Header "Content-Disposition" Text] NgramsListZIP)
:<|> "csv"
:> Get '[GUS.CSV] (Headers '[Header "Content-Disposition" Text] NgramsTableMap) )
getApi :: GargServer GETAPI
getApi = getJson :<|> getCsv
getApi listId = getJson listId
:<|> getJsonZip listId
:<|> getCsv listId
--
-- JSON API
......@@ -94,12 +95,24 @@ getJson lId = do
]
) lst
getJsonZip :: HasNodeStory env err m
=> ListId
-> m (Headers '[Header "Content-Disposition" Text] NgramsListZIP)
getJsonZip lId = do
lst <- getNgramsList lId
let nlz = NgramsListZIP { _nlz_nl = lst, _nlz_list_id = lId}
pure $ addHeader (concat [ "attachment; filename="
, nlzFileName nlz
, ".zip"
]
) nlz
getCsv :: HasNodeStory env err m
=> ListId
-> m (Headers '[Header "Content-Disposition" Text] NgramsTableMap)
getCsv lId = do
lst <- getNgramsList lId
pure $ case Map.lookup TableNgrams.NgramsTerms lst of
pure $ case Map.lookup NgramsTerms lst of
Nothing -> noHeader Map.empty
Just (Versioned { _v_data }) ->
addHeader (concat [ "attachment; filename=GarganText_NgramsList-"
......@@ -138,7 +151,7 @@ postAsyncJSON l ngramsList jobHandle = do
setList :: HasNodeStory env err m => m ()
setList = do
-- TODO check with Version for optim
mapM_ (\(nt, Versioned _v ns) -> (setListNgrams l nt ns)) $ toList ngramsList
mapM_ (\(nt, Versioned _v ns) -> setListNgrams l nt ns) $ toList ngramsList
-- TODO reindex
......
......@@ -14,18 +14,18 @@ Portability : POSIX
module Gargantext.API.Ngrams.NgramsTree
where
import Data.Aeson.TH (deriveJSON)
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Set qualified as Set
import Data.Swagger
import Data.Tree
import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema )
import Data.Tree ( Tree(Node), unfoldForest )
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types (ListType(..), NodeId)
import Gargantext.Core.Types.Main ( ListType(..) )
import Gargantext.Database.Admin.Types.Node ( NodeId )
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Prelude
import Test.QuickCheck
import Test.QuickCheck ( Arbitrary(arbitrary) )
type Children = Text
type Root = Text
......
......@@ -20,15 +20,14 @@ import Data.List qualified as List
import Data.Map.Strict (fromList)
import Data.Map.Strict qualified as Map
import Data.Text qualified as Text
import Data.Validity
import Gargantext.API.Ngrams (getNgramsTableMap)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Types (ListType)
import Gargantext.Core.Text.List.Social.Prelude ( unPatchMapToHashMap )
import Gargantext.Core.Text.Ngrams (NgramsType, ngramsTypes)
import Gargantext.Core.Types.Main ( ListType )
import Gargantext.Database.Admin.Types.Node (ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType, ngramsTypes)
import Gargantext.Prelude
......@@ -62,7 +61,7 @@ toTermList lt nt nl = toTermList' lt <$> Map.lookup nt nl
where
toTerm = Text.splitOn " " . unNgramsTerm
(roots, children) = List.partition (\(_t, nre) -> view nre_root nre == Nothing)
(roots, children) = List.partition (\(_t, nre) -> isNothing (view nre_root nre))
$ List.filter (\(_t,nre) -> view nre_list nre == lt'') ns
roots' = map (\(t,nre) -> (t, map toTerm $ unMSet $ view nre_children nre )) roots
......
......@@ -11,23 +11,23 @@ Portability : POSIX
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
module Gargantext.API.Ngrams.Tools
where
-- import Gargantext.Core.NodeStoryFile qualified as NSF
import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
import Control.Monad.Reader
import Control.Lens (_Just, at, ix, view, At, Index, IxValue)
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HM
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Validity
-- import GHC.Conc (TVar, readTVar)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory
import Gargantext.Core.Types (ListType(..), NodeId, ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.API.Ngrams.Types ( NgramsTerm(unNgramsTerm), NgramsRepoElement(_nre_root, _nre_list) )
import Gargantext.Core.NodeStory.Types
import Gargantext.Core.Text.Ngrams (NgramsType)
import Gargantext.Core.Types.Main ( ListType(..) )
import Gargantext.Database.Admin.Types.Node ( NodeId, ListId )
import Gargantext.Prelude
......@@ -93,7 +93,7 @@ listNgramsFromRepo nodeIds ngramsType repo =
^. unNodeStory
. at nodeId . _Just
. a_state
. at ngramsType . _Just
. ix ngramsType
| nodeId <- nodeIds
]
......@@ -153,7 +153,7 @@ filterListWithRoot :: [ListType]
filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
where
isMapTerm (l, maybeRoot) = case maybeRoot of
Nothing -> elem l lt
Nothing -> l `elem` lt
Just r -> case HM.lookup r m of
Nothing -> panicTrace $ "[Garg.API.Ngrams.Tools] filterListWithRoot, unknown key: " <> unNgramsTerm r
Just (l',_) -> elem l' lt
......@@ -175,7 +175,7 @@ groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
Nothing -> (t, ns)
Just r' -> (r',ns)
data Diagonal = Diagonal Bool
newtype Diagonal = Diagonal Bool
getCoocByNgrams :: Diagonal
-> HashMap NgramsTerm (Set NodeId)
......
This diff is collapsed.
......@@ -28,27 +28,25 @@ Node API
module Gargantext.API.Node
where
import Control.Lens ((^.))
import Data.Aeson (FromJSON, ToJSON)
import Data.Aeson.TH (deriveJSON)
import Data.Swagger
import Data.Swagger (ToSchema)
import Gargantext.API.Admin.Auth (withAccess, withPolicy)
import Gargantext.API.Admin.Auth.Types (PathId(..), AuthenticatedUser (..), auth_node_id)
import Gargantext.API.Admin.EnvTypes
import Gargantext.API.Auth.PolicyCheck
import Gargantext.API.Errors.Types
import Gargantext.API.Admin.EnvTypes (Env)
import Gargantext.API.Auth.PolicyCheck ( PolicyChecked, nodeChecks )
import Gargantext.API.Errors.Types (BackendInternalError)
import Gargantext.API.Metrics
import Gargantext.API.Ngrams (TableNgramsApi, apiNgramsTableCorpus)
import Gargantext.API.Ngrams.Types (TabType(..))
import Gargantext.API.Node.DocumentUpload qualified as DocumentUpload
import Gargantext.API.Node.File
import Gargantext.API.Node.DocumentsFromWriteNodes qualified as DFWN
import Gargantext.API.Node.File ( FileAsyncApi, FileApi, fileApi, fileAsyncApi )
import Gargantext.API.Node.FrameCalcUpload qualified as FrameCalcUpload
import Gargantext.API.Node.New
import Gargantext.API.Node.New ( PostNodeAsync, PostNode, postNode, postNodeAsyncAPI )
import Gargantext.API.Node.Share qualified as Share
import Gargantext.API.Node.Update qualified as Update
import Gargantext.API.Prelude
import Gargantext.API.Prelude ( GargM, GargServer )
import Gargantext.API.Search qualified as Search
import Gargantext.API.Table
import Gargantext.API.Table ( TableApi, tableApi, getPair )
import Gargantext.Core.Types (NodeTableResult)
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Types.Main (Tree, NodeTree)
......@@ -57,10 +55,11 @@ import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Core.Viz.Phylo.API (PhyloAPI, phyloAPI)
import Gargantext.Database.Action.Delete qualified as Action (deleteNode)
import Gargantext.Database.Action.Flow.Pairing (pairing)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Hyperdata.Prelude ( HyperdataC )
import Gargantext.Database.Admin.Types.Hyperdata.User ( HyperdataUser )
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude (Cmd, JSONB)
import Gargantext.Database.Query.Facet (FacetDoc, OrderBy(..))
import Gargantext.Database.Query.Facet.Types ( FacetDoc, OrderBy(..) )
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node.Children (getChildren)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
......@@ -74,7 +73,6 @@ import Gargantext.Prelude
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Gargantext.API.Node.DocumentsFromWriteNodes as DFWN
-- | Admin NodesAPI
......@@ -134,7 +132,7 @@ type NodeAPI a = PolicyChecked (NodeNodeAPI a)
:<|> "category" :> CatApi
:<|> "score" :> ScoreApi
:<|> "search" :> (Search.API Search.SearchResult)
:<|> "search" :> Search.API Search.SearchResult
:<|> "share" :> Share.API
-- Pairing utilities
......
......@@ -12,9 +12,7 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}
......@@ -22,35 +20,31 @@ Portability : POSIX
module Gargantext.API.Node.Contact
where
import Conduit
import Conduit ( yield )
import Data.Aeson
import Data.Either (Either(Right))
import Data.Maybe (Maybe(..))
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Gargantext.API.Admin.Auth.Types
import Data.Swagger ( ToSchema )
import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser(AuthenticatedUser) )
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types
import Gargantext.API.Node
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Node ( nodeNodeAPI, NodeNodeAPI )
import Gargantext.API.Prelude (GargM, simuLogs)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow (flow)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataAnnuaire(..), HyperdataContact)
import Gargantext.Database.Admin.Types.Hyperdata.Contact (hyperdataContact)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Prelude (($), {-printDebug,-})
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact, hyperdataContact )
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataAnnuaire(..) )
import Gargantext.Database.Admin.Types.Node ( CorpusId, NodeId )
import Gargantext.Prelude (($), Generic, Maybe(..), Text)
import Gargantext.Utils.Aeson qualified as GUA
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import qualified Gargantext.Utils.Aeson as GUA
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary ( Arbitrary(arbitrary) )
import Gargantext.Database.Query.Tree.Root (MkCorpusUser(MkCorpusUserNormalCorpusIds))
------------------------------------------------------------------------
type API = "contact" :> Summary "Contact endpoint"
......@@ -88,7 +82,7 @@ addContact :: (HasSettings env, FlowCmdM env err m, MonadJobStatus m)
addContact u nId (AddContactParams fn ln) jobHandle = do
markStarted 2 jobHandle
_ <- flow (Nothing :: Maybe HyperdataAnnuaire) u (Right [nId]) (Multi EN) Nothing (1, yield $ hyperdataContact fn ln) jobHandle
_ <- flow (Nothing :: Maybe HyperdataAnnuaire) (MkCorpusUserNormalCorpusIds u [nId]) (Multi EN) Nothing (1, yield $ hyperdataContact fn ln) jobHandle
markComplete jobHandle
addContact _uId _nId _p jobHandle = do
......
......@@ -22,22 +22,23 @@ import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Text (pack)
import Gargantext.API.Ngrams.Tools (filterListWithRoot, mapTermListRoot, getRepo)
import Gargantext.API.Ngrams.Types
import Gargantext.API.Node.Corpus.Export.Types
import Gargantext.API.Ngrams.Types ( NgramsTerm(unNgramsTerm) )
import Gargantext.API.Node.Corpus.Export.Types ( Corpus(..) )
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.API.Prelude (GargNoServer)
import Gargantext.Core.NodeStory
import Gargantext.Core.Types
import Gargantext.Core.NodeStory.Types ( NodeListStory )
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types.Main ( ListType(MapTerm) )
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Action.Metrics.NgramsByContext (getNgramsByContextOnlyUser)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Node ( defaultList )
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Context (_context_id, _context_hyperdata)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Database.Schema.Context (_context_id)
import Gargantext.Prelude hiding (hash)
import Gargantext.Prelude.Crypto.Hash (hash)
import Servant (Headers, Header, addHeader)
......@@ -51,9 +52,7 @@ getCorpus :: CorpusId
getCorpus cId lId nt' = do
let
nt = case nt' of
Nothing -> NgramsTerms
Just t -> t
nt = fromMaybe NgramsTerms nt'
listId <- case lId of
Nothing -> defaultList cId
......@@ -75,10 +74,10 @@ getCorpus cId lId nt' = do
) ns (Map.map (Set.map unNgramsTerm) ngs)
where
d_hash :: Context HyperdataDocument -> Set Text -> Text
d_hash a b = hash [ fromMaybe "" (_hd_uniqId $ _context_hyperdata a)
, hash b
d_hash _a b = hash [ -- fromMaybe "" (_hd_uniqId $ _context_hyperdata a),
hash b
]
pure $ addHeader ("attachment; filename=GarganText_corpus-" <> (pack $ show cId) <> ".json")
pure $ addHeader ("attachment; filename=GarganText_corpus-" <> pack (show cId) <> ".json")
$ Corpus { _c_corpus = Map.elems r
, _c_hash = hash $ List.map DocumentExport._d_hash $ Map.elems r }
......
......@@ -14,13 +14,13 @@ Portability : POSIX
module Gargantext.API.Node.Corpus.Export.Types where
import Data.Aeson.TH (deriveJSON)
import Data.Swagger
import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema, ToParamSchema(..) )
import Data.Text (Text)
import GHC.Generics (Generic)
import qualified Gargantext.API.Node.Document.Export.Types as DocumentExport
import Gargantext.Core.Types
import Gargantext.API.Node.Document.Export.Types qualified as DocumentExport
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types ( ListId, TODO )
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Servant
......
......@@ -21,27 +21,27 @@ module Gargantext.API.Node.Corpus.New
import Conduit
import Control.Lens hiding (elements, Empty)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Control.Lens ( view, non )
import Data.Aeson ( genericParseJSON, genericToJSON )
import Data.ByteString.Base64 qualified as BSB64
import Data.Conduit.Internal (zipSources)
import Data.Swagger
import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema )
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import EPO.API.Client.Types qualified as EPO
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Ngrams (commitStatePatch, Versioned(..))
import Gargantext.API.Node.Corpus.New.Types
import Gargantext.API.Node.Corpus.Searx
import Gargantext.API.Node.Corpus.Types
import Gargantext.API.Node.Corpus.New.Types ( FileFormat(..), FileType(..) )
import Gargantext.API.Node.Corpus.Searx ( triggerSearxSearch )
import Gargantext.API.Node.Corpus.Types ( Database, Datafield(Web), database2origin )
import Gargantext.API.Node.Corpus.Update (addLanguageToCorpus)
import Gargantext.API.Node.Types
import Gargantext.Core (Lang(..), withDefaultLanguage, defaultLanguage)
import Gargantext.Core.NodeStory (HasNodeStoryImmediateSaver, HasNodeArchiveStoryImmediateSaver, currentVersion, NgramsStatePatch')
import Gargantext.Core.Text.Corpus.API qualified as API
import Gargantext.Core.Text.Corpus.Parsers qualified as Parser (FileType(..), parseFormatC)
import Gargantext.API.Admin.Orchestrator.Types qualified as API
import Gargantext.Core.Text.Corpus.Query qualified as API
import Gargantext.Core.Text.Corpus.Parsers qualified as Parser (FileType(..), parseFormatC, _ParseFormatError)
import Gargantext.Core.Text.List.Social (FlowSocialListWith(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
......@@ -50,20 +50,22 @@ import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Mail (sendMail)
import Gargantext.Database.Action.Node (mkNodeWithParent)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Hyperdata.Document ( ToHyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Hyperdata.File ( HyperdataFile(..) )
import Gargantext.Database.Admin.Types.Node (CorpusId, NodeType(..), ParentId)
import Gargantext.Database.GargDB qualified as GargDB
import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Node (getNodeWith, getOrMkList)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Query.Tree.Root (MkCorpusUser(MkCorpusUserNormalCorpusIds))
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
import Gargantext.Prelude.Config (gc_max_docs_parsers)
import Gargantext.System.Logging
import Gargantext.Utils.Jobs (JobHandle, MonadJobStatus(..))
import Servant
import Gargantext.System.Logging ( logLocM, LogLevel(..) )
import Gargantext.Utils.Jobs.Monad (JobHandle, MonadJobStatus(..))
import Servant ( JSON, type (:>), FormUrlEncoded, Capture, Summary )
import Servant.Job.Utils (jsonOptions)
import Test.QuickCheck.Arbitrary
import Test.QuickCheck.Arbitrary (Arbitrary(..))
------------------------------------------------------------------------
{-
......@@ -253,7 +255,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
$(logLocM) DEBUG $ T.pack $ "corpus id " <> show corpusId
_ <- commitCorpus cid user
-- printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text)
sendMail user
-- TODO ...
......@@ -261,8 +263,8 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
Left err -> do
-- printDebug "Error: " err
$(logLocM) ERROR (T.pack $ show err)
markFailed (Just $ T.pack (show err)) jobHandle
$(logLocM) ERROR (T.pack $ show err) -- log the full error
markFailed (Just err) jobHandle
type AddWithForm = Summary "Add with FormUrlEncoded to corpus endpoint"
:> "corpus"
......@@ -335,8 +337,7 @@ addToCorpusWithForm user cid nwf jobHandle = do
-- TODO granularity of the logStatus
-- printDebug "flowCorpus with (corpus_id, lang)" (cid, l)
_cid' <- flowCorpus user
(Right [cid])
_cid' <- flowCorpus (MkCorpusUserNormalCorpusIds user [cid])
(Multi l)
(Just (nwf ^. wf_selection))
--(Just $ fromIntegral $ length docs, docsC')
......@@ -352,9 +353,9 @@ addToCorpusWithForm user cid nwf jobHandle = do
--sendMail user
markComplete jobHandle
Left e -> do
printDebug "[addToCorpusWithForm] parse error" e
markFailed (Just e) jobHandle
Left parseErr -> do
$(logLocM) ERROR $ "parse error: " <> (Parser._ParseFormatError parseErr)
markFailed (Just parseErr) jobHandle
{-
addToCorpusWithFile :: FlowCmdM env err m
......
......@@ -12,24 +12,22 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.Corpus.New.File
where
import Control.Lens ((.~), (?~))
import Data.Maybe
import Control.Lens ((?~))
import Data.Swagger
import Gargantext.API.Node.Corpus.New.Types
import Gargantext.API.Node.Corpus.New.Types (FileFormat, FileType)
import Gargantext.Core.Types (TODO)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Prelude hiding (hash)
import Gargantext.Prelude.Crypto.Hash (hash)
import Servant
import Servant.Multipart
import Servant.Swagger.Internal
import Servant ( JSON, type (:>), Post, QueryParam, Summary )
import Servant.Multipart ( Input(iName), Mem, MultipartData(inputs), MultipartForm )
import Servant.Swagger.Internal ( addParam, HasSwagger(..) )
-------------------------------------------------------------
type Hash = Text
......
......@@ -15,7 +15,6 @@ module Gargantext.API.Node.Corpus.Searx where
import Control.Lens (view)
import Data.Aeson qualified as Aeson
import Data.Aeson.TH (deriveJSON)
import Data.HashMap.Strict qualified as HashMap
import Data.Text qualified as T
import Data.Text qualified as Text
......@@ -24,8 +23,8 @@ import Data.Time.Format (defaultTimeLocale, formatTime, parseTimeM)
import Data.Tuple.Select (sel1, sel2, sel3)
import Gargantext.Core (Lang(..))
import Gargantext.Core.NLP (HasNLPServer, nlpServerGet)
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core.Text.Corpus.API qualified as API
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Corpus.Query qualified as Query
import Gargantext.Core.Text.List (buildNgramsLists)
import Gargantext.Core.Text.List.Group.WithStem ({-StopSize(..),-} GroupParams(..))
import Gargantext.Core.Text.Terms (TermType(..))
......@@ -35,26 +34,23 @@ import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus) --, DataText(..))
import Gargantext.Database.Action.Flow.List (flowList_DbRepo)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node (CorpusId, ListId, NodeType(NodeTexts))
import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Node (getOrMkList)
import Gargantext.Database.Query.Table.Node (insertDefaultNodeIfNotExists)
import Gargantext.Database.Query.Table.Node (getOrMkList, insertDefaultNodeIfNotExists)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Tree.Error (HasTreeError)
import Gargantext.Database.Query.Tree.Root (getOrMk_RootWithCorpus)
import Gargantext.Database.Query.Tree.Root (getOrMkRootWithCorpus, MkCorpusUser (MkCorpusUserMaster))
import Gargantext.Prelude hiding (All)
import Gargantext.Prelude.Config
import Gargantext.Utils.Jobs (JobHandle, MonadJobStatus(..))
import Gargantext.Prelude.Config (GargConfig(..))
import Gargantext.Utils.Jobs.Monad (JobHandle, MonadJobStatus(..))
import Network.HTTP.Client
import Network.HTTP.Client.TLS
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Prelude qualified
langToSearx :: Lang -> Text
langToSearx All = "en-US"
langToSearx x = (Text.toLower acronym) <> "-" <> acronym
langToSearx x = Text.toLower acronym <> "-" <> acronym
where
acronym = show x
......@@ -137,7 +133,7 @@ insertSearxResponse user cId listId l (Right (SearxResponse { _srs_results })) =
-- docs :: [Either Text HyperdataDocument]
let docs = hyperdataDocumentFromSearxResult l <$> _srs_results
--printDebug "[triggerSearxSearch] docs" docs
let docs' = catMaybes $ rightToMaybe <$> docs
let docs' = mapMaybe rightToMaybe docs
{-
Prelude.mapM_ (\(HyperdataDocument { _hd_title, _hd_publication_year, _hd_publication_date }) -> do
printDebug "[triggerSearxSearch] doc time" $
......@@ -150,7 +146,7 @@ insertSearxResponse user cId listId l (Right (SearxResponse { _srs_results })) =
let mCorpus = Nothing :: Maybe HyperdataCorpus
void $ addDocumentsToHyperCorpus server mCorpus (Multi l) cId docs'
(_masterUserId, _masterRootId, masterCorpusId)
<- getOrMk_RootWithCorpus (UserName userMaster) (Left "") mCorpus
<- getOrMkRootWithCorpus MkCorpusUserMaster mCorpus
let gp = GroupWithPosTag l server HashMap.empty
-- gp = case l of
-- FR -> GroupWithPosTag l Spacy HashMap.empty
......@@ -170,7 +166,7 @@ triggerSearxSearch :: ( MonadBase IO m
, MonadJobStatus m )
=> User
-> CorpusId
-> API.RawQuery
-> Query.RawQuery
-> Lang
-> JobHandle m
-> m ()
......@@ -198,7 +194,7 @@ triggerSearxSearch user cId q l jobHandle = do
res <- liftBase $ fetchSearxPage $ FetchSearxParams { _fsp_language = l
, _fsp_manager = manager
, _fsp_pageno = page
, _fsp_query = API.getRawQuery q
, _fsp_query = Query.getRawQuery q
, _fsp_url = surl }
insertSearxResponse user cId listId l res
......@@ -215,16 +211,14 @@ hyperdataDocumentFromSearxResult l (SearxResult { _sr_content, _sr_engine, _sr_p
Right HyperdataDocument { _hd_bdd = Just "Searx"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just _sr_title
, _hd_authors = Nothing
, _hd_institutes = Nothing
, _hd_source = Just _sr_engine
, _hd_abstract = _sr_content
, _hd_publication_date = T.pack <$> formatTime defaultTimeLocale "%Y-%m-%dT%H:%M:%S" <$> mDate
, _hd_publication_year = fromIntegral <$> sel1 <$> mGregorian
, _hd_publication_date = T.pack Prelude.. formatTime defaultTimeLocale "%Y-%m-%dT%H:%M:%S" Prelude.<$> mDate
, _hd_publication_year = fromIntegral Prelude.. sel1 Prelude.<$> mGregorian
, _hd_publication_month = sel2 <$> mGregorian
, _hd_publication_day = sel3 <$> mGregorian
, _hd_publication_hour = Nothing
......
......@@ -13,17 +13,16 @@ Portability : POSIX
module Gargantext.API.Node.Corpus.Types where
import Control.Lens hiding (elements, Empty)
import Control.Lens ( (?~) )
import Control.Monad.Fail (fail)
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Aeson ( Value(..), (.:), withText, object )
import Data.Swagger
import Data.Text qualified as T
import Gargantext.API.Admin.Orchestrator.Types qualified as Types
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Action.Flow (DataOrigin(..))
import Gargantext.Database.Action.Flow.Types (DataOrigin(..))
import Gargantext.Prelude
import Test.QuickCheck
import Test.QuickCheck (Arbitrary(..), oneof, arbitraryBoundedEnum)
data Database = Empty
| OpenAlex
......
......@@ -12,28 +12,31 @@ module Gargantext.API.Node.Document.Export
where
import Control.Lens (view)
import Data.ByteString.Lazy.Char8 qualified as BSC
import Data.Csv (encodeDefaultOrderedByName)
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Data.Time.Clock.System (getSystemTime, systemSeconds)
import Data.Version (showVersion)
import Gargantext.API.Node.Document.Export.Types
import Gargantext.API.Prelude (GargNoServer, GargServer)
import Gargantext.Core (toDBid)
import Gargantext.Core.Types
import Gargantext.Database.Admin.Types.Node (DocId, NodeId, NodeType(..))
import Gargantext.Database.Query.Facet (runViewDocuments, Facet(..))
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType)
import Gargantext.Database.Query.Table.Node.User
import Gargantext.Database.Query.Table.Node.User ( getNodeUser )
import Gargantext.Database.Schema.Node (NodePoly(..), node_user_id)
import Gargantext.Prelude
import Servant
import qualified Data.ByteString.Lazy.Char8 as BSC
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Paths_gargantext as PG -- cabal magic build module
import Paths_gargantext qualified as PG -- cabal magic build module
import Servant ( addHeader, (:<|>)((:<|>)), Header, Headers(getResponse) )
import Data.Time.LocalTime (getCurrentTimeZone, TimeZone (timeZoneMinutes))
-- | Per-document export API handler: wires the three export routes
-- (JSON, zipped JSON, CSV) to their handlers for one document.
api :: NodeId
-- ^ The ID of the target user
-> DocId
-> GargServer API
api userNodeId dId = getDocumentsJSON userNodeId dId
:<|> getDocumentsJSONZip userNodeId dId
:<|> getDocumentsCSV userNodeId dId
--------------------------------------------------
......@@ -47,11 +50,12 @@ getDocumentsJSON nodeUserId pId = do
mcId <- getClosestParentIdByType pId NodeCorpus
let cId = maybe (panicTrace "[G.A.N.D.Export] Node has no parent") identity mcId
docs <- runViewDocuments cId False Nothing Nothing Nothing Nothing Nothing
pure $ addHeader (T.concat [ "attachment; filename=GarganText_DocsList-"
, T.pack $ show pId
, ".json"])
DocumentExport { _de_documents = mapFacetDoc uId <$> docs
, _de_garg_version = T.pack $ showVersion PG.version }
let dexp = DocumentExport { _de_documents = mapFacetDoc uId <$> docs
, _de_garg_version = T.pack $ showVersion PG.version }
pure $ addHeader (T.concat [ "attachment; filename="
, "GarganText_DocsList-"
, T.pack (show pId)
, ".json" ]) dexp
where
mapFacetDoc uId (FacetDoc { .. }) =
Document { _d_document =
......@@ -71,6 +75,24 @@ getDocumentsJSON nodeUserId pId = do
, _ng_hash = "" }
, _d_hash = ""}
-- | Like 'getDocumentsJSON', but wraps the export in a 'DocumentExportZIP'
-- (served as a ZIP archive) together with a last-modified timestamp for
-- the archive entry.
getDocumentsJSONZip :: NodeId
-- ^ The Node ID of the target user
-> DocId
-> GargNoServer (Headers '[Header "Content-Disposition" T.Text] DocumentExportZIP) -- [Document]
getDocumentsJSONZip userNodeId pId = do
dJSON <- getDocumentsJSON userNodeId pId
systime <- liftBase getSystemTime
tz <- liftBase getCurrentTimeZone
-- Reuse the JSON export; strip the Content-Disposition header it carries.
let dexp = getResponse dJSON
let dexpz = DocumentExportZIP { _dez_dexp = dexp
, _dez_doc_id = pId
-- see https://github.com/jgm/zip-archive/commit/efe4423a9a2b1dc2a4d413917a933828d3f8dc0f
-- NOTE(review): zip-archive apparently treats entry times as local
-- time, hence epoch seconds shifted by the server's UTC offset.
, _dez_last_modified = fromIntegral (systemSeconds systime) +
fromIntegral (timeZoneMinutes tz * 60) }
pure $ addHeader (T.concat [ "attachment; filename="
, dezFileName dexpz
, ".zip" ]) dexpz
getDocumentsCSV :: NodeId
-- ^ The Node ID of the target user
-> DocId
......
......@@ -13,19 +13,21 @@ Portability : POSIX
module Gargantext.API.Node.Document.Export.Types where
import Data.Aeson (encode)
import Data.Aeson.TH (deriveJSON)
import Data.Csv (DefaultOrdered(..), ToNamedRecord(..), (.=), header, namedRecord)
import Data.Swagger
--import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import Gargantext.Core.Types
import Data.Swagger ( genericDeclareNamedSchema, ToParamSchema(..), ToSchema(..) )
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Gargantext.Core.Types ( Node, TODO )
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Database.Admin.Types.Node (DocId)
import Gargantext.Database.Schema.Node (NodePoly(..))
--import Gargantext.Utils.Servant (CSV)
import Gargantext.Utils.Servant (ZIP)
import Gargantext.Utils.Zip (zipContentsPureWithLastModified)
import Protolude
--import Protolude.Partial (read)
import Servant
import Servant ((:>), (:<|>), Get, Header, Headers(..), JSON, MimeRender(..), PlainText, Summary)
-- | Document Export
......@@ -34,6 +36,13 @@ data DocumentExport =
, _de_garg_version :: Text
} deriving (Generic)
-- | This is to represent a zipped document export. We want to have doc_id in zipped file name.
data DocumentExportZIP =
DocumentExportZIP { _dez_dexp :: DocumentExport -- ^ the wrapped export payload
, _dez_doc_id :: DocId -- ^ embedded in the ZIP entry file name
, _dez_last_modified :: Integer } deriving (Generic) -- ^ epoch seconds for the ZIP entry timestamp
data Document =
Document { _d_document :: Node HyperdataDocument
, _d_ngrams :: Ngrams
......@@ -71,6 +80,9 @@ type Hash = Text
instance ToSchema DocumentExport where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_de_")
-- | Swagger schema: derive generically, dropping the "_dez_" field prefix.
instance ToSchema DocumentExportZIP where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_dez_")
instance ToSchema Document where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_d_")
......@@ -81,6 +93,9 @@ instance ToSchema Ngrams where
instance ToParamSchema DocumentExport where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
-- | Placeholder param schema (delegates to the TODO proxy).
instance ToParamSchema DocumentExportZIP where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance ToParamSchema Document where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
......@@ -90,10 +105,25 @@ instance ToParamSchema Ngrams where
type API = Summary "Document Export"
:> "export"
:> ( "json"
:> Get '[JSON] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExport)
:> Get '[JSON] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExport)
:<|> "json.zip"
:> Get '[ZIP] (Headers '[Servant.Header "Content-Disposition" Text] DocumentExportZIP)
:<|> "csv"
:> Get '[PlainText] (Headers '[Servant.Header "Content-Disposition" Text] Text)) -- [Document])
:> Get '[PlainText] (Headers '[Servant.Header "Content-Disposition" Text] Text) )
$(deriveJSON (unPrefix "_ng_") ''Ngrams)
$(deriveJSON (unPrefix "_d_") ''Document)
$(deriveJSON (unPrefix "_de_") ''DocumentExport)
------
-- Needs to be here because of deriveJSON TH above
-- | Base file name for a zipped docs export; embeds the document id so
-- downloads are distinguishable.
dezFileName :: DocumentExportZIP -> Text
dezFileName dz = "GarganText_DocsList-" <> show (_dez_doc_id dz) <> ".json"
-- | Render as an actual ZIP archive: a single JSON entry named after
-- 'dezFileName', stamped with the stored last-modified time.
instance MimeRender ZIP DocumentExportZIP where
mimeRender _ dexpz@(DocumentExportZIP { .. }) =
zipContentsPureWithLastModified (T.unpack $ dezFileName dexpz) (encode _dez_dexp) _dez_last_modified
......@@ -16,28 +16,28 @@ Portability : POSIX
module Gargantext.API.Node.DocumentUpload where
import Control.Lens (makeLenses, view)
import Data.Aeson
import Control.Lens (view)
import Data.Aeson ( Options(..), genericParseJSON, defaultOptions, genericToJSON, SumEncoding(..) )
import Data.Swagger (ToSchema)
import Data.Text qualified as T
import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Errors.Types
import Gargantext.API.Prelude
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Prelude ( GargM )
import Gargantext.Core (Lang(..))
import Gargantext.Core.NLP (nlpServerGet)
import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit)
import Gargantext.Core.Text.Terms (TermType(..))
import Gargantext.Core.Utils.Prefix (unCapitalize, dropPrefix)
import Gargantext.Database.Action.Flow (addDocumentsToHyperCorpus)
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Admin.Types.Hyperdata.Corpus
import Gargantext.Database.Action.Flow.Types ( FlowCmdM )
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( HyperdataCorpus )
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Node ( DocId, NodeId, NodeType(NodeCorpus) )
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType')
import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Servant
import Servant ( JSON, Summary, type (:>), HasServer(ServerT) )
data DocumentUpload = DocumentUpload
......@@ -108,8 +108,6 @@ documentUpload nId doc = do
let hd = HyperdataDocument { _hd_bdd = Nothing
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just $ if view du_title doc == "" then T.take 50 (view du_abstract doc) else view du_title doc
, _hd_authors = Just $ view du_authors doc
......
......@@ -16,17 +16,16 @@ Portability : POSIX
module Gargantext.API.Node.DocumentsFromWriteNodes
where
import Conduit
import Control.Lens ((^.))
import Data.Aeson
import Conduit ( yieldMany )
import Data.Aeson ( genericParseJSON, defaultOptions, genericToJSON )
import Data.List qualified as List
import Data.Swagger
import Data.Swagger ( ToSchema )
import Data.Text qualified as T
import Gargantext.API.Admin.Auth.Types
import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser, auth_node_id, auth_user_id )
import Gargantext.API.Admin.EnvTypes (Env, GargJob(..))
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Ngrams (commitStatePatch, Versioned(..))
import Gargantext.API.Prelude (GargM)
import Gargantext.Core (Lang(..))
......@@ -39,13 +38,15 @@ import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow (flowDataText, DataText(..))
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Frame
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Hyperdata.Frame ( HyperdataFrame(..), getHyperdataFrameContents )
import Gargantext.Database.Admin.Types.Node ( NodeId, Node, NodeType(..) )
import Gargantext.Database.Query.Table.Node (getChildrenByType, getClosestParentIdByType', getNodeWith, getOrMkList)
import Gargantext.Database.Schema.Node (node_hyperdata, node_name, node_date)
import Gargantext.Prelude
import Gargantext.System.Logging
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Servant
import Gargantext.Utils.Jobs.Error
import Servant ( JSON, Summary, type (:>), HasServer(ServerT) )
------------------------------------------------------------------------
type API = Summary " Documents from Write nodes."
......@@ -91,8 +92,9 @@ documentsFromWriteNodes authenticatedUser nId Params { selection, lang, paragrap
cId <- case mcId of
Just cId -> pure cId
Nothing -> do
let msg = T.pack $ "[G.A.N.DFWN] Node has no corpus parent: " <> show nId
markFailed (Just msg) jobHandle
let msg = T.pack $ "Node has no corpus parent: " <> show nId
$(logLocM) ERROR msg
markFailed (Just $ UnsafeMkHumanFriendlyErrorText "The requested node has no corpus parent.") jobHandle
panicTrace msg
frameWriteIds <- getChildrenByType nId Notes
......@@ -106,7 +108,7 @@ documentsFromWriteNodes authenticatedUser nId Params { selection, lang, paragrap
pure (node, contents)
) frameWrites
let paragraphs' = fromMaybe (7 :: Int) $ (readMaybe $ T.unpack paragraphs)
let paragraphs' = fromMaybe (7 :: Int) $ readMaybe (T.unpack paragraphs)
let parsedE = (\(node, contents)
-> hyperdataDocumentFromFrameWrite lang paragraphs' (node, contents)) <$> frameWritesWithContents
let parsed = List.concat $ rights parsedE
......@@ -159,8 +161,6 @@ hyperdataDocumentFromFrameWrite lang paragraphSize (node, contents) =
Right (List.map (\(t, ctxt) -> HyperdataDocument { _hd_bdd = Just $ show Notes
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just t
, _hd_authors = Just authors'
......
......@@ -17,35 +17,31 @@ Portability : POSIX
module Gargantext.API.Node.File where
import Control.Lens ((^.))
import Data.ByteString qualified as BS
import Data.ByteString.Lazy qualified as BSL
import Data.MIME.Types qualified as DMT
import Data.Swagger
import Data.Swagger (ToSchema(..))
import Data.Text qualified as T
import Data.Text
import Servant
import qualified Gargantext.Database.GargDB as GargDB
import qualified Network.HTTP.Media as M
import Data.Either
import Gargantext.API.Admin.Auth.Types
import Gargantext.API.Admin.Auth.Types ( AuthenticatedUser, auth_user_id )
import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types
import Gargantext.API.Node.Types
import Gargantext.API.Prelude
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Node.Types ( NewWithFile(NewWithFile) )
import Gargantext.API.Prelude ( GargM, GargServer )
import Gargantext.Core.Types (TODO)
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Flow.Types ( FlowCmdM )
import Gargantext.Database.Action.Node (mkNodeWithParent)
import Gargantext.Database.Admin.Types.Hyperdata.File
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Hyperdata.File ( HyperdataFile(..) )
import Gargantext.Database.Admin.Types.Node ( NodeId, NodeType(..) )
import Gargantext.Database.GargDB qualified as GargDB
import Gargantext.Database.Query.Table.Node (getNodeWith)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Network.HTTP.Media qualified as M
import Servant
data RESPONSE deriving Typeable
......
......@@ -15,37 +15,35 @@ Portability : POSIX
module Gargantext.API.Node.FrameCalcUpload where
import Control.Lens ((^.))
import Data.Aeson
import Data.ByteString.Lazy qualified as BSL
import Data.ByteString.UTF8 qualified as BSU8
import Data.Swagger
import Data.Swagger ( ToSchema )
import Data.Text qualified as T
import Network.HTTP.Client (newManager, httpLbs, parseRequest, responseBody)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant
import Web.FormUrlEncoded (FromForm)
import Gargantext.API.Admin.Auth.Types
import Gargantext.API.Admin.Auth.Types ( auth_node_id, AuthenticatedUser )
import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Errors.Types
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Node.Corpus.New (addToCorpusWithForm)
import Gargantext.API.Node.Corpus.New.Types (FileFormat(..), FileType(..))
import Gargantext.API.Node.Types (NewWithForm(..))
import Gargantext.API.Prelude
import Gargantext.API.Prelude ( GargM )
import Gargantext.Core (Lang)
import Gargantext.Core.NodeStory (HasNodeArchiveStoryImmediateSaver)
import Gargantext.Core.NodeStory.Types ( HasNodeArchiveStoryImmediateSaver )
import Gargantext.Core.Text.List.Social (FlowSocialListWith(..))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Admin.Types.Hyperdata.Frame
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Action.Flow.Types ( FlowCmdM )
import Gargantext.Database.Admin.Types.Hyperdata.Frame ( HyperdataFrame(..) )
import Gargantext.Database.Admin.Types.Node ( NodeId, NodeType(NodeCorpus) )
import Gargantext.Database.Prelude (HasConfig)
import Gargantext.Database.Query.Table.Node (getClosestParentIdByType, getNodeWith)
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..), markFailureNoErr)
import Network.HTTP.Client (newManager, httpLbs, parseRequest, responseBody)
import Network.HTTP.Client.TLS (tlsManagerSettings)
import Servant ( type (:>), JSON, Summary, HasServer(ServerT) )
import Web.FormUrlEncoded (FromForm)
data FrameCalcUpload = FrameCalcUpload {
_wf_lang :: !(Maybe Lang)
......@@ -105,7 +103,7 @@ frameCalcUploadAsync authenticatedUser nId (FrameCalcUpload _wf_lang _wf_selecti
-- printDebug "[frameCalcUploadAsync] mCId" mCId
case mCId of
Nothing -> markFailure 1 Nothing jobHandle
Nothing -> markFailureNoErr 1 jobHandle
Just cId ->
-- FIXME(adn) Audit this conversion.
addToCorpusWithForm (RootId userNodeId)
......
......@@ -63,8 +63,8 @@ api :: (HasNodeError err, HasNLPServer env, CmdRandom env err m)
api userInviting nId (ShareTeamParams user') = do
let user'' = Text.toLower user'
user <- case guessUserName user'' of
Nothing -> pure user''
Just (u,_) -> do
Nothing -> pure user''
Just (u, _) -> do
isRegistered <- getUserId' (UserName u)
case isRegistered of
Right _ -> do
......@@ -72,7 +72,7 @@ api userInviting nId (ShareTeamParams user') = do
pure u
Left _err -> do
username' <- getUsername userInviting
_ <- case List.elem username' arbitraryUsername of
_ <- case username' `List.elem` arbitraryUsername of
True -> do
-- printDebug "[G.A.N.Share.api]" ("Demo users are not allowed to invite" :: Text)
pure ()
......
......@@ -9,9 +9,8 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API.Node.Update
where
......@@ -19,16 +18,17 @@ module Gargantext.API.Node.Update
import Control.Lens (view)
import Data.Aeson
import Data.Set qualified as Set
import Data.Swagger
import Data.Swagger ( ToSchema )
import Gargantext.API.Admin.EnvTypes (GargJob(..), Env)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types (HasSettings)
import Gargantext.API.Errors.Types
import Gargantext.API.Errors.Types ( BackendInternalError )
import Gargantext.API.Metrics qualified as Metrics
import Gargantext.API.Ngrams.Types qualified as NgramsTypes
import Gargantext.API.Prelude (GargM, simuLogs)
import Gargantext.Core.Methods.Similarities (GraphMetric(..))
import Gargantext.Core.NodeStory (HasNodeStory)
import Gargantext.Core.NodeStory.Types (HasNodeStory)
import Gargantext.Core.Text.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Graph.API (recomputeGraph)
import Gargantext.Core.Viz.Graph.Tools (PartitionMethod(..), BridgenessMethod(..))
......@@ -38,20 +38,19 @@ import Gargantext.Core.Viz.Phylo.API.Tools (flowPhyloAPI)
import Gargantext.Database.Action.Flow (reIndexWith)
import Gargantext.Database.Action.Flow.Pairing (pairing)
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Hyperdata.Phylo ( HyperdataPhylo(HyperdataPhylo) )
import Gargantext.Database.Admin.Types.Node ( NodeId, NodeType(NodeCorpus, NodeAnnuaire) )
import Gargantext.Database.Query.Table.Node (defaultList, getNode)
import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Schema.Ngrams (NgramsType(NgramsTerms))
import Gargantext.Database.Schema.Node (node_parent_id)
import Gargantext.Prelude
import Gargantext.System.Logging ( MonadLogger )
import Gargantext.Utils.Aeson qualified as GUA
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Gargantext.Utils.UTCTime (timeMeasured)
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Gargantext.Utils.UTCTime (timeMeasured)
import Gargantext.System.Logging
import Test.QuickCheck.Arbitrary ( Arbitrary(arbitrary) )
------------------------------------------------------------------------
type API = Summary " Update node according to NodeType params"
......
......@@ -15,27 +15,28 @@ Portability : POSIX
module Gargantext.API.Public
where
import Control.Lens ((^?), (^.), _Just)
import Data.Aeson
import Control.Lens ((^?), _Just)
import Data.Aeson ( Options(sumEncoding), genericParseJSON, defaultOptions, genericToJSON )
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Set qualified as Set
import Data.Swagger hiding (title, url)
import Gargantext.API.Node.File
import Gargantext.API.Prelude
import Data.Swagger (ToSchema)
import Gargantext.API.Node.File (FileApi, fileApi)
import Gargantext.API.Prelude (serverError, GargServer)
import Gargantext.Core.Utils.DateUtils (utc2year)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Hyperdata.Corpus ( hc_fields )
import Gargantext.Database.Admin.Types.Hyperdata.Folder ( HyperdataFolder )
import Gargantext.Database.Admin.Types.Hyperdata.CorpusField
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Admin.Types.Node ( NodeId(..), Node, unNodeId )
import Gargantext.Database.Prelude (Cmd, DBCmd)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Query.Table.NodeNode (selectPublicNodes)
import Gargantext.Database.Schema.Node -- (NodePoly(..))
import Gargantext.Database.Schema.Node ( NodePoly(..), node_date, node_hyperdata ) -- (NodePoly(..))
import Gargantext.Prelude
import Gargantext.Utils.Aeson qualified as GUA
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary
import Test.QuickCheck.Arbitrary ( Arbitrary(arbitrary) )
------------------------------------------------------------------------
type API = API_Home
......
......@@ -13,9 +13,10 @@ Portability : POSIX
module Gargantext.API.Server where
import Control.Lens ((^.))
import Control.Monad.Catch (catch, throwM)
import Data.ByteString.Lazy qualified as BL
import Data.Text qualified as T
import Data.Text.Encoding qualified as TE
import Data.Version (showVersion)
import Gargantext.API.Admin.Auth (auth, forgotPassword, forgotPasswordAsync)
import Gargantext.API.Admin.Auth.Types (AuthContext)
......@@ -24,20 +25,18 @@ import Gargantext.API.Admin.FrontEnd (frontEndServer)
import Gargantext.API.Auth.PolicyCheck ()
import Gargantext.API.Errors
import Gargantext.API.GraphQL qualified as GraphQL
import Gargantext.API.Prelude
import Gargantext.API.Prelude (GargM, GargServer)
import Gargantext.API.Public qualified as Public
import Gargantext.API.Routes
import Gargantext.API.Routes (API, GargVersion, GargAPI)
import Gargantext.API.Swagger (swaggerDoc)
import Gargantext.API.ThrowAll (serverPrivateGargAPI)
import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Prelude hiding (Handler, catch)
import Gargantext.Prelude.Config (gc_url_backend_api)
import Gargantext.System.Logging
import Gargantext.System.Logging (logLocM, LogLevel(..))
import Paths_gargantext qualified as PG -- cabal magic build module
import Servant
import Servant.Swagger.UI (swaggerSchemaUIServer)
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text.Encoding as TE
serverGargAPI :: Text -> ServerT GargAPI (GargM Env BackendInternalError)
......
......@@ -31,9 +31,7 @@ Node API
module Gargantext.API.Table
where
import Data.Aeson.TH (deriveJSON)
import Data.Maybe
import Data.Swagger
import Data.Swagger (ToSchema(..), genericDeclareNamedSchema)
import Data.Text qualified as T
import Gargantext.API.HashedResponse
import Gargantext.API.Ngrams.Types (TabType(..))
......
......@@ -46,8 +46,7 @@ import Prelude (userError)
-- NOTE: Use international country codes
-- https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes
-- TODO This should be deprecated in favor of iso-639 library
data Lang = All
| DE
data Lang = DE
| EL
| EN
| ES
......@@ -58,7 +57,7 @@ data Lang = All
| RU
| UK
| ZH
deriving (Show, Eq, Ord, Enum, Bounded, Generic, GQLType)
deriving (Read, Show, Eq, Ord, Enum, Bounded, Generic, GQLType)
-- | Defaults to 'EN' in all those places where a language is mandatory,
-- but an optional one has been passed.
......@@ -75,41 +74,30 @@ instance ToSchema Lang where
declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
instance FromHttpApiData Lang
where
-- parseUrlPiece "All" = pure All
parseUrlPiece "DE" = pure DE
parseUrlPiece "EL" = pure EL
parseUrlPiece "EN" = pure EN
parseUrlPiece "ES" = pure ES
parseUrlPiece "FR" = pure FR
parseUrlPiece "IT" = pure IT
parseUrlPiece "PL" = pure PL
parseUrlPiece "PT" = pure PT
parseUrlPiece "RU" = pure RU
parseUrlPiece "UK" = pure UK
parseUrlPiece "ZH" = pure ZH
parseUrlPiece _ = Left "Unexpected value of Lang"
-- parseUrlPiece is exactly the 'read' instance,
-- if we are disciplined. Either way, this needs to
-- be tested.
parseUrlPiece fragment = case readMaybe fragment of
Nothing -> Left $ "Unexpected value of Lang: " <> fragment
Just lang -> Right lang
-- | URL rendering is simply the constructor name (mirrors the
-- 'Read'-based 'FromHttpApiData' parser).
instance ToHttpApiData Lang where
  toUrlPiece lang = pack (show lang)
instance Hashable Lang
-- | Uniform choice over the whole Bounded/Enum range of 'Lang'.
instance Arbitrary Lang where
arbitrary = arbitraryBoundedEnum
toISO639 :: Lang -> Maybe ISO639.ISO639_1
toISO639 DE = Just ISO639.DE
toISO639 EL = Just ISO639.EL
toISO639 EN = Just ISO639.EN
toISO639 ES = Just ISO639.ES
toISO639 FR = Just ISO639.FR
toISO639 IT = Just ISO639.IT
toISO639 PL = Just ISO639.PL
toISO639 PT = Just ISO639.PT
toISO639 RU = Just ISO639.RU
toISO639 UK = Just ISO639.UK
toISO639 ZH = Just ISO639.ZH
toISO639 All = Nothing
toISO639EN :: Lang -> ISO639.ISO639_1
toISO639EN l = fromMaybe ISO639.EN $ toISO639 l
-- | Total mapping from our 'Lang' to its ISO 639-1 code.
toISO639 :: Lang -> ISO639.ISO639_1
toISO639 lang = case lang of
  DE -> ISO639.DE
  EL -> ISO639.EL
  EN -> ISO639.EN
  ES -> ISO639.ES
  FR -> ISO639.FR
  IT -> ISO639.IT
  PL -> ISO639.PL
  PT -> ISO639.PT
  RU -> ISO639.RU
  UK -> ISO639.UK
  ZH -> ISO639.ZH
iso639ToText :: ISO639.ISO639_1 -> Text
iso639ToText la = pack [a, b]
......@@ -117,19 +105,18 @@ iso639ToText la = pack [a, b]
(a, b) = ISO639.toChars la
-- | https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
toISO639Lang :: Lang -> Maybe Text
toISO639Lang All = Nothing
toISO639Lang DE = Just "de"
toISO639Lang EL = Just "el"
toISO639Lang EN = Just "en"
toISO639Lang ES = Just "es"
toISO639Lang FR = Just "fr"
toISO639Lang IT = Just "it"
toISO639Lang PL = Just "pl"
toISO639Lang PT = Just "pt"
toISO639Lang RU = Just "ru"
toISO639Lang UK = Just "uk"
toISO639Lang ZH = Just "zh"
-- | Lowercase two-letter ISO 639-1 code for a 'Lang'.
-- See https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
toISO639Lang :: Lang -> Text
toISO639Lang lang = case lang of
  DE -> "de"
  EL -> "el"
  EN -> "en"
  ES -> "es"
  FR -> "fr"
  IT -> "it"
  PL -> "pl"
  PT -> "pt"
  RU -> "ru"
  UK -> "uk"
  ZH -> "zh"
-- | Every supported language, in 'Enum' order.
allLangs :: [Lang]
allLangs = enumFromTo minBound maxBound
......@@ -145,7 +132,6 @@ class HasDBid a where
-- once we add a new 'Lang'.
langIds :: Bimap Lang Int
langIds = Bimap.fromList $ allLangs <&> \lid -> case lid of
All -> (lid, 0)
DE -> (lid, 276)
EL -> (lid, 300)
EN -> (lid, 2)
......
......@@ -16,13 +16,13 @@ Here is writtent a common interface.
module Gargantext.Core.Ext.IMTUser -- (deserialiseImtUsersFromFile)
where
import Codec.Serialise
import Codec.Serialise ( Serialise, deserialise )
import Data.ByteString.Lazy qualified as BL
import Data.Csv
import Data.Csv ( (.:), header, decodeByNameWith, FromNamedRecord(..), Header )
import Data.Text qualified as T
import Data.Vector (Vector)
import Data.Vector qualified as Vector
import Gargantext.Core.Text.Corpus.Parsers.CSV
import Gargantext.Core.Text.Corpus.Parsers.CSV ( csvDecodeOptions, Delimiter(Tab) )
import Gargantext.Database.Admin.Types.Hyperdata.Contact
import Gargantext.Prelude
import System.FilePath.Posix (takeExtension)
......@@ -156,11 +156,9 @@ imtUser2gargContact (IMTUser { id
, _hc_where = [ou]
, _hc_title = title
, _hc_source = entite
, _hc_lastValidation = date_modification
, _hc_uniqIdBdd = Nothing
, _hc_uniqId = Nothing }
, _hc_lastValidation = date_modification }
where
title = (<>) <$> (fmap (\p -> p <> " ") prenom) <*> nom
title = (<>) <$> fmap (\p -> p <> " ") prenom <*> nom
qui = ContactWho { _cw_id = id
, _cw_firstName = prenom
, _cw_lastName = nom
......@@ -182,7 +180,7 @@ imtUser2gargContact (IMTUser { id
-- meta = ContactMetaData (Just "IMT annuaire") date_modification'
toList' Nothing = []
toList' (Just x) = [x]
......@@ -14,9 +14,8 @@ Portability : POSIX
module Gargantext.Core.Flow.Types where
import Control.Lens
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node
import Control.Lens ( Lens' )
import Gargantext.Database.Admin.Types.Node ( Node )
import Gargantext.Database.Schema.Node (node_hash_id)
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (Hash)
......@@ -25,14 +24,6 @@ class UniqId a
where
uniqId :: Lens' a (Maybe Hash)
instance UniqId HyperdataDocument
where
uniqId = hd_uniqId
instance UniqId HyperdataContact
where
uniqId = hc_uniqId
instance UniqId (Node a)
where
uniqId = node_hash_id
......
......@@ -125,7 +125,7 @@ matrixEye n' =
diagNull :: Num a => Dim -> Acc (Matrix a) -> Acc (Matrix a)
diagNull n m = trace ("diagNull") $ zipWith (*) m (matrixEye n)
diagNull n m = zipWith (*) m (matrixEye n)
-- Returns an N-dimensional array with the values of x for the indices where
......
......@@ -19,7 +19,7 @@ import Data.Array.Accelerate (Matrix)
import Data.Swagger
import Data.Text (Text)
import GHC.Generics (Generic)
import Gargantext.Core.Methods.Similarities.Accelerate.Conditional (measureConditional)
import Gargantext.Core.Methods.Similarities.Accelerate.Conditional (measureConditional')
import Gargantext.Core.Methods.Similarities.Accelerate.Distributional (logDistributional2)
-- import Gargantext.Core.Text.Metrics.Count (coocOn)
-- import Gargantext.Core.Viz.Graph.Index
......@@ -35,13 +35,13 @@ data Similarity = Conditional | Distributional
deriving (Show, Eq)
measure :: Similarity -> Matrix Int -> Matrix Double
measure Conditional x = measureConditional x
measure Conditional x = measureConditional' x
measure Distributional x = logDistributional2 x
------------------------------------------------------------------------
withMetric :: GraphMetric -> Similarity
withMetric Order1 = Conditional
withMetric _ = Distributional
withMetric _ = Distributional
------------------------------------------------------------------------
-- Order2 type is for keeping Database json compatibility
......
......@@ -48,8 +48,30 @@ import qualified Gargantext.Prelude as P
-- Filtered with MiniMax.
-- | Conditional similarity over a co-occurrence count matrix.
-- Converts counts to Double, normalises (divByDiag — presumably divides
-- each cell by a diagonal count, TODO confirm), zeroes the diagonal
-- (diagNull), then keeps only the larger of each (i,j)/(j,i) pair
-- via 'maxOnly'.
measureConditional :: Matrix Int -> Matrix Double
measureConditional m = run $ x $ map fromIntegral $ use m
where
x :: Acc (Matrix Double) -> Acc (Matrix Double)
x mat = maxOnly $ diagNull r $ divByDiag r mat

r :: Dim
r = dim m
-- | Keep cell (i,j) only when it strictly exceeds its transpose cell
-- (j,i); all other cells (including exact ties) become 0.
-- NOTE(review): on a tie ij == ji BOTH orientations are zeroed — confirm
-- this is intended.
-- TODO: maybe use backpermute to accelerate this (no per-cell indexing
-- would be needed then).
maxOnly :: Acc (SymetricMatrix Double) -> Acc (Matrix Double)
maxOnly m' = generate (shape m')
((\coord
-> let (Z :. (i :: Exp Int) :. (j :: Exp Int)) = unlift coord
ij = m' ! (lift $ (Z :. i :. j))
ji = m' ! (lift $ (Z :. j :. i))
in
ifThenElse (ij > ji) ij (constant 0)
)
)
measureConditional' :: Matrix Int -> Matrix Double
measureConditional' m = run $ x $ map fromIntegral $ use m
where
x :: Acc (Matrix Double) -> Acc (Matrix Double)
x mat = matMiniMax $ matProba r mat
......@@ -58,6 +80,7 @@ measureConditional m = run $ x $ map fromIntegral $ use m
r = dim m
-- | To filter the nodes
-- The conditional metric P(i|j) of 2 terms @i@ and @j@, also called
-- "confidence" , is the maximum probability between @i@ and @j@ to see
......
......@@ -21,7 +21,9 @@ import Data.HashMap.Strict qualified as Map
import Data.Set qualified as Set
import Gargantext.Core.Viz.Graph.Utils (getMax)
import Gargantext.Prelude
import Data.Map.Strict qualified as M
import Gargantext.Core.Methods.Similarities.Accelerate.Conditional (measureConditional)
import Gargantext.Core.Viz.Graph.Index (score, MatrixShape(..))
type HashMap = Map.HashMap
------------------------------------------------------------------------
......@@ -35,7 +37,11 @@ conditional m' = Map.fromList $ ((catMaybes results') `using` parList rdeepseq)
where
results' = [ let
ij = (/) <$> Map.lookup (i,j) m <*> Map.lookup (j,j) m
ji = (/) <$> Map.lookup (j,i) m <*> Map.lookup (i,i) m
-- proba of i|j, high values means i is more generic than j
ji = (/) <$> Map.lookup (i,j) m <*> Map.lookup (i,i) m
-- proba of j|i, high values means j is more generic than i
in getMax (i,j) ij ji
| i <- keys
......@@ -49,4 +55,45 @@ conditional m' = Map.fromList $ ((catMaybes results') `using` parList rdeepseq)
keys = Set.toList $ Set.fromList (x <> y)
(x,y) = unzip $ Map.keys m
{-
Only for TESTs
-}
-- | Sanity check: the hashmap-based 'conditional' implementation agrees
-- with the Accelerate-based 'measureConditional' on 'example_matrix'.
conditional_test :: Bool
conditional_test = conditional_test1 == conditional_test2
-- | Conditional similarity of 'example_matrix' via the pure-Haskell path.
conditional_test1 :: HashMap (Text,Text) Double
conditional_test1 = conditional $ Map.fromList example_matrix
-- | Conditional similarity of 'example_matrix' via the Accelerate path,
-- restricted to strictly positive scores so both paths are comparable.
conditional_test2 :: HashMap (Text,Text) Double
conditional_test2 =
    Map.fromList
  . M.toList
  . M.filter (> 0)
  . score Square measureConditional
  . M.fromList
  $ example_matrix
-- | Small symmetric co-occurrence matrix used by the conditional tests.
example_matrix :: [((Text,Text), Int)]
example_matrix = concat
  [ compte "polygon"  "polygon"  19
  , compte "polygon"  "square"    6
  , compte "polygon"  "triangle" 10
  , compte "polygon"  "losange"   3
  , compte "triangle" "triangle" 11
  , compte "square"   "square"    7
  , compte "losange"  "losange"  15
  , compte "shape"    "shape"    10
  , compte "circle"   "circle"    6
  , compte "shape"    "circle"    3
  , compte "shape"    "square"    2
  , compte "polygon"  "shape"    10
  ]
  where
    -- Off-diagonal counts are mirrored so the matrix stays symmetric;
    -- diagonal entries appear once.
    compte a b c
      | a == b    = [((a,b), c)]
      | otherwise = [((a,b), c), ((b,a), c)]
......@@ -43,7 +43,6 @@ TODO:
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.NodeStory
( module Gargantext.Core.NodeStory.Types
......@@ -54,17 +53,18 @@ module Gargantext.Core.NodeStory
, fromDBNodeStoryEnv
, upsertNodeStories
-- , getNodeStory
, getNodeStory'
, nodeStoriesQuery
, currentVersion
, archiveStateFromList
, archiveStateToList
, fixNodeStoryVersions )
, fixNodeStoryVersions
, fixChildrenDuplicatedAsParents
, getParentsChildren )
where
import Control.Lens ((^.), (.~), (%~), non, _Just, at, view)
import Control.Monad.Except
import Control.Lens ((%~), non, _Just, at, over, view)
import Data.Map.Strict qualified as Map
import Data.Monoid
import Data.Pool (Pool, withResource)
import Data.Set qualified as Set
import Database.PostgreSQL.Simple qualified as PGS
......@@ -73,18 +73,18 @@ import Database.PostgreSQL.Simple.ToField qualified as PGS
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory.DB
import Gargantext.Core.NodeStory.Types
import Gargantext.Core.Types (ListId, NodeId(..))
import Gargantext.Core.Text.Ngrams qualified as Ngrams
import Gargantext.Database.Admin.Types.Node ( ListId, NodeId(..) )
import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Prelude (HasConnectionPool(..))
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Prelude hiding (to)
import Gargantext.Prelude.Database
import Gargantext.Prelude.Database ( runPGSAdvisoryXactLock, runPGSExecute, runPGSQuery )
getNodeStory' :: PGS.Connection -> NodeId -> IO ArchiveList
getNodeStory' c nId = do
--res <- withResource pool $ \c -> runSelect c query :: IO [NodeStoryPoly NodeId Version Int Int NgramsRepoElement]
res <- runPGSQuery c nodeStoriesQuery (PGS.Only $ PGS.toField nId) :: IO [(Version, TableNgrams.NgramsType, NgramsTerm, NgramsRepoElement)]
res <- runPGSQuery c nodeStoriesQuery (PGS.Only $ PGS.toField nId) :: IO [(Version, Ngrams.NgramsType, NgramsTerm, NgramsRepoElement)]
-- We have multiple rows with same node_id and different (ngrams_type_id, ngrams_id).
-- Need to create a map: {<node_id>: {<ngrams_type_id>: {<ngrams_id>: <data>}}}
let dbData = map (\(version, ngramsType, ngrams, ngrams_repo_element) ->
......@@ -105,7 +105,7 @@ getNodeStory' c nId = do
pure ()
-}
pure $ foldl combine initArchive dbData
pure $ foldl' combine initArchive dbData
where
-- NOTE (<>) for Archive doesn't concatenate states, so we have to use `combine`
combine a1 a2 = a1 & a_state %~ combineState (a2 ^. a_state)
......@@ -221,15 +221,12 @@ nodeStoryInc c ns@(NodeStory nls) nId = do
-- `nre_parent` and `nre_children`. We want to make sure that all
-- children entries (i.e. ones that have `nre_parent`) have the same
-- `list` as their parent entry.
fixChildrenInNgramsStatePatch :: NgramsState' -> NgramsState'
fixChildrenInNgramsStatePatch ns = archiveStateFromList $ nsParents <> nsChildrenFixed
fixChildrenInNgrams :: NgramsState' -> NgramsState'
fixChildrenInNgrams ns = archiveStateFromList $ nsParents <> nsChildrenFixed
where
nls = archiveStateToList ns
nsParents = filter (\(_nt, _t, nre) -> isNothing $ nre ^. nre_parent) nls
(nsParents, nsChildren) = getParentsChildren ns
parentNtMap = Map.fromList $ (\(_nt, t, nre) -> (t, nre ^. nre_list)) <$> nsParents
nsChildren = filter (\(_nt, _t, nre) -> isJust $ nre ^. nre_parent) nls
nsChildrenFixed = (\(nt, t, nre) ->
( nt
, t
......@@ -238,18 +235,15 @@ fixChildrenInNgramsStatePatch ns = archiveStateFromList $ nsParents <> nsChildre
)
) <$> nsChildren
-- | Sometimes, when we upload a new list, a child can be left without
-- a parent. Find such ngrams and set their 'root' and 'parent' to
-- 'Nothing'.
fixChildrenWithNoParentStatePatch :: NgramsState' -> NgramsState'
fixChildrenWithNoParentStatePatch ns = archiveStateFromList $ nsParents <> nsChildrenFixed
-- | (#281) Sometimes, when we upload a new list, a child can be left
-- without a parent. Find such ngrams and set their 'root' and
-- 'parent' to 'Nothing'.
fixChildrenWithNoParent :: NgramsState' -> NgramsState'
fixChildrenWithNoParent ns = archiveStateFromList $ nsParents <> nsChildrenFixed
where
nls = archiveStateToList ns
nsParents = filter (\(_nt, _t, nre) -> isNothing $ nre ^. nre_parent) nls
(nsParents, nsChildren) = getParentsChildren ns
parentNtMap = Map.fromList $ (\(_nt, t, nre) -> (t, nre ^. nre_children & mSetToSet)) <$> nsParents
nsChildren = filter (\(_nt, _t, nre) -> isJust $ nre ^. nre_parent) nls
nsChildrenFixFunc (nt, t, nre) =
( nt
, t
......@@ -263,6 +257,30 @@ fixChildrenWithNoParentStatePatch ns = archiveStateFromList $ nsParents <> nsChi
nsChildrenFixed = nsChildrenFixFunc <$> nsChildren
-- | Sometimes children can also become parents (e.g. #313). Find such
-- children and remove them from the list.
fixChildrenDuplicatedAsParents :: NgramsState' -> NgramsState'
fixChildrenDuplicatedAsParents ns = archiveStateFromList $ nsChildren <> nsParentsFixed
  where
    (nsParents, nsChildren) = getParentsChildren ns

    -- parent term -> its set of children
    parentNtMap = Map.fromList $ (\(_nt, t, nre) -> (t, nre ^. nre_children & mSetToSet)) <$> nsParents
    -- all terms that act as a parent
    parentsSet = Set.fromList $ Map.keys parentNtMap

    -- Drop from every parent's children set any term that is itself a
    -- parent, so no term is listed both as a parent and as a child.
    nsParentsFixed = (\(nt, t, nre) ->
                       ( nt
                       , t
                       , over nre_children
                              (\c -> mSetFromSet $ Set.difference (mSetToSet c) parentsSet)
                              nre ) ) <$> nsParents
-- | Split an archive state into (parents, children): entries without an
-- 'nre_parent' are parents, the remaining entries are children.
getParentsChildren :: NgramsState' -> (ArchiveStateList, ArchiveStateList)
getParentsChildren ns = (parents, children)
  where
    entries = archiveStateToList ns

    hasParent (_nt, _t, nre) = isJust $ nre ^. nre_parent

    parents  = filter (not . hasParent) entries
    children = filter hasParent entries
------------------------------------
fromDBNodeStoryEnv :: Pool PGS.Connection -> IO NodeStoryEnv
......@@ -280,8 +298,15 @@ fromDBNodeStoryEnv pool = do
withResource pool $ \c -> do
--printDebug "[mkNodeStorySaver] will call writeNodeStories, ns" ns
-- writeNodeStories c $ fixChildrenWithNoParent $ fixChildrenTermTypes ns
-- |NOTE Fixing a_state is kinda a hack. We shouldn't land
-- |with bad state in the first place.
upsertNodeStories c nId $
a & a_state %~ (fixChildrenInNgramsStatePatch . fixChildrenWithNoParentStatePatch)
a & a_state %~ (
fixChildrenDuplicatedAsParents
. fixChildrenInNgrams
. fixChildrenWithNoParent
)
let archive_saver_immediate nId a = withResource pool $ \c -> do
insertNodeArchiveHistory c nId (a ^. a_version) $ reverse $ a ^. a_history
pure $ a & a_history .~ []
......@@ -289,13 +314,13 @@ fromDBNodeStoryEnv pool = do
-- insertNodeArchiveHistory c nId (a ^. a_version) $ reverse $ a ^. a_history
-- ) $ Map.toList nls
-- pure $ clearHistory ns
pure $ NodeStoryEnv { _nse_saver_immediate = saver_immediate
, _nse_archive_saver_immediate = archive_saver_immediate
, _nse_getter = \nId -> withResource pool $ \c ->
getNodeStory' c nId
, _nse_getter_multi = \nIds -> withResource pool $ \c ->
foldM (\m nId -> nodeStoryInc c m nId) (NodeStory Map.empty) nIds
foldM (nodeStoryInc c) (NodeStory Map.empty) nIds
}
currentVersion :: (HasNodeStory env err m) => ListId -> m Version
......@@ -316,13 +341,13 @@ fixNodeStoryVersions = do
-- printDebug "[fixNodeStoryVersions] nIds" nIds
mapM_ (\(PGS.Only nId) -> do
-- printDebug "[fixNodeStoryVersions] nId" nId
updateVer c TableNgrams.Authors nId
updateVer c Ngrams.Authors nId
updateVer c TableNgrams.Institutes nId
updateVer c Ngrams.Institutes nId
updateVer c TableNgrams.Sources nId
updateVer c Ngrams.Sources nId
updateVer c TableNgrams.NgramsTerms nId
updateVer c Ngrams.NgramsTerms nId
pure ()
) nIds
......@@ -338,7 +363,7 @@ fixNodeStoryVersions = do
SET version = ?
WHERE node_id = ?
AND ngrams_type_id = ? |]
updateVer :: PGS.Connection -> TableNgrams.NgramsType -> Int64 -> IO ()
updateVer :: PGS.Connection -> Ngrams.NgramsType -> Int64 -> IO ()
updateVer c ngramsType nId = do
maxVer <- runPGSQuery c maxVerQuery (nId, ngramsType) :: IO [PGS.Only (Maybe Int64)]
case maxVer of
......
......@@ -12,7 +12,6 @@ Portability : POSIX
{-# LANGUAGE Arrows #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.NodeStory.DB
( nodeExists
......@@ -26,25 +25,22 @@ module Gargantext.Core.NodeStory.DB
, updateNodeStoryVersion )
where
import Control.Lens ((^.))
import Control.Monad.Except
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM
import Data.Monoid
import Database.PostgreSQL.Simple qualified as PGS
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.API.Ngrams.Types
import Gargantext.Core (toDBid)
import Gargantext.Core.NodeStory.Types
import Gargantext.Core.Types (NodeId(..), NodeType)
import Gargantext.Core.NodeStory.Types ( a_state, a_version, ArchiveList, ArchiveStateList, NgramsStatePatch' )
import Gargantext.Core.Text.Ngrams (NgramsType)
import Gargantext.Database.Admin.Types.Node ( NodeId(..), NodeType )
import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Database.Schema.Ngrams ()
import Gargantext.Prelude hiding (to)
import Gargantext.Prelude.Database
import Gargantext.Prelude.Database ( runPGSExecute, runPGSExecuteMany, runPGSQuery, runPGSReturning )
nodeExists :: PGS.Connection -> NodeId -> IO Bool
......@@ -70,7 +66,7 @@ getNodesArchiveHistory :: PGS.Connection
-> IO [(NodeId, (Map NgramsType [HashMap NgramsTerm NgramsPatch]))]
getNodesArchiveHistory c nodesId = do
as <- runPGSQuery c query (PGS.Only $ Values fields nodesId)
:: IO [(Int, TableNgrams.NgramsType, NgramsTerm, NgramsPatch)]
:: IO [(Int, NgramsType, NgramsTerm, NgramsPatch)]
pure $ map (\(nId, ngramsType, terms, patch)
-> ( UnsafeMkNodeId nId
......@@ -96,11 +92,11 @@ insertNodeArchiveHistory _ _ _ [] = pure ()
insertNodeArchiveHistory c nodeId version (h:hs) = do
let tuples = mconcat $ (\(nType, NgramsTablePatch patch) ->
(\(term, p) ->
(nodeId, nType, term, p)) <$> PM.toList patch) <$> PM.toList h :: [(NodeId, TableNgrams.NgramsType, NgramsTerm, NgramsPatch)]
(nodeId, nType, term, p)) <$> PM.toList patch) <$> PM.toList h :: [(NodeId, NgramsType, NgramsTerm, NgramsPatch)]
tuplesM <- mapM (\(nId, nType, term, patch) -> do
[PGS.Only ngramsId] <- runPGSReturning c qInsert [PGS.Only term] :: IO [PGS.Only Int]
pure (nId, nType, ngramsId, term, patch)
) tuples :: IO [(NodeId, TableNgrams.NgramsType, Int, NgramsTerm, NgramsPatch)]
) tuples :: IO [(NodeId, NgramsType, Int, NgramsTerm, NgramsPatch)]
_ <- runPGSExecuteMany c query $ ((\(nId, nType, termId, _term, patch) -> (nId, nType, termId, patch, version)) <$> tuplesM)
_ <- insertNodeArchiveHistory c nodeId version hs
pure ()
......
......@@ -47,23 +47,19 @@ module Gargantext.Core.NodeStory.Types
, ArchiveStateList )
where
import Codec.Serialise.Class
import Control.Lens (makeLenses, Getter, (^.))
import Control.Monad.Except
import Control.Monad.Reader
import Codec.Serialise.Class ( Serialise )
import Control.Lens (Getter)
import Data.Aeson hiding ((.=), decode)
import Data.Map.Strict qualified as Map
import Data.Monoid
import Data.Profunctor.Product.TH (makeAdaptorAndInstance)
import Data.Semigroup
import Data.Set qualified as Set
import Database.PostgreSQL.Simple.FromField (FromField(fromField), fromJSONField)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types (NodeId(..))
import Gargantext.Database.Admin.Types.Node ( NodeId(..) )
import Gargantext.Core.Text.Ngrams qualified as Ngrams
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Database.Admin.Config ()
import Gargantext.Database.Prelude (DbCmd')
import Gargantext.Database.Query.Table.Ngrams qualified as TableNgrams
import Gargantext.Database.Query.Table.Node.Error (HasNodeError())
import Gargantext.Prelude hiding (to)
import Opaleye (DefaultFromField(..), SqlJsonb, fromPGSFromField)
......@@ -104,8 +100,8 @@ instance (Serialise s, Serialise p) => Serialise (Archive s p)
type NodeListStory = NodeStory NgramsState' NgramsStatePatch'
-- NOTE: 'type NgramsTableMap = Map NgramsTerm NgramsRepoElement'
type NgramsState' = Map TableNgrams.NgramsType NgramsTableMap
type NgramsStatePatch' = PatchMap TableNgrams.NgramsType NgramsTablePatch
type NgramsState' = Map Ngrams.NgramsType NgramsTableMap
type NgramsStatePatch' = PatchMap Ngrams.NgramsType NgramsTablePatch
-- instance Serialise NgramsStatePatch'
instance FromField (Archive NgramsState' NgramsStatePatch')
where
......@@ -171,7 +167,7 @@ initNodeListStoryMock = NodeStory $ Map.singleton nodeListId archive
archive = Archive { _a_version = 0
, _a_state = ngramsTableMap
, _a_history = [] }
ngramsTableMap = Map.singleton TableNgrams.NgramsTerms
ngramsTableMap = Map.singleton Ngrams.NgramsTerms
$ Map.fromList
[ (n ^. ne_ngrams, ngramsElementToRepo n)
| n <- mockTable ^. _NgramsTable
......@@ -235,8 +231,8 @@ class HasNodeArchiveStoryImmediateSaver env where
type ArchiveStateList = [(TableNgrams.NgramsType, NgramsTerm, NgramsRepoElement)]
type ArchiveStateSet = Set.Set (TableNgrams.NgramsType, NgramsTerm)
type ArchiveStateList = [(Ngrams.NgramsType, NgramsTerm, NgramsRepoElement)]
type ArchiveStateSet = Set.Set (Ngrams.NgramsType, NgramsTerm)
------------------------------------------------------------------------
------------------------------------------------------------------------
......
......@@ -9,6 +9,8 @@ Portability : POSIX
-}
{-# LANGUAGE LambdaCase #-}
module Gargantext.Core.Text.Corpus.API
( ExternalAPIs(..)
, Corpus.RawQuery(..)
......@@ -18,12 +20,11 @@ module Gargantext.Core.Text.Corpus.API
, externalAPIs
) where
import Conduit
import Control.Monad.Except
import Conduit ( ConduitT, yieldMany )
import Data.Text qualified as T
import EPO.API.Client.Types qualified as EPO
import Gargantext.API.Admin.Orchestrator.Types (ExternalAPIs(..), externalAPIs)
import Gargantext.Core (Lang(..), toISO639, toISO639EN)
import Gargantext.Core (Lang(..), toISO639)
import Gargantext.Core.Text.Corpus.API.Arxiv qualified as Arxiv
import Gargantext.Core.Text.Corpus.API.EPO qualified as EPO
import Gargantext.Core.Text.Corpus.API.Hal qualified as HAL
......@@ -32,8 +33,9 @@ import Gargantext.Core.Text.Corpus.API.Istex qualified as ISTEX
import Gargantext.Core.Text.Corpus.API.OpenAlex qualified as OpenAlex
import Gargantext.Core.Text.Corpus.API.Pubmed qualified as PUBMED
import Gargantext.Core.Text.Corpus.Query qualified as Corpus
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Prelude hiding (get)
import Gargantext.Utils.Jobs.Error
import PUBMED.Types qualified as PUBMED
import Servant.Client (ClientError)
......@@ -41,12 +43,22 @@ data GetCorpusError
= -- | We couldn't parse the user input query into something meaningful.
InvalidInputQuery !Corpus.RawQuery !T.Text
-- | The external service returned an error.
| ExternalAPIError !ClientError
| ExternalAPIError !ExternalAPIs !ClientError
deriving (Show, Eq)
-- | Human-readable rendering of corpus-fetch errors, suitable for showing
-- to end users.
instance ToHumanFriendlyError GetCorpusError where
  mkHumanFriendly = \case
    InvalidInputQuery rq txt ->
      "Invalid input query (" <> Corpus.getRawQuery rq <> ") for corpus search: " <> txt
    ExternalAPIError api _ ->
      -- The 'ClientError' payload is deliberately dropped here: it may
      -- contain low-level detail not meant for end users.
      "There was a network problem while contacting the " <> T.pack (show api) <> " API provider. Please try again later or contact your network administrator."
-- | Get External API metadata main function
get :: ExternalAPIs
-> Lang
-- ^ A user-selected language in which documents needs to be retrieved.
-- If the provider doesn't support the search filtered by language, or if the language
-- is not important, the frontend will simply send 'EN' to the backend.
-> Corpus.RawQuery
-> Maybe PUBMED.APIKey
-> Maybe EPO.AuthKey
......@@ -54,26 +66,28 @@ get :: ExternalAPIs
-> Maybe Corpus.Limit
-- -> IO [HyperdataDocument]
-> IO (Either GetCorpusError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
get externalAPI la q mPubmedAPIKey epoAuthKey epoAPIUrl limit = do
get externalAPI lang q mPubmedAPIKey epoAuthKey epoAPIUrl limit = do
-- For PUBMED, HAL, IsTex, Isidore and OpenAlex, we want to send the query as-it.
-- For Arxiv we parse the query into a structured boolean query we submit over.
case externalAPI of
PubMed ->
first ExternalAPIError <$> PUBMED.get (fromMaybe "" mPubmedAPIKey) q limit
first (ExternalAPIError externalAPI) <$> PUBMED.get (fromMaybe "" mPubmedAPIKey) q limit
OpenAlex ->
first ExternalAPIError <$> OpenAlex.get (fromMaybe "" Nothing {- email -}) q (toISO639 la) limit
first (ExternalAPIError externalAPI) <$> OpenAlex.get (fromMaybe "" Nothing {- email -}) q (Just $ toISO639 lang) limit
Arxiv -> runExceptT $ do
corpusQuery <- ExceptT (pure parse_query)
ExceptT $ fmap Right (Arxiv.get la corpusQuery limit)
ExceptT $ fmap Right (Arxiv.get lang corpusQuery limit)
HAL ->
first ExternalAPIError <$> HAL.getC (toISO639 la) (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
first (ExternalAPIError externalAPI) <$> HAL.getC (Just $ toISO639 lang) (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
IsTex -> do
docs <- ISTEX.get la (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
docs <- ISTEX.get lang (Corpus.getRawQuery q) (Corpus.getLimit <$> limit)
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
Isidore -> do
docs <- ISIDORE.get la (Corpus.getLimit <$> limit) (Just $ Corpus.getRawQuery q) Nothing
docs <- ISIDORE.get lang (Corpus.getLimit <$> limit) (Just $ Corpus.getRawQuery q) Nothing
pure $ Right (Just $ fromIntegral $ length docs, yieldMany docs)
EPO -> do
first ExternalAPIError <$> EPO.get epoAuthKey epoAPIUrl q (toISO639EN la) limit
first (ExternalAPIError externalAPI) <$> EPO.get epoAuthKey epoAPIUrl q (toISO639 lang) limit
where
parse_query = first (InvalidInputQuery q . T.pack) $ Corpus.parseQuery q
......@@ -20,11 +20,9 @@ module Gargantext.Core.Text.Corpus.API.Arxiv
import Arxiv qualified as Arxiv
import Conduit
import Data.Text (unpack)
import Data.Text qualified as Text
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude hiding (get)
import Network.Api.Arxiv qualified as Ax
......@@ -40,13 +38,16 @@ convertQuery q = mkQuery (interpretQuery q transformAST)
, Ax.qStart = 0
, Ax.qItems = Arxiv.batchSize }
mergeTerms :: [QueryTerm] -> Maybe Ax.Expression
mergeTerms trms = Just $ Ax.Exp $ Ax.Abs [Text.unpack $ Text.unwords $ map renderQueryTerm trms]
-- Converts a 'BoolExpr' with 'Term's on the leaves into an Arxiv's expression.
-- It yields 'Nothing' if the AST cannot be converted into a meaningful expression.
transformAST :: BoolExpr Term -> Maybe Ax.Expression
transformAST :: BoolExpr [QueryTerm] -> Maybe Ax.Expression
transformAST ast = case ast of
BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated.
-> Ax.AndNot <$> (transformAST sub) <*> transformAST (BConst (Positive term))
-> Ax.AndNot <$> transformAST sub <*> transformAST (BConst (Positive term))
BAnd term1 (BNot term2)
-> Ax.AndNot <$> transformAST term1 <*> transformAST term2
BAnd sub1 sub2
......@@ -64,11 +65,17 @@ convertQuery q = mkQuery (interpretQuery q transformAST)
-- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
BFalse
-> Nothing
BConst (Positive (Term term))
-> Just $ Ax.Exp $ Ax.Abs [unpack term]
-- TODO(adinapoli) Apparently there is some fuzzy search going on under the hood
-- by Arxiv (see for example https://stackoverflow.com/questions/69003677/arxiv-api-problem-with-searching-for-two-keywords)
-- so it should be sufficient to search for the stemmed term. However, for simplicity and
-- backward compat, at the moment we don't stem.
BConst (Positive terms)
-> mergeTerms terms
-- We can handle negatives via `ANDNOT` with itself.
BConst (Negative (Term term))
-> Just $ Ax.AndNot (Ax.Exp $ Ax.Abs [unpack term]) (Ax.Exp $ Ax.Abs [unpack term])
-- TODO(adinapoli) Ditto as per the 'Positive' case (re partial matches)
BConst (Negative terms)
-> let term = Text.unpack $ Text.unwords (map renderQueryTerm terms)
in Just $ Ax.AndNot (Ax.Exp $ Ax.Abs [term]) (Ax.Exp $ Ax.Abs [term])
-- | TODO put default pubmed query in gargantext.ini
-- by default: 10K docs
......@@ -88,7 +95,7 @@ toDoc l (Arxiv.Result { abstract
, authors = aus
--, categories
, doi
, id
-- , id
, journal
--, primaryCategory
, publication_date
......@@ -99,8 +106,6 @@ toDoc l (Arxiv.Result { abstract
) = HyperdataDocument { _hd_bdd = Just "Arxiv"
, _hd_doi = Just $ Text.pack doi
, _hd_url = Just $ Text.pack url
, _hd_uniqId = Just $ Text.pack id
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just $ Text.pack title
, _hd_authors = authors aus
......@@ -118,13 +123,10 @@ toDoc l (Arxiv.Result { abstract
where
authors :: [Ax.Author] -> Maybe Text
authors [] = Nothing
authors aus' = Just $ (Text.intercalate ", ")
$ map Text.pack
$ map Ax.auName aus'
authors aus' = Just $ Text.intercalate ", "
$ map (Text.pack . Ax.auName) aus'
institutes :: [Ax.Author] -> Maybe Text
institutes [] = Nothing
institutes aus' = Just $ (Text.intercalate ", ")
$ (map (Text.replace ", " " - "))
$ map Text.pack
$ map Ax.auFil aus'
institutes aus' = Just $ Text.intercalate ", "
$ map ((Text.replace ", " " - " . Text.pack) . Ax.auFil) aus'
......@@ -9,7 +9,7 @@ Portability : POSIX
-}
module Gargantext.Core.Text.Corpus.API.EPO where
import Conduit
import Conduit ( ConduitT, (.|), mapC )
import Data.LanguageCodes (ISO639_1)
import Data.Map.Strict qualified as Map
import Data.Text qualified as T
......@@ -17,7 +17,7 @@ import EPO.API.Client.Types qualified as EPO
import EPO.API.Client.Implementation qualified as EPO
import Gargantext.Core (iso639ToText)
import Gargantext.Core.Text.Corpus.Query qualified as Corpus
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Network.URI (parseURI)
import Protolude
import Servant.Client.Core (ClientError(ConnectionError))
......@@ -39,7 +39,7 @@ get (Just authKey) epoAPIUrl q lang mLimit = do
Just apiUrl -> do
eRes <- EPO.searchEPOAPIC apiUrl authKey Nothing limit (Corpus.getRawQuery q)
pure $ (\(total, itemsC) -> (Just total, itemsC .| mapC (toDoc lang))) <$> eRes
-- EPO.Paginated { .. } <- EPO.searchEPOAPI apiUrl authKey 1 20 (Corpus.getRawQuery q)
-- pure $ Right ( Just $ fromIntegral total, yieldMany items .| mapC (toDoc lang) )
......@@ -48,8 +48,6 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
HyperdataDocument { _hd_bdd = Just "EPO"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = id
, _hd_uniqIdBdd = id
, _hd_page = Nothing
, _hd_title = Map.lookup lang titles
, _hd_authors = authors_
......@@ -66,10 +64,10 @@ toDoc lang (EPO.HyperdataDocument { .. }) =
, _hd_language_iso2 = Just $ iso639ToText lang }
where
authors_ = if authors == []
authors_ = if null authors
then Nothing
else Just (T.intercalate ", " authors)
-- EPO.withAuthKey authKey $ \token -> do
-- let range = EPO.Range { rBegin = 1, rEnd = limit }
-- (len, docsC) <- EPO.searchPublishedDataWithFetchC token (Just $ Corpus.getRawQuery q) (Just range)
......
......@@ -12,29 +12,27 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API.Hal
where
import Conduit
import Data.Either
import Conduit ( ConduitT, (.|), mapMC )
import Data.LanguageCodes qualified as ISO639
import Data.Map.Strict qualified as Map
import Data.Maybe
import Data.Text (pack, intercalate)
import Data.Text (pack)
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (intercalate)
import HAL qualified as HAL
import HAL.Client qualified as HAL
import HAL qualified
import HAL.Doc.Corpus qualified as HAL
import HAL.Types qualified as HAL
import Servant.Client (ClientError)
get :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int -> IO [HyperdataDocument]
get la q ml = do
eDocs <- HAL.getMetadataWith [q] (Just 0) (fromIntegral <$> ml) la
either (panicTrace . pack . show) (\d -> mapM (toDoc' la) $ HAL._docs d) eDocs
either (panicTrace . pack . show) (mapM (toDoc' la) . HAL._docs) eDocs
getC :: Maybe ISO639.ISO639_1 -> Text -> Maybe Int -> IO (Either ClientError (Maybe Integer, ConduitT () HyperdataDocument IO ()))
getC la q ml = do
eRes <- HAL.getMetadataWithC [q] (Just 0) (fromIntegral <$> ml) la
eRes <- HAL.getMetadataWithCursorC q (fromIntegral <$> ml) la
pure $ (\(len, docsC) -> (len, docsC .| mapMC (toDoc' la))) <$> eRes
-- case eRes of
-- Left err -> panic $ pack $ show err
......@@ -43,21 +41,19 @@ getC la q ml = do
toDoc' :: Maybe ISO639.ISO639_1 -> HAL.Corpus -> IO HyperdataDocument
toDoc' la (HAL.Corpus { .. }) = do
-- printDebug "[toDoc corpus] h" h
let mDateS = maybe (Just $ pack $ show Defaults.year) Just _corpus_date
let mDateS = _corpus_date <|> Just (pack $ show Defaults.year)
let (utctime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
let abstractDefault = intercalate " " _corpus_abstract
let abstractDefault = unwords _corpus_abstract
let abstract = case la of
Nothing -> abstractDefault
Just l -> fromMaybe abstractDefault (intercalate " " <$> Map.lookup l _corpus_abstract_lang_map)
Just l -> maybe abstractDefault unwords (Map.lookup l _corpus_abstract_lang_map)
pure HyperdataDocument { _hd_bdd = Just "Hal"
, _hd_doi = Just $ pack $ show _corpus_docid
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just $ intercalate " " _corpus_title
, _hd_authors = Just $ foldl (\x y -> if x == "" then y else x <> ", " <> y) "" _corpus_authors_names
, _hd_institutes = Just $ foldl (\x y -> if x == "" then y else x <> ", " <> y) "" $ _corpus_authors_affiliations <> map show _corpus_struct_id
, _hd_title = Just $ unwords _corpus_title
, _hd_authors = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" _corpus_authors_names
, _hd_institutes = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" $ _corpus_authors_affiliations <> map show _corpus_struct_id
, _hd_source = Just $ maybe "Nothing" identity _corpus_source
, _hd_abstract = Just abstract
, _hd_publication_date = fmap show utctime
......
......@@ -11,25 +11,32 @@ Portability : POSIX
{-# LANGUAGE ScopedTypeVariables #-}
module Gargantext.Core.Text.Corpus.API.Isidore where
module Gargantext.Core.Text.Corpus.API.Isidore (
get
-- * Internals (possibly unused?)
, isidore2csvFile
) where
import Data.Text qualified as Text
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers (cleanText)
import Gargantext.Core.Text.Corpus.Parsers.CSV (writeDocs2Csv)
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (get)
import Isidore qualified as Isidore
import Isidore.Client
import Servant.Client
import Servant.Client ( ClientError(DecodeFailure) )
-- | TODO work with the ServantErr
get :: Lang -> Maybe Isidore.Limit
-> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
get :: Lang
-> Maybe Isidore.Limit
-> Maybe Isidore.TextQuery
-> Maybe Isidore.AuthorQuery
-> IO [HyperdataDocument]
get la l q a = do
get lang l q a = do
let
printErr (DecodeFailure e _) = panicTrace e
printErr e = panicTrace (show e)
......@@ -40,21 +47,21 @@ get la l q a = do
iDocs <- either printErr _content <$> Isidore.get l q a
hDocs <- mapM (\d -> isidoreToDoc la d) (toIsidoreDocs iDocs)
hDocs <- mapM (isidoreToDoc lang) (toIsidoreDocs iDocs)
pure hDocs
isidore2csvFile :: FilePath -> Lang -> Maybe Isidore.Limit
-> Maybe Isidore.TextQuery -> Maybe Isidore.AuthorQuery
-> IO ()
isidore2csvFile fp la li tq aq = do
hdocs <- get la li tq aq
isidore2csvFile fp lang li tq aq = do
hdocs <- get lang li tq aq
writeDocs2Csv fp hdocs
isidoreToDoc :: Lang -> IsidoreDoc -> IO HyperdataDocument
isidoreToDoc l (IsidoreDoc t a d u s as) = do
isidoreToDoc lang (IsidoreDoc t a d u s as) = do
let
author :: Author -> Text
author (Author fn ln) = (_name fn) <> ", " <> (_name ln)
author (Author fn ln) = _name fn <> ", " <> _name ln
author (Authors aus) = Text.intercalate ". " $ map author aus
creator2text :: Creator -> Text
......@@ -66,21 +73,19 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
langText (OnlyText t2 ) = t2
langText (ArrayText ts ) = Text.intercalate " " $ map langText ts
let mDateS = maybe (Just $ Text.pack $ show Defaults.year) (Just) d
let mDateS = maybe (Just $ Text.pack $ show Defaults.year) Just d
let (utcTime, (pub_year, pub_month, pub_day)) = Date.mDateSplit mDateS
pure HyperdataDocument
{ _hd_bdd = Just "Isidore"
, _hd_doi = Nothing
, _hd_url = u
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just $ cleanText $ langText t
, _hd_authors = creator2text <$> as
, _hd_institutes = Nothing
, _hd_source = Just $ maybe "Nothing" identity $ _sourceName <$> s
, _hd_abstract = cleanText <$> langText <$> a
, _hd_source = Just $ maybe "Nothing" (identity . _sourceName) s
, _hd_abstract = cleanText . langText <$> a
, _hd_publication_date = fmap (Text.pack . show) utcTime
, _hd_publication_year = pub_year
, _hd_publication_month = pub_month
......@@ -88,5 +93,5 @@ isidoreToDoc l (IsidoreDoc t a d u s as) = do
, _hd_publication_hour = Nothing
, _hd_publication_minute = Nothing
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ (Text.pack . show) l
, _hd_language_iso2 = Just . Text.pack . show $ lang
}
{-# LANGUAGE LambdaCase #-}
{-|
Module : Gargantext.Core.Text.Corpus.API.Istex
Description : Pubmed API connection
......@@ -11,15 +13,16 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.API.Istex
( get )
where
import Data.List qualified as List
import Data.Text qualified as Text
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.JSON.Istex (toDoc)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude
import ISTEX qualified as ISTEX
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Prelude hiding (get)
import ISTEX qualified
import ISTEX.Client qualified as ISTEX
type Query = Text
......@@ -37,17 +40,14 @@ get la query' maxResults = do
-- eDocs <- ISTEX.getMetadataScroll (q <> " abstract:*") "1m" Nothing 0 --(fromIntegral <$> ml)
-- eDocs <- ISTEX.getMetadataScroll q "1m" Nothing 0 --(fromIntegral <$> ml)
let query = case (List.length $ Text.splitOn ":" query') == 1 of
let query = if List.length (Text.splitOn ":" query') == 1 then
-- True case means users is entering default search of IsTex
-- In that case we need to enrich his query with 2 parameters
-- First expected language: user has to define it in GTXT
-- Second : query in abstract
True -> ("language:"<> lang la) <> " AND abstract:"<>query'
where
lang FR = "fre"
lang _ = "eng"
("language:"<> toISTEXLanguageCode la) <> " AND abstract:"<>query'
False -> query'
else query'
-- Complex queries of IsTex needs parameters using ":" so we leave the query as it is
-- in that case we suppose user is knowing what s.he is doing
......@@ -70,4 +70,18 @@ toDoc' :: Lang -> ISTEX.Documents -> IO [HyperdataDocument]
toDoc' la docs' = mapM (toDoc la) (ISTEX._documents_hits docs')
--printDebug "ISTEX" (ISTEX._documents_total docs')
-- | Returns the properly-rendered language code according to
-- https://doc.istex.fr/tdm/annexes/codes-langues.html
-- ISTEX expects lowercase ISO 639-2/B codes; note the bibliographic
-- variants ("ger", "gre", "fre", "chi") rather than the terminological ones.
toISTEXLanguageCode :: Lang -> Text.Text
toISTEXLanguageCode = \case
  DE -> "ger"
  EL -> "gre"
  EN -> "eng"
  ES -> "spa"
  FR -> "fre"
  IT -> "ita"
  PL -> "pol"
  PT -> "por"
  RU -> "rus" -- fixed: was "Rus"; ISTEX codes are all-lowercase
  UK -> "ukr"
  ZH -> "chi"
......@@ -9,15 +9,15 @@ Portability : POSIX
-}
module Gargantext.Core.Text.Corpus.API.OpenAlex where
import Conduit
import Conduit ( ConduitT, (.|), mapC, takeC )
import Data.LanguageCodes qualified as ISO639
import qualified Data.Text as T
import Data.Text qualified as T
import Gargantext.Core (iso639ToText)
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Protolude
import qualified OpenAlex as OA
import qualified OpenAlex.Types as OA
import OpenAlex qualified as OA
import OpenAlex.Types qualified as OA
import Servant.Client (ClientError)
......@@ -37,8 +37,6 @@ toDoc (OA.Work { .. } ) =
HyperdataDocument { _hd_bdd = Just "OpenAlex"
, _hd_doi = doi
, _hd_url = url
, _hd_uniqId = Just id
, _hd_uniqIdBdd = Just id
, _hd_page = firstPage biblio
, _hd_title = title
, _hd_authors = authors authorships
......@@ -55,25 +53,25 @@ toDoc (OA.Work { .. } ) =
, _hd_language_iso2 = language }
where
firstPage :: OA.Biblio -> Maybe Int
firstPage OA.Biblio { first_page } = maybe Nothing readMaybe $ T.unpack <$> first_page
firstPage OA.Biblio { first_page } = (readMaybe . T.unpack) =<< first_page
authors :: [OA.Authorship] -> Maybe Text
authors [] = Nothing
authors aus = Just $ T.intercalate ", " $ catMaybes (getDisplayName <$> aus)
authors aus = Just $ T.intercalate ", " $ mapMaybe getDisplayName aus
where
getDisplayName :: OA.Authorship -> Maybe Text
getDisplayName OA.Authorship { author = OA.DehydratedAuthor { display_name = dn } } = dn
institutes :: [OA.Authorship] -> Maybe Text
institutes [] = Nothing
institutes aus = Just $ T.intercalate ", " ((T.replace ", " " - ") . getInstitutesNames <$> aus)
institutes aus = Just $ T.intercalate ", " (T.replace ", " " - " . getInstitutesNames <$> aus)
where
getInstitutesNames OA.Authorship { institutions } = T.intercalate ", " $ getDisplayName <$> institutions
getDisplayName :: OA.DehydratedInstitution -> Text
getDisplayName OA.DehydratedInstitution { display_name = dn } = dn
source :: Maybe Text
source = maybe Nothing getSource primary_location
source = getSource =<< primary_location
where
getSource OA.Location { source = s } = getSourceDisplayName <$> s
getSourceDisplayName OA.DehydratedSource { display_name = dn } = dn
......@@ -10,6 +10,7 @@ Portability : POSIX
-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE LambdaCase #-}
module Gargantext.Core.Text.Corpus.API.Pubmed
( get
......@@ -20,12 +21,11 @@ module Gargantext.Core.Text.Corpus.API.Pubmed
)
where
import Conduit
import Conduit ( ConduitT, (.|), mapC, takeC )
import Data.Text qualified as Text
import Data.Text.Encoding qualified as TE
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Query as Corpus
import Gargantext.Core.Types (Term(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Prelude hiding (get)
import Network.HTTP.Types.URI (EscapeItem(..), renderQueryPartialEscape)
......@@ -60,11 +60,15 @@ getESearch (ESearch items) =
convertQuery :: Corpus.Query -> ESearch
convertQuery q = ESearch (interpretQuery q transformAST)
where
transformAST :: BoolExpr Term -> [EscapeItem]
mergeTerms :: [QueryTerm] -> [EscapeItem]
mergeTerms trms = [QE $ TE.encodeUtf8 (Text.unwords $ map renderQueryTerm trms)]
transformAST :: BoolExpr [QueryTerm] -> [EscapeItem]
transformAST ast = case ast of
BAnd sub (BConst (Negative term))
-- The second term become positive, so that it can be translated.
-> (transformAST sub) <> [QN "+AND+NOT+"] <> transformAST (BConst (Positive term))
-> transformAST sub <> [QN "+AND+NOT+"] <> transformAST (BConst (Positive term))
BAnd term1 (BNot term2)
-> transformAST term1 <> [QN "+AND+NOT+"] <> transformAST term2
BAnd sub1 sub2
......@@ -81,11 +85,12 @@ convertQuery q = ESearch (interpretQuery q transformAST)
-- BTrue cannot happen is the query parser doesn't support parsing 'FALSE' alone.
BFalse
-> mempty
BConst (Positive (Term term))
-> [QE (TE.encodeUtf8 term)]
BConst (Positive terms)
-> mergeTerms terms
-- TODO(adinapoli) Support partial match queries
-- We can handle negatives via `ANDNOT` with itself.
BConst (Negative (Term term))
-> [QN "NOT+", QE (TE.encodeUtf8 term)]
BConst (Negative terms)
-> [QN "NOT+"] <> mergeTerms terms
get :: Text
-> Corpus.RawQuery
......@@ -108,14 +113,11 @@ get apiKey q l = do
-- <$> PubMed.getMetadataWithC q l
toDoc :: Lang -> PubMedDoc.PubMed -> HyperdataDocument
toDoc l (PubMedDoc.PubMed { pubmed_id
, pubmed_article = PubMedDoc.PubMedArticle t j as aus
toDoc l (PubMedDoc.PubMed { pubmed_article = PubMedDoc.PubMedArticle t j as aus
, pubmed_date = PubMedDoc.PubMedDate a y m d }
) = HyperdataDocument { _hd_bdd = Just "PubMed"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Just $ Text.pack $ show pubmed_id
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = t
, _hd_authors = authors aus
......@@ -133,16 +135,14 @@ toDoc l (PubMedDoc.PubMed { pubmed_id
where
authors :: [PubMedDoc.Author] -> Maybe Text
authors [] = Nothing
authors au = Just $ (Text.intercalate ", ")
$ catMaybes
$ map (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
authors au = Just $ Text.intercalate ", "
$ mapMaybe (\n -> PubMedDoc.foreName n <> Just " " <> PubMedDoc.lastName n) au
institutes :: [PubMedDoc.Author] -> Maybe Text
institutes [] = Nothing
institutes au = Just $ (Text.intercalate ", ")
$ (map (Text.replace ", " " - "))
$ catMaybes
$ map PubMedDoc.affiliation au
institutes au = Just $ Text.intercalate ", "
$ map (Text.replace ", " " - ")
$ mapMaybe PubMedDoc.affiliation au
abstract :: [Text] -> Maybe Text
......
......@@ -20,8 +20,17 @@ please follow the types.
{-# LANGUAGE PackageImports #-}
module Gargantext.Core.Text.Corpus.Parsers (FileFormat(..), FileType(..), clean, parseFile, cleanText, parseFormatC, splitOn, etale)
where
module Gargantext.Core.Text.Corpus.Parsers (
FileFormat(..)
, FileType(..)
, ParseFormatError(..)
, clean
, parseFile
, cleanText
, parseFormatC
, splitOn
, etale
) where
-- import Gargantext.Core.Text.Learn (detectLangDefault)
import "zip" Codec.Archive.Zip (EntrySelector, withArchive, getEntry, getEntries, unEntrySelector)
......@@ -46,11 +55,12 @@ import Gargantext.Core.Text.Corpus.Parsers.JSON (parseJSONC, parseIstex)
import Gargantext.Core.Text.Corpus.Parsers.RIS qualified as RIS
import Gargantext.Core.Text.Corpus.Parsers.RIS.Presse (presseEnrich)
import Gargantext.Core.Text.Corpus.Parsers.WOS qualified as WOS
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Query.Table.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude hiding (show, undefined)
import Gargantext.Utils.Jobs.Error
import Gargantext.Utils.Zip qualified as UZip
import Protolude
import Protolude ( show )
import System.FilePath (takeExtension)
------------------------------------------------------------------------
......@@ -81,83 +91,95 @@ data FileType = WOS
-- | PDF -- Not Implemented / pdftotext and import Pandoc ?
-- | XML -- Not Implemented / see :
parseFormatC :: MonadBaseControl IO m
-- | Error produced while parsing an uploaded corpus file; wraps a
-- human-readable message (see the 'ToHumanFriendlyError' instance below).
newtype ParseFormatError = ParseFormatError { _ParseFormatError :: DT.Text }
  deriving (Show, Eq, Ord, IsString)

instance ToHumanFriendlyError ParseFormatError where
  mkHumanFriendly = _ParseFormatError -- nothing sensitive that cannot be shown.
parseFormatC :: forall m. MonadBaseControl IO m
=> FileType
-> FileFormat
-> DB.ByteString
-> m (Either ParseFormatError (Integer, ConduitT () HyperdataDocument IO ()))
parseFormatC ft ff bs0 = first ParseFormatError <$> do_parse ft ff bs0
where
do_parse :: MonadBaseControl IO m
=> FileType
-> FileFormat
-> DB.ByteString
-> m (Either Text (Integer, ConduitT () HyperdataDocument IO ()))
parseFormatC CsvGargV3 Plain bs = do
let eParsedC = parseCsvC $ DBL.fromStrict bs
pure ((\(len, parsedC) -> (len, transPipe (pure . runIdentity) parsedC)) <$> eParsedC)
parseFormatC CsvHal Plain bs = do
let eParsedC = parseCsvC $ DBL.fromStrict bs
pure ((\(len, parsedC) -> (len, transPipe (pure . runIdentity) parsedC)) <$> eParsedC)
parseFormatC Istex Plain bs = do
ep <- liftBase $ parseIstex EN $ DBL.fromStrict bs
pure $ (\p -> (1, yieldMany [p])) <$> ep
parseFormatC RisPresse Plain bs = do
--docs <- enrichWith RisPresse
let eDocs = runParser' RisPresse bs
pure $ (\docs ->
( fromIntegral $ length docs
, yieldMany docs
.| mapC presseEnrich
.| mapC (map $ both decodeUtf8)
.| mapMC (toDoc RIS)) ) <$> eDocs
parseFormatC WOS Plain bs = do
let eDocs = runParser' WOS bs
pure $ (\docs ->
( fromIntegral $ length docs
, yieldMany docs
.| mapC (map $ first WOS.keys)
.| mapC (map $ both decodeUtf8)
.| mapMC (toDoc WOS)) ) <$> eDocs
parseFormatC Iramuteq Plain bs = do
let eDocs = runParser' Iramuteq bs
pure $ (\docs ->
( fromIntegral $ length docs
, yieldMany docs
.| mapC (map $ first Iramuteq.keys)
.| mapC (map $ both decodeUtf8)
.| mapMC ((toDoc Iramuteq) . (map (second (DT.replace "_" " "))))
)
)
<$> eDocs
parseFormatC JSON Plain bs = do
let eParsedC = parseJSONC $ DBL.fromStrict bs
pure ((\(len, parsedC) -> (len, transPipe (pure . runIdentity) parsedC)) <$> eParsedC)
parseFormatC ft ZIP bs = liftBase $ UZip.withZipFileBS bs $ do
fileNames <- filter (filterZIPFileNameP ft) <$> DM.keys <$> getEntries
printDebug "[parseFormatC] fileNames" fileNames
fileContents <- mapM getEntry fileNames
--printDebug "[parseFormatC] fileContents" fileContents
eContents <- mapM (parseFormatC ft Plain) fileContents
--printDebug "[parseFormatC] contents" contents
--pure $ Left $ "Not implemented for ZIP"
let (errs, contents) = partitionEithers eContents
case errs of
[] ->
case contents of
[] -> pure $ Left "No files in zip"
_ -> do
let lenghts = fst <$> contents
let contents' = snd <$> contents
let totalLength = sum lenghts
pure $ Right ( totalLength
, sequenceConduits contents' >> pure () ) -- .| mapM_C (printDebug "[parseFormatC] doc")
_ -> pure $ Left $ DT.intercalate "\n" errs
parseFormatC _ _ _ = pure $ Left "Not implemented"
-> m (Either DT.Text (Integer, ConduitT () HyperdataDocument IO ()))
do_parse CsvGargV3 Plain bs = do
let eParsedC = parseCsvC $ DBL.fromStrict bs
pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
do_parse CsvHal Plain bs = do
let eParsedC = parseCsvC $ DBL.fromStrict bs
pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
do_parse Istex Plain bs = do
ep <- liftBase $ parseIstex EN $ DBL.fromStrict bs
pure $ (\p -> (1, yieldMany [p])) <$> ep
do_parse RisPresse Plain bs = do
--docs <- enrichWith RisPresse
let eDocs = runParser' RisPresse bs
pure $ (\docs ->
( fromIntegral $ length docs
, yieldMany docs
.| mapC presseEnrich
.| mapC (map $ both decodeUtf8)
.| mapMC (toDoc RIS)) ) <$> eDocs
do_parse WOS Plain bs = do
let eDocs = runParser' WOS bs
pure $ (\docs ->
( fromIntegral $ length docs
, yieldMany docs
.| mapC (map $ first WOS.keys)
.| mapC (map $ both decodeUtf8)
.| mapMC (toDoc WOS)) ) <$> eDocs
do_parse Iramuteq Plain bs = do
let eDocs = runParser' Iramuteq bs
pure $ (\docs ->
( fromIntegral $ length docs
, yieldMany docs
.| mapC (map $ first Iramuteq.keys)
.| mapC (map $ both decodeUtf8)
.| mapMC (toDoc Iramuteq . map (second (DT.replace "_" " ")))
)
)
<$> eDocs
do_parse JSON Plain bs = do
let eParsedC = parseJSONC $ DBL.fromStrict bs
pure (second (transPipe (pure . runIdentity)) <$> eParsedC)
do_parse fty ZIP bs = liftBase $ UZip.withZipFileBS bs $ do
fileNames <- filter (filterZIPFileNameP ft) . DM.keys <$> getEntries
printDebug "[do_parse] fileNames" fileNames
fileContents <- mapM getEntry fileNames
--printDebug "[do_parse] fileContents" fileContents
eContents <- mapM (do_parse fty Plain) fileContents
--printDebug "[do_parse] contents" contents
--pure $ Left $ "Not implemented for ZIP"
let (errs, contents) = partitionEithers eContents
case errs of
[] ->
case contents of
[] -> pure $ Left "No files in zip"
_ -> do
let lenghts = fst <$> contents
let contents' = snd <$> contents
let totalLength = sum lenghts
pure $ Right ( totalLength
, void (sequenceConduits contents') ) -- .| mapM_C (printDebug "[do_parse] doc")
_ -> pure $ Left $ DT.intercalate "\n" errs
do_parse _ _ _ = pure $ Left "Not implemented"
filterZIPFileNameP :: FileType -> EntrySelector -> Bool
filterZIPFileNameP Istex f = (takeExtension (unEntrySelector f) == ".json") &&
((unEntrySelector f) /= "manifest.json")
(unEntrySelector f /= "manifest.json")
filterZIPFileNameP _ _ = True
etale :: [HyperdataDocument] -> [HyperdataDocument]
etale = concat . (map etale')
etale = concatMap etale'
where
etale' :: HyperdataDocument -> [HyperdataDocument]
etale' h = map (\t -> h { _hd_abstract = Just t })
......@@ -232,8 +254,6 @@ toDoc ff d = do
let hd = HyperdataDocument { _hd_bdd = Just $ DT.pack $ show ff
, _hd_doi = lookup "doi" d
, _hd_url = lookup "URL" d
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = lookup "title" d
, _hd_authors = lookup "authors" d
......@@ -293,7 +313,7 @@ runParser format text = pure $ runParser' format text
runParser' :: FileType
-> DB.ByteString
-> (Either Text [[(DB.ByteString, DB.ByteString)]])
-> Either Text [[(DB.ByteString, DB.ByteString)]]
runParser' format text = first DT.pack $ parseOnly (withParser format) text
openZip :: FilePath -> IO [DB.ByteString]
......@@ -317,5 +337,5 @@ clean txt = DBC.map clean' txt
--
splitOn :: NgramsType -> Maybe Text -> Text -> [Text]
splitOn Authors (Just "WOS") = (DT.splitOn "; ")
splitOn _ _ = (DT.splitOn ", ")
splitOn Authors (Just "WOS") = DT.splitOn "; "
splitOn _ _ = DT.splitOn ", "
......@@ -33,8 +33,8 @@ book2csv :: Int -> FileDir -> FileOut -> IO ()
book2csv n f_in f_out = do
files <- filesOf f_in
texts <- readPublis f_in files
let publis = List.concat $ map (file2publi n) texts
let docs = map (\(y,p) -> publiToHyperdata y p) $ List.zip [1..] publis
let publis = concatMap (file2publi n) texts
let docs = zipWith publiToHyperdata [1..] publis
DBL.writeFile f_out (hyperdataDocument2csv docs)
filesOf :: FileDir -> IO [FilePath]
......@@ -43,7 +43,7 @@ filesOf fd = List.sort -- sort by filenam
<$> getDirectoryContents fd
readPublis :: FileDir -> [FilePath] -> IO [(FilePath, Text)]
readPublis fd fps = mapM (\fp -> DBL.readFile (fd <> fp) >>= \txt -> pure (fp, cs txt)) fps
readPublis fd = mapM (\fp -> DBL.readFile (fd <> fp) >>= \txt -> pure (fp, cs txt))
------------------------------------------------------------------------
-- Main Types
......@@ -63,7 +63,7 @@ type FileDir = FilePath
---------------------------------------------------------------------
file2publi :: Int -> (FilePath, Text) -> [Publi]
file2publi n (fp,theText) = map (\(t,txt) -> Publi authors source t txt) theTexts
file2publi n (fp,theText) = map (uncurry (Publi authors source)) theTexts
where
theTexts = text2titleParagraphs n theText
FileInfo authors source = fileNameInfo fp
......@@ -81,8 +81,6 @@ publiToHyperdata y (Publi a s t txt) =
HyperdataDocument { _hd_bdd = Just "Book File"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just t
, _hd_authors = Just (DT.concat a)
......
......@@ -14,8 +14,7 @@ CSV parser for Gargantext corpus files.
module Gargantext.Core.Text.Corpus.Parsers.CSV where
import Conduit
import Control.Applicative
import Conduit ( ConduitT, (.|), yieldMany, mapC )
import Data.ByteString qualified as BS
import Data.ByteString.Lazy qualified as BL
import Data.Csv
......@@ -24,9 +23,9 @@ import Data.Text qualified as T
import Data.Time.Segment (jour)
import Data.Vector (Vector)
import Data.Vector qualified as V
import Gargantext.Core.Text
import Gargantext.Core.Text.Context
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Core.Text ( sentences, unsentences )
import Gargantext.Core.Text.Context ( splitBy, SplitContext(..) )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude hiding (length, show)
import Protolude
......@@ -60,8 +59,6 @@ toDoc (CsvGargV3 did dt _ dpy dpm dpd dab dau) =
HyperdataDocument { _hd_bdd = Just "CSV"
, _hd_doi = Just . pack . show $ did
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just dt
, _hd_authors = Nothing
......@@ -93,11 +90,11 @@ toDocs v = V.toList
(V.enumFromN 1 (V.length v'')) v''
where
v'' = V.foldl (\v' sep -> V.concatMap (splitDoc (docsSize v') sep) v') v seps
seps= (V.fromList [Paragraphs 1, Sentences 3, Chars 3])
seps= V.fromList [Paragraphs 1, Sentences 3, Chars 3]
---------------------------------------------------------------
fromDocs :: Vector CsvGargV3 -> Vector CsvDoc
fromDocs docs = V.map fromDocs' docs
fromDocs = V.map fromDocs'
where
fromDocs' (CsvGargV3 { .. }) = CsvDoc { csv_title = d_title
, csv_source = d_source
......@@ -111,16 +108,11 @@ fromDocs docs = V.map fromDocs' docs
-- | Split a document in its context
-- TODO adapt the size of the paragraph according to the corpus average
splitDoc :: Mean -> SplitContext -> CsvDoc -> Vector CsvDoc
splitDoc m splt doc = let docSize = (T.length $ csv_abstract doc) in
if docSize > 1000
then
if (mod (round m) docSize) >= 10
then
splitDoc' splt doc
else
V.fromList [doc]
else
V.fromList [doc]
splitDoc m splt doc =
let docSize = (T.length $ csv_abstract doc) in
if (docSize > 1000) && (mod (round m) docSize >= 10)
then splitDoc' splt doc
else V.fromList [doc]
where
splitDoc' :: SplitContext -> CsvDoc -> Vector CsvDoc
splitDoc' contextSize (CsvDoc { .. }) = V.fromList $ [firstDoc] <> nextDocs
......@@ -152,7 +144,7 @@ unIntOrDec :: IntOrDec -> Int
unIntOrDec (IntOrDec i) = i
instance FromField IntOrDec where
parseField s = case runParser (parseField s :: Parser Int) of
Left _err -> IntOrDec <$> floor <$> (parseField s :: Parser Double)
Left _err -> IntOrDec . floor <$> (parseField s :: Parser Double)
Right n -> pure $ IntOrDec n
instance ToField IntOrDec where
toField (IntOrDec i) = toField i
......@@ -253,15 +245,15 @@ readByteStringStrict :: (FromNamedRecord a)
-> Delimiter
-> BS.ByteString
-> Either Text (Header, Vector a)
readByteStringStrict d ff = (readByteStringLazy d ff) . BL.fromStrict
readByteStringStrict d ff = readByteStringLazy d ff . BL.fromStrict
------------------------------------------------------------------------
-- | TODO use readFileLazy
readCSVFile :: FilePath -> IO (Either Text (Header, Vector CsvDoc))
readCSVFile fp = do
result <- fmap (readCsvLazyBS Comma) $ BL.readFile fp
result <- readCsvLazyBS Comma <$> BL.readFile fp
case result of
Left _err -> fmap (readCsvLazyBS Tab) $ BL.readFile fp
Left _err -> readCsvLazyBS Tab <$> BL.readFile fp
Right res -> pure $ Right res
......@@ -382,8 +374,6 @@ csvHal2doc (CsvHal { .. }) =
HyperdataDocument { _hd_bdd = Just "CsvHal"
, _hd_doi = Just csvHal_doiId_s
, _hd_url = Just csvHal_url
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just csvHal_title
, _hd_authors = Just csvHal_authors
......@@ -407,8 +397,6 @@ csv2doc (CsvDoc { .. })
= HyperdataDocument { _hd_bdd = Just "CsvHal"
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just csv_title
, _hd_authors = Just csv_authors
......@@ -434,10 +422,10 @@ csv2doc (CsvDoc { .. })
parseHal :: FilePath -> IO (Either Text [HyperdataDocument])
parseHal fp = do
r <- readCsvHal fp
pure $ (V.toList . V.map csvHal2doc . snd) <$> r
pure $ V.toList . V.map csvHal2doc . snd <$> r
parseHal' :: BL.ByteString -> Either Text [HyperdataDocument]
parseHal' bs = (V.toList . V.map csvHal2doc . snd) <$> readCsvHalLazyBS bs
parseHal' bs = V.toList . V.map csvHal2doc . snd <$> readCsvHalLazyBS bs
------------------------------------------------------------------------
......@@ -455,7 +443,7 @@ parseCsv' bs = do
result = case readCsvLazyBS Comma bs of
Left _err -> readCsvLazyBS Tab bs
Right res -> Right res
(V.toList . V.map csv2doc . snd) <$> result
V.toList . V.map csv2doc . snd <$> result
parseCsvC :: BL.ByteString
-> Either Text (Integer, ConduitT () HyperdataDocument Identity ())
......
......@@ -18,32 +18,21 @@ DGP.parseDateRaw DGP.FR "12 avril 2010" == "2010-04-12T00:00:00.000+00:00"
{-# LANGUAGE TypeFamilies #-}
module Gargantext.Core.Text.Corpus.Parsers.Date
{-(parse, parseRaw, dateSplit, Year, Month, Day)-}
where
module Gargantext.Core.Text.Corpus.Parsers.Date (
dateSplit
, mDateSplit
, defaultDay
, defaultUTCTime
, split'
) where
import Data.Aeson (toJSON, Value)
import Data.Aeson qualified as Json
import Data.Aeson.KeyMap as KM hiding (map)
import Data.HashSet qualified as HashSet
import Data.List qualified as List
import Data.Text (unpack, splitOn, replace)
import Data.Time (defaultTimeLocale, iso8601DateFormat, parseTimeM, toGregorian)
import Data.Time.Calendar qualified as DTC
import Data.Time.Clock ( secondsToDiffTime)
import Data.Time.Clock (UTCTime(..)) -- , getCurrentTime)
import Data.Time.LocalTime (utc)
import Data.Time.LocalTime.TimeZone.Series (zonedTimeToZoneSeriesTime)
import Duckling.Api (analyze)
import Duckling.Core (makeLocale, Dimension(Time))
import Duckling.Core qualified as DC
import Duckling.Resolve (fromUTC, Context(Context, referenceTime, locale), DucklingTime(DucklingTime), Options(..))
import Duckling.Types (ResolvedToken(..), ResolvedVal(..))
import Duckling.Types (Seal(..))
import Gargantext.Core (Lang(FR,EN))
-- import Gargantext.Core.Types (DebugMode(..), withDebugMode)
import Data.Time.Clock (UTCTime(..), secondsToDiffTime)
import Gargantext.Prelude hiding (replace)
import System.Environment (getEnv)
------------------------------------------------------------------------
-- | Parse date to Ints
-- TODO add hours, minutes and seconds
......@@ -91,13 +80,6 @@ parse s = do
-- $ getCurrentTime)
_ -> Left "[G.C.T.C.Parsers.Date] parse: Should not happen"
-- | Sentinel date string used when no real date can be parsed.
-- NOTE(review): not ISO-8601 valid ("0-0-0T0:0:0"); downstream parsers
-- presumably treat it as a recognizable failure marker — confirm.
defaultDate :: Text
defaultDate = "0-0-0T0:0:0"
-- | Format pattern handed to a time parser (e.g. 'parseTimeM').
type DateFormat = Text
-- | Default textual date substituted for missing date components.
type DateDefault = Text
data DateFlow = DucklingSuccess { ds_result :: Text }
| DucklingFailure { df_result :: Text }
| ReadFailure1 { rf1_result :: Text }
......@@ -133,83 +115,9 @@ readDate txt = do
parseTimeM True defaultTimeLocale (unpack format) (cs txt)
-- TODO add Paris at Duckling.Locale Region datatype
-- | To get Homogeinity of the languages
-- TODO : put this in a more generic place in the source code
-- | Translate a Gargantext 'Lang' into the Duckling language type.
-- Only French and English are supported; any other language aborts
-- with 'panic', carrying the offending language in the message.
parserLang :: Lang -> DC.Lang
parserLang l = case l of
  FR -> DC.FR
  EN -> DC.EN
  _  -> panic $ "[G.C.T.C.P.Date] Lang not implemented" <> show l
-- | Final Date parser API
-- IO can be avoided here:
-- currentContext :: Lang -> IO Context
-- currentContext lang = localContext lang <$> utcToDucklingTime <$> getCurrentTime
-- parseRaw :: Context -> Text -> SomeErrorHandling Text
-- | Total wrapper around 'parseRaw': instead of propagating a parse
-- failure it tags the outcome as a 'DateFlow', letting callers fall
-- back gracefully on 'DucklingFailure'.
parseRawSafe :: Lang -> Text -> IO DateFlow
parseRawSafe lang txt =
  case parseRaw lang txt of
    Right ok -> pure $ DucklingSuccess ok
    Left _err -> do
      -- The LANG variable is read but unused; kept because 'getEnv'
      -- raises when LANG is unset, preserving the original behaviour.
      _ <- getEnv "LANG"
      pure $ DucklingFailure txt
--tryParseRaw :: CE.Exception e => Lang -> Text -> IO (Either e Text)
--tryParseRaw lang text = CE.try (parseRaw lang text)
-- | Run Duckling over the input text and extract the resolved time
-- value as text; yields a tagged error message when Duckling resolves
-- no time value at all.
parseRaw :: Lang -> Text -> Either Text Text
parseRaw lang input =
  maybe parseError Right
    $ extractValue
    $ getTimeValue
    $ parseDateWithDuckling lang input (Options True)
  where
    parseError =
      Left $ "[G.C.T.C.P.D.parseRaw ERROR] " <> show lang <> " :: " <> input
-- | Take the first resolved token, keeping it only when it carries a
-- 'Time' value, rendered to JSON; anything else becomes 'Nothing'.
getTimeValue :: [ResolvedToken] -> Maybe Value
getTimeValue []        = Nothing
getTimeValue (tok : _) =
  case rval tok of
    RVal Time t -> Just $ toJSON t
    _           -> Nothing
-- | Dig the textual "value" field out of a JSON object, if present;
-- every other shape of input collapses to 'Nothing'.
extractValue :: Maybe Value -> Maybe Text
extractValue mv = case mv of
  Just (Json.Object o)
    | Just (Json.String d) <- KM.lookup "value" o -> Just d
  _ -> Nothing
-- | Convert a 'UTCTime' into Duckling's 'DucklingTime', pinning the
-- zone to UTC via 'fromUTC' before building the zone-series time.
-- TODO : get local Time in a more generic way
utcToDucklingTime :: UTCTime -> DucklingTime
utcToDucklingTime time = DucklingTime . zonedTimeToZoneSeriesTime $ fromUTC time utc
-- | Build a Duckling resolution 'Context' from a language and a
-- reference time; the locale carries no regional variant ('Nothing').
localContext :: Lang -> DucklingTime -> Context
localContext lang dt = Context { referenceTime = dt
                               , locale = makeLocale (parserLang lang) Nothing }
-- | Placeholder day (year 1, January 1st) used when a document
-- provides no usable date.
defaultDay :: DTC.Day
defaultDay = DTC.fromGregorian 1 1 1
-- | Midnight on 'defaultDay'; serves as the fallback reference
-- instant for date parsing.
defaultUTCTime :: UTCTime
defaultUTCTime = UTCTime { utctDay = defaultDay
                         , utctDayTime = secondsToDiffTime 0 }
-- | Run Duckling's analyzer on the input text, resolving only the
-- 'Time' dimension, with 'defaultUTCTime' as the reference instant
-- (so resolution does not depend on the wall clock).
parseDateWithDuckling :: Lang -> Text -> Options -> [ResolvedToken]
parseDateWithDuckling lang input options = do
  let contxt = localContext lang $ utcToDucklingTime defaultUTCTime
  --pure $ parseAndResolve (rulesFor (locale ctx) (HashSet.fromList [(This Time)])) input ctx
  -- TODO check/test Options False or True
  analyze input contxt options $ HashSet.fromList [(Seal Time)]
......@@ -13,12 +13,12 @@ module Gargantext.Core.Text.Corpus.Parsers.Gitlab (
Issue(..), gitlabIssue2hyperdataDocument, readFile_Issues, readFile_IssuesAsDocs
) where
import Data.Aeson
import Data.Aeson ( decode, (.:), (.:?), withObject )
import Data.ByteString.Lazy qualified as DBL
import Data.Text qualified as DT
import Data.Time
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude
data Issue = Issue { _issue_id :: !Int
......@@ -42,8 +42,6 @@ gitlabIssue2hyperdataDocument issue = HyperdataDocument
{ _hd_bdd = Nothing
, _hd_doi = Nothing
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = Just (_issue_title issue)
, _hd_authors = Nothing
......
......@@ -26,13 +26,12 @@ _flowCorpusDebat u n l fp = do
module Gargantext.Core.Text.Corpus.Parsers.GrandDebat
where
import Data.Aeson (ToJSON, FromJSON)
import Data.ByteString.Lazy qualified as DBL
import Data.JsonStream.Parser qualified as P
import Data.Text qualified as Text
import Gargantext.Core (Lang(..))
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..), ToHyperdataDocument, toHyperdataDocument)
import Gargantext.Database.GargDB
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..), ToHyperdataDocument, toHyperdataDocument)
import Gargantext.Database.GargDB ( ReadFile(..) )
import Gargantext.Prelude
data GrandDebatReference = GrandDebatReference
......@@ -43,14 +42,14 @@ data GrandDebatReference = GrandDebatReference
, createdAt :: !(Maybe Text)
, publishedAt :: !(Maybe Text)
, updatedAt :: !(Maybe Text)
, trashed :: !(Maybe Bool)
, trashedStatus :: !(Maybe Text)
, authorId :: !(Maybe Text)
, authorType :: !(Maybe Text)
, authorZipCode :: !(Maybe Text)
, responses :: !(Maybe [GrandDebatResponse])
}
deriving (Show, Generic)
......@@ -77,8 +76,6 @@ instance ToHyperdataDocument GrandDebatReference
HyperdataDocument { _hd_bdd = Just "GrandDebat"
, _hd_doi = id
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = title
, _hd_authors = authorType
......@@ -94,12 +91,10 @@ instance ToHyperdataDocument GrandDebatReference
, _hd_publication_second = Nothing
, _hd_language_iso2 = Just $ Text.pack $ show FR }
where
toAbstract = (Text.intercalate " . ") . ((filter (/= "")) . (map toSentence))
toAbstract = Text.intercalate " . " . (filter (/= "") . map toSentence)
toSentence (GrandDebatResponse _id _qtitle _qvalue r) = case r of
Nothing -> ""
Just r' -> case Text.length r' > 10 of
True -> r'
False -> ""
Just r' -> if Text.length r' > 10 then r' else ""
instance ReadFile [GrandDebatReference]
where
......
......@@ -20,14 +20,13 @@ TODO:
module Gargantext.Core.Text.Corpus.Parsers.Isidore where
import Control.Lens hiding (contains)
import Data.ByteString.Lazy (ByteString)
import Data.RDF hiding (triple, Query)
import Data.RDF ( Node(LNode, UNode), LValue(PlainLL, TypedL, PlainL) )
import Data.Text qualified as T
import Database.HSparql.Connection
import Database.HSparql.Connection ( BindingValue(..), EndPoint, structureContent )
import Database.HSparql.QueryGenerator
import Gargantext.Core (Lang)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument(..) )
import Gargantext.Prelude hiding (ByteString)
import Network.Wreq (getWith, Response, defaults, header, param, responseStatus, responseBody)
import Prelude qualified
......@@ -115,7 +114,7 @@ unbound _ Unbound = Nothing
unbound _ (Bound (UNode x)) = Just x
unbound _ (Bound (LNode (TypedL x _))) = Just x
unbound _ (Bound (LNode (PlainL x))) = Just x
unbound l (Bound (LNode (PlainLL x l'))) = if l' == (T.toLower $ show l) then Just x else Nothing
unbound l (Bound (LNode (PlainLL x l'))) = if l' == T.toLower (show l) then Just x else Nothing
unbound _ _ = Nothing
bind2doc :: Lang -> [BindingValue] -> HyperdataDocument
......@@ -123,8 +122,6 @@ bind2doc l [ link', date, langDoc, authors, _source, publisher, title, abstract
HyperdataDocument { _hd_bdd = Just "Isidore"
, _hd_doi = Nothing
, _hd_url = unbound l link'
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = unbound l title
, _hd_authors = unbound l authors
......
......@@ -19,11 +19,10 @@ module Gargantext.Core.Text.Corpus.Parsers.JSON.Istex where
import Data.Text qualified as T
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.Date qualified as Date
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Defaults qualified as Defaults
import Gargantext.Prelude hiding (length, show)
import Gargantext.Prelude hiding (length)
import ISTEX.Client qualified as ISTEX
import Protolude
-- | TODO remove dateSplit here
......@@ -37,13 +36,11 @@ toDoc la (ISTEX.Document i t a ab d s) = do
pure $ HyperdataDocument { _hd_bdd = Just "Istex"
, _hd_doi = Just i
, _hd_url = Nothing
, _hd_uniqId = Nothing
, _hd_uniqIdBdd = Nothing
, _hd_page = Nothing
, _hd_title = t
, _hd_authors = Just $ foldl (\x y -> if x == "" then y else x <> ", " <> y) "" (map ISTEX._author_name a)
, _hd_institutes = Just $ foldl (\x y -> if x == "" then y else x <> ", " <> y) "" (concat $ (map ISTEX._author_affiliations) a)
, _hd_source = Just $ foldl (\x y -> if x == "" then y else x <> ", " <> y) "" (ISTEX._source_title s)
, _hd_authors = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (map ISTEX._author_name a)
, _hd_institutes = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (concatMap ISTEX._author_affiliations a)
, _hd_source = Just $ foldl' (\x y -> if x == "" then y else x <> ", " <> y) "" (ISTEX._source_title s)
, _hd_abstract = ab
, _hd_publication_date = fmap (T.pack . show) utctime
, _hd_publication_year = pub_year
......
......@@ -17,8 +17,7 @@ Json parser to export towoard CSV GargV3 format.
module Gargantext.Core.Text.Corpus.Parsers.Json2Csv (json2csv, readPatents)
where
import Data.Aeson
import Data.Aeson.TH (deriveJSON)
import Data.Aeson ( decode )
import Data.ByteString.Lazy (readFile)
import Data.Text (unpack)
import Data.Vector (fromList)
......
......@@ -18,14 +18,13 @@ Portability : POSIX
module Gargantext.Core.Text.Corpus.Parsers.Wikidata where
import Control.Lens (makeLenses, (^.) )
import Data.List qualified as List
import Data.Text (concat)
import Database.HSparql.Connection
import Database.HSparql.Connection ( BindingValue, EndPoint, selectQueryRaw )
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.Date (mDateSplit)
import Gargantext.Core.Text.Corpus.Parsers.Isidore (unbound)
import Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler
import Gargantext.Core.Text.Corpus.Parsers.Wikidata.Crawler ( crawlPage )
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Prelude hiding (concat)
import Prelude qualified
......@@ -57,11 +56,9 @@ wikiPageToDocument m wr = do
let bdd = Just "wikidata"
doi = Nothing
url = (wr ^. wr_url)
uniqId = Nothing
uniqIdBdd = Nothing
url = wr ^. wr_url
page = Nothing
title = (wr ^. wr_title)
title = wr ^. wr_title
authors = Nothing
institutes = Nothing
source = Nothing
......@@ -83,8 +80,6 @@ wikiPageToDocument m wr = do
pure $ HyperdataDocument { _hd_bdd = bdd
, _hd_doi = doi
, _hd_url = url
, _hd_uniqId = uniqId
, _hd_uniqIdBdd = uniqIdBdd
, _hd_page = page
, _hd_title = title
, _hd_authors = authors
......
This diff is collapsed.
This diff is collapsed.
......@@ -9,11 +9,8 @@ Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE InstanceSigs #-}
module Gargantext.Core.Text.List.Group
where
......@@ -23,8 +20,8 @@ import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Group.WithScores
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.List.Group.WithScores ( groupWithScores' )
import Gargantext.Core.Text.List.Social.Prelude ( FlowListScores, FlowCont )
import Gargantext.Prelude
------------------------------------------------------------------------
toGroupedTree :: (Ord a, Monoid a, HasSize a)
......@@ -43,9 +40,7 @@ setScoresWithMap :: (Ord a, Ord b, Monoid b) => HashMap NgramsTerm b
-> HashMap NgramsTerm (GroupedTreeScores b)
setScoresWithMap m = setScoresWith (score m)
where
score m' t = case HashMap.lookup t m' of
Nothing -> mempty
Just r -> r
score m' t = fromMaybe mempty (HashMap.lookup t m')
setScoresWith :: (Ord a, Ord b)
=> (NgramsTerm -> b)
......@@ -58,8 +53,7 @@ setScoresWith f = Map.mapWithKey (\k v -> over gts'_children (setScoresWith f)
)
-}
setScoresWith f = HashMap.mapWithKey (\k v -> v { _gts'_score = f k
, _gts'_children = setScoresWith f
$ view gts'_children v
}
)
, _gts'_children = setScoresWith f $ view gts'_children v
}
)
------------------------------------------------------------------------
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment