Verified Commit 5d172111 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 162-dev-haskell-9.2

parents b19412f7 784a2dc3
......@@ -2,8 +2,7 @@
# https://vadosware.io/post/zero-to-continuous-integrated-testing-a-haskell-project-with-gitlab/
#
#
image: cgenie/stack-build:lts-18.18-garg
#image: cgenie/nixos-stack:latest
image: adinapoli/gargantext:v1
variables:
STACK_ROOT: "${CI_PROJECT_DIR}/.stack-root"
......@@ -15,32 +14,38 @@ variables:
stages:
- deps
- docs
- cabal
- test
- docs
deps:
stage: deps
cache:
# cache per branch name
# key: ${CI_COMMIT_REF_SLUG}
paths:
- /nix
- .stack
- .stack-root/
- .stack-work/
- target
script:
- stack build --no-terminal --haddock --no-haddock-deps --only-dependencies --fast
- echo "Building the project from '$CI_PROJECT_DIR'"
- nix-shell && export LC_ALL=C.UTF-8 && stack -v build --no-terminal --haddock --no-haddock-deps --only-dependencies --fast
docs:
stage: docs
cache:
# cache per branch name
# key: ${CI_COMMIT_REF_SLUG}
paths:
- /nix
- .stack
- .stack-root/
- .stack-work/
- target
script:
- stack build --no-terminal --haddock --no-haddock-deps --fast
- nix-shell && export LC_ALL=C.UTF-8 && stack build --no-terminal --haddock --no-haddock-deps --fast
- cp -R "$(stack path --local-install-root)"/doc ./output
artifacts:
paths:
......@@ -48,16 +53,34 @@ docs:
expire_in: 1 week
test:
stage: test
cache:
# cache per branch name
# key: ${CI_COMMIT_REF_SLUG}
paths:
- /nix
- .stack
- .stack-root/
- .stack-work/
- target
script:
- stack test --no-terminal --fast
- nix-shell && export LC_ALL=C.UTF-8 && stack test --no-terminal --fast
# TOOO
cabal:
stage: cabal
cache:
# cache per branch name
# key: ${CI_COMMIT_REF_SLUG}
paths:
- /nix
- .stack
- .stack-root/
- .stack-work/
- .local/
- .cabal/
- target
script:
- nix-shell && export LC_ALL=C.UTF-8 && hpack && cabal v2-build --dry-run
## Version 0.0.6.9.9.5
* [BACK][OPTIM][[Node terms] Random slowness on loading a page list of terms (#199)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/199) Refresh index for textflow, reindex of terms list and schedule job
* [BACK][FIX][Docker image with nix (#188)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/188)
* [FRONT][FEAT][improvement Doc table : default state is stars empty, become one star after reading (#541)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/541)
## Version 0.0.6.9.9.4.6
* [BACK][FEAT][Import Corpus Docs as Json (#203)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/203)
* [BACK][OPTIM][[Node terms] Random slowness on loading a page list of terms (#199)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/199)
* Upgrade indications: run gargantext-upgrade gargantext.ini
## Version 0.0.6.9.9.4.5
* [BACK][OPTIM][[Node terms] Random slowness on loading a page list of terms (#199)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/199)
## Version 0.0.6.9.9.4.4
* [FRONT][FIX/WIP][[Term table] Add term should not be suggested when term is already added (#544)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/544)
* [FRONT][FIX] File Type vs Node Type
## Version 0.0.6.9.9.4.3
* [FRONT][FIX][APi tab not shown (#540)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/540)
* [BACK][FIX] SearX analysis needs its NodeTexts
## Version 0.0.6.9.9.4.2
* [BACK][FIX][[Node terms] Random slowness on loading a page list of terms (#199)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/199)
......@@ -6,7 +32,6 @@
## Version 0.0.6.9.9.4.1
* [FRONT][FIX][Doc annotation: space selected as ngrams issue (#458)](https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/458)
* [BACK][ADM][API key / user (pubmed) (#201)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/201)
......
......@@ -11,29 +11,29 @@ Script to start gargantext with different modes (Dev, Prod, Mock).
-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-}
{-# LANGUAGE TypeOperators #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Main where
import Data.Version (showVersion)
import Data.Maybe (fromMaybe)
import Data.Text (unpack)
import qualified Paths_gargantext as PG -- cabal magic build module
import Data.Version (showVersion)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import GHC.IO.Exception (IOException)
import Gargantext.API (startGargantext, Mode(..)) -- , startGargantextMock)
import Gargantext.API.Admin.EnvTypes (DevEnv)
import Gargantext.API.Dev (withDevEnv, runCmdDev)
import Gargantext.Prelude
import Options.Generic
import System.Exit (exitSuccess)
import qualified Paths_gargantext as PG -- cabal magic build module
import Gargantext.Prelude
import Gargantext.API (startGargantext, Mode(..)) -- , startGargantextMock)
--------------------------------------------------------
-- Graph Tests
--import qualified Gargantext.Graph.Utils as U
--import qualified Gargantext.Graph.Distances.Conditional as C
--import qualified Gargantext.Graph.Distances.Distributional as D
--import qualified Gargantext.Graph.Distances.Matrice as M
--------------------------------------------------------
instance ParseRecord Mode
instance ParseField Mode
......@@ -59,24 +59,25 @@ main :: IO ()
main = do
MyOptions myMode myPort myIniFile myVersion <- unwrapRecord
"Gargantext server"
---------------------------------------------------------------
if myVersion then do
putStrLn $ "Version: " <> showVersion PG.version
System.Exit.exitSuccess
else
return ()
---------------------------------------------------------------
let myPort' = case myPort of
Just p -> p
Nothing -> 8008
myIniFile' = case myIniFile of
Nothing -> panic "[ERROR] gargantext.ini needed"
Just i -> i
---------------------------------------------------------------
let start = case myMode of
Mock -> panic "[ERROR] Mock mode unsupported"
_ -> startGargantext myMode myPort' (unpack myIniFile')
where
myIniFile' = case myIniFile of
Nothing -> panic "[ERROR] gargantext.ini needed"
Just i -> i
putStrLn $ "Starting with " <> show myMode <> " mode."
start
---------------------------------------------------------------
......@@ -16,15 +16,14 @@ Import a corpus binary.
module Main where
import Data.Either (Either(..))
import Database.PostgreSQL.Simple.SqlQQ (sql)
import GHC.IO.Exception (IOException)
import Gargantext.API.Admin.EnvTypes (DevEnv)
import Gargantext.API.Dev (withDevEnv, runCmdDev)
import Gargantext.API.Ngrams.Tools (migrateFromDirToDb)
import Gargantext.API.Node () -- instances only
import Gargantext.API.Prelude (GargError)
import Gargantext.API.Ngrams.Tools (migrateFromDirToDb)
import Gargantext.Core (HasDBid(toDBid))
import Gargantext.Core.Types.Individu (User(..))
import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus)
......@@ -48,7 +47,7 @@ main = do
$ List.cycle ["_"]
___
putStrLn "GarganText upgrade to version 0.0.6"
putStrLn "GarganText upgrade to version 0.0.6.9.9.4.4"
___
params@[iniPath] <- getArgs
......@@ -69,197 +68,36 @@ main = do
let _secret = _gc_secretkey cfg
withDevEnv iniPath $ \env -> do
-- First upgrade the Database Schema
_ <- runCmdDev env (migrateFromDirToDb :: Cmd GargError ())
_ <- runCmdDev env addIndex
_ <- runCmdDev env refreshIndex
___
putStrLn "Uprade done with success !"
___
pure ()
refreshIndex :: Cmd'' DevEnv IOException ()
refreshIndex = do
_ <- execPGSQuery [sql| refresh materialized view context_node_ngrams_view; |] ()
pure ()
{-
sqlUpdateTriggerHash :: Cmd'' DevEnv IOException Int64
sqlUpdateTriggerHash = do
addIndex :: Cmd'' DevEnv IOException Int64
addIndex = do
execPGSQuery query ()
where
query = [sql|
UPDATE nodes SET typename = typename;
UPDATE contexts SET typename = typename;
|]
sqlNodes2Contexts :: Cmd'' DevEnv IOException Int64
sqlNodes2Contexts = do
execPGSQuery query (toDBid NodeDocument,toDBid NodeContact)
where
query = [sql|
-- WITH docs (id,hash_id,typename,user_id,parent_id,name,date,hyperdata, search)
WITH docs AS (SELECT * from nodes WHERE nodes.typename IN (?,?)),
inserted (id, hash_id) AS (
INSERT INTO contexts (hash_id,typename,user_id,parent_id,name,date,hyperdata, search)
SELECT d.hash_id,d.typename,d.user_id,NULL,d.name,d.date,d.hyperdata,search FROM docs AS d
RETURNING contexts.id, contexts.hash_id
),
indexed (node_id, context_id) AS (
SELECT docs.id, inserted.id from inserted
JOIN docs on docs.hash_id = inserted.hash_id
),
-- nodes_nodes -> nodes_contexts
nodes_contexts_query AS (
INSERT INTO nodes_contexts (node_id, context_id,score, category)
SELECT nn.node1_id,i.context_id,nn.score,nn.category FROM nodes_nodes nn
JOIN indexed i ON i.node_id = nn.node2_id
),
-- nodes_nodes_ngrams -> contexts_nodes_ngrams
contexts_nodes_ngrams_query AS (
INSERT INTO context_node_ngrams
SELECT i.context_id, nnn.node1_id, nnn.ngrams_id, nnn.ngrams_type, nnn.weight FROM node_node_ngrams nnn
JOIN indexed i ON i.node_id = nnn.node2_id
),
---- nodes_nodes_ngrams2 -> contexts_nodes_ngrams2
context_node_ngrams2_query AS (
INSERT INTO context_node_ngrams2
SELECT i.context_id, nnn2.nodengrams_id, nnn2.weight FROM node_node_ngrams2 nnn2
JOIN indexed i ON i.node_id = nnn2.node_id
)
-- WITH CASCADE it should update others tables
DELETE FROM nodes n
USING indexed i WHERE i.node_id = n.id
;
UPDATE contexts SET parent_id = id;
create materialized view if not exists context_node_ngrams_view as
select context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id
from nodes_contexts
join context_node_ngrams
on context_node_ngrams.context_id = nodes_contexts.context_id;
create index if not exists context_node_ngrams_context_id_ngrams_id_idx on context_node_ngrams(context_id, ngrams_id);
create index if not exists context_node_ngrams_view_context_id_idx on context_node_ngrams_view(context_id);
create index if not exists context_node_ngrams_view_ngrams_id_idx on context_node_ngrams_view(ngrams_id);
create index if not exists context_node_ngrams_view_node_id_idx on context_node_ngrams_view(node_id);
create index if not exists node_stories_ngrams_id_idx on node_stories(ngrams_id);
|]
sqlSchema :: Cmd'' DevEnv IOException Int64
sqlSchema = do
execPGSQuery query ()
where
query = [sql|
-- TODO typename -> type_id
CREATE TABLE public.contexts (
id SERIAL,
hash_id CHARACTER varying(66) DEFAULT ''::character varying NOT NULL,
typename INTEGER NOT NULL,
user_id INTEGER NOT NULL,
parent_id INTEGER REFERENCES public.contexts(id) ON DELETE CASCADE ,
name CHARACTER varying(255) DEFAULT ''::character varying NOT NULL,
date TIMESTAMP with time zone DEFAULT now() NOT NULL,
hyperdata jsonb DEFAULT '{}'::jsonb NOT NULL,
search tsvector,
PRIMARY KEY (id),
FOREIGN KEY (user_id) REFERENCES public.auth_user(id) ON DELETE CASCADE
);
ALTER TABLE public.contexts OWNER TO gargantua;
-- To attach contexts to a Corpus
CREATE TABLE public.nodes_contexts (
node_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
context_id INTEGER NOT NULL REFERENCES public.contexts(id) ON DELETE CASCADE,
score REAL ,
category INTEGER ,
PRIMARY KEY (node_id, context_id)
);
ALTER TABLE public.nodes_contexts OWNER TO gargantua;
---------------------------------------------------------------
CREATE TABLE public.context_node_ngrams (
context_id INTEGER NOT NULL REFERENCES public.contexts (id) ON DELETE CASCADE,
node_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
ngrams_id INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE,
ngrams_type INTEGER ,
weight double precision,
PRIMARY KEY (context_id, node_id, ngrams_id, ngrams_type)
);
ALTER TABLE public.context_node_ngrams OWNER TO gargantua;
CREATE TABLE public.context_node_ngrams2 (
context_id INTEGER NOT NULL REFERENCES public.contexts (id) ON DELETE CASCADE,
nodengrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE,
weight double precision,
PRIMARY KEY (context_id, nodengrams_id)
);
ALTER TABLE public.context_node_ngrams2 OWNER TO gargantua;
CREATE INDEX ON public.contexts USING gin (hyperdata);
CREATE INDEX ON public.contexts USING btree (user_id, typename, parent_id);
CREATE INDEX ON public.contexts USING btree (id, typename, date ASC);
CREATE INDEX ON public.contexts USING btree (id, typename, date DESC);
CREATE INDEX ON public.contexts USING btree (typename, id);
CREATE UNIQUE INDEX ON public.contexts USING btree (hash_id);
-- To make the links between Corpus Node and its contexts
CREATE UNIQUE INDEX ON public.nodes_contexts USING btree (node_id, context_id);
CREATE INDEX ON public.nodes_contexts USING btree (node_id, context_id, category);
------------------------------------------------------------------------
CREATE UNIQUE INDEX ON public.context_node_ngrams USING btree (context_id, node_id, ngrams_id, ngrams_type);
CREATE INDEX ON public.context_node_ngrams USING btree (context_id, node_id);
CREATE INDEX ON public.context_node_ngrams USING btree (ngrams_id, node_id);
CREATE INDEX ON public.context_node_ngrams USING btree (ngrams_type);
CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id);
CREATE INDEX ON public.context_node_ngrams2 USING btree (nodengrams_id);
CREATE INDEX ON public.context_node_ngrams2 USING btree (context_id, nodengrams_id);
DROP TABLE if EXISTS public.node_nodengrams_nodengrams;
DROP TRIGGER if EXISTS trigger_count_delete2 ON nodes_nodes;
DROP TRIGGER if EXISTS trigger_count_update_add ON nodes_nodes;
DROP TRIGGER if EXISTS trigger_delete_count ON nodes_nodes;
DROP TRIGGER if EXISTS trigger_insert_count ON nodes_nodes;
-- Indexes needed to speed up the deletes
-- Trigger for constraint node_ngrams_node_id_fkey
CREATE INDEX IF NOT EXISTS node_ngrams_node_id_idx ON public.node_ngrams USING btree (node_id);
-- Trigger for constraint node_node_ngrams2_node_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams2_node_id_idx ON public.node_node_ngrams2 USING btree (node_id);
-- Trigger for constraint node_node_ngrams_node1_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams_node1_id_idx ON public.node_node_ngrams USING btree (node1_id);
-- Trigger for constraint node_node_ngrams_node2_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams_node2_id_idx ON public.node_node_ngrams USING btree (node2_id);
-- Trigger for constraint nodes_nodes_node1_id_fkey
CREATE INDEX IF NOT EXISTS nodes_nodes_node1_id_idx ON public.nodes_nodes USING btree (node1_id);
-- Trigger for constraint nodes_nodes_node2_id_fkey
CREATE INDEX IF NOT EXISTS nodes_nodes_node2_id_idx ON public.nodes_nodes USING btree (node2_id);
-- Trigger for constraint nodes_parent_id_fkey
CREATE INDEX IF NOT EXISTS nodes_parent_id_idx ON public.nodes USING btree (parent_id);
-- Trigger for constraint rights_node_id_fkey
CREATE INDEX IF NOT EXISTS rights_node_id_idx ON public.rights USING btree (node_id);
-- Trigger for constraint nodes_contexts_node_id_fkey
CREATE INDEX IF NOT EXISTS nodes_contexts_node_id_idx ON public.nodes_contexts USING btree (node_id);
-- Trigger for constraint context_node_ngrams_node_id_fkey
CREATE INDEX IF NOT EXISTS context_node_node_id_idx ON public.context_node_ngrams USING btree (node_id);
|]
-}
FROM fpco/stack-build:lts-18.18
FROM ubuntu:jammy
#RUN apt-key adv --keyserver hkp://pool.sks-keyservers.net:80 --recv-keys 8B1DA6120C2BF624
ARG DEBIAN_FRONTEND=noninteractive
ARG GHC=8.10.7
ARG STACK=2.7.3
ARG CABAL=3.10.1.0
COPY ./shell.nix /builds/gargantext/shell.nix
COPY ./nix/pkgs.nix /builds/gargantext/nix/pkgs.nix
COPY ./nix/pinned-22.05.nix /builds/gargantext/nix/pinned-22.05.nix
ENV TZ=Europe/Rome
RUN apt-get update && \
apt-get install -y ca-certificates git libigraph0-dev && \
rm -rf /var/lib/apt/lists/*
apt-get install --no-install-recommends -y \
apt-transport-https \
autoconf \
automake \
build-essential \
ca-certificates \
curl \
gcc \
git \
gnupg2 \
libffi-dev \
libffi7 \
libgmp-dev \
libgmp10 \
libncurses-dev \
libncurses5 \
libnuma-dev \
libtinfo5 \
locales \
lsb-release \
software-properties-common \
sudo \
wget \
vim \
xz-utils \
zlib1g-dev && \
apt-get clean && rm -rf /var/lib/apt/lists/* && \
mkdir -m 0755 /nix && groupadd -r nixbld && chown root /nix && \
for n in $(seq 1 10); do useradd -c "Nix build user $n" -d /var/empty -g nixbld -G nixbld -M -N -r -s "$(command -v nologin)" "nixbld$n"; done
RUN gpg --batch --keyserver keys.openpgp.org --recv-keys 7D1E8AFD1D4A16D71FADA2F2CCC85C0E40C06A8C && \
gpg --batch --keyserver keyserver.ubuntu.com --recv-keys FE5AB6C91FEA597C3B31180B73EDE9E8CFBAEF01
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN set -o pipefail && \
bash <(curl -L https://releases.nixos.org/nix/nix-2.15.0/install) --no-daemon && \
locale-gen en_US.UTF-8
ENV LANG='en_US.UTF-8' LANGUAGE='en_US:en' LC_ALL='en_US.UTF-8'
ENV USER=root
ENV SHELL /bin/bash
RUN . "$HOME/.nix-profile/etc/profile.d/nix.sh" && \
mkdir -p "/builds/gargantext/" && chmod 777 -R "/builds/gargantext" && \
echo "source $HOME/.nix-profile/etc/profile.d/nix.sh" >> "$HOME/.bashrc" && \
echo `which nix-env`
ENV PATH=/root/.nix-profile/bin:$PATH
RUN . $HOME/.bashrc && nix-env --version
RUN \
curl https://downloads.haskell.org/~ghcup/x86_64-linux-ghcup > /usr/bin/ghcup && \
chmod +x /usr/bin/ghcup && \
ghcup config set gpg-setting GPGLax && \
ghcup -v install ghc --force ${GHC} && \
ghcup -v install cabal --force ${CABAL} && \
ghcup -v install stack --force ${STACK}
ENV PATH=/root/.ghcup/bin:$PATH
ENV PATH=/root/.local/bin:$PATH
RUN cd /builds/gargantext && nix-shell
RUN ghcup set 8.10.7 && cabal v2-update && cabal v2-install hpack --overwrite-policy=always
WORKDIR "/builds/gargantext/"
......@@ -335,3 +335,16 @@ CREATE OR REPLACE function node_pos(int, int) returns bigint
--drop index node_by_pos;
--create index node_by_pos on nodes using btree(node_pos(id,typename));
-- Optimization for Ngrams Table View
create materialized view if not exists context_node_ngrams_view as
select context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id
from nodes_contexts
join context_node_ngrams
on context_node_ngrams.context_id = nodes_contexts.context_id;
create index if not exists context_node_ngrams_context_id_ngrams_id_idx on context_node_ngrams(context_id, ngrams_id);
create index if not exists context_node_ngrams_view_context_id_idx on context_node_ngrams_view(context_id);
create index if not exists context_node_ngrams_view_ngrams_id_idx on context_node_ngrams_view(ngrams_id);
create index if not exists context_node_ngrams_view_node_id_idx on context_node_ngrams_view(node_id);
create index if not exists node_stories_ngrams_id_idx on node_stories(ngrams_id);
create materialized view if not exists context_node_ngrams_view as
select context_node_ngrams.context_id, ngrams_id, nodes_contexts.node_id
from nodes_contexts
join context_node_ngrams
on context_node_ngrams.context_id = nodes_contexts.context_id;
create index if not exists context_node_ngrams_context_id_ngrams_id_idx on context_node_ngrams(context_id, ngrams_id);
create index if not exists context_node_ngrams_view_context_id_idx on context_node_ngrams_view(context_id);
create index if not exists context_node_ngrams_view_ngrams_id_idx on context_node_ngrams_view(ngrams_id);
create index if not exists context_node_ngrams_view_node_id_idx on context_node_ngrams_view(node_id);
create index if not exists node_stories_ngrams_id_idx on node_stories(ngrams_id);
......@@ -5,7 +5,7 @@ cabal-version: 1.12
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.6.9.9.4.2
version: 0.0.6.9.9.5
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -56,6 +56,7 @@ library
Gargantext.Core.Text.Corpus.Parsers
Gargantext.Core.Text.Corpus.Parsers.CSV
Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
Gargantext.Core.Text.Corpus.Parsers.JSON
Gargantext.Core.Text.List.Formats.CSV
Gargantext.Core.Text.Metrics
Gargantext.Core.Text.Metrics.CharByChar
......@@ -393,6 +394,7 @@ library
, crawlerISTEX
, crawlerIsidore
, crawlerPubMed
, cron
, cryptohash
, data-time-segment
, deepseq
......@@ -689,6 +691,7 @@ executable gargantext-init
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
base
, cron
, extra
, gargantext
, gargantext-prelude
......@@ -798,6 +801,7 @@ executable gargantext-server
, gargantext-prelude
, ini
, optparse-generic
, postgresql-simple
, text
, unordered-containers
, vector
......@@ -826,6 +830,7 @@ executable gargantext-upgrade
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
base
, cron
, extra
, gargantext
, gargantext-prelude
......
......@@ -6,7 +6,7 @@ name: gargantext
# | | | +----- Layers * : New versions with API additions
# | | | | +--- Layers * : New versions without API breaking changes
# | | | | |
version: '0.0.6.9.9.4.2'
version: '0.0.6.9.9.5'
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -82,6 +82,7 @@ library:
- Gargantext.Core.Text.Corpus.Parsers
- Gargantext.Core.Text.Corpus.Parsers.CSV
- Gargantext.Core.Text.Corpus.Parsers.Date.Parsec
- Gargantext.Core.Text.Corpus.Parsers.JSON
- Gargantext.Core.Text.List.Formats.CSV
- Gargantext.Core.Text.Metrics
- Gargantext.Core.Text.Metrics.CharByChar
......@@ -177,6 +178,7 @@ library:
- crawlerISTEX
- crawlerIsidore
- crawlerPubMed
- cron
- cryptohash
- data-time-segment
- deepseq
......@@ -330,15 +332,16 @@ executables:
- -fprof-auto
dependencies:
- base
- cassava
- containers
- full-text-search
- gargantext
- gargantext-prelude
- vector
- cassava
- ini
- optparse-generic
- postgresql-simple
- unordered-containers
- full-text-search
- vector
gargantext-cli:
main: Main.hs
......@@ -421,6 +424,7 @@ executables:
- gargantext
- gargantext-prelude
- base
- cron
gargantext-invitations:
main: Main.hs
......@@ -451,6 +455,7 @@ executables:
- gargantext-prelude
- base
- postgresql-simple
- cron
gargantext-admin:
main: Main.hs
......
......@@ -26,16 +26,20 @@ Pouillard (who mainly made it).
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeOperators #-}
module Gargantext.API
where
import Control.Exception (catch, finally, SomeException)
import Control.Concurrent
import Control.Exception (catch, finally, SomeException, displayException, IOException)
import Control.Lens
import Control.Monad.Except
import Control.Monad.Reader (runReaderT)
import Data.Either
import Data.Foldable (foldlM)
import Data.List (lookup)
import Data.Text (pack)
import Data.Text.Encoding (encodeUtf8)
......@@ -52,7 +56,8 @@ import Gargantext.API.Ngrams (saveNodeStoryImmediate)
import Gargantext.API.Routes
import Gargantext.API.Server (server)
import Gargantext.Core.NodeStory
import qualified Gargantext.Database.Prelude as DB
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Action.Metrics.NgramsByContext (refreshNgramsMaterialized)
import Gargantext.Prelude hiding (putStrLn)
import Network.HTTP.Types hiding (Query)
import Network.Wai
......@@ -62,6 +67,8 @@ import Network.Wai.Middleware.RequestLogger
import Paths_gargantext (getDataDir)
import Servant
import System.FilePath
import qualified Gargantext.Database.Prelude as DB
import qualified System.Cron.Schedule as Cron
data Mode = Dev | Mock | Prod
deriving (Show, Read, Generic)
......@@ -74,7 +81,8 @@ startGargantext mode port file = do
portRouteInfo port
app <- makeApp env
mid <- makeDevMiddleware mode
run port (mid app) `finally` stopGargantext env
periodicActions <- schedulePeriodicActions env
run port (mid app) `finally` stopGargantext env periodicActions
where runDbCheck env = do
r <- runExceptT (runReaderT DB.dbCheck env) `catch`
......@@ -91,9 +99,12 @@ portRouteInfo port = do
putStrLn $ "http://localhost:" <> toUrlPiece port <> "/index.html"
putStrLn $ "http://localhost:" <> toUrlPiece port <> "/swagger-ui"
-- | Stops the gargantext server and cancels all the periodic actions
-- scheduled to run up to that point.
-- TODO clean this Monad condition (more generic) ?
stopGargantext :: HasNodeStoryImmediateSaver env => env -> IO ()
stopGargantext env = do
stopGargantext :: HasNodeStoryImmediateSaver env => env -> [ThreadId] -> IO ()
stopGargantext env scheduledPeriodicActions = do
forM_ scheduledPeriodicActions killThread
putStrLn "----- Stopping gargantext -----"
runReaderT saveNodeStoryImmediate env
......@@ -105,6 +116,29 @@ startGargantextMock port = do
run port application
-}
-- | Schedules all sorts of useful periodic actions to be run while
-- the server is alive accepting requests.
schedulePeriodicActions :: DB.CmdCommon env => env -> IO [ThreadId]
schedulePeriodicActions env =
-- Add your scheduled actions here.
let actions = [
refreshDBViews
]
in foldlM (\ !acc action -> (`mappend` acc) <$> Cron.execSchedule action) [] actions
where
refreshDBViews :: Cron.Schedule ()
refreshDBViews = do
let doRefresh = do
res <- DB.runCmd env (refreshNgramsMaterialized :: Cmd IOException ())
case res of
Left e -> liftIO $ putStrLn $ pack ("Refreshing Ngrams materialized view failed: " <> displayException e)
Right () -> do
_ <- liftIO $ putStrLn $ pack "Refresh Index Database done"
pure ()
Cron.addJob doRefresh "* 2 * * *"
----------------------------------------------------------------------
fireWall :: Applicative f => Request -> FireWall -> f Bool
......
......@@ -38,6 +38,7 @@ import Gargantext.Core.Text.Terms.WithList (buildPatterns, termsInText)
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Database.Action.Flow (saveDocNgramsWith)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByContext (refreshNgramsMaterialized)
import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getNode)
......@@ -49,7 +50,6 @@ import Gargantext.Database.Types (Indexed(..))
import Gargantext.Prelude
import Gargantext.Utils.Jobs (serveJobsAPI, MonadJobStatus(..))
import Servant
-- import Servant.Job.Async
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Csv as Csv
import qualified Data.HashMap.Strict as HashMap
......@@ -180,7 +180,7 @@ reIndexWith cId lId nt lts = do
-- Saving the indexation in database
_ <- mapM (saveDocNgramsWith lId) ngramsByDoc
_ <- refreshNgramsMaterialized
pure ()
toIndexedNgrams :: HashMap Text NgramsId -> Text -> Maybe (Indexed Int Ngrams)
......
......@@ -233,7 +233,7 @@ addToCorpusWithQuery user cid (WithQuery { _wq_query = q
markProgress 1 jobHandle
void $ flowDataText user txt (Multi l) cid (Just flw) jobHandle
-- printDebug "corpus id" cids
-- printDebug "corpus id" cids
-- printDebug "sending email" ("xxxxxxxxxxxxxxxxxxxxx" :: Text)
sendMail user
-- TODO ...
......@@ -270,6 +270,7 @@ addToCorpusWithForm user cid (NewWithForm ft ff d l _n sel) jobHandle = do
WOS -> Parser.parseFormatC Parser.WOS
PresseRIS -> Parser.parseFormatC Parser.RisPresse
Iramuteq -> Parser.parseFormatC Parser.Iramuteq
JSON -> Parser.parseFormatC Parser.JSON
-- TODO granularity of the logStatus
let data' = case ff of
......
......@@ -15,6 +15,7 @@ data FileType = CSV
| PresseRIS
| WOS
| Iramuteq
| JSON
deriving (Eq, Show, Generic)
instance ToSchema FileType
instance Arbitrary FileType where arbitrary = elements [CSV, PresseRIS]
......@@ -28,7 +29,8 @@ instance FromHttpApiData FileType where
parseUrlPiece "PresseRis" = pure PresseRIS
parseUrlPiece "WOS" = pure WOS
parseUrlPiece "Iramuteq" = pure Iramuteq
parseUrlPiece _ = panic "[G.A.A.Node.Corpus.New] File Type not implemented (yet)"
parseUrlPiece "JSON" = pure JSON
parseUrlPiece s = panic $ "[G.A.A.Node.Corpus.New] File Type not implemented (yet): " <> s
instance ToHttpApiData FileType where
toUrlPiece = pack . show
......
......@@ -21,9 +21,10 @@ import Gargantext.Database.Action.Flow.List (flowList_DbRepo)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Query.Table.Node (insertDefaultNodeIfNotExists)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus)
import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
import Gargantext.Database.Admin.Types.Node (CorpusId, ListId)
import Gargantext.Database.Admin.Types.Node (CorpusId, ListId, NodeType(NodeTexts))
import Gargantext.Database.Prelude (hasConfig)
import Gargantext.Database.Query.Table.Node (defaultListMaybe, getOrMkList)
import Gargantext.Database.Query.Tree.Root (getOrMk_RootWithCorpus)
......@@ -154,6 +155,10 @@ triggerSearxSearch :: (MonadBase IO m, FlowCmdM env err m, MonadJobStatus m)
-> JobHandle m
-> m ()
triggerSearxSearch user cId q l jobHandle = do
userId <- getUserId user
_tId <- insertDefaultNodeIfNotExists NodeTexts cId userId
let numPages = 100
markStarted numPages jobHandle
......
......@@ -42,6 +42,7 @@ import Data.Tuple.Extra (both, first, second)
import Gargantext.API.Node.Corpus.New.Types (FileFormat(..))
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Corpus.Parsers.CSV (parseHal, parseCsv, parseCsvC)
import Gargantext.Core.Text.Corpus.Parsers.JSON (parseJSONC)
import Gargantext.Core.Text.Corpus.Parsers.RIS.Presse (presseEnrich)
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Database.Query.Table.Ngrams (NgramsType(..))
......@@ -79,7 +80,8 @@ data FileType = WOS
| CsvGargV3
| CsvHal
| Iramuteq
deriving (Show)
| JSON
deriving (Show, Eq)
-- Implemented (ISI Format)
-- | DOC -- Not Implemented / import Pandoc
......@@ -132,6 +134,12 @@ parseFormatC Iramuteq Plain bs = do
)
<$> eDocs
parseFormatC JSON Plain bs = do
let eParsedC = parseJSONC $ DBL.fromStrict bs
case eParsedC of
Left err -> pure $ Left err
Right (mLen, parsedC) -> pure $ Right (mLen, transPipe (pure . runIdentity) parsedC)
parseFormatC ft ZIP bs = do
path <- liftBase $ emptySystemTempFile "parsed-zip"
liftBase $ DB.writeFile path bs
......@@ -154,7 +162,7 @@ parseFormatC ft ZIP bs = do
pure $ Right ( Just totalLength
, sequenceConduits contents' >> pure () ) -- .| mapM_C (printDebug "[parseFormatC] doc")
_ -> pure $ Left $ unpack $ intercalate "\n" $ pack <$> errs
parseFormatC _ _ _ = undefined
......@@ -211,7 +219,7 @@ parseFile WOS Plain p = do
parseFile Iramuteq Plain p = do
docs <- join $ mapM ((toDoc Iramuteq) . (map (second (Text.replace "_" " "))))
<$> snd
<$> enrichWith Iramuteq
<$> enrichWith Iramuteq
<$> readFileWith Iramuteq p
pure $ Right docs
......@@ -226,7 +234,7 @@ toDoc ff d = do
-- let abstract = lookup "abstract" d
let lang = EN -- maybe EN identity (join $ detectLangDefault <$> (fmap (DT.take 50) abstract))
let dateToParse = DT.replace " " "" <$> lookup "PY" d -- <> Just " " <> lookup "publication_date" d
let dateToParse = DT.replace " " "" <$> lookup "PY" d -- <> Just " " <> lookup "publication_date" d
-- printDebug "[G.C.T.C.Parsers] dateToParse" dateToParse
(utcTime, (pub_year, pub_month, pub_day)) <- Date.dateSplit lang dateToParse
......@@ -314,9 +322,8 @@ clean txt = DBC.map clean' txt
clean' ';' = '.'
clean' c = c
--
--
splitOn :: NgramsType -> Maybe Text -> Text -> [Text]
splitOn Authors (Just "WOS") = (DT.splitOn "; ")
splitOn _ _ = (DT.splitOn ", ")
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.CSV
Description :
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
......@@ -378,7 +378,7 @@ instance ToNamedRecord CsvHal where
, "instStructId_i" .= csvHal_instStructId_i
, "deptStructId_i" .= csvHal_deptStructId_i
, "labStructId_i" .= csvHal_labStructId_i
, "rteamStructId_i" .= csvHal_rteamStructId_i
, "docType_s" .= csvHal_docType_s
]
......@@ -472,7 +472,7 @@ parseCsvC bs = do
Right res -> Right res
case result of
Left err -> Left err
Right r -> Right $ (Just $ Prelude.fromIntegral $ Prelude.length $ snd r, (yieldMany $ snd r) .| mapC csv2doc)
Right r -> Right (Just $ Prelude.fromIntegral $ Prelude.length $ snd r, (yieldMany $ snd r) .| mapC csv2doc)
------------------------------------------------------------------------
-- Csv v3 weighted for phylo
......
{-|
Module : Gargantext.Core.Text.Corpus.Parsers.JSON
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
JSON parser for Gargantext corpus files.
-}
{-# LANGUAGE DuplicateRecordFields #-}
module Gargantext.Core.Text.Corpus.Parsers.JSON where
import Conduit
import Data.Aeson
import qualified Data.ByteString.Lazy as BL
import Data.Either (Either(..))
import Data.Text
import GHC.Generics
import qualified Prelude
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
-- import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Prelude hiding (length)
data JSONStruct =
JSONStruct { documents :: [ JSONStructDocument ]
, garg_version :: Text }
deriving (Generic)
instance FromJSON JSONStruct
data JSONStructDocument =
JSONStructDocument { document :: JSONDocument
, ngrams :: JSONNgrams
, hash :: Text }
deriving (Generic)
instance FromJSON JSONStructDocument
data JSONDocument =
JSONDocument { id :: Int
, hash_id :: Maybe Text
, typename :: Int
, user_id :: Int
, parent_id :: Maybe Int
, name :: Text
, date :: Text
, hyperdata :: HyperdataDocument }
deriving (Generic)
instance FromJSON JSONDocument
data JSONNgrams =
JSONNgrams { ngrams :: [Text]
, hash :: Text }
deriving (Generic)
instance FromJSON JSONNgrams
------------------------------------------------------------------------
-- | TODO: documents -> document -> hyperdata + title etc
readJSONLazyBS :: BL.ByteString -> Either Prelude.String JSONStruct
readJSONLazyBS bs = eitherDecode bs
parseJSONC :: BL.ByteString
-> Either Prelude.String (Maybe Integer, ConduitT () HyperdataDocument Identity ())
parseJSONC bs = do
case readJSONLazyBS bs of
Left err -> Left err
Right (JSONStruct { documents }) ->
Right ( Just $ Prelude.fromIntegral $ Prelude.length documents
, yieldMany documents .| mapC doc2hyperdoc )
doc2hyperdoc :: JSONStructDocument -> HyperdataDocument
doc2hyperdoc (JSONStructDocument { document = JSONDocument { hyperdata } }) = hyperdata
......@@ -35,6 +35,7 @@ import Gargantext.Core.Types.Query (Limit(..))
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser{-, getTficfWith-})
import Gargantext.Database.Admin.Config (userMaster)
import Gargantext.Database.Action.Metrics.NgramsByContext (refreshNgramsMaterialized)
import Gargantext.Database.Admin.Types.Node (ListId, CorpusId)
import Gargantext.Database.Query.Table.Node (defaultList)
import Gargantext.Database.Query.Table.Node.Select
......@@ -125,7 +126,11 @@ updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
let fields = map (\t-> QualifiedIdentifier Nothing t)
$ map Text.pack ["int4", "int4","text","int4","int4"]
map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
res <- map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
-- _ <- map (\(Only a) -> a) <$> runPGSQuery [sql|refresh materialized view context_node_ngrams_view;|] ()
_ <- refreshNgramsMaterialized
pure res
......
......@@ -18,6 +18,7 @@ module Gargantext.Database.Action.Metrics.NgramsByContext
-- import Debug.Trace (trace)
--import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Control.Monad (void)
import Data.HashMap.Strict (HashMap)
import Data.Map.Strict (Map)
import Data.Set (Set)
......@@ -29,7 +30,7 @@ import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core
import Gargantext.Data.HashMap.Strict.Utils as HM
import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId, MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId)
import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Database.Prelude (Cmd, runPGSQuery, execPGSQuery)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM
......@@ -122,31 +123,29 @@ getOccByNgramsOnlyFast cId lId nt = do
-> Cmd err [(Text, DPST.PGArray Int)]
run cId' lId' nt' = runPGSQuery query
( cId'
, cId'
, lId'
, ngramsTypeId nt'
)
query :: DPS.Query
query = [sql|
SELECT ng.terms
, ARRAY(
SELECT DISTINCT context_node_ngrams.context_id
FROM context_node_ngrams
JOIN nodes_contexts
ON context_node_ngrams.context_id = nodes_contexts.context_id
WHERE ng.id = context_node_ngrams.ngrams_id
AND nodes_contexts.node_id = ?
) AS context_ids
FROM ngrams ng
JOIN node_stories ns ON ng.id = ns.ngrams_id
JOIN node_node_ngrams nng ON ns.node_id = nng.node2_id
WHERE nng.node1_id = ?
AND nng.node2_id = ?
AND nng.ngrams_type = ?
AND nng.ngrams_id = ng.id
AND nng.ngrams_type = ns.ngrams_type_id
ORDER BY ng.id ASC;
WITH node_context_ids AS
(select context_id, ngrams_id
FROM context_node_ngrams_view
WHERE node_id = ?
), ns AS
(select ngrams_id FROM node_stories
WHERE node_id = ? AND ngrams_type_id = ?
)
SELECT ng.terms,
ARRAY ( SELECT DISTINCT context_id
FROM node_context_ids
WHERE ns.ngrams_id = node_context_ids.ngrams_id
)
AS context_ids
FROM ngrams ng
JOIN ns ON ng.id = ns.ngrams_id
|]
......@@ -219,12 +218,6 @@ queryNgramsOccurrencesOnlyByContextUser_withSample' = [sql|
GROUP BY ng.id
|]
------------------------------------------------------------------------
getContextsByNgramsOnlyUser :: HasDBid NodeType
=> CorpusId
......@@ -399,3 +392,17 @@ queryNgramsByContextMaster' = [sql|
SELECT m.id, m.terms FROM nodesByNgramsMaster m
RIGHT JOIN contextsByNgramsUser u ON u.id = m.id
|]
-- | Refreshes the \"context_node_ngrams_view\" materialized view.
-- This function will be run :
-- - periodically
-- - at reindex stage
-- - at the end of each text flow
refreshNgramsMaterialized :: Cmd err ()
refreshNgramsMaterialized = void $ execPGSQuery refreshNgramsMaterializedQuery ()
where
refreshNgramsMaterializedQuery :: DPS.Query
refreshNgramsMaterializedQuery = [sql| refresh materialized view context_node_ngrams_view; |]
......@@ -12,6 +12,8 @@ TODO_2: quantitative tests (coded)
-}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.GargDB
where
......@@ -141,11 +143,11 @@ writeFile a = do
-- | Example to read a file with Type
readGargFile :: ( MonadReader env m
, HasConfig env
, MonadBase IO m
, ReadFile a
)
=> FilePath -> m a
, HasConfig env
, MonadBase IO m
, ReadFile a
)
=> FilePath -> m a
readGargFile fp = do
dataPath <- view $ hasConfig . gc_datafilepath
liftBase $ readFile' $ toFilePath dataPath fp
......@@ -153,9 +155,9 @@ readGargFile fp = do
---
rmFile :: ( MonadReader env m
, MonadBase IO m
, HasConfig env
)
, MonadBase IO m
, HasConfig env
)
=> FilePath -> m ()
rmFile = onDisk_1 SD.removeFile
......@@ -165,8 +167,11 @@ cpFile = onDisk_2 SD.copyFile
---
mvFile :: (MonadReader env m, MonadBase IO m, HasConfig env)
=> FilePath -> FilePath -> m ()
mvFile :: ( MonadReader env m
, MonadBase IO m
, HasConfig env
)
=> FilePath -> FilePath -> m ()
mvFile fp1 fp2 = do
cpFile fp1 fp2
rmFile fp1
......@@ -174,10 +179,10 @@ mvFile fp1 fp2 = do
------------------------------------------------------------------------
onDisk_1 :: ( MonadReader env m
, MonadBase IO m
, HasConfig env
)
=> (FilePath -> IO ()) -> FilePath -> m ()
, MonadBase IO m
, HasConfig env
)
=> (FilePath -> IO ()) -> FilePath -> m ()
onDisk_1 action fp = do
dataPath <- view $ hasConfig . gc_datafilepath
liftBase $ action (toFilePath dataPath fp) `catch` handleExists
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment