Commit 4a095f89 authored by Karen Konou's avatar Karen Konou

Merge branch 'dev' into 428-dev-profile-img-upload

parents 5080f67e bf006791
## Version 0.0.6.1
* [FEAT] Spacy Server connection for French (and others) languages
* [FEAT] At startup, check if gargantext.init script has been activated
* [UPGRADE] Use the devops/postgres/upgrade/0.0.6.1.sql uprade script
* [FIX] PubMed Parser with threadDelay
* [BACK][FIX] Hash to remove duplicates on filtered text
## Version 0.0.6 ## Version 0.0.6
* [OPTIM] Ngrams Table optmization. To upgrade: * [OPTIM] Ngrams Table optmization. To upgrade:
1. `./bin/psql gargantext.ini < devops/postgresql/upgrade/0.0.6.sql` 1. `./bin/psql gargantext.ini < devops/postgresql/upgrade/0.0.6.sql`
......
...@@ -229,3 +229,63 @@ Playground is located at http://localhost:8008/gql ...@@ -229,3 +229,63 @@ Playground is located at http://localhost:8008/gql
} }
} }
``` ```
## PostgreSQL
### Upgrading using Docker
https://www.cloudytuts.com/tutorials/docker/how-to-upgrade-postgresql-in-docker-and-kubernetes/
To upgrade PostgreSQL in Docker containers, for example from 11.x to 14.x, simply run:
```sh
docker exec -it <container-id> pg_dumpall -U gargantua > 11-db.dump
```
Then, shut down the container, replace `image` section in
`devops/docker/docker-compose.yaml` with `postgres:14`. Also, it is a good practice to create a new volume, say `garg-pgdata14` and bind the new container to it. If you want to keep the same volume, remember about removing it like so:
```sh
docker-compose rm postgres
docker volume rm docker_garg-pgdata
```
Now, start the container and execute:
```sh
# need to drop the empty DB first, since schema will be created when restoring the dump
docker exec -i <new-container-id> dropdb -U gargantua gargandbV5
# recreate the db, but empty with no schema
docker exec -i <new-container-id> createdb -U gargantua gargandbV5
# now we can restore the dump
docker exec -i <new-container-id> psql -U gargantua -d gargandbV5 < 11-db.dump
```
### Upgrading using
There is a solution using pgupgrade_cluster but you need to manage the
clusters version 14 and 13. Hence here is a simple solution to upgrade.
First save your data:
```
sudo su postgres
pg_dumpall > gargandb.dump
```
Upgrade postgresql:
```
sudo apt install postgresql-server-14 postgresql-client-14
sudo apt remove --purge postgresql-13
```
Restore your data:
```
sudo su postgres
psql < gargandb.dump
```
Maybe you need to restore the gargantua password
```
ALTER ROLE gargantua PASSWORD 'yourPasswordIn_gargantext.ini'
```
Maybe you need to change the port to 5433 for database connection in
your gargantext.ini file.
...@@ -4,3 +4,4 @@ tmux new -d -s gargantext './server' \; \ ...@@ -4,3 +4,4 @@ tmux new -d -s gargantext './server' \; \
split-window -h -d 'cd ./purescript-gargantext ; ./server' \; \ split-window -h -d 'cd ./purescript-gargantext ; ./server' \; \
select-pane -t 1 \; \ select-pane -t 1 \; \
split-window -d 'cd deps/CoreNLP ; ./startServer.sh' \; \ split-window -d 'cd deps/CoreNLP ; ./startServer.sh' \; \
split-window -d 'cd deps/nlp/spacy-server ; source env/bin/activate ; ./server' \; \
version: '3' version: '3'
services: services:
#postgres11:
# #image: 'postgres:latest'
# image: 'postgres:11'
# network_mode: host
# #command: ["postgres", "-c", "log_statement=all"]
# #ports:
# #- 5432:5432
# environment:
# POSTGRES_USER: gargantua
# POSTGRES_PASSWORD: C8kdcUrAQy66U
# POSTGRES_DB: gargandbV5
# volumes:
# - garg-pgdata:/var/lib/postgresql/data
# - ../:/gargantext
# - ../dbs:/dbs
# - ../postgres/schema.sql:/docker-entrypoint-initdb.d/schema.sql:ro
postgres: postgres:
#image: 'postgres:latest' #image: 'postgres:latest'
image: 'postgres:11' image: 'postgres:14'
network_mode: host network_mode: host
#command: ["postgres", "-c", "log_statement=all"] #command: ["postgres", "-c", "log_statement=all"]
#ports: #ports:
...@@ -13,7 +30,7 @@ services: ...@@ -13,7 +30,7 @@ services:
POSTGRES_PASSWORD: C8kdcUrAQy66U POSTGRES_PASSWORD: C8kdcUrAQy66U
POSTGRES_DB: gargandbV5 POSTGRES_DB: gargandbV5
volumes: volumes:
- garg-pgdata:/var/lib/postgresql/data - garg-pgdata14:/var/lib/postgresql-11/data
- ../:/gargantext - ../:/gargantext
- ../dbs:/dbs - ../dbs:/dbs
- ../postgres/schema.sql:/docker-entrypoint-initdb.d/schema.sql:ro - ../postgres/schema.sql:/docker-entrypoint-initdb.d/schema.sql:ro
...@@ -44,5 +61,6 @@ services: ...@@ -44,5 +61,6 @@ services:
- 5000:5000 - 5000:5000
volumes: volumes:
garg-pgdata: #garg-pgdata:
garg-pgdata14:
js-cache: js-cache:
...@@ -221,16 +221,22 @@ ALTER TABLE public.rights OWNER TO gargantua; ...@@ -221,16 +221,22 @@ ALTER TABLE public.rights OWNER TO gargantua;
------------------------------------------------------------ ------------------------------------------------------------
-- Node Story -- Node Story
create table public.node_stories ( CREATE TABLE public.node_stories (
id SERIAL, id SERIAL,
node_id INTEGER NOT NULL, node_id INTEGER NOT NULL,
archive jsonb DEFAULT '{}'::jsonb NOT NULL, version INTEGER NOT NULL,
ngrams_type_id INTEGER NOT NULL,
ngrams_id INTEGER NOT NULL,
--children TEXT[],
ngrams_repo_element jsonb DEFAULT '{}'::jsonb NOT NULL,
PRIMARY KEY (id), PRIMARY KEY (id),
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
); );
ALTER TABLE public.node_stories OWNER TO gargantua; ALTER TABLE public.node_stories OWNER TO gargantua;
CREATE UNIQUE INDEX ON public.node_stories USING btree (node_id); CREATE UNIQUE INDEX ON public.node_stories USING btree (node_id, ngrams_type_id, ngrams_id);
create table public.node_story_archive_history ( create table public.node_story_archive_history (
......
-- Start a new transaction. In case data migration goes wrong, we are
-- back to our original table.
BEGIN;
-- we will migrate data here
-- rename old table and create a new one
ALTER TABLE public.node_stories RENAME TO node_stories_old;
CREATE TABLE public.node_stories (
id SERIAL,
node_id INTEGER NOT NULL,
version INTEGER NOT NULL,
ngrams_type_id INTEGER NOT NULL,
ngrams_id INTEGER NOT NULL,
--children TEXT[],
ngrams_repo_element jsonb DEFAULT '{}'::jsonb NOT NULL,
PRIMARY KEY (id),
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
);
ALTER TABLE public.node_stories OWNER TO gargantua;
CREATE UNIQUE INDEX ON public.node_stories USING btree (node_id, ngrams_type_id, ngrams_id);
-- Authors (ngrams_type_id = 1), see G.D.S.Ngrams.hs -> ngramsTypeId
INSERT INTO public.node_stories
(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
SELECT node_id, (archive->'version')::int, 1, ngrams.id, j.value
FROM node_stories_old
CROSS JOIN jsonb_each(archive->'state'->'Authors') AS j
JOIN ngrams ON terms = j.key;
-- we will leave children for later, small steps
-- INSERT INTO public.node_stories
-- (node_id, version, ngrams_type_id, ngrams_id, children, ngrams_repo_element)
-- SELECT node_id, (archive->'version')::int, 1, ngrams.id, c.children, (j.value - 'children')
-- FROM node_stories_old
-- CROSS JOIN jsonb_each(archive->'state'->'Authors') AS j
-- CROSS JOIN LATERAL (SELECT array_agg(d.elem) AS children FROM jsonb_array_elements_text(j.value->'children') AS d(elem)) AS c
-- JOIN ngrams ON terms = j.key;
-- Institutes (ngrams_type_id = 2)
INSERT INTO public.node_stories
(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
SELECT node_id, (archive->'version')::int, 2, ngrams.id, j.value
FROM node_stories_old
CROSS JOIN jsonb_each(archive->'state'->'Institutes') AS j
JOIN ngrams ON terms = j.key;
-- Sources (ngrams_type_id = 3)
INSERT INTO public.node_stories
(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
SELECT node_id, (archive->'version')::int, 3, ngrams.id, j.value
FROM node_stories_old
CROSS JOIN jsonb_each(archive->'state'->'Sources') AS j
JOIN ngrams ON terms = j.key;
-- NgramsTerms (ngrams_type_id = 4)
INSERT INTO public.node_stories
(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
SELECT node_id, (archive->'version')::int, 4, ngrams.id, j.value
FROM node_stories_old
CROSS JOIN jsonb_each(archive->'state'->'NgramsTerms') AS j
JOIN ngrams ON terms = j.key;
-- finally, write out the stuff
COMMIT;
...@@ -36,4 +36,3 @@ ALTER TABLE public.node_story_archive_history OWNER TO gargantua; ...@@ -36,4 +36,3 @@ ALTER TABLE public.node_story_archive_history OWNER TO gargantua;
-- UNION (SELECT node_id, sid, 3 AS ngrams_type_id, "Sources" AS patch FROM q WHERE "Sources" IS NOT NULL) -- UNION (SELECT node_id, sid, 3 AS ngrams_type_id, "Sources" AS patch FROM q WHERE "Sources" IS NOT NULL)
-- ORDER BY node_id, ngrams_type_id, sid -- ORDER BY node_id, ngrams_type_id, sid
-- ) AS t; -- ) AS t;
...@@ -96,7 +96,6 @@ library ...@@ -96,7 +96,6 @@ library
Gargantext.Core.Viz.Phylo.SynchronicClustering Gargantext.Core.Viz.Phylo.SynchronicClustering
Gargantext.Core.Viz.Types Gargantext.Core.Viz.Types
other-modules: other-modules:
-- ConcurrentTest
Gargantext.API.Admin.Auth Gargantext.API.Admin.Auth
Gargantext.API.Admin.FrontEnd Gargantext.API.Admin.FrontEnd
Gargantext.API.Admin.Orchestrator Gargantext.API.Admin.Orchestrator
...@@ -318,6 +317,7 @@ library ...@@ -318,6 +317,7 @@ library
Gargantext.Database.Types Gargantext.Database.Types
Gargantext.Utils.Aeson Gargantext.Utils.Aeson
Gargantext.Utils.JohnSnowNLP Gargantext.Utils.JohnSnowNLP
Gargantext.Utils.SpacyNLP
Gargantext.Utils.Servant Gargantext.Utils.Servant
Gargantext.Utils.UTCTime Gargantext.Utils.UTCTime
Paths_gargantext Paths_gargantext
...@@ -493,9 +493,9 @@ library ...@@ -493,9 +493,9 @@ library
, transformers-base , transformers-base
, tuple , tuple
, unordered-containers , unordered-containers
, uri-encode
, utf8-string , utf8-string
, uuid , uuid
, uri-encode
, validity , validity
, vector , vector
, wai , wai
......
...@@ -6,7 +6,7 @@ name: gargantext ...@@ -6,7 +6,7 @@ name: gargantext
# | | | +----- Layers * : New versions with API additions # | | | +----- Layers * : New versions with API additions
# | | | | +--- Layers * : New versions without API breaking changes # | | | | +--- Layers * : New versions without API breaking changes
# | | | | | # | | | | |
version: '0.0.6' version: '0.0.6.1'
synopsis: Search, map, share synopsis: Search, map, share
description: Please see README.md description: Please see README.md
category: Data category: Data
...@@ -75,6 +75,7 @@ library: ...@@ -75,6 +75,7 @@ library:
- Gargantext.Core.Types.Individu - Gargantext.Core.Types.Individu
- Gargantext.Core.Types.Main - Gargantext.Core.Types.Main
- Gargantext.Core.Utils.Prefix - Gargantext.Core.Utils.Prefix
- Gargantext.Utils.SpacyNLP
- Gargantext.Database.Action.Flow - Gargantext.Database.Action.Flow
- Gargantext.Database.Action.Flow.Types - Gargantext.Database.Action.Flow.Types
- Gargantext.Database.Action.User.New - Gargantext.Database.Action.User.New
...@@ -100,6 +101,7 @@ library: ...@@ -100,6 +101,7 @@ library:
- Gargantext.Core.Text.Metrics.TFICF - Gargantext.Core.Text.Metrics.TFICF
- Gargantext.Core.Text.Metrics.CharByChar - Gargantext.Core.Text.Metrics.CharByChar
- Gargantext.Core.Text.Metrics.Count - Gargantext.Core.Text.Metrics.Count
- Gargantext.Core.Text.Prepare
- Gargantext.Core.Text.Search - Gargantext.Core.Text.Search
- Gargantext.Core.Text.Terms - Gargantext.Core.Text.Terms
- Gargantext.Core.Text.Terms.Mono - Gargantext.Core.Text.Terms.Mono
......
...@@ -31,10 +31,13 @@ Pouillard (who mainly made it). ...@@ -31,10 +31,13 @@ Pouillard (who mainly made it).
module Gargantext.API module Gargantext.API
where where
import Control.Exception (finally) import Control.Exception (catch, finally, SomeException)
import Control.Lens import Control.Lens
import Control.Monad.Except
import Control.Monad.Reader (runReaderT) import Control.Monad.Reader (runReaderT)
import Data.Either
import Data.List (lookup) import Data.List (lookup)
import Data.Text (pack)
import Data.Text.Encoding (encodeUtf8) import Data.Text.Encoding (encodeUtf8)
import Data.Text.IO (putStrLn) import Data.Text.IO (putStrLn)
import Data.Validity import Data.Validity
...@@ -49,6 +52,7 @@ import Gargantext.API.Prelude ...@@ -49,6 +52,7 @@ import Gargantext.API.Prelude
import Gargantext.API.Routes import Gargantext.API.Routes
import Gargantext.API.Server (server) import Gargantext.API.Server (server)
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory
import qualified Gargantext.Database.Prelude as DB
import Gargantext.Prelude hiding (putStrLn) import Gargantext.Prelude hiding (putStrLn)
import Network.HTTP.Types hiding (Query) import Network.HTTP.Types hiding (Query)
import Network.Wai import Network.Wai
...@@ -66,11 +70,21 @@ data Mode = Dev | Mock | Prod ...@@ -66,11 +70,21 @@ data Mode = Dev | Mock | Prod
startGargantext :: Mode -> PortNumber -> FilePath -> IO () startGargantext :: Mode -> PortNumber -> FilePath -> IO ()
startGargantext mode port file = do startGargantext mode port file = do
env <- newEnv port file env <- newEnv port file
runDbCheck env
portRouteInfo port portRouteInfo port
app <- makeApp env app <- makeApp env
mid <- makeDevMiddleware mode mid <- makeDevMiddleware mode
run port (mid app) `finally` stopGargantext env run port (mid app) `finally` stopGargantext env
where runDbCheck env = do
r <- runExceptT (runReaderT DB.dbCheck env) `catch`
(\(_ :: SomeException) -> return $ Right False)
case r of
Right True -> return ()
_ -> panic $
"You must run 'gargantext-init " <> pack file <>
"' before running gargantext-server (only the first time)."
portRouteInfo :: PortNumber -> IO () portRouteInfo :: PortNumber -> IO ()
portRouteInfo port = do portRouteInfo port = do
putStrLn " ----Main Routes----- " putStrLn " ----Main Routes----- "
......
...@@ -29,6 +29,7 @@ module Gargantext.API.Ngrams ...@@ -29,6 +29,7 @@ module Gargantext.API.Ngrams
, TableNgramsApiPut , TableNgramsApiPut
, getTableNgrams , getTableNgrams
, getTableNgramsCorpus
, setListNgrams , setListNgrams
--, rmListNgrams TODO fix before exporting --, rmListNgrams TODO fix before exporting
, apiNgramsTableCorpus , apiNgramsTableCorpus
...@@ -258,15 +259,6 @@ setListNgrams listId ngramsType ns = do ...@@ -258,15 +259,6 @@ setListNgrams listId ngramsType ns = do
saveNodeStory saveNodeStory
currentVersion :: HasNodeStory env err m
=> ListId -> m Version
currentVersion listId = do
--nls <- getRepo [listId]
pool <- view connPool
nls <- liftBase $ getNodeStory pool listId
pure $ nls ^. unNodeStory . at listId . _Just . a_version
newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams] newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
newNgramsFromNgramsStatePatch p = newNgramsFromNgramsStatePatch p =
[ text2ngrams (unNgramsTerm n) [ text2ngrams (unNgramsTerm n)
......
...@@ -19,9 +19,11 @@ import Control.Lens (_Just, (^.), at, view, At, Index, IxValue) ...@@ -19,9 +19,11 @@ import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
import Control.Monad.Reader import Control.Monad.Reader
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable) import Data.Hashable (Hashable)
import Data.Pool (withResource)
import Data.Set (Set) import Data.Set (Set)
import Data.Validity import Data.Validity
import Gargantext.API.Ngrams.Types import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory
import Gargantext.Core.Types (ListType(..), NodeId, NodeType(..), ListId) import Gargantext.Core.Types (ListType(..), NodeId, NodeType(..), ListId)
import Gargantext.Database.Prelude (CmdM, HasConnectionPool(..)) import Gargantext.Database.Prelude (CmdM, HasConnectionPool(..))
import Gargantext.Database.Schema.Ngrams (NgramsType) import Gargantext.Database.Schema.Ngrams (NgramsType)
...@@ -29,7 +31,6 @@ import Gargantext.Prelude ...@@ -29,7 +31,6 @@ import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM import qualified Data.HashMap.Strict as HM
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import Gargantext.Core.NodeStory
import qualified Gargantext.Core.NodeStoryFile as NSF import qualified Gargantext.Core.NodeStoryFile as NSF
...@@ -202,15 +203,16 @@ migrateFromDirToDb :: (CmdM env err m, HasNodeStory env err m) ...@@ -202,15 +203,16 @@ migrateFromDirToDb :: (CmdM env err m, HasNodeStory env err m)
=> m () => m ()
migrateFromDirToDb = do migrateFromDirToDb = do
pool <- view connPool pool <- view connPool
listIds <- liftBase $ getNodesIdWithType pool NodeList withResource pool $ \c -> do
printDebug "[migrateFromDirToDb] listIds" listIds listIds <- liftBase $ getNodesIdWithType c NodeList
(NodeStory nls) <- NSF.getRepoReadConfig listIds printDebug "[migrateFromDirToDb] listIds" listIds
printDebug "[migrateFromDirToDb] nls" nls (NodeStory nls) <- NSF.getRepoReadConfig listIds
_ <- mapM (\(nId, a) -> do printDebug "[migrateFromDirToDb] nls" nls
n <- liftBase $ nodeExists pool nId _ <- mapM (\(nId, a) -> do
case n of n <- liftBase $ nodeExists c nId
False -> pure 0 case n of
True -> liftBase $ upsertNodeArchive pool nId a False -> pure ()
) $ Map.toList nls True -> liftBase $ upsertNodeStories c nId a
--_ <- nodeStoryIncs (Just $ NodeStory nls) listIds ) $ Map.toList nls
pure () --_ <- nodeStoryIncs (Just $ NodeStory nls) listIds
pure ()
...@@ -28,7 +28,7 @@ import Data.String (IsString, fromString) ...@@ -28,7 +28,7 @@ import Data.String (IsString, fromString)
import Data.Swagger hiding (version, patch) import Data.Swagger hiding (version, patch)
import Data.Text (Text, pack, strip) import Data.Text (Text, pack, strip)
import Data.Validity import Data.Validity
import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField, ResultError(ConversionFailed), returnError) import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField)
import Database.PostgreSQL.Simple.ToField (ToField, toJSONField, toField) import Database.PostgreSQL.Simple.ToField (ToField, toJSONField, toField)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Gargantext.Core.Text (size) import Gargantext.Core.Text (size)
...@@ -44,7 +44,6 @@ import Servant.Job.Utils (jsonOptions) ...@@ -44,7 +44,6 @@ import Servant.Job.Utils (jsonOptions)
import Test.QuickCheck (elements, frequency) import Test.QuickCheck (elements, frequency)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
import qualified Data.List as List
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
import qualified Data.Map.Strict.Patch as PM import qualified Data.Map.Strict.Patch as PM
import qualified Data.Set as Set import qualified Data.Set as Set
...@@ -124,7 +123,7 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where ...@@ -124,7 +123,7 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
------------------------------------------------------------------------ ------------------------------------------------------------------------
newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text } newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text }
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable, NFData) deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable, NFData, FromField, ToField)
instance IsHashable NgramsTerm where instance IsHashable NgramsTerm where
hash (NgramsTerm t) = hash t hash (NgramsTerm t) = hash t
instance Monoid NgramsTerm where instance Monoid NgramsTerm where
...@@ -133,18 +132,6 @@ instance FromJSONKey NgramsTerm where ...@@ -133,18 +132,6 @@ instance FromJSONKey NgramsTerm where
fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ strip t fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ strip t
instance IsString NgramsTerm where instance IsString NgramsTerm where
fromString s = NgramsTerm $ pack s fromString s = NgramsTerm $ pack s
instance FromField NgramsTerm
where
fromField field mb = do
v <- fromField field mb
case fromJSON v of
Success a -> pure $ NgramsTerm $ strip a
Error _err -> returnError ConversionFailed field
$ List.intercalate " " [ "cannot parse hyperdata for JSON: "
, show v
]
instance ToField NgramsTerm where
toField (NgramsTerm n) = toField n
data RootParent = RootParent data RootParent = RootParent
...@@ -164,19 +151,20 @@ data NgramsRepoElement = NgramsRepoElement ...@@ -164,19 +151,20 @@ data NgramsRepoElement = NgramsRepoElement
, _nre_children :: !(MSet NgramsTerm) , _nre_children :: !(MSet NgramsTerm)
} }
deriving (Ord, Eq, Show, Generic) deriving (Ord, Eq, Show, Generic)
deriveJSON (unPrefix "_nre_") ''NgramsRepoElement deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
-- TODO -- TODO
-- if ngrams & not size => size -- if ngrams & not size => size
-- drop occurrences -- drop occurrences
makeLenses ''NgramsRepoElement makeLenses ''NgramsRepoElement
instance ToSchema NgramsRepoElement where instance ToSchema NgramsRepoElement where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
instance Serialise NgramsRepoElement
instance FromField NgramsRepoElement where
fromField = fromJSONField
instance ToField NgramsRepoElement where
toField = toJSONField
instance Serialise (MSet NgramsTerm) instance Serialise (MSet NgramsTerm)
instance Serialise NgramsRepoElement
data NgramsElement = data NgramsElement =
NgramsElement { _ne_ngrams :: NgramsTerm NgramsElement { _ne_ngrams :: NgramsTerm
......
This diff is collapsed.
{-|
Module : Gargantext.Core.Text.Clean
Description : Tools to clean text
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Clean some texts before importing it.
For a given Language, chose a big master piece of litteracy to analyze
it with GarganText. Here is a an example with a famous French Writer
that could be the incarnation of the mythic Gargantua.
-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Core.Text.Prepare
where
import Data.Text (Text)
import Gargantext.Core.Text (sentences)
import Gargantext.Prelude
import qualified Data.List as List
import qualified Data.Text as Text
---------------------------------------------------------------------
prepareText :: Paragraph -> Text -> [Text]
prepareText p txt = groupText p
$ List.filter (/= "")
$ toParagraphs
$ Text.lines
$ Text.replace "_" " " -- some texts seem to be underlined
$ Text.replace "--" "" -- removing bullets like of dialogs
$ Text.replace "\xd" "" txt
---------------------------------------------------------------------
groupText :: Paragraph -> [Text] -> [Text]
groupText (Uniform blockSize) = groupUniform blockSize
groupText AuthorLike = groupLines
---------------------------------------------------------------------
data Paragraph = Uniform Grain | AuthorLike
-- Uniform does not preserve the paragraphs of the author but length of paragraphs is uniform
-- Author Like preserve the paragraphs of the Author but length of paragraphs is not uniform
-- Grain: number of Sentences by block of Text
-- Step : overlap of sentence between connex block of Text
groupUniform :: Grain -> [Text] -> [Text]
groupUniform g ts = map (Text.intercalate " ")
$ chunkAlong g g
$ sentences
$ Text.concat ts
groupLines :: [Text] -> [Text]
groupLines xxx@(a:b:xs) =
if Text.length a > moyenne
then [a] <> (groupLines (b:xs))
else let ab = a <> " " <> b in
if Text.length ab > moyenne
then [ab] <> (groupLines xs)
else groupLines ([ab] <> xs)
where
moyenne = round
$ mean
$ (map (fromIntegral . Text.length) xxx :: [Double])
groupLines [a] = [a]
groupLines [] = []
groupLines_test :: [Text]
groupLines_test = groupLines theData
where
theData = ["abxxxx", "bc", "cxxx", "d"]
---------------------------------------------------------------------
toParagraphs :: [Text] -> [Text]
toParagraphs (a:x:xs) =
if a == ""
then [a] <> toParagraphs (x:xs)
else if x == ""
then [a] <> toParagraphs (x:xs)
else toParagraphs $ [a <> " " <> x ] <> xs
toParagraphs [a] = [a]
toParagraphs [] = []
-- Tests
-- TODO for internships: Property tests
toParagraphs_test :: Bool
toParagraphs_test =
toParagraphs ["a","b","","c","d","d","","e","f","","g","h",""]
== [ "a b", "", "c d d", "", "e f", "", "g h", ""]
...@@ -28,7 +28,8 @@ import qualified Gargantext.Core.Text.Terms.Multi.Lang.En as En ...@@ -28,7 +28,8 @@ import qualified Gargantext.Core.Text.Terms.Multi.Lang.En as En
import qualified Gargantext.Core.Text.Terms.Multi.Lang.Fr as Fr import qualified Gargantext.Core.Text.Terms.Multi.Lang.Fr as Fr
import Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake) import Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake)
import qualified Gargantext.Utils.JohnSnowNLP as JohnSnow -- import qualified Gargantext.Utils.JohnSnowNLP as JohnSnow
import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
------------------------------------------------------------------- -------------------------------------------------------------------
...@@ -51,7 +52,7 @@ tokenTag2terms (TokenTag ws t _ _) = Terms ws t ...@@ -51,7 +52,7 @@ tokenTag2terms (TokenTag ws t _ _) = Terms ws t
tokenTags :: Lang -> Text -> IO [[TokenTag]] tokenTags :: Lang -> Text -> IO [[TokenTag]]
tokenTags EN txt = tokenTagsWith EN txt corenlp tokenTags EN txt = tokenTagsWith EN txt corenlp
tokenTags FR txt = tokenTagsWith FR txt JohnSnow.nlp tokenTags FR txt = tokenTagsWith FR txt SpacyNLP.nlp
tokenTags _ _ = panic "[G.C.T.T.Multi] NLP API not implemented yet" tokenTags _ _ = panic "[G.C.T.T.Multi] NLP API not implemented yet"
tokenTagsWith :: Lang -> Text -> NLP_API -> IO [[TokenTag]] tokenTagsWith :: Lang -> Text -> NLP_API -> IO [[TokenTag]]
......
...@@ -9,6 +9,7 @@ Portability : POSIX ...@@ -9,6 +9,7 @@ Portability : POSIX
-} -}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-} {-# LANGUAGE TypeOperators #-}
...@@ -22,27 +23,27 @@ import Gargantext.Prelude ...@@ -22,27 +23,27 @@ import Gargantext.Prelude
import GHC.Generics import GHC.Generics
data Token = Token { _tokenIndex :: Int data Token = Token { _tokenIndex :: !Int
, _tokenWord :: Text , _tokenWord :: !Text
, _tokenOriginalText :: Text , _tokenOriginalText :: !Text
, _tokenLemma :: Text , _tokenLemma :: !Text
, _tokenCharacterOffsetBegin :: Int , _tokenCharacterOffsetBegin :: !Int
, _tokenCharacterOffsetEnd :: Int , _tokenCharacterOffsetEnd :: !Int
, _tokenPos :: Maybe POS , _tokenPos :: !(Maybe POS)
, _tokenNer :: Maybe NER , _tokenNer :: !(Maybe NER)
, _tokenBefore :: Maybe Text , _tokenBefore :: !(Maybe Text)
, _tokenAfter :: Maybe Text , _tokenAfter :: !(Maybe Text)
} deriving (Show, Generic) } deriving (Show, Generic)
$(deriveJSON (unPrefix "_token") ''Token) $(deriveJSON (unPrefix "_token") ''Token)
data Sentence = Sentence { _sentenceIndex :: Int data Sentence = Sentence { _sentenceIndex :: !Int
, _sentenceTokens :: [Token] , _sentenceTokens :: ![Token]
} deriving (Show, Generic) } deriving (Show, Generic)
$(deriveJSON (unPrefix "_sentence") ''Sentence) $(deriveJSON (unPrefix "_sentence") ''Sentence)
data Properties = Properties { _propertiesAnnotators :: Text data Properties = Properties { _propertiesAnnotators :: !Text
, _propertiesOutputFormat :: Text , _propertiesOutputFormat :: !Text
} deriving (Show, Generic) } deriving (Show, Generic)
$(deriveJSON (unPrefix "_properties") ''Properties) $(deriveJSON (unPrefix "_properties") ''Properties)
......
...@@ -126,7 +126,7 @@ instance FromJSON POS where ...@@ -126,7 +126,7 @@ instance FromJSON POS where
instance ToJSON POS instance ToJSON POS
instance Hashable POS instance Hashable POS
------------------------------------------------------------------------ ------------------------------------------------------------------------
data NER = PERSON | ORGANIZATION | LOCATION | NoNER data NER = PERSON | ORGANIZATION | LOCATION | NoNER { noNer :: !Text }
deriving (Show, Generic) deriving (Show, Generic)
------------------------------------------------------------------------ ------------------------------------------------------------------------
instance FromJSON NER where instance FromJSON NER where
...@@ -134,9 +134,11 @@ instance FromJSON NER where ...@@ -134,9 +134,11 @@ instance FromJSON NER where
where where
ner :: [Char] -> NER ner :: [Char] -> NER
ner "PERSON" = PERSON ner "PERSON" = PERSON
ner "PER" = PERSON
ner "ORGANIZATION" = ORGANIZATION ner "ORGANIZATION" = ORGANIZATION
ner "LOCATION" = LOCATION ner "LOCATION" = LOCATION
ner _ = NoNER ner "LOC" = LOCATION
ner x = NoNER (cs x)
instance ToJSON NER instance ToJSON NER
......
...@@ -27,7 +27,7 @@ import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diag ...@@ -27,7 +27,7 @@ import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diag
import Gargantext.Database.Prelude (runPGSQuery{-, formatPGSQuery-}) import Gargantext.Database.Prelude (runPGSQuery{-, formatPGSQuery-})
import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..)) import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..))
import Gargantext.Core.Mail.Types (HasMail) import Gargantext.Core.Mail.Types (HasMail)
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory hiding (runPGSQuery)
import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-}) import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-}) import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
import Gargantext.Core.Types (ListType(..), Limit, NodeType(..), ContextId) import Gargantext.Core.Types (ListType(..), Limit, NodeType(..), ContextId)
...@@ -88,7 +88,7 @@ updateNgramsOccurrences cId mlId = do ...@@ -88,7 +88,7 @@ updateNgramsOccurrences cId mlId = do
updateNgramsOccurrences' :: (FlowCmdM env err m) updateNgramsOccurrences' :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> Maybe Limit -> TabType => CorpusId -> Maybe ListId -> Maybe Limit -> TabType
-> m [Int] -> m [Int]
updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
...@@ -97,7 +97,7 @@ updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do ...@@ -97,7 +97,7 @@ updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
Just lId' -> pure lId' Just lId' -> pure lId'
result <- getNgramsOccurrences cId lId tabType maybeLimit result <- getNgramsOccurrences cId lId tabType maybeLimit
let let
toInsert :: [[Action]] toInsert :: [[Action]]
toInsert = map (\(ngramsTerm, score) toInsert = map (\(ngramsTerm, score)
...@@ -121,7 +121,7 @@ updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do ...@@ -121,7 +121,7 @@ updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
RETURNING 1 RETURNING 1
|] |]
let fields = map (\t-> QualifiedIdentifier Nothing t) let fields = map (\t-> QualifiedIdentifier Nothing t)
$ map Text.pack ["int4", "int4","text","int4","int4"] $ map Text.pack ["int4", "int4","text","int4","int4"]
map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert) map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
...@@ -163,7 +163,7 @@ updateContextScore cId maybeListId = do ...@@ -163,7 +163,7 @@ updateContextScore cId maybeListId = do
Just lId' -> pure lId' Just lId' -> pure lId'
result <- getContextsNgramsScore cId lId Terms MapTerm Nothing result <- getContextsNgramsScore cId lId Terms MapTerm Nothing
let let
toInsert :: [[Action]] toInsert :: [[Action]]
toInsert = map (\(contextId, score) toInsert = map (\(contextId, score)
...@@ -185,7 +185,7 @@ updateContextScore cId maybeListId = do ...@@ -185,7 +185,7 @@ updateContextScore cId maybeListId = do
RETURNING 1 RETURNING 1
|] |]
let fields = map (\t-> QualifiedIdentifier Nothing t) let fields = map (\t-> QualifiedIdentifier Nothing t)
$ map Text.pack ["int4", "int4","int4"] $ map Text.pack ["int4", "int4","int4"]
map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert) map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
...@@ -243,6 +243,3 @@ getNgrams lId tabType = do ...@@ -243,6 +243,3 @@ getNgrams lId tabType = do
take' :: Maybe Int -> [a] -> [a] take' :: Maybe Int -> [a] -> [a]
take' Nothing xs = xs take' Nothing xs = xs
take' (Just n) xs = take n xs take' (Just n) xs = take n xs
...@@ -113,4 +113,3 @@ nodeTypes = [ (n, toDBid n) | n <- allNodeTypes ] ...@@ -113,4 +113,3 @@ nodeTypes = [ (n, toDBid n) | n <- allNodeTypes ]
fromNodeTypeId :: NodeTypeId -> NodeType fromNodeTypeId :: NodeTypeId -> NodeType
fromNodeTypeId tId = fromMaybe (panic $ pack $ "Type Id " <> show tId <> " does not exist") fromNodeTypeId tId = fromMaybe (panic $ pack $ "Type Id " <> show tId <> " does not exist")
(lookup tId nodeTypeInv) (lookup tId nodeTypeInv)
...@@ -30,8 +30,8 @@ import Data.Morpheus.Types (GQLType) ...@@ -30,8 +30,8 @@ import Data.Morpheus.Types (GQLType)
import Data.Swagger import Data.Swagger
import Data.Text (Text, unpack, pack) import Data.Text (Text, unpack, pack)
import Data.Time (UTCTime) import Data.Time (UTCTime)
import Database.PostgreSQL.Simple.FromField (FromField, fromField) import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField)
import Database.PostgreSQL.Simple.ToField (ToField, toField) import Database.PostgreSQL.Simple.ToField (ToField, toField, toJSONField)
import GHC.Generics (Generic) import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema) import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Schema.Context import Gargantext.Database.Schema.Context
...@@ -211,14 +211,6 @@ pgContextId = pgNodeId ...@@ -211,14 +211,6 @@ pgContextId = pgNodeId
------------------------------------------------------------------------ ------------------------------------------------------------------------
newtype NodeId = NodeId Int newtype NodeId = NodeId Int
deriving (Read, Generic, Num, Eq, Ord, Enum, ToJSONKey, FromJSONKey, ToJSON, FromJSON, Hashable, Csv.ToField) deriving (Read, Generic, Num, Eq, Ord, Enum, ToJSONKey, FromJSONKey, ToJSON, FromJSON, Hashable, Csv.ToField)
-- TODO make another type
type ContextId = NodeId
newtype NodeContextId = NodeContextId Int
deriving (Read, Generic, Num, Eq, Ord, Enum, ToJSONKey, FromJSONKey, ToJSON, FromJSON, Hashable, Csv.ToField)
instance GQLType NodeId instance GQLType NodeId
instance Show NodeId where instance Show NodeId where
show (NodeId n) = "nodeId-" <> show n show (NodeId n) = "nodeId-" <> show n
...@@ -232,6 +224,14 @@ instance FromField NodeId where ...@@ -232,6 +224,14 @@ instance FromField NodeId where
then return $ NodeId n then return $ NodeId n
else mzero else mzero
instance ToSchema NodeId instance ToSchema NodeId
-- TODO make another type
type ContextId = NodeId
newtype NodeContextId = NodeContextId Int
deriving (Read, Generic, Num, Eq, Ord, Enum, ToJSONKey, FromJSONKey, ToJSON, FromJSON, Hashable, Csv.ToField)
--instance Csv.ToField NodeId where --instance Csv.ToField NodeId where
-- toField (NodeId nodeId) = Csv.toField nodeId -- toField (NodeId nodeId) = Csv.toField nodeId
...@@ -357,6 +357,20 @@ data NodeType = NodeUser ...@@ -357,6 +357,20 @@ data NodeType = NodeUser
deriving (Show, Read, Eq, Generic, Bounded, Enum) deriving (Show, Read, Eq, Generic, Bounded, Enum)
instance GQLType NodeType instance GQLType NodeType
instance FromJSON NodeType
instance ToJSON NodeType
instance FromHttpApiData NodeType where
parseUrlPiece = Right . read . unpack
instance ToHttpApiData NodeType where
toUrlPiece = pack . show
instance ToParamSchema NodeType
instance ToSchema NodeType
instance Arbitrary NodeType where
arbitrary = elements allNodeTypes
instance FromField NodeType where
fromField = fromJSONField
instance ToField NodeType where
toField = toJSONField
allNodeTypes :: [NodeType] allNodeTypes :: [NodeType]
...@@ -394,21 +408,6 @@ defaultName NodeFrameNotebook = "Code" ...@@ -394,21 +408,6 @@ defaultName NodeFrameNotebook = "Code"
defaultName NodeFile = "File" defaultName NodeFile = "File"
instance FromJSON NodeType
instance ToJSON NodeType
instance FromHttpApiData NodeType where
parseUrlPiece = Right . read . unpack
instance ToHttpApiData NodeType where
toUrlPiece = pack . show
instance ToParamSchema NodeType
instance ToSchema NodeType
instance Arbitrary NodeType where
arbitrary = elements allNodeTypes
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- Instances -- Instances
...@@ -451,4 +450,3 @@ instance DefaultFromField SqlText (Maybe Hash) ...@@ -451,4 +450,3 @@ instance DefaultFromField SqlText (Maybe Hash)
context2node :: Context a -> Node a context2node :: Context a -> Node a
context2node (Context ci ch ct cu cp cn cd chy) = Node ci ch ct cu cp cn cd chy context2node (Context ci ch ct cu cp cn cd chy) = Node ci ch ct cu cp cn cd chy
...@@ -9,7 +9,7 @@ Portability : POSIX ...@@ -9,7 +9,7 @@ Portability : POSIX
-} -}
{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE ConstraintKinds, ScopedTypeVariables #-}
module Gargantext.Database.Prelude where module Gargantext.Database.Prelude where
...@@ -30,6 +30,7 @@ import Data.Word (Word16) ...@@ -30,6 +30,7 @@ import Data.Word (Word16)
import Database.PostgreSQL.Simple (Connection, connect) import Database.PostgreSQL.Simple (Connection, connect)
import Database.PostgreSQL.Simple.FromField ( Conversion, ResultError(ConversionFailed), fromField, returnError) import Database.PostgreSQL.Simple.FromField ( Conversion, ResultError(ConversionFailed), fromField, returnError)
import Database.PostgreSQL.Simple.Internal (Field) import Database.PostgreSQL.Simple.Internal (Field)
import Database.PostgreSQL.Simple.Types (Query(..))
import Gargantext.Core.Mail.Types (HasMail) import Gargantext.Core.Mail.Types (HasMail)
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Config (readIniFile', val) import Gargantext.Prelude.Config (readIniFile', val)
...@@ -167,10 +168,9 @@ runPGSQuery_ :: ( CmdM env err m ...@@ -167,10 +168,9 @@ runPGSQuery_ :: ( CmdM env err m
runPGSQuery_ q = mkCmd $ \conn -> catch (PGS.query_ conn q) printError runPGSQuery_ q = mkCmd $ \conn -> catch (PGS.query_ conn q) printError
where where
printError (SomeException e) = do printError (SomeException e) = do
printDebug "[G.D.P.runPGSQuery_]" ("TODO: format query error" :: Text) hPutStrLn stderr (fromQuery q)
throw (SomeException e) throw (SomeException e)
execPGSQuery :: PGS.ToRow a => PGS.Query -> a -> Cmd err Int64 execPGSQuery :: PGS.ToRow a => PGS.Query -> a -> Cmd err Int64
execPGSQuery q a = mkCmd $ \conn -> PGS.execute conn q a execPGSQuery q a = mkCmd $ \conn -> PGS.execute conn q a
...@@ -209,3 +209,9 @@ fromField' field mb = do ...@@ -209,3 +209,9 @@ fromField' field mb = do
printSqlOpa :: Default Unpackspec a a => Select a -> IO () printSqlOpa :: Default Unpackspec a a => Select a -> IO ()
printSqlOpa = putStrLn . maybe "Empty query" identity . showSql printSqlOpa = putStrLn . maybe "Empty query" identity . showSql
dbCheck :: CmdM env err m => m Bool
dbCheck = do
r :: [PGS.Only Text] <- runPGSQuery_ "select username from public.auth_user"
case r of
[] -> return False
_ -> return True
...@@ -132,4 +132,3 @@ selectNgramsId' ns = runPGSQuery querySelectNgramsId ( PGS.Only ...@@ -132,4 +132,3 @@ selectNgramsId' ns = runPGSQuery querySelectNgramsId ( PGS.Only
JOIN input_rows ir ON ir.terms = n.terms JOIN input_rows ir ON ir.terms = n.terms
GROUP BY n.terms, n.id GROUP BY n.terms, n.id
|] |]
...@@ -26,7 +26,6 @@ import Control.Lens (set, view) ...@@ -26,7 +26,6 @@ import Control.Lens (set, view)
import Data.Aeson import Data.Aeson
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Data.Text (Text) import Data.Text (Text)
import qualified Database.PostgreSQL.Simple as DPS
import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.SqlQQ (sql)
import Opaleye hiding (FromField) import Opaleye hiding (FromField)
import Prelude hiding (null, id, map, sum) import Prelude hiding (null, id, map, sum)
...@@ -41,6 +40,9 @@ import Gargantext.Database.Query.Table.Node.Error ...@@ -41,6 +40,9 @@ import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Schema.Node import Gargantext.Database.Schema.Node
import Gargantext.Prelude hiding (sum, head) import Gargantext.Prelude hiding (sum, head)
import qualified Database.PostgreSQL.Simple as PGS
queryNodeSearchTable :: Select NodeSearchRead queryNodeSearchTable :: Select NodeSearchRead
queryNodeSearchTable = selectTable nodeTableSearch queryNodeSearchTable = selectTable nodeTableSearch
...@@ -123,7 +125,7 @@ getClosestParentIdByType :: HasDBid NodeType ...@@ -123,7 +125,7 @@ getClosestParentIdByType :: HasDBid NodeType
-> NodeType -> NodeType
-> Cmd err (Maybe NodeId) -> Cmd err (Maybe NodeId)
getClosestParentIdByType nId nType = do getClosestParentIdByType nId nType = do
result <- runPGSQuery query (nId, 0 :: Int) result <- runPGSQuery query (PGS.Only nId)
case result of case result of
[(NodeId parentId, pTypename)] -> do [(NodeId parentId, pTypename)] -> do
if toDBid nType == pTypename then if toDBid nType == pTypename then
...@@ -132,12 +134,12 @@ getClosestParentIdByType nId nType = do ...@@ -132,12 +134,12 @@ getClosestParentIdByType nId nType = do
getClosestParentIdByType (NodeId parentId) nType getClosestParentIdByType (NodeId parentId) nType
_ -> pure Nothing _ -> pure Nothing
where where
query :: DPS.Query query :: PGS.Query
query = [sql| query = [sql|
SELECT n2.id, n2.typename SELECT n2.id, n2.typename
FROM nodes n1 FROM nodes n1
JOIN nodes n2 ON n1.parent_id = n2.id JOIN nodes n2 ON n1.parent_id = n2.id
WHERE n1.id = ? AND 0 = ?; WHERE n1.id = ?;
|] |]
-- | Similar to `getClosestParentIdByType` but includes current node -- | Similar to `getClosestParentIdByType` but includes current node
...@@ -147,7 +149,7 @@ getClosestParentIdByType' :: HasDBid NodeType ...@@ -147,7 +149,7 @@ getClosestParentIdByType' :: HasDBid NodeType
-> NodeType -> NodeType
-> Cmd err (Maybe NodeId) -> Cmd err (Maybe NodeId)
getClosestParentIdByType' nId nType = do getClosestParentIdByType' nId nType = do
result <- runPGSQuery query (nId, 0 :: Int) result <- runPGSQuery query (PGS.Only nId)
case result of case result of
[(NodeId id, pTypename)] -> do [(NodeId id, pTypename)] -> do
if toDBid nType == pTypename then if toDBid nType == pTypename then
...@@ -156,11 +158,11 @@ getClosestParentIdByType' nId nType = do ...@@ -156,11 +158,11 @@ getClosestParentIdByType' nId nType = do
getClosestParentIdByType nId nType getClosestParentIdByType nId nType
_ -> pure Nothing _ -> pure Nothing
where where
query :: DPS.Query query :: PGS.Query
query = [sql| query = [sql|
SELECT n.id, n.typename SELECT n.id, n.typename
FROM nodes n FROM nodes n
WHERE n.id = ? AND 0 = ?; WHERE n.id = ?;
|] |]
-- | Given a node id, find all it's children (no matter how deep) of -- | Given a node id, find all it's children (no matter how deep) of
...@@ -170,15 +172,15 @@ getChildrenByType :: HasDBid NodeType ...@@ -170,15 +172,15 @@ getChildrenByType :: HasDBid NodeType
-> NodeType -> NodeType
-> Cmd err [NodeId] -> Cmd err [NodeId]
getChildrenByType nId nType = do getChildrenByType nId nType = do
result <- runPGSQuery query (nId, 0 :: Int) result <- runPGSQuery query (PGS.Only nId)
children_lst <- mapM (\(id, _) -> getChildrenByType id nType) result children_lst <- mapM (\(id, _) -> getChildrenByType id nType) result
pure $ concat $ [fst <$> filter (\(_, pTypename) -> pTypename == toDBid nType) result] ++ children_lst pure $ concat $ [fst <$> filter (\(_, pTypename) -> pTypename == toDBid nType) result] ++ children_lst
where where
query :: DPS.Query query :: PGS.Query
query = [sql| query = [sql|
SELECT n.id, n.typename SELECT n.id, n.typename
FROM nodes n FROM nodes n
WHERE n.parent_id = ? AND 0 = ?; WHERE n.parent_id = ?;
|] |]
------------------------------------------------------------------------ ------------------------------------------------------------------------
...@@ -231,8 +233,8 @@ selectNodesIdWithType nt = proc () -> do ...@@ -231,8 +233,8 @@ selectNodesIdWithType nt = proc () -> do
------------------------------------------------------------------------ ------------------------------------------------------------------------
nodeExists :: (HasNodeError err) => NodeId -> Cmd err Bool nodeExists :: (HasNodeError err) => NodeId -> Cmd err Bool
nodeExists nId = (== [DPS.Only True]) nodeExists nId = (== [PGS.Only True])
<$> runPGSQuery [sql|SELECT true FROM nodes WHERE id = ? AND ?|] (nId, True) <$> runPGSQuery [sql|SELECT true FROM nodes WHERE id = ? |] (PGS.Only nId)
getNode :: HasNodeError err => NodeId -> Cmd err (Node Value) getNode :: HasNodeError err => NodeId -> Cmd err (Node Value)
getNode nId = do getNode nId = do
...@@ -397,4 +399,3 @@ defaultListMaybe cId = headMay <$> map (view node_id ) <$> getListsWithParentId ...@@ -397,4 +399,3 @@ defaultListMaybe cId = headMay <$> map (view node_id ) <$> getListsWithParentId
getListsWithParentId :: HasDBid NodeType => NodeId -> Cmd err [Node HyperdataList] getListsWithParentId :: HasDBid NodeType => NodeId -> Cmd err [Node HyperdataList]
getListsWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeList) getListsWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeList)
...@@ -58,6 +58,7 @@ import Control.Lens (set, view) ...@@ -58,6 +58,7 @@ import Control.Lens (set, view)
import Control.Lens.Cons import Control.Lens.Cons
import Control.Lens.Prism import Control.Lens.Prism
import Data.Aeson (toJSON, encode, ToJSON) import Data.Aeson (toJSON, encode, ToJSON)
import Data.Char (isAlpha)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Data.Text (Text) import Data.Text (Text)
-- import Data.ByteString (ByteString) -- import Data.ByteString (ByteString)
...@@ -77,7 +78,7 @@ import Gargantext.Database.Schema.Node (NodePoly(..)) ...@@ -77,7 +78,7 @@ import Gargantext.Database.Schema.Node (NodePoly(..))
import qualified Gargantext.Defaults as Defaults import qualified Gargantext.Defaults as Defaults
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (hash) import Gargantext.Prelude.Crypto.Hash (hash)
import qualified Data.Text as DT (pack, concat, take) import qualified Data.Text as DT (pack, concat, take, filter, toLower)
{-| To Print result query {-| To Print result query
import Data.ByteString.Internal (ByteString) import Data.ByteString.Internal (ByteString)
...@@ -208,11 +209,15 @@ instance AddUniqId HyperdataDocument ...@@ -208,11 +209,15 @@ instance AddUniqId HyperdataDocument
shaBdd = hash $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hd_bdd d))] <> shaParametersDoc) shaBdd = hash $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hd_bdd d))] <> shaParametersDoc)
shaParametersDoc :: [(HyperdataDocument -> Text)] shaParametersDoc :: [(HyperdataDocument -> Text)]
shaParametersDoc = [ \d -> maybeText (_hd_title d) shaParametersDoc = [ \d -> filterText $ maybeText (_hd_title d)
, \d -> maybeText (_hd_abstract d) , \d -> filterText $ maybeText (_hd_abstract d)
, \d -> maybeText (_hd_source d) , \d -> filterText $ maybeText (_hd_source d)
, \d -> maybeText (_hd_publication_date d) , \d -> maybeText (_hd_publication_date d)
] ]
filterText :: Text -> Text
filterText = DT.toLower . (DT.filter isAlpha)
-- TODO put this elsewhere (fix bin/gargantext-init/Main.hs too) -- TODO put this elsewhere (fix bin/gargantext-init/Main.hs too)
secret :: Text secret :: Text
secret = "Database secret to change" secret = "Database secret to change"
...@@ -266,6 +271,7 @@ addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd) ...@@ -266,6 +271,7 @@ addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd)
, \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d , \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d
] ]
maybeText :: Maybe Text -> Text maybeText :: Maybe Text -> Text
maybeText = maybe (DT.pack "") identity maybeText = maybe (DT.pack "") identity
......
...@@ -20,25 +20,27 @@ Ngrams connection to the Database. ...@@ -20,25 +20,27 @@ Ngrams connection to the Database.
module Gargantext.Database.Schema.Ngrams module Gargantext.Database.Schema.Ngrams
where where
import Data.Maybe (fromMaybe)
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Codec.Serialise (Serialise()) import Codec.Serialise (Serialise())
import Control.Lens (over) import Control.Lens (over)
import Control.Monad (mzero) import Control.Monad (mzero)
import Data.Maybe (fromMaybe)
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Aeson import Data.Aeson
import Data.Aeson.Types (toJSONKeyText) import Data.Aeson.Types (toJSONKeyText)
import Data.Map (fromList, lookup) import Data.Map (fromList, lookup)
import Database.PostgreSQL.Simple.FromField (returnError, ResultError(..))
import Data.Text (Text, splitOn, pack, strip) import Data.Text (Text, splitOn, pack, strip)
import Gargantext.Core.Types (TODO(..), Typed(..)) import Gargantext.Core.Types (TODO(..), Typed(..))
import Gargantext.Prelude import Gargantext.Prelude
import Servant (FromHttpApiData(..), Proxy(..), ToHttpApiData(..)) import Servant (FromHttpApiData(..), Proxy(..), ToHttpApiData(..))
import Text.Read (read)
import Gargantext.Core (HasDBid(..)) import Gargantext.Core (HasDBid(..))
import Gargantext.Database.Types import Gargantext.Database.Types
import Gargantext.Database.Schema.Prelude import Gargantext.Database.Schema.Prelude
import qualified Database.PostgreSQL.Simple as PGS import Text.Read (read)
import qualified Data.ByteString.Char8 as B
import qualified Data.HashMap.Strict as HashMap import qualified Data.HashMap.Strict as HashMap
import qualified Database.PostgreSQL.Simple as PGS
type NgramsId = Int type NgramsId = Int
...@@ -82,8 +84,34 @@ ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optionalTable ...@@ -82,8 +84,34 @@ ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optionalTable
-- ngrams in text fields of documents has Terms Type (i.e. either title or abstract) -- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
data NgramsType = Authors | Institutes | Sources | NgramsTerms data NgramsType = Authors | Institutes | Sources | NgramsTerms
deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic) deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
instance Serialise NgramsType instance Serialise NgramsType
instance FromJSON NgramsType
instance FromJSONKey NgramsType where
fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
instance ToJSON NgramsType
instance ToJSONKey NgramsType where
toJSONKey = toJSONKeyText (pack . show)
instance FromHttpApiData NgramsType where
parseUrlPiece n = pure $ (read . cs) n
instance ToHttpApiData NgramsType where
toUrlPiece = pack . show
instance ToParamSchema NgramsType where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
-- map NgramsType to its assigned id
instance FromField NgramsType where
fromField fld mdata =
case B.unpack `fmap` mdata of
Nothing -> returnError UnexpectedNull fld ""
Just dat -> do
n <- fromField fld mdata
if (n :: Int) > 0 then
case fromNgramsTypeId (NgramsTypeId n) of
Nothing -> returnError ConversionFailed fld dat
Just nt -> pure nt
else
returnError ConversionFailed fld dat
instance ToField NgramsType where
toField nt = toField $ ngramsTypeId nt
ngramsTypes :: [NgramsType] ngramsTypes :: [NgramsType]
...@@ -96,33 +124,13 @@ instance ToSchema NgramsType ...@@ -96,33 +124,13 @@ instance ToSchema NgramsType
newtype NgramsTypeId = NgramsTypeId Int newtype NgramsTypeId = NgramsTypeId Int
deriving (Eq, Show, Ord, Num) deriving (Eq, Show, Ord, Num)
instance ToField NgramsTypeId where instance ToField NgramsTypeId where
toField (NgramsTypeId n) = toField n toField (NgramsTypeId n) = toField n
instance FromField NgramsTypeId where instance FromField NgramsTypeId where
fromField fld mdata = do fromField fld mdata = do
n <- fromField fld mdata n <- fromField fld mdata
if (n :: Int) > 0 then return $ NgramsTypeId n if (n :: Int) > 0 then return $ NgramsTypeId n
else mzero else mzero
instance FromJSON NgramsType
instance FromJSONKey NgramsType where
fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
instance ToJSON NgramsType
instance ToJSONKey NgramsType where
toJSONKey = toJSONKeyText (pack . show)
instance FromHttpApiData NgramsType where
parseUrlPiece n = pure $ (read . cs) n
instance ToHttpApiData NgramsType where
toUrlPiece = pack . show
instance ToParamSchema NgramsType where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance DefaultFromField (Nullable SqlInt4) NgramsTypeId instance DefaultFromField (Nullable SqlInt4) NgramsTypeId
where where
defaultFromField = fromPGSFromField defaultFromField = fromPGSFromField
......
{-|
Module : Gargantext.Utils.SpacyNLP
Description : John Snow NLP API connexion
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Spacy ecosystem: https://github.com/explosion/spaCy
Server to be used: https://gitlab.iscpif.fr/gargantext/spacy-server
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Utils.SpacyNLP where
import Control.Lens
import Data.Aeson (encode)
import Data.Aeson.TH (deriveJSON)
import Data.Text hiding (map, group, filter, concat, zip)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Terms.Multi.PosTagging.Types
import Gargantext.Core.Types (POS(..), NER(..))
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import Network.HTTP.Simple (parseRequest, httpJSON, setRequestBodyLBS, getResponseBody, Response)
data SpacyData = SpacyData { _spacy_data :: ![SpacyText]}
deriving (Show)
data SpacyText = SpacyText { _spacy_text :: !Text
, _spacy_tags :: ![SpacyTags]
} deriving (Show)
data SpacyTags =
SpacyTags { _spacyTags_text :: !Text
, _spacyTags_text_with_ws :: !Text
, _spacyTags_whitespace :: !Text
, _spacyTags_head :: !Text
, _spacyTags_head_index :: !Int
, _spacyTags_left_edge :: !Text
, _spacyTags_right_edge :: !Text
, _spacyTags_index :: Int
, _spacyTags_ent_type :: !NER
, _spacyTags_ent_iob :: !Text
, _spacyTags_lemma :: !Text
, _spacyTags_normalized :: !Text
, _spacyTags_shape :: !Text
, _spacyTags_prefix :: !Text
, _spacyTags_suffix :: !Text
, _spacyTags_is_alpha :: Bool
, _spacyTags_is_ascii :: Bool
, _spacyTags_is_digit :: Bool
, _spacyTags_is_title :: Bool
, _spacyTags_is_punct :: Bool
, _spacyTags_is_left_punct :: Bool
, _spacyTags_is_right_punct :: Bool
, _spacyTags_is_space :: Bool
, _spacyTags_is_bracket :: Bool
, _spacyTags_is_quote :: Bool
, _spacyTags_is_currency :: Bool
, _spacyTags_like_url :: Bool
, _spacyTags_like_num :: Bool
, _spacyTags_like_email :: Bool
, _spacyTags_is_oov :: Bool
, _spacyTags_is_stop :: Bool
, _spacyTags_pos :: POS
, _spacyTags_tag :: POS
, _spacyTags_dep :: !Text
, _spacyTags_lang :: !Text
, _spacyTags_prob :: !Int
, _spacyTags_char_offset :: !Int
} deriving (Show)
data SpacyRequest = SpacyRequest { _spacyRequest_text :: !Text }
deriving (Show)
spacyRequest :: Text -> IO SpacyData
spacyRequest txt = do
url <- parseRequest $ unpack "POST http://localhost:8001/pos"
let request = setRequestBodyLBS (encode $ SpacyRequest txt) url
result <- httpJSON request :: IO (Response SpacyData)
pure $ getResponseBody result
-- Instances
deriveJSON (unPrefix "_spacy_") ''SpacyData
deriveJSON (unPrefix "_spacy_") ''SpacyText
deriveJSON (unPrefix "_spacyTags_") ''SpacyTags
deriveJSON (unPrefix "_spacyRequest_") ''SpacyRequest
makeLenses ''SpacyData
makeLenses ''SpacyText
makeLenses ''SpacyTags
makeLenses ''SpacyRequest
----------------------------------------------------------------
spacyTagsToToken :: SpacyTags -> Token
spacyTagsToToken st = Token (st ^. spacyTags_index)
(st ^. spacyTags_normalized)
(st ^. spacyTags_text)
(st ^. spacyTags_lemma)
(st ^. spacyTags_head_index)
(st ^. spacyTags_char_offset)
(Just $ st ^. spacyTags_pos)
(Just $ st ^. spacyTags_ent_type)
(Just $ st ^. spacyTags_prefix)
(Just $ st ^. spacyTags_suffix)
spacyDataToPosSentences :: SpacyData -> PosSentences
spacyDataToPosSentences (SpacyData ds) = PosSentences
$ map (\(i, ts) -> Sentence i ts)
$ zip [1..]
$ map (\(SpacyText _ tags)-> map spacyTagsToToken tags) ds
-----------------------------------------------------------------
nlp :: Lang -> Text -> IO PosSentences
nlp FR txt = spacyDataToPosSentences <$> spacyRequest txt
nlp _ _ = panic "Make sure you have the right model for your lang for spacy Server"
...@@ -72,7 +72,7 @@ extra-deps: ...@@ -72,7 +72,7 @@ extra-deps:
# External Data API connectors # External Data API connectors
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: 02e03d9b856bd35d391f43da8525330f9d184615 commit: 0b906ccc5a4a1b7532eb47c825dc02484a2d6b0e
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: a34bb341236d82cf3d488210bc1d8448a98f5808 commit: a34bb341236d82cf3d488210bc1d8448a98f5808
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git - git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment