Commit 4a095f89 authored by Karen Konou's avatar Karen Konou

Merge branch 'dev' into 428-dev-profile-img-upload

parents 5080f67e bf006791
## Version 0.0.6.1
* [FEAT] Spacy Server connection for French (and others) languages
* [FEAT] At startup, check if gargantext.init script has been activated
* [UPGRADE] Use the devops/postgres/upgrade/0.0.6.1.sql uprade script
* [FIX] PubMed Parser with threadDelay
* [BACK][FIX] Hash to remove duplicates on filtered text
## Version 0.0.6
* [OPTIM] Ngrams Table optmization. To upgrade:
1. `./bin/psql gargantext.ini < devops/postgresql/upgrade/0.0.6.sql`
......
......@@ -229,3 +229,63 @@ Playground is located at http://localhost:8008/gql
}
}
```
## PostgreSQL
### Upgrading using Docker
https://www.cloudytuts.com/tutorials/docker/how-to-upgrade-postgresql-in-docker-and-kubernetes/
To upgrade PostgreSQL in Docker containers, for example from 11.x to 14.x, simply run:
```sh
docker exec -it <container-id> pg_dumpall -U gargantua > 11-db.dump
```
Then, shut down the container, replace `image` section in
`devops/docker/docker-compose.yaml` with `postgres:14`. Also, it is a good practice to create a new volume, say `garg-pgdata14` and bind the new container to it. If you want to keep the same volume, remember about removing it like so:
```sh
docker-compose rm postgres
docker volume rm docker_garg-pgdata
```
Now, start the container and execute:
```sh
# need to drop the empty DB first, since schema will be created when restoring the dump
docker exec -i <new-container-id> dropdb -U gargantua gargandbV5
# recreate the db, but empty with no schema
docker exec -i <new-container-id> createdb -U gargantua gargandbV5
# now we can restore the dump
docker exec -i <new-container-id> psql -U gargantua -d gargandbV5 < 11-db.dump
```
### Upgrading using
There is a solution using pgupgrade_cluster but you need to manage the
clusters version 14 and 13. Hence here is a simple solution to upgrade.
First save your data:
```
sudo su postgres
pg_dumpall > gargandb.dump
```
Upgrade postgresql:
```
sudo apt install postgresql-server-14 postgresql-client-14
sudo apt remove --purge postgresql-13
```
Restore your data:
```
sudo su postgres
psql < gargandb.dump
```
Maybe you need to restore the gargantua password
```
ALTER ROLE gargantua PASSWORD 'yourPasswordIn_gargantext.ini'
```
Maybe you need to change the port to 5433 for database connection in
your gargantext.ini file.
......@@ -4,3 +4,4 @@ tmux new -d -s gargantext './server' \; \
split-window -h -d 'cd ./purescript-gargantext ; ./server' \; \
select-pane -t 1 \; \
split-window -d 'cd deps/CoreNLP ; ./startServer.sh' \; \
split-window -d 'cd deps/nlp/spacy-server ; source env/bin/activate ; ./server' \; \
version: '3'
services:
#postgres11:
# #image: 'postgres:latest'
# image: 'postgres:11'
# network_mode: host
# #command: ["postgres", "-c", "log_statement=all"]
# #ports:
# #- 5432:5432
# environment:
# POSTGRES_USER: gargantua
# POSTGRES_PASSWORD: C8kdcUrAQy66U
# POSTGRES_DB: gargandbV5
# volumes:
# - garg-pgdata:/var/lib/postgresql/data
# - ../:/gargantext
# - ../dbs:/dbs
# - ../postgres/schema.sql:/docker-entrypoint-initdb.d/schema.sql:ro
postgres:
#image: 'postgres:latest'
image: 'postgres:11'
image: 'postgres:14'
network_mode: host
#command: ["postgres", "-c", "log_statement=all"]
#ports:
......@@ -13,7 +30,7 @@ services:
POSTGRES_PASSWORD: C8kdcUrAQy66U
POSTGRES_DB: gargandbV5
volumes:
- garg-pgdata:/var/lib/postgresql/data
- garg-pgdata14:/var/lib/postgresql-11/data
- ../:/gargantext
- ../dbs:/dbs
- ../postgres/schema.sql:/docker-entrypoint-initdb.d/schema.sql:ro
......@@ -44,5 +61,6 @@ services:
- 5000:5000
volumes:
garg-pgdata:
#garg-pgdata:
garg-pgdata14:
js-cache:
......@@ -221,16 +221,22 @@ ALTER TABLE public.rights OWNER TO gargantua;
------------------------------------------------------------
-- Node Story
create table public.node_stories (
CREATE TABLE public.node_stories (
id SERIAL,
node_id INTEGER NOT NULL,
archive jsonb DEFAULT '{}'::jsonb NOT NULL,
version INTEGER NOT NULL,
ngrams_type_id INTEGER NOT NULL,
ngrams_id INTEGER NOT NULL,
--children TEXT[],
ngrams_repo_element jsonb DEFAULT '{}'::jsonb NOT NULL,
PRIMARY KEY (id),
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
);
ALTER TABLE public.node_stories OWNER TO gargantua;
CREATE UNIQUE INDEX ON public.node_stories USING btree (node_id);
CREATE UNIQUE INDEX ON public.node_stories USING btree (node_id, ngrams_type_id, ngrams_id);
create table public.node_story_archive_history (
......
-- Start a new transaction. In case data migration goes wrong, we are
-- back to our original table.
BEGIN;
-- we will migrate data here
-- rename old table and create a new one
ALTER TABLE public.node_stories RENAME TO node_stories_old;
CREATE TABLE public.node_stories (
id SERIAL,
node_id INTEGER NOT NULL,
version INTEGER NOT NULL,
ngrams_type_id INTEGER NOT NULL,
ngrams_id INTEGER NOT NULL,
--children TEXT[],
ngrams_repo_element jsonb DEFAULT '{}'::jsonb NOT NULL,
PRIMARY KEY (id),
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
);
ALTER TABLE public.node_stories OWNER TO gargantua;
CREATE UNIQUE INDEX ON public.node_stories USING btree (node_id, ngrams_type_id, ngrams_id);
-- Authors (ngrams_type_id = 1), see G.D.S.Ngrams.hs -> ngramsTypeId
INSERT INTO public.node_stories
(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
SELECT node_id, (archive->'version')::int, 1, ngrams.id, j.value
FROM node_stories_old
CROSS JOIN jsonb_each(archive->'state'->'Authors') AS j
JOIN ngrams ON terms = j.key;
-- we will leave children for later, small steps
-- INSERT INTO public.node_stories
-- (node_id, version, ngrams_type_id, ngrams_id, children, ngrams_repo_element)
-- SELECT node_id, (archive->'version')::int, 1, ngrams.id, c.children, (j.value - 'children')
-- FROM node_stories_old
-- CROSS JOIN jsonb_each(archive->'state'->'Authors') AS j
-- CROSS JOIN LATERAL (SELECT array_agg(d.elem) AS children FROM jsonb_array_elements_text(j.value->'children') AS d(elem)) AS c
-- JOIN ngrams ON terms = j.key;
-- Institutes (ngrams_type_id = 2)
INSERT INTO public.node_stories
(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
SELECT node_id, (archive->'version')::int, 2, ngrams.id, j.value
FROM node_stories_old
CROSS JOIN jsonb_each(archive->'state'->'Institutes') AS j
JOIN ngrams ON terms = j.key;
-- Sources (ngrams_type_id = 3)
INSERT INTO public.node_stories
(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
SELECT node_id, (archive->'version')::int, 3, ngrams.id, j.value
FROM node_stories_old
CROSS JOIN jsonb_each(archive->'state'->'Sources') AS j
JOIN ngrams ON terms = j.key;
-- NgramsTerms (ngrams_type_id = 4)
INSERT INTO public.node_stories
(node_id, version, ngrams_type_id, ngrams_id, ngrams_repo_element)
SELECT node_id, (archive->'version')::int, 4, ngrams.id, j.value
FROM node_stories_old
CROSS JOIN jsonb_each(archive->'state'->'NgramsTerms') AS j
JOIN ngrams ON terms = j.key;
-- finally, write out the stuff
COMMIT;
......@@ -36,4 +36,3 @@ ALTER TABLE public.node_story_archive_history OWNER TO gargantua;
-- UNION (SELECT node_id, sid, 3 AS ngrams_type_id, "Sources" AS patch FROM q WHERE "Sources" IS NOT NULL)
-- ORDER BY node_id, ngrams_type_id, sid
-- ) AS t;
......@@ -96,7 +96,6 @@ library
Gargantext.Core.Viz.Phylo.SynchronicClustering
Gargantext.Core.Viz.Types
other-modules:
-- ConcurrentTest
Gargantext.API.Admin.Auth
Gargantext.API.Admin.FrontEnd
Gargantext.API.Admin.Orchestrator
......@@ -318,6 +317,7 @@ library
Gargantext.Database.Types
Gargantext.Utils.Aeson
Gargantext.Utils.JohnSnowNLP
Gargantext.Utils.SpacyNLP
Gargantext.Utils.Servant
Gargantext.Utils.UTCTime
Paths_gargantext
......@@ -493,9 +493,9 @@ library
, transformers-base
, tuple
, unordered-containers
, uri-encode
, utf8-string
, uuid
, uri-encode
, validity
, vector
, wai
......
......@@ -6,7 +6,7 @@ name: gargantext
# | | | +----- Layers * : New versions with API additions
# | | | | +--- Layers * : New versions without API breaking changes
# | | | | |
version: '0.0.6'
version: '0.0.6.1'
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -75,6 +75,7 @@ library:
- Gargantext.Core.Types.Individu
- Gargantext.Core.Types.Main
- Gargantext.Core.Utils.Prefix
- Gargantext.Utils.SpacyNLP
- Gargantext.Database.Action.Flow
- Gargantext.Database.Action.Flow.Types
- Gargantext.Database.Action.User.New
......@@ -100,6 +101,7 @@ library:
- Gargantext.Core.Text.Metrics.TFICF
- Gargantext.Core.Text.Metrics.CharByChar
- Gargantext.Core.Text.Metrics.Count
- Gargantext.Core.Text.Prepare
- Gargantext.Core.Text.Search
- Gargantext.Core.Text.Terms
- Gargantext.Core.Text.Terms.Mono
......
......@@ -31,10 +31,13 @@ Pouillard (who mainly made it).
module Gargantext.API
where
import Control.Exception (finally)
import Control.Exception (catch, finally, SomeException)
import Control.Lens
import Control.Monad.Except
import Control.Monad.Reader (runReaderT)
import Data.Either
import Data.List (lookup)
import Data.Text (pack)
import Data.Text.Encoding (encodeUtf8)
import Data.Text.IO (putStrLn)
import Data.Validity
......@@ -49,6 +52,7 @@ import Gargantext.API.Prelude
import Gargantext.API.Routes
import Gargantext.API.Server (server)
import Gargantext.Core.NodeStory
import qualified Gargantext.Database.Prelude as DB
import Gargantext.Prelude hiding (putStrLn)
import Network.HTTP.Types hiding (Query)
import Network.Wai
......@@ -66,11 +70,21 @@ data Mode = Dev | Mock | Prod
startGargantext :: Mode -> PortNumber -> FilePath -> IO ()
startGargantext mode port file = do
env <- newEnv port file
runDbCheck env
portRouteInfo port
app <- makeApp env
mid <- makeDevMiddleware mode
run port (mid app) `finally` stopGargantext env
where runDbCheck env = do
r <- runExceptT (runReaderT DB.dbCheck env) `catch`
(\(_ :: SomeException) -> return $ Right False)
case r of
Right True -> return ()
_ -> panic $
"You must run 'gargantext-init " <> pack file <>
"' before running gargantext-server (only the first time)."
portRouteInfo :: PortNumber -> IO ()
portRouteInfo port = do
putStrLn " ----Main Routes----- "
......
......@@ -29,6 +29,7 @@ module Gargantext.API.Ngrams
, TableNgramsApiPut
, getTableNgrams
, getTableNgramsCorpus
, setListNgrams
--, rmListNgrams TODO fix before exporting
, apiNgramsTableCorpus
......@@ -258,15 +259,6 @@ setListNgrams listId ngramsType ns = do
saveNodeStory
currentVersion :: HasNodeStory env err m
=> ListId -> m Version
currentVersion listId = do
--nls <- getRepo [listId]
pool <- view connPool
nls <- liftBase $ getNodeStory pool listId
pure $ nls ^. unNodeStory . at listId . _Just . a_version
newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
newNgramsFromNgramsStatePatch p =
[ text2ngrams (unNgramsTerm n)
......
......@@ -19,9 +19,11 @@ import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
import Control.Monad.Reader
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Pool (withResource)
import Data.Set (Set)
import Data.Validity
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory
import Gargantext.Core.Types (ListType(..), NodeId, NodeType(..), ListId)
import Gargantext.Database.Prelude (CmdM, HasConnectionPool(..))
import Gargantext.Database.Schema.Ngrams (NgramsType)
......@@ -29,7 +31,6 @@ import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import Gargantext.Core.NodeStory
import qualified Gargantext.Core.NodeStoryFile as NSF
......@@ -202,15 +203,16 @@ migrateFromDirToDb :: (CmdM env err m, HasNodeStory env err m)
=> m ()
migrateFromDirToDb = do
pool <- view connPool
listIds <- liftBase $ getNodesIdWithType pool NodeList
printDebug "[migrateFromDirToDb] listIds" listIds
(NodeStory nls) <- NSF.getRepoReadConfig listIds
printDebug "[migrateFromDirToDb] nls" nls
_ <- mapM (\(nId, a) -> do
n <- liftBase $ nodeExists pool nId
case n of
False -> pure 0
True -> liftBase $ upsertNodeArchive pool nId a
) $ Map.toList nls
--_ <- nodeStoryIncs (Just $ NodeStory nls) listIds
pure ()
withResource pool $ \c -> do
listIds <- liftBase $ getNodesIdWithType c NodeList
printDebug "[migrateFromDirToDb] listIds" listIds
(NodeStory nls) <- NSF.getRepoReadConfig listIds
printDebug "[migrateFromDirToDb] nls" nls
_ <- mapM (\(nId, a) -> do
n <- liftBase $ nodeExists c nId
case n of
False -> pure ()
True -> liftBase $ upsertNodeStories c nId a
) $ Map.toList nls
--_ <- nodeStoryIncs (Just $ NodeStory nls) listIds
pure ()
......@@ -28,7 +28,7 @@ import Data.String (IsString, fromString)
import Data.Swagger hiding (version, patch)
import Data.Text (Text, pack, strip)
import Data.Validity
import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField, ResultError(ConversionFailed), returnError)
import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField)
import Database.PostgreSQL.Simple.ToField (ToField, toJSONField, toField)
import GHC.Generics (Generic)
import Gargantext.Core.Text (size)
......@@ -44,7 +44,6 @@ import Servant.Job.Utils (jsonOptions)
import Test.QuickCheck (elements, frequency)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
import qualified Data.List as List
import qualified Data.Map.Strict as Map
import qualified Data.Map.Strict.Patch as PM
import qualified Data.Set as Set
......@@ -124,7 +123,7 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
------------------------------------------------------------------------
newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text }
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable, NFData)
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable, NFData, FromField, ToField)
instance IsHashable NgramsTerm where
hash (NgramsTerm t) = hash t
instance Monoid NgramsTerm where
......@@ -133,18 +132,6 @@ instance FromJSONKey NgramsTerm where
fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ strip t
instance IsString NgramsTerm where
fromString s = NgramsTerm $ pack s
instance FromField NgramsTerm
where
fromField field mb = do
v <- fromField field mb
case fromJSON v of
Success a -> pure $ NgramsTerm $ strip a
Error _err -> returnError ConversionFailed field
$ List.intercalate " " [ "cannot parse hyperdata for JSON: "
, show v
]
instance ToField NgramsTerm where
toField (NgramsTerm n) = toField n
data RootParent = RootParent
......@@ -164,19 +151,20 @@ data NgramsRepoElement = NgramsRepoElement
, _nre_children :: !(MSet NgramsTerm)
}
deriving (Ord, Eq, Show, Generic)
deriveJSON (unPrefix "_nre_") ''NgramsRepoElement
-- TODO
-- if ngrams & not size => size
-- drop occurrences
makeLenses ''NgramsRepoElement
instance ToSchema NgramsRepoElement where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_nre_")
instance Serialise NgramsRepoElement
instance FromField NgramsRepoElement where
fromField = fromJSONField
instance ToField NgramsRepoElement where
toField = toJSONField
instance Serialise (MSet NgramsTerm)
instance Serialise NgramsRepoElement
data NgramsElement =
NgramsElement { _ne_ngrams :: NgramsTerm
......
This diff is collapsed.
{-|
Module : Gargantext.Core.Text.Clean
Description : Tools to clean text
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Clean some texts before importing it.
For a given Language, chose a big master piece of litteracy to analyze
it with GarganText. Here is a an example with a famous French Writer
that could be the incarnation of the mythic Gargantua.
-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Core.Text.Prepare
where
import Data.Text (Text)
import Gargantext.Core.Text (sentences)
import Gargantext.Prelude
import qualified Data.List as List
import qualified Data.Text as Text
---------------------------------------------------------------------
prepareText :: Paragraph -> Text -> [Text]
prepareText p txt = groupText p
$ List.filter (/= "")
$ toParagraphs
$ Text.lines
$ Text.replace "_" " " -- some texts seem to be underlined
$ Text.replace "--" "" -- removing bullets like of dialogs
$ Text.replace "\xd" "" txt
---------------------------------------------------------------------
groupText :: Paragraph -> [Text] -> [Text]
groupText (Uniform blockSize) = groupUniform blockSize
groupText AuthorLike = groupLines
---------------------------------------------------------------------
data Paragraph = Uniform Grain | AuthorLike
-- Uniform does not preserve the paragraphs of the author but length of paragraphs is uniform
-- Author Like preserve the paragraphs of the Author but length of paragraphs is not uniform
-- Grain: number of Sentences by block of Text
-- Step : overlap of sentence between connex block of Text
groupUniform :: Grain -> [Text] -> [Text]
groupUniform g ts = map (Text.intercalate " ")
$ chunkAlong g g
$ sentences
$ Text.concat ts
groupLines :: [Text] -> [Text]
groupLines xxx@(a:b:xs) =
if Text.length a > moyenne
then [a] <> (groupLines (b:xs))
else let ab = a <> " " <> b in
if Text.length ab > moyenne
then [ab] <> (groupLines xs)
else groupLines ([ab] <> xs)
where
moyenne = round
$ mean
$ (map (fromIntegral . Text.length) xxx :: [Double])
groupLines [a] = [a]
groupLines [] = []
groupLines_test :: [Text]
groupLines_test = groupLines theData
where
theData = ["abxxxx", "bc", "cxxx", "d"]
---------------------------------------------------------------------
toParagraphs :: [Text] -> [Text]
toParagraphs (a:x:xs) =
if a == ""
then [a] <> toParagraphs (x:xs)
else if x == ""
then [a] <> toParagraphs (x:xs)
else toParagraphs $ [a <> " " <> x ] <> xs
toParagraphs [a] = [a]
toParagraphs [] = []
-- Tests
-- TODO for internships: Property tests
toParagraphs_test :: Bool
toParagraphs_test =
toParagraphs ["a","b","","c","d","d","","e","f","","g","h",""]
== [ "a b", "", "c d d", "", "e f", "", "g h", ""]
......@@ -28,7 +28,8 @@ import qualified Gargantext.Core.Text.Terms.Multi.Lang.En as En
import qualified Gargantext.Core.Text.Terms.Multi.Lang.Fr as Fr
import Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake)
import qualified Gargantext.Utils.JohnSnowNLP as JohnSnow
-- import qualified Gargantext.Utils.JohnSnowNLP as JohnSnow
import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
-------------------------------------------------------------------
......@@ -51,7 +52,7 @@ tokenTag2terms (TokenTag ws t _ _) = Terms ws t
tokenTags :: Lang -> Text -> IO [[TokenTag]]
tokenTags EN txt = tokenTagsWith EN txt corenlp
tokenTags FR txt = tokenTagsWith FR txt JohnSnow.nlp
tokenTags FR txt = tokenTagsWith FR txt SpacyNLP.nlp
tokenTags _ _ = panic "[G.C.T.T.Multi] NLP API not implemented yet"
tokenTagsWith :: Lang -> Text -> NLP_API -> IO [[TokenTag]]
......
......@@ -9,6 +9,7 @@ Portability : POSIX
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
......@@ -22,27 +23,27 @@ import Gargantext.Prelude
import GHC.Generics
data Token = Token { _tokenIndex :: Int
, _tokenWord :: Text
, _tokenOriginalText :: Text
, _tokenLemma :: Text
, _tokenCharacterOffsetBegin :: Int
, _tokenCharacterOffsetEnd :: Int
, _tokenPos :: Maybe POS
, _tokenNer :: Maybe NER
, _tokenBefore :: Maybe Text
, _tokenAfter :: Maybe Text
data Token = Token { _tokenIndex :: !Int
, _tokenWord :: !Text
, _tokenOriginalText :: !Text
, _tokenLemma :: !Text
, _tokenCharacterOffsetBegin :: !Int
, _tokenCharacterOffsetEnd :: !Int
, _tokenPos :: !(Maybe POS)
, _tokenNer :: !(Maybe NER)
, _tokenBefore :: !(Maybe Text)
, _tokenAfter :: !(Maybe Text)
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_token") ''Token)
data Sentence = Sentence { _sentenceIndex :: Int
, _sentenceTokens :: [Token]
data Sentence = Sentence { _sentenceIndex :: !Int
, _sentenceTokens :: ![Token]
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_sentence") ''Sentence)
data Properties = Properties { _propertiesAnnotators :: Text
, _propertiesOutputFormat :: Text
data Properties = Properties { _propertiesAnnotators :: !Text
, _propertiesOutputFormat :: !Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_properties") ''Properties)
......
......@@ -126,7 +126,7 @@ instance FromJSON POS where
instance ToJSON POS
instance Hashable POS
------------------------------------------------------------------------
data NER = PERSON | ORGANIZATION | LOCATION | NoNER
data NER = PERSON | ORGANIZATION | LOCATION | NoNER { noNer :: !Text }
deriving (Show, Generic)
------------------------------------------------------------------------
instance FromJSON NER where
......@@ -134,9 +134,11 @@ instance FromJSON NER where
where
ner :: [Char] -> NER
ner "PERSON" = PERSON
ner "PER" = PERSON
ner "ORGANIZATION" = ORGANIZATION
ner "LOCATION" = LOCATION
ner _ = NoNER
ner "LOC" = LOCATION
ner x = NoNER (cs x)
instance ToJSON NER
......
......@@ -27,7 +27,7 @@ import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diag
import Gargantext.Database.Prelude (runPGSQuery{-, formatPGSQuery-})
import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..))
import Gargantext.Core.Mail.Types (HasMail)
import Gargantext.Core.NodeStory
import Gargantext.Core.NodeStory hiding (runPGSQuery)
import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
import Gargantext.Core.Types (ListType(..), Limit, NodeType(..), ContextId)
......@@ -88,7 +88,7 @@ updateNgramsOccurrences cId mlId = do
updateNgramsOccurrences' :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> Maybe Limit -> TabType
=> CorpusId -> Maybe ListId -> Maybe Limit -> TabType
-> m [Int]
updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
......@@ -97,7 +97,7 @@ updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
Just lId' -> pure lId'
result <- getNgramsOccurrences cId lId tabType maybeLimit
let
toInsert :: [[Action]]
toInsert = map (\(ngramsTerm, score)
......@@ -121,7 +121,7 @@ updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
RETURNING 1
|]
let fields = map (\t-> QualifiedIdentifier Nothing t)
let fields = map (\t-> QualifiedIdentifier Nothing t)
$ map Text.pack ["int4", "int4","text","int4","int4"]
map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
......@@ -163,7 +163,7 @@ updateContextScore cId maybeListId = do
Just lId' -> pure lId'
result <- getContextsNgramsScore cId lId Terms MapTerm Nothing
let
toInsert :: [[Action]]
toInsert = map (\(contextId, score)
......@@ -185,7 +185,7 @@ updateContextScore cId maybeListId = do
RETURNING 1
|]
let fields = map (\t-> QualifiedIdentifier Nothing t)
let fields = map (\t-> QualifiedIdentifier Nothing t)
$ map Text.pack ["int4", "int4","int4"]
map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
......@@ -243,6 +243,3 @@ getNgrams lId tabType = do
take' :: Maybe Int -> [a] -> [a]
take' Nothing xs = xs
take' (Just n) xs = take n xs
......@@ -113,4 +113,3 @@ nodeTypes = [ (n, toDBid n) | n <- allNodeTypes ]
fromNodeTypeId :: NodeTypeId -> NodeType
fromNodeTypeId tId = fromMaybe (panic $ pack $ "Type Id " <> show tId <> " does not exist")
(lookup tId nodeTypeInv)
......@@ -30,8 +30,8 @@ import Data.Morpheus.Types (GQLType)
import Data.Swagger
import Data.Text (Text, unpack, pack)
import Data.Time (UTCTime)
import Database.PostgreSQL.Simple.FromField (FromField, fromField)
import Database.PostgreSQL.Simple.ToField (ToField, toField)
import Database.PostgreSQL.Simple.FromField (FromField, fromField, fromJSONField)
import Database.PostgreSQL.Simple.ToField (ToField, toField, toJSONField)
import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
import Gargantext.Database.Schema.Context
......@@ -211,14 +211,6 @@ pgContextId = pgNodeId
------------------------------------------------------------------------
newtype NodeId = NodeId Int
deriving (Read, Generic, Num, Eq, Ord, Enum, ToJSONKey, FromJSONKey, ToJSON, FromJSON, Hashable, Csv.ToField)
-- TODO make another type
type ContextId = NodeId
newtype NodeContextId = NodeContextId Int
deriving (Read, Generic, Num, Eq, Ord, Enum, ToJSONKey, FromJSONKey, ToJSON, FromJSON, Hashable, Csv.ToField)
instance GQLType NodeId
instance Show NodeId where
show (NodeId n) = "nodeId-" <> show n
......@@ -232,6 +224,14 @@ instance FromField NodeId where
then return $ NodeId n
else mzero
instance ToSchema NodeId
-- TODO make another type
type ContextId = NodeId
newtype NodeContextId = NodeContextId Int
deriving (Read, Generic, Num, Eq, Ord, Enum, ToJSONKey, FromJSONKey, ToJSON, FromJSON, Hashable, Csv.ToField)
--instance Csv.ToField NodeId where
-- toField (NodeId nodeId) = Csv.toField nodeId
......@@ -357,6 +357,20 @@ data NodeType = NodeUser
deriving (Show, Read, Eq, Generic, Bounded, Enum)
instance GQLType NodeType
instance FromJSON NodeType
instance ToJSON NodeType
instance FromHttpApiData NodeType where
parseUrlPiece = Right . read . unpack
instance ToHttpApiData NodeType where
toUrlPiece = pack . show
instance ToParamSchema NodeType
instance ToSchema NodeType
instance Arbitrary NodeType where
arbitrary = elements allNodeTypes
instance FromField NodeType where
fromField = fromJSONField
instance ToField NodeType where
toField = toJSONField
allNodeTypes :: [NodeType]
......@@ -394,21 +408,6 @@ defaultName NodeFrameNotebook = "Code"
defaultName NodeFile = "File"
instance FromJSON NodeType
instance ToJSON NodeType
instance FromHttpApiData NodeType where
parseUrlPiece = Right . read . unpack
instance ToHttpApiData NodeType where
toUrlPiece = pack . show
instance ToParamSchema NodeType
instance ToSchema NodeType
instance Arbitrary NodeType where
arbitrary = elements allNodeTypes
------------------------------------------------------------------------
-- Instances
......@@ -451,4 +450,3 @@ instance DefaultFromField SqlText (Maybe Hash)
context2node :: Context a -> Node a
context2node (Context ci ch ct cu cp cn cd chy) = Node ci ch ct cu cp cn cd chy
......@@ -9,7 +9,7 @@ Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ConstraintKinds, ScopedTypeVariables #-}
module Gargantext.Database.Prelude where
......@@ -30,6 +30,7 @@ import Data.Word (Word16)
import Database.PostgreSQL.Simple (Connection, connect)
import Database.PostgreSQL.Simple.FromField ( Conversion, ResultError(ConversionFailed), fromField, returnError)
import Database.PostgreSQL.Simple.Internal (Field)
import Database.PostgreSQL.Simple.Types (Query(..))
import Gargantext.Core.Mail.Types (HasMail)
import Gargantext.Prelude
import Gargantext.Prelude.Config (readIniFile', val)
......@@ -167,10 +168,9 @@ runPGSQuery_ :: ( CmdM env err m
runPGSQuery_ q = mkCmd $ \conn -> catch (PGS.query_ conn q) printError
where
printError (SomeException e) = do
printDebug "[G.D.P.runPGSQuery_]" ("TODO: format query error" :: Text)
hPutStrLn stderr (fromQuery q)
throw (SomeException e)
execPGSQuery :: PGS.ToRow a => PGS.Query -> a -> Cmd err Int64
execPGSQuery q a = mkCmd $ \conn -> PGS.execute conn q a
......@@ -209,3 +209,9 @@ fromField' field mb = do
printSqlOpa :: Default Unpackspec a a => Select a -> IO ()
printSqlOpa = putStrLn . maybe "Empty query" identity . showSql
dbCheck :: CmdM env err m => m Bool
dbCheck = do
r :: [PGS.Only Text] <- runPGSQuery_ "select username from public.auth_user"
case r of
[] -> return False
_ -> return True
......@@ -132,4 +132,3 @@ selectNgramsId' ns = runPGSQuery querySelectNgramsId ( PGS.Only
JOIN input_rows ir ON ir.terms = n.terms
GROUP BY n.terms, n.id
|]
......@@ -26,7 +26,6 @@ import Control.Lens (set, view)
import Data.Aeson
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Database.PostgreSQL.Simple as DPS
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Opaleye hiding (FromField)
import Prelude hiding (null, id, map, sum)
......@@ -41,6 +40,9 @@ import Gargantext.Database.Query.Table.Node.Error
import Gargantext.Database.Schema.Node
import Gargantext.Prelude hiding (sum, head)
import qualified Database.PostgreSQL.Simple as PGS
queryNodeSearchTable :: Select NodeSearchRead
queryNodeSearchTable = selectTable nodeTableSearch
......@@ -123,7 +125,7 @@ getClosestParentIdByType :: HasDBid NodeType
-> NodeType
-> Cmd err (Maybe NodeId)
getClosestParentIdByType nId nType = do
result <- runPGSQuery query (nId, 0 :: Int)
result <- runPGSQuery query (PGS.Only nId)
case result of
[(NodeId parentId, pTypename)] -> do
if toDBid nType == pTypename then
......@@ -132,12 +134,12 @@ getClosestParentIdByType nId nType = do
getClosestParentIdByType (NodeId parentId) nType
_ -> pure Nothing
where
query :: DPS.Query
query :: PGS.Query
query = [sql|
SELECT n2.id, n2.typename
FROM nodes n1
JOIN nodes n2 ON n1.parent_id = n2.id
WHERE n1.id = ? AND 0 = ?;
WHERE n1.id = ?;
|]
-- | Similar to `getClosestParentIdByType` but includes current node
......@@ -147,7 +149,7 @@ getClosestParentIdByType' :: HasDBid NodeType
-> NodeType
-> Cmd err (Maybe NodeId)
getClosestParentIdByType' nId nType = do
result <- runPGSQuery query (nId, 0 :: Int)
result <- runPGSQuery query (PGS.Only nId)
case result of
[(NodeId id, pTypename)] -> do
if toDBid nType == pTypename then
......@@ -156,11 +158,11 @@ getClosestParentIdByType' nId nType = do
getClosestParentIdByType nId nType
_ -> pure Nothing
where
query :: DPS.Query
query :: PGS.Query
query = [sql|
SELECT n.id, n.typename
FROM nodes n
WHERE n.id = ? AND 0 = ?;
WHERE n.id = ?;
|]
-- | Given a node id, find all it's children (no matter how deep) of
......@@ -170,15 +172,15 @@ getChildrenByType :: HasDBid NodeType
-> NodeType
-> Cmd err [NodeId]
getChildrenByType nId nType = do
result <- runPGSQuery query (nId, 0 :: Int)
result <- runPGSQuery query (PGS.Only nId)
children_lst <- mapM (\(id, _) -> getChildrenByType id nType) result
pure $ concat $ [fst <$> filter (\(_, pTypename) -> pTypename == toDBid nType) result] ++ children_lst
where
query :: DPS.Query
query :: PGS.Query
query = [sql|
SELECT n.id, n.typename
FROM nodes n
WHERE n.parent_id = ? AND 0 = ?;
WHERE n.parent_id = ?;
|]
------------------------------------------------------------------------
......@@ -231,8 +233,8 @@ selectNodesIdWithType nt = proc () -> do
------------------------------------------------------------------------
nodeExists :: (HasNodeError err) => NodeId -> Cmd err Bool
nodeExists nId = (== [DPS.Only True])
<$> runPGSQuery [sql|SELECT true FROM nodes WHERE id = ? AND ?|] (nId, True)
nodeExists nId = (== [PGS.Only True])
<$> runPGSQuery [sql|SELECT true FROM nodes WHERE id = ? |] (PGS.Only nId)
getNode :: HasNodeError err => NodeId -> Cmd err (Node Value)
getNode nId = do
......@@ -397,4 +399,3 @@ defaultListMaybe cId = headMay <$> map (view node_id ) <$> getListsWithParentId
getListsWithParentId :: HasDBid NodeType => NodeId -> Cmd err [Node HyperdataList]
getListsWithParentId n = runOpaQuery $ selectNodesWith' n (Just NodeList)
......@@ -58,6 +58,7 @@ import Control.Lens (set, view)
import Control.Lens.Cons
import Control.Lens.Prism
import Data.Aeson (toJSON, encode, ToJSON)
import Data.Char (isAlpha)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
-- import Data.ByteString (ByteString)
......@@ -77,7 +78,7 @@ import Gargantext.Database.Schema.Node (NodePoly(..))
import qualified Gargantext.Defaults as Defaults
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (hash)
import qualified Data.Text as DT (pack, concat, take)
import qualified Data.Text as DT (pack, concat, take, filter, toLower)
{-| To Print result query
import Data.ByteString.Internal (ByteString)
......@@ -208,11 +209,15 @@ instance AddUniqId HyperdataDocument
shaBdd = hash $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hd_bdd d))] <> shaParametersDoc)
shaParametersDoc :: [(HyperdataDocument -> Text)]
shaParametersDoc = [ \d -> maybeText (_hd_title d)
, \d -> maybeText (_hd_abstract d)
, \d -> maybeText (_hd_source d)
shaParametersDoc = [ \d -> filterText $ maybeText (_hd_title d)
, \d -> filterText $ maybeText (_hd_abstract d)
, \d -> filterText $ maybeText (_hd_source d)
, \d -> maybeText (_hd_publication_date d)
]
filterText :: Text -> Text
filterText = DT.toLower . (DT.filter isAlpha)
-- TODO put this elsewhere (fix bin/gargantext-init/Main.hs too)
secret :: Text
secret = "Database secret to change"
......@@ -266,6 +271,7 @@ addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd)
, \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d
]
maybeText :: Maybe Text -> Text
maybeText = maybe (DT.pack "") identity
......
......@@ -20,25 +20,27 @@ Ngrams connection to the Database.
module Gargantext.Database.Schema.Ngrams
where
import Data.Maybe (fromMaybe)
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Codec.Serialise (Serialise())
import Control.Lens (over)
import Control.Monad (mzero)
import Data.Maybe (fromMaybe)
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Data.Aeson
import Data.Aeson.Types (toJSONKeyText)
import Data.Map (fromList, lookup)
import Database.PostgreSQL.Simple.FromField (returnError, ResultError(..))
import Data.Text (Text, splitOn, pack, strip)
import Gargantext.Core.Types (TODO(..), Typed(..))
import Gargantext.Prelude
import Servant (FromHttpApiData(..), Proxy(..), ToHttpApiData(..))
import Text.Read (read)
import Gargantext.Core (HasDBid(..))
import Gargantext.Database.Types
import Gargantext.Database.Schema.Prelude
import qualified Database.PostgreSQL.Simple as PGS
import Text.Read (read)
import qualified Data.ByteString.Char8 as B
import qualified Data.HashMap.Strict as HashMap
import qualified Database.PostgreSQL.Simple as PGS
type NgramsId = Int
......@@ -82,8 +84,34 @@ ngramsTable = Table "ngrams" (pNgramsDb NgramsDB { _ngrams_id = optionalTable
-- ngrams in text fields of documents has Terms Type (i.e. either title or abstract)
data NgramsType = Authors | Institutes | Sources | NgramsTerms
deriving (Eq, Show, Read, Ord, Enum, Bounded, Generic)
instance Serialise NgramsType
instance FromJSON NgramsType
instance FromJSONKey NgramsType where
fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
instance ToJSON NgramsType
instance ToJSONKey NgramsType where
toJSONKey = toJSONKeyText (pack . show)
instance FromHttpApiData NgramsType where
parseUrlPiece n = pure $ (read . cs) n
instance ToHttpApiData NgramsType where
toUrlPiece = pack . show
instance ToParamSchema NgramsType where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
-- map NgramsType to its assigned id
instance FromField NgramsType where
fromField fld mdata =
case B.unpack `fmap` mdata of
Nothing -> returnError UnexpectedNull fld ""
Just dat -> do
n <- fromField fld mdata
if (n :: Int) > 0 then
case fromNgramsTypeId (NgramsTypeId n) of
Nothing -> returnError ConversionFailed fld dat
Just nt -> pure nt
else
returnError ConversionFailed fld dat
instance ToField NgramsType where
toField nt = toField $ ngramsTypeId nt
ngramsTypes :: [NgramsType]
......@@ -96,33 +124,13 @@ instance ToSchema NgramsType
newtype NgramsTypeId = NgramsTypeId Int
deriving (Eq, Show, Ord, Num)
instance ToField NgramsTypeId where
toField (NgramsTypeId n) = toField n
instance FromField NgramsTypeId where
fromField fld mdata = do
n <- fromField fld mdata
if (n :: Int) > 0 then return $ NgramsTypeId n
else mzero
instance FromJSON NgramsType
instance FromJSONKey NgramsType where
fromJSONKey = FromJSONKeyTextParser (parseJSON . String)
instance ToJSON NgramsType
instance ToJSONKey NgramsType where
toJSONKey = toJSONKeyText (pack . show)
instance FromHttpApiData NgramsType where
parseUrlPiece n = pure $ (read . cs) n
instance ToHttpApiData NgramsType where
toUrlPiece = pack . show
instance ToParamSchema NgramsType where
toParamSchema _ = toParamSchema (Proxy :: Proxy TODO)
instance DefaultFromField (Nullable SqlInt4) NgramsTypeId
where
defaultFromField = fromPGSFromField
......
{-|
Module : Gargantext.Utils.SpacyNLP
Description : John Snow NLP API connexion
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Spacy ecosystem: https://github.com/explosion/spaCy
Server to be used: https://gitlab.iscpif.fr/gargantext/spacy-server
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Utils.SpacyNLP where
import Control.Lens
import Data.Aeson (encode)
import Data.Aeson.TH (deriveJSON)
import Data.Text hiding (map, group, filter, concat, zip)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Terms.Multi.PosTagging.Types
import Gargantext.Core.Types (POS(..), NER(..))
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import Network.HTTP.Simple (parseRequest, httpJSON, setRequestBodyLBS, getResponseBody, Response)
data SpacyData = SpacyData { _spacy_data :: ![SpacyText]}
deriving (Show)
data SpacyText = SpacyText { _spacy_text :: !Text
, _spacy_tags :: ![SpacyTags]
} deriving (Show)
data SpacyTags =
SpacyTags { _spacyTags_text :: !Text
, _spacyTags_text_with_ws :: !Text
, _spacyTags_whitespace :: !Text
, _spacyTags_head :: !Text
, _spacyTags_head_index :: !Int
, _spacyTags_left_edge :: !Text
, _spacyTags_right_edge :: !Text
, _spacyTags_index :: Int
, _spacyTags_ent_type :: !NER
, _spacyTags_ent_iob :: !Text
, _spacyTags_lemma :: !Text
, _spacyTags_normalized :: !Text
, _spacyTags_shape :: !Text
, _spacyTags_prefix :: !Text
, _spacyTags_suffix :: !Text
, _spacyTags_is_alpha :: Bool
, _spacyTags_is_ascii :: Bool
, _spacyTags_is_digit :: Bool
, _spacyTags_is_title :: Bool
, _spacyTags_is_punct :: Bool
, _spacyTags_is_left_punct :: Bool
, _spacyTags_is_right_punct :: Bool
, _spacyTags_is_space :: Bool
, _spacyTags_is_bracket :: Bool
, _spacyTags_is_quote :: Bool
, _spacyTags_is_currency :: Bool
, _spacyTags_like_url :: Bool
, _spacyTags_like_num :: Bool
, _spacyTags_like_email :: Bool
, _spacyTags_is_oov :: Bool
, _spacyTags_is_stop :: Bool
, _spacyTags_pos :: POS
, _spacyTags_tag :: POS
, _spacyTags_dep :: !Text
, _spacyTags_lang :: !Text
, _spacyTags_prob :: !Int
, _spacyTags_char_offset :: !Int
} deriving (Show)
data SpacyRequest = SpacyRequest { _spacyRequest_text :: !Text }
deriving (Show)
spacyRequest :: Text -> IO SpacyData
spacyRequest txt = do
url <- parseRequest $ unpack "POST http://localhost:8001/pos"
let request = setRequestBodyLBS (encode $ SpacyRequest txt) url
result <- httpJSON request :: IO (Response SpacyData)
pure $ getResponseBody result
-- Instances
deriveJSON (unPrefix "_spacy_") ''SpacyData
deriveJSON (unPrefix "_spacy_") ''SpacyText
deriveJSON (unPrefix "_spacyTags_") ''SpacyTags
deriveJSON (unPrefix "_spacyRequest_") ''SpacyRequest
makeLenses ''SpacyData
makeLenses ''SpacyText
makeLenses ''SpacyTags
makeLenses ''SpacyRequest
----------------------------------------------------------------
spacyTagsToToken :: SpacyTags -> Token
spacyTagsToToken st = Token (st ^. spacyTags_index)
(st ^. spacyTags_normalized)
(st ^. spacyTags_text)
(st ^. spacyTags_lemma)
(st ^. spacyTags_head_index)
(st ^. spacyTags_char_offset)
(Just $ st ^. spacyTags_pos)
(Just $ st ^. spacyTags_ent_type)
(Just $ st ^. spacyTags_prefix)
(Just $ st ^. spacyTags_suffix)
spacyDataToPosSentences :: SpacyData -> PosSentences
spacyDataToPosSentences (SpacyData ds) = PosSentences
$ map (\(i, ts) -> Sentence i ts)
$ zip [1..]
$ map (\(SpacyText _ tags)-> map spacyTagsToToken tags) ds
-----------------------------------------------------------------
nlp :: Lang -> Text -> IO PosSentences
nlp FR txt = spacyDataToPosSentences <$> spacyRequest txt
nlp _ _ = panic "Make sure you have the right model for your lang for spacy Server"
......@@ -72,7 +72,7 @@ extra-deps:
# External Data API connectors
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: 02e03d9b856bd35d391f43da8525330f9d184615
commit: 0b906ccc5a4a1b7532eb47c825dc02484a2d6b0e
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: a34bb341236d82cf3d488210bc1d8448a98f5808
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment