Commit a7be6271 authored by Przemyslaw Kaminski

Merge branch 'dev' into 141-dev-node-stories-db-optimization

parents 18306d29 bf006791
Pipeline #3150 passed in 91 minutes and 55 seconds
## Version 0.0.6.1
* [FEAT] Spacy Server connection for French (and other) languages
* [FEAT] At startup, check if gargantext.init script has been activated
* [UPGRADE] Use the devops/postgres/upgrade/0.0.6.1.sql upgrade script (see the example below)
* [FIX] PubMed Parser with threadDelay
* [BACK][FIX] Hash to remove duplicates on filtered text
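
Mirroring the 0.0.6 upgrade steps below, the 0.0.6.1 script can presumably be applied the same way (the path is taken from the entry above):
```
# apply the 0.0.6.1 schema upgrade to the database configured in gargantext.ini
./bin/psql gargantext.ini < devops/postgres/upgrade/0.0.6.1.sql
```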
## Version 0.0.6
* [OPTIM] Ngrams Table optimization. To upgrade:
  1. `./bin/psql gargantext.ini < devops/postgresql/upgrade/0.0.6.sql`
  2. in `stack --nix repl` run `runCmdReplEasy $ migrateFromDirToDb`
* [FIX] Ngrams Table next button: loads only one time instead of twice previously
* [FRONT][FIX] Resize handler on Write Node
* [FRONT][FIX] Do not highlight ngrams if maximum abstract length > 4500 characters
## Version 0.0.5.9.6
* [BACK][FIX] Nix build ok
......
@@ -230,7 +230,8 @@ Playground is located at http://localhost:8008/gql
}
```

## PostgreSQL

### Upgrading

### Upgrading using Docker

https://www.cloudytuts.com/tutorials/docker/how-to-upgrade-postgresql-in-docker-and-kubernetes/

@@ -255,3 +256,36 @@ docker exec -i <new-container-id> createdb -U gargantua gargandbV5
# now we can restore the dump
docker exec -i <new-container-id> psql -U gargantua -d gargandbV5 < 11-db.dump
```
### Upgrading manually (dump and restore)

There is a solution using pg_upgradecluster, but then you need to manage both
the version 13 and the version 14 clusters (a rough sketch is shown below).
The simpler dump-and-restore procedure that follows avoids this.
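For reference, the pg_upgradecluster route looks roughly like this on a Debian-based
system (the cluster name `main` and the 13/14 versions are assumptions about your setup):
```
# drop the empty 14 cluster created by the package, if any, then migrate 13 to 14
sudo pg_dropcluster 14 main --stop
sudo pg_upgradecluster 13 main
# once the new cluster is verified, the old one can be removed
sudo pg_dropcluster 13 main
```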
First save your data:
```
sudo su postgres
pg_dumpall > gargandb.dump
```
Upgrade PostgreSQL:
```
sudo apt install postgresql-server-14 postgresql-client-14
sudo apt remove --purge postgresql-13
```
Restore your data:
```
sudo su postgres
psql < gargandb.dump
```
You may need to restore the gargantua password:
```
ALTER ROLE gargantua PASSWORD 'yourPasswordIn_gargantext.ini';
```
You may also need to change the port to 5433 for the database connection in
your gargantext.ini file.
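As an illustration only (the exact key name in gargantext.ini is an assumption here,
check your own file):
```
# gargantext.ini -- point the backend at the upgraded cluster (key name assumed)
DB_PORT = 5433
```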
@@ -40,7 +40,7 @@ main = do
--let q = ["water", "scarcity", "morocco", "shortage","flood"]
let q = ["gratuit", "gratuité", "culture", "culturel"]
-eDocs <- CSV.readFile rPath
+eDocs <- CSV.readCSVFile rPath
case eDocs of
Right (h, csvDocs) -> do
putStrLn $ "Number of documents before:" <> show (V.length csvDocs)
......
@@ -42,7 +42,7 @@ import Gargantext.Core.Types
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Context
import Gargantext.Core.Text.Terms.WithList
-import Gargantext.Core.Text.Corpus.Parsers.CSV (readFile, csv_title, csv_abstract, csv_publication_year, unIntOrDec, fromMIntOrDec, defaultYear)
+import Gargantext.Core.Text.Corpus.Parsers.CSV (readCSVFile, csv_title, csv_abstract, csv_publication_year, unIntOrDec, fromMIntOrDec, defaultYear)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms (terms)
import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs)
@@ -86,7 +86,7 @@ main = do
[corpusFile, termListFile, outputFile] <- getArgs
--corpus :: IO (DM.IntMap [[Text]])
-eCorpusFile <- readFile corpusFile
+eCorpusFile <- readCSVFile corpusFile
case eCorpusFile of
Right cf -> do
let corpus = DM.fromListWith (<>)
......
@@ -109,7 +109,7 @@ csvToDocs parser patterns time path =
(termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
Nothing
[]
-) <$> snd <$> either (\err -> panic $ cs $ "CSV error" <> (show err)) identity <$> Csv.readFile path
+) <$> snd <$> either (\err -> panic $ cs $ "CSV error" <> (show err)) identity <$> Csv.readCSVFile path
Csv' limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
......
@@ -4,3 +4,4 @@ tmux new -d -s gargantext './server' \; \
split-window -h -d 'cd ./purescript-gargantext ; ./server' \; \
select-pane -t 1 \; \
split-window -d 'cd deps/CoreNLP ; ./startServer.sh' \; \
split-window -d 'cd deps/nlp/spacy-server ; source env/bin/activate ; ./server' \; \
@@ -221,7 +221,7 @@ ALTER TABLE public.rights OWNER TO gargantua;
------------------------------------------------------------
-- Node Story
-create table public.node_stories (
+CREATE TABLE public.node_stories (
id SERIAL,
node_id INTEGER NOT NULL,
version INTEGER NOT NULL,
......
@@ -316,6 +316,7 @@ library
Gargantext.Database.Types
Gargantext.Utils.Aeson
Gargantext.Utils.JohnSnowNLP
Gargantext.Utils.SpacyNLP
Gargantext.Utils.Servant
Gargantext.Utils.UTCTime
Paths_gargantext
@@ -492,6 +493,7 @@ library
, unordered-containers
, utf8-string
, uuid
, uri-encode
, validity
, vector
, wai
......
@@ -6,7 +6,7 @@ name: gargantext
# | | | +----- Layers * : New versions with API additions
# | | | | +--- Layers * : New versions without API breaking changes
# | | | | |
-version: '0.0.6'
+version: '0.0.6.1'
synopsis: Search, map, share
description: Please see README.md
category: Data
@@ -75,6 +75,7 @@ library:
- Gargantext.Core.Types.Individu
- Gargantext.Core.Types.Main
- Gargantext.Core.Utils.Prefix
- Gargantext.Utils.SpacyNLP
- Gargantext.Database.Action.Flow
- Gargantext.Database.Action.Flow.Types
- Gargantext.Database.Action.User.New
@@ -100,6 +101,7 @@ library:
- Gargantext.Core.Text.Metrics.TFICF
- Gargantext.Core.Text.Metrics.CharByChar
- Gargantext.Core.Text.Metrics.Count
- Gargantext.Core.Text.Prepare
- Gargantext.Core.Text.Search
- Gargantext.Core.Text.Terms
- Gargantext.Core.Text.Terms.Mono
@@ -275,6 +277,7 @@ library:
- unordered-containers
- utf8-string
- uuid
- uri-encode
- validity
- vector
- wai
......
@@ -31,10 +31,13 @@ Pouillard (who mainly made it).
module Gargantext.API
where
-import Control.Exception (finally)
+import Control.Exception (catch, finally, SomeException)
import Control.Lens
import Control.Monad.Except
import Control.Monad.Reader (runReaderT)
import Data.Either
import Data.List (lookup)
import Data.Text (pack)
import Data.Text.Encoding (encodeUtf8)
import Data.Text.IO (putStrLn)
import Data.Validity
@@ -49,6 +52,7 @@ import Gargantext.API.Prelude
import Gargantext.API.Routes
import Gargantext.API.Server (server)
import Gargantext.Core.NodeStory
import qualified Gargantext.Database.Prelude as DB
import Gargantext.Prelude hiding (putStrLn)
import Network.HTTP.Types hiding (Query)
import Network.Wai
@@ -66,11 +70,21 @@ data Mode = Dev | Mock | Prod
startGargantext :: Mode -> PortNumber -> FilePath -> IO ()
startGargantext mode port file = do
env <- newEnv port file
runDbCheck env
portRouteInfo port
app <- makeApp env
mid <- makeDevMiddleware mode
run port (mid app) `finally` stopGargantext env
where runDbCheck env = do
r <- runExceptT (runReaderT DB.dbCheck env) `catch`
(\(_ :: SomeException) -> return $ Right False)
case r of
Right True -> return ()
_ -> panic $
"You must run 'gargantext-init " <> pack file <>
"' before running gargantext-server (only the first time)."
portRouteInfo :: PortNumber -> IO ()
portRouteInfo port = do
putStrLn " ----Main Routes----- "
......
@@ -48,8 +48,6 @@ import GHC.Generics (Generic)
import Servant
import Servant.Auth.Server
import Servant.Job.Async (JobFunction(..), serveJobsAPI)
import qualified Text.Blaze.Html.Renderer.Text as H
import qualified Text.Blaze.Html5 as H
--import qualified Text.Blaze.Html5.Attributes as HA
import qualified Gargantext.Prelude.Crypto.Auth as Auth
@@ -59,7 +57,6 @@ import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types
import Gargantext.API.Job (jobLogSuccess)
import Gargantext.API.Prelude (HasJoseError(..), joseError, HasServerError, GargServerC, GargServer, _ServerError)
import Gargantext.API.Types
import Gargantext.Core.Mail (MailModel(..), mail)
import Gargantext.Core.Mail.Types (HasMail, mailSettings)
import Gargantext.Core.Types.Individu (User(..), Username, GargPassword(..))
@@ -173,7 +170,7 @@ type ForgotPasswordAPI = Summary "Forgot password POST API"
:> Post '[JSON] ForgotPasswordResponse
:<|> Summary "Forgot password GET API"
:> QueryParam "uuid" Text
-:> Get '[HTML] Text
+:> Get '[JSON] ForgotPasswordGet
forgotPassword :: GargServer ForgotPasswordAPI
@@ -193,8 +190,8 @@ forgotPasswordPost (ForgotPasswordRequest email) = do
pure $ ForgotPasswordResponse "ok"
forgotPasswordGet :: (HasSettings env, HasConnectionPool env, HasJoseError err, HasConfig env, HasMail env, HasServerError err)
-=> Maybe Text -> Cmd' env err Text
+=> Maybe Text -> Cmd' env err ForgotPasswordGet
-forgotPasswordGet Nothing = pure ""
+forgotPasswordGet Nothing = pure $ ForgotPasswordGet ""
forgotPasswordGet (Just uuid) = do
let mUuid = fromText uuid
case mUuid of
@@ -209,7 +206,7 @@ forgotPasswordGet (Just uuid) = do
---------------------
forgotPasswordGetUser :: (HasSettings env, HasConnectionPool env, HasJoseError err, HasConfig env, HasMail env, HasServerError err)
-=> UserLight -> Cmd' env err Text
+=> UserLight -> Cmd' env err ForgotPasswordGet
forgotPasswordGetUser (UserLight { .. }) = do
-- pick some random password
password <- liftBase gargPass
@@ -225,16 +222,7 @@ forgotPasswordGetUser (UserLight { .. }) = do
-- clear the uuid so that the page can't be refreshed
_ <- updateUserForgotPasswordUUID $ UserLight { userLight_forgot_password_uuid = Nothing, .. }
-pure $ toStrict $ H.renderHtml $
+pure $ ForgotPasswordGet password
H.docTypeHtml $ do
H.html $ do
H.head $ do
H.title "Gargantext - forgot password"
H.body $ do
H.h1 "Forgot password"
H.p $ do
H.span "Here is your password (will be shown only once): "
H.b $ H.toHtml password
forgotUserPassword :: (HasConnectionPool env, HasConfig env, HasMail env)
=> UserLight -> Cmd' env err ()
......
@@ -112,6 +112,7 @@ data PathId = PathNode NodeId | PathNodeNode ListId DocId
---------------------------
type Email = Text
type Password = Text
data ForgotPasswordRequest = ForgotPasswordRequest { _fpReq_email :: Email }
deriving (Generic )
@@ -124,3 +125,9 @@ data ForgotPasswordResponse = ForgotPasswordResponse { _fpRes_status :: Text }
$(deriveJSON (unPrefix "_fpRes_") ''ForgotPasswordResponse)
instance ToSchema ForgotPasswordResponse where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_fpRes_")
data ForgotPasswordGet = ForgotPasswordGet {_fpGet_password :: Password}
deriving (Generic )
$(deriveJSON (unPrefix "_fpGet_") ''ForgotPasswordGet)
instance ToSchema ForgotPasswordGet where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_fpGet_")
\ No newline at end of file
@@ -66,7 +66,7 @@ getBackendVersion :: ClientM Text
-- * auth API
postAuth :: AuthRequest -> ClientM AuthResponse
forgotPasswordPost :: ForgotPasswordRequest -> ClientM ForgotPasswordResponse
-forgotPasswordGet :: Maybe Text -> ClientM Text
+forgotPasswordGet :: Maybe Text -> ClientM ForgotPasswordGet
postForgotPasswordAsync :: ClientM (JobStatus 'Safe JobLog)
postForgotPasswordAsyncJob :: JobInput Maybe ForgotPasswordAsyncParams -> ClientM (JobStatus 'Safe JobLog)
killForgotPasswordAsyncJob :: JobID 'Unsafe -> Maybe Limit -> Maybe Offset -> ClientM (JobStatus 'Safe JobLog)
......
@@ -76,7 +76,7 @@ fileDownload uId nId = do
let (HyperdataFile { _hff_name = name'
, _hff_path = path }) = node ^. node_hyperdata
-Contents c <- GargDB.readFile $ unpack path
+Contents c <- GargDB.readGargFile $ unpack path
let (mMime, _) = DMT.guessType DMT.defaultmtd False $ unpack name'
mime = case mMime of
......
@@ -12,6 +12,7 @@ Portability : POSIX
module Gargantext.Core.Mail where
import Control.Lens (view)
import Network.URI.Encode (encodeText)
import Data.Text (Text, unlines, splitOn)
import Gargantext.Core.Types.Individu
import Gargantext.Database.Schema.User (UserLight(..))
@@ -90,7 +91,7 @@ bodyWith server (ForgotPassword { user = UserLight { userLight_forgot_password_u
, forgot_password_link server uuid ]
forgot_password_link :: ServerAddress -> Text -> Text
-forgot_password_link server uuid = server <> "/api/v1.0/forgot-password?uuid=" <> uuid
+forgot_password_link server uuid = server <> "/#/forgotPassword?uuid=" <> uuid <> "&server=" <> encodeText server
------------------------------------------------------------------------
email_subject :: MailModel -> Text
......
@@ -234,7 +234,7 @@ delimiter Comma = fromIntegral $ ord ','
------------------------------------------------------------------------
readCsvOn' :: [CsvDoc -> Text] -> FilePath -> IO (Either Prelude.String [Text])
readCsvOn' fields fp = do
-r <- readFile fp
+r <- readCSVFile fp
pure $ ( V.toList
. V.map (\l -> intercalate (pack " ") $ map (\field -> field l) fields)
. snd ) <$> r
@@ -267,8 +267,8 @@ readByteStringStrict d ff = (readByteStringLazy d ff) . BL.fromStrict
------------------------------------------------------------------------
-- | TODO use readFileLazy
-readFile :: FilePath -> IO (Either Prelude.String (Header, Vector CsvDoc))
+readCSVFile :: FilePath -> IO (Either Prelude.String (Header, Vector CsvDoc))
-readFile fp = do
+readCSVFile fp = do
result <- fmap (readCsvLazyBS Comma) $ BL.readFile fp
case result of
Left _err -> fmap (readCsvLazyBS Tab) $ BL.readFile fp
@@ -448,7 +448,7 @@ parseHal' bs = (V.toList . V.map csvHal2doc . snd) <$> readCsvHalLazyBS bs
------------------------------------------------------------------------
parseCsv :: FilePath -> IO (Either Prelude.String [HyperdataDocument])
-parseCsv fp = fmap (V.toList . V.map csv2doc . snd) <$> readFile fp
+parseCsv fp = fmap (V.toList . V.map csv2doc . snd) <$> readCSVFile fp
{-
parseCsv' :: BL.ByteString -> Either Prelude.String [HyperdataDocument]
......
{-|
Module      : Gargantext.Core.Text.Prepare
Description : Tools to clean text
Copyright   : (c) CNRS, 2017 - present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Clean some texts before importing them.

For a given language, choose a big masterpiece of literature to analyze
with GarganText. Here is an example with a famous French writer who
could be the incarnation of the mythic Gargantua.
-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Core.Text.Prepare
where
import Data.Text (Text)
import Gargantext.Core.Text (sentences)
import Gargantext.Prelude
import qualified Data.List as List
import qualified Data.Text as Text
---------------------------------------------------------------------
prepareText :: Paragraph -> Text -> [Text]
prepareText p txt = groupText p
$ List.filter (/= "")
$ toParagraphs
$ Text.lines
$ Text.replace "_" " " -- some texts seem to be underlined
$ Text.replace "--" "" -- removing bullets like of dialogs
$ Text.replace "\xd" "" txt
---------------------------------------------------------------------
groupText :: Paragraph -> [Text] -> [Text]
groupText (Uniform blockSize) = groupUniform blockSize
groupText AuthorLike = groupLines
---------------------------------------------------------------------
data Paragraph = Uniform Grain | AuthorLike
-- Uniform does not preserve the author's paragraphs, but paragraph length is uniform
-- AuthorLike preserves the author's paragraphs, but paragraph length is not uniform
-- Grain: number of sentences per block of text
-- Step : overlap of sentences between connex blocks of text
groupUniform :: Grain -> [Text] -> [Text]
groupUniform g ts = map (Text.intercalate " ")
$ chunkAlong g g
$ sentences
$ Text.concat ts
groupLines :: [Text] -> [Text]
groupLines xxx@(a:b:xs) =
if Text.length a > moyenne
then [a] <> (groupLines (b:xs))
else let ab = a <> " " <> b in
if Text.length ab > moyenne
then [ab] <> (groupLines xs)
else groupLines ([ab] <> xs)
where
moyenne = round
$ mean
$ (map (fromIntegral . Text.length) xxx :: [Double])
groupLines [a] = [a]
groupLines [] = []
groupLines_test :: [Text]
groupLines_test = groupLines theData
where
theData = ["abxxxx", "bc", "cxxx", "d"]
---------------------------------------------------------------------
toParagraphs :: [Text] -> [Text]
toParagraphs (a:x:xs) =
if a == ""
then [a] <> toParagraphs (x:xs)
else if x == ""
then [a] <> toParagraphs (x:xs)
else toParagraphs $ [a <> " " <> x ] <> xs
toParagraphs [a] = [a]
toParagraphs [] = []
-- Tests
-- TODO for internships: Property tests
toParagraphs_test :: Bool
toParagraphs_test =
toParagraphs ["a","b","","c","d","d","","e","f","","g","h",""]
== [ "a b", "", "c d d", "", "e f", "", "g h", ""]
@@ -28,7 +28,8 @@ import qualified Gargantext.Core.Text.Terms.Multi.Lang.En as En
import qualified Gargantext.Core.Text.Terms.Multi.Lang.Fr as Fr
import Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake)
-import qualified Gargantext.Utils.JohnSnowNLP as JohnSnow
+-- import qualified Gargantext.Utils.JohnSnowNLP as JohnSnow
import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
-------------------------------------------------------------------
@@ -51,7 +52,7 @@ tokenTag2terms (TokenTag ws t _ _) = Terms ws t
tokenTags :: Lang -> Text -> IO [[TokenTag]]
tokenTags EN txt = tokenTagsWith EN txt corenlp
-tokenTags FR txt = tokenTagsWith FR txt JohnSnow.nlp
+tokenTags FR txt = tokenTagsWith FR txt SpacyNLP.nlp
tokenTags _ _ = panic "[G.C.T.T.Multi] NLP API not implemented yet"
tokenTagsWith :: Lang -> Text -> NLP_API -> IO [[TokenTag]]
......
@@ -9,6 +9,7 @@ Portability : POSIX
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
@@ -22,27 +23,27 @@ import Gargantext.Prelude
import GHC.Generics
-data Token = Token { _tokenIndex :: Int
-                   , _tokenWord :: Text
-                   , _tokenOriginalText :: Text
-                   , _tokenLemma :: Text
-                   , _tokenCharacterOffsetBegin :: Int
-                   , _tokenCharacterOffsetEnd :: Int
-                   , _tokenPos :: Maybe POS
-                   , _tokenNer :: Maybe NER
-                   , _tokenBefore :: Maybe Text
-                   , _tokenAfter :: Maybe Text
+data Token = Token { _tokenIndex :: !Int
+                   , _tokenWord :: !Text
+                   , _tokenOriginalText :: !Text
+                   , _tokenLemma :: !Text
+                   , _tokenCharacterOffsetBegin :: !Int
+                   , _tokenCharacterOffsetEnd :: !Int
+                   , _tokenPos :: !(Maybe POS)
+                   , _tokenNer :: !(Maybe NER)
+                   , _tokenBefore :: !(Maybe Text)
+                   , _tokenAfter :: !(Maybe Text)
                    } deriving (Show, Generic)
$(deriveJSON (unPrefix "_token") ''Token)
-data Sentence = Sentence { _sentenceIndex :: Int
-                         , _sentenceTokens :: [Token]
+data Sentence = Sentence { _sentenceIndex :: !Int
+                         , _sentenceTokens :: ![Token]
                         } deriving (Show, Generic)
$(deriveJSON (unPrefix "_sentence") ''Sentence)
-data Properties = Properties { _propertiesAnnotators :: Text
-                             , _propertiesOutputFormat :: Text
+data Properties = Properties { _propertiesAnnotators :: !Text
+                             , _propertiesOutputFormat :: !Text
                             } deriving (Show, Generic)
$(deriveJSON (unPrefix "_properties") ''Properties)
......
@@ -126,7 +126,7 @@ instance FromJSON POS where
instance ToJSON POS
instance Hashable POS
------------------------------------------------------------------------
-data NER = PERSON | ORGANIZATION | LOCATION | NoNER
+data NER = PERSON | ORGANIZATION | LOCATION | NoNER { noNer :: !Text }
deriving (Show, Generic)
------------------------------------------------------------------------
instance FromJSON NER where
@@ -134,9 +134,11 @@ instance FromJSON NER where
where
ner :: [Char] -> NER
ner "PERSON" = PERSON
ner "PER" = PERSON
ner "ORGANIZATION" = ORGANIZATION
ner "LOCATION" = LOCATION
-ner _ = NoNER
+ner "LOC" = LOCATION
ner x = NoNER (cs x)
instance ToJSON NER
......
@@ -132,7 +132,7 @@ cooc2graphWith' doPartitions distance threshold strength myCooc = do
(as, bs) = List.unzip $ Map.keys distanceMap
n' = Set.size $ Set.fromList $ as <> bs
bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
-confluence' = BAC.computeConfluences 3 (Map.keys bridgeness') True
+confluence' = Map.empty -- BAC.computeConfluences 3 (Map.keys bridgeness') True
-- confluence (Map.keys bridgeness') 3 True False
seq bridgeness' $ printDebug "bridgeness OK" ()
seq confluence' $ printDebug "confluence OK" ()
......
@@ -140,13 +140,13 @@ writeFile a = do
---
-- | Example to read a file with Type
-readFile :: ( MonadReader env m
+readGargFile :: ( MonadReader env m
, HasConfig env
, MonadBase IO m
, ReadFile a
)
=> FilePath -> m a
-readFile fp = do
+readGargFile fp = do
dataPath <- view $ hasConfig . gc_datafilepath
liftBase $ readFile' $ toFilePath dataPath fp
......
@@ -9,7 +9,7 @@ Portability : POSIX
-}
-{-# LANGUAGE ConstraintKinds #-}
+{-# LANGUAGE ConstraintKinds, ScopedTypeVariables #-}
module Gargantext.Database.Prelude where
@@ -30,6 +30,7 @@ import Data.Word (Word16)
import Database.PostgreSQL.Simple (Connection, connect)
import Database.PostgreSQL.Simple.FromField ( Conversion, ResultError(ConversionFailed), fromField, returnError)
import Database.PostgreSQL.Simple.Internal (Field)
import Database.PostgreSQL.Simple.Types (Query(..))
import Gargantext.Core.Mail.Types (HasMail)
import Gargantext.Prelude
import Gargantext.Prelude.Config (readIniFile', val)
@@ -167,10 +168,9 @@ runPGSQuery_ :: ( CmdM env err m
runPGSQuery_ q = mkCmd $ \conn -> catch (PGS.query_ conn q) printError
where
printError (SomeException e) = do
-printDebug "[G.D.P.runPGSQuery_]" ("TODO: format query error" :: Text)
+hPutStrLn stderr (fromQuery q)
throw (SomeException e)
execPGSQuery :: PGS.ToRow a => PGS.Query -> a -> Cmd err Int64
execPGSQuery q a = mkCmd $ \conn -> PGS.execute conn q a
@@ -209,3 +209,9 @@ fromField' field mb = do
printSqlOpa :: Default Unpackspec a a => Select a -> IO ()
printSqlOpa = putStrLn . maybe "Empty query" identity . showSql
dbCheck :: CmdM env err m => m Bool
dbCheck = do
r :: [PGS.Only Text] <- runPGSQuery_ "select username from public.auth_user"
case r of
[] -> return False
_ -> return True
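For context, this check simply probes the auth_user table; startGargantext above panics
with a hint to run gargantext-init when it fails. A first-time setup therefore looks
roughly like this (the ini path is your own):
```
# initialise the database described in the ini file, then start the server as usual
gargantext-init gargantext.ini
```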
@@ -58,6 +58,7 @@ import Control.Lens (set, view)
import Control.Lens.Cons
import Control.Lens.Prism
import Data.Aeson (toJSON, encode, ToJSON)
import Data.Char (isAlpha)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
-- import Data.ByteString (ByteString)
@@ -77,7 +78,7 @@ import Gargantext.Database.Schema.Node (NodePoly(..))
import qualified Gargantext.Defaults as Defaults
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (hash)
-import qualified Data.Text as DT (pack, concat, take)
+import qualified Data.Text as DT (pack, concat, take, filter, toLower)
{-| To Print result query
import Data.ByteString.Internal (ByteString)
@@ -208,11 +209,15 @@ instance AddUniqId HyperdataDocument
shaBdd = hash $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hd_bdd d))] <> shaParametersDoc)
shaParametersDoc :: [(HyperdataDocument -> Text)]
-shaParametersDoc = [ \d -> maybeText (_hd_title d)
-                   , \d -> maybeText (_hd_abstract d)
-                   , \d -> maybeText (_hd_source d)
+shaParametersDoc = [ \d -> filterText $ maybeText (_hd_title d)
+                   , \d -> filterText $ maybeText (_hd_abstract d)
+                   , \d -> filterText $ maybeText (_hd_source d)
, \d -> maybeText (_hd_publication_date d)
]
filterText :: Text -> Text
filterText = DT.toLower . (DT.filter isAlpha)
-- TODO put this elsewhere (fix bin/gargantext-init/Main.hs too)
secret :: Text
secret = "Database secret to change"
@@ -266,6 +271,7 @@ addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd)
, \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d
]
maybeText :: Maybe Text -> Text
maybeText = maybe (DT.pack "") identity
......
@@ -19,27 +19,35 @@ commentary with @some markup@.
module Gargantext.Database.Query.Table.NodeNode
( module Gargantext.Database.Schema.NodeNode
-, queryNodeNodeTable
+, deleteNodeNode
, getNodeNode
, insertNodeNode
-, deleteNodeNode
+, nodeNodesCategory
, nodeNodesScore
, queryNodeNodeTable
, selectDocNodes
, selectDocs
, selectDocsDates
, selectPublicNodes
)
where
import Control.Arrow (returnA)
-import Control.Lens ((^.))
+import Control.Lens ((^.), view)
-import qualified Opaleye as O
+import Data.Text (Text, splitOn)
-import Opaleye
+import Data.Maybe (catMaybes)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.Core
import Gargantext.Core.Types
-import Gargantext.Database.Schema.NodeNode
+import Gargantext.Database.Admin.Types.Hyperdata
-import Gargantext.Database.Admin.Types.Hyperdata
+import Gargantext.Database.Prelude
-import Gargantext.Database.Prelude
+import Gargantext.Database.Schema.Node
-import Gargantext.Database.Schema.Node
+import Gargantext.Database.Schema.NodeNode
import Gargantext.Database.Schema.NodeNode
import Gargantext.Prelude
import Opaleye
import qualified Database.PostgreSQL.Simple as PGS
import qualified Opaleye as O
queryNodeNodeTable :: Select NodeNodeRead
queryNodeNodeTable = selectTable nodeNodeTable
@@ -113,8 +121,113 @@ deleteNodeNode n1 n2 = mkCmd $ \conn ->
)
------------------------------------------------------------------------
-selectPublicNodes :: HasDBid NodeType
-=> (Hyperdata a, DefaultFromField SqlJsonb a)
+-- | Favorite management
+_nodeNodeCategory :: CorpusId -> DocId -> Int -> Cmd err [Int]
_nodeNodeCategory cId dId c = map (\(PGS.Only a) -> a) <$> runPGSQuery favQuery (c,cId,dId)
where
favQuery :: PGS.Query
favQuery = [sql|UPDATE nodes_nodes SET category = ?
WHERE node1_id = ? AND node2_id = ?
RETURNING node2_id;
|]
nodeNodesCategory :: [(CorpusId, DocId, Int)] -> Cmd err [Int]
nodeNodesCategory inputData = map (\(PGS.Only a) -> a)
<$> runPGSQuery catQuery (PGS.Only $ Values fields inputData)
where
fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","int4"]
catQuery :: PGS.Query
catQuery = [sql| UPDATE nodes_nodes as nn0
SET category = nn1.category
FROM (?) as nn1(node1_id,node2_id,category)
WHERE nn0.node1_id = nn1.node1_id
AND nn0.node2_id = nn1.node2_id
RETURNING nn1.node2_id
|]
------------------------------------------------------------------------
-- | Score management
_nodeNodeScore :: CorpusId -> DocId -> Int -> Cmd err [Int]
_nodeNodeScore cId dId c = map (\(PGS.Only a) -> a) <$> runPGSQuery scoreQuery (c,cId,dId)
where
scoreQuery :: PGS.Query
scoreQuery = [sql|UPDATE nodes_nodes SET score = ?
WHERE node1_id = ? AND node2_id = ?
RETURNING node2_id;
|]
nodeNodesScore :: [(CorpusId, DocId, Int)] -> Cmd err [Int]
nodeNodesScore inputData = map (\(PGS.Only a) -> a)
<$> runPGSQuery catScore (PGS.Only $ Values fields inputData)
where
fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","int4"]
catScore :: PGS.Query
catScore = [sql| UPDATE nodes_nodes as nn0
SET score = nn1.score
FROM (?) as nn1(node1_id, node2_id, score)
WHERE nn0.node1_id = nn1.node1_id
AND nn0.node2_id = nn1.node2_id
RETURNING nn1.node2_id
|]
------------------------------------------------------------------------
_selectCountDocs :: HasDBid NodeType => CorpusId -> Cmd err Int
_selectCountDocs cId = runCountOpaQuery (queryCountDocs cId)
where
queryCountDocs cId' = proc () -> do
(n, nn) <- joinInCorpus -< ()
restrict -< nn^.nn_node1_id .== (toNullable $ pgNodeId cId')
restrict -< nn^.nn_category .>= (toNullable $ sqlInt4 1)
restrict -< n^.node_typename .== (sqlInt4 $ toDBid NodeDocument)
returnA -< n
-- | TODO use UTCTime fast
selectDocsDates :: HasDBid NodeType => CorpusId -> Cmd err [Text]
selectDocsDates cId = map (head' "selectDocsDates" . splitOn "-")
<$> catMaybes
<$> map (view hd_publication_date)
<$> selectDocs cId
selectDocs :: HasDBid NodeType => CorpusId -> Cmd err [HyperdataDocument]
selectDocs cId = runOpaQuery (queryDocs cId)
queryDocs :: HasDBid NodeType => CorpusId -> O.Select (Column SqlJsonb)
queryDocs cId = proc () -> do
(n, nn) <- joinInCorpus -< ()
restrict -< nn^.nn_node1_id .== (toNullable $ pgNodeId cId)
restrict -< nn^.nn_category .>= (toNullable $ sqlInt4 1)
restrict -< n^.node_typename .== (sqlInt4 $ toDBid NodeDocument)
returnA -< view (node_hyperdata) n
selectDocNodes :: HasDBid NodeType =>CorpusId -> Cmd err [Node HyperdataDocument]
selectDocNodes cId = runOpaQuery (queryDocNodes cId)
queryDocNodes :: HasDBid NodeType =>CorpusId -> O.Select NodeRead
queryDocNodes cId = proc () -> do
(n, nn) <- joinInCorpus -< ()
restrict -< nn^.nn_node1_id .== (toNullable $ pgNodeId cId)
restrict -< nn^.nn_category .>= (toNullable $ sqlInt4 1)
restrict -< n^.node_typename .== (sqlInt4 $ toDBid NodeDocument)
returnA -< n
joinInCorpus :: O.Select (NodeRead, NodeNodeReadNull)
joinInCorpus = leftJoin queryNodeTable queryNodeNodeTable cond
where
cond :: (NodeRead, NodeNodeRead) -> Column SqlBool
cond (n, nn) = nn^.nn_node2_id .== (view node_id n)
_joinOn1 :: O.Select (NodeRead, NodeNodeReadNull)
_joinOn1 = leftJoin queryNodeTable queryNodeNodeTable cond
where
cond :: (NodeRead, NodeNodeRead) -> Column SqlBool
cond (n, nn) = nn^.nn_node1_id .== n^.node_id
------------------------------------------------------------------------
selectPublicNodes :: HasDBid NodeType => (Hyperdata a, DefaultFromField SqlJsonb a)
=> Cmd err [(Node a, Maybe Int)]
selectPublicNodes = runOpaQuery (queryWithType NodeFolderPublic)
......
{-|
Module : Gargantext.Utils.SpacyNLP
Description : Spacy NLP API connection
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Spacy ecosystem: https://github.com/explosion/spaCy
Server to be used: https://gitlab.iscpif.fr/gargantext/spacy-server
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Utils.SpacyNLP where
import Control.Lens
import Data.Aeson (encode)
import Data.Aeson.TH (deriveJSON)
import Data.Text hiding (map, group, filter, concat, zip)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Terms.Multi.PosTagging.Types
import Gargantext.Core.Types (POS(..), NER(..))
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import Network.HTTP.Simple (parseRequest, httpJSON, setRequestBodyLBS, getResponseBody, Response)
data SpacyData = SpacyData { _spacy_data :: ![SpacyText]}
deriving (Show)
data SpacyText = SpacyText { _spacy_text :: !Text
, _spacy_tags :: ![SpacyTags]
} deriving (Show)
data SpacyTags =
SpacyTags { _spacyTags_text :: !Text
, _spacyTags_text_with_ws :: !Text
, _spacyTags_whitespace :: !Text
, _spacyTags_head :: !Text
, _spacyTags_head_index :: !Int
, _spacyTags_left_edge :: !Text
, _spacyTags_right_edge :: !Text
, _spacyTags_index :: Int
, _spacyTags_ent_type :: !NER
, _spacyTags_ent_iob :: !Text
, _spacyTags_lemma :: !Text
, _spacyTags_normalized :: !Text
, _spacyTags_shape :: !Text
, _spacyTags_prefix :: !Text
, _spacyTags_suffix :: !Text
, _spacyTags_is_alpha :: Bool
, _spacyTags_is_ascii :: Bool
, _spacyTags_is_digit :: Bool
, _spacyTags_is_title :: Bool
, _spacyTags_is_punct :: Bool
, _spacyTags_is_left_punct :: Bool
, _spacyTags_is_right_punct :: Bool
, _spacyTags_is_space :: Bool
, _spacyTags_is_bracket :: Bool
, _spacyTags_is_quote :: Bool
, _spacyTags_is_currency :: Bool
, _spacyTags_like_url :: Bool
, _spacyTags_like_num :: Bool
, _spacyTags_like_email :: Bool
, _spacyTags_is_oov :: Bool
, _spacyTags_is_stop :: Bool
, _spacyTags_pos :: POS
, _spacyTags_tag :: POS
, _spacyTags_dep :: !Text
, _spacyTags_lang :: !Text
, _spacyTags_prob :: !Int
, _spacyTags_char_offset :: !Int
} deriving (Show)
data SpacyRequest = SpacyRequest { _spacyRequest_text :: !Text }
deriving (Show)
spacyRequest :: Text -> IO SpacyData
spacyRequest txt = do
url <- parseRequest $ unpack "POST http://localhost:8001/pos"
let request = setRequestBodyLBS (encode $ SpacyRequest txt) url
result <- httpJSON request :: IO (Response SpacyData)
pure $ getResponseBody result
-- Instances
deriveJSON (unPrefix "_spacy_") ''SpacyData
deriveJSON (unPrefix "_spacy_") ''SpacyText
deriveJSON (unPrefix "_spacyTags_") ''SpacyTags
deriveJSON (unPrefix "_spacyRequest_") ''SpacyRequest
makeLenses ''SpacyData
makeLenses ''SpacyText
makeLenses ''SpacyTags
makeLenses ''SpacyRequest
----------------------------------------------------------------
spacyTagsToToken :: SpacyTags -> Token
spacyTagsToToken st = Token (st ^. spacyTags_index)
(st ^. spacyTags_normalized)
(st ^. spacyTags_text)
(st ^. spacyTags_lemma)
(st ^. spacyTags_head_index)
(st ^. spacyTags_char_offset)
(Just $ st ^. spacyTags_pos)
(Just $ st ^. spacyTags_ent_type)
(Just $ st ^. spacyTags_prefix)
(Just $ st ^. spacyTags_suffix)
spacyDataToPosSentences :: SpacyData -> PosSentences
spacyDataToPosSentences (SpacyData ds) = PosSentences
$ map (\(i, ts) -> Sentence i ts)
$ zip [1..]
$ map (\(SpacyText _ tags)-> map spacyTagsToToken tags) ds
-----------------------------------------------------------------
nlp :: Lang -> Text -> IO PosSentences
nlp FR txt = spacyDataToPosSentences <$> spacyRequest txt
nlp _ _ = panic "Make sure you have the right model for your lang for spacy Server"
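
A quick smoke test of the connection above, assuming a spacy-server instance is already
listening on localhost:8001 as hard-coded in spacyRequest (the sample sentence is arbitrary):
```
{-# LANGUAGE OverloadedStrings #-}
import Gargantext.Utils.SpacyNLP (SpacyData(..), spacyRequest)

-- POST one French sentence to the spacy-server /pos endpoint and dump
-- the tagged result (SpacyText has a Show instance).
main :: IO ()
main = do
  SpacyData texts <- spacyRequest "Gargantua est un roman de François Rabelais."
  mapM_ print texts
```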
@@ -72,7 +72,7 @@ extra-deps:
# External Data API connectors
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
-  commit: 02e03d9b856bd35d391f43da8525330f9d184615
+  commit: 0b906ccc5a4a1b7532eb47c825dc02484a2d6b0e
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
  commit: a34bb341236d82cf3d488210bc1d8448a98f5808
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
......