Commit a7be6271 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 141-dev-node-stories-db-optimization

parents 18306d29 bf006791
Pipeline #3150 passed with stage
in 91 minutes and 55 seconds
## Version 0.0.6.1
* [FEAT] Spacy Server connection for French (and others) languages
* [FEAT] At startup, check if gargantext.init script has been activated
* [UPGRADE] Use the devops/postgres/upgrade/0.0.6.1.sql uprade script
* [FIX] PubMed Parser with threadDelay
* [BACK][FIX] Hash to remove duplicates on filtered text
## Version 0.0.6
* [OPTIM] Ngrams Table optmization. To upgrade:
1. `./bin/psql gargantext.ini < devops/postgresql/upgrade/0.0.6.sql`
2. in `stack --nix repl` run `runCmdReplEasy $ migrateFromDirToDb`
* [FIX] Ngrams Table next button: loads only one time instead of twice previously
* [FRONT][FIX] Resize handler on Write Node
* [FRONT][FIX] Do not highlight ngrams if maximum abstract length > 4500 characters
## Version 0.0.5.9.6
* [BACK][FIX] Nix build ok
......
......@@ -230,7 +230,8 @@ Playground is located at http://localhost:8008/gql
}
```
## PostgreSQL
### Upgrading
### Upgrading using Docker
https://www.cloudytuts.com/tutorials/docker/how-to-upgrade-postgresql-in-docker-and-kubernetes/
......@@ -255,3 +256,36 @@ docker exec -i <new-container-id> createdb -U gargantua gargandbV5
# now we can restore the dump
docker exec -i <new-container-id> psql -U gargantua -d gargandbV5 < 11-db.dump
```
### Upgrading using
There is a solution using pgupgrade_cluster but you need to manage the
clusters version 14 and 13. Hence here is a simple solution to upgrade.
First save your data:
```
sudo su postgres
pg_dumpall > gargandb.dump
```
Upgrade postgresql:
```
sudo apt install postgresql-server-14 postgresql-client-14
sudo apt remove --purge postgresql-13
```
Restore your data:
```
sudo su postgres
psql < gargandb.dump
```
Maybe you need to restore the gargantua password
```
ALTER ROLE gargantua PASSWORD 'yourPasswordIn_gargantext.ini'
```
Maybe you need to change the port to 5433 for database connection in
your gargantext.ini file.
......@@ -40,7 +40,7 @@ main = do
--let q = ["water", "scarcity", "morocco", "shortage","flood"]
let q = ["gratuit", "gratuité", "culture", "culturel"]
eDocs <- CSV.readFile rPath
eDocs <- CSV.readCSVFile rPath
case eDocs of
Right (h, csvDocs) -> do
putStrLn $ "Number of documents before:" <> show (V.length csvDocs)
......
......@@ -42,7 +42,7 @@ import Gargantext.Core.Types
import Gargantext.Core.Text.Terms
import Gargantext.Core.Text.Context
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Text.Corpus.Parsers.CSV (readFile, csv_title, csv_abstract, csv_publication_year, unIntOrDec, fromMIntOrDec, defaultYear)
import Gargantext.Core.Text.Corpus.Parsers.CSV (readCSVFile, csv_title, csv_abstract, csv_publication_year, unIntOrDec, fromMIntOrDec, defaultYear)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms (terms)
import Gargantext.Core.Text.Metrics.Count (coocOnContexts, Coocs)
......@@ -86,7 +86,7 @@ main = do
[corpusFile, termListFile, outputFile] <- getArgs
--corpus :: IO (DM.IntMap [[Text]])
eCorpusFile <- readFile corpusFile
eCorpusFile <- readCSVFile corpusFile
case eCorpusFile of
Right cf -> do
let corpus = DM.fromListWith (<>)
......
......@@ -109,7 +109,7 @@ csvToDocs parser patterns time path =
(termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
Nothing
[]
) <$> snd <$> either (\err -> panic $ cs $ "CSV error" <> (show err)) identity <$> Csv.readFile path
) <$> snd <$> either (\err -> panic $ cs $ "CSV error" <> (show err)) identity <$> Csv.readCSVFile path
Csv' limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
......
......@@ -4,3 +4,4 @@ tmux new -d -s gargantext './server' \; \
split-window -h -d 'cd ./purescript-gargantext ; ./server' \; \
select-pane -t 1 \; \
split-window -d 'cd deps/CoreNLP ; ./startServer.sh' \; \
split-window -d 'cd deps/nlp/spacy-server ; source env/bin/activate ; ./server' \; \
......@@ -221,7 +221,7 @@ ALTER TABLE public.rights OWNER TO gargantua;
------------------------------------------------------------
-- Node Story
create table public.node_stories (
CREATE TABLE public.node_stories (
id SERIAL,
node_id INTEGER NOT NULL,
version INTEGER NOT NULL,
......
......@@ -316,6 +316,7 @@ library
Gargantext.Database.Types
Gargantext.Utils.Aeson
Gargantext.Utils.JohnSnowNLP
Gargantext.Utils.SpacyNLP
Gargantext.Utils.Servant
Gargantext.Utils.UTCTime
Paths_gargantext
......@@ -492,6 +493,7 @@ library
, unordered-containers
, utf8-string
, uuid
, uri-encode
, validity
, vector
, wai
......
......@@ -6,7 +6,7 @@ name: gargantext
# | | | +----- Layers * : New versions with API additions
# | | | | +--- Layers * : New versions without API breaking changes
# | | | | |
version: '0.0.6'
version: '0.0.6.1'
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -75,6 +75,7 @@ library:
- Gargantext.Core.Types.Individu
- Gargantext.Core.Types.Main
- Gargantext.Core.Utils.Prefix
- Gargantext.Utils.SpacyNLP
- Gargantext.Database.Action.Flow
- Gargantext.Database.Action.Flow.Types
- Gargantext.Database.Action.User.New
......@@ -100,6 +101,7 @@ library:
- Gargantext.Core.Text.Metrics.TFICF
- Gargantext.Core.Text.Metrics.CharByChar
- Gargantext.Core.Text.Metrics.Count
- Gargantext.Core.Text.Prepare
- Gargantext.Core.Text.Search
- Gargantext.Core.Text.Terms
- Gargantext.Core.Text.Terms.Mono
......@@ -275,6 +277,7 @@ library:
- unordered-containers
- utf8-string
- uuid
- uri-encode
- validity
- vector
- wai
......
......@@ -31,10 +31,13 @@ Pouillard (who mainly made it).
module Gargantext.API
where
import Control.Exception (finally)
import Control.Exception (catch, finally, SomeException)
import Control.Lens
import Control.Monad.Except
import Control.Monad.Reader (runReaderT)
import Data.Either
import Data.List (lookup)
import Data.Text (pack)
import Data.Text.Encoding (encodeUtf8)
import Data.Text.IO (putStrLn)
import Data.Validity
......@@ -49,6 +52,7 @@ import Gargantext.API.Prelude
import Gargantext.API.Routes
import Gargantext.API.Server (server)
import Gargantext.Core.NodeStory
import qualified Gargantext.Database.Prelude as DB
import Gargantext.Prelude hiding (putStrLn)
import Network.HTTP.Types hiding (Query)
import Network.Wai
......@@ -66,11 +70,21 @@ data Mode = Dev | Mock | Prod
startGargantext :: Mode -> PortNumber -> FilePath -> IO ()
startGargantext mode port file = do
env <- newEnv port file
runDbCheck env
portRouteInfo port
app <- makeApp env
mid <- makeDevMiddleware mode
run port (mid app) `finally` stopGargantext env
where runDbCheck env = do
r <- runExceptT (runReaderT DB.dbCheck env) `catch`
(\(_ :: SomeException) -> return $ Right False)
case r of
Right True -> return ()
_ -> panic $
"You must run 'gargantext-init " <> pack file <>
"' before running gargantext-server (only the first time)."
portRouteInfo :: PortNumber -> IO ()
portRouteInfo port = do
putStrLn " ----Main Routes----- "
......
......@@ -48,8 +48,6 @@ import GHC.Generics (Generic)
import Servant
import Servant.Auth.Server
import Servant.Job.Async (JobFunction(..), serveJobsAPI)
import qualified Text.Blaze.Html.Renderer.Text as H
import qualified Text.Blaze.Html5 as H
--import qualified Text.Blaze.Html5.Attributes as HA
import qualified Gargantext.Prelude.Crypto.Auth as Auth
......@@ -59,7 +57,6 @@ import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
import Gargantext.API.Admin.Types
import Gargantext.API.Job (jobLogSuccess)
import Gargantext.API.Prelude (HasJoseError(..), joseError, HasServerError, GargServerC, GargServer, _ServerError)
import Gargantext.API.Types
import Gargantext.Core.Mail (MailModel(..), mail)
import Gargantext.Core.Mail.Types (HasMail, mailSettings)
import Gargantext.Core.Types.Individu (User(..), Username, GargPassword(..))
......@@ -173,7 +170,7 @@ type ForgotPasswordAPI = Summary "Forgot password POST API"
:> Post '[JSON] ForgotPasswordResponse
:<|> Summary "Forgot password GET API"
:> QueryParam "uuid" Text
:> Get '[HTML] Text
:> Get '[JSON] ForgotPasswordGet
forgotPassword :: GargServer ForgotPasswordAPI
......@@ -193,8 +190,8 @@ forgotPasswordPost (ForgotPasswordRequest email) = do
pure $ ForgotPasswordResponse "ok"
forgotPasswordGet :: (HasSettings env, HasConnectionPool env, HasJoseError err, HasConfig env, HasMail env, HasServerError err)
=> Maybe Text -> Cmd' env err Text
forgotPasswordGet Nothing = pure ""
=> Maybe Text -> Cmd' env err ForgotPasswordGet
forgotPasswordGet Nothing = pure $ ForgotPasswordGet ""
forgotPasswordGet (Just uuid) = do
let mUuid = fromText uuid
case mUuid of
......@@ -209,7 +206,7 @@ forgotPasswordGet (Just uuid) = do
---------------------
forgotPasswordGetUser :: (HasSettings env, HasConnectionPool env, HasJoseError err, HasConfig env, HasMail env, HasServerError err)
=> UserLight -> Cmd' env err Text
=> UserLight -> Cmd' env err ForgotPasswordGet
forgotPasswordGetUser (UserLight { .. }) = do
-- pick some random password
password <- liftBase gargPass
......@@ -225,16 +222,7 @@ forgotPasswordGetUser (UserLight { .. }) = do
-- clear the uuid so that the page can't be refreshed
_ <- updateUserForgotPasswordUUID $ UserLight { userLight_forgot_password_uuid = Nothing, .. }
pure $ toStrict $ H.renderHtml $
H.docTypeHtml $ do
H.html $ do
H.head $ do
H.title "Gargantext - forgot password"
H.body $ do
H.h1 "Forgot password"
H.p $ do
H.span "Here is your password (will be shown only once): "
H.b $ H.toHtml password
pure $ ForgotPasswordGet password
forgotUserPassword :: (HasConnectionPool env, HasConfig env, HasMail env)
=> UserLight -> Cmd' env err ()
......
......@@ -112,6 +112,7 @@ data PathId = PathNode NodeId | PathNodeNode ListId DocId
---------------------------
type Email = Text
type Password = Text
data ForgotPasswordRequest = ForgotPasswordRequest { _fpReq_email :: Email }
deriving (Generic )
......@@ -124,3 +125,9 @@ data ForgotPasswordResponse = ForgotPasswordResponse { _fpRes_status :: Text }
$(deriveJSON (unPrefix "_fpRes_") ''ForgotPasswordResponse)
instance ToSchema ForgotPasswordResponse where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_fpRes_")
data ForgotPasswordGet = ForgotPasswordGet {_fpGet_password :: Password}
deriving (Generic )
$(deriveJSON (unPrefix "_fpGet_") ''ForgotPasswordGet)
instance ToSchema ForgotPasswordGet where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_fpGet_")
\ No newline at end of file
......@@ -66,7 +66,7 @@ getBackendVersion :: ClientM Text
-- * auth API
postAuth :: AuthRequest -> ClientM AuthResponse
forgotPasswordPost :: ForgotPasswordRequest -> ClientM ForgotPasswordResponse
forgotPasswordGet :: Maybe Text -> ClientM Text
forgotPasswordGet :: Maybe Text -> ClientM ForgotPasswordGet
postForgotPasswordAsync :: ClientM (JobStatus 'Safe JobLog)
postForgotPasswordAsyncJob :: JobInput Maybe ForgotPasswordAsyncParams -> ClientM (JobStatus 'Safe JobLog)
killForgotPasswordAsyncJob :: JobID 'Unsafe -> Maybe Limit -> Maybe Offset -> ClientM (JobStatus 'Safe JobLog)
......
......@@ -76,7 +76,7 @@ fileDownload uId nId = do
let (HyperdataFile { _hff_name = name'
, _hff_path = path }) = node ^. node_hyperdata
Contents c <- GargDB.readFile $ unpack path
Contents c <- GargDB.readGargFile $ unpack path
let (mMime, _) = DMT.guessType DMT.defaultmtd False $ unpack name'
mime = case mMime of
......
......@@ -12,6 +12,7 @@ Portability : POSIX
module Gargantext.Core.Mail where
import Control.Lens (view)
import Network.URI.Encode (encodeText)
import Data.Text (Text, unlines, splitOn)
import Gargantext.Core.Types.Individu
import Gargantext.Database.Schema.User (UserLight(..))
......@@ -90,7 +91,7 @@ bodyWith server (ForgotPassword { user = UserLight { userLight_forgot_password_u
, forgot_password_link server uuid ]
forgot_password_link :: ServerAddress -> Text -> Text
forgot_password_link server uuid = server <> "/api/v1.0/forgot-password?uuid=" <> uuid
forgot_password_link server uuid = server <> "/#/forgotPassword?uuid=" <> uuid <> "&server=" <> encodeText server
------------------------------------------------------------------------
email_subject :: MailModel -> Text
......
......@@ -234,7 +234,7 @@ delimiter Comma = fromIntegral $ ord ','
------------------------------------------------------------------------
readCsvOn' :: [CsvDoc -> Text] -> FilePath -> IO (Either Prelude.String [Text])
readCsvOn' fields fp = do
r <- readFile fp
r <- readCSVFile fp
pure $ ( V.toList
. V.map (\l -> intercalate (pack " ") $ map (\field -> field l) fields)
. snd ) <$> r
......@@ -267,8 +267,8 @@ readByteStringStrict d ff = (readByteStringLazy d ff) . BL.fromStrict
------------------------------------------------------------------------
-- | TODO use readFileLazy
readFile :: FilePath -> IO (Either Prelude.String (Header, Vector CsvDoc))
readFile fp = do
readCSVFile :: FilePath -> IO (Either Prelude.String (Header, Vector CsvDoc))
readCSVFile fp = do
result <- fmap (readCsvLazyBS Comma) $ BL.readFile fp
case result of
Left _err -> fmap (readCsvLazyBS Tab) $ BL.readFile fp
......@@ -448,7 +448,7 @@ parseHal' bs = (V.toList . V.map csvHal2doc . snd) <$> readCsvHalLazyBS bs
------------------------------------------------------------------------
parseCsv :: FilePath -> IO (Either Prelude.String [HyperdataDocument])
parseCsv fp = fmap (V.toList . V.map csv2doc . snd) <$> readFile fp
parseCsv fp = fmap (V.toList . V.map csv2doc . snd) <$> readCSVFile fp
{-
parseCsv' :: BL.ByteString -> Either Prelude.String [HyperdataDocument]
......
{-|
Module : Gargantext.Core.Text.Clean
Description : Tools to clean text
Copyright : (c) CNRS, 2017 - present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Clean some texts before importing it.
For a given Language, chose a big master piece of litteracy to analyze
it with GarganText. Here is a an example with a famous French Writer
that could be the incarnation of the mythic Gargantua.
-}
{-# LANGUAGE OverloadedStrings #-}
module Gargantext.Core.Text.Prepare
where
import Data.Text (Text)
import Gargantext.Core.Text (sentences)
import Gargantext.Prelude
import qualified Data.List as List
import qualified Data.Text as Text
---------------------------------------------------------------------
prepareText :: Paragraph -> Text -> [Text]
prepareText p txt = groupText p
$ List.filter (/= "")
$ toParagraphs
$ Text.lines
$ Text.replace "_" " " -- some texts seem to be underlined
$ Text.replace "--" "" -- removing bullets like of dialogs
$ Text.replace "\xd" "" txt
---------------------------------------------------------------------
groupText :: Paragraph -> [Text] -> [Text]
groupText (Uniform blockSize) = groupUniform blockSize
groupText AuthorLike = groupLines
---------------------------------------------------------------------
data Paragraph = Uniform Grain | AuthorLike
-- Uniform does not preserve the paragraphs of the author but length of paragraphs is uniform
-- Author Like preserve the paragraphs of the Author but length of paragraphs is not uniform
-- Grain: number of Sentences by block of Text
-- Step : overlap of sentence between connex block of Text
groupUniform :: Grain -> [Text] -> [Text]
groupUniform g ts = map (Text.intercalate " ")
$ chunkAlong g g
$ sentences
$ Text.concat ts
groupLines :: [Text] -> [Text]
groupLines xxx@(a:b:xs) =
if Text.length a > moyenne
then [a] <> (groupLines (b:xs))
else let ab = a <> " " <> b in
if Text.length ab > moyenne
then [ab] <> (groupLines xs)
else groupLines ([ab] <> xs)
where
moyenne = round
$ mean
$ (map (fromIntegral . Text.length) xxx :: [Double])
groupLines [a] = [a]
groupLines [] = []
groupLines_test :: [Text]
groupLines_test = groupLines theData
where
theData = ["abxxxx", "bc", "cxxx", "d"]
---------------------------------------------------------------------
toParagraphs :: [Text] -> [Text]
toParagraphs (a:x:xs) =
if a == ""
then [a] <> toParagraphs (x:xs)
else if x == ""
then [a] <> toParagraphs (x:xs)
else toParagraphs $ [a <> " " <> x ] <> xs
toParagraphs [a] = [a]
toParagraphs [] = []
-- Tests
-- TODO for internships: Property tests
toParagraphs_test :: Bool
toParagraphs_test =
toParagraphs ["a","b","","c","d","d","","e","f","","g","h",""]
== [ "a b", "", "c d d", "", "e f", "", "g h", ""]
......@@ -28,7 +28,8 @@ import qualified Gargantext.Core.Text.Terms.Multi.Lang.En as En
import qualified Gargantext.Core.Text.Terms.Multi.Lang.Fr as Fr
import Gargantext.Core.Text.Terms.Multi.RAKE (multiterms_rake)
import qualified Gargantext.Utils.JohnSnowNLP as JohnSnow
-- import qualified Gargantext.Utils.JohnSnowNLP as JohnSnow
import qualified Gargantext.Utils.SpacyNLP as SpacyNLP
-------------------------------------------------------------------
......@@ -51,7 +52,7 @@ tokenTag2terms (TokenTag ws t _ _) = Terms ws t
tokenTags :: Lang -> Text -> IO [[TokenTag]]
tokenTags EN txt = tokenTagsWith EN txt corenlp
tokenTags FR txt = tokenTagsWith FR txt JohnSnow.nlp
tokenTags FR txt = tokenTagsWith FR txt SpacyNLP.nlp
tokenTags _ _ = panic "[G.C.T.T.Multi] NLP API not implemented yet"
tokenTagsWith :: Lang -> Text -> NLP_API -> IO [[TokenTag]]
......
......@@ -9,6 +9,7 @@ Portability : POSIX
-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeOperators #-}
......@@ -22,27 +23,27 @@ import Gargantext.Prelude
import GHC.Generics
data Token = Token { _tokenIndex :: Int
, _tokenWord :: Text
, _tokenOriginalText :: Text
, _tokenLemma :: Text
, _tokenCharacterOffsetBegin :: Int
, _tokenCharacterOffsetEnd :: Int
, _tokenPos :: Maybe POS
, _tokenNer :: Maybe NER
, _tokenBefore :: Maybe Text
, _tokenAfter :: Maybe Text
data Token = Token { _tokenIndex :: !Int
, _tokenWord :: !Text
, _tokenOriginalText :: !Text
, _tokenLemma :: !Text
, _tokenCharacterOffsetBegin :: !Int
, _tokenCharacterOffsetEnd :: !Int
, _tokenPos :: !(Maybe POS)
, _tokenNer :: !(Maybe NER)
, _tokenBefore :: !(Maybe Text)
, _tokenAfter :: !(Maybe Text)
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_token") ''Token)
data Sentence = Sentence { _sentenceIndex :: Int
, _sentenceTokens :: [Token]
data Sentence = Sentence { _sentenceIndex :: !Int
, _sentenceTokens :: ![Token]
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_sentence") ''Sentence)
data Properties = Properties { _propertiesAnnotators :: Text
, _propertiesOutputFormat :: Text
data Properties = Properties { _propertiesAnnotators :: !Text
, _propertiesOutputFormat :: !Text
} deriving (Show, Generic)
$(deriveJSON (unPrefix "_properties") ''Properties)
......
......@@ -126,7 +126,7 @@ instance FromJSON POS where
instance ToJSON POS
instance Hashable POS
------------------------------------------------------------------------
data NER = PERSON | ORGANIZATION | LOCATION | NoNER
data NER = PERSON | ORGANIZATION | LOCATION | NoNER { noNer :: !Text }
deriving (Show, Generic)
------------------------------------------------------------------------
instance FromJSON NER where
......@@ -134,9 +134,11 @@ instance FromJSON NER where
where
ner :: [Char] -> NER
ner "PERSON" = PERSON
ner "PER" = PERSON
ner "ORGANIZATION" = ORGANIZATION
ner "LOCATION" = LOCATION
ner _ = NoNER
ner "LOC" = LOCATION
ner x = NoNER (cs x)
instance ToJSON NER
......
......@@ -132,7 +132,7 @@ cooc2graphWith' doPartitions distance threshold strength myCooc = do
(as, bs) = List.unzip $ Map.keys distanceMap
n' = Set.size $ Set.fromList $ as <> bs
bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
confluence' = BAC.computeConfluences 3 (Map.keys bridgeness') True
confluence' = Map.empty -- BAC.computeConfluences 3 (Map.keys bridgeness') True
-- confluence (Map.keys bridgeness') 3 True False
seq bridgeness' $ printDebug "bridgeness OK" ()
seq confluence' $ printDebug "confluence OK" ()
......
......@@ -140,13 +140,13 @@ writeFile a = do
---
-- | Example to read a file with Type
readFile :: ( MonadReader env m
readGargFile :: ( MonadReader env m
, HasConfig env
, MonadBase IO m
, ReadFile a
)
=> FilePath -> m a
readFile fp = do
readGargFile fp = do
dataPath <- view $ hasConfig . gc_datafilepath
liftBase $ readFile' $ toFilePath dataPath fp
......
......@@ -9,7 +9,7 @@ Portability : POSIX
-}
{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE ConstraintKinds, ScopedTypeVariables #-}
module Gargantext.Database.Prelude where
......@@ -30,6 +30,7 @@ import Data.Word (Word16)
import Database.PostgreSQL.Simple (Connection, connect)
import Database.PostgreSQL.Simple.FromField ( Conversion, ResultError(ConversionFailed), fromField, returnError)
import Database.PostgreSQL.Simple.Internal (Field)
import Database.PostgreSQL.Simple.Types (Query(..))
import Gargantext.Core.Mail.Types (HasMail)
import Gargantext.Prelude
import Gargantext.Prelude.Config (readIniFile', val)
......@@ -167,10 +168,9 @@ runPGSQuery_ :: ( CmdM env err m
runPGSQuery_ q = mkCmd $ \conn -> catch (PGS.query_ conn q) printError
where
printError (SomeException e) = do
printDebug "[G.D.P.runPGSQuery_]" ("TODO: format query error" :: Text)
hPutStrLn stderr (fromQuery q)
throw (SomeException e)
execPGSQuery :: PGS.ToRow a => PGS.Query -> a -> Cmd err Int64
execPGSQuery q a = mkCmd $ \conn -> PGS.execute conn q a
......@@ -209,3 +209,9 @@ fromField' field mb = do
printSqlOpa :: Default Unpackspec a a => Select a -> IO ()
printSqlOpa = putStrLn . maybe "Empty query" identity . showSql
dbCheck :: CmdM env err m => m Bool
dbCheck = do
r :: [PGS.Only Text] <- runPGSQuery_ "select username from public.auth_user"
case r of
[] -> return False
_ -> return True
......@@ -58,6 +58,7 @@ import Control.Lens (set, view)
import Control.Lens.Cons
import Control.Lens.Prism
import Data.Aeson (toJSON, encode, ToJSON)
import Data.Char (isAlpha)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
-- import Data.ByteString (ByteString)
......@@ -77,7 +78,7 @@ import Gargantext.Database.Schema.Node (NodePoly(..))
import qualified Gargantext.Defaults as Defaults
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (hash)
import qualified Data.Text as DT (pack, concat, take)
import qualified Data.Text as DT (pack, concat, take, filter, toLower)
{-| To Print result query
import Data.ByteString.Internal (ByteString)
......@@ -208,11 +209,15 @@ instance AddUniqId HyperdataDocument
shaBdd = hash $ DT.concat $ map ($ doc) ([(\d -> maybeText (_hd_bdd d))] <> shaParametersDoc)
shaParametersDoc :: [(HyperdataDocument -> Text)]
shaParametersDoc = [ \d -> maybeText (_hd_title d)
, \d -> maybeText (_hd_abstract d)
, \d -> maybeText (_hd_source d)
shaParametersDoc = [ \d -> filterText $ maybeText (_hd_title d)
, \d -> filterText $ maybeText (_hd_abstract d)
, \d -> filterText $ maybeText (_hd_source d)
, \d -> maybeText (_hd_publication_date d)
]
filterText :: Text -> Text
filterText = DT.toLower . (DT.filter isAlpha)
-- TODO put this elsewhere (fix bin/gargantext-init/Main.hs too)
secret :: Text
secret = "Database secret to change"
......@@ -266,6 +271,7 @@ addUniqIdsContact hc = set (hc_uniqIdBdd) (Just shaBdd)
, \d -> maybeText $ view (hc_where . _head . cw_touch . _Just . ct_mail) d
]
maybeText :: Maybe Text -> Text
maybeText = maybe (DT.pack "") identity
......
......@@ -19,27 +19,35 @@ commentary with @some markup@.
module Gargantext.Database.Query.Table.NodeNode
( module Gargantext.Database.Schema.NodeNode
, queryNodeNodeTable
, deleteNodeNode
, getNodeNode
, insertNodeNode
, deleteNodeNode
, nodeNodesCategory
, nodeNodesScore
, queryNodeNodeTable
, selectDocNodes
, selectDocs
, selectDocsDates
, selectPublicNodes
)
where
import Control.Arrow (returnA)
import Control.Lens ((^.))
import qualified Opaleye as O
import Opaleye
import Control.Lens ((^.), view)
import Data.Text (Text, splitOn)
import Data.Maybe (catMaybes)
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Gargantext.Core
import Gargantext.Core.Types
import Gargantext.Database.Schema.NodeNode
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Prelude
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.NodeNode
import Gargantext.Prelude
import Opaleye
import qualified Database.PostgreSQL.Simple as PGS
import qualified Opaleye as O
queryNodeNodeTable :: Select NodeNodeRead
queryNodeNodeTable = selectTable nodeNodeTable
......@@ -113,8 +121,113 @@ deleteNodeNode n1 n2 = mkCmd $ \conn ->
)
------------------------------------------------------------------------
selectPublicNodes :: HasDBid NodeType
=> (Hyperdata a, DefaultFromField SqlJsonb a)
-- | Favorite management
_nodeNodeCategory :: CorpusId -> DocId -> Int -> Cmd err [Int]
_nodeNodeCategory cId dId c = map (\(PGS.Only a) -> a) <$> runPGSQuery favQuery (c,cId,dId)
where
favQuery :: PGS.Query
favQuery = [sql|UPDATE nodes_nodes SET category = ?
WHERE node1_id = ? AND node2_id = ?
RETURNING node2_id;
|]
nodeNodesCategory :: [(CorpusId, DocId, Int)] -> Cmd err [Int]
nodeNodesCategory inputData = map (\(PGS.Only a) -> a)
<$> runPGSQuery catQuery (PGS.Only $ Values fields inputData)
where
fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","int4"]
catQuery :: PGS.Query
catQuery = [sql| UPDATE nodes_nodes as nn0
SET category = nn1.category
FROM (?) as nn1(node1_id,node2_id,category)
WHERE nn0.node1_id = nn1.node1_id
AND nn0.node2_id = nn1.node2_id
RETURNING nn1.node2_id
|]
------------------------------------------------------------------------
-- | Score management
_nodeNodeScore :: CorpusId -> DocId -> Int -> Cmd err [Int]
_nodeNodeScore cId dId c = map (\(PGS.Only a) -> a) <$> runPGSQuery scoreQuery (c,cId,dId)
where
scoreQuery :: PGS.Query
scoreQuery = [sql|UPDATE nodes_nodes SET score = ?
WHERE node1_id = ? AND node2_id = ?
RETURNING node2_id;
|]
nodeNodesScore :: [(CorpusId, DocId, Int)] -> Cmd err [Int]
nodeNodesScore inputData = map (\(PGS.Only a) -> a)
<$> runPGSQuery catScore (PGS.Only $ Values fields inputData)
where
fields = map (\t-> QualifiedIdentifier Nothing t) ["int4","int4","int4"]
catScore :: PGS.Query
catScore = [sql| UPDATE nodes_nodes as nn0
SET score = nn1.score
FROM (?) as nn1(node1_id, node2_id, score)
WHERE nn0.node1_id = nn1.node1_id
AND nn0.node2_id = nn1.node2_id
RETURNING nn1.node2_id
|]
------------------------------------------------------------------------
_selectCountDocs :: HasDBid NodeType => CorpusId -> Cmd err Int
_selectCountDocs cId = runCountOpaQuery (queryCountDocs cId)
where
queryCountDocs cId' = proc () -> do
(n, nn) <- joinInCorpus -< ()
restrict -< nn^.nn_node1_id .== (toNullable $ pgNodeId cId')
restrict -< nn^.nn_category .>= (toNullable $ sqlInt4 1)
restrict -< n^.node_typename .== (sqlInt4 $ toDBid NodeDocument)
returnA -< n
-- | TODO use UTCTime fast
selectDocsDates :: HasDBid NodeType => CorpusId -> Cmd err [Text]
selectDocsDates cId = map (head' "selectDocsDates" . splitOn "-")
<$> catMaybes
<$> map (view hd_publication_date)
<$> selectDocs cId
selectDocs :: HasDBid NodeType => CorpusId -> Cmd err [HyperdataDocument]
selectDocs cId = runOpaQuery (queryDocs cId)
queryDocs :: HasDBid NodeType => CorpusId -> O.Select (Column SqlJsonb)
queryDocs cId = proc () -> do
(n, nn) <- joinInCorpus -< ()
restrict -< nn^.nn_node1_id .== (toNullable $ pgNodeId cId)
restrict -< nn^.nn_category .>= (toNullable $ sqlInt4 1)
restrict -< n^.node_typename .== (sqlInt4 $ toDBid NodeDocument)
returnA -< view (node_hyperdata) n
selectDocNodes :: HasDBid NodeType =>CorpusId -> Cmd err [Node HyperdataDocument]
selectDocNodes cId = runOpaQuery (queryDocNodes cId)
queryDocNodes :: HasDBid NodeType =>CorpusId -> O.Select NodeRead
queryDocNodes cId = proc () -> do
(n, nn) <- joinInCorpus -< ()
restrict -< nn^.nn_node1_id .== (toNullable $ pgNodeId cId)
restrict -< nn^.nn_category .>= (toNullable $ sqlInt4 1)
restrict -< n^.node_typename .== (sqlInt4 $ toDBid NodeDocument)
returnA -< n
joinInCorpus :: O.Select (NodeRead, NodeNodeReadNull)
joinInCorpus = leftJoin queryNodeTable queryNodeNodeTable cond
where
cond :: (NodeRead, NodeNodeRead) -> Column SqlBool
cond (n, nn) = nn^.nn_node2_id .== (view node_id n)
_joinOn1 :: O.Select (NodeRead, NodeNodeReadNull)
_joinOn1 = leftJoin queryNodeTable queryNodeNodeTable cond
where
cond :: (NodeRead, NodeNodeRead) -> Column SqlBool
cond (n, nn) = nn^.nn_node1_id .== n^.node_id
------------------------------------------------------------------------
selectPublicNodes :: HasDBid NodeType => (Hyperdata a, DefaultFromField SqlJsonb a)
=> Cmd err [(Node a, Maybe Int)]
selectPublicNodes = runOpaQuery (queryWithType NodeFolderPublic)
......
{-|
Module : Gargantext.Utils.SpacyNLP
Description : John Snow NLP API connexion
Copyright : (c) CNRS, 2017
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Spacy ecosystem: https://github.com/explosion/spaCy
Server to be used: https://gitlab.iscpif.fr/gargantext/spacy-server
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Utils.SpacyNLP where
import Control.Lens
import Data.Aeson (encode)
import Data.Aeson.TH (deriveJSON)
import Data.Text hiding (map, group, filter, concat, zip)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text.Terms.Multi.PosTagging.Types
import Gargantext.Core.Types (POS(..), NER(..))
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import Network.HTTP.Simple (parseRequest, httpJSON, setRequestBodyLBS, getResponseBody, Response)
data SpacyData = SpacyData { _spacy_data :: ![SpacyText]}
deriving (Show)
data SpacyText = SpacyText { _spacy_text :: !Text
, _spacy_tags :: ![SpacyTags]
} deriving (Show)
data SpacyTags =
SpacyTags { _spacyTags_text :: !Text
, _spacyTags_text_with_ws :: !Text
, _spacyTags_whitespace :: !Text
, _spacyTags_head :: !Text
, _spacyTags_head_index :: !Int
, _spacyTags_left_edge :: !Text
, _spacyTags_right_edge :: !Text
, _spacyTags_index :: Int
, _spacyTags_ent_type :: !NER
, _spacyTags_ent_iob :: !Text
, _spacyTags_lemma :: !Text
, _spacyTags_normalized :: !Text
, _spacyTags_shape :: !Text
, _spacyTags_prefix :: !Text
, _spacyTags_suffix :: !Text
, _spacyTags_is_alpha :: Bool
, _spacyTags_is_ascii :: Bool
, _spacyTags_is_digit :: Bool
, _spacyTags_is_title :: Bool
, _spacyTags_is_punct :: Bool
, _spacyTags_is_left_punct :: Bool
, _spacyTags_is_right_punct :: Bool
, _spacyTags_is_space :: Bool
, _spacyTags_is_bracket :: Bool
, _spacyTags_is_quote :: Bool
, _spacyTags_is_currency :: Bool
, _spacyTags_like_url :: Bool
, _spacyTags_like_num :: Bool
, _spacyTags_like_email :: Bool
, _spacyTags_is_oov :: Bool
, _spacyTags_is_stop :: Bool
, _spacyTags_pos :: POS
, _spacyTags_tag :: POS
, _spacyTags_dep :: !Text
, _spacyTags_lang :: !Text
, _spacyTags_prob :: !Int
, _spacyTags_char_offset :: !Int
} deriving (Show)
data SpacyRequest = SpacyRequest { _spacyRequest_text :: !Text }
deriving (Show)
spacyRequest :: Text -> IO SpacyData
spacyRequest txt = do
url <- parseRequest $ unpack "POST http://localhost:8001/pos"
let request = setRequestBodyLBS (encode $ SpacyRequest txt) url
result <- httpJSON request :: IO (Response SpacyData)
pure $ getResponseBody result
-- Instances
deriveJSON (unPrefix "_spacy_") ''SpacyData
deriveJSON (unPrefix "_spacy_") ''SpacyText
deriveJSON (unPrefix "_spacyTags_") ''SpacyTags
deriveJSON (unPrefix "_spacyRequest_") ''SpacyRequest
makeLenses ''SpacyData
makeLenses ''SpacyText
makeLenses ''SpacyTags
makeLenses ''SpacyRequest
----------------------------------------------------------------
spacyTagsToToken :: SpacyTags -> Token
spacyTagsToToken st = Token (st ^. spacyTags_index)
(st ^. spacyTags_normalized)
(st ^. spacyTags_text)
(st ^. spacyTags_lemma)
(st ^. spacyTags_head_index)
(st ^. spacyTags_char_offset)
(Just $ st ^. spacyTags_pos)
(Just $ st ^. spacyTags_ent_type)
(Just $ st ^. spacyTags_prefix)
(Just $ st ^. spacyTags_suffix)
spacyDataToPosSentences :: SpacyData -> PosSentences
spacyDataToPosSentences (SpacyData ds) = PosSentences
$ map (\(i, ts) -> Sentence i ts)
$ zip [1..]
$ map (\(SpacyText _ tags)-> map spacyTagsToToken tags) ds
-----------------------------------------------------------------
nlp :: Lang -> Text -> IO PosSentences
nlp FR txt = spacyDataToPosSentences <$> spacyRequest txt
nlp _ _ = panic "Make sure you have the right model for your lang for spacy Server"
......@@ -72,7 +72,7 @@ extra-deps:
# External Data API connectors
- git: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
commit: 02e03d9b856bd35d391f43da8525330f9d184615
commit: 0b906ccc5a4a1b7532eb47c825dc02484a2d6b0e
- git: https://gitlab.iscpif.fr/gargantext/crawlers/istex.git
commit: a34bb341236d82cf3d488210bc1d8448a98f5808
- git: https://gitlab.iscpif.fr/gargantext/crawlers/hal.git
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment