Commit 2f0c326f authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'dev-occ-opt' into dev

parents 5a332bda b4a75fc9
Pipeline #304 failed with stage
...@@ -35,6 +35,7 @@ module Gargantext.API.Ngrams ...@@ -35,6 +35,7 @@ module Gargantext.API.Ngrams
where where
-- import Debug.Trace (trace) -- import Debug.Trace (trace)
import Control.Exception (Exception)
import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error) import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
-- import Gargantext.Database.Schema.User (UserId) -- import Gargantext.Database.Schema.User (UserId)
import Data.Functor (($>)) import Data.Functor (($>))
...@@ -893,7 +894,7 @@ type MaxSize = Int ...@@ -893,7 +894,7 @@ type MaxSize = Int
-- TODO: polymorphic for Annuaire or Corpus or ... -- TODO: polymorphic for Annuaire or Corpus or ...
-- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut). -- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
-- TODO: should take only one ListId -- TODO: should take only one ListId
getTableNgrams :: (RepoCmdM env err m, HasNodeError err, HasConnection env) getTableNgrams :: (RepoCmdM env err m, HasNodeError err, HasConnection env, Exception err)
=> CorpusId -> TabType => CorpusId -> TabType
-> ListId -> Limit -> Maybe Offset -> ListId -> Limit -> Maybe Offset
-> Maybe ListType -> Maybe ListType
......
...@@ -15,6 +15,7 @@ Node API ...@@ -15,6 +15,7 @@ Node API
{-# LANGUAGE DataKinds #-} {-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE RankNTypes #-}
......
...@@ -8,22 +8,24 @@ Stability : experimental ...@@ -8,22 +8,24 @@ Stability : experimental
Portability : POSIX Portability : POSIX
-} -}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-} {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE DataKinds #-}
{-# LANGUAGE DataKinds #-} {-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE RankNTypes #-}
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Settings module Gargantext.API.Settings
where where
import Control.Exception (Exception)
import System.Directory import System.Directory
import System.Log.FastLogger import System.Log.FastLogger
import GHC.Enum import GHC.Enum
...@@ -274,7 +276,7 @@ withDevEnv k = do ...@@ -274,7 +276,7 @@ withDevEnv k = do
k env `finally` unlockFile (env ^. repoEnv . renv_lock) k env `finally` unlockFile (env ^. repoEnv . renv_lock)
-- | Run Cmd Sugar for the Repl (GHCI) -- | Run Cmd Sugar for the Repl (GHCI)
runCmdRepl :: Show err => Cmd' DevEnv err a -> IO a runCmdRepl :: (Show err, Exception err) => Cmd' DevEnv err a -> IO a
runCmdRepl f = withDevEnv $ \env -> runCmdDev env f runCmdRepl f = withDevEnv $ \env -> runCmdDev env f
runCmdReplServantErr :: Cmd' DevEnv ServantErr a -> IO a runCmdReplServantErr :: Cmd' DevEnv ServantErr a -> IO a
...@@ -288,12 +290,14 @@ newDevEnv = newDevEnvWith "gargantext.ini" ...@@ -288,12 +290,14 @@ newDevEnv = newDevEnvWith "gargantext.ini"
-- the command. -- the command.
-- This function is constrained to the DevEnv rather than -- This function is constrained to the DevEnv rather than
-- using HasConnection and HasRepoVar. -- using HasConnection and HasRepoVar.
runCmdDev :: Show err => DevEnv -> Cmd' DevEnv err a -> IO a runCmdDev :: (Show err, Exception err) => DevEnv -> Cmd' DevEnv err a -> IO a
runCmdDev env f = runCmdDev env f =
(either (fail . show) pure =<< runCmd env f) (either (fail . show) pure =<< runCmd env f)
`finally` `finally`
runReaderT saveRepo env runReaderT saveRepo env
instance Exception ()
-- Use only for dev -- Use only for dev
runCmdDevNoErr :: DevEnv -> Cmd' DevEnv () a -> IO a runCmdDevNoErr :: DevEnv -> Cmd' DevEnv () a -> IO a
runCmdDevNoErr = runCmdDev runCmdDevNoErr = runCmdDev
......
...@@ -20,6 +20,7 @@ Portability : POSIX ...@@ -20,6 +20,7 @@ Portability : POSIX
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE RankNTypes #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MonoLocalBinds #-}
module Gargantext.Database.Flow -- (flowDatabase, ngrams2list) module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
where where
......
...@@ -11,9 +11,9 @@ Node API ...@@ -11,9 +11,9 @@ Node API
-} -}
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MonoLocalBinds #-}
module Gargantext.Database.Metrics module Gargantext.Database.Metrics
where where
......
...@@ -137,37 +137,77 @@ queryNgramsByNodeUser = [sql| ...@@ -137,37 +137,77 @@ queryNgramsByNodeUser = [sql|
-- TODO add groups -- TODO add groups
getOccByNgramsOnly :: CorpusId -> NgramsType -> [Text] getOccByNgramsOnly :: CorpusId -> NgramsType -> [Text]
-> Cmd err (Map Text Int) -> Cmd err (Map Text Int)
getOccByNgramsOnly cId nt ngs = Map.map Set.size getOccByNgramsOnly cId nt ngs =
<$> getNodesByNgramsOnlyUser cId nt ngs fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs
-- just slower than getOccByNgramsOnly
getOccByNgramsOnly' :: CorpusId -> NgramsType -> [Text]
-> Cmd err (Map Text Int)
getOccByNgramsOnly' cId nt ngs =
Map.map Set.size <$> getNodesByNgramsOnlyUser cId nt ngs
selectNgramsOccurrencesOnlyByNodeUser :: CorpusId -> NgramsType -> [Text]
-> Cmd err [(Text, Int)]
selectNgramsOccurrencesOnlyByNodeUser cId nt tms =
runPGSQuery queryNgramsOccurrencesOnlyByNodeUser
( Values fields (DPS.Only <$> tms)
, cId
, nodeTypeId NodeDocument
, ngramsTypeId nt
)
where
fields = [QualifiedIdentifier Nothing "text"]
-- same as queryNgramsOnlyByNodeUser but using COUNT on the node ids.
-- Question: with the grouping is the result exactly the same (since Set NodeId for
-- equivalent ngrams intersections are not empty)
queryNgramsOccurrencesOnlyByNodeUser :: DPS.Query
queryNgramsOccurrencesOnlyByNodeUser = [sql|
WITH input_rows(terms) AS (?)
SELECT ng.terms, COUNT(nng.node_id) FROM nodes_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN nodes_nodes nn ON nn.node2_id = nng.node_id
JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.delete = False
GROUP BY nng.node_id, ng.terms
|]
getNodesByNgramsOnlyUser :: CorpusId -> NgramsType -> [Text] getNodesByNgramsOnlyUser :: CorpusId -> NgramsType -> [Text]
-> Cmd err (Map Text (Set NodeId)) -> Cmd err (Map Text (Set NodeId))
getNodesByNgramsOnlyUser cId nt ngs = getNodesByNgramsOnlyUser cId nt ngs =
fromListWith (<>) <$> map (\(n,t) -> (t, Set.singleton n)) fromListWith (<>) <$> map (second Set.singleton)
<$> selectNgramsOnlyByNodeUser cId nt ngs <$> selectNgramsOnlyByNodeUser cId nt ngs
selectNgramsOnlyByNodeUser :: CorpusId -> NgramsType -> [Text] selectNgramsOnlyByNodeUser :: CorpusId -> NgramsType -> [Text]
-> Cmd err [(NodeId, Text)] -> Cmd err [(Text, NodeId)]
selectNgramsOnlyByNodeUser cId nt tms = selectNgramsOnlyByNodeUser cId nt tms =
runPGSQuery queryNgramsOnlyByNodeUser (DPS.Only $ Values fields tms' ) runPGSQuery queryNgramsOnlyByNodeUser
( Values fields (DPS.Only <$> tms)
, cId
, nodeTypeId NodeDocument
, ngramsTypeId nt
)
where where
fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4", "int4", "int4"] fields = [QualifiedIdentifier Nothing "text"]
tms' = map (\t -> (t,cId,nodeTypeId NodeDocument, ngramsTypeId nt)) tms
queryNgramsOnlyByNodeUser :: DPS.Query queryNgramsOnlyByNodeUser :: DPS.Query
queryNgramsOnlyByNodeUser = [sql| queryNgramsOnlyByNodeUser = [sql|
WITH input_rows(terms,corpus_id,docType,ngramsType) AS (?) WITH input_rows(terms) AS (?)
SELECT nng.node_id, ng.terms FROM nodes_ngrams nng SELECT ng.terms, nng.node_id FROM nodes_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms JOIN input_rows ir ON ir.terms = ng.terms
JOIN nodes_nodes nn ON nn.node2_id = nng.node_id JOIN nodes_nodes nn ON nn.node2_id = nng.node_id
JOIN nodes n ON nn.node2_id = n.id JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ir.corpus_id -- CorpusId WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ir.docType -- NodeTypeId AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ir.ngramsType -- NgramsTypeId AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.delete = False AND nn.delete = False
GROUP BY nng.node_id, ng.terms GROUP BY nng.node_id, ng.terms
|] |]
...@@ -215,7 +255,7 @@ SELECT nng.node_id, ng.id, ng.terms FROM nodes_ngrams nng ...@@ -215,7 +255,7 @@ SELECT nng.node_id, ng.id, ng.terms FROM nodes_ngrams nng
AND nng.ngrams_type = ? -- NgramsTypeId AND nng.ngrams_type = ? -- NgramsTypeId
GROUP BY nng.node_id, ng.id, ng.terms) GROUP BY nng.node_id, ng.id, ng.terms)
SELECT m.node_id, m.terms FROM nodesByNgramsMaster m SELECTx m.node_id, m.terms FROM nodesByNgramsMaster m
RIGHT JOIN nodesByNgramsUser u ON u.id = m.id RIGHT JOIN nodesByNgramsUser u ON u.id = m.id
|] |]
......
...@@ -20,6 +20,10 @@ commentary with @some markup@. ...@@ -20,6 +20,10 @@ commentary with @some markup@.
module Gargantext.Database.Utils where module Gargantext.Database.Utils where
import Data.ByteString.Char8 (hPutStrLn)
import System.IO (stderr)
import Control.Exception
import Control.Monad.Error.Class -- (MonadError(..), Error)
import Control.Lens (Getter, view) import Control.Lens (Getter, view)
import Control.Monad.Reader import Control.Monad.Reader
import Control.Monad.Except import Control.Monad.Except
...@@ -52,11 +56,13 @@ type CmdM' env err m = ...@@ -52,11 +56,13 @@ type CmdM' env err m =
( MonadReader env m ( MonadReader env m
, MonadError err m , MonadError err m
, MonadIO m , MonadIO m
, Exception err
) )
type CmdM env err m = type CmdM env err m =
( CmdM' env err m ( CmdM' env err m
, HasConnection env , HasConnection env
, Exception err
) )
type Cmd' env err a = forall m. CmdM' env err m => m a type Cmd' env err a = forall m. CmdM' env err m => m a
...@@ -69,7 +75,7 @@ mkCmd k = do ...@@ -69,7 +75,7 @@ mkCmd k = do
conn <- view connection conn <- view connection
liftIO $ k conn liftIO $ k conn
runCmd :: HasConnection env => env runCmd :: (HasConnection env, Exception err) => env
-> Cmd' env err a -> Cmd' env err a
-> IO (Either err a) -> IO (Either err a)
runCmd env m = runExceptT $ runReaderT m env runCmd env m = runExceptT $ runReaderT m env
...@@ -80,8 +86,20 @@ runOpaQuery q = mkCmd $ \c -> runQuery c q ...@@ -80,8 +86,20 @@ runOpaQuery q = mkCmd $ \c -> runQuery c q
formatPGSQuery :: PGS.ToRow a => PGS.Query -> a -> Cmd err DB.ByteString formatPGSQuery :: PGS.ToRow a => PGS.Query -> a -> Cmd err DB.ByteString
formatPGSQuery q a = mkCmd $ \conn -> PGS.formatQuery conn q a formatPGSQuery q a = mkCmd $ \conn -> PGS.formatQuery conn q a
runPGSQuery :: (PGS.ToRow a, PGS.FromRow b) => PGS.Query -> a -> Cmd err [b] -- TODO use runPGSQueryDebug everywhere
runPGSQuery q a = mkCmd $ \conn -> PGS.query conn q a runPGSQuery' :: (PGS.ToRow a, PGS.FromRow b) => PGS.Query -> a -> Cmd err [b]
runPGSQuery' q a = mkCmd $ \conn -> PGS.query conn q a
runPGSQuery :: (MonadError err m, MonadReader env m,
PGS.FromRow r, PGS.ToRow q, MonadIO m, HasConnection env, Exception err)
=> PGS.Query -> q -> m [r]
runPGSQuery q a = mkCmd $ \conn -> catch (PGS.query conn q a) (printError conn)
where
printError c (SomeException e) = do
q' <- (PGS.formatQuery c q a :: IO DB.ByteString)
hPutStrLn stderr q'
throw e
execPGSQuery :: PGS.ToRow a => PGS.Query -> a -> Cmd err Int64 execPGSQuery :: PGS.ToRow a => PGS.Query -> a -> Cmd err Int64
execPGSQuery q a = mkCmd $ \conn -> PGS.execute conn q a execPGSQuery q a = mkCmd $ \conn -> PGS.execute conn q a
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment