Commit 2f0c326f authored by Alexandre Delanoë

Merge branch 'dev-occ-opt' into dev

parents 5a332bda b4a75fc9
Pipeline #304 failed with stage
......@@ -35,6 +35,7 @@ module Gargantext.API.Ngrams
where
-- import Debug.Trace (trace)
import Control.Exception (Exception)
import Prelude (Enum, Bounded, Semigroup(..), minBound, maxBound {-, round-}, error)
-- import Gargantext.Database.Schema.User (UserId)
import Data.Functor (($>))
......@@ -893,7 +894,7 @@ type MaxSize = Int
-- TODO: polymorphic for Annuaire or Corpus or ...
-- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
-- TODO: should take only one ListId
getTableNgrams :: (RepoCmdM env err m, HasNodeError err, HasConnection env)
getTableNgrams :: (RepoCmdM env err m, HasNodeError err, HasConnection env, Exception err)
=> CorpusId -> TabType
-> ListId -> Limit -> Maybe Offset
-> Maybe ListType
......
......@@ -15,6 +15,7 @@ Node API
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
......
......@@ -8,22 +8,24 @@ Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MonoLocalBinds #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.API.Settings
where
import Control.Exception (Exception)
import System.Directory
import System.Log.FastLogger
import GHC.Enum
......@@ -274,7 +276,7 @@ withDevEnv k = do
k env `finally` unlockFile (env ^. repoEnv . renv_lock)
-- | Run Cmd Sugar for the Repl (GHCI)
runCmdRepl :: Show err => Cmd' DevEnv err a -> IO a
-- REPL (GHCi) helper: hands the environment supplied by 'withDevEnv' to
-- 'runCmdDev' together with the given command.
runCmdRepl :: (Show err, Exception err) => Cmd' DevEnv err a -> IO a
runCmdRepl cmd = withDevEnv (\devEnv -> runCmdDev devEnv cmd)
runCmdReplServantErr :: Cmd' DevEnv ServantErr a -> IO a
......@@ -288,12 +290,14 @@ newDevEnv = newDevEnvWith "gargantext.ini"
-- the command.
-- This function is constrained to the DevEnv rather than
-- using HasConnection and HasRepoVar.
runCmdDev :: Show err => DevEnv -> Cmd' DevEnv err a -> IO a
-- Runs a command against a 'DevEnv'. A 'Left' result is turned into an IO
-- failure carrying the shown error; 'saveRepo' is run afterwards whether the
-- command succeeded or not.
runCmdDev :: (Show err, Exception err) => DevEnv -> Cmd' DevEnv err a -> IO a
runCmdDev env f = runAndReport `finally` runReaderT saveRepo env
  where
    -- Execute the command, then either fail with the rendered error or
    -- return the successful value.
    runAndReport = do
      outcome <- runCmd env f
      either (fail . show) pure outcome
-- Orphan instance (the orphan warning is switched off for this module):
-- lets the unit type be used as an error type wherever an 'Exception'
-- instance is required, as 'runCmdDevNoErr' does through 'runCmdDev'.
instance Exception ()
-- Use only for dev: 'runCmdDev' specialised to commands whose error type
-- is unit.
runCmdDevNoErr :: DevEnv -> Cmd' DevEnv () a -> IO a
runCmdDevNoErr env cmd = runCmdDev env cmd
......
......@@ -20,6 +20,7 @@ Portability : POSIX
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MonoLocalBinds #-}
module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
where
......
......@@ -11,9 +11,9 @@ Node API
-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE MonoLocalBinds #-}
module Gargantext.Database.Metrics
where
......
......@@ -137,37 +137,77 @@ queryNgramsByNodeUser = [sql|
-- TODO add groups
-- | Occurrence count per ngram term for the given corpus and ngrams type.
-- Two right-hand sides appear below: the first takes the size of each node
-- set returned by 'getNodesByNgramsOnlyUser'; the second sums, per term, the
-- counts returned by 'selectNgramsOccurrencesOnlyByNodeUser'.
-- NOTE(review): only one of the two can be live in the compiled module --
-- presumably the second, since the first reappears as 'getOccByNgramsOnly''.
getOccByNgramsOnly :: CorpusId -> NgramsType -> [Text]
-> Cmd err (Map Text Int)
getOccByNgramsOnly cId nt ngs = Map.map Set.size
<$> getNodesByNgramsOnlyUser cId nt ngs
getOccByNgramsOnly cId nt ngs =
fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs
-- Set-based variant of 'getOccByNgramsOnly' (described as just slower):
-- fetches the node set of every term with 'getNodesByNgramsOnlyUser' and
-- keeps only each set's size.
getOccByNgramsOnly' :: CorpusId -> NgramsType -> [Text]
                    -> Cmd err (Map Text Int)
getOccByNgramsOnly' cId nt ngs =
  countNodes <$> getNodesByNgramsOnlyUser cId nt ngs
  where
    -- Replace every node set by its cardinality.
    countNodes = Map.map Set.size
-- Runs 'queryNgramsOccurrencesOnlyByNodeUser' for the given terms and returns
-- its (term, count) rows. Parameters are passed in the order the query's
-- placeholders appear: the terms as a one-column VALUES list typed "text",
-- then the corpus id, the document node type id and the ngrams type id.
selectNgramsOccurrencesOnlyByNodeUser :: CorpusId -> NgramsType -> [Text]
                                      -> Cmd err [(Text, Int)]
selectNgramsOccurrencesOnlyByNodeUser cId nt tms =
  runPGSQuery queryNgramsOccurrencesOnlyByNodeUser queryParams
  where
    -- One row per requested term.
    termRows    = Values [QualifiedIdentifier Nothing "text"] (DPS.Only <$> tms)
    queryParams = (termRows, cId, nodeTypeId NodeDocument, ngramsTypeId nt)
-- Same joins and filters as queryNgramsOnlyByNodeUser, but returns
-- COUNT(nng.node_id) instead of the node ids themselves.
-- Open question: with the grouping, is the result exactly the same as the
-- set-based count (the Set NodeId of equivalent ngrams can intersect)?
-- NOTE(review): the GROUP BY includes nng.node_id, so one row comes back per
-- (node, term) pair rather than per term; 'getOccByNgramsOnly' adds the
-- counts up per term with fromListWith (+) -- confirm this is intended.
-- Placeholders, in order: the VALUES list of terms, the corpus id, the node
-- type id and the ngrams type id.
queryNgramsOccurrencesOnlyByNodeUser :: DPS.Query
queryNgramsOccurrencesOnlyByNodeUser = [sql|
WITH input_rows(terms) AS (?)
SELECT ng.terms, COUNT(nng.node_id) FROM nodes_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN nodes_nodes nn ON nn.node2_id = nng.node_id
JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.delete = False
GROUP BY nng.node_id, ng.terms
|]
-- | Maps each ngram term to the set of node ids returned for it by
-- 'selectNgramsOnlyByNodeUser'; rows sharing a term have their singleton
-- sets merged with '<>'.
-- The two `fromListWith` lines below differ only in the row layout they
-- expect: the first takes (node, term) pairs and swaps them, the second
-- takes (term, node) pairs and wraps the node with 'second'.
getNodesByNgramsOnlyUser :: CorpusId -> NgramsType -> [Text]
-> Cmd err (Map Text (Set NodeId))
getNodesByNgramsOnlyUser cId nt ngs =
fromListWith (<>) <$> map (\(n,t) -> (t, Set.singleton n))
fromListWith (<>) <$> map (second Set.singleton)
<$> selectNgramsOnlyByNodeUser cId nt ngs
-- | Runs 'queryNgramsOnlyByNodeUser' for the given terms of a corpus.
-- Two parameter layouts are interleaved below:
--   * a single VALUES parameter whose rows each carry
--     (term, corpus id, document node type id, ngrams type id), with result
--     rows typed (NodeId, Text);
--   * a one-column VALUES list of terms followed by the corpus id, the
--     document node type id and the ngrams type id as separate parameters,
--     with result rows typed (Text, NodeId).
selectNgramsOnlyByNodeUser :: CorpusId -> NgramsType -> [Text]
-> Cmd err [(NodeId, Text)]
-> Cmd err [(Text, NodeId)]
selectNgramsOnlyByNodeUser cId nt tms =
runPGSQuery queryNgramsOnlyByNodeUser (DPS.Only $ Values fields tms' )
runPGSQuery queryNgramsOnlyByNodeUser
( Values fields (DPS.Only <$> tms)
, cId
, nodeTypeId NodeDocument
, ngramsTypeId nt
)
where
fields = map (\t -> QualifiedIdentifier Nothing t) ["text", "int4", "int4", "int4"]
tms' = map (\t -> (t,cId,nodeTypeId NodeDocument, ngramsTypeId nt)) tms
fields = [QualifiedIdentifier Nothing "text"]
-- Query behind 'selectNgramsOnlyByNodeUser': joins nodes_ngrams, ngrams, the
-- input_rows CTE, nodes_nodes and nodes, and groups by node id and term.
-- Two variants of the query text are interleaved below:
--   * input_rows(terms,corpus_id,docType,ngramsType): a single placeholder;
--     the filters compare against the ir.* columns and the SELECT yields
--     (node_id, terms);
--   * input_rows(terms): four placeholders (terms, corpus id, node type id,
--     ngrams type id) and the SELECT yields (terms, node_id).
queryNgramsOnlyByNodeUser :: DPS.Query
queryNgramsOnlyByNodeUser = [sql|
WITH input_rows(terms,corpus_id,docType,ngramsType) AS (?)
SELECT nng.node_id, ng.terms FROM nodes_ngrams nng
WITH input_rows(terms) AS (?)
SELECT ng.terms, nng.node_id FROM nodes_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN nodes_nodes nn ON nn.node2_id = nng.node_id
JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id = ir.corpus_id -- CorpusId
AND n.typename = ir.docType -- NodeTypeId
AND nng.ngrams_type = ir.ngramsType -- NgramsTypeId
AND nn.delete = False
WHERE nn.node1_id = ? -- CorpusId
AND n.typename = ? -- NodeTypeId
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.delete = False
GROUP BY nng.node_id, ng.terms
|]
......@@ -215,7 +255,7 @@ SELECT nng.node_id, ng.id, ng.terms FROM nodes_ngrams nng
AND nng.ngrams_type = ? -- NgramsTypeId
GROUP BY nng.node_id, ng.id, ng.terms)
SELECT m.node_id, m.terms FROM nodesByNgramsMaster m
SELECTx m.node_id, m.terms FROM nodesByNgramsMaster m
RIGHT JOIN nodesByNgramsUser u ON u.id = m.id
|]
......
......@@ -20,6 +20,10 @@ commentary with @some markup@.
module Gargantext.Database.Utils where
import Data.ByteString.Char8 (hPutStrLn)
import System.IO (stderr)
import Control.Exception
import Control.Monad.Error.Class -- (MonadError(..), Error)
import Control.Lens (Getter, view)
import Control.Monad.Reader
import Control.Monad.Except
......@@ -52,11 +56,13 @@ type CmdM' env err m =
( MonadReader env m
, MonadError err m
, MonadIO m
, Exception err
)
-- | Constraints for commands that talk to the database: everything in
-- 'CmdM'' plus access to a connection through 'HasConnection'.
type CmdM env err m = (CmdM' env err m, HasConnection env, Exception err)
-- | A command that can be run in any monad @m@ satisfying 'CmdM'' for the
-- given environment and error types, producing an @a@.
type Cmd' env err a = forall m. CmdM' env err m => m a
......@@ -69,7 +75,7 @@ mkCmd k = do
conn <- view connection
liftIO $ k conn
runCmd :: HasConnection env => env
-- Runs a command against a concrete environment: the reader layer is fed
-- @env@ and the error layer is unwrapped into an 'Either'.
runCmd
  :: (HasConnection env, Exception err)
  => env
  -> Cmd' env err a
  -> IO (Either err a)
runCmd env cmd = runExceptT (runReaderT cmd env)
......@@ -80,8 +86,20 @@ runOpaQuery q = mkCmd $ \c -> runQuery c q
-- | Formats a query template and its parameters into the final query bytes
-- by calling 'PGS.formatQuery' on the command's connection.
formatPGSQuery :: PGS.ToRow a => PGS.Query -> a -> Cmd err DB.ByteString
formatPGSQuery template params =
  mkCmd (\c -> PGS.formatQuery c template params)
-- Runs a query through 'PGS.query' on the command's connection and returns
-- the decoded rows.
runPGSQuery :: (PGS.ToRow a, PGS.FromRow b) => PGS.Query -> a -> Cmd err [b]
runPGSQuery template params = mkCmd (\c -> PGS.query c template params)
-- TODO use runPGSQueryDebug everywhere
-- NOTE(review): no 'runPGSQueryDebug' is visible here; presumably the TODO
-- refers to the 'runPGSQuery' variant that prints the query on failure.
-- Plain runner: calls 'PGS.query' on the command's connection and returns the
-- decoded rows, with no extra reporting when the query fails.
runPGSQuery' :: (PGS.ToRow a, PGS.FromRow b) => PGS.Query -> a -> Cmd err [b]
runPGSQuery' template params = mkCmd (\c -> PGS.query c template params)
-- | Runs a query on the command's connection and returns the decoded rows.
--
-- If running the query raises any exception, the fully formatted query (as
-- produced by 'PGS.formatQuery') is written to stderr to help debugging, and
-- the original exception is then re-raised.
--
-- Note: if formatting the query for the report fails itself, that failure is
-- the one that propagates.
runPGSQuery :: (MonadError err m, MonadReader env m,
                PGS.FromRow r, PGS.ToRow q, MonadIO m, HasConnection env, Exception err)
            => PGS.Query -> q -> m [r]
runPGSQuery q a = mkCmd $ \conn ->
  -- 'onException' runs the handler on any exception and re-raises the
  -- original one with 'throwIO', so the error is raised at a well-defined
  -- point in the IO sequence (unlike a bare 'throw' inside IO).
  PGS.query conn q a `onException` dumpQuery conn
  where
    -- Print the query with its parameters substituted.
    dumpQuery c = do
      formatted <- PGS.formatQuery c q a
      hPutStrLn stderr formatted
-- | Executes a statement through 'PGS.execute' on the command's connection
-- and returns its 'Int64' result (presumably the number of affected rows --
-- confirm against the postgresql-simple documentation).
execPGSQuery :: PGS.ToRow a => PGS.Query -> a -> Cmd err Int64
execPGSQuery template params = mkCmd (\c -> PGS.execute c template params)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment