Commit 261af659 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 104-dev-john-snow-nlp

parents 0d2c688f 92316028
## Version 0.0.5.5.7
* [FRONT][FIX] NgramsTable Cache search.
## Version 0.0.5.5.6
* [BACK][FIX] ./bin/psql gargantext.ini < devops/postgres/upgrade/0.0.5.5.6.sql
* [FRONT] fix NodeType list show (Nodes options)
## Version 0.0.5.5.5
* [FRONT] fix Graph Explorer search ngrams
* [FRONT] fix NodeType list show (main Nodes)
## Version 0.0.5.5.4
* [BACK][OPTIM] NgramsTable scores
* [BACK] bin/client script to analyze backend performance and reproduce bugs
* [FRONT] Adding Language selection
## Version 0.0.5.5.3
* [BACK] Adding a Max limit for the "others" lists (Authors, Sources, Institutes).
## Version 0.0.5.5.2
* [BACK][OPTIM] Index on node_node_ngrams to speed up ngrams table score
queries. Please execute the upgrade SQL script
devops/postgres/0.0.5.5.2.sql
## Version 0.0.5.5.1
* [BACK] FIX Graph Explorer search with selected ngrams
* [FRONT] Clean CSS
## Version 0.0.5.5
* [FRONT] Visio frame removed, using a new tab instead (which is working)
* [BACK] Scores on the docs view fixed
## Version 0.0.5.3
* [FRONT] SSL local option
## Version 0.0.5.2
* [QUAL] Scores in Ngrams Table fixed during the workflow; the user can
refresh them if needed.
## Version 0.0.5.1
* [OPTIM] Upgrade fix with indexes and scores counts
## Version 0.0.5
* [OPTIM][DATABASE] Upgrade Schema: move conTexts into the contexts table, which requires a version bump.
......
module Auth where
import Core
import Options
import Control.Monad.IO.Class
import Data.Text.Encoding (encodeUtf8)
import Options.Generic
import Servant.Client
import qualified Servant.Auth.Client as SA
import Gargantext.API.Client
import qualified Gargantext.API.Admin.Auth.Types as Auth
import qualified Gargantext.Core.Types.Individu as Auth
import qualified Gargantext.Database.Admin.Types.Node as Node
-- | Authenticate and use the resulting Token to perform
-- auth-restricted actions
withAuthToken
:: ClientOpts -- ^ source of user/pass data
-> (SA.Token -> Node.NodeId -> ClientM a) -- ^ do something once authenticated
-> ClientM a
withAuthToken opts act
-- both user and password CLI arguments passed
| Helpful (Just usr) <- user opts
, Helpful (Just pw) <- pass opts = do
authRes <- postAuth (Auth.AuthRequest usr (Auth.GargPassword pw))
case Auth._authRes_valid authRes of
-- authentication failed, this function critically needs it to
-- be able to run the action, so we abort
Nothing -> problem $
"invalid auth response: " ++
maybe "" (show . Auth._authInv_message)
(Auth._authRes_inval authRes)
-- authentication went through, we can run the action
Just (Auth.AuthValid tok tree_id) -> do
let tok' = SA.Token (encodeUtf8 tok)
whenVerbose opts $ do
liftIO . putStrLn $ "[Debug] Authenticated: token=" ++ show tok ++
", tree_id=" ++ show tree_id
act tok' tree_id
-- user and/or pass CLI arguments not passed
| otherwise =
problem "auth-protected actions require --user and --pass"
module Core (problem, whenVerbose) where
import Options
import Options.Generic
import Control.Exception
import Control.Monad
import Control.Monad.Catch
import Servant.Client
newtype GargClientException = GCE String
instance Show GargClientException where
show (GCE s) = "Garg client exception: " ++ s
instance Exception GargClientException
-- | Abort with a message
problem :: String -> ClientM a
problem = throwM . GCE
-- | Only run the given computation when the @--verbose@ flag is
-- passed.
whenVerbose :: Monad m => ClientOpts -> m () -> m ()
whenVerbose opts act = when (unHelpful $ verbose opts) act
module Main where
import Control.Monad
import Network.HTTP.Client
import Options.Generic
import Servant.Client
import Options
import Script (script)
main :: IO ()
main = do
-- we parse CLI options
opts@(ClientOpts (Helpful uri) _ _ (Helpful verb)) <- getRecord "Gargantext client"
mgr <- newManager defaultManagerSettings
burl <- parseBaseUrl uri
when verb $ do
putStrLn $ "[Debug] user: " ++ maybe "<none>" show (unHelpful $ user opts)
putStrLn $ "[Debug] backend: " ++ show burl
-- we run 'script' from the Script module, reporting potential errors
res <- runClientM (script opts) (mkClientEnv mgr burl)
case res of
Left err -> putStrLn $ "[Client error] " ++ show err
Right a -> print a
{-# LANGUAGE TypeOperators #-}
module Options where
import Options.Generic
-- | Some general options to be specified on the command line.
data ClientOpts = ClientOpts
{ url :: String <?> "URL to gargantext backend"
, user :: Maybe Text <?> "(optional) username for auth-restricted actions"
, pass :: Maybe Text <?> "(optional) password for auth-restricted actions"
, verbose :: Bool <?> "Enable verbose output"
} deriving (Generic, Show)
instance ParseRecord ClientOpts
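-- Since ClientOpts derives ParseRecord, optparse-generic maps each record
-- field to a command-line flag of the same name. A plausible invocation
-- (the binary name and backend URL are assumptions, not fixed by this commit):
--
--   gargantext-client --url http://localhost:8008 \
--                     --user alice --pass secret --verbose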
module Script (script) where
import Control.Monad.IO.Class
import Gargantext.API.Client
import Servant.Client
import Auth
import Core
import Options
import Tracking
-- | An example script. Tweak, rebuild and re-run the executable to see the
-- effect of your changes. You can hit any gargantext endpoint in the body
-- of 'script' using the many (many!) client functions exposed by the
-- 'Gargantext.API.Client' module.
--
-- Don't forget to pass @--user@ and @--pass@ if you're using 'withAuthToken'.
script :: ClientOpts -> ClientM ()
script opts = do
-- we start by asking the backend for its version
ver <- getBackendVersion
liftIO . putStrLn $ "Backend version: " ++ show ver
-- next we authenticate using the credentials given on the command line
-- (through --user and --pass), erroring out loudly if the auth creds don't
-- go through, running the continuation otherwise.
withAuthToken opts $ \tok userNode -> do
liftIO . putStrLn $ "user node: " ++ show userNode
steps <-
-- we run a few client computations while tracking some EKG metrics
-- (any RTS stats or routing-related data), which means that we sample the
-- metrics at the beginning, the end, and in between each pair of steps.
tracking opts ["rts.gc.bytes_allocated"]
[ ("get roots", do
roots <- getRoots tok
liftIO . putStrLn $ "roots: " ++ show roots
)
, ("get user node detail", do
userNodeDetail <- getNode tok userNode
liftIO . putStrLn $ "user node details: " ++ show userNodeDetail
)
]
-- we pretty print the values we sampled for all metrics and the
-- results of all the steps
whenVerbose opts (ppTracked steps)
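-- Any other client function exposed by Gargantext.API.Client can be dropped
-- into the list of steps above; for instance (sketch only, the corpus id
-- 1234 is made up):
--
--   , ("get corpus", do
--       corpus <- getCorpus tok (NodeId 1234)
--       liftIO . putStrLn $ "corpus: " ++ show corpus
--     )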
{-# LANGUAGE TupleSections #-}
module Tracking
( tracking
, ppTracked
, EkgMetric
, Step
) where
import Core
import Options
import Control.Monad.IO.Class
import Data.List (intersperse)
import Data.Text (Text)
import Servant.Client
import System.Metrics.Json (Value)
import Gargantext.API.Client
import qualified Data.Text as T
-- | e.g @["rts", "gc", "bytes_allocated"]@
type EkgMetric = [Text]
-- | Any textual description of a step
type Step = Text
-- | Track EKG metrics before/after running a bunch of computations
-- that can talk to the backend.
tracking
:: ClientOpts
-> [Text] -- ^ e.g @["rts.gc.bytes_allocated"]@
-> [(Step, ClientM a)]
-> ClientM [Either [(EkgMetric, Value)] (Step, a)]
-- no steps, nothing to do
tracking _ _ [] = return []
-- no metrics to track, we just run the steps
tracking _ [] steps = traverse runStep steps
-- metrics to track: we intersperse metric fetching and steps,
-- starting and ending with metric fetching
tracking opts ms' steps = mix (Left <$> fetchMetrics) (map runStep steps)
where fetchMetrics :: ClientM [(EkgMetric, Value)]
fetchMetrics = flip traverse ms $ \metric -> do
whenVerbose opts $
liftIO . putStrLn $ "[Debug] metric to track: " ++ T.unpack (T.intercalate "." metric)
dat <- (metric,) <$> getMetricSample metric
whenVerbose opts $
liftIO . putStrLn $ "[Debug] metric pulled: " ++ show dat
return dat
mix :: ClientM a -> [ClientM a] -> ClientM [a]
mix x xs = sequence $ [x] ++ intersperse x xs ++ [x]
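        -- For example, mix m [s1, s2] = sequence [m, s1, m, s2, m]:
        -- metrics are sampled before, between and after the steps.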
ms = map (T.splitOn ".") ms'
-- | A trivial function to print results of steps and sampled metrics
ppTracked :: Show a => [Either [(EkgMetric, Value)] (Step, a)] -> ClientM ()
ppTracked [] = return ()
ppTracked (Right (step, a) : rest) = do
liftIO . putStrLn $ "[step: " ++ T.unpack step ++ "] returned: " ++ show a
ppTracked rest
ppTracked (Left ms : rest) = do
liftIO . putStrLn $ unlines
[ T.unpack (T.intercalate "." metric) ++ " = " ++ show val
| (metric, val) <- ms
]
ppTracked rest
runStep :: (Step, ClientM a) -> ClientM (Either e (Step, a))
runStep (step, act) = Right . (step,) <$> act
@@ -252,6 +252,35 @@ sqlSchema = do
DROP TRIGGER if EXISTS trigger_insert_count ON nodes_nodes;
-- Indexes needed to speed up the deletes
-- Trigger for constraint node_ngrams_node_id_fkey
CREATE INDEX IF NOT EXISTS node_ngrams_node_id_idx ON public.node_ngrams USING btree (node_id);
-- Trigger for constraint node_node_ngrams2_node_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams2_node_id_idx ON public.node_node_ngrams2 USING btree (node_id);
-- Trigger for constraint node_node_ngrams_node1_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams_node1_id_idx ON public.node_node_ngrams USING btree (node1_id);
-- Trigger for constraint node_node_ngrams_node2_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams_node2_id_idx ON public.node_node_ngrams USING btree (node2_id);
-- Trigger for constraint nodes_nodes_node1_id_fkey
CREATE INDEX IF NOT EXISTS nodes_nodes_node1_id_idx ON public.nodes_nodes USING btree (node1_id);
-- Trigger for constraint nodes_nodes_node2_id_fkey
CREATE INDEX IF NOT EXISTS nodes_nodes_node2_id_idx ON public.nodes_nodes USING btree (node2_id);
-- Trigger for constraint nodes_parent_id_fkey
CREATE INDEX IF NOT EXISTS nodes_parent_id_idx ON public.nodes USING btree (parent_id);
-- Trigger for constraint rights_node_id_fkey
CREATE INDEX IF NOT EXISTS rights_node_id_idx ON public.rights USING btree (node_id);
-- Trigger for constraint nodes_contexts_node_id_fkey
CREATE INDEX IF NOT EXISTS nodes_contexts_node_id_idx ON public.nodes_contexts USING btree (node_id);
-- Trigger for constraint context_node_ngrams_node_id_fkey
CREATE INDEX IF NOT EXISTS context_node_node_id_idx ON public.context_node_ngrams USING btree (node_id);
|]
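After running the upgrade, one quick sanity check is to ask Postgres whether a new index is actually picked up (illustrative id; any of the indexed tables works the same way):

EXPLAIN ANALYZE
  SELECT * FROM public.nodes_nodes WHERE node1_id = 42;
-- expected: an Index Scan using nodes_nodes_node1_id_idx, once the table is
-- large enough for the planner to prefer it over a sequential scan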
......
packages: .
-  -- ../servant-job
-  -- ../ekg-json
-  -- ../../../code/servant/servant
-  -- ../../../code/servant/servant-server
-  -- ../../../code/servant/servant-client-core
-  -- ../../../code/servant/servant-client
-  -- ../../../code/servant/servant-auth/servant-auth
-  -- ../../../code/servant/servant-auth/servant-auth-client
-  -- ../../../code/servant/servant-auth/servant-auth-server

-allow-newer: base, accelerate, servant, time
+allow-newer: base, accelerate, servant, time, classy-prelude

-- Patches
source-repository-package
@@ -20,7 +11,7 @@ source-repository-package
source-repository-package
  type: git
  location: https://github.com/alpmestan/ekg-json.git
-  tag: c7bde4851a7cd41b3f3debf0c57f11bbcb11d698
+  tag: fd7e5d7325939103cd87d0dc592faf644160341c

source-repository-package
  type: git
@@ -53,7 +44,7 @@ source-repository-package
source-repository-package
  type: git
  location: https://gitlab.iscpif.fr/gargantext/crawlers/pubmed.git
-  tag: a9d8e08a7ef82f90e29dfaced4071704a3163394
+  tag: 9cdba6423decad5acfacb0f274212fd8723ce734

source-repository-package
  type: git
@@ -112,7 +103,7 @@ source-repository-package
source-repository-package
  type: git
  location: https://github.com/delanoe/haskell-opaleye.git
-  tag: d3ab7acd5ede737478763630035aa880f7e34444
+  tag: 756cb90f4ce725463d957bc899d764e0ed73738c

source-repository-package
  type: git
@@ -146,4 +137,5 @@ source-repository-package
constraints: unordered-containers==0.2.14.*,
             servant-ekg==0.3.1,
-            time==1.9.3
+            time==1.9.3,
+            stm==2.5.0.1
#!/bin/bash
# sudo su postgres
# postgresql://$USER:$PW@localhost/$DB

-PW="C8kdcUrAQy66U"
-DB="gargandb1"
-USER="gargantua"
+INIFILE=$1
+
+getter () {
+  grep $1 $INIFILE | sed "s/^.*= //"
+}
+
+USER=$(getter "DB_USER")
+NAME=$(getter "DB_NAME")
+PASS=$(getter "DB_PASS")
+HOST=$(getter "DB_HOST")
+PORT=$(getter "DB_PORT")

#psql -c "CREATE USER \"${USER}\""
#psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'"

-psql -c "DROP DATABASE IF EXISTS \"${DB}\""
-createdb "${DB}"
-#psql "${DB}" < schema.sql
-../../bin/psql ../../gargantext.ini < gargandb.dump
-psql -c "ALTER DATABASE \"${DB}\" OWNER to \"${USER}\""
+psql -c "DROP DATABASE IF EXISTS \"${NAME}\""
+createdb "${NAME}"
+psql "${NAME}" < extensions.sql
+#psql "${NAME}" < schema.sql
+#../../bin/psql ../../gargantext.ini < gargandb.dump
+psql -c "ALTER DATABASE \"${NAME}\" OWNER to \"${USER}\""
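The `getter` helper just greps `KEY = value` pairs out of the ini file passed as `$1`. The entries it expects would look roughly like this (placeholder values; the real gargantext.ini layout is not shown in this diff):

# assumed gargantext.ini excerpt
DB_USER = gargantua
DB_NAME = gargandb
DB_PASS = <password>
DB_HOST = localhost
DB_PORT = 5432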
CREATE EXTENSION IF NOT EXISTS pgcrypto;
CREATE EXTENSION IF NOT EXISTS tsm_system_rows;
CREATE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id, ngrams_type);
DROP TRIGGER trigger_count_insert ON node_node_ngrams;
name: gargantext
-version: '0.0.5'
+version: '0.0.5.5.7'
synopsis: Search, map, share
description: Please see README.md
category: Data
......
@@ -106,7 +106,7 @@ repoSnapshot repoDir = repoDir <> "/repo.cbor"
repoSaverAction :: RepoDirFilePath -> Serialise a => a -> IO ()
repoSaverAction repoDir a = do
  withTempFile repoDir "tmp-repo.cbor" $ \fp h -> do
-    printDebug "repoSaverAction" fp
+    -- printDebug "repoSaverAction" fp
    L.hPut h $ serialise a
    hClose h
    renameFile fp (repoSnapshot repoDir)
......
{-# OPTIONS_GHC -freduction-depth=0 #-}
{-# OPTIONS_GHC -O0 #-}
module Gargantext.API.Client where
@@ -55,17 +55,18 @@ import Servant.Job.Core
import Servant.Job.Types
import System.Metrics.Json (Sample, Value)

--- * actual client functions for individual endpoints
+-- * version API
getBackendVersion :: ClientM Text

+-- * auth API
postAuth :: AuthRequest -> ClientM AuthResponse

+-- * admin api
getRoots :: Token -> ClientM [Node HyperdataUser]
putRoots :: Token -> ClientM Int -- not actually implemented in the backend
deleteNodes :: Token -> [NodeId] -> ClientM Int

+-- * node api
getNode :: Token -> NodeId -> ClientM (Node HyperdataAny)
getContext :: Token -> ContextId -> ClientM (Node HyperdataAny)
renameNode :: Token -> NodeId -> RenameNode -> ClientM [Int]
@@ -155,7 +156,7 @@ killNodeDocumentUploadAsyncJob :: Token -> NodeId -> JobID 'Unsafe -> Maybe Limi
pollNodeDocumentUploadAsyncJob :: Token -> NodeId -> JobID 'Unsafe -> Maybe Limit -> Maybe Offset -> ClientM (JobStatus 'Safe JobLog)
waitNodeDocumentUploadAsyncJob :: Token -> NodeId -> JobID 'Unsafe -> ClientM (JobOutput JobLog)

--- corpus api
+-- * corpus api
getCorpus :: Token -> CorpusId -> ClientM (Node HyperdataCorpus)
renameCorpus :: Token -> CorpusId -> RenameNode -> ClientM [Int]
postCorpus :: Token -> CorpusId -> PostNode -> ClientM [CorpusId]
@@ -244,13 +245,13 @@ killCorpusDocumentUploadAsyncJob :: Token -> CorpusId -> JobID 'Unsafe -> Maybe
pollCorpusDocumentUploadAsyncJob :: Token -> CorpusId -> JobID 'Unsafe -> Maybe Limit -> Maybe Offset -> ClientM (JobStatus 'Safe JobLog)
waitCorpusDocumentUploadAsyncJob :: Token -> CorpusId -> JobID 'Unsafe -> ClientM (JobOutput JobLog)

--- corpus node/node API
+-- * corpus node/node API
getCorpusNodeNode :: Token -> NodeId -> NodeId -> ClientM (Node HyperdataAny)

--- corpus export API
+-- * corpus export API
getCorpusExport :: Token -> CorpusId -> Maybe ListId -> Maybe NgramsType -> ClientM Corpus

+-- * annuaire api
getAnnuaire :: Token -> AnnuaireId -> ClientM (Node HyperdataAnnuaire)
renameAnnuaire :: Token -> AnnuaireId -> RenameNode -> ClientM [Int]
postAnnuaire :: Token -> AnnuaireId -> PostNode -> ClientM [AnnuaireId]
@@ -338,17 +339,17 @@ killAnnuaireDocumentUploadAsyncJob :: Token -> AnnuaireId -> JobID 'Unsafe -> Ma
pollAnnuaireDocumentUploadAsyncJob :: Token -> AnnuaireId -> JobID 'Unsafe -> Maybe Limit -> Maybe Offset -> ClientM (JobStatus 'Safe JobLog)
waitAnnuaireDocumentUploadAsyncJob :: Token -> AnnuaireId -> JobID 'Unsafe -> ClientM (JobOutput JobLog)

--- contact api
+-- * contact api
postAnnuaireContactAsync :: Token -> AnnuaireId -> ClientM (JobStatus 'Safe JobLog)
postAnnuaireContactAsyncJob :: Token -> AnnuaireId -> JobInput Maybe AddContactParams -> ClientM (JobStatus 'Safe JobLog)
killAnnuaireContactAsyncJob :: Token -> AnnuaireId -> JobID 'Unsafe -> Maybe Limit -> Maybe Offset -> ClientM (JobStatus 'Safe JobLog)
pollAnnuaireContactAsyncJob :: Token -> AnnuaireId -> JobID 'Unsafe -> Maybe Limit -> Maybe Offset -> ClientM (JobStatus 'Safe JobLog)
waitAnnuaireContactAsyncJob :: Token -> AnnuaireId -> JobID 'Unsafe -> ClientM (JobOutput JobLog)

--- contact node/node API
+-- * contact node/node API
getAnnuaireContactNodeNode :: Token -> NodeId -> NodeId -> ClientM (Node HyperdataContact)

--- document ngrams api
+-- * document ngrams api
getDocumentNgramsTable :: Token -> DocId -> TabType -> ListId -> Int -> Maybe Int -> Maybe ListType -> Maybe MinSize -> Maybe MaxSize -> Maybe Ngrams.OrderBy -> Maybe Text -> ClientM (VersionedWithCount NgramsTable)
putDocumentNgramsTable :: Token -> DocId -> TabType -> ListId -> Versioned NgramsTablePatch -> ClientM (Versioned NgramsTablePatch)
postRecomputeDocumentNgramsTableScore :: Token -> DocId -> TabType -> ListId -> ClientM Int
@@ -359,15 +360,14 @@ killDocumentNgramsTableAsyncJob :: Token -> DocId -> JobID 'Unsafe -> Maybe Limi
pollDocumentNgramsTableAsyncJob :: Token -> DocId -> JobID 'Unsafe -> Maybe Limit -> Maybe Offset -> ClientM (JobStatus 'Safe JobLog)
waitDocumentNgramsTableAsyncJob :: Token -> DocId -> JobID 'Unsafe -> ClientM (JobOutput JobLog)

--- document export API
+-- * document export API
getDocumentExportJSON :: Token -> DocId -> ClientM DocumentExport.DocumentExport
getDocumentExportCSV :: Token -> DocId -> ClientM Text
---getDocumentExportCSV :: Token -> DocId -> ClientM [DocumentExport.Document]

--- count api
+-- * count api
postCountQuery :: Token -> Query -> ClientM Counts

--- graph api
+-- * graph api
getGraphHyperdata :: Token -> NodeId -> ClientM HyperdataGraphAPI
postGraphAsync :: Token -> NodeId -> ClientM (JobStatus 'Safe JobLog)
postGraphAsyncJob :: Token -> NodeId -> JobInput Maybe () -> ClientM (JobStatus 'Safe JobLog)
@@ -382,6 +382,7 @@ postGraphRecomputeVersion :: Token -> NodeId -> ClientM Graph
getTree :: Token -> NodeId -> [NodeType] -> ClientM (Tree NodeTree)
getTreeFirstLevel :: Token -> NodeId -> [NodeType] -> ClientM (Tree NodeTree)

+-- * new corpus API
postNewCorpusWithFormAsync :: Token -> NodeId -> ClientM (JobStatus 'Safe JobLog)
postNewCorpusWithFormAsyncJob :: Token -> NodeId -> JobInput Maybe NewWithForm -> ClientM (JobStatus 'Safe JobLog)
killNewCorpusWithFormAsyncJob :: Token -> NodeId -> JobID 'Unsafe -> Maybe Limit -> Maybe Offset -> ClientM (JobStatus 'Safe JobLog)
@@ -394,6 +395,7 @@ killNewCorpusWithQueryAsyncJob :: Token -> NodeId -> JobID 'Unsafe -> Maybe Limi
pollNewCorpusWithQueryAsyncJob :: Token -> NodeId -> JobID 'Unsafe -> Maybe Limit -> Maybe Offset -> ClientM (JobStatus 'Safe JobLog)
waitNewCorpusWithQueryAsyncJob :: Token -> NodeId -> JobID 'Unsafe -> ClientM (JobOutput JobLog)

+-- * list API
getList :: Token -> NodeId -> ClientM (Headers '[Header "Content-Disposition" Text] (Map NgramsType (Versioned NgramsTableMap)))
postListJsonUpdateAsync :: Token -> NodeId -> ClientM (JobStatus 'Safe JobLog)
postListJsonUpdateAsyncJob :: Token -> NodeId -> JobInput Maybe WithFile -> ClientM (JobStatus 'Safe JobLog)
@@ -407,11 +409,14 @@ killListCsvUpdateAsyncJob :: Token -> NodeId -> JobID 'Unsafe -> Maybe Limit ->
pollListCsvUpdateAsyncJob :: Token -> NodeId -> JobID 'Unsafe -> Maybe Limit -> Maybe Offset -> ClientM (JobStatus 'Safe JobLog)
waitListCsvUpdateAsyncJob :: Token -> NodeId -> JobID 'Unsafe -> ClientM (JobOutput JobLog)

+-- * public API
getPublicData :: ClientM [PublicData]
getPublicNodeFile :: NodeId -> ClientM (Headers '[Header "Content-Type" Text] BSResponse)

--- ekg api
+-- * ekg api
+-- | get a sample of all metrics
getMetricsSample :: ClientM Sample
+-- | open @<backend:port\/ekg\/index.html@ to see a list of metrics
getMetricSample :: [Text] -> ClientM Value

-- * unpacking of client functions to derive all the individual clients
......
@@ -245,7 +245,7 @@ setListNgrams :: HasNodeStory env err m
              -> Map NgramsTerm NgramsRepoElement
              -> m ()
setListNgrams listId ngramsType ns = do
-  printDebug "[setListNgrams]" (listId, ngramsType)
+  -- printDebug "[setListNgrams]" (listId, ngramsType)
  getter <- view hasNodeStory
  var <- liftBase $ (getter ^. nse_getter) [listId]
  liftBase $ modifyMVar_ var $
@@ -283,7 +283,7 @@ commitStatePatch :: (HasNodeStory env err m, HasMail env)
                 -> Versioned NgramsStatePatch'
                 -> m (Versioned NgramsStatePatch')
commitStatePatch listId (Versioned p_version p) = do
-  printDebug "[commitStatePatch]" listId
+  -- printDebug "[commitStatePatch]" listId
  var <- getNodeStoryVar [listId]
  vq' <- liftBase $ modifyMVar var $ \ns -> do
    let
@@ -523,14 +523,13 @@ getTableNgrams _nType nId tabType listId limit_ offset
    selected_inner roots n = maybe False (`Set.member` roots) (n ^. ne_root)
    ---------------------------------------
-    sortOnOrder Nothing = identity
+    sortOnOrder Nothing = sortOnOrder (Just ScoreDesc)
    sortOnOrder (Just TermAsc)   = List.sortOn $ view ne_ngrams
    sortOnOrder (Just TermDesc)  = List.sortOn $ Down . view ne_ngrams
    sortOnOrder (Just ScoreAsc)  = List.sortOn $ view ne_occurrences
    sortOnOrder (Just ScoreDesc) = List.sortOn $ Down . view ne_occurrences
    ---------------------------------------
    filteredNodes :: Map NgramsTerm NgramsElement -> [NgramsElement]
    filteredNodes tableMap = rootOf <$> list & filter selected_node
      where
@@ -562,13 +561,13 @@ getTableNgrams _nType nId tabType listId limit_ offset
    setScores False table = pure table
    setScores True  table = do
      let ngrams_terms = table ^.. each . ne_ngrams
-      printDebug "ngrams_terms" ngrams_terms
+      -- printDebug "ngrams_terms" ngrams_terms
      t1 <- getTime
      occurrences <- getOccByNgramsOnlyFast' nId
                                             listId
                                             ngramsType
                                             ngrams_terms
-      printDebug "occurrences" occurrences
+      --printDebug "occurrences" occurrences
      t2 <- getTime
      liftBase $ hprint stderr
        ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n")
@@ -587,11 +586,13 @@ getTableNgrams _nType nId tabType listId limit_ offset
  let scoresNeeded = needsScores orderBy
  tableMap1 <- getNgramsTableMap listId ngramsType
  t1 <- getTime
  tableMap2 <- tableMap1 & v_data %%~ setScores scoresNeeded
                         . Map.mapWithKey ngramsElementFromRepo
  fltr <- tableMap2 & v_data %%~ fmap NgramsTable . setScores (not scoresNeeded)
                    . filteredNodes
  let fltrCount = length $ fltr ^. v_data . _NgramsTable
  t2 <- getTime
......
@@ -41,7 +41,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
import Gargantext.Database.Schema.Ngrams
-import Gargantext.Database.Schema.Node
+import Gargantext.Database.Schema.Context
import Gargantext.Database.Types (Indexed(..))
import Gargantext.Prelude
import Network.HTTP.Media ((//), (/:))
@@ -155,12 +155,12 @@ reIndexWith cId lId nt lts = do
          <$> HashMap.toList
          <$> getTermsWith identity [lId] nt lts
-  -- printDebug "ts" ts
+  printDebug "ts" ts

  -- Taking the ngrams with 0 occurrences only (orphans)
  occs <- getOccByNgramsOnlyFast' cId lId nt ts
-  -- printDebug "occs" occs
+  printDebug "occs" occs

  let orphans = List.concat
        $ map (\t -> case HashMap.lookup t occs of
@@ -168,28 +168,28 @@ reIndexWith cId lId nt lts = do
                Just n -> if n <= 1 then [t] else [ ]
              ) ts
-  -- printDebug "orphans" orphans
+  printDebug "orphans" orphans

  -- Get all documents of the corpus
  docs <- selectDocNodes cId
-  -- printDebug "docs length" (List.length docs)
+  printDebug "docs length" (List.length docs)

  -- Checking Text documents where orphans match
  -- TODO Tests here
  let
    ngramsByDoc = map (HashMap.fromList)
                $ map (map (\(k,v) -> (SimpleNgrams (text2ngrams k), v)))
                $ map (\doc -> List.zip
                  (termsInText (buildPatterns $ map (\k -> (Text.splitOn " " $ unNgramsTerm k, [])) orphans)
                    $ Text.unlines $ catMaybes
-                     [ doc ^. node_hyperdata . hd_title
-                     , doc ^. node_hyperdata . hd_abstract
+                     [ doc ^. context_hyperdata . hd_title
+                     , doc ^. context_hyperdata . hd_abstract
                      ]
                  )
-                  (List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. node_id) 1 )]])
-                ) (map context2node docs)
+                  (List.cycle [Map.fromList $ [(nt, Map.singleton (doc ^. context_id) 1 )]])
+                ) docs
-  -- printDebug "ngramsByDoc" ngramsByDoc
+  printDebug "ngramsByDoc" ngramsByDoc

  -- Saving the indexation in database
  _ <- mapM (saveDocNgramsWith lId) ngramsByDoc
......
@@ -578,7 +578,7 @@ ngramsElementFromRepo
      , _ne_parent = p
      , _ne_children = c
      , _ne_ngrams = ngrams
-      , _ne_occurrences = panic $ "API.Ngrams.Types._ne_occurrences"
+      , _ne_occurrences = 0 -- panic $ "API.Ngrams.Types._ne_occurrences"
{-
-- Here we could use 0 if we want to avoid any `panic`.
-- It will not happen using getTableNgrams if
......
@@ -18,7 +18,7 @@ module Gargantext.API.Node.Update
import Control.Lens (view)
import Data.Aeson
-import Data.Maybe (Maybe(..))
+import Data.Maybe (Maybe(..), fromMaybe)
import Data.Swagger
import GHC.Generics (Generic)
import Gargantext.API.Admin.Orchestrator.Types (JobLog(..), AsyncJobs)
@@ -30,7 +30,9 @@ import Gargantext.API.Prelude (GargServer, simuLogs)
import Gargantext.Core.Methods.Distances (GraphMetric(..))
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Graph.API (recomputeGraph)
+import Gargantext.Database.Action.Metrics (updateNgramsOccurrences, updateContextScore)
import Gargantext.Database.Action.Flow.Pairing (pairing)
+import Gargantext.Database.Query.Table.Node (defaultList)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Query.Table.Node (getNode)
@@ -165,7 +167,40 @@ updateNode _uId lId (UpdateNodeParamsList _mode) logStatus = do
                   }
  _ <- case corpusId of
-    Just cId -> reIndexWith cId lId NgramsTerms (Set.singleton MapTerm)
+    Just cId -> do
+      _ <- reIndexWith cId lId NgramsTerms (Set.singleton MapTerm)
+      _ <- updateNgramsOccurrences cId (Just lId)
+      pure ()
+    Nothing -> pure ()
+
+  pure JobLog { _scst_succeeded = Just 3
+              , _scst_failed    = Just 0
+              , _scst_remaining = Just 0
+              , _scst_events    = Just []
+              }
+
+updateNode _uId tId (UpdateNodeParamsTexts _mode) logStatus = do
+  logStatus JobLog { _scst_succeeded = Just 1
+                   , _scst_failed    = Just 0
+                   , _scst_remaining = Just 2
+                   , _scst_events    = Just []
+                   }
+  corpusId <- view node_parent_id <$> getNode tId
+  lId <- defaultList $ fromMaybe (panic "[G.A.N.Update] updateNode/UpdateNodeParamsTexts: no defaultList") corpusId
+
+  logStatus JobLog { _scst_succeeded = Just 2
+                   , _scst_failed    = Just 0
+                   , _scst_remaining = Just 1
+                   , _scst_events    = Just []
+                   }
+
+  _ <- case corpusId of
+    Just cId -> do
+      _ <- reIndexWith cId lId NgramsTerms (Set.singleton MapTerm)
+      _ <- updateNgramsOccurrences cId (Just lId)
+      _ <- updateContextScore cId (Just lId)
+      -- printDebug "updateContextsScore" (cId, lId, u)
+      pure ()
    Nothing -> pure ()
  pure JobLog { _scst_succeeded = Just 3
@@ -175,6 +210,9 @@ updateNode _uId lId (UpdateNodeParamsList _mode) logStatus = do
              }

updateNode _uId _nId _p logStatus = do
  simuLogs logStatus 10
......
@@ -177,8 +177,8 @@ type NodeStoryDir = FilePath
writeNodeStories :: NodeStoryDir -> NodeListStory -> IO ()
writeNodeStories fp nls = do
-  done <- mapM (writeNodeStory fp) $ splitByNode nls
-  printDebug "[writeNodeStories]" done
+  _done <- mapM (writeNodeStory fp) $ splitByNode nls
+  -- printDebug "[writeNodeStories]" done
  pure ()

writeNodeStory :: NodeStoryDir -> (NodeId, NodeListStory) -> IO ()
@@ -192,7 +192,7 @@ splitByNode (NodeStory m) =
saverAction' :: Serialise a => NodeStoryDir -> NodeId -> a -> IO ()
saverAction' repoDir nId a = do
  withTempFile repoDir ((cs $ show nId) <> "-tmp-repo.cbor") $ \fp h -> do
-    printDebug "[repoSaverAction]" fp
+    -- printDebug "[repoSaverAction]" fp
    DBL.hPut h $ serialise a
    hClose h
    renameFile fp (nodeStoryPath repoDir nId)
......
@@ -76,15 +76,16 @@ buildNgramsLists :: ( HasNodeStory env err m
buildNgramsLists user uCid mCid mfslw gp = do
  ngTerms <- buildNgramsTermsList user uCid mCid mfslw gp (NgramsTerms, MapListSize 350)
  othersTerms <- mapM (buildNgramsOthersList user uCid mfslw GroupIdentity)
-                     [ (Authors   , MapListSize 9)
-                     , (Sources   , MapListSize 9)
-                     , (Institutes, MapListSize 9)
+                     [ (Authors   , MapListSize 9, MaxListSize 1000)
+                     , (Sources   , MapListSize 9, MaxListSize 1000)
+                     , (Institutes, MapListSize 9, MaxListSize 1000)
                     ]
  pure $ Map.unions $ [ngTerms] <> othersTerms

data MapListSize = MapListSize { unMapListSize :: !Int }
+data MaxListSize = MaxListSize { unMaxListSize :: !Int }

buildNgramsOthersList :: ( HasNodeError err
                         , CmdM env err m
@@ -95,9 +96,9 @@ buildNgramsOthersList :: ( HasNodeError err
                      -> UserCorpusId
                      -> Maybe FlowSocialListWith
                      -> GroupParams
-                      -> (NgramsType, MapListSize)
+                      -> (NgramsType, MapListSize, MaxListSize)
                      -> m (Map NgramsType [NgramsElement])
-buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize) = do
+buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize, MaxListSize maxListSize) = do
  allTerms :: HashMap NgramsTerm (Set NodeId) <- getContextsByNgramsUser uCid nt

  -- PrivateFirst for first developments since Public NodeMode is not implemented yet
@@ -118,6 +119,7 @@ buildNgramsOthersList user uCid mfslw _groupParams (nt, MapListSize mapListSize)
      listSize = mapListSize - (List.length mapTerms)
      (mapTerms', candiTerms) = both HashMap.fromList
                              $ List.splitAt listSize
+                              $ List.take maxListSize
                              $ List.sortOn (Down . viewScore . snd)
                              $ HashMap.toList tailTerms'
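Read bottom-up, the pipeline above now sorts the tail terms by descending score, caps them at `maxListSize`, and only then splits off the map list. A toy illustration (made-up scores, and ignoring the `listSize` adjustment for terms already promoted):

-- With mapListSize = 2 and maxListSize = 4, terms sorted by score
--   [("a",9), ("b",7), ("c",5), ("d",3), ("e",1)]
-- become:
--   List.take 4    -> [("a",9), ("b",7), ("c",5), ("d",3)]  -- "e" capped away
--   List.splitAt 2 -> map terms:  [("a",9), ("b",7)]
--                     candidates: [("c",5), ("d",3)]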
......
@@ -86,6 +86,7 @@ import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Flow.Utils (insertDocNgrams, DocumentIdWithNgrams(..))
import Gargantext.Database.Action.Search (searchDocInDatabase)
import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
+import Gargantext.Database.Action.Metrics (updateNgramsOccurrences)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import Gargantext.Database.Prelude
@@ -281,6 +282,8 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
  --_ <- mkPhylo  userCorpusId userId
  -- Annuaire Flow
  -- _ <- mkAnnuaire  rootUserId userId
+  _ <- updateNgramsOccurrences userCorpusId (Just listId)
+
  pure userCorpusId
@@ -320,6 +323,8 @@ saveDocNgramsWith :: ( FlowCmdM env err m)
                  -> m ()
saveDocNgramsWith lId mapNgramsDocs' = do
  terms2id <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
+  printDebug "terms2id" terms2id
+
  let mapNgramsDocs = HashMap.mapKeys extracted2ngrams mapNgramsDocs'

  -- new
@@ -327,7 +332,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
        $ map (first _ngramsTerms . second Map.keys)
        $ HashMap.toList mapNgramsDocs
-  -- printDebug "saveDocNgramsWith" mapCgramsId
+  printDebug "saveDocNgramsWith" mapCgramsId
  -- insertDocNgrams
  _return <- insertContextNodeNgrams2
    $ catMaybes [ ContextNodeNgrams2 <$> Just nId
......
@@ -10,17 +10,27 @@ Portability : POSIX
Node API
-}

+{-# LANGUAGE QuasiQuotes #-}
+
module Gargantext.Database.Action.Metrics
  where

+import Database.PostgreSQL.Simple.SqlQQ (sql)
import Data.HashMap.Strict (HashMap)
+import Data.Map (Map)
+import Data.Set (Set)
+import Database.PostgreSQL.Simple (Query, Only(..))
+import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Data.Vector (Vector)
+import Gargantext.Core (HasDBid(toDBid))
import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo')
-import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm)
-import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
+import Gargantext.Database.Prelude (runPGSQuery{-, formatPGSQuery-})
+import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..))
import Gargantext.Core.Mail.Types (HasMail)
-import Gargantext.Core.Types (ListType(..), Limit, NodeType(..))
import Gargantext.Core.NodeStory
+import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
+import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
+import Gargantext.Core.Types (ListType(..), Limit, NodeType(..), ContextId)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser{-, getTficfWith-})
import Gargantext.Database.Admin.Config (userMaster)
@@ -29,6 +39,10 @@ import Gargantext.Database.Query.Table.Node (defaultList)
import Gargantext.Database.Query.Table.Node.Select
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM
+import qualified Data.Map as Map
+import qualified Data.Set as Set
+import qualified Data.List as List
+import qualified Data.Text as Text

getMetrics :: FlowCmdM env err m
           => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
@@ -46,39 +60,188 @@ getNgramsCooc :: (FlowCmdM env err m)
       , HashMap (NgramsTerm, NgramsTerm) Int
       )
getNgramsCooc cId maybeListId tabType maybeLimit = do
-  (ngs', ngs) <- getNgrams cId maybeListId tabType
-  let
-    take' Nothing  xs = xs
-    take' (Just n) xs = take n xs
-  lId <- defaultList cId
+  lId <- case maybeListId of
+    Nothing   -> defaultList cId
+    Just lId' -> pure lId'
+
+  (ngs', ngs) <- getNgrams lId tabType
+
  lIds <- selectNodesWithUsername NodeList userMaster
  myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True)
                           <$> groupNodesByNgrams ngs
-                          <$> getContextsByNgramsOnlyUser cId (lIds <> [lId]) (ngramsTypeFromTabType tabType)
-                                                          (take' maybeLimit $ HM.keys ngs)
+                          <$> getContextsByNgramsOnlyUser cId
+                                                          (lIds <> [lId])
+                                                          (ngramsTypeFromTabType tabType)
+                                                          (take' maybeLimit $ HM.keys ngs)
  pure $ (ngs', ngs, myCooc)

+------------------------------------------------------------------------
+------------------------------------------------------------------------
+updateNgramsOccurrences :: (FlowCmdM env err m)
+  => CorpusId -> Maybe ListId
+  -> m ()
+updateNgramsOccurrences cId mlId = do
+  _ <- mapM (updateNgramsOccurrences' cId mlId Nothing) [Terms, Sources, Authors, Institutes]
+  pure ()
+
-getNgrams :: (HasMail env, HasNodeStory env err m)
-  => CorpusId -> Maybe ListId -> TabType
-  -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
-       , HashMap NgramsTerm (Maybe RootTerm)
-       )
-getNgrams cId maybeListId tabType = do
+updateNgramsOccurrences' :: (FlowCmdM env err m)
+  => CorpusId -> Maybe ListId -> Maybe Limit -> TabType
+  -> m [Int]
+updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
  lId <- case maybeListId of
    Nothing   -> defaultList cId
    Just lId' -> pure lId'
+
+  result <- getNgramsOccurrences cId lId tabType maybeLimit
+
+  let
+    toInsert :: [[Action]]
+    toInsert = map (\(ngramsTerm, score)
+                     -> [ toField cId
+                        , toField lId
+                        , toField $ unNgramsTerm ngramsTerm
+                        , toField $ toDBid $ ngramsTypeFromTabType tabType
+                        , toField score
+                        ]
+                   )
+             $ HM.toList result
+
+    queryInsert :: Query
+    queryInsert = [sql|
+      WITH input(corpus_id, list_id, terms, type_id, weight) AS (?)
+      INSERT into node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
+      SELECT input.corpus_id,input.list_id,ngrams.id,input.type_id,input.weight FROM input
+      JOIN ngrams on ngrams.terms = input.terms
+      ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
+      DO UPDATE SET weight = excluded.weight
+      RETURNING 1
+      |]
+
+  let fields = map (\t -> QualifiedIdentifier Nothing t)
+             $ map Text.pack ["int4", "int4", "text", "int4", "int4"]
+
+  map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
+
+------------------------------------------------------------------------
+-- Used for scores in Ngrams Table
+getNgramsOccurrences :: (FlowCmdM env err m)
+  => CorpusId -> ListId -> TabType -> Maybe Limit
+  -> m (HashMap NgramsTerm Int)
+getNgramsOccurrences c l t ml = HM.map Set.size <$> getNgramsContexts c l t ml
+
+getNgramsContexts :: (FlowCmdM env err m)
+  => CorpusId -> ListId -> TabType -> Maybe Limit
+  -> m (HashMap NgramsTerm (Set ContextId))
+getNgramsContexts cId lId tabType maybeLimit = do
+  (_ngs', ngs) <- getNgrams lId tabType
+  lIds <- selectNodesWithUsername NodeList userMaster
+
+  -- TODO maybe add an option to group here
+  getContextsByNgramsOnlyUser cId
+                              (lIds <> [lId])
+                              (ngramsTypeFromTabType tabType)
+                              (take' maybeLimit $ HM.keys ngs)
+
+------------------------------------------------------------------------
+updateContextScore :: (FlowCmdM env err m)
+  => CorpusId -> Maybe ListId
+  -> m [Int]
+updateContextScore cId maybeListId = do
+  lId <- case maybeListId of
+    Nothing   -> defaultList cId
+    Just lId' -> pure lId'
+
+  result <- getContextsNgramsScore cId lId Terms MapTerm Nothing
+
+  let
+    toInsert :: [[Action]]
+    toInsert = map (\(contextId, score)
+                     -> [ toField cId
+                        , toField contextId
+                        , toField score
+                        ]
+                   )
+             $ Map.toList result
+
+    queryInsert :: Query
+    queryInsert = [sql|
+      WITH input(node_id, context_id, score) AS (?)
+      UPDATE nodes_contexts nc
+      SET score = input.score
+      FROM input
+      WHERE nc.node_id = input.node_id
+      AND nc.context_id = input.context_id
+      RETURNING 1
+      |]
+
+  let fields = map (\t -> QualifiedIdentifier Nothing t)
+             $ map Text.pack ["int4", "int4", "int4"]
+
+  map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
+
+-- Used for scores in Doc Table
+getContextsNgramsScore :: (FlowCmdM env err m)
+  => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
+  -> m (Map ContextId Int)
+getContextsNgramsScore cId lId tabType listType maybeLimit
+  = Map.map Set.size <$> getContextsNgrams cId lId tabType listType maybeLimit
+
+getContextsNgrams :: (FlowCmdM env err m)
+  => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
+  -> m (Map ContextId (Set NgramsTerm))
+getContextsNgrams cId lId tabType listType maybeLimit = do
+  (ngs', ngs) <- getNgrams lId tabType
+  lIds <- selectNodesWithUsername NodeList userMaster
+
+  result <- groupNodesByNgrams ngs <$> getContextsByNgramsOnlyUser
+                                         cId
+                                         (lIds <> [lId])
+                                         (ngramsTypeFromTabType tabType)
+                                         ( take' maybeLimit
+                                         $ HM.keys
+                                         $ HM.filter (\v -> fst v == listType) ngs'
+                                         )
+  -- printDebug "getCoocByNgrams" result
+
+  pure $ Map.fromListWith (<>)
+       $ List.concat
+       $ map (\(ng, contexts) -> List.zip (Set.toList contexts) (List.cycle [Set.singleton ng]))
+       $ HM.toList result
+
+------------------------------------------------------------------------
+------------------------------------------------------------------------
+getNgrams :: (HasMail env, HasNodeStory env err m)
+  => ListId -> TabType
+  -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
+       , HashMap NgramsTerm (Maybe RootTerm)
+       )
+getNgrams lId tabType = do
  lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType) <$> getRepo' [lId]
  let maybeSyn = HM.unions $ map (\t -> filterListWithRoot t lists)
                                 [MapTerm, StopTerm, CandidateTerm]
  pure (lists, maybeSyn)
+
+-- Some useful Tools
+take' :: Maybe Int -> [a] -> [a]
+take' Nothing  xs = xs
+take' (Just n) xs = take n xs
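For readers unfamiliar with postgresql-simple, the `(?)` in the two queries above is substituted by the `Values` structure built from `toInsert`: each inner `[Action]` becomes one row of a typed `VALUES` list. Roughly, the upsert executes as if written like this (a sketch for a single row; 42, 43, 4 and 7 are invented ids/weights):

-- Assumed rendering of the substituted upsert, illustrative values only:
WITH input(corpus_id, list_id, terms, type_id, weight) AS
  (VALUES (42 :: int4, 43 :: int4, 'haskell' :: text, 4 :: int4, 7 :: int4))
INSERT INTO node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
SELECT input.corpus_id, input.list_id, ngrams.id, input.type_id, input.weight FROM input
JOIN ngrams ON ngrams.terms = input.terms
ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
DO UPDATE SET weight = excluded.weight
RETURNING 1;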
...@@ -16,21 +16,23 @@ Ngrams by node enable contextual metrics. ...@@ -16,21 +16,23 @@ Ngrams by node enable contextual metrics.
module Gargantext.Database.Action.Metrics.NgramsByContext module Gargantext.Database.Action.Metrics.NgramsByContext
where where
-- import Debug.Trace (trace)
--import Data.Map.Strict.Patch (PatchMap, Replace, diff) --import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.Map (Map) import Data.Map (Map)
import Data.Maybe (catMaybes)
import Data.Set (Set) import Data.Set (Set)
import Data.Text (Text) import Data.Text (Text)
import Data.Tuple.Extra (first, second, swap) import Data.Tuple.Extra (first, second, swap)
import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
-- import Debug.Trace (trace)
import Gargantext.Core
import Gargantext.API.Ngrams.Types (NgramsTerm(..)) import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core
import Gargantext.Data.HashMap.Strict.Utils as HM import Gargantext.Data.HashMap.Strict.Utils as HM
import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId, MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId) import Gargantext.Database.Admin.Types.Node (ListId, CorpusId, NodeId(..), ContextId, MasterCorpusId, NodeType(NodeDocument), UserCorpusId, DocId)
import Gargantext.Database.Prelude (Cmd, runPGSQuery) import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..)) import Gargantext.Database.Query.Table.Ngrams (selectNgramsId)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..), NgramsId)
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.HashMap.Strict as HM import qualified Data.HashMap.Strict as HM
import qualified Data.Map as Map import qualified Data.Map as Map
...@@ -111,37 +113,43 @@ getOccByNgramsOnlyFast' :: CorpusId
                        -> NgramsType
                        -> [NgramsTerm]
                        -> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlyFast' cId lId nt tms = do -- trace (show (cId, lId)) $
  mapNgramsIds <- selectNgramsId $ map unNgramsTerm tms
  HM.fromListWith (+) <$> catMaybes
                      <$> map (\(nId, s) -> (,) <$> (NgramsTerm <$> (Map.lookup nId mapNgramsIds)) <*> (Just $ round s))
                      <$> run cId lId nt (Map.keys mapNgramsIds)
  where

    run :: CorpusId
        -> ListId
        -> NgramsType
        -> [NgramsId]
        -> Cmd err [(NgramsId, Double)]
    run cId' lId' nt' tms' = runPGSQuery query
                ( Values fields (DPS.Only <$> tms')
                , cId'
                , lId'
                , ngramsTypeId nt'
                )

    fields = [QualifiedIdentifier Nothing "int4"]

    query :: DPS.Query
    query = [sql|
      WITH input_ngrams(id) AS (?)

      SELECT ngi.id, nng.weight FROM nodes_contexts nc
        JOIN node_node_ngrams nng ON nng.node1_id = nc.node_id
        JOIN input_ngrams     ngi ON nng.ngrams_id = ngi.id
        WHERE nng.node1_id    = ?
          AND nng.node2_id    = ?
          AND nng.ngrams_type = ?
          AND nc.category     > 0
        GROUP BY ngi.id, nng.weight
      |]
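-- A minimal usage sketch (ids and counts hypothetical, for illustration):
--
--   occs <- getOccByNgramsOnlyFast' cId lId NgramsTerms [NgramsTerm "abc"]
--   -- occs :: HashMap NgramsTerm Int
--
-- Terms are first resolved to ngrams ids via selectNgramsId, the SQL then
-- aggregates weights per id, and ids are mapped back to NgramsTerm through
-- mapNgramsIds; ids missing from that map are dropped by catMaybes.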
selectNgramsOccurrencesOnlyByContextUser_withSample :: HasDBid NodeType
......
...@@ -25,8 +25,11 @@ import Gargantext.Database.Query.Facet
import Gargantext.Database.Query.Filter
import Gargantext.Database.Query.Join (leftJoin5)
import Gargantext.Database.Query.Table.Node
import Gargantext.Database.Query.Table.Context
import Gargantext.Database.Query.Table.NodeNode
import Gargantext.Database.Query.Table.NodeContext
import Gargantext.Database.Schema.Node
import Gargantext.Database.Schema.Context
import Gargantext.Prelude
import Gargantext.Core.Text.Terms.Mono.Stem.En (stemIt)
import Opaleye hiding (Order)
...@@ -80,27 +83,27 @@ queryInCorpus :: HasDBid NodeType
              -> Text
              -> O.Select FacetDocRead
queryInCorpus cId t q = proc () -> do
  (c, nc) <- joinInCorpus -< ()
  restrict -< (nc^.nc_node_id) .== (toNullable $ pgNodeId cId)
  restrict -< if t
                 then (nc^.nc_category) .== (toNullable $ sqlInt4 0)
                 else (nc^.nc_category) .>= (toNullable $ sqlInt4 1)
  restrict -< (c ^. cs_search) @@ (sqlTSQuery (unpack q))
  restrict -< (c ^. cs_typename ) .== (sqlInt4 $ toDBid NodeDocument)
  returnA  -< FacetDoc { facetDoc_id         = c^.cs_id
                       , facetDoc_created    = c^.cs_date
                       , facetDoc_title      = c^.cs_name
                       , facetDoc_hyperdata  = c^.cs_hyperdata
                       , facetDoc_category   = nc^.nc_category
                       , facetDoc_ngramCount = nc^.nc_score
                       , facetDoc_score      = nc^.nc_score
                       }

joinInCorpus :: O.Select (ContextSearchRead, NodeContextReadNull)
joinInCorpus = leftJoin queryContextSearchTable queryNodeContextTable cond
  where
    cond :: (ContextSearchRead, NodeContextRead) -> Column SqlBool
    cond (c, nc) = nc^.nc_context_id .== _cs_id c
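-- Note: documents now live in the contexts table and attach to a corpus via
-- nodes_contexts, hence the ContextSearchRead / NodeContextReadNull pair here
-- in place of the former NodeSearchRead / NodeNodeReadNull join.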
------------------------------------------------------------------------
searchInCorpusWithContacts
......
...@@ -35,7 +35,7 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList)
        RETURN NEW;
      END IF;
      IF TG_OP = 'INSERT' THEN
        INSERT INTO node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
        select n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type, count(*) from NEW as new0
          INNER JOIN contexts n  ON n.id  = new0.context_id
          INNER JOIN nodes    n2 ON n2.id = new0.node_id
...@@ -43,8 +43,8 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList)
          AND n.typename = ?        -- not mandatory
          AND n.parent_id <> n2.id  -- not mandatory
          GROUP BY n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type
        ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
          DO UPDATE set weight = node_node_ngrams.weight + excluded.weight
        ;
      END IF;
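        -- On re-insertion of the same (node1_id, node2_id, ngrams_id, ngrams_type)
        -- key, the upsert above accumulates weights instead of failing.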
......
...@@ -42,7 +42,7 @@ triggerInsertCount lId = execPGSQuery query (lId, nodeTypeId NodeList)
          , count(*) AS weight
        FROM NEW as new1
          INNER JOIN contexts doc ON doc.id = new1.context_id
          INNER JOIN nodes lists ON lists.parent_id = new1.node_id
          INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id
        WHERE lists.id in (?, lists.id)
          AND lists.typename = ?
...@@ -76,9 +76,9 @@ triggerUpdateAdd lId = execPGSQuery query (lId, nodeTypeId NodeList)
          , cnn.ngrams_type AS ngrams_type
          , count(*) AS fix_count
        FROM NEW as new1
          INNER JOIN contexts doc ON doc.id = new1.context_id
          INNER JOIN nodes lists ON lists.parent_id = new1.node_id
          INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id
        WHERE lists.id in (?, lists.id) -- (masterList_id, userLists)
          AND lists.typename = ?
        GROUP BY node1_id, node2_id, ngrams_id, ngrams_type
......
...@@ -140,6 +140,22 @@ runPGSQuery q a = mkCmd $ \conn -> catch (PGS.query conn q a) (printError conn)
      hPutStrLn stderr q'
      throw (SomeException e)
{-
-- TODO
runPGSQueryFold :: ( CmdM env err m
, PGS.FromRow r
)
=> PGS.Query -> a -> (a -> r -> IO a) -> m a
runPGSQueryFold q initialState consume = mkCmd $ \conn -> catch (PGS.fold_ conn q initialState consume) (printError conn)
where
printError c (SomeException e) = do
q' <- PGS.formatQuery c q
hPutStrLn stderr q'
throw (SomeException e)
-}
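-- (PGS.fold_ streams each row through the accumulator instead of
-- materialising the whole result list, which is the point of this TODO.)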
-- | TODO catch error
runPGSQuery_ :: ( CmdM env err m
                , PGS.FromRow r
......
...@@ -20,7 +20,7 @@ Portability : POSIX
module Gargantext.Database.Query.Facet
  ( runViewAuthorsDoc
  , runViewDocuments
  -- , viewDocuments'
  , runCountDocuments
  , filterWith
...@@ -306,8 +306,7 @@ runViewDocuments cId t o l order query = do
  printDebug "[runViewDocuments] sqlQuery" $ showSql sqlQuery
  runOpaQuery $ filterWith o l order sqlQuery
  where
    sqlQuery = viewDocuments cId t (toDBid NodeDocument) query

runCountDocuments :: HasDBid NodeType => CorpusId -> IsTrash -> Maybe Text -> Cmd err Int
runCountDocuments cId t mQuery = do
...@@ -331,22 +330,6 @@ viewDocuments cId t ntId mQuery = viewDocumentsQuery cId t ntId mQuery >>> proc
                       , facetDoc_score = toNullable $ nc^.nc_score
                       }
viewDocuments' :: CorpusId
-> IsTrash
-> NodeTypeId
-> Maybe Text
-> Select NodeRead
viewDocuments' cId t ntId mQuery = viewDocumentsQuery cId t ntId mQuery >>> proc (c, _nc) -> do
returnA -< Node { _node_id = _cs_id c
, _node_hash_id = ""
, _node_typename = _cs_typename c
, _node_user_id = _cs_user_id c
, _node_parent_id = -1
, _node_name = _cs_name c
, _node_date = _cs_date c
, _node_hyperdata = _cs_hyperdata c
}
viewDocumentsQuery :: CorpusId
                   -> IsTrash
                   -> NodeTypeId
......
...@@ -18,27 +18,29 @@ module Gargantext.Database.Query.Table.Ngrams
  , queryNgramsTable
  , selectNgramsByDoc
  , insertNgrams
  , selectNgramsId
  )
  where

import Control.Lens ((^.))
import Data.ByteString.Internal (ByteString)
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Data.Text (Text)
import Gargantext.Core.Types
import Gargantext.Database.Prelude (runOpaQuery, Cmd, formatPGSQuery, runPGSQuery)
import Gargantext.Database.Query.Join (leftJoin3)
import Gargantext.Database.Query.Table.ContextNodeNgrams2
import Gargantext.Database.Query.Table.NodeNgrams (queryNodeNgramsTable)
import Gargantext.Database.Schema.Ngrams
import Gargantext.Database.Schema.NodeNgrams
import Gargantext.Database.Schema.Prelude
import Gargantext.Database.Types
import Gargantext.Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Map as Map
import qualified Database.PostgreSQL.Simple as PGS

queryNgramsTable :: Select NgramsRead
queryNgramsTable = selectTable ngramsTable
...@@ -106,3 +108,28 @@ queryInsertNgrams = [sql|
  FROM input_rows
  JOIN ngrams c USING (terms); -- columns of unique index
  |]
--------------------------------------------------------------------------
selectNgramsId :: [Text] -> Cmd err (Map NgramsId Text)
selectNgramsId ns =
if List.null ns
then pure Map.empty
else Map.fromList <$> map (\(Indexed i t) -> (i, t)) <$> (selectNgramsId' ns)
selectNgramsId' :: [Text] -> Cmd err [Indexed Int Text]
selectNgramsId' ns = runPGSQuery querySelectNgramsId ( PGS.Only
$ Values fields ns
)
where
fields = map (\t -> QualifiedIdentifier Nothing t) ["text"]
querySelectNgramsId :: PGS.Query
querySelectNgramsId = [sql|
WITH input_rows(terms) AS (?)
SELECT n.id, n.terms
FROM ngrams n
JOIN input_rows ir ON ir.terms = n.terms
GROUP BY n.terms, n.id
|]
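-- Example (ids hypothetical): selectNgramsId ["ngrams","table"] is expected
-- to yield fromList [(1,"ngrams"),(2,"table")]. The List.null guard in
-- selectNgramsId short-circuits to an empty Map without a database round trip.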
...@@ -48,14 +48,14 @@ add_debug pId ns = formatPGSQuery queryAdd (Only $ Values fields inputData)
-- | Input Tables: types of the tables
inputSqlTypes :: [Text]
inputSqlTypes = ["int4","int4","int4","int4"]

-- | SQL query to add documents
-- TODO return id of added documents only
queryAdd :: Query
queryAdd = [sql|
  WITH input_rows(node_id,context_id,score,category) AS (?)
  INSERT INTO nodes_contexts (node_id, context_id,score,category)
  SELECT * FROM input_rows
  ON CONFLICT (node_id, context_id) DO NOTHING -- on unique index
  RETURNING 1
...@@ -75,6 +75,7 @@ data InputData = InputData { inNode_id :: NodeId
instance ToRow InputData where
  toRow inputData = [ toField (inNode_id inputData)
                    , toField (inContext_id inputData)
                    , toField (0 :: Int)
                    , toField (1 :: Int)
                    ]
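-- The two literal fields line up positionally with
-- input_rows(node_id, context_id, score, category) in queryAdd above:
-- every row inserted into nodes_contexts starts with score 0 and category 1.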
...@@ -54,3 +54,6 @@ insertNodeNodeNgramsW nnnw =
               , iOnConflict = (Just DoNothing)
               })
...@@ -11,14 +11,16 @@ Ngrams connection to the Database.
-}

{-# OPTIONS_GHC -fno-warn-orphans #-}

{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}

module Gargantext.Database.Schema.Ngrams
  where

import Data.Maybe (fromMaybe)
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Codec.Serialise (Serialise())
...@@ -32,6 +34,7 @@ import Gargantext.Core.Types (TODO(..), Typed(..))
import Gargantext.Prelude
import Servant (FromHttpApiData(..), Proxy(..), ToHttpApiData(..))
import Text.Read (read)
import Gargantext.Core (HasDBid(..))
import Gargantext.Database.Types
import Gargantext.Database.Schema.Prelude
import qualified Database.PostgreSQL.Simple as PGS
...@@ -82,6 +85,7 @@ data NgramsType = Authors | Institutes | Sources | NgramsTerms
instance Serialise NgramsType

ngramsTypes :: [NgramsType]
ngramsTypes = [minBound..]
...@@ -141,6 +145,16 @@ fromNgramsTypeId id = lookup id
                  | nt <- [minBound .. maxBound] :: [NgramsType]
                  ]
unNgramsTypeId :: NgramsTypeId -> Int
unNgramsTypeId (NgramsTypeId i) = i
toNgramsTypeId :: Int -> NgramsTypeId
toNgramsTypeId i = NgramsTypeId i
instance HasDBid NgramsType where
toDBid = unNgramsTypeId . ngramsTypeId
fromDBid = fromMaybe (panic "NgramsType id not indexed") . fromNgramsTypeId . toNgramsTypeId
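-- Round-trip law this instance is meant to satisfy (sketch):
--   fromDBid (toDBid nt) == nt   for every nt :: NgramsType
-- fromDBid panics on ids that fromNgramsTypeId does not recognise.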
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO put it in Gargantext.Core.Text.Ngrams
...@@ -160,6 +174,9 @@ instance FromField Ngrams where
    x <- fromField fld mdata
    pure $ text2ngrams x
instance PGS.ToRow Text where
toRow t = [toField t]
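-- Orphan instance (hence the -fno-warn-orphans pragma above): it lets a plain
-- list of Text values serve as single-column rows for a VALUES (?) placeholder,
-- as in selectNgramsId'.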
text2ngrams :: Text -> Ngrams
text2ngrams txt = UnsafeNgrams txt' $ length $ splitOn " " txt'
  where
......
...@@ -57,7 +57,7 @@ extra-deps:
- git: https://github.com/delanoe/servant-static-th.git
  commit: 8cb8aaf2962ad44d319fcea48442e4397b3c49e8
- git: https://github.com/alpmestan/ekg-json.git
  commit: fd7e5d7325939103cd87d0dc592faf644160341c

# Databases libs
- git: https://github.com/delanoe/haskell-opaleye.git
......