From f9c86cac314e6b71238973a4380d500c287eb3e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexandre=20Delano=C3=AB?= <devel+git@delanoe.org> Date: Thu, 9 Jan 2020 15:46:56 +0100 Subject: [PATCH] [DB] Master User Texts --- bin/gargantext-init/Main.hs | 4 +- devops/postgres/schema.sql | 80 ++++++++-------- src/Gargantext/Database/Flow.hs | 93 +++++++++++-------- src/Gargantext/Database/Flow/List.hs | 54 ++++++----- .../Database/Schema/NodeNodeNgrams.hs | 4 - .../Schema/Node_NodeNgramsNodeNgrams.hs | 2 +- 6 files changed, 130 insertions(+), 107 deletions(-) diff --git a/bin/gargantext-init/Main.hs b/bin/gargantext-init/Main.hs index d65cb3c8..fd438005 100644 --- a/bin/gargantext-init/Main.hs +++ b/bin/gargantext-init/Main.hs @@ -23,7 +23,7 @@ import Data.Either (Either(..)) import Data.Maybe (Maybe(..)) import System.Environment (getArgs) import Gargantext.Prelude -import Gargantext.Database.Flow (getOrMkRoot, getOrMkRootWithCorpus) +import Gargantext.Database.Flow (getOrMkRoot, getOrMk_RootWithCorpus) import Gargantext.Database.Schema.Node (getOrMkList) import Gargantext.Database.Utils (Cmd, ) import Gargantext.Database.Types.Node (CorpusId, RootId, HyperdataCorpus, ListId) @@ -48,7 +48,7 @@ main = do let initMaster :: Cmd GargError (UserId, RootId, CorpusId, ListId) initMaster = do - (masterUserId, masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster (Left corpusMasterName) (Nothing :: Maybe HyperdataCorpus) + (masterUserId, masterRootId, masterCorpusId) <- getOrMk_RootWithCorpus userMaster (Left corpusMasterName) (Nothing :: Maybe HyperdataCorpus) masterListId <- getOrMkList masterCorpusId masterUserId _ <- initTriggers masterListId pure (masterUserId, masterRootId, masterCorpusId, masterListId) diff --git a/devops/postgres/schema.sql b/devops/postgres/schema.sql index de75f43d..478d69fb 100644 --- a/devops/postgres/schema.sql +++ b/devops/postgres/schema.sql @@ -4,16 +4,16 @@ COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language'; CREATE TABLE public.auth_user ( id SERIAL, - password character varying(128) NOT NULL, - last_login timestamp with time zone, - is_superuser boolean NOT NULL, - username character varying(150) NOT NULL, - first_name character varying(30) NOT NULL, - last_name character varying(30) NOT NULL, - email character varying(254) NOT NULL, - is_staff boolean NOT NULL, - is_active boolean NOT NULL, - date_joined timestamp with time zone DEFAULT now() NOT NULL, + password CHARACTER varying(128) NOT NULL, + last_login TIMESTAMP with time zone, + is_superuser BOOLEAN NOT NULL, + username CHARACTER varying(150) NOT NULL, + first_name CHARACTER varying(30) NOT NULL, + last_name CHARACTER varying(30) NOT NULL, + email CHARACTER varying(254) NOT NULL, + is_staff BOOLEAN NOT NULL, + is_active BOOLEAN NOT NULL, + date_joined TIMESTAMP with time zone DEFAULT now() NOT NULL, PRIMARY KEY (id) ); @@ -23,11 +23,11 @@ ALTER TABLE public.auth_user OWNER TO gargantua; -- TODO typename -> type_id CREATE TABLE public.nodes ( id SERIAL, - typename integer NOT NULL, - user_id integer NOT NULL, - parent_id integer REFERENCES public.nodes(id) ON DELETE CASCADE , - name character varying(255) DEFAULT ''::character varying NOT NULL, - date timestamp with time zone DEFAULT now() NOT NULL, + typename INTEGER NOT NULL, + user_id INTEGER NOT NULL, + parent_id INTEGER REFERENCES public.nodes(id) ON DELETE CASCADE , + name CHARACTER varying(255) DEFAULT ''::character varying NOT NULL, + date TIMESTAMP with time zone DEFAULT now() NOT NULL, hyperdata jsonb DEFAULT '{}'::jsonb NOT NULL, search tsvector, PRIMARY KEY (id), @@ -37,8 +37,8 @@ ALTER TABLE public.nodes OWNER TO gargantua; CREATE TABLE public.ngrams ( id SERIAL, - terms character varying(255), - n integer, + terms CHARACTER varying(255), + n INTEGER, PRIMARY KEY (id) ); ALTER TABLE public.ngrams OWNER TO gargantua; @@ -46,13 +46,13 @@ ALTER TABLE public.ngrams OWNER TO gargantua; -------------------------------------------------------------- CREATE TABLE public.node_ngrams ( id SERIAL, - node_id integer NOT NULL, - node_subtype integer, - ngrams_id integer NOT NULL, - ngrams_type integer, -- change to ngrams_field? (no for pedagogic reason) - ngrams_field integer, - ngrams_tag integer, - ngrams_class integer, + node_id INTEGER NOT NULL, + node_subtype INTEGER, + ngrams_id INTEGER NOT NULL, + ngrams_type INTEGER, -- change to ngrams_field? (no for pedagogic reason) + ngrams_field INTEGER, + ngrams_tag INTEGER, + ngrams_class INTEGER, weight double precision, PRIMARY KEY (id), FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE, @@ -60,17 +60,17 @@ CREATE TABLE public.node_ngrams ( ); ALTER TABLE public.node_ngrams OWNER TO gargantua; -CREATE TABLE public.node_ngrams_ngrams ( - node_id integer NOT NULL, - node_ngrams1_id integer NOT NULL, - node_ngrams2_id integer NOT NULL, +CREATE TABLE public.node_nodengrams_nodengrams ( + node_id INTEGER NOT NULL, + node_ngrams1_id INTEGER NOT NULL, + node_ngrams2_id INTEGER NOT NULL, weight double precision, FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE, FOREIGN KEY (node_ngrams1_id) REFERENCES public.node_ngrams(id) ON DELETE CASCADE, FOREIGN KEY (node_ngrams2_id) REFERENCES public.node_ngrams(id) ON DELETE CASCADE, PRIMARY KEY (node_id, node_ngrams1_id, node_ngrams2_id) ); -ALTER TABLE public.node_ngrams_ngrams OWNER TO gargantua; +ALTER TABLE public.node_nodengrams_nodengrams OWNER TO gargantua; -------------------------------------------------------------- -------------------------------------------------------------- @@ -88,28 +88,34 @@ ALTER TABLE public.node_ngrams_ngrams OWNER TO gargantua; --------------------------------------------------------------- -- TODO nodes_nodes(node1_id int, node2_id int, edge_type int , weight real) CREATE TABLE public.nodes_nodes ( - node1_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE, - node2_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE, - score real, - category integer, + node1_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE, + node2_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE, + score REAL, + category INTEGER, PRIMARY KEY (node1_id,node2_id) ); ALTER TABLE public.nodes_nodes OWNER TO gargantua; --------------------------------------------------------------- --- TODO should reference "id" of nodes_nodes (instead of node1_id, node2_id) CREATE TABLE public.node_node_ngrams ( node1_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE, --- here id to node_ngrams node2_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE, ngrams_id INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE, ngrams_type INTEGER, ---ngrams_tag INTEGER, ---ngrams_class INTEGER, weight double precision, PRIMARY KEY (node1_id, node2_id, ngrams_id, ngrams_type) ); ALTER TABLE public.node_node_ngrams OWNER TO gargantua; + + +CREATE TABLE public.node_node_ngrams2 ( +node_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE, +node_ngrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE, +weight double precision, +PRIMARY KEY (node_id, node_ngrams_id, ngrams_field) +); +ALTER TABLE public.node_node_ngrams2 OWNER TO gargantua; + -------------------------------------------------------------- --CREATE TABLE public.nodes_ngrams_repo ( diff --git a/src/Gargantext/Database/Flow.hs b/src/Gargantext/Database/Flow.hs index 5cdd0030..b913d923 100644 --- a/src/Gargantext/Database/Flow.hs +++ b/src/Gargantext/Database/Flow.hs @@ -33,12 +33,13 @@ module Gargantext.Database.Flow -- (flowDatabase, ngrams2list) , flowCorpus , flowCorpusSearchInDatabase , getOrMkRoot - , getOrMkRootWithCorpus + , getOrMk_RootWithCorpus , flowAnnuaire ) where import Prelude (String) import Data.Either +import Data.Traversable (traverse) import Debug.Trace (trace) import Control.Lens ((^.), view, _Just) import Control.Monad.IO.Class (liftIO) @@ -59,8 +60,10 @@ import Gargantext.Database.Flow.Types import Gargantext.Database.Node.Contact -- (HyperdataContact(..), ContactWho(..)) import Gargantext.Database.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..)) import Gargantext.Database.Root (getRoot) + import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId) import Gargantext.Database.Schema.Node -- (mkRoot, mkCorpus, getOrMkList, mkGraph, {-mkPhylo,-} mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError) + import Gargantext.Database.Schema.User (getUser, UserLight(..)) import Gargantext.Database.TextSearch (searchInDatabase) import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId) @@ -150,7 +153,7 @@ flowCorpusSearchInDatabase :: FlowCmdM env err m -> Text -> m CorpusId flowCorpusSearchInDatabase u la q = do - (_masterUserId, _masterRootId, cId) <- getOrMkRootWithCorpus + (_masterUserId, _masterRootId, cId) <- getOrMk_RootWithCorpus userMaster (Left "") (Nothing :: Maybe HyperdataCorpus) @@ -165,7 +168,7 @@ _flowCorpusSearchInDatabaseApi :: FlowCmdM env err m -> Text -> m CorpusId _flowCorpusSearchInDatabaseApi u la q = do - (_masterUserId, _masterRootId, cId) <- getOrMkRootWithCorpus + (_masterUserId, _masterRootId, cId) <- getOrMk_RootWithCorpus userMaster (Left "") (Nothing :: Maybe HyperdataCorpus) @@ -189,7 +192,7 @@ flow :: (FlowCmdM env err m, FlowCorpus a, MkCorpus c) -> [[a]] -> m CorpusId flow c u cn la docs = do - ids <- mapM (insertMasterDocs c la ) docs + ids <- traverse (insertMasterDocs c la ) docs flowCorpusUser (la ^. tt_lang) u cn c (concat ids) flowCorpus :: (FlowCmdM env err m, FlowCorpus a) @@ -210,7 +213,7 @@ flowCorpusUser :: (FlowCmdM env err m, MkCorpus c) -> m CorpusId flowCorpusUser l userName corpusName ctype ids = do -- User Flow - (userId, _rootId, userCorpusId) <- getOrMkRootWithCorpus userName corpusName ctype + (userId, _rootId, userCorpusId) <- getOrMk_RootWithCorpus userName corpusName ctype listId <- getOrMkList userCorpusId userId _cooc <- mkNode NodeListCooc listId userId -- TODO: check if present already, ignore @@ -220,10 +223,9 @@ flowCorpusUser l userName corpusName ctype ids = do -- printDebug "Node Text Id" tId -- User List Flow - --{- - (_masterUserId, _masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster (Left "") ctype - ngs <- buildNgramsLists l 2 3 (StopSize 3) userCorpusId masterCorpusId - _userListId <- flowList masterCorpusId listId ngs + (_masterUserId, _masterRootId, masterCorpusId) <- getOrMk_RootWithCorpus userMaster (Left "") ctype + ngs <- buildNgramsLists l 2 3 (StopSize 3) userCorpusId masterCorpusId + _userListId <- flowList_DbRepo listId ngs --mastListId <- getOrMkList masterCorpusId masterUserId -- _ <- insertOccsUpdates userCorpusId mastListId -- printDebug "userListId" userListId @@ -231,8 +233,6 @@ flowCorpusUser l userName corpusName ctype ids = do _ <- mkDashboard userCorpusId userId _ <- mkGraph userCorpusId userId --_ <- mkPhylo userCorpusId userId - --} - -- Annuaire Flow -- _ <- mkAnnuaire rootUserId userId @@ -248,38 +248,50 @@ insertMasterDocs :: ( FlowCmdM env err m -> [a] -> m [DocId] insertMasterDocs c lang hs = do - (masterUserId, _, masterCorpusId) <- getOrMkRootWithCorpus userMaster (Left corpusMasterName) c + (masterUserId, _, masterCorpusId) <- getOrMk_RootWithCorpus userMaster (Left corpusMasterName) c -- TODO Type NodeDocumentUnicised - let hs' = map addUniqId hs - ids <- insertDb masterUserId masterCorpusId hs' - let documentsWithId = mergeData (toInserted ids) (Map.fromList $ map viewUniqId' hs') - + let docs = map addUniqId hs + ids <- insertDb masterUserId masterCorpusId docs let - fixLang (Unsupervised l n s m) = Unsupervised l n s m' - where - m' = case m of - Nothing -> trace ("buildTries here" :: String) - $ Just - $ buildTries n ( fmap toToken $ uniText - $ Text.intercalate " . " - $ List.concat - $ map hasText documentsWithId - ) - just_m -> just_m - fixLang l = l - - lang' = fixLang lang + ids' = map reId ids + documentsWithId = mergeData (toInserted ids) (Map.fromList $ map viewUniqId' docs) + _ <- Doc.add masterCorpusId ids' + -- TODO + -- create a corpus with database name (CSV or PubMed) + -- add documents to the corpus (create node_node link) + -- this will enable global database monitoring + -- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int)) - maps <- mapNodeIdNgrams <$> documentIdWithNgrams (extractNgramsT lang') documentsWithId + maps <- mapNodeIdNgrams + <$> documentIdWithNgrams (extractNgramsT $ withLang lang documentsWithId) documentsWithId + + lId <- getOrMkList masterCorpusId masterUserId terms2id <- insertNgrams $ Map.keys maps let indexedNgrams = Map.mapKeys (indexNgrams terms2id) maps - lId <- getOrMkList masterCorpusId masterUserId _cooc <- mkNode NodeListCooc lId masterUserId _ <- insertDocNgrams lId indexedNgrams - pure $ map reId ids + pure ids' + + +withLang :: HasText a => TermType Lang + -> [DocumentWithId a] + -> TermType Lang +withLang (Unsupervised l n s m) ns = Unsupervised l n s m' + where + m' = case m of + Nothing -> trace ("buildTries here" :: String) + $ Just + $ buildTries n ( fmap toToken $ uniText + $ Text.intercalate " . " + $ List.concat + $ map hasText ns + ) + just_m -> just_m +withLang l _ = l + type CorpusName = Text @@ -306,12 +318,12 @@ getOrMkRoot username = do pure (userId, rootId) -getOrMkRootWithCorpus :: (HasNodeError err, MkCorpus a) +getOrMk_RootWithCorpus :: (HasNodeError err, MkCorpus a) => Username -> Either CorpusName [CorpusId] -> Maybe a -> Cmd err (UserId, RootId, CorpusId) -getOrMkRootWithCorpus username cName c = do +getOrMk_RootWithCorpus username cName c = do (userId, rootId) <- getOrMkRoot username corpusId'' <- if username == userMaster then do @@ -322,7 +334,12 @@ getOrMkRootWithCorpus username cName c = do corpusId' <- if corpusId'' /= [] then pure corpusId'' - else mk (Just $ fromLeft "Default" cName) c rootId userId + else do + c' <- mk (Just $ fromLeft "Default" cName) c rootId userId + _tId <- case head c' of + Nothing -> pure [0] + Just c'' -> mkNode NodeTexts c'' userId + pure c' corpusId <- maybe (nodeError NoCorpusFound) pure (head corpusId') @@ -369,7 +386,7 @@ instance ExtractNgramsT HyperdataContact let authors = map text2ngrams $ maybe ["Nothing"] (\a -> [a]) $ view (hc_who . _Just . cw_lastName) hc' - + pure $ Map.fromList $ [(a', Map.singleton Authors 1) | a' <- authors ] instance HasText HyperdataDocument @@ -425,7 +442,7 @@ documentIdWithNgrams :: HasNodeError err -> Cmd err (Map Ngrams (Map NgramsType Int))) -> [DocumentWithId a] -> Cmd err [DocumentIdWithNgrams a] -documentIdWithNgrams f = mapM toDocumentIdWithNgrams +documentIdWithNgrams f = traverse toDocumentIdWithNgrams where toDocumentIdWithNgrams d = do e <- f $ documentData d diff --git a/src/Gargantext/Database/Flow/List.hs b/src/Gargantext/Database/Flow/List.hs index a45878dc..184f0eb8 100644 --- a/src/Gargantext/Database/Flow/List.hs +++ b/src/Gargantext/Database/Flow/List.hs @@ -50,13 +50,26 @@ mapNodeIdNgrams = Map.unionsWith (Map.unionWith (Map.unionWith (+))) . fmap f nId = documentId $ documentWithId d ------------------------------------------------------------------------ -listInsert :: FlowCmdM env err m - => ListId - -> Map NgramsType [NgramsElement] - -> m () -listInsert lId ngs = mapM_ (\(typeList, ngElmts) - -> putListNgrams lId typeList ngElmts - ) $ toList ngs +flowList_DbRepo :: FlowCmdM env err m + => ListId + -> Map NgramsType [NgramsElement] + -> m ListId +flowList_DbRepo lId ngs = do + -- printDebug "listId flowList" lId + mapCgramsId <- listInsertDb lId toNodeNgramsW (Map.toList ngs) + let toInsert = catMaybes [ (,) <$> (getCgramsId mapCgramsId ntype <$> parent) + <*> getCgramsId mapCgramsId ntype ngram + | (ntype, ngs') <- Map.toList ngs + , NgramsElement ngram _ _ _ _ parent _ <- ngs' + ] + -- Inserting groups of ngrams + _r <- insert_Node_NodeNgrams_NodeNgrams + $ map (\(a,b) -> Node_NodeNgrams_NodeNgrams lId a b Nothing) toInsert + listInsert lId ngs + --trace (show $ List.filter (\n -> _ne_ngrams n == "versatile") $ List.concat $ Map.elems ngs) $ listInsert lId ngs + pure lId +------------------------------------------------------------------------ +------------------------------------------------------------------------ toNodeNgramsW :: ListId -> [(NgramsType, [NgramsElement])] @@ -71,22 +84,13 @@ toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW' l) ngs (NgramsElement ngrams_terms' _size list_type _occ _root _parent _children) <- elms ] -flowList :: FlowCmdM env err m - => CorpusId - -> ListId - -> Map NgramsType [NgramsElement] - -> m ListId -flowList _cId lId ngs = do - -- printDebug "listId flowList" lId - -- TODO save in database - mapCgramsId <- listInsertDb lId toNodeNgramsW (Map.toList ngs) - let toInsert = catMaybes [ (,) <$> (getCgramsId mapCgramsId ntype <$> parent) - <*> getCgramsId mapCgramsId ntype ngram - | (ntype, ngs') <- Map.toList ngs - , NgramsElement ngram _ _ _ _ parent _ <- ngs' - ] - _r <- insert_Node_NodeNgrams_NodeNgrams $ map (\(a,b) -> Node_NodeNgrams_NodeNgrams lId a b Nothing) toInsert - listInsert lId ngs - --trace (show $ List.filter (\n -> _ne_ngrams n == "versatile") $ List.concat $ Map.elems ngs) $ listInsert lId ngs - pure lId +listInsert :: FlowCmdM env err m + => ListId + -> Map NgramsType [NgramsElement] + -> m () +listInsert lId ngs = mapM_ (\(typeList, ngElmts) + -> putListNgrams lId typeList ngElmts + ) $ toList ngs +------------------------------------------------------------------------ +------------------------------------------------------------------------ diff --git a/src/Gargantext/Database/Schema/NodeNodeNgrams.hs b/src/Gargantext/Database/Schema/NodeNodeNgrams.hs index d63a0e6d..dfe13912 100644 --- a/src/Gargantext/Database/Schema/NodeNodeNgrams.hs +++ b/src/Gargantext/Database/Schema/NodeNodeNgrams.hs @@ -31,7 +31,6 @@ import Gargantext.Database.Schema.Node (pgNodeId) import Gargantext.Database.Types.Node import Opaleye - data NodeNodeNgramsPoly n1 n2 ngrams_id ngt w = NodeNodeNgrams { _nnng_node1_id :: n1 , _nnng_node2_id :: n2 @@ -40,7 +39,6 @@ data NodeNodeNgramsPoly n1 n2 ngrams_id ngt w , _nnng_weight :: w } deriving (Show) - type NodeNodeNgramsWrite = NodeNodeNgramsPoly (Column PGInt4 ) (Column PGInt4 ) @@ -83,7 +81,6 @@ nodeNodeNgramsTable = Table "node_node_ngrams" queryNodeNodeNgramsTable :: Query NodeNodeNgramsRead queryNodeNodeNgramsTable = queryTable nodeNodeNgramsTable - -- | Insert utils insertNodeNodeNgrams :: [NodeNodeNgrams] -> Cmd err Int insertNodeNodeNgrams = insertNodeNodeNgramsW @@ -105,4 +102,3 @@ insertNodeNodeNgramsW nnnw = , iOnConflict = (Just DoNothing) }) - diff --git a/src/Gargantext/Database/Schema/Node_NodeNgramsNodeNgrams.hs b/src/Gargantext/Database/Schema/Node_NodeNgramsNodeNgrams.hs index d66defe6..927271a6 100644 --- a/src/Gargantext/Database/Schema/Node_NodeNgramsNodeNgrams.hs +++ b/src/Gargantext/Database/Schema/Node_NodeNgramsNodeNgrams.hs @@ -78,7 +78,7 @@ $(makeLensesWith abbreviatedFields node_NodeNgrams_NodeNgrams_Table :: Table Node_NodeNgrams_NodeNgrams_Write Node_NodeNgrams_NodeNgrams_Read node_NodeNgrams_NodeNgrams_Table = - Table "node_ngrams_ngrams" + Table "node_nodengrams_nodengrams" ( pNode_NodeNgrams_NodeNgrams Node_NodeNgrams_NodeNgrams { _nnn_node_id = required "node_id" , _nnn_nng1_id = optional "node_ngrams1_id" -- 2.21.0