Commit e728a20d authored by Alexandre Delanoë's avatar Alexandre Delanoë

[DB] Master User Texts

parent 0e1ef893
......@@ -23,7 +23,7 @@ import Data.Either (Either(..))
import Data.Maybe (Maybe(..))
import System.Environment (getArgs)
import Gargantext.Prelude
import Gargantext.Database.Flow (getOrMkRoot, getOrMkRootWithCorpus)
import Gargantext.Database.Flow (getOrMkRoot, getOrMk_RootWithCorpus)
import Gargantext.Database.Schema.Node (getOrMkList)
import Gargantext.Database.Utils (Cmd, )
import Gargantext.Database.Types.Node (CorpusId, RootId, HyperdataCorpus, ListId)
......@@ -48,7 +48,7 @@ main = do
let
initMaster :: Cmd GargError (UserId, RootId, CorpusId, ListId)
initMaster = do
(masterUserId, masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster (Left corpusMasterName) (Nothing :: Maybe HyperdataCorpus)
(masterUserId, masterRootId, masterCorpusId) <- getOrMk_RootWithCorpus userMaster (Left corpusMasterName) (Nothing :: Maybe HyperdataCorpus)
masterListId <- getOrMkList masterCorpusId masterUserId
_ <- initTriggers masterListId
pure (masterUserId, masterRootId, masterCorpusId, masterListId)
......
......@@ -4,16 +4,16 @@ COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language';
CREATE TABLE public.auth_user (
id SERIAL,
password character varying(128) NOT NULL,
last_login timestamp with time zone,
is_superuser boolean NOT NULL,
username character varying(150) NOT NULL,
first_name character varying(30) NOT NULL,
last_name character varying(30) NOT NULL,
email character varying(254) NOT NULL,
is_staff boolean NOT NULL,
is_active boolean NOT NULL,
date_joined timestamp with time zone DEFAULT now() NOT NULL,
password CHARACTER varying(128) NOT NULL,
last_login TIMESTAMP with time zone,
is_superuser BOOLEAN NOT NULL,
username CHARACTER varying(150) NOT NULL,
first_name CHARACTER varying(30) NOT NULL,
last_name CHARACTER varying(30) NOT NULL,
email CHARACTER varying(254) NOT NULL,
is_staff BOOLEAN NOT NULL,
is_active BOOLEAN NOT NULL,
date_joined TIMESTAMP with time zone DEFAULT now() NOT NULL,
PRIMARY KEY (id)
);
......@@ -23,11 +23,11 @@ ALTER TABLE public.auth_user OWNER TO gargantua;
-- TODO typename -> type_id
CREATE TABLE public.nodes (
id SERIAL,
typename integer NOT NULL,
user_id integer NOT NULL,
parent_id integer REFERENCES public.nodes(id) ON DELETE CASCADE ,
name character varying(255) DEFAULT ''::character varying NOT NULL,
date timestamp with time zone DEFAULT now() NOT NULL,
typename INTEGER NOT NULL,
user_id INTEGER NOT NULL,
parent_id INTEGER REFERENCES public.nodes(id) ON DELETE CASCADE ,
name CHARACTER varying(255) DEFAULT ''::character varying NOT NULL,
date TIMESTAMP with time zone DEFAULT now() NOT NULL,
hyperdata jsonb DEFAULT '{}'::jsonb NOT NULL,
search tsvector,
PRIMARY KEY (id),
......@@ -37,8 +37,8 @@ ALTER TABLE public.nodes OWNER TO gargantua;
CREATE TABLE public.ngrams (
id SERIAL,
terms character varying(255),
n integer,
terms CHARACTER varying(255),
n INTEGER,
PRIMARY KEY (id)
);
ALTER TABLE public.ngrams OWNER TO gargantua;
......@@ -46,13 +46,13 @@ ALTER TABLE public.ngrams OWNER TO gargantua;
--------------------------------------------------------------
CREATE TABLE public.node_ngrams (
id SERIAL,
node_id integer NOT NULL,
node_subtype integer,
ngrams_id integer NOT NULL,
ngrams_type integer, -- change to ngrams_field? (no for pedagogic reason)
ngrams_field integer,
ngrams_tag integer,
ngrams_class integer,
node_id INTEGER NOT NULL,
node_subtype INTEGER,
ngrams_id INTEGER NOT NULL,
ngrams_type INTEGER, -- change to ngrams_field? (no for pedagogic reason)
ngrams_field INTEGER,
ngrams_tag INTEGER,
ngrams_class INTEGER,
weight double precision,
PRIMARY KEY (id),
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
......@@ -60,17 +60,17 @@ CREATE TABLE public.node_ngrams (
);
ALTER TABLE public.node_ngrams OWNER TO gargantua;
CREATE TABLE public.node_ngrams_ngrams (
node_id integer NOT NULL,
node_ngrams1_id integer NOT NULL,
node_ngrams2_id integer NOT NULL,
CREATE TABLE public.node_nodengrams_nodengrams (
node_id INTEGER NOT NULL,
node_ngrams1_id INTEGER NOT NULL,
node_ngrams2_id INTEGER NOT NULL,
weight double precision,
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
FOREIGN KEY (node_ngrams1_id) REFERENCES public.node_ngrams(id) ON DELETE CASCADE,
FOREIGN KEY (node_ngrams2_id) REFERENCES public.node_ngrams(id) ON DELETE CASCADE,
PRIMARY KEY (node_id, node_ngrams1_id, node_ngrams2_id)
);
ALTER TABLE public.node_ngrams_ngrams OWNER TO gargantua;
ALTER TABLE public.node_nodengrams_nodengrams OWNER TO gargantua;
--------------------------------------------------------------
--------------------------------------------------------------
......@@ -88,28 +88,34 @@ ALTER TABLE public.node_ngrams_ngrams OWNER TO gargantua;
---------------------------------------------------------------
-- TODO nodes_nodes(node1_id int, node2_id int, edge_type int , weight real)
CREATE TABLE public.nodes_nodes (
node1_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
node2_id integer NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
score real,
category integer,
node1_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
node2_id INTEGER NOT NULL REFERENCES public.nodes(id) ON DELETE CASCADE,
score REAL,
category INTEGER,
PRIMARY KEY (node1_id,node2_id)
);
ALTER TABLE public.nodes_nodes OWNER TO gargantua;
---------------------------------------------------------------
-- TODO should reference "id" of nodes_nodes (instead of node1_id, node2_id)
CREATE TABLE public.node_node_ngrams (
node1_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
-- here id to node_ngrams
node2_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
ngrams_id INTEGER NOT NULL REFERENCES public.ngrams (id) ON DELETE CASCADE,
ngrams_type INTEGER,
--ngrams_tag INTEGER,
--ngrams_class INTEGER,
weight double precision,
PRIMARY KEY (node1_id, node2_id, ngrams_id, ngrams_type)
);
ALTER TABLE public.node_node_ngrams OWNER TO gargantua;
CREATE TABLE public.node_node_ngrams2 (
node_id INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
node_ngrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE,
weight double precision,
PRIMARY KEY (node_id, node_ngrams_id, ngrams_field)
);
ALTER TABLE public.node_node_ngrams2 OWNER TO gargantua;
--------------------------------------------------------------
--CREATE TABLE public.nodes_ngrams_repo (
......
......@@ -33,12 +33,13 @@ module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
, flowCorpus
, flowCorpusSearchInDatabase
, getOrMkRoot
, getOrMkRootWithCorpus
, getOrMk_RootWithCorpus
, flowAnnuaire
)
where
import Prelude (String)
import Data.Either
import Data.Traversable (traverse)
import Debug.Trace (trace)
import Control.Lens ((^.), view, _Just)
import Control.Monad.IO.Class (liftIO)
......@@ -59,8 +60,10 @@ import Gargantext.Database.Flow.Types
import Gargantext.Database.Node.Contact -- (HyperdataContact(..), ContactWho(..))
import Gargantext.Database.Node.Document.Insert -- (insertDocuments, ReturnId(..), addUniqIdsDoc, addUniqIdsContact, ToDbData(..))
import Gargantext.Database.Root (getRoot)
import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
import Gargantext.Database.Schema.Node -- (mkRoot, mkCorpus, getOrMkList, mkGraph, {-mkPhylo,-} mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError)
import Gargantext.Database.Schema.User (getUser, UserLight(..))
import Gargantext.Database.TextSearch (searchInDatabase)
import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
......@@ -150,7 +153,7 @@ flowCorpusSearchInDatabase :: FlowCmdM env err m
-> Text
-> m CorpusId
flowCorpusSearchInDatabase u la q = do
(_masterUserId, _masterRootId, cId) <- getOrMkRootWithCorpus
(_masterUserId, _masterRootId, cId) <- getOrMk_RootWithCorpus
userMaster
(Left "")
(Nothing :: Maybe HyperdataCorpus)
......@@ -165,7 +168,7 @@ _flowCorpusSearchInDatabaseApi :: FlowCmdM env err m
-> Text
-> m CorpusId
_flowCorpusSearchInDatabaseApi u la q = do
(_masterUserId, _masterRootId, cId) <- getOrMkRootWithCorpus
(_masterUserId, _masterRootId, cId) <- getOrMk_RootWithCorpus
userMaster
(Left "")
(Nothing :: Maybe HyperdataCorpus)
......@@ -189,7 +192,7 @@ flow :: (FlowCmdM env err m, FlowCorpus a, MkCorpus c)
-> [[a]]
-> m CorpusId
flow c u cn la docs = do
ids <- mapM (insertMasterDocs c la ) docs
ids <- traverse (insertMasterDocs c la ) docs
flowCorpusUser (la ^. tt_lang) u cn c (concat ids)
flowCorpus :: (FlowCmdM env err m, FlowCorpus a)
......@@ -210,7 +213,7 @@ flowCorpusUser :: (FlowCmdM env err m, MkCorpus c)
-> m CorpusId
flowCorpusUser l userName corpusName ctype ids = do
-- User Flow
(userId, _rootId, userCorpusId) <- getOrMkRootWithCorpus userName corpusName ctype
(userId, _rootId, userCorpusId) <- getOrMk_RootWithCorpus userName corpusName ctype
listId <- getOrMkList userCorpusId userId
_cooc <- mkNode NodeListCooc listId userId
-- TODO: check if present already, ignore
......@@ -220,10 +223,9 @@ flowCorpusUser l userName corpusName ctype ids = do
-- printDebug "Node Text Id" tId
-- User List Flow
--{-
(_masterUserId, _masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster (Left "") ctype
ngs <- buildNgramsLists l 2 3 (StopSize 3) userCorpusId masterCorpusId
_userListId <- flowList masterCorpusId listId ngs
(_masterUserId, _masterRootId, masterCorpusId) <- getOrMk_RootWithCorpus userMaster (Left "") ctype
ngs <- buildNgramsLists l 2 3 (StopSize 3) userCorpusId masterCorpusId
_userListId <- flowList_DbRepo listId ngs
--mastListId <- getOrMkList masterCorpusId masterUserId
-- _ <- insertOccsUpdates userCorpusId mastListId
-- printDebug "userListId" userListId
......@@ -231,8 +233,6 @@ flowCorpusUser l userName corpusName ctype ids = do
_ <- mkDashboard userCorpusId userId
_ <- mkGraph userCorpusId userId
--_ <- mkPhylo userCorpusId userId
--}
-- Annuaire Flow
-- _ <- mkAnnuaire rootUserId userId
......@@ -248,38 +248,50 @@ insertMasterDocs :: ( FlowCmdM env err m
-> [a]
-> m [DocId]
insertMasterDocs c lang hs = do
(masterUserId, _, masterCorpusId) <- getOrMkRootWithCorpus userMaster (Left corpusMasterName) c
(masterUserId, _, masterCorpusId) <- getOrMk_RootWithCorpus userMaster (Left corpusMasterName) c
-- TODO Type NodeDocumentUnicised
let hs' = map addUniqId hs
ids <- insertDb masterUserId masterCorpusId hs'
let documentsWithId = mergeData (toInserted ids) (Map.fromList $ map viewUniqId' hs')
let docs = map addUniqId hs
ids <- insertDb masterUserId masterCorpusId docs
let
fixLang (Unsupervised l n s m) = Unsupervised l n s m'
where
m' = case m of
Nothing -> trace ("buildTries here" :: String)
$ Just
$ buildTries n ( fmap toToken $ uniText
$ Text.intercalate " . "
$ List.concat
$ map hasText documentsWithId
)
just_m -> just_m
fixLang l = l
lang' = fixLang lang
ids' = map reId ids
documentsWithId = mergeData (toInserted ids) (Map.fromList $ map viewUniqId' docs)
_ <- Doc.add masterCorpusId ids'
-- TODO
-- create a corpus with database name (CSV or PubMed)
-- add documents to the corpus (create node_node link)
-- this will enable global database monitoring
-- maps :: IO Map Ngrams (Map NgramsType (Map NodeId Int))
maps <- mapNodeIdNgrams <$> documentIdWithNgrams (extractNgramsT lang') documentsWithId
maps <- mapNodeIdNgrams
<$> documentIdWithNgrams (extractNgramsT $ withLang lang documentsWithId) documentsWithId
lId <- getOrMkList masterCorpusId masterUserId
terms2id <- insertNgrams $ Map.keys maps
let indexedNgrams = Map.mapKeys (indexNgrams terms2id) maps
lId <- getOrMkList masterCorpusId masterUserId
_cooc <- mkNode NodeListCooc lId masterUserId
_ <- insertDocNgrams lId indexedNgrams
pure $ map reId ids
pure ids'
withLang :: HasText a => TermType Lang
-> [DocumentWithId a]
-> TermType Lang
withLang (Unsupervised l n s m) ns = Unsupervised l n s m'
where
m' = case m of
Nothing -> trace ("buildTries here" :: String)
$ Just
$ buildTries n ( fmap toToken $ uniText
$ Text.intercalate " . "
$ List.concat
$ map hasText ns
)
just_m -> just_m
withLang l _ = l
type CorpusName = Text
......@@ -306,12 +318,12 @@ getOrMkRoot username = do
pure (userId, rootId)
getOrMkRootWithCorpus :: (HasNodeError err, MkCorpus a)
getOrMk_RootWithCorpus :: (HasNodeError err, MkCorpus a)
=> Username
-> Either CorpusName [CorpusId]
-> Maybe a
-> Cmd err (UserId, RootId, CorpusId)
getOrMkRootWithCorpus username cName c = do
getOrMk_RootWithCorpus username cName c = do
(userId, rootId) <- getOrMkRoot username
corpusId'' <- if username == userMaster
then do
......@@ -322,7 +334,12 @@ getOrMkRootWithCorpus username cName c = do
corpusId' <- if corpusId'' /= []
then pure corpusId''
else mk (Just $ fromLeft "Default" cName) c rootId userId
else do
c' <- mk (Just $ fromLeft "Default" cName) c rootId userId
_tId <- case head c' of
Nothing -> pure [0]
Just c'' -> mkNode NodeTexts c'' userId
pure c'
corpusId <- maybe (nodeError NoCorpusFound) pure (head corpusId')
......@@ -369,7 +386,7 @@ instance ExtractNgramsT HyperdataContact
let authors = map text2ngrams
$ maybe ["Nothing"] (\a -> [a])
$ view (hc_who . _Just . cw_lastName) hc'
pure $ Map.fromList $ [(a', Map.singleton Authors 1) | a' <- authors ]
instance HasText HyperdataDocument
......@@ -425,7 +442,7 @@ documentIdWithNgrams :: HasNodeError err
-> Cmd err (Map Ngrams (Map NgramsType Int)))
-> [DocumentWithId a]
-> Cmd err [DocumentIdWithNgrams a]
documentIdWithNgrams f = mapM toDocumentIdWithNgrams
documentIdWithNgrams f = traverse toDocumentIdWithNgrams
where
toDocumentIdWithNgrams d = do
e <- f $ documentData d
......
......@@ -50,13 +50,26 @@ mapNodeIdNgrams = Map.unionsWith (Map.unionWith (Map.unionWith (+))) . fmap f
nId = documentId $ documentWithId d
------------------------------------------------------------------------
listInsert :: FlowCmdM env err m
=> ListId
-> Map NgramsType [NgramsElement]
-> m ()
listInsert lId ngs = mapM_ (\(typeList, ngElmts)
-> putListNgrams lId typeList ngElmts
) $ toList ngs
flowList_DbRepo :: FlowCmdM env err m
=> ListId
-> Map NgramsType [NgramsElement]
-> m ListId
flowList_DbRepo lId ngs = do
-- printDebug "listId flowList" lId
mapCgramsId <- listInsertDb lId toNodeNgramsW (Map.toList ngs)
let toInsert = catMaybes [ (,) <$> (getCgramsId mapCgramsId ntype <$> parent)
<*> getCgramsId mapCgramsId ntype ngram
| (ntype, ngs') <- Map.toList ngs
, NgramsElement ngram _ _ _ _ parent _ <- ngs'
]
-- Inserting groups of ngrams
_r <- insert_Node_NodeNgrams_NodeNgrams
$ map (\(a,b) -> Node_NodeNgrams_NodeNgrams lId a b Nothing) toInsert
listInsert lId ngs
--trace (show $ List.filter (\n -> _ne_ngrams n == "versatile") $ List.concat $ Map.elems ngs) $ listInsert lId ngs
pure lId
------------------------------------------------------------------------
------------------------------------------------------------------------
toNodeNgramsW :: ListId
-> [(NgramsType, [NgramsElement])]
......@@ -71,22 +84,15 @@ toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW' l) ngs
(NgramsElement ngrams_terms' _size list_type _occ _root _parent _children) <- elms
]
flowList :: FlowCmdM env err m
=> CorpusId
-> ListId
-> Map NgramsType [NgramsElement]
-> m ListId
flowList _cId lId ngs = do
-- printDebug "listId flowList" lId
-- TODO save in database
mapCgramsId <- listInsertDb lId toNodeNgramsW (Map.toList ngs)
let toInsert = catMaybes [ (,) <$> (getCgramsId mapCgramsId ntype <$> parent)
<*> getCgramsId mapCgramsId ntype ngram
| (ntype, ngs') <- Map.toList ngs
, NgramsElement ngram _ _ _ _ parent _ <- ngs'
]
_r <- insert_Node_NodeNgrams_NodeNgrams $ map (\(a,b) -> Node_NodeNgrams_NodeNgrams lId a b Nothing) toInsert
listInsert lId ngs
--trace (show $ List.filter (\n -> _ne_ngrams n == "versatile") $ List.concat $ Map.elems ngs) $ listInsert lId ngs
pure lId
listInsert :: FlowCmdM env err m
=> ListId
-> Map NgramsType [NgramsElement]
-> m ()
listInsert lId ngs = mapM_ (\(typeList, ngElmts)
-> putListNgrams lId typeList ngElmts
) $ toList ngs
------------------------------------------------------------------------
------------------------------------------------------------------------
......@@ -31,7 +31,6 @@ import Gargantext.Database.Schema.Node (pgNodeId)
import Gargantext.Database.Types.Node
import Opaleye
data NodeNodeNgramsPoly n1 n2 ngrams_id ngt w
= NodeNodeNgrams { _nnng_node1_id :: n1
, _nnng_node2_id :: n2
......@@ -40,7 +39,6 @@ data NodeNodeNgramsPoly n1 n2 ngrams_id ngt w
, _nnng_weight :: w
} deriving (Show)
type NodeNodeNgramsWrite =
NodeNodeNgramsPoly (Column PGInt4 )
(Column PGInt4 )
......@@ -83,7 +81,6 @@ nodeNodeNgramsTable = Table "node_node_ngrams"
queryNodeNodeNgramsTable :: Query NodeNodeNgramsRead
queryNodeNodeNgramsTable = queryTable nodeNodeNgramsTable
-- | Insert utils
insertNodeNodeNgrams :: [NodeNodeNgrams] -> Cmd err Int
insertNodeNodeNgrams = insertNodeNodeNgramsW
......@@ -105,4 +102,3 @@ insertNodeNodeNgramsW nnnw =
, iOnConflict = (Just DoNothing)
})
......@@ -78,7 +78,7 @@ $(makeLensesWith abbreviatedFields
node_NodeNgrams_NodeNgrams_Table :: Table Node_NodeNgrams_NodeNgrams_Write Node_NodeNgrams_NodeNgrams_Read
node_NodeNgrams_NodeNgrams_Table =
Table "node_ngrams_ngrams"
Table "node_nodengrams_nodengrams"
( pNode_NodeNgrams_NodeNgrams Node_NodeNgrams_NodeNgrams
{ _nnn_node_id = required "node_id"
, _nnn_nng1_id = optional "node_ngrams1_id"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment