Commit ba75f548 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[DB][WIP] NodeNgrams

parent bef5f7da
Pipeline #669 failed with stage
CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog;
CREATE EXTENSION IF NOT EXISTS tsm_system_rows;
COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language';
-- CREATE USER WITH ...
-- createdb "gargandb"
CREATE TABLE public.auth_user (
id SERIAL,
password character varying(128) NOT NULL,
......@@ -23,7 +19,6 @@ CREATE TABLE public.auth_user (
ALTER TABLE public.auth_user OWNER TO gargantua;
-- TODO add publication_date
-- TODO typename -> type_id
CREATE TABLE public.nodes (
......@@ -40,7 +35,6 @@ CREATE TABLE public.nodes (
);
ALTER TABLE public.nodes OWNER TO gargantua;
CREATE TABLE public.ngrams (
id SERIAL,
terms character varying(255),
......@@ -53,20 +47,19 @@ ALTER TABLE public.ngrams OWNER TO gargantua;
CREATE TABLE public.node_ngrams (
id SERIAL,
node_id integer NOT NULL,
node_subtype integer,
ngrams_id integer NOT NULL,
list_type integer,
ngrams_type integer, -- change to ngrams_field? (no for pedagogic reason)
ngrams_field integer,
ngrams_tag integer,
ngrams_class integer,
weight double precision,
PRIMARY KEY (id),
FOREIGN KEY (node_id) REFERENCES public.nodes(id) ON DELETE CASCADE,
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE,
PRIMARY KEY (id)
FOREIGN KEY (ngrams_id) REFERENCES public.ngrams(id) ON DELETE CASCADE
);
ALTER TABLE public.node_ngrams OWNER TO gargantua;
CREATE TABLE public.node_node_ngrams_ngrams (
node_id integer NOT NULL,
node_ngrams1_id integer NOT NULL,
......
......@@ -47,7 +47,7 @@ module Gargantext.API.Ngrams
, NgramsStatePatch
, NgramsTablePatch
, NgramsElement
, NgramsElement(..)
, mkNgramsElement
, mergeNgramsElement
......
......@@ -49,8 +49,7 @@ import Data.Maybe (Maybe(..), catMaybes)
import Data.Monoid
import Data.Text (Text, splitOn, intercalate)
import GHC.Show (Show)
import Gargantext.API.Ngrams (HasRepoVar)
import Gargantext.API.Ngrams (NgramsElement, putListNgrams, RepoCmdM)
import Gargantext.API.Ngrams (HasRepoVar, NgramsElement(..), putListNgrams, RepoCmdM)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Types (NodePoly(..), Terms(..))
import Gargantext.Core.Types.Individu (Username)
......@@ -63,6 +62,7 @@ import Gargantext.Database.Node.Document.Insert -- (insertDocuments, ReturnId(..
import Gargantext.Database.Root (getRoot)
import Gargantext.Database.Schema.Ngrams -- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
import Gargantext.Database.Schema.Node -- (mkRoot, mkCorpus, getOrMkList, mkGraph, {-mkPhylo,-} mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError)
import Gargantext.Database.Schema.NodeNgrams (NodeNgramsPoly(..), NodeNgramsW, listInsertDb)
import Gargantext.Database.Schema.User (getUser, UserLight(..))
import Gargantext.Database.TextSearch (searchInDatabase)
import Gargantext.Database.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
......@@ -474,6 +474,19 @@ listInsert lId ngs = mapM_ (\(typeList, ngElmts)
-> putListNgrams lId typeList ngElmts
) $ toList ngs
toNodeNgramsW :: ListId
-> [(NgramsType, [NgramsElement])]
-> [NodeNgramsW]
toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW' l) ngs
where
toNodeNgramsW' :: ListId
-> (NgramsType, [NgramsElement])
-> [NodeNgramsW]
toNodeNgramsW' l' (ngrams_type, elms) =
[ NodeNgrams Nothing l' list_type ngrams_terms' ngrams_type Nothing Nothing Nothing 0 |
(NgramsElement ngrams_terms' _size list_type _occ _root _parent _children) <- elms
]
flowList :: FlowCmdM env err m
=> ListId
-> Map NgramsType [NgramsElement]
......@@ -481,6 +494,7 @@ flowList :: FlowCmdM env err m
flowList lId ngs = do
printDebug "listId flowList" lId
-- TODO save in database
_ <- listInsertDb lId toNodeNgramsW (Map.toList ngs)
listInsert lId ngs
--trace (show $ List.filter (\n -> _ne_ngrams n == "versatile") $ List.concat $ Map.elems ngs) $ listInsert lId ngs
pure lId
......
{-|
Module : Gargantext.Database.Schema.NodeNgrams
Description :
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
NodeNgrams: mainly NodeList and its ngrams.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Database.Schema.NodeNgrams where
import Data.Text (Text)
import qualified Database.PostgreSQL.Simple as PGS (Query, Only(..))
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Database.PostgreSQL.Simple.FromRow (fromRow, field)
import Database.PostgreSQL.Simple (FromRow)
import Database.PostgreSQL.Simple.SqlQQ (sql)
-- import Control.Lens.TH (makeLenses)
import Data.Maybe (Maybe, fromMaybe)
import Gargantext.Core.Types
import Gargantext.Database.Utils
import Gargantext.Database.Config (nodeTypeId)
import Gargantext.Database.Schema.Ngrams (NgramsType, NgramsTypeId, ngramsTypeId)
import Gargantext.Prelude
data NodeNgramsPoly id
node_id'
node_subtype
ngrams_id
ngrams_type
ngrams_field
ngrams_tag
ngrams_class
weight
= NodeNgrams { _nng_id :: id
, _nng_node_id :: node_id'
, _nng_node_subtype :: node_subtype
, _nng_ngrams_id :: ngrams_id
, _nng_ngrams_type :: ngrams_type
, _nng_ngrams_field :: ngrams_field
, _nng_ngrams_tag :: ngrams_tag
, _nng_ngrams_class :: ngrams_class
, _nng_ngrams_weight :: weight
} deriving (Show)
{-
type NodeNgramsWrite = NodeNgramsPoly (Maybe (Column (PGInt4)))
(Column (PGInt4))
(Maybe (Column (PGInt4)))
(Column (PGInt4))
(Maybe (Column (PGInt4)))
(Maybe (Column (PGInt4)))
(Maybe (Column (PGInt4)))
(Maybe (Column (PGInt4)))
(Maybe (Column (PGFloat8)))
type NodeNodeRead = NodeNgramsPoly (Column PGInt4)
(Column PGInt4)
(Column PGInt4)
(Column PGInt4)
(Column PGInt4)
(Column PGInt4)
(Column PGInt4)
(Column PGInt4)
(Column PGFloat8)
type NodeNgramsReadNull = NodeNgramsPoly (Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGInt4))
(Column (Nullable PGFloat8))
-}
type NgramsId = Int
type NgramsField = Int
type NgramsTag = Int
type NgramsClass = Int
type NgramsText = Text
-- Example of list Ngrams
-- type ListNgrams = NodeNgramsPoly (Maybe Int) ListType Text
type NodeNgramsW =
NodeNgramsPoly (Maybe Int) NodeId ListType NgramsText
NgramsType (Maybe NgramsField) (Maybe NgramsTag) (Maybe NgramsClass)
Double
data Result = Result { unResult :: Int }
deriving (Show)
instance FromRow Result where
fromRow = Result <$> field
-- insertDb :: ListId -> Map NgramsType [NgramsElemet] -> Cmd err [Result]
listInsertDb :: ListId
-> (ListId -> a -> [NodeNgramsW])
-> a
-> Cmd err [Result]
listInsertDb l f ngs = insertNodeNgrams (f l ngs)
-- TODO optimize with size of ngrams
insertNodeNgrams :: [NodeNgramsW] -> Cmd err [Result]
insertNodeNgrams nns = runPGSQuery query (PGS.Only $ Values fields nns')
where
fields = map (\t-> QualifiedIdentifier Nothing t) [ "int4","int4","text","int4"
,"int4","int4","int4","int4"
,"float8"]
nns' :: [(Int, ListTypeId, NgramsText, NgramsTypeId ,NgramsField, NgramsTag, NgramsClass, Double)]
nns' = map (\(NodeNgrams _id (NodeId node_id'') node_subtype ngrams_terms ngrams_type ngrams_field ngrams_tag ngrams_class weight)
-> ( node_id''
, listTypeId node_subtype
, ngrams_terms
, ngramsTypeId ngrams_type
, fromMaybe 0 ngrams_field
, fromMaybe 0 ngrams_tag
, fromMaybe 0 ngrams_class
, weight
)
) nns
query :: PGS.Query
query = [sql|
INSERT INTO node_ngrams_ngrams VALUES (node_id, node_type, ngrams_id, ngrams_type, ngrams_field, ngrams_tag, ngrams_class, weight)
SELECT n.node_id, n.node_type, ng.ngrams_id, n.ngrams_type, n.ngrams_field, n.ngrams_tag, n.ngrams_class, n.weight FROM (?)
AS n(node_id, node_type, ngrams_terms, ngrams_type, ngrams_field, ngrams_tag, ngrams_class, weight)
INNER JOIN ngrams as ng ON ng.terms = n.ngrams_terms
ON CONFLICT(node_id, ngrams_id)
DO UPDATE SET node_type = excluded.node_type, ngrams_type = excluded.ngrams_type, ngrams_field = excluded.ngrams_field, ngrams_tag = excluded.ngrams_tag, ngrams_class = excluded.ngrams_class, weight = excluded.weight
|]
......@@ -101,7 +101,7 @@ instance QueryRunnerColumnDefault PGFloat8 (Maybe Double) where
-- TODO: Add option on conflict
insertNodeNgramsNgramsNew :: [NodeNgramsNgrams] -> Cmd err Int
insertNodeNgramsNgramsNew :: [NodeNgramsNgrams] -> Cmd err Int64
insertNodeNgramsNgramsNew = insertNodeNgramsNgramsW
. map (\(NodeNgramsNgrams n ng1 ng2 maybeWeight) ->
NodeNgramsNgrams (pgNodeId n )
......@@ -110,8 +110,8 @@ insertNodeNgramsNgramsNew = insertNodeNgramsNgramsW
(pgDouble <$> maybeWeight)
)
insertNodeNgramsNgramsW :: [NodeNgramsNgramsWrite] -> Cmd err Int
insertNodeNgramsNgramsW :: [NodeNgramsNgramsWrite] -> Cmd err Int64
insertNodeNgramsNgramsW ns = do
c <- view connection
liftIO $ fromIntegral <$> runInsertMany c nodeNgramsNgramsTable ns
liftIO $ runInsertMany c nodeNgramsNgramsTable ns
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment