Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
b37bd924
Commit
b37bd924
authored
Jan 09, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[DB][Optim] Cooc without groups (wip).
parent
f9c86cac
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
224 additions
and
11 deletions
+224
-11
Main.hs
bin/gargantext-init/Main.hs
+1
-1
schema.sql
devops/postgres/schema.sql
+4
-3
Flow.hs
src/Gargantext/Database/Flow.hs
+21
-2
List.hs
src/Gargantext/Database/Flow/List.hs
+14
-3
Init.hs
src/Gargantext/Database/Init.hs
+4
-2
NodeNgrams.hs
src/Gargantext/Database/Schema/NodeNgrams.hs
+2
-0
NodeNodeNgrams2.hs
src/Gargantext/Database/Schema/NodeNodeNgrams2.hs
+91
-0
NodeNodeNgrams.hs
src/Gargantext/Database/Triggers/NodeNodeNgrams.hs
+87
-0
No files found.
bin/gargantext-init/Main.hs
View file @
b37bd924
...
...
@@ -50,7 +50,7 @@ main = do
initMaster
=
do
(
masterUserId
,
masterRootId
,
masterCorpusId
)
<-
getOrMk_RootWithCorpus
userMaster
(
Left
corpusMasterName
)
(
Nothing
::
Maybe
HyperdataCorpus
)
masterListId
<-
getOrMkList
masterCorpusId
masterUserId
_
<-
initTriggers
masterListId
_
triggers
<-
initTriggers
masterListId
pure
(
masterUserId
,
masterRootId
,
masterCorpusId
,
masterListId
)
withDevEnv
iniPath
$
\
env
->
do
...
...
devops/postgres/schema.sql
View file @
b37bd924
...
...
@@ -110,9 +110,9 @@ ALTER TABLE public.node_node_ngrams OWNER TO gargantua;
-- Weight of a node_ngrams entry for a given node.
-- Both foreign keys cascade, so a row is removed together with either parent.
CREATE TABLE public.node_node_ngrams2 (
    node_id       INTEGER NOT NULL REFERENCES public.nodes (id) ON DELETE CASCADE,
    nodengrams_id INTEGER NOT NULL REFERENCES public.node_ngrams (id) ON DELETE CASCADE,
    weight        double precision,
    -- Exactly one row per (node, node_ngrams) pair; upserts on this table
    -- target this key.
    PRIMARY KEY (node_id, nodengrams_id)
);
ALTER TABLE public.node_node_ngrams2 OWNER TO gargantua;
...
...
@@ -164,7 +164,8 @@ CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id,
-- Lookup indexes on the ngrams association tables.
CREATE INDEX ON public.node_node_ngrams
    USING btree (node1_id, node2_id);
CREATE INDEX ON public.node_node_ngrams
    USING btree (ngrams_id, node2_id);

-- NOTE(review): this index is not UNIQUE. An upsert using
-- ON CONFLICT (node_id, node_ngrams1_id, node_ngrams2_id) needs a unique
-- constraint on these columns; confirm the table definition provides one.
CREATE INDEX ON public.node_nodengrams_nodengrams
    USING btree (node_id, node_ngrams1_id, node_ngrams2_id);

-- NOTE(review): if the table's primary key is (node_id, nodengrams_id),
-- this index duplicates the primary-key index; confirm before keeping both.
CREATE INDEX ON public.node_node_ngrams2
    USING btree (node_id, nodengrams_id);
------------------------------------------------------------------------
-- Ngrams Full DB Extraction Optim
-- TODO remove hard parameter and move elsewhere
...
...
src/Gargantext/Database/Flow.hs
View file @
b37bd924
...
...
@@ -37,8 +37,10 @@ module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
,
flowAnnuaire
)
where
import
Prelude
(
String
)
import
Data.Either
import
Data.Tuple.Extra
(
first
,
second
)
import
Data.Traversable
(
traverse
)
import
Debug.Trace
(
trace
)
import
Control.Lens
((
^.
),
view
,
_Just
)
...
...
@@ -63,7 +65,8 @@ import Gargantext.Database.Root (getRoot)
import
Gargantext.Database.Schema.Ngrams
-- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
import
Gargantext.Database.Schema.Node
-- (mkRoot, mkCorpus, getOrMkList, mkGraph, {-mkPhylo,-} mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError)
import
Gargantext.Database.Schema.NodeNgrams
(
listInsertDb
,
getCgramsId
)
import
Gargantext.Database.Schema.NodeNodeNgrams2
-- (NodeNodeNgrams2, insertNodeNodeNgrams2)
import
Gargantext.Database.Schema.User
(
getUser
,
UserLight
(
..
))
import
Gargantext.Database.TextSearch
(
searchInDatabase
)
import
Gargantext.Database.Types.Node
-- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
...
...
@@ -266,11 +269,27 @@ insertMasterDocs c lang hs = do
maps
<-
mapNodeIdNgrams
<$>
documentIdWithNgrams
(
extractNgramsT
$
withLang
lang
documentsWithId
)
documentsWithId
lId
<-
getOrMkList
masterCorpusId
masterUserId
terms2id
<-
insertNgrams
$
Map
.
keys
maps
-- to be removed
let
indexedNgrams
=
Map
.
mapKeys
(
indexNgrams
terms2id
)
maps
-- new
lId
<-
getOrMkList
masterCorpusId
masterUserId
mapCgramsId
<-
listInsertDb
lId
toNodeNgramsW'
$
map
(
first
_ngramsTerms
.
second
Map
.
keys
)
$
Map
.
toList
maps
-- insertDocNgrams
_return
<-
insertNodeNodeNgrams2
$
catMaybes
[
NodeNodeNgrams2
<$>
Just
nId
<*>
getCgramsId
mapCgramsId
ngrams_type
(
_ngramsTerms
terms
)
<*>
Just
(
fromIntegral
w
::
Double
)
|
(
terms
,
mapNgramsTypes
)
<-
Map
.
toList
maps
,
(
ngrams_type
,
mapNodeIdWeight
)
<-
Map
.
toList
mapNgramsTypes
,
(
nId
,
w
)
<-
Map
.
toList
mapNodeIdWeight
]
_cooc
<-
mkNode
NodeListCooc
lId
masterUserId
-- to be removed
_
<-
insertDocNgrams
lId
indexedNgrams
pure
ids'
...
...
src/Gargantext/Database/Flow/List.hs
View file @
b37bd924
...
...
@@ -23,11 +23,13 @@ Portability : POSIX
module
Gargantext.Database.Flow.List
where
import
Data.Text
(
Text
)
import
Control.Monad
(
mapM_
)
import
Data.Map
(
Map
,
toList
)
import
Data.Maybe
(
Maybe
(
..
),
catMaybes
)
import
Gargantext.API.Ngrams
(
NgramsElement
(
..
),
putListNgrams
)
import
Gargantext.Database.Schema.Ngrams
-- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
import
Gargantext.Core.Types.Main
(
ListType
(
CandidateTerm
))
import
Gargantext.Database.Schema.NodeNgrams
(
NodeNgramsPoly
(
..
),
NodeNgramsW
,
listInsertDb
,
getCgramsId
)
import
Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams
-- (insert_Node_NodeNgrams_NodeNgrams, Node_NodeNgrams_NodeNgrams(..))
import
Gargantext.Database.Types.Node
-- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
...
...
@@ -74,16 +76,25 @@ flowList_DbRepo lId ngs = do
-- | Build the 'NodeNgramsW' rows of list @l@ from ngrams elements grouped by
-- ngrams type.
--
-- Every 'NgramsElement' yields one row that keeps the element's terms and
-- list type; the id and the three optional fields are left as 'Nothing' and
-- the last field is set to 0.
toNodeNgramsW :: ListId
              -> [(NgramsType, [NgramsElement])]
              -> [NodeNgramsW]
toNodeNgramsW l ngs = List.concat $ map (toNodeNgramsW'' l) ngs
  where
    -- Named with two primes on purpose: the single-primed name is a
    -- top-level function of this module with a different type.
    toNodeNgramsW'' :: ListId
                    -> (NgramsType, [NgramsElement])
                    -> [NodeNgramsW]
    toNodeNgramsW'' l' (ngrams_type, elms) =
      [ NodeNgrams Nothing l' list_type ngrams_terms' ngrams_type Nothing Nothing Nothing 0
      | (NgramsElement ngrams_terms' _size list_type _occ _root _parent _children) <- elms
      ]
-- | Build 'NodeNgramsW' rows for a list from raw terms.
--
-- Each term produces one row per ngrams type it is paired with, always as a
-- 'CandidateTerm'. Rows come out in input order: terms first, then the types
-- of each term.
toNodeNgramsW' :: ListId
               -> [(Text, [NgramsType])]
               -> [NodeNgramsW]
toNodeNgramsW' listId termsWithTypes =
  List.concat $ map candidates termsWithTypes
  where
    -- All candidate rows of a single term.
    candidates (terms, ngramsTypes) = map (candidate terms) ngramsTypes

    -- One candidate row: no id, no optional fields, last field 0.
    candidate terms ngramsType =
      NodeNgrams Nothing listId CandidateTerm terms ngramsType Nothing Nothing Nothing 0
listInsert
::
FlowCmdM
env
err
m
=>
ListId
->
Map
NgramsType
[
NgramsElement
]
...
...
src/Gargantext/Database/Init.hs
View file @
b37bd924
...
...
@@ -24,17 +24,19 @@ import Gargantext.Database.Utils (Cmd)
import
Gargantext.Prelude
import
Gargantext.Database.Triggers.Nodes
(
triggerSearchUpdate
)
import
Gargantext.Database.Triggers.NodesNodes
(
triggerDeleteCount
,
triggerInsertCount
,
triggerUpdateAdd
,
triggerUpdateDel
,
MasterListId
)
import
Gargantext.Database.Triggers.NodeNodeNgrams
(
triggerCountInsert
)
import
Gargantext.Database.Triggers.NodeNodeNgrams
(
triggerCountInsert
,
triggerCountInsert2
,
triggerCoocInsert
)
------------------------------------------------------------------------
-- | Install every database trigger and collect the 'Int64' returned by each
-- installer, in installation order.
--
-- Only the four triggers imported from "Gargantext.Database.Triggers.NodesNodes"
-- take the master list id; the search and ngrams-count/cooc triggers take no
-- argument.
initTriggers :: MasterListId -> Cmd err [Int64]
initTriggers lId = do
  t0   <- triggerSearchUpdate
  t1   <- triggerCountInsert
  t1'  <- triggerCountInsert2
  t1'' <- triggerCoocInsert
  t2   <- triggerDeleteCount lId
  t3   <- triggerInsertCount lId
  t4   <- triggerUpdateAdd   lId
  t5   <- triggerUpdateDel   lId
  -- Single result list: it must include t1' and t1'' so that no installer
  -- result is silently dropped.
  pure [t0, t1, t1', t1'', t2, t3, t4, t5]
src/Gargantext/Database/Schema/NodeNgrams.hs
View file @
b37bd924
...
...
@@ -43,6 +43,8 @@ import Gargantext.Database.Utils
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
,
ngramsTypeId
,
fromNgramsTypeId
)
import
Gargantext.Prelude
-- | Identifier of a node-ngrams row (plain 'Int' alias, no extra type safety).
type NodeNgramsId = Int
data
NodeNgramsPoly
id
node_id'
node_subtype
...
...
src/Gargantext/Database/Schema/NodeNodeNgrams2.hs
0 → 100644
View file @
b37bd924
{-|
Module : Gargantext.Database.Schema.NodeNodeNgrams2
Description : TODO: remove this module and table in database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Database.Schema.NodeNodeNgrams2
where
import
Prelude
import
Data.Profunctor.Product.TH
(
makeAdaptorAndInstance
)
import
Control.Lens.TH
(
makeLenses
)
import
Gargantext.Database.Utils
(
Cmd
,
mkCmd
)
import
Gargantext.Database.Schema.NodeNgrams
(
NodeNgramsId
)
import
Gargantext.Database.Schema.Node
(
pgNodeId
)
import
Gargantext.Database.Types.Node
import
Opaleye
-- | One row of the @node_node_ngrams2@ table, polymorphic in its three
-- column types so the same shape serves Haskell values and Opaleye columns.
data NodeNodeNgrams2Poly nodeId nodeNgramsId weight =
  NodeNodeNgrams2
    { _nnng2_node_id       :: nodeId        -- ^ @node_id@ column
    , _nnng2_nodengrams_id :: nodeNgramsId  -- ^ @nodengrams_id@ column
    , _nnng2_weight        :: weight        -- ^ @weight@ column
    }
  deriving (Show)
-- | Column types written to the table (the write side of
-- 'nodeNodeNgrams2Table').
type NodeNodeNgrams2Write =
  NodeNodeNgrams2Poly
    (Column PGInt4)
    (Column PGInt4)
    (Column PGFloat8)
-- | Column types read back from the table (the read side of
-- 'nodeNodeNgrams2Table').
type NodeNodeNgrams2Read =
  NodeNodeNgrams2Poly
    (Column PGInt4)
    (Column PGInt4)
    (Column PGFloat8)
-- | Same row shape with every column nullable.
-- NOTE(review): presumably meant for outer joins; no use is visible here.
type NodeNodeNgrams2ReadNull =
  NodeNodeNgrams2Poly
    (Column (Nullable PGInt4))
    (Column (Nullable PGInt4))
    (Column (Nullable PGFloat8))
-- | Haskell-side row: a document id, a node-ngrams id and a 'Double' weight.
type NodeNodeNgrams2 =
  NodeNodeNgrams2Poly DocId NodeNgramsId Double
-- Template Haskell: generate the product-profunctor adaptor
-- @pNodeNodeNgrams2@ (used by the table definition) and the lenses for the
-- underscore-prefixed record fields. Both splices must stay after the data
-- declaration they refer to.
$(makeAdaptorAndInstance "pNodeNodeNgrams2" ''NodeNodeNgrams2Poly)
$(makeLenses ''NodeNodeNgrams2Poly)
-- | Opaleye description of the @node_node_ngrams2@ table.
-- All three columns are required on insert.
nodeNodeNgrams2Table :: Table NodeNodeNgrams2Write NodeNodeNgrams2Read
nodeNodeNgrams2Table = Table "node_node_ngrams2" (pNodeNodeNgrams2 columns)
  where
    -- Positional order follows the record: node id, node-ngrams id, weight.
    columns =
      NodeNodeNgrams2
        (required "node_id")
        (required "nodengrams_id")
        (required "weight")
-- | Query selecting every row of 'nodeNodeNgrams2Table'.
queryNodeNodeNgrams2Table :: Query NodeNodeNgrams2Read
queryNodeNodeNgrams2Table =
  queryTable nodeNodeNgrams2Table
-- | Insert utils
--
-- Convert Haskell-side rows to Opaleye columns and insert them with
-- 'insertNodeNodeNgrams2W'; the result is whatever that function returns.
insertNodeNodeNgrams2 :: [NodeNodeNgrams2] -> Cmd err Int
insertNodeNodeNgrams2 rows = insertNodeNodeNgrams2W (map toWrite rows)
  where
    -- Lift each field into its SQL column representation.
    toWrite :: NodeNodeNgrams2 -> NodeNodeNgrams2Write
    toWrite (NodeNodeNgrams2 docId nodeNgramsId weight) =
      NodeNodeNgrams2 (pgNodeId docId) (pgInt4 nodeNgramsId) (pgDouble weight)
-- | Insert already-converted rows into 'nodeNodeNgrams2Table'.
--
-- The insert is built with @ON CONFLICT DO NOTHING@ and returns the row
-- count reported by the database ('rCount'), converted to 'Int'.
insertNodeNodeNgrams2W :: [NodeNodeNgrams2Write] -> Cmd err Int
insertNodeNodeNgrams2W nnnw =
  mkCmd $ \conn -> fmap fromIntegral (runInsert_ conn insertIgnoringConflicts)
  where
    insertIgnoringConflicts =
      Insert
        { iTable      = nodeNodeNgrams2Table
        , iRows       = nnnw
        , iReturning  = rCount
        , iOnConflict = Just DoNothing
        }
src/Gargantext/Database/Triggers/NodeNodeNgrams.hs
View file @
b37bd924
...
...
@@ -63,4 +63,91 @@ triggerCountInsert = execPGSQuery query (nodeTypeId NodeDocument, nodeTypeId Nod
EXECUTE PROCEDURE set_ngrams_global_count();
|]
-- | Install the @set_ngrams_global_count2@ function and the statement-level
-- @trigger_count_insert2@ trigger on @node_node_ngrams2@.
--
-- After an INSERT, the function counts the freshly inserted rows per
-- (corpus, node_ngrams) pair, reaching the corpus through @nodes_nodes@, and
-- upserts that count as a weight into @node_node_ngrams2@ itself, adding to
-- any existing weight. The @pg_trigger_depth()@ guard turns the nested
-- invocation caused by that upsert into a no-op.
--
-- The three placeholders are filled, in order, with the type ids of
-- 'NodeCorpus', 'NodeDocument' and 'NodeList'.
--
-- The trigger is dropped (if present) before being created, so this command
-- can be run again on a database that already has it.
triggerCountInsert2 :: Cmd err Int64
triggerCountInsert2 =
  execPGSQuery query ( nodeTypeId NodeCorpus
                     , nodeTypeId NodeDocument
                     , nodeTypeId NodeList
                     )
  where
    query :: DPS.Query
    query = [sql|
          CREATE OR REPLACE FUNCTION set_ngrams_global_count2() RETURNS trigger AS $$
          BEGIN
            IF pg_trigger_depth() <> 1 THEN
              RETURN NEW;
            END IF;
            IF TG_OP = 'INSERT' THEN
              INSERT INTO node_node_ngrams2 (node_id, nodengrams_id, weight)
                SELECT corpus.id, nng.id, count(*) from NEW as new1
                  INNER JOIN node_ngrams nng ON nng.id = new1.nodengrams_id
                  INNER JOIN nodes list ON list.id = nng.node_id
                  INNER JOIN nodes_nodes nn ON nn.node2_id = new1.node_id
                  INNER JOIN nodes corpus ON corpus.id = nn.node1_id
                  INNER JOIN nodes doc ON doc.id = nn.node2_id
                  WHERE corpus.typename = ? -- 30 -- corpus
                    AND doc.typename = ? -- 4 -- maybe not mandatory
                    AND list.typename = ? -- 5 -- list
                  GROUP BY corpus.id, nng.id
              ON CONFLICT (node_id, nodengrams_id)
              DO UPDATE set weight = node_node_ngrams2.weight + excluded.weight
              ;
            END IF;
            RETURN NULL;
          END
          $$ LANGUAGE plpgsql;

          DROP TRIGGER IF EXISTS trigger_count_insert2 ON node_node_ngrams2;
          CREATE TRIGGER trigger_count_insert2 AFTER INSERT on node_node_ngrams2
          REFERENCING NEW TABLE AS NEW
          FOR EACH STATEMENT
          EXECUTE PROCEDURE set_ngrams_global_count2();
      |]
-- TODO add the groups
-- | Install the @set_cooc@ function and the statement-level
-- @trigger_cooc_insert@ trigger on @node_node_ngrams2@.
--
-- After an INSERT, each freshly inserted (document, node_ngrams) row is
-- paired with every @node_node_ngrams2@ row of the same document whose
-- node_ngrams belongs to the same list. The pairs are counted per corpus and
-- upserted into @node_nodengrams_nodengrams@, adding to any existing weight.
-- A term is also paired with itself (the exclusion is commented out).
--
-- The three placeholders are filled, in order, with the type ids of
-- 'NodeCorpus', 'NodeDocument' and 'NodeList'.
--
-- The trigger is dropped (if present) before being created, so this command
-- can be run again on a database that already has it.
--
-- NOTE(review): the ON CONFLICT target requires a unique constraint or index
-- on (node_id, node_ngrams1_id, node_ngrams2_id); confirm that the
-- @node_nodengrams_nodengrams@ table declares one.
triggerCoocInsert :: Cmd err Int64
triggerCoocInsert =
  execPGSQuery query ( nodeTypeId NodeCorpus
                     , nodeTypeId NodeDocument
                     , nodeTypeId NodeList
                     )
  where
    query :: DPS.Query
    query = [sql|
          CREATE OR REPLACE FUNCTION set_cooc() RETURNS trigger AS $$
          BEGIN
            IF pg_trigger_depth() <> 1 THEN
              RETURN NEW;
            END IF;
            IF TG_OP = 'INSERT' THEN
              INSERT INTO node_nodengrams_nodengrams (node_id, node_ngrams1_id, node_ngrams2_id, weight)
                SELECT corpus.id, nng1.id, nng2.id, count(*) from NEW as new1
                  INNER JOIN node_ngrams nng1 ON nng1.id = new1.nodengrams_id
                  INNER JOIN nodes list ON list.id = nng1.node_id
                  INNER JOIN nodes_nodes nn ON nn.node2_id = new1.node_id
                  INNER JOIN nodes corpus ON corpus.id = nn.node1_id
                  INNER JOIN nodes doc ON doc.id = nn.node2_id
                  INNER JOIN node_node_ngrams2 nnng2 ON nnng2.node_id = doc.id
                  INNER JOIN node_ngrams nng2 ON nng2.id = nnng2.nodengrams_id
                  WHERE corpus.typename = ? -- 30 -- corpus
                    AND doc.typename = ? -- 4 -- maybe not mandatory
                    AND list.typename = ? -- 5 -- list
                    AND nng2.node_id = list.id
                    -- AND nng1.id <> nng2.id
                  GROUP BY corpus.id, nng1.id, nng2.id
              ON CONFLICT (node_id, node_ngrams1_id, node_ngrams2_id)
              DO UPDATE set weight = node_nodengrams_nodengrams.weight + excluded.weight
              ;
            END IF;
            RETURN NULL;
          END
          $$ LANGUAGE plpgsql;

          DROP TRIGGER IF EXISTS trigger_cooc_insert ON node_node_ngrams2;
          CREATE TRIGGER trigger_cooc_insert AFTER INSERT on node_node_ngrams2
          REFERENCING NEW TABLE AS NEW
          FOR EACH STATEMENT
          EXECUTE PROCEDURE set_cooc();
      |]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment