Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
b0c4cf28
Commit
b0c4cf28
authored
Jan 09, 2020
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[DB][Optim] Cooc without groups (wip).
parent
e728a20d
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
226 additions
and
11 deletions
+226
-11
Main.hs
bin/gargantext-init/Main.hs
+1
-1
schema.sql
devops/postgres/schema.sql
+4
-3
Flow.hs
src/Gargantext/Database/Flow.hs
+21
-2
List.hs
src/Gargantext/Database/Flow/List.hs
+14
-3
Init.hs
src/Gargantext/Database/Init.hs
+4
-2
NodeNgrams.hs
src/Gargantext/Database/Schema/NodeNgrams.hs
+2
-0
NodeNodeNgrams2.hs
src/Gargantext/Database/Schema/NodeNodeNgrams2.hs
+92
-0
NodeNodeNgrams.hs
src/Gargantext/Database/Triggers/NodeNodeNgrams.hs
+88
-0
No files found.
bin/gargantext-init/Main.hs
View file @
b0c4cf28
...
...
@@ -50,7 +50,7 @@ main = do
initMaster
=
do
(
masterUserId
,
masterRootId
,
masterCorpusId
)
<-
getOrMk_RootWithCorpus
userMaster
(
Left
corpusMasterName
)
(
Nothing
::
Maybe
HyperdataCorpus
)
masterListId
<-
getOrMkList
masterCorpusId
masterUserId
_
<-
initTriggers
masterListId
_
triggers
<-
initTriggers
masterListId
pure
(
masterUserId
,
masterRootId
,
masterCorpusId
,
masterListId
)
withDevEnv
iniPath
$
\
env
->
do
...
...
devops/postgres/schema.sql
View file @
b0c4cf28
...
...
@@ -110,9 +110,9 @@ ALTER TABLE public.node_node_ngrams OWNER TO gargantua;
CREATE
TABLE
public
.
node_node_ngrams2
(
node_id
INTEGER
NOT
NULL
REFERENCES
public
.
nodes
(
id
)
ON
DELETE
CASCADE
,
node
_
ngrams_id
INTEGER
NOT
NULL
REFERENCES
public
.
node_ngrams
(
id
)
ON
DELETE
CASCADE
,
nodengrams_id
INTEGER
NOT
NULL
REFERENCES
public
.
node_ngrams
(
id
)
ON
DELETE
CASCADE
,
weight
double
precision
,
PRIMARY
KEY
(
node_id
,
node
_ngrams_id
,
ngrams_fiel
d
)
PRIMARY
KEY
(
node_id
,
node
ngrams_i
d
)
);
ALTER
TABLE
public
.
node_node_ngrams2
OWNER
TO
gargantua
;
...
...
@@ -164,7 +164,8 @@ CREATE UNIQUE INDEX ON public.node_node_ngrams USING btree (node1_id, node2_id,
CREATE
INDEX
ON
public
.
node_node_ngrams
USING
btree
(
node1_id
,
node2_id
);
CREATE
INDEX
ON
public
.
node_node_ngrams
USING
btree
(
ngrams_id
,
node2_id
);
CREATE
INDEX
ON
public
.
node_nodengrams_nodengrams
USING
btree
(
node_id
,
node_ngrams1_id
,
node_ngrams2_id
);
CREATE
INDEX
ON
public
.
node_node_ngrams2
USING
btree
(
node_id
,
nodengrams_id
);
------------------------------------------------------------------------
-- Ngrams Full DB Extraction Optim
-- TODO remove hard parameter and move elsewhere
...
...
src/Gargantext/Database/Flow.hs
View file @
b0c4cf28
...
...
@@ -37,8 +37,10 @@ module Gargantext.Database.Flow -- (flowDatabase, ngrams2list)
,
flowAnnuaire
)
where
import
Prelude
(
String
)
import
Data.Either
import
Data.Tuple.Extra
(
first
,
second
)
import
Data.Traversable
(
traverse
)
import
Debug.Trace
(
trace
)
import
Control.Lens
((
^.
),
view
,
_Just
)
...
...
@@ -63,7 +65,8 @@ import Gargantext.Database.Root (getRoot)
import
Gargantext.Database.Schema.Ngrams
-- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
import
Gargantext.Database.Schema.Node
-- (mkRoot, mkCorpus, getOrMkList, mkGraph, {-mkPhylo,-} mkDashboard, mkAnnuaire, getCorporaWithParentId, HasNodeError, NodeError(..), nodeError)
import
Gargantext.Database.Schema.NodeNgrams
(
listInsertDb
,
getCgramsId
)
import
Gargantext.Database.Schema.NodeNodeNgrams2
-- (NodeNodeNgrams2, insertNodeNodeNgrams2)
import
Gargantext.Database.Schema.User
(
getUser
,
UserLight
(
..
))
import
Gargantext.Database.TextSearch
(
searchInDatabase
)
import
Gargantext.Database.Types.Node
-- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
...
...
@@ -266,11 +269,27 @@ insertMasterDocs c lang hs = do
maps
<-
mapNodeIdNgrams
<$>
documentIdWithNgrams
(
extractNgramsT
$
withLang
lang
documentsWithId
)
documentsWithId
lId
<-
getOrMkList
masterCorpusId
masterUserId
terms2id
<-
insertNgrams
$
Map
.
keys
maps
-- to be removed
let
indexedNgrams
=
Map
.
mapKeys
(
indexNgrams
terms2id
)
maps
-- new
lId
<-
getOrMkList
masterCorpusId
masterUserId
mapCgramsId
<-
listInsertDb
lId
toNodeNgramsW'
$
map
(
first
_ngramsTerms
.
second
Map
.
keys
)
$
Map
.
toList
maps
-- insertDocNgrams
_return
<-
insertNodeNodeNgrams2
$
catMaybes
[
NodeNodeNgrams2
<$>
Just
nId
<*>
getCgramsId
mapCgramsId
ngrams_type
(
_ngramsTerms
terms
)
<*>
Just
(
fromIntegral
w
::
Double
)
|
(
terms
,
mapNgramsTypes
)
<-
Map
.
toList
maps
,
(
ngrams_type
,
mapNodeIdWeight
)
<-
Map
.
toList
mapNgramsTypes
,
(
nId
,
w
)
<-
Map
.
toList
mapNodeIdWeight
]
_cooc
<-
mkNode
NodeListCooc
lId
masterUserId
-- to be removed
_
<-
insertDocNgrams
lId
indexedNgrams
pure
ids'
...
...
src/Gargantext/Database/Flow/List.hs
View file @
b0c4cf28
...
...
@@ -23,11 +23,13 @@ Portability : POSIX
module
Gargantext.Database.Flow.List
where
import
Data.Text
(
Text
)
import
Control.Monad
(
mapM_
)
import
Data.Map
(
Map
,
toList
)
import
Data.Maybe
(
Maybe
(
..
),
catMaybes
)
import
Gargantext.API.Ngrams
(
NgramsElement
(
..
),
putListNgrams
)
import
Gargantext.Database.Schema.Ngrams
-- (insertNgrams, Ngrams(..), NgramsIndexed(..), indexNgrams, NgramsType(..), text2ngrams, ngramsTypeId)
import
Gargantext.Core.Types.Main
(
ListType
(
CandidateTerm
))
import
Gargantext.Database.Schema.NodeNgrams
(
NodeNgramsPoly
(
..
),
NodeNgramsW
,
listInsertDb
,
getCgramsId
)
import
Gargantext.Database.Schema.Node_NodeNgramsNodeNgrams
-- (insert_Node_NodeNgrams_NodeNgrams, Node_NodeNgrams_NodeNgrams(..))
import
Gargantext.Database.Types.Node
-- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
...
...
@@ -74,16 +76,25 @@ flowList_DbRepo lId ngs = do
toNodeNgramsW
::
ListId
->
[(
NgramsType
,
[
NgramsElement
])]
->
[
NodeNgramsW
]
toNodeNgramsW
l
ngs
=
List
.
concat
$
map
(
toNodeNgramsW'
l
)
ngs
toNodeNgramsW
l
ngs
=
List
.
concat
$
map
(
toNodeNgramsW'
'
l
)
ngs
where
toNodeNgramsW'
::
ListId
toNodeNgramsW'
'
::
ListId
->
(
NgramsType
,
[
NgramsElement
])
->
[
NodeNgramsW
]
toNodeNgramsW'
l'
(
ngrams_type
,
elms
)
=
toNodeNgramsW'
'
l'
(
ngrams_type
,
elms
)
=
[
NodeNgrams
Nothing
l'
list_type
ngrams_terms'
ngrams_type
Nothing
Nothing
Nothing
0
|
(
NgramsElement
ngrams_terms'
_size
list_type
_occ
_root
_parent
_children
)
<-
elms
]
toNodeNgramsW'
::
ListId
->
[(
Text
,
[
NgramsType
])]
->
[
NodeNgramsW
]
toNodeNgramsW'
l''
ngs
=
[
NodeNgrams
Nothing
l''
CandidateTerm
terms
ngrams_type
Nothing
Nothing
Nothing
0
|
(
terms
,
ngrams_types
)
<-
ngs
,
ngrams_type
<-
ngrams_types
]
listInsert
::
FlowCmdM
env
err
m
=>
ListId
->
Map
NgramsType
[
NgramsElement
]
...
...
src/Gargantext/Database/Init.hs
View file @
b0c4cf28
...
...
@@ -24,17 +24,19 @@ import Gargantext.Database.Utils (Cmd)
import
Gargantext.Prelude
import
Gargantext.Database.Triggers.Nodes
(
triggerSearchUpdate
)
import
Gargantext.Database.Triggers.NodesNodes
(
triggerDeleteCount
,
triggerInsertCount
,
triggerUpdateAdd
,
triggerUpdateDel
,
MasterListId
)
import
Gargantext.Database.Triggers.NodeNodeNgrams
(
triggerCountInsert
)
import
Gargantext.Database.Triggers.NodeNodeNgrams
(
triggerCountInsert
,
triggerCountInsert2
,
triggerCoocInsert
)
------------------------------------------------------------------------
initTriggers
::
MasterListId
->
Cmd
err
[
Int64
]
initTriggers
lId
=
do
t0
<-
triggerSearchUpdate
t1
<-
triggerCountInsert
t1'
<-
triggerCountInsert2
t1''
<-
triggerCoocInsert
t2
<-
triggerDeleteCount
lId
t3
<-
triggerInsertCount
lId
t4
<-
triggerUpdateAdd
lId
t5
<-
triggerUpdateDel
lId
pure
[
t0
,
t1
,
t2
,
t3
,
t4
,
t5
]
pure
[
t0
,
t1
,
t1'
,
t1''
,
t2
,
t3
,
t4
,
t5
]
src/Gargantext/Database/Schema/NodeNgrams.hs
View file @
b0c4cf28
...
...
@@ -43,6 +43,8 @@ import Gargantext.Database.Utils
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
,
ngramsTypeId
,
fromNgramsTypeId
)
import
Gargantext.Prelude
type
NodeNgramsId
=
Int
data
NodeNgramsPoly
id
node_id'
node_subtype
...
...
src/Gargantext/Database/Schema/NodeNodeNgrams2.hs
0 → 100644
View file @
b0c4cf28
{-|
Module : Gargantext.Database.Schema.NodeNodeNgrams
Description : TODO: remove this module and table in database
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TemplateHaskell #-}
module
Gargantext.Database.Schema.NodeNodeNgrams2
where
import
Prelude
import
Data.Profunctor.Product.TH
(
makeAdaptorAndInstance
)
import
Control.Lens.TH
(
makeLenses
)
import
Gargantext.Database.Utils
(
Cmd
,
mkCmd
)
import
Gargantext.Database.Schema.NodeNgrams
(
NodeNgramsId
)
import
Gargantext.Database.Schema.Node
(
pgNodeId
)
import
Gargantext.Database.Types.Node
import
Opaleye
data
NodeNodeNgrams2Poly
node_id
nodengrams_id
w
=
NodeNodeNgrams2
{
_nnng2_node_id
::
node_id
,
_nnng2_nodengrams_id
::
nodengrams_id
,
_nnng2_weight
::
w
}
deriving
(
Show
)
type
NodeNodeNgrams2Write
=
NodeNodeNgrams2Poly
(
Column
PGInt4
)
(
Column
PGInt4
)
(
Column
PGFloat8
)
type
NodeNodeNgrams2Read
=
NodeNodeNgrams2Poly
(
Column
PGInt4
)
(
Column
PGInt4
)
(
Column
PGFloat8
)
type
NodeNodeNgrams2ReadNull
=
NodeNodeNgrams2Poly
(
Column
(
Nullable
PGInt4
))
(
Column
(
Nullable
PGInt4
))
(
Column
(
Nullable
PGFloat8
))
type
NodeNodeNgrams2
=
NodeNodeNgrams2Poly
DocId
NodeNgramsId
Double
$
(
makeAdaptorAndInstance
"pNodeNodeNgrams2"
''
N
odeNodeNgrams2Poly
)
makeLenses
''
N
odeNodeNgrams2Poly
nodeNodeNgrams2Table
::
Table
NodeNodeNgrams2Write
NodeNodeNgrams2Read
nodeNodeNgrams2Table
=
Table
"node_node_ngrams2"
(
pNodeNodeNgrams2
NodeNodeNgrams2
{
_nnng2_node_id
=
required
"node_id"
,
_nnng2_nodengrams_id
=
required
"nodengrams_id"
,
_nnng2_weight
=
required
"weight"
}
)
queryNodeNodeNgrams2Table
::
Query
NodeNodeNgrams2Read
queryNodeNodeNgrams2Table
=
queryTable
nodeNodeNgrams2Table
-- | Insert utils
insertNodeNodeNgrams2
::
[
NodeNodeNgrams2
]
->
Cmd
err
Int
insertNodeNodeNgrams2
=
insertNodeNodeNgrams2W
.
map
(
\
(
NodeNodeNgrams2
n1
n2
w
)
->
NodeNodeNgrams2
(
pgNodeId
n1
)
(
pgInt4
n2
)
(
pgDouble
w
)
)
insertNodeNodeNgrams2W
::
[
NodeNodeNgrams2Write
]
->
Cmd
err
Int
insertNodeNodeNgrams2W
nnnw
=
mkCmd
$
\
c
->
fromIntegral
<$>
runInsert_
c
insertNothing
where
insertNothing
=
(
Insert
{
iTable
=
nodeNodeNgrams2Table
,
iRows
=
nnnw
,
iReturning
=
rCount
,
iOnConflict
=
(
Just
DoNothing
)
})
src/Gargantext/Database/Triggers/NodeNodeNgrams.hs
View file @
b0c4cf28
...
...
@@ -63,4 +63,92 @@ triggerCountInsert = execPGSQuery query (nodeTypeId NodeDocument, nodeTypeId Nod
EXECUTE PROCEDURE set_ngrams_global_count();
|]
triggerCountInsert2
::
Cmd
err
Int64
triggerCountInsert2
=
execPGSQuery
query
(
nodeTypeId
NodeCorpus
,
nodeTypeId
NodeDocument
,
nodeTypeId
NodeList
)
where
query
::
DPS
.
Query
query
=
[
sql
|
CREATE OR REPLACE FUNCTION set_ngrams_global_count2() RETURNS trigger AS $$
BEGIN
IF pg_trigger_depth() <> 1 THEN
RETURN NEW;
END IF;
IF TG_OP = 'INSERT' THEN
INSERT INTO node_node_ngrams2 (node_id, nodengrams_id, weight)
SELECT corpus.id, nng.id, count(*) from NEW as new1
INNER JOIN node_ngrams nng ON nng.id = new1.nodengrams_id
INNER JOIN nodes list ON list.id = nng.node_id
INNER JOIN nodes_nodes nn ON nn.node2_id = new1.node_id
INNER JOIN nodes corpus ON corpus.id = nn.node1_id
INNER JOIN nodes doc ON doc.id = nn.node2_id
WHERE corpus.typename = ? -- 30 -- corpus
AND doc.typename = ? -- 4 -- maybe not mandatory
AND list.typename = ? -- 5 -- list
GROUP BY corpus.id, nng.id
ON CONFLICT (node_id, nodengrams_id)
DO UPDATE set weight = node_node_ngrams2.weight + excluded.weight
;
END IF;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_count_insert2 on node_node_ngrams2;
CREATE TRIGGER trigger_count_insert2 AFTER INSERT on node_node_ngrams2
REFERENCING NEW TABLE AS NEW
FOR EACH STATEMENT
EXECUTE PROCEDURE set_ngrams_global_count2();
|]
-- TODO add the groups
triggerCoocInsert
::
Cmd
err
Int64
triggerCoocInsert
=
execPGSQuery
query
(
nodeTypeId
NodeCorpus
,
nodeTypeId
NodeDocument
,
nodeTypeId
NodeList
)
where
query
::
DPS
.
Query
query
=
[
sql
|
CREATE OR REPLACE FUNCTION set_cooc() RETURNS trigger AS $$
BEGIN
IF pg_trigger_depth() <> 1 THEN
RETURN NEW;
END IF;
IF TG_OP = 'INSERT' THEN
INSERT INTO node_nodengrams_nodengrams (node_id, node_ngrams1_id, node_ngrams2_id, weight)
SELECT corpus.id, nng1.id, nng2.id, count(*) from NEW as new1
INNER JOIN node_ngrams nng1 ON nng1.id = new1.nodengrams_id
INNER JOIN nodes list ON list.id = nng1.node_id
INNER JOIN nodes_nodes nn ON nn.node2_id = new1.node_id
INNER JOIN nodes corpus ON corpus.id = nn.node1_id
INNER JOIN nodes doc ON doc.id = nn.node2_id
INNER JOIN node_node_ngrams2 nnng2 ON nnng2.node_id = doc.id
INNER JOIN node_ngrams nng2 ON nng2.id = nnng2.nodengrams_id
WHERE corpus.typename = ? -- 30 -- corpus
AND doc.typename = ? -- 4 -- maybe not mandatory
AND list.typename = ? -- 5 -- list
AND nng2.node_id = list.id
-- AND nng1.id <> nng2.id
GROUP BY corpus.id, nng1.id, nng2.id
ON CONFLICT (node_id, node_ngrams1_id, node_ngrams2_id)
DO UPDATE set weight = node_nodengrams_nodengrams.weight + excluded.weight
;
END IF;
RETURN NULL;
END
$$ LANGUAGE plpgsql;
-- DROP trigger trigger_cooc on node_node_ngrams2;
CREATE TRIGGER trigger_cooc_insert AFTER INSERT on node_node_ngrams2
REFERENCING NEW TABLE AS NEW
FOR EACH STATEMENT
EXECUTE PROCEDURE set_cooc();
|]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment