Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
175
Issues
175
List
Board
Labels
Milestones
Merge Requests
10
Merge Requests
10
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
e608bb05
Commit
e608bb05
authored
Jan 26, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] Indexes for database and score counts
parent
262a4e7d
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
75 additions
and
34 deletions
+75
-34
Main.hs
bin/gargantext-upgrade/Main.hs
+29
-0
create
devops/postgres/create
+18
-11
extensions.sql
devops/postgres/extensions.sql
+3
-0
Settings.hs
src/Gargantext/API/Admin/Settings.hs
+1
-1
List.hs
src/Gargantext/API/Ngrams/List.hs
+11
-11
NodeStory.hs
src/Gargantext/Core/NodeStory.hs
+3
-3
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+3
-1
ContextNodeNgrams.hs
src/Gargantext/Database/Admin/Trigger/ContextNodeNgrams.hs
+3
-3
NodesContexts.hs
src/Gargantext/Database/Admin/Trigger/NodesContexts.hs
+4
-4
No files found.
bin/gargantext-upgrade/Main.hs
View file @
e608bb05
...
@@ -252,6 +252,35 @@ sqlSchema = do
...
@@ -252,6 +252,35 @@ sqlSchema = do
DROP TRIGGER if EXISTS trigger_insert_count ON nodes_nodes;
DROP TRIGGER if EXISTS trigger_insert_count ON nodes_nodes;
-- Indexes needed to speed up the deletes
-- Trigger for constraint node_ngrams_node_id_fkey
CREATE INDEX IF NOT EXISTS node_ngrams_node_id_idx ON public.node_ngrams USING btree (node_id);
-- Trigger for constraint node_node_ngrams2_node_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams2_node_id_idx ON public.node_node_ngrams2 USING btree (node_id);
-- Trigger for constraint node_node_ngrams_node1_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams_node1_id_idx ON public.node_node_ngrams USING btree (node1_id);
-- Trigger for constraint node_node_ngrams_node2_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams_node2_id_idx ON public.node_node_ngrams USING btree (node2_id);
-- Trigger for constraint nodes_nodes_node1_id_fkey
CREATE INDEX IF NOT EXISTS nodes_nodes_node1_id_idx ON public.nodes_nodes USING btree (node1_id);
-- Trigger for constraint nodes_nodes_node2_id_fkey
CREATE INDEX IF NOT EXISTS nodes_nodes_node2_id_idx ON public.nodes_nodes USING btree (node2_id);
-- Trigger for constraint nodes_parent_id_fkey
CREATE INDEX IF NOT EXISTS nodes_parent_id_idx ON public.nodes USING btree (parent_id);
-- Trigger for constraint rights_node_id_fkey
CREATE INDEX IF NOT EXISTS rights_node_id_idx ON public.rights USING btree (node_id);
-- Trigger for constraint nodes_contexts_node_id_fkey
CREATE INDEX IF NOT EXISTS nodes_contexts_node_id_idx ON public.nodes_contexts USING btree (node_id);
-- Trigger for constraint context_node_ngrams_node_id_fkey
CREATE INDEX IF NOT EXISTS context_node_node_id_idx ON public.context_node_ngrams USING btree (node_id);
|]
|]
...
...
devops/postgres/create
View file @
e608bb05
#!/bin/bash
#!/bin/bash
# sudo su postgres
# sudo su postgres
# postgresql://$USER:$PW@localhost/$DB
# postgresql://$USER:$PW@localhost/$DB
PW
=
"C8kdcUrAQy66U"
INIFILE
=
$1
DB
=
"gargandb1"
USER
=
"gargantua"
#psql -c "CREATE USER \"${USER}\""
getter
()
{
#psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'"
grep
$1
$INIFILE
|
sed
"s/^.*= //"
}
psql
-c
"DROP DATABASE IF EXISTS
\"
${
DB
}
\"
"
USER
=
$(
getter
"DB_USER"
)
createdb
"
${
DB
}
"
NAME
=
$(
getter
"DB_NAME"
)
#psql "${DB}" < schema.sql
PASS
=
$(
getter
"DB_PASS"
)
HOST
=
$(
getter
"DB_HOST"
)
PORT
=
$(
getter
"DB_PORT"
)
../../bin/psql ../../gargantext.ini < gargandb.dump
psql
-c
"ALTER DATABASE
\"
${
DB
}
\"
OWNER to
\"
${
USER
}
\"
"
#psql -c "CREATE USER \"${USER}\""
#psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'"
psql
-c
"DROP DATABASE IF EXISTS
\"
${
NAME
}
\"
"
createdb
"
${
NAME
}
"
psql
"
${
NAME
}
"
< extensions.sql
#psql "${NAME}" < schema.sql
#../../bin/psql ../../gargantext.ini < gargandb.dump
psql
-c
"ALTER DATABASE
\"
${
NAME
}
\"
OWNER to
\"
${
USER
}
\"
"
devops/postgres/extensions.sql
0 → 100644
View file @
e608bb05
CREATE
EXTENSION
IF
NOT
EXISTS
pgcrypto
;
CREATE
EXTENSION
IF
NOT
EXISTS
tsm_system_rows
;
src/Gargantext/API/Admin/Settings.hs
View file @
e608bb05
...
@@ -106,7 +106,7 @@ repoSnapshot repoDir = repoDir <> "/repo.cbor"
...
@@ -106,7 +106,7 @@ repoSnapshot repoDir = repoDir <> "/repo.cbor"
repoSaverAction
::
RepoDirFilePath
->
Serialise
a
=>
a
->
IO
()
repoSaverAction
::
RepoDirFilePath
->
Serialise
a
=>
a
->
IO
()
repoSaverAction
repoDir
a
=
do
repoSaverAction
repoDir
a
=
do
withTempFile
repoDir
"tmp-repo.cbor"
$
\
fp
h
->
do
withTempFile
repoDir
"tmp-repo.cbor"
$
\
fp
h
->
do
printDebug
"repoSaverAction"
fp
--
printDebug "repoSaverAction" fp
L
.
hPut
h
$
serialise
a
L
.
hPut
h
$
serialise
a
hClose
h
hClose
h
renameFile
fp
(
repoSnapshot
repoDir
)
renameFile
fp
(
repoSnapshot
repoDir
)
...
...
src/Gargantext/API/Ngrams/List.hs
View file @
e608bb05
...
@@ -41,7 +41,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document
...
@@ -41,7 +41,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocNodes
)
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocNodes
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.
Node
import
Gargantext.Database.Schema.
Context
import
Gargantext.Database.Types
(
Indexed
(
..
))
import
Gargantext.Database.Types
(
Indexed
(
..
))
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Network.HTTP.Media
((
//
),
(
/:
))
import
Network.HTTP.Media
((
//
),
(
/:
))
...
@@ -155,12 +155,12 @@ reIndexWith cId lId nt lts = do
...
@@ -155,12 +155,12 @@ reIndexWith cId lId nt lts = do
<$>
HashMap
.
toList
<$>
HashMap
.
toList
<$>
getTermsWith
identity
[
lId
]
nt
lts
<$>
getTermsWith
identity
[
lId
]
nt
lts
--
printDebug "ts" ts
printDebug
"ts"
ts
-- Taking the ngrams with 0 occurrences only (orphans)
-- Taking the ngrams with 0 occurrences only (orphans)
occs
<-
getOccByNgramsOnlyFast'
cId
lId
nt
ts
occs
<-
getOccByNgramsOnlyFast'
cId
lId
nt
ts
--
printDebug "occs" occs
printDebug
"occs"
occs
let
orphans
=
List
.
concat
let
orphans
=
List
.
concat
$
map
(
\
t
->
case
HashMap
.
lookup
t
occs
of
$
map
(
\
t
->
case
HashMap
.
lookup
t
occs
of
...
@@ -168,28 +168,28 @@ reIndexWith cId lId nt lts = do
...
@@ -168,28 +168,28 @@ reIndexWith cId lId nt lts = do
Just
n
->
if
n
<=
1
then
[
t
]
else
[ ]
Just
n
->
if
n
<=
1
then
[
t
]
else
[ ]
)
ts
)
ts
--
printDebug "orphans" orphans
printDebug
"orphans"
orphans
-- Get all documents of the corpus
-- Get all documents of the corpus
docs
<-
selectDocNodes
cId
docs
<-
selectDocNodes
cId
--
printDebug "docs length" (List.length docs)
printDebug
"docs length"
(
List
.
length
docs
)
-- Checking Text documents where orphans match
-- Checking Text documents where orphans match
-- TODO Tests here
-- TODO Tests here
let
let
ngramsByDoc
=
map
(
HashMap
.
fromList
)
ngramsByDoc
=
map
(
HashMap
.
fromList
)
$
map
(
map
(
\
(
k
,
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
v
)))
$
map
(
map
(
\
(
k
,
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
v
)))
$
map
(
\
doc
->
List
.
zip
$
map
(
\
doc
->
List
.
zip
(
termsInText
(
buildPatterns
$
map
(
\
k
->
(
Text
.
splitOn
" "
$
unNgramsTerm
k
,
[]
))
orphans
)
(
termsInText
(
buildPatterns
$
map
(
\
k
->
(
Text
.
splitOn
" "
$
unNgramsTerm
k
,
[]
))
orphans
)
$
Text
.
unlines
$
catMaybes
$
Text
.
unlines
$
catMaybes
[
doc
^.
node
_hyperdata
.
hd_title
[
doc
^.
context
_hyperdata
.
hd_title
,
doc
^.
node
_hyperdata
.
hd_abstract
,
doc
^.
context
_hyperdata
.
hd_abstract
]
]
)
)
(
List
.
cycle
[
Map
.
fromList
$
[(
nt
,
Map
.
singleton
(
doc
^.
node
_id
)
1
)]])
(
List
.
cycle
[
Map
.
fromList
$
[(
nt
,
Map
.
singleton
(
doc
^.
context
_id
)
1
)]])
)
(
map
context2node
docs
)
)
docs
--
printDebug "ngramsByDoc" ngramsByDoc
printDebug
"ngramsByDoc"
ngramsByDoc
-- Saving the indexation in database
-- Saving the indexation in database
_
<-
mapM
(
saveDocNgramsWith
lId
)
ngramsByDoc
_
<-
mapM
(
saveDocNgramsWith
lId
)
ngramsByDoc
...
...
src/Gargantext/Core/NodeStory.hs
View file @
e608bb05
...
@@ -177,8 +177,8 @@ type NodeStoryDir = FilePath
...
@@ -177,8 +177,8 @@ type NodeStoryDir = FilePath
writeNodeStories
::
NodeStoryDir
->
NodeListStory
->
IO
()
writeNodeStories
::
NodeStoryDir
->
NodeListStory
->
IO
()
writeNodeStories
fp
nls
=
do
writeNodeStories
fp
nls
=
do
done
<-
mapM
(
writeNodeStory
fp
)
$
splitByNode
nls
_
done
<-
mapM
(
writeNodeStory
fp
)
$
splitByNode
nls
printDebug
"[writeNodeStories]"
done
--
printDebug "[writeNodeStories]" done
pure
()
pure
()
writeNodeStory
::
NodeStoryDir
->
(
NodeId
,
NodeListStory
)
->
IO
()
writeNodeStory
::
NodeStoryDir
->
(
NodeId
,
NodeListStory
)
->
IO
()
...
@@ -192,7 +192,7 @@ splitByNode (NodeStory m) =
...
@@ -192,7 +192,7 @@ splitByNode (NodeStory m) =
saverAction'
::
Serialise
a
=>
NodeStoryDir
->
NodeId
->
a
->
IO
()
saverAction'
::
Serialise
a
=>
NodeStoryDir
->
NodeId
->
a
->
IO
()
saverAction'
repoDir
nId
a
=
do
saverAction'
repoDir
nId
a
=
do
withTempFile
repoDir
((
cs
$
show
nId
)
<>
"-tmp-repo.cbor"
)
$
\
fp
h
->
do
withTempFile
repoDir
((
cs
$
show
nId
)
<>
"-tmp-repo.cbor"
)
$
\
fp
h
->
do
printDebug
"[repoSaverAction]"
fp
--
printDebug "[repoSaverAction]" fp
DBL
.
hPut
h
$
serialise
a
DBL
.
hPut
h
$
serialise
a
hClose
h
hClose
h
renameFile
fp
(
nodeStoryPath
repoDir
nId
)
renameFile
fp
(
nodeStoryPath
repoDir
nId
)
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
e608bb05
...
@@ -319,6 +319,8 @@ saveDocNgramsWith :: ( FlowCmdM env err m)
...
@@ -319,6 +319,8 @@ saveDocNgramsWith :: ( FlowCmdM env err m)
->
m
()
->
m
()
saveDocNgramsWith
lId
mapNgramsDocs'
=
do
saveDocNgramsWith
lId
mapNgramsDocs'
=
do
terms2id
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
terms2id
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
printDebug
"terms2id"
terms2id
let
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
let
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
-- new
-- new
...
@@ -326,7 +328,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
...
@@ -326,7 +328,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
$
map
(
first
_ngramsTerms
.
second
Map
.
keys
)
$
map
(
first
_ngramsTerms
.
second
Map
.
keys
)
$
HashMap
.
toList
mapNgramsDocs
$
HashMap
.
toList
mapNgramsDocs
--
printDebug "saveDocNgramsWith" mapCgramsId
printDebug
"saveDocNgramsWith"
mapCgramsId
-- insertDocNgrams
-- insertDocNgrams
_return
<-
insertContextNodeNgrams2
_return
<-
insertContextNodeNgrams2
$
catMaybes
[
ContextNodeNgrams2
<$>
Just
nId
$
catMaybes
[
ContextNodeNgrams2
<$>
Just
nId
...
...
src/Gargantext/Database/Admin/Trigger/ContextNodeNgrams.hs
View file @
e608bb05
...
@@ -35,7 +35,7 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList)
...
@@ -35,7 +35,7 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList)
RETURN NEW;
RETURN NEW;
END IF;
END IF;
IF TG_OP = 'INSERT' THEN
IF TG_OP = 'INSERT' THEN
INSERT INTO
context_node_ngrams (context_id, node
_id, ngrams_id, ngrams_type, weight)
INSERT INTO
node_node_ngrams (node1_id, node2
_id, ngrams_id, ngrams_type, weight)
select n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type, count(*) from NEW as new0
select n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type, count(*) from NEW as new0
INNER JOIN contexts n ON n.id = new0.context_id
INNER JOIN contexts n ON n.id = new0.context_id
INNER JOIN nodes n2 ON n2.id = new0.node_id
INNER JOIN nodes n2 ON n2.id = new0.node_id
...
@@ -43,8 +43,8 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList)
...
@@ -43,8 +43,8 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList)
AND n.typename = ? -- not mandatory
AND n.typename = ? -- not mandatory
AND n.parent_id <> n2.id -- not mandatory
AND n.parent_id <> n2.id -- not mandatory
GROUP BY n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type
GROUP BY n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type
ON CONFLICT (
context_id, node
_id, ngrams_id, ngrams_type)
ON CONFLICT (
node1_id, node2
_id, ngrams_id, ngrams_type)
DO UPDATE set weight =
context
_node_ngrams.weight + excluded.weight
DO UPDATE set weight =
node
_node_ngrams.weight + excluded.weight
;
;
END IF;
END IF;
...
...
src/Gargantext/Database/Admin/Trigger/NodesContexts.hs
View file @
e608bb05
...
@@ -42,7 +42,7 @@ triggerInsertCount lId = execPGSQuery query (lId, nodeTypeId NodeList)
...
@@ -42,7 +42,7 @@ triggerInsertCount lId = execPGSQuery query (lId, nodeTypeId NodeList)
, count(*) AS weight
, count(*) AS weight
FROM NEW as new1
FROM NEW as new1
INNER JOIN contexts doc ON doc.id = new1.context_id
INNER JOIN contexts doc ON doc.id = new1.context_id
INNER JOIN nodes lists ON lists.parent_id =
lists.parent
_id
INNER JOIN nodes lists ON lists.parent_id =
new1.node
_id
INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id
INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id
WHERE lists.id in (?, lists.id)
WHERE lists.id in (?, lists.id)
AND lists.typename = ?
AND lists.typename = ?
...
@@ -76,9 +76,9 @@ triggerUpdateAdd lId = execPGSQuery query (lId, nodeTypeId NodeList)
...
@@ -76,9 +76,9 @@ triggerUpdateAdd lId = execPGSQuery query (lId, nodeTypeId NodeList)
, cnn.ngrams_type AS ngrams_type
, cnn.ngrams_type AS ngrams_type
, count(*) AS fix_count
, count(*) AS fix_count
FROM NEW as new1
FROM NEW as new1
INNER JOIN contexts doc ON doc.id = new1.context_id
INNER JOIN contexts doc ON doc.id
= new1.context_id
INNER JOIN nodes lists ON
new1.node_id = lists.parent
_id
INNER JOIN nodes lists ON
lists.parent_id = new1.node
_id
INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id
INNER JOIN context_node_ngrams cnn ON cnn.context_id
= doc.id
WHERE lists.id in (?, lists.id) -- (masterList_id, userLists)
WHERE lists.id in (?, lists.id) -- (masterList_id, userLists)
AND lists.typename = ?
AND lists.typename = ?
GROUP BY node1_id, node2_id, ngrams_id, ngrams_type
GROUP BY node1_id, node2_id, ngrams_id, ngrams_type
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment