Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
e608bb05
Commit
e608bb05
authored
Jan 26, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] Indexes for database and score counts
parent
262a4e7d
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
75 additions
and
34 deletions
+75
-34
Main.hs
bin/gargantext-upgrade/Main.hs
+29
-0
create
devops/postgres/create
+18
-11
extensions.sql
devops/postgres/extensions.sql
+3
-0
Settings.hs
src/Gargantext/API/Admin/Settings.hs
+1
-1
List.hs
src/Gargantext/API/Ngrams/List.hs
+11
-11
NodeStory.hs
src/Gargantext/Core/NodeStory.hs
+3
-3
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+3
-1
ContextNodeNgrams.hs
src/Gargantext/Database/Admin/Trigger/ContextNodeNgrams.hs
+3
-3
NodesContexts.hs
src/Gargantext/Database/Admin/Trigger/NodesContexts.hs
+4
-4
No files found.
bin/gargantext-upgrade/Main.hs
View file @
e608bb05
...
...
@@ -252,6 +252,35 @@ sqlSchema = do
DROP TRIGGER if EXISTS trigger_insert_count ON nodes_nodes;
-- Indexes needed to speed up the deletes
-- Trigger for constraint node_ngrams_node_id_fkey
CREATE INDEX IF NOT EXISTS node_ngrams_node_id_idx ON public.node_ngrams USING btree (node_id);
-- Trigger for constraint node_node_ngrams2_node_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams2_node_id_idx ON public.node_node_ngrams2 USING btree (node_id);
-- Trigger for constraint node_node_ngrams_node1_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams_node1_id_idx ON public.node_node_ngrams USING btree (node1_id);
-- Trigger for constraint node_node_ngrams_node2_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams_node2_id_idx ON public.node_node_ngrams USING btree (node2_id);
-- Trigger for constraint nodes_nodes_node1_id_fkey
CREATE INDEX IF NOT EXISTS nodes_nodes_node1_id_idx ON public.nodes_nodes USING btree (node1_id);
-- Trigger for constraint nodes_nodes_node2_id_fkey
CREATE INDEX IF NOT EXISTS nodes_nodes_node2_id_idx ON public.nodes_nodes USING btree (node2_id);
-- Trigger for constraint nodes_parent_id_fkey
CREATE INDEX IF NOT EXISTS nodes_parent_id_idx ON public.nodes USING btree (parent_id);
-- Trigger for constraint rights_node_id_fkey
CREATE INDEX IF NOT EXISTS rights_node_id_idx ON public.rights USING btree (node_id);
-- Trigger for constraint nodes_contexts_node_id_fkey
CREATE INDEX IF NOT EXISTS nodes_contexts_node_id_idx ON public.nodes_contexts USING btree (node_id);
-- Trigger for constraint context_node_ngrams_node_id_fkey
CREATE INDEX IF NOT EXISTS context_node_node_id_idx ON public.context_node_ngrams USING btree (node_id);
|]
...
...
devops/postgres/create
View file @
e608bb05
#!/bin/bash
# sudo su postgres
# postgresql://$USER:$PW@localhost/$DB
PW
=
"C8kdcUrAQy66U"
DB
=
"gargandb1"
USER
=
"gargantua"
INIFILE
=
$1
#psql -c "CREATE USER \"${USER}\""
#psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'"
getter
()
{
grep
$1
$INIFILE
|
sed
"s/^.*= //"
}
psql
-c
"DROP DATABASE IF EXISTS
\"
${
DB
}
\"
"
createdb
"
${
DB
}
"
#psql "${DB}" < schema.sql
USER
=
$(
getter
"DB_USER"
)
NAME
=
$(
getter
"DB_NAME"
)
PASS
=
$(
getter
"DB_PASS"
)
HOST
=
$(
getter
"DB_HOST"
)
PORT
=
$(
getter
"DB_PORT"
)
../../bin/psql ../../gargantext.ini < gargandb.dump
psql
-c
"ALTER DATABASE
\"
${
DB
}
\"
OWNER to
\"
${
USER
}
\"
"
#psql -c "CREATE USER \"${USER}\""
#psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'"
psql
-c
"DROP DATABASE IF EXISTS
\"
${
NAME
}
\"
"
createdb
"
${
NAME
}
"
psql
"
${
NAME
}
"
< extensions.sql
#psql "${NAME}" < schema.sql
#../../bin/psql ../../gargantext.ini < gargandb.dump
psql
-c
"ALTER DATABASE
\"
${
NAME
}
\"
OWNER to
\"
${
USER
}
\"
"
devops/postgres/extensions.sql
0 → 100644
View file @
e608bb05
CREATE
EXTENSION
IF
NOT
EXISTS
pgcrypto
;
CREATE
EXTENSION
IF
NOT
EXISTS
tsm_system_rows
;
src/Gargantext/API/Admin/Settings.hs
View file @
e608bb05
...
...
@@ -106,7 +106,7 @@ repoSnapshot repoDir = repoDir <> "/repo.cbor"
repoSaverAction
::
RepoDirFilePath
->
Serialise
a
=>
a
->
IO
()
repoSaverAction
repoDir
a
=
do
withTempFile
repoDir
"tmp-repo.cbor"
$
\
fp
h
->
do
printDebug
"repoSaverAction"
fp
--
printDebug "repoSaverAction" fp
L
.
hPut
h
$
serialise
a
hClose
h
renameFile
fp
(
repoSnapshot
repoDir
)
...
...
src/Gargantext/API/Ngrams/List.hs
View file @
e608bb05
...
...
@@ -41,7 +41,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocNodes
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.
Node
import
Gargantext.Database.Schema.
Context
import
Gargantext.Database.Types
(
Indexed
(
..
))
import
Gargantext.Prelude
import
Network.HTTP.Media
((
//
),
(
/:
))
...
...
@@ -155,12 +155,12 @@ reIndexWith cId lId nt lts = do
<$>
HashMap
.
toList
<$>
getTermsWith
identity
[
lId
]
nt
lts
--
printDebug "ts" ts
printDebug
"ts"
ts
-- Taking the ngrams with 0 occurrences only (orphans)
occs
<-
getOccByNgramsOnlyFast'
cId
lId
nt
ts
--
printDebug "occs" occs
printDebug
"occs"
occs
let
orphans
=
List
.
concat
$
map
(
\
t
->
case
HashMap
.
lookup
t
occs
of
...
...
@@ -168,28 +168,28 @@ reIndexWith cId lId nt lts = do
Just
n
->
if
n
<=
1
then
[
t
]
else
[ ]
)
ts
--
printDebug "orphans" orphans
printDebug
"orphans"
orphans
-- Get all documents of the corpus
docs
<-
selectDocNodes
cId
--
printDebug "docs length" (List.length docs)
printDebug
"docs length"
(
List
.
length
docs
)
-- Checking Text documents where orphans match
-- TODO Tests here
let
ngramsByDoc
=
map
(
HashMap
.
fromList
)
$
map
(
map
(
\
(
k
,
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
v
)))
$
map
(
\
doc
->
List
.
zip
$
map
(
\
doc
->
List
.
zip
(
termsInText
(
buildPatterns
$
map
(
\
k
->
(
Text
.
splitOn
" "
$
unNgramsTerm
k
,
[]
))
orphans
)
$
Text
.
unlines
$
catMaybes
[
doc
^.
node
_hyperdata
.
hd_title
,
doc
^.
node
_hyperdata
.
hd_abstract
[
doc
^.
context
_hyperdata
.
hd_title
,
doc
^.
context
_hyperdata
.
hd_abstract
]
)
(
List
.
cycle
[
Map
.
fromList
$
[(
nt
,
Map
.
singleton
(
doc
^.
node
_id
)
1
)]])
)
(
map
context2node
docs
)
(
List
.
cycle
[
Map
.
fromList
$
[(
nt
,
Map
.
singleton
(
doc
^.
context
_id
)
1
)]])
)
docs
--
printDebug "ngramsByDoc" ngramsByDoc
printDebug
"ngramsByDoc"
ngramsByDoc
-- Saving the indexation in database
_
<-
mapM
(
saveDocNgramsWith
lId
)
ngramsByDoc
...
...
src/Gargantext/Core/NodeStory.hs
View file @
e608bb05
...
...
@@ -177,8 +177,8 @@ type NodeStoryDir = FilePath
writeNodeStories
::
NodeStoryDir
->
NodeListStory
->
IO
()
writeNodeStories
fp
nls
=
do
done
<-
mapM
(
writeNodeStory
fp
)
$
splitByNode
nls
printDebug
"[writeNodeStories]"
done
_
done
<-
mapM
(
writeNodeStory
fp
)
$
splitByNode
nls
--
printDebug "[writeNodeStories]" done
pure
()
writeNodeStory
::
NodeStoryDir
->
(
NodeId
,
NodeListStory
)
->
IO
()
...
...
@@ -192,7 +192,7 @@ splitByNode (NodeStory m) =
saverAction'
::
Serialise
a
=>
NodeStoryDir
->
NodeId
->
a
->
IO
()
saverAction'
repoDir
nId
a
=
do
withTempFile
repoDir
((
cs
$
show
nId
)
<>
"-tmp-repo.cbor"
)
$
\
fp
h
->
do
printDebug
"[repoSaverAction]"
fp
--
printDebug "[repoSaverAction]" fp
DBL
.
hPut
h
$
serialise
a
hClose
h
renameFile
fp
(
nodeStoryPath
repoDir
nId
)
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
e608bb05
...
...
@@ -319,6 +319,8 @@ saveDocNgramsWith :: ( FlowCmdM env err m)
->
m
()
saveDocNgramsWith
lId
mapNgramsDocs'
=
do
terms2id
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
printDebug
"terms2id"
terms2id
let
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
-- new
...
...
@@ -326,7 +328,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
$
map
(
first
_ngramsTerms
.
second
Map
.
keys
)
$
HashMap
.
toList
mapNgramsDocs
--
printDebug "saveDocNgramsWith" mapCgramsId
printDebug
"saveDocNgramsWith"
mapCgramsId
-- insertDocNgrams
_return
<-
insertContextNodeNgrams2
$
catMaybes
[
ContextNodeNgrams2
<$>
Just
nId
...
...
src/Gargantext/Database/Admin/Trigger/ContextNodeNgrams.hs
View file @
e608bb05
...
...
@@ -35,7 +35,7 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList)
RETURN NEW;
END IF;
IF TG_OP = 'INSERT' THEN
INSERT INTO
context_node_ngrams (context_id, node
_id, ngrams_id, ngrams_type, weight)
INSERT INTO
node_node_ngrams (node1_id, node2
_id, ngrams_id, ngrams_type, weight)
select n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type, count(*) from NEW as new0
INNER JOIN contexts n ON n.id = new0.context_id
INNER JOIN nodes n2 ON n2.id = new0.node_id
...
...
@@ -43,8 +43,8 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList)
AND n.typename = ? -- not mandatory
AND n.parent_id <> n2.id -- not mandatory
GROUP BY n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type
ON CONFLICT (
context_id, node
_id, ngrams_id, ngrams_type)
DO UPDATE set weight =
context
_node_ngrams.weight + excluded.weight
ON CONFLICT (
node1_id, node2
_id, ngrams_id, ngrams_type)
DO UPDATE set weight =
node
_node_ngrams.weight + excluded.weight
;
END IF;
...
...
src/Gargantext/Database/Admin/Trigger/NodesContexts.hs
View file @
e608bb05
...
...
@@ -42,7 +42,7 @@ triggerInsertCount lId = execPGSQuery query (lId, nodeTypeId NodeList)
, count(*) AS weight
FROM NEW as new1
INNER JOIN contexts doc ON doc.id = new1.context_id
INNER JOIN nodes lists ON lists.parent_id =
lists.parent
_id
INNER JOIN nodes lists ON lists.parent_id =
new1.node
_id
INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id
WHERE lists.id in (?, lists.id)
AND lists.typename = ?
...
...
@@ -76,9 +76,9 @@ triggerUpdateAdd lId = execPGSQuery query (lId, nodeTypeId NodeList)
, cnn.ngrams_type AS ngrams_type
, count(*) AS fix_count
FROM NEW as new1
INNER JOIN contexts doc ON doc.id = new1.context_id
INNER JOIN nodes lists ON
new1.node_id = lists.parent
_id
INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id
INNER JOIN contexts doc ON doc.id
= new1.context_id
INNER JOIN nodes lists ON
lists.parent_id = new1.node
_id
INNER JOIN context_node_ngrams cnn ON cnn.context_id
= doc.id
WHERE lists.id in (?, lists.id) -- (masterList_id, userLists)
AND lists.typename = ?
GROUP BY node1_id, node2_id, ngrams_id, ngrams_type
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment