Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
e608bb05
Commit
e608bb05
authored
Jan 26, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] Indexes for database and score counts
parent
262a4e7d
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
75 additions
and
34 deletions
+75
-34
Main.hs
bin/gargantext-upgrade/Main.hs
+29
-0
create
devops/postgres/create
+18
-11
extensions.sql
devops/postgres/extensions.sql
+3
-0
Settings.hs
src/Gargantext/API/Admin/Settings.hs
+1
-1
List.hs
src/Gargantext/API/Ngrams/List.hs
+11
-11
NodeStory.hs
src/Gargantext/Core/NodeStory.hs
+3
-3
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+3
-1
ContextNodeNgrams.hs
src/Gargantext/Database/Admin/Trigger/ContextNodeNgrams.hs
+3
-3
NodesContexts.hs
src/Gargantext/Database/Admin/Trigger/NodesContexts.hs
+4
-4
No files found.
bin/gargantext-upgrade/Main.hs
View file @
e608bb05
...
...
@@ -252,6 +252,35 @@ sqlSchema = do
DROP TRIGGER if EXISTS trigger_insert_count ON nodes_nodes;
-- Indexes needed to speed up the deletes
-- Trigger for constraint node_ngrams_node_id_fkey
CREATE INDEX IF NOT EXISTS node_ngrams_node_id_idx ON public.node_ngrams USING btree (node_id);
-- Trigger for constraint node_node_ngrams2_node_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams2_node_id_idx ON public.node_node_ngrams2 USING btree (node_id);
-- Trigger for constraint node_node_ngrams_node1_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams_node1_id_idx ON public.node_node_ngrams USING btree (node1_id);
-- Trigger for constraint node_node_ngrams_node2_id_fkey
CREATE INDEX IF NOT EXISTS node_node_ngrams_node2_id_idx ON public.node_node_ngrams USING btree (node2_id);
-- Trigger for constraint nodes_nodes_node1_id_fkey
CREATE INDEX IF NOT EXISTS nodes_nodes_node1_id_idx ON public.nodes_nodes USING btree (node1_id);
-- Trigger for constraint nodes_nodes_node2_id_fkey
CREATE INDEX IF NOT EXISTS nodes_nodes_node2_id_idx ON public.nodes_nodes USING btree (node2_id);
-- Trigger for constraint nodes_parent_id_fkey
CREATE INDEX IF NOT EXISTS nodes_parent_id_idx ON public.nodes USING btree (parent_id);
-- Trigger for constraint rights_node_id_fkey
CREATE INDEX IF NOT EXISTS rights_node_id_idx ON public.rights USING btree (node_id);
-- Trigger for constraint nodes_contexts_node_id_fkey
CREATE INDEX IF NOT EXISTS nodes_contexts_node_id_idx ON public.nodes_contexts USING btree (node_id);
-- Trigger for constraint context_node_ngrams_node_id_fkey
CREATE INDEX IF NOT EXISTS context_node_node_id_idx ON public.context_node_ngrams USING btree (node_id);
|]
...
...
devops/postgres/create
View file @
e608bb05
#!/bin/bash
# sudo su postgres
# postgresql://$USER:$PW@localhost/$DB
PW
=
"C8kdcUrAQy66U"
DB
=
"gargandb1"
USER
=
"gargantua"
INIFILE
=
$1
#psql -c "CREATE USER \"${USER}\""
#psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'"
getter
()
{
grep
$1
$INIFILE
|
sed
"s/^.*= //"
}
psql
-c
"DROP DATABASE IF EXISTS
\"
${
DB
}
\"
"
createdb
"
${
DB
}
"
#psql "${DB}" < schema.sql
USER
=
$(
getter
"DB_USER"
)
NAME
=
$(
getter
"DB_NAME"
)
PASS
=
$(
getter
"DB_PASS"
)
HOST
=
$(
getter
"DB_HOST"
)
PORT
=
$(
getter
"DB_PORT"
)
../../bin/psql ../../gargantext.ini < gargandb.dump
psql
-c
"ALTER DATABASE
\"
${
DB
}
\"
OWNER to
\"
${
USER
}
\"
"
#psql -c "CREATE USER \"${USER}\""
#psql -c "ALTER USER \"${USER}\" with PASSWORD '${PW}'"
psql
-c
"DROP DATABASE IF EXISTS
\"
${
NAME
}
\"
"
createdb
"
${
NAME
}
"
psql
"
${
NAME
}
"
< extensions.sql
#psql "${NAME}" < schema.sql
#../../bin/psql ../../gargantext.ini < gargandb.dump
psql
-c
"ALTER DATABASE
\"
${
NAME
}
\"
OWNER to
\"
${
USER
}
\"
"
devops/postgres/extensions.sql
0 → 100644
View file @
e608bb05
CREATE
EXTENSION
IF
NOT
EXISTS
pgcrypto
;
CREATE
EXTENSION
IF
NOT
EXISTS
tsm_system_rows
;
src/Gargantext/API/Admin/Settings.hs
View file @
e608bb05
...
...
@@ -106,7 +106,7 @@ repoSnapshot repoDir = repoDir <> "/repo.cbor"
repoSaverAction
::
RepoDirFilePath
->
Serialise
a
=>
a
->
IO
()
repoSaverAction
repoDir
a
=
do
withTempFile
repoDir
"tmp-repo.cbor"
$
\
fp
h
->
do
printDebug
"repoSaverAction"
fp
--
printDebug "repoSaverAction" fp
L
.
hPut
h
$
serialise
a
hClose
h
renameFile
fp
(
repoSnapshot
repoDir
)
...
...
src/Gargantext/API/Ngrams/List.hs
View file @
e608bb05
...
...
@@ -41,7 +41,7 @@ import Gargantext.Database.Admin.Types.Hyperdata.Document
import
Gargantext.Database.Admin.Types.Node
import
Gargantext.Database.Query.Table.NodeContext
(
selectDocNodes
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.
Node
import
Gargantext.Database.Schema.
Context
import
Gargantext.Database.Types
(
Indexed
(
..
))
import
Gargantext.Prelude
import
Network.HTTP.Media
((
//
),
(
/:
))
...
...
@@ -155,12 +155,12 @@ reIndexWith cId lId nt lts = do
<$>
HashMap
.
toList
<$>
getTermsWith
identity
[
lId
]
nt
lts
--
printDebug "ts" ts
printDebug
"ts"
ts
-- Taking the ngrams with 0 occurrences only (orphans)
occs
<-
getOccByNgramsOnlyFast'
cId
lId
nt
ts
--
printDebug "occs" occs
printDebug
"occs"
occs
let
orphans
=
List
.
concat
$
map
(
\
t
->
case
HashMap
.
lookup
t
occs
of
...
...
@@ -168,28 +168,28 @@ reIndexWith cId lId nt lts = do
Just
n
->
if
n
<=
1
then
[
t
]
else
[ ]
)
ts
--
printDebug "orphans" orphans
printDebug
"orphans"
orphans
-- Get all documents of the corpus
docs
<-
selectDocNodes
cId
--
printDebug "docs length" (List.length docs)
printDebug
"docs length"
(
List
.
length
docs
)
-- Checking Text documents where orphans match
-- TODO Tests here
let
ngramsByDoc
=
map
(
HashMap
.
fromList
)
$
map
(
map
(
\
(
k
,
v
)
->
(
SimpleNgrams
(
text2ngrams
k
),
v
)))
$
map
(
\
doc
->
List
.
zip
$
map
(
\
doc
->
List
.
zip
(
termsInText
(
buildPatterns
$
map
(
\
k
->
(
Text
.
splitOn
" "
$
unNgramsTerm
k
,
[]
))
orphans
)
$
Text
.
unlines
$
catMaybes
[
doc
^.
node
_hyperdata
.
hd_title
,
doc
^.
node
_hyperdata
.
hd_abstract
[
doc
^.
context
_hyperdata
.
hd_title
,
doc
^.
context
_hyperdata
.
hd_abstract
]
)
(
List
.
cycle
[
Map
.
fromList
$
[(
nt
,
Map
.
singleton
(
doc
^.
node
_id
)
1
)]])
)
(
map
context2node
docs
)
(
List
.
cycle
[
Map
.
fromList
$
[(
nt
,
Map
.
singleton
(
doc
^.
context
_id
)
1
)]])
)
docs
--
printDebug "ngramsByDoc" ngramsByDoc
printDebug
"ngramsByDoc"
ngramsByDoc
-- Saving the indexation in database
_
<-
mapM
(
saveDocNgramsWith
lId
)
ngramsByDoc
...
...
src/Gargantext/Core/NodeStory.hs
View file @
e608bb05
...
...
@@ -177,8 +177,8 @@ type NodeStoryDir = FilePath
writeNodeStories
::
NodeStoryDir
->
NodeListStory
->
IO
()
writeNodeStories
fp
nls
=
do
done
<-
mapM
(
writeNodeStory
fp
)
$
splitByNode
nls
printDebug
"[writeNodeStories]"
done
_
done
<-
mapM
(
writeNodeStory
fp
)
$
splitByNode
nls
--
printDebug "[writeNodeStories]" done
pure
()
writeNodeStory
::
NodeStoryDir
->
(
NodeId
,
NodeListStory
)
->
IO
()
...
...
@@ -192,7 +192,7 @@ splitByNode (NodeStory m) =
saverAction'
::
Serialise
a
=>
NodeStoryDir
->
NodeId
->
a
->
IO
()
saverAction'
repoDir
nId
a
=
do
withTempFile
repoDir
((
cs
$
show
nId
)
<>
"-tmp-repo.cbor"
)
$
\
fp
h
->
do
printDebug
"[repoSaverAction]"
fp
--
printDebug "[repoSaverAction]" fp
DBL
.
hPut
h
$
serialise
a
hClose
h
renameFile
fp
(
nodeStoryPath
repoDir
nId
)
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
e608bb05
...
...
@@ -319,6 +319,8 @@ saveDocNgramsWith :: ( FlowCmdM env err m)
->
m
()
saveDocNgramsWith
lId
mapNgramsDocs'
=
do
terms2id
<-
insertExtractedNgrams
$
HashMap
.
keys
mapNgramsDocs'
printDebug
"terms2id"
terms2id
let
mapNgramsDocs
=
HashMap
.
mapKeys
extracted2ngrams
mapNgramsDocs'
-- new
...
...
@@ -326,7 +328,7 @@ saveDocNgramsWith lId mapNgramsDocs' = do
$
map
(
first
_ngramsTerms
.
second
Map
.
keys
)
$
HashMap
.
toList
mapNgramsDocs
--
printDebug "saveDocNgramsWith" mapCgramsId
printDebug
"saveDocNgramsWith"
mapCgramsId
-- insertDocNgrams
_return
<-
insertContextNodeNgrams2
$
catMaybes
[
ContextNodeNgrams2
<$>
Just
nId
...
...
src/Gargantext/Database/Admin/Trigger/ContextNodeNgrams.hs
View file @
e608bb05
...
...
@@ -35,7 +35,7 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList)
RETURN NEW;
END IF;
IF TG_OP = 'INSERT' THEN
INSERT INTO
context_node_ngrams (context_id, node
_id, ngrams_id, ngrams_type, weight)
INSERT INTO
node_node_ngrams (node1_id, node2
_id, ngrams_id, ngrams_type, weight)
select n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type, count(*) from NEW as new0
INNER JOIN contexts n ON n.id = new0.context_id
INNER JOIN nodes n2 ON n2.id = new0.node_id
...
...
@@ -43,8 +43,8 @@ triggerCountInsert = execPGSQuery query (toDBid NodeDocument, toDBid NodeList)
AND n.typename = ? -- not mandatory
AND n.parent_id <> n2.id -- not mandatory
GROUP BY n.parent_id, n.id, new0.ngrams_id, new0.ngrams_type
ON CONFLICT (
context_id, node
_id, ngrams_id, ngrams_type)
DO UPDATE set weight =
context
_node_ngrams.weight + excluded.weight
ON CONFLICT (
node1_id, node2
_id, ngrams_id, ngrams_type)
DO UPDATE set weight =
node
_node_ngrams.weight + excluded.weight
;
END IF;
...
...
src/Gargantext/Database/Admin/Trigger/NodesContexts.hs
View file @
e608bb05
...
...
@@ -42,7 +42,7 @@ triggerInsertCount lId = execPGSQuery query (lId, nodeTypeId NodeList)
, count(*) AS weight
FROM NEW as new1
INNER JOIN contexts doc ON doc.id = new1.context_id
INNER JOIN nodes lists ON lists.parent_id =
lists.parent
_id
INNER JOIN nodes lists ON lists.parent_id =
new1.node
_id
INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id
WHERE lists.id in (?, lists.id)
AND lists.typename = ?
...
...
@@ -76,9 +76,9 @@ triggerUpdateAdd lId = execPGSQuery query (lId, nodeTypeId NodeList)
, cnn.ngrams_type AS ngrams_type
, count(*) AS fix_count
FROM NEW as new1
INNER JOIN contexts doc ON doc.id = new1.context_id
INNER JOIN nodes lists ON
new1.node_id = lists.parent
_id
INNER JOIN context_node_ngrams cnn ON cnn.context_id = doc.id
INNER JOIN contexts doc ON doc.id
= new1.context_id
INNER JOIN nodes lists ON
lists.parent_id = new1.node
_id
INNER JOIN context_node_ngrams cnn ON cnn.context_id
= doc.id
WHERE lists.id in (?, lists.id) -- (masterList_id, userLists)
AND lists.typename = ?
GROUP BY node1_id, node2_id, ngrams_id, ngrams_type
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment