Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
5d0a03bc
Commit
5d0a03bc
authored
Jan 15, 2019
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'dev-ngrams-table' into dev
[MERGE] fix merge [NgramsTable] fix order and bugs of groups
parents
e99e4e05
9187d327
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
371 additions
and
162 deletions
+371
-162
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+43
-29
Flow.hs
src/Gargantext/Database/Flow.hs
+1
-1
Utils.hs
src/Gargantext/Database/Flow/Utils.hs
+3
-2
Ngrams.hs
src/Gargantext/Database/Schema/Ngrams.hs
+150
-10
NodeNgram.hs
src/Gargantext/Database/Schema/NodeNgram.hs
+152
-22
NodeNgramsNgrams.hs
src/Gargantext/Database/Schema/NodeNgramsNgrams.hs
+1
-70
schema.sql
src/Gargantext/Database/Schema/schema.sql
+4
-2
Tree.hs
src/Gargantext/Database/Tree.hs
+17
-26
No files found.
src/Gargantext/API/Ngrams.hs
View file @
5d0a03bc
...
...
@@ -39,9 +39,10 @@ import Data.Monoid
--import Data.Semigroup
import
Data.Set
(
Set
)
import
qualified
Data.Set
as
Set
--import Data.Maybe (catMaybes)
import
Data.Maybe
(
isJust
)
import
Data.Tuple.Extra
(
first
)
-- import qualified Data.Map.Strict as DM
import
Data.Map.Strict
(
Map
)
import
Data.Map.Strict
(
Map
,
mapKeys
,
fromListWith
)
--import qualified Data.Set as Set
import
Control.Lens
(
makeLenses
,
Prism
'
,
prism'
,
(
^..
),
(
.~
),
(
#
),
to
,
withIndex
,
folded
,
ifolded
)
import
Control.Monad
(
guard
)
...
...
@@ -57,10 +58,9 @@ import GHC.Generics (Generic)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
)
import
Gargantext.Database.Types.Node
(
NodeType
(
..
))
import
Gargantext.Database.Schema.Node
(
defaultList
,
HasNodeError
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
,
NgramsTypeId
,
ngramsTypeId
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsType
,
NgramsTypeId
,
ngramsTypeId
,
NgramsTableData
'
(
..
)
)
import
qualified
Gargantext.Database.Schema.Ngrams
as
Ngrams
import
Gargantext.Database.Schema.NodeNgram
import
Gargantext.Database.Schema.NodeNgramsNgrams
import
Gargantext.Database.Utils
(
Cmd
)
import
Gargantext.Prelude
import
Gargantext.Core.Types
(
ListType
(
..
),
ListTypeId
,
ListId
,
CorpusId
,
Limit
,
Offset
,
listTypeId
)
...
...
@@ -119,6 +119,29 @@ instance Arbitrary NgramsElement where
newtype
NgramsTable
=
NgramsTable
{
_ngramsTable
::
[
NgramsElement
]
}
deriving
(
Ord
,
Eq
,
Generic
,
ToJSON
,
FromJSON
,
Show
)
-- | TODO Check N and Weight
toNgramsElement
::
[
NgramsTableData'
]
->
[
NgramsElement
]
toNgramsElement
ns
=
map
toNgramsElement'
ns
where
toNgramsElement'
(
NgramsTableData'
_
p
t
_
lt
w
)
=
NgramsElement
t
lt'
(
round
w
)
p'
c'
where
p'
=
case
p
of
Nothing
->
Nothing
Just
x
->
lookup
x
mapParent
c'
=
maybe
mempty
identity
$
lookup
t
mapChildren
lt'
=
maybe
(
panic
"API.Ngrams: listypeId"
)
identity
lt
mapParent
::
Map
Int
Text
mapParent
=
fromListWith
(
<>
)
$
map
(
\
(
NgramsTableData'
i
_
t
_
_
_
)
->
(
i
,
t
))
ns
mapChildren
::
Map
Text
(
Set
Text
)
mapChildren
=
mapKeys
(
\
i
->
(
maybe
(
panic
"API.Ngrams.mapChildren: ParentId with no Terms: Impossible"
)
identity
$
lookup
i
mapParent
))
$
fromListWith
(
<>
)
$
map
(
first
fromJust
)
$
filter
(
isJust
.
fst
)
$
map
(
\
(
NgramsTableData'
_
p
t
_
_
_
)
->
(
p
,
Set
.
singleton
t
))
ns
instance
Arbitrary
NgramsTable
where
arbitrary
=
elements
[
NgramsTable
...
...
@@ -278,10 +301,11 @@ mkListsUpdate nt patches =
]
mkChildrenGroups
::
(
PatchSet
NgramsTerm
->
Set
NgramsTerm
)
->
NgramsType
->
NgramsTablePatch
->
[(
Ngrams
Parent
,
NgramsChild
,
Maybe
Double
)]
mkChildrenGroups
addOrRem
patches
=
[
(
parent
,
child
,
Just
1
)
->
[(
Ngrams
TypeId
,
NgramsParent
,
NgramsChild
)]
mkChildrenGroups
addOrRem
nt
patches
=
[
(
ngramsTypeId
nt
,
parent
,
child
)
|
(
parent
,
patch
)
<-
patches
^..
ntp_ngrams_patches
.
ifolded
.
withIndex
,
child
<-
patch
^..
patch_children
.
to
addOrRem
.
folded
]
...
...
@@ -290,7 +314,7 @@ ngramsTypeFromTabType :: Maybe TabType -> NgramsType
ngramsTypeFromTabType
maybeTabType
=
let
lieu
=
"Garg.API.Ngrams: "
::
Text
in
case
maybeTabType
of
Nothing
->
Ngrams
.
Sources
--
panic (lieu <> "Indicate the Table")
Nothing
->
panic
(
lieu
<>
"Indicate the Table"
)
Just
tab
->
case
tab
of
Sources
->
Ngrams
.
Sources
Authors
->
Ngrams
.
Authors
...
...
@@ -315,8 +339,8 @@ tableNgramsPatch corpusId maybeTabType maybeList (Versioned version patch) = do
updateNodeNgrams
$
NodeNgramsUpdate
{
_nnu_user_list_id
=
listId
,
_nnu_lists_update
=
mkListsUpdate
ngramsType
patch
,
_nnu_rem_children
=
mkChildrenGroups
_rem
patch
,
_nnu_add_children
=
mkChildrenGroups
_add
patch
,
_nnu_rem_children
=
mkChildrenGroups
_rem
ngramsType
patch
,
_nnu_add_children
=
mkChildrenGroups
_add
ngramsType
patch
}
pure
$
Versioned
1
emptyNgramsTablePatch
...
...
@@ -327,7 +351,6 @@ getTableNgrams :: HasNodeError err
->
Maybe
ListId
->
Maybe
Limit
->
Maybe
Offset
->
Cmd
err
(
Versioned
NgramsTable
)
getTableNgrams
cId
maybeTabType
maybeListId
mlimit
moffset
=
do
let
lieu
=
"Garg.API.Ngrams: "
::
Text
let
ngramsType
=
ngramsTypeFromTabType
maybeTabType
listId
<-
maybe
(
defaultList
cId
)
pure
maybeListId
...
...
@@ -336,18 +359,9 @@ getTableNgrams cId maybeTabType maybeListId mlimit moffset = do
limit_
=
maybe
defaultLimit
identity
mlimit
offset_
=
maybe
0
identity
moffset
(
ngramsTableDatas
,
mapToParent
,
mapToChildren
)
<-
ngramsTableDatas
<-
Ngrams
.
getNgramsTableDb
NodeDocument
ngramsType
(
Ngrams
.
NgramsTableParam
listId
cId
)
limit_
offset_
-- printDebug "ngramsTableDatas" ngramsTableDatas
pure
$
Versioned
1
$
NgramsTable
$
map
(
\
(
Ngrams
.
NgramsTableData
ngs
_
lt
w
)
->
NgramsElement
ngs
(
maybe
(
panic
$
lieu
<>
"listType"
)
identity
lt
)
(
round
w
)
(
lookup
ngs
mapToParent
)
(
maybe
mempty
identity
$
lookup
ngs
mapToChildren
)
)
ngramsTableDatas
pure
$
Versioned
1
$
NgramsTable
(
toNgramsElement
ngramsTableDatas
)
src/Gargantext/Database/Flow.hs
View file @
5d0a03bc
...
...
@@ -315,7 +315,7 @@ ngrams2list m =
-- | TODO: weight of the list could be a probability
insertLists
::
HasNodeError
err
=>
ListId
->
[(
ListType
,
(
NgramsType
,
NgramsIndexed
))]
->
Cmd
err
Int
insertLists
lId
lngs
=
insertNodeNgrams
[
NodeNgram
lId
(
_ngramsId
ng
)
(
ngramsTypeId
ngt
)
(
fromIntegral
$
listTypeId
l
)
1
insertLists
lId
lngs
=
insertNodeNgrams
[
NodeNgram
lId
(
_ngramsId
ng
)
Nothing
(
ngramsTypeId
ngt
)
(
fromIntegral
$
listTypeId
l
)
1
|
(
l
,(
ngt
,
ng
))
<-
lngs
]
------------------------------------------------------------------------
src/Gargantext/Database/Flow/Utils.hs
View file @
5d0a03bc
...
...
@@ -54,9 +54,10 @@ data DocumentIdWithNgrams a =
,
document_ngrams
::
Map
(
NgramsT
Ngrams
)
Int
}
deriving
(
Show
)
-- | TODO for now, list Type is CandidateList, why ?
-- | TODO for now, list Type is CandidateList because Graph Terms
-- have to be detected in next step in the flow
insertToNodeNgrams
::
Map
NgramsIndexed
(
Map
NgramsType
(
Map
NodeId
Int
))
->
Cmd
err
Int
insertToNodeNgrams
m
=
insertNodeNgrams
[
NodeNgram
n
(
_ngramsId
ng
)
(
ngramsTypeId
t
)
(
listTypeId
CandidateList
)
(
fromIntegral
i
)
insertToNodeNgrams
m
=
insertNodeNgrams
[
NodeNgram
n
(
_ngramsId
ng
)
Nothing
(
ngramsTypeId
t
)
(
listTypeId
CandidateList
)
(
fromIntegral
i
)
|
(
ng
,
t2n2i
)
<-
DM
.
toList
m
,
(
t
,
n2i
)
<-
DM
.
toList
t2n2i
,
(
n
,
i
)
<-
DM
.
toList
n2i
...
...
src/Gargantext/Database/Schema/Ngrams.hs
View file @
5d0a03bc
...
...
@@ -25,7 +25,6 @@ Ngrams connection to the Database.
module
Gargantext.Database.Schema.Ngrams
where
import
Control.Lens
(
makeLenses
,
view
,
over
)
import
Control.Monad
(
mzero
)
import
Data.ByteString.Internal
(
ByteString
)
...
...
@@ -40,7 +39,7 @@ import Database.PostgreSQL.Simple.ToField (toField, ToField)
import
Database.PostgreSQL.Simple.FromField
(
FromField
,
fromField
)
import
Database.PostgreSQL.Simple.ToRow
(
toRow
)
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
--
import Debug.Trace (trace)
import
Debug.Trace
(
trace
)
import
GHC.Generics
(
Generic
)
import
Gargantext.Core.Types
-- (fromListTypeId, ListType, NodePoly(Node))
import
Gargantext.Database.Config
(
nodeTypeId
,
userMaster
)
...
...
@@ -233,8 +232,8 @@ queryInsertNgrams = [sql|
getNgramsTableDb
::
NodeType
->
NgramsType
->
NgramsTableParamUser
->
Limit
->
Offset
->
Cmd
err
([
NgramsTableData
],
MapToParent
,
MapToChildren
)
getNgramsTableDb
nt
ngrt
ntp
@
(
NgramsTableParam
listIdUser
_
)
limit_
offset_
=
do
->
Cmd
err
[
NgramsTableData'
]
getNgramsTableDb
nt
ngrt
ntp
limit_
offset_
=
do
maybeRoot
<-
head
<$>
getRoot
userMaster
...
...
@@ -246,11 +245,7 @@ getNgramsTableDb nt ngrt ntp@(NgramsTableParam listIdUser _) limit_ offset_ = do
listMasterId
<-
maybe
(
panic
"error master list"
)
(
view
node_id
)
<$>
head
<$>
getListsWithParentId
corpusMasterId
ngramsTableData
<-
getNgramsTableData
nt
ngrt
ntp
(
NgramsTableParam
listMasterId
corpusMasterId
)
limit_
offset_
(
mapToParent
,
mapToChildren
)
<-
getNgramsGroup
listIdUser
listMasterId
pure
(
ngramsTableData
,
mapToParent
,
mapToChildren
)
getNgramsTableData'
nt
ngrt
ntp
(
NgramsTableParam
listMasterId
corpusMasterId
)
limit_
offset_
data
NgramsTableParam
=
NgramsTableParam
{
_nt_listId
::
NodeId
...
...
@@ -271,7 +266,7 @@ getNgramsTableData :: NodeType -> NgramsType
->
Limit
->
Offset
->
Cmd
err
[
NgramsTableData
]
getNgramsTableData
nodeT
ngrmT
(
NgramsTableParam
ul
uc
)
(
NgramsTableParam
ml
mc
)
limit_
offset_
=
--
trace ("Ngrams table params" <> show params) <$>
trace
(
"Ngrams table params"
<>
show
params
)
<$>
map
(
\
(
t
,
n
,
nt
,
w
)
->
NgramsTableData
t
n
(
fromListTypeId
nt
)
w
)
<$>
runPGSQuery
querySelectTableNgrams
params
where
...
...
@@ -281,6 +276,32 @@ getNgramsTableData nodeT ngrmT (NgramsTableParam ul uc) (NgramsTableParam ml mc)
(
limit_
,
offset_
)
data
NgramsTableData'
=
NgramsTableData'
{
_ntd2_id
::
Int
,
_ntd2_parent_id
::
Maybe
Int
,
_ntd2_terms
::
Text
,
_ntd2_n
::
Int
,
_ntd2_listType
::
Maybe
ListType
,
_ntd2_weight
::
Double
}
deriving
(
Show
)
getNgramsTableData'
::
NodeType
->
NgramsType
->
NgramsTableParamUser
->
NgramsTableParamMaster
->
Limit
->
Offset
->
Cmd
err
[
NgramsTableData'
]
getNgramsTableData'
nodeT
ngrmT
(
NgramsTableParam
ul
uc
)
(
NgramsTableParam
ml
mc
)
limit_
offset_
=
trace
(
"Ngrams table params: "
<>
show
params
)
<$>
map
(
\
(
i
,
p
,
t
,
n
,
lt
,
w
)
->
NgramsTableData'
i
p
t
n
(
fromListTypeId
lt
)
w
)
<$>
runPGSQuery
querySelectTableNgramsTrees
params
where
nodeTId
=
nodeTypeId
nodeT
ngrmTId
=
ngramsTypeId
ngrmT
params
=
(
ul
,
ml
,
uc
,
mc
,
nodeTId
,
ngrmTId
)
:.
(
limit_
,
offset_
)
getNgramsTableDataDebug
::
PGS
.
ToRow
a
=>
a
->
Cmd
err
ByteString
getNgramsTableDataDebug
=
formatPGSQuery
querySelectTableNgramsTrees
querySelectTableNgrams
::
PGS
.
Query
querySelectTableNgrams
=
[
sql
|
...
...
@@ -296,6 +317,7 @@ querySelectTableNgrams = [sql|
AND nn.node1_id = ? -- User CorpusId or AnnuaireId
AND n.typename = ? -- both type of childs (Documents or Contacts)
AND corp.ngrams_type = ? -- both type of ngrams (Authors or Terms or...)
AND list.parent_id IS NULL
)
, tableMaster AS (
SELECT ngs.terms, ngs.n, list.list_type, corp.weight FROM ngrams ngs
...
...
@@ -309,6 +331,7 @@ querySelectTableNgrams = [sql|
AND n.typename = ? -- Master childs (Documents or Contacts)
AND corp.ngrams_type = ? -- both type of ngrams (Authors or Terms?)
AND nn.node1_id = ? -- User CorpusId or AnnuaireId
AND list.parent_id IS NULL
)
SELECT COALESCE(tu.terms,tm.terms) AS terms
...
...
@@ -323,6 +346,123 @@ querySelectTableNgrams = [sql|
|]
querySelectTableNgramsTrees
::
PGS
.
Query
querySelectTableNgramsTrees
=
[
sql
|
DROP FUNCTION tree_start(integer,integer,integer,integer,integer,integer,integer,integer);
DROP FUNCTION tree_end(integer,integer,integer,integer,integer,integer);
DROP FUNCTION tree_ngrams(integer,integer,integer,integer,integer,integer,integer,integer);
CREATE OR REPLACE FUNCTION public.tree_start(luid INT, lmid INT,cuid INT, cmid INT, tdoc INT, tngrams INT, lmt INT, ofst INT)
RETURNS TABLE (id INT, parent_id INT, terms VARCHAR(255), n int, list_type int, weight float8) AS $$
BEGIN
RETURN QUERY
WITH tableUser AS (
SELECT list.id, list.parent_id, ngs.terms, ngs.n, list.list_type, corp.weight FROM ngrams ngs
JOIN nodes_ngrams list ON list.ngrams_id = ngs.id
JOIN nodes_ngrams corp ON corp.ngrams_id = ngs.id
JOIN nodes_nodes nn ON nn.node2_id = corp.node_id
JOIN nodes n ON n.id = corp.node_id
WHERE list.node_id = luid -- User listId
AND nn.node1_id = cuid -- User CorpusId or AnnuaireId
AND n.typename = tdoc -- both type of childs (Documents or Contacts)
AND corp.ngrams_type = tngrams -- both type of ngrams (Authors or Terms or...)
AND list.parent_id IS NULL
),
tableMaster AS (
SELECT list.id, list.parent_id, ngs.terms, ngs.n, list.list_type, corp.weight FROM ngrams ngs
JOIN nodes_ngrams list ON list.ngrams_id = ngs.id
JOIN nodes_ngrams corp ON corp.ngrams_id = ngs.id
JOIN nodes n ON n.id = corp.node_id
JOIN nodes_nodes nn ON nn.node2_id = n.id
WHERE list.node_id = lmid -- Master listId
AND n.parent_id = cmid -- Master CorpusId or AnnuaireId
AND n.typename = tdoc -- Master childs (Documents or Contacts)
AND corp.ngrams_type = tngrams -- both type of ngrams (Authors or Terms1)
AND nn.node1_id = cuid -- User CorpusId or AnnuaireId
AND list.parent_id IS NULL
)
SELECT COALESCE(tu.id,tm.id) AS id
, COALESCE(tu.parent_id,tm.parent_id) AS parent_id
, COALESCE(tu.terms,tm.terms) AS terms
, COALESCE(tu.n,tm.n) AS n
, COALESCE(tu.list_type,tm.list_type) AS ngrams_type
, SUM(COALESCE(tu.weight,tm.weight)) AS weight
FROM tableUser tu RIGHT JOIN tableMaster tm ON tu.terms = tm.terms
GROUP BY tu.id,tm.id,tu.parent_id,tm.parent_id,tu.terms,tm.terms,tu.n,tm.n,tu.list_type,tm.list_type
ORDER BY 3
LIMIT lmt
OFFSET ofst
;
END $$
LANGUAGE plpgsql ;
CREATE OR REPLACE FUNCTION public.tree_end(luid INT, lmid INT,cuid INT, cmid INT, tdoc INT, tngrams INT)
RETURNS TABLE (id INT, parent_id INT, terms VARCHAR(255), n int, list_type int, weight float8) AS $$
BEGIN
RETURN QUERY
WITH tableUser2 AS (
SELECT list.id, list.parent_id, ngs.terms, ngs.n, list.list_type, corp.weight FROM ngrams ngs
JOIN nodes_ngrams list ON list.ngrams_id = ngs.id
JOIN nodes_ngrams corp ON corp.ngrams_id = ngs.id
JOIN nodes_nodes nn ON nn.node2_id = corp.node_id
JOIN nodes n ON n.id = corp.node_id
WHERE list.node_id = luid -- User listId
AND nn.node1_id = cuid -- User CorpusId or AnnuaireId
AND n.typename = tdoc -- both type of childs (Documents or Contacts)
AND corp.ngrams_type = tngrams -- both type of ngrams (Authors or Terms or...)
)
, tableMaster2 AS (
SELECT list.id, list.parent_id, ngs.terms, ngs.n, list.list_type, corp.weight FROM ngrams ngs
JOIN nodes_ngrams list ON list.ngrams_id = ngs.id
JOIN nodes_ngrams corp ON corp.ngrams_id = ngs.id
JOIN nodes n ON n.id = corp.node_id
JOIN nodes_nodes nn ON nn.node2_id = n.id
WHERE list.node_id = lmid -- Master listId
AND n.parent_id = cmid -- Master CorpusId or AnnuaireId
AND n.typename = tdoc -- Master childs (Documents or Contacts)
AND corp.ngrams_type = tngrams -- both type of ngrams (Authors or Terms1)
AND nn.node1_id = cuid -- User CorpusId or AnnuaireId
)
SELECT COALESCE(tu.id,tm.id) as id
, COALESCE(tu.parent_id,tm.parent_id) as parent_id
, COALESCE(tu.terms,tm.terms) AS terms
, COALESCE(tu.n,tm.n) AS n
, COALESCE(tu.list_type,tm.list_type) AS list_type
, SUM(COALESCE(tu.weight,tm.weight)) AS weight
FROM tableUser2 tu RIGHT JOIN tableMaster2 tm ON tu.terms = tm.terms
GROUP BY tu.id,tm.id,tu.parent_id,tm.parent_id,tu.terms,tm.terms,tu.n,tm.n,tu.list_type,tm.list_type
;
END $$
LANGUAGE plpgsql ;
CREATE OR REPLACE FUNCTION public.tree_ngrams(luid INT, lmid INT,cuid INT, cmid INT, tdoc INT, tngrams INT, lmt INT, ofst INT)
RETURNS TABLE (id INT, parent_id INT, terms VARCHAR(255), n int, list_type int, weight float8) AS $$
BEGIN
RETURN QUERY WITH RECURSIVE
ngrams_tree (id,parent_id,terms,n,list_type,weight) AS (
SELECT ts.id,ts.parent_id,ts.terms,ts.n,ts.list_type,ts.weight FROM tree_start($1,$2,$3,$4,$5,$6,$7,$8) ts
UNION
SELECT te.id,te.parent_id,te.terms,te.n,te.list_type,te.weight FROM tree_end($1,$2,$3,$4,$5,$6) as te
INNER JOIN ngrams_tree ON te.parent_id = ngrams_tree.id
)
SELECT * from ngrams_tree;
END $$
LANGUAGE plpgsql ;
select * from tree_ngrams(?,?,?,?,?,?,?,?)
|]
type
ListIdUser
=
NodeId
type
ListIdMaster
=
NodeId
...
...
src/Gargantext/Database/Schema/NodeNgram.hs
View file @
5d0a03bc
...
...
@@ -19,6 +19,7 @@ if Node is a List then it is listing (either Stop, Candidate or Map)
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
...
...
@@ -32,26 +33,29 @@ module Gargantext.Database.Schema.NodeNgram where
import
Data.ByteString
(
ByteString
)
import
Data.Text
(
Text
)
import
Debug.Trace
(
trace
)
import
Control.Lens.TH
(
makeLenses
)
import
Control.Monad
(
void
)
import
Data.Profunctor.Product.TH
(
makeAdaptorAndInstance
)
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Gargantext.Core.Types.Main
(
ListTypeId
)
import
Gargantext.Database.Utils
(
mkCmd
,
Cmd
,
execPGSQuery
)
import
Gargantext.Database.Types.Node
(
NodeId
,
ListId
)
import
Gargantext.Core.Types.Main
(
ListTypeId
)
import
Gargantext.Database.Types.Node
(
NodeId
,
ListId
,
NodeType
(
..
))
import
Gargantext.Database.Config
(
nodeTypeId
,
userMaster
)
import
Gargantext.Database.Schema.Node
(
pgNodeId
)
import
Gargantext.Database.Schema.Ngrams
(
NgramsTypeId
,
pgNgramsTypeId
)
import
Gargantext.Database.Schema.NodeNgramsNgrams
(
NgramsChild
,
NgramsParent
,
ngramsGroup
,
Action
(
..
))
import
Gargantext.Prelude
import
Gargantext.Database.Utils
(
formatPGSQuery
)
import
Opaleye
import
qualified
Database.PostgreSQL.Simple
as
PGS
(
Query
)
import
qualified
Database.PostgreSQL.Simple
as
DPS
-- | TODO : remove id
data
NodeNgramPoly
node_id
ngrams_id
ngrams_type
list_type
weight
data
NodeNgramPoly
node_id
ngrams_id
parent_id
ngrams_type
list_type
weight
=
NodeNgram
{
_nn_node_id
::
node_id
,
_nn_ngrams_id
::
ngrams_id
,
_nn_parent_id
::
parent_id
,
_nn_ngramsType
::
ngrams_type
,
_nn_listType
::
list_type
,
_nn_weight
::
weight
...
...
@@ -61,6 +65,8 @@ type NodeNgramWrite =
NodeNgramPoly
(
Column
PGInt4
)
(
Column
PGInt4
)
(
Maybe
(
Column
PGInt4
))
(
Column
PGInt4
)
(
Column
PGInt4
)
(
Column
PGFloat8
)
...
...
@@ -69,6 +75,8 @@ type NodeNgramRead =
NodeNgramPoly
(
Column
PGInt4
)
(
Column
PGInt4
)
(
Column
PGInt4
)
(
Column
PGInt4
)
(
Column
PGInt4
)
(
Column
PGFloat8
)
...
...
@@ -77,12 +85,20 @@ type NodeNgramReadNull =
NodeNgramPoly
(
Column
(
Nullable
PGInt4
))
(
Column
(
Nullable
PGInt4
))
(
Column
(
Nullable
PGInt4
))
(
Column
(
Nullable
PGInt4
))
(
Column
(
Nullable
PGInt4
))
(
Column
(
Nullable
PGFloat8
))
type
NodeNgram
=
NodeNgramPoly
NodeId
Int
NgramsTypeId
Int
Double
NodeNgramPoly
NodeId
Int
(
Maybe
NgramsParentId
)
NgramsTypeId
Int
Double
newtype
NgramsParentId
=
NgramsParentId
Int
deriving
(
Show
,
Eq
,
Num
)
pgNgramsParentId
::
NgramsParentId
->
Column
PGInt4
pgNgramsParentId
(
NgramsParentId
n
)
=
pgInt4
n
$
(
makeAdaptorAndInstance
"pNodeNgram"
''
N
odeNgramPoly
)
makeLenses
''
N
odeNgramPoly
...
...
@@ -92,6 +108,7 @@ nodeNgramTable = Table "nodes_ngrams"
(
pNodeNgram
NodeNgram
{
_nn_node_id
=
required
"node_id"
,
_nn_ngrams_id
=
required
"ngrams_id"
,
_nn_parent_id
=
optional
"parent_id"
,
_nn_ngramsType
=
required
"ngrams_type"
,
_nn_listType
=
required
"list_type"
,
_nn_weight
=
required
"weight"
...
...
@@ -103,9 +120,10 @@ queryNodeNgramTable = queryTable nodeNgramTable
insertNodeNgrams
::
[
NodeNgram
]
->
Cmd
err
Int
insertNodeNgrams
=
insertNodeNgramW
.
map
(
\
(
NodeNgram
n
g
ngt
lt
w
)
->
.
map
(
\
(
NodeNgram
n
g
p
ngt
lt
w
)
->
NodeNgram
(
pgNodeId
n
)
(
pgInt4
g
)
(
pgNgramsParentId
<$>
p
)
(
pgNgramsTypeId
ngt
)
(
pgInt4
lt
)
(
pgDouble
w
)
...
...
@@ -125,16 +143,18 @@ type NgramsText = Text
updateNodeNgrams'
::
ListId
->
[(
NgramsTypeId
,
NgramsText
,
ListTypeId
)]
->
Cmd
err
()
updateNodeNgrams'
_
[]
=
pure
()
updateNodeNgrams'
listId
input
=
void
$
execPGSQuery
updateQuery
(
listId
,
Values
fields
input
)
updateNodeNgrams'
listId
input
=
void
$
execPGSQuery
updateQuery
(
DPS
.
Only
$
Values
fields
input'
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"int4"
,
"int4"
,
"text"
,
"int4"
]
input'
=
map
(
\
(
nt
,
t
,
lt
)
->
(
listId
,
nt
,
t
,
lt
))
input
updateNodeNgrams'_debug
::
ListId
->
[(
NgramsTypeId
,
NgramsText
,
ListTypeId
)]
->
Cmd
err
ByteString
updateNodeNgrams'_debug
listId
input
=
formatPGSQuery
updateQuery
(
listId
,
Values
fields
input
)
updateNodeNgrams'_debug
listId
input
=
formatPGSQuery
updateQuery
(
DPS
.
Only
$
Values
fields
input'
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"int4"
,
"int4"
,
"text"
,
"int4"
]
input'
=
map
(
\
(
nt
,
t
,
lt
)
->
(
listId
,
nt
,
t
,
lt
))
input
updateQuery
::
PG
S
.
Query
updateQuery
::
DP
S
.
Query
updateQuery
=
[
sql
|
WITH new(node_id,ngrams_type,terms,typeList) as (?)
...
...
@@ -147,16 +167,124 @@ ON CONFLICT (node_id, ngrams_id, ngrams_type) DO
UPDATE SET list_type = excluded.list_type
;
|]
data
Action
=
Del
|
Add
type
NgramsParent
=
Text
type
NgramsChild
=
Text
ngramsGroup
::
Action
->
ListId
->
[(
NgramsTypeId
,
NgramsParent
,
NgramsChild
)]
->
Cmd
err
()
ngramsGroup
_
_
[]
=
pure
()
ngramsGroup
a
lid
input
=
void
$
trace
(
show
input
)
$
execPGSQuery
(
ngramsGroupQuery
a
)
(
DPS
.
Only
$
Values
fields
input'
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"int4"
,
"int4"
,
"text"
,
"int4"
,
"text"
,
"text"
]
input'
=
map
(
\
(
ntpid
,
p
,
c
)
->
(
lid
,
nodeTypeId
NodeList
,
userMaster
,
ntpid
,
p
,
c
))
input
ngramsGroupQuery
::
Action
->
DPS
.
Query
ngramsGroupQuery
a
=
case
a
of
Add
->
[
sql
|
WITH input(lid,listTypeId,masterUsername,ntype,parent_terms,child_terms)
AS (?),
-- (VALUES (15::"int4",5::"int4",'gargantua'::"text",4::"int4",'input'::"text",'designer'::"text"))),
list_master AS (
SELECT n.id from nodes n
JOIN auth_user u ON n.user_id = u.id
JOIN input ON n.typename = input.listTypeId
WHERE u.username = input.masterUsername
LIMIT 1
),
list_user AS(
-- FIRST import parent from master to user list
INSERT INTO nodes_ngrams (node_id,ngrams_id,parent_id,ngrams_type,list_type,weight)
SELECT input.lid,nn.ngrams_id,nn.parent_id,nn.ngrams_type,nn.list_type, nn.weight
FROM INPUT
JOIN ngrams ng ON ng.terms = input.parent_terms
JOIN nodes_ngrams nn ON nn.ngrams_id = ng.id
JOIN list_master ON nn.node_id = list_master.id
WHERE
nn.ngrams_id = ng.id
ON CONFLICT (node_id,ngrams_id,ngrams_type) DO NOTHING
)
INSERT INTO nodes_ngrams (node_id,ngrams_id,parent_id,ngrams_type,list_type,weight)
SELECT input.lid, nc.id, nnpu.id, input.ntype, nnmaster.list_type, nnmaster.weight
FROM input
JOIN ngrams np ON np.terms = input.parent_terms
JOIN ngrams nc ON nc.terms = input.child_terms
JOIN nodes_ngrams nnpu ON nnpu.ngrams_id = np.id
JOIN nodes_ngrams nnmaster ON nnmaster.ngrams_id = nc.id
JOIN list_master ON nnmaster.node_id = list_master.id
WHERE
nnpu.node_id = input.lid
AND nnpu.ngrams_type = input.ntype
AND nnmaster.ngrams_id = nc.id
AND nnmaster.ngrams_type = ntype
ON CONFLICT (node_id,ngrams_id,ngrams_type) DO
UPDATE SET parent_id = excluded.parent_id
|]
Del
->
[
sql
|
WITH input(lid,listTypeId,masterUsername,ntype,parent_terms,child_terms)
AS (?),
-- (VALUES (15::"int4",5::"int4",'gargantua'::"text",4::"int4",'input'::"text",'designer'::"text"))),
list_master AS (
SELECT n.id from nodes n
JOIN auth_user u ON n.user_id = u.id
JOIN input ON n.typename = input.listTypeId
WHERE u.username = input.masterUsername
LIMIT 1
),
list_user AS(
-- FIRST import parent from master to user list
INSERT INTO nodes_ngrams (node_id,ngrams_id,parent_id,ngrams_type,list_type,weight)
SELECT input.lid,nn.ngrams_id,nn.parent_id,nn.ngrams_type,nn.list_type, nn.weight
FROM INPUT
JOIN ngrams ng ON ng.terms = input.parent_terms
JOIN nodes_ngrams nn ON nn.ngrams_id = ng.id
JOIN list_master ON nn.node_id = list_master.id
WHERE
nn.ngrams_id = ng.id
ON CONFLICT (node_id,ngrams_id,ngrams_type) DO NOTHING
)
INSERT INTO nodes_ngrams (node_id,ngrams_id,parent_id,ngrams_type,list_type,weight)
SELECT input.lid, nc.id, NULL, input.ntype, nnmaster.list_type, nnmaster.weight
FROM input
JOIN ngrams np ON np.terms = input.parent_terms
JOIN ngrams nc ON nc.terms = input.child_terms
JOIN nodes_ngrams nnmaster ON nnmaster.ngrams_id = nc.id
JOIN list_master ON nnmaster.node_id = list_master.id
WHERE
nnmaster.ngrams_id = nc.id
AND nnmaster.ngrams_type = ntype
ON CONFLICT (node_id,ngrams_id,ngrams_type) DO
UPDATE SET parent_id = NULL
|]
data
NodeNgramsUpdate
=
NodeNgramsUpdate
{
_nnu_user_list_id
::
ListId
,
_nnu_lists_update
::
[(
NgramsTypeId
,
NgramsText
,
ListTypeId
)]
,
_nnu_add_children
::
[(
Ngrams
Parent
,
NgramsChild
,
Maybe
Double
)]
,
_nnu_rem_children
::
[(
Ngrams
Parent
,
NgramsChild
,
Maybe
Double
)]
,
_nnu_add_children
::
[(
Ngrams
TypeId
,
NgramsParent
,
NgramsChild
)]
,
_nnu_rem_children
::
[(
Ngrams
TypeId
,
NgramsParent
,
NgramsChild
)]
}
-- TODO wrap these updates in a transaction.
...
...
@@ -165,5 +293,7 @@ updateNodeNgrams nnu = do
updateNodeNgrams'
userListId
$
_nnu_lists_update
nnu
ngramsGroup
Del
userListId
$
_nnu_rem_children
nnu
ngramsGroup
Add
userListId
$
_nnu_add_children
nnu
-- TODO remove duplicate line (fix SQL query)
ngramsGroup
Add
userListId
$
_nnu_add_children
nnu
where
userListId
=
_nnu_user_list_id
nnu
src/Gargantext/Database/Schema/NodeNgramsNgrams.hs
View file @
5d0a03bc
...
...
@@ -34,21 +34,14 @@ module Gargantext.Database.Schema.NodeNgramsNgrams
import
Control.Lens
(
view
)
import
Control.Lens.TH
(
makeLensesWith
,
abbreviatedFields
)
import
Control.Monad
(
void
)
import
Control.Monad.IO.Class
(
liftIO
)
import
Data.ByteString
(
ByteString
)
import
Data.Text
(
Text
)
import
Data.Maybe
(
Maybe
)
import
Data.Profunctor.Product.TH
(
makeAdaptorAndInstance
)
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
import
Debug.Trace
(
trace
)
import
Gargantext.Database.Utils
(
Cmd
,
runOpaQuery
,
execPGSQuery
,
connection
,
formatPGSQuery
)
import
Gargantext.Database.Utils
(
Cmd
,
runOpaQuery
,
connection
)
import
Gargantext.Database.Types.Node
(
ListId
)
import
Gargantext.Database.Schema.Node
(
pgNodeId
)
import
Gargantext.Prelude
import
Opaleye
import
qualified
Database.PostgreSQL.Simple
as
PGS
data
NodeNgramsNgramsPoly
node_id
ngram1_id
ngram2_id
weight
=
NodeNgramsNgrams
{
_nng_NodeId
::
node_id
...
...
@@ -122,65 +115,3 @@ insertNodeNgramsNgramsW ns = do
c
<-
view
connection
liftIO
$
fromIntegral
<$>
runInsertMany
c
nodeNgramsNgramsTable
ns
------------------------------------------------------------------------
data
Action
=
Del
|
Add
type
NgramsParent
=
Text
type
NgramsChild
=
Text
ngramsGroup
::
Action
->
ListId
->
[(
NgramsParent
,
NgramsChild
,
Maybe
Double
)]
->
Cmd
err
()
ngramsGroup
_
_
[]
=
pure
()
ngramsGroup
action
listId
ngs
=
trace
(
show
ngs
)
$
runNodeNgramsNgrams
q
listId
ngs
where
q
=
case
action
of
Del
->
queryDelNodeNgramsNgrams
Add
->
queryInsertNodeNgramsNgrams
runNodeNgramsNgrams
::
PGS
.
Query
->
ListId
->
[(
NgramsParent
,
NgramsChild
,
Maybe
Double
)]
->
Cmd
err
()
runNodeNgramsNgrams
q
listId
ngs
=
void
$
execPGSQuery
q
(
listId
,
Values
fields
ngs'
)
where
ngs'
=
map
(
\
(
ng1
,
ng2
,
w
)
->
(
ng1
,
ng2
,
maybe
0
identity
w
))
ngs
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"int4"
,
"text"
,
"text"
,
"float8"
]
runNodeNgramsNgramsDebug
::
PGS
.
Query
->
ListId
->
[(
NgramsParent
,
NgramsChild
,
Maybe
Double
)]
->
Cmd
err
ByteString
runNodeNgramsNgramsDebug
q
listId
ngs
=
formatPGSQuery
q
(
listId
,
Values
fields
ngs'
)
where
ngs'
=
map
(
\
(
ng1
,
ng2
,
w
)
->
(
ng1
,
ng2
,
maybe
0
identity
w
))
ngs
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"int4"
,
"text"
,
"text"
,
"float8"
]
--------------------------------------------------------------------
-- TODO: on conflict update weight
queryInsertNodeNgramsNgrams
::
PGS
.
Query
queryInsertNodeNgramsNgrams
=
[
sql
|
WITH nId AS ?
WITH input_rows(ng1,ng2,w) AS (?)
INSERT INTO nodes_ngrams_ngrams (node_id,ngram1_id,ngram2_id,weight)
SELECT nId,ngrams1.id,ngrams2.id,w FROM input_rows
JOIN ngrams ngrams1 ON ngrams1.terms = ng1
JOIN ngrams ngrams2 ON ngrams2.terms = ng2
ON CONFLICT (node_id,ngram1_id,ngram2_id) DO NOTHING -- unique index created here
|]
queryDelNodeNgramsNgrams
::
PGS
.
Query
queryDelNodeNgramsNgrams
=
[
sql
|
WITH nId AS ?
WITH input(ng1,ng2,w) AS (?)
DELETE FROM nodes_ngrams_ngrams AS nnn
USING ngrams AS ngrams1,
ngrams AS ngrams2,
input AS input
WHERE
ngrams1.terms = input.ng1
AND ngrams2.terms = input.ng2
AND nnn.node_id = input.nId
AND nnn.ngram1_id = ngrams1.id
AND nnn.ngram2_id = ngrams2.id
;
|]
src/Gargantext/Database/Schema/schema.sql
View file @
5d0a03bc
...
...
@@ -54,12 +54,14 @@ CREATE TABLE public.nodes_ngrams (
id
SERIAL
,
node_id
integer
NOT
NULL
,
ngrams_id
integer
NOT
NULL
,
parent_id
integer
REFERENCES
public
.
nodes_ngrams
(
id
)
ON
DELETE
SET
NULL
,
ngrams_type
integer
,
list_type
integer
,
weight
double
precision
,
FOREIGN
KEY
(
node_id
)
REFERENCES
public
.
nodes
(
id
)
ON
DELETE
CASCADE
,
FOREIGN
KEY
(
ngrams_id
)
REFERENCES
public
.
ngrams
(
id
)
ON
DELETE
CASCADE
,
PRIMARY
KEY
(
node_id
,
ngrams_id
)
PRIMARY
KEY
(
id
)
-- PRIMARY KEY (node_id,ngrams_id)
);
ALTER
TABLE
public
.
nodes_ngrams
OWNER
TO
gargantua
;
...
...
@@ -103,10 +105,10 @@ CREATE UNIQUE INDEX nodes_expr_idx2 ON public.nodes USING btree (((hyperdata ->>
CREATE
UNIQUE
INDEX
nodes_typename_parent_id_expr_idx
ON
public
.
nodes
USING
btree
(
typename
,
parent_id
,
((
hyperdata
->>
'uniqId'
::
text
)));
CREATE
INDEX
nodes_user_id_typename_parent_id_idx
ON
public
.
nodes
USING
btree
(
user_id
,
typename
,
parent_id
);
CREATE
UNIQUE
INDEX
ON
public
.
ngrams
(
terms
);
--CREATE UNIQUE INDEX ON public.ngrams(terms,n);
CREATE
UNIQUE
INDEX
ON
public
.
nodes_ngrams
USING
btree
(
node_id
,
ngrams_id
);
CREATE
INDEX
nodes_ngrams_ngrams_id_idx
ON
public
.
nodes_ngrams
USING
btree
(
ngrams_id
);
CREATE
INDEX
nodes_ngrams_ngrams_node_id_idx
ON
public
.
nodes_ngrams_ngrams
USING
btree
(
node_id
);
CREATE
UNIQUE
INDEX
ON
public
.
nodes_ngrams
USING
btree
(
node_id
,
ngrams_id
,
ngrams_type
);
...
...
src/Gargantext/Database/Tree.hs
View file @
5d0a03bc
...
...
@@ -86,33 +86,24 @@ data DbTreeNode = DbTreeNode { dt_nodeId :: NodeId
dbTree
::
RootId
->
Cmd
err
[
DbTreeNode
]
dbTree
rootId
=
map
(
\
(
nId
,
tId
,
pId
,
n
)
->
DbTreeNode
nId
tId
pId
n
)
<$>
runPGSQuery
[
sql
|
WITH RECURSIVE
-- starting node(s)
starting (id, typename, parent_id, name) AS
tree (id, typename, parent_id, name) AS
(
SELECT n.id, n.typename, n.parent_id, n.name
FROM nodes AS n
WHERE n.parent_id = ? -- this can be arbitrary
),
descendants (id, typename, parent_id, name) AS
(
SELECT id, typename, parent_id, name
FROM starting
UNION ALL
SELECT n.id, n.typename, n.parent_id, n.name
FROM nodes AS n JOIN descendants AS d ON n.parent_id = d.id
where n.typename in (2,3,30,31,5,7,9)
),
ancestors (id, typename, parent_id, name) AS
(
SELECT n.id, n.typename, n.parent_id, n.name
FROM nodes AS n
WHERE n.id IN (SELECT parent_id FROM starting)
UNION ALL
SELECT n.id, n.typename, n.parent_id, n.name
FROM nodes AS n JOIN ancestors AS a ON n.id = a.parent_id
SELECT p.id, p.typename, p.parent_id, p.name
FROM nodes AS p
WHERE p.id = ?
UNION
SELECT c.id, c.typename, c.parent_id, c.name
FROM nodes AS c
INNER JOIN tree AS s ON c.parent_id = s.id
WHERE c.typename IN (2,3,30,31,5,7,9)
)
TABLE ancestors
UNION ALL
TABLE descendants ;
SELECT * from tree;
|]
(
Only
rootId
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment