Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Christian Merten
haskell-gargantext
Commits
d4a2e775
Commit
d4a2e775
authored
Feb 10, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[OPTIM] Ngrams Table scores
parent
df2a6dfe
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
83 additions
and
28 deletions
+83
-28
NgramsByContext.hs
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
+29
-21
Prelude.hs
src/Gargantext/Database/Prelude.hs
+16
-0
Ngrams.hs
src/Gargantext/Database/Query/Table/Ngrams.hs
+32
-5
Ngrams.hs
src/Gargantext/Database/Schema/Ngrams.hs
+6
-2
No files found.
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
View file @
d4a2e775
...
@@ -16,21 +16,23 @@ Ngrams by node enable contextual metrics.
...
@@ -16,21 +16,23 @@ Ngrams by node enable contextual metrics.
module
Gargantext.Database.Action.Metrics.NgramsByContext
module
Gargantext.Database.Action.Metrics.NgramsByContext
where
where
-- import Debug.Trace (trace)
--import Data.Map.Strict.Patch (PatchMap, Replace, diff)
--import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
Map
)
import
Data.Map
(
Map
)
import
Data.Maybe
(
catMaybes
)
import
Data.Set
(
Set
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
Data.Tuple.Extra
(
first
,
second
,
swap
)
import
Data.Tuple.Extra
(
first
,
second
,
swap
)
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
-- import Debug.Trace (trace)
import
Gargantext.Core
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
))
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
))
import
Gargantext.Core
import
Gargantext.Data.HashMap.Strict.Utils
as
HM
import
Gargantext.Data.HashMap.Strict.Utils
as
HM
import
Gargantext.Database.Admin.Types.Node
(
ListId
,
CorpusId
,
NodeId
(
..
),
ContextId
,
MasterCorpusId
,
NodeType
(
NodeDocument
),
UserCorpusId
,
DocId
)
import
Gargantext.Database.Admin.Types.Node
(
ListId
,
CorpusId
,
NodeId
(
..
),
ContextId
,
MasterCorpusId
,
NodeType
(
NodeDocument
),
UserCorpusId
,
DocId
)
import
Gargantext.Database.Prelude
(
Cmd
,
runPGSQuery
)
import
Gargantext.Database.Prelude
(
Cmd
,
runPGSQuery
)
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypeId
,
NgramsType
(
..
))
import
Gargantext.Database.Query.Table.Ngrams
(
selectNgramsId
)
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypeId
,
NgramsType
(
..
),
NgramsId
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
qualified
Data.HashMap.Strict
as
HM
import
qualified
Data.HashMap.Strict
as
HM
import
qualified
Data.Map
as
Map
import
qualified
Data.Map
as
Map
...
@@ -111,37 +113,43 @@ getOccByNgramsOnlyFast' :: CorpusId
...
@@ -111,37 +113,43 @@ getOccByNgramsOnlyFast' :: CorpusId
->
NgramsType
->
NgramsType
->
[
NgramsTerm
]
->
[
NgramsTerm
]
->
Cmd
err
(
HashMap
NgramsTerm
Int
)
->
Cmd
err
(
HashMap
NgramsTerm
Int
)
getOccByNgramsOnlyFast'
cId
lId
nt
tms
=
-- trace (show (cId, lId)) $
getOccByNgramsOnlyFast'
cId
lId
nt
tms
=
do
-- trace (show (cId, lId)) $
HM
.
fromListWith
(
+
)
<$>
map
(
second
round
)
<$>
run
cId
lId
nt
tms
mapNgramsIds
<-
selectNgramsId
$
map
unNgramsTerm
tms
HM
.
fromListWith
(
+
)
<$>
catMaybes
<$>
map
(
\
(
nId
,
s
)
->
(,)
<$>
(
NgramsTerm
<$>
(
Map
.
lookup
nId
mapNgramsIds
))
<*>
(
Just
$
round
s
)
)
<$>
run
cId
lId
nt
(
Map
.
keys
mapNgramsIds
)
where
where
fields
=
[
QualifiedIdentifier
Nothing
"text"
]
run
::
CorpusId
run
::
CorpusId
->
ListId
->
ListId
->
NgramsType
->
NgramsType
->
[
Ngrams
Term
]
->
[
Ngrams
Id
]
->
Cmd
err
[(
Ngrams
Term
,
Double
)]
->
Cmd
err
[(
Ngrams
Id
,
Double
)]
run
cId'
lId'
nt'
tms'
=
map
(
first
NgramsTerm
)
<$>
runPGSQuery
query
run
cId'
lId'
nt'
tms'
=
runPGSQuery
query
(
Values
fields
((
DPS
.
Only
.
unNgramsTerm
)
<$>
tms'
)
(
Values
fields
((
DPS
.
Only
)
<$>
tms'
)
,
cId'
,
cId'
,
lId'
,
lId'
,
ngramsTypeId
nt'
,
ngramsTypeId
nt'
)
)
fields
=
[
QualifiedIdentifier
Nothing
"int4"
]
query
::
DPS
.
Query
query
::
DPS
.
Query
query
=
[
sql
|
query
=
[
sql
|
WITH input_rows(terms) AS (?)
WITH input_ngrams(id) AS (?)
SELECT ng.terms, nng.weight FROM nodes_contexts nc
JOIN node_node_ngrams nng ON nng.node1_id = nc.node_id
SELECT ngi.id, nng.weight FROM nodes_contexts nc
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN node_node_ngrams nng ON nng.node1_id = nc.node_id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_ngrams ngi ON nng.ngrams_id = ngi.id
WHERE nng.node1_id = ? -- CorpusId
WHERE nng.node1_id = ?
AND nng.node2_id = ? -- ListId
AND nng.node2_id = ?
AND nng.ngrams_type = ? -- NgramsTypeId
AND nng.ngrams_type = ?
AND nc.category > 0 -- Not trash
AND nc.category > 0
GROUP BY ng.terms, nng.weight
GROUP BY ngi.id, nng.weight
|]
|]
selectNgramsOccurrencesOnlyByContextUser_withSample
::
HasDBid
NodeType
selectNgramsOccurrencesOnlyByContextUser_withSample
::
HasDBid
NodeType
...
...
src/Gargantext/Database/Prelude.hs
View file @
d4a2e775
...
@@ -140,6 +140,22 @@ runPGSQuery q a = mkCmd $ \conn -> catch (PGS.query conn q a) (printError conn)
...
@@ -140,6 +140,22 @@ runPGSQuery q a = mkCmd $ \conn -> catch (PGS.query conn q a) (printError conn)
hPutStrLn
stderr
q'
hPutStrLn
stderr
q'
throw
(
SomeException
e
)
throw
(
SomeException
e
)
{-
-- TODO
runPGSQueryFold :: ( CmdM env err m
, PGS.FromRow r
)
=> PGS.Query -> a -> (a -> r -> IO a) -> m a
runPGSQueryFold q initialState consume = mkCmd $ \conn -> catch (PGS.fold_ conn initialState consume) (printError conn)
where
printError c (SomeException e) = do
q' <- PGS.formatQuery c q
hPutStrLn stderr q'
throw (SomeException e)
-}
-- | TODO catch error
-- | TODO catch error
runPGSQuery_
::
(
CmdM
env
err
m
runPGSQuery_
::
(
CmdM
env
err
m
,
PGS
.
FromRow
r
,
PGS
.
FromRow
r
...
...
src/Gargantext/Database/Query/Table/Ngrams.hs
View file @
d4a2e775
...
@@ -18,27 +18,29 @@ module Gargantext.Database.Query.Table.Ngrams
...
@@ -18,27 +18,29 @@ module Gargantext.Database.Query.Table.Ngrams
,
queryNgramsTable
,
queryNgramsTable
,
selectNgramsByDoc
,
selectNgramsByDoc
,
insertNgrams
,
insertNgrams
,
selectNgramsId
)
)
where
where
import
Control.Lens
((
^.
))
import
Control.Lens
((
^.
))
import
Data.ByteString.Internal
(
ByteString
)
import
Data.ByteString.Internal
(
ByteString
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
Map
)
import
Data.Text
(
Text
)
import
Data.Text
(
Text
)
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.List
as
List
import
qualified
Database.PostgreSQL.Simple
as
PGS
import
Gargantext.Core.Types
import
Gargantext.Core.Types
import
Gargantext.Database.Prelude
(
runOpaQuery
,
Cmd
,
formatPGSQuery
,
runPGSQuery
)
import
Gargantext.Database.Prelude
(
runOpaQuery
,
Cmd
,
formatPGSQuery
,
runPGSQuery
)
import
Gargantext.Database.Query.Join
(
leftJoin3
)
import
Gargantext.Database.Query.Join
(
leftJoin3
)
import
Gargantext.Database.Query.Table.ContextNodeNgrams2
import
Gargantext.Database.Query.Table.ContextNodeNgrams2
import
Gargantext.Database.Query.Table.NodeNgrams
(
queryNodeNgramsTable
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.NodeNgrams
import
Gargantext.Database.Schema.NodeNgrams
import
Gargantext.Database.Query.Table.NodeNgrams
(
queryNodeNgramsTable
)
import
Gargantext.Database.Schema.Prelude
import
Gargantext.Database.Schema.Prelude
import
Gargantext.Database.Types
import
Gargantext.Database.Types
import
Gargantext.Prelude
import
Gargantext.Prelude
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Database.PostgreSQL.Simple
as
PGS
queryNgramsTable
::
Select
NgramsRead
queryNgramsTable
::
Select
NgramsRead
queryNgramsTable
=
selectTable
ngramsTable
queryNgramsTable
=
selectTable
ngramsTable
...
@@ -106,3 +108,28 @@ queryInsertNgrams = [sql|
...
@@ -106,3 +108,28 @@ queryInsertNgrams = [sql|
FROM input_rows
FROM input_rows
JOIN ngrams c USING (terms); -- columns of unique index
JOIN ngrams c USING (terms); -- columns of unique index
|]
|]
--------------------------------------------------------------------------
selectNgramsId
::
[
Text
]
->
Cmd
err
(
Map
NgramsId
Text
)
selectNgramsId
ns
=
if
List
.
null
ns
then
pure
Map
.
empty
else
Map
.
fromList
<$>
map
(
\
(
Indexed
i
t
)
->
(
i
,
t
))
<$>
(
selectNgramsId'
ns
)
selectNgramsId'
::
[
Text
]
->
Cmd
err
[
Indexed
Int
Text
]
selectNgramsId'
ns
=
runPGSQuery
querySelectNgramsId
(
PGS
.
Only
$
Values
fields
ns
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"text"
]
querySelectNgramsId
::
PGS
.
Query
querySelectNgramsId
=
[
sql
|
WITH input_rows(terms) AS (?)
SELECT n.id, n.terms
FROM ngrams n
JOIN input_rows ir ON ir.terms = n.terms
GROUP BY n.terms, n.id
|]
src/Gargantext/Database/Schema/Ngrams.hs
View file @
d4a2e775
...
@@ -11,8 +11,9 @@ Ngrams connection to the Database.
...
@@ -11,8 +11,9 @@ Ngrams connection to the Database.
-}
-}
{-# LANGUAGE Arrows #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TemplateHaskell #-}
...
@@ -173,6 +174,9 @@ instance FromField Ngrams where
...
@@ -173,6 +174,9 @@ instance FromField Ngrams where
x
<-
fromField
fld
mdata
x
<-
fromField
fld
mdata
pure
$
text2ngrams
x
pure
$
text2ngrams
x
instance
PGS
.
ToRow
Text
where
toRow
t
=
[
toField
t
]
text2ngrams
::
Text
->
Ngrams
text2ngrams
::
Text
->
Ngrams
text2ngrams
txt
=
UnsafeNgrams
txt'
$
length
$
splitOn
" "
txt'
text2ngrams
txt
=
UnsafeNgrams
txt'
$
length
$
splitOn
" "
txt'
where
where
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment