Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
158
Issues
158
List
Board
Labels
Milestones
Merge Requests
11
Merge Requests
11
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
d4a2e775
Commit
d4a2e775
authored
Feb 10, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[OPTIM] Ngrams Table scores
parent
df2a6dfe
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
83 additions
and
28 deletions
+83
-28
NgramsByContext.hs
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
+29
-21
Prelude.hs
src/Gargantext/Database/Prelude.hs
+16
-0
Ngrams.hs
src/Gargantext/Database/Query/Table/Ngrams.hs
+32
-5
Ngrams.hs
src/Gargantext/Database/Schema/Ngrams.hs
+6
-2
No files found.
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
View file @
d4a2e775
...
...
@@ -16,21 +16,23 @@ Ngrams by node enable contextual metrics.
module
Gargantext.Database.Action.Metrics.NgramsByContext
where
-- import Debug.Trace (trace)
--import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
Map
)
import
Data.Maybe
(
catMaybes
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
)
import
Data.Tuple.Extra
(
first
,
second
,
swap
)
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
-- import Debug.Trace (trace)
import
Gargantext.Core
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
))
import
Gargantext.Core
import
Gargantext.Data.HashMap.Strict.Utils
as
HM
import
Gargantext.Database.Admin.Types.Node
(
ListId
,
CorpusId
,
NodeId
(
..
),
ContextId
,
MasterCorpusId
,
NodeType
(
NodeDocument
),
UserCorpusId
,
DocId
)
import
Gargantext.Database.Prelude
(
Cmd
,
runPGSQuery
)
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypeId
,
NgramsType
(
..
))
import
Gargantext.Database.Query.Table.Ngrams
(
selectNgramsId
)
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypeId
,
NgramsType
(
..
),
NgramsId
)
import
Gargantext.Prelude
import
qualified
Data.HashMap.Strict
as
HM
import
qualified
Data.Map
as
Map
...
...
@@ -111,37 +113,43 @@ getOccByNgramsOnlyFast' :: CorpusId
->
NgramsType
->
[
NgramsTerm
]
->
Cmd
err
(
HashMap
NgramsTerm
Int
)
getOccByNgramsOnlyFast'
cId
lId
nt
tms
=
-- trace (show (cId, lId)) $
HM
.
fromListWith
(
+
)
<$>
map
(
second
round
)
<$>
run
cId
lId
nt
tms
getOccByNgramsOnlyFast'
cId
lId
nt
tms
=
do
-- trace (show (cId, lId)) $
mapNgramsIds
<-
selectNgramsId
$
map
unNgramsTerm
tms
HM
.
fromListWith
(
+
)
<$>
catMaybes
<$>
map
(
\
(
nId
,
s
)
->
(,)
<$>
(
NgramsTerm
<$>
(
Map
.
lookup
nId
mapNgramsIds
))
<*>
(
Just
$
round
s
)
)
<$>
run
cId
lId
nt
(
Map
.
keys
mapNgramsIds
)
where
fields
=
[
QualifiedIdentifier
Nothing
"text"
]
run
::
CorpusId
->
ListId
->
NgramsType
->
[
Ngrams
Term
]
->
Cmd
err
[(
Ngrams
Term
,
Double
)]
run
cId'
lId'
nt'
tms'
=
map
(
first
NgramsTerm
)
<$>
runPGSQuery
query
(
Values
fields
((
DPS
.
Only
.
unNgramsTerm
)
<$>
tms'
)
->
[
Ngrams
Id
]
->
Cmd
err
[(
Ngrams
Id
,
Double
)]
run
cId'
lId'
nt'
tms'
=
runPGSQuery
query
(
Values
fields
((
DPS
.
Only
)
<$>
tms'
)
,
cId'
,
lId'
,
ngramsTypeId
nt'
)
fields
=
[
QualifiedIdentifier
Nothing
"int4"
]
query
::
DPS
.
Query
query
=
[
sql
|
WITH input_rows(terms) AS (?)
SELECT ng.terms, nng.weight FROM nodes_contexts nc
JOIN node_node_ngrams nng ON nng.node1_id = nc.node_id
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
WHERE nng.node1_id = ? -- CorpusId
AND nng.node2_id = ? -- ListId
AND nng.ngrams_type = ? -- NgramsTypeId
AND nc.category > 0 -- Not trash
GROUP BY ng.terms, nng.weight
WITH input_ngrams(id) AS (?)
SELECT ngi.id, nng.weight FROM nodes_contexts nc
JOIN node_node_ngrams nng ON nng.node1_id = nc.node_id
JOIN input_ngrams ngi ON nng.ngrams_id = ngi.id
WHERE nng.node1_id = ?
AND nng.node2_id = ?
AND nng.ngrams_type = ?
AND nc.category > 0
GROUP BY ngi.id, nng.weight
|]
selectNgramsOccurrencesOnlyByContextUser_withSample
::
HasDBid
NodeType
...
...
src/Gargantext/Database/Prelude.hs
View file @
d4a2e775
...
...
@@ -140,6 +140,22 @@ runPGSQuery q a = mkCmd $ \conn -> catch (PGS.query conn q a) (printError conn)
hPutStrLn
stderr
q'
throw
(
SomeException
e
)
{-
-- TODO
runPGSQueryFold :: ( CmdM env err m
, PGS.FromRow r
)
=> PGS.Query -> a -> (a -> r -> IO a) -> m a
runPGSQueryFold q initialState consume = mkCmd $ \conn -> catch (PGS.fold_ conn initialState consume) (printError conn)
where
printError c (SomeException e) = do
q' <- PGS.formatQuery c q
hPutStrLn stderr q'
throw (SomeException e)
-}
-- | TODO catch error
runPGSQuery_
::
(
CmdM
env
err
m
,
PGS
.
FromRow
r
...
...
src/Gargantext/Database/Query/Table/Ngrams.hs
View file @
d4a2e775
...
...
@@ -18,27 +18,29 @@ module Gargantext.Database.Query.Table.Ngrams
,
queryNgramsTable
,
selectNgramsByDoc
,
insertNgrams
,
selectNgramsId
)
where
import
Control.Lens
((
^.
))
import
Data.ByteString.Internal
(
ByteString
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
Map
)
import
Data.Text
(
Text
)
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.List
as
List
import
qualified
Database.PostgreSQL.Simple
as
PGS
import
Gargantext.Core.Types
import
Gargantext.Database.Prelude
(
runOpaQuery
,
Cmd
,
formatPGSQuery
,
runPGSQuery
)
import
Gargantext.Database.Query.Join
(
leftJoin3
)
import
Gargantext.Database.Query.Table.ContextNodeNgrams2
import
Gargantext.Database.Query.Table.NodeNgrams
(
queryNodeNgramsTable
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.NodeNgrams
import
Gargantext.Database.Query.Table.NodeNgrams
(
queryNodeNgramsTable
)
import
Gargantext.Database.Schema.Prelude
import
Gargantext.Database.Types
import
Gargantext.Prelude
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Database.PostgreSQL.Simple
as
PGS
queryNgramsTable
::
Select
NgramsRead
queryNgramsTable
=
selectTable
ngramsTable
...
...
@@ -106,3 +108,28 @@ queryInsertNgrams = [sql|
FROM input_rows
JOIN ngrams c USING (terms); -- columns of unique index
|]
--------------------------------------------------------------------------
selectNgramsId
::
[
Text
]
->
Cmd
err
(
Map
NgramsId
Text
)
selectNgramsId
ns
=
if
List
.
null
ns
then
pure
Map
.
empty
else
Map
.
fromList
<$>
map
(
\
(
Indexed
i
t
)
->
(
i
,
t
))
<$>
(
selectNgramsId'
ns
)
selectNgramsId'
::
[
Text
]
->
Cmd
err
[
Indexed
Int
Text
]
selectNgramsId'
ns
=
runPGSQuery
querySelectNgramsId
(
PGS
.
Only
$
Values
fields
ns
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"text"
]
querySelectNgramsId
::
PGS
.
Query
querySelectNgramsId
=
[
sql
|
WITH input_rows(terms) AS (?)
SELECT n.id, n.terms
FROM ngrams n
JOIN input_rows ir ON ir.terms = n.terms
GROUP BY n.terms, n.id
|]
src/Gargantext/Database/Schema/Ngrams.hs
View file @
d4a2e775
...
...
@@ -11,8 +11,9 @@ Ngrams connection to the Database.
-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
...
...
@@ -173,6 +174,9 @@ instance FromField Ngrams where
x
<-
fromField
fld
mdata
pure
$
text2ngrams
x
instance
PGS
.
ToRow
Text
where
toRow
t
=
[
toField
t
]
text2ngrams
::
Text
->
Ngrams
text2ngrams
txt
=
UnsafeNgrams
txt'
$
length
$
splitOn
" "
txt'
where
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment