Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Przemyslaw Kaminski
haskell-gargantext
Commits
d4a2e775
Commit
d4a2e775
authored
Feb 10, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[OPTIM] Ngrams Table scores
parent
df2a6dfe
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
83 additions
and
28 deletions
+83
-28
NgramsByContext.hs
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
+29
-21
Prelude.hs
src/Gargantext/Database/Prelude.hs
+16
-0
Ngrams.hs
src/Gargantext/Database/Query/Table/Ngrams.hs
+32
-5
Ngrams.hs
src/Gargantext/Database/Schema/Ngrams.hs
+6
-2
No files found.
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
View file @
d4a2e775
...
...
@@ -16,21 +16,23 @@ Ngrams by node enable contextual metrics.
module
Gargantext.Database.Action.Metrics.NgramsByContext
where
-- import Debug.Trace (trace)
--import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
Map
)
import
Data.Maybe
(
catMaybes
)
import
Data.Set
(
Set
)
import
Data.Text
(
Text
)
import
Data.Tuple.Extra
(
first
,
second
,
swap
)
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
-- import Debug.Trace (trace)
import
Gargantext.Core
import
Gargantext.API.Ngrams.Types
(
NgramsTerm
(
..
))
import
Gargantext.Core
import
Gargantext.Data.HashMap.Strict.Utils
as
HM
import
Gargantext.Database.Admin.Types.Node
(
ListId
,
CorpusId
,
NodeId
(
..
),
ContextId
,
MasterCorpusId
,
NodeType
(
NodeDocument
),
UserCorpusId
,
DocId
)
import
Gargantext.Database.Prelude
(
Cmd
,
runPGSQuery
)
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypeId
,
NgramsType
(
..
))
import
Gargantext.Database.Query.Table.Ngrams
(
selectNgramsId
)
import
Gargantext.Database.Schema.Ngrams
(
ngramsTypeId
,
NgramsType
(
..
),
NgramsId
)
import
Gargantext.Prelude
import
qualified
Data.HashMap.Strict
as
HM
import
qualified
Data.Map
as
Map
...
...
@@ -111,39 +113,45 @@ getOccByNgramsOnlyFast' :: CorpusId
->
NgramsType
->
[
NgramsTerm
]
->
Cmd
err
(
HashMap
NgramsTerm
Int
)
getOccByNgramsOnlyFast'
cId
lId
nt
tms
=
-- trace (show (cId, lId)) $
HM
.
fromListWith
(
+
)
<$>
map
(
second
round
)
<$>
run
cId
lId
nt
tms
getOccByNgramsOnlyFast'
cId
lId
nt
tms
=
do
-- trace (show (cId, lId)) $
mapNgramsIds
<-
selectNgramsId
$
map
unNgramsTerm
tms
HM
.
fromListWith
(
+
)
<$>
catMaybes
<$>
map
(
\
(
nId
,
s
)
->
(,)
<$>
(
NgramsTerm
<$>
(
Map
.
lookup
nId
mapNgramsIds
))
<*>
(
Just
$
round
s
)
)
<$>
run
cId
lId
nt
(
Map
.
keys
mapNgramsIds
)
where
fields
=
[
QualifiedIdentifier
Nothing
"text"
]
run
::
CorpusId
->
ListId
->
NgramsType
->
[
Ngrams
Term
]
->
Cmd
err
[(
Ngrams
Term
,
Double
)]
run
cId'
lId'
nt'
tms'
=
map
(
first
NgramsTerm
)
<$>
runPGSQuery
query
(
Values
fields
((
DPS
.
Only
.
unNgramsTerm
)
<$>
tms'
)
->
[
Ngrams
Id
]
->
Cmd
err
[(
Ngrams
Id
,
Double
)]
run
cId'
lId'
nt'
tms'
=
runPGSQuery
query
(
Values
fields
((
DPS
.
Only
)
<$>
tms'
)
,
cId'
,
lId'
,
ngramsTypeId
nt'
)
fields
=
[
QualifiedIdentifier
Nothing
"int4"
]
query
::
DPS
.
Query
query
=
[
sql
|
WITH input_rows(terms) AS (?)
SELECT ng.terms, nng.weight FROM nodes_contexts nc
WITH input_ngrams(id) AS (?)
SELECT ngi.id, nng.weight FROM nodes_contexts nc
JOIN node_node_ngrams nng ON nng.node1_id = nc.node_id
JOIN ngrams ng ON nng.ngrams_id = ng
.id
JOIN input_rows ir ON ir.terms = ng.terms
WHERE nng.node1_id = ? -- CorpusId
AND nng.node2_id = ? -- ListId
AND nng.ngrams_type = ? -- NgramsTypeId
AND nc.category > 0 -- Not trash
GROUP BY ng.terms, nng.weight
JOIN input_ngrams ngi ON nng.ngrams_id = ngi
.id
WHERE nng.node1_id = ?
AND nng.node2_id = ?
AND nng.ngrams_type = ?
AND nc.category > 0
GROUP BY ngi.id, nng.weight
|]
selectNgramsOccurrencesOnlyByContextUser_withSample
::
HasDBid
NodeType
=>
CorpusId
->
Int
...
...
src/Gargantext/Database/Prelude.hs
View file @
d4a2e775
...
...
@@ -140,6 +140,22 @@ runPGSQuery q a = mkCmd $ \conn -> catch (PGS.query conn q a) (printError conn)
hPutStrLn
stderr
q'
throw
(
SomeException
e
)
{-
-- TODO
runPGSQueryFold :: ( CmdM env err m
, PGS.FromRow r
)
=> PGS.Query -> a -> (a -> r -> IO a) -> m a
runPGSQueryFold q initialState consume = mkCmd $ \conn -> catch (PGS.fold_ conn initialState consume) (printError conn)
where
printError c (SomeException e) = do
q' <- PGS.formatQuery c q
hPutStrLn stderr q'
throw (SomeException e)
-}
-- | TODO catch error
runPGSQuery_
::
(
CmdM
env
err
m
,
PGS
.
FromRow
r
...
...
src/Gargantext/Database/Query/Table/Ngrams.hs
View file @
d4a2e775
...
...
@@ -18,27 +18,29 @@ module Gargantext.Database.Query.Table.Ngrams
,
queryNgramsTable
,
selectNgramsByDoc
,
insertNgrams
,
selectNgramsId
)
where
import
Control.Lens
((
^.
))
import
Data.ByteString.Internal
(
ByteString
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
Map
)
import
Data.Text
(
Text
)
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.List
as
List
import
qualified
Database.PostgreSQL.Simple
as
PGS
import
Gargantext.Core.Types
import
Gargantext.Database.Prelude
(
runOpaQuery
,
Cmd
,
formatPGSQuery
,
runPGSQuery
)
import
Gargantext.Database.Query.Join
(
leftJoin3
)
import
Gargantext.Database.Query.Table.ContextNodeNgrams2
import
Gargantext.Database.Query.Table.NodeNgrams
(
queryNodeNgramsTable
)
import
Gargantext.Database.Schema.Ngrams
import
Gargantext.Database.Schema.NodeNgrams
import
Gargantext.Database.Query.Table.NodeNgrams
(
queryNodeNgramsTable
)
import
Gargantext.Database.Schema.Prelude
import
Gargantext.Database.Types
import
Gargantext.Prelude
import
qualified
Data.HashMap.Strict
as
HashMap
import
qualified
Data.List
as
List
import
qualified
Data.Map
as
Map
import
qualified
Database.PostgreSQL.Simple
as
PGS
queryNgramsTable
::
Select
NgramsRead
queryNgramsTable
=
selectTable
ngramsTable
...
...
@@ -106,3 +108,28 @@ queryInsertNgrams = [sql|
FROM input_rows
JOIN ngrams c USING (terms); -- columns of unique index
|]
--------------------------------------------------------------------------
selectNgramsId
::
[
Text
]
->
Cmd
err
(
Map
NgramsId
Text
)
selectNgramsId
ns
=
if
List
.
null
ns
then
pure
Map
.
empty
else
Map
.
fromList
<$>
map
(
\
(
Indexed
i
t
)
->
(
i
,
t
))
<$>
(
selectNgramsId'
ns
)
selectNgramsId'
::
[
Text
]
->
Cmd
err
[
Indexed
Int
Text
]
selectNgramsId'
ns
=
runPGSQuery
querySelectNgramsId
(
PGS
.
Only
$
Values
fields
ns
)
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"text"
]
querySelectNgramsId
::
PGS
.
Query
querySelectNgramsId
=
[
sql
|
WITH input_rows(terms) AS (?)
SELECT n.id, n.terms
FROM ngrams n
JOIN input_rows ir ON ir.terms = n.terms
GROUP BY n.terms, n.id
|]
src/Gargantext/Database/Schema/Ngrams.hs
View file @
d4a2e775
...
...
@@ -11,6 +11,7 @@ Ngrams connection to the Database.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE Arrows #-}
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE QuasiQuotes #-}
...
...
@@ -173,6 +174,9 @@ instance FromField Ngrams where
x
<-
fromField
fld
mdata
pure
$
text2ngrams
x
instance
PGS
.
ToRow
Text
where
toRow
t
=
[
toField
t
]
text2ngrams
::
Text
->
Ngrams
text2ngrams
txt
=
UnsafeNgrams
txt'
$
length
$
splitOn
" "
txt'
where
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment