Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
160
Issues
160
List
Board
Labels
Milestones
Merge Requests
8
Merge Requests
8
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
7e903e99
Unverified
Commit
7e903e99
authored
Mar 21, 2019
by
Nicolas Pouillard
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[OPT] Proposal for faster occ computation (not tested)
parent
a16aa3d3
Pipeline
#299
failed with stage
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
52 additions
and
14 deletions
+52
-14
NgramsByNode.hs
src/Gargantext/Database/Metrics/NgramsByNode.hs
+52
-14
No files found.
src/Gargantext/Database/Metrics/NgramsByNode.hs
View file @
7e903e99
...
@@ -137,37 +137,75 @@ queryNgramsByNodeUser = [sql|
...
@@ -137,37 +137,75 @@ queryNgramsByNodeUser = [sql|
-- TODO add groups
-- TODO add groups
getOccByNgramsOnly
::
CorpusId
->
NgramsType
->
[
Text
]
getOccByNgramsOnly
::
CorpusId
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
Int
)
->
Cmd
err
(
Map
Text
Int
)
getOccByNgramsOnly
cId
nt
ngs
=
Map
.
map
Set
.
size
getOccByNgramsOnly
cId
nt
ngs
=
<$>
getNodesByNgramsOnlyUser
cId
nt
ngs
fromListWith
(
+
)
<$>
selectNgramsOccurrencesOnlyByNodeUser
cId
nt
ngs
-- | Number of nodes attached to each of the given ngrams, computed by first
-- fetching the full set of node ids per ngram with 'getNodesByNgramsOnlyUser'
-- and then taking the size of every set.
--
-- Just slower than 'getOccByNgramsOnly'.
getOccByNgramsOnly' :: CorpusId
                    -> NgramsType
                    -> [Text]
                    -> Cmd err (Map Text Int)
getOccByNgramsOnly' cId nt ngs =
  fmap countNodes (getNodesByNgramsOnlyUser cId nt ngs)
  where
    -- Replace each set of node ids by its cardinality.
    countNodes = Map.map Set.size
-- | Run 'queryNgramsOccurrencesOnlyByNodeUser' for the given corpus, ngrams
-- type and list of terms, returning @(term, count)@ rows.
--
-- The query groups by node id and term, so the same term can appear in
-- several rows (one per node); callers that want a total per term have to
-- combine the counts themselves.
--
-- The parameter tuple must follow the order of the @?@ placeholders in the
-- query text: the terms table first (it feeds the @WITH@ clause), then the
-- corpus id, the document node type id and the ngrams type id.
selectNgramsOccurrencesOnlyByNodeUser :: CorpusId
                                      -> NgramsType
                                      -> [Text]
                                      -> Cmd err [(Text, Int)]
selectNgramsOccurrencesOnlyByNodeUser cId nt tms =
  runPGSQuery queryNgramsOccurrencesOnlyByNodeUser
    ( Values fields (DPS.Only <$> tms)
    , cId
    , nodeTypeId NodeDocument
    , ngramsTypeId nt
    )
  where
    -- SQL type of the single column of the @input_rows@ table.
    fields = [QualifiedIdentifier Nothing "text"]

-- | Same as queryNgramsOnlyByNodeUser but using COUNT on the node ids.
--
-- Placeholders, in order: the @VALUES@ table of terms, the corpus id, the
-- node type id and the ngrams type id. Only the terms table is a CTE; the
-- scalar parameters are plain placeholders in the @WHERE@ clause, because
-- @WITH name AS ?@ (a CTE bound to a bare scalar) and repeated @WITH@
-- keywords are not valid PostgreSQL.
queryNgramsOccurrencesOnlyByNodeUser :: DPS.Query
queryNgramsOccurrencesOnlyByNodeUser = [sql|
  WITH input_rows(terms) AS (?)
  SELECT ng.terms, COUNT(nng.node_id) FROM nodes_ngrams nng
    JOIN ngrams ng      ON nng.ngrams_id = ng.id
    JOIN input_rows ir  ON ir.terms      = ng.terms
    JOIN nodes_nodes nn ON nn.node2_id   = nng.node_id
    JOIN nodes n        ON nn.node2_id   = n.id
    WHERE nn.node1_id     = ?     -- CorpusId
      AND n.typename      = ?     -- NodeTypeId
      AND nng.ngrams_type = ?     -- NgramsTypeId
      AND nn.delete       = False
    GROUP BY nng.node_id, ng.terms
  |]
getNodesByNgramsOnlyUser
::
CorpusId
->
NgramsType
->
[
Text
]
getNodesByNgramsOnlyUser
::
CorpusId
->
NgramsType
->
[
Text
]
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
->
Cmd
err
(
Map
Text
(
Set
NodeId
))
getNodesByNgramsOnlyUser
cId
nt
ngs
=
getNodesByNgramsOnlyUser
cId
nt
ngs
=
fromListWith
(
<>
)
<$>
map
(
\
(
n
,
t
)
->
(
t
,
Set
.
singleton
n
)
)
fromListWith
(
<>
)
<$>
map
(
second
Set
.
singleton
)
<$>
selectNgramsOnlyByNodeUser
cId
nt
ngs
<$>
selectNgramsOnlyByNodeUser
cId
nt
ngs
selectNgramsOnlyByNodeUser
::
CorpusId
->
NgramsType
->
[
Text
]
selectNgramsOnlyByNodeUser
::
CorpusId
->
NgramsType
->
[
Text
]
->
Cmd
err
[(
NodeId
,
Text
)]
->
Cmd
err
[(
Text
,
NodeId
)]
selectNgramsOnlyByNodeUser
cId
nt
tms
=
selectNgramsOnlyByNodeUser
cId
nt
tms
=
runPGSQuery
queryNgramsOnlyByNodeUser
(
DPS
.
Only
$
Values
fields
tms'
)
runPGSQuery
queryNgramsOnlyByNodeUser
(
cId
,
nodeTypeId
NodeDocument
,
ngramsTypeId
nt
,
Values
fields
(
DPS
.
Only
<$>
tms
))
where
where
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
[
"text"
,
"int4"
,
"int4"
,
"int4"
]
fields
=
[
QualifiedIdentifier
Nothing
"text"
]
tms'
=
map
(
\
t
->
(
t
,
cId
,
nodeTypeId
NodeDocument
,
ngramsTypeId
nt
))
tms
queryNgramsOnlyByNodeUser
::
DPS
.
Query
queryNgramsOnlyByNodeUser
::
DPS
.
Query
queryNgramsOnlyByNodeUser
=
[
sql
|
queryNgramsOnlyByNodeUser
=
[
sql
|
WITH input_rows(terms,corpus_id,docType,ngramsType) AS (?)
WITH corpus_id AS ?
SELECT nng.node_id, ng.terms FROM nodes_ngrams nng
WITH docType AS ?
WITH ngramsType AS ?
WITH input_rows(terms) AS (?)
SELECT ng.terms, nng.node_id FROM nodes_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN input_rows ir ON ir.terms = ng.terms
JOIN nodes_nodes nn ON nn.node2_id = nng.node_id
JOIN nodes_nodes nn ON nn.node2_id = nng.node_id
JOIN nodes n ON nn.node2_id = n.id
JOIN nodes n ON nn.node2_id = n.id
WHERE nn.node1_id
= ir.corpus_id
-- CorpusId
WHERE nn.node1_id
= corpus_id
-- CorpusId
AND n.typename
= ir.docType
-- NodeTypeId
AND n.typename
= docType
-- NodeTypeId
AND nng.ngrams_type =
ir.
ngramsType -- NgramsTypeId
AND nng.ngrams_type = ngramsType -- NgramsTypeId
AND nn.delete = False
AND nn.delete
= False
GROUP BY nng.node_id, ng.terms
GROUP BY nng.node_id, ng.terms
|]
|]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment