Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
154
Issues
154
List
Board
Labels
Milestones
Merge Requests
13
Merge Requests
13
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
6019e088
Commit
6019e088
authored
Aug 27, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[OPTIM] Flow List / Metrics TFICF with sample
parent
f35d84d9
Pipeline
#1743
passed with stage
in 36 minutes and 21 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
88 additions
and
3 deletions
+88
-3
List.hs
src/Gargantext/Core/Text/List.hs
+7
-2
NgramsByNode.hs
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
+54
-0
TFICF.hs
src/Gargantext/Database/Action/Metrics/TFICF.hs
+27
-1
No files found.
src/Gargantext/Core/Text/List.hs
View file @
6019e088
...
...
@@ -35,7 +35,7 @@ import Gargantext.Core.Text.Metrics (scored', Scored(..), scored_speExc, scored_
import
Gargantext.Core.Types
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
)
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getNodesByNgramsUser
,
getNodesByNgramsOnlyUser
)
import
Gargantext.Database.Action.Metrics.TFICF
(
getTficf
)
import
Gargantext.Database.Action.Metrics.TFICF
(
getTficf
_withSample
)
import
Gargantext.Database.Admin.Types.Node
(
NodeId
)
import
Gargantext.Database.Prelude
(
CmdM
)
import
Gargantext.Database.Query.Table.Ngrams
(
text2ngrams
)
...
...
@@ -156,8 +156,11 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
-- Filter 0 With Double
-- Computing global speGen score
allTerms
::
HashMap
NgramsTerm
Double
<-
getTficf
uCid
mCid
nt
printDebug
"[buldNgramsTermsList: Sample List] / start"
nt
allTerms
::
HashMap
NgramsTerm
Double
<-
getTficf_withSample
uCid
mCid
nt
printDebug
"[buldNgramsTermsList: Sample List / end]"
nt
printDebug
"[buldNgramsTermsList: Flow Social List / start]"
nt
-- PrivateFirst for first developments since Public NodeMode is not implemented yet
socialLists
::
FlowCont
NgramsTerm
FlowListScores
<-
flowSocialList
MySelfFirst
user
nt
(
FlowCont
HashMap
.
empty
...
...
@@ -165,6 +168,8 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
$
List
.
zip
(
HashMap
.
keys
allTerms
)
(
List
.
cycle
[
mempty
])
)
printDebug
"[buldNgramsTermsList: Flow Social List / end]"
nt
let
ngramsKeys
=
HashMap
.
keysSet
allTerms
groupParams'
<-
getGroupParams
groupParams
(
HashSet
.
map
(
text2ngrams
.
unNgramsTerm
)
ngramsKeys
)
...
...
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
View file @
6019e088
...
...
@@ -105,6 +105,18 @@ getOccByNgramsOnlyFast cId nt ngs =
HM
.
fromListWith
(
+
)
<$>
selectNgramsOccurrencesOnlyByNodeUser
cId
nt
ngs
-- | Occurrence counts per ngrams term, computed on a sample.
--
-- Runs 'selectNgramsOccurrencesOnlyByNodeUser_withSample' with the given
-- corpus id, sample-size argument, ngrams type and terms, then merges the
-- returned @(term, count)@ rows into a map, summing the counts of rows
-- that share the same term.
getOccByNgramsOnlyFast_withSample :: HasDBid NodeType
                                  => CorpusId
                                  -> Int
                                  -> NgramsType
                                  -> [NgramsTerm]
                                  -> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlyFast_withSample cId int nt ngs =
  fmap (HM.fromListWith (+))
       (selectNgramsOccurrencesOnlyByNodeUser_withSample cId int nt ngs)
getOccByNgramsOnlyFast'
::
CorpusId
->
ListId
->
NgramsType
...
...
@@ -190,6 +202,8 @@ selectNgramsOccurrencesOnlyByNodeUser cId nt tms =
where
fields
=
[
QualifiedIdentifier
Nothing
"text"
]
-- same as queryNgramsOnlyByNodeUser but using COUNT on the node ids.
-- Question: with the grouping is the result exactly the same (since Set NodeId for
-- equivalent ngrams intersections are not empty)
...
...
@@ -208,6 +222,46 @@ queryNgramsOccurrencesOnlyByNodeUser = [sql|
GROUP BY nng.node2_id, ng.terms
|]
-- | Run 'queryNgramsOccurrencesOnlyByNodeUser_withSample' and wrap the
-- first component of every returned row in 'NgramsTerm'.
--
-- The query placeholders are filled, in order, with: the sample-size
-- argument, the database id of 'NodeDocument', the corpus id, the given
-- terms as a one-column @VALUES@ list typed @text@, the corpus id again,
-- and the id of the ngrams type.
selectNgramsOccurrencesOnlyByNodeUser_withSample :: HasDBid NodeType
                                                 => CorpusId
                                                 -> Int
                                                 -> NgramsType
                                                 -> [NgramsTerm]
                                                 -> Cmd err [(NgramsTerm, Int)]
selectNgramsOccurrencesOnlyByNodeUser_withSample cId int nt tms =
  fmap (fmap (first NgramsTerm)) $
    runPGSQuery queryNgramsOccurrencesOnlyByNodeUser_withSample
      ( int                          -- argument of SYSTEM_ROWS (?)
      , toDBid NodeDocument          -- n.typename = ?
      , cId                          -- nn.node1_id = ? (inside the sample CTE)
      , Values textColumn termRows   -- input_rows(terms) AS (?)
      , cId                          -- nn.node1_id = ? (main query)
      , ngramsTypeId nt              -- nng.ngrams_type = ?
      )
  where
    -- One row per requested term, unwrapped from 'NgramsTerm'.
    termRows   = fmap (DPS.Only . unNgramsTerm) tms
    -- Column type of the single VALUES column.
    textColumn = [QualifiedIdentifier Nothing "text"]
-- | SQL used by 'selectNgramsOccurrencesOnlyByNodeUser_withSample'.
--
-- The @nodes_sample@ CTE draws rows from @nodes@ with
-- @TABLESAMPLE SYSTEM_ROWS (?)@ and keeps the ids that are joined to
-- @nodes_nodes@ with the given typename and @node1_id@. The main query
-- then counts @node_node_ngrams@ rows for the terms listed in
-- @input_rows@, restricted to sampled nodes of the corpus whose
-- @category > 0@, grouped by @(nng.node2_id, ng.terms)@. One term can
-- therefore appear in several result rows (one per node).
--
-- Placeholders, in order: sample row count, node typename id, corpus id,
-- input terms (a VALUES list), corpus id, ngrams type id.
--
-- NOTE(review): SYSTEM_ROWS is provided by the PostgreSQL
-- @tsm_system_rows@ extension; confirm it is installed on the target
-- database.
-- NOTE(review): TABLESAMPLE is presumably applied to @nodes@ before the
-- JOIN and WHERE filters, so the number of corpus documents actually kept
-- may be lower than the requested row count; verify against the
-- PostgreSQL documentation.
queryNgramsOccurrencesOnlyByNodeUser_withSample
::
DPS
.
Query
queryNgramsOccurrencesOnlyByNodeUser_withSample
=
[
sql
|
WITH nodes_sample AS (SELECT id FROM nodes n TABLESAMPLE SYSTEM_ROWS (?)
JOIN nodes_nodes nn ON n.id = nn.node2_id
WHERE n.typename = ?
AND nn.node1_id = ?),
input_rows(terms) AS (?)
SELECT ng.terms, COUNT(nng.node2_id) FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
JOIN nodes_sample n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
GROUP BY nng.node2_id, ng.terms
|]
queryNgramsOccurrencesOnlyByNodeUser'
::
DPS
.
Query
queryNgramsOccurrencesOnlyByNodeUser'
=
[
sql
|
WITH input_rows(terms) AS (?)
...
...
src/Gargantext/Database/Action/Metrics/TFICF.hs
View file @
6019e088
...
...
@@ -21,7 +21,7 @@ import qualified Data.HashMap.Strict as HM
import
Data.Maybe
(
fromMaybe
)
import
Gargantext.Core
import
Gargantext.Core.Text.Metrics.TFICF
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getNodesByNgramsUser
,
getOccByNgramsOnlyFast
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getNodesByNgramsUser
,
getOccByNgramsOnlyFast
,
getOccByNgramsOnlyFast_withSample
)
import
Gargantext.Database.Admin.Types.Node
-- (ListId, CorpusId, NodeId)
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Query.Table.NodeNode
(
selectCountDocs
)
...
...
@@ -52,3 +52,29 @@ getTficf cId mId nt = do
(
TficfSupra
(
Count
$
fromMaybe
0
$
HM
.
lookup
t
mapTextDoubleGlobal
)
(
Total
$
fromIntegral
countGlobal
))
)
mapTextDoubleLocal
-- | TFICF score of every ngrams term of a user corpus, with the
-- master-corpus ("supra") counts computed on a sample instead of the
-- whole master corpus.
--
-- Steps:
--
-- 1. Local counts: for each term returned by 'getNodesByNgramsUser', the
--    size of its associated set; only terms whose count is greater than
--    1 are kept.
-- 2. The local total is 'selectCountDocs' on the user corpus; the global
--    total is defined as ten times the local total and is also passed to
--    'getOccByNgramsOnlyFast_withSample' as its sample-size argument.
-- 3. Each kept term is scored with 'tficf'; a term missing from the
--    sampled global counts gets a global count of 0.
--
-- NOTE(review): the global total is the requested sample size, not a
-- measured number of sampled documents — confirm this approximation is
-- intended.
getTficf_withSample :: HasDBid NodeType
                    => UserCorpusId
                    -> MasterCorpusId
                    -> NgramsType
                    -> Cmd err (HashMap NgramsTerm Double)
getTficf_withSample cId mId nt = do
  localOccs <- HM.filter (> 1) . HM.map (fromIntegral . Set.size)
           <$> getNodesByNgramsUser cId nt

  countLocal <- selectCountDocs cId
  -- Sample size and global total: ten times the local document count.
  let countGlobal = countLocal * 10

  globalOccs <- HM.map fromIntegral
            <$> getOccByNgramsOnlyFast_withSample mId countGlobal nt
                  (HM.keys localOccs)

  -- Score one term from its local count and its sampled global count.
  let score term localCount =
        tficf (TficfInfra (Count localCount)
                          (Total $ fromIntegral countLocal))
              (TficfSupra (Count $ fromMaybe 0 $ HM.lookup term globalOccs)
                          (Total $ fromIntegral countGlobal))

  pure $ HM.mapWithKey score localOccs
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment