Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
147
Issues
147
List
Board
Labels
Milestones
Merge Requests
6
Merge Requests
6
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
6019e088
Commit
6019e088
authored
Aug 27, 2021
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[OPTIM] Flow List / Metrics TFICF with sample
parent
f35d84d9
Pipeline
#1743
passed with stage
in 36 minutes and 21 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
88 additions
and
3 deletions
+88
-3
List.hs
src/Gargantext/Core/Text/List.hs
+7
-2
NgramsByNode.hs
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
+54
-0
TFICF.hs
src/Gargantext/Database/Action/Metrics/TFICF.hs
+27
-1
No files found.
src/Gargantext/Core/Text/List.hs
View file @
6019e088
...
...
@@ -35,7 +35,7 @@ import Gargantext.Core.Text.Metrics (scored', Scored(..), scored_speExc, scored_
import
Gargantext.Core.Types
(
ListType
(
..
),
MasterCorpusId
,
UserCorpusId
)
import
Gargantext.Core.Types.Individu
(
User
(
..
))
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getNodesByNgramsUser
,
getNodesByNgramsOnlyUser
)
import
Gargantext.Database.Action.Metrics.TFICF
(
getTficf
)
import
Gargantext.Database.Action.Metrics.TFICF
(
getTficf
_withSample
)
import
Gargantext.Database.Admin.Types.Node
(
NodeId
)
import
Gargantext.Database.Prelude
(
CmdM
)
import
Gargantext.Database.Query.Table.Ngrams
(
text2ngrams
)
...
...
@@ -156,8 +156,11 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
-- Filter 0 With Double
-- Computing global speGen score
allTerms
::
HashMap
NgramsTerm
Double
<-
getTficf
uCid
mCid
nt
printDebug
"[buldNgramsTermsList: Sample List] / start"
nt
allTerms
::
HashMap
NgramsTerm
Double
<-
getTficf_withSample
uCid
mCid
nt
printDebug
"[buldNgramsTermsList: Sample List / end]"
nt
printDebug
"[buldNgramsTermsList: Flow Social List / start]"
nt
-- PrivateFirst for first developments since Public NodeMode is not implemented yet
socialLists
::
FlowCont
NgramsTerm
FlowListScores
<-
flowSocialList
MySelfFirst
user
nt
(
FlowCont
HashMap
.
empty
...
...
@@ -165,6 +168,8 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
$
List
.
zip
(
HashMap
.
keys
allTerms
)
(
List
.
cycle
[
mempty
])
)
printDebug
"[buldNgramsTermsList: Flow Social List / end]"
nt
let
ngramsKeys
=
HashMap
.
keysSet
allTerms
groupParams'
<-
getGroupParams
groupParams
(
HashSet
.
map
(
text2ngrams
.
unNgramsTerm
)
ngramsKeys
)
...
...
src/Gargantext/Database/Action/Metrics/NgramsByNode.hs
View file @
6019e088
...
...
@@ -105,6 +105,18 @@ getOccByNgramsOnlyFast cId nt ngs =
HM
.
fromListWith
(
+
)
<$>
selectNgramsOccurrencesOnlyByNodeUser
cId
nt
ngs
getOccByNgramsOnlyFast_withSample
::
HasDBid
NodeType
=>
CorpusId
->
Int
->
NgramsType
->
[
NgramsTerm
]
->
Cmd
err
(
HashMap
NgramsTerm
Int
)
getOccByNgramsOnlyFast_withSample
cId
int
nt
ngs
=
HM
.
fromListWith
(
+
)
<$>
selectNgramsOccurrencesOnlyByNodeUser_withSample
cId
int
nt
ngs
getOccByNgramsOnlyFast'
::
CorpusId
->
ListId
->
NgramsType
...
...
@@ -190,6 +202,8 @@ selectNgramsOccurrencesOnlyByNodeUser cId nt tms =
where
fields
=
[
QualifiedIdentifier
Nothing
"text"
]
-- same as queryNgramsOnlyByNodeUser but using COUNT on the node ids.
-- Question: with the grouping is the result exactly the same (since Set NodeId for
-- equivalent ngrams intersections are not empty)
...
...
@@ -208,6 +222,46 @@ queryNgramsOccurrencesOnlyByNodeUser = [sql|
GROUP BY nng.node2_id, ng.terms
|]
selectNgramsOccurrencesOnlyByNodeUser_withSample
::
HasDBid
NodeType
=>
CorpusId
->
Int
->
NgramsType
->
[
NgramsTerm
]
->
Cmd
err
[(
NgramsTerm
,
Int
)]
selectNgramsOccurrencesOnlyByNodeUser_withSample
cId
int
nt
tms
=
fmap
(
first
NgramsTerm
)
<$>
runPGSQuery
queryNgramsOccurrencesOnlyByNodeUser_withSample
(
int
,
toDBid
NodeDocument
,
cId
,
Values
fields
((
DPS
.
Only
.
unNgramsTerm
)
<$>
tms
)
,
cId
,
ngramsTypeId
nt
)
where
fields
=
[
QualifiedIdentifier
Nothing
"text"
]
queryNgramsOccurrencesOnlyByNodeUser_withSample
::
DPS
.
Query
queryNgramsOccurrencesOnlyByNodeUser_withSample
=
[
sql
|
WITH nodes_sample AS (SELECT id FROM nodes n TABLESAMPLE SYSTEM_ROWS (?)
JOIN nodes_nodes nn ON n.id = nn.node2_id
WHERE n.typename = ?
AND nn.node1_id = ?),
input_rows(terms) AS (?)
SELECT ng.terms, COUNT(nng.node2_id) FROM node_node_ngrams nng
JOIN ngrams ng ON nng.ngrams_id = ng.id
JOIN input_rows ir ON ir.terms = ng.terms
JOIN nodes_nodes nn ON nn.node2_id = nng.node2_id
JOIN nodes_sample n ON nn.node2_id = n.id
WHERE nn.node1_id = ? -- CorpusId
AND nng.ngrams_type = ? -- NgramsTypeId
AND nn.category > 0
GROUP BY nng.node2_id, ng.terms
|]
queryNgramsOccurrencesOnlyByNodeUser'
::
DPS
.
Query
queryNgramsOccurrencesOnlyByNodeUser'
=
[
sql
|
WITH input_rows(terms) AS (?)
...
...
src/Gargantext/Database/Action/Metrics/TFICF.hs
View file @
6019e088
...
...
@@ -21,7 +21,7 @@ import qualified Data.HashMap.Strict as HM
import
Data.Maybe
(
fromMaybe
)
import
Gargantext.Core
import
Gargantext.Core.Text.Metrics.TFICF
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getNodesByNgramsUser
,
getOccByNgramsOnlyFast
)
import
Gargantext.Database.Action.Metrics.NgramsByNode
(
getNodesByNgramsUser
,
getOccByNgramsOnlyFast
,
getOccByNgramsOnlyFast_withSample
)
import
Gargantext.Database.Admin.Types.Node
-- (ListId, CorpusId, NodeId)
import
Gargantext.Database.Prelude
(
Cmd
)
import
Gargantext.Database.Query.Table.NodeNode
(
selectCountDocs
)
...
...
@@ -52,3 +52,29 @@ getTficf cId mId nt = do
(
TficfSupra
(
Count
$
fromMaybe
0
$
HM
.
lookup
t
mapTextDoubleGlobal
)
(
Total
$
fromIntegral
countGlobal
))
)
mapTextDoubleLocal
getTficf_withSample
::
HasDBid
NodeType
=>
UserCorpusId
->
MasterCorpusId
->
NgramsType
->
Cmd
err
(
HashMap
NgramsTerm
Double
)
getTficf_withSample
cId
mId
nt
=
do
mapTextDoubleLocal
<-
HM
.
filter
(
>
1
)
<$>
HM
.
map
(
fromIntegral
.
Set
.
size
)
<$>
getNodesByNgramsUser
cId
nt
countLocal
<-
selectCountDocs
cId
let
countGlobal
=
countLocal
*
10
mapTextDoubleGlobal
<-
HM
.
map
fromIntegral
<$>
getOccByNgramsOnlyFast_withSample
mId
countGlobal
nt
(
HM
.
keys
mapTextDoubleLocal
)
pure
$
HM
.
mapWithKey
(
\
t
n
->
tficf
(
TficfInfra
(
Count
n
)
(
Total
$
fromIntegral
countLocal
))
(
TficfSupra
(
Count
$
fromMaybe
0
$
HM
.
lookup
t
mapTextDoubleGlobal
)
(
Total
$
fromIntegral
countGlobal
))
)
mapTextDoubleLocal
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment