Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
152
Issues
152
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
ba15b251
Commit
ba15b251
authored
Nov 15, 2022
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[ngrams] send array of context ids, instead of occurrences int
parent
ff6d1d32
Pipeline
#3382
canceled with stage
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
28 additions
and
21 deletions
+28
-21
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+17
-14
Types.hs
src/Gargantext/API/Ngrams/Types.hs
+4
-3
NgramsByContext.hs
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
+7
-4
No files found.
src/Gargantext/API/Ngrams.hs
View file @
ba15b251
...
...
@@ -83,7 +83,7 @@ module Gargantext.API.Ngrams
where
import
Control.Concurrent
import
Control.Lens
((
.~
),
view
,
(
^.
),
(
^..
),
(
+~
),
(
%~
),
(
.~
),
sumOf
,
at
,
_Just
,
Each
(
..
),
(
%%~
),
mapped
,
ifolded
,
withIndex
,
over
)
import
Control.Lens
((
.~
),
view
,
(
^.
),
(
^..
),
(
+~
),
(
%~
),
(
.~
),
msumOf
,
at
,
_Just
,
Each
(
..
),
(
%%~
),
mapped
,
ifolded
,
to
,
withIndex
,
over
)
import
Control.Monad.Reader
import
Data.Aeson
hiding
((
.=
))
import
Data.Either
(
Either
(
..
))
...
...
@@ -553,8 +553,8 @@ getTableNgrams _nType nId tabType listId limit_ offset
sortOnOrder
Nothing
=
sortOnOrder
(
Just
ScoreDesc
)
sortOnOrder
(
Just
TermAsc
)
=
List
.
sortOn
$
view
ne_ngrams
sortOnOrder
(
Just
TermDesc
)
=
List
.
sortOn
$
Down
.
view
ne_ngrams
sortOnOrder
(
Just
ScoreAsc
)
=
List
.
sortOn
$
view
ne_occurrences
sortOnOrder
(
Just
ScoreDesc
)
=
List
.
sortOn
$
Down
.
view
ne_occurrences
sortOnOrder
(
Just
ScoreAsc
)
=
List
.
sortOn
$
view
(
ne_occurrences
.
to
length
)
sortOnOrder
(
Just
ScoreDesc
)
=
List
.
sortOn
$
Down
.
view
(
ne_occurrences
.
to
length
)
---------------------------------------
-- | Filter the given `tableMap` with the search criteria.
...
...
@@ -584,15 +584,16 @@ getTableNgrams _nType nId tabType listId limit_ offset
let
scoresNeeded
=
needsScores
orderBy
t1
<-
getTime
tableMap
2
<-
getNgramsTable'
nId
listId
ngramsType
::
m
(
Versioned
(
Map
NgramsTerm
NgramsElement
))
tableMap
<-
getNgramsTable'
nId
listId
ngramsType
::
m
(
Versioned
(
Map
NgramsTerm
NgramsElement
))
let
fltr
=
tableMap
2
&
v_data
%~
NgramsTable
.
filteredNodes
::
Versioned
NgramsTable
let
fltr
=
tableMap
&
v_data
%~
NgramsTable
.
filteredNodes
::
Versioned
NgramsTable
let
fltrCount
=
length
$
fltr
^.
v_data
.
_NgramsTable
t2
<-
getTime
let
tableMap
3
=
over
(
v_data
.
_NgramsTable
)
((
withInners
(
tableMap2
^.
v_data
))
.
sortAndPaginate
)
fltr
let
tableMap
Sorted
=
over
(
v_data
.
_NgramsTable
)
((
withInners
(
tableMap
^.
v_data
))
.
sortAndPaginate
)
fltr
t3
<-
getTime
--printDebug "[getTableNgrams] tableMapSorted" tableMapSorted
liftBase
$
do
hprint
stderr
(
"getTableNgrams total="
%
hasTime
...
...
@@ -603,8 +604,8 @@ getTableNgrams _nType nId tabType listId limit_ offset
%
"
\n
"
)
t0
t3
t0
t1
t1
t2
t2
t3
-- printDebug "[getTableNgrams] tableMap
3" $ show tableMap3
pure
$
toVersionedWithCount
fltrCount
tableMap
3
-- printDebug "[getTableNgrams] tableMap
Sorted" $ show tableMapSorted
pure
$
toVersionedWithCount
fltrCount
tableMap
Sorted
-- | Helper function to get the ngrams table with scores.
...
...
@@ -619,9 +620,9 @@ getNgramsTable' :: forall env err m.
->
TableNgrams
.
NgramsType
->
m
(
Versioned
(
Map
.
Map
NgramsTerm
NgramsElement
))
getNgramsTable'
nId
listId
ngramsType
=
do
tableMap
1
<-
getNgramsTableMap
listId
ngramsType
tableMap
1
&
v_data
%%~
(
setNgramsTableScores
nId
listId
ngramsType
)
.
Map
.
mapWithKey
ngramsElementFromRepo
tableMap
<-
getNgramsTableMap
listId
ngramsType
tableMap
&
v_data
%%~
(
setNgramsTableScores
nId
listId
ngramsType
)
.
Map
.
mapWithKey
ngramsElementFromRepo
-- | Helper function to set scores on an `NgramsTable`.
setNgramsTableScores
::
forall
env
err
m
t
.
...
...
@@ -636,10 +637,10 @@ setNgramsTableScores :: forall env err m t.
->
TableNgrams
.
NgramsType
->
t
->
m
t
setNgramsTableScores
nId
listId
ngramsType
table
=
do
setNgramsTableScores
nId
listId
ngramsType
table
=
do
t1
<-
getTime
occurrences
<-
getOccByNgramsOnlyFast
nId
listId
ngramsType
printDebug
"[setNgramsTableScores] occurrences"
occurrences
--
printDebug "[setNgramsTableScores] occurrences" occurrences
t2
<-
getTime
liftBase
$
do
let
ngrams_terms
=
table
^..
each
.
ne_ngrams
...
...
@@ -648,7 +649,9 @@ setNgramsTableScores nId listId ngramsType table = do
(
"getTableNgrams/setScores #ngrams="
%
int
%
" time="
%
hasTime
%
"
\n
"
)
(
length
ngrams_terms
)
t1
t2
let
setOcc
ne
=
ne
&
ne_occurrences
.~
sumOf
(
at
(
ne
^.
ne_ngrams
)
.
_Just
)
occurrences
setOcc
ne
=
ne
&
ne_occurrences
.~
msumOf
(
at
(
ne
^.
ne_ngrams
)
.
_Just
)
occurrences
--printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
pure
$
table
&
each
%~
setOcc
...
...
src/Gargantext/API/Ngrams/Types.hs
View file @
ba15b251
...
...
@@ -34,6 +34,7 @@ import GHC.Generics (Generic)
import
Gargantext.Core.Text
(
size
)
import
Gargantext.Core.Types
(
ListType
(
..
),
ListId
,
NodeId
,
TODO
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixUntagged
,
unPrefixSwagger
,
wellNamedSchema
)
import
Gargantext.Database.Admin.Types.Node
(
ContextId
)
import
Gargantext.Database.Prelude
(
fromField'
,
HasConnectionPool
,
HasConfig
,
CmdM
'
)
import
Gargantext.Prelude
import
Gargantext.Prelude.Crypto.Hash
(
IsHashable
(
..
))
...
...
@@ -170,7 +171,7 @@ data NgramsElement =
NgramsElement
{
_ne_ngrams
::
NgramsTerm
,
_ne_size
::
Int
,
_ne_list
::
ListType
,
_ne_occurrences
::
Int
,
_ne_occurrences
::
[
ContextId
]
,
_ne_root
::
Maybe
NgramsTerm
,
_ne_parent
::
Maybe
NgramsTerm
,
_ne_children
::
MSet
NgramsTerm
...
...
@@ -186,7 +187,7 @@ mkNgramsElement :: NgramsTerm
->
MSet
NgramsTerm
->
NgramsElement
mkNgramsElement
ngrams
list
rp
children
=
NgramsElement
ngrams
(
size
(
unNgramsTerm
ngrams
))
list
1
(
_rp_root
<$>
rp
)
(
_rp_parent
<$>
rp
)
children
NgramsElement
ngrams
(
size
(
unNgramsTerm
ngrams
))
list
[]
(
_rp_root
<$>
rp
)
(
_rp_parent
<$>
rp
)
children
newNgramsElement
::
Maybe
ListType
->
NgramsTerm
->
NgramsElement
newNgramsElement
mayList
ngrams
=
...
...
@@ -571,7 +572,7 @@ ngramsElementFromRepo
,
_ne_parent
=
p
,
_ne_children
=
c
,
_ne_ngrams
=
ngrams
,
_ne_occurrences
=
0
-- panic $ "API.Ngrams.Types._ne_occurrences"
,
_ne_occurrences
=
[]
-- panic $ "API.Ngrams.Types._ne_occurrences"
{-
-- Here we could use 0 if we want to avoid any `panic`.
-- It will not happen using getTableNgrams if
...
...
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
View file @
ba15b251
...
...
@@ -36,6 +36,7 @@ import qualified Data.HashMap.Strict as HM
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Database.PostgreSQL.Simple
as
DPS
import
qualified
Database.PostgreSQL.Simple.Types
as
DPST
-- | fst is size of Supra Corpus
-- snd is Texts and size of Occurrences (different docs)
...
...
@@ -108,15 +109,16 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs =
getOccByNgramsOnlyFast
::
CorpusId
->
ListId
->
NgramsType
->
Cmd
err
(
HashMap
NgramsTerm
Int
)
->
Cmd
err
(
HashMap
NgramsTerm
[
ContextId
]
)
getOccByNgramsOnlyFast
cId
lId
nt
=
do
HM
.
fromList
<$>
map
(
\
(
t
,
n
)
->
(
NgramsTerm
t
,
round
n
))
<$>
run
cId
lId
nt
--HM.fromList <$> map (\(t,n) -> (NgramsTerm t, round n)) <$> run cId lId nt
HM
.
fromList
<$>
map
(
\
(
t
,
ns
)
->
(
NgramsTerm
t
,
NodeId
<$>
DPST
.
fromPGArray
ns
))
<$>
run
cId
lId
nt
where
run
::
CorpusId
->
ListId
->
NgramsType
->
Cmd
err
[(
Text
,
D
ouble
)]
->
Cmd
err
[(
Text
,
D
PST
.
PGArray
Int
)]
run
cId'
lId'
nt'
=
runPGSQuery
query
(
cId'
,
lId'
...
...
@@ -127,7 +129,8 @@ getOccByNgramsOnlyFast cId lId nt = do
query
=
[
sql
|
SELECT ng.terms
-- , ng.id
, round(nng.weight)
--, round(nng.weight)
, ARRAY(SELECT DISTINCT context_node_ngrams.context_id FROM context_node_ngrams WHERE ng.id = context_id) AS context_ids
-- , ns.version
-- , nng.ngrams_type
-- , ns.ngrams_type_id
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment