Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
195
Issues
195
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
ba15b251
Commit
ba15b251
authored
Nov 15, 2022
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[ngrams] send array of context ids, instead of occurrences int
parent
ff6d1d32
Pipeline
#3382
canceled with stage
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
28 additions
and
21 deletions
+28
-21
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+17
-14
Types.hs
src/Gargantext/API/Ngrams/Types.hs
+4
-3
NgramsByContext.hs
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
+7
-4
No files found.
src/Gargantext/API/Ngrams.hs
View file @
ba15b251
...
...
@@ -83,7 +83,7 @@ module Gargantext.API.Ngrams
where
import
Control.Concurrent
import
Control.Lens
((
.~
),
view
,
(
^.
),
(
^..
),
(
+~
),
(
%~
),
(
.~
),
sumOf
,
at
,
_Just
,
Each
(
..
),
(
%%~
),
mapped
,
ifolded
,
withIndex
,
over
)
import
Control.Lens
((
.~
),
view
,
(
^.
),
(
^..
),
(
+~
),
(
%~
),
(
.~
),
msumOf
,
at
,
_Just
,
Each
(
..
),
(
%%~
),
mapped
,
ifolded
,
to
,
withIndex
,
over
)
import
Control.Monad.Reader
import
Data.Aeson
hiding
((
.=
))
import
Data.Either
(
Either
(
..
))
...
...
@@ -553,8 +553,8 @@ getTableNgrams _nType nId tabType listId limit_ offset
sortOnOrder
Nothing
=
sortOnOrder
(
Just
ScoreDesc
)
sortOnOrder
(
Just
TermAsc
)
=
List
.
sortOn
$
view
ne_ngrams
sortOnOrder
(
Just
TermDesc
)
=
List
.
sortOn
$
Down
.
view
ne_ngrams
sortOnOrder
(
Just
ScoreAsc
)
=
List
.
sortOn
$
view
ne_occurrences
sortOnOrder
(
Just
ScoreDesc
)
=
List
.
sortOn
$
Down
.
view
ne_occurrences
sortOnOrder
(
Just
ScoreAsc
)
=
List
.
sortOn
$
view
(
ne_occurrences
.
to
length
)
sortOnOrder
(
Just
ScoreDesc
)
=
List
.
sortOn
$
Down
.
view
(
ne_occurrences
.
to
length
)
---------------------------------------
-- | Filter the given `tableMap` with the search criteria.
...
...
@@ -584,15 +584,16 @@ getTableNgrams _nType nId tabType listId limit_ offset
let
scoresNeeded
=
needsScores
orderBy
t1
<-
getTime
tableMap
2
<-
getNgramsTable'
nId
listId
ngramsType
::
m
(
Versioned
(
Map
NgramsTerm
NgramsElement
))
tableMap
<-
getNgramsTable'
nId
listId
ngramsType
::
m
(
Versioned
(
Map
NgramsTerm
NgramsElement
))
let
fltr
=
tableMap
2
&
v_data
%~
NgramsTable
.
filteredNodes
::
Versioned
NgramsTable
let
fltr
=
tableMap
&
v_data
%~
NgramsTable
.
filteredNodes
::
Versioned
NgramsTable
let
fltrCount
=
length
$
fltr
^.
v_data
.
_NgramsTable
t2
<-
getTime
let
tableMap
3
=
over
(
v_data
.
_NgramsTable
)
((
withInners
(
tableMap2
^.
v_data
))
.
sortAndPaginate
)
fltr
let
tableMap
Sorted
=
over
(
v_data
.
_NgramsTable
)
((
withInners
(
tableMap
^.
v_data
))
.
sortAndPaginate
)
fltr
t3
<-
getTime
--printDebug "[getTableNgrams] tableMapSorted" tableMapSorted
liftBase
$
do
hprint
stderr
(
"getTableNgrams total="
%
hasTime
...
...
@@ -603,8 +604,8 @@ getTableNgrams _nType nId tabType listId limit_ offset
%
"
\n
"
)
t0
t3
t0
t1
t1
t2
t2
t3
-- printDebug "[getTableNgrams] tableMap
3" $ show tableMap3
pure
$
toVersionedWithCount
fltrCount
tableMap
3
-- printDebug "[getTableNgrams] tableMap
Sorted" $ show tableMapSorted
pure
$
toVersionedWithCount
fltrCount
tableMap
Sorted
-- | Helper function to get the ngrams table with scores.
...
...
@@ -619,9 +620,9 @@ getNgramsTable' :: forall env err m.
->
TableNgrams
.
NgramsType
->
m
(
Versioned
(
Map
.
Map
NgramsTerm
NgramsElement
))
getNgramsTable'
nId
listId
ngramsType
=
do
tableMap
1
<-
getNgramsTableMap
listId
ngramsType
tableMap
1
&
v_data
%%~
(
setNgramsTableScores
nId
listId
ngramsType
)
.
Map
.
mapWithKey
ngramsElementFromRepo
tableMap
<-
getNgramsTableMap
listId
ngramsType
tableMap
&
v_data
%%~
(
setNgramsTableScores
nId
listId
ngramsType
)
.
Map
.
mapWithKey
ngramsElementFromRepo
-- | Helper function to set scores on an `NgramsTable`.
setNgramsTableScores
::
forall
env
err
m
t
.
...
...
@@ -636,10 +637,10 @@ setNgramsTableScores :: forall env err m t.
->
TableNgrams
.
NgramsType
->
t
->
m
t
setNgramsTableScores
nId
listId
ngramsType
table
=
do
setNgramsTableScores
nId
listId
ngramsType
table
=
do
t1
<-
getTime
occurrences
<-
getOccByNgramsOnlyFast
nId
listId
ngramsType
printDebug
"[setNgramsTableScores] occurrences"
occurrences
--
printDebug "[setNgramsTableScores] occurrences" occurrences
t2
<-
getTime
liftBase
$
do
let
ngrams_terms
=
table
^..
each
.
ne_ngrams
...
...
@@ -648,7 +649,9 @@ setNgramsTableScores nId listId ngramsType table = do
(
"getTableNgrams/setScores #ngrams="
%
int
%
" time="
%
hasTime
%
"
\n
"
)
(
length
ngrams_terms
)
t1
t2
let
setOcc
ne
=
ne
&
ne_occurrences
.~
sumOf
(
at
(
ne
^.
ne_ngrams
)
.
_Just
)
occurrences
setOcc
ne
=
ne
&
ne_occurrences
.~
msumOf
(
at
(
ne
^.
ne_ngrams
)
.
_Just
)
occurrences
--printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
pure
$
table
&
each
%~
setOcc
...
...
src/Gargantext/API/Ngrams/Types.hs
View file @
ba15b251
...
...
@@ -34,6 +34,7 @@ import GHC.Generics (Generic)
import
Gargantext.Core.Text
(
size
)
import
Gargantext.Core.Types
(
ListType
(
..
),
ListId
,
NodeId
,
TODO
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixUntagged
,
unPrefixSwagger
,
wellNamedSchema
)
import
Gargantext.Database.Admin.Types.Node
(
ContextId
)
import
Gargantext.Database.Prelude
(
fromField'
,
HasConnectionPool
,
HasConfig
,
CmdM
'
)
import
Gargantext.Prelude
import
Gargantext.Prelude.Crypto.Hash
(
IsHashable
(
..
))
...
...
@@ -170,7 +171,7 @@ data NgramsElement =
NgramsElement
{
_ne_ngrams
::
NgramsTerm
,
_ne_size
::
Int
,
_ne_list
::
ListType
,
_ne_occurrences
::
Int
,
_ne_occurrences
::
[
ContextId
]
,
_ne_root
::
Maybe
NgramsTerm
,
_ne_parent
::
Maybe
NgramsTerm
,
_ne_children
::
MSet
NgramsTerm
...
...
@@ -186,7 +187,7 @@ mkNgramsElement :: NgramsTerm
->
MSet
NgramsTerm
->
NgramsElement
mkNgramsElement
ngrams
list
rp
children
=
NgramsElement
ngrams
(
size
(
unNgramsTerm
ngrams
))
list
1
(
_rp_root
<$>
rp
)
(
_rp_parent
<$>
rp
)
children
NgramsElement
ngrams
(
size
(
unNgramsTerm
ngrams
))
list
[]
(
_rp_root
<$>
rp
)
(
_rp_parent
<$>
rp
)
children
newNgramsElement
::
Maybe
ListType
->
NgramsTerm
->
NgramsElement
newNgramsElement
mayList
ngrams
=
...
...
@@ -571,7 +572,7 @@ ngramsElementFromRepo
,
_ne_parent
=
p
,
_ne_children
=
c
,
_ne_ngrams
=
ngrams
,
_ne_occurrences
=
0
-- panic $ "API.Ngrams.Types._ne_occurrences"
,
_ne_occurrences
=
[]
-- panic $ "API.Ngrams.Types._ne_occurrences"
{-
-- Here we could use 0 if we want to avoid any `panic`.
-- It will not happen using getTableNgrams if
...
...
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
View file @
ba15b251
...
...
@@ -36,6 +36,7 @@ import qualified Data.HashMap.Strict as HM
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Database.PostgreSQL.Simple
as
DPS
import
qualified
Database.PostgreSQL.Simple.Types
as
DPST
-- | fst is size of Supra Corpus
-- snd is Texts and size of Occurrences (different docs)
...
...
@@ -108,15 +109,16 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs =
getOccByNgramsOnlyFast
::
CorpusId
->
ListId
->
NgramsType
->
Cmd
err
(
HashMap
NgramsTerm
Int
)
->
Cmd
err
(
HashMap
NgramsTerm
[
ContextId
]
)
getOccByNgramsOnlyFast
cId
lId
nt
=
do
HM
.
fromList
<$>
map
(
\
(
t
,
n
)
->
(
NgramsTerm
t
,
round
n
))
<$>
run
cId
lId
nt
--HM.fromList <$> map (\(t,n) -> (NgramsTerm t, round n)) <$> run cId lId nt
HM
.
fromList
<$>
map
(
\
(
t
,
ns
)
->
(
NgramsTerm
t
,
NodeId
<$>
DPST
.
fromPGArray
ns
))
<$>
run
cId
lId
nt
where
run
::
CorpusId
->
ListId
->
NgramsType
->
Cmd
err
[(
Text
,
D
ouble
)]
->
Cmd
err
[(
Text
,
D
PST
.
PGArray
Int
)]
run
cId'
lId'
nt'
=
runPGSQuery
query
(
cId'
,
lId'
...
...
@@ -127,7 +129,8 @@ getOccByNgramsOnlyFast cId lId nt = do
query
=
[
sql
|
SELECT ng.terms
-- , ng.id
, round(nng.weight)
--, round(nng.weight)
, ARRAY(SELECT DISTINCT context_node_ngrams.context_id FROM context_node_ngrams WHERE ng.id = context_id) AS context_ids
-- , ns.version
-- , nng.ngrams_type
-- , ns.ngrams_type_id
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment