Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
12
Merge Requests
12
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
ba15b251
Commit
ba15b251
authored
Nov 15, 2022
by
Przemyslaw Kaminski
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[ngrams] send array of context ids, instead of occurrences int
parent
ff6d1d32
Pipeline
#3382
canceled with stage
Changes
3
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
28 additions
and
21 deletions
+28
-21
Ngrams.hs
src/Gargantext/API/Ngrams.hs
+17
-14
Types.hs
src/Gargantext/API/Ngrams/Types.hs
+4
-3
NgramsByContext.hs
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
+7
-4
No files found.
src/Gargantext/API/Ngrams.hs
View file @
ba15b251
...
@@ -83,7 +83,7 @@ module Gargantext.API.Ngrams
...
@@ -83,7 +83,7 @@ module Gargantext.API.Ngrams
where
where
import
Control.Concurrent
import
Control.Concurrent
import
Control.Lens
((
.~
),
view
,
(
^.
),
(
^..
),
(
+~
),
(
%~
),
(
.~
),
sumOf
,
at
,
_Just
,
Each
(
..
),
(
%%~
),
mapped
,
ifolded
,
withIndex
,
over
)
import
Control.Lens
((
.~
),
view
,
(
^.
),
(
^..
),
(
+~
),
(
%~
),
(
.~
),
msumOf
,
at
,
_Just
,
Each
(
..
),
(
%%~
),
mapped
,
ifolded
,
to
,
withIndex
,
over
)
import
Control.Monad.Reader
import
Control.Monad.Reader
import
Data.Aeson
hiding
((
.=
))
import
Data.Aeson
hiding
((
.=
))
import
Data.Either
(
Either
(
..
))
import
Data.Either
(
Either
(
..
))
...
@@ -553,8 +553,8 @@ getTableNgrams _nType nId tabType listId limit_ offset
...
@@ -553,8 +553,8 @@ getTableNgrams _nType nId tabType listId limit_ offset
sortOnOrder
Nothing
=
sortOnOrder
(
Just
ScoreDesc
)
sortOnOrder
Nothing
=
sortOnOrder
(
Just
ScoreDesc
)
sortOnOrder
(
Just
TermAsc
)
=
List
.
sortOn
$
view
ne_ngrams
sortOnOrder
(
Just
TermAsc
)
=
List
.
sortOn
$
view
ne_ngrams
sortOnOrder
(
Just
TermDesc
)
=
List
.
sortOn
$
Down
.
view
ne_ngrams
sortOnOrder
(
Just
TermDesc
)
=
List
.
sortOn
$
Down
.
view
ne_ngrams
sortOnOrder
(
Just
ScoreAsc
)
=
List
.
sortOn
$
view
ne_occurrences
sortOnOrder
(
Just
ScoreAsc
)
=
List
.
sortOn
$
view
(
ne_occurrences
.
to
length
)
sortOnOrder
(
Just
ScoreDesc
)
=
List
.
sortOn
$
Down
.
view
ne_occurrences
sortOnOrder
(
Just
ScoreDesc
)
=
List
.
sortOn
$
Down
.
view
(
ne_occurrences
.
to
length
)
---------------------------------------
---------------------------------------
-- | Filter the given `tableMap` with the search criteria.
-- | Filter the given `tableMap` with the search criteria.
...
@@ -584,15 +584,16 @@ getTableNgrams _nType nId tabType listId limit_ offset
...
@@ -584,15 +584,16 @@ getTableNgrams _nType nId tabType listId limit_ offset
let
scoresNeeded
=
needsScores
orderBy
let
scoresNeeded
=
needsScores
orderBy
t1
<-
getTime
t1
<-
getTime
tableMap
2
<-
getNgramsTable'
nId
listId
ngramsType
::
m
(
Versioned
(
Map
NgramsTerm
NgramsElement
))
tableMap
<-
getNgramsTable'
nId
listId
ngramsType
::
m
(
Versioned
(
Map
NgramsTerm
NgramsElement
))
let
fltr
=
tableMap
2
&
v_data
%~
NgramsTable
.
filteredNodes
::
Versioned
NgramsTable
let
fltr
=
tableMap
&
v_data
%~
NgramsTable
.
filteredNodes
::
Versioned
NgramsTable
let
fltrCount
=
length
$
fltr
^.
v_data
.
_NgramsTable
let
fltrCount
=
length
$
fltr
^.
v_data
.
_NgramsTable
t2
<-
getTime
t2
<-
getTime
let
tableMap
3
=
over
(
v_data
.
_NgramsTable
)
((
withInners
(
tableMap2
^.
v_data
))
.
sortAndPaginate
)
fltr
let
tableMap
Sorted
=
over
(
v_data
.
_NgramsTable
)
((
withInners
(
tableMap
^.
v_data
))
.
sortAndPaginate
)
fltr
t3
<-
getTime
t3
<-
getTime
--printDebug "[getTableNgrams] tableMapSorted" tableMapSorted
liftBase
$
do
liftBase
$
do
hprint
stderr
hprint
stderr
(
"getTableNgrams total="
%
hasTime
(
"getTableNgrams total="
%
hasTime
...
@@ -603,8 +604,8 @@ getTableNgrams _nType nId tabType listId limit_ offset
...
@@ -603,8 +604,8 @@ getTableNgrams _nType nId tabType listId limit_ offset
%
"
\n
"
%
"
\n
"
)
t0
t3
t0
t1
t1
t2
t2
t3
)
t0
t3
t0
t1
t1
t2
t2
t3
-- printDebug "[getTableNgrams] tableMap
3" $ show tableMap3
-- printDebug "[getTableNgrams] tableMap
Sorted" $ show tableMapSorted
pure
$
toVersionedWithCount
fltrCount
tableMap
3
pure
$
toVersionedWithCount
fltrCount
tableMap
Sorted
-- | Helper function to get the ngrams table with scores.
-- | Helper function to get the ngrams table with scores.
...
@@ -619,8 +620,8 @@ getNgramsTable' :: forall env err m.
...
@@ -619,8 +620,8 @@ getNgramsTable' :: forall env err m.
->
TableNgrams
.
NgramsType
->
TableNgrams
.
NgramsType
->
m
(
Versioned
(
Map
.
Map
NgramsTerm
NgramsElement
))
->
m
(
Versioned
(
Map
.
Map
NgramsTerm
NgramsElement
))
getNgramsTable'
nId
listId
ngramsType
=
do
getNgramsTable'
nId
listId
ngramsType
=
do
tableMap
1
<-
getNgramsTableMap
listId
ngramsType
tableMap
<-
getNgramsTableMap
listId
ngramsType
tableMap
1
&
v_data
%%~
(
setNgramsTableScores
nId
listId
ngramsType
)
tableMap
&
v_data
%%~
(
setNgramsTableScores
nId
listId
ngramsType
)
.
Map
.
mapWithKey
ngramsElementFromRepo
.
Map
.
mapWithKey
ngramsElementFromRepo
-- | Helper function to set scores on an `NgramsTable`.
-- | Helper function to set scores on an `NgramsTable`.
...
@@ -639,7 +640,7 @@ setNgramsTableScores :: forall env err m t.
...
@@ -639,7 +640,7 @@ setNgramsTableScores :: forall env err m t.
setNgramsTableScores
nId
listId
ngramsType
table
=
do
setNgramsTableScores
nId
listId
ngramsType
table
=
do
t1
<-
getTime
t1
<-
getTime
occurrences
<-
getOccByNgramsOnlyFast
nId
listId
ngramsType
occurrences
<-
getOccByNgramsOnlyFast
nId
listId
ngramsType
printDebug
"[setNgramsTableScores] occurrences"
occurrences
--
printDebug "[setNgramsTableScores] occurrences" occurrences
t2
<-
getTime
t2
<-
getTime
liftBase
$
do
liftBase
$
do
let
ngrams_terms
=
table
^..
each
.
ne_ngrams
let
ngrams_terms
=
table
^..
each
.
ne_ngrams
...
@@ -648,7 +649,9 @@ setNgramsTableScores nId listId ngramsType table = do
...
@@ -648,7 +649,9 @@ setNgramsTableScores nId listId ngramsType table = do
(
"getTableNgrams/setScores #ngrams="
%
int
%
" time="
%
hasTime
%
"
\n
"
)
(
"getTableNgrams/setScores #ngrams="
%
int
%
" time="
%
hasTime
%
"
\n
"
)
(
length
ngrams_terms
)
t1
t2
(
length
ngrams_terms
)
t1
t2
let
let
setOcc
ne
=
ne
&
ne_occurrences
.~
sumOf
(
at
(
ne
^.
ne_ngrams
)
.
_Just
)
occurrences
setOcc
ne
=
ne
&
ne_occurrences
.~
msumOf
(
at
(
ne
^.
ne_ngrams
)
.
_Just
)
occurrences
--printDebug "[setNgramsTableScores] with occurences" $ table & each %~ setOcc
pure
$
table
&
each
%~
setOcc
pure
$
table
&
each
%~
setOcc
...
...
src/Gargantext/API/Ngrams/Types.hs
View file @
ba15b251
...
@@ -34,6 +34,7 @@ import GHC.Generics (Generic)
...
@@ -34,6 +34,7 @@ import GHC.Generics (Generic)
import
Gargantext.Core.Text
(
size
)
import
Gargantext.Core.Text
(
size
)
import
Gargantext.Core.Types
(
ListType
(
..
),
ListId
,
NodeId
,
TODO
)
import
Gargantext.Core.Types
(
ListType
(
..
),
ListId
,
NodeId
,
TODO
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixUntagged
,
unPrefixSwagger
,
wellNamedSchema
)
import
Gargantext.Core.Utils.Prefix
(
unPrefix
,
unPrefixUntagged
,
unPrefixSwagger
,
wellNamedSchema
)
import
Gargantext.Database.Admin.Types.Node
(
ContextId
)
import
Gargantext.Database.Prelude
(
fromField'
,
HasConnectionPool
,
HasConfig
,
CmdM
'
)
import
Gargantext.Database.Prelude
(
fromField'
,
HasConnectionPool
,
HasConfig
,
CmdM
'
)
import
Gargantext.Prelude
import
Gargantext.Prelude
import
Gargantext.Prelude.Crypto.Hash
(
IsHashable
(
..
))
import
Gargantext.Prelude.Crypto.Hash
(
IsHashable
(
..
))
...
@@ -170,7 +171,7 @@ data NgramsElement =
...
@@ -170,7 +171,7 @@ data NgramsElement =
NgramsElement
{
_ne_ngrams
::
NgramsTerm
NgramsElement
{
_ne_ngrams
::
NgramsTerm
,
_ne_size
::
Int
,
_ne_size
::
Int
,
_ne_list
::
ListType
,
_ne_list
::
ListType
,
_ne_occurrences
::
Int
,
_ne_occurrences
::
[
ContextId
]
,
_ne_root
::
Maybe
NgramsTerm
,
_ne_root
::
Maybe
NgramsTerm
,
_ne_parent
::
Maybe
NgramsTerm
,
_ne_parent
::
Maybe
NgramsTerm
,
_ne_children
::
MSet
NgramsTerm
,
_ne_children
::
MSet
NgramsTerm
...
@@ -186,7 +187,7 @@ mkNgramsElement :: NgramsTerm
...
@@ -186,7 +187,7 @@ mkNgramsElement :: NgramsTerm
->
MSet
NgramsTerm
->
MSet
NgramsTerm
->
NgramsElement
->
NgramsElement
mkNgramsElement
ngrams
list
rp
children
=
mkNgramsElement
ngrams
list
rp
children
=
NgramsElement
ngrams
(
size
(
unNgramsTerm
ngrams
))
list
1
(
_rp_root
<$>
rp
)
(
_rp_parent
<$>
rp
)
children
NgramsElement
ngrams
(
size
(
unNgramsTerm
ngrams
))
list
[]
(
_rp_root
<$>
rp
)
(
_rp_parent
<$>
rp
)
children
newNgramsElement
::
Maybe
ListType
->
NgramsTerm
->
NgramsElement
newNgramsElement
::
Maybe
ListType
->
NgramsTerm
->
NgramsElement
newNgramsElement
mayList
ngrams
=
newNgramsElement
mayList
ngrams
=
...
@@ -571,7 +572,7 @@ ngramsElementFromRepo
...
@@ -571,7 +572,7 @@ ngramsElementFromRepo
,
_ne_parent
=
p
,
_ne_parent
=
p
,
_ne_children
=
c
,
_ne_children
=
c
,
_ne_ngrams
=
ngrams
,
_ne_ngrams
=
ngrams
,
_ne_occurrences
=
0
-- panic $ "API.Ngrams.Types._ne_occurrences"
,
_ne_occurrences
=
[]
-- panic $ "API.Ngrams.Types._ne_occurrences"
{-
{-
-- Here we could use 0 if we want to avoid any `panic`.
-- Here we could use 0 if we want to avoid any `panic`.
-- It will not happen using getTableNgrams if
-- It will not happen using getTableNgrams if
...
...
src/Gargantext/Database/Action/Metrics/NgramsByContext.hs
View file @
ba15b251
...
@@ -36,6 +36,7 @@ import qualified Data.HashMap.Strict as HM
...
@@ -36,6 +36,7 @@ import qualified Data.HashMap.Strict as HM
import
qualified
Data.Map
as
Map
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.Set
as
Set
import
qualified
Database.PostgreSQL.Simple
as
DPS
import
qualified
Database.PostgreSQL.Simple
as
DPS
import
qualified
Database.PostgreSQL.Simple.Types
as
DPST
-- | fst is size of Supra Corpus
-- | fst is size of Supra Corpus
-- snd is Texts and size of Occurrences (different docs)
-- snd is Texts and size of Occurrences (different docs)
...
@@ -108,15 +109,16 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs =
...
@@ -108,15 +109,16 @@ getOccByNgramsOnlyFast_withSample cId int nt ngs =
getOccByNgramsOnlyFast
::
CorpusId
getOccByNgramsOnlyFast
::
CorpusId
->
ListId
->
ListId
->
NgramsType
->
NgramsType
->
Cmd
err
(
HashMap
NgramsTerm
Int
)
->
Cmd
err
(
HashMap
NgramsTerm
[
ContextId
]
)
getOccByNgramsOnlyFast
cId
lId
nt
=
do
getOccByNgramsOnlyFast
cId
lId
nt
=
do
HM
.
fromList
<$>
map
(
\
(
t
,
n
)
->
(
NgramsTerm
t
,
round
n
))
<$>
run
cId
lId
nt
--HM.fromList <$> map (\(t,n) -> (NgramsTerm t, round n)) <$> run cId lId nt
HM
.
fromList
<$>
map
(
\
(
t
,
ns
)
->
(
NgramsTerm
t
,
NodeId
<$>
DPST
.
fromPGArray
ns
))
<$>
run
cId
lId
nt
where
where
run
::
CorpusId
run
::
CorpusId
->
ListId
->
ListId
->
NgramsType
->
NgramsType
->
Cmd
err
[(
Text
,
D
ouble
)]
->
Cmd
err
[(
Text
,
D
PST
.
PGArray
Int
)]
run
cId'
lId'
nt'
=
runPGSQuery
query
run
cId'
lId'
nt'
=
runPGSQuery
query
(
cId'
(
cId'
,
lId'
,
lId'
...
@@ -127,7 +129,8 @@ getOccByNgramsOnlyFast cId lId nt = do
...
@@ -127,7 +129,8 @@ getOccByNgramsOnlyFast cId lId nt = do
query
=
[
sql
|
query
=
[
sql
|
SELECT ng.terms
SELECT ng.terms
-- , ng.id
-- , ng.id
, round(nng.weight)
--, round(nng.weight)
, ARRAY(SELECT DISTINCT context_node_ngrams.context_id FROM context_node_ngrams WHERE ng.id = context_id) AS context_ids
-- , ns.version
-- , ns.version
-- , nng.ngrams_type
-- , nng.ngrams_type
-- , ns.ngrams_type_id
-- , ns.ngrams_type_id
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment