Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
9
Merge Requests
9
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
e4e7ee07
Commit
e4e7ee07
authored
Jan 31, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] Scores in ngrams table
parent
c7d64791
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
99 additions
and
23 deletions
+99
-23
Update.hs
src/Gargantext/API/Node/Update.hs
+5
-1
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+3
-0
Metrics.hs
src/Gargantext/Database/Action/Metrics.hs
+78
-22
Ngrams.hs
src/Gargantext/Database/Schema/Ngrams.hs
+13
-0
No files found.
src/Gargantext/API/Node/Update.hs
View file @
e4e7ee07
...
...
@@ -30,6 +30,7 @@ import Gargantext.API.Prelude (GargServer, simuLogs)
import
Gargantext.Core.Methods.Distances
(
GraphMetric
(
..
))
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Viz.Graph.API
(
recomputeGraph
)
import
Gargantext.Database.Action.Metrics
(
updateNgramsOccurrences
)
import
Gargantext.Database.Action.Flow.Pairing
(
pairing
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Admin.Types.Node
...
...
@@ -165,7 +166,10 @@ updateNode _uId lId (UpdateNodeParamsList _mode) logStatus = do
}
_
<-
case
corpusId
of
Just
cId
->
reIndexWith
cId
lId
NgramsTerms
(
Set
.
singleton
MapTerm
)
Just
cId
->
do
_
<-
reIndexWith
cId
lId
NgramsTerms
(
Set
.
singleton
MapTerm
)
_
<-
updateNgramsOccurrences
cId
(
Just
lId
)
pure
()
Nothing
->
pure
()
pure
JobLog
{
_scst_succeeded
=
Just
3
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
e4e7ee07
...
...
@@ -86,6 +86,7 @@ import Gargantext.Database.Action.Flow.Types
import
Gargantext.Database.Action.Flow.Utils
(
insertDocNgrams
,
DocumentIdWithNgrams
(
..
))
import
Gargantext.Database.Action.Search
(
searchDocInDatabase
)
import
Gargantext.Database.Admin.Config
(
userMaster
,
corpusMasterName
)
import
Gargantext.Database.Action.Metrics
(
updateNgramsOccurrences
)
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
-- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import
Gargantext.Database.Prelude
...
...
@@ -280,6 +281,8 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
--
_
<-
mkPhylo
userCorpusId
userId
-- Annuaire Flow
-- _ <- mkAnnuaire rootUserId userId
_
<-
updateNgramsOccurrences
userCorpusId
(
Just
listId
)
pure
userCorpusId
...
...
src/Gargantext/Database/Action/Metrics.hs
View file @
e4e7ee07
...
...
@@ -10,18 +10,26 @@ Portability : POSIX
Node API
-}
{-# LANGUAGE QuasiQuotes #-}
module
Gargantext.Database.Action.Metrics
where
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
Map
)
import
Data.Set
(
Set
)
import
Database.PostgreSQL.Simple
(
Query
,
Only
(
..
))
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
import
Data.Vector
(
Vector
)
import
Gargantext.Core
(
HasDBid
(
toDBid
))
import
Gargantext.API.Ngrams.Tools
(
filterListWithRoot
,
groupNodesByNgrams
,
Diagonal
(
..
),
getCoocByNgrams
,
mapTermListRoot
,
RootTerm
,
getRepo'
)
import
Gargantext.API.Ngrams.Types
(
TabType
(
..
),
ngramsTypeFromTabType
,
NgramsTerm
)
import
Gargantext.Database.Prelude
(
runPGSQuery
{-, formatPGSQuery-}
)
import
Gargantext.API.Ngrams.Types
(
TabType
(
..
),
ngramsTypeFromTabType
,
NgramsTerm
(
..
))
import
Gargantext.Core.Mail.Types
(
HasMail
)
import
Gargantext.Core.NodeStory
import
Gargantext.Core.Text.Metrics
(
scored
,
Scored
(
..
),
{-localMetrics, toScored-}
)
import
Database.PostgreSQL.Simple.ToField
(
toField
,
Action
{-, ToField-}
)
import
Gargantext.Core.Types
(
ListType
(
..
),
Limit
,
NodeType
(
..
),
ContextId
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getContextsByNgramsOnlyUser
{-, getTficfWith-}
)
...
...
@@ -34,6 +42,7 @@ import qualified Data.HashMap.Strict as HM
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.List
as
List
import
qualified
Data.Text
as
Text
getMetrics
::
FlowCmdM
env
err
m
=>
CorpusId
->
Maybe
ListId
->
TabType
->
Maybe
Limit
...
...
@@ -51,9 +60,13 @@ getNgramsCooc :: (FlowCmdM env err m)
,
HashMap
(
NgramsTerm
,
NgramsTerm
)
Int
)
getNgramsCooc
cId
maybeListId
tabType
maybeLimit
=
do
(
ngs'
,
ngs
)
<-
getNgrams
cId
maybeListId
tabType
lId
<-
defaultList
cId
lId
<-
case
maybeListId
of
Nothing
->
defaultList
cId
Just
lId'
->
pure
lId'
(
ngs'
,
ngs
)
<-
getNgrams
lId
tabType
lIds
<-
selectNodesWithUsername
NodeList
userMaster
myCooc
<-
HM
.
filter
(
>
1
)
<$>
getCoocByNgrams
(
Diagonal
True
)
...
...
@@ -64,21 +77,69 @@ getNgramsCooc cId maybeListId tabType maybeLimit = do
(
take'
maybeLimit
$
HM
.
keys
ngs
)
pure
$
(
ngs'
,
ngs
,
myCooc
)
------------------------------------------------------------------------
------------------------------------------------------------------------
updateNgramsOccurrences
::
(
FlowCmdM
env
err
m
)
=>
CorpusId
->
Maybe
ListId
->
m
()
updateNgramsOccurrences
cId
mlId
=
do
_
<-
mapM
(
updateNgramsOccurrences'
cId
mlId
Nothing
)
[
Terms
,
Sources
,
Authors
,
Institutes
]
pure
()
updateNgramsOccurrences'
::
(
FlowCmdM
env
err
m
)
=>
CorpusId
->
Maybe
ListId
->
Maybe
Limit
->
TabType
->
m
[
Int
]
updateNgramsOccurrences'
cId
maybeListId
maybeLimit
tabType
=
do
lId
<-
case
maybeListId
of
Nothing
->
defaultList
cId
Just
lId'
->
pure
lId'
result
<-
getNgramsOccurrences
cId
lId
tabType
maybeLimit
let
toInsert
::
[[
Action
]]
toInsert
=
map
(
\
(
ngramsTerm
,
score
)
->
[
toField
cId
,
toField
lId
,
toField
$
unNgramsTerm
ngramsTerm
,
toField
$
toDBid
$
ngramsTypeFromTabType
tabType
,
toField
score
]
)
$
HM
.
toList
result
queryInsert
::
Query
queryInsert
=
[
sql
|
WITH input(corpus_id, list_id, terms, type_id, weight) AS (?)
INSERT into node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
SELECT input.corpus_id,input.list_id,ngrams.id,input.type_id,input.weight FROM input
JOIN ngrams on ngrams.terms = input.terms
ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
DO UPDATE SET weight = excluded.weight
RETURNING 1
|]
let
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
$
map
Text
.
pack
[
"int4"
,
"int4"
,
"text"
,
"int4"
,
"int4"
]
map
(
\
(
Only
a
)
->
a
)
<$>
runPGSQuery
queryInsert
(
Only
$
Values
fields
toInsert
)
------------------------------------------------------------------------
-- Used for scores in Ngrams Table
getNgramsOccurrences
::
(
FlowCmdM
env
err
m
)
=>
CorpusId
->
Maybe
ListId
->
TabType
->
Maybe
Limit
=>
CorpusId
->
ListId
->
TabType
->
Maybe
Limit
->
m
(
HashMap
NgramsTerm
Int
)
getNgramsOccurrences
c
l
t
ml
=
HM
.
map
Set
.
size
<$>
getNgramsContexts
c
l
t
ml
getNgramsContexts
::
(
FlowCmdM
env
err
m
)
=>
CorpusId
->
Maybe
ListId
->
TabType
->
Maybe
Limit
=>
CorpusId
->
ListId
->
TabType
->
Maybe
Limit
->
m
(
HashMap
NgramsTerm
(
Set
ContextId
))
getNgramsContexts
cId
maybeListId
tabType
maybeLimit
=
do
(
_ngs'
,
ngs
)
<-
getNgrams
cId
maybeListId
tabType
lId
<-
defaultList
cId
getNgramsContexts
cId
lId
tabType
maybeLimit
=
do
(
_ngs'
,
ngs
)
<-
getNgrams
lId
tabType
lIds
<-
selectNodesWithUsername
NodeList
userMaster
-- TODO maybe add an option to group here
...
...
@@ -91,17 +152,16 @@ getNgramsContexts cId maybeListId tabType maybeLimit = do
-- Used for scores in Doc Table
getContextsNgramsScore
::
(
FlowCmdM
env
err
m
)
=>
CorpusId
->
Maybe
ListId
->
TabType
->
ListType
->
Maybe
Limit
=>
CorpusId
->
ListId
->
TabType
->
ListType
->
Maybe
Limit
->
m
(
Map
ContextId
Int
)
getContextsNgramsScore
cId
maybeListId
tabType
listType
maybeLimit
=
Map
.
map
Set
.
size
<$>
getContextsNgrams
cId
maybeList
Id
tabType
listType
maybeLimit
getContextsNgramsScore
cId
lId
tabType
listType
maybeLimit
=
Map
.
map
Set
.
size
<$>
getContextsNgrams
cId
l
Id
tabType
listType
maybeLimit
getContextsNgrams
::
(
FlowCmdM
env
err
m
)
=>
CorpusId
->
Maybe
ListId
->
TabType
->
ListType
->
Maybe
Limit
=>
CorpusId
->
ListId
->
TabType
->
ListType
->
Maybe
Limit
->
m
(
Map
ContextId
(
Set
NgramsTerm
))
getContextsNgrams
cId
maybeListId
tabType
listType
maybeLimit
=
do
(
ngs'
,
ngs
)
<-
getNgrams
cId
maybeListId
tabType
lId
<-
defaultList
cId
getContextsNgrams
cId
lId
tabType
listType
maybeLimit
=
do
(
ngs'
,
ngs
)
<-
getNgrams
lId
tabType
lIds
<-
selectNodesWithUsername
NodeList
userMaster
result
<-
groupNodesByNgrams
ngs
<$>
getContextsByNgramsOnlyUser
...
...
@@ -121,15 +181,11 @@ getContextsNgrams cId maybeListId tabType listType maybeLimit = do
getNgrams
::
(
HasMail
env
,
HasNodeStory
env
err
m
)
=>
CorpusId
->
Maybe
ListId
->
TabType
=>
ListId
->
TabType
->
m
(
HashMap
NgramsTerm
(
ListType
,
Maybe
NgramsTerm
)
,
HashMap
NgramsTerm
(
Maybe
RootTerm
)
)
getNgrams
cId
maybeListId
tabType
=
do
lId
<-
case
maybeListId
of
Nothing
->
defaultList
cId
Just
lId'
->
pure
lId'
getNgrams
lId
tabType
=
do
lists
<-
mapTermListRoot
[
lId
]
(
ngramsTypeFromTabType
tabType
)
<$>
getRepo'
[
lId
]
let
maybeSyn
=
HM
.
unions
$
map
(
\
t
->
filterListWithRoot
t
lists
)
...
...
src/Gargantext/Database/Schema/Ngrams.hs
View file @
e4e7ee07
...
...
@@ -19,6 +19,7 @@ Ngrams connection to the Database.
module
Gargantext.Database.Schema.Ngrams
where
import
Data.Maybe
(
fromMaybe
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Hashable
(
Hashable
)
import
Codec.Serialise
(
Serialise
())
...
...
@@ -32,6 +33,7 @@ import Gargantext.Core.Types (TODO(..), Typed(..))
import
Gargantext.Prelude
import
Servant
(
FromHttpApiData
(
..
),
Proxy
(
..
),
ToHttpApiData
(
..
))
import
Text.Read
(
read
)
import
Gargantext.Core
(
HasDBid
(
..
))
import
Gargantext.Database.Types
import
Gargantext.Database.Schema.Prelude
import
qualified
Database.PostgreSQL.Simple
as
PGS
...
...
@@ -82,6 +84,7 @@ data NgramsType = Authors | Institutes | Sources | NgramsTerms
instance
Serialise
NgramsType
ngramsTypes
::
[
NgramsType
]
ngramsTypes
=
[
minBound
..
]
...
...
@@ -141,6 +144,16 @@ fromNgramsTypeId id = lookup id
|
nt
<-
[
minBound
..
maxBound
]
::
[
NgramsType
]
]
unNgramsTypeId
::
NgramsTypeId
->
Int
unNgramsTypeId
(
NgramsTypeId
i
)
=
i
toNgramsTypeId
::
Int
->
NgramsTypeId
toNgramsTypeId
i
=
NgramsTypeId
i
instance
HasDBid
NgramsType
where
toDBid
=
unNgramsTypeId
.
ngramsTypeId
fromDBid
=
fromMaybe
(
panic
"NgramsType id not indexed"
)
.
fromNgramsTypeId
.
toNgramsTypeId
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO put it in Gargantext.Core.Text.Ngrams
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment