Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
haskell-gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
153
Issues
153
List
Board
Labels
Milestones
Merge Requests
10
Merge Requests
10
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
gargantext
haskell-gargantext
Commits
e4e7ee07
Commit
e4e7ee07
authored
Jan 31, 2022
by
Alexandre Delanoë
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[FIX] Scores in ngrams table
parent
c7d64791
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
99 additions
and
23 deletions
+99
-23
Update.hs
src/Gargantext/API/Node/Update.hs
+5
-1
Flow.hs
src/Gargantext/Database/Action/Flow.hs
+3
-0
Metrics.hs
src/Gargantext/Database/Action/Metrics.hs
+78
-22
Ngrams.hs
src/Gargantext/Database/Schema/Ngrams.hs
+13
-0
No files found.
src/Gargantext/API/Node/Update.hs
View file @
e4e7ee07
...
...
@@ -30,6 +30,7 @@ import Gargantext.API.Prelude (GargServer, simuLogs)
import
Gargantext.Core.Methods.Distances
(
GraphMetric
(
..
))
import
Gargantext.Core.Types.Main
(
ListType
(
..
))
import
Gargantext.Core.Viz.Graph.API
(
recomputeGraph
)
import
Gargantext.Database.Action.Metrics
(
updateNgramsOccurrences
)
import
Gargantext.Database.Action.Flow.Pairing
(
pairing
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Admin.Types.Node
...
...
@@ -165,7 +166,10 @@ updateNode _uId lId (UpdateNodeParamsList _mode) logStatus = do
}
_
<-
case
corpusId
of
Just
cId
->
reIndexWith
cId
lId
NgramsTerms
(
Set
.
singleton
MapTerm
)
Just
cId
->
do
_
<-
reIndexWith
cId
lId
NgramsTerms
(
Set
.
singleton
MapTerm
)
_
<-
updateNgramsOccurrences
cId
(
Just
lId
)
pure
()
Nothing
->
pure
()
pure
JobLog
{
_scst_succeeded
=
Just
3
...
...
src/Gargantext/Database/Action/Flow.hs
View file @
e4e7ee07
...
...
@@ -86,6 +86,7 @@ import Gargantext.Database.Action.Flow.Types
import
Gargantext.Database.Action.Flow.Utils
(
insertDocNgrams
,
DocumentIdWithNgrams
(
..
))
import
Gargantext.Database.Action.Search
(
searchDocInDatabase
)
import
Gargantext.Database.Admin.Config
(
userMaster
,
corpusMasterName
)
import
Gargantext.Database.Action.Metrics
(
updateNgramsOccurrences
)
import
Gargantext.Database.Admin.Types.Hyperdata
import
Gargantext.Database.Admin.Types.Node
-- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import
Gargantext.Database.Prelude
...
...
@@ -280,6 +281,8 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
--
_
<-
mkPhylo
userCorpusId
userId
-- Annuaire Flow
-- _ <- mkAnnuaire rootUserId userId
_
<-
updateNgramsOccurrences
userCorpusId
(
Just
listId
)
pure
userCorpusId
...
...
src/Gargantext/Database/Action/Metrics.hs
View file @
e4e7ee07
...
...
@@ -10,18 +10,26 @@ Portability : POSIX
Node API
-}
{-# LANGUAGE QuasiQuotes #-}
module
Gargantext.Database.Action.Metrics
where
import
Database.PostgreSQL.Simple.SqlQQ
(
sql
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Map
(
Map
)
import
Data.Set
(
Set
)
import
Database.PostgreSQL.Simple
(
Query
,
Only
(
..
))
import
Database.PostgreSQL.Simple.Types
(
Values
(
..
),
QualifiedIdentifier
(
..
))
import
Data.Vector
(
Vector
)
import
Gargantext.Core
(
HasDBid
(
toDBid
))
import
Gargantext.API.Ngrams.Tools
(
filterListWithRoot
,
groupNodesByNgrams
,
Diagonal
(
..
),
getCoocByNgrams
,
mapTermListRoot
,
RootTerm
,
getRepo'
)
import
Gargantext.API.Ngrams.Types
(
TabType
(
..
),
ngramsTypeFromTabType
,
NgramsTerm
)
import
Gargantext.Database.Prelude
(
runPGSQuery
{-, formatPGSQuery-}
)
import
Gargantext.API.Ngrams.Types
(
TabType
(
..
),
ngramsTypeFromTabType
,
NgramsTerm
(
..
))
import
Gargantext.Core.Mail.Types
(
HasMail
)
import
Gargantext.Core.NodeStory
import
Gargantext.Core.Text.Metrics
(
scored
,
Scored
(
..
),
{-localMetrics, toScored-}
)
import
Database.PostgreSQL.Simple.ToField
(
toField
,
Action
{-, ToField-}
)
import
Gargantext.Core.Types
(
ListType
(
..
),
Limit
,
NodeType
(
..
),
ContextId
)
import
Gargantext.Database.Action.Flow.Types
(
FlowCmdM
)
import
Gargantext.Database.Action.Metrics.NgramsByContext
(
getContextsByNgramsOnlyUser
{-, getTficfWith-}
)
...
...
@@ -34,6 +42,7 @@ import qualified Data.HashMap.Strict as HM
import
qualified
Data.Map
as
Map
import
qualified
Data.Set
as
Set
import
qualified
Data.List
as
List
import
qualified
Data.Text
as
Text
getMetrics
::
FlowCmdM
env
err
m
=>
CorpusId
->
Maybe
ListId
->
TabType
->
Maybe
Limit
...
...
@@ -51,9 +60,13 @@ getNgramsCooc :: (FlowCmdM env err m)
,
HashMap
(
NgramsTerm
,
NgramsTerm
)
Int
)
getNgramsCooc
cId
maybeListId
tabType
maybeLimit
=
do
(
ngs'
,
ngs
)
<-
getNgrams
cId
maybeListId
tabType
lId
<-
defaultList
cId
lId
<-
case
maybeListId
of
Nothing
->
defaultList
cId
Just
lId'
->
pure
lId'
(
ngs'
,
ngs
)
<-
getNgrams
lId
tabType
lIds
<-
selectNodesWithUsername
NodeList
userMaster
myCooc
<-
HM
.
filter
(
>
1
)
<$>
getCoocByNgrams
(
Diagonal
True
)
...
...
@@ -64,21 +77,69 @@ getNgramsCooc cId maybeListId tabType maybeLimit = do
(
take'
maybeLimit
$
HM
.
keys
ngs
)
pure
$
(
ngs'
,
ngs
,
myCooc
)
------------------------------------------------------------------------
------------------------------------------------------------------------
updateNgramsOccurrences
::
(
FlowCmdM
env
err
m
)
=>
CorpusId
->
Maybe
ListId
->
m
()
updateNgramsOccurrences
cId
mlId
=
do
_
<-
mapM
(
updateNgramsOccurrences'
cId
mlId
Nothing
)
[
Terms
,
Sources
,
Authors
,
Institutes
]
pure
()
updateNgramsOccurrences'
::
(
FlowCmdM
env
err
m
)
=>
CorpusId
->
Maybe
ListId
->
Maybe
Limit
->
TabType
->
m
[
Int
]
updateNgramsOccurrences'
cId
maybeListId
maybeLimit
tabType
=
do
lId
<-
case
maybeListId
of
Nothing
->
defaultList
cId
Just
lId'
->
pure
lId'
result
<-
getNgramsOccurrences
cId
lId
tabType
maybeLimit
let
toInsert
::
[[
Action
]]
toInsert
=
map
(
\
(
ngramsTerm
,
score
)
->
[
toField
cId
,
toField
lId
,
toField
$
unNgramsTerm
ngramsTerm
,
toField
$
toDBid
$
ngramsTypeFromTabType
tabType
,
toField
score
]
)
$
HM
.
toList
result
queryInsert
::
Query
queryInsert
=
[
sql
|
WITH input(corpus_id, list_id, terms, type_id, weight) AS (?)
INSERT into node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
SELECT input.corpus_id,input.list_id,ngrams.id,input.type_id,input.weight FROM input
JOIN ngrams on ngrams.terms = input.terms
ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
DO UPDATE SET weight = excluded.weight
RETURNING 1
|]
let
fields
=
map
(
\
t
->
QualifiedIdentifier
Nothing
t
)
$
map
Text
.
pack
[
"int4"
,
"int4"
,
"text"
,
"int4"
,
"int4"
]
map
(
\
(
Only
a
)
->
a
)
<$>
runPGSQuery
queryInsert
(
Only
$
Values
fields
toInsert
)
------------------------------------------------------------------------
-- Used for scores in Ngrams Table
getNgramsOccurrences
::
(
FlowCmdM
env
err
m
)
=>
CorpusId
->
Maybe
ListId
->
TabType
->
Maybe
Limit
=>
CorpusId
->
ListId
->
TabType
->
Maybe
Limit
->
m
(
HashMap
NgramsTerm
Int
)
getNgramsOccurrences
c
l
t
ml
=
HM
.
map
Set
.
size
<$>
getNgramsContexts
c
l
t
ml
getNgramsContexts
::
(
FlowCmdM
env
err
m
)
=>
CorpusId
->
Maybe
ListId
->
TabType
->
Maybe
Limit
=>
CorpusId
->
ListId
->
TabType
->
Maybe
Limit
->
m
(
HashMap
NgramsTerm
(
Set
ContextId
))
getNgramsContexts
cId
maybeListId
tabType
maybeLimit
=
do
(
_ngs'
,
ngs
)
<-
getNgrams
cId
maybeListId
tabType
lId
<-
defaultList
cId
getNgramsContexts
cId
lId
tabType
maybeLimit
=
do
(
_ngs'
,
ngs
)
<-
getNgrams
lId
tabType
lIds
<-
selectNodesWithUsername
NodeList
userMaster
-- TODO maybe add an option to group here
...
...
@@ -91,17 +152,16 @@ getNgramsContexts cId maybeListId tabType maybeLimit = do
-- Used for scores in Doc Table
getContextsNgramsScore
::
(
FlowCmdM
env
err
m
)
=>
CorpusId
->
Maybe
ListId
->
TabType
->
ListType
->
Maybe
Limit
=>
CorpusId
->
ListId
->
TabType
->
ListType
->
Maybe
Limit
->
m
(
Map
ContextId
Int
)
getContextsNgramsScore
cId
maybeListId
tabType
listType
maybeLimit
=
Map
.
map
Set
.
size
<$>
getContextsNgrams
cId
maybeList
Id
tabType
listType
maybeLimit
getContextsNgramsScore
cId
lId
tabType
listType
maybeLimit
=
Map
.
map
Set
.
size
<$>
getContextsNgrams
cId
l
Id
tabType
listType
maybeLimit
getContextsNgrams
::
(
FlowCmdM
env
err
m
)
=>
CorpusId
->
Maybe
ListId
->
TabType
->
ListType
->
Maybe
Limit
=>
CorpusId
->
ListId
->
TabType
->
ListType
->
Maybe
Limit
->
m
(
Map
ContextId
(
Set
NgramsTerm
))
getContextsNgrams
cId
maybeListId
tabType
listType
maybeLimit
=
do
(
ngs'
,
ngs
)
<-
getNgrams
cId
maybeListId
tabType
lId
<-
defaultList
cId
getContextsNgrams
cId
lId
tabType
listType
maybeLimit
=
do
(
ngs'
,
ngs
)
<-
getNgrams
lId
tabType
lIds
<-
selectNodesWithUsername
NodeList
userMaster
result
<-
groupNodesByNgrams
ngs
<$>
getContextsByNgramsOnlyUser
...
...
@@ -121,15 +181,11 @@ getContextsNgrams cId maybeListId tabType listType maybeLimit = do
getNgrams
::
(
HasMail
env
,
HasNodeStory
env
err
m
)
=>
CorpusId
->
Maybe
ListId
->
TabType
=>
ListId
->
TabType
->
m
(
HashMap
NgramsTerm
(
ListType
,
Maybe
NgramsTerm
)
,
HashMap
NgramsTerm
(
Maybe
RootTerm
)
)
getNgrams
cId
maybeListId
tabType
=
do
lId
<-
case
maybeListId
of
Nothing
->
defaultList
cId
Just
lId'
->
pure
lId'
getNgrams
lId
tabType
=
do
lists
<-
mapTermListRoot
[
lId
]
(
ngramsTypeFromTabType
tabType
)
<$>
getRepo'
[
lId
]
let
maybeSyn
=
HM
.
unions
$
map
(
\
t
->
filterListWithRoot
t
lists
)
...
...
src/Gargantext/Database/Schema/Ngrams.hs
View file @
e4e7ee07
...
...
@@ -19,6 +19,7 @@ Ngrams connection to the Database.
module
Gargantext.Database.Schema.Ngrams
where
import
Data.Maybe
(
fromMaybe
)
import
Data.HashMap.Strict
(
HashMap
)
import
Data.Hashable
(
Hashable
)
import
Codec.Serialise
(
Serialise
())
...
...
@@ -32,6 +33,7 @@ import Gargantext.Core.Types (TODO(..), Typed(..))
import
Gargantext.Prelude
import
Servant
(
FromHttpApiData
(
..
),
Proxy
(
..
),
ToHttpApiData
(
..
))
import
Text.Read
(
read
)
import
Gargantext.Core
(
HasDBid
(
..
))
import
Gargantext.Database.Types
import
Gargantext.Database.Schema.Prelude
import
qualified
Database.PostgreSQL.Simple
as
PGS
...
...
@@ -82,6 +84,7 @@ data NgramsType = Authors | Institutes | Sources | NgramsTerms
instance
Serialise
NgramsType
ngramsTypes
::
[
NgramsType
]
ngramsTypes
=
[
minBound
..
]
...
...
@@ -141,6 +144,16 @@ fromNgramsTypeId id = lookup id
|
nt
<-
[
minBound
..
maxBound
]
::
[
NgramsType
]
]
unNgramsTypeId
::
NgramsTypeId
->
Int
unNgramsTypeId
(
NgramsTypeId
i
)
=
i
toNgramsTypeId
::
Int
->
NgramsTypeId
toNgramsTypeId
i
=
NgramsTypeId
i
instance
HasDBid
NgramsType
where
toDBid
=
unNgramsTypeId
.
ngramsTypeId
fromDBid
=
fromMaybe
(
panic
"NgramsType id not indexed"
)
.
fromNgramsTypeId
.
toNgramsTypeId
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO put it in Gargantext.Core.Text.Ngrams
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment