Commit e4e7ee07 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] Scores in ngrams table

parent c7d64791
...@@ -30,6 +30,7 @@ import Gargantext.API.Prelude (GargServer, simuLogs) ...@@ -30,6 +30,7 @@ import Gargantext.API.Prelude (GargServer, simuLogs)
import Gargantext.Core.Methods.Distances (GraphMetric(..)) import Gargantext.Core.Methods.Distances (GraphMetric(..))
import Gargantext.Core.Types.Main (ListType(..)) import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Graph.API (recomputeGraph) import Gargantext.Core.Viz.Graph.API (recomputeGraph)
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences)
import Gargantext.Database.Action.Flow.Pairing (pairing) import Gargantext.Database.Action.Flow.Pairing (pairing)
import Gargantext.Database.Action.Flow.Types (FlowCmdM) import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Node import Gargantext.Database.Admin.Types.Node
...@@ -165,7 +166,10 @@ updateNode _uId lId (UpdateNodeParamsList _mode) logStatus = do ...@@ -165,7 +166,10 @@ updateNode _uId lId (UpdateNodeParamsList _mode) logStatus = do
} }
_ <- case corpusId of _ <- case corpusId of
Just cId -> reIndexWith cId lId NgramsTerms (Set.singleton MapTerm) Just cId -> do
_ <- reIndexWith cId lId NgramsTerms (Set.singleton MapTerm)
_ <- updateNgramsOccurrences cId (Just lId)
pure ()
Nothing -> pure () Nothing -> pure ()
pure JobLog { _scst_succeeded = Just 3 pure JobLog { _scst_succeeded = Just 3
......
...@@ -86,6 +86,7 @@ import Gargantext.Database.Action.Flow.Types ...@@ -86,6 +86,7 @@ import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Flow.Utils (insertDocNgrams, DocumentIdWithNgrams(..)) import Gargantext.Database.Action.Flow.Utils (insertDocNgrams, DocumentIdWithNgrams(..))
import Gargantext.Database.Action.Search (searchDocInDatabase) import Gargantext.Database.Action.Search (searchDocInDatabase)
import Gargantext.Database.Admin.Config (userMaster, corpusMasterName) import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences)
import Gargantext.Database.Admin.Types.Hyperdata import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId) import Gargantext.Database.Admin.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import Gargantext.Database.Prelude import Gargantext.Database.Prelude
...@@ -280,6 +281,8 @@ flowCorpusUser l user corpusName ctype ids mfslw = do ...@@ -280,6 +281,8 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
--_ <- mkPhylo userCorpusId userId --_ <- mkPhylo userCorpusId userId
-- Annuaire Flow -- Annuaire Flow
-- _ <- mkAnnuaire rootUserId userId -- _ <- mkAnnuaire rootUserId userId
_ <- updateNgramsOccurrences userCorpusId (Just listId)
pure userCorpusId pure userCorpusId
......
...@@ -10,18 +10,26 @@ Portability : POSIX ...@@ -10,18 +10,26 @@ Portability : POSIX
Node API Node API
-} -}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Action.Metrics module Gargantext.Database.Action.Metrics
where where
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.Map (Map) import Data.Map (Map)
import Data.Set (Set) import Data.Set (Set)
import Database.PostgreSQL.Simple (Query, Only(..))
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Data.Vector (Vector) import Data.Vector (Vector)
import Gargantext.Core (HasDBid(toDBid))
import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo') import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo')
import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm) import Gargantext.Database.Prelude (runPGSQuery{-, formatPGSQuery-})
import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..))
import Gargantext.Core.Mail.Types (HasMail) import Gargantext.Core.Mail.Types (HasMail)
import Gargantext.Core.NodeStory import Gargantext.Core.NodeStory
import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-}) import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
import Gargantext.Core.Types (ListType(..), Limit, NodeType(..), ContextId) import Gargantext.Core.Types (ListType(..), Limit, NodeType(..), ContextId)
import Gargantext.Database.Action.Flow.Types (FlowCmdM) import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser{-, getTficfWith-}) import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser{-, getTficfWith-})
...@@ -34,6 +42,7 @@ import qualified Data.HashMap.Strict as HM ...@@ -34,6 +42,7 @@ import qualified Data.HashMap.Strict as HM
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Text as Text
getMetrics :: FlowCmdM env err m getMetrics :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
...@@ -51,9 +60,13 @@ getNgramsCooc :: (FlowCmdM env err m) ...@@ -51,9 +60,13 @@ getNgramsCooc :: (FlowCmdM env err m)
, HashMap (NgramsTerm, NgramsTerm) Int , HashMap (NgramsTerm, NgramsTerm) Int
) )
getNgramsCooc cId maybeListId tabType maybeLimit = do getNgramsCooc cId maybeListId tabType maybeLimit = do
(ngs', ngs) <- getNgrams cId maybeListId tabType
lId <- defaultList cId lId <- case maybeListId of
Nothing -> defaultList cId
Just lId' -> pure lId'
(ngs', ngs) <- getNgrams lId tabType
lIds <- selectNodesWithUsername NodeList userMaster lIds <- selectNodesWithUsername NodeList userMaster
myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True) myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True)
...@@ -64,21 +77,69 @@ getNgramsCooc cId maybeListId tabType maybeLimit = do ...@@ -64,21 +77,69 @@ getNgramsCooc cId maybeListId tabType maybeLimit = do
(take' maybeLimit $ HM.keys ngs) (take' maybeLimit $ HM.keys ngs)
pure $ (ngs', ngs, myCooc) pure $ (ngs', ngs, myCooc)
------------------------------------------------------------------------
------------------------------------------------------------------------
updateNgramsOccurrences :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId
-> m ()
updateNgramsOccurrences cId mlId = do
_ <- mapM (updateNgramsOccurrences' cId mlId Nothing) [Terms, Sources, Authors, Institutes]
pure ()
updateNgramsOccurrences' :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> Maybe Limit -> TabType
-> m [Int]
updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
lId <- case maybeListId of
Nothing -> defaultList cId
Just lId' -> pure lId'
result <- getNgramsOccurrences cId lId tabType maybeLimit
let
toInsert :: [[Action]]
toInsert = map (\(ngramsTerm, score)
-> [ toField cId
, toField lId
, toField $ unNgramsTerm ngramsTerm
, toField $ toDBid $ ngramsTypeFromTabType tabType
, toField score
]
)
$ HM.toList result
queryInsert :: Query
queryInsert = [sql|
WITH input(corpus_id, list_id, terms, type_id, weight) AS (?)
INSERT into node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
SELECT input.corpus_id,input.list_id,ngrams.id,input.type_id,input.weight FROM input
JOIN ngrams on ngrams.terms = input.terms
ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
DO UPDATE SET weight = excluded.weight
RETURNING 1
|]
let fields = map (\t-> QualifiedIdentifier Nothing t)
$ map Text.pack ["int4", "int4","text","int4","int4"]
map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
------------------------------------------------------------------------
-- Used for scores in Ngrams Table -- Used for scores in Ngrams Table
getNgramsOccurrences :: (FlowCmdM env err m) getNgramsOccurrences :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit => CorpusId -> ListId -> TabType -> Maybe Limit
-> m (HashMap NgramsTerm Int) -> m (HashMap NgramsTerm Int)
getNgramsOccurrences c l t ml = HM.map Set.size <$> getNgramsContexts c l t ml getNgramsOccurrences c l t ml = HM.map Set.size <$> getNgramsContexts c l t ml
getNgramsContexts :: (FlowCmdM env err m) getNgramsContexts :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit => CorpusId -> ListId -> TabType -> Maybe Limit
-> m (HashMap NgramsTerm (Set ContextId)) -> m (HashMap NgramsTerm (Set ContextId))
getNgramsContexts cId maybeListId tabType maybeLimit = do getNgramsContexts cId lId tabType maybeLimit = do
(_ngs', ngs) <- getNgrams cId maybeListId tabType (_ngs', ngs) <- getNgrams lId tabType
lId <- defaultList cId
lIds <- selectNodesWithUsername NodeList userMaster lIds <- selectNodesWithUsername NodeList userMaster
-- TODO maybe add an option to group here -- TODO maybe add an option to group here
...@@ -91,17 +152,16 @@ getNgramsContexts cId maybeListId tabType maybeLimit = do ...@@ -91,17 +152,16 @@ getNgramsContexts cId maybeListId tabType maybeLimit = do
-- Used for scores in Doc Table -- Used for scores in Doc Table
getContextsNgramsScore :: (FlowCmdM env err m) getContextsNgramsScore :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> ListType -> Maybe Limit => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
-> m (Map ContextId Int) -> m (Map ContextId Int)
getContextsNgramsScore cId maybeListId tabType listType maybeLimit getContextsNgramsScore cId lId tabType listType maybeLimit
= Map.map Set.size <$> getContextsNgrams cId maybeListId tabType listType maybeLimit = Map.map Set.size <$> getContextsNgrams cId lId tabType listType maybeLimit
getContextsNgrams :: (FlowCmdM env err m) getContextsNgrams :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> ListType -> Maybe Limit => CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
-> m (Map ContextId (Set NgramsTerm)) -> m (Map ContextId (Set NgramsTerm))
getContextsNgrams cId maybeListId tabType listType maybeLimit = do getContextsNgrams cId lId tabType listType maybeLimit = do
(ngs', ngs) <- getNgrams cId maybeListId tabType (ngs', ngs) <- getNgrams lId tabType
lId <- defaultList cId
lIds <- selectNodesWithUsername NodeList userMaster lIds <- selectNodesWithUsername NodeList userMaster
result <- groupNodesByNgrams ngs <$> getContextsByNgramsOnlyUser result <- groupNodesByNgrams ngs <$> getContextsByNgramsOnlyUser
...@@ -121,15 +181,11 @@ getContextsNgrams cId maybeListId tabType listType maybeLimit = do ...@@ -121,15 +181,11 @@ getContextsNgrams cId maybeListId tabType listType maybeLimit = do
getNgrams :: (HasMail env, HasNodeStory env err m) getNgrams :: (HasMail env, HasNodeStory env err m)
=> CorpusId -> Maybe ListId -> TabType => ListId -> TabType
-> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm) -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
, HashMap NgramsTerm (Maybe RootTerm) , HashMap NgramsTerm (Maybe RootTerm)
) )
getNgrams cId maybeListId tabType = do getNgrams lId tabType = do
lId <- case maybeListId of
Nothing -> defaultList cId
Just lId' -> pure lId'
lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType) <$> getRepo' [lId] lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType) <$> getRepo' [lId]
let maybeSyn = HM.unions $ map (\t -> filterListWithRoot t lists) let maybeSyn = HM.unions $ map (\t -> filterListWithRoot t lists)
......
...@@ -19,6 +19,7 @@ Ngrams connection to the Database. ...@@ -19,6 +19,7 @@ Ngrams connection to the Database.
module Gargantext.Database.Schema.Ngrams module Gargantext.Database.Schema.Ngrams
where where
import Data.Maybe (fromMaybe)
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable) import Data.Hashable (Hashable)
import Codec.Serialise (Serialise()) import Codec.Serialise (Serialise())
...@@ -32,6 +33,7 @@ import Gargantext.Core.Types (TODO(..), Typed(..)) ...@@ -32,6 +33,7 @@ import Gargantext.Core.Types (TODO(..), Typed(..))
import Gargantext.Prelude import Gargantext.Prelude
import Servant (FromHttpApiData(..), Proxy(..), ToHttpApiData(..)) import Servant (FromHttpApiData(..), Proxy(..), ToHttpApiData(..))
import Text.Read (read) import Text.Read (read)
import Gargantext.Core (HasDBid(..))
import Gargantext.Database.Types import Gargantext.Database.Types
import Gargantext.Database.Schema.Prelude import Gargantext.Database.Schema.Prelude
import qualified Database.PostgreSQL.Simple as PGS import qualified Database.PostgreSQL.Simple as PGS
...@@ -82,6 +84,7 @@ data NgramsType = Authors | Institutes | Sources | NgramsTerms ...@@ -82,6 +84,7 @@ data NgramsType = Authors | Institutes | Sources | NgramsTerms
instance Serialise NgramsType instance Serialise NgramsType
ngramsTypes :: [NgramsType] ngramsTypes :: [NgramsType]
ngramsTypes = [minBound..] ngramsTypes = [minBound..]
...@@ -141,6 +144,16 @@ fromNgramsTypeId id = lookup id ...@@ -141,6 +144,16 @@ fromNgramsTypeId id = lookup id
| nt <- [minBound .. maxBound] :: [NgramsType] | nt <- [minBound .. maxBound] :: [NgramsType]
] ]
unNgramsTypeId :: NgramsTypeId -> Int
unNgramsTypeId (NgramsTypeId i) = i
toNgramsTypeId :: Int -> NgramsTypeId
toNgramsTypeId i = NgramsTypeId i
instance HasDBid NgramsType where
toDBid = unNgramsTypeId . ngramsTypeId
fromDBid = fromMaybe (panic "NgramsType id not indexed") . fromNgramsTypeId . toNgramsTypeId
------------------------------------------------------------------------ ------------------------------------------------------------------------
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | TODO put it in Gargantext.Core.Text.Ngrams -- | TODO put it in Gargantext.Core.Text.Ngrams
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment