Commit e4e7ee07 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FIX] Scores in ngrams table

parent c7d64791
......@@ -30,6 +30,7 @@ import Gargantext.API.Prelude (GargServer, simuLogs)
import Gargantext.Core.Methods.Distances (GraphMetric(..))
import Gargantext.Core.Types.Main (ListType(..))
import Gargantext.Core.Viz.Graph.API (recomputeGraph)
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences)
import Gargantext.Database.Action.Flow.Pairing (pairing)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Admin.Types.Node
......@@ -165,7 +166,10 @@ updateNode _uId lId (UpdateNodeParamsList _mode) logStatus = do
}
_ <- case corpusId of
Just cId -> reIndexWith cId lId NgramsTerms (Set.singleton MapTerm)
Just cId -> do
_ <- reIndexWith cId lId NgramsTerms (Set.singleton MapTerm)
_ <- updateNgramsOccurrences cId (Just lId)
pure ()
Nothing -> pure ()
pure JobLog { _scst_succeeded = Just 3
......
......@@ -86,6 +86,7 @@ import Gargantext.Database.Action.Flow.Types
import Gargantext.Database.Action.Flow.Utils (insertDocNgrams, DocumentIdWithNgrams(..))
import Gargantext.Database.Action.Search (searchDocInDatabase)
import Gargantext.Database.Admin.Config (userMaster, corpusMasterName)
import Gargantext.Database.Action.Metrics (updateNgramsOccurrences)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node -- (HyperdataDocument(..), NodeType(..), NodeId, UserId, ListId, CorpusId, RootId, MasterCorpusId, MasterUserId)
import Gargantext.Database.Prelude
......@@ -280,6 +281,8 @@ flowCorpusUser l user corpusName ctype ids mfslw = do
--_ <- mkPhylo userCorpusId userId
-- Annuaire Flow
-- _ <- mkAnnuaire rootUserId userId
_ <- updateNgramsOccurrences userCorpusId (Just listId)
pure userCorpusId
......
......@@ -10,18 +10,26 @@ Portability : POSIX
Node API
-}
{-# LANGUAGE QuasiQuotes #-}
module Gargantext.Database.Action.Metrics
where
import Database.PostgreSQL.Simple.SqlQQ (sql)
import Data.HashMap.Strict (HashMap)
import Data.Map (Map)
import Data.Set (Set)
import Database.PostgreSQL.Simple (Query, Only(..))
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Data.Vector (Vector)
import Gargantext.Core (HasDBid(toDBid))
import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo')
import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm)
import Gargantext.Database.Prelude (runPGSQuery{-, formatPGSQuery-})
import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm(..))
import Gargantext.Core.Mail.Types (HasMail)
import Gargantext.Core.NodeStory
import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
import Database.PostgreSQL.Simple.ToField (toField, Action{-, ToField-})
import Gargantext.Core.Types (ListType(..), Limit, NodeType(..), ContextId)
import Gargantext.Database.Action.Flow.Types (FlowCmdM)
import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser{-, getTficfWith-})
......@@ -34,6 +42,7 @@ import qualified Data.HashMap.Strict as HM
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.List as List
import qualified Data.Text as Text
getMetrics :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
......@@ -51,9 +60,13 @@ getNgramsCooc :: (FlowCmdM env err m)
, HashMap (NgramsTerm, NgramsTerm) Int
)
getNgramsCooc cId maybeListId tabType maybeLimit = do
(ngs', ngs) <- getNgrams cId maybeListId tabType
lId <- defaultList cId
lId <- case maybeListId of
Nothing -> defaultList cId
Just lId' -> pure lId'
(ngs', ngs) <- getNgrams lId tabType
lIds <- selectNodesWithUsername NodeList userMaster
myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True)
......@@ -64,21 +77,69 @@ getNgramsCooc cId maybeListId tabType maybeLimit = do
(take' maybeLimit $ HM.keys ngs)
pure $ (ngs', ngs, myCooc)
------------------------------------------------------------------------
------------------------------------------------------------------------
updateNgramsOccurrences :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId
-> m ()
updateNgramsOccurrences cId mlId = do
_ <- mapM (updateNgramsOccurrences' cId mlId Nothing) [Terms, Sources, Authors, Institutes]
pure ()
updateNgramsOccurrences' :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> Maybe Limit -> TabType
-> m [Int]
updateNgramsOccurrences' cId maybeListId maybeLimit tabType = do
lId <- case maybeListId of
Nothing -> defaultList cId
Just lId' -> pure lId'
result <- getNgramsOccurrences cId lId tabType maybeLimit
let
toInsert :: [[Action]]
toInsert = map (\(ngramsTerm, score)
-> [ toField cId
, toField lId
, toField $ unNgramsTerm ngramsTerm
, toField $ toDBid $ ngramsTypeFromTabType tabType
, toField score
]
)
$ HM.toList result
queryInsert :: Query
queryInsert = [sql|
WITH input(corpus_id, list_id, terms, type_id, weight) AS (?)
INSERT into node_node_ngrams (node1_id, node2_id, ngrams_id, ngrams_type, weight)
SELECT input.corpus_id,input.list_id,ngrams.id,input.type_id,input.weight FROM input
JOIN ngrams on ngrams.terms = input.terms
ON CONFLICT (node1_id, node2_id, ngrams_id, ngrams_type)
DO UPDATE SET weight = excluded.weight
RETURNING 1
|]
let fields = map (\t-> QualifiedIdentifier Nothing t)
$ map Text.pack ["int4", "int4","text","int4","int4"]
map (\(Only a) -> a) <$> runPGSQuery queryInsert (Only $ Values fields toInsert)
------------------------------------------------------------------------
-- Used for scores in Ngrams Table
getNgramsOccurrences :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
=> CorpusId -> ListId -> TabType -> Maybe Limit
-> m (HashMap NgramsTerm Int)
getNgramsOccurrences c l t ml = HM.map Set.size <$> getNgramsContexts c l t ml
getNgramsContexts :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
=> CorpusId -> ListId -> TabType -> Maybe Limit
-> m (HashMap NgramsTerm (Set ContextId))
getNgramsContexts cId maybeListId tabType maybeLimit = do
(_ngs', ngs) <- getNgrams cId maybeListId tabType
lId <- defaultList cId
getNgramsContexts cId lId tabType maybeLimit = do
(_ngs', ngs) <- getNgrams lId tabType
lIds <- selectNodesWithUsername NodeList userMaster
-- TODO maybe add an option to group here
......@@ -91,17 +152,16 @@ getNgramsContexts cId maybeListId tabType maybeLimit = do
-- Used for scores in Doc Table
getContextsNgramsScore :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> ListType -> Maybe Limit
=> CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
-> m (Map ContextId Int)
getContextsNgramsScore cId maybeListId tabType listType maybeLimit
= Map.map Set.size <$> getContextsNgrams cId maybeListId tabType listType maybeLimit
getContextsNgramsScore cId lId tabType listType maybeLimit
= Map.map Set.size <$> getContextsNgrams cId lId tabType listType maybeLimit
getContextsNgrams :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> ListType -> Maybe Limit
=> CorpusId -> ListId -> TabType -> ListType -> Maybe Limit
-> m (Map ContextId (Set NgramsTerm))
getContextsNgrams cId maybeListId tabType listType maybeLimit = do
(ngs', ngs) <- getNgrams cId maybeListId tabType
lId <- defaultList cId
getContextsNgrams cId lId tabType listType maybeLimit = do
(ngs', ngs) <- getNgrams lId tabType
lIds <- selectNodesWithUsername NodeList userMaster
result <- groupNodesByNgrams ngs <$> getContextsByNgramsOnlyUser
......@@ -121,15 +181,11 @@ getContextsNgrams cId maybeListId tabType listType maybeLimit = do
getNgrams :: (HasMail env, HasNodeStory env err m)
=> CorpusId -> Maybe ListId -> TabType
=> ListId -> TabType
-> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
, HashMap NgramsTerm (Maybe RootTerm)
)
getNgrams cId maybeListId tabType = do
lId <- case maybeListId of
Nothing -> defaultList cId
Just lId' -> pure lId'
getNgrams lId tabType = do
lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType) <$> getRepo' [lId]
let maybeSyn = HM.unions $ map (\t -> filterListWithRoot t lists)
......
......@@ -19,6 +19,7 @@ Ngrams connection to the Database.
module Gargantext.Database.Schema.Ngrams
where
import Data.Maybe (fromMaybe)
import Data.HashMap.Strict (HashMap)
import Data.Hashable (Hashable)
import Codec.Serialise (Serialise())
......@@ -32,6 +33,7 @@ import Gargantext.Core.Types (TODO(..), Typed(..))
import Gargantext.Prelude
import Servant (FromHttpApiData(..), Proxy(..), ToHttpApiData(..))
import Text.Read (read)
import Gargantext.Core (HasDBid(..))
import Gargantext.Database.Types
import Gargantext.Database.Schema.Prelude
import qualified Database.PostgreSQL.Simple as PGS
......@@ -82,6 +84,7 @@ data NgramsType = Authors | Institutes | Sources | NgramsTerms
instance Serialise NgramsType
ngramsTypes :: [NgramsType]
ngramsTypes = [minBound..]
......@@ -141,6 +144,16 @@ fromNgramsTypeId id = lookup id
| nt <- [minBound .. maxBound] :: [NgramsType]
]
unNgramsTypeId :: NgramsTypeId -> Int
unNgramsTypeId (NgramsTypeId i) = i
toNgramsTypeId :: Int -> NgramsTypeId
toNgramsTypeId i = NgramsTypeId i
instance HasDBid NgramsType where
toDBid = unNgramsTypeId . ngramsTypeId
fromDBid = fromMaybe (panic "NgramsType id not indexed") . fromNgramsTypeId . toNgramsTypeId
------------------------------------------------------------------------
------------------------------------------------------------------------
-- | TODO put it in Gargantext.Core.Text.Ngrams
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment