Metrics.hs 4.09 KB
{-|
Module      : Gargantext.Database.Metrics
Description : Get Metrics from Storage (Database like)
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Node API
-}


{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts  #-}

module Gargantext.Database.Metrics
  where

import Data.Map (Map)
import Data.Text (Text)
import Gargantext.API.Ngrams (TabType(..), ngramsTypeFromTabType)
import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm)
import Gargantext.Core.Types (ListType(..), Limit, NodeType(..))
import Gargantext.Database.Flow (FlowCmdM)
import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser, getTficfWith)
import Gargantext.Database.Node.Select
import Gargantext.Database.Schema.Node (defaultList)
import Gargantext.Database.Types.Node (ListId, CorpusId, HyperdataCorpus)
import Gargantext.Database.Flow (getOrMkRootWithCorpus)
import Gargantext.Database.Config (userMaster)
import Gargantext.Prelude
import Gargantext.Text.Metrics (scored, Scored(..), localMetrics, toScored)
import qualified Data.Map    as Map
import qualified Data.Vector.Storable as Vec


getMetrics' :: FlowCmdM env err m
            => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
            -> m (Map Text (ListType, Maybe Text), [Scored Text])
getMetrics' cId maybeListId tabType maybeLimit = do
  (ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit 
  pure (ngs, scored myCooc)


getMetrics :: FlowCmdM env err m
            => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
            -> m (Map Text (ListType, Maybe Text), [Scored Text])
getMetrics cId maybeListId tabType maybeLimit = do
  (ngs, ngs', metrics)    <- getLocalMetrics cId maybeListId tabType maybeLimit
  
  (_masterUserId, _masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster "" (Nothing :: Maybe HyperdataCorpus)
  
  lId  <- defaultList cId
  lIds <- selectNodesWithUsername NodeList userMaster
  
  metrics' <- getTficfWith cId masterCorpusId (lIds <> [lId]) (ngramsTypeFromTabType tabType) ngs'

  pure (ngs , toScored [metrics, Map.fromList $ map (\(a,b) -> (a, Vec.fromList [fst b])) $ Map.toList metrics'])


getLocalMetrics  :: (FlowCmdM env err m)
            => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
          -> m ( Map Text (ListType, Maybe Text)
               , Map Text (Maybe RootTerm)
                 , Map Text (Vec.Vector Double)
                 )
getLocalMetrics cId maybeListId tabType maybeLimit = do
  (ngs, ngs', myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit 
  pure (ngs, ngs', localMetrics myCooc)


getNgramsCooc :: (FlowCmdM env err m)
            => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
            -> m ( Map Text (ListType, Maybe Text)
                 , Map Text (Maybe RootTerm)
                 , Map (Text, Text) Int
                 )
getNgramsCooc cId maybeListId tabType maybeLimit = do
  (ngs', ngs) <- getNgrams cId maybeListId tabType
  
  let
    take' Nothing xs  = xs
    take' (Just n) xs = take n xs

  lId  <- defaultList cId
  lIds <- selectNodesWithUsername NodeList userMaster

  myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal True)
                            <$> groupNodesByNgrams ngs
                            <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) (ngramsTypeFromTabType tabType)
                                                             (take' maybeLimit $ Map.keys ngs)
  pure $ (ngs', ngs, myCooc)



getNgrams :: (FlowCmdM env err m)
            => CorpusId -> Maybe ListId -> TabType
            -> m (Map Text (ListType, Maybe Text), Map Text (Maybe RootTerm))
getNgrams cId maybeListId tabType = do
  lId <- case maybeListId of
    Nothing   -> defaultList cId
    Just lId' -> pure lId'

  lists    <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType)
  let maybeSyn = Map.unions $ map (\t -> filterListWithRoot t lists)
                             [GraphTerm, StopTerm, CandidateTerm]
  pure (lists, maybeSyn)