Commit ed19ff44 authored by Alexandre Delanoë's avatar Alexandre Delanoë

[METRICS] Vector of metrics.

parent ed1c33f3
......@@ -79,7 +79,7 @@ groupNodesByNgrams syn occs = Map.fromListWith (<>) occs'
where
occs' = map toSyn (Map.toList occs)
toSyn (t,ns) = case Map.lookup t syn of
Nothing -> panic $ "Garg.API.Ngrams.Tools: groupNodesByNgrams, unknown key: " <> t
Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> t
Just r -> case r of
Nothing -> (t, ns)
Just r' -> (r',ns)
......
......@@ -47,7 +47,7 @@ import Gargantext.API.Search ( SearchAPI, searchIn, SearchInQuery)
import Gargantext.Core.Types (Offset, Limit, ListType(..))
import Gargantext.Core.Types.Main (Tree, NodeTree)
import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..),FacetChart,runViewAuthorsDoc)
import Gargantext.Database.Metrics (getMetrics')
import qualified Gargantext.Database.Metrics as Metrics
import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
import Gargantext.Database.Node.Children (getChildren)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
......@@ -394,12 +394,12 @@ type MetricsAPI = Summary "SepGen IncExc metrics"
getMetrics :: NodeId -> GargServer MetricsAPI
getMetrics cId maybeListId tabType maybeLimit = do
(ngs', scores) <- getMetrics' cId maybeListId tabType maybeLimit
(ngs', scores) <- Metrics.getMetrics' cId maybeListId tabType maybeLimit
let
metrics = map (\(Scored t s1 s2) -> Metric t s1 s2 (listType t ngs')) scores
errorMsg = "API.Node.metrics: key absent"
metrics = map (\(Scored t s1 s2) -> Metric t s1 s2 (listType t ngs')) scores
listType t m = maybe (panic errorMsg) fst $ Map.lookup t m
errorMsg = "API.Node.metrics: key absent"
pure $ Metrics metrics
......
......@@ -11,6 +11,7 @@ Node API
-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
......@@ -20,31 +21,62 @@ module Gargantext.Database.Metrics
import Data.Map (Map)
import Data.Text (Text)
import Gargantext.API.Ngrams (TabType(..), ngramsTypeFromTabType)
import Gargantext.API.Ngrams.Tools
import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm)
import Gargantext.Core.Types (ListType(..), Limit)
import Gargantext.Database.Flow (FlowCmdM)
import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser, getTficfWith)
import Gargantext.Database.Schema.Node (defaultList)
import Gargantext.Database.Types.Node (ListId, CorpusId)
import Gargantext.Database.Flow (getOrMkRootWithCorpus)
import Gargantext.Database.Config (userMaster)
import Gargantext.Prelude
import Gargantext.Text.Metrics (scored, Scored(..))
import Gargantext.Text.Metrics (scored, Scored(..), localMetrics, toScored)
import Servant (ServantErr)
import qualified Data.Map as Map
import qualified Data.Map as Map
import qualified Data.Vector.Storable as Vec
getMetrics' :: FlowCmdM env ServantErr m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m (Map Text (ListType, Maybe Text), [Scored Text])
getMetrics' cId maybeListId tabType maybeLimit = do
(ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
pure (ngs, scored myCooc)
lId <- case maybeListId of
Nothing -> defaultList cId
Just lId' -> pure lId'
let ngramsType = ngramsTypeFromTabType tabType
getMetrics :: FlowCmdM env ServantErr m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m (Map Text (ListType, Maybe Text), [Scored Text])
getMetrics cId maybeListId tabType maybeLimit = do
(ngs, ngs', metrics) <- getLocalMetrics cId maybeListId tabType maybeLimit
(_masterUserId, _masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster ""
metrics' <- getTficfWith cId masterCorpusId (ngramsTypeFromTabType tabType) ngs'
pure (ngs , toScored [metrics, Map.fromList $ map (\(a,b) -> (a, Vec.fromList [fst b])) $ Map.toList metrics'])
ngs' <- mapTermListRoot [lId] ngramsType
let ngs = Map.unions $ map (\t -> filterListWithRoot t ngs')
[GraphTerm, StopTerm, CandidateTerm]
getLocalMetrics :: (FlowCmdM env ServantErr m)
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m ( Map Text (ListType, Maybe Text)
, Map Text (Maybe RootTerm)
, Map Text (Vec.Vector Double)
)
getLocalMetrics cId maybeListId tabType maybeLimit = do
(ngs, ngs', myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
pure (ngs, ngs', localMetrics myCooc)
getNgramsCooc :: (FlowCmdM env ServantErr m)
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m ( Map Text (ListType, Maybe Text)
, Map Text (Maybe RootTerm)
, Map (Text, Text) Int
)
getNgramsCooc cId maybeListId tabType maybeLimit = do
(ngs', ngs) <- getNgrams cId maybeListId tabType
let
take' Nothing xs = xs
......@@ -52,7 +84,22 @@ getMetrics' cId maybeListId tabType maybeLimit = do
myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId ngramsType (take' maybeLimit $ Map.keys ngs)
<$> getNodesByNgramsOnlyUser cId (ngramsTypeFromTabType tabType)
(take' maybeLimit $ Map.keys ngs)
pure $ (ngs', ngs, myCooc)
pure $ (ngs', scored myCooc)
getNgrams :: (FlowCmdM env ServantErr m)
=> CorpusId -> Maybe ListId -> TabType
-> m (Map Text (ListType, Maybe Text), Map Text (Maybe RootTerm))
getNgrams cId maybeListId tabType = do
lId <- case maybeListId of
Nothing -> defaultList cId
Just lId' -> pure lId'
lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType)
let maybeSyn = Map.unions $ map (\t -> filterListWithRoot t lists)
[GraphTerm, StopTerm, CandidateTerm]
pure (lists, maybeSyn)
......@@ -59,15 +59,31 @@ sortTficf :: (Map Text (Double, Set Text))
sortTficf = List.sortOn (fst . snd) . toList
getTficf' :: UserCorpusId -> MasterCorpusId -> (Text -> Text)
getTficf' :: UserCorpusId -> MasterCorpusId -> NgramsType -> (Text -> Text)
-> Cmd err (Map Text (Double, Set Text))
getTficf' u m f = do
u' <- getNodesByNgramsUser u NgramsTerms
getTficf' u m nt f = do
u' <- getNodesByNgramsUser u nt
m' <- getNodesByNgramsMaster u m
pure $ toTficfData (countNodesByNgramsWith f u')
(countNodesByNgramsWith f m')
--{-
getTficfWith :: UserCorpusId -> MasterCorpusId
-> NgramsType -> Map Text (Maybe Text)
-> Cmd err (Map Text (Double, Set Text))
getTficfWith u m nt mtxt = do
u' <- getNodesByNgramsOnlyUser u nt (Map.keys mtxt)
m' <- getNodesByNgramsMaster u m
let f x = case Map.lookup x mtxt of
Nothing -> x
Just x' -> maybe x identity x'
pure $ toTficfData (countNodesByNgramsWith f u')
(countNodesByNgramsWith f m')
--}
type Context = (Double, Map Text (Double, Set Text))
type Supra = Context
......@@ -269,7 +285,3 @@ SELECT m.node_id, m.terms FROM nodesByNgramsMaster m
RIGHT JOIN nodesByNgramsUser u ON u.id = m.id
|]
......@@ -57,7 +57,7 @@ buildNgramsOthersList uCid groupIt nt = do
buildNgramsTermsList :: Lang -> Int -> Int -> UserCorpusId -> MasterCorpusId
-> Cmd err (Map NgramsType [NgramsElement])
buildNgramsTermsList l n m uCid mCid = do
candidates <- sortTficf <$> getTficf' uCid mCid (ngramsGroup l n m)
candidates <- sortTficf <$> getTficf' uCid mCid NgramsTerms (ngramsGroup l n m)
--printDebug "candidate" (length candidates)
let termList = toTermList (isStopTerm . fst) candidates
......
......@@ -42,27 +42,37 @@ type GraphListSize = Int
type InclusionSize = Int
takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> [t]
takeScored listSize incSize = map _scored_terms
. linearTakes listSize incSize _scored_speGen
_scored_incExc
. scored
toScored :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
toScored = map2scored
. (reduceTo (Dimension 2))
. (Map.filter (\v -> Vec.length v > 1))
. (Map.unionsWith (<>))
scored :: Ord t => Map (t,t) Int -> [Scored t]
scored = map2scored . (reduceDim 2) . scored2map
scored = map2scored . (reduceTo (Dimension 2)) . scored2map
scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
-- TODO change type with (x,y)
data Scored ts = Scored
{ _scored_terms :: !ts
, _scored_incExc :: !InclusionExclusion
, _scored_speGen :: !SpecificityGenericity
} deriving (Show)
data Dimension = Dimension Int
reduceDim :: Ord t => Int -> Map t (Vec.Vector Double)
-> Map t (Vec.Vector Double)
reduceDim d ss = Map.fromList $ zip txts $ elems $ pcaReduceN ss'' d
reduceTo :: Ord t
=> Dimension
-> Map t (Vec.Vector Double)
-> Map t (Vec.Vector Double)
reduceTo (Dimension d) ss = Map.fromList $ zip txts $ elems $ pcaReduceN ss'' d
where
ss'' :: Array Int (Vec.Vector Double)
ss'' = listArray (1, List.length ss') ss'
......@@ -70,12 +80,21 @@ reduceDim d ss = Map.fromList $ zip txts $ elems $ pcaReduceN ss'' d
(txts,ss') = List.unzip $ Map.toList ss
scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
localMetrics :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
localMetrics m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
(Map.toList fi)
scores
where
(ti, fi) = createIndices m
(is, ss) = incExcSpeGen $ cooc2mat ti m
scores = DAA.toList
$ DAA.run
$ DAA.zip (DAA.use is) (DAA.use ss)
map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
-- TODO Code to be remove below
-- TODO in the textflow we end up needing these indices , it might be
-- better to compute them earlier and pass them around.
scored' :: Ord t => Map (t,t) Int -> [Scored t]
......@@ -87,6 +106,18 @@ scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) s
$ DAA.run
$ DAA.zip (DAA.use is) (DAA.use ss)
takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> [t]
takeScored listSize incSize = map _scored_terms
. linearTakes listSize incSize _scored_speGen
_scored_incExc
. scored
-- | Filter Scored data
-- >>> linearTakes 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
-- [(3,8),(6,5)]
......@@ -104,17 +135,3 @@ linearTakes gls incSize speGen incExc = take gls
. sortOn speGen
-- | Filters
{- splitKmeans k scores
TODO: benchmark with accelerate-example kmeans version
splitKmeans x xs = L.concat $ map elements
$ V.take (k-1)
$ kmeans (\i -> VU.fromList ([(_scored_incExc i :: Double)]))
euclidSq x xs
-- Kmeans split into (Clusters::Int) main clusters with Inclusion/Exclusion (relevance score)
-- Sample the main cluster ordered by specificity/genericity in (SampleBins::Double) parts
-- each parts is then ordered by Inclusion/Exclusion
-- take n scored terms in each parts where n * SampleBins = MapListSize.
-}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment