Commit ed19ff44 authored by Alexandre Delanoë

[METRICS] Vector of metrics.

parent ed1c33f3
@@ -79,7 +79,7 @@ groupNodesByNgrams syn occs = Map.fromListWith (<>) occs'
   where
     occs' = map toSyn (Map.toList occs)
     toSyn (t,ns) = case Map.lookup t syn of
-      Nothing -> panic $ "Garg.API.Ngrams.Tools: groupNodesByNgrams, unknown key: " <> t
+      Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> t
       Just r  -> case r of
         Nothing -> (t, ns)
         Just r' -> (r',ns)
@@ -47,7 +47,7 @@ import Gargantext.API.Search ( SearchAPI, searchIn, SearchInQuery)
 import Gargantext.Core.Types (Offset, Limit, ListType(..))
 import Gargantext.Core.Types.Main (Tree, NodeTree)
 import Gargantext.Database.Facet (FacetDoc , runViewDocuments, OrderBy(..),FacetChart,runViewAuthorsDoc)
-import Gargantext.Database.Metrics (getMetrics')
+import qualified Gargantext.Database.Metrics as Metrics
 import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
 import Gargantext.Database.Node.Children (getChildren)
 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
@@ -394,12 +394,12 @@ type MetricsAPI = Summary "SepGen IncExc metrics"
 getMetrics :: NodeId -> GargServer MetricsAPI
 getMetrics cId maybeListId tabType maybeLimit = do
-  (ngs', scores) <- getMetrics' cId maybeListId tabType maybeLimit
+  (ngs', scores) <- Metrics.getMetrics' cId maybeListId tabType maybeLimit
   let
     metrics = map (\(Scored t s1 s2) -> Metric t s1 s2 (listType t ngs')) scores
-    errorMsg = "API.Node.metrics: key absent"
     listType t m = maybe (panic errorMsg) fst $ Map.lookup t m
+    errorMsg = "API.Node.metrics: key absent"
   pure $ Metrics metrics
@@ -11,6 +11,7 @@ Node API
 -}
+{-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE NoImplicitPrelude #-}
 {-# LANGUAGE FlexibleContexts #-}
@@ -20,31 +21,62 @@ module Gargantext.Database.Metrics
 import Data.Map (Map)
 import Data.Text (Text)
 import Gargantext.API.Ngrams (TabType(..), ngramsTypeFromTabType)
-import Gargantext.API.Ngrams.Tools
+import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm)
 import Gargantext.Core.Types (ListType(..), Limit)
 import Gargantext.Database.Flow (FlowCmdM)
-import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
+import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser, getTficfWith)
 import Gargantext.Database.Schema.Node (defaultList)
 import Gargantext.Database.Types.Node (ListId, CorpusId)
+import Gargantext.Database.Flow (getOrMkRootWithCorpus)
+import Gargantext.Database.Config (userMaster)
 import Gargantext.Prelude
-import Gargantext.Text.Metrics (scored, Scored(..))
+import Gargantext.Text.Metrics (scored, Scored(..), localMetrics, toScored)
 import Servant (ServantErr)
 import qualified Data.Map as Map
+import qualified Data.Vector.Storable as Vec
 getMetrics' :: FlowCmdM env ServantErr m
             => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
             -> m (Map Text (ListType, Maybe Text), [Scored Text])
 getMetrics' cId maybeListId tabType maybeLimit = do
-  lId <- case maybeListId of
-    Nothing   -> defaultList cId
-    Just lId' -> pure lId'
-  let ngramsType = ngramsTypeFromTabType tabType
-  ngs' <- mapTermListRoot [lId] ngramsType
-  let ngs = Map.unions $ map (\t -> filterListWithRoot t ngs')
-                             [GraphTerm, StopTerm, CandidateTerm]
+  (ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
+  pure (ngs, scored myCooc)
+
+getMetrics :: FlowCmdM env ServantErr m
+           => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
+           -> m (Map Text (ListType, Maybe Text), [Scored Text])
+getMetrics cId maybeListId tabType maybeLimit = do
+  (ngs, ngs', metrics) <- getLocalMetrics cId maybeListId tabType maybeLimit
+  (_masterUserId, _masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster ""
+  metrics' <- getTficfWith cId masterCorpusId (ngramsTypeFromTabType tabType) ngs'
+  pure (ngs , toScored [metrics, Map.fromList $ map (\(a,b) -> (a, Vec.fromList [fst b])) $ Map.toList metrics'])
+
+getLocalMetrics :: (FlowCmdM env ServantErr m)
+                => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
+                -> m ( Map Text (ListType, Maybe Text)
+                     , Map Text (Maybe RootTerm)
+                     , Map Text (Vec.Vector Double)
+                     )
+getLocalMetrics cId maybeListId tabType maybeLimit = do
+  (ngs, ngs', myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
+  pure (ngs, ngs', localMetrics myCooc)
+
+getNgramsCooc :: (FlowCmdM env ServantErr m)
+              => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
+              -> m ( Map Text (ListType, Maybe Text)
+                   , Map Text (Maybe RootTerm)
+                   , Map (Text, Text) Int
+                   )
+getNgramsCooc cId maybeListId tabType maybeLimit = do
+  (ngs', ngs) <- getNgrams cId maybeListId tabType
   let
     take' Nothing xs = xs
@@ -52,7 +84,22 @@ getMetrics' cId maybeListId tabType maybeLimit = do
   myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal True)
                             <$> groupNodesByNgrams ngs
-                            <$> getNodesByNgramsOnlyUser cId ngramsType (take' maybeLimit $ Map.keys ngs)
-  pure $ (ngs', scored myCooc)
+                            <$> getNodesByNgramsOnlyUser cId (ngramsTypeFromTabType tabType)
+                                                             (take' maybeLimit $ Map.keys ngs)
+  pure $ (ngs', ngs, myCooc)
+
+getNgrams :: (FlowCmdM env ServantErr m)
+          => CorpusId -> Maybe ListId -> TabType
+          -> m (Map Text (ListType, Maybe Text), Map Text (Maybe RootTerm))
+getNgrams cId maybeListId tabType = do
+  lId <- case maybeListId of
+    Nothing   -> defaultList cId
+    Just lId' -> pure lId'
+  lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType)
+  let maybeSyn = Map.unions $ map (\t -> filterListWithRoot t lists)
+                                  [GraphTerm, StopTerm, CandidateTerm]
+  pure (lists, maybeSyn)
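For orientation, here is a minimal, self-contained sketch of the combination step performed in the new `getMetrics` above: the TFICF result (a map of (score, set) per term) is turned into one-component vectors and appended, term by term, to the local metric vectors. This is illustrative only; `tficfToVectors` and the sample data are hypothetical names, not part of the codebase.

```haskell
{-# LANGUAGE OverloadedStrings #-}
-- Illustrative sketch only (hypothetical sample data): per-term metric
-- vectors from different sources are appended into one vector per term.
import           Data.Set (Set)
import           Data.Text (Text)
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.Vector.Storable as Vec

-- | Turn a TFICF-style result into one-component metric vectors.
tficfToVectors :: Map.Map Text (Double, Set Text) -> Map.Map Text (Vec.Vector Double)
tficfToVectors = Map.map (\(score, _) -> Vec.fromList [score])

main :: IO ()
main = do
  let local  = Map.fromList [("graph", Vec.fromList [0.7, 0.2])] -- incExc, speGen
      global = Map.fromList [("graph", (1.3, Set.empty))]        -- TFICF score
  -- appending the two sources gives one "vector of metrics" per term
  print $ Map.unionsWith (<>) [local, tficfToVectors global]
  -- expected: fromList [("graph",[0.7,0.2,1.3])]
```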
@@ -59,15 +59,31 @@ sortTficf :: (Map Text (Double, Set Text))
 sortTficf = List.sortOn (fst . snd) . toList
 
-getTficf' :: UserCorpusId -> MasterCorpusId -> (Text -> Text)
+getTficf' :: UserCorpusId -> MasterCorpusId -> NgramsType -> (Text -> Text)
           -> Cmd err (Map Text (Double, Set Text))
-getTficf' u m f = do
-  u' <- getNodesByNgramsUser u NgramsTerms
+getTficf' u m nt f = do
+  u' <- getNodesByNgramsUser u nt
   m' <- getNodesByNgramsMaster u m
   pure $ toTficfData (countNodesByNgramsWith f u')
                      (countNodesByNgramsWith f m')
+
+--{-
+getTficfWith :: UserCorpusId -> MasterCorpusId
+             -> NgramsType -> Map Text (Maybe Text)
+             -> Cmd err (Map Text (Double, Set Text))
+getTficfWith u m nt mtxt = do
+  u' <- getNodesByNgramsOnlyUser u nt (Map.keys mtxt)
+  m' <- getNodesByNgramsMaster u m
+  let f x = case Map.lookup x mtxt of
+        Nothing -> x
+        Just x' -> maybe x identity x'
+  pure $ toTficfData (countNodesByNgramsWith f u')
+                     (countNodesByNgramsWith f m')
+--}
 
 type Context = (Double, Map Text (Double, Set Text))
 type Supra   = Context
@@ -269,7 +285,3 @@ SELECT m.node_id, m.terms FROM nodesByNgramsMaster m
 RIGHT JOIN nodesByNgramsUser u ON u.id = m.id
   |]
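A small, self-contained sketch of the remapping that `getTficfWith` applies before counting: each term is rewritten to its group root when one is known, and kept unchanged otherwise. `remapToRoot` and the sample data below are hypothetical names used only for illustration.

```haskell
{-# LANGUAGE OverloadedStrings #-}
-- Illustrative sketch only: lookup-with-fallback used to group terms
-- under their root before TFICF counting.
import           Data.Maybe (fromMaybe)
import           Data.Text (Text)
import qualified Data.Map.Strict as Map

remapToRoot :: Map.Map Text (Maybe Text) -> Text -> Text
remapToRoot roots x = case Map.lookup x roots of
  Nothing -> x              -- term not in the list: keep it as-is
  Just mr -> fromMaybe x mr -- term in the list: use its root, if any

main :: IO ()
main = do
  let roots = Map.fromList [("cats", Just "cat"), ("cat", Nothing)]
  print $ map (remapToRoot roots) ["cats", "cat", "dog"]
  -- expected: ["cat","cat","dog"]
```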
@@ -57,7 +57,7 @@ buildNgramsOthersList uCid groupIt nt = do
 buildNgramsTermsList :: Lang -> Int -> Int -> UserCorpusId -> MasterCorpusId
                      -> Cmd err (Map NgramsType [NgramsElement])
 buildNgramsTermsList l n m uCid mCid = do
-  candidates <- sortTficf <$> getTficf' uCid mCid (ngramsGroup l n m)
+  candidates <- sortTficf <$> getTficf' uCid mCid NgramsTerms (ngramsGroup l n m)
   --printDebug "candidate" (length candidates)
   let termList = toTermList (isStopTerm . fst) candidates
@@ -42,27 +42,37 @@ type GraphListSize = Int
 type InclusionSize = Int
 
-takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> [t]
-takeScored listSize incSize = map _scored_terms
-                            . linearTakes listSize incSize _scored_speGen
-                                                           _scored_incExc
-                            . scored
+toScored :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
+toScored = map2scored
+         . (reduceTo (Dimension 2))
+         . (Map.filter (\v -> Vec.length v > 1))
+         . (Map.unionsWith (<>))
 
 scored :: Ord t => Map (t,t) Int -> [Scored t]
-scored = map2scored . (reduceDim 2) . scored2map
+scored = map2scored . (reduceTo (Dimension 2)) . scored2map
+
+scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
+scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
+
+map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
+map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
+
+-- TODO change type with (x,y)
 data Scored ts = Scored
   { _scored_terms  :: !ts
   , _scored_incExc :: !InclusionExclusion
   , _scored_speGen :: !SpecificityGenericity
   } deriving (Show)
 
-reduceDim :: Ord t => Int -> Map t (Vec.Vector Double)
-          -> Map t (Vec.Vector Double)
-reduceDim d ss = Map.fromList $ zip txts $ elems $ pcaReduceN ss'' d
+data Dimension = Dimension Int
+
+reduceTo :: Ord t
+         => Dimension
+         -> Map t (Vec.Vector Double)
+         -> Map t (Vec.Vector Double)
+reduceTo (Dimension d) ss = Map.fromList $ zip txts $ elems $ pcaReduceN ss'' d
   where
     ss'' :: Array Int (Vec.Vector Double)
     ss'' = listArray (1, List.length ss') ss'
@@ -70,12 +80,21 @@ reduceDim d ss = Map.fromList $ zip txts $ elems $ pcaReduceN ss'' d
     (txts,ss') = List.unzip $ Map.toList ss
 
-scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
-scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
-
-map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
-map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
+localMetrics :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
+localMetrics m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
+                                        (Map.toList fi)
+                                        scores
+  where
+    (ti, fi) = createIndices m
+    (is, ss) = incExcSpeGen $ cooc2mat ti m
+    scores   = DAA.toList
+             $ DAA.run
+             $ DAA.zip (DAA.use is) (DAA.use ss)
 
+-- TODO Code to be remove below
 -- TODO in the textflow we end up needing these indices , it might be
 -- better to compute them earlier and pass them around.
 scored' :: Ord t => Map (t,t) Int -> [Scored t]
@@ -87,6 +106,18 @@ scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) s
   $ DAA.run
   $ DAA.zip (DAA.use is) (DAA.use ss)
 
+takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> [t]
+takeScored listSize incSize = map _scored_terms
+                            . linearTakes listSize incSize _scored_speGen
+                                                           _scored_incExc
+                            . scored
+
 -- | Filter Scored data
 -- >>> linearTakes 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
 -- [(3,8),(6,5)]
@@ -104,17 +135,3 @@ linearTakes gls incSize speGen incExc = take gls
   . sortOn speGen
 
--- | Filters
-{- splitKmeans k scores
-     TODO: benchmark with accelerate-example kmeans version
-     splitKmeans x xs = L.concat $ map elements
-                      $ V.take (k-1)
-                      $ kmeans (\i -> VU.fromList ([(_scored_incExc i :: Double)]))
-                               euclidSq x xs
--- Kmeans split into (Clusters::Int) main clusters with Inclusion/Exclusion (relevance score)
--- Sample the main cluster ordered by specificity/genericity in (SampleBins::Double) parts
--- each parts is then ordered by Inclusion/Exclusion
--- take n scored terms in each parts where n * SampleBins = MapListSize.
--}
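A toy, self-contained version of the `toScored` pipeline introduced above, with a plain truncation standing in for the PCA step (`pcaReduceN`): union the per-term metric maps, drop terms with a single component, keep two components, and read them as the two scores. `toScoredToy` and the sample data are hypothetical, for illustration only.

```haskell
{-# LANGUAGE OverloadedStrings #-}
-- Toy end-to-end sketch of the toScored pipeline shape (illustrative only).
import           Data.Text (Text)
import qualified Data.Map.Strict as Map
import qualified Data.Vector.Storable as Vec

data Scored t = Scored
  { _scored_terms  :: !t
  , _scored_incExc :: !Double
  , _scored_speGen :: !Double
  } deriving (Show)

toScoredToy :: [Map.Map Text (Vec.Vector Double)] -> [Scored Text]
toScoredToy = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds))
            . Map.toList
            . Map.map (Vec.take 2)                 -- stand-in for reduceTo (Dimension 2)
            . Map.filter (\v -> Vec.length v > 1)  -- keep terms with several metrics
            . Map.unionsWith (<>)                  -- append metric vectors per term

main :: IO ()
main = print $ toScoredToy
  [ Map.fromList [("graph", Vec.fromList [0.7, 0.2])]
  , Map.fromList [("graph", Vec.fromList [1.3])] ]
```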