Commit 9813fc11 authored by Alexandre Delanoë's avatar Alexandre Delanoë

Merge branch 'dev-merge' into dev

parents 69c6ee20 13d7bad7
...@@ -150,6 +150,7 @@ library: ...@@ -150,6 +150,7 @@ library:
- full-text-search - full-text-search
- fullstop - fullstop
- graphviz - graphviz
- hashable
- haskell-igraph - haskell-igraph
- hlcm - hlcm
- hsparql - hsparql
......
...@@ -19,7 +19,8 @@ module Gargantext.API.Metrics ...@@ -19,7 +19,8 @@ module Gargantext.API.Metrics
where where
import Control.Lens import Control.Lens
import qualified Data.Map as Map import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HM
import Data.Text (Text) import Data.Text (Text)
import Data.Time (UTCTime) import Data.Time (UTCTime)
import Servant import Servant
...@@ -78,7 +79,7 @@ getScatter cId maybeListId tabType _maybeLimit = do ...@@ -78,7 +79,7 @@ getScatter cId maybeListId tabType _maybeLimit = do
Nothing -> defaultList cId Nothing -> defaultList cId
node <- getNodeWith listId (Proxy :: Proxy HyperdataList) node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let HyperdataList { _hl_scatter = scatterMap } = node ^. node_hyperdata let HyperdataList { _hl_scatter = scatterMap } = node ^. node_hyperdata
mChart = Map.lookup tabType scatterMap mChart = HM.lookup tabType scatterMap
chart <- case mChart of chart <- case mChart of
Just chart -> pure chart Just chart -> pure chart
...@@ -111,9 +112,9 @@ updateScatter' cId maybeListId tabType maybeLimit = do ...@@ -111,9 +112,9 @@ updateScatter' cId maybeListId tabType maybeLimit = do
(ngs', scores) <- Metrics.getMetrics cId maybeListId tabType maybeLimit (ngs', scores) <- Metrics.getMetrics cId maybeListId tabType maybeLimit
let let
metrics = map (\(Scored t s1 s2) -> Metric t s1 s2 (listType t ngs')) metrics = fmap (\(Scored t s1 s2) -> Metric t s1 s2 (listType t ngs'))
$ map normalizeLocal scores $ fmap normalizeLocal scores
listType t m = maybe (panic errorMsg) fst $ Map.lookup t m listType t m = maybe (panic errorMsg) fst $ HM.lookup t m
errorMsg = "API.Node.metrics: key absent" errorMsg = "API.Node.metrics: key absent"
listId <- case maybeListId of listId <- case maybeListId of
...@@ -122,7 +123,7 @@ updateScatter' cId maybeListId tabType maybeLimit = do ...@@ -122,7 +123,7 @@ updateScatter' cId maybeListId tabType maybeLimit = do
node <- getNodeWith listId (Proxy :: Proxy HyperdataList) node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let hl = node ^. node_hyperdata let hl = node ^. node_hyperdata
scatterMap = hl ^. hl_scatter scatterMap = hl ^. hl_scatter
_ <- updateHyperdata listId $ hl { _hl_scatter = Map.insert tabType (Metrics metrics) scatterMap } _ <- updateHyperdata listId $ hl { _hl_scatter = HM.insert tabType (Metrics metrics) scatterMap }
pure $ Metrics metrics pure $ Metrics metrics
...@@ -172,7 +173,7 @@ getChart cId _start _end maybeListId tabType = do ...@@ -172,7 +173,7 @@ getChart cId _start _end maybeListId tabType = do
Nothing -> defaultList cId Nothing -> defaultList cId
node <- getNodeWith listId (Proxy :: Proxy HyperdataList) node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let chartMap = node ^. node_hyperdata ^. hl_chart let chartMap = node ^. node_hyperdata ^. hl_chart
mChart = Map.lookup tabType chartMap mChart = HM.lookup tabType chartMap
chart <- case mChart of chart <- case mChart of
Just chart -> pure chart Just chart -> pure chart
...@@ -209,7 +210,7 @@ updateChart' cId maybeListId tabType _maybeLimit = do ...@@ -209,7 +210,7 @@ updateChart' cId maybeListId tabType _maybeLimit = do
let hl = node ^. node_hyperdata let hl = node ^. node_hyperdata
chartMap = hl ^. hl_chart chartMap = hl ^. hl_chart
h <- histoData cId h <- histoData cId
_ <- updateHyperdata listId $ hl { _hl_chart = Map.insert tabType (ChartMetrics h) chartMap } _ <- updateHyperdata listId $ hl { _hl_chart = HM.insert tabType (ChartMetrics h) chartMap }
pure $ ChartMetrics h pure $ ChartMetrics h
...@@ -258,7 +259,7 @@ getPie cId _start _end maybeListId tabType = do ...@@ -258,7 +259,7 @@ getPie cId _start _end maybeListId tabType = do
Nothing -> defaultList cId Nothing -> defaultList cId
node <- getNodeWith listId (Proxy :: Proxy HyperdataList) node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let pieMap = node ^. node_hyperdata ^. hl_pie let pieMap = node ^. node_hyperdata ^. hl_pie
mChart = Map.lookup tabType pieMap mChart = HM.lookup tabType pieMap
chart <- case mChart of chart <- case mChart of
Just chart -> pure chart Just chart -> pure chart
...@@ -296,7 +297,7 @@ updatePie' cId maybeListId tabType _maybeLimit = do ...@@ -296,7 +297,7 @@ updatePie' cId maybeListId tabType _maybeLimit = do
pieMap = hl ^. hl_pie pieMap = hl ^. hl_pie
p <- chartData cId (ngramsTypeFromTabType tabType) MapTerm p <- chartData cId (ngramsTypeFromTabType tabType) MapTerm
_ <- updateHyperdata listId $ hl { _hl_pie = Map.insert tabType (ChartMetrics p) pieMap } _ <- updateHyperdata listId $ hl { _hl_pie = HM.insert tabType (ChartMetrics p) pieMap }
pure $ ChartMetrics p pure $ ChartMetrics p
...@@ -349,7 +350,7 @@ getTree cId _start _end maybeListId tabType listType = do ...@@ -349,7 +350,7 @@ getTree cId _start _end maybeListId tabType listType = do
node <- getNodeWith listId (Proxy :: Proxy HyperdataList) node <- getNodeWith listId (Proxy :: Proxy HyperdataList)
let treeMap = node ^. node_hyperdata ^. hl_tree let treeMap = node ^. node_hyperdata ^. hl_tree
mChart = Map.lookup tabType treeMap mChart = HM.lookup tabType treeMap
chart <- case mChart of chart <- case mChart of
Just chart -> pure chart Just chart -> pure chart
...@@ -387,7 +388,7 @@ updateTree' cId maybeListId tabType listType = do ...@@ -387,7 +388,7 @@ updateTree' cId maybeListId tabType listType = do
let hl = node ^. node_hyperdata let hl = node ^. node_hyperdata
treeMap = hl ^. hl_tree treeMap = hl ^. hl_tree
t <- treeData cId (ngramsTypeFromTabType tabType) listType t <- treeData cId (ngramsTypeFromTabType tabType) listType
_ <- updateHyperdata listId $ hl { _hl_tree = Map.insert tabType (ChartMetrics t) treeMap } _ <- updateHyperdata listId $ hl { _hl_tree = HM.insert tabType (ChartMetrics t) treeMap }
pure $ ChartMetrics t pure $ ChartMetrics t
......
...@@ -9,13 +9,18 @@ Portability : POSIX ...@@ -9,13 +9,18 @@ Portability : POSIX
-} -}
{-# LANGUAGE TypeFamilies #-}
module Gargantext.API.Ngrams.Tools module Gargantext.API.Ngrams.Tools
where where
import Control.Concurrent import Control.Concurrent
import Control.Lens (_Just, (^.), at, view) import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
import Control.Monad.Reader import Control.Monad.Reader
import Data.Hashable (Hashable)
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HM
import Gargantext.Data.HashMap.Strict.Utils as HM
import Data.Map.Strict (Map) import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
import qualified Data.Set as Set import qualified Data.Set as Set
...@@ -31,7 +36,7 @@ import Gargantext.Prelude ...@@ -31,7 +36,7 @@ import Gargantext.Prelude
mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
mergeNgramsElement _neOld neNew = neNew mergeNgramsElement _neOld neNew = neNew
type RootTerm = Text type RootTerm = NgramsTerm
getRepo :: RepoCmdM env err m => m NgramsRepo getRepo :: RepoCmdM env err m => m NgramsRepo
getRepo = do getRepo = do
...@@ -39,8 +44,8 @@ getRepo = do ...@@ -39,8 +44,8 @@ getRepo = do
liftBase $ readMVar v liftBase $ readMVar v
listNgramsFromRepo :: [ListId] -> NgramsType listNgramsFromRepo :: [ListId] -> NgramsType
-> NgramsRepo -> Map Text NgramsRepoElement -> NgramsRepo -> Map NgramsTerm NgramsRepoElement
listNgramsFromRepo nodeIds ngramsType repo = Map.mapKeys unNgramsTerm ngrams listNgramsFromRepo nodeIds ngramsType repo = ngrams
where where
ngramsMap = repo ^. r_state . at ngramsType . _Just ngramsMap = repo ^. r_state . at ngramsType . _Just
...@@ -55,73 +60,88 @@ listNgramsFromRepo nodeIds ngramsType repo = Map.mapKeys unNgramsTerm ngrams ...@@ -55,73 +60,88 @@ listNgramsFromRepo nodeIds ngramsType repo = Map.mapKeys unNgramsTerm ngrams
-- be properly guarded. -- be properly guarded.
getListNgrams :: RepoCmdM env err m getListNgrams :: RepoCmdM env err m
=> [ListId] -> NgramsType => [ListId] -> NgramsType
-> m (Map Text NgramsRepoElement) -> m (Map NgramsTerm NgramsRepoElement)
getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> getRepo getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> getRepo
getTermsWith :: (RepoCmdM env err m, Ord a) getTermsWith :: (RepoCmdM env err m, Eq a, Hashable a)
=> (Text -> a ) -> [ListId] => (NgramsTerm -> a) -> [ListId]
-> NgramsType -> ListType -> NgramsType -> ListType
-> m (Map a [a]) -> m (HashMap a [a])
getTermsWith f ls ngt lt = Map.fromListWith (<>) getTermsWith f ls ngt lt = HM.fromListWith (<>)
<$> map (toTreeWith f) <$> map toTreeWith
<$> Map.toList <$> Map.toList
<$> Map.filter (\f' -> (fst f') == lt) <$> Map.filter (\f' -> fst f' == lt)
<$> mapTermListRoot ls ngt <$> mapTermListRoot ls ngt
<$> getRepo <$> getRepo
where where
toTreeWith f'' (t, (_lt, maybeRoot)) = case maybeRoot of toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
Nothing -> (f'' t, []) Nothing -> (f t, [])
Just r -> (f'' r, map f'' [t]) Just r -> (f r, [f t])
mapTermListRoot :: [ListId] mapTermListRoot :: [ListId]
-> NgramsType -> NgramsType
-> NgramsRepo -> NgramsRepo
-> Map Text (ListType, (Maybe Text)) -> Map NgramsTerm (ListType, Maybe NgramsTerm)
mapTermListRoot nodeIds ngramsType repo = mapTermListRoot nodeIds ngramsType repo =
Map.fromList [ (t, (_nre_list nre, unNgramsTerm <$> _nre_root nre)) (\nre -> (_nre_list nre, _nre_root nre)) <$>
| (t, nre) <- Map.toList ngrams listNgramsFromRepo nodeIds ngramsType repo
]
where ngrams = listNgramsFromRepo nodeIds ngramsType repo filterListWithRootHashMap :: ListType
-> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
-> HashMap NgramsTerm (Maybe RootTerm)
filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
where
isMapTerm (l, maybeRoot) = case maybeRoot of
Nothing -> l == lt
Just r -> case HM.lookup r m of
Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> unNgramsTerm r
Just (l',_) -> l' == lt
filterListWithRoot :: ListType filterListWithRoot :: ListType
-> Map Text (ListType, Maybe Text) -> Map NgramsTerm (ListType, Maybe NgramsTerm)
-> Map Text (Maybe RootTerm) -> Map NgramsTerm (Maybe RootTerm)
filterListWithRoot lt m = Map.fromList filterListWithRoot lt m = snd <$> Map.filter isMapTerm m
$ map (\(t,(_,r)) -> (t,r))
$ filter isMapTerm (Map.toList m)
where where
isMapTerm (_t,(l, maybeRoot)) = case maybeRoot of isMapTerm (l, maybeRoot) = case maybeRoot of
Nothing -> l == lt Nothing -> l == lt
Just r -> case Map.lookup r m of Just r -> case Map.lookup r m of
Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> r Nothing -> panic $ "Garg.API.Ngrams.Tools: filterWithRoot, unknown key: " <> unNgramsTerm r
Just (l',_) -> l' == lt Just (l',_) -> l' == lt
groupNodesByNgrams :: Map Text (Maybe RootTerm) groupNodesByNgrams :: ( At root_map
-> Map Text (Set NodeId) , Index root_map ~ NgramsTerm
-> Map Text (Set NodeId) , IxValue root_map ~ Maybe RootTerm
groupNodesByNgrams syn occs = Map.fromListWith (<>) occs' )
=> root_map
-> HashMap NgramsTerm (Set NodeId)
-> HashMap NgramsTerm (Set NodeId)
groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
where where
occs' = map toSyn (Map.toList occs) occs' = map toSyn (HM.toList occs)
toSyn (t,ns) = case Map.lookup t syn of toSyn (t,ns) = case syn ^. at t of
Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> t Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
Just r -> case r of Just r -> case r of
Nothing -> (t, ns) Nothing -> (t, ns)
Just r' -> (r',ns) Just r' -> (r',ns)
data Diagonal = Diagonal Bool data Diagonal = Diagonal Bool
getCoocByNgrams :: Diagonal -> Map Text (Set NodeId) -> Map (Text, Text) Int getCoocByNgrams :: Diagonal -> HashMap Text (Set NodeId) -> HashMap (Text, Text) Int
getCoocByNgrams = getCoocByNgrams' identity getCoocByNgrams = getCoocByNgrams' identity
getCoocByNgrams' :: (Ord a, Ord c) => (b -> Set c) -> Diagonal -> Map a b -> Map (a, a) Int getCoocByNgrams' :: (Hashable a, Ord a, Ord c) => (b -> Set c) -> Diagonal -> HashMap a b -> HashMap (a, a) Int
getCoocByNgrams' f (Diagonal diag) m = getCoocByNgrams' f (Diagonal diag) m =
Map.fromList [( (t1,t2) HM.fromList [( (t1,t2)
, maybe 0 Set.size $ Set.intersection , maybe 0 Set.size $ Set.intersection
<$> (fmap f $ Map.lookup t1 m) <$> (fmap f $ HM.lookup t1 m)
<*> (fmap f $ Map.lookup t2 m) <*> (fmap f $ HM.lookup t2 m)
) | (t1,t2) <- case diag of )
True -> [ (x,y) | x <- Map.keys m, y <- Map.keys m, x <= y] | (t1,t2) <- if diag then
False -> listToCombi identity (Map.keys m) [ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
-- more efficient to enumerate all the y <= x.
else
listToCombi identity ks
] ]
where ks = HM.keys m
\ No newline at end of file
...@@ -19,6 +19,7 @@ import Data.Aeson hiding ((.=)) ...@@ -19,6 +19,7 @@ import Data.Aeson hiding ((.=))
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Either (Either(..)) import Data.Either (Either(..))
import Data.Foldable import Data.Foldable
import Data.Hashable (Hashable)
import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap import qualified Data.HashMap.Strict.InsOrd as InsOrdHashMap
import qualified Data.List as List import qualified Data.List as List
import Data.Map.Strict (Map) import Data.Map.Strict (Map)
...@@ -60,6 +61,9 @@ data TabType = Docs | Trash | MoreFav | MoreTrash ...@@ -60,6 +61,9 @@ data TabType = Docs | Trash | MoreFav | MoreTrash
| Contacts | Contacts
deriving (Bounded, Enum, Eq, Generic, Ord, Show) deriving (Bounded, Enum, Eq, Generic, Ord, Show)
instance Hashable TabType
instance FromHttpApiData TabType instance FromHttpApiData TabType
where where
parseUrlPiece "Docs" = pure Docs parseUrlPiece "Docs" = pure Docs
...@@ -120,7 +124,7 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where ...@@ -120,7 +124,7 @@ instance (ToJSONKey a, ToSchema a) => ToSchema (MSet a) where
------------------------------------------------------------------------ ------------------------------------------------------------------------
newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text } newtype NgramsTerm = NgramsTerm { unNgramsTerm :: Text }
deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema) deriving (Ord, Eq, Show, Generic, ToJSONKey, ToJSON, FromJSON, Semigroup, Arbitrary, Serialise, ToSchema, Hashable)
instance FromJSONKey NgramsTerm where instance FromJSONKey NgramsTerm where
fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ strip t fromJSONKey = FromJSONKeyTextParser $ \t -> pure $ NgramsTerm $ strip t
......
...@@ -17,6 +17,7 @@ module Gargantext.API.Node.Corpus.Export ...@@ -17,6 +17,7 @@ module Gargantext.API.Node.Corpus.Export
where where
import Data.HashMap.Strict (HashMap)
import Data.Map (Map) import Data.Map (Map)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Data.Set (Set) import Data.Set (Set)
...@@ -76,7 +77,7 @@ getNodeNgrams :: HasNodeError err ...@@ -76,7 +77,7 @@ getNodeNgrams :: HasNodeError err
-> Maybe ListId -> Maybe ListId
-> NgramsType -> NgramsType
-> NgramsRepo -> NgramsRepo
-> Cmd err (Map NodeId (Set Text)) -> Cmd err (HashMap NodeId (Set Text))
getNodeNgrams cId lId' nt repo = do getNodeNgrams cId lId' nt repo = do
lId <- case lId' of lId <- case lId' of
Nothing -> defaultList cId Nothing -> defaultList cId
......
...@@ -188,19 +188,19 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do ...@@ -188,19 +188,19 @@ buildNgramsTermsList user uCid mCid groupParams (nt, _mapListSize)= do
selectedTerms selectedTerms
let let
groupedTreeScores_SetNodeId :: Map Text (GroupedTreeScores (Set NodeId)) groupedTreeScores_SetNodeId :: HashMap Text (GroupedTreeScores (Set NodeId))
groupedTreeScores_SetNodeId = setScoresWithMap mapTextDocIds (groupedMonoHead <> groupedMultHead) groupedTreeScores_SetNodeId = setScoresWithMap mapTextDocIds (groupedMonoHead <> groupedMultHead)
-- | Coocurrences computation -- | Coocurrences computation
--, t1 >= t2 -- permute byAxis diag -- since matrix symmetric --, t1 >= t2 -- permute byAxis diag -- since matrix symmetric
let mapCooc = Map.filter (>2) let mapCooc = HM.filter (>2)
$ Map.fromList [ ((t1, t2), Set.size $ Set.intersection s1 s2) $ HM.fromList [ ((t1, t2), Set.size $ Set.intersection s1 s2)
| (t1, s1) <- mapStemNodeIds | (t1, s1) <- mapStemNodeIds
, (t2, s2) <- mapStemNodeIds , (t2, s2) <- mapStemNodeIds
] ]
where where
mapStemNodeIds = Map.toList mapStemNodeIds = HM.toList
$ Map.map viewScores $ HM.map viewScores
$ groupedTreeScores_SetNodeId $ groupedTreeScores_SetNodeId
let let
-- computing scores -- computing scores
......
...@@ -30,19 +30,20 @@ import qualified Data.Array.Accelerate as DAA ...@@ -30,19 +30,20 @@ import qualified Data.Array.Accelerate as DAA
import qualified Data.Array.Accelerate.Interpreter as DAA import qualified Data.Array.Accelerate.Interpreter as DAA
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Vector as V
import qualified Data.Vector.Storable as Vec import qualified Data.Vector.Storable as Vec
type MapListSize = Int type MapListSize = Int
type InclusionSize = Int type InclusionSize = Int
scored :: Ord t => Map (t,t) Int -> [Scored t] scored :: Ord t => Map (t,t) Int -> V.Vector (Scored t)
scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
where where
scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double) scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t] map2scored :: Ord t => Map t (Vec.Vector Double) -> V.Vector (Scored t)
map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList map2scored = V.map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . V.fromList . Map.toList
-- TODO change type with (x,y) -- TODO change type with (x,y)
data Scored ts = Scored data Scored ts = Scored
......
...@@ -14,11 +14,12 @@ Portability : POSIX ...@@ -14,11 +14,12 @@ Portability : POSIX
module Gargantext.Core.Viz.Chart module Gargantext.Core.Viz.Chart
where where
import Data.List (unzip, sortOn) import Data.List (sortOn)
import Data.Map (toList) import Data.Map (toList)
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Maybe (catMaybes) import Data.Maybe (catMaybes)
import qualified Data.Vector as V
import Gargantext.Core.Types.Main import Gargantext.Core.Types.Main
import Gargantext.Database.Admin.Config import Gargantext.Database.Admin.Config
...@@ -42,8 +43,9 @@ import Gargantext.Core.Viz.Types ...@@ -42,8 +43,9 @@ import Gargantext.Core.Viz.Types
histoData :: CorpusId -> Cmd err Histo histoData :: CorpusId -> Cmd err Histo
histoData cId = do histoData cId = do
dates <- selectDocsDates cId dates <- selectDocsDates cId
let (ls, css) = unzip let (ls, css) = V.unzip
$ sortOn fst $ V.fromList
$ sortOn fst -- TODO Vector.sortOn
$ toList $ toList
$ occurrencesWith identity dates $ occurrencesWith identity dates
pure (Histo ls css) pure (Histo ls css)
...@@ -65,8 +67,8 @@ chartData cId nt lt = do ...@@ -65,8 +67,8 @@ chartData cId nt lt = do
(_total,mapTerms) <- countNodesByNgramsWith (group dico) (_total,mapTerms) <- countNodesByNgramsWith (group dico)
<$> getNodesByNgramsOnlyUser cId (ls' <> ls) nt terms <$> getNodesByNgramsOnlyUser cId (ls' <> ls) nt terms
let (dates, count) = unzip $ map (\(t,(d,_)) -> (t, d)) $ Map.toList mapTerms let (dates, count) = V.unzip $ fmap (\(t,(d,_)) -> (t, d)) $ V.fromList $ Map.toList mapTerms
pure (Histo dates (map round count)) pure (Histo (dates) (round <$> count))
treeData :: FlowCmdM env err m treeData :: FlowCmdM env err m
......
...@@ -150,7 +150,7 @@ computeGraph cId d nt repo = do ...@@ -150,7 +150,7 @@ computeGraph cId d nt repo = do
let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo let ngs = filterListWithRoot MapTerm $ mapTermListRoot [lId] nt repo
-- TODO split diagonal -- TODO split diagonal
myCooc <- Map.filter (>1) myCooc <- HM.filter (>1)
<$> getCoocByNgrams (Diagonal True) <$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs <$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs) <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) nt (Map.keys ngs)
......
...@@ -9,6 +9,8 @@ module Gargantext.Core.Viz.Types where ...@@ -9,6 +9,8 @@ module Gargantext.Core.Viz.Types where
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Swagger import Data.Swagger
import Data.Vector (Vector)
import qualified Data.Vector as V
import Protolude import Protolude
import Test.QuickCheck (elements) import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary) import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
...@@ -23,8 +25,8 @@ data Chart = ChartHisto | ChartScatter | ChartPie ...@@ -23,8 +25,8 @@ data Chart = ChartHisto | ChartScatter | ChartPie
deriving (Generic) deriving (Generic)
-- TODO use UTCTime -- TODO use UTCTime
data Histo = Histo { histo_dates :: ![Text] data Histo = Histo { histo_dates :: !(Vector Text)
, histo_count :: ![Int] , histo_count :: !(Vector Int)
} }
deriving (Show, Generic) deriving (Show, Generic)
...@@ -32,7 +34,7 @@ instance ToSchema Histo where ...@@ -32,7 +34,7 @@ instance ToSchema Histo where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "histo_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "histo_")
instance Arbitrary Histo instance Arbitrary Histo
where where
arbitrary = elements [ Histo ["2012"] [1] arbitrary = elements [ Histo (V.singleton "2012") (V.singleton 1)
, Histo ["2013"] [1] , Histo (V.singleton "2013") (V.singleton 1)
] ]
deriveJSON (unPrefix "histo_") ''Histo deriveJSON (unPrefix "histo_") ''Histo
...@@ -17,13 +17,14 @@ module Gargantext.Database.Action.Flow.Pairing ...@@ -17,13 +17,14 @@ module Gargantext.Database.Action.Flow.Pairing
where where
import Control.Lens (_Just, (^.)) import Control.Lens (_Just, (^.))
import Data.Map (Map, fromList, fromListWith) import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HM
import Data.Maybe (catMaybes, fromMaybe) import Data.Maybe (catMaybes, fromMaybe)
import Data.Set (Set) import Data.Set (Set)
import Data.Text (Text)
import Gargantext.API.Ngrams.Tools import Gargantext.API.Ngrams.Tools
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.API.Prelude (GargNoServer) import Gargantext.API.Prelude (GargNoServer)
import Gargantext.Core.Types (TableResult(..), Term) import Gargantext.Core.Types (TableResult(..))
import Gargantext.Core.Types.Main import Gargantext.Core.Types.Main
import Gargantext.Database import Gargantext.Database
import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser) import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsOnlyUser)
...@@ -79,7 +80,7 @@ dataPairing :: AnnuaireId ...@@ -79,7 +80,7 @@ dataPairing :: AnnuaireId
-> (CorpusId, ListId, NgramsType) -> (CorpusId, ListId, NgramsType)
-> (ContactName -> Projected) -> (ContactName -> Projected)
-> (DocAuthor -> Projected) -> (DocAuthor -> Projected)
-> GargNoServer (Map ContactId (Set DocId)) -> GargNoServer (HashMap ContactId (Set DocId))
dataPairing aId (cId, lId, ngt) fc fa = do dataPairing aId (cId, lId, ngt) fc fa = do
mc <- getNgramsContactId aId mc <- getNgramsContactId aId
md <- getNgramsDocId cId lId ngt md <- getNgramsDocId cId lId ngt
...@@ -87,14 +88,14 @@ dataPairing aId (cId, lId, ngt) fc fa = do ...@@ -87,14 +88,14 @@ dataPairing aId (cId, lId, ngt) fc fa = do
printDebug "ngramsContactId" mc printDebug "ngramsContactId" mc
printDebug "ngramsDocId" md printDebug "ngramsDocId" md
let let
from = projectionFrom (Set.fromList $ Map.keys mc) fc from = projectionFrom (Set.fromList $ HM.keys mc) fc
to = projectionTo (Set.fromList $ Map.keys md) fa to = projectionTo (Set.fromList $ HM.keys md) fa
pure $ fusion mc $ align from to md pure $ fusion mc $ align from to md
prepareInsert :: Map ContactId (Set DocId) -> [NodeNode] prepareInsert :: HashMap ContactId (Set DocId) -> [NodeNode]
prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing) prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing)
$ List.concat $ List.concat
$ map (\(contactId, setDocIds) $ map (\(contactId, setDocIds)
...@@ -102,21 +103,21 @@ prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing) ...@@ -102,21 +103,21 @@ prepareInsert m = map (\(n1,n2) -> NodeNode n1 n2 Nothing Nothing)
-> (contactId, setDocId) -> (contactId, setDocId)
) $ Set.toList setDocIds ) $ Set.toList setDocIds
) )
$ Map.toList m $ HM.toList m
------------------------------------------------------------------------ ------------------------------------------------------------------------
type ContactName = Text type ContactName = NgramsTerm
type DocAuthor = Text type DocAuthor = NgramsTerm
type Projected = Text type Projected = NgramsTerm
projectionFrom :: Set ContactName -> (ContactName -> Projected) -> Map ContactName Projected projectionFrom :: Set ContactName -> (ContactName -> Projected) -> HashMap ContactName Projected
projectionFrom ss f = fromList $ map (\s -> (s, f s)) (Set.toList ss) projectionFrom ss f = HM.fromList $ map (\s -> (s, f s)) (Set.toList ss) -- use HS.toMap
projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> Map Projected (Set DocAuthor) projectionTo :: Set DocAuthor -> (DocAuthor -> Projected) -> HashMap Projected (Set DocAuthor)
projectionTo ss f = fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss) projectionTo ss f = HM.fromListWith (<>) $ map (\s -> (f s, Set.singleton s)) (Set.toList ss) -- use HS.toMap
------------------------------------------------------------------------ ------------------------------------------------------------------------
takeName :: Term -> Term takeName :: NgramsTerm -> NgramsTerm
takeName texte = DT.toLower texte' takeName (NgramsTerm texte) = NgramsTerm $ DT.toLower texte'
where where
texte' = maybe texte (\x -> if DT.length x > 3 then x else texte) texte' = maybe texte (\x -> if DT.length x > 3 then x else texte)
(lastName' texte) (lastName' texte)
...@@ -124,51 +125,51 @@ takeName texte = DT.toLower texte' ...@@ -124,51 +125,51 @@ takeName texte = DT.toLower texte'
------------------------------------------------------------------------ ------------------------------------------------------------------------
align :: Map ContactName Projected align :: HashMap ContactName Projected
-> Map Projected (Set DocAuthor) -> HashMap Projected (Set DocAuthor)
-> Map DocAuthor (Set DocId) -> HashMap DocAuthor (Set DocId)
-> Map ContactName (Set DocId) -> HashMap ContactName (Set DocId)
align mc ma md = fromListWith (<>) align mc ma md = HM.fromListWith (<>)
$ map (\c -> (c, getProjection md $ testProjection c mc ma)) $ map (\c -> (c, getProjection md $ testProjection c mc ma))
$ Map.keys mc $ HM.keys mc
where where
getProjection :: Map DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId getProjection :: HashMap DocAuthor (Set DocId) -> Set DocAuthor -> Set DocId
getProjection ma' sa' = getProjection ma' sa' =
if Set.null sa' if Set.null sa'
then Set.empty then Set.empty
else Set.unions $ sets ma' sa' else Set.unions $ sets ma' sa'
where where
sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa'' sets ma'' sa'' = Set.map (\s -> lookup s ma'') sa''
lookup s' ma''= fromMaybe Set.empty (Map.lookup s' ma'') lookup s' ma''= fromMaybe Set.empty (HM.lookup s' ma'')
testProjection :: ContactName testProjection :: ContactName
-> Map ContactName Projected -> HashMap ContactName Projected
-> Map Projected (Set DocAuthor) -> HashMap Projected (Set DocAuthor)
-> Set DocAuthor -> Set DocAuthor
testProjection cn' mc' ma' = case Map.lookup cn' mc' of testProjection cn' mc' ma' = case HM.lookup cn' mc' of
Nothing -> Set.empty Nothing -> Set.empty
Just c -> case Map.lookup c ma' of Just c -> case HM.lookup c ma' of
Nothing -> Set.empty Nothing -> Set.empty
Just a -> a Just a -> a
fusion :: Map ContactName (Set ContactId) fusion :: HashMap ContactName (Set ContactId)
-> Map ContactName (Set DocId) -> HashMap ContactName (Set DocId)
-> Map ContactId (Set DocId) -> HashMap ContactId (Set DocId)
fusion mc md = Map.fromListWith (<>) fusion mc md = HM.fromListWith (<>)
$ catMaybes $ catMaybes
$ [ (,) <$> Just cId <*> Map.lookup cn md $ [ (,) <$> Just cId <*> HM.lookup cn md
| (cn, setContactId) <- Map.toList mc | (cn, setContactId) <- HM.toList mc
, cId <- Set.toList setContactId , cId <- Set.toList setContactId
] ]
------------------------------------------------------------------------ ------------------------------------------------------------------------
getNgramsContactId :: AnnuaireId getNgramsContactId :: AnnuaireId
-> Cmd err (Map ContactName (Set NodeId)) -> Cmd err (HashMap ContactName (Set NodeId))
getNgramsContactId aId = do getNgramsContactId aId = do
contacts <- getAllContacts aId contacts <- getAllContacts aId
pure $ fromListWith (<>) pure $ HM.fromListWith (<>)
$ catMaybes $ catMaybes
$ map (\contact -> (,) <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName) $ map (\contact -> (,) <$> (NgramsTerm <$> contact^.(node_hyperdata . hc_who . _Just . cw_lastName))
<*> Just ( Set.singleton (contact^.node_id)) <*> Just ( Set.singleton (contact^.node_id))
) (tr_docs contacts) ) (tr_docs contacts)
...@@ -176,7 +177,7 @@ getNgramsContactId aId = do ...@@ -176,7 +177,7 @@ getNgramsContactId aId = do
getNgramsDocId :: CorpusId getNgramsDocId :: CorpusId
-> ListId -> ListId
-> NgramsType -> NgramsType
-> GargNoServer (Map DocAuthor (Set NodeId)) -> GargNoServer (HashMap DocAuthor (Set NodeId))
getNgramsDocId cId lId nt = do getNgramsDocId cId lId nt = do
repo <- getRepo repo <- getRepo
lIds <- selectNodesWithUsername NodeList userMaster lIds <- selectNodesWithUsername NodeList userMaster
......
...@@ -18,6 +18,7 @@ module Gargantext.Database.Action.Metrics ...@@ -18,6 +18,7 @@ module Gargantext.Database.Action.Metrics
import Data.Map (Map) import Data.Map (Map)
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Text (Text) import Data.Text (Text)
import Data.Vector (Vector)
import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType) import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType)
import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo) import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo)
...@@ -33,7 +34,7 @@ import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScore ...@@ -33,7 +34,7 @@ import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScore
getMetrics :: FlowCmdM env err m getMetrics :: FlowCmdM env err m
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m (Map Text (ListType, Maybe Text), [Scored Text]) -> m (Map Text (ListType, Maybe Text), Vector (Scored Text))
getMetrics cId maybeListId tabType maybeLimit = do getMetrics cId maybeListId tabType maybeLimit = do
(ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit (ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
pure (ngs, scored myCooc) pure (ngs, scored myCooc)
...@@ -43,7 +44,7 @@ getNgramsCooc :: (FlowCmdM env err m) ...@@ -43,7 +44,7 @@ getNgramsCooc :: (FlowCmdM env err m)
=> CorpusId -> Maybe ListId -> TabType -> Maybe Limit => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
-> m ( Map Text (ListType, Maybe Text) -> m ( Map Text (ListType, Maybe Text)
, Map Text (Maybe RootTerm) , Map Text (Maybe RootTerm)
, Map (Text, Text) Int , HashMap (Text, Text) Int
) )
getNgramsCooc cId maybeListId tabType maybeLimit = do getNgramsCooc cId maybeListId tabType maybeLimit = do
(ngs', ngs) <- getNgrams cId maybeListId tabType (ngs', ngs) <- getNgrams cId maybeListId tabType
...@@ -55,7 +56,7 @@ getNgramsCooc cId maybeListId tabType maybeLimit = do ...@@ -55,7 +56,7 @@ getNgramsCooc cId maybeListId tabType maybeLimit = do
lId <- defaultList cId lId <- defaultList cId
lIds <- selectNodesWithUsername NodeList userMaster lIds <- selectNodesWithUsername NodeList userMaster
myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal True) myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True)
<$> groupNodesByNgrams ngs <$> groupNodesByNgrams ngs
<$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) (ngramsTypeFromTabType tabType) <$> getNodesByNgramsOnlyUser cId (lIds <> [lId]) (ngramsTypeFromTabType tabType)
(take' maybeLimit $ Map.keys ngs) (take' maybeLimit $ Map.keys ngs)
......
...@@ -56,5 +56,5 @@ getMetrics' cId maybeListId tabType maybeLimit = do ...@@ -56,5 +56,5 @@ getMetrics' cId maybeListId tabType maybeLimit = do
{- {-
_ <- Learn.grid 100 110 metrics' metrics' _ <- Learn.grid 100 110 metrics' metrics'
--} --}
pure $ Map.fromListWith (<>) metrics pure $ Map.fromListWith (<>) $ Vec.toList metrics
...@@ -17,50 +17,51 @@ module Gargantext.Database.Action.Metrics.NgramsByNode ...@@ -17,50 +17,51 @@ module Gargantext.Database.Action.Metrics.NgramsByNode
where where
import Data.Map.Strict (Map, fromListWith, elems, toList) import Data.HashMap.Strict (HashMap)
import Data.Map.Strict.Patch (PatchMap, Replace, diff) import qualified Data.HashMap.Strict as HM
--import Data.Map.Strict.Patch (PatchMap, Replace, diff)
import Data.Set (Set) import Data.Set (Set)
import Data.Text (Text) import Data.Text (Text)
import Data.Tuple.Extra (second, swap) import Data.Tuple.Extra (first, second, swap)
import Database.PostgreSQL.Simple.SqlQQ (sql) import Database.PostgreSQL.Simple.SqlQQ (sql)
import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..)) import Database.PostgreSQL.Simple.Types (Values(..), QualifiedIdentifier(..))
import Debug.Trace (trace) import Debug.Trace (trace)
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Database.Admin.Config (nodeTypeId) import Gargantext.Database.Admin.Config (nodeTypeId)
import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId) import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Prelude (Cmd, runPGSQuery) import Gargantext.Database.Prelude (Cmd, runPGSQuery)
import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..)) import Gargantext.Database.Schema.Ngrams (ngramsTypeId, NgramsType(..))
import Gargantext.Data.HashMap.Strict.Utils as HM
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Database.PostgreSQL.Simple as DPS import qualified Database.PostgreSQL.Simple as DPS
-- | fst is size of Supra Corpus -- | fst is size of Supra Corpus
-- snd is Texts and size of Occurrences (different docs) -- snd is Texts and size of Occurrences (different docs)
countNodesByNgramsWith :: (Text -> Text) countNodesByNgramsWith :: (NgramsTerm -> NgramsTerm)
-> Map Text (Set NodeId) -> HashMap NgramsTerm (Set NodeId)
-> (Double, Map Text (Double, Set Text)) -> (Double, HashMap NgramsTerm (Double, Set NgramsTerm))
countNodesByNgramsWith f m = (total, m') countNodesByNgramsWith f m = (total, m')
where where
total = fromIntegral $ Set.size $ Set.unions $ elems m total = fromIntegral $ Set.size $ Set.unions $ HM.elems m
m' = Map.map ( swap . second (fromIntegral . Set.size)) m' = HM.map ( swap . second (fromIntegral . Set.size))
$ groupNodesByNgramsWith f m $ groupNodesByNgramsWith f m
groupNodesByNgramsWith :: (Text -> Text) groupNodesByNgramsWith :: (NgramsTerm -> NgramsTerm)
-> Map Text (Set NodeId) -> HashMap NgramsTerm (Set NodeId)
-> Map Text (Set Text, Set NodeId) -> HashMap NgramsTerm (Set NgramsTerm, Set NodeId)
groupNodesByNgramsWith f m = groupNodesByNgramsWith f m =
fromListWith (<>) $ map (\(t,ns) -> (f t, (Set.singleton t, ns))) HM.fromListWith (<>) $ map (\(t,ns) -> (f t, (Set.singleton t, ns)))
$ toList m $ HM.toList m
------------------------------------------------------------------------ ------------------------------------------------------------------------
getNodesByNgramsUser :: CorpusId getNodesByNgramsUser :: CorpusId
-> NgramsType -> NgramsType
-> Cmd err (Map Text (Set NodeId)) -> Cmd err (HashMap NgramsTerm (Set NodeId))
getNodesByNgramsUser cId nt = getNodesByNgramsUser cId nt =
fromListWith (<>) <$> map (\(n,t) -> (t, Set.singleton n)) HM.fromListWith (<>) <$> map (\(n,t) -> (NgramsTerm t, Set.singleton n))
<$> selectNgramsByNodeUser cId nt <$> selectNgramsByNodeUser cId nt
where where
...@@ -95,19 +96,19 @@ getNodesByNgramsUser cId nt = ...@@ -95,19 +96,19 @@ getNodesByNgramsUser cId nt =
-- TODO add groups -- TODO add groups
getOccByNgramsOnlyFast :: CorpusId getOccByNgramsOnlyFast :: CorpusId
-> NgramsType -> NgramsType
-> [Text] -> [NgramsTerm]
-> Cmd err (Map Text Int) -> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlyFast cId nt ngs = getOccByNgramsOnlyFast cId nt ngs =
fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs HM.fromListWith (+) <$> selectNgramsOccurrencesOnlyByNodeUser cId nt ngs
getOccByNgramsOnlyFast' :: CorpusId getOccByNgramsOnlyFast' :: CorpusId
-> ListId -> ListId
-> NgramsType -> NgramsType
-> [Text] -> [NgramsTerm]
-> Cmd err (Map Text Int) -> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $ getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $
fromListWith (+) <$> map (second round) <$> run cId lId nt tms HM.fromListWith (+) <$> map (second round) <$> run cId lId nt tms
where where
fields = [QualifiedIdentifier Nothing "text"] fields = [QualifiedIdentifier Nothing "text"]
...@@ -115,10 +116,10 @@ getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $ ...@@ -115,10 +116,10 @@ getOccByNgramsOnlyFast' cId lId nt tms = trace (show (cId, lId)) $
run :: CorpusId run :: CorpusId
-> ListId -> ListId
-> NgramsType -> NgramsType
-> [Text] -> [NgramsTerm]
-> Cmd err [(Text, Double)] -> Cmd err [(NgramsTerm, Double)]
run cId' lId' nt' tms' = runPGSQuery query run cId' lId' nt' tms' = fmap (first NgramsTerm) <$> runPGSQuery query
( Values fields (DPS.Only <$> tms') ( Values fields ((DPS.Only . unNgramsTerm) <$> tms')
, cId' , cId'
, lId' , lId'
, ngramsTypeId nt' , ngramsTypeId nt'
...@@ -143,10 +144,10 @@ getOccByNgramsOnlySlow :: NodeType ...@@ -143,10 +144,10 @@ getOccByNgramsOnlySlow :: NodeType
-> CorpusId -> CorpusId
-> [ListId] -> [ListId]
-> NgramsType -> NgramsType
-> [Text] -> [NgramsTerm]
-> Cmd err (Map Text Int) -> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlySlow t cId ls nt ngs = getOccByNgramsOnlySlow t cId ls nt ngs =
Map.map Set.size <$> getScore' t cId ls nt ngs HM.map Set.size <$> getScore' t cId ls nt ngs
where where
getScore' NodeCorpus = getNodesByNgramsOnlyUser getScore' NodeCorpus = getNodesByNgramsOnlyUser
getScore' NodeDocument = getNgramsByDocOnlyUser getScore' NodeDocument = getNgramsByDocOnlyUser
...@@ -155,25 +156,27 @@ getOccByNgramsOnlySlow t cId ls nt ngs = ...@@ -155,25 +156,27 @@ getOccByNgramsOnlySlow t cId ls nt ngs =
getOccByNgramsOnlySafe :: CorpusId getOccByNgramsOnlySafe :: CorpusId
-> [ListId] -> [ListId]
-> NgramsType -> NgramsType
-> [Text] -> [NgramsTerm]
-> Cmd err (Map Text Int) -> Cmd err (HashMap NgramsTerm Int)
getOccByNgramsOnlySafe cId ls nt ngs = do getOccByNgramsOnlySafe cId ls nt ngs = do
printDebug "getOccByNgramsOnlySafe" (cId, nt, length ngs) printDebug "getOccByNgramsOnlySafe" (cId, nt, length ngs)
fast <- getOccByNgramsOnlyFast cId nt ngs fast <- getOccByNgramsOnlyFast cId nt ngs
slow <- getOccByNgramsOnlySlow NodeCorpus cId ls nt ngs slow <- getOccByNgramsOnlySlow NodeCorpus cId ls nt ngs
when (fast /= slow) $ when (fast /= slow) $
printDebug "getOccByNgramsOnlySafe: difference" printDebug "getOccByNgramsOnlySafe: difference"
(diff slow fast :: PatchMap Text (Replace (Maybe Int))) (HM.difference slow fast, HM.difference fast slow)
-- diff slow fast :: PatchMap Text (Replace (Maybe Int))
pure slow pure slow
selectNgramsOccurrencesOnlyByNodeUser :: CorpusId selectNgramsOccurrencesOnlyByNodeUser :: CorpusId
-> NgramsType -> NgramsType
-> [Text] -> [NgramsTerm]
-> Cmd err [(Text, Int)] -> Cmd err [(NgramsTerm, Int)]
selectNgramsOccurrencesOnlyByNodeUser cId nt tms = selectNgramsOccurrencesOnlyByNodeUser cId nt tms =
fmap (first NgramsTerm) <$>
runPGSQuery queryNgramsOccurrencesOnlyByNodeUser runPGSQuery queryNgramsOccurrencesOnlyByNodeUser
( Values fields (DPS.Only <$> tms) ( Values fields ((DPS.Only . unNgramsTerm) <$> tms)
, cId , cId
, nodeTypeId NodeDocument , nodeTypeId NodeDocument
, ngramsTypeId nt , ngramsTypeId nt
...@@ -218,11 +221,11 @@ queryNgramsOccurrencesOnlyByNodeUser' = [sql| ...@@ -218,11 +221,11 @@ queryNgramsOccurrencesOnlyByNodeUser' = [sql|
getNodesByNgramsOnlyUser :: CorpusId getNodesByNgramsOnlyUser :: CorpusId
-> [ListId] -> [ListId]
-> NgramsType -> NgramsType
-> [Text] -> [NgramsTerm]
-> Cmd err (Map Text (Set NodeId)) -> Cmd err (HashMap NgramsTerm (Set NodeId))
getNodesByNgramsOnlyUser cId ls nt ngs = getNodesByNgramsOnlyUser cId ls nt ngs =
Map.unionsWith (<>) unionsWith (<>)
. map (fromListWith (<>) . map (HM.fromListWith (<>)
. map (second Set.singleton)) . map (second Set.singleton))
<$> mapM (selectNgramsOnlyByNodeUser cId ls nt) <$> mapM (selectNgramsOnlyByNodeUser cId ls nt)
(splitEvery 1000 ngs) (splitEvery 1000 ngs)
...@@ -231,11 +234,11 @@ getNodesByNgramsOnlyUser cId ls nt ngs = ...@@ -231,11 +234,11 @@ getNodesByNgramsOnlyUser cId ls nt ngs =
getNgramsByNodeOnlyUser :: NodeId getNgramsByNodeOnlyUser :: NodeId
-> [ListId] -> [ListId]
-> NgramsType -> NgramsType
-> [Text] -> [NgramsTerm]
-> Cmd err (Map NodeId (Set Text)) -> Cmd err (HashMap NodeId (Set NgramsTerm))
getNgramsByNodeOnlyUser cId ls nt ngs = getNgramsByNodeOnlyUser cId ls nt ngs =
Map.unionsWith (<>) unionsWith (<>)
. map (fromListWith (<>) . map (HM.fromListWith (<>)
. map (second Set.singleton)) . map (second Set.singleton))
. map (map swap) . map (map swap)
<$> mapM (selectNgramsOnlyByNodeUser cId ls nt) <$> mapM (selectNgramsOnlyByNodeUser cId ls nt)
...@@ -245,11 +248,12 @@ getNgramsByNodeOnlyUser cId ls nt ngs = ...@@ -245,11 +248,12 @@ getNgramsByNodeOnlyUser cId ls nt ngs =
selectNgramsOnlyByNodeUser :: CorpusId selectNgramsOnlyByNodeUser :: CorpusId
-> [ListId] -> [ListId]
-> NgramsType -> NgramsType
-> [Text] -> [NgramsTerm]
-> Cmd err [(Text, NodeId)] -> Cmd err [(NgramsTerm, NodeId)]
selectNgramsOnlyByNodeUser cId ls nt tms = selectNgramsOnlyByNodeUser cId ls nt tms =
fmap (first NgramsTerm) <$>
runPGSQuery queryNgramsOnlyByNodeUser runPGSQuery queryNgramsOnlyByNodeUser
( Values fields (DPS.Only <$> tms) ( Values fields ((DPS.Only . unNgramsTerm) <$> tms)
, Values [QualifiedIdentifier Nothing "int4"] , Values [QualifiedIdentifier Nothing "int4"]
(DPS.Only <$> (map (\(NodeId n) -> n) ls)) (DPS.Only <$> (map (\(NodeId n) -> n) ls))
, cId , cId
...@@ -312,22 +316,23 @@ queryNgramsOnlyByNodeUser' = [sql| ...@@ -312,22 +316,23 @@ queryNgramsOnlyByNodeUser' = [sql|
getNgramsByDocOnlyUser :: DocId getNgramsByDocOnlyUser :: DocId
-> [ListId] -> [ListId]
-> NgramsType -> NgramsType
-> [Text] -> [NgramsTerm]
-> Cmd err (Map Text (Set NodeId)) -> Cmd err (HashMap NgramsTerm (Set NodeId))
getNgramsByDocOnlyUser cId ls nt ngs = getNgramsByDocOnlyUser cId ls nt ngs =
Map.unionsWith (<>) unionsWith (<>)
. map (fromListWith (<>) . map (second Set.singleton)) . map (HM.fromListWith (<>) . map (second Set.singleton))
<$> mapM (selectNgramsOnlyByDocUser cId ls nt) (splitEvery 1000 ngs) <$> mapM (selectNgramsOnlyByDocUser cId ls nt) (splitEvery 1000 ngs)
selectNgramsOnlyByDocUser :: DocId selectNgramsOnlyByDocUser :: DocId
-> [ListId] -> [ListId]
-> NgramsType -> NgramsType
-> [Text] -> [NgramsTerm]
-> Cmd err [(Text, NodeId)] -> Cmd err [(NgramsTerm, NodeId)]
selectNgramsOnlyByDocUser dId ls nt tms = selectNgramsOnlyByDocUser dId ls nt tms =
fmap (first NgramsTerm) <$>
runPGSQuery queryNgramsOnlyByDocUser runPGSQuery queryNgramsOnlyByDocUser
( Values fields (DPS.Only <$> tms) ( Values fields ((DPS.Only . unNgramsTerm) <$> tms)
, Values [QualifiedIdentifier Nothing "int4"] , Values [QualifiedIdentifier Nothing "int4"]
(DPS.Only <$> (map (\(NodeId n) -> n) ls)) (DPS.Only <$> (map (\(NodeId n) -> n) ls))
, dId , dId
...@@ -352,9 +357,9 @@ queryNgramsOnlyByDocUser = [sql| ...@@ -352,9 +357,9 @@ queryNgramsOnlyByDocUser = [sql|
------------------------------------------------------------------------ ------------------------------------------------------------------------
-- | TODO filter by language, database, any social field -- | TODO filter by language, database, any social field
getNodesByNgramsMaster :: UserCorpusId -> MasterCorpusId -> Cmd err (Map Text (Set NodeId)) getNodesByNgramsMaster :: UserCorpusId -> MasterCorpusId -> Cmd err (HashMap Text (Set NodeId))
getNodesByNgramsMaster ucId mcId = Map.unionsWith (<>) getNodesByNgramsMaster ucId mcId = unionsWith (<>)
. map (fromListWith (<>) . map (\(n,t) -> (t, Set.singleton n))) . map (HM.fromListWith (<>) . map (\(n,t) -> (t, Set.singleton n)))
-- . takeWhile (not . List.null) -- . takeWhile (not . List.null)
-- . takeWhile (\l -> List.length l > 3) -- . takeWhile (\l -> List.length l > 3)
<$> mapM (selectNgramsByNodeMaster 1000 ucId mcId) [0,500..10000] <$> mapM (selectNgramsByNodeMaster 1000 ucId mcId) [0,500..10000]
......
...@@ -16,7 +16,8 @@ module Gargantext.Database.Action.Metrics.TFICF ...@@ -16,7 +16,8 @@ module Gargantext.Database.Action.Metrics.TFICF
-- import Debug.Trace (trace) -- import Debug.Trace (trace)
-- import Gargantext.Core (Lang(..)) -- import Gargantext.Core (Lang(..))
import Data.Map.Strict (Map, toList, fromList) import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HM
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Data.Text (Text) import Data.Text (Text)
import Gargantext.Core.Text.Metrics.TFICF import Gargantext.Core.Text.Metrics.TFICF
...@@ -25,31 +26,28 @@ import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId) ...@@ -25,31 +26,28 @@ import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
import Gargantext.Database.Prelude (Cmd) import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Query.Table.NodeNode (selectCountDocs) import Gargantext.Database.Query.Table.NodeNode (selectCountDocs)
import Gargantext.Database.Schema.Ngrams (NgramsType(..)) import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.API.Ngrams.Types
import Gargantext.Prelude import Gargantext.Prelude
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set import qualified Data.Set as Set
getTficf :: UserCorpusId getTficf :: UserCorpusId
-> MasterCorpusId -> MasterCorpusId
-> NgramsType -> NgramsType
-> Cmd err (Map Text Double) -> Cmd err (HashMap NgramsTerm Double)
getTficf cId mId nt = do getTficf cId mId nt = do
mapTextDoubleLocal <- Map.filter (> 1) mapTextDoubleLocal <- HM.filter (> 1)
<$> Map.map (fromIntegral . Set.size) <$> HM.map (fromIntegral . Set.size)
<$> getNodesByNgramsUser cId nt <$> getNodesByNgramsUser cId nt
mapTextDoubleGlobal <- Map.map fromIntegral mapTextDoubleGlobal <- HM.map fromIntegral
<$> getOccByNgramsOnlyFast mId nt (Map.keys mapTextDoubleLocal) <$> getOccByNgramsOnlyFast mId nt (HM.keys mapTextDoubleLocal)
countLocal <- selectCountDocs cId countLocal <- selectCountDocs cId
countGlobal <- selectCountDocs mId countGlobal <- selectCountDocs mId
pure $ fromList [ ( t pure $ HM.mapWithKey (\t n ->
, tficf (TficfInfra (Count n ) tficf (TficfInfra (Count n )
(Total $ fromIntegral countLocal )) (Total $ fromIntegral countLocal))
(TficfSupra (Count $ fromMaybe 0 $ Map.lookup t mapTextDoubleGlobal) (TficfSupra (Count $ fromMaybe 0 $ HM.lookup t mapTextDoubleGlobal)
(Total $ fromIntegral countGlobal)) (Total $ fromIntegral countGlobal))
) ) mapTextDoubleLocal
| (t, n) <- toList mapTextDoubleLocal \ No newline at end of file
]
...@@ -21,8 +21,10 @@ Portability : POSIX ...@@ -21,8 +21,10 @@ Portability : POSIX
module Gargantext.Database.Admin.Types.Hyperdata.List module Gargantext.Database.Admin.Types.Hyperdata.List
where where
import Data.Map (Map) import Data.Vector (Vector)
import qualified Data.Map as Map --import qualified Data.Vector as V
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HM
import Control.Applicative import Control.Applicative
import Gargantext.Prelude import Gargantext.Prelude
...@@ -34,11 +36,11 @@ import Gargantext.Database.Admin.Types.Metrics (ChartMetrics(..), Metrics) ...@@ -34,11 +36,11 @@ import Gargantext.Database.Admin.Types.Metrics (ChartMetrics(..), Metrics)
------------------------------------------------------------------------ ------------------------------------------------------------------------
data HyperdataList = data HyperdataList =
HyperdataList { _hl_chart :: !(Map TabType (ChartMetrics Histo)) HyperdataList { _hl_chart :: !(HashMap TabType (ChartMetrics Histo))
, _hl_list :: !(Maybe Text) , _hl_list :: !(Maybe Text)
, _hl_pie :: !(Map TabType (ChartMetrics Histo)) , _hl_pie :: !(HashMap TabType (ChartMetrics Histo))
, _hl_scatter :: !(Map TabType Metrics) , _hl_scatter :: !(HashMap TabType Metrics)
, _hl_tree :: !(Map TabType (ChartMetrics [NgramsTree])) , _hl_tree :: !(HashMap TabType (ChartMetrics (Vector NgramsTree)))
} deriving (Show, Generic) } deriving (Show, Generic)
-- HyperdataList { _hl_chart :: !(Maybe (ChartMetrics Histo)) -- HyperdataList { _hl_chart :: !(Maybe (ChartMetrics Histo))
-- , _hl_list :: !(Maybe Text) -- , _hl_list :: !(Maybe Text)
...@@ -49,11 +51,11 @@ data HyperdataList = ...@@ -49,11 +51,11 @@ data HyperdataList =
defaultHyperdataList :: HyperdataList defaultHyperdataList :: HyperdataList
defaultHyperdataList = defaultHyperdataList =
HyperdataList { _hl_chart = Map.empty HyperdataList { _hl_chart = HM.empty
, _hl_list = Nothing , _hl_list = Nothing
, _hl_pie = Map.empty , _hl_pie = HM.empty
, _hl_scatter = Map.empty , _hl_scatter = HM.empty
, _hl_tree = Map.empty , _hl_tree = HM.empty
} }
------------------------------------------------------------------------ ------------------------------------------------------------------------
......
...@@ -5,6 +5,8 @@ module Gargantext.Database.Admin.Types.Metrics where ...@@ -5,6 +5,8 @@ module Gargantext.Database.Admin.Types.Metrics where
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Swagger import Data.Swagger
import Data.Vector (Vector)
import qualified Data.Vector as V
import Protolude import Protolude
import Test.QuickCheck.Arbitrary import Test.QuickCheck.Arbitrary
...@@ -13,15 +15,15 @@ import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema) ...@@ -13,15 +15,15 @@ import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger, wellNamedSchema)
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
data Metrics = Metrics newtype Metrics = Metrics
{ metrics_data :: [Metric]} { metrics_data :: Vector Metric}
deriving (Generic, Show) deriving (Generic, Show)
instance ToSchema Metrics where instance ToSchema Metrics where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "metrics_") declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "metrics_")
instance Arbitrary Metrics instance Arbitrary Metrics
where where
arbitrary = Metrics <$> arbitrary arbitrary = (Metrics . V.fromList) <$> arbitrary
data Metric = Metric data Metric = Metric
{ m_label :: !Text { m_label :: !Text
...@@ -43,7 +45,7 @@ deriveJSON (unPrefix "metrics_") ''Metrics ...@@ -43,7 +45,7 @@ deriveJSON (unPrefix "metrics_") ''Metrics
deriveJSON (unPrefix "m_") ''Metric deriveJSON (unPrefix "m_") ''Metric
data ChartMetrics a = ChartMetrics { chartMetrics_data :: a } newtype ChartMetrics a = ChartMetrics { chartMetrics_data :: a }
deriving (Generic, Show) deriving (Generic, Show)
instance (Typeable a, ToSchema a) => ToSchema (ChartMetrics a) where instance (Typeable a, ToSchema a) => ToSchema (ChartMetrics a) where
......
...@@ -24,6 +24,7 @@ import Control.Monad (mzero) ...@@ -24,6 +24,7 @@ import Control.Monad (mzero)
import Data.Aeson import Data.Aeson
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Either import Data.Either
import Data.Hashable (Hashable)
import Data.Swagger import Data.Swagger
import Data.Text (Text, unpack) import Data.Text (Text, unpack)
import Data.Time (UTCTime) import Data.Time (UTCTime)
...@@ -130,7 +131,7 @@ pgNodeId = O.pgInt4 . id2int ...@@ -130,7 +131,7 @@ pgNodeId = O.pgInt4 . id2int
------------------------------------------------------------------------ ------------------------------------------------------------------------
newtype NodeId = NodeId Int newtype NodeId = NodeId Int
deriving (Show, Read, Generic, Num, Eq, Ord, Enum, ToJSONKey, FromJSONKey, ToJSON, FromJSON) deriving (Show, Read, Generic, Num, Eq, Ord, Enum, ToJSONKey, FromJSONKey, ToJSON, FromJSON, Hashable)
unNodeId :: NodeId -> Int unNodeId :: NodeId -> Int
unNodeId (NodeId n) = n unNodeId (NodeId n) = n
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment