Tools.hs 8.34 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11
{-|
Module      : Gargantext.API.Ngrams.Tools
Description : Tools to manage Ngrams Elements (from the API)
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

12
{-# LANGUAGE TypeFamilies #-}
13 14 15 16 17

module Gargantext.API.Ngrams.Tools
  where

import Control.Concurrent
18
import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19
import Control.Monad.Reader
20
import Data.HashMap.Strict (HashMap)
21
import Data.Hashable (Hashable)
22
import Data.Pool (withResource)
23 24
import Data.Set (Set)
import Data.Validity
25
import Gargantext.API.Ngrams.Types
26
import Gargantext.Core.NodeStory
27
import Gargantext.Core.Types (ListType(..), NodeId, NodeType(..), ListId)
28
import Gargantext.Database.Prelude (CmdM, HasConnectionPool(..))
29 30
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude
31
import qualified Data.HashMap.Strict as HM
32 33
import qualified Data.Map.Strict     as Map
import qualified Data.Set            as Set
34
import qualified Gargantext.Core.NodeStoryFile as NSF
35

36

37 38 39
mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
mergeNgramsElement _neOld neNew = neNew

40
type RootTerm = NgramsTerm
41

42

43
getRepo :: HasNodeStory env err m
44
         => [ListId] -> m NodeListStory
45
getRepo listIds = do
46 47 48 49 50
  f <- getNodeListStory
  v  <- liftBase $ f listIds
  v' <- liftBase $ readMVar v
  pure $ v'

51

52 53 54 55 56 57 58 59 60 61
repoSize :: Ord k1 => NodeStory (Map.Map k1 (Map.Map k2 a)) p
                   -> NodeId
                   -> Map.Map k1 Int
repoSize repo node_id = Map.map Map.size state
  where
    state = repo ^. unNodeStory
                  . at node_id . _Just
                  . a_state


62
getNodeStoryVar :: HasNodeStory env err m
63
           => [ListId] -> m (MVar NodeListStory)
64
getNodeStoryVar l = do
65 66 67
  f <- getNodeListStory
  v  <- liftBase $ f l
  pure v
68

69

70
getNodeListStory :: HasNodeStory env err m
71
                 => m ([NodeId] -> IO (MVar NodeListStory))
72 73 74 75 76 77
getNodeListStory = do
  env <- view hasNodeStory
  pure $ view nse_getter env



78 79 80 81
listNgramsFromRepo :: [ListId]
                   -> NgramsType
                   -> NodeListStory
                   -> HashMap NgramsTerm NgramsRepoElement
82
listNgramsFromRepo nodeIds ngramsType repo =
83 84 85 86 87 88 89 90 91 92 93
  HM.fromList $ Map.toList
              $ Map.unionsWith mergeNgramsElement ngrams
    where
      ngrams = [ repo
               ^. unNodeStory
                . at nodeId . _Just
                . a_state
                . at ngramsType . _Just
                | nodeId <- nodeIds
                ]

94 95 96 97
-- TODO-ACCESS: We want to do the security check before entering here.
--              Add a static capability parameter would be nice.
--              Ideally this is the access to `repoVar` which needs to
--              be properly guarded.
98
getListNgrams :: HasNodeStory env err m
99
              => [ListId] -> NgramsType
100
              -> m (HashMap NgramsTerm NgramsRepoElement)
101
getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType
102
                                 <$> getRepo nodeIds
103 104


105
getTermsWith :: (HasNodeStory env err m, Eq a, Hashable a)
106
          => (NgramsTerm -> a) -> [ListId]
107
          -> NgramsType -> Set ListType
108
          -> m (HashMap a [a])
109
getTermsWith f ls ngt lts  = HM.fromListWith (<>)
110
                      <$> map toTreeWith
111
                      <$> HM.toList
112
                      <$> HM.filter (\f' -> Set.member (fst f') lts)
113
                      <$> mapTermListRoot ls ngt
114
                      <$> getRepo ls
115 116 117 118 119 120 121
  where
    toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
      Nothing -> (f t, [])
      Just  r -> (f r, [f t])



122 123
mapTermListRoot :: [ListId]
                -> NgramsType
124
                -> NodeListStory
125
                -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
126
mapTermListRoot nodeIds ngramsType repo =
127 128
      (\nre -> (_nre_list nre, _nre_root nre))
  <$> listNgramsFromRepo nodeIds ngramsType repo
129 130 131



132 133 134 135 136 137 138 139 140

filterListWithRootHashMap :: ListType
                          -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
                          -> HashMap NgramsTerm (Maybe RootTerm)
filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
  where
    isMapTerm (l, maybeRoot) = case maybeRoot of
      Nothing -> l == lt
      Just  r -> case HM.lookup r m of
141
        Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
142
        Just  (l',_) -> l' == lt
143

144
filterListWithRoot :: [ListType]
145 146 147
                   -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
                   -> HashMap NgramsTerm (Maybe RootTerm)
filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
148
  where
149
    isMapTerm (l, maybeRoot) = case maybeRoot of
150
      Nothing -> elem l lt
151
      Just  r -> case HM.lookup r m of
152
        Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
153
        Just  (l',_) -> elem l' lt
154

155 156 157 158 159 160 161 162
groupNodesByNgrams :: ( At root_map
                      , Index root_map ~ NgramsTerm
                      , IxValue root_map ~ Maybe RootTerm
                      )
                   => root_map
                   -> HashMap NgramsTerm (Set NodeId)
                   -> HashMap NgramsTerm (Set NodeId)
groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
163
  where
164 165 166
    occs' = map toSyn (HM.toList occs)
    toSyn (t,ns) = case syn ^. at t of
      Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
167 168 169 170 171 172
      Just  r -> case r of
        Nothing  -> (t, ns)
        Just  r' -> (r',ns)

data Diagonal = Diagonal Bool

173 174 175
getCoocByNgrams :: Diagonal
                -> HashMap NgramsTerm (Set NodeId)
                -> HashMap (NgramsTerm, NgramsTerm) Int
176 177 178
getCoocByNgrams = getCoocByNgrams' identity


179 180 181 182 183
getCoocByNgrams' :: (Hashable a, Ord a, Ord c)
                 => (b -> Set c)
                 -> Diagonal
                 -> HashMap a b
                 -> HashMap (a, a) Int
184
getCoocByNgrams' f (Diagonal diag) m =
185 186 187 188 189
  HM.fromList [( (t1,t2)
               , maybe 0 Set.size $ Set.intersection
                                 <$> (fmap f $ HM.lookup t1 m)
                                 <*> (fmap f $ HM.lookup t2 m)
               )
Alexandre Delanoë's avatar
Alexandre Delanoë committed
190 191 192 193 194 195
              | (t1,t2) <- if diag
                              then [ (x,y) | x <- ks, y <- ks, x <= y]
                                   -- TODO if we keep a Data.Map here it might be
                                   -- more efficient to enumerate all the y <= x.
                              else
                                listToCombi identity ks
196 197
              ]

198
  where
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222
    ks = HM.keys m

-- TODO k could be either k1 or k2 here
getCoocByNgrams'' :: (Hashable k, Ord k, Ord contexts)
                  => Diagonal
                  -> (contextA -> Set contexts, contextB -> Set contexts)
                  -> (HashMap k contextA, HashMap k contextB)
                  -> HashMap (k, k) Int
getCoocByNgrams'' (Diagonal diag) (f1,f2) (m1,m2) =
  HM.fromList [( (t1,t2)
               , maybe 0 Set.size $ Set.intersection
                                 <$> (fmap f1 $ HM.lookup t1 m1)
                                 <*> (fmap f2 $ HM.lookup t2 m2)
               )
              | (t1,t2) <- if diag
                              then
                                [ (x,y) | x <- ks1, y <- ks2, x <= y]
                                   -- TODO if we keep a Data.Map here it might be
                                   -- more efficient to enumerate all the y <= x.
                              else
                                [ (x,y) | x <- ks1, y <- ks2, x < y]
                                -- TODO check optim
                                -- listToCombi identity ks1
              ]
223
  where
224 225 226 227
    ks1 = HM.keys m1
    ks2 = HM.keys m2


228 229

------------------------------------------
230 231


232
migrateFromDirToDb :: (CmdM env err m) -- , HasNodeStory env err m)
233 234
                   => m ()
migrateFromDirToDb = do
235
  pool <- view connPool
236 237
  withResource pool $ \c -> do
    listIds <- liftBase $ getNodesIdWithType c NodeList
238
    -- printDebug "[migrateFromDirToDb] listIds" listIds
239
    (NodeStory nls) <- NSF.getRepoReadConfig listIds
240
    -- printDebug "[migrateFromDirToDb] nls" nls
241 242 243 244 245 246 247 248
    _ <- mapM (\(nId, a) -> do
                  n <- liftBase $ nodeExists c nId
                  case n of
                    False -> pure ()
                    True  -> liftBase $ upsertNodeStories c nId a
              ) $ Map.toList nls
    --_ <- nodeStoryIncs (Just $ NodeStory nls) listIds
    pure ()