Tools.hs 5.33 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11
{-|
Module      : Gargantext.API.Ngrams.Tools
Description : Tools to manage Ngrams Elements (from the API)
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

12
{-# LANGUAGE TypeFamilies #-}
13 14 15 16 17

module Gargantext.API.Ngrams.Tools
  where

import Control.Concurrent
18
import Control.Lens (_Just, (^.), at, view, At, Index, IxValue)
19
import Control.Monad.Reader
20
import Data.HashMap.Strict (HashMap)
21
import Data.Hashable (Hashable)
22 23
import Data.Set (Set)
import Data.Validity
24
import Gargantext.API.Ngrams.Types
25 26 27
import Gargantext.Core.Types (ListType(..), NodeId, ListId)
import Gargantext.Database.Schema.Ngrams (NgramsType)
import Gargantext.Prelude
28
import qualified Data.HashMap.Strict as HM
29 30
import qualified Data.Map.Strict     as Map
import qualified Data.Set            as Set
31

32 33 34
mergeNgramsElement :: NgramsRepoElement -> NgramsRepoElement -> NgramsRepoElement
mergeNgramsElement _neOld neNew = neNew

35
type RootTerm = NgramsTerm
36

37 38 39
getRepo :: RepoCmdM env err m => m NgramsRepo
getRepo = do
  v <- view repoVar
40
  liftBase $ readMVar v
41

42
listNgramsFromRepo :: [ListId] -> NgramsType
43
                   -> NgramsRepo -> HashMap NgramsTerm NgramsRepoElement
44
listNgramsFromRepo nodeIds ngramsType repo = ngrams
45
  where
46 47
    ngramsMap = repo ^. r_state . at ngramsType . _Just

48 49
    -- TODO HashMap linked
    ngrams    = HM.fromList $ Map.toList $ Map.unionsWith mergeNgramsElement
50
                [ ngramsMap ^. at nodeId . _Just | nodeId <- nodeIds ]
51

52

53 54 55 56 57 58
-- TODO-ACCESS: We want to do the security check before entering here.
--              Add a static capability parameter would be nice.
--              Ideally this is the access to `repoVar` which needs to
--              be properly guarded.
getListNgrams :: RepoCmdM env err m
              => [ListId] -> NgramsType
59
              -> m (HashMap NgramsTerm NgramsRepoElement)
60
getListNgrams nodeIds ngramsType = listNgramsFromRepo nodeIds ngramsType <$> getRepo
61

62 63
getTermsWith :: (RepoCmdM env err m, Eq a, Hashable a)
          => (NgramsTerm -> a) -> [ListId]
64
          -> NgramsType -> Set ListType
65
          -> m (HashMap a [a])
66
getTermsWith f ls ngt lts = HM.fromListWith (<>)
67
                      <$> map toTreeWith
68
                      <$> HM.toList
69
                      <$> HM.filter (\f' -> Set.member (fst f') lts)
70
                      <$> mapTermListRoot ls ngt
71
                      <$> getRepo
72
  where
73 74 75
    toTreeWith (t, (_lt, maybeRoot)) = case maybeRoot of
      Nothing -> (f t, [])
      Just  r -> (f r, [f t])
76

77 78 79
mapTermListRoot :: [ListId]
                -> NgramsType
                -> NgramsRepo
80
                -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
81
mapTermListRoot nodeIds ngramsType repo =
82 83
      (\nre -> (_nre_list nre, _nre_root nre))
  <$> listNgramsFromRepo nodeIds ngramsType repo
84 85 86 87 88 89 90 91 92

filterListWithRootHashMap :: ListType
                          -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
                          -> HashMap NgramsTerm (Maybe RootTerm)
filterListWithRootHashMap lt m = snd <$> HM.filter isMapTerm m
  where
    isMapTerm (l, maybeRoot) = case maybeRoot of
      Nothing -> l == lt
      Just  r -> case HM.lookup r m of
93
        Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
94
        Just  (l',_) -> l' == lt
95

96
filterListWithRoot :: ListType
97 98 99
                   -> HashMap NgramsTerm (ListType, Maybe NgramsTerm)
                   -> HashMap NgramsTerm (Maybe RootTerm)
filterListWithRoot lt m = snd <$> HM.filter isMapTerm m
100
  where
101
    isMapTerm (l, maybeRoot) = case maybeRoot of
102
      Nothing -> l == lt
103
      Just  r -> case HM.lookup r m of
104
        Nothing -> panic $ "[Garg.API.Ngrams.Tools] filterWithRoot, unknown key: " <> unNgramsTerm r
105 106
        Just  (l',_) -> l' == lt

107 108 109 110 111 112 113 114
groupNodesByNgrams :: ( At root_map
                      , Index root_map ~ NgramsTerm
                      , IxValue root_map ~ Maybe RootTerm
                      )
                   => root_map
                   -> HashMap NgramsTerm (Set NodeId)
                   -> HashMap NgramsTerm (Set NodeId)
groupNodesByNgrams syn occs = HM.fromListWith (<>) occs'
115
  where
116 117 118
    occs' = map toSyn (HM.toList occs)
    toSyn (t,ns) = case syn ^. at t of
      Nothing -> panic $ "[Garg.API.Ngrams.Tools.groupNodesByNgrams] unknown key: " <> unNgramsTerm t
119 120 121 122 123 124
      Just  r -> case r of
        Nothing  -> (t, ns)
        Just  r' -> (r',ns)

data Diagonal = Diagonal Bool

125
getCoocByNgrams :: Diagonal -> HashMap NgramsTerm (Set NodeId) -> HashMap (NgramsTerm, NgramsTerm) Int
126 127 128
getCoocByNgrams = getCoocByNgrams' identity


129
getCoocByNgrams' :: (Hashable a, Ord a, Ord c) => (b -> Set c) -> Diagonal -> HashMap a b -> HashMap (a, a) Int
130
getCoocByNgrams' f (Diagonal diag) m =
131 132 133 134 135 136 137 138 139 140 141 142
  HM.fromList [( (t1,t2)
               , maybe 0 Set.size $ Set.intersection
                                 <$> (fmap f $ HM.lookup t1 m)
                                 <*> (fmap f $ HM.lookup t2 m)
               )
              | (t1,t2) <- if diag then
                             [ (x,y) | x <- ks, y <- ks, x <= y] -- TODO if we keep a Data.Map here it might be
                                                                 -- more efficient to enumerate all the y <= x.
                           else
                             listToCombi identity ks
              ]

143
  where ks = HM.keys m
144 145

------------------------------------------