{-|
Module      : Gargantext.API.Ngrams
Description : Server API
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Ngrams API

-- | TODO
get ngrams filtered by NgramsType
add get

-}

{-# LANGUAGE BangPatterns        #-}
{-# LANGUAGE ConstraintKinds     #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskell     #-}
{-# LANGUAGE TemplateHaskell     #-}
{-# LANGUAGE TupleSections       #-}
{-# LANGUAGE TypeFamilies        #-}
{-# LANGUAGE TypeOperators       #-}

module Gargantext.API.Ngrams
  (
    commitStatePatch

  , searchTableNgrams
  , getTableNgrams
  , getTableNgramsCorpus
  , setListNgrams
  --, rmListNgrams TODO fix before exporting

  , NgramsTablePatch
  , NgramsTableMap

  , NgramsTerm(..)

  , NgramsElement(..)
  , mkNgramsElement

  , RootParent(..)

  , MSet
  , mSetFromList
  , mSetToList

  , Repo(..)
  , r_version
  , r_state
  , r_history
  , NgramsRepoElement(..)
  , saveNodeStory
  , initRepo

  , TabType(..)

  , QueryParamR
  , TODO

  -- Internals
  , getNgramsTableMap
  , dumpJsonTableMap
  , tableNgramsPull
  , tableNgramsPut

  , getNgramsTable'
  , setNgramsTableScores

  , Version
  , Versioned(..)
  , VersionedWithCount(..)
  , currentVersion
  , listNgramsChangedSince
  , MinSize, MaxSize, OrderBy, NgramsTable
  , UpdateTableNgramsCharts

  -- * Handlers to be used when serving top-level API requests
  , getTableNgramsCorpusHandler

  -- * Internals for testing
  , compute_new_state_patches
  , PatchHistory(..)
  , newNgramsFromNgramsStatePatch
  , filterNgramsNodes

  -- * Operations on a forest
  , BuildForestError(..)
  , renderLoop
  , buildForest
  , destroyForest
  , pruneForest
  )
  where

import Control.Lens (view, (^..), (+~), (%~), msumOf, at, ix, _Just, Each(..), (%%~), ifolded, to, withIndex)
import Data.Aeson.Text qualified as DAT
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Map.Strict.Patch qualified as PM
import Data.Patch.Class (Action(act), Transformable(..), ours)
import Data.Set qualified as Set
import Data.Text (isInfixOf, toLower, unpack)
import Data.Text qualified as T
import Data.Text.Lazy.IO as DTL ( writeFile )
import Data.Tree
import Gargantext.API.Ngrams.Tools (getNodeStory)
import Gargantext.API.Ngrams.Types
import Gargantext.Core.NodeStory hiding (buildForest)
import Gargantext.Core.NodeStory qualified as NodeStory
import Gargantext.Core.Text.Ngrams (Ngrams, NgramsType)
import Gargantext.Core.Types (ListType(..), NodeId, ListId, TODO, assertValid, HasValidationError)
import Gargantext.Core.Types.Query (Limit(..), Offset(..), MinSize(..), MaxSize(..))
import Gargantext.Database.Action.Metrics.NgramsByContext (getOccByNgramsOnlyFast)
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.Ngrams ( text2ngrams, insertNgrams )
import Gargantext.Prelude hiding (log, to, toLower, (%), isInfixOf)
import Gargantext.System.Logging.Types (LogLevel(DEBUG))
import Text.Collate qualified as Unicode


{-
-- TODO sequences of modifications (Patchs)
type NgramsIdPatch = Patch NgramsId NgramsPatch

ngramsPatch :: Int -> NgramsPatch
ngramsPatch n = NgramsPatch (DM.fromList [(1, StopTerm)]) (Set.fromList [n]) Set.empty

toEdit :: NgramsId -> NgramsPatch -> Edit NgramsId NgramsPatch
toEdit n p = Edit n p
ngramsIdPatch :: Patch NgramsId NgramsPatch
ngramsIdPatch = fromList $ catMaybes $ reverse [ replace (1::NgramsId) (Just $ ngramsPatch 1) Nothing
                                       , replace (1::NgramsId) Nothing (Just $ ngramsPatch 2)
                                       , replace (2::NgramsId) Nothing (Just $ ngramsPatch 2)
                                       ]

-- applyPatchBack :: Patch -> IO Patch
-- isEmptyPatch = Map.all (\x -> Set.isEmpty (add_children x) && Set.isEmpty ... )
-}
------------------------------------------------------------------------
------------------------------------------------------------------------
------------------------------------------------------------------------

{-
-- TODO: Replace.old is ignored which means that if the current list
-- `MapTerm` and that the patch is `Replace CandidateTerm StopTerm` then
-- the list is going to be `StopTerm` while it should keep `MapTerm`.
-- However this should not happen in non conflicting situations.
mkListsUpdate :: NgramsType -> NgramsTablePatch -> [(NgramsTypeId, NgramsTerm, ListTypeId)]
mkListsUpdate nt patches =
  [ (ngramsTypeId nt, ng, listTypeId lt)
  | (ng, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
  , lt <- patch ^.. patch_list . new
  ]

mkChildrenGroups :: (PatchSet NgramsTerm -> Set NgramsTerm)
                 -> NgramsType
                 -> NgramsTablePatch
                 -> [(NgramsTypeId, NgramsParent, NgramsChild)]
mkChildrenGroups addOrRem nt patches =
  [ (ngramsTypeId nt, parent, child)
  | (parent, patch) <- patches ^.. ntp_ngrams_patches . ifolded . withIndex
  , child <- patch ^.. patch_children . to addOrRem . folded
  ]
-}

------------------------------------------------------------------------

saveNodeStory :: NodeStoryEnv err -> NodeId -> ArchiveList -> DBUpdate err ()
saveNodeStory env nId a = do
  let saver = view hasNodeStoryImmediateSaver env
  saver nId a

ngramsStatePatchConflictResolution :: NgramsType
                                   -> NgramsTerm
                                   -> ConflictResolutionNgramsPatch
ngramsStatePatchConflictResolution _ngramsType _ngramsTerm
   = (ours, (const ours, ours), (False, False))
                             -- (False, False) mean here that Mod has always priority.
 -- = (ours, (const ours, ours), (True, False))
                             -- (True, False) <- would mean priority to the left (same as ours).
  -- undefined {- TODO think this through -}, listTypeConflictResolution)




{- unused
-- TODO refactor with putListNgrams
copyListNgrams :: RepoCmdM env err m
               => NodeId -> NodeId -> NgramsType
               -> m ()
copyListNgrams srcListId dstListId ngramsType = do
  var <- view repoVar
  liftBase $ modifyMVar_ var $
    pure . (r_state . at ngramsType %~ (Just . f . something))
  saveNodeStory
  where
    f :: Map NodeId NgramsTableMap -> Map NodeId NgramsTableMap
    f m = m & at dstListId %~ insertNewOnly (m ^. at srcListId)

-- TODO refactor with putListNgrams
-- The list must be non-empty!
-- The added ngrams must be non-existent!
addListNgrams :: RepoCmdM env err m
              => NodeId -> NgramsType
              -> [NgramsElement] -> m ()
addListNgrams listId ngramsType nes = do
  var <- view repoVar
  liftBase $ modifyMVar_ var $
    pure . (r_state . at ngramsType . _Just . at listId . _Just <>~ m)
  saveNodeStory
  where
    m = Map.fromList $ (\n -> (n ^. ne_ngrams, n)) <$> nes
-}

-- | TODO: incr the Version number
-- && should use patch
-- UNSAFE
-- FIXME(adinapoli): This function used to be very dangerous as it didn't
-- prevent imports from creating loops: if we had a list of imported terms with a tree
-- referencing an existing node in a forest, we could accidentally create loops. The most
-- efficient way would be to use the patch API to generate a patch for the input, apply it
-- to the current state and handle conflicts, discovering loops there. However, given that
-- it's complex to do that, for the moment we use the Forest API to detect loops, failing
-- if one is found.
setListNgrams :: NodeStoryEnv err
              -> NodeId
              -> NgramsType
              -> Map NgramsTerm NgramsRepoElement
              -> DBUpdate err ()
setListNgrams env listId ngramsType ns = do
  -- printDebug "[setListNgrams]" (listId, ngramsType)
  a <- getNodeStory env listId
  let a' = a & a_state . at ngramsType %~ (\mns' -> case mns' of
                                              Nothing -> Just ns
                                              Just ns' -> Just $ ns <> ns')
  saveNodeStory env listId a'


newNgramsFromNgramsStatePatch :: NgramsStatePatch' -> [Ngrams]
newNgramsFromNgramsStatePatch p =
  [ text2ngrams (unNgramsTerm n)
  | (n,np) <- p ^.. _PatchMap
                -- . each . _PatchMap
                . each . _NgramsTablePatch
                . _PatchMap . ifolded . withIndex
  , _ <- np ^.. patch_new . _Just
  ]




commitStatePatch :: NodeStoryEnv err
                 -> ListId
                 -> Versioned NgramsStatePatch'
                 -> DBUpdate err (Versioned NgramsStatePatch')
commitStatePatch env listId (Versioned _p_version p) = do
  a <- getNodeStory env listId
  let archiveSaver = view hasNodeArchiveStoryImmediateSaver env

  let
    (p', q') = compute_new_state_patches p (PatchHistory $ a ^. a_history)
    a' = a & a_version +~ 1
           & a_state   %~ act p'
           & a_history %~ (p' :)

  {-
  -- Ideally we would like to check these properties. However:
  -- * They should be checked only to debug the code. The client data
  --   should be able to trigger these.
  -- * What kind of error should they throw (we are in IO here)?
  -- * Should we keep modifyMVar?
  -- * Should we throw the validation in an Exception, catch it around
  --   modifyMVar and throw it back as an Error?
  assertValid $ transformable p q
  assertValid $ applicable p' (r ^. r_state)
  -}
  -- printDebug "[commitStatePatch] a version" (a ^. a_version)
  -- printDebug "[commitStatePatch] a' version" (a' ^. a_version)
  -- let newNs = ( ns & unNodeStory . at listId .~ (Just a')
  --      , Versioned (a' ^. a_version) q'
  --      )

  let newA = Versioned (a' ^. a_version) q'

  -- NOTE Now is the only good time to save the archive history. We
  -- have the handle to the MVar and we need to save its exact
  -- snapshot. Node Story archive is a linear table, so it's only
  -- couple of inserts, it shouldn't take long...

  -- NOTE This is changed now. Before we used MVar's, now it's TVars
  -- (MVar's blocked). It was wrapped in withMVar before, now we read
  -- the TVar, modify archive with archiveSaver, then write the TVar.

  -- pure (newNs', snd newNs)
  -- writeTVar var newNs'

  --pure newNs

  -- If we postponed saving the archive to the debounce action, we
  -- would have issues like
  -- https://gitlab.iscpif.fr/gargantext/purescript-gargantext/issues/476
  -- where the `q` computation from above (which uses the archive)
  -- would cause incorrect patch application (before the previous
  -- archive was saved and applied)
  -- newNs' <- archiveSaver $ fst newNs
  -- newNs' <- archiveSaver $ fst newNs
  -- atomically $ writeTVar var newNs'
  void $ archiveSaver listId a'

  -- Save new ngrams
  _ <- insertNgrams (newNgramsFromNgramsStatePatch p)
  saveNodeStory env listId a'

  pure newA


newtype PatchHistory =
  PatchHistory { _PatchHistory :: [ NgramsStatePatch' ] }
  deriving (Show, Eq)

-- | Computes the new state patch from the new patch and
-- the history of patches applied up to this point.
-- Returns a pair of patches (p,q) following the semantic of
-- the 'Transformable' class, that says:
--
-- Given two diverging patches @p@ and @q@, @transformWith m p q@ returns
-- a pair of updated patches @(p',q')@ such that @p' <> q@ and
-- @q' <> p@ are equivalent patches that incorporate the changes
-- of /both/ @p@ and @q@, up to merge conflicts, which are handled by
-- the provided function @m@.
compute_new_state_patches :: NgramsStatePatch'
                          -> PatchHistory
                          -> (NgramsStatePatch', NgramsStatePatch')
compute_new_state_patches latest_patch (PatchHistory history) =
  let squashed_history = mconcat history
  in transformWith ngramsStatePatchConflictResolution latest_patch squashed_history


-- This is a special case of tableNgramsPut where the input patch is empty.
tableNgramsPull :: NodeStoryEnv err
                -> ListId
                -> NgramsType
                -> Version
                -> DBQuery err x (Versioned NgramsTablePatch)
tableNgramsPull env listId ngramsType p_version = do
  -- printDebug "[tableNgramsPull]" (listId, ngramsType)
  a <- getNodeStory env listId
  -- r <- liftBase $ atomically $ readTVar var

  let
    -- a = r ^. unNodeStory . at listId . non initArchive
    q = mconcat $ take (a ^. a_version - p_version) (a ^. a_history)
    q_table = q ^. _PatchMap . ix ngramsType

  pure (Versioned (a ^. a_version) q_table)




-- tableNgramsPut :: (HasInvalidError err, RepoCmdM env err m)
-- Apply the given patch to the DB and returns the patch to be applied on the
-- client.
-- TODO-ACCESS check
-- TODO(adinapoli) DB-transactional
tableNgramsPut :: (HasNodeStoryEnv env err, IsDBCmd env err m, HasValidationError err)
               => TabType
               -> ListId
               -> Versioned NgramsTablePatch
               -> m (Versioned NgramsTablePatch)
tableNgramsPut tabType listId (Versioned p_version p_table)
  | p_table == mempty = do
      -- printDebug "[tableNgramsPut]" ("TableEmpty" :: Text)
      let ngramsType        = ngramsTypeFromTabType tabType
      env <- view hasNodeStory
      runDBTx $ tableNgramsPull env listId ngramsType p_version

  | otherwise         = do
      -- printDebug "[tableNgramsPut]" ("TableNonEmpty" :: Text)
      let ngramsType        = ngramsTypeFromTabType tabType
          (p, p_validity)   = PM.singleton ngramsType p_table

      assertValid p_validity

      env <- view hasNodeStory
      ret <- runDBTx $ commitStatePatch env listId (Versioned p_version p)
        <&> v_data %~ view (_PatchMap . ix ngramsType)

      pure ret



getNgramsTableMap :: NodeStoryEnv err
                  -> NodeId
                  -> NgramsType
                  -> DBQuery err x (Versioned NgramsTableMap)
getNgramsTableMap env nodeId ngramsType = do
  a <- getNodeStory env nodeId
  pure $ Versioned (a ^. a_version)
                   (a ^. a_state . ix ngramsType)


dumpJsonTableMap :: (HasNodeStory env err m, IsDBCmd env err m)
                 => Text
                 -> NodeId
                 -> NgramsType
                 -> m ()
dumpJsonTableMap fpath nodeId ngramsType = do
  env <- view hasNodeStory
  m <- runDBQuery $ getNgramsTableMap env nodeId ngramsType
  liftBase $ DTL.writeFile (unpack fpath) (DAT.encodeToLazyText m)
  pure ()

-- | Filters the given `tableMap` with the search criteria. It returns
-- the input map, where each bucket indexed by a 'NgramsTerm' has been
-- filtered via the given predicate. Removes the key from the map if
-- the filtering would result in the empty set.
filterNgramsNodes :: Maybe ListType
                  -> Maybe MinSize
                  -> Maybe MaxSize
                  -> (NgramsTerm -> Bool)
                  -> Forest NgramsElement
                  -> Forest NgramsElement
filterNgramsNodes listTy minSize maxSize searchFn =
  filter (matchingNode listTy minSize maxSize searchFn)

-- | Returns 'True' if the input 'NgramsElement' satisfies the search criteria
-- mandated by 'NgramsSearchQuery'.
matchingNode :: Maybe ListType
             -> Maybe MinSize
             -> Maybe MaxSize
             -> (NgramsTerm -> Bool)
             -> Tree NgramsElement
             -> Bool
matchingNode listType minSize maxSize searchFn (Node inputNode children) =
  let nodeSize        = inputNode ^. ne_size
      matchesListType = maybe (const True) (==) listType
      respectsMinSize = maybe (const True) ((<=) . getMinSize) minSize
      respectsMaxSize = maybe (const True) ((>=) . getMaxSize) maxSize

  in    respectsMinSize nodeSize
     && respectsMaxSize nodeSize
     -- Search for the query either in the root or in the children.
     && (searchFn (inputNode ^. ne_ngrams) || any (matchingNode listType minSize maxSize searchFn) children)
     && matchesListType (inputNode ^. ne_list)

-- | Version of 'buildForest' specialised over the 'NgramsElement' as the values of the tree.
-- /IMPORTANT/: This functions returns an error in case we found a loop.
buildForest :: OnLoopDetectedStrategy -> Map NgramsTerm NgramsElement -> Either BuildForestError (Forest NgramsElement)
buildForest onLoopStrategy = fmap (map (fmap snd)) . NodeStory.buildForest onLoopStrategy

-- | Folds an Ngrams forest back to a table map.
-- This function doesn't aggregate information, but merely just recostructs the original
-- map without loss of information. To perform operations on the forest, use the appropriate
-- functions.
-- /NOTA BENE:/ We return a list and not a Map because we might have sorted the forest, and
-- converting into a map would trash the carefully-constructed sorting.
destroyForest :: Forest NgramsElement -> [(NgramsTerm, NgramsElement)]
destroyForest f = concatMap (map (\el -> (_ne_ngrams el, el)) . flatten) $ f
  where
    -- _destroyTree :: NgramsElement -> [(NgramsTerm, [NgramsElement])] -> (NgramsTerm, [NgramsElement])
    -- _destroyTree rootEl childrenEl = (_ne_ngrams rootEl, childrenEl)

    -- _squashElements :: NgramsElement -> [(NgramsTerm, NgramsElement)] -> NgramsElement
    -- _squashElements r _ = r

-- | TODO Errors management
--  TODO: polymorphic for Annuaire or Corpus or ...
-- | Table of Ngrams is a ListNgrams formatted (sorted and/or cut).
-- TODO: should take only one ListId

-- | /pure/ function to query a 'Map NgramsTerm NgramsElement', according to a
--  search function. Returns a /versioned/ 'NgramsTable' which is paginated and
-- sorted according to the input 'NgramsSearchQuery', together with the
-- occurrences of the elements.
searchTableNgrams :: Versioned (Map NgramsTerm NgramsElement)
                  -> NgramsSearchQuery
                  -- ^ The search query on the retrieved data
                  -> Either BuildForestError (VersionedWithCount NgramsTable)
searchTableNgrams versionedTableMap NgramsSearchQuery{..} =
  let tableMap     = versionedTableMap ^. v_data
  in case keepRoots <$> buildForest (BreakLoop LBA_just_do_it) tableMap of
       Left err -> Left err
       Right fs ->
         let forestRoots = filterNgramsNodes _nsq_listType _nsq_minSize _nsq_maxSize _nsq_searchQuery $ fs
             tableMapSorted = versionedTableMap
                            & v_data .~ (NgramsTable . map snd
                                                     . destroyForest
                                                     . sortAndPaginateForest _nsq_offset _nsq_limit _nsq_orderBy
                                                     . withInnersForest
                                                     $ forestRoots
                                                     )

         in Right $ toVersionedWithCount (length forestRoots) tableMapSorted

keepRoots :: Forest NgramsElement -> Forest NgramsElement
keepRoots = filter (\(Node r _) -> isNothing (_ne_root r) || isNothing (_ne_parent r))

-- | For each input root, extends its occurrence count with
-- the information found in the subforest.
withInnersForest :: Forest NgramsElement -> Forest NgramsElement
withInnersForest = map sumSubitemsOccurrences
  where
    sumSubitemsOccurrences :: Tree NgramsElement -> Tree NgramsElement
    sumSubitemsOccurrences (Node root children) =
      let children' = withInnersForest children
          root' = root { _ne_occurrences = (_ne_occurrences root) <> foldMap (_ne_occurrences . rootLabel) children' }
      in Node root' children'

sortAndPaginateForest :: Maybe Offset
                      -> Limit
                      -> Maybe OrderBy
                      -> Forest NgramsElement
                      -> Forest NgramsElement
sortAndPaginateForest mb_offset limit orderBy xs =
  let offset'  = getOffset $ maybe 0 identity mb_offset
  in   take (getLimit limit)
     . drop offset'
     . sortOnOrderForest orderBy
     $ xs

-- Sorts the input 'NgramsElement' list.
-- /IMPORTANT/: As we might be sorting ngrams in all sorts of language,
-- some of them might include letters with accents and other unicode symbols,
-- but we need to filter those /diacritics/ out so that the sorting would
-- happen in the way users would expect. See ticket #331.
sortOnOrderForest :: Maybe OrderBy -> (Forest NgramsElement -> Forest NgramsElement)
sortOnOrderForest Nothing          = sortOnOrderForest (Just ScoreDesc)
sortOnOrderForest (Just TermAsc)   = List.sortBy (\(Node t1 _) (Node t2 _) -> ngramTermsAscSorter t1 t2)
sortOnOrderForest (Just TermDesc)  = List.sortBy (\(Node t1 _) (Node t2 _) -> ngramTermsDescSorter t1 t2)
sortOnOrderForest (Just ScoreAsc)  = List.sortOn $ \(Node root _) -> root ^. (ne_occurrences . to Set.size)
sortOnOrderForest (Just ScoreDesc) = List.sortOn $ Down . (\(Node root _) -> root ^. (ne_occurrences . to Set.size))

ngramTermsAscSorter :: NgramsElement -> NgramsElement -> Ordering
ngramTermsAscSorter  = on unicodeDUCETSorter (unNgramsTerm . view ne_ngrams)

ngramTermsDescSorter :: NgramsElement -> NgramsElement -> Ordering
ngramTermsDescSorter = on (\n1 n2 -> unicodeDUCETSorter n2 n1) (unNgramsTerm . view ne_ngrams)

-- | This function allows sorting two texts via their unicode sorting
-- (as opposed as the standard lexicographical sorting) by relying on
-- the DUCET table, a table that specifies the ordering of all unicode
-- characters. This is enough for mimicking the \"natural sort\" effect
-- that users would expect.
unicodeDUCETSorter :: Text -> Text -> Ordering
unicodeDUCETSorter = Unicode.collate Unicode.rootCollator

getTableNgrams :: NodeStoryEnv err
               -> NodeId
               -> ListId
               -> TabType
               -> NgramsSearchQuery
               -> DBQuery err x (VersionedWithCount NgramsTable)
getTableNgrams env nodeId listId tabType searchQuery = do
  let ngramsType = ngramsTypeFromTabType tabType
  versionedInput <- getNgramsTable' env nodeId listId ngramsType
  -- FIXME(adn) In case of a loop at the moment we just return the
  -- empty result set, but we should probably bubble the error upstream.
  pure $ case searchTableNgrams versionedInput searchQuery of
    Left _err -> VersionedWithCount 0 0 (NgramsTable mempty)
    Right x   -> x

-- | Helper function to get the ngrams table with scores.
getNgramsTable' :: NodeStoryEnv err
                -> NodeId
                -> ListId
                -> NgramsType
                -> DBQuery err x (Versioned (Map.Map NgramsTerm NgramsElement))
getNgramsTable' env nId listId ngramsType = do
  tableMap <- getNgramsTableMap env listId ngramsType
  tableMap & v_data %%~ setNgramsTableScores nId listId ngramsType
                        . Map.mapWithKey ngramsElementFromRepo

-- | Helper function to set scores on an `NgramsTable`.
setNgramsTableScores :: forall err t x.
                        ( Each t t NgramsElement NgramsElement
                        , Show t
                        )
                     => NodeId
                     -> ListId
                     -> NgramsType
                     -> t
                     -> DBQuery err x t
setNgramsTableScores nId listId ngramsType table = do
  -- FIXME(adn) RESTORE these!
  --t1 <- getTime
  occurrences <- getOccByNgramsOnlyFast nId listId ngramsType
  $(txLogLocM) DEBUG $ "occurrences: " <> T.pack (show occurrences)
  --t2 <- getTime
  let ngrams_terms = table ^.. each . ne_ngrams
  $(txLogLocM) DEBUG $ "ngrams_terms: " <> show ngrams_terms
  -- $(txLogLocM) DEBUG $ sformat ("getTableNgrams/setScores #ngrams=" % int % " time=" % hasTime % "\n") (length ngrams_terms) t1 t2
  let setOcc ne = ne & ne_occurrences .~ Set.fromList (msumOf (ix (ne ^. ne_ngrams)) occurrences)

  $(txLogLocM) DEBUG $ "with occurences: " <> T.pack (show $ table & each %~ setOcc)

  pure $ table & each %~ setOcc


-- APIs

-- TODO: find a better place for the code above, All APIs stay here

getTableNgramsCorpusHandler :: (IsDBCmd err env m, HasNodeStoryEnv err env)
                            => NodeId
                            -> TabType
                            -> ListId
                            -> Limit
                            -> Maybe Offset
                            -> Maybe ListType
                            -> Maybe MinSize -> Maybe MaxSize
                            -> Maybe OrderBy
                            -> Maybe Text -- full text search
                            -> m (VersionedWithCount NgramsTable)
getTableNgramsCorpusHandler nId tabType listId limit_ offset listType minSize maxSize orderBy mt = do
  env <- view hasNodeStory
  runDBQuery $ getTableNgramsCorpus env nId tabType listId limit_ offset listType minSize maxSize orderBy mt

getTableNgramsCorpus :: NodeStoryEnv err
                     -> NodeId
                     -> TabType
                     -> ListId
                     -> Limit
                     -> Maybe Offset
                     -> Maybe ListType
                     -> Maybe MinSize -> Maybe MaxSize
                     -> Maybe OrderBy
                     -> Maybe Text -- full text search
                     -> DBQuery err x (VersionedWithCount NgramsTable)
getTableNgramsCorpus env nId tabType listId limit_ offset listType minSize maxSize orderBy mt =
  getTableNgrams env nId listId tabType searchQuery
    where
      searchQueryFn (NgramsTerm nt) = maybe (const True) (isInfixOf . toLower) mt (toLower nt)
      searchQuery = NgramsSearchQuery {
                    _nsq_limit       = limit_
                  , _nsq_offset      = offset
                  , _nsq_listType    = listType
                  , _nsq_minSize     = minSize
                  , _nsq_maxSize     = maxSize
                  , _nsq_orderBy     = orderBy
                  , _nsq_searchQuery = searchQueryFn
                  }




  -- TODO: limit?
  -- Versioned { _v_version = v } <- getTableNgramsCorpus nId tabType listId 100000 Nothing Nothing Nothing Nothing Nothing Nothing
  -- This line above looks like a waste of computation to finally get only the version.
  -- See the comment about listNgramsChangedSince.


-- Did the given list of ngrams changed since the given version?
-- The returned value is versioned boolean value, meaning that one always retrieve the
-- latest version.
-- If the given version is negative then one simply receive the latest version and True.
-- Using this function is more precise than simply comparing the latest version number
-- with the local version number. Indeed there might be no change to this particular list
-- and still the version number has changed because of other lists.
--
-- Here the added value is to make a compromise between precision, computation, and bandwidth:
-- * currentVersion: good computation, good bandwidth, bad precision.
-- * listNgramsChangedSince: good precision, good bandwidth, bad computation.
-- * tableNgramsPull: good precision, good bandwidth (if you use the received data!), bad computation.
listNgramsChangedSince :: NodeStoryEnv err
                       -> ListId -> NgramsType -> Version -> DBQuery err x (Versioned Bool)
listNgramsChangedSince env listId ngramsType version
  | version < 0 =
      Versioned <$> currentVersion listId <*> pure True
  | otherwise   =
      tableNgramsPull env listId ngramsType version <&> v_data %~ (== mempty)
