{-# LANGUAGE BangPatterns #-}
{-|
Module      : Gargantext.Database.Flow.Utils
Description : Database Flow
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

module Gargantext.Database.Action.Flow.Utils
  ( docNgrams
  , docNgrams'
  , documentIdWithNgrams
  , mapDocumentIdWithNgrams
  , insertDocNgrams
  , insertDocs
  , insertDoc
  , mkNodeIdNgramsMap
  , ngramsByDoc )
where

import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.Map.Strict qualified as DM
import Data.Text qualified as T
import Gargantext.API.Ngrams.Types qualified as NT
import Gargantext.Core (Lang, toDBid)
import Gargantext.Core.Text.Ngrams ( Ngrams, NgramsType )
import Gargantext.Core.Text.Terms (ExtractedNgrams(..))
import Gargantext.Core.Text.Terms.WithList (MatchedText, buildPatternsWith, termsInText)
import Gargantext.Core.Types (TermsCount, TermsWeight(..))
import Gargantext.Core.Utils (addTuples)
import Gargantext.Data.HashMap.Strict.Utils qualified as HashMap
import Gargantext.Database.Action.Flow.Types (DocumentIdWithNgrams(..), FlowInsertDB)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_abstract, hd_title )
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.ContextNodeNgrams ( ContextNodeNgramsPoly(..), insertContextNodeNgrams )
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
import Gargantext.Database.Query.Table.Node.Document.Insert (addUniqId, reId, insertDbOne)
import Gargantext.Database.Schema.Context (context_oid_hyperdata, context_oid_id)
import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTypeId(..), text2ngrams)
import Gargantext.Database.Types ( Indexed(..), index )
import Gargantext.Prelude


insertDocNgrams :: ListId
                -> HashMap (Indexed NgramsId Ngrams) (Map NgramsType (Map DocId (TermsWeight, TermsCount)))
                -> DBUpdate err Int
insertDocNgrams lId m = do
  -- printDebug "[insertDocNgrams] ns" ns
  insertContextNodeNgrams ns
  where
    ns = [ ContextNodeNgrams (nodeId2ContextId docId)
                             lId (ng^.index)
                             (NgramsTypeId $ toDBid t)
                             (fromIntegral $ unTermsWeight w)
                             cnt
         | (ng, t2n2i)       <- HashMap.toList m
         , (t,  n2i)         <- DM.toList t2n2i
         , (docId, (w, cnt)) <- DM.toList n2i
         ]

-- [(NodeId, {Ngrams: ({NgramsType: Int}, TermsCount)})]
-- {Ngrams: {NgramsType: {NodeId: (Int, TermsCount)}}}



-- | Given language, ngrams type, a list of terms and a
-- HyperdataDocument, return ngrams that are in this text, with
-- counts.  This is a pure function (doesn't use corenlp nor
-- PostgreSQL FTS).
docNgrams :: Lang
          -> [NT.NgramsTerm]
          -> ContextOnlyId HyperdataDocument
          -> [(MatchedText, TermsCount)]
docNgrams lang ts doc =
  docNgrams' lang ts
      $ T.unlines $ catMaybes
        [ doc ^. context_oid_hyperdata . hd_title
        , doc ^. context_oid_hyperdata . hd_abstract
        ]

-- | Given language, ngrams type, a list of terms and a text, return
-- ngrams that are in this text, with counts.
docNgrams' :: Lang
           -> [NT.NgramsTerm]
           -> Text
           -> [(MatchedText, TermsCount)]
docNgrams' lang ts txt =
  termsInText lang (buildPatternsWith lang ts) txt


documentIdWithNgrams :: Monad m
                     => ( a -> m (HashMap.HashMap b (Map NgramsType TermsWeight, TermsCount)) )
                     -> Indexed ix a
                     -> m (DocumentIdWithNgrams ix a b)
documentIdWithNgrams f = toDocumentIdWithNgrams
  where
    toDocumentIdWithNgrams d = do
      e <- f $ _unIndex         d
      pure $ DocumentIdWithNgrams { documentWithId = d
                                  , documentNgrams = e }

mapDocumentIdWithNgrams :: Monad m
                        => ( a -> m (HashMap.HashMap b (Map NgramsType TermsWeight, TermsCount)) )
                        -> [Indexed ix a]
                        -> m [DocumentIdWithNgrams ix a b]
mapDocumentIdWithNgrams f = mapM (documentIdWithNgrams f)


-- | Creates a NodeIdNgrams map out of the input 'DocumentIdWithNgrams' list.
-- TODO check optimization
mkNodeIdNgramsMap :: forall ix a b. (Ord b, Hashable b, Ord ix)
                => [DocumentIdWithNgrams ix a b]
                -> HashMap.HashMap b
                       (Map NgramsType
                            (Map ix (TermsWeight, TermsCount))
                       )
mkNodeIdNgramsMap = HashMap.unionsWith (DM.unionWith (DM.unionWith addTuples)) . fmap f
  where
    -- | NOTE We are somehow multiplying 'TermsCount' here: If the
    -- same ngrams term has different ngrams types, the 'TermsCount'
    -- for it (which is the number of times the terms appears in a
    -- document) is copied over to all its types.
    f :: DocumentIdWithNgrams ix a b
      -> HashMap.HashMap b (Map NgramsType (Map ix (TermsWeight, TermsCount)))
    f d = fmap (\(ngramsTypeMap, cnt) -> fmap (\w -> DM.singleton nId (w, cnt)) ngramsTypeMap) $ documentNgrams d
      where
        nId = _index $ documentWithId d


-- TODO Type NodeDocumentUnicised
insertDocs :: FlowInsertDB a
           => UserId
           -> CorpusId
           -> [a]
           -> DBUpdate err ([ContextId], [Indexed ContextId a])
insertDocs uId cId hs = bimap reverse reverse . foldl collectIds (mempty, mempty)
  <$> mapM (insertDoc uId cId) hs
  where
    collectIds :: ([ContextId], [Indexed ContextId a]) -> Indexed ContextId a -> ([ContextId], [Indexed ContextId a])
    collectIds (!ctxs, !indexed) a = (_index a : ctxs, a : indexed)

insertDoc :: FlowInsertDB a
          => UserId
          -> CorpusId
          -> a
          -> DBUpdate err (Indexed ContextId a)
insertDoc uId cId h = do
  let doc = addUniqId h
  newId  <- nodeId2ContextId . reId <$> insertDbOne uId Nothing doc
  void $ Doc.add cId [newId]
  pure $ Indexed newId doc

-- Apparently unused functions


-- | TODO putelsewhere
-- | Upgrade function
-- Suppose all documents are English (this is the case actually)
-- indexAllDocumentsWithPosTag :: ( HasNodeStory env err m
--                                , HasNLPServer env )
--                             => m ()
-- indexAllDocumentsWithPosTag = do
--   rootId    <- getRootId (UserName userMaster)
--   corpusIds <- findNodesId rootId [NodeCorpus]
--   docs      <- List.concat <$> mapM getDocumentsWithParentId corpusIds
--   _ <- mapM extractInsert (splitEvery 1000 docs)
--   pure ()



-- | Given list of terms and a document, produce a map for this doc's
-- terms count and weights. Notice that the weight is always 1 here.
ngramsByDoc :: Lang
            -> NgramsType
            -> [NT.NgramsTerm]
            -> ContextOnlyId HyperdataDocument
            -> HashMap.HashMap ExtractedNgrams (DM.Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
ngramsByDoc l nt ts doc =
  HashMap.map (\cnt -> DM.singleton nt $ DM.singleton nId (1, cnt)) extractedMap
  where
    matched :: [(MatchedText, TermsCount)]
    matched = docNgrams l ts doc

    nId :: NodeId
    nId = doc ^. context_oid_id

    withExtractedNgrams :: [(ExtractedNgrams, TermsCount)]
    withExtractedNgrams = first (SimpleNgrams . text2ngrams) <$> matched

    extractedMap :: HashMap.HashMap ExtractedNgrams TermsCount
    extractedMap = HashMap.fromListWith (+) withExtractedNgrams
