{-|
Module      : Gargantext.Database.Action.Flow.Utils
Description : Database Flow
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX
-}

module Gargantext.Database.Action.Flow.Utils
  ( docNgrams
  , documentIdWithNgrams
  , insertDocNgrams
  , insertDocs
  , mapNodeIdNgrams
  , ngramsByDoc )
  where

import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.Map.Strict qualified as DM
import Data.Text qualified as T
import Gargantext.API.Ngrams.Types qualified as NT
import Gargantext.Core (Lang, toDBid)
import Gargantext.Core.Flow.Types (UniqId, uniqId)
import Gargantext.Core.Text.Ngrams ( Ngrams, NgramsType )
import Gargantext.Core.Text.Terms (ExtractedNgrams(..))
import Gargantext.Core.Text.Terms.WithList (MatchedText, buildPatternsWith, termsInText)
import Gargantext.Core.Types (TermsCount, TermsWeight(..))
import Gargantext.Core.Utils (addTuples)
import Gargantext.Data.HashMap.Strict.Utils qualified as HashMap
import Gargantext.Database.Action.Flow.Types (DocumentIdWithNgrams(..), FlowInsertDB)
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_abstract, hd_title )
import Gargantext.Database.Admin.Types.Node
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Table.ContextNodeNgrams ( ContextNodeNgramsPoly(..), insertContextNodeNgrams )
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Doc (add)
import Gargantext.Database.Query.Table.Node.Document.Insert (ReturnId, addUniqId, insertDb, reId, reInserted, reUniqId)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError(..))
import Gargantext.Database.Schema.Context (context_oid_hyperdata, context_oid_id)
import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTypeId(..), text2ngrams)
import Gargantext.Database.Types ( Indexed(..), index )
import Gargantext.Prelude
import Gargantext.Prelude.Crypto.Hash (Hash)


-- | Flatten the nested @ngrams -> ngrams type -> doc -> (weight, count)@
-- map into one row per (ngrams, type, doc) triple and insert those rows
-- via 'insertContextNodeNgrams', attached to the given list.
-- Returns the number of inserted rows.
insertDocNgrams :: ListId
                -> HashMap (Indexed NgramsId Ngrams) (Map NgramsType (Map DocId (TermsWeight, TermsCount)))
                -> DBUpdate err Int
insertDocNgrams lId m = insertContextNodeNgrams ns
  where
    ns = [ ContextNodeNgrams (nodeId2ContextId docId)
                             lId
                             (ng ^. index)
                             (NgramsTypeId $ toDBid t)
                             (fromIntegral $ unTermsWeight w)
                             cnt
         | (ng, t2n2i)       <- HashMap.toList m
         , (t, n2i)          <- DM.toList t2n2i
         , (docId, (w, cnt)) <- DM.toList n2i ]

-- | Given a language and a list of terms, return the terms found in the
-- document's title and abstract, with their occurrence counts.
-- This is a pure function (doesn't use corenlp nor PostgreSQL FTS).
docNgrams :: Lang
          -> [NT.NgramsTerm]
          -> ContextOnlyId HyperdataDocument
          -> [(MatchedText, TermsCount)]
docNgrams lang ts doc =
  termsInText lang (buildPatternsWith lang ts)
    $ T.unlines
    $ catMaybes [ doc ^. context_oid_hyperdata . hd_title
                , doc ^. context_oid_hyperdata . hd_abstract ]

-- | Run the ngrams-extraction action @f@ on the payload of each indexed
-- document, pairing every document with its extracted ngrams.
documentIdWithNgrams :: HasNodeError err
                     => (a -> DBCmd err (HashMap.HashMap b (Map NgramsType TermsWeight, TermsCount)))
                     -> [Indexed NodeId a]
                     -> DBCmd err [DocumentIdWithNgrams a b]
documentIdWithNgrams f = traverse toDocumentIdWithNgrams
  where
    toDocumentIdWithNgrams d = do
      e <- f $ _unIndex d
      pure $ DocumentIdWithNgrams d e

-- | Regroup per-document ngrams into a single ngrams-keyed map.
--
-- Input  : [(NodeId, {Ngrams: ({NgramsType: Int}, TermsCount)})]
-- Output : {Ngrams: {NgramsType: {NodeId: (Int, TermsCount)}}}
--
-- TODO check optimization
mapNodeIdNgrams :: (Ord b, Hashable b)
                => [DocumentIdWithNgrams a b]
                -> HashMap.HashMap b (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
mapNodeIdNgrams = HashMap.unionsWith (DM.unionWith (DM.unionWith addTuples)) . fmap f
  where
    -- NOTE We are somehow multiplying 'TermsCount' here: If the same
    -- ngrams term has different ngrams types, the 'TermsCount' for it
    -- (which is the number of times the terms appears in a document)
    -- is copied over to all its types.
    f :: DocumentIdWithNgrams a b
      -> HashMap.HashMap b (Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
    f d = fmap (\(ngramsTypeMap, cnt) -> fmap (\w -> DM.singleton nId (w, cnt)) ngramsTypeMap)
        $ documentNgrams d
      where
        nId = _index $ documentWithId d

-- | Insert documents under the given corpus: tag each document with a
-- unique id ('addUniqId'), insert them in the database, attach the new
-- contexts to the corpus ('Doc.add'), and return the new context ids
-- together with the inserted documents indexed by those ids.
-- TODO Type NodeDocumentUnicised
insertDocs :: FlowInsertDB a
           => UserId
           -> CorpusId
           -> [a]
           -> DBUpdate err ([ContextId], [Indexed ContextId a])
insertDocs uId cId hs = do
  let docs = map addUniqId hs
  newIds <- insertDb uId Nothing docs
  let newIds'         = map (nodeId2ContextId . reId) newIds
      documentsWithId = mergeData (toInserted newIds) (DM.fromList $ map viewUniqId' docs)
  _ <- Doc.add cId newIds'
  pure (newIds', map (first nodeId2ContextId) documentsWithId)

------------------------------------------------------------------------

-- | Pair a value with its unique hash; panics if the hash is missing
-- (it should always have been set by 'addUniqId' beforehand).
viewUniqId' :: UniqId a => a -> (Hash, a)
viewUniqId' d = maybe err (\h -> (h, d)) (d ^. uniqId)
  where
    err = panicTrace "[ERROR] Database.Flow.toInsert"

-- | Join the insertion results with the original documents by unique
-- hash, keeping only documents whose hash is present in the results.
mergeData :: Map Hash ReturnId -> Map Hash a -> [Indexed NodeId a]
mergeData rs = mapMaybe toDocumentWithId . DM.toList
  where
    toDocumentWithId (sha, hpd) =
      Indexed <$> fmap reId (DM.lookup sha rs) <*> Just hpd

-- | Index the freshly inserted rows by their unique hash, dropping rows
-- that were not actually inserted.
toInserted :: [ReturnId] -> Map Hash ReturnId
toInserted = DM.fromList
           . map (\r -> (reUniqId r, r))
           . filter reInserted

-- Apparently unused functions

-- | TODO putelsewhere
-- | Upgrade function
-- Suppose all documents are English (this is the case actually)
-- indexAllDocumentsWithPosTag :: ( HasNodeStory env err m
--                                , HasNLPServer env )
--                             => m ()
-- indexAllDocumentsWithPosTag = do
--   rootId <- getRootId (UserName userMaster)
--   corpusIds <- findNodesId rootId [NodeCorpus]
--   docs <- List.concat <$> mapM getDocumentsWithParentId corpusIds
--   _ <- mapM extractInsert (splitEvery 1000 docs)
--   pure ()

-- | Given list of terms and a document, produce a map for this doc's
-- terms count and weights. Notice that the weight is always 1 here.
ngramsByDoc :: Lang
            -> NgramsType
            -> [NT.NgramsTerm]
            -> ContextOnlyId HyperdataDocument
            -> HashMap.HashMap ExtractedNgrams (DM.Map NgramsType (Map NodeId (TermsWeight, TermsCount)))
ngramsByDoc l nt ts doc = fmap mkEntry counts
  where
    -- Context id of the document the terms were matched in.
    docId :: NodeId
    docId = doc ^. context_oid_id
    -- Total occurrences per extracted ngram in this document's text.
    counts :: HashMap.HashMap ExtractedNgrams TermsCount
    counts = HashMap.fromListWith (+)
               [ (SimpleNgrams (text2ngrams t), c)
               | (t, c) <- docNgrams l ts doc ]
    -- Build the nested singleton map; the weight is fixed at 1.
    mkEntry :: TermsCount -> DM.Map NgramsType (Map NodeId (TermsWeight, TermsCount))
    mkEntry c = DM.singleton nt (DM.singleton docId (1, c))