{-|
Module      : Gargantext.Database.Flow.Extract
Description : Database Flow
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

{-# OPTIONS_GHC -fno-warn-orphans #-}

{-# LANGUAGE InstanceSigs            #-}
{-# LANGUAGE ScopedTypeVariables     #-}


module Gargantext.Database.Action.Flow.Extract
    where

import Control.Lens (_Just, view)
import Data.HashMap.Strict qualified as HashMap
import Data.Map.Strict qualified as DM
import Gargantext.Core (Lang, NLPServerConfig(server))
import Gargantext.Core.Text (HasText(..))
import Gargantext.Core.Text.Corpus.Parsers (splitOn)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Text.Terms (ExtractNgramsT, ExtractedNgrams(..), TermType, cleanExtractedNgrams, enrichedTerms, extractNgramsT, extractTerms, tt_lang)
import Gargantext.Core.Types (POS(NP), TermsCount, TermsWeight)
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact, cw_lastName, hc_who )
import Gargantext.Database.Admin.Types.Hyperdata.Document ( HyperdataDocument, hd_authors, hd_bdd, hd_institutes, hd_source )
import Gargantext.Database.Admin.Types.Node ( Node )
import Gargantext.Database.Prelude (DBCmd)
import Gargantext.Database.Query.Table.NgramsPostag (NgramsPostag)
import Gargantext.Database.Schema.Ngrams ( text2ngrams )
import Gargantext.Database.Schema.Node (NodePoly(..))
import Gargantext.Prelude



------------------------------------------------------------------------
instance ExtractNgramsT HyperdataContact
  where
    extractNgramsT _ncs l hc = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extract l hc
      where
        extract :: TermType Lang -> HyperdataContact
                -> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
        extract _l hc' = do
          let authors = map text2ngrams
                      $ maybe ["Nothing"] (\a -> [a])
                      $ view (hc_who . _Just . cw_lastName) hc'

          pure $ HashMap.fromList $ [(SimpleNgrams a', (DM.singleton Authors 1, 1)) | a' <- authors ]


-- | Main ngrams extraction functionality.
--   For NgramsTerms, this calls NLP server under the hood.
--   For Sources, Institutes, Authors, this uses simple split on " ".
instance ExtractNgramsT HyperdataDocument
  where
    extractNgramsT :: NLPServerConfig
                   -> TermType Lang
                   -> HyperdataDocument
                   -> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
    extractNgramsT ncs lang hd = HashMap.mapKeys (cleanExtractedNgrams 255) <$> extractNgramsT' hd
      where
        extractNgramsT' :: HyperdataDocument
                        -> DBCmd err (HashMap.HashMap ExtractedNgrams (Map NgramsType TermsWeight, TermsCount))
        extractNgramsT' doc = do
          let source    = text2ngrams
                        $ maybe "Nothing" identity
                        $ doc ^. hd_source

              institutes = map text2ngrams
                         $ maybe ["Nothing"] (splitOn Institutes (doc^. hd_bdd))
                         $ doc ^. hd_institutes

              authors    = map text2ngrams
                         $ maybe ["Nothing"] (splitOn Authors (doc^. hd_bdd))
                         $ doc ^. hd_authors

          termsWithCounts' :: [(NgramsPostag, TermsCount)] <-
            map (first (enrichedTerms (lang ^. tt_lang) (server ncs) NP)) . concat <$>
                                  liftBase (extractTerms ncs lang $ hasText doc)

          pure $ HashMap.fromList
               $  [(SimpleNgrams source, (DM.singleton Sources     1, 1))                    ]
               <> [(SimpleNgrams     i', (DM.singleton Institutes  1, 1)) | i' <- institutes ]
               <> [(SimpleNgrams     a', (DM.singleton Authors     1, 1)) | a' <- authors    ]
               <> [(EnrichedNgrams   t', (DM.singleton NgramsTerms 1, cnt')) | (t', cnt') <- termsWithCounts' ]

instance (ExtractNgramsT a, HasText a) => ExtractNgramsT (Node a)
  where
    extractNgramsT ncs l (Node { _node_hyperdata = h }) = extractNgramsT ncs l h


instance HasText a => HasText (Node a)
  where
    hasText (Node { _node_hyperdata = h }) = hasText h


-- Apparently unused functions

-- extractInsert :: ( HasNodeStory env err m
--                  , HasNLPServer env )
--               => [Node HyperdataDocument] -> m ()
-- extractInsert docs = do
--   let documentsWithId = map (\doc -> Indexed (doc ^. node_id) doc) docs
--   let lang = EN
--   ncs <- view $ nlpServerGet lang
--   mapNgramsDocs' <- mapNodeIdNgrams
--                 <$> documentIdWithNgrams
--                     (extractNgramsT ncs $ withLang (Multi lang) documentsWithId)
--                     documentsWithId
--   _ <- insertExtractedNgrams $ HashMap.keys mapNgramsDocs'
--   pure ()