module Gargantext.Core.Text.Corpus (makeSubcorpusFromQuery, subcorpusEasy) where

import Control.Lens (view)
import Data.Set.Internal qualified as Set (singleton)
import Data.Text qualified as T
import Gargantext.API.Dev (runCmdReplEasy)
import Gargantext.API.Errors.Types (BackendInternalError(InternalNodeError))
import Gargantext.Core (Lang(EN))
import Gargantext.Core.NodeStory.Types (HasNodeStoryEnv, hasNodeStory)
import Gargantext.Core.NLP (HasNLPServer)
import Gargantext.Core.Text.Corpus.Query qualified as Q
import Gargantext.Core.Text.List.Social (FlowSocialListWith (..), FlowSocialListPriority (..))
import Gargantext.Core.Text.Ngrams (NgramsType (..))
import Gargantext.Core.Types.Individu (User (..))
import Gargantext.Core.Types.Main (ListType (..))
import Gargantext.Database.Action.Flow (buildSocialList, reIndexWith)
import Gargantext.Database.Action.Metrics (updateContextScore, updateNgramsOccurrences)
import Gargantext.Database.Action.Search (searchInCorpus)
import Gargantext.Database.Action.User (getUserId)
import Gargantext.Database.Admin.Types.Hyperdata.Corpus (HyperdataCorpus, hc_lang)
import Gargantext.Database.Admin.Types.Node (CorpusId, NodeId(UnsafeMkNodeId), NodeType(..), nodeId2ContextId)
import Gargantext.Database.Prelude
import Gargantext.Database.Query.Facet.Types (facetDoc_id)
import Gargantext.Database.Query.Table.Node (insertDefaultNode, copyNodeStories, defaultList, getNodeWithType)
import Gargantext.Database.Query.Table.Node.Document.Add qualified as Document (add)
import Gargantext.Database.Query.Table.Node.Error (NodeError(NoCorpusFound))
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude



-- | REPL-friendly wrapper around 'makeSubcorpusFromQuery': it takes plain
-- values (so you don't need to manually import tons of constructors etc.),
-- parses the raw search string and, if parsing succeeds, runs the subcorpus
-- creation through 'runCmdReplEasy', discarding its result.
--
-- If the search string cannot be parsed, an error message is written to
-- standard output and nothing else is done.
subcorpusEasy :: Text -- ^ Username
              -> Int  -- ^ Original corpus ID
              -> Text -- ^ Search string
              -> Bool -- ^ Whether to reuse the parent term list (True) or recompute one from scratch (False)
              -> IO ()
subcorpusEasy username cId rawQuery reuseParentList =
  case Q.parseQuery (Q.RawQuery rawQuery) of
    -- Deliberately 'putStrLn' and not 'print': 'print' goes through 'Show',
    -- which would wrap the whole message in quotes and backslash-escape the
    -- quotes placed around the query.
    Left msg -> putStrLn $ "Error parsing query \"" <> T.unpack rawQuery <> "\": " <> msg
    Right query -> void $ runCmdReplEasy $ makeSubcorpusFromQuery (UserName username) (UnsafeMkNodeId cId) query reuseParentList


-- | Build a subcorpus from the documents of a "parent" corpus that match a
--   query. The new corpus is inserted in the tree as a child of the parent
--   corpus, and its own "Docs" and "Terms" nodes are created along with it.
--   The terms are either copied from the parent corpus or recomputed from the
--   subcorpus docs; in both cases the term list is then reindexed against the
--   contexts of the new subcorpus.
-- TODO(adn) Make (more) DB-transactional.
makeSubcorpusFromQuery :: ( HasNodeStoryEnv env BackendInternalError
                          , HasNLPServer    env
                          )
  => User     -- ^ Owner of the corpus
  -> CorpusId -- ^ ID of the parent corpus
  -> Q.Query  -- ^ Query selecting which documents of the parent end up in the subcorpus
  -> Bool     -- ^ True: reuse the parent term list; False: compute a new one from the subcorpus documents only
  -> DBCmdWithEnv env BackendInternalError CorpusId -- ^ ID of the newly created child corpus
makeSubcorpusFromQuery user supercorpusId query reuseParentList = do
  env <- view hasNodeStory

  -- Step 1: create the node skeleton and attach the matching documents,
  -- all inside one 'runDBTx' block.
  (subcorpusId, subListId, superListId) <- runDBTx $ do
    ownerId <- getUserId user
    -- The subcorpus root, placed under the original corpus root
    newCorpusId <- insertDefaultNode NodeCorpus supercorpusId ownerId
    -- The context (aka "Docs") node, placed under the subcorpus root
    _ <- insertDefaultNode NodeTexts newCorpusId ownerId
    -- The terms (aka "List") node, also under the subcorpus root
    newListId <- insertDefaultNode NodeList newCorpusId ownerId
    -- The terms node of the original corpus
    parentListId <- defaultList supercorpusId

    -- Collect every context matching the query and add it to the subcorpus.
    -- Contexts are attached to the *corpus* node, not to the *docs* node,
    -- notwithstanding what you might think from the UI.
    matchingDocs <- searchInCorpus supercorpusId False query Nothing Nothing Nothing
    _ <- Document.add newCorpusId (fmap (nodeId2ContextId . facetDoc_id) matchingDocs)
    pure (newCorpusId, newListId, parentListId)

  -- Step 2: populate the term list of the subcorpus.
  case reuseParentList of
    -- Simply copy the parent terms over...
    True -> runDBTx $ void $ copyNodeStories superListId subListId
    -- ... or rebuild a term list from scratch.
    -- TODO Check whether reusing the parent hyperdata is the right thing to do
    False -> do
      -- Fetch the hyperdata of the original corpus; anything other than
      -- exactly one matching node is reported as 'NoCorpusFound'.
      parentCorpora <- runDBQuery $ getNodeWithType supercorpusId NodeCorpus (Proxy :: Proxy HyperdataCorpus)
      parentHyperdata <- case parentCorpora of
        [parentCorpus] -> pure $ view node_hyperdata parentCorpus
        _ -> throwError $ InternalNodeError NoCorpusFound

      let -- Fall back to English when the parent corpus has no language set
          corpusLang = fromMaybe EN $ view hc_lang parentHyperdata
          -- TODO Not completely sure what this parameter is for, but I am
          -- guessing there should be a dialog to let the user decide what it
          -- should be
          flowWith :: Maybe FlowSocialListWith
          flowWith = Just (FlowSocialListWithPriority MySelfFirst)

      -- NOTE(adn) Unfortunately this call prevents us from running the whole
      -- function in a single DBTx, because deep down it relies on the NLP
      -- server to extract the ngrams, something that could happen before
      -- calling this.
      buildSocialList corpusLang user subcorpusId subListId (Just parentHyperdata) flowWith

  -- Step 3: whichever way the list was obtained, reindex it so that it
  -- matches the contexts of the newly created subcorpus, then refresh scores
  -- and occurrences.
  runDBTx $ do
    reIndexWith env subcorpusId subListId NgramsTerms (Set.singleton MapTerm)
    void $ updateContextScore      env subcorpusId subListId
    void $ updateNgramsOccurrences env subcorpusId subListId
    pure subcorpusId
