{-|
Module      : Gargantext.Database.Flow
Description : Database Flow
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

{-# OPTIONS_GHC -fno-warn-deprecations #-}

{-# LANGUAGE Arrows      #-}

module Gargantext.Database.Action.Flow.Pairing
  -- (pairing)
    where

import Control.Lens (_Just, view)
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HM
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Set qualified as Set
import Data.Text qualified as Text
import Gargantext.API.Ngrams.Tools ( filterListWithRoot, getRepo, groupNodesByNgrams, mapTermListRoot )
import Gargantext.API.Ngrams.Types (NgramsTerm(..))
import Gargantext.Core ( HasDBid(toDBid) )
import Gargantext.Core.NodeStory.Types ( HasNodeStory )
import Gargantext.Core.Text.Metrics.CharByChar (levenshtein)
import Gargantext.Core.Text.Ngrams (NgramsType(..))
import Gargantext.Core.Types (TableResult(..))
import Gargantext.Core.Types.Main ( ListType(CandidateTerm, MapTerm) )
import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser)
import Gargantext.Database.Admin.Config ( userMaster )
import Gargantext.Database.Admin.Types.Hyperdata.Contact ( HyperdataContact, cw_firstName, cw_lastName, hc_who ) -- (HyperdataContact(..))
import Gargantext.Database.Admin.Types.Node -- (AnnuaireId, CorpusId, ListId, DocId, ContactId, NodeId)
import Gargantext.Database.Prelude (DBCmd, DBCmdExtra, runOpaQuery)
import Gargantext.Database.Query.Prelude (returnA)
import Gargantext.Database.Query.Table.Node (defaultList)
import Gargantext.Database.Query.Table.Node.Children (getAllContacts)
import Gargantext.Database.Query.Table.Node.Error (HasNodeError)
import Gargantext.Database.Query.Table.Node.Select (selectNodesWithUsername)
import Gargantext.Database.Query.Table.NodeContext_NodeContext (insertNodeContext_NodeContext)
import Gargantext.Database.Query.Table.NodeNode 
import Gargantext.Database.Schema.Node ( node_hyperdata, node_id, node_typename, queryNodeTable )
import Gargantext.Prelude hiding (sum)
import Opaleye

-- | isPairedWith
-- All NodeAnnuaire paired with a Corpus of NodeId nId:
-- isPairedWith NodeAnnuaire corpusId
isPairedWith :: NodeId -> NodeType -> DBCmdExtra err [NodeId]
isPairedWith nId nt = runOpaQuery (selectQuery nt nId)
  where
    selectQuery :: NodeType -> NodeId -> Select (Column SqlInt4)
    selectQuery nt' nId' = proc () -> do
      node <- queryNodeTable -< ()
      node_node <- optionalRestrict queryNodeNodeTable -<
        \node_node' -> (node ^. node_id) .== (node_node' ^. nn_node2_id)
      restrict -< (node^.node_typename)  .== sqlInt4 (toDBid nt')
      restrict -< (view nn_node1_id <$> node_node) .=== justFields (pgNodeId nId')
      returnA  -<  node^.node_id

-----------------------------------------------------------------------
pairing :: (HasNodeStory env err m, HasNodeError err)
        => AnnuaireId -> CorpusId -> Maybe ListId -> m [Int]
pairing a c l' = do
  l <- case l' of
    Nothing -> defaultList c
    Just l'' -> pure l''
  dataPaired <- dataPairing a (c,l,Authors)
  pairCorpusWithAnnuaire (SourceId c) (TargetId a)
  insertNodeContext_NodeContext $ prepareInsert c a dataPaired


dataPairing :: HasNodeStory env err m
             => AnnuaireId
             -> (CorpusId, ListId, NgramsType)
             -> m (HashMap ContactId (Set DocId))
dataPairing aId (cId, lId, ngt) = do
  -- mc :: HM.HashMap ContactName (Set ContactId)
  mc <- getNgramsContactId aId
  -- md :: HM.HashMap DocAuthor   (Set DocId)
  md <- getNgramsDocId     cId lId ngt
  -- printDebug "dataPairing authors" (HM.keys md)
  let result = fusion mc md
  -- printDebug "dataPairing" (length $ HM.keys result)
  pure result


prepareInsert :: CorpusId -> AnnuaireId -> HashMap ContactId (Set DocId)
              -> [(CorpusId, AnnuaireId, DocId, ContactId)]
prepareInsert corpusId annuaireId mapContactDocs =
  map (\(contactId,docId) -> (corpusId, docId, annuaireId, contactId))
        $ List.concat
        $ map (\(contactId, setDocIds)
                -> map (\setDocId
                         -> (contactId, setDocId)
                       ) $ Set.toList setDocIds
               )
        $ HM.toList mapContactDocs

------------------------------------------------------------------------
type ContactName = NgramsTerm
type DocAuthor   = NgramsTerm
type Projected   = NgramsTerm

fusion :: HashMap ContactName (Set ContactId)
       -> HashMap DocAuthor   (Set DocId)
       -> HashMap ContactId   (Set DocId)
fusion mc md = HM.fromListWith (<>)
             $ List.concat
             $ map (\(docAuthor, docs)
                     -> case (getClosest Text.toLower docAuthor (HM.keys mc)) of
                          Nothing -> []
                          Just author -> case HM.lookup author mc of
                                          Nothing    -> []
                                          Just contactIds -> map (\contactId -> (contactId, docs))
                                                                 $ Set.toList contactIds
                   )
             $ HM.toList md

fusion'' :: HashMap ContactName (Set ContactId)
       -> HashMap DocAuthor   (Set DocId)
       -> HashMap ContactId   (Set DocId)
fusion'' mc md = hashmapReverse $ fusion' mc (hashmapReverse md)


fusion' :: HashMap ContactName (Set ContactId)
       -> HashMap DocId (Set DocAuthor)
       -> HashMap DocId (Set ContactId)
fusion' mc md = HM.fromListWith (<>)
             $ map (\(docId, setAuthors) -> (docId, getContactIds mc $ getClosest' setAuthors (HM.keys mc)))
             $ HM.toList md

getContactIds :: HashMap ContactName (Set ContactId) -> Set ContactName -> Set ContactId
getContactIds mapContactNames contactNames =
  if Set.null contactNames
     then Set.empty
     else Set.unions $ catMaybes $ map (\contactName -> HM.lookup contactName mapContactNames) $ Set.toList contactNames

getClosest' :: Set DocAuthor -> [ContactName] -> Set ContactName
getClosest' setAuthors contactNames = trace (show (setAuthors, setContactNames) :: Text) $ setContactNames
  where
    setContactNames = if Set.null xs then ys else xs
    xs = Set.fromList $ catMaybes $ map (\author -> getClosest Text.toLower author contactNames) $ Set.toList setAuthors
    ys = Set.fromList $ catMaybes $ map (\(NgramsTerm author) -> case ((lastMay . (Text.splitOn " ")) author) of
                                                      Nothing      -> Nothing
                                                      Just authorReduced -> getClosest Text.toLower (NgramsTerm authorReduced) contactNames)
                                  $ Set.toList setAuthors


getClosest :: (Text -> Text) -> NgramsTerm -> [NgramsTerm] -> Maybe NgramsTerm
getClosest f (NgramsTerm from') candidates = fst <$> head scored
  where
    scored   = List.sortOn snd
             $ List.filter (\(_,score) -> score <= 2)
             $ map (\cand@(NgramsTerm candidate) -> (cand, levenshtein (f from') (f candidate))) candidates


------------------------------------------------------------------------
getNgramsContactId :: AnnuaireId
                   -> DBCmd err (HashMap ContactName (Set NodeId))
getNgramsContactId aId = do
  contacts <- getAllContacts aId
  -- printDebug "getAllContexts" (tr_count contacts)
  let paired= HM.fromListWith (<>)
       $ map (\contact -> (toName contact, Set.singleton (contact^.node_id))
              ) (tr_docs contacts)
  -- printDebug "paired" (HM.keys paired)
  pure paired
-- POC here, should be a probabilistic function (see the one used to find lang)
toName :: Node HyperdataContact -> NgramsTerm
-- toName contact = NgramsTerm $ (Text.toTitle $ Text.take 1 firstName) <> ". " <> (Text.toTitle lastName)
toName contact = NgramsTerm $ (Text.toTitle firstName) <> " " <> (Text.toTitle lastName)
  where
    firstName = fromMaybe "" $ contact^.(node_hyperdata . hc_who . _Just . cw_firstName)
    lastName  = fromMaybe "" $ contact^.(node_hyperdata . hc_who . _Just . cw_lastName)

getNgramsDocId :: HasNodeStory env err m
                => CorpusId
                -> ListId
                -> NgramsType
                -> m (HashMap DocAuthor (Set NodeId))
getNgramsDocId cId lId nt = do
  lIds <- selectNodesWithUsername NodeList userMaster
  repo <- getRepo (lId:lIds)
  let ngs = filterListWithRoot [MapTerm, CandidateTerm] $ mapTermListRoot (lId:lIds) nt repo
  -- printDebug "getNgramsDocId" ngs

  -- FIXME(adinapoli) we should audit this, we are converting from 'ContextId' to 'NodeId'.
  HM.map (Set.map contextId2NodeId) . groupNodesByNgrams ngs <$> getContextsByNgramsOnlyUser cId (lIds <> [lId]) nt (HashMap.keys ngs)

hashmapReverse :: (Ord a, Eq b, Hashable b)
        => HashMap a (Set b) -> HashMap b (Set a)
hashmapReverse m = HM.fromListWith (<>)
          $ List.concat
          $ map (\(k,vs) -> [ (v, Set.singleton k) | v <- Set.toList vs])
          $ HM.toList m