{-|
Module      : Gargantext.Core.Text.List.Group.WithStem
Description : 
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

{-# LANGUAGE TemplateHaskell        #-}
{-# LANGUAGE ConstraintKinds        #-}
{-# LANGUAGE TypeFamilies           #-}
{-# LANGUAGE FunctionalDependencies #-}

module Gargantext.Core.Text.List.Group.WithStem
  where

import Control.Lens (makeLenses, view, over)
import Data.Set (Set)
import Data.Map (Map)
import Data.Monoid (mempty)
import Data.Text (Text)
import Gargantext.Core (Lang(..))
import Gargantext.Core.Text (size)
import Gargantext.Database.Admin.Types.Node (NodeId)
import Gargantext.Core.Text.List.Group.WithScores
import Gargantext.Core.Text.List.Group.Prelude
import Gargantext.Core.Text.List.Social.Prelude
import Gargantext.Core.Text.Terms.Mono.Stem (stem)
import Gargantext.Prelude
import qualified Data.Set  as Set
import qualified Data.Map  as Map
import qualified Data.List as List
import qualified Data.Text as Text

-- | Main Types
data StopSize = StopSize {unStopSize :: !Int}
  deriving (Eq)

-- | TODO: group with 2 terms only can be
-- discussed. Main purpose of this is offering
-- a first grouping option to user and get some
-- enriched data to better learn and improve that algo
data GroupParams = GroupParams { unGroupParams_lang     :: !Lang
                               , unGroupParams_len      :: !Int
                               , unGroupParams_limit    :: !Int
                               , unGroupParams_stopSize :: !StopSize
                               }
                 | GroupIdentity
  deriving (Eq)

------------------------------------------------------------------------
groupWithStem' :: GroupParams
               -> FlowCont Text (GroupedTreeScores (Set NodeId))
               -> FlowCont Text (GroupedTreeScores (Set NodeId))
groupWithStem' g flc
    | g == GroupIdentity = FlowCont ( (<>) 
                                      (view flc_scores flc)
                                      (view flc_cont   flc)
                                    ) mempty
    | otherwise = mergeWith (groupWith g) flc

-- | MergeWith : with stem, we always have an answer
-- if Maybe lems then we should add it to continuation
mergeWith :: (Text -> Text)
          -> FlowCont Text (GroupedTreeScores (Set NodeId))
          -> FlowCont Text (GroupedTreeScores (Set NodeId))
mergeWith fun flc = FlowCont scores mempty
  where

    scores :: Map Text (GroupedTreeScores (Set NodeId))
    scores = foldl' (alter (mapStems scores')) scores' cont'
      where
        scores' = view flc_scores flc
        cont'   = Map.toList $ view flc_cont flc

    -- TODO insert at the right place in group hierarchy
    -- adding as child of the parent for now
    alter :: Map Stem Text
          -> Map Text (GroupedTreeScores (Set NodeId))
          -> (Text, GroupedTreeScores (Set NodeId))
          -> Map Text (GroupedTreeScores (Set NodeId))
    alter st target (t,g) = case Map.lookup t st of
      Nothing -> Map.alter (alter' (t,g)) t  target
      Just t' -> Map.alter (alter' (t,g)) t' target

    alter' (_t,g) Nothing   = Just g
    alter' ( t,g) (Just g') = Just $ over gts'_children
                                   ( Map.union (Map.singleton t g))
                                   g'

    mapStems :: Map Text (GroupedTreeScores (Set NodeId))
             -> Map Stem Text
    mapStems = (Map.fromListWith (<>)) . List.concat . (map mapStem) . Map.toList

    mapStem :: (Text, GroupedTreeScores (Set NodeId))
            -> [(Stem, Text)]
    mapStem (s,g) = parent : children
      where
        parent   = (fun s, s)
        children = List.concat $ map mapStem (Map.toList $ view gts'_children g)


groupWith :: GroupParams
            -> Text
            -> Text
groupWith GroupIdentity  = identity
groupWith (GroupParams l _m _n _) = 
                    Text.intercalate " "
                  . map (stem l)
                  -- . take n
                  . List.sort
                  -- . (List.filter (\t -> Text.length t > m))
                  . Text.splitOn " "
                  . Text.replace "-" " "


-- 8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--8<--
-------------------------------------------------------------------
-- TODO to remove
data GroupedTextParams a b =
  GroupedTextParams { _gt_fun_stem    :: Text -> Text
                    , _gt_fun_score   :: a -> b
                    , _gt_fun_texts   :: a -> Set Text
                    , _gt_fun_nodeIds :: a -> Set NodeId
                    -- , _gt_fun_size    :: a -> Int
                    }
makeLenses 'GroupedTextParams


groupWithStem :: {- ( HasNgrams a
                 , HasGroupWithScores a b
                 , Semigroup a
                 ,  Ord b
                 ) 
              => -} GroupedTextParams a b
              -> Map Text (GroupedTextScores (Set NodeId))
              -> Map Stem (GroupedText Int)
groupWithStem _ = Map.mapWithKey scores2groupedText

scores2groupedText :: Text -> GroupedTextScores (Set NodeId) -> GroupedText Int
scores2groupedText t g = GroupedText (view gts_listType g)
                                     t
                                     (Set.size $ view gts_score g)
                                     (Set.delete t $ view gts_children g)
                                     (size t)
                                     t
                                     (view gts_score g)

------------------------------------------------------------------------
------------------------------------------------------------------------
groupedTextWithStem :: Ord b
              => GroupedTextParams a b
              -> Map Text a
              -> Map Stem (GroupedText b)
groupedTextWithStem gparams from =
  Map.fromListWith (<>) $ map (group gparams) $ Map.toList from
    where
      group gparams' (t,d) = let t' = (view gt_fun_stem gparams') t
                     in (t', GroupedText
                                Nothing
                                t
                                ((view gt_fun_score gparams')   d)
                                ((view gt_fun_texts gparams')   d)
                                (size        t)
                                t'
                                ((view gt_fun_nodeIds gparams') d)
                         )
------------------------------------------------------------------------
