{-|
Module      : Gargantext.API.Ngrams.NgramsTree
Description : Tree of Ngrams
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}

{-# LANGUAGE TemplateHaskell   #-}
module Gargantext.API.Ngrams.NgramsTree
  ( -- * Types
    NgramsForest(..)
  , NgramsTree
  , GeneralisedNgramsTree(..)
    -- * Construction
  , toNgramsTree
  , toNgramsForest
  ) where

import Data.Aeson
import Data.HashMap.Strict (HashMap)
import Data.HashMap.Strict qualified as HashMap
import Data.List qualified as List
import Data.Set qualified as Set
import Data.Swagger ( ToSchema(..), genericDeclareNamedSchema )
import Data.Tree ( Tree(Node), unfoldForest )
import Gargantext.API.Ngrams.Types
import Gargantext.Core.Types.Main ( ListType(..) )
import Gargantext.Database.Admin.Types.Node ( NodeId )
import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
import Gargantext.Prelude
import Test.QuickCheck ( Arbitrary(arbitrary) )

-- | Ngrams forms a forest, i.e. a set of trees, each tree represents a strong grouping
-- between terms. Each tree has a root and some children, of arbitrary depth. We use
-- this data structure internally to represent ngrams tables in a principled way, and later
-- we \"render\" them back into an 'NgramsTable' and/or a set of ngrams elements, where
-- each 'NgramElement' is a standalone tree.
--
-- Properties:
--
-- * Aciclic: each tree is a DAG, and therefore there must be no cycles within the tree,
--   and no cycles between trees in the forest.
--
newtype NgramsForest =
  NgramsForest { getNgramsForest :: [NgramsTree] }
  deriving (Show, Eq, Ord)

type NgramsTree = GeneralisedNgramsTree Text Int

-- | Models a general ngram tree polymorphic over a label 'l' and a measure 'm'.
data GeneralisedNgramsTree l m =
  GeneralisedNgramsTree { mt_label    :: l
                        , mt_value    :: m
                        , mt_children :: [NgramsTree]
                        }
    deriving (Generic, Show, Eq, Ord)

instance (ToJSON l, ToJSON m) => ToJSON (GeneralisedNgramsTree l m) where
  toJSON (GeneralisedNgramsTree l m children) =
    object [ "label"    .= toJSON l
           , "value"    .= toJSON m
           , "children" .= toJSON children
           ]

instance (FromJSON l, FromJSON m) => FromJSON (GeneralisedNgramsTree l m) where
  parseJSON = withObject "NgramsTree" $ \o -> do
    mt_label    <- o .: "label"
    mt_value    <- o .: "value"
    mt_children <- o .: "children"
    pure $ GeneralisedNgramsTree{..}

instance ToSchema NgramsTree where
  declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "mt_")
instance Arbitrary NgramsTree
  where
    arbitrary = GeneralisedNgramsTree <$> arbitrary <*> arbitrary <*> arbitrary

--
-- Constructing trees and forests
--

toNgramsTree :: Tree (NgramsTerm,Int) -> NgramsTree
toNgramsTree (Node (NgramsTerm l,v) xs) = GeneralisedNgramsTree l v (map toNgramsTree xs)

toNgramsForest :: ListType
               -> HashMap NgramsTerm (Set NodeId)
               -> HashMap NgramsTerm NgramsRepoElement
               -> NgramsForest
toNgramsForest lt vs m = NgramsForest $ map toNgramsTree $ unfoldForest buildNode roots
  where
    buildNode r = maybe ((r, value r),[])
                        (\x -> ((r, value r), mSetToList $ _nre_children x))
                        (HashMap.lookup r m)

    value l = maybe 0 Set.size $ HashMap.lookup l vs

    rootsCandidates :: [NgramsTerm]
    rootsCandidates = catMaybes
                    $ List.nub
                    $ map (\(c, c') -> case _nre_root c' of
                                       Nothing -> Just c
                                       _ -> _nre_root c'
                          ) (HashMap.toList m)

    roots = map fst
          $ filter (\(_,l) -> l == lt)
          $ catMaybes
          $ map (\c -> (,) <$> Just c <*> (_nre_list <$> HashMap.lookup c m))
          $ rootsCandidates
