{-|
Module      : Gargantext.Text.List
Description : Tools to build lists
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

-}


module Gargantext.Text.List
  where

-- import Data.Either (partitionEithers, Either(..))
import Data.Map (Map)
import Data.Set (Set)
import Data.Text (Text)
import Gargantext.API.Ngrams (NgramsElement, mkNgramsElement, RootParent(..), mSetFromList)
-- import Gargantext.API.Ngrams.Tools (getCoocByNgrams', Diagonal(..))
import Gargantext.Core (Lang(..))
import Gargantext.Core.Types (ListType(..), MasterCorpusId, UserCorpusId, Ordering(..))
import Gargantext.Database.Action.Metrics.NgramsByNode (getTficf, sortTficf, ngramsGroup, getNodesByNgramsUser, groupNodesByNgramsWith)
import Gargantext.Database.Prelude (Cmd)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import Gargantext.Text.List.Learn (Model(..))
-- import Gargantext.Text.Metrics (takeScored)
import qualified Data.Char as Char
import qualified Data.List as List
import qualified Data.Map  as Map
import qualified Data.Set  as Set
import qualified Data.Text as Text


-- | Alternative configurations for building ngrams lists, one constructor
-- per strategy.
--
-- NOTE(review): none of the functions visible in this module construct or
-- consume this type, so the exact meaning of each field is not established
-- here; confirm with the callers before relying on the field names alone.
--
-- The record selectors are partial: each one is only defined for the
-- constructor(s) declaring it ('withModel' is shared by 'BuilderStep1' and
-- 'BuilderStepN').
data NgramsListBuilder
  = BuilderStepO
      { stemSize :: Int
      , stemX    :: Int
      , stopSize :: Int
      }
  | BuilderStep1
      { withModel :: Model
      }
  | BuilderStepN
      { withModel :: Model
      }
  -- The field types below are, in order, the argument types of
  -- 'buildNgramsLists'.
  | Tficf
      { nlb_lang           :: Lang
      , nlb_group1         :: Int
      , nlb_group2         :: Int
      , nlb_stopSize       :: StopSize
      , nlb_userCorpusId   :: UserCorpusId
      , nlb_masterCorpusId :: MasterCorpusId
      }


-- | Length threshold used by 'isStopTerm': a term with fewer characters than
-- this is classified as a stop term.
--
-- Declared as a @newtype@ because it wraps exactly one field: the wrapper
-- then has no runtime representation of its own, while the constructor and
-- the 'unStopSize' selector stay the same for callers.
newtype StopSize = StopSize { unStopSize :: Int }

-- | Build the ngrams lists of a user corpus for every ngrams type handled
-- by this module.
--
-- 'NgramsTerms' are built by 'buildNgramsTermsList', which receives all the
-- arguments unchanged. 'Authors', 'Sources' and 'Institutes' are each built
-- by 'buildNgramsOthersList' with 'identity' as the grouping function. The
-- resulting maps are merged with 'Map.unions'.
--
-- TODO improve grouping functions of Authors, Sources, Institutes..
buildNgramsLists :: Lang
                 -> Int
                 -> Int
                 -> StopSize
                 -> UserCorpusId
                 -> MasterCorpusId
                 -> Cmd err (Map NgramsType [NgramsElement])
buildNgramsLists l n m s uCid mCid = do
  termsList  <- buildNgramsTermsList l n m s uCid mCid
  otherLists <- mapM buildOthers [Authors, Sources, Institutes]
  pure (Map.unions (otherLists <> [termsList]))
    where
      -- No grouping for these types yet: every ngram stands for itself.
      buildOthers ngramsType = buildNgramsOthersList uCid identity ngramsType


-- | Build the list of one non-terms ngrams type (e.g. 'Authors') for a user
-- corpus.
--
-- The ngrams fetched with 'getNodesByNgramsUser' are grouped with
-- @groupIt@ and ranked by the size of the second set attached to each entry
-- (presumably the nodes containing the ngram; confirm against
-- 'groupNodesByNgramsWith'), largest first. The first 9 entries become
-- 'GraphTerm's and all the others 'CandidateTerm's. Every element is
-- created without parent and without children.
--
-- The result is a map with the single key @nt@.
buildNgramsOthersList :: UserCorpusId
                      -> (Text -> Text)
                      -> NgramsType
                      -> Cmd err (Map NgramsType [NgramsElement])
buildNgramsOthersList uCid groupIt nt = do
  grouped <- groupNodesByNgramsWith groupIt <$> getNodesByNgramsUser uCid nt

  let
    -- Ascending stable sort followed by a reversal: biggest sets first.
    ranked = List.reverse
           $ List.sortOn (\(_, (_, nodes)) -> Set.size nodes)
           $ Map.toList grouped

    (graphTerms, candiTerms) = List.splitAt graphListSize ranked

  pure $ Map.fromList
    [ ( nt
      , asElements GraphTerm graphTerms <> asElements CandidateTerm candiTerms
      )
    ]
    where
      -- Number of top-ranked entries that go to the graph list.
      graphListSize = 9

      -- Only the ngram text is kept; the attached sets are dropped.
      asElements listType entries =
        [ mkNgramsElement term listType Nothing (mSetFromList [])
        | (term, _) <- entries
        ]

{-
buildNgramsTermsList' :: UserCorpusId
                      -> (Text -> Text)
                      -> ((Text, (Set Text, Set NodeId)) -> Bool)
                      -> Int
                      -> Int
                      -> Cmd err (Map NgramsType [NgramsElement])

buildNgramsTermsList' uCid groupIt stop gls is = do
  ngs <- groupNodesByNgramsWith groupIt <$> getNodesByNgramsUser uCid NgramsTerms
  
  let
    (stops, candidates) = partitionEithers
                          $ map (\t -> if stop t then Left t else Right t)
                          $ Map.toList
                          $ Map.filter ((\s' -> Set.size s' > 1) . snd) ngs

    (maps, candidates') = takeScored gls is
                        $ getCoocByNgrams' snd (Diagonal True)
                        $ Map.fromList candidates


    toList' t = (fst t, (fromIntegral $ Set.size $ snd $ snd t, fst $ snd t))

    (s,c,m) = (stops
       , List.filter (\(k,_) -> List.elem k candidates') candidates
       , List.filter (\(k,_) -> List.elem k maps) candidates
       )

  let ngs' = List.concat
          $ map toNgramsElement
          $ map (\t -> (StopTerm     , toList' t)) s
         <> map (\t -> (CandidateTerm, toList' t)) c
         <> map (\t -> (GraphTerm    , toList' t)) m

  pure $ Map.fromList [(NgramsTerms, ngs')]
-}




-- | Build the 'NgramsTerms' list of a user corpus from its TFICF candidates.
--
-- Candidates come from 'getTficf' (user corpus against master corpus,
-- grouped with @ngramsGroup l n m@) and are ordered by @sortTficf Down@
-- (presumably best score first; confirm against 'sortTficf').
--
-- The first 400 candidates are labelled 'GraphTerm' and the remaining ones
-- 'CandidateTerm', except that any candidate whose first component (the
-- stem, see 'toNgramsElement') satisfies @isStopTerm s@ is labelled
-- 'StopTerm' instead. Each labelled candidate is then expanded into its
-- elements by 'toNgramsElement'.
--
-- The result is a map with the single key 'NgramsTerms'.
buildNgramsTermsList :: Lang
                     -> Int
                     -> Int
                     -> StopSize
                     -> UserCorpusId
                     -> MasterCorpusId
                     -> Cmd err (Map NgramsType [NgramsElement])
buildNgramsTermsList l n m s uCid mCid = do
  ranked <- sortTficf Down <$> getTficf uCid mCid NgramsTerms (ngramsGroup l n m)

  let
    -- How many top-ranked candidates go to the graph list.
    graphSize = 400

    (graphPart, candidatePart) = List.splitAt graphSize ranked

    -- The stop test only looks at the first component of a candidate.
    stopOnStem candidate = isStopTerm s (fst candidate)

    label listType = map (toGargList stopOnStem listType)

    -- An alternating scheme based on 'toTermList' is currently not used here.
    termList = label GraphTerm graphPart <> label CandidateTerm candidatePart

  pure $ Map.fromList [(NgramsTerms, List.concatMap toNgramsElement termList)]


-- | Label a list by alternating chunks: the first @a@ items are labelled
-- 'CandidateTerm', the next @b@ items 'GraphTerm', and so on until the list
-- is exhausted. Whatever the chunk, an item satisfying @stop@ is labelled
-- 'StopTerm' (see 'toGargList'). The order of the items is preserved.
--
-- If neither @a@ nor @b@ is positive no chunk can ever consume an item, so
-- the recursion would never terminate; in that case every item is labelled
-- 'CandidateTerm' (still subject to @stop@) instead.
toTermList :: Int
           -> Int
           -> (a -> Bool)
           -> [a]
           -> [(ListType, a)]
toTermList _ _ _ [] = []
toTermList a b stop ns =
  if not (0 < a || 0 < b)
    -- Both chunk sizes are <= 0: 'take' yields nothing and 'drop' removes
    -- nothing, so recursing on @zs@ would loop forever on the same list.
    then map (toGargList stop CandidateTerm) ns
    else map (toGargList stop CandidateTerm) xs
      <> map (toGargList stop GraphTerm)     ys
      <> toTermList a b stop zs
    where
      -- Candidate chunk and what follows it.
      xs = take a ns
      xz = drop a ns

      -- Graph chunk and the remainder handled by the recursive call.
      ys = take b xz
      zs = drop b xz


-- | Expand one labelled group of ngrams into its elements.
--
-- The forms of the group are taken in the ascending order given by
-- 'Set.toList': the first one becomes the root element (no parent, all the
-- other forms as children) and every other form becomes a child element
-- whose root and parent are both that first form. All the elements share
-- the given list type. The stem and the score are not used. An empty set
-- yields no element.
toNgramsElement :: (ListType, (Text, (Double, Set Text))) -> [NgramsElement]
toNgramsElement (listType, (_stem, (_score, setNgrams))) =
  case Set.toList setNgrams of
    []            -> []
    (root : rest) -> rootElement : map childElement rest
      where
        rootElement =
          mkNgramsElement root listType Nothing (mSetFromList rest)

        childElement child =
          mkNgramsElement child
                          listType
                          (Just (RootParent root root))
                          (mSetFromList [])


-- | Pair an item with a list type: 'StopTerm' when the @stop@ predicate
-- holds for the item, the given list type @l@ otherwise.
toGargList :: (b -> Bool) -> ListType -> b -> (ListType, b)
toGargList stop l n =
  if stop n
    then (StopTerm, n)
    else (l, n)



-- | Decide whether a term is a stop term.
--
-- A term is a stop term when it has fewer characters than the given
-- 'StopSize', or when it contains at least one character that is neither
-- alphabetic ('Char.isAlpha') nor one of @- /()%@ (space included).
isStopTerm :: StopSize -> Text -> Bool
isStopTerm (StopSize minLength) term = tooShort || hasForbiddenChar
  where
    tooShort         = Text.length term < minLength
    hasForbiddenChar = Text.any (not . isAllowedChar) term

    -- Letters plus a few separators and symbols are the only accepted chars.
    isAllowedChar c = Char.isAlpha c || c `elem` ("- /()%" :: [Char])