{-|
Module      : Gargantext.Core.Text.Metrics.FrequentItemSet
Description : Ngrams tools
Copyright   : (c) CNRS, 2018
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX

Domain Specific Language to manage Frequent Item Set (FIS)

-}


module Gargantext.Core.Text.Metrics.FrequentItemSet
  ( Fis, Size(..)
  , occ_hlcm, cooc_hlcm
  , allFis, between
  , fisWithSize
  , fisWith
  , fisWithSizePoly
  , fisWithSizePoly2
  , fisWithSizePolyMap
  , fisWithSizePolyMap'
  , module HLCM
  )
  where

import Data.List (concat, null)
import Data.Map.Strict (Map)
import Data.Maybe (catMaybes, isNothing)
import Data.Set (Set)
import Gargantext.Prelude
import HLCM
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.Vector as V

import Control.Monad (sequence)

data Size = Point Int | Segment Int Int

------------------------------------------------------------------------
-- | Occurrence is Frequent Item Set of size 1
occ_hlcm  :: Frequency -> [[Item]] -> [Fis]
occ_hlcm = fisWithSize (Point 1)

-- | Cooccurrence is Frequent Item Set of size 2
cooc_hlcm :: Frequency -> [[Item]] -> [Fis]
cooc_hlcm = fisWithSize (Point 2)

allFis :: Frequency -> [[Item]] -> [Fis]
allFis = fisWith Nothing

------------------------------------------------------------------------
between :: (Int, Int) -> Frequency -> [[Item]] -> [Fis]
between (x,y) = fisWithSize (Segment x y)

--maximum :: Int -> Frequency -> [[Item]] -> [Fis]
--maximum m = between (0,m)


------------------------------------------------------------------------
------------------------------------------------------------------------
-- | Data type to type the Frequent Item Set
-- TODO replace List with Set in fisItemSet 
-- be careful : risks to erase HLCM behavior
type Fis = Fis' Item
data Fis' a = Fis' { _fisCount   :: Int
                   , _fisItemSet :: [a]
                   } deriving (Show)

instance Functor Fis' where
  fmap f (Fis' c is) = Fis' c (fmap f is)

-- | Sugar from items to FIS
items2fis :: [Item] -> Maybe Fis
items2fis []     = Nothing
items2fis (i:is) = Just $ Fis' i is

------------------------------------------------------------------------
------------------------------------------------------------------------

fisWithSize :: Size -> Frequency -> [[Item]] -> [Fis]
fisWithSize n f is = case n of
  Point  n'   -> fisWith (Just (\x -> length x == (n'+1) )) f is
  Segment a b -> fisWith (Just (\x -> cond a (length x) b)) f is
                    where
                      cond a' x b' = a' <= x && x <= b'


                      --- Filter on Fis and not on [Item]
fisWith :: Maybe ([Item] -> Bool) -> Frequency -> [[Item]] -> [Fis]
fisWith s f is = case filter (not . null) is of
                   [] -> []
                   js -> catMaybes $ map items2fis $ filter' $ runLCMmatrix js f
-- drop unMaybe
  where
    filter' = case s of
                Nothing  -> identity
                Just fun -> filter fun

-- Here the sole purpose to take the keys as a Set is tell we do not want
-- duplicates.
fisWithSizePoly :: Ord a => Size -> Frequency -> Set a -> [[a]] -> [Fis' a]
fisWithSizePoly n f ks = map (fmap fromItem) . fisWithSize n f . map (map toItem)
  where
    ksv = V.fromList $ Set.toList ks
    ksm = Map.fromList . flip zip [0..] $ V.toList ksv
    toItem = (ksm Map.!)
    fromItem = (ksv V.!)

fisWithSizePoly2 :: Ord a => Size -> Frequency -> [[a]] -> [Fis' a]
fisWithSizePoly2 n f is = fisWithSizePoly n f ks is
  where
    ks = Set.fromList $ concat is

fisWithSizePolyMap :: Ord a => Size -> Frequency -> [[a]] -> Map (Set a) Int
fisWithSizePolyMap n f is =
  Map.fromList $ (\i -> (Set.fromList (_fisItemSet i), _fisCount i)) <$> fisWithSizePoly2 n f is


------------------------------------------------------------------------
------------------------------------------------------------------------

---- Weighted [[Item]]

isSublistOf :: Ord a => [a] -> [a] -> Bool
isSublistOf sub lst = all (\i -> elem i lst) sub

reIndexFis :: Ord a => [([a],(b,c))] -> [Fis' a] -> [(Fis' a,([b],[c]))]
reIndexFis items fis = map (\f -> 
    let docs = filter (\(lst,_) -> isSublistOf (_fisItemSet f) lst) items
     in (f, (map (fst . snd) docs, map (snd . snd) docs))) fis

wsum :: [Maybe Double] -> Int -> Maybe Double
wsum lst sup =
  let w = fmap sum $ sequence lst
   in
      if (isNothing w)
        then Just $ fromIntegral sup
        else w

fisWithSizePolyMap' :: Ord a => Size -> Frequency -> [([a], (Maybe Double,[Int]))] -> Map (Set a) (Int, (Maybe Double,[Int]))
fisWithSizePolyMap' n f is = Map.fromList
  $ map (\(fis,(ws,sources)) -> (Set.fromList (_fisItemSet fis),(_fisCount fis,(wsum ws (_fisCount fis),concat sources))))
  $ reIndexFis is 
  $ fisWithSizePoly2 n f (map fst is)

------------------------------------------------------------------------
------------------------------------------------------------------------


--
---- | /!\ indexes are not the same:
--
---- | Index ngrams from Map
----indexNgram :: Ord a => Map a Occ -> Map Index a
----indexNgram m = fromList (zip [1..] (keys m))
--
---- | Index ngrams from Map
----ngramIndex :: Ord a => Map a Occ -> Map a Index
----ngramIndex m = fromList (zip (keys m) [1..])
--
--indexWith :: Ord a => Map a Occ -> [a] -> [Int]
--indexWith m xs = unMaybe $ map (\x -> lookupIndex x m) xs
--
--indexIt :: Ord a => [[a]] -> (Map a Int, [[Int]])
--indexIt xs = (m, is)
--  where
--    m  = sumOcc (map occ  xs)
--    is = map    (indexWith m) xs
--
--list2fis :: Ord a => FIS.Frequency -> [[a]] -> (Map a Int, [FIS.Fis])
--list2fis n xs = (m', fs)
--  where
--    (m, is) = indexIt xs
--    m'      = M.filter (>50000) m
--    fs      = FIS.all n is
--
--text2fis :: FIS.Frequency -> [Text] -> (Map Text Int, [FIS.Fis])
--text2fis n xs = list2fis n (map terms xs)
--
----text2fisWith :: FIS.Size -> FIS.Frequency -> [Text] -> (Map Text Int, [FIS.Fis])
----text2fisWith = undefined
--