{-|
Module      : Gargantext.Core.Ext.IMT
Description : IMT schools and per-school publication counts from a HAL CSV export
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
Portability : POSIX
-}


module Gargantext.Core.Ext.IMT where

import Data.Either (Either(..))
import Data.Map (Map)
import Data.Text (Text, splitOn)

import qualified Data.Set    as S
import qualified Data.List   as DL
import qualified Data.Vector as DV
import qualified Data.Map    as M
import qualified Prelude

import Gargantext.Prelude

import Gargantext.Core.Text.Metrics.Utils      as Utils
import Gargantext.Core.Text.Corpus.Parsers.CSV as CSV

-- | One school entry of the 'schools' table.
data School = School
  { school_shortName :: Text
    -- ^ Short display name; this is the value stored in 'mapIdSchool'.
  , school_longName  :: Text
    -- ^ Longer label for the same school.
  , school_id        :: Text
    -- ^ Identifier kept as text; 'publisBySchool' compares it against the
    --   comma-separated values of @csvHal_instStructId_i@.
  }
  deriving (Show, Read, Eq)

-- | Static table of the schools known to this module.
--
-- Each row below is @(short name, long name, id)@ and is turned into a
-- 'School' in that field order.
--
-- NOTE(review): the id @"300362"@ is listed for both \"Télécom ParisTech\"
-- and \"ARMINES\". 'mapIdSchool' is built with 'M.fromList', which keeps the
-- later entry for a repeated key, so that id currently resolves to
-- \"ARMINES\". One of the two ids is presumably wrong — confirm against the
-- data source before changing it.
schools :: [School]
schools =
  [ School shortName longName ident
  | (shortName, longName, ident) <-
      [ ( "Mines Albi-Carmaux"
        , "Mines Albi-Carmaux - École nationale supérieure des Mines d'Albi‐Carmaux"
        , "469216" )
      , ( "Mines Alès"
        , "EMA - École des Mines d'Alès"
        , "6279" )
      , ( "Mines Douai"
        , "Mines Douai EMD - École des Mines de Douai"
        , "224096" )
      , ( "Mines Lille"
        , "Mines Lille - École des Mines de Lille"
        , "144103" )
      , ( "IMT Lille Douai"
        , "IMT Lille Douai"
        , "497330" )
      , ( "Mines Nantes"
        , "Mines Nantes - Mines Nantes"
        , "84538" )
      , ( "Télécom Bretagne"
        , "Télécom Bretagne"
        , "301262" )
      , ( "IMT Atlantique"
        , "IMT Atlantique - IMT Atlantique Bretagne-Pays de la Loire"
        , "481355" )
      , ( "Mines Saint-Étienne"
        , "Mines Saint-Étienne MSE - École des Mines de Saint-Étienne"
        , "29212" )
      , ( "Télécom École de Management"
        , "TEM - Télécom Ecole de Management"
        , "301442" )
      , ( "IMT Business School"
        , "IMT Business School"
        , "542824" )
      , ( "Télécom ParisTech"
        , "Télécom ParisTech"
        , "300362" )
      , ( "Télécom SudParis"
        , "TSP - Télécom SudParis"
        , "352124" )
      , ( "ARMINES"
        , "ARMINES"
        , "300362" )  -- same id as "Télécom ParisTech" above (see NOTE)
      , ( "Eurecom"
        , "Eurecom"
        , "421532" )
      , ( "Mines ParisTech"
        , "MINES ParisTech - École nationale supérieure des mines de Paris"
        , "301492" )
      ]
  ]

-- | Lookup table from 'school_id' to 'school_shortName', built from 'schools'.
--
-- When two entries of 'schools' share an id, 'M.fromList' keeps the one that
-- appears later in the list.
mapIdSchool :: Map Text Text
mapIdSchool =
  M.fromList [ (school_id entry, school_shortName entry) | entry <- schools ]

-- | Read @doc/corpus_imt/Gargantext_Corpus.csv@ with 'CSV.readCsvHal'.
--
-- On success the reader yields a pair; only its second component (the vector
-- of 'CsvHal' rows) is kept. A 'Left' error message is passed through as is.
hal_data :: IO (Either Prelude.String (DV.Vector CsvHal))
hal_data = (snd <$>) <$> CSV.readCsvHal "doc/corpus_imt/Gargantext_Corpus.csv"

-- | Set of every 'school_id' found in 'schools'.
--
-- Despite its name, this set holds ids, not school names; 'publisBySchool'
-- uses it to keep only the structure ids that belong to a known school.
names :: S.Set Text
names = S.fromList [ school_id entry | entry <- schools ]

-- | Translate a school id into its short name using 'mapIdSchool'.
--
-- An id that is not a key of 'mapIdSchool' is returned unchanged.
toSchoolName :: Text -> Text
toSchoolName ident = M.findWithDefault ident ident mapIdSchool

-- | Count, per known school, the structure ids attached to HAL records whose
-- publication year is 2017.
--
-- Steps:
--
--   1. keep the records with @csvHal_publication_year == 2017@;
--   2. split each record's @csvHal_instStructId_i@ on @", "@ and pool all ids;
--   3. tally the ids with 'Utils.freq' (its result is read back as a map via
--      'M.toList'; presumably an occurrence count per id — confirm in Utils);
--   4. order the tallies by count, largest first (ascending sort, reversed);
--   5. keep only ids present in 'names' and replace each id by its
--      'mapIdSchool' lookup.
--
-- The first component is a 'Maybe' because it is a raw 'M.lookup' result.
publisBySchool :: DV.Vector CsvHal -> [(Maybe Text, Int)]
publisBySchool hal_data' =
  [ (M.lookup structId mapIdSchool, count)
  | (structId, count) <- ranked
  , S.member structId names
  ]
  where
    -- Records restricted to the hard-coded publication year.
    docs2017 = DV.filter (\doc -> csvHal_publication_year doc == 2017) hal_data'

    -- Every structure id mentioned by those records, duplicates included.
    structIds =
      DL.concatMap (\doc -> splitOn ", " (csvHal_instStructId_i doc))
                   (DV.toList docs2017)

    -- (id, count) pairs sorted ascending by count, then reversed.
    ranked = DL.reverse (DL.sortOn snd (M.toList (Utils.freq structIds)))