{-# LANGUAGE OverloadedStrings #-}
module Main where

import Common
import Data.Aeson
import Data.List (nub)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker  (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools  (printIOMsg, printIOComment, setConfig, toPeriods, getTimePeriod, getTimeStep)
import GHC.IO.Encoding
import GHC.Stack
import Paths_gargantext
import Prelude
import qualified Data.Text as T
import Shelly
import System.Directory

--------------
-- | Main | --
--------------

-- | Hard-coded 'PhyloConfig' named @phylo_profile_test@.
--
-- The only parameter is the output directory, stored in 'outputPath';
-- every other field is a fixed literal. The nested settings are named in
-- the @where@ clause so the record itself reads as a flat list.
phyloConfig :: FilePath -> PhyloConfig
phyloConfig outdir =
  PhyloConfig
    { corpusPath     = "corpus.csv"
    , listPath       = "list.csv"
    , outputPath     = outdir
    , corpusParser   = Csv { _csv_limit = 150000 }
    , listParser     = V4
    , phyloName      = "phylo_profile_test"
    , phyloScale     = 2
    , similarity     = groupSimilarity
    , seaElevation   = elevation
    , defaultMode    = True
    , findAncestors  = False
    , phyloSynchrony = synchrony
    , phyloQuality   = quality
    , timeUnit       = periods
    , clique         = cliques
    , exportLabel    = labels
    , exportSort     = ByHierarchy { _sort_order = Desc }
    , exportFilter   = [ ByBranchSize { _branch_size = 3.0 } ]
    }
  where
    -- Similarity measure between groups.
    groupSimilarity =
      WeightedLogJaccard
        { _wlj_sensibility     = 0.5
        , _wlj_minSharedNgrams = 2
        }

    -- Constant sea elevation: start value and gap.
    elevation =
      Constante
        { _cons_start = 0.1
        , _cons_gap   = 0.1
        }

    -- Synchronic clustering settings.
    synchrony =
      ByProximityThreshold
        { _bpt_threshold   = 0.5
        , _bpt_sensibility = 0.0
        , _bpt_scope       = AllBranches
        , _bpt_strategy    = MergeAllGroups
        }

    -- Quality settings.
    quality =
      Quality
        { _qua_granularity = 0.8
        , _qua_minBranch   = 3
        }

    -- Yearly time unit: period, step and matching frame.
    periods =
      Year
        { _year_period        = 3
        , _year_step          = 1
        , _year_matchingFrame = 5
        }

    -- Clique detection settings.
    cliques =
      MaxClique
        { _mcl_size      = 5
        , _mcl_threshold = 1.0e-4
        , _mcl_filter    = ByThreshold
        }

    -- Labels attached to branches and groups in the export.
    labels =
      [ BranchLabel
          { _branch_labelTagger = MostEmergentTfIdf
          , _branch_labelSize   = 2
          }
      , GroupLabel
          { _group_labelTagger = MostEmergentInclusive
          , _group_labelSize   = 2
          }
      ]


-- | Profiling entry point: rebuild a phylo from the bundled benchmark data
-- and export it as a dot file.
--
-- Steps, all run with a Shelly temporary directory as working directory:
--
--   1. locate the benchmark config, corpus and ngrams list through
--      'getDataFileName';
--   2. decode the JSON config and point its 'outputPath', 'corpusPath' and
--      'listPath' at the temporary directory and the bundled data files;
--   3. parse the list and the corpus, then print a few size statistics;
--   4. reconstruct the phylo, reusing backup files when they exist;
--   5. write the export to @gargantext_profile_out.dot@ in the directory the
--      program was started from.
--
-- Raises an 'IOError' naming the config file and the decoder message when
-- the JSON config cannot be decoded.
main :: HasCallStack => IO ()
main =
    shelly $ escaping False $ withTmpDir $ \tdir -> do
      curDir <- pwd
      -- The export goes to the start directory, not the temporary one.
      let output = curDir <> "/" <> "gargantext_profile_out.dot"
      chdir tdir $ do
        liftIO $ setLocaleEncoding utf8

        bpaConfig   <- liftIO $ getDataFileName "bench-data/phylo/bpa-config.json"
        corpusPath' <- liftIO $ getDataFileName "bench-data/phylo/GarganText_DocsList-nodeId-185487.csv"
        listPath'   <- liftIO $ getDataFileName "bench-data/phylo/GarganText_NgramsList-185488.csv"

        -- Surface the decoder's own message instead of failing on a partial
        -- @Right@ pattern, which would hide why the config was rejected.
        decoded <- liftIO (eitherDecodeFileStrict' bpaConfig)
        config  <- case decoded of
          Left err ->
            liftIO . ioError . userError $
              "Cannot decode phylo config " <> bpaConfig <> ": " <> err
          Right pcfg ->
            pure (pcfg { outputPath = tdir
                       , corpusPath = corpusPath'
                       , listPath   = listPath'
                       })

        mapList <- liftIO $ fileToList (listParser config) (listPath config)

        corpus <- liftIO $
          if defaultMode config
            then fileToDocsDefault (corpusParser config) (corpusPath config) [Year 3 1 5,Month 3 1 5,Week 4 2 5] mapList
            else fileToDocsAdvanced (corpusParser config) (corpusPath config) (timeUnit config) mapList

        liftIO $ do
          printIOComment (show (length corpus) <> " parsed docs from the corpus")
          printIOComment (show (length . nub $ concatMap text corpus) <> " Size ngs_coterms")
          printIOComment (show (length mapList) <> " Size ngs_terms List Map Ngrams")
          printIOMsg "Reconstruct the phylo"

          -- Check the existing backup files.
          -- 'outputPath' is the temporary directory here, which may not end
          -- with a path separator, so add one explicitly (as done for
          -- @output@ above) to keep the backups inside that directory.
          let backupDir              = outputPath config <> "/"
              backupPhyloWithoutLink = backupDir <> "backupPhyloWithoutLink_" <> configToSha BackupPhyloWithoutLink config <> ".json"
              backupPhylo            = backupDir <> "backupPhylo_" <> configToSha BackupPhylo config <> ".json"

          phyloWithoutLinkExists <- doesFileExist backupPhyloWithoutLink
          phyloExists            <- doesFileExist backupPhylo

          -- Reconstruct the phylo, preferring the most complete backup.
          phylo <-
            if phyloExists
              then do
                printIOMsg "Reconstruct the phylo from an existing file"
                readPhylo backupPhylo
              else do
                phyloWithoutLink <-
                  if phyloWithoutLinkExists
                    then do
                      printIOMsg "Reconstruct the phylo from an existing file without links"
                      readPhylo backupPhyloWithoutLink
                    else do
                      printIOMsg "Reconstruct the phylo from scratch"
                      pure (toPhyloWithoutLink corpus config)
                -- Both cases (re)write the link-less backup before linking.
                -- NOTE(review): rewriting a backup that was just read looks
                -- redundant; kept to preserve the original behaviour.
                writePhylo backupPhyloWithoutLink phyloWithoutLink
                pure $ toPhylo (setConfig config phyloWithoutLink)

          writePhylo backupPhylo phylo

          printIOMsg "End of reconstruction, start the export"

          let dot = toPhyloExport (setConfig config phylo)

          dotToFile output dot
        echo "Done."