{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE QuasiQuotes #-}

module Test.Offline.Phylo (tests) where

import CLI.Phylo.Common
import Data.Aeson as JSON
import Data.Aeson.Types qualified as JSON
import Data.GraphViz.Attributes.Complete qualified as Graphviz
import Data.Text.Lazy as TL
import Data.TreeDiff
import Data.Vector qualified as V
import Gargantext.Core.Text.List.Formats.TSV
import Gargantext.Core.Types.Phylo hiding (Phylo(..))
import Gargantext.Core.Viz.Phylo hiding (EdgeType(..))
import Gargantext.Core.Viz.Phylo.API.Tools (readPhylo, phylo2dot2json)
import Gargantext.Core.Viz.Phylo.Example qualified as Cleopatre
import Gargantext.Core.Viz.Phylo.PhyloExport
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools
import Paths_gargantext
import Prelude
import Test.Tasty
import Test.Tasty.HUnit

phyloTestConfig :: PhyloConfig
phyloTestConfig = PhyloConfig {
    corpusPath = "corpus.csv"
  , listPath = "list.csv"
  , outputPath = "data/"
  , corpusParser = Tsv {_tsv_limit = 150000}
  , listParser = V4
  , phyloName = "Phylo Name"
  , phyloScale = 2
  , similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2}
  , seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1}
  , defaultMode = True
  , findAncestors = False
  , phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups}
  , phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3}
  , timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5}
  , clique = MaxClique {_mcl_size = 5, _mcl_threshold = 1.0e-4, _mcl_filter = ByThreshold}
  , exportLabel = [ BranchLabel {_branch_labelTagger = MostEmergentTfIdf, _branch_labelSize = 2}
                  , GroupLabel {_group_labelTagger = MostEmergentInclusive, _group_labelSize = 2}
                  ]
  , exportSort = ByHierarchy {_sort_order = Desc}
  , exportFilter = [ByBranchSize {_branch_size = 3.0}]
  }

tests :: TestTree
tests = testGroup "Phylo" [
  testGroup "Export" [
      testCase "ngramsToLabel respects encoding" test_ngramsToLabel_01
    , testCase "ngramsToLabel is rendered correctly in CustomAttribute" test_ngramsToLabel_02
  ]
  , testGroup "toPhyloWithoutLink" [
    testCase "returns expected data" testSmallPhyloWithoutLinkExpectedOutput
  , testCase "phyloCleopatre returns expected data" testCleopatreWithoutLinkExpectedOutput
  , testCase "Nadal canned corpus returns expected data" testNadalWithoutLinkExpectedOutput
  ]
  , testGroup "phylo2dot2json" [
    testCase "is deterministic" testPhylo2dot2json
  ]
  , testGroup "toPhylo" [
    testCase "is deterministic" testToPhyloDeterminism
  ]
  , testGroup "relatedComponents" [
    testCase "finds simple connection" testRelComp_Connected
  ]
  , testCase "parses csv phylo" testCsvPhylo
  ]

testCleopatreWithoutLinkExpectedOutput :: Assertion
testCleopatreWithoutLinkExpectedOutput = do
  let actual = toPhyloWithoutLink Cleopatre.docs Cleopatre.config
  expected <- readPhylo =<< getDataFileName "test-data/phylo/cleopatre.golden.json"
  assertBool (show $ ansiWlEditExprCompact $ ediff expected actual) (expected == actual)

testNadalWithoutLinkExpectedOutput :: Assertion
testNadalWithoutLinkExpectedOutput = do
  corpusPath'    <- getDataFileName "test-data/phylo/nadal_docslist.golden.tsv"
  listPath'      <- getDataFileName "test-data/phylo/nadal_ngramslist.golden.tsv"
  let config  = phyloTestConfig { corpusPath = corpusPath'
                                , listPath   = listPath'
                                , listParser = V3
                                }
  mapList <- fileToList (listParser config) (listPath config)
  corpus <- fileToDocsDefault (corpusParser config)
                        (corpusPath config)
                        [Year 3 1 5,Month 3 1 5,Week 4 2 5]
                        mapList
  let actual = setConfig phyloTestConfig $ toPhyloWithoutLink corpus config
  expected   <- setConfig phyloTestConfig <$> (readPhylo =<< getDataFileName "test-data/phylo/nadal.golden.json")
  assertBool (show $ ansiWlEditExprCompact $ ediff expected actual) (expected == actual)

testSmallPhyloWithoutLinkExpectedOutput :: Assertion
testSmallPhyloWithoutLinkExpectedOutput = do
  bpaConfig      <- getDataFileName "bench-data/phylo/bpa-config.json"
  corpusPath'    <- getDataFileName "test-data/phylo/small_phylo_docslist.tsv"
  listPath'      <- getDataFileName "test-data/phylo/small_phylo_ngramslist.tsv"
  (Right config) <- fmap (\pcfg -> pcfg { corpusPath = corpusPath'
                                        , listPath   = listPath'
                                        }) <$> (JSON.eitherDecodeFileStrict' bpaConfig)
  mapList <- fileToList (listParser config) (listPath config)
  corpus <- fileToDocsDefault (corpusParser config)
                        (corpusPath config)
                        [Year 3 1 5,Month 3 1 5,Week 4 2 5]
                        mapList
  let actual = setConfig phyloTestConfig $ toPhyloWithoutLink corpus config
  expected   <- setConfig phyloTestConfig <$> (readPhylo =<< getDataFileName "test-data/phylo/small-phylo.golden.json")
  assertBool (show $ ansiWlEditExprCompact $ ediff expected actual) (expected == actual)

testPhylo2dot2json :: Assertion
testPhylo2dot2json = do
  expected_e <- JSON.eitherDecodeFileStrict' =<< getDataFileName "test-data/phylo/phylo2dot2json.golden.json"
  case expected_e of
    Left err -> fail err
    Right (expected :: GraphData) -> do
      actual_e <- JSON.parseEither JSON.parseJSON <$> phylo2dot2json Cleopatre.phyloCleopatre
      case actual_e of
        Left err -> fail err
        Right (actual :: GraphData) -> do
          assertBool ("Phylo mismatch!" <> show (ansiWlEditExprCompact $ ediff expected actual)) (expected `compareGraphDataFuzzy` actual)

compareGraphDataFuzzy :: GraphData -> GraphData -> Bool
compareGraphDataFuzzy gd1 gd2 =
  and [ _gd__subgraph_cnt gd1 == _gd__subgraph_cnt gd2
      , _gd_directed gd1      == _gd_directed gd2
      , and $ Prelude.map (uncurry compareEdgeDataFuzzy) $ Prelude.zip (_gd_edges gd1) (_gd_edges gd2)
      , and $ Prelude.map (uncurry compareObjectDataFuzzy) $ Prelude.zip (_gd_objects gd1) (_gd_objects gd2)
      , _gd_strict gd1        == _gd_strict gd2
      , _gd_data gd1 `compareGraphDataDataFuzzy` _gd_data gd2
      ]
  where
    gdd1 `compareEdgeDataFuzzy` ggd2 = case (gdd1, ggd2) of
      (GroupToAncestor gvId1 ecd1 gad1, GroupToAncestor gvId2 ecd2 gad2)
        -> and [ gvId1 == gvId2, ecd1 `compareEdgeCommonDataFuzzy` ecd2, gad1 == gad2 ]
      (GroupToAncestor{}, _)
        -> False
      (GroupToGroup gvId1 ecd1 gd1', GroupToGroup gvId2 ecd2 gd2')
        -> and [ gvId1 == gvId2, ecd1 `compareEdgeCommonDataFuzzy` ecd2, gd1' == gd2' ]
      (GroupToGroup{}, _)
        -> False
      (BranchToGroup gvId1 ecd1 bgd1, BranchToGroup gvId2 ecd2 bgd2)
        -> and [ gvId1 == gvId2, ecd1 `compareEdgeCommonDataFuzzy` ecd2, bgd1 == bgd2 ]
      (BranchToGroup{}, _)
        -> False
      (PeriodToPeriod gvId1 ecd1, PeriodToPeriod gvId2 ecd2)
        -> and [ gvId1 == gvId2, ecd1 `compareEdgeCommonDataFuzzy` ecd2 ]
      (PeriodToPeriod{}, _)
        -> False

    gdd1 `compareObjectDataFuzzy` ggd2 = case (gdd1, ggd2) of
      (GroupToNode  gvId1 ncd1 gnd1, GroupToNode  gvId2 ncd2 gnd2)
        -> and [ gvId1 == gvId2, ncd1 `compareNodeCommonDataFuzzy` ncd2, gnd1 == gnd2 ]
      (GroupToNode{}, _)
        -> False
      (BranchToNode gvId1 ncd1 bnd1, BranchToNode gvId2 ncd2 bnd2)
        -> and [ gvId1 == gvId2, ncd1 `compareNodeCommonDataFuzzy` ncd2, bnd1 == bnd2 ]
      (BranchToNode{}, _)
        -> False
      (PeriodToNode gvId1 ncd1 pnd1, PeriodToNode gvId2 ncd2 pnd2)
        -> and [ gvId1 == gvId2, ncd1 `compareNodeCommonDataFuzzy` ncd2, pnd1 == pnd2 ]
      (PeriodToNode{}, _)
        -> False
      (Layer gvId1 gdd1' ld1, Layer gvId2 gdd2 ld2)
        -> and [ gvId1 == gvId2, gdd1' `compareGraphDataDataFuzzy` gdd2, ld1 == ld2 ]
      (Layer{}, _)
        -> False

    gdd1 `compareNodeCommonDataFuzzy` ggd2 =
      -- Excluded fields: pos, width, height.
      and [ _nd_label    gdd1 == _nd_label ggd2
          , _nd_name     gdd1 == _nd_name ggd2
          , _nd_nodeType gdd1 == _nd_nodeType ggd2
          , _nd_shape    gdd1 == _nd_shape ggd2
          ]

    gdd1 `compareEdgeCommonDataFuzzy` gdd2 =
      -- Excluded fields: pos, width.
      and [ _ed_color gdd1 == _ed_color gdd2
          , _ed_head  gdd1 == _ed_head gdd2
          , _ed_tail  gdd1 == _ed_tail gdd2
          ]

    gdd1 `compareGraphDataDataFuzzy` ggd2 =
      -- Excluded fields: bb, lp, lheight, lwidth.
      and [ _gdd_color             gdd1 == _gdd_color ggd2
          , _gdd_fontsize          gdd1 == _gdd_fontsize ggd2
          , _gdd_label             gdd1 == _gdd_label ggd2
          , _gdd_labelloc          gdd1 == _gdd_labelloc ggd2
          , _gdd_name              gdd1 == _gdd_name ggd2
          , _gdd_nodesep           gdd1 == _gdd_nodesep ggd2
          , _gdd_overlap           gdd1 == _gdd_overlap ggd2
          , _gdd_phyloBranches     gdd1 == _gdd_phyloBranches ggd2
          , _gdd_phyloDocs         gdd1 == _gdd_phyloDocs ggd2
          , _gdd_phyloFoundations  gdd1 == _gdd_phyloFoundations ggd2
          , _gdd_phyloGroups       gdd1 == _gdd_phyloGroups ggd2
          , _gdd_phyloPeriods      gdd1 == _gdd_phyloPeriods ggd2
          , _gdd_phyloSources      gdd1 == _gdd_phyloSources ggd2
          , _gdd_phyloTerms        gdd1 == _gdd_phyloTerms ggd2
          , _gdd_phyloTimeScale    gdd1 == _gdd_phyloTimeScale ggd2
          , _gdd_rank              gdd1 == _gdd_rank ggd2
          , _gdd_ranksep           gdd1 == _gdd_ranksep ggd2
          , _gdd_ratio             gdd1 == _gdd_ratio ggd2
          , _gdd_splines           gdd1 == _gdd_splines ggd2
          , _gdd_style             gdd1 == _gdd_style ggd2
          ]

test_ngramsToLabel_01 :: Assertion
test_ngramsToLabel_01 =
  let ngrams = V.fromList [ "évaluation", "méthodologique" ]
  in ngramsToLabel ngrams [0,1] @?= "évaluation | méthodologique"

test_ngramsToLabel_02 :: Assertion
test_ngramsToLabel_02 =
  let ngrams = V.fromList [ "钱", "狗" ]
  in (Graphviz.customValue $ toAttr "lbl" $ TL.fromStrict $ ngramsToLabel ngrams [0,1]) @?= "钱 | 狗"

testRelComp_Connected :: Assertion
testRelComp_Connected = do
  (relatedComponents @Int) []                                @?= []
  (relatedComponents @Int) [[]]                              @?= [[]]
  (relatedComponents @Int) [[],[1,2]]                        @?= [[],[1,2]]
  (relatedComponents @Int) [[1,2],[]]                        @?= [[1,2],[]]
  (relatedComponents @Int) [[1,2], [2]]                      @?= [[1,2]]
  (relatedComponents @Int) [[1,2], [2],[2]]                  @?= [[1,2]]
  (relatedComponents @Int) [[1,2], [2],[2,1]]                @?= [[1,2]]
  (relatedComponents @Int) [[1,2], [2,4]]                    @?= [[1,2,4]]
  (relatedComponents @Int) [[1,2], [3,5], [2,4]]             @?= [[3,5], [1,2,4]]
  (relatedComponents @Int) [[1,2], [3,5], [2,4],[9,5],[5,4]] @?= [[1,2,4,3,5,9]]
  (relatedComponents @Int) [[1,2,5], [4,5,9]]                @?= [[1,2,5,4,9]]

testToPhyloDeterminism :: Assertion
testToPhyloDeterminism = do
  corpusPath' <- getDataFileName "test-data/phylo/GarganText_DocsList-nodeId-187481.tsv"
  listPath'   <- getDataFileName "test-data/phylo/GarganText_NgramsList-187482.tsv"
  let config  = phyloTestConfig { corpusPath = corpusPath'
                                , listPath   = listPath'
                                , listParser = V3
                                }
  mapList <- tsvMapTermList (listPath config)
  corpus <- fileToDocsDefault (corpusParser config)
                              (corpusPath config)
                              [Year 3 1 5,Month 3 1 5,Week 4 2 5]
                              mapList
  let actual = setConfig phyloTestConfig $ toPhylo $ toPhyloWithoutLink corpus config
  expected    <- setConfig phyloTestConfig <$> (readPhylo =<< getDataFileName "test-data/phylo/187481.json")
  assertBool ("Phylo mismatch! " <> show (ansiWlEditExprCompact $ ediff expected actual)) (expected == actual)

testCsvPhylo :: Assertion
testCsvPhylo = do
  pth <- getDataFileName "test-data/phylo/cleopatre.golden.csv.json"
  phyloJson <- eitherDecodeFileStrict' @Phylo pth
  case phyloJson of
    Left err -> error err
    Right _  -> pure ()