{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TupleSections #-}

module Test.Offline.Phylo (tests) where

import CLI.Phylo.Common
import Data.Aeson as JSON
import Data.Aeson.Types qualified as JSON
import Data.Aeson.Encode.Pretty qualified as JSON
import Data.ByteString.Lazy qualified as BL
import Data.GraphViz.Attributes.Complete qualified as Graphviz
import Data.Text.Lazy as TL
import Data.TreeDiff
import Data.Vector qualified as V
import Gargantext.Core.Text.List.Formats.TSV
import Gargantext.Core.Types.Phylo hiding (Phylo(..))
import Gargantext.Core.Viz.Phylo.API.Tools (readPhylo, phylo2dot2json)
import Gargantext.Core.Viz.Phylo.Example qualified as Cleopatre
import Gargantext.Core.Viz.Phylo hiding (EdgeType(..))
import Gargantext.Core.Viz.Phylo.PhyloExport
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools
import Paths_gargantext
import Prelude
import Test.Tasty
import Test.Tasty.Golden (goldenVsStringDiff)
import Test.Tasty.HUnit
import qualified Test.Tasty.Golden.Advanced as Advanced

phyloTestConfig :: PhyloConfig
phyloTestConfig = PhyloConfig {
    corpusPath = "corpus.csv"
  , listPath = "list.csv"
  , outputPath = "data/"
  , corpusParser = Tsv {_tsv_limit = 150000}
  , listParser = V4
  , phyloName = "Phylo Name"
  , phyloScale = 2
  , similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2}
  , seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1}
  , defaultMode = True
  , findAncestors = False
  , phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups}
  , phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3}
  , timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5}
  , clique = MaxClique {_mcl_size = 5, _mcl_threshold = 1.0e-4, _mcl_filter = ByThreshold}
  , exportLabel = [ BranchLabel {_branch_labelTagger = MostEmergentTfIdf, _branch_labelSize = 2}
                  , GroupLabel {_group_labelTagger = MostEmergentInclusive, _group_labelSize = 2}
                  ]
  , exportSort = ByHierarchy {_sort_order = Desc}
  , exportFilter = [ByBranchSize {_branch_size = 3.0}]
  }

phyloGolden :: TestName -> (FilePath, IO BL.ByteString) -> TestTree
phyloGolden testName (fp, action) =
  goldenVsStringDiff testName differ fp action
  where
    differ ref new = [ "diff", "-u", "-w", "--color=always", ref, new]

-- | Use this variant for those tests which requires a more sophisticated way of
-- comparing outputs directly on the GraphData
phyloGoldenGraphData :: TestName -> (FilePath, IO GraphData) -> TestTree
phyloGoldenGraphData testName (goldenPath, getActual) =
  Advanced.goldenTest testName getGolden getActual differ updateGolden
  where
    differ ref new = pure $ case compareGraphDataFuzzy ref new of
      True  -> Nothing
      False -> Just $ show (ansiWlEditExprCompact $ ediff ref new)

    updateGolden :: GraphData -> IO ()
    updateGolden gd = BL.writeFile goldenPath (JSON.encodePretty gd)

    getGolden :: IO GraphData
    getGolden = do
      expected_e <- JSON.eitherDecodeFileStrict' =<< getDataFileName goldenPath
      case expected_e of
        Left err -> fail err
        Right (expected :: GraphData) -> pure expected

tests :: TestTree
tests = testGroup "Phylo" [
  testGroup "Export" [
      testCase "ngramsToLabel respects encoding" test_ngramsToLabel_01
    , testCase "ngramsToLabel is rendered correctly in CustomAttribute" test_ngramsToLabel_02
  ]
  , testGroup "toPhyloWithoutLink" [
    testCase "returns expected data" testSmallPhyloWithoutLinkExpectedOutput
  , phyloGolden "phyloCleopatre returns expected data" testCleopatreWithoutLinkExpectedOutput
  , phyloGolden "Nadal canned corpus returns expected data" testNadalWithoutLinkExpectedOutput
  ]
  , testGroup "phylo2dot2json" [
    phyloGoldenGraphData "is deterministic" testPhylo2dot2json
  ]
  , testGroup "toPhylo" [
    phyloGolden "is deterministic" testToPhyloDeterminism
  ]
  , testGroup "relatedComponents" [
    testCase "finds simple connection" testRelComp_Connected
  ]
  , testCase "parses csv phylo" testCsvPhylo
  ]

testCleopatreWithoutLinkExpectedOutput :: (FilePath, IO BL.ByteString)
testCleopatreWithoutLinkExpectedOutput =
  let actual = toPhyloWithoutLink Cleopatre.docs Cleopatre.config
  in ("test-data/phylo/cleopatre.golden.json", pure $ JSON.encodePretty actual)

testNadalWithoutLinkExpectedOutput :: (FilePath, IO BL.ByteString)
testNadalWithoutLinkExpectedOutput = ("test-data/phylo/nadal.golden.json",) $ do
  corpusPath'    <- getDataFileName "test-data/phylo/nadal_docslist.golden.tsv"
  listPath'      <- getDataFileName "test-data/phylo/nadal_ngramslist.golden.tsv"
  let config  = phyloTestConfig { corpusPath = corpusPath'
                                , listPath   = listPath'
                                , listParser = V3
                                }
  mapList <- fileToList (listParser config) (listPath config)
  corpus <- fileToDocsDefault (corpusParser config)
                        (corpusPath config)
                        [Year 3 1 5,Month 3 1 5,Week 4 2 5]
                        mapList
  pure $ JSON.encodePretty $ setConfig phyloTestConfig $ toPhyloWithoutLink corpus config

testSmallPhyloWithoutLinkExpectedOutput :: Assertion
testSmallPhyloWithoutLinkExpectedOutput = do
  bpaConfig      <- getDataFileName "bench-data/phylo/bpa-config.json"
  corpusPath'    <- getDataFileName "test-data/phylo/small_phylo_docslist.tsv"
  listPath'      <- getDataFileName "test-data/phylo/small_phylo_ngramslist.tsv"
  (Right config) <- fmap (\pcfg -> pcfg { corpusPath = corpusPath'
                                        , listPath   = listPath'
                                        }) <$> (JSON.eitherDecodeFileStrict' bpaConfig)
  mapList <- fileToList (listParser config) (listPath config)
  corpus <- fileToDocsDefault (corpusParser config)
                        (corpusPath config)
                        [Year 3 1 5,Month 3 1 5,Week 4 2 5]
                        mapList
  let actual = setConfig phyloTestConfig $ toPhyloWithoutLink corpus config
  expected   <- setConfig phyloTestConfig <$> (readPhylo =<< getDataFileName "test-data/phylo/small-phylo.golden.json")
  assertBool (show $ ansiWlEditExprCompact $ ediff expected actual) (expected == actual)

testPhylo2dot2json :: (FilePath, IO GraphData)
testPhylo2dot2json = ("test-data/phylo/phylo2dot2json.golden.json",) $ do
  actual_e <- JSON.parseEither JSON.parseJSON <$> phylo2dot2json Cleopatre.phyloCleopatre
  case actual_e of
    Left err -> fail err
    Right (actual :: GraphData) -> pure actual

compareGraphDataFuzzy :: GraphData -> GraphData -> Bool
compareGraphDataFuzzy gd1 gd2 =
  and [ _gd__subgraph_cnt gd1 == _gd__subgraph_cnt gd2
      , _gd_directed gd1      == _gd_directed gd2
      , and $ Prelude.map (uncurry compareEdgeDataFuzzy) $ Prelude.zip (_gd_edges gd1) (_gd_edges gd2)
      , and $ Prelude.map (uncurry compareObjectDataFuzzy) $ Prelude.zip (_gd_objects gd1) (_gd_objects gd2)
      , _gd_strict gd1        == _gd_strict gd2
      , _gd_data gd1 `compareGraphDataDataFuzzy` _gd_data gd2
      ]
  where
    gdd1 `compareEdgeDataFuzzy` ggd2 = case (gdd1, ggd2) of
      (GroupToAncestor gvId1 ecd1 gad1, GroupToAncestor gvId2 ecd2 gad2)
        -> and [ gvId1 == gvId2, ecd1 `compareEdgeCommonDataFuzzy` ecd2, gad1 == gad2 ]
      (GroupToAncestor{}, _)
        -> False
      (GroupToGroup gvId1 ecd1 gd1', GroupToGroup gvId2 ecd2 gd2')
        -> and [ gvId1 == gvId2, ecd1 `compareEdgeCommonDataFuzzy` ecd2, gd1' == gd2' ]
      (GroupToGroup{}, _)
        -> False
      (BranchToGroup gvId1 ecd1 bgd1, BranchToGroup gvId2 ecd2 bgd2)
        -> and [ gvId1 == gvId2, ecd1 `compareEdgeCommonDataFuzzy` ecd2, bgd1 == bgd2 ]
      (BranchToGroup{}, _)
        -> False
      (PeriodToPeriod gvId1 ecd1, PeriodToPeriod gvId2 ecd2)
        -> and [ gvId1 == gvId2, ecd1 `compareEdgeCommonDataFuzzy` ecd2 ]
      (PeriodToPeriod{}, _)
        -> False

    gdd1 `compareObjectDataFuzzy` ggd2 = case (gdd1, ggd2) of
      (GroupToNode  gvId1 ncd1 gnd1, GroupToNode  gvId2 ncd2 gnd2)
        -> and [ gvId1 == gvId2, ncd1 `compareNodeCommonDataFuzzy` ncd2, gnd1 == gnd2 ]
      (GroupToNode{}, _)
        -> False
      (BranchToNode gvId1 ncd1 bnd1, BranchToNode gvId2 ncd2 bnd2)
        -> and [ gvId1 == gvId2, ncd1 `compareNodeCommonDataFuzzy` ncd2, bnd1 == bnd2 ]
      (BranchToNode{}, _)
        -> False
      (PeriodToNode gvId1 ncd1 pnd1, PeriodToNode gvId2 ncd2 pnd2)
        -> and [ gvId1 == gvId2, ncd1 `compareNodeCommonDataFuzzy` ncd2, pnd1 == pnd2 ]
      (PeriodToNode{}, _)
        -> False
      (Layer gvId1 gdd1' ld1, Layer gvId2 gdd2 ld2)
        -> and [ gvId1 == gvId2, gdd1' `compareGraphDataDataFuzzy` gdd2, ld1 == ld2 ]
      (Layer{}, _)
        -> False

    gdd1 `compareNodeCommonDataFuzzy` ggd2 =
      -- Excluded fields: pos, width, height.
      and [ _nd_label    gdd1 == _nd_label ggd2
          , _nd_name     gdd1 == _nd_name ggd2
          , _nd_nodeType gdd1 == _nd_nodeType ggd2
          , _nd_shape    gdd1 == _nd_shape ggd2
          ]

    gdd1 `compareEdgeCommonDataFuzzy` gdd2 =
      -- Excluded fields: pos, width.
      and [ _ed_color gdd1 == _ed_color gdd2
          , _ed_head  gdd1 == _ed_head gdd2
          , _ed_tail  gdd1 == _ed_tail gdd2
          ]

    gdd1 `compareGraphDataDataFuzzy` ggd2 =
      -- Excluded fields: bb, lp, lheight, lwidth.
      and [ _gdd_color             gdd1 == _gdd_color ggd2
          , _gdd_fontsize          gdd1 == _gdd_fontsize ggd2
          , _gdd_label             gdd1 == _gdd_label ggd2
          , _gdd_labelloc          gdd1 == _gdd_labelloc ggd2
          , _gdd_name              gdd1 == _gdd_name ggd2
          , _gdd_nodesep           gdd1 == _gdd_nodesep ggd2
          , _gdd_overlap           gdd1 == _gdd_overlap ggd2
          , _gdd_phyloBranches     gdd1 == _gdd_phyloBranches ggd2
          , _gdd_phyloDocs         gdd1 == _gdd_phyloDocs ggd2
          , _gdd_phyloFoundations  gdd1 == _gdd_phyloFoundations ggd2
          , _gdd_phyloGroups       gdd1 == _gdd_phyloGroups ggd2
          , _gdd_phyloPeriods      gdd1 == _gdd_phyloPeriods ggd2
          , _gdd_phyloSources      gdd1 == _gdd_phyloSources ggd2
          , _gdd_phyloTerms        gdd1 == _gdd_phyloTerms ggd2
          , _gdd_phyloTimeScale    gdd1 == _gdd_phyloTimeScale ggd2
          , _gdd_rank              gdd1 == _gdd_rank ggd2
          , _gdd_ranksep           gdd1 == _gdd_ranksep ggd2
          , _gdd_ratio             gdd1 == _gdd_ratio ggd2
          , _gdd_splines           gdd1 == _gdd_splines ggd2
          , _gdd_style             gdd1 == _gdd_style ggd2
          ]

test_ngramsToLabel_01 :: Assertion
test_ngramsToLabel_01 =
  let ngrams = V.fromList [ "évaluation", "méthodologique" ]
  in ngramsToLabel ngrams [0,1] @?= "évaluation | méthodologique"

test_ngramsToLabel_02 :: Assertion
test_ngramsToLabel_02 =
  let ngrams = V.fromList [ "钱", "狗" ]
  in (Graphviz.customValue $ toAttr "lbl" $ TL.fromStrict $ ngramsToLabel ngrams [0,1]) @?= "钱 | 狗"

testRelComp_Connected :: Assertion
testRelComp_Connected = do
  (relatedComponents @Int) []                                @?= []
  (relatedComponents @Int) [[]]                              @?= [[]]
  (relatedComponents @Int) [[],[1,2]]                        @?= [[],[1,2]]
  (relatedComponents @Int) [[1,2],[]]                        @?= [[1,2],[]]
  (relatedComponents @Int) [[1,2], [2]]                      @?= [[1,2]]
  (relatedComponents @Int) [[1,2], [2],[2]]                  @?= [[1,2]]
  (relatedComponents @Int) [[1,2], [2],[2,1]]                @?= [[1,2]]
  (relatedComponents @Int) [[1,2], [2,4]]                    @?= [[1,2,4]]
  (relatedComponents @Int) [[1,2], [3,5], [2,4]]             @?= [[3,5], [1,2,4]]
  (relatedComponents @Int) [[1,2], [3,5], [2,4],[9,5],[5,4]] @?= [[1,2,4,3,5,9]]
  (relatedComponents @Int) [[1,2,5], [4,5,9]]                @?= [[1,2,5,4,9]]

testToPhyloDeterminism :: (FilePath, IO BL.ByteString)
testToPhyloDeterminism = ("test-data/phylo/187481.json",) $ do
  corpusPath' <- getDataFileName "test-data/phylo/GarganText_DocsList-nodeId-187481.tsv"
  listPath'   <- getDataFileName "test-data/phylo/GarganText_NgramsList-187482.tsv"
  let config  = phyloTestConfig { corpusPath = corpusPath'
                                , listPath   = listPath'
                                , listParser = V3
                                }
  mapList <- tsvMapTermList (listPath config)
  corpus <- fileToDocsDefault (corpusParser config)
                              (corpusPath config)
                              [Year 3 1 5,Month 3 1 5,Week 4 2 5]
                              mapList
  let actual = setConfig phyloTestConfig $ toPhylo $ toPhyloWithoutLink corpus config
  pure $ JSON.encodePretty actual

testCsvPhylo :: Assertion
testCsvPhylo = do
  pth <- getDataFileName "test-data/phylo/cleopatre.golden.csv.json"
  phyloJson <- eitherDecodeFileStrict' @Phylo pth
  case phyloJson of
    Left err -> error err
    Right _  -> pure ()