{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TypeApplications #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE QuasiQuotes #-} module Test.Offline.Phylo (tests) where import CLI.Phylo.Common import Data.Aeson as JSON import Data.Aeson.Types qualified as JSON import Data.GraphViz.Attributes.Complete qualified as Graphviz import Data.Text.Lazy as TL import Data.TreeDiff import Data.Vector qualified as V import Gargantext.Core.Text.List.Formats.TSV import Gargantext.Core.Types.Phylo hiding (Phylo(..)) import Gargantext.Core.Viz.Phylo hiding (EdgeType(..)) import Gargantext.Core.Viz.Phylo.API.Tools (readPhylo, phylo2dot2json) import Gargantext.Core.Viz.Phylo.Example qualified as Cleopatre import Gargantext.Core.Viz.Phylo.PhyloExport import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink) import Gargantext.Core.Viz.Phylo.PhyloTools import Paths_gargantext import Prelude import Test.Tasty import Test.Tasty.HUnit phyloTestConfig :: PhyloConfig phyloTestConfig = PhyloConfig { corpusPath = "corpus.csv" , listPath = "list.csv" , outputPath = "data/" , corpusParser = Tsv {_tsv_limit = 150000} , listParser = V4 , phyloName = "Phylo Name" , phyloScale = 2 , similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2} , seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1} , defaultMode = True , findAncestors = False , phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups} , phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3} , timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5} , clique = MaxClique {_mcl_size = 5, _mcl_threshold = 1.0e-4, _mcl_filter = ByThreshold} , exportLabel = [ BranchLabel {_branch_labelTagger = MostEmergentTfIdf, _branch_labelSize = 2} , GroupLabel {_group_labelTagger = MostEmergentInclusive, _group_labelSize = 2} ] , exportSort = ByHierarchy {_sort_order = Desc} , exportFilter = [ByBranchSize {_branch_size = 3.0}] } tests :: TestTree tests = testGroup "Phylo" [ testGroup "Export" [ testCase "ngramsToLabel respects encoding" test_ngramsToLabel_01 , testCase "ngramsToLabel is rendered correctly in CustomAttribute" test_ngramsToLabel_02 ] , testGroup "toPhyloWithoutLink" [ testCase "returns expected data" testSmallPhyloWithoutLinkExpectedOutput , testCase "phyloCleopatre returns expected data" testCleopatreWithoutLinkExpectedOutput , testCase "Nadal canned corpus returns expected data" testNadalWithoutLinkExpectedOutput ] , testGroup "phylo2dot2json" [ testCase "is deterministic" testPhylo2dot2json ] , testGroup "toPhylo" [ testCase "is deterministic" testToPhyloDeterminism ] , testGroup "relatedComponents" [ testCase "finds simple connection" testRelComp_Connected ] , testCase "parses csv phylo" testCsvPhylo ] testCleopatreWithoutLinkExpectedOutput :: Assertion testCleopatreWithoutLinkExpectedOutput = do let actual = toPhyloWithoutLink Cleopatre.docs Cleopatre.config expected <- readPhylo =<< getDataFileName "test-data/phylo/cleopatre.golden.json" assertBool (show $ ansiWlEditExprCompact $ ediff expected actual) (expected == actual) testNadalWithoutLinkExpectedOutput :: Assertion testNadalWithoutLinkExpectedOutput = do corpusPath' <- getDataFileName "test-data/phylo/nadal_docslist.golden.tsv" listPath' <- getDataFileName "test-data/phylo/nadal_ngramslist.golden.tsv" let config = phyloTestConfig { corpusPath = corpusPath' , listPath = listPath' , listParser = V3 } mapList <- fileToList (listParser config) (listPath config) corpus <- fileToDocsDefault (corpusParser config) (corpusPath config) [Year 3 1 5,Month 3 1 5,Week 4 2 5] mapList let actual = setConfig phyloTestConfig $ toPhyloWithoutLink corpus config expected <- setConfig phyloTestConfig <$> (readPhylo =<< getDataFileName "test-data/phylo/nadal.golden.json") assertBool (show $ ansiWlEditExprCompact $ ediff expected actual) (expected == actual) testSmallPhyloWithoutLinkExpectedOutput :: Assertion testSmallPhyloWithoutLinkExpectedOutput = do bpaConfig <- getDataFileName "bench-data/phylo/bpa-config.json" corpusPath' <- getDataFileName "test-data/phylo/small_phylo_docslist.tsv" listPath' <- getDataFileName "test-data/phylo/small_phylo_ngramslist.tsv" (Right config) <- fmap (\pcfg -> pcfg { corpusPath = corpusPath' , listPath = listPath' }) <$> (JSON.eitherDecodeFileStrict' bpaConfig) mapList <- fileToList (listParser config) (listPath config) corpus <- fileToDocsDefault (corpusParser config) (corpusPath config) [Year 3 1 5,Month 3 1 5,Week 4 2 5] mapList let actual = setConfig phyloTestConfig $ toPhyloWithoutLink corpus config expected <- setConfig phyloTestConfig <$> (readPhylo =<< getDataFileName "test-data/phylo/small-phylo.golden.json") assertBool (show $ ansiWlEditExprCompact $ ediff expected actual) (expected == actual) testPhylo2dot2json :: Assertion testPhylo2dot2json = do expected_e <- JSON.eitherDecodeFileStrict' =<< getDataFileName "test-data/phylo/phylo2dot2json.golden.json" case expected_e of Left err -> fail err Right (expected :: GraphData) -> do actual_e <- JSON.parseEither JSON.parseJSON <$> phylo2dot2json Cleopatre.phyloCleopatre case actual_e of Left err -> fail err Right (actual :: GraphData) -> do assertBool ("Phylo mismatch!" <> show (ansiWlEditExprCompact $ ediff expected actual)) (expected `compareGraphDataFuzzy` actual) compareGraphDataFuzzy :: GraphData -> GraphData -> Bool compareGraphDataFuzzy gd1 gd2 = and [ _gd__subgraph_cnt gd1 == _gd__subgraph_cnt gd2 , _gd_directed gd1 == _gd_directed gd2 , and $ Prelude.map (uncurry compareEdgeDataFuzzy) $ Prelude.zip (_gd_edges gd1) (_gd_edges gd2) , and $ Prelude.map (uncurry compareObjectDataFuzzy) $ Prelude.zip (_gd_objects gd1) (_gd_objects gd2) , _gd_strict gd1 == _gd_strict gd2 , _gd_data gd1 `compareGraphDataDataFuzzy` _gd_data gd2 ] where gdd1 `compareEdgeDataFuzzy` ggd2 = case (gdd1, ggd2) of (GroupToAncestor gvId1 ecd1 gad1, GroupToAncestor gvId2 ecd2 gad2) -> and [ gvId1 == gvId2, ecd1 `compareEdgeCommonDataFuzzy` ecd2, gad1 == gad2 ] (GroupToAncestor{}, _) -> False (GroupToGroup gvId1 ecd1 gd1', GroupToGroup gvId2 ecd2 gd2') -> and [ gvId1 == gvId2, ecd1 `compareEdgeCommonDataFuzzy` ecd2, gd1' == gd2' ] (GroupToGroup{}, _) -> False (BranchToGroup gvId1 ecd1 bgd1, BranchToGroup gvId2 ecd2 bgd2) -> and [ gvId1 == gvId2, ecd1 `compareEdgeCommonDataFuzzy` ecd2, bgd1 == bgd2 ] (BranchToGroup{}, _) -> False (PeriodToPeriod gvId1 ecd1, PeriodToPeriod gvId2 ecd2) -> and [ gvId1 == gvId2, ecd1 `compareEdgeCommonDataFuzzy` ecd2 ] (PeriodToPeriod{}, _) -> False gdd1 `compareObjectDataFuzzy` ggd2 = case (gdd1, ggd2) of (GroupToNode gvId1 ncd1 gnd1, GroupToNode gvId2 ncd2 gnd2) -> and [ gvId1 == gvId2, ncd1 `compareNodeCommonDataFuzzy` ncd2, gnd1 == gnd2 ] (GroupToNode{}, _) -> False (BranchToNode gvId1 ncd1 bnd1, BranchToNode gvId2 ncd2 bnd2) -> and [ gvId1 == gvId2, ncd1 `compareNodeCommonDataFuzzy` ncd2, bnd1 == bnd2 ] (BranchToNode{}, _) -> False (PeriodToNode gvId1 ncd1 pnd1, PeriodToNode gvId2 ncd2 pnd2) -> and [ gvId1 == gvId2, ncd1 `compareNodeCommonDataFuzzy` ncd2, pnd1 == pnd2 ] (PeriodToNode{}, _) -> False (Layer gvId1 gdd1' ld1, Layer gvId2 gdd2 ld2) -> and [ gvId1 == gvId2, gdd1' `compareGraphDataDataFuzzy` gdd2, ld1 == ld2 ] (Layer{}, _) -> False gdd1 `compareNodeCommonDataFuzzy` ggd2 = -- Excluded fields: pos, width, height. and [ _nd_label gdd1 == _nd_label ggd2 , _nd_name gdd1 == _nd_name ggd2 , _nd_nodeType gdd1 == _nd_nodeType ggd2 , _nd_shape gdd1 == _nd_shape ggd2 ] gdd1 `compareEdgeCommonDataFuzzy` gdd2 = -- Excluded fields: pos, width. and [ _ed_color gdd1 == _ed_color gdd2 , _ed_head gdd1 == _ed_head gdd2 , _ed_tail gdd1 == _ed_tail gdd2 ] gdd1 `compareGraphDataDataFuzzy` ggd2 = -- Excluded fields: bb, lp, lheight, lwidth. and [ _gdd_color gdd1 == _gdd_color ggd2 , _gdd_fontsize gdd1 == _gdd_fontsize ggd2 , _gdd_label gdd1 == _gdd_label ggd2 , _gdd_labelloc gdd1 == _gdd_labelloc ggd2 , _gdd_name gdd1 == _gdd_name ggd2 , _gdd_nodesep gdd1 == _gdd_nodesep ggd2 , _gdd_overlap gdd1 == _gdd_overlap ggd2 , _gdd_phyloBranches gdd1 == _gdd_phyloBranches ggd2 , _gdd_phyloDocs gdd1 == _gdd_phyloDocs ggd2 , _gdd_phyloFoundations gdd1 == _gdd_phyloFoundations ggd2 , _gdd_phyloGroups gdd1 == _gdd_phyloGroups ggd2 , _gdd_phyloPeriods gdd1 == _gdd_phyloPeriods ggd2 , _gdd_phyloSources gdd1 == _gdd_phyloSources ggd2 , _gdd_phyloTerms gdd1 == _gdd_phyloTerms ggd2 , _gdd_phyloTimeScale gdd1 == _gdd_phyloTimeScale ggd2 , _gdd_rank gdd1 == _gdd_rank ggd2 , _gdd_ranksep gdd1 == _gdd_ranksep ggd2 , _gdd_ratio gdd1 == _gdd_ratio ggd2 , _gdd_splines gdd1 == _gdd_splines ggd2 , _gdd_style gdd1 == _gdd_style ggd2 ] test_ngramsToLabel_01 :: Assertion test_ngramsToLabel_01 = let ngrams = V.fromList [ "évaluation", "méthodologique" ] in ngramsToLabel ngrams [0,1] @?= "évaluation | méthodologique" test_ngramsToLabel_02 :: Assertion test_ngramsToLabel_02 = let ngrams = V.fromList [ "钱", "狗" ] in (Graphviz.customValue $ toAttr "lbl" $ TL.fromStrict $ ngramsToLabel ngrams [0,1]) @?= "钱 | 狗" testRelComp_Connected :: Assertion testRelComp_Connected = do (relatedComponents @Int) [] @?= [] (relatedComponents @Int) [[]] @?= [[]] (relatedComponents @Int) [[],[1,2]] @?= [[],[1,2]] (relatedComponents @Int) [[1,2],[]] @?= [[1,2],[]] (relatedComponents @Int) [[1,2], [2]] @?= [[1,2]] (relatedComponents @Int) [[1,2], [2],[2]] @?= [[1,2]] (relatedComponents @Int) [[1,2], [2],[2,1]] @?= [[1,2]] (relatedComponents @Int) [[1,2], [2,4]] @?= [[1,2,4]] (relatedComponents @Int) [[1,2], [3,5], [2,4]] @?= [[3,5], [1,2,4]] (relatedComponents @Int) [[1,2], [3,5], [2,4],[9,5],[5,4]] @?= [[1,2,4,3,5,9]] (relatedComponents @Int) [[1,2,5], [4,5,9]] @?= [[1,2,5,4,9]] testToPhyloDeterminism :: Assertion testToPhyloDeterminism = do corpusPath' <- getDataFileName "test-data/phylo/GarganText_DocsList-nodeId-187481.tsv" listPath' <- getDataFileName "test-data/phylo/GarganText_NgramsList-187482.tsv" let config = phyloTestConfig { corpusPath = corpusPath' , listPath = listPath' , listParser = V3 } mapList <- tsvMapTermList (listPath config) corpus <- fileToDocsDefault (corpusParser config) (corpusPath config) [Year 3 1 5,Month 3 1 5,Week 4 2 5] mapList let actual = setConfig phyloTestConfig $ toPhylo $ toPhyloWithoutLink corpus config expected <- setConfig phyloTestConfig <$> (readPhylo =<< getDataFileName "test-data/phylo/187481.json") assertBool ("Phylo mismatch! " <> show (ansiWlEditExprCompact $ ediff expected actual)) (expected == actual) testCsvPhylo :: Assertion testCsvPhylo = do pth <- getDataFileName "test-data/phylo/cleopatre.golden.csv.json" phyloJson <- eitherDecodeFileStrict' @Phylo pth case phyloJson of Left err -> error err Right _ -> pure ()