{-# LANGUAGE TypeApplications #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE QuasiQuotes #-} {-# LANGUAGE TupleSections #-} module Test.Offline.Phylo (tests) where import CLI.Phylo.Common import Data.Aeson as JSON import Data.Aeson.Types qualified as JSON import Data.Aeson.Encode.Pretty qualified as JSON import Data.ByteString.Lazy qualified as BL import Data.GraphViz.Attributes.Complete qualified as Graphviz import Data.Text.Lazy as TL import Data.TreeDiff import Data.Vector qualified as V import Gargantext.Core.Text.List.Formats.TSV import Gargantext.Core.Types.Phylo hiding (Phylo(..)) import Gargantext.Core.Viz.Phylo.API.Tools (readPhylo, phylo2dot2json) import Gargantext.Core.Viz.Phylo.Example qualified as Cleopatre import Gargantext.Core.Viz.Phylo hiding (EdgeType(..)) import Gargantext.Core.Viz.Phylo.PhyloExport import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink) import Gargantext.Core.Viz.Phylo.PhyloTools import Paths_gargantext import Prelude import Test.Tasty import Test.Tasty.Golden (goldenVsStringDiff) import Test.Tasty.HUnit import qualified Test.Tasty.Golden.Advanced as Advanced phyloTestConfig :: PhyloConfig phyloTestConfig = PhyloConfig { corpusPath = "corpus.csv" , listPath = "list.csv" , outputPath = "data/" , corpusParser = Tsv {_tsv_limit = 150000} , listParser = V4 , phyloName = "Phylo Name" , phyloScale = 2 , similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2} , seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1} , defaultMode = True , findAncestors = False , phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups} , phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3} , timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5} , clique = MaxClique {_mcl_size = 5, _mcl_threshold = 1.0e-4, _mcl_filter = ByThreshold} , exportLabel = [ BranchLabel {_branch_labelTagger = MostEmergentTfIdf, _branch_labelSize = 2} , GroupLabel {_group_labelTagger = MostEmergentInclusive, _group_labelSize = 2} ] , exportSort = ByHierarchy {_sort_order = Desc} , exportFilter = [ByBranchSize {_branch_size = 3.0}] } phyloGolden :: TestName -> (FilePath, IO BL.ByteString) -> TestTree phyloGolden testName (fp, action) = goldenVsStringDiff testName differ fp action where differ ref new = [ "diff", "-u", "-w", "--color=always", ref, new] -- | Use this variant for those tests which requires a more sophisticated way of -- comparing outputs directly on the GraphData phyloGoldenGraphData :: TestName -> (FilePath, IO GraphData) -> TestTree phyloGoldenGraphData testName (goldenPath, getActual) = Advanced.goldenTest testName getGolden getActual differ updateGolden where differ ref new = pure $ case compareGraphDataFuzzy ref new of True -> Nothing False -> Just $ show (ansiWlEditExprCompact $ ediff ref new) updateGolden :: GraphData -> IO () updateGolden gd = BL.writeFile goldenPath (JSON.encodePretty gd) getGolden :: IO GraphData getGolden = do expected_e <- JSON.eitherDecodeFileStrict' =<< getDataFileName goldenPath case expected_e of Left err -> fail err Right (expected :: GraphData) -> pure expected tests :: TestTree tests = testGroup "Phylo" [ testGroup "Export" [ testCase "ngramsToLabel respects encoding" test_ngramsToLabel_01 , testCase "ngramsToLabel is rendered correctly in CustomAttribute" test_ngramsToLabel_02 ] , testGroup "toPhyloWithoutLink" [ testCase "returns expected data" testSmallPhyloWithoutLinkExpectedOutput , phyloGolden "phyloCleopatre returns expected data" testCleopatreWithoutLinkExpectedOutput , phyloGolden "Nadal canned corpus returns expected data" testNadalWithoutLinkExpectedOutput ] , testGroup "phylo2dot2json" [ phyloGoldenGraphData "is deterministic" testPhylo2dot2json ] , testGroup "toPhylo" [ phyloGolden "is deterministic" testToPhyloDeterminism ] , testGroup "relatedComponents" [ testCase "finds simple connection" testRelComp_Connected ] , testCase "parses csv phylo" testCsvPhylo ] testCleopatreWithoutLinkExpectedOutput :: (FilePath, IO BL.ByteString) testCleopatreWithoutLinkExpectedOutput = let actual = toPhyloWithoutLink Cleopatre.docs Cleopatre.config in ("test-data/phylo/cleopatre.golden.json", pure $ JSON.encodePretty actual) testNadalWithoutLinkExpectedOutput :: (FilePath, IO BL.ByteString) testNadalWithoutLinkExpectedOutput = ("test-data/phylo/nadal.golden.json",) $ do corpusPath' <- getDataFileName "test-data/phylo/nadal_docslist.golden.tsv" listPath' <- getDataFileName "test-data/phylo/nadal_ngramslist.golden.tsv" let config = phyloTestConfig { corpusPath = corpusPath' , listPath = listPath' , listParser = V3 } mapList <- fileToList (listParser config) (listPath config) corpus <- fileToDocsDefault (corpusParser config) (corpusPath config) [Year 3 1 5,Month 3 1 5,Week 4 2 5] mapList pure $ JSON.encodePretty $ setConfig phyloTestConfig $ toPhyloWithoutLink corpus config testSmallPhyloWithoutLinkExpectedOutput :: Assertion testSmallPhyloWithoutLinkExpectedOutput = do bpaConfig <- getDataFileName "bench-data/phylo/bpa-config.json" corpusPath' <- getDataFileName "test-data/phylo/small_phylo_docslist.tsv" listPath' <- getDataFileName "test-data/phylo/small_phylo_ngramslist.tsv" (Right config) <- fmap (\pcfg -> pcfg { corpusPath = corpusPath' , listPath = listPath' }) <$> (JSON.eitherDecodeFileStrict' bpaConfig) mapList <- fileToList (listParser config) (listPath config) corpus <- fileToDocsDefault (corpusParser config) (corpusPath config) [Year 3 1 5,Month 3 1 5,Week 4 2 5] mapList let actual = setConfig phyloTestConfig $ toPhyloWithoutLink corpus config expected <- setConfig phyloTestConfig <$> (readPhylo =<< getDataFileName "test-data/phylo/small-phylo.golden.json") assertBool (show $ ansiWlEditExprCompact $ ediff expected actual) (expected == actual) testPhylo2dot2json :: (FilePath, IO GraphData) testPhylo2dot2json = ("test-data/phylo/phylo2dot2json.golden.json",) $ do actual_e <- JSON.parseEither JSON.parseJSON <$> phylo2dot2json Cleopatre.phyloCleopatre case actual_e of Left err -> fail err Right (actual :: GraphData) -> pure actual compareGraphDataFuzzy :: GraphData -> GraphData -> Bool compareGraphDataFuzzy gd1 gd2 = and [ _gd__subgraph_cnt gd1 == _gd__subgraph_cnt gd2 , _gd_directed gd1 == _gd_directed gd2 , and $ Prelude.map (uncurry compareEdgeDataFuzzy) $ Prelude.zip (_gd_edges gd1) (_gd_edges gd2) , and $ Prelude.map (uncurry compareObjectDataFuzzy) $ Prelude.zip (_gd_objects gd1) (_gd_objects gd2) , _gd_strict gd1 == _gd_strict gd2 , _gd_data gd1 `compareGraphDataDataFuzzy` _gd_data gd2 ] where gdd1 `compareEdgeDataFuzzy` ggd2 = case (gdd1, ggd2) of (GroupToAncestor gvId1 ecd1 gad1, GroupToAncestor gvId2 ecd2 gad2) -> and [ gvId1 == gvId2, ecd1 `compareEdgeCommonDataFuzzy` ecd2, gad1 == gad2 ] (GroupToAncestor{}, _) -> False (GroupToGroup gvId1 ecd1 gd1', GroupToGroup gvId2 ecd2 gd2') -> and [ gvId1 == gvId2, ecd1 `compareEdgeCommonDataFuzzy` ecd2, gd1' == gd2' ] (GroupToGroup{}, _) -> False (BranchToGroup gvId1 ecd1 bgd1, BranchToGroup gvId2 ecd2 bgd2) -> and [ gvId1 == gvId2, ecd1 `compareEdgeCommonDataFuzzy` ecd2, bgd1 == bgd2 ] (BranchToGroup{}, _) -> False (PeriodToPeriod gvId1 ecd1, PeriodToPeriod gvId2 ecd2) -> and [ gvId1 == gvId2, ecd1 `compareEdgeCommonDataFuzzy` ecd2 ] (PeriodToPeriod{}, _) -> False gdd1 `compareObjectDataFuzzy` ggd2 = case (gdd1, ggd2) of (GroupToNode gvId1 ncd1 gnd1, GroupToNode gvId2 ncd2 gnd2) -> and [ gvId1 == gvId2, ncd1 `compareNodeCommonDataFuzzy` ncd2, gnd1 == gnd2 ] (GroupToNode{}, _) -> False (BranchToNode gvId1 ncd1 bnd1, BranchToNode gvId2 ncd2 bnd2) -> and [ gvId1 == gvId2, ncd1 `compareNodeCommonDataFuzzy` ncd2, bnd1 == bnd2 ] (BranchToNode{}, _) -> False (PeriodToNode gvId1 ncd1 pnd1, PeriodToNode gvId2 ncd2 pnd2) -> and [ gvId1 == gvId2, ncd1 `compareNodeCommonDataFuzzy` ncd2, pnd1 == pnd2 ] (PeriodToNode{}, _) -> False (Layer gvId1 gdd1' ld1, Layer gvId2 gdd2 ld2) -> and [ gvId1 == gvId2, gdd1' `compareGraphDataDataFuzzy` gdd2, ld1 == ld2 ] (Layer{}, _) -> False gdd1 `compareNodeCommonDataFuzzy` ggd2 = -- Excluded fields: pos, width, height. and [ _nd_label gdd1 == _nd_label ggd2 , _nd_name gdd1 == _nd_name ggd2 , _nd_nodeType gdd1 == _nd_nodeType ggd2 , _nd_shape gdd1 == _nd_shape ggd2 ] gdd1 `compareEdgeCommonDataFuzzy` gdd2 = -- Excluded fields: pos, width. and [ _ed_color gdd1 == _ed_color gdd2 , _ed_head gdd1 == _ed_head gdd2 , _ed_tail gdd1 == _ed_tail gdd2 ] gdd1 `compareGraphDataDataFuzzy` ggd2 = -- Excluded fields: bb, lp, lheight, lwidth. and [ _gdd_color gdd1 == _gdd_color ggd2 , _gdd_fontsize gdd1 == _gdd_fontsize ggd2 , _gdd_label gdd1 == _gdd_label ggd2 , _gdd_labelloc gdd1 == _gdd_labelloc ggd2 , _gdd_name gdd1 == _gdd_name ggd2 , _gdd_nodesep gdd1 == _gdd_nodesep ggd2 , _gdd_overlap gdd1 == _gdd_overlap ggd2 , _gdd_phyloBranches gdd1 == _gdd_phyloBranches ggd2 , _gdd_phyloDocs gdd1 == _gdd_phyloDocs ggd2 , _gdd_phyloFoundations gdd1 == _gdd_phyloFoundations ggd2 , _gdd_phyloGroups gdd1 == _gdd_phyloGroups ggd2 , _gdd_phyloPeriods gdd1 == _gdd_phyloPeriods ggd2 , _gdd_phyloSources gdd1 == _gdd_phyloSources ggd2 , _gdd_phyloTerms gdd1 == _gdd_phyloTerms ggd2 , _gdd_phyloTimeScale gdd1 == _gdd_phyloTimeScale ggd2 , _gdd_rank gdd1 == _gdd_rank ggd2 , _gdd_ranksep gdd1 == _gdd_ranksep ggd2 , _gdd_ratio gdd1 == _gdd_ratio ggd2 , _gdd_splines gdd1 == _gdd_splines ggd2 , _gdd_style gdd1 == _gdd_style ggd2 ] test_ngramsToLabel_01 :: Assertion test_ngramsToLabel_01 = let ngrams = V.fromList [ "évaluation", "méthodologique" ] in ngramsToLabel ngrams [0,1] @?= "évaluation | méthodologique" test_ngramsToLabel_02 :: Assertion test_ngramsToLabel_02 = let ngrams = V.fromList [ "钱", "狗" ] in (Graphviz.customValue $ toAttr "lbl" $ TL.fromStrict $ ngramsToLabel ngrams [0,1]) @?= "钱 | 狗" testRelComp_Connected :: Assertion testRelComp_Connected = do (relatedComponents @Int) [] @?= [] (relatedComponents @Int) [[]] @?= [[]] (relatedComponents @Int) [[],[1,2]] @?= [[],[1,2]] (relatedComponents @Int) [[1,2],[]] @?= [[1,2],[]] (relatedComponents @Int) [[1,2], [2]] @?= [[1,2]] (relatedComponents @Int) [[1,2], [2],[2]] @?= [[1,2]] (relatedComponents @Int) [[1,2], [2],[2,1]] @?= [[1,2]] (relatedComponents @Int) [[1,2], [2,4]] @?= [[1,2,4]] (relatedComponents @Int) [[1,2], [3,5], [2,4]] @?= [[3,5], [1,2,4]] (relatedComponents @Int) [[1,2], [3,5], [2,4],[9,5],[5,4]] @?= [[1,2,4,3,5,9]] (relatedComponents @Int) [[1,2,5], [4,5,9]] @?= [[1,2,5,4,9]] testToPhyloDeterminism :: (FilePath, IO BL.ByteString) testToPhyloDeterminism = ("test-data/phylo/187481.json",) $ do corpusPath' <- getDataFileName "test-data/phylo/GarganText_DocsList-nodeId-187481.tsv" listPath' <- getDataFileName "test-data/phylo/GarganText_NgramsList-187482.tsv" let config = phyloTestConfig { corpusPath = corpusPath' , listPath = listPath' , listParser = V3 } mapList <- tsvMapTermList (listPath config) corpus <- fileToDocsDefault (corpusParser config) (corpusPath config) [Year 3 1 5,Month 3 1 5,Week 4 2 5] mapList let actual = setConfig phyloTestConfig $ toPhylo $ toPhyloWithoutLink corpus config pure $ JSON.encodePretty actual testCsvPhylo :: Assertion testCsvPhylo = do pth <- getDataFileName "test-data/phylo/cleopatre.golden.csv.json" phyloJson <- eitherDecodeFileStrict' @Phylo pth case phyloJson of Left err -> error err Right _ -> pure ()