Verified Commit 0405e007 authored by Przemyslaw Kaminski's avatar Przemyslaw Kaminski

Merge branch 'dev' into 224-dev-uniform-ngrams-creation

parents dc21080f fe201115
......@@ -4,6 +4,10 @@
# Profiling
*.prof
*.prof.html
*.hp
*.eventlog
*.eventlog.html
profiling
# Stack
......@@ -39,4 +43,5 @@ data
devops/docker/js-cache
cabal.project.local
\ No newline at end of file
cabal.project.local
gargantext_profile_out.dot
......@@ -50,7 +50,7 @@ bench:
- .cabal/
policy: pull-push
script:
- nix-shell --run "./bin/update-project-dependencies $CABAL_STORE_DIR && cabal --store-dir=$CABAL_STORE_DIR v2-bench --ghc-options='-O2 -fclear-plugins'"
- nix-shell --run "./bin/update-project-dependencies $CABAL_STORE_DIR && cabal --store-dir=$CABAL_STORE_DIR v2-bench --flags +no-phylo-debug-logs --ghc-options='-O2 -fclear-plugins'"
allow_failure: true
test:
......@@ -84,7 +84,7 @@ test:
mkdir -p /builds/gargantext/haskell-gargantext/devops/coreNLP/stanford-corenlp-current
cp -R /root/devops/coreNLP/stanford-corenlp-${CORENLP}/* /builds/gargantext/haskell-gargantext/devops/coreNLP/stanford-corenlp-current/
nix-shell --run "chown -R test:test /root/.config/ && su -m test -c \"export PATH=$PATH:$TEST_NIX_PATH && cd /builds/gargantext/haskell-gargantext; $CABAL --store-dir=$CABAL_STORE_DIR v2-test --test-show-details=streaming --flags test-crypto --ghc-options='-O0 -fclear-plugins'\""
nix-shell --run "chown -R test:test /root/.config/ && su -m test -c \"export PATH=$PATH:$TEST_NIX_PATH && cd /builds/gargantext/haskell-gargantext; $CABAL --store-dir=$CABAL_STORE_DIR v2-test --test-show-details=streaming --flags 'test-crypto no-phylo-debug-logs' --ghc-options='-O0 -fclear-plugins'\""
chown -R root:root dist-newstyle/
chown -R root:root /root/
chown -R root:root $CABAL_STORE_DIR
......
## Version 0.0.6.9.9.9.6.2 [Release Candidate for 007]
* [BACK][FIX][Node stories insertion error (SqlError violates foreign key constraint) (#303)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/303)
* [BACK][DOC][Welcome: Door To enter the project (#177)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/177)
* [BACK][OPTIM][Improve Phylo robustness and performance (#292)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/292)
## Version 0.0.6.9.9.9.6.1
* [BACK][FEAT][Removing Order2_A and Order2_B and use Order2 only instead (#308)](https://gitlab.iscpif.fr/gargantext/haskell-gargantext/issues/308)
......
......@@ -199,7 +199,7 @@ See https://www.haskell.org/ghcup/guide/#hls for more details.
``` sh
# If docker is not installed:
# curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/dev/devops/docker/docker-install | sh
# curl -sSL https://gitlab.iscpif.fr/gargantext/haskell-gargantext/raw/dev/devops/docker/install_docker | sh
cd devops/docker
docker compose up
```
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
{
"corpusPath" : "Gargantext_DocsList-nodeId-185487.csv",
"listPath" : "Gargantext_NgramsList-185488.csv",
"outputPath" : "data",
"corpusParser" : {
"tag" : "Csv",
"_csv_limit" : 1500000
},
"listParser" : "V3",
"phyloName" : "bpa",
"phyloScale" : 2,
"similarity" : {
"tag" : "WeightedLogJaccard",
"_wlj_sensibility" : 0.5,
"_wlj_minSharedNgrams" : 1
},
"seaElevation" : {
"tag" : "Evolving",
"_evol_neighborhood" : true
},
"defaultMode" : false,
"findAncestors" : true,
"phyloSynchrony" : {
"tag" : "ByProximityThreshold",
"_bpt_threshold" : 0.6,
"_bpt_sensibility" : 0,
"_bpt_scope" : "AllBranches",
"_bpt_strategy" : "MergeAllGroups"
},
"phyloQuality" : {
"tag" : "Quality",
"_qua_granularity" : 0.1,
"_qua_minBranch" : 2
},
"timeUnit" : {
"tag" : "Week",
"_week_period" : 4,
"_week_step" : 2,
"_week_matchingFrame" : 5
},
"clique" : {
"tag" : "Fis",
"_fis_support" : 3,
"_fis_size" : 1
},
"exportLabel" : [
{
"tag" : "BranchLabel",
"_branch_labelTagger" : "MostEmergentTfIdf",
"_branch_labelSize" : 2
},
{
"tag" : "GroupLabel",
"_group_labelTagger" : "MostEmergentInclusive",
"_group_labelSize" : 2
}
],
"exportSort" : {
"tag" : "ByHierarchy",
"_sort_order" : "Desc"
},
"exportFilter" : [
{
"tag" : "ByBranchSize",
"_branch_size" : 2
}
]
}
{-# LANGUAGE OverloadedStrings #-}
module Main where
import Common
import Data.Aeson
import Data.List (nub)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.API.Tools
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig, toPeriods, getTimePeriod, getTimeStep)
import GHC.IO.Encoding
import GHC.Stack
import Paths_gargantext
import Prelude
import qualified Data.Text as T
import Shelly
import System.Directory
--------------
-- | Main | --
--------------
phyloConfig :: FilePath -> PhyloConfig
phyloConfig outdir = PhyloConfig {
corpusPath = "corpus.csv"
, listPath = "list.csv"
, outputPath = outdir
, corpusParser = Csv {_csv_limit = 150000}
, listParser = V4
, phyloName = "phylo_profile_test"
, phyloScale = 2
, similarity = WeightedLogJaccard {_wlj_sensibility = 0.5, _wlj_minSharedNgrams = 2}
, seaElevation = Constante {_cons_start = 0.1, _cons_gap = 0.1}
, defaultMode = True
, findAncestors = True
, phyloSynchrony = ByProximityThreshold {_bpt_threshold = 0.5, _bpt_sensibility = 0.0, _bpt_scope = AllBranches, _bpt_strategy = MergeAllGroups}
, phyloQuality = Quality {_qua_granularity = 0.8, _qua_minBranch = 3}
, timeUnit = Year {_year_period = 3, _year_step = 1, _year_matchingFrame = 5}
, clique = MaxClique {_mcl_size = 5, _mcl_threshold = 1.0e-4, _mcl_filter = ByThreshold}
, exportLabel = [ BranchLabel {_branch_labelTagger = MostEmergentTfIdf, _branch_labelSize = 2}
, GroupLabel {_group_labelTagger = MostEmergentInclusive, _group_labelSize = 2}
]
, exportSort = ByHierarchy {_sort_order = Desc}
, exportFilter = [ByBranchSize {_branch_size = 3.0}]
}
main :: HasCallStack => IO ()
main = do
shelly $ escaping False $ withTmpDir $ \tdir -> do
curDir <- pwd
let output = curDir <> "/" <> "gargantext_profile_out.dot"
chdir tdir $ do
liftIO $ setLocaleEncoding utf8
bpaConfig <- liftIO $ getDataFileName "bench-data/phylo/bpa-config.json"
corpusPath' <- liftIO $ getDataFileName "bench-data/phylo/GarganText_DocsList-nodeId-185487.csv"
listPath' <- liftIO $ getDataFileName "bench-data/phylo/GarganText_NgramsList-185488.csv"
(Right config) <- fmap (\pcfg -> pcfg { outputPath = tdir
, corpusPath = corpusPath'
, listPath = listPath'
}) <$> liftIO (eitherDecodeFileStrict' bpaConfig)
mapList <- liftIO $ fileToList (listParser config) (listPath config)
corpus <- liftIO $ if (defaultMode config)
then fileToDocsDefault (corpusParser config) (corpusPath config) [Year 3 1 5,Month 3 1 5,Week 4 2 5] mapList
else fileToDocsAdvanced (corpusParser config) (corpusPath config) (timeUnit config) mapList
liftIO $ do
printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOComment (show (length $ nub $ concat $ map text corpus) <> " Size ngs_coterms")
printIOComment (show (length mapList) <> " Size ngs_terms List Map Ngrams")
printIOMsg "Reconstruct the phylo"
-- check the existing backup files
let backupPhyloWithoutLink = (outputPath config) <> "backupPhyloWithoutLink_" <> (configToSha BackupPhyloWithoutLink config) <> ".json"
let backupPhylo = (outputPath config) <> "backupPhylo_" <> (configToSha BackupPhylo config) <> ".json"
phyloWithoutLinkExists <- doesFileExist backupPhyloWithoutLink
phyloExists <- doesFileExist backupPhylo
-- reconstruct the phylo
phylo <- if phyloExists
then do
printIOMsg "Reconstruct the phylo from an existing file"
readPhylo backupPhylo
else do
if phyloWithoutLinkExists
then do
printIOMsg "Reconstruct the phylo from an existing file without links"
phyloWithoutLink <- readPhylo backupPhyloWithoutLink
writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink)
else do
printIOMsg "Reconstruct the phylo from scratch"
phyloWithoutLink <- pure $ toPhyloWithoutLink corpus config
writePhylo backupPhyloWithoutLink phyloWithoutLink
pure $ toPhylo (setConfig config phyloWithoutLink)
writePhylo backupPhylo phylo
printIOMsg "End of reconstruction, start the export"
let dot = toPhyloExport (setConfig config phylo)
dotToFile output dot
echo "Done."
......@@ -47,210 +47,7 @@ import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude hiding (hash, replace)
import Prelude qualified
import System.Directory (listDirectory,doesFileExist)
data Backup = BackupPhyloWithoutLink | BackupPhylo deriving (Show)
---------------
-- | Tools | --
---------------
-- | To get all the files in a directory or just a file
getFilesFromPath :: FilePath -> IO [FilePath]
getFilesFromPath path = do
if (isSuffixOf "/" path)
then (listDirectory path)
else return [path]
----------------
-- | Parser | --
----------------
-- | To filter the Ngrams of a document based on the termList
termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList pats txt
-- | To transform a Wos file (or [file]) into a list of Docs
wosToDocs :: Int -> Patterns -> TimeUnit -> FilePath -> IO [Document]
wosToDocs limit patterns time path = do
files <- getFilesFromPath path
take limit
<$> map (\d -> let title = fromJust $ _hd_title d
abstr = if (isJust $ _hd_abstract d)
then fromJust $ _hd_abstract d
else ""
in Document (toPhyloDate
(fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d)
(fromJust $ _hd_publication_day d) time)
(toPhyloDate'
(fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d)
(fromJust $ _hd_publication_day d) time)
(termsInText patterns $ title <> " " <> abstr) Nothing [] time)
<$> concat
<$> mapConcurrently (\file ->
filter (\d -> (isJust $ _hd_publication_year d)
&& (isJust $ _hd_title d))
<$> fromRight [] <$> parseFile WOS Plain (path <> file) ) files
-- To transform a Csv file into a list of Document
csvToDocs :: CorpusParser -> Patterns -> TimeUnit -> FilePath -> IO [Document]
csvToDocs parser patterns time path =
case parser of
Wos _ -> Prelude.error "csvToDocs: unimplemented"
Csv limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row) (fromMaybe Csv.defaultMonth $ csv_publication_month row) (fromMaybe Csv.defaultDay $ csv_publication_day row) time)
(toPhyloDate' (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row) (fromMaybe Csv.defaultMonth $ csv_publication_month row) (fromMaybe Csv.defaultDay $ csv_publication_day row) time)
(termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
Nothing
[]
time
) <$> snd <$> either (\err -> panicTrace $ "CSV error" <> (show err)) identity <$> Csv.readCSVFile path
Csv' limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(toPhyloDate' (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(termsInText patterns $ (csv'_title row) <> " " <> (csv'_abstract row))
(Just $ csv'_weight row)
(map (T.strip . pack) $ splitOn ";" (unpack $ (csv'_source row)))
time
) <$> snd <$> Csv.readWeightedCsv path
-- To parse a file into a list of Document
fileToDocsAdvanced :: CorpusParser -> FilePath -> TimeUnit -> TermList -> IO [Document]
fileToDocsAdvanced parser path time lst = do
let patterns = buildPatterns lst
case parser of
Wos limit -> wosToDocs limit patterns time path
Csv _ -> csvToDocs parser patterns time path
Csv' _ -> csvToDocs parser patterns time path
fileToDocsDefault :: CorpusParser -> FilePath -> [TimeUnit] -> TermList -> IO [Document]
fileToDocsDefault parser path timeUnits lst =
if length timeUnits > 0
then
do
let timeUnit = (head' "fileToDocsDefault" timeUnits)
docs <- fileToDocsAdvanced parser path timeUnit lst
let periods = toPeriods (sort $ nub $ map date docs) (getTimePeriod timeUnit) (getTimeStep timeUnit)
if (length periods < 3)
then fileToDocsDefault parser path (tail timeUnits) lst
else pure docs
else panicTrace "this corpus is incompatible with the phylomemy reconstruction"
-- on passe à passer la time unit dans la conf envoyé au phyloMaker
-- dans le phyloMaker si default est true alors dans le setDefault ou pense à utiliser la TimeUnit de la conf
---------------
-- | Label | --
---------------
-- Config time parameters to label
timeToLabel :: PhyloConfig -> [Char]
timeToLabel config = case (timeUnit config) of
Epoch p s f -> ("time_epochs" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Year p s f -> ("time_years" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Month p s f -> ("time_months" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Week p s f -> ("time_weeks" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Day p s f -> ("time_days" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
seaToLabel :: PhyloConfig -> [Char]
seaToLabel config = case (seaElevation config) of
Constante start step -> ("sea_cst_" <> (show start) <> "_" <> (show step))
Adaptative granularity -> ("sea_adapt" <> (show granularity))
Evolving _ -> ("sea_evolv")
sensToLabel :: PhyloConfig -> [Char]
sensToLabel config = case (similarity config) of
Hamming _ _ -> Prelude.error "sensToLabel: unimplemented"
WeightedLogJaccard s _ -> ("WeightedLogJaccard_" <> show s)
WeightedLogSim s _ -> ( "WeightedLogSim-sens_" <> show s)
cliqueToLabel :: PhyloConfig -> [Char]
cliqueToLabel config = case (clique config) of
Fis s s' -> "fis_" <> (show s) <> "_" <> (show s')
MaxClique s t f -> "clique_" <> (show s)<> "_" <> (show f)<> "_" <> (show t)
syncToLabel :: PhyloConfig -> [Char]
syncToLabel config = case (phyloSynchrony config) of
ByProximityThreshold scl sync_sens scope _ -> ("scale_" <> (show scope) <> "_" <> (show sync_sens) <> "_" <> (show scl))
ByProximityDistribution _ _ -> "syncToLabel: unimplemented"
qualToConfig :: PhyloConfig -> [Char]
qualToConfig config = case (phyloQuality config) of
Quality g m -> "quality_" <> (show g) <> "_" <> (show m)
-- To set up the export file's label from the configuration
configToLabel :: PhyloConfig -> [Char]
configToLabel config = outputPath config
<> (unpack $ phyloName config)
<> "-" <> (timeToLabel config)
<> "-scale_" <> (show (phyloScale config))
<> "-" <> (seaToLabel config)
<> "-" <> (sensToLabel config)
<> "-" <> (cliqueToLabel config)
<> "-level_" <> (show (_qua_granularity $ phyloQuality config))
<> "-" <> (syncToLabel config)
<> ".dot"
-- To write a sha256 from a set of config's parameters
configToSha :: Backup -> PhyloConfig -> [Char]
configToSha stage config = unpack
$ replace "/" "-"
$ T.pack (show (hash $ C8.pack label))
where
label :: [Char]
label = case stage of
BackupPhyloWithoutLink -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
BackupPhylo -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
<> (sensToLabel config)
<> (seaToLabel config)
<> (syncToLabel config)
<> (qualToConfig config)
<> (show (phyloScale config))
readListV4 :: [Char] -> IO NgramsList
readListV4 path = do
listJson <- (eitherDecode <$> readJson path) :: IO (Either Prelude.String NgramsList)
case listJson of
Left err -> do
putStrLn err
Prelude.error "readListV4 unimplemented"
Right listV4 -> pure listV4
fileToList :: ListParser -> FilePath -> IO TermList
fileToList parser path =
case parser of
V3 -> csvMapTermList path
V4 -> fromJust
<$> toTermList MapTerm NgramsTerms
<$> readListV4 path
--------------
-- | Main | --
--------------
import Common
main :: IO ()
main = do
......@@ -274,7 +71,7 @@ main = do
corpus <- if (defaultMode config)
then fileToDocsDefault (corpusParser config) (corpusPath config) [Year 3 1 5,Month 3 1 5,Week 4 2 5] mapList
else fileToDocsAdvanced (corpusParser config) (corpusPath config) (timeUnit config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOComment (show (length $ nub $ concat $ map text corpus) <> " Size ngs_coterms")
......
This diff is collapsed.
......@@ -18,8 +18,8 @@ fi
# with the `sha256sum` result calculated on the `cabal.project` and
# `cabal.project.freeze`. This ensures the files stay deterministic so that CI
# cache can kick in.
expected_cabal_project_hash="20253fb02ed59b6e8e72f974aad2aef3409ca6f9d005f7e84bb660812f1a70db"
expected_cabal_project_freeze_hash="745c65c246998cfda4d2a7a22df44a9f1f7fb0927e2afc2f16712861bf552c76"
expected_cabal_project_hash="c9fe39301e8b60bfd183e60e7e25a14cd1c9c66d8739bf9041ca3f4db89db7c6"
expected_cabal_project_freeze_hash="db24c7d3006167102532e3101e2b49bae13d478003459c7d3f1d66590e57740a"
cabal --store-dir=$STORE_DIR v2-build --dry-run
cabal2stack --system-ghc --allow-newer --resolver lts-21.17 --resolver-file devops/stack/lts-21.17.yaml -o stack.yaml
......
......@@ -5,7 +5,7 @@ cabal-version: 3.4
-- see: https://github.com/sol/hpack
name: gargantext
version: 0.0.6.9.9.9.6.1
version: 0.0.6.9.9.9.6.2
synopsis: Search, map, share
description: Please see README.md
category: Data
......@@ -17,6 +17,9 @@ license: AGPL-3.0-or-later
license-file: LICENSE
build-type: Simple
data-files:
bench-data/phylo/bpa-config.json
bench-data/phylo/GarganText_DocsList-nodeId-185487.csv
bench-data/phylo/GarganText_NgramsList-185488.csv
bench-data/phylo/issue-290.json
bench-data/phylo/issue-290-small.json
devops/postgres/extensions.sql
......@@ -82,6 +85,13 @@ flag disable-db-obfuscation-executable
default: False
manual: True
-- When enabled, it suppresses at compile time the
-- debug output for the phylo code, so that it doesn't
-- hinder its performance.
flag no-phylo-debug-logs
default: False
manual: True
library
import:
defaults
......@@ -431,6 +441,8 @@ library
src
if flag(test-crypto)
cpp-options: -DTEST_CRYPTO
if flag(no-phylo-debug-logs)
cpp-options: -DNO_PHYLO_DEBUG_LOGS
build-depends:
HSvm ^>= 0.1.1.3.22
, KMP ^>= 0.2.0.0
......@@ -476,6 +488,7 @@ library
, data-time-segment ^>= 0.1.0.0
, deepseq ^>= 1.4.4.0
, directory ^>= 1.3.6.0
, discrimination >= 0.5
, duckling ^>= 0.2.0.0
, ekg-core ^>= 0.1.1.7
, ekg-json ^>= 0.1.0.7
......@@ -745,8 +758,25 @@ executable gargantext-phylo
main-is: Main.hs
other-modules:
Paths_gargantext
Common
hs-source-dirs:
bin/gargantext-phylo
bin/gargantext-phylo bin/gargantext-phylo/Phylo
default-extensions:
DataKinds
DeriveGeneric
FlexibleContexts
FlexibleInstances
GADTs
GeneralizedNewtypeDeriving
ImportQualifiedPost
MultiParamTypeClasses
NamedFieldPuns
NoImplicitPrelude
OverloadedStrings
RankNTypes
RecordWildCards
StrictData
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
async ^>= 2.2.4
, bytestring ^>= 0.10.12.0
......@@ -1009,4 +1039,26 @@ benchmark garg-bench
if impl(ghc >= 8.6)
ghc-options: "-with-rtsopts=--nonmoving-gc"
executable gargantext-phylo-profile
main-is: Main.hs
other-modules:
Paths_gargantext
Common
hs-source-dirs:
bin/gargantext-phylo-profile bin/gargantext-phylo/Phylo
default-extensions: GHC2021
ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N -O2 -Wmissing-signatures
build-depends:
base
, bytestring
, gargantext
, gargantext-prelude
, shelly
, text
, async
, cryptohash
, aeson
, split
, vector
, directory
default-language: Haskell2010
......@@ -64,10 +64,11 @@ instance ToNamedRecord CsvList where
]
------------------------------------------------------------------------
instance FromField CsvListType where
parseField "map" = pure CsvMap
parseField "main" = pure CsvCandidate
parseField "stop" = pure CsvStop
parseField _ = mzero
parseField "map" = pure CsvMap
parseField "main" = pure CsvCandidate
parseField "candidate" = pure CsvCandidate -- backward compat
parseField "stop" = pure CsvStop
parseField _ = mzero
instance ToField CsvListType where
toField CsvMap = "map"
......
......@@ -195,7 +195,7 @@ toPhyloDate' y m d _ = pack $ showGregorian $ fromGregorian (toIntege
-- Utils
writePhylo :: [Char] -> Phylo -> IO ()
writePhylo :: HasCallStack => [Char] -> Phylo -> IO ()
writePhylo path phylo = Lazy.writeFile path $ encode phylo
......
......@@ -702,17 +702,17 @@ toPhyloExport phylo = exportToDot phylo
traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
traceExportBranches branches =
trace ("\n"
tracePhylo ("\n"
<> "-- | Export " <> show(length branches) <> " branches" :: Text) branches
tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
tracePhyloAncestors groups =
trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups)
tracePhylo ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups)
<> " ancestors" :: Text) groups
tracePhyloInfo :: Phylo -> Phylo
tracePhyloInfo phylo =
trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with level = "
tracePhylo ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with level = "
<> show(getLevel phylo) <> " applied to "
<> show(length $ Vector.toList $ getRoots phylo) <> " foundations" :: Text
) phylo
......@@ -720,7 +720,7 @@ tracePhyloInfo phylo =
traceExportGroups :: [PhyloGroup] -> [PhyloGroup]
traceExportGroups groups =
trace ("\n" <> "-- | Export "
tracePhylo ("\n" <> "-- | Export "
<> show(length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups) <> " branches, "
<> show(length groups) <> " groups and "
<> show(length $ nub $ concat $ map (\g -> g ^. phylo_groupNgrams) groups) <> " terms" :: Text
......
This diff is collapsed.
......@@ -10,7 +10,8 @@ Portability : POSIX
{-# OPTIONS_GHC -fno-warn-deprecations #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.PhyloTools where
......@@ -232,6 +233,14 @@ keepFilled f thr l = if (null $ f thr l) && (not $ null l)
then keepFilled f (thr - 1) l
else f thr l
-- | General workhorse to use in lieu of /trace/. It decides at compile
-- time whether or not debug logs are enabled.
tracePhylo :: (Print s, IsString s) => s -> a -> a
#if NO_PHYLO_DEBUG_LOGS
tracePhylo _ p = p
#else
tracePhylo msg p = trace msg p
#endif
traceClique :: Map (Date, Date) [Clustering] -> String
traceClique mFis = foldl' (\msg cpt -> msg <> show (countSup cpt cliques) <> " (>" <> show (cpt) <> ") " ) "" [1..6]
......@@ -252,7 +261,7 @@ traceSupport mFis = foldl' (\msg cpt -> msg <> show (countSup cpt supports) <> "
traceFis :: [Char] -> Map (Date, Date) [Clustering] -> Map (Date, Date) [Clustering]
traceFis msg mFis = trace ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
traceFis msg mFis = tracePhylo ( "\n" <> "-- | " <> msg <> " : " <> show (sum $ map length $ elems mFis) <> "\n"
<> "Support : " <> traceSupport mFis <> "\n"
<> "Nb Ngrams : " <> traceClique mFis <> "\n"
) mFis
......@@ -636,7 +645,7 @@ updateLevel level phylo = phylo { _phylo_level = level }
traceToPhylo :: Scale -> Phylo -> Phylo
traceToPhylo lvl phylo =
trace ("\n" <> "-- | End of phylo making at scale " <> show (lvl) <> " with "
tracePhylo ("\n" <> "-- | End of phylo making at scale " <> show (lvl) <> " with "
<> show (length $ getGroupsFromScale lvl phylo) <> " groups and "
<> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale lvl phylo)
<> " branches" <> "\n" :: Text
......@@ -702,7 +711,7 @@ toRelatedComponents nodes edges =
traceSynchronyEnd :: Phylo -> Phylo
traceSynchronyEnd phylo =
trace ( "-- | End synchronic clustering at scale " <> show (getLastLevel phylo)
tracePhylo ( "-- | End synchronic clustering at scale " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo)
<> " branches" <> "\n" :: Text
......@@ -710,7 +719,7 @@ traceSynchronyEnd phylo =
traceSynchronyStart :: Phylo -> Phylo
traceSynchronyStart phylo =
trace ( "\n" <> "-- | Start synchronic clustering at scale " <> show (getLastLevel phylo)
tracePhylo ( "\n" <> "-- | Start synchronic clustering at scale " <> show (getLastLevel phylo)
<> " with " <> show (length $ getGroupsFromScale (getLastLevel phylo) phylo) <> " groups"
<> " and " <> show (length $ nub $ map _phylo_groupBranchId $ getGroupsFromScale (getLastLevel phylo) phylo)
<> " branches" <> "\n" :: Text
......@@ -754,7 +763,7 @@ ngramsInBranches branches = nub $ foldl' (\acc g -> acc ++ (g ^. phylo_groupNgra
traceMatchSuccess :: Double -> Double -> Double -> [[[PhyloGroup]]] -> [[[PhyloGroup]]]
traceMatchSuccess thr qua qua' nextBranches =
trace ( "\n" <> "-- local branches : "
tracePhylo ( "\n" <> "-- local branches : "
<> (Text.pack $ init $ show ((init . init . snd)
$ (head' "trace" $ head' "trace" $ head' "trace" nextBranches) ^. phylo_groupBranchId))
<> ",(1.." <> show (length nextBranches) <> ")]"
......@@ -767,7 +776,7 @@ traceMatchSuccess thr qua qua' nextBranches =
traceMatchFailure :: Double -> Double -> Double -> [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchFailure thr qua qua' branches =
trace ( "\n" <> "-- local branches : "
tracePhylo ( "\n" <> "-- local branches : "
<> (Text.pack $ init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
<> ",(1.." <> show (length branches) <> ")]"
<> " | " <> show (length $ concat branches) <> " groups" <> "\n"
......@@ -778,7 +787,7 @@ traceMatchFailure thr qua qua' branches =
traceMatchNoSplit :: [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchNoSplit branches =
trace ( "\n" <> "-- local branches : "
tracePhylo ( "\n" <> "-- local branches : "
<> (Text.pack $ init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
<> ",(1.." <> show (length branches) <> ")]"
<> " | " <> show (length $ concat branches) <> " groups" <> "\n"
......@@ -788,7 +797,7 @@ traceMatchNoSplit branches =
traceMatchLimit :: [[PhyloGroup]] -> [[PhyloGroup]]
traceMatchLimit branches =
trace ( "\n" <> "-- local branches : "
tracePhylo ( "\n" <> "-- local branches : "
<> (Text.pack $ init $ show ((init . snd) $ (head' "trace" $ head' "trace" branches) ^. phylo_groupBranchId))
<> ",(1.." <> show (length branches) <> ")]"
<> " | " <> show (length $ concat branches) <> " groups" <> "\n"
......@@ -798,15 +807,15 @@ traceMatchLimit branches =
traceMatchEnd :: [PhyloGroup] -> [PhyloGroup]
traceMatchEnd groups =
trace ("\n" <> "-- | End temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
tracePhylo ("\n" <> "-- | End temporal matching with " <> show (length $ nub $ map (\g -> g ^. phylo_groupBranchId) groups)
<> " branches and " <> show (length groups) <> " groups" <> "\n" :: Text) groups
traceTemporalMatching :: [PhyloGroup] -> [PhyloGroup]
traceTemporalMatching groups =
trace ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n" :: Text ) groups
tracePhylo ( "\n" <> "-- | Start temporal matching for " <> show(length groups) <> " groups" <> "\n" :: Text ) groups
traceGroupsProxi :: [Double] -> [Double]
traceGroupsProxi l =
trace ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups Similarity" <> "\n" :: Text ) l
tracePhylo ( "\n" <> "-- | " <> show(List.length l) <> " computed pairs of groups Similarity" <> "\n" :: Text ) l
......@@ -662,7 +662,7 @@ seaLevelRise fdt similarity lambda minBranch frequency ladder rise frame periods
else
-- start breaking up all the possible branches for the current similarity threshold
let thr = List.head ladder
branches' = trace ( "threshold = " <> (T.pack $ printf "%.3f" thr)
branches' = tracePhylo ( "threshold = " <> (T.pack $ printf "%.3f" thr)
<> " F(λ) = " <> (T.pack $ printf "%.5f" (toPhyloQuality fdt lambda frequency (map fst branches)))
<> " ξ = " <> (T.pack $ printf "%.5f" (globalAccuracy frequency (map fst branches)))
<> " ρ = " <> (T.pack $ printf "%.5f" (globalRecall frequency (map fst branches)))
......
......@@ -324,6 +324,7 @@ flags:
"build-search-demo": false
gargantext:
"disable-db-obfuscation-executable": false
"no-phylo-debug-logs": false
"test-crypto": false
"generic-deriving":
"base-4-9": true
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment