diff --git a/bin/gargantext-adaptative-phylo/Main.hs b/bin/gargantext-adaptative-phylo/Main.hs index 6811e8f8f4e50d27b885d26cd8bea0195f47f448..1a472cbaa6f389bcbadd849d315a39827ad3fce2 100644 --- a/bin/gargantext-adaptative-phylo/Main.hs +++ b/bin/gargantext-adaptative-phylo/Main.hs @@ -21,7 +21,7 @@ import Data.Aeson -- import Data.Maybe (isJust, fromJust) import Data.List (concat, nub, isSuffixOf) import Data.String (String) -import Data.Text (Text, unwords, unpack) +import Data.Text (Text, unwords, unpack, replace) import Crypto.Hash.SHA256 (hash) -- import Data.Digest.Pure.SHA @@ -33,21 +33,22 @@ import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile) import Gargantext.Core.Text.List.CSV (csvMapTermList) import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList) import Gargantext.Core.Viz.AdaptativePhylo -import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo) -import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment) +import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep) +import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig) import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile) -- import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicDistance') import GHC.IO (FilePath) -import Prelude (Either(..)) +import Prelude (Either(Left, Right)) import System.Environment -import System.Directory (listDirectory) +import System.Directory (listDirectory,doesFileExist) import Control.Concurrent.Async (mapConcurrently) import qualified Data.ByteString.Char8 as C8 import qualified Data.ByteString.Lazy as Lazy import qualified Data.Vector as Vector import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv +import qualified Data.Text as T @@ -132,7 +133,7 @@ fileToDocs parser path lst = do -- Config time parameters to label timeToLabel :: Config -> [Char] timeToLabel config = case (timeUnit config) of - Year p s f -> ("time"<> "_"<> (show p) <> "_"<> (show s) <> (show f)) + Year p s f -> ("time"<> "_"<> (show p) <> "_" <> (show s) <> "_" <> (show f)) seaToLabel :: Config -> [Char] @@ -173,15 +174,30 @@ configToLabel config = outputPath config <> "-" <> (syncToLabel config) <> ".dot" + configToSha :: Config -> [Char] -configToSha config = show (hash $ C8.pack label) +configToSha config = unpack $ replace "/" "-" $ T.pack (show (hash $ C8.pack label)) where label :: [Char] label = (corpusPath config) <> (listPath config) <> (timeToLabel config) <> (cliqueToLabel config) - <> (sensToLabel config) + + +writePhylo :: [Char] -> Phylo -> IO () +writePhylo path phylo = Lazy.writeFile path $ encode phylo + + +readPhylo :: [Char] -> IO Phylo +readPhylo path = do + phyloStep <- (eitherDecode <$> readJson path) :: IO (Either String Phylo) + case phyloStep of + Left err -> do + putStrLn err + undefined + Right phylo -> pure phylo + -------------- -- | Main | -- @@ -206,9 +222,23 @@ main = do corpus <- fileToDocs (corpusParser config) (corpusPath config) mapList printIOComment (show (length corpus) <> " parsed docs from the corpus") - printIOMsg "Reconstruct the Phylo" - - let phylo = toPhylo corpus mapList config + printIOMsg "Reconstruct the phylo" + + let stepFile = (outputPath config) <> "phyloStep_" <> (configToSha config) <> ".json" + + phyloStepExists <- doesFileExist stepFile + + phyloStep <- if phyloStepExists + then do + printIOMsg "Reconstruct the phylo step from an existing file" + readPhylo stepFile + else do + printIOMsg "Reconstruct the phylo step from scratch" + pure $ toPhyloStep corpus mapList config + + writePhylo stepFile phyloStep + + let phylo = toPhylo (setConfig config phyloStep) -- | probes diff --git a/src/Gargantext/Core/Viz/AdaptativePhylo.hs b/src/Gargantext/Core/Viz/AdaptativePhylo.hs index e41296c8a9c08a63d723e70a1c9ac72b403a7fea..3e5deeaba9dcdb39417a2885b6dfd9575bcd5957 100644 --- a/src/Gargantext/Core/Viz/AdaptativePhylo.hs +++ b/src/Gargantext/Core/Viz/AdaptativePhylo.hs @@ -439,5 +439,17 @@ makeLenses ''PhyloBranch -- | JSON instances | -- ------------------------ +instance FromJSON Phylo +instance ToJSON Phylo +instance FromJSON PhyloParam +instance ToJSON PhyloParam +instance FromJSON PhyloPeriod +instance ToJSON PhyloPeriod +instance FromJSON PhyloLevel +instance ToJSON PhyloLevel +instance FromJSON Software +instance ToJSON Software +instance FromJSON PhyloGroup +instance ToJSON PhyloGroup $(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations) diff --git a/src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs b/src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs index d626d81998a84eb06013ed1cd21970885a65f348..b91a1242df6e68f4c4431eaf654f9a41c180a4d5 100644 --- a/src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs +++ b/src/Gargantext/Core/Viz/Phylo/PhyloMaker.hs @@ -50,17 +50,17 @@ toPhylo' (PhyloBase phylo) = toPhylo -} -toPhylo :: [Document] -> TermList -> Config -> Phylo -toPhylo docs lst conf = trace ("# phylo1 groups " <> show(length $ getGroupsFromLevel 1 phylo1)) - $ traceToPhylo (phyloLevel conf) $ - if (phyloLevel conf) > 1 - then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloLevel conf)] +toPhylo :: Phylo -> Phylo +toPhylo phyloStep = trace ("# phylo1 groups " <> show(length $ getGroupsFromLevel 1 phylo1)) + $ traceToPhylo (phyloLevel $ getConfig phyloStep) $ + if (phyloLevel $ getConfig phyloStep) > 1 + then foldl' (\phylo' _ -> synchronicClustering phylo') phyloAncestors [2..(phyloLevel $ getConfig phyloStep)] else phylo1 where -------------------------------------- phyloAncestors :: Phylo phyloAncestors = - if (findAncestors conf) + if (findAncestors $ getConfig phyloStep) then toHorizon phylo1 else phylo1 -------------------------------------- @@ -68,14 +68,6 @@ toPhylo docs lst conf = trace ("# phylo1 groups " <> show(length $ getGroupsFrom phylo1 = toPhylo1 phyloStep -- > AD to db here -------------------------------------- - phyloStep :: Phylo - phyloStep = toFirstPhyloStep docs phyloBase - -- > AD to db here - -------------------------------------- - phyloBase :: Phylo - phyloBase = toPhyloBase docs lst conf - -- > AD to db here - -------------------------------------- @@ -142,9 +134,19 @@ cliqueToGroup fis pId lvl idx coocs = PhyloGroup pId lvl idx "" [] [] [] [] [] --- To build the first phylo step from docs and phyloBase -toFirstPhyloStep :: [Document] -> Phylo -> Phylo -toFirstPhyloStep docs phyloBase = case (getSeaElevation phyloBase) of +toPhylo1 :: Phylo -> Phylo +toPhylo1 phyloStep = case (getSeaElevation phyloStep) of + Constante start gap -> constanteTemporalMatching start gap phyloStep + Adaptative steps -> adaptativeTemporalMatching steps phyloStep + +----------------------- +-- | To Phylo Step | -- +----------------------- + + +-- To build the first phylo step from docs and terms +toPhyloStep :: [Document] -> TermList -> Config -> Phylo +toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of Constante _ _ -> appendGroups cliqueToGroup 1 phyloClique phyloBase Adaptative _ -> toGroupsProxi 1 $ appendGroups cliqueToGroup 1 phyloClique phyloBase where @@ -154,15 +156,10 @@ toFirstPhyloStep docs phyloBase = case (getSeaElevation phyloBase) of -------------------------------------- docs' :: Map (Date,Date) [Document] docs' = groupDocsByPeriodRec date (getPeriodIds phyloBase) (sortOn date docs) empty - -------------------------------------- - - - -toPhylo1 :: Phylo -> Phylo -toPhylo1 phyloStep = case (getSeaElevation phyloStep) of - Constante start gap -> constanteTemporalMatching start gap phyloStep - Adaptative steps -> adaptativeTemporalMatching steps phyloStep - + -------------------------------------- + phyloBase :: Phylo + phyloBase = toPhyloBase docs lst conf + -------------------------------------- --------------------------- -- | Frequent Item Set | -- diff --git a/src/Gargantext/Core/Viz/Phylo/PhyloTools.hs b/src/Gargantext/Core/Viz/Phylo/PhyloTools.hs index 97925c8e63e4fe5a41e218ee024d8d34182c1137..d4b675cb0f4640e1d4a4ec152cc75122444c88a8 100644 --- a/src/Gargantext/Core/Viz/Phylo/PhyloTools.hs +++ b/src/Gargantext/Core/Viz/Phylo/PhyloTools.hs @@ -346,6 +346,16 @@ getConfig :: Phylo -> Config getConfig phylo = (phylo ^. phylo_param) ^. phyloParam_config +setConfig :: Config -> Phylo -> Phylo +setConfig config phylo = phylo + & phylo_param .~ (PhyloParam + ((phylo ^. phylo_param) ^. phyloParam_version) + ((phylo ^. phylo_param) ^. phyloParam_software) + config) + +-- & phylo_param & phyloParam_config & phyloParam_config .~ config + + getRoots :: Phylo -> Vector Ngrams getRoots phylo = (phylo ^. phylo_foundations) ^. foundations_roots