Commit 85fcd70b authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT][PHYLO] preparing integration to backend

parent 92316028
{-|
Module : Main.hs
Description : Gargantext starter binary with Adaptative Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Adaptative Phylo binaries
-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-}
module Main where
import Control.Concurrent.Async (mapConcurrently)
import Crypto.Hash.SHA256 (hash)
import Data.Aeson
import Data.Either (Either(..))
import Data.List (concat, nub, isSuffixOf)
import Data.Maybe (fromMaybe)
import Data.String (String)
import GHC.IO (FilePath)
import qualified Prelude as Prelude
import System.Environment
import System.Directory (listDirectory,doesFileExist)
import Data.Text (Text, unwords, unpack, replace, pack)
import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
import qualified Data.ByteString.Char8 as C8
import qualified Data.ByteString.Lazy as Lazy
import qualified Data.Vector as Vector
import qualified Data.Text as T
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day,
csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig)
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
-- import Debug.Trace (trace)
data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
---------------
-- | Tools | --
---------------
-- | To get all the files in a directory or just a file
getFilesFromPath :: FilePath -> IO([FilePath])
getFilesFromPath path = do
if (isSuffixOf "/" path)
then (listDirectory path)
else return [path]
---------------
-- | Dates | --
---------------
toMonths :: Integer -> Int -> Int -> Date
toMonths y m d = fromIntegral $ cdMonths
$ diffGregorianDurationClip (fromGregorian y m d) (fromGregorian 0000 0 0)
toDays :: Integer -> Int -> Int -> Date
toDays y m d = fromIntegral
$ diffDays (fromGregorian y m d) (fromGregorian 0000 0 0)
toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
toPhyloDate y m d tu = case tu of
Year _ _ _ -> y
Month _ _ _ -> toMonths (Prelude.toInteger y) m d
Week _ _ _ -> div (toDays (Prelude.toInteger y) m d) 7
Day _ _ _ -> toDays (Prelude.toInteger y) m d
toPhyloDate' :: Int -> Int -> Int -> Text
toPhyloDate' y m d = pack $ showGregorian $ fromGregorian (Prelude.toInteger y) m d
--------------
-- | Json | --
--------------
-- | To read and decode a Json file
readJson :: FilePath -> IO Lazy.ByteString
readJson path = Lazy.readFile path
----------------
-- | Parser | --
----------------
-- | To filter the Ngrams of a document based on the termList
termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList pats txt
-- | To transform a Wos file (or [file]) into a list of Docs
wosToDocs :: Int -> Patterns -> TimeUnit -> FilePath -> IO [Document]
wosToDocs limit patterns time path = do
files <- getFilesFromPath path
let parseFile' file = do
eParsed <- parseFile WOS (path <> file)
case eParsed of
Right ps -> pure ps
Left e -> panic $ "Error: " <> (pack e)
take limit
<$> map (\d -> let title = fromJust $ _hd_title d
abstr = if (isJust $ _hd_abstract d)
then fromJust $ _hd_abstract d
else ""
in Document (toPhyloDate
(fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d)
(fromJust $ _hd_publication_day d) time)
(toPhyloDate'
(fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d)
(fromJust $ _hd_publication_day d))
(termsInText patterns $ title <> " " <> abstr) Nothing [])
<$> concat
<$> mapConcurrently (\file ->
filter (\d -> (isJust $ _hd_publication_year d)
&& (isJust $ _hd_title d))
<$> parseFile' file) files
-- To transform a Csv file into a list of Document
csvToDocs :: CorpusParser -> Patterns -> TimeUnit -> FilePath -> IO [Document]
csvToDocs parser patterns time path =
case parser of
Wos _ -> undefined
Csv limit -> do
eR <- Csv.readFile path
case eR of
Right r ->
pure $ Vector.toList
$ Vector.take limit
$ Vector.map (\row -> Document (toPhyloDate (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row)
(fromMaybe Csv.defaultMonth $ csv_publication_month row)
(fromMaybe Csv.defaultDay $ csv_publication_day row)
time)
(toPhyloDate' (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row)
(fromMaybe Csv.defaultMonth $ csv_publication_month row)
(fromMaybe Csv.defaultDay $ csv_publication_day row))
(termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
Nothing
[]
) $ snd r
Left e -> panic $ "Error: " <> (pack e)
Csv' limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(toPhyloDate' (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row))
(termsInText patterns $ (csv'_title row) <> " " <> (csv'_abstract row))
(Just $ csv'_weight row)
[csv'_source row]
) <$> snd <$> Csv.readWeightedCsv path
-- To parse a file into a list of Document
fileToDocs' :: CorpusParser -> FilePath -> TimeUnit -> TermList -> IO [Document]
fileToDocs' parser path time lst = do
let patterns = buildPatterns lst
case parser of
Wos limit -> wosToDocs limit patterns time path
Csv _ -> csvToDocs parser patterns time path
Csv' _ -> csvToDocs parser patterns time path
---------------
-- | Label | --
---------------
-- Config time parameters to label
timeToLabel :: Config -> [Char]
timeToLabel config = case (timeUnit config) of
Year p s f -> ("time_years" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Month p s f -> ("time_months"<> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Week p s f -> ("time_weeks" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Day p s f -> ("time_days" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
seaToLabel :: Config -> [Char]
seaToLabel config = case (seaElevation config) of
Constante start step -> ("sea_cst_" <> (show start) <> "_" <> (show step))
Adaptative granularity -> ("sea_adapt" <> (show granularity))
sensToLabel :: Config -> [Char]
sensToLabel config = case (phyloProximity config) of
Hamming -> undefined
WeightedLogJaccard s -> ("WeightedLogJaccard_" <> show s)
WeightedLogSim s -> ( "WeightedLogSim-sens_" <> show s)
cliqueToLabel :: Config -> [Char]
cliqueToLabel config = case (clique config) of
Fis s s' -> "fis_" <> (show s) <> "_" <> (show s')
MaxClique s t f -> "clique_" <> (show s)<> "_" <> (show f)<> "_" <> (show t)
syncToLabel :: Config -> [Char]
syncToLabel config = case (phyloSynchrony config) of
ByProximityThreshold scl sync_sens scope _ -> ("scale_" <> (show scope) <> "_" <> (show sync_sens) <> "_" <> (show scl))
ByProximityDistribution _ _ -> undefined
qualToConfig :: Config -> [Char]
qualToConfig config = case (phyloQuality config) of
Quality g m -> "quality_" <> (show g) <> "_" <> (show m)
-- To set up the export file's label from the configuration
configToLabel :: Config -> [Char]
configToLabel config = outputPath config
<> (unpack $ phyloName config)
<> "-" <> (timeToLabel config)
<> "-scale_" <> (show (phyloLevel config))
<> "-" <> (seaToLabel config)
<> "-" <> (sensToLabel config)
<> "-" <> (cliqueToLabel config)
<> "-level_" <> (show (_qua_granularity $ phyloQuality config))
<> "-" <> (syncToLabel config)
<> ".dot"
-- To write a sha256 from a set of config's parameters
configToSha :: PhyloStage -> Config -> [Char]
configToSha stage config = unpack
$ replace "/" "-"
$ T.pack (show (hash $ C8.pack label))
where
label :: [Char]
label = case stage of
PhyloWithCliques -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
PhyloWithLinks -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
<> (sensToLabel config)
<> (seaToLabel config)
<> (syncToLabel config)
<> (qualToConfig config)
<> (show (phyloLevel config))
writePhylo :: [Char] -> Phylo -> IO ()
writePhylo path phylo = Lazy.writeFile path $ encode phylo
readPhylo :: [Char] -> IO Phylo
readPhylo path = do
phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
case phyloJson of
Left err -> do
putStrLn err
undefined
Right phylo -> pure phylo
--------------
-- | Main | --
--------------
main :: IO ()
main = do
printIOMsg "Starting the reconstruction"
printIOMsg "Read the configuration file"
[args] <- getArgs
jsonArgs <- (eitherDecode <$> readJson args) :: IO (Either String Config)
case jsonArgs of
Left err -> putStrLn err
Right config -> do
printIOMsg "Parse the corpus"
mapList <- csvMapTermList (listPath config)
corpus <- fileToDocs' (corpusParser config) (corpusPath config) (timeUnit config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOMsg "Reconstruct the phylo"
let phyloWithCliquesFile = (outputPath config) <> "phyloWithCliques_" <> (configToSha PhyloWithCliques config) <> ".json"
let phyloWithLinksFile = (outputPath config) <> "phyloWithLinks_" <> (configToSha PhyloWithLinks config) <> ".json"
phyloWithCliquesExists <- doesFileExist phyloWithCliquesFile
phyloWithLinksExists <- doesFileExist phyloWithLinksFile
-- phyloStep <- if phyloWithCliquesExists
-- then do
-- printIOMsg "Reconstruct the phylo step from an existing file"
-- readPhylo phyloWithCliquesFile
-- else do
-- printIOMsg "Reconstruct the phylo step from scratch"
-- pure $ toPhyloStep corpus mapList config
-- writePhylo phyloWithCliquesFile phyloStep
-- let phylo = toPhylo (setConfig config phyloStep)
phyloWithLinks <- if phyloWithLinksExists
then do
printIOMsg "Reconstruct the phylo from an existing file with intertemporal links"
readPhylo phyloWithLinksFile
else do
if phyloWithCliquesExists
then do
printIOMsg "Reconstruct the phylo from an existing file with cliques"
phyloWithCliques <- readPhylo phyloWithCliquesFile
writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config phyloWithCliques)
else do
printIOMsg "Reconstruct the phylo from scratch"
phyloWithCliques <- pure $ toPhyloStep corpus mapList config
writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config phyloWithCliques)
writePhylo phyloWithLinksFile phyloWithLinks
-- probes
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_synchronic_distance_cumu_jaccard.txt")
-- $ synchronicDistance' phylo 1
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_inflexion_points.txt")
-- $ inflexionPoints phylo 1
printIOMsg "End of reconstruction, start the export"
let dot = toPhyloExport (setConfig config phyloWithLinks)
let output = configToLabel config
dotToFile output dot
{-| {-|
Module : Main.hs Module : Main.hs
Description : Gargantext starter binary with Phylo Description : Gargantext starter binary with Adaptative Phylo
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
Phylo binaries Adaptative Phylo binaries
-} -}
{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE StandaloneDeriving #-}
...@@ -17,153 +16,267 @@ Phylo binaries ...@@ -17,153 +16,267 @@ Phylo binaries
module Main where module Main where
import Control.Concurrent.Async as CCA (mapConcurrently) import Control.Concurrent.Async (mapConcurrently)
import Crypto.Hash.SHA256 (hash)
import Data.Aeson import Data.Aeson
import Data.List ((++),concat) import Data.Either (Either(..))
import Data.Maybe import Data.List (concat, nub, isSuffixOf)
import Data.Text (Text, unwords) import Data.Maybe (fromMaybe)
import GHC.Generics import Data.String (String)
import GHC.IO (FilePath) import GHC.IO (FilePath)
import System.Directory (doesFileExist) import qualified Prelude as Prelude
import System.Environment import System.Environment
import qualified Data.ByteString.Lazy as L import System.Directory (listDirectory,doesFileExist)
import qualified Data.List as DL import Data.Text (Text, unwords, unpack, replace, pack)
import qualified Data.Map as DM import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
import qualified Data.Text as DT
import qualified Data.Vector as DV import qualified Data.ByteString.Char8 as C8
import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as CSV import qualified Data.ByteString.Lazy as Lazy
import qualified Prelude as P import qualified Data.Vector as Vector
import qualified Data.Text as T
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day,
csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile) import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList) import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Viz.Phylo import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.LevelMaker import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep)
import Gargantext.Core.Viz.Phylo.Tools import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig)
import Gargantext.Core.Viz.Phylo.View.Export import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.View.ViewMaker -- import Gargantext.API.Ngrams.Prelude (toTermList)
-- import Debug.Trace (trace)
data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
---------------
-- | Tools | --
---------------
-- | To get all the files in a directory or just a file
getFilesFromPath :: FilePath -> IO([FilePath])
getFilesFromPath path = do
if (isSuffixOf "/" path)
then (listDirectory path)
else return [path]
---------------
-- | Dates | --
---------------
toMonths :: Integer -> Int -> Int -> Date
toMonths y m d = fromIntegral $ cdMonths
$ diffGregorianDurationClip (fromGregorian y m d) (fromGregorian 0000 0 0)
toDays :: Integer -> Int -> Int -> Date
toDays y m d = fromIntegral
$ diffDays (fromGregorian y m d) (fromGregorian 0000 0 0)
toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
toPhyloDate y m d tu = case tu of
Year _ _ _ -> y
Month _ _ _ -> toMonths (Prelude.toInteger y) m d
Week _ _ _ -> div (toDays (Prelude.toInteger y) m d) 7
Day _ _ _ -> toDays (Prelude.toInteger y) m d
-- Function to use in Database export
toPhyloDate' :: Int -> Int -> Int -> Text
toPhyloDate' y m d = pack $ showGregorian $ fromGregorian (Prelude.toInteger y) m d
-------------- --------------
-- | Conf | -- -- | Json | --
-------------- --------------
type ListPath = FilePath -- | To read and decode a Json file
type FisPath = FilePath readJson :: FilePath -> IO Lazy.ByteString
type CorpusPath = FilePath readJson path = Lazy.readFile path
data CorpusType = Wos | Csv deriving (Show,Generic)
type Limit = Int
----------------
data Conf = -- | Parser | --
Conf { corpusPath :: CorpusPath ----------------
, corpusType :: CorpusType
, listPath :: ListPath -- | To filter the Ngrams of a document based on the termList
, fisPath :: FilePath termsInText :: Patterns -> Text -> [Text]
, outputPath :: FilePath termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList pats txt
, phyloName :: Text
, limit :: Limit
, timeGrain :: Int -- | To transform a Wos file (or [file]) into a list of Docs
, timeStep :: Int wosToDocs :: Int -> Patterns -> TimeUnit -> FilePath -> IO [Document]
, timeFrame :: Int wosToDocs limit patterns time path = do
, timeFrameTh :: Double files <- getFilesFromPath path
, timeTh :: Double let parseFile' file = do
, timeSens :: Double eParsed <- parseFile WOS (path <> file)
, reBranchThr :: Double case eParsed of
, reBranchNth :: Int Right ps -> pure ps
, clusterTh :: Double Left e -> panic $ "Error: " <> (pack e)
, clusterSens :: Double take limit
, phyloLevel :: Int <$> map (\d -> let title = fromJust $ _hd_title d
, viewLevel :: Int abstr = if (isJust $ _hd_abstract d)
, fisSupport :: Int then fromJust $ _hd_abstract d
, fisClique :: Int else ""
, minSizeBranch :: Int in Document (toPhyloDate
} deriving (Show,Generic) (fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d)
instance FromJSON Conf (fromJust $ _hd_publication_day d) time)
instance ToJSON Conf (toPhyloDate'
(fromIntegral $ fromJust $ _hd_publication_year d)
instance FromJSON CorpusType (fromJust $ _hd_publication_month d)
instance ToJSON CorpusType (fromJust $ _hd_publication_day d))
(termsInText patterns $ title <> " " <> abstr) Nothing [])
<$> concat
decoder :: P.Either a b -> b <$> mapConcurrently (\file ->
decoder (P.Left _) = P.error "Error" filter (\d -> (isJust $ _hd_publication_year d)
decoder (P.Right x) = x && (isJust $ _hd_title d))
<$> parseFile' file) files
-- | Get the conf from a Json file
getJson :: FilePath -> IO L.ByteString
getJson path = L.readFile path -- To transform a Csv file into a list of Document
csvToDocs :: CorpusParser -> Patterns -> TimeUnit -> FilePath -> IO [Document]
csvToDocs parser patterns time path =
case parser of
Wos _ -> undefined
Csv limit -> do
eR <- Csv.readFile path
case eR of
Right r ->
pure $ Vector.toList
$ Vector.take limit
$ Vector.map (\row -> Document (toPhyloDate (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row)
(fromMaybe Csv.defaultMonth $ csv_publication_month row)
(fromMaybe Csv.defaultDay $ csv_publication_day row)
time)
(toPhyloDate' (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row)
(fromMaybe Csv.defaultMonth $ csv_publication_month row)
(fromMaybe Csv.defaultDay $ csv_publication_day row))
(termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
Nothing
[]
) $ snd r
Left e -> panic $ "Error: " <> (pack e)
Csv' limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(toPhyloDate' (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row))
(termsInText patterns $ (csv'_title row) <> " " <> (csv'_abstract row))
(Just $ csv'_weight row)
[csv'_source row]
) <$> snd <$> Csv.readWeightedCsv path
-- To parse a file into a list of Document
fileToDocs' :: CorpusParser -> FilePath -> TimeUnit -> TermList -> IO [Document]
fileToDocs' parser path time lst = do
let patterns = buildPatterns lst
case parser of
Wos limit -> wosToDocs limit patterns time path
Csv _ -> csvToDocs parser patterns time path
Csv' _ -> csvToDocs parser patterns time path
--------------- ---------------
-- | Parse | -- -- | Label | --
--------------- ---------------
-- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (a, Text) -> (a, [Text]) -- Config time parameters to label
filterTerms patterns (y,d) = (y,termsInText patterns d) timeToLabel :: Config -> [Char]
timeToLabel config = case (timeUnit config) of
Year p s f -> ("time_years" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
-- | To transform a Csv nfile into a readable corpus Month p s f -> ("time_months"<> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
csvToCorpus :: Limit -> CorpusPath -> IO ([(Int,Text)]) Week p s f -> ("time_weeks" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
csvToCorpus limit csv = DV.toList Day p s f -> ("time_days" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
-- . DV.reverse
. DV.take limit
-- . DV.reverse seaToLabel :: Config -> [Char]
. DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n))) seaToLabel config = case (seaElevation config) of
. snd <$> CSV.readFile csv Constante start step -> ("sea_cst_" <> (show start) <> "_" <> (show step))
Adaptative granularity -> ("sea_adapt" <> (show granularity))
-- | To transform a Wos nfile into a readable corpus
wosToCorpus :: Limit -> CorpusPath -> IO ([(Int,Text)]) sensToLabel :: Config -> [Char]
wosToCorpus limit path = DL.take limit sensToLabel config = case (phyloProximity config) of
. map (\d -> ((fromJust $_hd_publication_year d) Hamming -> undefined
,(fromJust $_hd_title d) <> " " <> (fromJust $_hd_abstract d))) WeightedLogJaccard s -> ("WeightedLogJaccard_" <> show s)
. filter (\d -> (isJust $_hd_publication_year d) WeightedLogSim s -> ( "WeightedLogSim-sens_" <> show s)
&& (isJust $_hd_title d)
&& (isJust $_hd_abstract d))
. concat cliqueToLabel :: Config -> [Char]
<$> mapConcurrently (\idx -> parseFile WOS (path <> show(idx) <> ".txt")) [1..20] cliqueToLabel config = case (clique config) of
Fis s s' -> "fis_" <> (show s) <> "_" <> (show s')
MaxClique s t f -> "clique_" <> (show s)<> "_" <> (show f)<> "_" <> (show t)
-- | To use the correct parser given a CorpusType
fileToCorpus :: CorpusType -> Limit -> CorpusPath -> IO ([(Int,Text)])
fileToCorpus format limit path = case format of syncToLabel :: Config -> [Char]
Wos -> wosToCorpus limit path syncToLabel config = case (phyloSynchrony config) of
Csv -> csvToCorpus limit path ByProximityThreshold scl sync_sens scope _ -> ("scale_" <> (show scope) <> "_" <> (show sync_sens) <> "_" <> (show scl))
ByProximityDistribution _ _ -> undefined
-- | To parse a file into a list of Document qualToConfig :: Config -> [Char]
parse :: CorpusType -> Limit -> CorpusPath -> TermList -> IO [Document] qualToConfig config = case (phyloQuality config) of
parse format limit path l = do Quality g m -> "quality_" <> (show g) <> "_" <> (show m)
corpus <- fileToCorpus format limit path
let patterns = buildPatterns l
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus -- To set up the export file's label from the configuration
configToLabel :: Config -> [Char]
configToLabel config = outputPath config
-- | To parse an existing Fis file <> (unpack $ phyloName config)
parseFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> IO [PhyloFis] <> "-" <> (timeToLabel config)
parseFis path name grain step support clique = do <> "-scale_" <> (show (phyloLevel config))
fisExists <- doesFileExist (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json") <> "-" <> (seaToLabel config)
if fisExists <> "-" <> (sensToLabel config)
then do <> "-" <> (cliqueToLabel config)
fisJson <- (eitherDecode <$> getJson (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json")) :: IO (P.Either P.String [PhyloFis]) <> "-level_" <> (show (_qua_granularity $ phyloQuality config))
case fisJson of <> "-" <> (syncToLabel config)
P.Left err -> do <> ".dot"
-- To write a sha256 from a set of config's parameters
configToSha :: PhyloStage -> Config -> [Char]
configToSha stage config = unpack
$ replace "/" "-"
$ T.pack (show (hash $ C8.pack label))
where
label :: [Char]
label = case stage of
PhyloWithCliques -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
PhyloWithLinks -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
<> (sensToLabel config)
<> (seaToLabel config)
<> (syncToLabel config)
<> (qualToConfig config)
<> (show (phyloLevel config))
writePhylo :: [Char] -> Phylo -> IO ()
writePhylo path phylo = Lazy.writeFile path $ encode phylo
readPhylo :: [Char] -> IO Phylo
readPhylo path = do
phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
case phyloJson of
Left err -> do
putStrLn err putStrLn err
pure [] undefined
P.Right fis -> pure fis Right phylo -> pure phylo
else pure []
writeFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> DM.Map (Date,Date) [PhyloFis] -> IO ()
writeFis path name grain step support clique fis = do
let fisPath = path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json"
L.writeFile fisPath $ encode (DL.concat $ DM.elems fis)
-------------- --------------
-- | Main | -- -- | Main | --
...@@ -173,47 +286,74 @@ writeFis path name grain step support clique fis = do ...@@ -173,47 +286,74 @@ writeFis path name grain step support clique fis = do
main :: IO () main :: IO ()
main = do main = do
[jsonPath] <- getArgs printIOMsg "Starting the reconstruction"
confJson <- (eitherDecode <$> getJson jsonPath) :: IO (P.Either P.String Conf) printIOMsg "Read the configuration file"
[args] <- getArgs
jsonArgs <- (eitherDecode <$> readJson args) :: IO (Either String Config)
case confJson of case jsonArgs of
P.Left err -> putStrLn err Left err -> putStrLn err
P.Right conf -> do Right config -> do
termList <- csvMapTermList (listPath conf) printIOMsg "Parse the corpus"
mapList <- csvMapTermList (listPath config)
corpus <- fileToDocs' (corpusParser config) (corpusPath config) (timeUnit config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus")
corpus <- parse (corpusType conf) (limit conf) (corpusPath conf) termList printIOMsg "Reconstruct the phylo"
putStrLn $ ("\n" <> show (length corpus) <> " parsed docs") let phyloWithCliquesFile = (outputPath config) <> "phyloWithCliques_" <> (configToSha PhyloWithCliques config) <> ".json"
let phyloWithLinksFile = (outputPath config) <> "phyloWithLinks_" <> (configToSha PhyloWithLinks config) <> ".json"
fis <- parseFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) phyloWithCliquesExists <- doesFileExist phyloWithCliquesFile
phyloWithLinksExists <- doesFileExist phyloWithLinksFile
putStrLn $ ("\n" <> show (length fis) <> " parsed fis") -- phyloStep <- if phyloWithCliquesExists
-- then do
-- printIOMsg "Reconstruct the phylo step from an existing file"
-- readPhylo phyloWithCliquesFile
-- else do
-- printIOMsg "Reconstruct the phylo step from scratch"
-- pure $ toPhyloStep corpus mapList config
let fis' = DM.fromListWith (++) $ DL.sortOn (fst . fst) $ map (\f -> (getFisPeriod f,[f])) fis -- writePhylo phyloWithCliquesFile phyloStep
let query = PhyloQueryBuild (phyloName conf) "" (timeGrain conf) (timeStep conf) -- let phylo = toPhylo (setConfig config phyloStep)
(Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (timeFrame conf) (timeFrameTh conf)
(reBranchThr conf) (reBranchNth conf) (phyloLevel conf)
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge,BranchBirth,BranchGroups] [SizeBranch $ SBParams (minSizeBranch conf)] [GroupLabelIncDyn,BranchPeakInc] (Just (ByBranchBirth,Asc)) Json Flat True -- QL: 2 files read from disk
phyloWithLinks <- if phyloWithLinksExists
then do
printIOMsg "Reconstruct the phylo from an existing file with intertemporal links"
readPhylo phyloWithLinksFile
else do
if phyloWithCliquesExists
then do
printIOMsg "Reconstruct the phylo from an existing file with cliques"
phyloWithCliques <- readPhylo phyloWithCliquesFile
writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config phyloWithCliques)
else do
printIOMsg "Reconstruct the phylo from scratch"
phyloWithCliques <- pure $ toPhyloStep corpus mapList config
writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config phyloWithCliques)
let phylo = toPhylo query corpus termList fis' writePhylo phyloWithLinksFile phyloWithLinks
writeFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) (getPhyloFis phylo)
let view = toPhyloView queryView phylo -- probes
putStrLn $ ("phylo completed until level " <> show (phyloLevel conf) <> ", export at level " <> show (viewLevel conf)) -- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_synchronic_distance_cumu_jaccard.txt")
-- $ synchronicDistance' phylo 1
let outputFile = (outputPath conf) <> (DT.unpack $ phyloName conf) -- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_inflexion_points.txt")
<> "_" <> show (limit conf) <> "_" -- $ inflexionPoints phylo 1
<> "_" <> show (timeTh conf) <> "_"
<> "_" <> show (timeSens conf) <> "_" printIOMsg "End of reconstruction, start the export"
<> "_" <> show (clusterTh conf) <> "_"
<> "_" <> show (clusterSens conf) let dot = toPhyloExport (setConfig config phyloWithLinks)
<> ".dot"
let output = configToLabel config
P.writeFile outputFile $ dotToString $ viewToDot view dotToFile output dot
...@@ -100,7 +100,7 @@ library: ...@@ -100,7 +100,7 @@ library:
- Gargantext.Core.Viz.Graph.Tools - Gargantext.Core.Viz.Graph.Tools
- Gargantext.Core.Viz.Graph.Tools.IGraph - Gargantext.Core.Viz.Graph.Tools.IGraph
- Gargantext.Core.Viz.Graph.Index - Gargantext.Core.Viz.Graph.Index
- Gargantext.Core.Viz.AdaptativePhylo - Gargantext.Core.Viz.Phylo
- Gargantext.Core.Viz.Phylo.PhyloMaker - Gargantext.Core.Viz.Phylo.PhyloMaker
- Gargantext.Core.Viz.Phylo.PhyloTools - Gargantext.Core.Viz.Phylo.PhyloTools
- Gargantext.Core.Viz.Phylo.PhyloExport - Gargantext.Core.Viz.Phylo.PhyloExport
...@@ -322,9 +322,9 @@ executables: ...@@ -322,9 +322,9 @@ executables:
- unordered-containers - unordered-containers
- full-text-search - full-text-search
gargantext-adaptative-phylo: gargantext-phylo:
main: Main.hs main: Main.hs
source-dirs: bin/gargantext-adaptative-phylo source-dirs: bin/gargantext-phylo
ghc-options: ghc-options:
- -threaded - -threaded
- -rtsopts - -rtsopts
......
...@@ -64,7 +64,7 @@ import Gargantext.Core.Viz.Graph.FGL (Graph_Undirected, degree, neighbors, mkGra ...@@ -64,7 +64,7 @@ import Gargantext.Core.Viz.Graph.FGL (Graph_Undirected, degree, neighbors, mkGra
import Gargantext.Core.Viz.Graph.Tools (cooc2graph',cooc2graph'', Threshold) import Gargantext.Core.Viz.Graph.Tools (cooc2graph',cooc2graph'', Threshold)
import Gargantext.Core.Methods.Distances (Distance) import Gargantext.Core.Methods.Distances (Distance)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex) import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex)
import Gargantext.Core.Viz.AdaptativePhylo import Gargantext.Core.Viz.Phylo
-- import Debug.Trace (trace) -- import Debug.Trace (trace)
type Graph = Graph_Undirected type Graph = Graph_Undirected
type Neighbor = Node type Neighbor = Node
......
...@@ -24,31 +24,26 @@ one 8, e54847. ...@@ -24,31 +24,26 @@ one 8, e54847.
{-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Viz.AdaptativePhylo where module Gargantext.Core.Viz.Phylo where
import Control.DeepSeq (NFData)
import Control.Lens (makeLenses)
import Data.Aeson import Data.Aeson
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Map (Map)
import Data.Text (Text, pack) import Data.Text (Text, pack)
import Data.Vector (Vector) import Data.Vector (Vector)
import Data.Map (Map)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList)
import GHC.Generics import GHC.Generics
import GHC.IO (FilePath) import GHC.IO (FilePath)
import Control.DeepSeq (NFData) import Gargantext.Core.Text.Context (TermList)
import Control.Lens (makeLenses) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import qualified Data.Text.Lazy as TextLazy import qualified Data.Text.Lazy as TextLazy
---------------- ----------------
-- | Config | -- -- | Config | --
---------------- ----------------
data CorpusParser = data CorpusParser =
Wos {_wos_limit :: Int} Wos {_wos_limit :: Int}
| Csv {_csv_limit :: Int} | Csv {_csv_limit :: Int}
...@@ -182,34 +177,49 @@ defaultConfig = ...@@ -182,34 +177,49 @@ defaultConfig =
instance FromJSON Config instance FromJSON Config
instance ToJSON Config instance ToJSON Config
instance FromJSON CorpusParser instance FromJSON CorpusParser
instance ToJSON CorpusParser instance ToJSON CorpusParser
instance FromJSON Proximity instance FromJSON Proximity
instance ToJSON Proximity instance ToJSON Proximity
instance FromJSON SeaElevation instance FromJSON SeaElevation
instance ToJSON SeaElevation instance ToJSON SeaElevation
instance FromJSON TimeUnit instance FromJSON TimeUnit
instance ToJSON TimeUnit instance ToJSON TimeUnit
instance FromJSON CliqueFilter instance FromJSON CliqueFilter
instance ToJSON CliqueFilter instance ToJSON CliqueFilter
instance FromJSON Clique instance FromJSON Clique
instance ToJSON Clique instance ToJSON Clique
instance FromJSON PhyloLabel instance FromJSON PhyloLabel
instance ToJSON PhyloLabel instance ToJSON PhyloLabel
instance FromJSON Tagger instance FromJSON Tagger
instance ToJSON Tagger instance ToJSON Tagger
instance FromJSON Sort instance FromJSON Sort
instance ToJSON Sort instance ToJSON Sort
instance FromJSON Order instance FromJSON Order
instance ToJSON Order instance ToJSON Order
instance FromJSON Filter instance FromJSON Filter
instance ToJSON Filter instance ToJSON Filter
instance FromJSON SynchronyScope instance FromJSON SynchronyScope
instance ToJSON SynchronyScope instance ToJSON SynchronyScope
instance FromJSON SynchronyStrategy instance FromJSON SynchronyStrategy
instance ToJSON SynchronyStrategy instance ToJSON SynchronyStrategy
instance FromJSON Synchrony instance FromJSON Synchrony
instance ToJSON Synchrony instance ToJSON Synchrony
instance FromJSON Quality instance FromJSON Quality
instance ToJSON Quality instance ToJSON Quality
...@@ -252,9 +262,10 @@ type Ngrams = Text ...@@ -252,9 +262,10 @@ type Ngrams = Text
-- Document : a piece of Text linked to a Date -- Document : a piece of Text linked to a Date
-- date = computational date; date' = original string date yyyy-mm-dd -- date = computational date; date' = original string date yyyy-mm-dd
-- Export Database to Document
data Document = Document data Document = Document
{ date :: Date { date :: Date -- datatype Date {unDate :: Int}
, date' :: Text , date' :: Text -- show date
, text :: [Ngrams] , text :: [Ngrams]
, weight :: Maybe Double , weight :: Maybe Double
, sources :: [Text] , sources :: [Text]
...@@ -469,16 +480,22 @@ makeLenses ''PhyloBranch ...@@ -469,16 +480,22 @@ makeLenses ''PhyloBranch
instance FromJSON Phylo instance FromJSON Phylo
instance ToJSON Phylo instance ToJSON Phylo
instance FromJSON PhyloSources instance FromJSON PhyloSources
instance ToJSON PhyloSources instance ToJSON PhyloSources
instance FromJSON PhyloParam instance FromJSON PhyloParam
instance ToJSON PhyloParam instance ToJSON PhyloParam
instance FromJSON PhyloPeriod instance FromJSON PhyloPeriod
instance ToJSON PhyloPeriod instance ToJSON PhyloPeriod
instance FromJSON PhyloLevel instance FromJSON PhyloLevel
instance ToJSON PhyloLevel instance ToJSON PhyloLevel
instance FromJSON Software instance FromJSON Software
instance ToJSON Software instance ToJSON Software
instance FromJSON PhyloGroup instance FromJSON PhyloGroup
instance ToJSON PhyloGroup instance ToJSON PhyloGroup
......
...@@ -15,23 +15,20 @@ Portability : POSIX ...@@ -15,23 +15,20 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.PhyloExample where module Gargantext.Core.Viz.Phylo.PhyloExample where
import Control.Lens
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.List (sortOn, nub, sort) import Data.List (sortOn, nub, sort)
import Data.Map (Map) import Data.Map (Map)
import Data.Text (Text, toLower) import Data.Text (Text, toLower)
import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.Mono (monoTexts) import Gargantext.Core.Text.Terms.Mono (monoTexts)
import Gargantext.Core.Viz.AdaptativePhylo import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.PhyloMaker
import Gargantext.Core.Viz.Phylo.PhyloExport import Gargantext.Core.Viz.Phylo.PhyloExport
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching) import Gargantext.Core.Viz.Phylo.PhyloMaker
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching)
import Control.Lens import Gargantext.Prelude
import Data.GraphViz.Types.Generalised (DotGraph)
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
--------------------------------- ---------------------------------
......
...@@ -12,30 +12,27 @@ Portability : POSIX ...@@ -12,30 +12,27 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.PhyloExport where module Gargantext.Core.Viz.Phylo.PhyloExport where
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
import Data.Vector (Vector)
import Prelude (writeFile)
import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.GraphViz hiding (DotGraph, Order) import Data.GraphViz hiding (DotGraph, Order)
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order) import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Types.Monadic import Data.GraphViz.Types.Monadic
import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
import Data.Text.Lazy (fromStrict, pack, unpack) import Data.Text.Lazy (fromStrict, pack, unpack)
import System.FilePath import Data.Vector (Vector)
import Debug.Trace (trace) import Debug.Trace (trace)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
import Gargantext.Prelude
import Prelude (writeFile)
import System.FilePath
import qualified Data.GraphViz.Attributes.HTML as H
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.Vector as Vector
import qualified Data.Text.Lazy as Lazy import qualified Data.Text.Lazy as Lazy
import qualified Data.GraphViz.Attributes.HTML as H import qualified Data.Vector as Vector
-------------------- --------------------
-- | Dot export | -- -- | Dot export | --
......
...@@ -11,30 +11,29 @@ Portability : POSIX ...@@ -11,30 +11,29 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.PhyloMaker where module Gargantext.Core.Viz.Phylo.PhyloMaker where
import Control.DeepSeq (NFData)
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail) import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert) import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert)
import Data.Vector (Vector)
import Data.Text (Text) import Data.Text (Text)
import Data.Vector (Vector)
import Debug.Trace (trace)
import Gargantext.Prelude import Gargantext.Core.Methods.Distances (Distance(Conditional))
import Gargantext.Core.Viz.AdaptativePhylo import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, fisWithSizePolyMap', Size(..)) import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, fisWithSizePolyMap', Size(..))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques) import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Methods.Distances (Distance(Conditional))
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon) import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Prelude
import Control.DeepSeq (NFData)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Debug.Trace (trace)
import Control.Lens hiding (Level)
import qualified Data.Vector as Vector
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Vector as Vector
------------------ ------------------
-- | To Phylo | -- -- | To Phylo | --
...@@ -162,6 +161,7 @@ indexDates' m = map (\docs -> ...@@ -162,6 +161,7 @@ indexDates' m = map (\docs ->
-- To build the first phylo step from docs and terms -- To build the first phylo step from docs and terms
-- QL: backend entre phyloBase et phyloClique
toPhyloStep :: [Document] -> TermList -> Config -> Phylo toPhyloStep :: [Document] -> TermList -> Config -> Phylo
toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of
Constante _ _ -> appendGroups cliqueToGroup 1 phyloClique (updatePeriods (indexDates' docs') phyloBase) Constante _ _ -> appendGroups cliqueToGroup 1 phyloClique (updatePeriods (indexDates' docs') phyloBase)
...@@ -173,6 +173,7 @@ toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of ...@@ -173,6 +173,7 @@ toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of
phyloClique = toPhyloClique phyloBase docs' phyloClique = toPhyloClique phyloBase docs'
-------------------------------------- --------------------------------------
docs' :: Map (Date,Date) [Document] docs' :: Map (Date,Date) [Document]
-- QL: Time Consuming here
docs' = groupDocsByPeriodRec date (getPeriodIds phyloBase) (sortOn date docs) empty docs' = groupDocsByPeriodRec date (getPeriodIds phyloBase) (sortOn date docs) empty
-------------------------------------- --------------------------------------
phyloBase :: Phylo phyloBase :: Phylo
......
...@@ -12,28 +12,23 @@ Portability : POSIX ...@@ -12,28 +12,23 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.PhyloTools where module Gargantext.Core.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex) import Control.Lens hiding (Level)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group) import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group)
import Data.Set (Set, disjoint)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys) import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.Set (Set, disjoint)
import Data.String (String) import Data.String (String)
import Data.Text (Text,unpack) import Data.Text (Text,unpack)
import Data.Vector (Vector, elemIndex)
import Prelude (floor,read) import Debug.Trace (trace)
import Gargantext.Core.Viz.Phylo
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo import Prelude (floor,read)
import Text.Printf import Text.Printf
import Debug.Trace (trace)
import Control.Lens hiding (Level)
import qualified Data.Vector as Vector
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Set as Set
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.Vector as Vector
------------ ------------
-- | Io | -- -- | Io | --
......
...@@ -11,20 +11,17 @@ Portability : POSIX ...@@ -11,20 +11,17 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.SynchronicClustering where module Gargantext.Core.Viz.Phylo.SynchronicClustering where
import Gargantext.Prelude -- import Debug.Trace (trace)
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
import Gargantext.Core.Viz.Phylo.PhyloExport (processDynamics)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn, groupBy)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Control.Monad (sequence) import Control.Monad (sequence)
-- import Debug.Trace (trace) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn, groupBy)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (processDynamics)
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
import Gargantext.Prelude
import qualified Data.Map as Map import qualified Data.Map as Map
...@@ -32,7 +29,6 @@ import qualified Data.Map as Map ...@@ -32,7 +29,6 @@ import qualified Data.Map as Map
-- | New Level Maker | -- -- | New Level Maker | --
------------------------- -------------------------
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id mapIds childs = mergeGroups coocs id mapIds childs =
let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
......
...@@ -11,26 +11,21 @@ Portability : POSIX ...@@ -11,26 +11,21 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.TemporalMatching where module Gargantext.Core.Viz.Phylo.TemporalMatching where
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, nubBy, union, dropWhile, partition, or, sort, (!!)) import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, nubBy, union, dropWhile, partition, or, sort, (!!))
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust) import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust)
import Debug.Trace (trace)
import Gargantext.Prelude import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Prelude
import Prelude (floor,tan,pi) import Prelude (floor,tan,pi)
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Debug.Trace (trace)
import Text.Printf import Text.Printf
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
------------------- -------------------
-- | Proximity | -- -- | Proximity | --
------------------- -------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment