Commit 85fcd70b authored by Alexandre Delanoë's avatar Alexandre Delanoë

[FEAT][PHYLO] preparing integration to backend

parent 92316028
{-|
Module : Main.hs
Description : Gargantext starter binary with Adaptative Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Adaptative Phylo binaries
-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-}
module Main where
import Control.Concurrent.Async (mapConcurrently)
import Crypto.Hash.SHA256 (hash)
import Data.Aeson
import Data.Either (Either(..))
import Data.List (concat, nub, isSuffixOf)
import Data.Maybe (fromMaybe)
import Data.String (String)
import GHC.IO (FilePath)
import qualified Prelude as Prelude
import System.Environment
import System.Directory (listDirectory,doesFileExist)
import Data.Text (Text, unwords, unpack, replace, pack)
import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
import qualified Data.ByteString.Char8 as C8
import qualified Data.ByteString.Lazy as Lazy
import qualified Data.Vector as Vector
import qualified Data.Text as T
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day,
csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep)
import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig)
import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
-- import Gargantext.API.Ngrams.Prelude (toTermList)
-- import Debug.Trace (trace)
data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
---------------
-- | Tools | --
---------------
-- | To get all the files in a directory or just a file
getFilesFromPath :: FilePath -> IO([FilePath])
getFilesFromPath path = do
if (isSuffixOf "/" path)
then (listDirectory path)
else return [path]
---------------
-- | Dates | --
---------------
toMonths :: Integer -> Int -> Int -> Date
toMonths y m d = fromIntegral $ cdMonths
$ diffGregorianDurationClip (fromGregorian y m d) (fromGregorian 0000 0 0)
toDays :: Integer -> Int -> Int -> Date
toDays y m d = fromIntegral
$ diffDays (fromGregorian y m d) (fromGregorian 0000 0 0)
toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
toPhyloDate y m d tu = case tu of
Year _ _ _ -> y
Month _ _ _ -> toMonths (Prelude.toInteger y) m d
Week _ _ _ -> div (toDays (Prelude.toInteger y) m d) 7
Day _ _ _ -> toDays (Prelude.toInteger y) m d
toPhyloDate' :: Int -> Int -> Int -> Text
toPhyloDate' y m d = pack $ showGregorian $ fromGregorian (Prelude.toInteger y) m d
--------------
-- | Json | --
--------------
-- | To read and decode a Json file
readJson :: FilePath -> IO Lazy.ByteString
readJson path = Lazy.readFile path
----------------
-- | Parser | --
----------------
-- | To filter the Ngrams of a document based on the termList
termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList pats txt
-- | To transform a Wos file (or [file]) into a list of Docs
wosToDocs :: Int -> Patterns -> TimeUnit -> FilePath -> IO [Document]
wosToDocs limit patterns time path = do
files <- getFilesFromPath path
let parseFile' file = do
eParsed <- parseFile WOS (path <> file)
case eParsed of
Right ps -> pure ps
Left e -> panic $ "Error: " <> (pack e)
take limit
<$> map (\d -> let title = fromJust $ _hd_title d
abstr = if (isJust $ _hd_abstract d)
then fromJust $ _hd_abstract d
else ""
in Document (toPhyloDate
(fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d)
(fromJust $ _hd_publication_day d) time)
(toPhyloDate'
(fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d)
(fromJust $ _hd_publication_day d))
(termsInText patterns $ title <> " " <> abstr) Nothing [])
<$> concat
<$> mapConcurrently (\file ->
filter (\d -> (isJust $ _hd_publication_year d)
&& (isJust $ _hd_title d))
<$> parseFile' file) files
-- To transform a Csv file into a list of Document
csvToDocs :: CorpusParser -> Patterns -> TimeUnit -> FilePath -> IO [Document]
csvToDocs parser patterns time path =
case parser of
Wos _ -> undefined
Csv limit -> do
eR <- Csv.readFile path
case eR of
Right r ->
pure $ Vector.toList
$ Vector.take limit
$ Vector.map (\row -> Document (toPhyloDate (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row)
(fromMaybe Csv.defaultMonth $ csv_publication_month row)
(fromMaybe Csv.defaultDay $ csv_publication_day row)
time)
(toPhyloDate' (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row)
(fromMaybe Csv.defaultMonth $ csv_publication_month row)
(fromMaybe Csv.defaultDay $ csv_publication_day row))
(termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
Nothing
[]
) $ snd r
Left e -> panic $ "Error: " <> (pack e)
Csv' limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(toPhyloDate' (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row))
(termsInText patterns $ (csv'_title row) <> " " <> (csv'_abstract row))
(Just $ csv'_weight row)
[csv'_source row]
) <$> snd <$> Csv.readWeightedCsv path
-- To parse a file into a list of Document
fileToDocs' :: CorpusParser -> FilePath -> TimeUnit -> TermList -> IO [Document]
fileToDocs' parser path time lst = do
let patterns = buildPatterns lst
case parser of
Wos limit -> wosToDocs limit patterns time path
Csv _ -> csvToDocs parser patterns time path
Csv' _ -> csvToDocs parser patterns time path
---------------
-- | Label | --
---------------
-- Config time parameters to label
timeToLabel :: Config -> [Char]
timeToLabel config = case (timeUnit config) of
Year p s f -> ("time_years" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Month p s f -> ("time_months"<> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Week p s f -> ("time_weeks" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
Day p s f -> ("time_days" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
seaToLabel :: Config -> [Char]
seaToLabel config = case (seaElevation config) of
Constante start step -> ("sea_cst_" <> (show start) <> "_" <> (show step))
Adaptative granularity -> ("sea_adapt" <> (show granularity))
sensToLabel :: Config -> [Char]
sensToLabel config = case (phyloProximity config) of
Hamming -> undefined
WeightedLogJaccard s -> ("WeightedLogJaccard_" <> show s)
WeightedLogSim s -> ( "WeightedLogSim-sens_" <> show s)
cliqueToLabel :: Config -> [Char]
cliqueToLabel config = case (clique config) of
Fis s s' -> "fis_" <> (show s) <> "_" <> (show s')
MaxClique s t f -> "clique_" <> (show s)<> "_" <> (show f)<> "_" <> (show t)
syncToLabel :: Config -> [Char]
syncToLabel config = case (phyloSynchrony config) of
ByProximityThreshold scl sync_sens scope _ -> ("scale_" <> (show scope) <> "_" <> (show sync_sens) <> "_" <> (show scl))
ByProximityDistribution _ _ -> undefined
qualToConfig :: Config -> [Char]
qualToConfig config = case (phyloQuality config) of
Quality g m -> "quality_" <> (show g) <> "_" <> (show m)
-- To set up the export file's label from the configuration
configToLabel :: Config -> [Char]
configToLabel config = outputPath config
<> (unpack $ phyloName config)
<> "-" <> (timeToLabel config)
<> "-scale_" <> (show (phyloLevel config))
<> "-" <> (seaToLabel config)
<> "-" <> (sensToLabel config)
<> "-" <> (cliqueToLabel config)
<> "-level_" <> (show (_qua_granularity $ phyloQuality config))
<> "-" <> (syncToLabel config)
<> ".dot"
-- To write a sha256 from a set of config's parameters
configToSha :: PhyloStage -> Config -> [Char]
configToSha stage config = unpack
$ replace "/" "-"
$ T.pack (show (hash $ C8.pack label))
where
label :: [Char]
label = case stage of
PhyloWithCliques -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
PhyloWithLinks -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
<> (sensToLabel config)
<> (seaToLabel config)
<> (syncToLabel config)
<> (qualToConfig config)
<> (show (phyloLevel config))
writePhylo :: [Char] -> Phylo -> IO ()
writePhylo path phylo = Lazy.writeFile path $ encode phylo
readPhylo :: [Char] -> IO Phylo
readPhylo path = do
phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
case phyloJson of
Left err -> do
putStrLn err
undefined
Right phylo -> pure phylo
--------------
-- | Main | --
--------------
main :: IO ()
main = do
printIOMsg "Starting the reconstruction"
printIOMsg "Read the configuration file"
[args] <- getArgs
jsonArgs <- (eitherDecode <$> readJson args) :: IO (Either String Config)
case jsonArgs of
Left err -> putStrLn err
Right config -> do
printIOMsg "Parse the corpus"
mapList <- csvMapTermList (listPath config)
corpus <- fileToDocs' (corpusParser config) (corpusPath config) (timeUnit config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus")
printIOMsg "Reconstruct the phylo"
let phyloWithCliquesFile = (outputPath config) <> "phyloWithCliques_" <> (configToSha PhyloWithCliques config) <> ".json"
let phyloWithLinksFile = (outputPath config) <> "phyloWithLinks_" <> (configToSha PhyloWithLinks config) <> ".json"
phyloWithCliquesExists <- doesFileExist phyloWithCliquesFile
phyloWithLinksExists <- doesFileExist phyloWithLinksFile
-- phyloStep <- if phyloWithCliquesExists
-- then do
-- printIOMsg "Reconstruct the phylo step from an existing file"
-- readPhylo phyloWithCliquesFile
-- else do
-- printIOMsg "Reconstruct the phylo step from scratch"
-- pure $ toPhyloStep corpus mapList config
-- writePhylo phyloWithCliquesFile phyloStep
-- let phylo = toPhylo (setConfig config phyloStep)
phyloWithLinks <- if phyloWithLinksExists
then do
printIOMsg "Reconstruct the phylo from an existing file with intertemporal links"
readPhylo phyloWithLinksFile
else do
if phyloWithCliquesExists
then do
printIOMsg "Reconstruct the phylo from an existing file with cliques"
phyloWithCliques <- readPhylo phyloWithCliquesFile
writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config phyloWithCliques)
else do
printIOMsg "Reconstruct the phylo from scratch"
phyloWithCliques <- pure $ toPhyloStep corpus mapList config
writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config phyloWithCliques)
writePhylo phyloWithLinksFile phyloWithLinks
-- probes
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_synchronic_distance_cumu_jaccard.txt")
-- $ synchronicDistance' phylo 1
-- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_inflexion_points.txt")
-- $ inflexionPoints phylo 1
printIOMsg "End of reconstruction, start the export"
let dot = toPhyloExport (setConfig config phyloWithLinks)
let output = configToLabel config
dotToFile output dot
{-| {-|
Module : Main.hs Module : Main.hs
Description : Gargantext starter binary with Phylo Description : Gargantext starter binary with Adaptative Phylo
Copyright : (c) CNRS, 2017-Present Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3 License : AGPL + CECILL v3
Maintainer : team@gargantext.org Maintainer : team@gargantext.org
Stability : experimental Stability : experimental
Portability : POSIX Portability : POSIX
Phylo binaries Adaptative Phylo binaries
-} -}
{-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE StandaloneDeriving #-}
...@@ -17,153 +16,267 @@ Phylo binaries ...@@ -17,153 +16,267 @@ Phylo binaries
module Main where module Main where
import Control.Concurrent.Async as CCA (mapConcurrently) import Control.Concurrent.Async (mapConcurrently)
import Crypto.Hash.SHA256 (hash)
import Data.Aeson import Data.Aeson
import Data.List ((++),concat) import Data.Either (Either(..))
import Data.Maybe import Data.List (concat, nub, isSuffixOf)
import Data.Text (Text, unwords) import Data.Maybe (fromMaybe)
import GHC.Generics import Data.String (String)
import GHC.IO (FilePath) import GHC.IO (FilePath)
import System.Directory (doesFileExist) import qualified Prelude as Prelude
import System.Environment import System.Environment
import qualified Data.ByteString.Lazy as L import System.Directory (listDirectory,doesFileExist)
import qualified Data.List as DL import Data.Text (Text, unwords, unpack, replace, pack)
import qualified Data.Map as DM import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
import qualified Data.Text as DT
import qualified Data.Vector as DV import qualified Data.ByteString.Char8 as C8
import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as CSV import qualified Data.ByteString.Lazy as Lazy
import qualified Prelude as P import qualified Data.Vector as Vector
import qualified Data.Text as T
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata (HyperdataDocument(..))
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year, csv_publication_month, csv_publication_day,
csv'_source, csv'_title, csv'_abstract, csv'_publication_year, csv'_publication_month, csv'_publication_day, csv'_weight)
import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as Csv
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile) import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList) import Gargantext.Core.Text.List.Formats.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, extractTermsWithList)
import Gargantext.Core.Viz.Phylo import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.LevelMaker import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep)
import Gargantext.Core.Viz.Phylo.Tools import Gargantext.Core.Viz.Phylo.PhyloTools (printIOMsg, printIOComment, setConfig)
import Gargantext.Core.Viz.Phylo.View.Export import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
import Gargantext.Core.Viz.Phylo.View.ViewMaker -- import Gargantext.API.Ngrams.Prelude (toTermList)
-- import Debug.Trace (trace)
data PhyloStage = PhyloWithCliques | PhyloWithLinks deriving (Show)
---------------
-- | Tools | --
---------------
-- | To get all the files in a directory or just a file
getFilesFromPath :: FilePath -> IO([FilePath])
getFilesFromPath path = do
if (isSuffixOf "/" path)
then (listDirectory path)
else return [path]
---------------
-- | Dates | --
---------------
toMonths :: Integer -> Int -> Int -> Date
toMonths y m d = fromIntegral $ cdMonths
$ diffGregorianDurationClip (fromGregorian y m d) (fromGregorian 0000 0 0)
toDays :: Integer -> Int -> Int -> Date
toDays y m d = fromIntegral
$ diffDays (fromGregorian y m d) (fromGregorian 0000 0 0)
toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
toPhyloDate y m d tu = case tu of
Year _ _ _ -> y
Month _ _ _ -> toMonths (Prelude.toInteger y) m d
Week _ _ _ -> div (toDays (Prelude.toInteger y) m d) 7
Day _ _ _ -> toDays (Prelude.toInteger y) m d
-- Function to use in Database export
toPhyloDate' :: Int -> Int -> Int -> Text
toPhyloDate' y m d = pack $ showGregorian $ fromGregorian (Prelude.toInteger y) m d
-------------- --------------
-- | Conf | -- -- | Json | --
-------------- --------------
type ListPath = FilePath -- | To read and decode a Json file
type FisPath = FilePath readJson :: FilePath -> IO Lazy.ByteString
type CorpusPath = FilePath readJson path = Lazy.readFile path
data CorpusType = Wos | Csv deriving (Show,Generic)
type Limit = Int
----------------
data Conf = -- | Parser | --
Conf { corpusPath :: CorpusPath ----------------
, corpusType :: CorpusType
, listPath :: ListPath -- | To filter the Ngrams of a document based on the termList
, fisPath :: FilePath termsInText :: Patterns -> Text -> [Text]
, outputPath :: FilePath termsInText pats txt = nub $ concat $ map (map unwords) $ extractTermsWithList pats txt
, phyloName :: Text
, limit :: Limit
, timeGrain :: Int -- | To transform a Wos file (or [file]) into a list of Docs
, timeStep :: Int wosToDocs :: Int -> Patterns -> TimeUnit -> FilePath -> IO [Document]
, timeFrame :: Int wosToDocs limit patterns time path = do
, timeFrameTh :: Double files <- getFilesFromPath path
, timeTh :: Double let parseFile' file = do
, timeSens :: Double eParsed <- parseFile WOS (path <> file)
, reBranchThr :: Double case eParsed of
, reBranchNth :: Int Right ps -> pure ps
, clusterTh :: Double Left e -> panic $ "Error: " <> (pack e)
, clusterSens :: Double take limit
, phyloLevel :: Int <$> map (\d -> let title = fromJust $ _hd_title d
, viewLevel :: Int abstr = if (isJust $ _hd_abstract d)
, fisSupport :: Int then fromJust $ _hd_abstract d
, fisClique :: Int else ""
, minSizeBranch :: Int in Document (toPhyloDate
} deriving (Show,Generic) (fromIntegral $ fromJust $ _hd_publication_year d)
(fromJust $ _hd_publication_month d)
instance FromJSON Conf (fromJust $ _hd_publication_day d) time)
instance ToJSON Conf (toPhyloDate'
(fromIntegral $ fromJust $ _hd_publication_year d)
instance FromJSON CorpusType (fromJust $ _hd_publication_month d)
instance ToJSON CorpusType (fromJust $ _hd_publication_day d))
(termsInText patterns $ title <> " " <> abstr) Nothing [])
<$> concat
decoder :: P.Either a b -> b <$> mapConcurrently (\file ->
decoder (P.Left _) = P.error "Error" filter (\d -> (isJust $ _hd_publication_year d)
decoder (P.Right x) = x && (isJust $ _hd_title d))
<$> parseFile' file) files
-- | Get the conf from a Json file
getJson :: FilePath -> IO L.ByteString
getJson path = L.readFile path -- To transform a Csv file into a list of Document
csvToDocs :: CorpusParser -> Patterns -> TimeUnit -> FilePath -> IO [Document]
csvToDocs parser patterns time path =
case parser of
Wos _ -> undefined
Csv limit -> do
eR <- Csv.readFile path
case eR of
Right r ->
pure $ Vector.toList
$ Vector.take limit
$ Vector.map (\row -> Document (toPhyloDate (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row)
(fromMaybe Csv.defaultMonth $ csv_publication_month row)
(fromMaybe Csv.defaultDay $ csv_publication_day row)
time)
(toPhyloDate' (Csv.fromMIntOrDec Csv.defaultYear $ csv_publication_year row)
(fromMaybe Csv.defaultMonth $ csv_publication_month row)
(fromMaybe Csv.defaultDay $ csv_publication_day row))
(termsInText patterns $ (csv_title row) <> " " <> (csv_abstract row))
Nothing
[]
) $ snd r
Left e -> panic $ "Error: " <> (pack e)
Csv' limit -> Vector.toList
<$> Vector.take limit
<$> Vector.map (\row -> Document (toPhyloDate (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row) time)
(toPhyloDate' (csv'_publication_year row) (csv'_publication_month row) (csv'_publication_day row))
(termsInText patterns $ (csv'_title row) <> " " <> (csv'_abstract row))
(Just $ csv'_weight row)
[csv'_source row]
) <$> snd <$> Csv.readWeightedCsv path
-- To parse a file into a list of Document
fileToDocs' :: CorpusParser -> FilePath -> TimeUnit -> TermList -> IO [Document]
fileToDocs' parser path time lst = do
let patterns = buildPatterns lst
case parser of
Wos limit -> wosToDocs limit patterns time path
Csv _ -> csvToDocs parser patterns time path
Csv' _ -> csvToDocs parser patterns time path
--------------- ---------------
-- | Parse | -- -- | Label | --
--------------- ---------------
-- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (a, Text) -> (a, [Text]) -- Config time parameters to label
filterTerms patterns (y,d) = (y,termsInText patterns d) timeToLabel :: Config -> [Char]
timeToLabel config = case (timeUnit config) of
Year p s f -> ("time_years" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
-- | To transform a Csv nfile into a readable corpus Month p s f -> ("time_months"<> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
csvToCorpus :: Limit -> CorpusPath -> IO ([(Int,Text)]) Week p s f -> ("time_weeks" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
csvToCorpus limit csv = DV.toList Day p s f -> ("time_days" <> "_" <> (show p) <> "_" <> (show s) <> "_" <> (show f))
-- . DV.reverse
. DV.take limit
-- . DV.reverse seaToLabel :: Config -> [Char]
. DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n))) seaToLabel config = case (seaElevation config) of
. snd <$> CSV.readFile csv Constante start step -> ("sea_cst_" <> (show start) <> "_" <> (show step))
Adaptative granularity -> ("sea_adapt" <> (show granularity))
-- | To transform a Wos nfile into a readable corpus
wosToCorpus :: Limit -> CorpusPath -> IO ([(Int,Text)]) sensToLabel :: Config -> [Char]
wosToCorpus limit path = DL.take limit sensToLabel config = case (phyloProximity config) of
. map (\d -> ((fromJust $_hd_publication_year d) Hamming -> undefined
,(fromJust $_hd_title d) <> " " <> (fromJust $_hd_abstract d))) WeightedLogJaccard s -> ("WeightedLogJaccard_" <> show s)
. filter (\d -> (isJust $_hd_publication_year d) WeightedLogSim s -> ( "WeightedLogSim-sens_" <> show s)
&& (isJust $_hd_title d)
&& (isJust $_hd_abstract d))
. concat cliqueToLabel :: Config -> [Char]
<$> mapConcurrently (\idx -> parseFile WOS (path <> show(idx) <> ".txt")) [1..20] cliqueToLabel config = case (clique config) of
Fis s s' -> "fis_" <> (show s) <> "_" <> (show s')
MaxClique s t f -> "clique_" <> (show s)<> "_" <> (show f)<> "_" <> (show t)
-- | To use the correct parser given a CorpusType
fileToCorpus :: CorpusType -> Limit -> CorpusPath -> IO ([(Int,Text)])
fileToCorpus format limit path = case format of syncToLabel :: Config -> [Char]
Wos -> wosToCorpus limit path syncToLabel config = case (phyloSynchrony config) of
Csv -> csvToCorpus limit path ByProximityThreshold scl sync_sens scope _ -> ("scale_" <> (show scope) <> "_" <> (show sync_sens) <> "_" <> (show scl))
ByProximityDistribution _ _ -> undefined
-- | To parse a file into a list of Document qualToConfig :: Config -> [Char]
parse :: CorpusType -> Limit -> CorpusPath -> TermList -> IO [Document] qualToConfig config = case (phyloQuality config) of
parse format limit path l = do Quality g m -> "quality_" <> (show g) <> "_" <> (show m)
corpus <- fileToCorpus format limit path
let patterns = buildPatterns l
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus -- To set up the export file's label from the configuration
configToLabel :: Config -> [Char]
configToLabel config = outputPath config
-- | To parse an existing Fis file <> (unpack $ phyloName config)
parseFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> IO [PhyloFis] <> "-" <> (timeToLabel config)
parseFis path name grain step support clique = do <> "-scale_" <> (show (phyloLevel config))
fisExists <- doesFileExist (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json") <> "-" <> (seaToLabel config)
if fisExists <> "-" <> (sensToLabel config)
then do <> "-" <> (cliqueToLabel config)
fisJson <- (eitherDecode <$> getJson (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json")) :: IO (P.Either P.String [PhyloFis]) <> "-level_" <> (show (_qua_granularity $ phyloQuality config))
case fisJson of <> "-" <> (syncToLabel config)
P.Left err -> do <> ".dot"
putStrLn err
pure []
P.Right fis -> pure fis -- To write a sha256 from a set of config's parameters
else pure [] configToSha :: PhyloStage -> Config -> [Char]
configToSha stage config = unpack
writeFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> DM.Map (Date,Date) [PhyloFis] -> IO () $ replace "/" "-"
writeFis path name grain step support clique fis = do $ T.pack (show (hash $ C8.pack label))
let fisPath = path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json" where
L.writeFile fisPath $ encode (DL.concat $ DM.elems fis) label :: [Char]
label = case stage of
PhyloWithCliques -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
PhyloWithLinks -> (corpusPath config)
<> (listPath config)
<> (timeToLabel config)
<> (cliqueToLabel config)
<> (sensToLabel config)
<> (seaToLabel config)
<> (syncToLabel config)
<> (qualToConfig config)
<> (show (phyloLevel config))
writePhylo :: [Char] -> Phylo -> IO ()
writePhylo path phylo = Lazy.writeFile path $ encode phylo
readPhylo :: [Char] -> IO Phylo
readPhylo path = do
phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
case phyloJson of
Left err -> do
putStrLn err
undefined
Right phylo -> pure phylo
-------------- --------------
-- | Main | -- -- | Main | --
...@@ -171,49 +284,76 @@ writeFis path name grain step support clique fis = do ...@@ -171,49 +284,76 @@ writeFis path name grain step support clique fis = do
main :: IO () main :: IO ()
main = do main = do
printIOMsg "Starting the reconstruction"
printIOMsg "Read the configuration file"
[args] <- getArgs
jsonArgs <- (eitherDecode <$> readJson args) :: IO (Either String Config)
case jsonArgs of
Left err -> putStrLn err
Right config -> do
printIOMsg "Parse the corpus"
mapList <- csvMapTermList (listPath config)
corpus <- fileToDocs' (corpusParser config) (corpusPath config) (timeUnit config) mapList
printIOComment (show (length corpus) <> " parsed docs from the corpus")
[jsonPath] <- getArgs printIOMsg "Reconstruct the phylo"
confJson <- (eitherDecode <$> getJson jsonPath) :: IO (P.Either P.String Conf) let phyloWithCliquesFile = (outputPath config) <> "phyloWithCliques_" <> (configToSha PhyloWithCliques config) <> ".json"
let phyloWithLinksFile = (outputPath config) <> "phyloWithLinks_" <> (configToSha PhyloWithLinks config) <> ".json"
case confJson of phyloWithCliquesExists <- doesFileExist phyloWithCliquesFile
P.Left err -> putStrLn err phyloWithLinksExists <- doesFileExist phyloWithLinksFile
P.Right conf -> do
termList <- csvMapTermList (listPath conf) -- phyloStep <- if phyloWithCliquesExists
-- then do
-- printIOMsg "Reconstruct the phylo step from an existing file"
-- readPhylo phyloWithCliquesFile
-- else do
-- printIOMsg "Reconstruct the phylo step from scratch"
-- pure $ toPhyloStep corpus mapList config
corpus <- parse (corpusType conf) (limit conf) (corpusPath conf) termList -- writePhylo phyloWithCliquesFile phyloStep
putStrLn $ ("\n" <> show (length corpus) <> " parsed docs") -- let phylo = toPhylo (setConfig config phyloStep)
fis <- parseFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) -- QL: 2 files read from disk
phyloWithLinks <- if phyloWithLinksExists
then do
printIOMsg "Reconstruct the phylo from an existing file with intertemporal links"
readPhylo phyloWithLinksFile
else do
if phyloWithCliquesExists
then do
printIOMsg "Reconstruct the phylo from an existing file with cliques"
phyloWithCliques <- readPhylo phyloWithCliquesFile
writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config phyloWithCliques)
else do
printIOMsg "Reconstruct the phylo from scratch"
phyloWithCliques <- pure $ toPhyloStep corpus mapList config
writePhylo phyloWithCliquesFile phyloWithCliques
pure $ toPhylo (setConfig config phyloWithCliques)
putStrLn $ ("\n" <> show (length fis) <> " parsed fis") writePhylo phyloWithLinksFile phyloWithLinks
let fis' = DM.fromListWith (++) $ DL.sortOn (fst . fst) $ map (\f -> (getFisPeriod f,[f])) fis
let query = PhyloQueryBuild (phyloName conf) "" (timeGrain conf) (timeStep conf)
(Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (timeFrame conf) (timeFrameTh conf)
(reBranchThr conf) (reBranchNth conf) (phyloLevel conf)
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge,BranchBirth,BranchGroups] [SizeBranch $ SBParams (minSizeBranch conf)] [GroupLabelIncDyn,BranchPeakInc] (Just (ByBranchBirth,Asc)) Json Flat True -- probes
let phylo = toPhylo query corpus termList fis' -- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_synchronic_distance_cumu_jaccard.txt")
-- $ synchronicDistance' phylo 1
writeFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) (getPhyloFis phylo) -- writeFile ((outputPath config) <> (unpack $ phyloName config) <> "_inflexion_points.txt")
-- $ inflexionPoints phylo 1
let view = toPhyloView queryView phylo printIOMsg "End of reconstruction, start the export"
putStrLn $ ("phylo completed until level " <> show (phyloLevel conf) <> ", export at level " <> show (viewLevel conf)) let dot = toPhyloExport (setConfig config phyloWithLinks)
let outputFile = (outputPath conf) <> (DT.unpack $ phyloName conf) let output = configToLabel config
<> "_" <> show (limit conf) <> "_"
<> "_" <> show (timeTh conf) <> "_"
<> "_" <> show (timeSens conf) <> "_"
<> "_" <> show (clusterTh conf) <> "_"
<> "_" <> show (clusterSens conf)
<> ".dot"
P.writeFile outputFile $ dotToString $ viewToDot view dotToFile output dot
...@@ -100,7 +100,7 @@ library: ...@@ -100,7 +100,7 @@ library:
- Gargantext.Core.Viz.Graph.Tools - Gargantext.Core.Viz.Graph.Tools
- Gargantext.Core.Viz.Graph.Tools.IGraph - Gargantext.Core.Viz.Graph.Tools.IGraph
- Gargantext.Core.Viz.Graph.Index - Gargantext.Core.Viz.Graph.Index
- Gargantext.Core.Viz.AdaptativePhylo - Gargantext.Core.Viz.Phylo
- Gargantext.Core.Viz.Phylo.PhyloMaker - Gargantext.Core.Viz.Phylo.PhyloMaker
- Gargantext.Core.Viz.Phylo.PhyloTools - Gargantext.Core.Viz.Phylo.PhyloTools
- Gargantext.Core.Viz.Phylo.PhyloExport - Gargantext.Core.Viz.Phylo.PhyloExport
...@@ -322,9 +322,9 @@ executables: ...@@ -322,9 +322,9 @@ executables:
- unordered-containers - unordered-containers
- full-text-search - full-text-search
gargantext-adaptative-phylo: gargantext-phylo:
main: Main.hs main: Main.hs
source-dirs: bin/gargantext-adaptative-phylo source-dirs: bin/gargantext-phylo
ghc-options: ghc-options:
- -threaded - -threaded
- -rtsopts - -rtsopts
......
...@@ -64,7 +64,7 @@ import Gargantext.Core.Viz.Graph.FGL (Graph_Undirected, degree, neighbors, mkGra ...@@ -64,7 +64,7 @@ import Gargantext.Core.Viz.Graph.FGL (Graph_Undirected, degree, neighbors, mkGra
import Gargantext.Core.Viz.Graph.Tools (cooc2graph',cooc2graph'', Threshold) import Gargantext.Core.Viz.Graph.Tools (cooc2graph',cooc2graph'', Threshold)
import Gargantext.Core.Methods.Distances (Distance) import Gargantext.Core.Methods.Distances (Distance)
import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex) import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex)
import Gargantext.Core.Viz.AdaptativePhylo import Gargantext.Core.Viz.Phylo
-- import Debug.Trace (trace) -- import Debug.Trace (trace)
type Graph = Graph_Undirected type Graph = Graph_Undirected
type Neighbor = Node type Neighbor = Node
......
...@@ -24,47 +24,42 @@ one 8, e54847. ...@@ -24,47 +24,42 @@ one 8, e54847.
{-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Viz.AdaptativePhylo where module Gargantext.Core.Viz.Phylo where
import Control.DeepSeq (NFData)
import Control.Lens (makeLenses)
import Data.Aeson import Data.Aeson
import Data.Aeson.TH (deriveJSON) import Data.Aeson.TH (deriveJSON)
import Data.Map (Map)
import Data.Text (Text, pack) import Data.Text (Text, pack)
import Data.Vector (Vector) import Data.Vector (Vector)
import Data.Map (Map)
import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList)
import GHC.Generics import GHC.Generics
import GHC.IO (FilePath) import GHC.IO (FilePath)
import Control.DeepSeq (NFData) import Gargantext.Core.Text.Context (TermList)
import Control.Lens (makeLenses) import Gargantext.Core.Utils.Prefix (unPrefix)
import Gargantext.Prelude
import qualified Data.Text.Lazy as TextLazy import qualified Data.Text.Lazy as TextLazy
---------------- ----------------
-- | Config | -- -- | Config | --
---------------- ----------------
data CorpusParser = data CorpusParser =
Wos {_wos_limit :: Int} Wos {_wos_limit :: Int}
| Csv {_csv_limit :: Int} | Csv {_csv_limit :: Int}
| Csv' {_csv'_limit :: Int} | Csv' {_csv'_limit :: Int}
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data SeaElevation = data SeaElevation =
Constante Constante
{ _cons_start :: Double { _cons_start :: Double
, _cons_step :: Double } , _cons_step :: Double }
| Adaptative | Adaptative
{ _adap_granularity :: Double } { _adap_granularity :: Double }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data Proximity = data Proximity =
WeightedLogJaccard WeightedLogJaccard
{ _wlj_sensibility :: Double { _wlj_sensibility :: Double
{- {-
-- , _wlj_thresholdInit :: Double -- , _wlj_thresholdInit :: Double
...@@ -73,7 +68,7 @@ data Proximity = ...@@ -73,7 +68,7 @@ data Proximity =
-- , _wlj_elevation :: Double -- , _wlj_elevation :: Double
-} -}
} }
| WeightedLogSim | WeightedLogSim
{ _wlj_sensibility :: Double { _wlj_sensibility :: Double
{- {-
-- , _wlj_thresholdInit :: Double -- , _wlj_thresholdInit :: Double
...@@ -81,66 +76,66 @@ data Proximity = ...@@ -81,66 +76,66 @@ data Proximity =
-- | max height for sea level in temporal matching -- | max height for sea level in temporal matching
-- , _wlj_elevation :: Double -- , _wlj_elevation :: Double
-} -}
} }
| Hamming | Hamming
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data SynchronyScope = SingleBranch | SiblingBranches | AllBranches deriving (Show,Generic,Eq) data SynchronyScope = SingleBranch | SiblingBranches | AllBranches deriving (Show,Generic,Eq)
data SynchronyStrategy = MergeRegularGroups | MergeAllGroups deriving (Show,Generic,Eq) data SynchronyStrategy = MergeRegularGroups | MergeAllGroups deriving (Show,Generic,Eq)
data Synchrony = data Synchrony =
ByProximityThreshold ByProximityThreshold
{ _bpt_threshold :: Double { _bpt_threshold :: Double
, _bpt_sensibility :: Double , _bpt_sensibility :: Double
, _bpt_scope :: SynchronyScope , _bpt_scope :: SynchronyScope
, _bpt_strategy :: SynchronyStrategy } , _bpt_strategy :: SynchronyStrategy }
| ByProximityDistribution | ByProximityDistribution
{ _bpd_sensibility :: Double { _bpd_sensibility :: Double
, _bpd_strategy :: SynchronyStrategy } , _bpd_strategy :: SynchronyStrategy }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data TimeUnit = data TimeUnit =
Year Year
{ _year_period :: Int { _year_period :: Int
, _year_step :: Int , _year_step :: Int
, _year_matchingFrame :: Int } , _year_matchingFrame :: Int }
| Month | Month
{ _month_period :: Int { _month_period :: Int
, _month_step :: Int , _month_step :: Int
, _month_matchingFrame :: Int } , _month_matchingFrame :: Int }
| Week | Week
{ _week_period :: Int { _week_period :: Int
, _week_step :: Int , _week_step :: Int
, _week_matchingFrame :: Int } , _week_matchingFrame :: Int }
| Day | Day
{ _day_period :: Int { _day_period :: Int
, _day_step :: Int , _day_step :: Int
, _day_matchingFrame :: Int } , _day_matchingFrame :: Int }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data CliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq) data CliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq)
data Clique = data Clique =
Fis Fis
{ _fis_support :: Int { _fis_support :: Int
, _fis_size :: Int } , _fis_size :: Int }
| MaxClique | MaxClique
{ _mcl_size :: Int { _mcl_size :: Int
, _mcl_threshold :: Double , _mcl_threshold :: Double
, _mcl_filter :: CliqueFilter } , _mcl_filter :: CliqueFilter }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data Quality = data Quality =
Quality { _qua_granularity :: Double Quality { _qua_granularity :: Double
, _qua_minBranch :: Int } , _qua_minBranch :: Int }
deriving (Show,Generic,Eq) deriving (Show,Generic,Eq)
data Config = data Config =
Config { corpusPath :: FilePath Config { corpusPath :: FilePath
, listPath :: FilePath , listPath :: FilePath
, outputPath :: FilePath , outputPath :: FilePath
...@@ -156,12 +151,12 @@ data Config = ...@@ -156,12 +151,12 @@ data Config =
, clique :: Clique , clique :: Clique
, exportLabel :: [PhyloLabel] , exportLabel :: [PhyloLabel]
, exportSort :: Sort , exportSort :: Sort
, exportFilter :: [Filter] , exportFilter :: [Filter]
} deriving (Show,Generic,Eq) } deriving (Show,Generic,Eq)
defaultConfig :: Config defaultConfig :: Config
defaultConfig = defaultConfig =
Config { corpusPath = "" Config { corpusPath = ""
, listPath = "" , listPath = ""
, outputPath = "" , outputPath = ""
...@@ -177,39 +172,54 @@ defaultConfig = ...@@ -177,39 +172,54 @@ defaultConfig =
, clique = MaxClique 0 3 ByNeighbours , clique = MaxClique 0 3 ByNeighbours
, exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2] , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
, exportSort = ByHierarchy , exportSort = ByHierarchy
, exportFilter = [ByBranchSize 2] , exportFilter = [ByBranchSize 2]
} }
instance FromJSON Config instance FromJSON Config
instance ToJSON Config instance ToJSON Config
instance FromJSON CorpusParser instance FromJSON CorpusParser
instance ToJSON CorpusParser instance ToJSON CorpusParser
instance FromJSON Proximity instance FromJSON Proximity
instance ToJSON Proximity instance ToJSON Proximity
instance FromJSON SeaElevation instance FromJSON SeaElevation
instance ToJSON SeaElevation instance ToJSON SeaElevation
instance FromJSON TimeUnit instance FromJSON TimeUnit
instance ToJSON TimeUnit instance ToJSON TimeUnit
instance FromJSON CliqueFilter instance FromJSON CliqueFilter
instance ToJSON CliqueFilter instance ToJSON CliqueFilter
instance FromJSON Clique instance FromJSON Clique
instance ToJSON Clique instance ToJSON Clique
instance FromJSON PhyloLabel instance FromJSON PhyloLabel
instance ToJSON PhyloLabel instance ToJSON PhyloLabel
instance FromJSON Tagger instance FromJSON Tagger
instance ToJSON Tagger instance ToJSON Tagger
instance FromJSON Sort instance FromJSON Sort
instance ToJSON Sort instance ToJSON Sort
instance FromJSON Order instance FromJSON Order
instance ToJSON Order instance ToJSON Order
instance FromJSON Filter instance FromJSON Filter
instance ToJSON Filter instance ToJSON Filter
instance FromJSON SynchronyScope instance FromJSON SynchronyScope
instance ToJSON SynchronyScope instance ToJSON SynchronyScope
instance FromJSON SynchronyStrategy instance FromJSON SynchronyStrategy
instance ToJSON SynchronyStrategy instance ToJSON SynchronyStrategy
instance FromJSON Synchrony instance FromJSON Synchrony
instance ToJSON Synchrony instance ToJSON Synchrony
instance FromJSON Quality instance FromJSON Quality
instance ToJSON Quality instance ToJSON Quality
...@@ -221,7 +231,7 @@ data Software = ...@@ -221,7 +231,7 @@ data Software =
} deriving (Generic, Show, Eq) } deriving (Generic, Show, Eq)
defaultSoftware :: Software defaultSoftware :: Software
defaultSoftware = defaultSoftware =
Software { _software_name = pack "Gargantext" Software { _software_name = pack "Gargantext"
, _software_version = pack "v4" } , _software_version = pack "v4" }
...@@ -252,13 +262,14 @@ type Ngrams = Text ...@@ -252,13 +262,14 @@ type Ngrams = Text
-- Document : a piece of Text linked to a Date -- Document : a piece of Text linked to a Date
-- date = computational date; date' = original string date yyyy-mm-dd -- date = computational date; date' = original string date yyyy-mm-dd
-- Export Database to Document
data Document = Document data Document = Document
{ date :: Date { date :: Date -- datatype Date {unDate :: Int}
, date' :: Text , date' :: Text -- show date
, text :: [Ngrams] , text :: [Ngrams]
, weight :: Maybe Double , weight :: Maybe Double
, sources :: [Text] , sources :: [Text]
} deriving (Eq,Show,Generic,NFData) } deriving (Eq,Show,Generic,NFData)
-------------------- --------------------
...@@ -266,7 +277,7 @@ data Document = Document ...@@ -266,7 +277,7 @@ data Document = Document
-------------------- --------------------
-- | The Foundations of a Phylo created from a given TermList -- | The Foundations of a Phylo created from a given TermList
data PhyloFoundations = PhyloFoundations data PhyloFoundations = PhyloFoundations
{ _foundations_roots :: !(Vector Ngrams) { _foundations_roots :: !(Vector Ngrams)
, _foundations_mapList :: TermList , _foundations_mapList :: TermList
...@@ -303,8 +314,8 @@ data Phylo = ...@@ -303,8 +314,8 @@ data Phylo =
, _phylo_timeCooc :: !(Map Date Cooc) , _phylo_timeCooc :: !(Map Date Cooc)
, _phylo_timeDocs :: !(Map Date Double) , _phylo_timeDocs :: !(Map Date Double)
, _phylo_termFreq :: !(Map Int Double) , _phylo_termFreq :: !(Map Int Double)
, _phylo_lastTermFreq :: !(Map Int Double) , _phylo_lastTermFreq :: !(Map Int Double)
, _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double) , _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
, _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double) , _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
, _phylo_param :: PhyloParam , _phylo_param :: PhyloParam
, _phylo_periods :: Map PhyloPeriodId PhyloPeriod , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
...@@ -322,13 +333,13 @@ data PhyloPeriod = ...@@ -322,13 +333,13 @@ data PhyloPeriod =
PhyloPeriod { _phylo_periodPeriod :: (Date,Date) PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
, _phylo_periodPeriod' :: (Text,Text) , _phylo_periodPeriod' :: (Text,Text)
, _phylo_periodLevels :: Map PhyloLevelId PhyloLevel , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
} deriving (Generic, Show, Eq) } deriving (Generic, Show, Eq)
-- | Level : a level of clustering -- | Level : a level of clustering
type Level = Int type Level = Int
-- | PhyloLevelId : the id of a level of clustering in a given period -- | PhyloLevelId : the id of a level of clustering in a given period
type PhyloLevelId = (PhyloPeriodId,Level) type PhyloLevelId = (PhyloPeriodId,Level)
-- | PhyloLevel : levels of phylomemy on a synchronic axis -- | PhyloLevel : levels of phylomemy on a synchronic axis
...@@ -339,10 +350,10 @@ type PhyloLevelId = (PhyloPeriodId,Level) ...@@ -339,10 +350,10 @@ type PhyloLevelId = (PhyloPeriodId,Level)
data PhyloLevel = data PhyloLevel =
PhyloLevel { _phylo_levelPeriod :: (Date,Date) PhyloLevel { _phylo_levelPeriod :: (Date,Date)
, _phylo_levelPeriod' :: (Text,Text) , _phylo_levelPeriod' :: (Text,Text)
, _phylo_levelLevel :: Level , _phylo_levelLevel :: Level
, _phylo_levelGroups :: Map PhyloGroupId PhyloGroup , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
} }
deriving (Generic, Show, Eq) deriving (Generic, Show, Eq)
type PhyloGroupId = (PhyloLevelId, Int) type PhyloGroupId = (PhyloLevelId, Int)
...@@ -352,15 +363,15 @@ type PhyloGroupId = (PhyloLevelId, Int) ...@@ -352,15 +363,15 @@ type PhyloGroupId = (PhyloLevelId, Int)
type PhyloBranchId = (Level, [Int]) type PhyloBranchId = (Level, [Int])
-- | PhyloGroup : group of ngrams at each level and period -- | PhyloGroup : group of ngrams at each level and period
data PhyloGroup = data PhyloGroup =
PhyloGroup { _phylo_groupPeriod :: (Date,Date) PhyloGroup { _phylo_groupPeriod :: (Date,Date)
, _phylo_groupPeriod' :: (Text,Text) , _phylo_groupPeriod' :: (Text,Text)
, _phylo_groupLevel :: Level , _phylo_groupLevel :: Level
, _phylo_groupIndex :: Int , _phylo_groupIndex :: Int
, _phylo_groupLabel :: Text , _phylo_groupLabel :: Text
, _phylo_groupSupport :: Support , _phylo_groupSupport :: Support
, _phylo_groupWeight :: Maybe Double , _phylo_groupWeight :: Maybe Double
, _phylo_groupSources :: [Int] , _phylo_groupSources :: [Int]
, _phylo_groupNgrams :: [Int] , _phylo_groupNgrams :: [Int]
, _phylo_groupCooc :: !(Cooc) , _phylo_groupCooc :: !(Cooc)
, _phylo_groupBranchId :: PhyloBranchId , _phylo_groupBranchId :: PhyloBranchId
...@@ -379,8 +390,8 @@ type Weight = Double ...@@ -379,8 +390,8 @@ type Weight = Double
-- | Pointer : A weighted pointer to a given PhyloGroup -- | Pointer : A weighted pointer to a given PhyloGroup
type Pointer = (PhyloGroupId, Weight) type Pointer = (PhyloGroupId, Weight)
data Filiation = ToParents | ToChilds deriving (Generic, Show) data Filiation = ToParents | ToChilds deriving (Generic, Show)
data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show) data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
---------------------- ----------------------
...@@ -414,7 +425,7 @@ data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Ge ...@@ -414,7 +425,7 @@ data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Ge
data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq) data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
data PhyloLabel = data PhyloLabel =
BranchLabel BranchLabel
{ _branch_labelTagger :: Tagger { _branch_labelTagger :: Tagger
, _branch_labelSize :: Int } , _branch_labelSize :: Int }
...@@ -469,16 +480,22 @@ makeLenses ''PhyloBranch ...@@ -469,16 +480,22 @@ makeLenses ''PhyloBranch
instance FromJSON Phylo instance FromJSON Phylo
instance ToJSON Phylo instance ToJSON Phylo
instance FromJSON PhyloSources instance FromJSON PhyloSources
instance ToJSON PhyloSources instance ToJSON PhyloSources
instance FromJSON PhyloParam instance FromJSON PhyloParam
instance ToJSON PhyloParam instance ToJSON PhyloParam
instance FromJSON PhyloPeriod instance FromJSON PhyloPeriod
instance ToJSON PhyloPeriod instance ToJSON PhyloPeriod
instance FromJSON PhyloLevel instance FromJSON PhyloLevel
instance ToJSON PhyloLevel instance ToJSON PhyloLevel
instance FromJSON Software instance FromJSON Software
instance ToJSON Software instance ToJSON Software
instance FromJSON PhyloGroup instance FromJSON PhyloGroup
instance ToJSON PhyloGroup instance ToJSON PhyloGroup
......
...@@ -15,23 +15,20 @@ Portability : POSIX ...@@ -15,23 +15,20 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.PhyloExample where module Gargantext.Core.Viz.Phylo.PhyloExample where
import Control.Lens
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.List (sortOn, nub, sort) import Data.List (sortOn, nub, sort)
import Data.Map (Map) import Data.Map (Map)
import Data.Text (Text, toLower) import Data.Text (Text, toLower)
import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.Mono (monoTexts) import Gargantext.Core.Text.Terms.Mono (monoTexts)
import Gargantext.Core.Viz.AdaptativePhylo import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.PhyloMaker
import Gargantext.Core.Viz.Phylo.PhyloExport import Gargantext.Core.Viz.Phylo.PhyloExport
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching) import Gargantext.Core.Viz.Phylo.PhyloMaker
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering) import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching)
import Control.Lens import Gargantext.Prelude
import Data.GraphViz.Types.Generalised (DotGraph)
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
--------------------------------- ---------------------------------
...@@ -158,4 +155,4 @@ corpus = sortOn fst [ ...@@ -158,4 +155,4 @@ corpus = sortOn fst [
(-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."), (-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."),
(-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."), (-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."),
(-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."), (-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."),
(-42,"Cassius aurait envisage de s'emparer d'Alexandrie quand le 'debarquement' en Grèce d'Antoine et d'Octave l'oblige à renoncer à ses projets")] (-42,"Cassius aurait envisage de s'emparer d'Alexandrie quand le 'debarquement' en Grèce d'Antoine et d'Octave l'oblige à renoncer à ses projets")]
\ No newline at end of file
...@@ -12,30 +12,27 @@ Portability : POSIX ...@@ -12,30 +12,27 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.PhyloExport where module Gargantext.Core.Viz.Phylo.PhyloExport where
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
import Data.Vector (Vector)
import Prelude (writeFile)
import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.GraphViz hiding (DotGraph, Order) import Data.GraphViz hiding (DotGraph, Order)
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order) import Data.GraphViz.Attributes.Complete hiding (EdgeType, Order)
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Types.Monadic import Data.GraphViz.Types.Monadic
import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
import Data.Text.Lazy (fromStrict, pack, unpack) import Data.Text.Lazy (fromStrict, pack, unpack)
import System.FilePath import Data.Vector (Vector)
import Debug.Trace (trace) import Debug.Trace (trace)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (filterDocs, filterDiago, reduceDiagos, toProximity, getNextPeriods)
import Gargantext.Prelude
import Prelude (writeFile)
import System.FilePath
import qualified Data.GraphViz.Attributes.HTML as H
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.Vector as Vector
import qualified Data.Text.Lazy as Lazy import qualified Data.Text.Lazy as Lazy
import qualified Data.GraphViz.Attributes.HTML as H import qualified Data.Vector as Vector
-------------------- --------------------
-- | Dot export | -- -- | Dot export | --
......
...@@ -11,30 +11,29 @@ Portability : POSIX ...@@ -11,30 +11,29 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.PhyloMaker where module Gargantext.Core.Viz.Phylo.PhyloMaker where
import Control.DeepSeq (NFData)
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail) import Data.List (concat, nub, partition, sort, (++), group, intersect, null, sortOn, groupBy, tail)
import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert) import Data.Map (Map, fromListWith, keys, unionWith, fromList, empty, toList, elems, (!), restrictKeys, foldlWithKey, insert)
import Data.Vector (Vector)
import Data.Text (Text) import Data.Text (Text)
import Data.Vector (Vector)
import Debug.Trace (trace)
import Gargantext.Prelude import Gargantext.Core.Methods.Distances (Distance(Conditional))
import Gargantext.Core.Viz.AdaptativePhylo import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques)
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Text.Context (TermList) import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, fisWithSizePolyMap', Size(..)) import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, fisWithSizePolyMap', Size(..))
import Gargantext.Core.Methods.Graph.MaxClique (getMaxCliques) import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Methods.Distances (Distance(Conditional))
import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon) import Gargantext.Core.Viz.Phylo.PhyloExport (toHorizon)
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.SynchronicClustering (synchronicClustering)
import Gargantext.Core.Viz.Phylo.TemporalMatching (adaptativeTemporalMatching, constanteTemporalMatching, getNextPeriods, filterDocs, filterDiago, reduceDiagos, toProximity)
import Gargantext.Prelude
import Control.DeepSeq (NFData)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Debug.Trace (trace)
import Control.Lens hiding (Level)
import qualified Data.Vector as Vector
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Vector as Vector
------------------ ------------------
-- | To Phylo | -- -- | To Phylo | --
...@@ -162,6 +161,7 @@ indexDates' m = map (\docs -> ...@@ -162,6 +161,7 @@ indexDates' m = map (\docs ->
-- To build the first phylo step from docs and terms -- To build the first phylo step from docs and terms
-- QL: backend entre phyloBase et phyloClique
toPhyloStep :: [Document] -> TermList -> Config -> Phylo toPhyloStep :: [Document] -> TermList -> Config -> Phylo
toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of
Constante _ _ -> appendGroups cliqueToGroup 1 phyloClique (updatePeriods (indexDates' docs') phyloBase) Constante _ _ -> appendGroups cliqueToGroup 1 phyloClique (updatePeriods (indexDates' docs') phyloBase)
...@@ -173,6 +173,7 @@ toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of ...@@ -173,6 +173,7 @@ toPhyloStep docs lst conf = case (getSeaElevation phyloBase) of
phyloClique = toPhyloClique phyloBase docs' phyloClique = toPhyloClique phyloBase docs'
-------------------------------------- --------------------------------------
docs' :: Map (Date,Date) [Document] docs' :: Map (Date,Date) [Document]
-- QL: Time Consuming here
docs' = groupDocsByPeriodRec date (getPeriodIds phyloBase) (sortOn date docs) empty docs' = groupDocsByPeriodRec date (getPeriodIds phyloBase) (sortOn date docs) empty
-------------------------------------- --------------------------------------
phyloBase :: Phylo phyloBase :: Phylo
......
...@@ -12,28 +12,23 @@ Portability : POSIX ...@@ -12,28 +12,23 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.PhyloTools where module Gargantext.Core.Viz.Phylo.PhyloTools where
import Data.Vector (Vector, elemIndex) import Control.Lens hiding (Level)
import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group) import Data.List (sort, concat, null, union, (++), tails, sortOn, nub, init, tail, partition, tails, nubBy, group)
import Data.Set (Set, disjoint)
import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys) import Data.Map (Map, elems, fromList, unionWith, keys, member, (!), filterWithKey, fromListWith, empty, restrictKeys)
import Data.Set (Set, disjoint)
import Data.String (String) import Data.String (String)
import Data.Text (Text,unpack) import Data.Text (Text,unpack)
import Data.Vector (Vector, elemIndex)
import Prelude (floor,read) import Debug.Trace (trace)
import Gargantext.Core.Viz.Phylo
import Gargantext.Prelude import Gargantext.Prelude
import Gargantext.Core.Viz.AdaptativePhylo import Prelude (floor,read)
import Text.Printf import Text.Printf
import Debug.Trace (trace)
import Control.Lens hiding (Level)
import qualified Data.Vector as Vector
import qualified Data.List as List import qualified Data.List as List
import qualified Data.Set as Set
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Text as Text import qualified Data.Text as Text
import qualified Data.Vector as Vector
------------ ------------
-- | Io | -- -- | Io | --
......
...@@ -11,20 +11,17 @@ Portability : POSIX ...@@ -11,20 +11,17 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.SynchronicClustering where module Gargantext.Core.Viz.Phylo.SynchronicClustering where
import Gargantext.Prelude -- import Debug.Trace (trace)
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
import Gargantext.Core.Viz.Phylo.PhyloExport (processDynamics)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn, groupBy)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Control.Lens hiding (Level) import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Control.Monad (sequence) import Control.Monad (sequence)
-- import Debug.Trace (trace) import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List ((++), null, intersect, nub, concat, sort, sortOn, groupBy)
import Data.Map (Map, fromList, fromListWith, foldlWithKey, (!), insert, empty, restrictKeys, elems, mapWithKey, member)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.PhyloExport (processDynamics)
import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Core.Viz.Phylo.TemporalMatching (weightedLogJaccard', filterDiago, reduceDiagos)
import Gargantext.Prelude
import qualified Data.Map as Map import qualified Data.Map as Map
...@@ -32,7 +29,6 @@ import qualified Data.Map as Map ...@@ -32,7 +29,6 @@ import qualified Data.Map as Map
-- | New Level Maker | -- -- | New Level Maker | --
------------------------- -------------------------
mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup mergeGroups :: [Cooc] -> PhyloGroupId -> Map PhyloGroupId PhyloGroupId -> [PhyloGroup] -> PhyloGroup
mergeGroups coocs id mapIds childs = mergeGroups coocs id mapIds childs =
let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs let ngrams = (sort . nub . concat) $ map _phylo_groupNgrams childs
......
...@@ -11,26 +11,21 @@ Portability : POSIX ...@@ -11,26 +11,21 @@ Portability : POSIX
module Gargantext.Core.Viz.Phylo.TemporalMatching where module Gargantext.Core.Viz.Phylo.TemporalMatching where
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, nubBy, union, dropWhile, partition, or, sort, (!!)) import Data.List (concat, splitAt, tail, sortOn, (++), intersect, null, inits, groupBy, scanl, nub, nubBy, union, dropWhile, partition, or, sort, (!!))
import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust) import Data.Map (Map, fromList, elems, restrictKeys, unionWith, findWithDefault, keys, (!), (!?), filterWithKey, singleton, empty, mapKeys, adjust)
import Debug.Trace (trace)
import Gargantext.Prelude import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.AdaptativePhylo
import Gargantext.Core.Viz.Phylo.PhyloTools import Gargantext.Core.Viz.Phylo.PhyloTools
import Gargantext.Prelude
import Prelude (floor,tan,pi) import Prelude (floor,tan,pi)
import Control.Lens hiding (Level)
import Control.Parallel.Strategies (parList, rdeepseq, using)
import Debug.Trace (trace)
import Text.Printf import Text.Printf
import qualified Data.Map as Map import qualified Data.Map as Map
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
------------------- -------------------
-- | Proximity | -- -- | Proximity | --
------------------- -------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment