Commit 2b0ea17d authored by qlobbe's avatar qlobbe

refactoring 2021, remove old phylo files, keep API

parent 8bb9ff6c
Pipeline #1416 canceled with stage
{-|
Module : Main.hs
Description : Gargantext starter binary with Phylo
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Phylo binaries
-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeOperators #-}
{-# LANGUAGE Strict #-}
module Main where
import Control.Concurrent.Async as CCA (mapConcurrently)
import Control.Monad (mapM)
import Data.Aeson
import Data.List ((++),concat)
import Data.Maybe
import Data.Text (Text, unwords, unlines)
import GHC.Generics
import GHC.IO (FilePath)
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node
import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Corpus.Parsers (FileFormat(..),parseFile)
import Gargantext.Core.Text.Corpus.Parsers.CSV (csv_title, csv_abstract, csv_publication_year)
import Gargantext.Core.Text.List.CSV (csvMapTermList)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.LevelMaker
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.View.Export
import Gargantext.Core.Viz.Phylo.View.ViewMaker
import System.Directory (doesFileExist)
import System.Environment
import qualified Data.ByteString.Lazy as L
import qualified Data.List as DL
import qualified Data.Map as DM
import qualified Data.Text as DT
import qualified Data.Vector as DV
import qualified Gargantext.Core.Text.Corpus.Parsers.CSV as CSV
import qualified Prelude as P
--------------
-- | Conf | --
--------------
type ListPath = FilePath
type FisPath = FilePath
type CorpusPath = FilePath
data CorpusType = Wos | Csv deriving (Show,Generic)
type Limit = Int
data Conf =
Conf { corpusPath :: CorpusPath
, corpusType :: CorpusType
, listPath :: ListPath
, fisPath :: FilePath
, outputPath :: FilePath
, phyloName :: Text
, limit :: Limit
, timeGrain :: Int
, timeStep :: Int
, timeFrame :: Int
, timeFrameTh :: Double
, timeTh :: Double
, timeSens :: Double
, reBranchThr :: Double
, reBranchNth :: Int
, clusterTh :: Double
, clusterSens :: Double
, phyloLevel :: Int
, viewLevel :: Int
, fisSupport :: Int
, fisClique :: Int
, minSizeBranch :: Int
} deriving (Show,Generic)
instance FromJSON Conf
instance ToJSON Conf
instance FromJSON CorpusType
instance ToJSON CorpusType
decoder :: P.Either a b -> b
decoder (P.Left _) = P.error "Error"
decoder (P.Right x) = x
-- | Get the conf from a Json file
getJson :: FilePath -> IO L.ByteString
getJson path = L.readFile path
---------------
-- | Parse | --
---------------
-- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (a, Text) -> (a, [Text])
filterTerms patterns (y,d) = (y,termsInText patterns d)
where
--------------------------------------
termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = DL.nub
$ DL.concat
$ map (map unwords)
$ extractTermsWithList pats txt
--------------------------------------
-- | To transform a Csv nfile into a readable corpus
csvToCorpus :: Limit -> CorpusPath -> IO ([(Int,Text)])
csvToCorpus limit csv = DV.toList
-- . DV.reverse
. DV.take limit
-- . DV.reverse
. DV.map (\n -> (csv_publication_year n, (csv_title n) <> " " <> (csv_abstract n)))
. snd <$> CSV.readFile csv
-- | To transform a Wos nfile into a readable corpus
wosToCorpus :: Limit -> CorpusPath -> IO ([(Int,Text)])
wosToCorpus limit path = DL.take limit
. map (\d -> ((fromJust $_hd_publication_year d)
,(fromJust $_hd_title d) <> " " <> (fromJust $_hd_abstract d)))
. filter (\d -> (isJust $_hd_publication_year d)
&& (isJust $_hd_title d)
&& (isJust $_hd_abstract d))
. concat
<$> mapConcurrently (\idx -> parseFile WOS (path <> show(idx) <> ".txt")) [1..20]
-- | To use the correct parser given a CorpusType
fileToCorpus :: CorpusType -> Limit -> CorpusPath -> IO ([(Int,Text)])
fileToCorpus format limit path = case format of
Wos -> wosToCorpus limit path
Csv -> csvToCorpus limit path
-- | To parse a file into a list of Document
parse :: CorpusType -> Limit -> CorpusPath -> TermList -> IO [Document]
parse format limit path l = do
corpus <- fileToCorpus format limit path
let patterns = buildPatterns l
pure $ map ( (\(y,t) -> Document y t) . filterTerms patterns) corpus
-- | To parse an existing Fis file
parseFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> IO [PhyloFis]
parseFis path name grain step support clique = do
fisExists <- doesFileExist (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json")
if fisExists
then do
fisJson <- (eitherDecode <$> getJson (path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json")) :: IO (P.Either P.String [PhyloFis])
case fisJson of
P.Left err -> do
putStrLn err
pure []
P.Right fis -> pure fis
else pure []
writeFis :: FisPath -> Text -> Int -> Int -> Int -> Int -> DM.Map (Date,Date) [PhyloFis] -> IO ()
writeFis path name grain step support clique fis = do
let fisPath = path <> (DT.unpack name) <> "_" <> show(grain) <> "_" <> show(step) <> "_" <> show(support) <> "_" <> show(clique) <> ".json"
L.writeFile fisPath $ encode (DL.concat $ DM.elems fis)
--------------
-- | Main | --
--------------
main :: IO ()
main = do
[jsonPath] <- getArgs
confJson <- (eitherDecode <$> getJson jsonPath) :: IO (P.Either P.String Conf)
case confJson of
P.Left err -> putStrLn err
P.Right conf -> do
termList <- csvMapTermList (listPath conf)
corpus <- parse (corpusType conf) (limit conf) (corpusPath conf) termList
putStrLn $ ("\n" <> show (length corpus) <> " parsed docs")
fis <- parseFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf)
putStrLn $ ("\n" <> show (length fis) <> " parsed fis")
let fis' = DM.fromListWith (++) $ DL.sortOn (fst . fst) $ map (\f -> (getFisPeriod f,[f])) fis
let query = PhyloQueryBuild (phyloName conf) "" (timeGrain conf) (timeStep conf)
(Fis $ FisParams True (fisSupport conf) (fisClique conf)) [] [] (WeightedLogJaccard $ WLJParams (timeTh conf) (timeSens conf)) (timeFrame conf) (timeFrameTh conf)
(reBranchThr conf) (reBranchNth conf) (phyloLevel conf)
(RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams (clusterTh conf) (clusterSens conf))
let queryView = PhyloQueryView (viewLevel conf) Merge False 1 [BranchAge,BranchBirth,BranchGroups] [SizeBranch $ SBParams (minSizeBranch conf)] [GroupLabelIncDyn,BranchPeakInc] (Just (ByBranchBirth,Asc)) Json Flat True
let phylo = toPhylo query corpus termList fis'
writeFis (fisPath conf) (phyloName conf) (timeGrain conf) (timeStep conf) (fisSupport conf) (fisClique conf) (getPhyloFis phylo)
let view = toPhyloView queryView phylo
putStrLn $ ("phylo completed until level " <> show (phyloLevel conf) <> ", export at level " <> show (viewLevel conf))
let outputFile = (outputPath conf) <> (DT.unpack $ phyloName conf)
<> "_" <> show (limit conf) <> "_"
<> "_" <> show (timeTh conf) <> "_"
<> "_" <> show (timeSens conf) <> "_"
<> "_" <> show (clusterTh conf) <> "_"
<> "_" <> show (clusterSens conf)
<> ".dot"
P.writeFile outputFile $ dotToString $ viewToDot view
......@@ -65,7 +65,7 @@ import Gargantext.Database.Query.Table.Node.UpdateOpaleye (updateHyperdata)
import Gargantext.Database.Query.Table.NodeNode
import Gargantext.Database.Query.Tree (tree, TreeMode(..))
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo.API (PhyloAPI, phyloAPI)
import Gargantext.Core.Viz.Phylo.Legacy.LegacyAPI (PhyloAPI, phyloAPI)
import qualified Gargantext.API.Node.Share as Share
import qualified Gargantext.API.Node.Update as Update
import qualified Gargantext.API.Search as Search
......
{-|
Module : Gargantext.Core.Viz.Phylo
Description : Phylomemy definitions and types.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
Specifications of Phylomemy export format.
Phylomemy can be described as a Temporal Graph with different scale of
granularity of group of ngrams (terms and multi-terms).
The main type is Phylo which is synonym of Phylomemy (only difference is
the number of chars).
References:
Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
in science evolution — the rise and fall of scientific fields. PloS
one 8, e54847.
-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Viz.Phylo where
import Control.DeepSeq
import Control.Lens (makeLenses)
import Data.Aeson.TH (deriveJSON,defaultOptions)
import Data.Map (Map)
import Data.Set (Set)
import Data.Swagger
import Data.Text (Text)
import Data.Vector (Vector)
import GHC.Generics (Generic)
import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList)
--------------------
-- | PhyloParam | --
--------------------
-- | Global parameters of a Phylo
data PhyloParam =
PhyloParam { _phyloParam_version :: !Text -- Double ?
, _phyloParam_software :: !Software
, _phyloParam_query :: !PhyloQueryBuild
} deriving (Generic, Show, Eq)
-- | Software parameters
data Software =
Software { _software_name :: !Text
, _software_version :: !Text
} deriving (Generic, Show, Eq)
---------------
-- | Phylo | --
---------------
-- | Phylo datatype of a phylomemy
-- Duration : time Segment of the whole Phylo
-- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
-- Periods : list of all the periods of a Phylo
data Phylo =
Phylo { _phylo_duration :: !(Start, End)
, _phylo_foundations :: !PhyloFoundations
, _phylo_periods :: [PhyloPeriod]
, _phylo_docsByYears :: !(Map Date Double)
, _phylo_cooc :: !(Map Date (Map (Int,Int) Double))
, _phylo_fis :: !(Map (Date,Date) [PhyloFis])
, _phylo_param :: !PhyloParam
}
deriving (Generic, Show, Eq)
-- | The foundations of a phylomemy created from a given TermList
data PhyloFoundations =
PhyloFoundations { _phylo_foundationsRoots :: !(Vector Ngrams)
, _phylo_foundationsTermsList :: !TermList
} deriving (Generic, Show, Eq)
-- | Date : a simple Integer
type Date = Int
-- | UTCTime in seconds since UNIX epoch
-- type Start = POSIXTime
-- type End = POSIXTime
type Start = Date
type End = Date
---------------------
-- | PhyloPeriod | --
---------------------
-- | PhyloStep : steps of phylomemy on temporal axis
-- Period: tuple (start date, end date) of the step of the phylomemy
-- Levels: levels of granularity
data PhyloPeriod =
PhyloPeriod { _phylo_periodId :: !PhyloPeriodId
, _phylo_periodLevels :: ![PhyloLevel]
}
deriving (Generic, Show, Eq)
--------------------
-- | PhyloLevel | --
--------------------
-- | PhyloLevel : levels of phylomemy on level axis
-- Levels description:
-- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
-- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
-- Level 1: First level of clustering
-- Level N: Nth level of clustering
data PhyloLevel =
PhyloLevel { _phylo_levelId :: !PhyloLevelId
, _phylo_levelGroups :: ![PhyloGroup]
}
deriving (Generic, Show, Eq)
--------------------
-- | PhyloGroup | --
--------------------
-- | PhyloGroup : group of ngrams at each level and step
-- Label : maybe has a label as text
-- Ngrams: set of terms that build the group
-- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
-- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
-- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
-- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
data PhyloGroup =
PhyloGroup { _phylo_groupId :: !PhyloGroupId
, _phylo_groupLabel :: !Text
, _phylo_groupNgrams :: ![Int]
, _phylo_groupNgramsMeta :: !(Map Text [Double])
, _phylo_groupMeta :: !(Map Text Double)
, _phylo_groupBranchId :: !(Maybe PhyloBranchId)
, _phylo_groupCooc :: !(Map (Int,Int) Double)
, _phylo_groupPeriodParents :: ![Pointer]
, _phylo_groupPeriodChilds :: ![Pointer]
, _phylo_groupLevelParents :: ![Pointer]
, _phylo_groupLevelChilds :: ![Pointer]
}
deriving (Generic, NFData, Show, Eq, Ord)
-- instance NFData PhyloGroup
-- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
type Level = Int
-- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
type Index = Int
type PhyloPeriodId = (Start, End)
type PhyloLevelId = (PhyloPeriodId, Level)
type PhyloGroupId = (PhyloLevelId, Index)
type PhyloBranchId = (Level, Index)
-- | Weight : A generic mesure that can be associated with an Id
type Weight = Double
-- | Pointer : A weighted linked with a given PhyloGroup
type Pointer = (PhyloGroupId, Weight)
-- | Ngrams : a contiguous sequence of n terms
type Ngrams = Text
--------------------
-- | Aggregates | --
--------------------
-- | Document : a piece of Text linked to a Date
data Document = Document
{ date :: !Date
, text :: ![Ngrams]
} deriving (Show,Generic,NFData)
-- | Clique : Set of ngrams cooccurring in the same Document
type Clique = Set Ngrams
-- | Support : Number of Documents where a Clique occurs
type Support = Int
-- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
data PhyloFis = PhyloFis
{ _phyloFis_clique :: !Clique
, _phyloFis_support :: !Support
, _phyloFis_period :: !(Date,Date)
} deriving (Generic,NFData,Show,Eq)
-- | A list of clustered PhyloGroup
type PhyloCluster = [PhyloGroup]
-- | A PhyloGroup in a Graph
type GroupNode = PhyloGroup
-- | A weighted links between two PhyloGroups in a Graph
type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
-- | The association as a Graph between a list of Nodes and a list of Edges
type GroupGraph = ([GroupNode],[GroupEdge])
---------------
-- | Error | --
---------------
data PhyloError = LevelDoesNotExist
| LevelUnassigned
deriving (Show)
-----------------
-- | Cluster | --
-----------------
-- | Cluster constructors
data Cluster = Fis FisParams
| RelatedComponents RCParams
| Louvain LouvainParams
deriving (Generic, Show, Eq, Read)
-- | Parameters for Fis clustering
data FisParams = FisParams
{ _fis_keepMinorFis :: !Bool
, _fis_minSupport :: !Support
, _fis_minSize :: !Int
} deriving (Generic, Show, Eq, Read)
-- | Parameters for RelatedComponents clustering
data RCParams = RCParams
{ _rc_proximity :: !Proximity } deriving (Generic, Show, Eq, Read)
-- | Parameters for Louvain clustering
data LouvainParams = LouvainParams
{ _louvain_proximity :: !Proximity } deriving (Generic, Show, Eq, Read)
-------------------
-- | Proximity | --
-------------------
-- | Proximity constructors
data Proximity = WeightedLogJaccard WLJParams
| WeightedLogSim WLJParams
| Hamming HammingParams
| Filiation
deriving (Generic, Show, Eq, Read)
-- | Parameters for WeightedLogJaccard and WeightedLogSim proximity
data WLJParams = WLJParams
{ _wlj_threshold :: !Double
, _wlj_sensibility :: !Double
} deriving (Generic, Show, Eq, Read)
-- | Parameters for Hamming proximity
data HammingParams = HammingParams
{ _hamming_threshold :: !Double } deriving (Generic, Show, Eq, Read)
----------------
-- | Filter | --
----------------
-- | Filter constructors
data Filter = LonelyBranch LBParams
| SizeBranch SBParams
deriving (Generic, Show, Eq)
-- | Parameters for LonelyBranch filter
data LBParams = LBParams
{ _lb_periodsInf :: !Int
, _lb_periodsSup :: !Int
, _lb_minNodes :: !Int } deriving (Generic, Show, Eq)
-- | Parameters for SizeBranch filter
data SBParams = SBParams
{ _sb_minSize :: !Int } deriving (Generic, Show, Eq)
----------------
-- | Metric | --
----------------
-- | Metric constructors
data Metric = BranchAge | BranchBirth | BranchGroups deriving (Generic, Show, Eq, Read)
----------------
-- | Tagger | --
----------------
-- | Tagger constructors
data Tagger = BranchPeakFreq | BranchPeakCooc | BranchPeakInc
| GroupLabelCooc | GroupLabelInc | GroupLabelIncDyn deriving (Show,Generic,Read)
--------------
-- | Sort | --
--------------
-- | Sort constructors
data Sort = ByBranchAge | ByBranchBirth deriving (Generic, Show, Read, Enum, Bounded)
data Order = Asc | Desc deriving (Generic, Show, Read)
--------------------
-- | PhyloQuery | --
--------------------
-- | A Phyloquery describes a phylomemic reconstruction
data PhyloQueryBuild = PhyloQueryBuild
{ _q_phyloTitle :: !Text
, _q_phyloDesc :: !Text
-- Grain and Steps for the PhyloPeriods
, _q_periodGrain :: !Int
, _q_periodSteps :: !Int
-- Clustering method for building the contextual unit of Phylo (ie: level 1)
, _q_contextualUnit :: !Cluster
, _q_contextualUnitMetrics :: ![Metric]
, _q_contextualUnitFilters :: ![Filter]
-- Inter-temporal matching method of the Phylo
, _q_interTemporalMatching :: !Proximity
, _q_interTemporalMatchingFrame :: !Int
, _q_interTemporalMatchingFrameTh :: !Double
, _q_reBranchThr :: !Double
, _q_reBranchNth :: !Int
-- Last level of reconstruction
, _q_nthLevel :: !Level
-- Clustering method used from level 1 to nthLevel
, _q_nthCluster :: !Cluster
} deriving (Generic, Show, Eq)
-- | To choose the Phylo edge you want to export : --> <-- <--> <=>
data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
-------------------
-- | PhyloView | --
-------------------
-- | A PhyloView is the output type of a Phylo
data PhyloView = PhyloView
{ _pv_param :: !PhyloParam
, _pv_title :: !Text
, _pv_description :: !Text
, _pv_filiation :: !Filiation
, _pv_level :: !Level
, _pv_periods :: ![PhyloPeriodId]
, _pv_metrics :: !(Map Text [Double])
, _pv_branches :: ![PhyloBranch]
, _pv_nodes :: ![PhyloNode]
, _pv_edges :: ![PhyloEdge]
} deriving (Generic, Show)
-- | A phyloview is made of PhyloBranches, edges and nodes
data PhyloBranch = PhyloBranch
{ _pb_id :: !PhyloBranchId
, _pb_peak :: !Text
, _pb_metrics :: !(Map Text [Double])
} deriving (Generic, Show)
data PhyloEdge = PhyloEdge
{ _pe_source :: !PhyloGroupId
, _pe_target :: !PhyloGroupId
, _pe_type :: !EdgeType
, _pe_weight :: !Weight
} deriving (Generic, Show)
data PhyloNode = PhyloNode
{ _pn_id :: !PhyloGroupId
, _pn_bid :: !(Maybe PhyloBranchId)
, _pn_label :: !Text
, _pn_idx :: ![Int]
, _pn_ngrams :: !(Maybe [Ngrams])
, _pn_metrics :: !(Map Text [Double])
, _pn_cooc :: !(Map (Int,Int) Double)
, _pn_parents :: !(Maybe [PhyloGroupId])
, _pn_childs :: ![PhyloNode]
} deriving (Generic, Show)
------------------------
-- | PhyloQueryView | --
------------------------
data ExportMode = Json | Dot | Svg
deriving (Generic, Show, Read)
data DisplayMode = Flat | Nested
deriving (Generic, Show, Read)
-- | A PhyloQueryView describes a Phylo as an output view
data PhyloQueryView = PhyloQueryView
{ _qv_lvl :: !Level
-- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
, _qv_filiation :: !Filiation
-- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
, _qv_levelChilds :: !Bool
, _qv_levelChildsDepth :: !Level
-- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
-- Firstly the metrics, then the filters and the taggers
, _qv_metrics :: ![Metric]
, _qv_filters :: ![Filter]
, _qv_taggers :: ![Tagger]
-- An asc or desc sort to apply to the PhyloGraph
, _qv_sort :: !(Maybe (Sort,Order))
-- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
, _qv_export :: !ExportMode
, _qv_display :: !DisplayMode
, _qv_verbose :: !Bool
}
----------------
-- | Lenses | --
----------------
makeLenses ''PhyloParam
makeLenses ''Software
--
makeLenses ''Phylo
makeLenses ''PhyloFoundations
makeLenses ''PhyloGroup
makeLenses ''PhyloLevel
makeLenses ''PhyloPeriod
makeLenses ''PhyloFis
--
makeLenses ''Proximity
makeLenses ''Cluster
makeLenses ''Filter
--
makeLenses ''PhyloQueryBuild
makeLenses ''PhyloQueryView
--
makeLenses ''PhyloView
makeLenses ''PhyloBranch
makeLenses ''PhyloNode
makeLenses ''PhyloEdge
------------------------
-- | JSON instances | --
------------------------
$(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
$(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
$(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
$(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
$(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
$(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
--
$(deriveJSON (unPrefix "_software_" ) ''Software )
$(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
--
$(deriveJSON defaultOptions ''Filter )
$(deriveJSON defaultOptions ''Metric )
$(deriveJSON defaultOptions ''Cluster )
$(deriveJSON defaultOptions ''Proximity )
--
$(deriveJSON (unPrefix "_fis_" ) ''FisParams )
$(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
$(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
$(deriveJSON (unPrefix "_rc_" ) ''RCParams )
$(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
--
$(deriveJSON (unPrefix "_lb_" ) ''LBParams )
$(deriveJSON (unPrefix "_sb_" ) ''SBParams )
--
$(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
$(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
$(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
$(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
$(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
$(deriveJSON defaultOptions ''Filiation )
$(deriveJSON defaultOptions ''EdgeType )
---------------------------
-- | Swagger instances | --
---------------------------
instance ToSchema Phylo where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
instance ToSchema PhyloFoundations where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_foundations")
instance ToSchema PhyloPeriod where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_period")
instance ToSchema PhyloLevel where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_level")
instance ToSchema PhyloGroup where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_group")
instance ToSchema PhyloFis where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloFis_")
instance ToSchema Software where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
instance ToSchema PhyloParam where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
instance ToSchema Filter
instance ToSchema Metric
instance ToSchema Cluster
instance ToSchema Proximity where
declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
instance ToSchema FisParams where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_fis_")
instance ToSchema HammingParams where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_hamming_")
instance ToSchema LouvainParams where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_louvain_")
instance ToSchema RCParams where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_rc_")
instance ToSchema WLJParams where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wlj_")
instance ToSchema LBParams where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_lb_")
instance ToSchema SBParams where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sb_")
instance ToSchema PhyloQueryBuild where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_q_")
instance ToSchema PhyloView where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pv_")
instance ToSchema PhyloBranch where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pb_")
instance ToSchema PhyloEdge where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pe_")
instance ToSchema PhyloNode where
declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pn_")
instance ToSchema Filiation
instance ToSchema EdgeType
----------------------------
-- | TODO XML instances | --
----------------------------
{-|
Module : Gargantext.Core.Viz.Phylo.API
Description : Phylo API
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE OverloadedLists #-} -- allows to write Map and HashMap as lists
{-# LANGUAGE TypeOperators #-}
module Gargantext.Core.Viz.Phylo.API
where
import Data.Maybe (fromMaybe)
import Control.Lens ((^.))
import Data.String.Conversions
--import Control.Monad.Reader (ask)
import qualified Data.ByteString as DB
import qualified Data.ByteString.Lazy as DBL
import Data.Swagger
import Network.HTTP.Media ((//), (/:))
import Servant
import Test.QuickCheck (elements)
import Test.QuickCheck.Arbitrary (Arbitrary, arbitrary)
import Web.HttpApiData (readTextData)
import Gargantext.API.Prelude
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Admin.Types.Node -- (PhyloId, ListId, CorpusId, UserId, NodeId(..))
import Gargantext.Database.Query.Table.Node (insertNodes, node, getNodeWith)
import Gargantext.Database.Schema.Node (node_hyperdata)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Main
import Gargantext.Core.Viz.Phylo.Example
import Gargantext.Core.Types (TODO(..))
------------------------------------------------------------------------
type PhyloAPI = Summary "Phylo API"
:> GetPhylo
-- :<|> PutPhylo
:<|> PostPhylo
phyloAPI :: PhyloId -> UserId -> GargServer PhyloAPI
phyloAPI n u = getPhylo n
:<|> postPhylo n u
-- :<|> putPhylo n
-- :<|> deletePhylo n
newtype SVG = SVG DB.ByteString
instance ToSchema SVG
where
declareNamedSchema _ = declareNamedSchema (Proxy :: Proxy TODO)
instance Show SVG where
show (SVG a) = show a
instance Accept SVG where
contentType _ = "SVG" // "image/svg+xml" /: ("charset", "utf-8")
instance Show a => MimeRender PlainText a where
mimeRender _ val = cs ("" <> show val)
instance MimeRender SVG SVG where
mimeRender _ (SVG s) = DBL.fromStrict s
------------------------------------------------------------------------
type GetPhylo = QueryParam "listId" ListId
:> QueryParam "level" Level
:> QueryParam "minSizeBranch" MinSizeBranch
{- :> QueryParam "filiation" Filiation
:> QueryParam "childs" Bool
:> QueryParam "depth" Level
:> QueryParam "metrics" [Metric]
:> QueryParam "periodsInf" Int
:> QueryParam "periodsSup" Int
:> QueryParam "minNodes" Int
:> QueryParam "taggers" [Tagger]
:> QueryParam "sort" Sort
:> QueryParam "order" Order
:> QueryParam "export" ExportMode
:> QueryParam "display" DisplayMode
:> QueryParam "verbose" Bool
-}
:> Get '[SVG] SVG
-- | TODO
-- Add real text processing
-- Fix Filter parameters
getPhylo :: PhyloId -> GargServer GetPhylo
getPhylo phId _lId l msb = do
phNode <- getNodeWith phId (Proxy :: Proxy HyperdataPhylo)
let
level = fromMaybe 2 l
branc = fromMaybe 2 msb
maybePhylo = phNode ^. (node_hyperdata . hp_data)
p <- liftBase $ viewPhylo2Svg
$ viewPhylo level branc
$ fromMaybe phyloFromQuery maybePhylo
pure (SVG p)
------------------------------------------------------------------------
type PostPhylo = QueryParam "listId" ListId
-- :> ReqBody '[JSON] PhyloQueryBuild
:> (Post '[JSON] NodeId)
postPhylo :: CorpusId -> UserId -> GargServer PostPhylo
postPhylo corpusId userId _lId = do
-- TODO get Reader settings
-- s <- ask
-- let
-- _vrs = Just ("1" :: Text)
-- _sft = Just (Software "Gargantext" "4")
-- _prm = initPhyloParam vrs sft (Just q)
phy <- flowPhylo corpusId -- params
phyloId <- insertNodes [node NodePhylo "Phylo" (HyperdataPhylo Nothing (Just phy)) (Just corpusId) userId]
pure $ NodeId (fromIntegral phyloId)
------------------------------------------------------------------------
-- | DELETE Phylo == delete a node
------------------------------------------------------------------------
------------------------------------------------------------------------
{-
type PutPhylo = (Put '[JSON] Phylo )
--putPhylo :: PhyloId -> Maybe ListId -> PhyloQueryBuild -> Phylo
putPhylo :: PhyloId -> GargServer PutPhylo
putPhylo = undefined
-}
-- | Instances
instance Arbitrary Phylo where arbitrary = elements [phylo]
instance Arbitrary PhyloGroup where arbitrary = elements []
instance Arbitrary PhyloView where arbitrary = elements [phyloView]
instance FromHttpApiData DisplayMode where parseUrlPiece = readTextData
instance FromHttpApiData ExportMode where parseUrlPiece = readTextData
instance FromHttpApiData Filiation where parseUrlPiece = readTextData
instance FromHttpApiData Metric where parseUrlPiece = readTextData
instance FromHttpApiData Order where parseUrlPiece = readTextData
instance FromHttpApiData Sort where parseUrlPiece = readTextData
instance FromHttpApiData Tagger where parseUrlPiece = readTextData
instance FromHttpApiData [Metric] where parseUrlPiece = readTextData
instance FromHttpApiData [Tagger] where parseUrlPiece = readTextData
instance ToParamSchema DisplayMode
instance ToParamSchema ExportMode
instance ToParamSchema Filiation
instance ToParamSchema Tagger
instance ToParamSchema Metric
instance ToParamSchema Order
instance ToParamSchema Sort
instance ToSchema Order
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.Aggregates
where
import Control.Parallel.Strategies
import Gargantext.Prelude hiding (elem)
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Metrics.FrequentItemSet (fisWithSizePolyMap, Size(..))
import Gargantext.Core.Text.Terms.Mono (monoTexts)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
import Debug.Trace (trace)
import Data.List (partition, concat, nub, elem, sort, (++), null, union)
import Data.Map (Map, fromList, fromListWith, adjust, filterWithKey, elems, keys, unionWith, mapWithKey)
import Data.Set (size)
import Data.Text (Text, unwords)
import Data.Vector (Vector)
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector as Vector
---------------------
-- | Foundations | --
---------------------
-- | Extract all the labels of a termList
termListToNgrams :: TermList -> [Ngrams]
termListToNgrams = map (\(lbl,_) -> unwords lbl)
-------------------
-- | Documents | --
-------------------
-- | To group a list of Documents by fixed periods
groupDocsByPeriod :: (NFData doc, Ord date, Enum date) => (doc -> date) -> [(date,date)] -> [doc] -> Map (date, date) [doc]
groupDocsByPeriod _ _ [] = panic "[ERR][Viz.Phylo.Example.docsToPeriods] Empty [Documents] can not have any periods"
groupDocsByPeriod f pds es =
let periods = map (inPeriode f es) pds
periods' = periods `using` parList rdeepseq
in trace ("----\nGroup docs by periods\n") $ fromList $ zip pds periods'
where
--------------------------------------
inPeriode :: Ord b => (t -> b) -> [t] -> (b, b) -> [t]
inPeriode f' h (start,end) =
fst $ partition (\d -> f' d >= start && f' d <= end) h
--------------------------------------
-- | To parse a list of Documents by filtering on a Vector of Ngrams
parseDocs :: Vector Ngrams -> [(Date,Text)] -> [Document]
parseDocs roots c = map (\(d,t)
-> Document d ( filter (\x -> Vector.elem x roots)
$ monoTexts t)) c
-- | To count the number of documents by year
countDocs :: [(Date,a)] -> Map Date Double
countDocs corpus = fromListWith (+) $ map (\(d,_) -> (d,1)) corpus
-----------------
-- | Periods | --
-----------------
-- | To init a list of Periods framed by a starting Date and an ending Date
initPeriods :: (Eq date, Enum date) => Grain -> Step -> (date, date) -> [(date, date)]
initPeriods g s (start,end) = map (\l -> (head' "initPeriods" l, last' "initPeriods" l))
$ chunkAlong g s [start .. end]
--------------
-- | Cooc | --
--------------
-- | To transform a tuple of group's information into a coocurency Matrix
toCooc :: [([Int],Double)] -> Map (Int, Int) Double
toCooc l = map (/docs)
$ foldl (\mem x -> adjust (+1) x mem) cooc
$ concat
$ map (\x -> listToFullCombi $ fst x) l
where
--------------------------------------
idx :: [Int]
idx = nub $ concat $ map fst l
--------------------------------------
docs :: Double
docs = sum $ map snd l
--------------------------------------
cooc :: Map (Int, Int) (Double)
cooc = fromList $ map (\x -> (x,0)) $ listToFullCombi idx
--------------------------------------
-- | To reduce a coocurency Matrix to some keys
getSubCooc :: [Int] -> Map (Int, Int) Double -> Map (Int, Int) Double
getSubCooc idx cooc = filterWithKey (\k _ -> (elem (fst k) idx)
&& (elem (snd k) idx)) cooc
-- | To get a coocurency Matrix related to a given list of Periods
getCooc :: [PhyloPeriodId] -> Phylo -> Map (Int, Int) Double
getCooc prds p = toCooc $ map (\g -> (getGroupNgrams g,getGroupMeta "support" g)) gs
where
--------------------------------------
-- | Here we need to go back to the level 1 (aka : the Fis level)
gs :: [PhyloGroup]
gs = filter (\g -> elem (getGroupPeriod g) prds ) $ getGroupsWithLevel 1 p
--------------------------------------
-- | To transform a list of index into a cooc matrix
listToCooc :: [Int] -> Map (Int,Int) Double
listToCooc lst = fromList $ map (\combi -> (combi,1)) $ listToFullCombi lst
-- | To build the cooc matrix by years out of the corpus
docsToCooc :: [Document] -> Vector Ngrams -> Map Date (Map (Int,Int) Double)
docsToCooc docs fdt = fromListWith sumCooc
$ map (\(d,l) -> (d, listToCooc l))
$ map (\doc -> (date doc, ngramsToIdx (text doc) fdt)) docs
-------------
-- | Fis | --
-------------
-- | To apply a filter with the possibility of keeping some periods non empty (keep : True|False)
filterFis :: Bool -> Int -> (Int -> [PhyloFis] -> [PhyloFis]) -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFis keep thr f m = case keep of
False -> map (\l -> f thr l) m
True -> map (\l -> keepFilled (f) thr l) m
-- | To filter Fis with small Support
filterFisBySupport :: Int -> [PhyloFis] -> [PhyloFis]
filterFisBySupport thr l = filter (\fis -> getSupport fis >= thr) l
-- | To filter Fis with small Clique size
filterFisByClique :: Int -> [PhyloFis] -> [PhyloFis]
filterFisByClique thr l = filter (\fis -> (size $ getClique fis) >= thr) l
-- | To find if l' is nested in l
isNested :: Eq a => [a] -> [a] -> Bool
isNested l l'
| null l' = True
| length l' > length l = False
| (union l l') == l = True
| otherwise = False
-- | To filter nested Fis
filterFisByNested :: Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
filterFisByNested m =
let fis = map (\l ->
foldl (\mem f -> if (any (\f' -> isNested (Set.toList $ getClique f') (Set.toList $ getClique f)) mem)
then mem
else
let fMax = filter (\f' -> not $ isNested (Set.toList $ getClique f) (Set.toList $ getClique f')) mem
in fMax ++ [f] ) [] l)
$ elems m
fis' = fis `using` parList rdeepseq
in fromList $ zip (keys m) fis'
-- | Choose if we use a set of Fis from a file or if we have to create them
docsToFis :: Map (Date,Date) [Document] -> Phylo -> Map (Date, Date) [PhyloFis]
docsToFis m p = if (null $ getPhyloFis p)
then trace("----\nRebuild the Fis from scratch\n")
$ mapWithKey (\k docs -> let fis = Map.toList $ fisWithSizePolyMap (Segment 1 20) 1 (map text docs)
in map (\f -> PhyloFis (fst f) (snd f) k) fis) m
else trace("----\nUse Fis from an existing file\n")
$ unionWith (++) (fromList $ map (\k -> (k,[])) $ keys m) (getPhyloFis p)
-- | Process some filters on top of a set of Fis
refineFis :: Map (Date, Date) [PhyloFis] -> Bool -> Support -> Int -> Map (Date, Date) [PhyloFis]
refineFis fis k s t = traceFis "----\nFiltered Fis by nested :\n"
$ filterFisByNested
$ traceFis "----\nFiltered Fis by clique size :\n"
$ filterFis k t (filterFisByClique)
$ traceFis "----\nFiltered Fis by support :\n"
$ filterFis k s (filterFisBySupport)
$ traceFis "----\nUnfiltered Fis :\n" fis
-----------------
-- | Tracers | --
-----------------
traceFis :: [Char] -> Map (Date, Date) [PhyloFis] -> Map (Date, Date) [PhyloFis]
traceFis lbl m = trace (lbl <> "count : " <> show (sum $ map length $ elems m) <> " Fis\n"
<> "support : " <> show (countSup 1 supps) <> " (>1) "
<> show (countSup 2 supps) <> " (>2) "
<> show (countSup 3 supps) <> " (>3) "
<> show (countSup 4 supps) <> " (>4) "
<> show (countSup 5 supps) <> " (>5) "
<> show (countSup 6 supps) <> " (>6)\n"
<> "clique size : " <> show (countSup 1 ngrms) <> " (>1) "
<> show (countSup 2 ngrms) <> " (>2) "
<> show (countSup 3 ngrms) <> " (>3) "
<> show (countSup 4 ngrms) <> " (>4) "
<> show (countSup 5 ngrms) <> " (>5) "
<> show (countSup 6 ngrms) <> " (>6)\n"
) m
where
--------------------------------------
countSup :: Double -> [Double] -> Int
countSup s l = length $ filter (>s) l
--------------------------------------
supps :: [Double]
supps = sort $ map (fromIntegral . _phyloFis_support) $ concat $ elems m
--------------------------------------
ngrms :: [Double]
ngrms = sort $ map (\f -> fromIntegral $ size $ _phyloFis_clique f) $ concat $ elems m
--------------------------------------
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.BranchMaker
where
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List (concat,nub,(++),sortOn,sort,null,intersect,union,delete)
import Data.Map (Map,(!), fromListWith, elems)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Cluster
import Gargantext.Core.Viz.Phylo.Aggregates
import Gargantext.Core.Viz.Phylo.Metrics
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.LinkMaker
import qualified Data.Map as Map
-- import Debug.Trace (trace)
---------------------------
-- | Readability links | --
---------------------------
getGroupsPeriods :: [PhyloGroup] -> [(Date,Date)]
getGroupsPeriods gs = sortOn fst $ nub $ map getGroupPeriod gs
getFramedPeriod :: [PhyloGroup] -> (Date,Date)
getFramedPeriod gs = (fst $ (head' "getFramedPeriod" $ getGroupsPeriods gs), snd $ (last' "getFramedPeriod" $ getGroupsPeriods gs))
getGroupsNgrams :: [PhyloGroup] -> [Int]
getGroupsNgrams gs = (sort . nub . concat) $ map getGroupNgrams gs
areDistant :: (Date,Date) -> (Date,Date) -> Int -> Bool
areDistant prd prd' thr = (((fst prd') - (snd prd)) > thr) || (((fst prd) - (snd prd')) > thr)
-- | Process a Jaccard on top of two set of Branch Peaks
areTwinPeaks :: Double -> [Int] -> [Int] -> Bool
areTwinPeaks thr ns ns' = ( ((fromIntegral . length) $ intersect ns ns')
/ ((fromIntegral . length) $ union ns ns')) >= thr
-- | Get the framing period of a branch ([[PhyloGroup]])
getBranchPeriod :: [PhyloGroup] -> (Date,Date)
getBranchPeriod gs =
let dates = sort $ foldl (\mem g -> mem ++ [fst $ getGroupPeriod g, snd $ getGroupPeriod g]) [] gs
in (head' "getBranchPeriod" dates, last' "getBranchPeriod" dates)
-- | Get the Nth most coocurent Ngrams in a list of Groups
getGroupsPeaks :: [PhyloGroup] -> Int -> Phylo -> [Int]
getGroupsPeaks gs nth p = getNthMostOcc nth
$ getSubCooc (getGroupsNgrams gs)
$ getCooc (getGroupsPeriods gs) p
-- | Reduce a list of branches ([[Phylogroup]]) into possible candidates for rebranching
filterSimBranches :: [PhyloGroup] -> Phylo -> [[PhyloGroup]] -> [[PhyloGroup]]
filterSimBranches gs p branches = filter (\gs' -> (areTwinPeaks (getPhyloReBranchThr p)
(getGroupsPeaks gs (getPhyloReBranchNth p) p)
(getGroupsPeaks gs' (getPhyloReBranchNth p) p))
&& ((not . null) $ intersect (map getGroupNgrams gs') (map getGroupNgrams gs))
&& (areDistant (getBranchPeriod gs) (getBranchPeriod gs') (getPhyloMatchingFrame p))
) branches
-- | Try to connect a focused branch to other candidate branches by finding the best pointers
reBranch :: Phylo -> [PhyloGroup] -> [[PhyloGroup]] -> [(PhyloGroupId,Pointer)]
reBranch p branch candidates =
let newLinks = map (\branch' ->
let pointers = map (\g ->
-- define pairs of candidates groups
let pairs = listToPairs
$ filter (\g' -> (not . null) $ intersect (getGroupNgrams g') (getGroupNgrams g)) branch'
-- process the matching between the pairs and the current group
in foldl' (\mem (g2,g3) -> let s = 0.1 + matchWithPairs g (g2,g3) p
in if (g2 == g3)
then mem ++ [(getGroupId g,(getGroupId g2,s))]
else mem ++ [(getGroupId g,(getGroupId g2,s)),(getGroupId g,(getGroupId g3,s))]) [] pairs
) branch
pointers' = pointers `using` parList rdeepseq
-- keep the best pointer between the focused branch and the current candidates
in head' "reBranch" $ reverse $ sortOn (snd . snd)
$ filter (\(_,(_,s)) -> filterProximity s $ getPhyloProximity p) $ concat pointers'
) candidates
newLinks' = newLinks `using` parList rdeepseq
in newLinks'
reLinkPhyloBranches :: Level -> Phylo -> Phylo
reLinkPhyloBranches lvl p =
let pointers = Map.fromList $ map (\(_id,(_id',_s)) -> (_id,[(_id',100)])) $ fst
$ foldl' (\(pts,branches') gs -> (pts ++ (reBranch p gs (filterSimBranches gs p branches')), delete gs branches'))
([],branches) branches
in setPhyloBranches lvl $ updateGroups Descendant lvl pointers p
where
branches :: [[PhyloGroup]]
branches = elems
$ fromListWith (++)
$ foldl' (\mem g -> case getGroupBranchId g of
Nothing -> mem
Just i -> mem ++ [(i,[g])] )
[] $ getGroupsWithLevel lvl p
------------------
-- | Branches | --
------------------
-- | To transform a PhyloGraph into a list of PhyloBranches by using the relatedComp clustering
graphToBranches :: [PhyloGroup] -> Map PhyloGroupId Int
graphToBranches groups = Map.fromList
$ concat
$ map (\(idx,gIds) -> map (\id -> (id,idx)) gIds)
$ zip [1..]
$ relatedComp
$ map (\g -> [getGroupId g] ++ (getGroupPeriodParentsId g) ++ (getGroupPeriodChildsId g)) groups
-- | To set all the PhyloBranches for a given Level in a Phylo
setPhyloBranches :: Level -> Phylo -> Phylo
setPhyloBranches lvl p = alterGroupWithLevel (\g ->
let bIdx = branches ! (getGroupId g)
in over (phylo_groupBranchId) (\_ -> Just (lvl,bIdx)) g) lvl p
where
--------------------------------------
branches :: Map PhyloGroupId Int
branches = graphToBranches (getGroupsWithLevel lvl p)
--------------------------------------
-- trace' bs = trace bs
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TemplateHaskell #-}
module Gargantext.Core.Viz.Phylo.Cluster
where
import Control.Parallel.Strategies
import Data.Graph.Clustering.Louvain.CplusPlus
import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
import Data.List (null,concat,sort,intersect,(++), elemIndex, groupBy, nub, union, (\\), (!!))
import Data.Map (Map, fromList, mapKeys)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.Metrics
import Gargantext.Core.Viz.Phylo.LinkMaker
import qualified Data.Map as Map
import qualified Data.Vector.Storable as VS
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
--------------
-- | Algo | --
--------------
relatedComp :: Eq a => [[a]] -> [[a]]
relatedComp graphs = foldl' (\mem groups ->
if (null mem)
then mem ++ [groups]
else
let related = filter (\groups' -> (not . null) $ intersect groups groups') mem
in if (null related)
then mem ++ [groups]
else (mem \\ related) ++ [union groups (nub $ concat related)] ) [] graphs
louvain :: ([GroupNode],[GroupEdge]) -> IO [[PhyloGroup]]
louvain (nodes,edges) = map (\community -> map (\node -> nodes !! (l_node_id node)) community)
<$> groupBy (\a b -> (l_community_id a) == (l_community_id b))
<$> (cLouvain "0.0001" $ mapKeys (\(x,y) -> (idx x, idx y)) $ fromList edges)
where
--------------------------------------
idx :: PhyloGroup -> Int
idx e = case elemIndex e nodes of
Nothing -> panic "[ERR][Gargantext.Core.Viz.Phylo.Metrics.Clustering] a node is missing"
Just i -> i
--------------------------------------
-----------------------
-- | Cluster Maker | --
-----------------------
-- | Optimisation to filter only relevant candidates
getCandidates :: [PhyloGroup] -> [(PhyloGroup,PhyloGroup)]
getCandidates gs = filter (\(g,g') -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
$ filter (\(g,g') -> g /= g')
$ listToDirectedCombi gs
-- | To transform a Graph into Clusters
graphToClusters :: Cluster -> GroupGraph -> [PhyloCluster]
graphToClusters clust (nodes,edges) = case clust of
Louvain (LouvainParams _) -> undefined
RelatedComponents (RCParams _) -> relatedComp $ ((map (\((g,g'),_) -> [g,g']) edges) ++ (map (\g -> [g]) nodes))
_ -> panic "[ERR][Viz.Phylo.Aggregates.Cluster.graphToClusters] not implemented"
-- | To transform a list of PhyloGroups into a Graph of Proximity
groupsToGraph :: Double -> Proximity -> [PhyloGroup] -> ([GroupNode],[GroupEdge])
groupsToGraph nbDocs prox gs = case prox of
WeightedLogJaccard (WLJParams _ sens) -> (gs, let candidates = map (\(x,y) -> ((x,y), weightedLogJaccard sens nbDocs (getGroupCooc x) (getGroupCooc y) (getGroupNgrams x) (getGroupNgrams y)))
$ getCandidates gs
candidates' = candidates `using` parList rdeepseq
in candidates' )
Hamming (HammingParams _) -> (gs, map (\(x,y) -> ((x,y), hamming (getGroupCooc x) (getGroupCooc y))) $ getCandidates gs)
_ -> undefined
-- | To filter a Graph of Proximity using a given threshold
filterGraph :: Proximity -> ([GroupNode],[GroupEdge]) -> ([GroupNode],[GroupEdge])
filterGraph prox (ns,es) = case prox of
WeightedLogJaccard (WLJParams thr _) -> (ns, filter (\(_,v) -> v >= thr) es)
Hamming (HammingParams thr) -> (ns, filter (\(_,v) -> v <= thr) es)
_ -> undefined
-- | To clusterise a Phylo
phyloToClusters :: Level -> Cluster -> Phylo -> Map (Date,Date) [PhyloCluster]
phyloToClusters lvl clus p = Map.fromList
$ zip periods
$ map (\g -> if null (fst g)
then []
else graphToClusters clus g) graphs'
where
--------------------------------------
graphs' :: [([GroupNode],[GroupEdge])]
graphs' = traceGraphFiltered lvl
$ map (\g -> filterGraph prox g) graphs
--------------------------------------
graphs :: [([GroupNode],[GroupEdge])]
graphs = traceGraph lvl (getThreshold prox)
$ let gs = (trace $ "PROX: " <> show prox) $ map (\prd -> groupsToGraph (periodsToNbDocs [prd] p) prox (getGroupsWithFilters lvl prd p)) periods
gs' = gs `using` parList rdeepseq
in gs'
--------------------------------------
prox :: Proximity
prox = getProximity clus
--------------------------------------
periods :: [PhyloPeriodId]
periods = getPhyloPeriods p
--------------------------------------
----------------
-- | Tracer | --
----------------
traceGraph :: Level -> Double -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
traceGraph lvl thr g = trace ( "----\nUnfiltered clustering in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length lst) <> " potential edges (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
<> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
where
lst = sort $ map snd $ concat $ map snd g
traceGraphFiltered :: Level -> [([GroupNode],[GroupEdge])] -> [([GroupNode],[GroupEdge])]
traceGraphFiltered lvl g = trace ( "----\nClustering in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length lst) <> " edges\n"
<> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") g
where
lst = sort $ map snd $ concat $ map snd g
{-|
Module : Gargantext.Core.Viz.Phylo.Example
Description : Phylomemy example based on history of Cleopatre.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-- | Cesar et Cleôpatre
-- Exemple de phylomemie
-- French without accents
TODO:
- split the functions : RAW -> Document -> Ngrams
-- reverse history: antechronologique
-- metrics support
-}
module Gargantext.Core.Viz.Phylo.Example where
import Data.GraphViz.Types.Generalised (DotGraph)
import Control.Lens hiding (both, Level)
import Data.Text (Text, toLower)
import Data.List ((++))
import Data.Map (Map,empty)
import Data.Vector (Vector)
import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Cluster
import Gargantext.Core.Viz.Phylo.Aggregates
import Gargantext.Core.Viz.Phylo.BranchMaker
import Gargantext.Core.Viz.Phylo.LevelMaker
import Gargantext.Core.Viz.Phylo.LinkMaker
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.View.ViewMaker
import Gargantext.Core.Viz.Phylo.View.Export
import Gargantext.Core.Viz.Phylo.Main (writePhylo)
import GHC.IO (FilePath)
import qualified Data.List as List
------------------------------------------------------
-- | STEP 12 | -- Create a PhyloView from a user Query
------------------------------------------------------
export :: IO ()
export = dotToFile "/home/qlobbe/data/phylo/output/cesar_cleopatre.dot" phyloDot
phyloDot :: DotGraph DotId
phyloDot = viewToDot phyloView
phyloExport :: FilePath -> IO FilePath
phyloExport fp = writePhylo fp phyloView
phyloView :: PhyloView
phyloView = toPhyloView (queryParser' queryViewEx) phyloFromQuery
-- | To do : create an other request handler and an other query parser
queryParser' :: [Char] -> PhyloQueryView
queryParser' _q = phyloQueryView
queryViewEx :: [Char]
queryViewEx = "level=3"
++ "&childs=false"
++ "&filter=LonelyBranchFilter"
++ "&metric=BranchAge"
++ "&tagger=BranchPeakFreq"
++ "&tagger=GroupLabelCooc"
phyloQueryView :: PhyloQueryView
phyloQueryView = PhyloQueryView 1 Merge False 1 [BranchAge,BranchBirth,BranchGroups] [] [BranchPeakInc,GroupLabelIncDyn] (Just (ByBranchBirth,Asc)) Json Flat True
--------------------------------------------------
-- | STEP 11 | -- Create a Phylo from a user Query
--------------------------------------------------
phyloFromQuery :: Phylo
phyloFromQuery = toPhylo phyloQueryBuild docs termList empty
-- | To do : create a request handler and a query parser
queryParser :: [Char] -> PhyloQueryBuild
queryParser _q = phyloQueryBuild
queryEx :: [Char]
queryEx = "title=Cesar et Cleôpatre"
++ "&desc=An example of Phylomemy (french without accent)"
++ "grain=5&steps=3"
++ "cluster=FrequentItemSet"
++ "interTemporalMatching=WeightedLogJaccard"
++ "nthLevel=2"
++ "nthCluster=RelatedComponents"
++ "nthProximity=Filiation"
phyloQueryBuild :: PhyloQueryBuild
phyloQueryBuild = PhyloQueryBuild "Cesar et Cleôpatre" "An example of Phylomemy (french without accent)"
3 1 defaultFis [] [] (WeightedLogJaccard $ WLJParams 0.9 10) 5 0.8 0.5 4 1 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.3 0)
----------------------------------------------------------------------------------------------------------------------------
-- | STEP 10 | -- Incrementaly cluster the PhyloGroups n times, link them through the Periods and build level n of the Phylo
----------------------------------------------------------------------------------------------------------------------------
phylo6 :: Phylo
phylo6 = toNthLevel 6 defaultWeightedLogJaccard (RelatedComponents (initRelatedComponents (Just defaultWeightedLogJaccard))) phylo3
phylo3 :: Phylo
phylo3 = setPhyloBranches 3
$ interTempoMatching Descendant 3 defaultWeightedLogJaccard
$ interTempoMatching Ascendant 3 defaultWeightedLogJaccard
$ setLevelLinks (2,3)
$ addPhyloLevel 3
(phyloToClusters 2 (RelatedComponents (initRelatedComponents (Just defaultWeightedLogJaccard))) phyloBranch2)
phyloBranch2
--------------------------------
-- | STEP 9 | -- Cluster the Fis
--------------------------------
phyloBranch2 :: Phylo
phyloBranch2 = setPhyloBranches 2 phylo2_c
phylo2_c :: Phylo
phylo2_c = interTempoMatching Descendant 2 defaultWeightedLogJaccard phylo2_p
phylo2_p :: Phylo
phylo2_p = interTempoMatching Ascendant 2 defaultWeightedLogJaccard phylo2_1_2
phylo2_1_2 :: Phylo
phylo2_1_2 = setLevelLinks (1,2) phylo2
-- | phylo2 allready contains the LevelChilds links from 2 to 1
phylo2 :: Phylo
phylo2 = addPhyloLevel 2 phyloCluster phyloBranch1
phyloCluster :: Map (Date,Date) [PhyloCluster]
phyloCluster = phyloToClusters 2 (RelatedComponents $ RCParams $ WeightedLogJaccard $ WLJParams 0.05 10) phyloBranch1
----------------------------------
-- | STEP 8 | -- Find the Branches
----------------------------------
phyloBranch1 :: Phylo
phyloBranch1 = setPhyloBranches 1 phylo1_c
--------------------------------------------------------------------
-- | STEP 7 | -- Link the PhyloGroups of level 1 through the Periods
--------------------------------------------------------------------
phylo1_c :: Phylo
phylo1_c = interTempoMatching Descendant 1 defaultWeightedLogJaccard phylo1_p
phylo1_p :: Phylo
phylo1_p = interTempoMatching Ascendant 1 defaultWeightedLogJaccard phylo1_0_1
-----------------------------------------------
-- | STEP 6 | -- Build the level 1 of the Phylo
-----------------------------------------------
phylo1_0_1 :: Phylo
phylo1_0_1 = setLevelLinks (0,1) phylo1
-- phylo1_1_0 :: Phylo
-- phylo1_1_0 = setLevelLinks (1,0) phylo1
phylo1 :: Phylo
phylo1 = addPhyloLevel (1) phyloFis phylo'
-------------------------------------------------------------------
-- | STEP 5 | -- Create lists of Frequent Items Set and filter them
-------------------------------------------------------------------
phylo' :: Phylo
phylo' = phylo & phylo_fis .~ phyloFis
phyloFis :: Map (Date, Date) [PhyloFis]
phyloFis = refineFis (docsToFis phyloDocs phylo) True 1 1
----------------------------------------
-- | STEP 2 | -- Init a Phylo of level 0
----------------------------------------
phylo :: Phylo
phylo = addPhyloLevel 0 phyloDocs phyloBase
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) docs
------------------------------------------------------------------------
-- | STEP 1 | -- Init the Base of the Phylo from Periods and Foundations
------------------------------------------------------------------------
phyloBase :: Phylo
phyloBase = toPhyloBase phyloQueryBuild phyloParam docs termList empty
phyloParam :: PhyloParam
phyloParam = (initPhyloParam (Just defaultPhyloVersion) (Just defaultSoftware) (Just phyloQueryBuild))
docs :: [Document]
docs = parseDocs foundationsRoots corpus
foundationsRoots :: Vector Ngrams
foundationsRoots = initFoundationsRoots (termListToNgrams termList)
--------------------------------------------
-- | STEP 0 | -- Let's start with an example
--------------------------------------------
termList :: TermList
termList = map (\a -> ([toLower a],[])) actants
actants :: [Ngrams]
actants = [ "Cleopatre" , "Ptolemee", "Ptolemee-XIII", "Ptolemee-XIV"
, "Marc-Antoine", "Cesar" , "Antoine" , "Octave" , "Rome"
, "Alexandrie" , "Auguste" , "Pompee" , "Cassius" , "Brutus"]
corpus :: [(Date, Text)]
corpus = List.sortOn fst [ (-51,"Cleopatre règne sur l’egypte entre 51 et 30 av. J.-C. avec ses frères-epoux Ptolemee-XIII et Ptolemee-XIV, puis aux côtes du general romain Marc-Antoine. Elle est celèbre pour avoir ete la compagne de Jules Cesar puis d'Antoine, avec lesquels elle a eu plusieurs enfants. Partie prenante dans la guerre civile opposant Antoine à Octave, elle est vaincue à la bataille d'Actium en 31 av. J.-C. Sa defaite va permettre aux Romains de mener à bien la conquête de l’egypte, evenement qui marquera la fin de l'epoque hellenistique."), (-40,"Il existe relativement peu d'informations sur son sejour à Rome, au lendemain de l'assassinat de Cesar, ou sur la periode passee à Alexandrie durant l'absence d'Antoine, entre -40 et -37."), (-48,"L'historiographie antique lui est globalement defavorable car inspiree par son vainqueur, l'empereur Auguste, et par son entourage, dont l'interêt est de la noircir, afin d'en faire l'adversaire malfaisant de Rome et le mauvais genie d'Antoine. On observe par ailleurs que Cesar ne fait aucune mention de sa liaison avec elle dans les Commentaires sur la Guerre civile"), (-69,"Cleopatre est nee au cours de l'hiver -69/-686 probablement à Alexandrie."), (-48,"Pompee a en effet ete le protecteur de Ptolemee XII, le père de Cleopatre et de Ptolemee-XIII dont il se considère comme le tuteur."), (-48,"Ptolemee-XIII et Cleopatre auraient d'ailleurs aide Pompee par l'envoi d'une flotte de soixante navires."), (-48,"Mais le jeune roi Ptolemee-XIII et ses conseillers jugent sa cause perdue et pensent s'attirer les bonnes graces du vainqueur en le faisant assassiner à peine a-t-il pose le pied sur le sol egyptien, près de Peluse, le 30 juillet 48 av. J.-C., sous les yeux de son entourage."), (-48,"Cesar fait enterrer la tête de Pompee dans le bosquet de Nemesis en bordure du mur est de l'enceinte d'Alexandrie. Pour autant la mort de Pompee est une aubaine pour Cesar qui tente par ailleurs de profiter des querelles dynastiques pour annexer l’egypte."), (-48,"Il est difficile de se prononcer clairement sur les raisons qui ont pousse Cesar à s'attarder à Alexandrie. Il y a des raisons politiques, mais aussi des raisons plus sentimentales (Cleopatre ?). Il tente d'abord d'obtenir le remboursement de dettes que Ptolemee XII"), (-46,"Les deux souverains sont convoques par Cesar au palais royal d'Alexandrie. Ptolemee-XIII s'y rend après diverses tergiversations ainsi que Cleopatre."), (-47,"A Rome, Cleopatre epouse alors un autre de ses frères cadets, à Alexandrie, Ptolemee-XIV, sur l'injonction de Jules Cesar"), (-46,"Cesar a-t-il comme objectif de montrer ce qu'il en coûte de se revolter contre Rome en faisant figurer dans son triomphe la sœur de Cleopatre et de Ptolemee-XIV, Arsinoe, qui s'est fait reconnaître reine par les troupes de Ptolemee-XIII ?"), (-44,"Au debut de l'annee -44, Cesar est assassine par Brutus. Profitant de la situation confuse qui s'ensuit, Cleopatre quitte alors Rome à la mi-avril, faisant escale en Grèce. Elle parvient à Alexandrie en juillet -44."), (-44,"La guerre que se livrent les assassins de Cesar, Cassius et Brutus et ses heritiers, Octave et Marc-Antoine, oblige Cleopatre à des contorsions diplomatiques."), (-41,"Nous ignorons depuis quand Cleopatre, agee de 29 ans en -41, et Marc-Antoine, qui a une quarantaine d'annees, se connaissent. Marc-Antoine est l'un des officiers qui ont participe au retablissement de Ptolemee XII. Il est plus vraisemblable qu'ils se soient frequentes lors du sejour à Rome de Cleopatre."), (-42,"Brutus tient la Grèce tandis que Cassius s'installe en Syrie. Le gouverneur de Cleopatre à Chypre, Serapion, vient en aide à Cassius."), (-42,"Cassius aurait envisage de s'emparer d'Alexandrie quand le 'debarquement' en Grèce d'Antoine et d'Octave l'oblige à renoncer à ses projets")]
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE TypeSynonymInstances #-}
module Gargantext.Core.Viz.Phylo.LevelMaker
where
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List ((++), sort, concat, nub, last, null)
import Data.Map (Map, (!), empty, singleton, size)
import Data.Text (Text)
import Data.Tuple.Extra
import Data.Vector (Vector)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Metrics
import Gargantext.Core.Viz.Phylo.Aggregates
import Gargantext.Core.Viz.Phylo.Cluster
import Gargantext.Core.Viz.Phylo.BranchMaker
import Gargantext.Core.Viz.Phylo.LinkMaker
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Text.Context (TermList)
import qualified Data.Vector.Storable as VS
import qualified Data.Set as Set
import qualified Data.Vector as Vector
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
-------------------------
-- | PhyloLevelMaker | --
-------------------------
-- | A typeClass for polymorphic PhyloLevel functions
class PhyloLevelMaker aggregate
where
-- | To add a new Level of PhyloGroups to a Phylo based on a list of Aggregates
addPhyloLevel :: Level -> Map (Date,Date) [aggregate] -> Phylo -> Phylo
-- | To create a list of PhyloGroups based on a list of aggregates a
toPhyloGroups :: Level -> (Date,Date) -> [aggregate] -> Map (Date,Date) [aggregate] -> Phylo -> [PhyloGroup]
instance PhyloLevelMaker PhyloCluster
where
--------------------------------------
-- | Level -> Map (Date,Date) [Cluster] -> Phylo -> Phylo
addPhyloLevel lvl m p
| lvl > 1 = addPhyloLevel' lvl m p
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Clusters at level < 2")
--------------------------------------
-- | Level -> (Date,Date) -> [Cluster] -> Map (Date,Date) [Cluster] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l m p =
let clusters = map (\(idx,cluster) -> clusterToGroup (d,d') lvl idx "" cluster m p) $ zip [1..] l
clusters' = clusters `using` parList rdeepseq
in clusters'
--------------------------------------
instance PhyloLevelMaker PhyloFis
where
--------------------------------------
-- | Level -> Map (Date,Date) [Fis] -> Phylo -> Phylo
addPhyloLevel lvl m p
| lvl == 1 = addPhyloLevel' lvl m p
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Fis at level <> 1")
--------------------------------------
-- | Level -> (Date,Date) -> [Fis] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l _ p =
let groups = map (\(idx,fis) -> cliqueToGroup (d,d') lvl idx "" fis (getPhyloCooc p) (getFoundationsRoots p)) $ zip [1..] l
groups' = groups `using` parList rdeepseq
in groups'
--------------------------------------
instance PhyloLevelMaker Document
where
--------------------------------------
-- | Level -> Map (Date,Date) [Document] -> Phylo -> Phylo
addPhyloLevel lvl m p
| lvl == 0 = addPhyloLevel' lvl m p
| otherwise = panic ("[ERR][Viz.Phylo.Example.addPhyloLevel] No process declared for adding Documents at level <> 0")
--------------------------------------
-- | Level -> (Date,Date) -> [Document] -> Map (Date,Date) [Fis] -> Phylo -> [PhyloGroup]
toPhyloGroups lvl (d,d') l _m p = map (\ngram -> ngramsToGroup (d,d') lvl (getIdxInRoots ngram p) ngram [ngram] p)
$ (nub . concat)
$ map text l
--------------------------------------
-- | To traverse a Phylo and add a new PhyloLevel linked to a new list of PhyloGroups
addPhyloLevel' :: PhyloLevelMaker a => Level -> Map (Date, Date) [a] -> Phylo -> Phylo
addPhyloLevel' lvl m p = alterPhyloPeriods
(\period -> let pId = _phylo_periodId period
in over phylo_periodLevels
(\phyloLevels ->
let groups = toPhyloGroups lvl pId (m ! pId) m p
in trace (show (length groups)
<> " groups for "
<> show (pId) )
$ phyloLevels ++ [PhyloLevel (pId, lvl) groups]
) period
) p
----------------------
-- | toPhyloGroup | --
----------------------
-- | To transform a Clique into a PhyloGroup
cliqueToGroup :: PhyloPeriodId
-> Level
-> Int
-> Text
-> PhyloFis
-> Map Date (Map (Int,Int) Double)
-> Vector Ngrams
-> PhyloGroup
cliqueToGroup prd lvl idx lbl fis cooc' root = PhyloGroup ((prd, lvl), idx) lbl ngrams
(getNgramsMeta cooc ngrams)
-- empty
(singleton "support" (fromIntegral $ getSupport fis))
Nothing
cooc
[] [] [] childs
where
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = getMiniCooc (listToFullCombi ngrams) (periodsToYears [prd]) cooc'
--------------------------------------
ngrams :: [Int]
ngrams = sort $ map (\x -> getIdxInRoots' x root)
$ Set.toList
$ getClique fis
--------------------------------------
childs :: [Pointer]
childs = map (\n -> (((prd, lvl - 1), n),1)) ngrams
--------------------------------------
-- | To transform a Cluster into a Phylogroup
clusterToGroup :: PhyloPeriodId
-> Level
-> Int
-> Text
-> PhyloCluster
-> Map (Date,Date) [PhyloCluster]
-> Phylo
-> PhyloGroup
clusterToGroup prd lvl idx lbl groups _m p =
PhyloGroup ((prd, lvl), idx) lbl ngrams
(getNgramsMeta cooc ngrams)
-- empty
empty
Nothing
cooc
ascLink desLink [] childs
where
--------------------------------------
cooc :: Map (Int, Int) Double
cooc = getMiniCooc (listToFullCombi ngrams)
(periodsToYears [prd] )
(getPhyloCooc p )
--------------------------------------
childs :: [Pointer]
childs = map (\g -> (getGroupId g, 1)) groups
ascLink = concat $ map getGroupPeriodParents groups
desLink = concat $ map getGroupPeriodChilds groups
--------------------------------------
ngrams :: [Int]
ngrams = (sort . nub . concat) $ map getGroupNgrams groups
--------------------------------------
-- | To transform a list of Ngrams into a PhyloGroup
ngramsToGroup :: PhyloPeriodId -> Level -> Int -> Text -> [Ngrams] -> Phylo -> PhyloGroup
ngramsToGroup prd lvl idx lbl ngrams p = PhyloGroup ((prd, lvl), idx) lbl (sort $ map (\x -> getIdxInRoots x p) ngrams) empty empty Nothing
(getMiniCooc (listToFullCombi $ sort $ map (\x -> getIdxInRoots x p) ngrams) (periodsToYears [prd]) (getPhyloCooc p))
[] [] [] []
----------------------
-- | toPhyloLevel | --
----------------------
-- | To reconstruct the Phylo from a set of Document to a given Level
toPhylo :: PhyloQueryBuild -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
toPhylo q c termList fis = toNthLevel (getNthLevel q) (getInterTemporalMatching q) (getNthCluster q) phylo1
where
--------------------------------------
phylo1 :: Phylo
phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phyloBase
-- phylo1 = toPhylo1 (getContextualUnit q) (getInterTemporalMatching q) phyloDocs phylo
--------------------------------------
-- phylo0 :: Phylo
-- phylo0 = tracePhyloN 0
-- $ addPhyloLevel 0 phyloDocs phyloBase
--------------------------------------
phyloDocs :: Map (Date, Date) [Document]
phyloDocs = groupDocsByPeriod date (getPhyloPeriods phyloBase) c
--------------------------------------
phyloBase :: Phylo
phyloBase = tracePhyloBase
$ toPhyloBase q init c termList fis
where
init = initPhyloParam (Just defaultPhyloVersion)
(Just defaultSoftware )
(Just q )
---------------------------------------
-- | To incrementally add new Levels to a Phylo by making all the linking and aggregation tasks
toNthLevel :: Level -> Proximity -> Cluster -> Phylo -> Phylo
toNthLevel lvlMax prox clus p
| lvl >= lvlMax = p
| otherwise = toNthLevel lvlMax prox clus
$ traceBranches (lvl + 1)
$ setPhyloBranches (lvl + 1)
-- \$ transposePeriodLinks (lvl + 1)
$ traceTranspose (lvl + 1) Descendant
$ transposeLinks (lvl + 1) Descendant
$ traceTranspose (lvl + 1) Ascendant
$ transposeLinks (lvl + 1) Ascendant
$ tracePhyloN (lvl + 1)
$ setLevelLinks (lvl, lvl + 1)
$ addPhyloLevel (lvl + 1) (clusters) p
where
--------------------------------------
clusters :: Map (Date,Date) [PhyloCluster]
clusters = phyloToClusters lvl clus p
--------------------------------------
lvl :: Level
lvl = getLastLevel p
--------------------------------------
-- | To reconstruct the Level 1 of a Phylo based on a Clustering Method
toPhylo1 :: Cluster -> Proximity -> Map (Date, Date) [Document] -> Phylo -> Phylo
toPhylo1 clus prox d p = case clus of
Fis (FisParams k s t) -> traceBranches 1
-- \$ reLinkPhyloBranches 1
-- \$ traceBranches 1
$ setPhyloBranches 1
$ traceTempoMatching Descendant 1
$ interTempoMatching Descendant 1 prox
$ traceTempoMatching Ascendant 1
$ interTempoMatching Ascendant 1 prox
$ tracePhyloN 1
-- \$ setLevelLinks (0,1)
$ addPhyloLevel 1 (getPhyloFis phyloFis)
$ trace (show (size $ getPhyloFis phyloFis) <> " Fis created") $ phyloFis
where
--------------------------------------
phyloFis :: Phylo
phyloFis = if (null $ getPhyloFis p)
then p & phylo_fis .~ refineFis (docsToFis d p) k s t
else p & phylo_fis .~ docsToFis d p
--------------------------------------
_ -> panic "[ERR][Viz.Phylo.LevelMaker.toPhylo1] fst clustering not recognized"
-- | To create the base of the Phylo (foundations, periods, cooc, etc)
toPhyloBase :: PhyloQueryBuild -> PhyloParam -> [Document] -> TermList -> Map (Date,Date) [PhyloFis] -> Phylo
toPhyloBase q p c termList fis = initPhyloBase periods foundations nbDocs cooc fis p
where
--------------------------------------
cooc :: Map Date (Map (Int,Int) Double)
cooc = docsToCooc c (foundations ^. phylo_foundationsRoots)
--------------------------------------
nbDocs :: Map Date Double
nbDocs = countDocs $ map (\doc -> (date doc, text doc)) c
--------------------------------------
foundations :: PhyloFoundations
foundations = PhyloFoundations (initFoundationsRoots (termListToNgrams termList)) termList
--------------------------------------
periods :: [(Date,Date)]
periods = initPeriods (getPeriodGrain q) (getPeriodSteps q)
$ both date (head' "toPhyloBase" c, last' "toPhyloBase" c)
--------------------------------------
-----------------
-- | Tracers | --
-----------------
tracePhyloN :: Level -> Phylo -> Phylo
tracePhyloN lvl p = trace ("\n---------------\n--| Phylo " <> show (lvl) <> " |--\n---------------\n\n"
<> show (length $ getGroupsWithLevel lvl p) <> " groups created \n") p
traceTranspose :: Level -> Filiation -> Phylo -> Phylo
traceTranspose lvl fil p = trace ("----\n Transpose " <> show (fil) <> " links for " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n") p
tracePhyloBase :: Phylo -> Phylo
tracePhyloBase p = trace ( "\n-------------\n--| Phylo |--\n-------------\n\n"
<> show (length $ _phylo_periods p) <> " periods from "
<> show (getPhyloPeriodId $ (head' "PhyloMaker") $ _phylo_periods p)
<> " to "
<> show (getPhyloPeriodId $ last $ _phylo_periods p)
<> "\n"
<> show ( Vector.length $ getFoundationsRoots p) <> " foundations roots \n") p
traceTempoMatching :: Filiation -> Level -> Phylo -> Phylo
traceTempoMatching fil lvl p = trace ( "----\n" <> show (fil) <> " filtered temporal Matching in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length pts) <> " pointers\n") p
where
--------------------------------------
pts :: [Pointer]
pts = concat $ map (\g -> getGroupPointers PeriodEdge fil g) $ getGroupsWithLevel lvl p
--------------------------------------
traceBranches :: Level -> Phylo -> Phylo
traceBranches lvl p = trace ( "----\n" <> "Branches in Phylo" <> show lvl <> " :\n"
<> "count : " <> show (length $ filter (\(lvl',_) -> lvl' == lvl ) $ getBranchIds p) <> " branches\n"
<> "count : " <> show (length $ getGroupsWithLevel lvl p) <> " groups\n"
<> "groups by branch : " <> show (percentile 25 (VS.fromList brs)) <> " (25%) "
<> show (percentile 50 (VS.fromList brs)) <> " (50%) "
<> show (percentile 75 (VS.fromList brs)) <> " (75%) "
<> show (percentile 90 (VS.fromList brs)) <> " (90%)\n") p
where
--------------------------------------
brs :: [Double]
brs = sort $ map (\(_,gs) -> fromIntegral $ length gs)
$ filter (\(id,_) -> (fst id) == lvl)
$ getGroupsByBranches p
--------------------------------------
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.LinkMaker
where
import Control.Parallel.Strategies
import Control.Lens hiding (both, Level)
import Data.List ((++), sortOn, null, tail, splitAt, concat, delete, intersect, elemIndex, groupBy, union, inits, scanl, find)
import Data.Tuple.Extra
import Data.Map (Map, (!), fromListWith, elems, restrictKeys, filterWithKey, keys, unionWith, unions, intersectionWith, member, fromList)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.Metrics
import qualified Data.List as List
import qualified Data.Maybe as Maybe
import qualified Data.Map as Map
import qualified Data.Vector.Storable as VS
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
-----------------------------
-- | From Level to level | --
-----------------------------
-- | To set the LevelLinks between a given PhyloGroup and a list of childs/parents PhyloGroups
linkGroupToGroups :: PhyloGroup -> [PhyloGroup] -> PhyloGroup
linkGroupToGroups current targets = over (phylo_groupLevelParents) addPointers current
where
--------------------------------------
addPointers :: [Pointer] -> [Pointer]
addPointers lp = lp ++ Maybe.mapMaybe (\target ->
if (elem (getGroupId current) (getGroupLevelChildsId target))
then Just ((getGroupId target),1)
else Nothing) targets
--------------------------------------
setLevelLinks :: (Level,Level) -> Phylo -> Phylo
setLevelLinks (lvl,lvl') p = alterGroupWithLevel (\group -> linkGroupToGroups group
$ filter (\g' -> (not . null) $ intersect (getGroupNgrams group) (getGroupNgrams g'))
$ getGroupsWithFilters lvl' (getGroupPeriod group) p) lvl p
-------------------------------
-- | From Period to Period | --
-------------------------------
-- | To get the next or previous PhyloPeriod based on a given PhyloPeriodId
getNextPeriods :: Filiation -> Int -> PhyloPeriodId -> [PhyloPeriodId] -> [PhyloPeriodId]
getNextPeriods to' limit id l = case to' of
Descendant -> take limit $ (tail . snd) next
Ascendant -> take limit $ (reverse . fst) next
_ -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] Filiation type not defined")
where
--------------------------------------
next :: ([PhyloPeriodId], [PhyloPeriodId])
next = splitAt idx l
--------------------------------------
idx :: Int
idx = case (List.elemIndex id l) of
Nothing -> panic ("[ERR][Viz.Phylo.Example.getNextPeriods] PhyloPeriodId not defined")
Just i -> i
--------------------------------------
-- | To get the number of docs produced during a list of periods
periodsToNbDocs :: [PhyloPeriodId] -> Phylo -> Double
periodsToNbDocs prds phylo = sum $ elems
$ restrictKeys (phylo ^. phylo_docsByYears)
$ periodsToYears prds
-- | To process a given Proximity
processProximity :: Proximity -> Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> [Int] -> [Int] -> Double
processProximity proximity nbDocs cooc cooc' ngrams ngrams' = case proximity of
WeightedLogJaccard (WLJParams _ sens) -> weightedLogJaccard sens nbDocs cooc cooc' ngrams ngrams'
Hamming (HammingParams _) -> hamming cooc cooc'
_ -> panic "[ERR][Viz.Phylo.LinkMaker.processProximity] Unknown proximity"
filterProximity :: Double -> Proximity -> Bool
filterProximity score prox = case prox of
WeightedLogJaccard (WLJParams thr _) -> score >= thr
Hamming (HammingParams thr) -> score <= thr
_ -> panic "[ERR][Viz.Phylo.LinkMaker.filterProximity] Unknown proximity"
makePairs :: [(Date,Date)] -> PhyloGroup -> Phylo -> [(PhyloGroup,PhyloGroup)]
makePairs prds g p = filter (\pair -> ((last' "makePairs" prds) == (getGroupPeriod $ fst pair))
|| ((last' "makePairs" prds) == (getGroupPeriod $ snd pair)))
$ listToPairs
$ filter (\g' -> (elem (getGroupPeriod g') prds)
&& ((not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
&& (((last' "makePairs" prds) == (getGroupPeriod g))
||((matchWithPairs g (g,g') p) >= (getPhyloMatchingFrameTh p))))
$ getGroupsWithLevel (getGroupLevel g) p
matchWithPairs :: PhyloGroup -> (PhyloGroup,PhyloGroup) -> Phylo -> Double
matchWithPairs g1 (g2,g3) p =
let nbDocs = periodsToNbDocs [(getGroupPeriod g1),(getGroupPeriod g2),(getGroupPeriod g3)] p
cooc = if (g2 == g3)
then getGroupCooc g2
else unionWith (+) (getGroupCooc g2) (getGroupCooc g3)
ngrams = if (g2 == g3)
then getGroupNgrams g2
else union (getGroupNgrams g2) (getGroupNgrams g3)
in processProximity (getPhyloProximity p) nbDocs (getGroupCooc g1) cooc (getGroupNgrams g1) ngrams
phyloGroupMatching :: [PhyloPeriodId] -> PhyloGroup -> Phylo -> [Pointer]
phyloGroupMatching periods g p = case pointers of
Nothing -> []
Just pts -> head' "phyloGroupMatching"
-- Keep only the best set of pointers grouped by proximity
$ groupBy (\pt pt' -> snd pt == snd pt')
$ reverse $ sortOn snd pts
-- Find the first time frame where at leats one pointer satisfies the proximity threshold
where
--------------------------------------
pointers :: Maybe [Pointer]
pointers = find (not . null)
-- For each time frame, process the Proximity on relevant pairs of targeted groups
$ scanl (\acc frame ->
let pairs = makePairs frame g p
in acc ++ ( filter (\(_,proxi) -> filterProximity proxi (getPhyloProximity p))
$ concat
$ map (\(t,t') ->
let proxi = matchWithPairs g (t,t') p
in
if (t == t')
then [(getGroupId t,proxi)]
else [(getGroupId t,proxi),(getGroupId t',proxi)] ) pairs ) ) []
-- [[1900],[1900,1901],[1900,1901,1902],...] | length max => + 5 years
$ inits periods
--------------------------------------
-- | To add some Pointer to a PhyloGroup
addPointers' :: Filiation -> [Pointer] -> PhyloGroup -> PhyloGroup
addPointers' fil pts g = g & case fil of
Descendant -> phylo_groupPeriodChilds %~ (++ pts)
Ascendant -> phylo_groupPeriodParents %~ (++ pts)
_ -> panic ("[ERR][Viz.Phylo.LinkMaker.addPointers] Wrong type of filiation")
-- | To update a list of phyloGroups with some Pointers
updateGroups :: Filiation -> Level -> Map PhyloGroupId [Pointer] -> Phylo -> Phylo
updateGroups fil lvl m p = alterPhyloGroups (\gs -> map (\g -> if ((getGroupLevel g) == lvl) && (member (getGroupId g) m)
then addPointers' fil (m ! (getGroupId g)) g
else g ) gs) p
-- | Optimisation : to keep only the groups that have at least one ngrams in commons with the target
initCandidates :: PhyloGroup -> [PhyloPeriodId] -> [PhyloGroup] -> [PhyloGroup]
initCandidates g prds gs = filter (\g' -> elem (getGroupPeriod g') prds)
$ filter (\g' -> (not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))
$ delete g gs
-- | a init avec la [[head groups]] et la tail groups
toBranches :: [[PhyloGroup]] -> [PhyloGroup] -> [[PhyloGroup]]
toBranches mem gs
| null gs = mem
| otherwise = toBranches mem' $ tail gs
where
--------------------------------------
mem' :: [[PhyloGroup]]
mem' = if (null withHead)
then mem ++ [[head' "toBranches" gs]]
else (filter (\gs' -> not $ elem gs' withHead) mem)
++
[(concat withHead) ++ [head' "toBranches" gs]]
--------------------------------------
withHead :: [[PhyloGroup]]
withHead = filter (\gs' -> (not . null)
$ intersect (concat $ map getGroupNgrams gs')
(getGroupNgrams $ (head' "toBranches" gs))
) mem
--------------------------------------
-- | To process an intertemporal matching task to a Phylo at a given level
-- | 1) split all groups (of the level) in branches (ie:related components sharing at least one ngram)
-- | 2) for each branch, for each group find the best candidates (by Filiation and Proximity) and create the corresponding pointers
-- | 3) update all the groups with the new pointers if they exist
interTempoMatching :: Filiation -> Level -> Proximity -> Phylo -> Phylo
interTempoMatching fil lvl _ p = updateGroups fil lvl (Map.fromList pointers) p
where
--------------------------------------
pointers :: [(PhyloGroupId,[Pointer])]
pointers =
let pts = map (\g -> let periods = getNextPeriods fil (getPhyloMatchingFrame p) (getGroupPeriod g) (getPhyloPeriods p)
in (getGroupId g, phyloGroupMatching periods g p)) groups
pts' = pts `using` parList rdeepseq
in pts'
--------------------------------------
groups :: [PhyloGroup]
groups = getGroupsWithLevel lvl p
--------------------------------------
------------------------------------------------------------------------
-- | Make links from Period to Period after level 1
listToTuple :: (a -> b) -> [a] -> [(b,a)]
listToTuple f l = map (\x -> (f x, x)) l
groupsToMaps :: Ord b => (PhyloGroup -> b) -> [PhyloGroup] -> [Map PhyloGroupId PhyloGroup]
groupsToMaps f gs = map (\gs' -> fromList $ listToTuple getGroupId gs')
$ groupBy ((==) `on` f)
$ sortOn f gs
phyloToPeriodMaps :: Level -> Filiation -> Phylo -> [Map PhyloGroupId PhyloGroup]
phyloToPeriodMaps lvl fil p =
let prdMap = groupsToMaps (fst . getGroupPeriod) (getGroupsWithLevel lvl p)
in case fil of
Ascendant -> reverse prdMap
Descendant -> prdMap
_ -> panic ("[ERR][Viz.Phylo.LinkMaker.phyloToPeriodMaps] Wrong type of filiation")
trackPointersRec :: Filiation -> Map PhyloGroupId PhyloGroup -> [PhyloGroup] -> [PhyloGroup] -> [PhyloGroup]
trackPointersRec fil m gs res =
if (null gs) then res
else if (Map.null m) then res ++ gs
else
let g = head' "track" gs
pts = Map.fromList $ getGroupPointers PeriodEdge fil g
pts' = Map.toList $ fromListWith (\w w' -> max w w') $ concat $ elems
$ intersectionWith (\w g' -> map (\(id,_w') -> (id, w))
$ getGroupPointers LevelEdge Ascendant g') pts m
res' = res ++ [case fil of
Ascendant -> g & phylo_groupPeriodParents .~ pts'
Descendant -> g & phylo_groupPeriodChilds .~ pts'
_ -> panic ("[ERR][Viz.Phylo.LinkMaker.transposeLinks] Wrong type of filiation")]
in trackPointersRec fil (filterWithKey (\k _ -> not $ elem k (keys pts)) m) (tail' "track" gs) res'
transposeLinks :: Level -> Filiation -> Phylo -> Phylo
transposeLinks lvl fil p =
let prdMap = zip (phyloToPeriodMaps (lvl - 1) fil p) (phyloToPeriodMaps lvl fil p)
transposed = map (\(gs,gs') ->
let idx = fromJust $ elemIndex (gs,gs') prdMap
next = take (getPhyloMatchingFrame p) $ snd $ splitAt (idx + 1) prdMap
groups = trackPointersRec fil (unions $ map fst next) (elems gs') []
in (getGroupPeriod $ head' "transpose" groups ,groups)
) prdMap
transposed' = Map.fromList $ (transposed `using` parList rdeepseq)
in alterPhyloGroups
(\gs -> if ((not . null) gs) && (lvl == (getGroupLevel $ head' "transpose" gs))
then transposed' ! (getGroupPeriod $ head' "transpose" gs)
else gs
) p
-- | Transpose the parent/child pointers from one level to another
transposePeriodLinks :: Level -> Phylo -> Phylo
transposePeriodLinks lvl p = alterPhyloGroups
(\gs -> if ((not . null) gs) && (elem lvl $ map getGroupLevel gs)
then
let groups = map (\g -> let m = reduceGroups g lvlGroups
in g & phylo_groupPeriodParents .~ (trackPointers m $ g ^. phylo_groupPeriodParents)
& phylo_groupPeriodChilds .~ (trackPointers m $ g ^. phylo_groupPeriodChilds )) gs
groups' = groups `using` parList rdeepseq
in groups'
else gs
) p
where
--------------------------------------
-- | find an other way to find the group from the id
trackPointers :: Map PhyloGroupId PhyloGroup -> [Pointer] -> [Pointer]
trackPointers m pts = Map.toList
$ fromListWith (\w w' -> max w w')
$ map (\(id,_w) -> (getGroupLevelParentId $ m ! id,_w)) pts
--------------------------------------
reduceGroups :: PhyloGroup -> [PhyloGroup] -> Map PhyloGroupId PhyloGroup
reduceGroups g gs = Map.fromList
$ map (\g' -> (getGroupId g',g'))
$ filter (\g' -> ((not . null) $ intersect (getGroupNgrams g) (getGroupNgrams g'))) gs
--------------------------------------
lvlGroups :: [PhyloGroup]
lvlGroups = getGroupsWithLevel (lvl - 1) p
--------------------------------------
----------------
-- | Tracer | --
----------------
traceMatching :: Filiation -> Level -> Double -> [Double] -> Phylo -> Phylo
traceMatching fil lvl thr lst p = trace ( "----\n" <> show (fil) <> " unfiltered temporal Matching in Phylo" <> show (lvl) <> " :\n"
<> "count : " <> show (length lst) <> " potential pointers (" <> show (length $ filter (>= thr) lst) <> " >= " <> show (thr) <> ")\n"
<> "similarity : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") p
tracePreBranches :: [[PhyloGroup]] -> [[PhyloGroup]]
tracePreBranches bs = trace (show (length bs) <> " pre-branches" <> "\n"
<> "with sizes : " <> show (map length bs) <> "\n") bs
{-|
Module : Gargantext.Core.Viz.Phylo.Main
Description : Phylomemy Main
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.Main
where
import Data.GraphViz
import qualified Data.ByteString as DB
import qualified Data.List as List
import qualified Data.Map as Map
import Data.Maybe
import qualified Data.Text as Text
import Data.Text (Text)
import Debug.Trace (trace)
import GHC.IO (FilePath)
import Gargantext.API.Ngrams.Tools (getTermsWith)
import Gargantext.Core.Types
import Gargantext.Database.Action.Flow
import Gargantext.Database.Admin.Types.Hyperdata
import Gargantext.Database.Query.Table.Node(defaultList)
import Gargantext.Database.Query.Table.NodeNode (selectDocs)
import Gargantext.Database.Schema.Ngrams (NgramsType(..))
import Gargantext.Prelude
import Gargantext.Core.Text.Context (TermList)
import Gargantext.Core.Text.Terms.WithList
import Gargantext.Core.Viz.Phylo hiding (Svg, Dot)
import Gargantext.Core.Viz.Phylo.LevelMaker (toPhylo)
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.View.Export
import Gargantext.Core.Viz.Phylo.View.ViewMaker -- TODO Just Maker is fine
type MinSizeBranch = Int
flowPhylo :: FlowCmdM env err m
=> CorpusId
-> m Phylo
flowPhylo cId = do
list <- defaultList cId
termList <- Map.toList <$> getTermsWith Text.words [list] NgramsTerms MapTerm
docs' <- catMaybes
<$> map (\h -> (,) <$> _hd_publication_year h
<*> _hd_abstract h
)
<$> selectDocs cId
let
patterns = buildPatterns termList
-- | To filter the Ngrams of a document based on the termList
filterTerms :: Patterns -> (Date, Text) -> (Date, [Text])
filterTerms patterns' (y,d) = (y,termsInText patterns' d)
where
--------------------------------------
termsInText :: Patterns -> Text -> [Text]
termsInText pats txt = List.nub
$ List.concat
$ map (map Text.unwords)
$ extractTermsWithList pats txt
--------------------------------------
docs = map ((\(y,t) -> Document y t) . filterTerms patterns) docs'
--liftBase $ flowPhylo' (List.sortOn date docs) termList l m fp
pure $ buildPhylo (List.sortOn date docs) termList
-- TODO SortedList Document
flowPhylo' :: [Document] -> TermList -- ^Build
-> Level -> MinSizeBranch -- ^View
-> FilePath
-> IO FilePath
flowPhylo' corpus terms l m fp = do
let
phylo = buildPhylo corpus terms
phVie = viewPhylo l m phylo
writePhylo fp phVie
defaultQuery :: PhyloQueryBuild
defaultQuery = defaultQueryBuild'
"Default Title"
"Default Description"
buildPhylo :: [Document] -> TermList -> Phylo
buildPhylo = trace (show defaultQuery) $ buildPhylo' defaultQuery
buildPhylo' :: PhyloQueryBuild -> [Document] -> TermList -> Phylo
buildPhylo' q corpus termList = toPhylo q corpus termList Map.empty
queryView :: Level -> MinSizeBranch -> PhyloQueryView
queryView level _minSizeBranch = PhyloQueryView level Merge False 2
[BranchAge]
[]
-- [SizeBranch $ SBParams minSizeBranch]
[BranchPeakFreq,GroupLabelCooc]
(Just (ByBranchAge,Asc))
Json Flat True
viewPhylo :: Level -> MinSizeBranch -> Phylo -> PhyloView
viewPhylo l b phylo = toPhyloView (queryView l b) phylo
writePhylo :: FilePath -> PhyloView -> IO FilePath
writePhylo fp phview = runGraphviz (viewToDot phview) Svg fp
viewPhylo2Svg :: PhyloView -> IO DB.ByteString
viewPhylo2Svg p = graphvizWithHandle Dot (viewToDot p) Svg DB.hGetContents
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.Metrics
where
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
import Control.Lens hiding (Level)
import Data.List ((\\), sortOn, concat, nub, union, intersect, null, (++), sort)
import Data.Map (Map, (!), toList, size, insert, unionWith, intersection, intersectionWith, filterWithKey, elems, fromList, findWithDefault, fromListWith)
import Data.Text (Text)
-- import Debug.Trace (trace)
----------------
-- | Ngrams | --
----------------
-- | Return the conditional probability of i knowing j
conditional :: Ord a => Map (a,a) Double -> a -> a -> Double
conditional m i j = (findWithDefault 0 (i,j) m)
/ (m ! (j,j))
-- | Return the genericity score of a given ngram
genericity :: Map (Int, Int) Double -> [Int] -> Int -> Double
genericity m l i = ( (sum $ map (\j -> conditional m i j) l)
- (sum $ map (\j -> conditional m j i) l)) / (fromIntegral $ (length l) + 1)
-- | Return the specificity score of a given ngram
specificity :: Map (Int, Int) Double -> [Int] -> Int -> Double
specificity m l i = ( (sum $ map (\j -> conditional m j i) l)
- (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
-- | Return the inclusion score of a given ngram
inclusion :: Map (Int, Int) Double -> [Int] -> Int -> Double
inclusion m l i = ( (sum $ map (\j -> conditional m j i) l)
+ (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
-- | Process some metrics on top of ngrams
getNgramsMeta :: Map (Int, Int) Double -> [Int] -> Map Text [Double]
getNgramsMeta m ngrams = fromList
[ ("genericity" , map (\n -> genericity m (ngrams \\ [n]) n) ngrams ),
("specificity", map (\n -> specificity m (ngrams \\ [n]) n) ngrams ),
("inclusion" , map (\n -> inclusion m (ngrams \\ [n]) n) ngrams )]
-- | To get the nth most occurent elems in a coocurency matrix
getNthMostOcc :: Int -> Map (Int,Int) Double -> [Int]
getNthMostOcc nth cooc = (nub . concat)
$ map (\((idx,idx'),_) -> [idx,idx'])
$ take nth
$ reverse
$ sortOn snd $ toList cooc
-------------------------
-- | Ngrams Dynamics | --
-------------------------
sharedWithParents :: Date -> PhyloBranchId -> Int -> PhyloView -> Bool
sharedWithParents inf bid n pv = elem n
$ foldl (\mem pn -> if ((bid == (fromJust $ (pn ^. pn_bid)))
&& (inf > (fst $ getNodePeriod pn)))
then nub $ mem ++ (pn ^. pn_idx)
else mem ) []
$ (pv ^. pv_nodes)
findDynamics :: Int -> PhyloView -> PhyloNode -> Map Int (Date,Date) -> Double
findDynamics n pv pn m =
let prd = getNodePeriod pn
bid = fromJust $ (pn ^. pn_bid)
end = last' "dynamics" (sort $ map snd $ elems m)
in if (((snd prd) == (snd $ m ! n)) && (snd prd /= end))
-- emergence
then 2
else if ((fst prd) == (fst $ m ! n))
-- recombination
then 0
else if (not $ sharedWithParents (fst prd) bid n pv)
-- decrease
then 1
else 3
processDynamics :: PhyloView -> PhyloView
processDynamics pv = alterPhyloNode (\pn ->
pn & pn_metrics %~ insert "dynamics" (map (\n -> findDynamics n pv pn ngramsDates) $ (pn ^. pn_idx) ) ) pv
where
--------------------------------------
ngramsDates :: Map Int (Date,Date)
ngramsDates = map (\ds -> let ds' = sort ds
in (head' "Dynamics" ds', last' "Dynamics" ds'))
$ fromListWith (++)
$ foldl (\mem pn -> mem ++ (map (\n -> (n, [fst $ getNodePeriod pn, snd $ getNodePeriod pn]))
$ (pn ^. pn_idx))) []
$ (pv ^. pv_nodes)
--------------------------------------
-------------------
-- | Proximity | --
-------------------
-- | Process the inverse sumLog
sumInvLog :: Double -> [Double] -> Double
sumInvLog s l = foldl (\mem x -> mem + (1 / log (s + x))) 0 l
-- | Process the sumLog
sumLog :: Double -> [Double] -> Double
sumLog s l = foldl (\mem x -> mem + log (s + x)) 0 l
-- | To compute a jaccard similarity between two lists
jaccard :: [Int] -> [Int] -> Double
jaccard inter' union' = ((fromIntegral . length) $ inter') / ((fromIntegral . length) $ union')
-- | To get the diagonal of a matrix
toDiago :: Map (Int, Int) Double -> [Double]
toDiago cooc = elems $ filterWithKey (\(x,x') _ -> x == x') cooc
-- | To process WeighedLogJaccard distance between to coocurency matrix
weightedLogJaccard :: Double -> Double -> Map (Int, Int) Double -> Map (Int, Int) Double -> [Int] -> [Int] -> Double
weightedLogJaccard sens nbDocs cooc cooc' ngrams ngrams'
| null gInter = 0
| gInter == gUnion = 1
| sens == 0 = jaccard gInter gUnion
| sens > 0 = (sumInvLog sens wInter) / (sumInvLog sens wUnion)
| otherwise = (sumLog sens wInter) / (sumLog sens wUnion)
where
--------------------------------------
gInter :: [Int]
gInter = intersect ngrams ngrams'
--------------------------------------
gUnion :: [Int]
gUnion = union ngrams ngrams'
--------------------------------------
wInter :: [Double]
wInter = toDiago $ map (/nbDocs) $ intersectionWith (+) cooc cooc'
--------------------------------------
wUnion :: [Double]
wUnion = toDiago $ map (/nbDocs) $ unionWith (+) cooc cooc'
--------------------------------------
-- | To process the Hamming distance between two PhyloGroup fields
hamming :: Map (Int, Int) Double -> Map (Int, Int) Double -> Double
hamming f1 f2 = fromIntegral $ max ((size inter) - (size f1)) ((size inter) - (size f2))
where
--------------------------------------
inter :: Map (Int, Int) Double
inter = intersection f1 f2
--------------------------------------
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
{-# LANGUAGE ViewPatterns #-}
module Gargantext.Core.Viz.Phylo.Tools
where
import Control.Lens hiding (both, Level, Empty)
import Data.List (intersect, (++), sort, null, tail, last, tails, delete, nub, sortOn, nubBy, concat)
import Data.Maybe (mapMaybe,fromMaybe)
import Data.Map (Map, mapKeys, member, (!), restrictKeys, elems, empty, filterWithKey, unionWith)
import Data.Set (Set)
import Data.Text (Text,toLower,unwords)
import Data.Tuple.Extra
import Data.Vector (Vector,elemIndex)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector as Vector
--------------
-- | Misc | --
--------------
-- | Define a default value
def :: a -> Maybe a -> a
def = fromMaybe
-- | Does a List of Sets contains at least one Set of an other List
doesAnySetContains :: Eq a => Set a -> [Set a] -> [Set a] -> Bool
doesAnySetContains h l l' = any (\c -> doesContains (Set.toList c) (Set.toList h)) (l' ++ l)
-- | Does a list of A contains an other list of A
doesContains :: Eq a => [a] -> [a] -> Bool
doesContains l l'
| null l' = True
| length l' > length l = False
| elem (head' "doesContains" l') l = doesContains l (tail l')
| otherwise = False
-- | Does a list of ordered A contains an other list of ordered A
doesContainsOrd :: Eq a => Ord a => [a] -> [a] -> Bool
doesContainsOrd l l'
| null l' = False
| last l < (head' "doesContainsOrd" l') = False
| (head' "doesContainsOrd" l') `elem` l = True
| otherwise = doesContainsOrd l (tail l')
-- | To filter nested Sets of a
filterNestedSets :: Eq a => Set a -> [Set a] -> [Set a] -> [Set a]
filterNestedSets h l l'
| null l = if doesAnySetContains h l l'
then l'
else h : l'
| doesAnySetContains h l l' = filterNestedSets (head' "filterNestedSets1" l) (tail l) l'
| otherwise = filterNestedSets (head' "filterNestedSets2" l) (tail l) (h : l')
-- | To get the good pair of keys (x,y) or (y,x) in a given Map (a,b) c
getKeyPair :: (Int,Int) -> Map (Int,Int) a -> (Int,Int)
getKeyPair (x,y) m = case findPair (x,y) m of
Nothing -> panic "[ERR][Viz.Phylo.Example.getKeyPair] Nothing"
Just i -> i
where
--------------------------------------
findPair :: (Int,Int) -> Map (Int,Int) a -> Maybe (Int,Int)
findPair (x',y') m'
| member (x',y') m' = Just (x',y')
| member (y',x') m' = Just (y',x')
| otherwise = Nothing
--------------------------------------
-- | To filter Fis with small Support but by keeping non empty Periods
keepFilled :: (Int -> [a] -> [a]) -> Int -> [a] -> [a]
keepFilled f thr l = if (null $ f thr l) && (not $ null l)
then keepFilled f (thr - 1) l
else f thr l
-- | To get all combinations of a list
listToFullCombi :: Eq a => [a] -> [(a,a)]
listToFullCombi l = [(x,y) | x <- l, y <- l]
-- | To get all combinations of a list
listToDirectedCombi :: Eq a => [a] -> [(a,a)]
listToDirectedCombi l = [(x,y) | x <- l, y <- l, x /= y]
listToEqualCombi :: Eq a => [a] -> [(a,a)]
listToEqualCombi l = [(x,y) | x <- l, y <- l, x == y]
listToPairs :: Eq a => [a] -> [(a,a)]
listToPairs l = (listToEqualCombi l) ++ (listToUnDirectedCombi l)
-- | To get all combinations of a list and apply a function to the resulting list of pairs
listToDirectedCombiWith :: Eq a => forall b. (a -> b) -> [a] -> [(b,b)]
listToDirectedCombiWith f l = [(f x,f y) | x <- l, y <- l, x /= y]
-- | To get the sequential combinations of an order list
listToSequentialCombi :: Eq a => [a] -> [(a,a)]
listToSequentialCombi l = nubBy (\x y -> fst x == fst y) $ listToUnDirectedCombi l
-- | To get all combinations of a list with no repetition
listToUnDirectedCombi :: [a] -> [(a,a)]
listToUnDirectedCombi l = [ (x,y) | (x:rest) <- tails l, y <- rest ]
-- | To get all combinations of a list with no repetition and apply a function to the resulting list of pairs
listToUnDirectedCombiWith :: forall a b. (a -> b) -> [a] -> [(b,b)]
listToUnDirectedCombiWith f l = [ (f x, f y) | (x:rest) <- tails l, y <- rest ]
-- | To transform a list of Ngrams Indexes into a Label
ngramsToLabel :: Vector Ngrams -> [Int] -> Text
ngramsToLabel ngrams l = unwords $ tail' "ngramsToLabel" $ concat $ map (\n -> ["|",n]) $ ngramsToText ngrams l
-- | To transform a list of Ngrams Indexes into a list of Text
ngramsToText :: Vector Ngrams -> [Int] -> [Text]
ngramsToText ngrams l = map (\idx -> ngrams Vector.! idx) l
-- | To transform a list of ngrams into a list of indexes
ngramsToIdx :: [Ngrams] -> Vector Ngrams -> [Int]
ngramsToIdx ns v = sort $ map (\n -> getIdxInVector n v) ns
-- | To unify the keys (x,y) that Map 1 share with Map 2 such as: (x,y) <=> (y,x)
unifySharedKeys :: Eq a => Ord a => Map (a,a) b -> Map (a,a) b -> Map (a,a) b
unifySharedKeys m1 m2 = mapKeys (\(x,y) -> if member (y,x) m2
then (y,x)
else (x,y) ) m1
---------------
-- | Phylo | --
---------------
-- | An analyzer ingests a Ngrams and generates a modified version of it
phyloAnalyzer :: Ngrams -> Ngrams
phyloAnalyzer n = toLower n
-- | To init the foundation roots of the Phylo as a Vector of Ngrams
initFoundationsRoots :: [Ngrams] -> Vector Ngrams
initFoundationsRoots l = Vector.fromList $ map phyloAnalyzer l
-- | To init the base of a Phylo from a List of Periods and Foundations
initPhyloBase :: [(Date, Date)] -> PhyloFoundations -> Map Date Double -> Map Date (Map (Int,Int) Double) -> Map (Date,Date) [PhyloFis] -> PhyloParam -> Phylo
initPhyloBase pds fds nbDocs cooc fis prm = Phylo ((fst . (head' "initPhyloBase")) pds, (snd . last) pds) fds (map (\pd -> initPhyloPeriod pd []) pds) nbDocs cooc fis prm
-- | To init the param of a Phylo
initPhyloParam :: Maybe Text -> Maybe Software -> Maybe PhyloQueryBuild -> PhyloParam
initPhyloParam (def defaultPhyloVersion -> v) (def defaultSoftware -> s) (def defaultQueryBuild -> q) = PhyloParam v s q
-- | To get the last computed Level in a Phylo
getLastLevel :: Phylo -> Level
getLastLevel p = (last . sort)
$ map (snd . getPhyloLevelId)
$ view ( phylo_periods
. traverse
. phylo_periodLevels ) p
-- | To get all the coocurency matrix of a phylo
getPhyloCooc :: Phylo -> Map Date (Map (Int,Int) Double)
getPhyloCooc p = p ^. phylo_cooc
-- | To get the PhyloParam of a Phylo
getPhyloParams :: Phylo -> PhyloParam
getPhyloParams = _phylo_param
-- | To get the title of a Phylo
getPhyloTitle :: Phylo -> Text
getPhyloTitle p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
-- | To get the desc of a Phylo
getPhyloDescription :: Phylo -> Text
getPhyloDescription p = _q_phyloTitle $ _phyloParam_query $ getPhyloParams p
getPhyloMatchingFrame :: Phylo -> Int
getPhyloMatchingFrame p = _q_interTemporalMatchingFrame $ _phyloParam_query $ getPhyloParams p
getPhyloMatchingFrameTh :: Phylo -> Double
getPhyloMatchingFrameTh p = _q_interTemporalMatchingFrameTh $ _phyloParam_query $ getPhyloParams p
getPhyloProximity :: Phylo -> Proximity
getPhyloProximity p = _q_interTemporalMatching $ _phyloParam_query $ getPhyloParams p
getPhyloReBranchThr :: Phylo -> Double
getPhyloReBranchThr p = _q_reBranchThr $ _phyloParam_query $ getPhyloParams p
getPhyloReBranchNth :: Phylo -> Int
getPhyloReBranchNth p = _q_reBranchNth $ _phyloParam_query $ getPhyloParams p
getPhyloFis :: Phylo -> Map (Date,Date) [PhyloFis]
getPhyloFis = _phylo_fis
--------------------
-- | PhyloRoots | --
--------------------
-- | To get the foundations of a Phylo
getFoundations :: Phylo -> PhyloFoundations
getFoundations = _phylo_foundations
-- | To get the foundations roots of a Phylo
getFoundationsRoots :: Phylo -> Vector Ngrams
getFoundationsRoots p = (getFoundations p) ^. phylo_foundationsRoots
-- | To get the Index of a Ngrams in the foundationsRoots of a Phylo
getIdxInRoots :: Ngrams -> Phylo -> Int
getIdxInRoots n p = case (elemIndex n (getFoundationsRoots p)) of
Nothing -> panic $ "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots: " <> cs n
Just idx -> idx
getIdxInRoots' :: Ngrams -> Vector Ngrams -> Int
getIdxInRoots' n root = case (elemIndex n root) of
Nothing -> panic $ "[ERR][Viz.Phylo.Tools.getIdxInRoots] Ngrams not in foundationsRoots: " <> cs n
Just idx -> idx
getIdxInVector :: Ngrams -> Vector Ngrams -> Int
getIdxInVector n ns = case (elemIndex n ns) of
Nothing -> panic $ "[ERR][Viz.Phylo.Tools.getIdxInVector] Ngrams not in foundationsRoots: " <> cs n
Just idx -> idx
--------------------
-- | PhyloGroup | --
--------------------
-- | To alter a PhyloGroup matching a given Level
alterGroupWithLevel :: (PhyloGroup -> PhyloGroup) -> Level -> Phylo -> Phylo
alterGroupWithLevel f lvl p = over ( phylo_periods
. traverse
. phylo_periodLevels
. traverse
. phylo_levelGroups
. traverse
) (\g -> if getGroupLevel g == lvl
then f g
else g ) p
-- | To alter each list of PhyloGroups following a given function
alterPhyloGroups :: ([PhyloGroup] -> [PhyloGroup]) -> Phylo -> Phylo
alterPhyloGroups f p = over ( phylo_periods
. traverse
. phylo_periodLevels
. traverse
. phylo_levelGroups
) f p
-- | To filter the PhyloGroup of a Phylo according to a function and a value
filterGroups :: Eq a => (PhyloGroup -> a) -> a -> [PhyloGroup] -> [PhyloGroup]
filterGroups f x l = filter (\g -> (f g) == x) l
-- | To maybe get the PhyloBranchId of a PhyloGroup
getGroupBranchId :: PhyloGroup -> Maybe PhyloBranchId
getGroupBranchId = _phylo_groupBranchId
-- | To get the PhyloGroups Childs of a PhyloGroup
getGroupChilds :: PhyloGroup -> Phylo -> [PhyloGroup]
getGroupChilds g p = getGroupsFromIds (getGroupPeriodChildsId g) p
-- | To get the id of a PhyloGroup
getGroupId :: PhyloGroup -> PhyloGroupId
getGroupId = _phylo_groupId
getGroupCooc :: PhyloGroup -> Map (Int,Int) Double
getGroupCooc = _phylo_groupCooc
-- | To get the level out of the id of a PhyloGroup
getGroupLevel :: PhyloGroup -> Int
getGroupLevel = snd . fst . getGroupId
-- | To get the level child pointers of a PhyloGroup
getGroupLevelChilds :: PhyloGroup -> [Pointer]
getGroupLevelChilds = _phylo_groupLevelChilds
-- | To get the PhyloGroups Level Childs Ids of a PhyloGroup
getGroupLevelChildsId :: PhyloGroup -> [PhyloGroupId]
getGroupLevelChildsId g = map fst $ getGroupLevelChilds g
-- | To get the level parent pointers of a PhyloGroup
getGroupLevelParents :: PhyloGroup -> [Pointer]
getGroupLevelParents = _phylo_groupLevelParents
-- | To get the PhyloGroups Level Parents Ids of a PhyloGroup
getGroupLevelParentsId :: PhyloGroup -> [PhyloGroupId]
getGroupLevelParentsId g = map fst $ getGroupLevelParents g
-- | To get the PhyloGroups Level Parents Ids of a PhyloGroup
getGroupLevelParentId :: PhyloGroup -> PhyloGroupId
getGroupLevelParentId g = (head' "getGroupLevelParentId") $ getGroupLevelParentsId g
-- | To get the Meta value of a PhyloGroup
getGroupMeta :: Text -> PhyloGroup -> Double
getGroupMeta k g = (g ^. phylo_groupMeta) ! k
-- | To get the Ngrams of a PhyloGroup
getGroupNgrams :: PhyloGroup -> [Int]
getGroupNgrams = _phylo_groupNgrams
-- | To get the list of pairs (Childs & Parents) of a PhyloGroup
getGroupPairs :: PhyloGroup -> Phylo -> [PhyloGroup]
getGroupPairs g p = (getGroupChilds g p) ++ (getGroupParents g p)
-- | To get the PhyloGroups Parents of a PhyloGroup
getGroupParents :: PhyloGroup -> Phylo -> [PhyloGroup]
getGroupParents g p = getGroupsFromIds (getGroupPeriodParentsId g) p
-- | To get the period out of the id of a PhyloGroup
getGroupPeriod :: PhyloGroup -> (Date,Date)
getGroupPeriod = fst . fst . getGroupId
-- | To get the period child pointers of a PhyloGroup
getGroupPeriodChilds :: PhyloGroup -> [Pointer]
getGroupPeriodChilds = _phylo_groupPeriodChilds
-- | To get the PhyloGroups Period Parents Ids of a PhyloGroup
getGroupPeriodChildsId :: PhyloGroup -> [PhyloGroupId]
getGroupPeriodChildsId g = map fst $ getGroupPeriodChilds g
-- | To get the period parent pointers of a PhyloGroup
getGroupPeriodParents :: PhyloGroup -> [Pointer]
getGroupPeriodParents = _phylo_groupPeriodParents
-- | To get the PhyloGroups Period Parents Ids of a PhyloGroup
getGroupPeriodParentsId :: PhyloGroup -> [PhyloGroupId]
getGroupPeriodParentsId g = map fst $ getGroupPeriodParents g
-- | To get the pointers of a given Phylogroup
getGroupPointers :: EdgeType -> Filiation -> PhyloGroup -> [Pointer]
getGroupPointers t f g = case t of
PeriodEdge -> case f of
Ascendant -> getGroupPeriodParents g
Descendant -> getGroupPeriodChilds g
_ -> panic "[ERR][Viz.Phylo.Tools.getGroupPointers] wrong filiation"
LevelEdge -> case f of
Ascendant -> getGroupLevelParents g
Descendant -> getGroupLevelChilds g
_ -> panic "[ERR][Viz.Phylo.Tools.getGroupPointers] wrong filiation"
-- | To get the roots labels of a list of group ngrams
getGroupText :: PhyloGroup -> Phylo -> [Text]
getGroupText g p = ngramsToText (getFoundationsRoots p) (getGroupNgrams g)
-- | To get all the PhyloGroup of a Phylo
getGroups :: Phylo -> [PhyloGroup]
getGroups = view ( phylo_periods
. traverse
. phylo_periodLevels
. traverse
. phylo_levelGroups
)
-- | To get all PhyloGroups matching a list of PhyloGoupIds in a Phylo
-- getGroupsFromIds :: [PhyloGroupId] -> Phylo -> [PhyloGroup]
-- getGroupsFromIds ids p = filter (\g -> elem (getGroupId g) ids) $ getGroups p
getGroupFromId :: PhyloGroupId -> Phylo -> PhyloGroup
getGroupFromId id p =
let groups = Map.fromList $ map (\g -> (getGroupId g, g)) $ getGroups p
in groups ! id
getGroupsFromIds :: [PhyloGroupId] -> Phylo -> [PhyloGroup]
getGroupsFromIds ids p =
let groups = Map.fromList $ map (\g -> (getGroupId g, g)) $ getGroups p
in elems $ restrictKeys groups (Set.fromList ids)
-- | To get the corresponding list of PhyloGroups from a list of PhyloNodes
getGroupsFromNodes :: [PhyloNode] -> Phylo -> [PhyloGroup]
getGroupsFromNodes ns p = getGroupsFromIds (map getNodeId ns) p
-- | To get all the PhyloGroup of a Phylo with a given level and period
getGroupsWithFilters :: Int -> (Date,Date) -> Phylo -> [PhyloGroup]
getGroupsWithFilters lvl prd p = (getGroupsWithLevel lvl p)
`intersect`
(getGroupsWithPeriod prd p)
-- | To get all the PhyloGroup of a Phylo with a given Level
getGroupsWithLevel :: Int -> Phylo -> [PhyloGroup]
getGroupsWithLevel lvl p = filterGroups getGroupLevel lvl (getGroups p)
-- | To get all the PhyloGroup of a Phylo with a given Period
getGroupsWithPeriod :: (Date,Date) -> Phylo -> [PhyloGroup]
getGroupsWithPeriod prd p = filterGroups getGroupPeriod prd (getGroups p)
-- | To create a PhyloGroup in a Phylo out of a list of Ngrams and a set of parameters
initGroup :: [Ngrams] -> Text -> Int -> Int -> Int -> Int -> Phylo -> PhyloGroup
initGroup ngrams lbl idx lvl from' to' p = PhyloGroup
(((from', to'), lvl), idx)
lbl
idxs
(Map.empty)
(Map.empty)
Nothing
(getMiniCooc (listToFullCombi idxs) (periodsToYears [(from', to')]) (getPhyloCooc p))
[] [] [] []
where
idxs = sort $ map (\x -> getIdxInRoots x p) ngrams
-- | To sum two coocurency Matrix
sumCooc :: Map (Int, Int) Double -> Map (Int, Int) Double -> Map (Int, Int) Double
sumCooc m m' = unionWith (+) m m'
-- | To build the mini cooc matrix of each group
getMiniCooc :: [(Int,Int)] -> Set Date -> Map Date (Map (Int,Int) Double) -> Map (Int,Int) Double
getMiniCooc pairs years cooc = filterWithKey (\(n,n') _ -> elem (n,n') pairs) cooc'
where
--------------------------------------
cooc' :: Map (Int,Int) Double
cooc' = foldl (\m m' -> sumCooc m m') empty
$ elems
$ restrictKeys cooc years
--------------------------------------
---------------------
-- | PhyloPeriod | --
---------------------
-- | To alter each PhyloPeriod of a Phylo following a given function
alterPhyloPeriods :: (PhyloPeriod -> PhyloPeriod) -> Phylo -> Phylo
alterPhyloPeriods f p = over ( phylo_periods
. traverse) f p
-- | To append a list of PhyloPeriod to a Phylo
appendToPhyloPeriods :: [PhyloPeriod] -> Phylo -> Phylo
appendToPhyloPeriods l p = over (phylo_periods) (++ l) p
-- | To get all the PhyloPeriodIds of a Phylo
getPhyloPeriods :: Phylo -> [PhyloPeriodId]
getPhyloPeriods p = map _phylo_periodId
$ view (phylo_periods) p
-- | To get the id of a given PhyloPeriod
getPhyloPeriodId :: PhyloPeriod -> PhyloPeriodId
getPhyloPeriodId prd = _phylo_periodId prd
-- | To create a PhyloPeriod
initPhyloPeriod :: PhyloPeriodId -> [PhyloLevel] -> PhyloPeriod
initPhyloPeriod id l = PhyloPeriod id l
-- | To transform a list of periods into a set of Dates
periodsToYears :: [(Date,Date)] -> Set Date
periodsToYears periods = (Set.fromList . sort . concat)
$ map (\(d,d') -> [d..d']) periods
--------------------
-- | PhyloLevel | --
--------------------
-- | To alter a list of PhyloLevels following a given function
alterPhyloLevels :: ([PhyloLevel] -> [PhyloLevel]) -> Phylo -> Phylo
alterPhyloLevels f p = over ( phylo_periods
. traverse
. phylo_periodLevels) f p
-- | To get the PhylolevelId of a given PhyloLevel
getPhyloLevelId :: PhyloLevel -> PhyloLevelId
getPhyloLevelId = _phylo_levelId
-- | To get all the Phylolevels of a given PhyloPeriod
getPhyloLevels :: PhyloPeriod -> [PhyloLevel]
getPhyloLevels = view (phylo_periodLevels)
-- | To create a PhyloLevel
initPhyloLevel :: PhyloLevelId -> [PhyloGroup] -> PhyloLevel
initPhyloLevel id groups = PhyloLevel id groups
-- | To set the LevelId of a PhyloLevel and of all its PhyloGroups
setPhyloLevelId :: Int -> PhyloLevel -> PhyloLevel
setPhyloLevelId lvl' (PhyloLevel (id, _lvl) groups)
= PhyloLevel (id, lvl') groups'
where
groups' = over (traverse . phylo_groupId)
(\((period, _lvl), idx) -> ((period, lvl'), idx))
groups
------------------
-- | PhyloFis | --
------------------
-- | To get the clique of a PhyloFis
getClique :: PhyloFis -> Clique
getClique = _phyloFis_clique
-- | To get the support of a PhyloFis
getSupport :: PhyloFis -> Support
getSupport = _phyloFis_support
-- | To get the period of a PhyloFis
getFisPeriod :: PhyloFis -> (Date,Date)
getFisPeriod = _phyloFis_period
----------------------------
-- | PhyloNodes & Edges | --
----------------------------
-- | To alter a PhyloNode
alterPhyloNode :: (PhyloNode -> PhyloNode) -> PhyloView -> PhyloView
alterPhyloNode f v = over ( pv_nodes
. traverse
) (\pn -> f pn ) v
-- | To filter some GroupEdges with a given threshold
filterGroupEdges :: Double -> [GroupEdge] -> [GroupEdge]
filterGroupEdges thr edges = filter (\((_s,_t),w) -> w > thr) edges
-- | To get the neighbours (directed/undirected) of a PhyloGroup from a list of GroupEdges
getNeighbours :: Bool -> PhyloGroup -> [GroupEdge] -> [PhyloGroup]
getNeighbours directed g e = case directed of
True -> map (\((_s,t),_w) -> t)
$ filter (\((s,_t),_w) -> s == g) e
False -> map (\((s,t),_w) -> (head' "getNeighbours") $ delete g $ nub [s,t,g])
$ filter (\((s,t),_w) -> s == g || t == g) e
-- | To get the PhyloBranchId of PhyloNode if it exists
getNodeBranchId :: PhyloNode -> PhyloBranchId
getNodeBranchId n = case n ^. pn_bid of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getNodeBranchId] branchId not found"
Just i -> i
-- | To get the PhyloGroupId of a PhyloNode
getNodeId :: PhyloNode -> PhyloGroupId
getNodeId n = n ^. pn_id
getNodePeriod :: PhyloNode -> (Date,Date)
getNodePeriod n = fst $ fst $ getNodeId n
-- | To get the Level of a PhyloNode
getNodeLevel :: PhyloNode -> Level
getNodeLevel n = (snd . fst) $ getNodeId n
-- | To get the Parent Node of a PhyloNode in a PhyloView
getNodeParent :: PhyloNode -> PhyloView -> [PhyloNode]
getNodeParent n v = filter (\n' -> elem (getNodeId n') (getNodeParentsId n))
$ v ^. pv_nodes
-- | To get the Parent Node id of a PhyloNode if it exists
getNodeParentsId :: PhyloNode -> [PhyloGroupId]
getNodeParentsId n = case n ^. pn_parents of
Nothing -> panic "[ERR][Viz.Phylo.Tools.getNodeParentsId] node parent not found"
Just ids -> ids
-- | To get a list of PhyloNodes grouped by PhyloBranch in a PhyloView
getNodesByBranches :: PhyloView -> [(PhyloBranchId,[PhyloNode])]
getNodesByBranches v = zip bIds $ map (\id -> filter (\n -> (getNodeBranchId n) == id)
$ getNodesInBranches v ) bIds
where
--------------------------------------
bIds :: [PhyloBranchId]
bIds = getViewBranchIds v
--------------------------------------
-- | To get a list of PhyloNodes owned by any PhyloBranches in a PhyloView
getNodesInBranches :: PhyloView -> [PhyloNode]
getNodesInBranches v = filter (\n -> isJust $ n ^. pn_bid)
$ v ^. pv_nodes
-- | To get the PhyloGroupId of the Source of a PhyloEdge
getSourceId :: PhyloEdge -> PhyloGroupId
getSourceId e = e ^. pe_source
-- | To get the PhyloGroupId of the Target of a PhyloEdge
getTargetId :: PhyloEdge -> PhyloGroupId
getTargetId e = e ^. pe_target
---------------------
-- | PhyloBranch | --
---------------------
-- | To get the PhyloBranchId of a PhyloBranch
getBranchId :: PhyloBranch -> PhyloBranchId
getBranchId b = b ^. pb_id
-- | To get a list of PhyloBranchIds
getBranchIds :: Phylo -> [PhyloBranchId]
getBranchIds p = sortOn snd
$ nub
$ mapMaybe getGroupBranchId
$ getGroups p
-- | To get a list of PhyloBranchIds given a Level in a Phylo
getBranchIdsWith :: Level -> Phylo -> [PhyloBranchId]
getBranchIdsWith lvl p = sortOn snd
$ mapMaybe getGroupBranchId
$ getGroupsWithLevel lvl p
-- | To get the Meta value of a PhyloBranch
getBranchMeta :: Text -> PhyloBranch -> [Double]
getBranchMeta k b = (b ^. pb_metrics) ! k
-- | To get all the PhyloBranchIds of a PhyloView
getViewBranchIds :: PhyloView -> [PhyloBranchId]
getViewBranchIds v = map getBranchId $ v ^. pv_branches
-- | To get a list of PhyloGroup sharing the same PhyloBranchId
getGroupsByBranches :: Phylo -> [(PhyloBranchId,[PhyloGroup])]
getGroupsByBranches p = zip (getBranchIds p)
$ map (\id -> filter (\g -> (fromJust $ getGroupBranchId g) == id)
$ getGroupsInBranches p)
$ getBranchIds p
-- | To get the sublist of all the PhyloGroups linked to a branch
getGroupsInBranches :: Phylo -> [PhyloGroup]
getGroupsInBranches p = filter (\g -> isJust $ g ^. phylo_groupBranchId)
$ getGroups p
--------------------------------
-- | PhyloQuery & QueryView | --
--------------------------------
-- | To filter PhyloView's Branches by level
filterBranchesByLevel :: Level -> PhyloView -> [PhyloBranch]
filterBranchesByLevel lvl pv = filter (\pb -> lvl == (fst $ pb ^. pb_id))
$ pv ^. pv_branches
-- | To filter PhyloView's Edges by level
filterEdgesByLevel :: Level -> [PhyloEdge] -> [PhyloEdge]
filterEdgesByLevel lvl pes = filter (\pe -> (lvl == ((snd . fst) $ pe ^. pe_source))
&& (lvl == ((snd . fst) $ pe ^. pe_target))) pes
-- | To filter PhyloView's Edges by type
filterEdgesByType :: EdgeType -> [PhyloEdge] -> [PhyloEdge]
filterEdgesByType t pes = filter (\pe -> t == (pe ^. pe_type)) pes
-- | To filter PhyloView's Nodes by the oldest Period
filterNodesByFirstPeriod :: [PhyloNode] -> [PhyloNode]
filterNodesByFirstPeriod pns = filter (\pn -> fstPrd == ((fst . fst) $ pn ^. pn_id)) pns
where
--------------------------------------
fstPrd :: (Date,Date)
fstPrd = (head' "filterNodesByFirstPeriod")
$ sortOn fst
$ map (\pn -> (fst . fst) $ pn ^. pn_id) pns
--------------------------------------
-- | To filter PhyloView's Nodes by Branch
filterNodesByBranch :: PhyloBranchId -> [PhyloNode] -> [PhyloNode]
filterNodesByBranch bId pns = filter (\pn -> if isJust $ pn ^. pn_bid
then if bId == (fromJust $ pn ^. pn_bid)
then True
else False
else False ) pns
-- | To filter PhyloView's Nodes by level
filterNodesByLevel :: Level -> [PhyloNode] -> [PhyloNode]
filterNodesByLevel lvl pns = filter (\pn -> lvl == ((snd . fst) $ pn ^. pn_id)) pns
-- | To filter PhyloView's Nodes by Period
filterNodesByPeriod :: PhyloPeriodId -> [PhyloNode] -> [PhyloNode]
filterNodesByPeriod prd pns = filter (\pn -> prd == ((fst . fst) $ pn ^. pn_id)) pns
-- | To get the first clustering method to apply to get the contextual units of a Phylo
getContextualUnit :: PhyloQueryBuild -> Cluster
getContextualUnit q = q ^. q_contextualUnit
-- | To get the metrics to apply to contextual units
getContextualUnitMetrics :: PhyloQueryBuild -> [Metric]
getContextualUnitMetrics q = q ^. q_contextualUnitMetrics
-- | To get the filters to apply to contextual units
getContextualUnitFilters :: PhyloQueryBuild -> [Filter]
getContextualUnitFilters q = q ^. q_contextualUnitFilters
-- | To get the cluster methods to apply to the Nths levels of a Phylo
getNthCluster :: PhyloQueryBuild -> Cluster
getNthCluster q = q ^. q_nthCluster
-- | To get the Sup Level of a reconstruction of a Phylo from a PhyloQuery
getNthLevel :: PhyloQueryBuild -> Level
getNthLevel q = q ^. q_nthLevel
-- | To get the Grain of the PhyloPeriods from a PhyloQuery
getPeriodGrain :: PhyloQueryBuild -> Int
getPeriodGrain q = q ^. q_periodGrain
-- | To get the intertemporal matching strategy to apply to a Phylo from a PhyloQuery
getInterTemporalMatching :: PhyloQueryBuild -> Proximity
getInterTemporalMatching q = q ^. q_interTemporalMatching
-- | To get the Steps of the PhyloPeriods from a PhyloQuery
getPeriodSteps :: PhyloQueryBuild -> Int
getPeriodSteps q = q ^. q_periodSteps
--------------------------------------------------
-- | PhyloQueryBuild & PhyloQueryView Constructors | --
--------------------------------------------------
-- | To get the threshold of a Proximity
getThreshold :: Proximity -> Double
getThreshold prox = case prox of
WeightedLogJaccard (WLJParams thr _) -> thr
WeightedLogSim (WLJParams thr _) -> thr
Hamming (HammingParams thr) -> thr
Filiation -> panic "[ERR][Viz.Phylo.Tools.getThreshold] Filiation"
-- | To get the Proximity associated to a given Clustering method
getProximity :: Cluster -> Proximity
getProximity cluster = case cluster of
Louvain (LouvainParams proxi) -> proxi
RelatedComponents (RCParams proxi) -> proxi
_ -> panic "[ERR][Viz.Phylo.Tools.getProximity] this cluster has no associated Proximity"
-- | To initialize all the Cluster / Proximity with their default parameters
initFis :: Maybe Bool -> Maybe Support -> Maybe Int -> FisParams
initFis (def True -> kmf) (def 0 -> min') (def 0 -> thr) = FisParams kmf min' thr
initHamming :: Maybe Double -> HammingParams
initHamming (def 0.01 -> sens) = HammingParams sens
initLonelyBranch :: Maybe Int -> Maybe Int -> Maybe Int -> LBParams
initLonelyBranch (def 2 -> periodsInf) (def 2 -> periodsSup) (def 1 -> minNodes) = LBParams periodsInf periodsSup minNodes
initSizeBranch :: Maybe Int -> SBParams
initSizeBranch (def 1 -> minSize) = SBParams minSize
initLonelyBranch' :: Maybe Int -> Maybe Int -> Maybe Int -> LBParams
initLonelyBranch' (def 0 -> periodsInf) (def 0 -> periodsSup) (def 1 -> minNodes) = LBParams periodsInf periodsSup minNodes
initLouvain :: Maybe Proximity -> LouvainParams
initLouvain (def defaultWeightedLogJaccard -> proxi) = LouvainParams proxi
initRelatedComponents :: Maybe Proximity -> RCParams
initRelatedComponents (def defaultWeightedLogJaccard -> proxi) = RCParams proxi
-- | TODO user param in main function
initWeightedLogJaccard :: Maybe Double -> Maybe Double -> WLJParams
initWeightedLogJaccard (def 0.3 -> thr) (def 20.0 -> sens) = WLJParams thr sens
initWeightedLogSim :: Maybe Double -> Maybe Double -> WLJParams
initWeightedLogSim (def 0.3 -> thr) (def 20.0 -> sens) = WLJParams thr sens
-- | To initialize a PhyloQueryBuild from given and default parameters
initPhyloQueryBuild :: Text -> Text -> Maybe Int
-> Maybe Int -> Maybe Cluster -> Maybe [Metric]
-> Maybe [Filter]-> Maybe Proximity -> Maybe Int
-> Maybe Double -> Maybe Double -> Maybe Int
-> Maybe Level -> Maybe Cluster -> PhyloQueryBuild
initPhyloQueryBuild name desc (def 5 -> grain)
(def 1 -> steps) (def defaultFis -> cluster) (def [] -> metrics)
(def [] -> filters) (def defaultWeightedLogJaccard -> matching') (def 5 -> frame)
(def 0.8 -> frameThr) (def 0.5 -> reBranchThr) (def 4 -> reBranchNth)
(def 2 -> nthLevel) (def defaultRelatedComponents -> nthCluster) =
PhyloQueryBuild name desc grain
steps cluster metrics filters matching' frame frameThr reBranchThr reBranchNth nthLevel nthCluster
-- | To initialize a PhyloQueryView default parameters
initPhyloQueryView :: Maybe Level -> Maybe Filiation -> Maybe Bool -> Maybe Level -> Maybe [Metric] -> Maybe [Filter] -> Maybe [Tagger] -> Maybe (Sort, Order) -> Maybe ExportMode -> Maybe DisplayMode -> Maybe Bool -> PhyloQueryView
initPhyloQueryView (def 2 -> lvl) (def Descendant -> f) (def False -> c) (def 1 -> d) (def [] -> ms) (def [] -> fs) (def [] -> ts) s (def Json -> em) (def Flat -> dm) (def True -> v) =
PhyloQueryView lvl f c d ms fs ts s em dm v
-- | To define some obvious boolean getters
shouldKeepMinorFis :: FisParams -> Bool
shouldKeepMinorFis = _fis_keepMinorFis
----------------------------
-- | Default ressources | --
----------------------------
-- Clusters
defaultFis :: Cluster
defaultFis = Fis (initFis Nothing Nothing Nothing)
defaultLouvain :: Cluster
defaultLouvain = Louvain (initLouvain Nothing)
defaultRelatedComponents :: Cluster
defaultRelatedComponents = RelatedComponents (initRelatedComponents Nothing)
-- Filters
defaultLonelyBranch :: Filter
defaultLonelyBranch = LonelyBranch (initLonelyBranch Nothing Nothing Nothing)
defaultSizeBranch :: Filter
defaultSizeBranch = SizeBranch (initSizeBranch Nothing)
-- Params
defaultPhyloParam :: PhyloParam
defaultPhyloParam = initPhyloParam Nothing Nothing Nothing
-- Proximities
defaultHamming :: Proximity
defaultHamming = Hamming (initHamming Nothing)
defaultWeightedLogJaccard :: Proximity
defaultWeightedLogJaccard = WeightedLogJaccard (initWeightedLogJaccard Nothing Nothing)
defaultWeightedLogSim :: Proximity
defaultWeightedLogSim = WeightedLogSim (initWeightedLogSim Nothing Nothing)
-- Queries
type Title = Text
type Desc = Text
defaultQueryBuild :: PhyloQueryBuild
defaultQueryBuild = defaultQueryBuild'
"Cesar et Cleôpatre"
"An example of Phylomemy (french without accent)"
defaultQueryBuild' :: Title -> Desc -> PhyloQueryBuild
defaultQueryBuild' t d = initPhyloQueryBuild t d
Nothing Nothing Nothing
Nothing Nothing Nothing
Nothing Nothing Nothing
Nothing Nothing Nothing
defaultQueryView :: PhyloQueryView
defaultQueryView = initPhyloQueryView
Nothing Nothing Nothing
Nothing Nothing Nothing
Nothing Nothing Nothing
Nothing Nothing
-- Software
defaultSoftware :: Software
defaultSoftware = Software "Gargantext" "v4"
-- Version
defaultPhyloVersion :: Text
defaultPhyloVersion = "v1"
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.Display
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (null,(++),sortOn)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
-- | To transform a flat Phyloview into a nested Phyloview
toNestedView :: [PhyloNode] -> [PhyloNode] -> [PhyloNode]
toNestedView ns ns'
| null ns' = ns
| otherwise = toNestedView (filter (\n -> lvl' == getNodeLevel n) nested)
(filter (\n -> lvl' < getNodeLevel n) nested)
where
--------------------------------------
lvl' :: Level
lvl' = getNodeLevel $ head' "toNestedView" nested
--------------------------------------
nested :: [PhyloNode]
nested = foldl (\ns'' n -> let nIds' = getNodeParentsId n
in map (\n' -> if elem (getNodeId n') nIds'
then n' & pn_childs %~ (++ [n])
else n') ns'') ns' ns
--------------------------------------
-- | To process a DisplayMode to a PhyloView
processDisplay :: DisplayMode -> ExportMode -> PhyloView -> PhyloView
processDisplay d e v = case e of
Json -> case d of
Flat -> v
Nested -> let ns = sortOn getNodeLevel $ v ^. pv_nodes
lvl = getNodeLevel $ head' "processDisplay" ns
in v & pv_nodes .~ toNestedView (filter (\n -> lvl == getNodeLevel n) ns)
(filter (\n -> lvl < getNodeLevel n) ns)
_ -> v
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.Export
where
import Control.Lens hiding (Level)
import Control.Monad
import Data.GraphViz hiding (DotGraph)
import Data.GraphViz.Attributes.Complete hiding (EdgeType)
import Data.GraphViz.Types.Generalised (DotGraph)
import Data.GraphViz.Types.Monadic
import Data.List ((++),unwords,concat,sortOn,nub)
import Data.Map (Map,toList,(!))
import Data.Maybe (isNothing)
import Data.Text.Lazy (fromStrict, pack, unpack)
import qualified Data.Text as T
import qualified Data.Text.Lazy as T'
import qualified Data.GraphViz.Attributes.HTML as H
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo hiding (Dot)
import Gargantext.Core.Viz.Phylo.Tools
-- import Debug.Trace (trace)
import Prelude (writeFile)
import System.FilePath
type DotId = T'.Text
---------------------
-- | Dot to File | --
---------------------
dotToFile :: FilePath -> DotGraph DotId -> IO ()
dotToFile filePath dotG = writeFile filePath $ dotToString dotG
dotToString :: DotGraph DotId -> [Char]
dotToString dotG = unpack (printDotGraph dotG)
--------------------------
-- | PhyloView to DOT | --
--------------------------
-- | From http://haroldcarr.com/posts/2014-02-28-using-graphviz-via-haskell.html & https://hackage.haskell.org/package/graphviz
-- | To create a custom Graphviz's Attribute
setAttr :: AttributeName -> T'.Text -> CustomAttribute
setAttr k v = customAttribute k v
-- | To create customs Graphviz's Attributes out of some Metrics
setAttrFromMetrics :: Map T.Text [Double] -> [CustomAttribute]
setAttrFromMetrics a = map (\(k,v) -> setAttr (fromStrict k)
$ (pack . unwords)
$ map show v) $ toList a
-- | To transform a PhyloBranchId into a DotId
toBranchDotId :: PhyloBranchId -> DotId
toBranchDotId (lvl,idx) = fromStrict $ T.pack $ (show lvl) ++ (show idx)
-- | To transform a PhyloGroupId into a DotId
toNodeDotId :: PhyloGroupId -> DotId
toNodeDotId (((d,d'),lvl),idx) = fromStrict $ T.pack $ (show d) ++ (show d') ++ (show lvl) ++ (show idx)
-- | To transform a PhyloPeriodId into a DotId
toPeriodDotId :: PhyloPeriodId -> DotId
toPeriodDotId (d,d') = fromStrict $ T.pack $ (show d) ++ (show d')
-- | To transform a PhyloPeriodId into a Graphviz's label
toPeriodDotLabel ::PhyloPeriodId -> Label
toPeriodDotLabel (d,d') = toDotLabel $ T.pack $ (show d) ++ " " ++ (show d')
-- | To get all the Phyloperiods covered by a PhyloView
getViewPeriods :: PhyloView -> [PhyloPeriodId]
getViewPeriods pv = sortOn fst $ nub $ map (\pn -> (fst . fst) $ pn ^. pn_id) $ pv ^. pv_nodes
-- | To get for each PhyloBranch, their corresponding oldest PhyloNodes
getFirstNodes :: Level -> PhyloView -> [(PhyloBranchId,PhyloGroupId)]
getFirstNodes lvl pv = concat
$ map (\bId -> map (\pn -> (bId,pn ^. pn_id))
$ filterNodesByFirstPeriod
$ filterNodesByBranch bId
$ filterNodesByLevel lvl
$ pv ^. pv_nodes) bIds
where
--------------------------------------
bIds :: [PhyloBranchId]
bIds = map getBranchId $ filterBranchesByLevel lvl pv
--------------------------------------
-- | To transform a Text into a Graphviz's Label
toDotLabel :: T.Text -> Label
toDotLabel lbl = StrLabel $ fromStrict lbl
-- | To set a Peak Node
setPeakDotNode :: PhyloBranch -> Dot DotId
setPeakDotNode pb = node (toBranchDotId $ pb ^. pb_id)
([FillColor [toWColor CornSilk], FontName "Arial", FontSize 40, Shape Egg, Style [SItem Bold []], Label (toDotLabel $ pb ^. pb_peak)]
<> (setAttrFromMetrics $ pb ^. pb_metrics)
<> [ setAttr "nodeType" "peak"
, setAttr "branchId" ((pack $ show (fst $ getBranchId pb)) <> (pack $ show (snd $ getBranchId pb)))
])
-- | To set a Peak Edge
setPeakDotEdge :: DotId -> DotId -> Dot DotId
setPeakDotEdge bId nId = edge bId nId [Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])]
colorFromDynamics :: Double -> H.Attribute
colorFromDynamics d
| d == 0 = H.BGColor (toColor LightCoral)
| d == 1 = H.BGColor (toColor Khaki)
| d == 2 = H.BGColor (toColor SkyBlue)
| otherwise = H.Color (toColor Black)
getGroupDynamic :: [Double] -> H.Attribute
getGroupDynamic dy
| elem 0 dy = colorFromDynamics 0
| elem 1 dy = colorFromDynamics 1
| elem 2 dy = colorFromDynamics 2
| otherwise = colorFromDynamics 3
-- | To set an HTML table
setHtmlTable :: PhyloNode -> H.Label
setHtmlTable pn = H.Table H.HTable
{ H.tableFontAttrs = Just [H.PointSize 14, H.Align H.HLeft]
, H.tableAttrs = [H.Border 0, H.CellBorder 0, H.BGColor (toColor White)]
, H.tableRows = [header]
<> [H.Cells [H.LabelCell [H.Height 10] $ H.Text [H.Str $ fromStrict ""]]]
<> (if isNothing $ pn ^. pn_ngrams
then []
else map ngramsToRow $ splitEvery 4
$ reverse $ sortOn (snd . snd)
$ zip (fromJust $ pn ^. pn_ngrams) $ zip dynamics inclusion) }
where
--------------------------------------
ngramsToRow :: [(Ngrams,(Double,Double))] -> H.Row
ngramsToRow ns = H.Cells $ map (\(n,(d,_)) -> H.LabelCell [H.Align H.HLeft,colorFromDynamics d]
$ H.Text [H.Str $ fromStrict n]) ns
--------------------------------------
inclusion :: [Double]
inclusion = (pn ^. pn_metrics) ! "inclusion"
--------------------------------------
dynamics :: [Double]
dynamics = (pn ^. pn_metrics) ! "dynamics"
--------------------------------------
header :: H.Row
header = H.Cells [H.LabelCell [getGroupDynamic dynamics]
$ H.Text [H.Str $ (((fromStrict . T.toUpper) $ pn ^. pn_label)
<> (fromStrict " ( ")
<> (pack $ show (fst $ getNodePeriod pn))
<> (fromStrict " , ")
<> (pack $ show (snd $ getNodePeriod pn))
<> (fromStrict " ) "))]]
--------------------------------------
-- | To set a Node
setDotNode :: PhyloNode -> Dot DotId
setDotNode pn = node (toNodeDotId $ pn ^. pn_id)
([FontName "Arial", Shape Square, toLabel (setHtmlTable pn)]
<> [ setAttr "nodeType" "group"
, setAttr "from" (pack $ show (fst $ getNodePeriod pn))
, setAttr "to" (pack $ show (fst $ getNodePeriod pn))
, setAttr "branchId" ((pack $ show (fst $ getNodeBranchId pn)) <> (pack $ show (snd $ getNodeBranchId pn)))
])
-- | To set an Edge
setDotEdge :: PhyloEdge -> Dot DotId
setDotEdge pe
| pe ^. pe_weight == 100 = edge (toNodeDotId $ pe ^. pe_source) (toNodeDotId $ pe ^. pe_target) [Width 2, Color [toWColor Red]]
| otherwise = edge (toNodeDotId $ pe ^. pe_source) (toNodeDotId $ pe ^. pe_target) [Width 2, Color [toWColor Black], Constraint True]
-- | To set a Period Edge
setDotPeriodEdge :: (PhyloPeriodId,PhyloPeriodId) -> Dot DotId
setDotPeriodEdge (prd,prd') = edge (toPeriodDotId prd) (toPeriodDotId prd') [Width 5, Color [toWColor Black]]
-- | To transform a given PhyloView into the corresponding GraphViz Graph (ie: Dot format)
viewToDot :: PhyloView -> DotGraph DotId
viewToDot pv = digraph ((Str . fromStrict) $ pv ^. pv_title)
$ do
-- set the global graph attributes
graphAttrs ( [Label (toDotLabel $ pv ^. pv_title)]
<> [setAttr "description" $ fromStrict $ pv ^. pv_description]
<> [setAttr "filiation" $ (pack . show) $ pv ^. pv_filiation]
<> (setAttrFromMetrics $ pv ^. pv_metrics)
<> [FontSize 30, LabelLoc VTop, NodeSep 1, RankSep [1], Rank SameRank, Splines SplineEdges, Overlap ScaleOverlaps
, Ratio FillRatio
, Style [SItem Filled []],Color [toWColor White]])
-- set the peaks
subgraph (Str "Peaks") $ do
graphAttrs [Rank SameRank]
mapM setPeakDotNode $ filterBranchesByLevel (pv ^. pv_level) pv
-- set the nodes, period by period
_ <- mapM (\prd ->
subgraph (Str $ fromStrict $ T.pack $ "subGraph " ++ (show $ (fst prd)) ++ (show $ (snd prd)))
$ do
graphAttrs [Rank SameRank]
-- set the period label
node (toPeriodDotId prd) ([Shape Square, FontSize 50, Label (toPeriodDotLabel prd)]
<> [setAttr "nodeType" "period",
setAttr "from" (fromStrict $ T.pack $ (show $ fst prd)),
setAttr "to" (fromStrict $ T.pack $ (show $ snd prd))])
mapM setDotNode $ filterNodesByPeriod prd $ filterNodesByLevel (pv ^. pv_level) (pv ^.pv_nodes)
) $ (pv ^. pv_periods)
-- set the edges : from peaks to nodes, from nodes to nodes, from periods to periods
_ <- mapM (\(bId,nId) -> setPeakDotEdge (toBranchDotId bId) (toNodeDotId nId)) $ getFirstNodes (pv ^. pv_level) pv
_ <- mapM setDotEdge $ filterEdgesByLevel (pv ^. pv_level) $ filterEdgesByType PeriodEdge (pv ^. pv_edges)
mapM setDotPeriodEdge $ listToSequentialCombi $ (pv ^. pv_periods)
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.Filters
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (notElem,null,nub,(\\),intersect)
import Data.Maybe (isNothing)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
-- | To clean a PhyloView list of Nodes, Edges, etc after having filtered its Branches
cleanNodesEdges :: PhyloView -> PhyloView -> PhyloView
cleanNodesEdges v v' = v' & pv_nodes %~ (filter (\n -> not $ elem (getNodeId n) nIds))
& pv_nodes %~ (map (\n -> if isNothing (n ^. pn_parents)
then n
else if (not .null) $ (getNodeParentsId n) `intersect` nIds
then n & pn_parents .~ Nothing
else n ))
& pv_edges %~ (filter (\e -> (not $ elem (getSourceId e) nIds)
&& (not $ elem (getTargetId e) nIds)))
where
--------------------------------------
nIds :: [PhyloGroupId]
nIds = map getNodeId
$ filter (\n -> elem (getNodeBranchId n) bIds)
$ getNodesInBranches v
--------------------------------------
bIds :: [PhyloBranchId]
bIds = (getViewBranchIds v) \\ (getViewBranchIds v')
--------------------------------------
-- | To filter all the LonelyBranches (ie: isolated one in time & with a small number of nodes) of a PhyloView
filterLonelyBranch :: Int -> Int -> Int -> [PhyloPeriodId] -> PhyloView -> PhyloView
filterLonelyBranch inf sup min' prds v = cleanNodesEdges v v'
where
--------------------------------------
v' :: PhyloView
v' = v & pv_branches %~ (filter (\b -> let
ns = filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v
prds' = nub $ map (\n -> (fst . fst) $ getNodeId n) ns
in not (isLone ns prds')))
--------------------------------------
isLone :: [PhyloNode] -> [PhyloPeriodId] -> Bool
isLone ns prds' = (length ns <= min')
&& notElem (head' "filterLonelyBranch1" prds') (take inf prds)
&& notElem (head' "filterLonelyBranch2" prds') (take sup $ reverse prds)
--------------------------------------
-- | To filter all the branches with a minimal size in a PhyloView
filterSizeBranch :: Int -> PhyloView -> PhyloView
filterSizeBranch min' v = cleanNodesEdges v v'
where
--------------------------------------
v' :: PhyloView
v' = v & pv_branches %~ (filter (\b -> (length $ filter (\n -> (getBranchId b) == (getNodeBranchId n)) $ getNodesInBranches v) >= min'))
--------------------------------------
-- | To process a list of QueryFilter to a PhyloView
processFilters :: [Filter] -> Phylo -> PhyloView -> PhyloView
processFilters fs p v = foldl (\v' f -> case f of
LonelyBranch (LBParams inf sup min') -> filterLonelyBranch inf sup min' (getPhyloPeriods p) v'
SizeBranch (SBParams min') -> filterSizeBranch min' v'
-- _ -> panic "[ERR][Viz.Phylo.View.Filters.processFilters] filter not found"
) v fs
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.Metrics
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (last,groupBy,sortOn)
import Data.Map (insert)
import Data.Text (Text)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
-- | To add a new meta Metric to a PhyloBranch
addBranchMetrics :: PhyloBranchId -> Text -> Double -> PhyloView -> PhyloView
addBranchMetrics id lbl val v = over (pv_branches
. traverse)
(\b -> if getBranchId b == id
then b & pb_metrics %~ insert lbl [val]
else b) v
branchGroups :: PhyloView -> PhyloView
branchGroups v = foldl (\v' (bId,nb) -> addBranchMetrics bId "nbGroups" nb v') v
$ map (\(bId,ns) -> (bId,fromIntegral $ length ns))
$ getNodesByBranches v
-- | To get the age (in year) of all the branches of a PhyloView
branchAge :: PhyloView -> PhyloView
branchAge v = foldl (\v' b -> let bId = (fst . (head' "branchAge")) b
prds = sortOn fst $ map snd b
in addBranchMetrics bId "age" ((abs . fromIntegral) $ ((snd . last) prds) - (fst $ head' "branchAge" prds)) v') v
$ groupBy ((==) `on` fst)
$ sortOn fst
$ map (\n -> (getNodeBranchId n, (fst . fst) $ getNodeId n))
$ getNodesInBranches v
-- | To get the age (in year) of all the branches of a PhyloView
branchBirth :: PhyloView -> PhyloView
branchBirth v = foldl (\v' b -> let bId = (fst . (head' "branchBirth")) b
prds = sortOn fst $ map snd b
in addBranchMetrics bId "birth" (fromIntegral $ fst $ head' "branchAge" prds) v') v
$ groupBy ((==) `on` fst)
$ sortOn fst
$ map (\n -> (getNodeBranchId n, (fst . fst) $ getNodeId n))
$ getNodesInBranches v
-- | To process a list of Metrics to a PhyloView
processMetrics :: [Metric] -> Phylo -> PhyloView -> PhyloView
processMetrics ms _p v = foldl (\v' m -> case m of
BranchAge -> branchAge v'
BranchBirth -> branchBirth v'
BranchGroups -> branchGroups v'
-- _ -> panic "[ERR][Viz.Phylo.Example.processMetrics] metric not found"
) v ms
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.Sort
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (sortOn)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
-- | To sort a PhyloView by Age
sortBranchByAge :: Order -> PhyloView -> PhyloView
sortBranchByAge o v = v & pv_branches %~ f
where
--------------------------------------
f :: [PhyloBranch] -> [PhyloBranch]
f xs = case o of
Asc -> sortOn (getBranchMeta "age") xs
Desc -> reverse $ sortOn (getBranchMeta "age") xs
--------------------------------------
-- | To sort a PhyloView by Birth date of a branch
sortBranchByBirth :: Order -> PhyloView -> PhyloView
sortBranchByBirth o v = v & pv_branches %~ f
where
--------------------------------------
f :: [PhyloBranch] -> [PhyloBranch]
f xs = case o of
Asc -> sortOn (getBranchMeta "birth") xs
Desc -> reverse $ sortOn (getBranchMeta "birth") xs
--------------------------------------
-- | To process a Sort to a PhyloView
processSort :: Maybe (Sort,Order) -> Phylo -> PhyloView -> PhyloView
processSort s _p v = case s of
Nothing -> v
Just s' -> case fst s' of
ByBranchAge -> sortBranchByAge (snd s') v
ByBranchBirth -> sortBranchByBirth (snd s') v
--_ -> panic "[ERR][Viz.Phylo.View.Sort.processSort] sort not found"
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.Taggers
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (concat,nub,groupBy,sortOn,sort, (!!), union, (\\))
import Data.Text (Text)
import Data.Vector (Vector)
import Data.Map (Map, (!), empty, unionWith)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.BranchMaker
import Gargantext.Core.Viz.Phylo.Metrics
import qualified Data.Map as Map
import Control.Parallel.Strategies
-- import Debug.Trace (trace)
-- | To get the nth most frequent Ngrams in a list of PhyloGroups
mostFreqNgrams :: Int -> [PhyloGroup] -> [Int]
mostFreqNgrams thr groups = map fst
$ take thr
$ reverse
$ sortOn snd
$ map (\g -> (head' "mostFreqNgrams" g,length g))
$ groupBy (==)
$ (sort . concat)
$ map getGroupNgrams groups
-- | To transform the nth most frequent Ngrams into a label
freqToLabel :: Int -> Vector Ngrams -> [PhyloGroup] -> Text
freqToLabel thr ngs l = ngramsToLabel ngs $ mostFreqNgrams thr l
-- | To get the (nth `div` 2) most cooccuring Ngrams in a PhyloGroup
mostOccNgrams :: Int -> PhyloGroup -> [Int]
mostOccNgrams nth g = (nub . concat)
$ map (\((f,s),_d) -> [f,s])
$ take nth
$ reverse $ sortOn snd
$ Map.toList cooc
where
cooc :: Map (Int, Int) Double
cooc = getGroupCooc g
-- | To alter the peak of a PhyloBranch
alterBranchPeak :: (PhyloBranchId,Text) -> PhyloView -> PhyloView
alterBranchPeak (id,lbl) v = over (pv_branches
. traverse)
(\b -> if getBranchId b == id
then b & pb_peak .~ lbl
else b) v
-- | To set the peak of a PhyloBranch as the nth most frequent terms of its PhyloNodes
branchPeakFreq :: PhyloView -> Int -> Phylo -> PhyloView
branchPeakFreq v thr p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$ map (\(id,ns) -> (id, freqToLabel thr (getFoundationsRoots p)
$ getGroupsFromNodes ns p))
$ getNodesByBranches v
branchPeakCooc :: PhyloView -> Int -> Phylo -> PhyloView
branchPeakCooc v nth p = foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v
$ map (\(id,ns) -> (id, ngramsToLabel (getFoundationsRoots p) (getGroupsPeaks (getGroupsFromNodes ns p) nth p) ) )
$ getNodesByBranches v
getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
$ take nth
$ reverse
$ sortOn snd $ zip [0..] meta
-- | To set the label of a PhyloNode as the nth most coocurent terms of its PhyloNodes
nodeLabelCooc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelCooc v thr p = over (pv_nodes
. traverse)
(\n -> let g = head' "nodeLabelCooc" $ getGroupsFromIds [getNodeId n] p
lbl = ngramsToLabel (getFoundationsRoots p) $ mostOccNgrams thr g
in n & pn_label .~ lbl) v
-- | To set the label of a PhyloNode as the nth most inclusives terms of its PhyloNodes
nodeLabelInc :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelInc v thr p = over (pv_nodes
. traverse)
(\n -> let g = head' "inclusion" $ getGroupsFromIds [getNodeId n] p
lbl = ngramsToLabel (getFoundationsRoots p)
$ getNthMostMeta thr ((g ^. phylo_groupNgramsMeta) ! "inclusion") (getGroupNgrams g)
in n & pn_label .~ lbl) v
nodeLabelInc' :: PhyloView -> Int -> Phylo -> PhyloView
nodeLabelInc' v nth p = over (pv_nodes
. traverse)
(\pn -> let lbl = ngramsToLabel (getFoundationsRoots p)
$ take nth
$ map (\(_,(_,idx)) -> idx)
$ concat
$ map (\groups -> sortOn (fst . snd) groups)
$ groupBy ((==) `on` fst) $ reverse $ sortOn fst
$ zip ((pn ^. pn_metrics) ! "inclusion")
$ zip ((pn ^. pn_metrics) ! "dynamics") (pn ^. pn_idx)
in pn & pn_label .~ lbl) v
branchPeakInc :: PhyloView -> Int -> Phylo -> PhyloView
branchPeakInc v nth p =
let labels = map (\(id,nodes) ->
let cooc = foldl (\mem pn -> unionWith (+) mem (pn ^. pn_cooc)) empty nodes
ngrams = sort $ foldl (\mem pn -> union mem (pn ^. pn_idx)) [] nodes
inc = map (\n -> inclusion cooc (ngrams \\ [n]) n) ngrams
lbl = ngramsToLabel (getFoundationsRoots p) $ getNthMostMeta nth inc ngrams
in (id, lbl))
$ getNodesByBranches v
labels' = labels `using` parList rdeepseq
in foldl (\v' (id,lbl) -> alterBranchPeak (id,lbl) v') v labels'
-- | To process a sorted list of Taggers to a PhyloView
processTaggers :: [Tagger] -> Phylo -> PhyloView -> PhyloView
processTaggers ts p v = foldl (\v' t -> case t of
BranchPeakFreq -> branchPeakFreq v' 2 p
BranchPeakCooc -> branchPeakCooc v' 2 p
BranchPeakInc -> branchPeakInc v' 2 p
GroupLabelInc -> nodeLabelInc v' 2 p
GroupLabelIncDyn -> nodeLabelInc' v' 2 p
GroupLabelCooc -> nodeLabelCooc v' 2 p) v ts
{-|
Module : Gargantext.Core.Viz.Phylo.Tools
Description : Phylomemy Tools to build/manage it
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Maintainer : team@gargantext.org
Stability : experimental
Portability : POSIX
-}
module Gargantext.Core.Viz.Phylo.View.ViewMaker
where
import Control.Lens hiding (makeLenses, both, Level)
import Data.List (concat,nub,(++),sort)
import Data.Text (Text)
import Data.Map (Map, empty, elems, unionWithKey, fromList)
import Data.Vector (Vector)
import Gargantext.Prelude
import Gargantext.Core.Viz.Phylo
import Gargantext.Core.Viz.Phylo.Tools
import Gargantext.Core.Viz.Phylo.Metrics
import Gargantext.Core.Viz.Phylo.View.Display
import Gargantext.Core.Viz.Phylo.View.Filters
import Gargantext.Core.Viz.Phylo.View.Metrics
import Gargantext.Core.Viz.Phylo.View.Sort
import Gargantext.Core.Viz.Phylo.View.Taggers
import qualified Data.Vector.Storable as VS
import Debug.Trace (trace)
import Numeric.Statistics (percentile)
-- | To init a PhyloBranch
initPhyloBranch :: PhyloBranchId -> Text -> PhyloBranch
initPhyloBranch id lbl = PhyloBranch id lbl empty
-- | To init a PhyloEdge
initPhyloEdge :: PhyloGroupId -> [Pointer] -> EdgeType -> [PhyloEdge]
initPhyloEdge id pts et = map (\pt -> PhyloEdge id (fst pt) et (snd pt)) pts
-- | To init a PhyloView
initPhyloView :: Level -> Text -> Text -> Filiation -> Bool -> Phylo -> PhyloView
initPhyloView lvl lbl dsc fl vb p = PhyloView (getPhyloParams p) lbl dsc fl lvl
(getPhyloPeriods p)
empty
([] ++ (phyloToBranches lvl p))
([] ++ (groupsToNodes True vb (getFoundationsRoots p) gs))
([] ++ (groupsToEdges fl PeriodEdge gs))
where
--------------------------------------
gs :: [PhyloGroup]
gs = getGroupsWithLevel lvl p
--------------------------------------
-- | To transform a list of PhyloGroups into a list of PhyloNodes
groupsToNodes :: Bool -> Bool -> Vector Ngrams -> [PhyloGroup] -> [PhyloNode]
groupsToNodes isR isV ns gs = map (\g -> let idxs = getGroupNgrams g
in PhyloNode
(getGroupId g)
(getGroupBranchId g)
"" idxs
(if isV
then Just (ngramsToText ns idxs)
else Nothing)
(g ^. phylo_groupNgramsMeta)
(g ^. phylo_groupCooc)
(if (not isR)
then Just (getGroupLevelParentsId g)
else Nothing)
[]
) gs
-- | To merge edges by keeping the maximum weight
mergeEdges :: [PhyloEdge] -> [PhyloEdge] -> [PhyloEdge]
mergeEdges lAsc lDes = elems
$ unionWithKey (\_k vAsc vDes -> vDes & pe_weight .~ (max (vAsc ^. pe_weight) (vDes ^. pe_weight))) mAsc mDes
where
--------------------------------------
mAsc :: Map (PhyloGroupId,PhyloGroupId) PhyloEdge
mAsc = fromList
$ map (\(k,e) -> (k, e & pe_source .~ fst k
& pe_target .~ snd k))
$ zip (map (\e -> (e ^. pe_target,e ^. pe_source)) lAsc) lAsc
--------------------------------------
mDes :: Map (PhyloGroupId,PhyloGroupId) PhyloEdge
mDes = fromList
$ zip (map (\e -> (e ^. pe_source,e ^. pe_target)) lDes) lDes
--------------------------------------
-- | To transform a list of PhyloGroups into a list of PhyloEdges
groupsToEdges :: Filiation -> EdgeType -> [PhyloGroup] -> [PhyloEdge]
groupsToEdges fl et gs = case fl of
Complete -> (groupsToEdges Ascendant et gs) ++ (groupsToEdges Descendant et gs)
Merge -> mergeEdges (groupsToEdges Ascendant et gs) (groupsToEdges Descendant et gs)
_ -> concat
$ map (\g -> case fl of
Ascendant -> case et of
PeriodEdge -> initPhyloEdge (getGroupId g) (getGroupPeriodParents g) et
LevelEdge -> initPhyloEdge (getGroupId g) (getGroupLevelParents g) et
Descendant -> case et of
PeriodEdge -> initPhyloEdge (getGroupId g) (getGroupPeriodChilds g) et
LevelEdge -> initPhyloEdge (getGroupId g) (getGroupLevelChilds g) et
_Type -> panic "[ERR][Viz.Phylo.View.ViewMaker.groupsToEdges] not implemented"
) gs
-- | To transform a Phylo into a list of PhyloBranch for a given Level
phyloToBranches :: Level -> Phylo -> [PhyloBranch]
phyloToBranches lvl p = map (\id -> initPhyloBranch id "") $ nub $ getBranchIdsWith lvl p
-- | To add recursively a list of PhyloNodes and Edges to PhyloView from a given Level and Depth
addChildNodes :: Bool -> Level -> Level -> Bool -> Filiation -> Phylo -> PhyloView -> PhyloView
addChildNodes shouldDo lvl lvlMin vb fl p v =
if (not shouldDo) || (lvl == lvlMin)
then v
else addChildNodes shouldDo (lvl - 1) lvlMin vb fl p
$ v & pv_branches %~ (++ (phyloToBranches (lvl - 1) p))
& pv_nodes %~ (++ (groupsToNodes False vb (getFoundationsRoots p) gs'))
& pv_edges %~ (++ (groupsToEdges fl PeriodEdge gs'))
& pv_edges %~ (++ (groupsToEdges Descendant LevelEdge gs ))
& pv_edges %~ (++ (groupsToEdges Ascendant LevelEdge gs'))
where
--------------------------------------
gs :: [PhyloGroup]
gs = getGroupsWithLevel lvl p
--------------------------------------
gs' :: [PhyloGroup]
gs' = getGroupsWithLevel (lvl - 1) p
--------------------------------------
-- | To transform a PhyloQuery into a PhyloView
toPhyloView :: PhyloQueryView -> Phylo -> PhyloView
toPhyloView q p = traceView
$ processDisplay (q ^. qv_display) (q ^. qv_export)
$ processSort (q ^. qv_sort ) p
$ processTaggers (q ^. qv_taggers) p
$ processDynamics
$ processFilters (q ^. qv_filters) p
$ processMetrics (q ^. qv_metrics) p
$ addChildNodes (q ^. qv_levelChilds) (q ^. qv_lvl) (q ^. qv_levelChildsDepth) (q ^. qv_verbose) (q ^. qv_filiation) p
$ initPhyloView (q ^. qv_lvl) (getPhyloTitle p) (getPhyloDescription p) (q ^. qv_filiation) (q ^. qv_verbose) p
-----------------
-- | Taggers | --
-----------------
traceView :: PhyloView -> PhyloView
traceView pv = trace ("------------\n--| View |--\n------------\n\n"
<> "view level : " <> show (pv ^. pv_level) <> "\n"
<> show (length $ pv ^. pv_branches) <> " exported branches with " <> show (length $ pv ^. pv_nodes) <> " groups\n"
<> "groups by branches : " <> show (percentile 25 (VS.fromList lst)) <> " (25%) "
<> show (percentile 50 (VS.fromList lst)) <> " (50%) "
<> show (percentile 75 (VS.fromList lst)) <> " (75%) "
<> show (percentile 90 (VS.fromList lst)) <> " (90%)\n") pv
where
lst = sort $ map (fromIntegral . length . snd) $ getNodesByBranches pv
......@@ -23,7 +23,7 @@ module Gargantext.Database.Admin.Types.Hyperdata.Phylo
import Gargantext.Prelude
import Gargantext.Database.Admin.Types.Hyperdata.Prelude
import Gargantext.Core.Viz.Phylo (Phylo(..))
import Gargantext.Core.Viz.LegacyPhylo (Phylo(..))
------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment